unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* enable sorting by version in `ls-lisp-handle-switches'
@ 2010-03-07  4:46 Toru TSUNEYOSHI
  2010-03-07 14:49 ` Stefan Monnier
  0 siblings, 1 reply; 5+ messages in thread
From: Toru TSUNEYOSHI @ 2010-03-07  4:46 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: Text/Plain, Size: 1568 bytes --]

Hello.

I made a function sorting by version in `ls-lisp-handle-switches', by
setting `dired-listing-switches' to "-alv".

At first, I made `string-version-lessp', by referring to
`glibc-2.11.1/string/strverscmp.c'.
But I was not satisfied with the spec.
So I made another function `string-logical-lessp'.

If no problems, please apply to `ls-lisp.el'.

Example:

  (sort '(
          "abc-1.0051.tgz"
          "abc-1.00501.tgz"
          "abc-1.007.tgz"
          "abc-1.012b.tgz"
          "abc-1.01a.tgz"
          )
        'string-logical-lessp)
  =>
  ("abc-1.01a.tgz"
   "abc-1.007.tgz"
   "abc-1.012b.tgz"
   "abc-1.0051.tgz"
   "abc-1.00501.tgz")

  (dired "d:/test/")
  =>
    d:/test:
    total used in directory 0 available 20000000
    drwxrwxrwx  1 Administrators none    0 Mar  7 12:03 .
    dr-xr-xr-x  1 Administrators none    0 Mar  7 12:23 ..
    -rw-rw-rw-  1 Administrators none    0 May  7 12:57 7
    -rw-rw-rw-  1 Administrators none    0 May  7 12:57 8
    -rw-rw-rw-  1 Administrators none    0 May  7 12:57 9
    -rw-rw-rw-  1 Administrators none    0 May  7 12:57 10
    -rw-rw-rw-  1 Administrators none    0 May  7 12:57 11
    -rw-rw-rw-  1 Administrators none    0 May  7 12:57 12
    -rw-rw-rw-  1 Administrators none    0 Mar  7 12:57 abc-1.01a.tgz
    -rw-rw-rw-  1 Administrators none    0 Mar  7 12:57 abc-1.007.tgz
    -rw-rw-rw-  1 Administrators none    0 Mar  7 12:57 abc-1.012b.tgz
    -rw-rw-rw-  1 Administrators none    0 Mar  7 12:57 abc-1.0051.tgz
    -rw-rw-rw-  1 Administrators none    0 Mar  7 12:57 abc-1.00501.tgz

[-- Attachment #2: strcmp.el --]
[-- Type: Text/Plain, Size: 7366 bytes --]

(defun string-version-lessp (s1 s2 &optional ignore-case)
  "Return t if first arg string is less than second in version order.
Case is significant in this comparison if IGNORE-CASE is nil.
Symbols are also allowed; their print names are used instead.
See also `string-logical-lessp'.

Example:
  (sort
   '(\"foo.zml-1.gz\"          => (\"foo.zml-1.gz\"
     \"foo.zml-100.gz\"            \"foo.zml-2.gz\"
     \"foo.zml-12.gz\"             \"foo.zml-6.gz\"
     \"foo.zml-13.gz\"             \"foo.zml-12.gz\"
     \"foo.zml-2.gz\"              \"foo.zml-13.gz\"
     \"foo.zml-25.gz\"             \"foo.zml-25.gz\"
     \"foo.zml-6.gz\")             \"foo.zml-100.gz\")
   'string-version-lessp)

  (sort
   '(\"abc-1.01a.tgz\"         => (\"abc-1.007.tgz\"
     \"abc-1.007.tgz\"             \"abc-1.012b.tgz\"
     \"abc-1.012b.tgz\")           \"abc-1.01a.tgz\")
   'string-version-lessp)

  (sort
   '(\"9.000001.10.tgz\"       => (\"009.01.91.tgz\"
     \"009.01.91.tgz\")            \"9.000001.10.tgz\")
   'string-version-lessp)
"
  (let* (;; states
	 (S_N #x0)		     ; normal
	 (S_I #x3)		     ; comparing integral part
	 (S_F #x6)		     ; comparing fractionnal parts
	 (S_Z #x9)		     ; idem but with leading Zeroes only

	 ;; Symbol(s)    0       [1-9]   others
	 ;; Transition   (10) 0  (01) d  (00) x
	 ;;
	 ;;		      x   d   0	   ; state
	 (next-state (vector S_N S_I S_Z   ; S_N
			     S_N S_I S_I   ; S_I
			     S_N S_F S_F   ; S_F
			     S_N S_F S_Z)) ; S_Z

	 ;; result-type
	 (CMP 2)			; return diff
	 (LEN 3)			; compare using len_diff/diff

	 ;; `glibc-2.11.1/string/strverscmp.c'
	 ;;
	 ;;			  *p1	  *p1			    ; pair
	 ;;			      *p2	      *p2	    ;
	 ;;					  *p3	  *p3	    ;
	 ;;
	 ;;		      x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0   ; state
	 (result-type (vector CMP CMP CMP CMP LEN CMP CMP CMP CMP   ; S_N
			      CMP -1  -1  +1  LEN LEN +1  LEN LEN   ; S_I
			      CMP CMP CMP CMP CMP CMP CMP CMP CMP   ; S_F
			      CMP +1  +1  -1  CMP CMP -1  CMP CMP)) ; S_Z

;;;	 ;; like `coreutils-6.12/lib/strverscmp.c'
;;;	 ;;
;;;	 ;;		      x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0   ; state
;;;	 (result-type (vector CMP CMP CMP CMP LEN CMP CMP CMP CMP   ; S_N
;;;			      CMP -1  -1  +1  LEN LEN +1  LEN LEN   ; S_I
;;;			      CMP CMP CMP CMP LEN CMP CMP CMP CMP   ; S_F
;;;			      CMP +1  +1  -1  CMP CMP -1  CMP CMP)) ; S_Z

	 ret	     ; same style as return value of C language `strcmp'
	 l1 l2	     ; length of string s1, s2
	 (n 0)	     ; index of string s1, s2
	 c1 c2	     ; character of string s1, s2 at index n
	 state
	 diff)
    (setq ret (catch 'end
		(if (eq s1 s2)
		    (throw 'end 0))

		(if (symbolp s1)
		    (setq s1 (symbol-name s1)))
		(if (symbolp s2)
		    (setq s2 (symbol-name s2)))
		(unless (stringp s1)
		  (signal 'wrong-type-argument `(stringp ,s1)))
		(unless (stringp s2)
		  (signal 'wrong-type-argument `(stringp ,s2)))

		(if ignore-case
		    (setq s1 (upcase s1)
			  s2 (upcase s2)))

		(setq l1 (length s1)
		      l2 (length s2)

		      c1 (if (< n l1) (elt s1 n) ?\0) ; ?\0: null terminator
		      c2 (if (< n l2) (elt s2 n) ?\0)
		      n (1+ n)

		      ;; Hint: '0' is a digit too.
		      state (+ S_N
			       (if (= c1 ?0) 1 0)
			       (if (and (<= ?0 c1) (<= c1 ?9)) 1 0))) ; (isdigit (c1) != 0)

		(while (= (setq diff (- c1 c2)) 0)
		  (if (= c1 ?\0)
		      (throw 'end diff))

		  (setq state (aref next-state state)

			c1 (if (< n l1) (elt s1 n) ?\0)
			c2 (if (< n l2) (elt s2 n) ?\0)
			n (1+ n)

			state (+ state
				 (if (= c1 ?0) 1 0)
				 (if (and (<= ?0 c1) (<= c1 ?9)) 1 0))))

		(setq state (aref result-type (+ (* state 3)
						 (if (= c2 ?0) 1 0)
						 (if (and (<= ?0 c2) (<= c2 ?9)) 1 0))))

		(cond ((= state CMP)
		       (setq ret diff))
		      ((= state LEN)
		       (while (progn
				(setq c1 (if (< n l1) (elt s1 n) ?\0)
				      c2 (if (< n l2) (elt s2 n) ?\0)
				      n (1+ n))
				(and (<= ?0 c1) (<= c1 ?9)))
			 (if (not (and (<= ?0 c2) (<= c2 ?9)))
			     (throw 'end 1)))
		       (setq ret (if (and (<= ?0 c2) (<= c2 ?9)) -1 diff)))
		      (t
		       (setq ret state)))

		ret))

    ;; convert ret to the style of `string-lessp'
    (< ret 0)))

(defalias 'string-version< 'string-version-lessp)

(defun string-logical-lessp (s1 s2 &optional ignore-case)
  "Return t if first arg string is less than second in logical version order.
Case is significant in this comparison if IGNORE-CASE is nil.
Symbols are also allowed; their print names are used instead.
See also `string-version-lessp'.

Example:
  (sort
   '(\"foo.zml-1.gz\"          => (\"foo.zml-1.gz\"
     \"foo.zml-100.gz\"            \"foo.zml-2.gz\"
     \"foo.zml-12.gz\"             \"foo.zml-6.gz\"
     \"foo.zml-13.gz\"             \"foo.zml-12.gz\"
     \"foo.zml-2.gz\"              \"foo.zml-13.gz\"
     \"foo.zml-25.gz\"             \"foo.zml-25.gz\"
     \"foo.zml-6.gz\")             \"foo.zml-100.gz\")
   'string-logical-lessp)

  (sort
   '(\"abc-1.01a.tgz\"         => (\"abc-1.01a.tgz\"
     \"abc-1.007.tgz\"             \"abc-1.007.tgz\"
     \"abc-1.012b.tgz\")           \"abc-1.012b.tgz\")
   'string-logical-lessp)

  (sort
   '(\"9.000001.10.tgz\"       => (\"9.000001.10.tgz\"
     \"009.01.91.tgz\")            \"009.01.91.tgz\")
   'string-logical-lessp)
"
  (let (ret	     ; same style as return value of C language `strcmp'
	l1 l2	     ; length of string s1, s2
	(n1 0)	     ; index of string s1, s2
	(n2 0)
	(c1 -1)	; character of string s1, s2 at index n1, n2
	(c2 -1)	; (set dummy code as initial (and invalid as character) value)
	diff)
    (if (symbolp s1)
	(setq s1 (symbol-name s1)))
    (if (symbolp s2)
	(setq s2 (symbol-name s2)))
    (unless (stringp s1)
      (signal 'wrong-type-argument `(stringp ,s1)))
    (unless (stringp s2)
      (signal 'wrong-type-argument `(stringp ,s2)))

    (if ignore-case
	(setq s1 (upcase s1)
	      s2 (upcase s2)))

    (setq l1 (length s1)
	  l2 (length s2))

    (setq ret (catch 'end
		(while (= (setq diff (- c1 c2)) 0)
		  (if (or (= c1 ?\0) (= c2 ?\0))
		      (throw 'end diff))

		  (setq c1 (if (< n1 l1) (elt s1 n1) ?\0) ; ?\0: null terminator
			c2 (if (< n2 l2) (elt s2 n2) ?\0))

		  ;; encounter numbers ?
		  (if (and (<= ?0 c1) (<= c1 ?9)
			   (<= ?0 c2) (<= c2 ?9))
		      (let (sub-s1 sub-s2
			    sub-l1 sub-l2)
			;; skip needless "0"
			;;
			;; example:
			;;  "00...0" => "0"
			;;  "010"    => "10"
			;;  "000305" => "305"
			(string-match "0*\\([0-9]+\\)" s1 n1)
			(setq sub-s1 (match-string 1 s1)
			      sub-l1 (length sub-s1)
			      n1 (match-end 1))

			(string-match "0*\\([0-9]+\\)" s2 n2)
			(setq sub-s2 (match-string 1 s2)
			      sub-l2 (length sub-s2)
			      n2 (match-end 1))

			;; number whose length is shorter is smaller than another
			(cond ((< sub-l1 sub-l2)
			       (throw 'end -1))
			      ((> sub-l1 sub-l2)
			       (throw 'end 1))
			      (t
			       ;; don't use `number-to-string' because of overflow
			       (setq ret (compare-strings sub-s1 0 nil
							  sub-s2 0 nil))
			       (unless (eq ret t)
				 (throw 'end ret))))

			;; as both numbers are equal, prepare for next step
			(setq c1 (if (< n1 l1) (elt s1 n1) ?\0)
			      c2 (if (< n2 l2) (elt s2 n2) ?\0))))

		  (setq n1 (1+ n1)
			n2 (1+ n2)))

		diff))

    ;; convert ret to the style of `string-lessp'
    (< ret 0)))

(defalias 'string-logical< 'string-logical-lessp)

[-- Attachment #3: ls-lisp.el.diff --]
[-- Type: Text/X-Patch, Size: 1783 bytes --]

--- ls-lisp.el.orig	2009-06-21 13:37:45.000000000 +0900
+++ ls-lisp.el	2010-03-07 11:09:33.595406400 +0900
@@ -196,6 +196,9 @@
 (or (featurep 'ls-lisp)  ; FJW: unless this file is being reloaded!
     (setq original-insert-directory (symbol-function 'insert-directory)))
 
+;;(defalias 'ls-lisp-version-lessp 'string-version-lessp)
+(defalias 'ls-lisp-version-lessp 'string-logical-lessp)
+
 \f
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -493,6 +496,32 @@
 	(error (message "Unsorted (ls-lisp sorting error) - %s"
 			(error-message-string err))
 	       (ding) (sit-for 2))))	; to show user the message!
+  ;; Should execute `ls-lisp-version-lessp'
+  ;; after sorting by `ls-lisp-string-lessp' or others
+  ;;
+  ;; The reason:
+  ;;    See the following numbers.
+  ;;      "1.5"
+  ;;	  "1.05"
+  ;;
+  ;;    `ls-lisp-string-lessp' *may* eval that both numbers are equal.
+  ;;    So the function returns `nil'. In other words, the order is unchanged.
+  ;;    But it is clear that these numbers shoud be sorted
+  ;;    in lexicographic order before.
+  (if (and (not (memq ?U switches)) ; unsorted
+	   (memq ?v switches))
+      ;; Catch and ignore unexpected sorting errors
+      (condition-case err
+	  (setq file-alist
+		(let (index)
+		  ;; Copy file-alist in case of error
+		  (sort (copy-sequence file-alist) ; modifies its argument!
+			(lambda (x y) ; sorted on version
+			  (ls-lisp-version-lessp (car x) (car y)
+						 ls-lisp-ignore-case)))))
+	(error (message "Unsorted (ls-lisp sorting error) - %s"
+			(error-message-string err))
+	       (ding) (sit-for 2)))) ; to show user the message!
   (if (memq ?F switches)		; classify switch
       (setq file-alist (mapcar 'ls-lisp-classify file-alist)))
   (if ls-lisp-dirs-first

[-- Attachment #4: test.el --]
[-- Type: Text/Plain, Size: 1536 bytes --]

;; (query-replace "string-version-lessp" "string-logical-lessp")
;; (query-replace "string-logical-lessp" "string-version-lessp")

(sort '(
	"foo.zml-1.gz"
	"foo.zml-100.gz"
	"foo.zml-12.gz"
	"foo.zml-13.gz"
	"foo.zml-2.gz"
	"foo.zml-25.gz"
	"foo.zml-6.gz"
	)
      'string-version-lessp)

(sort '(
	"foo.zml-1 gz"
	"foo.zml-100 gz"
	"foo.zml-12 gz"
	"foo.zml-13 gz"
	"foo.zml-2 gz"
	"foo.zml-25 gz"
	"foo.zml-6 gz"
	)
      'string-version-lessp)

(sort '(
	"foo.zml-1~gz"
	"foo.zml-100~gz"
	"foo.zml-12~gz"
	"foo.zml-13~gz"
	"foo.zml-2~gz"
	"foo.zml-25~gz"
	"foo.zml-6~gz"
	)
      'string-version-lessp)

(sort '(
	"abc-1.0051.tgz"
	"abc-1.00501.tgz"
	"abc-1.007.tgz"
	"abc-1.012b.tgz"
	"abc-1.01a.tgz"
	)
      'string-version-lessp)

(sort '(
	"1.007.tgz"
	"1.01a.tgz"
	)
      'string-version-lessp)

(sort '(
	"012b.tgz"
	"01a.tgz"
	)
      'string-version-lessp)

(sort '(
	"01.012b.tgz"
	"009.01a.tgz"
	)
      'string-version-lessp)

(sort '(
	"9.011.tgz"
	"009.01.tgz"
	)
      'string-version-lessp)

(sort '(
	"9.000001.10tgz"
	"009.01.91tgz"
	;;"009.01.9tgz"
	;;"009.01.50tgz"
	)
      'string-version-lessp)

(sort '(
	"9,001.tgz"
	"9000.tgz"
	"9,000.tgz"
	)
      'string-version-lessp)

(sort '(
	"0123.tgz"
	"01012.tgz"
	)
      'string-version-lessp)

(sort '(
	"1.05.txt"
	"1.5.txt"
	)
      'string-version-lessp)

(sort '(
	"a001b.txt"
	"a0b.txt"
	)
      'string-version-lessp)

(sort '(
	"a01b.txt"
	"a0b.txt"
	)
      'string-version-lessp)

(sort '(
	"abc001.txt"
	"abc0a.txt"
	)
      'string-version-lessp)


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: enable sorting by version in `ls-lisp-handle-switches'
  2010-03-07  4:46 enable sorting by version in `ls-lisp-handle-switches' Toru TSUNEYOSHI
@ 2010-03-07 14:49 ` Stefan Monnier
       [not found]   ` <20100312.030317.128157175.tune@to.email.ne.jp>
  0 siblings, 1 reply; 5+ messages in thread
From: Stefan Monnier @ 2010-03-07 14:49 UTC (permalink / raw)
  To: Toru TSUNEYOSHI; +Cc: emacs-devel

> At first, I made `string-version-lessp', by referring to
> `glibc-2.11.1/string/strverscmp.c'.
> But I was not satisfied with the spec.
> So I made another function `string-logical-lessp'.

> If no problems, please apply to `ls-lisp.el'.

You may also want to take a look at mpc-compare-strings which does
something similar, tho simpler (not specific to versions, just taking
care of numbers).


        Stefan




^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: enable sorting by version in `ls-lisp-handle-switches'
       [not found]   ` <20100312.030317.128157175.tune@to.email.ne.jp>
@ 2010-03-11 19:20     ` Stefan Monnier
  2010-03-12  5:31       ` Toru TSUNEYOSHI
  2010-03-12  7:00     ` Toru TSUNEYOSHI
  1 sibling, 1 reply; 5+ messages in thread
From: Stefan Monnier @ 2010-03-11 19:20 UTC (permalink / raw)
  To: Toru TSUNEYOSHI; +Cc: emacs-devel

>> You may also want to take a look at mpc-compare-strings which does
>> something similar, tho simpler (not specific to versions, just taking
>> care of numbers).
> I checked `mpc-compare-strings'.
> `mpc-compare-strings' and `string-logical-lessp' are different.

Indeed, I don't expect them to behave identically.

>   (mpc-compare-strings "01-00001" "1-1") => 1
>   (string-logical-lessp "01-00001" "1-1") => nil

>   (mpc-compare-strings "1-1" "01-00001") => -1
>   (string-logical-lessp "1-1" "01-00001") => nil

I remember having struggled over "correct" handling (meaning, making
sure that it's transitive) of leading zeroes in
mpc-compare-strings, so I'm not very surprised.

I don't really care whether "1-1" is considered larger, smaller or
equivalent to "01-00001".  I don't think such issues show up much in
practice and I'm not sure as a user I'd prefer one over the other.

The implementation strategy is also very different, so I'd expect the
performance behavior to be quite different as well.


        Stefan




^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: enable sorting by version in `ls-lisp-handle-switches'
  2010-03-11 19:20     ` Stefan Monnier
@ 2010-03-12  5:31       ` Toru TSUNEYOSHI
  0 siblings, 0 replies; 5+ messages in thread
From: Toru TSUNEYOSHI @ 2010-03-12  5:31 UTC (permalink / raw)
  To: monnier; +Cc: emacs-devel

> Indeed, I don't expect them to behave identically.

OK. I wanted to list just different points between those functions.
Those are not alternative, I think.
(I have still struggled to write English properly. :) )

> The implementation strategy is also very different, so I'd expect the
> performance behavior to be quite different as well.

Yes. That's right.

Originally, I wanted `ls-lisp-handle-switches' to sort by version. Then
I made the functions, `string-version-lessp' (based on Gnu) and
`string-logical-lessp' (based on Windows).

I think that users will select the favorite one.




^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: enable sorting by version in `ls-lisp-handle-switches'
       [not found]   ` <20100312.030317.128157175.tune@to.email.ne.jp>
  2010-03-11 19:20     ` Stefan Monnier
@ 2010-03-12  7:00     ` Toru TSUNEYOSHI
  1 sibling, 0 replies; 5+ messages in thread
From: Toru TSUNEYOSHI @ 2010-03-12  7:00 UTC (permalink / raw)
  To: monnier; +Cc: emacs-devel

[-- Attachment #1: Type: Text/Plain, Size: 499 bytes --]

I mistaked in the following.

> And, if you eval the following, `string-logical-lessp' emulates sorting
> style by Windows Explorer on Windows XP or later. (Windows Explorer
> seems to use function `StrCmpLogicalW')
> 
>     (put 'string-version-lessp 'strcmplogical t)

I should have written

    (put 'string-logical-lessp 'strcmplogical t)

And for temporary change, I shoud have replaced symbol property with
variable.

    (setq string-logical-lessp--strcmplogical t)

I fixed and attached it.

[-- Attachment #2: strcmp.el --]
[-- Type: Text/Plain, Size: 15240 bytes --]

;; `strcmplogical-trans-tbl-at-1st-char' and
;; `strcmplogical-trans-tbl-after-2nd-char' of `string-logical-lessp'
;; are results composed of only output by Window Explorer on Windows XP
;; and Windows API `StrCmpLogicalW'.
;;
;; It is arranged about some code (?\C-? ?' ?- and array using a case
;; sensitive of alphabet. (Enable case insensitive by `ls-lisp-ignore-case')

(put
 'string-logical-lessp
 'strcmplogical-trans-tbl-at-1st-char
 '(
   #x00 ; \C-@
   #x01 ; \C-a
   #x02 ; \C-b
   #x03 ; \C-c
   #x04 ; \C-d
   #x05 ; \C-e
   #x06 ; \C-f
   #x07 ; \C-g
   #x08 ; \C-h
   #x0e ; \C-n
   #x0f ; \C-o
   #x10 ; \C-p
   #x11 ; \C-q
   #x12 ; \C-r
   #x13 ; \C-s
   #x14 ; \C-t
   #x15 ; \C-u
   #x16 ; \C-v
   #x17 ; \C-w
   #x18 ; \C-x
   #x19 ; \C-y
   #x1a ; \C-z
   #x1b ; \C-[
   #x1c ; \C-\\
   #x1d ; \C-]
   #x1e ; \C-^
   #x1f ; \C-_
   #x20 ;   (SPC)
   #x09 ; 	TAB
   #x0a ; \C-j
   #x0b ; \C-k
   #x0c ; \C-l
   #x0d ; \C-m
   #x21 ; !
   #x22 ; "
   #x23 ; #
   #x24 ; $
   #x25 ; %
   #x26 ; &
   #x27 ; '
   #x28 ; (
   #x29 ; )
   #x2a ; *
   #x2c ; ,
   #x2e ; .
   #x2f ; /
   #x3a ; :
   #x3b ; ;
   #x3f ; ?
   #x40 ; @
   #x5b ; [
   #x5d ; ]
   #x5e ; ^
   #x5f ; _
   #x60 ; `
   #x7b ; {
   #x7c ; |
   #x7d ; }
   #x7e ; ~
   #x2b ; +
   #x2d ; -
   #x3c ; <
   #x3d ; =
   #x3e ; >
   #x5c ; \
   #x7f ; \C-?
   #x30 ; 0
   #x31 ; 1
   #x32 ; 2
   #x33 ; 3
   #x34 ; 4
   #x35 ; 5
   #x36 ; 6
   #x37 ; 7
   #x38 ; 8
   #x39 ; 9
   #x41 ; A
   #x42 ; B
   #x43 ; C
   #x44 ; D
   #x45 ; E
   #x46 ; F
   #x47 ; G
   #x48 ; H
   #x49 ; I
   #x4a ; J
   #x4b ; K
   #x4c ; L
   #x4d ; M
   #x4e ; N
   #x4f ; O
   #x50 ; P
   #x51 ; Q
   #x52 ; R
   #x53 ; S
   #x54 ; T
   #x55 ; U
   #x56 ; V
   #x57 ; W
   #x58 ; X
   #x59 ; Y
   #x5a ; Z
   #x61 ; a
   #x62 ; b
   #x63 ; c
   #x64 ; d
   #x65 ; e
   #x66 ; f
   #x67 ; g
   #x68 ; h
   #x69 ; i
   #x6a ; j
   #x6b ; k
   #x6c ; l
   #x6d ; m
   #x6e ; n
   #x6f ; o
   #x70 ; p
   #x71 ; q
   #x72 ; r
   #x73 ; s
   #x74 ; t
   #x75 ; u
   #x76 ; v
   #x77 ; w
   #x78 ; x
   #x79 ; y
   #x7a ; z
   ))

(put
 'string-logical-lessp
 'strcmplogical-trans-tbl-after-2nd-char
 '(
   #x00 ; \C-@
   #x01 ; \C-a
   #x02 ; \C-b
   #x03 ; \C-c
   #x04 ; \C-d
   #x05 ; \C-e
   #x06 ; \C-f
   #x07 ; \C-g
   #x08 ; \C-h
   #x0e ; \C-n
   #x0f ; \C-o
   #x10 ; \C-p
   #x11 ; \C-q
   #x12 ; \C-r
   #x13 ; \C-s
   #x14 ; \C-t
   #x15 ; \C-u
   #x16 ; \C-v
   #x17 ; \C-w
   #x18 ; \C-x
   #x19 ; \C-y
   #x1a ; \C-z
   #x1b ; \C-[
   #x1c ; \C-\\
   #x1d ; \C-]
   #x1e ; \C-^
   #x1f ; \C-_
   #x30 ; 0
   #x31 ; 1
   #x32 ; 2
   #x33 ; 3
   #x34 ; 4
   #x35 ; 5
   #x36 ; 6
   #x37 ; 7
   #x38 ; 8
   #x39 ; 9
   #x20 ;   (SPC)
   #x09 ; 	TAB
   #x0a ; \C-j
   #x0b ; \C-k
   #x0c ; \C-l
   #x0d ; \C-m
   #x21 ; !
   #x22 ; "
   #x23 ; #
   #x24 ; $
   #x25 ; %
   #x26 ; &
   #x27 ; '
   #x28 ; (
   #x29 ; )
   #x2a ; *
   #x2c ; ,
   #x2e ; .
   #x2f ; /
   #x3a ; :
   #x3b ; ;
   #x3f ; ?
   #x40 ; @
   #x5b ; [
   #x5d ; ]
   #x5e ; ^
   #x5f ; _
   #x60 ; `
   #x7b ; {
   #x7c ; |
   #x7d ; }
   #x7e ; ~
   #x2b ; +
   #x2d ; -
   #x3c ; <
   #x3d ; =
   #x3e ; >
   #x5c ; \
   #x7f ; \C-?
   #x41 ; A
   #x42 ; B
   #x43 ; C
   #x44 ; D
   #x45 ; E
   #x46 ; F
   #x47 ; G
   #x48 ; H
   #x49 ; I
   #x4a ; J
   #x4b ; K
   #x4c ; L
   #x4d ; M
   #x4e ; N
   #x4f ; O
   #x50 ; P
   #x51 ; Q
   #x52 ; R
   #x53 ; S
   #x54 ; T
   #x55 ; U
   #x56 ; V
   #x57 ; W
   #x58 ; X
   #x59 ; Y
   #x5a ; Z
   #x61 ; a
   #x62 ; b
   #x63 ; c
   #x64 ; d
   #x65 ; e
   #x66 ; f
   #x67 ; g
   #x68 ; h
   #x69 ; i
   #x6a ; j
   #x6b ; k
   #x6c ; l
   #x6d ; m
   #x6e ; n
   #x6f ; o
   #x70 ; p
   #x71 ; q
   #x72 ; r
   #x73 ; s
   #x74 ; t
   #x75 ; u
   #x76 ; v
   #x77 ; w
   #x78 ; x
   #x79 ; y
   #x7a ; z
   ))

;; Convert the above translation table to vector indexed by ascii code
(mapc
 (lambda (x)
   (unless (vectorp (get 'string-logical-lessp x))
     ;; make vector from list of cdr part
     ;;
     ;; '((#x00 . 0) (#x01 . 1) ... (#x7a . 127) ... (#x7f . 53))
     ;; => '(0 1 ... 127 ... 53)
     ;; =>  [0 1 ... 127 ... 53]
     (put 'string-logical-lessp
	  x
	  (apply
	   'vector
	   (mapcar
	    'cdr
	    ;; sort by car part
	    ;;
	    ;; '((#x00 . 0) (#x01 . 1) ... (#x7f . 53) ... (#x7a . 127))
	    ;; => '((#x00 . 0) (#x01 . 1) ... (#x7a . 127) ... (#x7f . 53))
	    (sort
	     (let ((i 0))
	       ;; make index on cdr part
	       ;;
	       ;; '(#x00 #x01 ... #x7a)
	       ;; => '((#x00 . 0) (#x01 . 1) ... (#x7f . 53) ... (#x7a . 127))
	       (mapcar
		(lambda (x)
		  (prog1
		      (cons x i)
		    (setq i (1+ i))))
		(get 'string-logical-lessp x)))
	     (lambda (x y)
	       (< (car x) (car y)))))))))
 '(strcmplogical-trans-tbl-at-1st-char
   strcmplogical-trans-tbl-after-2nd-char))

;; Length
(put 'string-logical-lessp 'strcmplogical-trans-tbl-at-1st-char-len
     (length (get 'string-logical-lessp 'strcmplogical-trans-tbl-at-1st-char)))
(put 'string-logical-lessp 'strcmplogical-trans-tbl-after-2nd-char-len
     (length (get 'string-logical-lessp 'strcmplogical-trans-tbl-after-2nd-char)))

(defvar string-logical-lessp--strcmplogical nil
  "If non-nil, emulate filename sorting style of Window Explorer on
Windows XP (or later) and Windows API `StrCmpLogicalW' easily.
See also `string-logical-lessp'.

Example:
    (sort
     '(\".emacs\"
       \".emacs-places.~10~\"
       \".emacs-places.~9~\"
       \".emacs.~10~\"
       \".emacs.~9~\")
     'string-logical-lessp)
    =>
    ;; string-logical-lessp--strcmplogical
    ;; =>
    ;; nil:                     t:
    (\".emacs\"                   (\".emacs\"
     \".emacs-places.~9~\"         \".emacs.~9~\"            ; <= just after \".emacs\"
     \".emacs-places.~10~\"        \".emacs.~10~\"
     \".emacs.~9~\"                \".emacs-places.~9~\"
     \".emacs.~10~\")              \".emacs-places.~10~\")")

\f

(defalias 'string-version< 'string-version-lessp)

(defun string-version-lessp (s1 s2 &optional ignore-case)
  "Return t if first arg string is less than second in version order.
Case is significant in this comparison if IGNORE-CASE is nil.
Symbols are also allowed; their print names are used instead.
See also `string-logical-lessp'.

Policy of version order:
  See `glibc-2.11.1/string/strverscmp.c' or it's manual.

Example:
  (sort
   '(\"foo.zml-1.gz\"          => (\"foo.zml-1.gz\"
     \"foo.zml-100.gz\"            \"foo.zml-2.gz\"
     \"foo.zml-12.gz\"             \"foo.zml-6.gz\"
     \"foo.zml-13.gz\"             \"foo.zml-12.gz\"
     \"foo.zml-2.gz\"              \"foo.zml-13.gz\"
     \"foo.zml-25.gz\"             \"foo.zml-25.gz\"
     \"foo.zml-6.gz\")             \"foo.zml-100.gz\")
   'string-version-lessp)

  (sort
   '(\"abc-1.01a.tgz\"         => (\"abc-1.007.tgz\"
     \"abc-1.007.tgz\"             \"abc-1.012b.tgz\"
     \"abc-1.012b.tgz\")           \"abc-1.01a.tgz\")
   'string-version-lessp)

  (sort
   '(\"9.000001.10.tgz\"       => (\"009.01.91.tgz\"
     \"009.01.91.tgz\")            \"9.000001.10.tgz\")
   'string-version-lessp)"
  (let* (;; states
	 (S_N #x0)		     ; normal
	 (S_I #x3)		     ; comparing integral part
	 (S_F #x6)		     ; comparing fractionnal parts
	 (S_Z #x9)		     ; idem but with leading Zeroes only

	 ;; Symbol(s)    0       [1-9]   others
	 ;; Transition   (10) 0  (01) d  (00) x
	 ;;
	 ;;		      x   d   0	   ; state
	 (next-state (vector S_N S_I S_Z   ; S_N
			     S_N S_I S_I   ; S_I
			     S_N S_F S_F   ; S_F
			     S_N S_F S_Z)) ; S_Z

	 ;; result-type
	 (CMP 2)			; return diff
	 (LEN 3)			; compare using len_diff/diff

	 ;; `glibc-2.11.1/string/strverscmp.c'
	 ;;
	 ;;			  *p1	  *p1			    ; pair
	 ;;			      *p2	      *p2	    ;
	 ;;					  *p3	  *p3	    ;
	 ;;
	 ;;		      x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0   ; state
	 (result-type (vector CMP CMP CMP CMP LEN CMP CMP CMP CMP   ; S_N
			      CMP -1  -1  +1  LEN LEN +1  LEN LEN   ; S_I
			      CMP CMP CMP CMP CMP CMP CMP CMP CMP   ; S_F
			      CMP +1  +1  -1  CMP CMP -1  CMP CMP)) ; S_Z

;;;	 ;; like `coreutils-6.12/lib/strverscmp.c'
;;;	 ;;
;;;	 ;;		      x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0   ; state
;;;	 (result-type (vector CMP CMP CMP CMP LEN CMP CMP CMP CMP   ; S_N
;;;			      CMP -1  -1  +1  LEN LEN +1  LEN LEN   ; S_I
;;;			      CMP CMP CMP CMP LEN CMP CMP CMP CMP   ; S_F
;;;			      CMP +1  +1  -1  CMP CMP -1  CMP CMP)) ; S_Z

	 ret		     ; same style as return value of C language `strcmp'
	 l1 l2		     ; length of string s1, s2
	 (i 0)		     ; index of string s1, s2
	 c1 c2		     ; character of string s1, s2 at index i
	 diff		     ; difference between c1 and c2
	 (dl '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)) ; digit-list
	 (dl-except-0 '(?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)) ; digit-list except 0
	 (state S_N)
	 (repeat t))
    (setq ret (catch 'end
		(if (eq s1 s2)
		    (throw 'end 0))

		(if (symbolp s1)
		    (setq s1 (symbol-name s1)))
		(if (symbolp s2)
		    (setq s2 (symbol-name s2)))
		(unless (stringp s1)
		  (signal 'wrong-type-argument `(stringp ,s1)))
		(unless (stringp s2)
		  (signal 'wrong-type-argument `(stringp ,s2)))

		(if ignore-case
		    (setq s1 (upcase s1)
			  s2 (upcase s2)))

		(setq l1 (length s1)
		      l2 (length s2))

		(while repeat
		  ;; check limit
		  (if (<= l1 i)
		      (if (<= l2 i)
			  (throw 'end 0) ; s1 and s2 match.
			(throw 'end -1)) ; s1 is less than s2.
		    (if (<= l2 i)
			(throw 'end 1))) ; s1 is greater than s2.

		  (setq c1 (elt s1 i)
			c2 (elt s2 i)
			i (1+ i)

			diff (- c1 c2)
			state (+ state (cond ((eq c1 ?0) 2)
					     ((memq c1 dl-except-0) 1)
					     (t 0))))

		  (if (= diff 0)
		      (setq state (aref next-state state))
		    (setq repeat nil)))

		(setq state (aref result-type (+ (* state 3)
						 (cond ((eq c2 ?0) 2)
						       ((memq c2 dl-except-0) 1)
						       (t 0)))))

		(cond ((eq state CMP)
		       (setq ret diff))
		      ((eq state LEN)
		       (while (progn
				(setq c1 (if (< i l1) (elt s1 i) -1) ; -1: invalid code as character
				      c2 (if (< i l2) (elt s2 i) -1)
				      i (1+ i))
				(memq c1 dl))
			 (unless (memq c2 dl)
			   (throw 'end 1)))
		       (setq ret (if (memq c2 dl) -1 diff)))
		      (t
		       (setq ret state)))

		ret))

    ;; convert ret to the style of `string-lessp'
    (< ret 0)))

(defalias 'string-logical< 'string-logical-lessp)

(defun string-logical-lessp (s1 s2 &optional ignore-case)
  "Return t if first arg string is less than second in logical version order.
Case is significant in this comparison if IGNORE-CASE is nil.
Symbols are also allowed; their print names are used instead.
See also `string-version-lessp'.

Policy of Logical version order:
  Sort by number whose leading 0 is skipped.
  For example, \"1\", \"0001\" and \"0000001\" are equivalent.

  And if `string-logical-lessp--strcmplogical' is non-nil, emulate
  filename sorting style of Window Explorer on Windows XP (or later) and
  Windows API `StrCmpLogicalW' easily. (The emulation is poor and
  incomplete.)
    1st character: sort by alphabetical order (not precisely ascii)
    after 2nd character: digit takes first priority

Example:
  (sort
   '(\"foo.zml-1.gz\"          => (\"foo.zml-1.gz\"
     \"foo.zml-100.gz\"            \"foo.zml-2.gz\"
     \"foo.zml-12.gz\"             \"foo.zml-6.gz\"
     \"foo.zml-13.gz\"             \"foo.zml-12.gz\"
     \"foo.zml-2.gz\"              \"foo.zml-13.gz\"
     \"foo.zml-25.gz\"             \"foo.zml-25.gz\"
     \"foo.zml-6.gz\")             \"foo.zml-100.gz\")
   'string-logical-lessp)

  (sort
   '(\"abc-1.01a.tgz\"         => (\"abc-1.01a.tgz\"
     \"abc-1.007.tgz\"             \"abc-1.007.tgz\"
     \"abc-1.012b.tgz\")           \"abc-1.012b.tgz\")
   'string-logical-lessp)

  (sort
   '(\"9.000001.10.tgz\"       => (\"9.000001.10.tgz\"
     \"009.01.91.tgz\")            \"009.01.91.tgz\")
   'string-logical-lessp)

  ;; if `string-logical-lessp--strcmplogical' is non-nil

  (sort
   '(\"1#.txt\"                => (\"##.txt\"
     \"##.txt\")                   \"1#.txt\")
   'string-logical-lessp)

  (sort
   '(\"#1.txt\"                => (\"#1.txt\"
     \"##.txt\")                   \"##.txt\")
   'string-logical-lessp)"
  (let (ret		     ; same style as return value of C language `strcmp'
	l1 l2		     ; length of string s1, s2
	(i1 0)		     ; index of string s1, s2
	(i2 0)
	(c1 0)			    ; character of string s1, s2 at index n1, n2
	(c2 0)			    ; (set dummy code as initial value)
	d1 d2			    ; digit flag
	(dl '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9))) ; digit-list
    (if (symbolp s1)
	(setq s1 (symbol-name s1)))
    (if (symbolp s2)
	(setq s2 (symbol-name s2)))
    (unless (stringp s1)
      (signal 'wrong-type-argument `(stringp ,s1)))
    (unless (stringp s2)
      (signal 'wrong-type-argument `(stringp ,s2)))

    (if ignore-case
	(setq s1 (upcase s1)
	      s2 (upcase s2)))

    (setq l1 (length s1)
	  l2 (length s2))

    (setq ret (catch 'end
		(while t
		  ;; check limit
		  (if (<= l1 i1)
		      (if (<= l2 i2)
			  (throw 'end 0) ; s1 and s2 match logically.
			(throw 'end -1)) ; s1 is less than s2 logically.
		    (if (<= l2 i2)
			(throw 'end 1))) ; s1 is greater than s2 logically.

		  (setq c1 (elt s1 i1)
			c2 (elt s2 i2)

			d1 (memq c1 dl)
			d2 (memq c2 dl))

		  (cond ((and d1 d2)	; both c1 and c2 are digit.
			 (let (n1 n2	; number
			       w1 w2)	; length (or width) of number
			   ;; skip needless "0"
			   ;;
			   ;; example:
			   ;;  "0"      => "0"
			   ;;  "00000"  => "0"
			   ;;  "10"     => "10"
			   ;;  "010"    => "10"
			   ;;  "000010" => "10"
			   (string-match "0*\\([0-9]+\\)" s1 i1)
			   (setq n1 (match-string 1 s1)
				 w1 (length n1)
				 i1 (match-end 1)) ; next character index after number
			   (string-match "0*\\([0-9]+\\)" s2 i2)
			   (setq n2 (match-string 1 s2)
				 w2 (length n2)
				 i2 (match-end 1))

			   ;; number whose length is shorter is less than another.
			   (cond ((< w1 w2) (throw 'end -1))
				 ((> w1 w2) (throw 'end 1))
				 (t
				  ;; as both lengths are equal,
				  ;; we should use `compare-strings' instead of
				  ;; `number-to-string' to avoid overflow.
				  (setq ret (compare-strings n1 nil nil
							     n2 nil nil))
				  (unless (eq ret t)
				    (throw 'end ret))))))
			(t
			 (setq ret (- c1 c2))
			 (if (= ret 0)
			     ;; next character index
			     (setq i1 (1+ i1)
				   i2 (1+ i2))
			   (when string-logical-lessp--strcmplogical
			     (let (tbl tbl-len)
			       (cond ((= i1 0) ; 1st character: sort by alphabetical order
				      (setq tbl (get 'string-logical-lessp
						     'strcmplogical-trans-tbl-at-1st-char)
					    tbl-len (get 'string-logical-lessp
							 'strcmplogical-trans-tbl-at-1st-char-len)))
				     (t	; after 2nd character: digit takes first priority
				      (setq tbl (get 'string-logical-lessp
						     'strcmplogical-trans-tbl-after-2nd-char)
					    tbl-len (get 'string-logical-lessp
							 'strcmplogical-trans-tbl-after-2nd-char-len))))
			       (if (< c1 tbl-len)
				   (setq c1 (aref tbl c1)))
			       (if (< c2 tbl-len)
				   (setq c2 (aref tbl c2))))
			     (setq ret (- c1 c2)))
			   (throw 'end ret)))))))

    ;; convert ret to the style of `string-lessp'
    (< ret 0)))

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-03-12  7:00 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-03-07  4:46 enable sorting by version in `ls-lisp-handle-switches' Toru TSUNEYOSHI
2010-03-07 14:49 ` Stefan Monnier
     [not found]   ` <20100312.030317.128157175.tune@to.email.ne.jp>
2010-03-11 19:20     ` Stefan Monnier
2010-03-12  5:31       ` Toru TSUNEYOSHI
2010-03-12  7:00     ` Toru TSUNEYOSHI

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).