;; `strcmplogical-trans-tbl-at-1st-char' and ;; `strcmplogical-trans-tbl-after-2nd-char' of `string-logical-lessp' ;; are results composed of only output by Window Explorer on Windows XP ;; and Windows API `StrCmpLogicalW'. ;; ;; It is arranged about some code (?\C-? ?' ?- and array using a case ;; sensitive of alphabet. (Enable case insensitive by `ls-lisp-ignore-case') (put 'string-logical-lessp 'strcmplogical-trans-tbl-at-1st-char '( #x00 ; \C-@ #x01 ; \C-a #x02 ; \C-b #x03 ; \C-c #x04 ; \C-d #x05 ; \C-e #x06 ; \C-f #x07 ; \C-g #x08 ; \C-h #x0e ; \C-n #x0f ; \C-o #x10 ; \C-p #x11 ; \C-q #x12 ; \C-r #x13 ; \C-s #x14 ; \C-t #x15 ; \C-u #x16 ; \C-v #x17 ; \C-w #x18 ; \C-x #x19 ; \C-y #x1a ; \C-z #x1b ; \C-[ #x1c ; \C-\\ #x1d ; \C-] #x1e ; \C-^ #x1f ; \C-_ #x20 ; (SPC) #x09 ; TAB #x0a ; \C-j #x0b ; \C-k #x0c ; \C-l #x0d ; \C-m #x21 ; ! #x22 ; " #x23 ; # #x24 ; $ #x25 ; % #x26 ; & #x27 ; ' #x28 ; ( #x29 ; ) #x2a ; * #x2c ; , #x2e ; . #x2f ; / #x3a ; : #x3b ; ; #x3f ; ? #x40 ; @ #x5b ; [ #x5d ; ] #x5e ; ^ #x5f ; _ #x60 ; ` #x7b ; { #x7c ; | #x7d ; } #x7e ; ~ #x2b ; + #x2d ; - #x3c ; < #x3d ; = #x3e ; > #x5c ; \ #x7f ; \C-? #x30 ; 0 #x31 ; 1 #x32 ; 2 #x33 ; 3 #x34 ; 4 #x35 ; 5 #x36 ; 6 #x37 ; 7 #x38 ; 8 #x39 ; 9 #x41 ; A #x42 ; B #x43 ; C #x44 ; D #x45 ; E #x46 ; F #x47 ; G #x48 ; H #x49 ; I #x4a ; J #x4b ; K #x4c ; L #x4d ; M #x4e ; N #x4f ; O #x50 ; P #x51 ; Q #x52 ; R #x53 ; S #x54 ; T #x55 ; U #x56 ; V #x57 ; W #x58 ; X #x59 ; Y #x5a ; Z #x61 ; a #x62 ; b #x63 ; c #x64 ; d #x65 ; e #x66 ; f #x67 ; g #x68 ; h #x69 ; i #x6a ; j #x6b ; k #x6c ; l #x6d ; m #x6e ; n #x6f ; o #x70 ; p #x71 ; q #x72 ; r #x73 ; s #x74 ; t #x75 ; u #x76 ; v #x77 ; w #x78 ; x #x79 ; y #x7a ; z )) (put 'string-logical-lessp 'strcmplogical-trans-tbl-after-2nd-char '( #x00 ; \C-@ #x01 ; \C-a #x02 ; \C-b #x03 ; \C-c #x04 ; \C-d #x05 ; \C-e #x06 ; \C-f #x07 ; \C-g #x08 ; \C-h #x0e ; \C-n #x0f ; \C-o #x10 ; \C-p #x11 ; \C-q #x12 ; \C-r #x13 ; \C-s #x14 ; \C-t #x15 ; \C-u #x16 ; \C-v #x17 ; \C-w #x18 ; \C-x #x19 ; \C-y #x1a ; \C-z #x1b ; \C-[ #x1c ; \C-\\ #x1d ; \C-] #x1e ; \C-^ #x1f ; \C-_ #x30 ; 0 #x31 ; 1 #x32 ; 2 #x33 ; 3 #x34 ; 4 #x35 ; 5 #x36 ; 6 #x37 ; 7 #x38 ; 8 #x39 ; 9 #x20 ; (SPC) #x09 ; TAB #x0a ; \C-j #x0b ; \C-k #x0c ; \C-l #x0d ; \C-m #x21 ; ! #x22 ; " #x23 ; # #x24 ; $ #x25 ; % #x26 ; & #x27 ; ' #x28 ; ( #x29 ; ) #x2a ; * #x2c ; , #x2e ; . #x2f ; / #x3a ; : #x3b ; ; #x3f ; ? #x40 ; @ #x5b ; [ #x5d ; ] #x5e ; ^ #x5f ; _ #x60 ; ` #x7b ; { #x7c ; | #x7d ; } #x7e ; ~ #x2b ; + #x2d ; - #x3c ; < #x3d ; = #x3e ; > #x5c ; \ #x7f ; \C-? #x41 ; A #x42 ; B #x43 ; C #x44 ; D #x45 ; E #x46 ; F #x47 ; G #x48 ; H #x49 ; I #x4a ; J #x4b ; K #x4c ; L #x4d ; M #x4e ; N #x4f ; O #x50 ; P #x51 ; Q #x52 ; R #x53 ; S #x54 ; T #x55 ; U #x56 ; V #x57 ; W #x58 ; X #x59 ; Y #x5a ; Z #x61 ; a #x62 ; b #x63 ; c #x64 ; d #x65 ; e #x66 ; f #x67 ; g #x68 ; h #x69 ; i #x6a ; j #x6b ; k #x6c ; l #x6d ; m #x6e ; n #x6f ; o #x70 ; p #x71 ; q #x72 ; r #x73 ; s #x74 ; t #x75 ; u #x76 ; v #x77 ; w #x78 ; x #x79 ; y #x7a ; z )) ;; Convert the above translation table to vector indexed by ascii code (mapc (lambda (x) (unless (vectorp (get 'string-logical-lessp x)) ;; make vector from list of cdr part ;; ;; '((#x00 . 0) (#x01 . 1) ... (#x7a . 127) ... (#x7f . 53)) ;; => '(0 1 ... 127 ... 53) ;; => [0 1 ... 127 ... 53] (put 'string-logical-lessp x (apply 'vector (mapcar 'cdr ;; sort by car part ;; ;; '((#x00 . 0) (#x01 . 1) ... (#x7f . 53) ... (#x7a . 127)) ;; => '((#x00 . 0) (#x01 . 1) ... (#x7a . 127) ... (#x7f . 53)) (sort (let ((i 0)) ;; make index on cdr part ;; ;; '(#x00 #x01 ... #x7a) ;; => '((#x00 . 0) (#x01 . 1) ... (#x7f . 53) ... (#x7a . 127)) (mapcar (lambda (x) (prog1 (cons x i) (setq i (1+ i)))) (get 'string-logical-lessp x))) (lambda (x y) (< (car x) (car y))))))))) '(strcmplogical-trans-tbl-at-1st-char strcmplogical-trans-tbl-after-2nd-char)) ;; Length (put 'string-logical-lessp 'strcmplogical-trans-tbl-at-1st-char-len (length (get 'string-logical-lessp 'strcmplogical-trans-tbl-at-1st-char))) (put 'string-logical-lessp 'strcmplogical-trans-tbl-after-2nd-char-len (length (get 'string-logical-lessp 'strcmplogical-trans-tbl-after-2nd-char))) (defvar string-logical-lessp--strcmplogical nil "If non-nil, emulate filename sorting style of Window Explorer on Windows XP (or later) and Windows API `StrCmpLogicalW' easily. See also `string-logical-lessp'. Example: (sort '(\".emacs\" \".emacs-places.~10~\" \".emacs-places.~9~\" \".emacs.~10~\" \".emacs.~9~\") 'string-logical-lessp) => ;; string-logical-lessp--strcmplogical ;; => ;; nil: t: (\".emacs\" (\".emacs\" \".emacs-places.~9~\" \".emacs.~9~\" ; <= just after \".emacs\" \".emacs-places.~10~\" \".emacs.~10~\" \".emacs.~9~\" \".emacs-places.~9~\" \".emacs.~10~\") \".emacs-places.~10~\")") (defalias 'string-version< 'string-version-lessp) (defun string-version-lessp (s1 s2 &optional ignore-case) "Return t if first arg string is less than second in version order. Case is significant in this comparison if IGNORE-CASE is nil. Symbols are also allowed; their print names are used instead. See also `string-logical-lessp'. Policy of version order: See `glibc-2.11.1/string/strverscmp.c' or it's manual. Example: (sort '(\"foo.zml-1.gz\" => (\"foo.zml-1.gz\" \"foo.zml-100.gz\" \"foo.zml-2.gz\" \"foo.zml-12.gz\" \"foo.zml-6.gz\" \"foo.zml-13.gz\" \"foo.zml-12.gz\" \"foo.zml-2.gz\" \"foo.zml-13.gz\" \"foo.zml-25.gz\" \"foo.zml-25.gz\" \"foo.zml-6.gz\") \"foo.zml-100.gz\") 'string-version-lessp) (sort '(\"abc-1.01a.tgz\" => (\"abc-1.007.tgz\" \"abc-1.007.tgz\" \"abc-1.012b.tgz\" \"abc-1.012b.tgz\") \"abc-1.01a.tgz\") 'string-version-lessp) (sort '(\"9.000001.10.tgz\" => (\"009.01.91.tgz\" \"009.01.91.tgz\") \"9.000001.10.tgz\") 'string-version-lessp)" (let* (;; states (S_N #x0) ; normal (S_I #x3) ; comparing integral part (S_F #x6) ; comparing fractionnal parts (S_Z #x9) ; idem but with leading Zeroes only ;; Symbol(s) 0 [1-9] others ;; Transition (10) 0 (01) d (00) x ;; ;; x d 0 ; state (next-state (vector S_N S_I S_Z ; S_N S_N S_I S_I ; S_I S_N S_F S_F ; S_F S_N S_F S_Z)) ; S_Z ;; result-type (CMP 2) ; return diff (LEN 3) ; compare using len_diff/diff ;; `glibc-2.11.1/string/strverscmp.c' ;; ;; *p1 *p1 ; pair ;; *p2 *p2 ; ;; *p3 *p3 ; ;; ;; x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0 ; state (result-type (vector CMP CMP CMP CMP LEN CMP CMP CMP CMP ; S_N CMP -1 -1 +1 LEN LEN +1 LEN LEN ; S_I CMP CMP CMP CMP CMP CMP CMP CMP CMP ; S_F CMP +1 +1 -1 CMP CMP -1 CMP CMP)) ; S_Z ;;; ;; like `coreutils-6.12/lib/strverscmp.c' ;;; ;; ;;; ;; x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0 ; state ;;; (result-type (vector CMP CMP CMP CMP LEN CMP CMP CMP CMP ; S_N ;;; CMP -1 -1 +1 LEN LEN +1 LEN LEN ; S_I ;;; CMP CMP CMP CMP LEN CMP CMP CMP CMP ; S_F ;;; CMP +1 +1 -1 CMP CMP -1 CMP CMP)) ; S_Z ret ; same style as return value of C language `strcmp' l1 l2 ; length of string s1, s2 (i 0) ; index of string s1, s2 c1 c2 ; character of string s1, s2 at index i diff ; difference between c1 and c2 (dl '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)) ; digit-list (dl-except-0 '(?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)) ; digit-list except 0 (state S_N) (repeat t)) (setq ret (catch 'end (if (eq s1 s2) (throw 'end 0)) (if (symbolp s1) (setq s1 (symbol-name s1))) (if (symbolp s2) (setq s2 (symbol-name s2))) (unless (stringp s1) (signal 'wrong-type-argument `(stringp ,s1))) (unless (stringp s2) (signal 'wrong-type-argument `(stringp ,s2))) (if ignore-case (setq s1 (upcase s1) s2 (upcase s2))) (setq l1 (length s1) l2 (length s2)) (while repeat ;; check limit (if (<= l1 i) (if (<= l2 i) (throw 'end 0) ; s1 and s2 match. (throw 'end -1)) ; s1 is less than s2. (if (<= l2 i) (throw 'end 1))) ; s1 is greater than s2. (setq c1 (elt s1 i) c2 (elt s2 i) i (1+ i) diff (- c1 c2) state (+ state (cond ((eq c1 ?0) 2) ((memq c1 dl-except-0) 1) (t 0)))) (if (= diff 0) (setq state (aref next-state state)) (setq repeat nil))) (setq state (aref result-type (+ (* state 3) (cond ((eq c2 ?0) 2) ((memq c2 dl-except-0) 1) (t 0))))) (cond ((eq state CMP) (setq ret diff)) ((eq state LEN) (while (progn (setq c1 (if (< i l1) (elt s1 i) -1) ; -1: invalid code as character c2 (if (< i l2) (elt s2 i) -1) i (1+ i)) (memq c1 dl)) (unless (memq c2 dl) (throw 'end 1))) (setq ret (if (memq c2 dl) -1 diff))) (t (setq ret state))) ret)) ;; convert ret to the style of `string-lessp' (< ret 0))) (defalias 'string-logical< 'string-logical-lessp) (defun string-logical-lessp (s1 s2 &optional ignore-case) "Return t if first arg string is less than second in logical version order. Case is significant in this comparison if IGNORE-CASE is nil. Symbols are also allowed; their print names are used instead. See also `string-version-lessp'. Policy of Logical version order: Sort by number whose leading 0 is skipped. For example, \"1\", \"0001\" and \"0000001\" are equivalent. And if `string-logical-lessp--strcmplogical' is non-nil, emulate filename sorting style of Window Explorer on Windows XP (or later) and Windows API `StrCmpLogicalW' easily. (The emulation is poor and incomplete.) 1st character: sort by alphabetical order (not precisely ascii) after 2nd character: digit takes first priority Example: (sort '(\"foo.zml-1.gz\" => (\"foo.zml-1.gz\" \"foo.zml-100.gz\" \"foo.zml-2.gz\" \"foo.zml-12.gz\" \"foo.zml-6.gz\" \"foo.zml-13.gz\" \"foo.zml-12.gz\" \"foo.zml-2.gz\" \"foo.zml-13.gz\" \"foo.zml-25.gz\" \"foo.zml-25.gz\" \"foo.zml-6.gz\") \"foo.zml-100.gz\") 'string-logical-lessp) (sort '(\"abc-1.01a.tgz\" => (\"abc-1.01a.tgz\" \"abc-1.007.tgz\" \"abc-1.007.tgz\" \"abc-1.012b.tgz\") \"abc-1.012b.tgz\") 'string-logical-lessp) (sort '(\"9.000001.10.tgz\" => (\"9.000001.10.tgz\" \"009.01.91.tgz\") \"009.01.91.tgz\") 'string-logical-lessp) ;; if `string-logical-lessp--strcmplogical' is non-nil (sort '(\"1#.txt\" => (\"##.txt\" \"##.txt\") \"1#.txt\") 'string-logical-lessp) (sort '(\"#1.txt\" => (\"#1.txt\" \"##.txt\") \"##.txt\") 'string-logical-lessp)" (let (ret ; same style as return value of C language `strcmp' l1 l2 ; length of string s1, s2 (i1 0) ; index of string s1, s2 (i2 0) (c1 0) ; character of string s1, s2 at index n1, n2 (c2 0) ; (set dummy code as initial value) d1 d2 ; digit flag (dl '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9))) ; digit-list (if (symbolp s1) (setq s1 (symbol-name s1))) (if (symbolp s2) (setq s2 (symbol-name s2))) (unless (stringp s1) (signal 'wrong-type-argument `(stringp ,s1))) (unless (stringp s2) (signal 'wrong-type-argument `(stringp ,s2))) (if ignore-case (setq s1 (upcase s1) s2 (upcase s2))) (setq l1 (length s1) l2 (length s2)) (setq ret (catch 'end (while t ;; check limit (if (<= l1 i1) (if (<= l2 i2) (throw 'end 0) ; s1 and s2 match logically. (throw 'end -1)) ; s1 is less than s2 logically. (if (<= l2 i2) (throw 'end 1))) ; s1 is greater than s2 logically. (setq c1 (elt s1 i1) c2 (elt s2 i2) d1 (memq c1 dl) d2 (memq c2 dl)) (cond ((and d1 d2) ; both c1 and c2 are digit. (let (n1 n2 ; number w1 w2) ; length (or width) of number ;; skip needless "0" ;; ;; example: ;; "0" => "0" ;; "00000" => "0" ;; "10" => "10" ;; "010" => "10" ;; "000010" => "10" (string-match "0*\\([0-9]+\\)" s1 i1) (setq n1 (match-string 1 s1) w1 (length n1) i1 (match-end 1)) ; next character index after number (string-match "0*\\([0-9]+\\)" s2 i2) (setq n2 (match-string 1 s2) w2 (length n2) i2 (match-end 1)) ;; number whose length is shorter is less than another. (cond ((< w1 w2) (throw 'end -1)) ((> w1 w2) (throw 'end 1)) (t ;; as both lengths are equal, ;; we should use `compare-strings' instead of ;; `number-to-string' to avoid overflow. (setq ret (compare-strings n1 nil nil n2 nil nil)) (unless (eq ret t) (throw 'end ret)))))) (t (setq ret (- c1 c2)) (if (= ret 0) ;; next character index (setq i1 (1+ i1) i2 (1+ i2)) (when string-logical-lessp--strcmplogical (let (tbl tbl-len) (cond ((= i1 0) ; 1st character: sort by alphabetical order (setq tbl (get 'string-logical-lessp 'strcmplogical-trans-tbl-at-1st-char) tbl-len (get 'string-logical-lessp 'strcmplogical-trans-tbl-at-1st-char-len))) (t ; after 2nd character: digit takes first priority (setq tbl (get 'string-logical-lessp 'strcmplogical-trans-tbl-after-2nd-char) tbl-len (get 'string-logical-lessp 'strcmplogical-trans-tbl-after-2nd-char-len)))) (if (< c1 tbl-len) (setq c1 (aref tbl c1))) (if (< c2 tbl-len) (setq c2 (aref tbl c2)))) (setq ret (- c1 c2))) (throw 'end ret))))))) ;; convert ret to the style of `string-lessp' (< ret 0)))