Hi! I just tested your latest patch. Unfortunately, it doesn't work properly. When pressing TAB, it expands the characters correctly. However, `file-name-all-completions' doesn't work: (file-name-all-completions "a" ".") ("åäöfirst.txt" "aaosecond.txt") I haven't had time to see what actually happens in the code, though. However, the " if (STRING_MULTIBYTE (file))" looks suspicious as the decoded value needs to be checked even for strings like "a". (However, I don't really know what STRING_MULTIBYTE does.) -- Anders On Mon, Dec 21, 2015 at 5:09 PM, Eli Zaretskii wrote: > > Date: Mon, 21 Dec 2015 07:52:53 +0100 > > From: Anders Lindgren > > Cc: random832@fastmail.com, 22169@debbugs.gnu.org > > > > I did some simple measurements with and without this patch. I ran > > `(file-name-all-completions "x" "src")' on the Emacs src directory. The > timing > > values were almost identical (varying between 0.001012 and 0.001080). > > You should try it on a larger directory, preferably one that has many > files with non-ASCII file names. > > > The way I see it, the patch doesn't do any harm in any coding system, > and it is > > fast. Hence, I don't really see that it's worth the effort to make this > code > > conditional. > > I'm surprised to hear that. Did you look at the implementation of > Fcompare_strings? It's highly non-trivial. What's more, if the user > sets completion-ignore-case non-nil, Fcompare_strings will call > Fupcase on each character, which is another non-trivial function; if > you are particularly unlucky, Fupcase can even GC (if it needs to set > up the case-table), which will definitely take several hundreds of > milliseconds if not longer. > > And that's today; what if tomorrow someone comes and adds to > Fcompare_strings something that makes it even more complex and slow? > > I've learned long ago not to call any non-trivial API unless I really > need it. You can never know what complexity hides in there. 
Besides, > it simply looks bad in the code to do processing that is unnecessary. > > > However, please write a patch for this if you still think it's > necessary. I > > can test it here to make sure it works under OS X. > > Attached (relative to the current emacs-25 branch). > > Please note that the patch below attempts to solve a couple of > additional subtle aspects of this: > > . it doesn't force the extra comparison for unibyte strings (which > include ASCII strings and unibyte non-ASCII strings), since the > issue doesn't exist then, and ENCODE_FILE/DECODE_FILE are no-ops > > . it forces the FILE argument to have all of its characters > precomposed, since if the caller passes us a file name with > decomposed characters, we risk rejecting them in the code we are > adding > > Please see that these indeed do their job correctly, as I could only > test the code very superficially. > > Thanks. > > diff --git a/lisp/international/ucs-normalize.el > b/lisp/international/ucs-normalize.el > index 8839b00..6f2fb28 100644 > --- a/lisp/international/ucs-normalize.el > +++ b/lisp/international/ucs-normalize.el > @@ -627,6 +627,10 @@ 'utf-8-hfs > :pre-write-conversion 'ucs-normalize-hfs-nfd-pre-write-conversion > ) > > +;; This is tested in dired.c:file_name_completion in order to reject > +;; false positives due to comparison of encoded file names. > +(coding-system-put 'utf-8-hfs 'decomposed-characters 't) > + > (provide 'ucs-normalize) > > ;; Local Variables: > diff --git a/src/dired.c b/src/dired.c > index 84bf247..d5628d5 100644 > --- a/src/dired.c > +++ b/src/dired.c > @@ -467,6 +467,7 @@ file_name_completion (Lisp_Object file, Lisp_Object > dirname, bool all_flag, > well as "." and "..". Until shown otherwise, assume we can't exclude > anything. 
*/ > bool includeall = 1; > + bool check_decoded = false; > ptrdiff_t count = SPECPDL_INDEX (); > > elt = Qnil; > @@ -485,6 +486,28 @@ file_name_completion (Lisp_Object file, Lisp_Object > dirname, bool all_flag, > on the encoded file name. */ > encoded_file = ENCODE_FILE (file); > encoded_dir = ENCODE_FILE (Fdirectory_file_name (dirname)); > + if (STRING_MULTIBYTE (file)) > + { > + Lisp_Object file_encoding = Vfile_name_coding_system; > + > + if (NILP (Vfile_name_coding_system)) > + file_encoding = Vdefault_file_name_coding_system; > + /* If the file-name encoding decomposes characters, as we do for > + HFS+ filesystems, we need to make an additional comparison of > + decoded names in order to filter false positives, such as "a" > + falsely matching "a-ring". */ > + if (!NILP (file_encoding) > + && !NILP (Fplist_get (Fcoding_system_plist (file_encoding), > + Qdecomposed_characters))) > + { > + check_decoded = true; > + /* Recompute FILE to make sure any decomposed characters in > + it are re-composed by the post-read-conversion. > + Otherwise, any decomposed characters will be rejected by > + the additional check below. */ > + file = DECODE_FILE (encoded_file); > + } > + } > int fd; > DIR *d = open_directory (encoded_dir, &fd); > record_unwind_protect_ptr (directory_files_internal_unwind, d); > @@ -637,6 +660,21 @@ file_name_completion (Lisp_Object file, Lisp_Object > dirname, bool all_flag, > if (!NILP (predicate) && NILP (call1 (predicate, name))) > continue; > > + /* Reject entries where the encoded strings match, but the > + decoded don't. For example, "a" should not match "a-ring" on > + file systems that store decomposed characters. */ > + Lisp_Object zero = make_number (0); > + Lisp_Object compare; > + Lisp_Object cmp; > + if (check_decoded && SCHARS (file) <= SCHARS (name)) > + { > + compare = make_number (SCHARS (file)); > + cmp = Fcompare_strings (name, zero, compare, file, zero, compare, > + completion_ignore_case ? 
Qt : Qnil); > + if (!EQ (cmp, Qt)) > + continue; > + } > + > /* Suitably record this match. */ > > matchcount += matchcount <= 1; > @@ -650,15 +688,13 @@ file_name_completion (Lisp_Object file, Lisp_Object > dirname, bool all_flag, > } > else > { > - Lisp_Object zero = make_number (0); > /* FIXME: This is a copy of the code in Ftry_completion. */ > - ptrdiff_t compare = min (bestmatchsize, SCHARS (name)); > - Lisp_Object cmp > - = Fcompare_strings (bestmatch, zero, > - make_number (compare), > - name, zero, > - make_number (compare), > - completion_ignore_case ? Qt : Qnil); > + compare = min (bestmatchsize, SCHARS (name)); > + cmp = Fcompare_strings (bestmatch, zero, > + make_number (compare), > + name, zero, > + make_number (compare), > + completion_ignore_case ? Qt : Qnil); > ptrdiff_t matchsize = EQ (cmp, Qt) ? compare : eabs (XINT (cmp)) > - 1; > > if (completion_ignore_case) > @@ -1007,6 +1043,7 @@ syms_of_dired (void) > DEFSYM (Qfile_attributes, "file-attributes"); > DEFSYM (Qfile_attributes_lessp, "file-attributes-lessp"); > DEFSYM (Qdefault_directory, "default-directory"); > + DEFSYM (Qdecomposed_characters, "decomposed-characters"); > > defsubr (&Sdirectory_files); > defsubr (&Sdirectory_files_and_attributes); >