unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Eli Zaretskii <eliz@gnu.org>
To: kzhr@d1.dion.ne.jp
Cc: 13515@debbugs.gnu.org
Subject: bug#13515: 24.3.50; file-name operating functions are broken on Japanese Windows
Date: Tue, 22 Jan 2013 15:27:44 +0200	[thread overview]
Message-ID: <831uddny9b.fsf@gnu.org> (raw)
In-Reply-To: <834ni9o1oz.fsf@gnu.org>

> Date: Tue, 22 Jan 2013 14:13:32 +0200
> From: Eli Zaretskii <eliz@gnu.org>
> Cc: 13515@debbugs.gnu.org
> 
> > Date: Mon, 21 Jan 2013 22:48:44 +0900
> > From: Kazuhiro Ito <kzhr@d1.dion.ne.jp>
> > 
> > The code below returns an unexpected result in trunk on Windows.
> > 
> > (let ((file-name-coding-system 'cp932))
> >   (expand-file-name "表" "C:/"))
> > 
> > -> "c:/\225/"
> > 
> > dostounix_filename does not support cp932-encoded strings, which can
> > contain '\\' as part of Kanji characters.
> 
> Thanks, I will work on fixing this.

Please try the changes below (relative to the emacs-24 branch).  If no
issues are found with them, I will soon install them on the release
branch.

=== modified file 'src/w32.c'
--- src/w32.c	2013-01-01 09:11:05 +0000
+++ src/w32.c	2013-01-22 13:20:33 +0000
@@ -37,7 +37,7 @@ along with GNU Emacs.  If not, see <http
 /* must include CRT headers *before* config.h */
 
 #include <config.h>
-#include <mbstring.h>	/* for _mbspbrk */
+#include <mbstring.h>	/* for _mbspbrk and _mbslwr */
 
 #undef access
 #undef chdir
@@ -1304,6 +1304,67 @@ srandom (int seed)
   srand (seed);
 }
 
+/* Current codepage for encoding file names.  */
+static int file_name_codepage;
+
+/* Return the maximum length in bytes of a multibyte character
+   sequence encoded in the current ANSI codepage.  This is required to
+   correctly walk the encoded file names one character at a time.  */
+static int
+max_filename_mbslen (void)
+{
+  /* A simple cache to avoid calling GetCPInfo every time we need to
+     normalize a file name.  The file-name encoding is not supposed to
+     be changed too frequently, if ever.  */
+  static Lisp_Object last_file_name_encoding;
+  static int last_max_mbslen;
+  Lisp_Object current_encoding;
+
+  current_encoding = Vfile_name_coding_system;
+  if (NILP (current_encoding))
+    current_encoding = Vdefault_file_name_coding_system;
+
+  if (!EQ (last_file_name_encoding, current_encoding))
+    {
+      CPINFO cp_info;
+
+      last_file_name_encoding = current_encoding;
+      /* Default to the current ANSI codepage.  */
+      file_name_codepage = w32_ansi_code_page;
+      if (!NILP (current_encoding))
+	{
+	  char *cpname = SDATA (SYMBOL_NAME (current_encoding));
+	  char *cp = NULL, *end;
+	  int cpnum;
+
+	  if (strncmp (cpname, "cp", 2) == 0)
+	    cp = cpname + 2;
+	  else if (strncmp (cpname, "windows-", 8) == 0)
+	    cp = cpname + 8;
+
+	  if (cp)
+	    {
+	      end = cp;
+	      cpnum = strtol (cp, &end, 10);
+	      if (cpnum && *end == '\0' && end - cp >= 2)
+		file_name_codepage = cpnum;
+	    }
+	}
+
+      if (!file_name_codepage)
+	file_name_codepage = CP_ACP; /* CP_ACP = 0, but let's not assume that */
+
+      if (!GetCPInfo (file_name_codepage, &cp_info))
+	{
+	  file_name_codepage = CP_ACP;
+	  if (!GetCPInfo (file_name_codepage, &cp_info))
+	    emacs_abort ();
+	}
+      last_max_mbslen = cp_info.MaxCharSize;
+    }
+
+  return last_max_mbslen;
+}
 
 /* Normalize filename by converting all path separators to
    the specified separator.  Also conditionally convert upper
@@ -1313,14 +1374,20 @@ static void
 normalize_filename (register char *fp, char path_sep)
 {
   char sep;
-  char *elem;
+  char *elem, *p2;
+  int dbcs_p = max_filename_mbslen () > 1;
 
   /* Always lower-case drive letters a-z, even if the filesystem
      preserves case in filenames.
      This is so filenames can be compared by string comparison
      functions that are case-sensitive.  Even case-preserving filesystems
      do not distinguish case in drive letters.  */
-  if (fp[1] == ':' && *fp >= 'A' && *fp <= 'Z')
+  if (dbcs_p)
+    p2 = CharNextExA (file_name_codepage, fp, 0);
+  else
+    p2 = fp + 1;
+
+  if (*p2 == ':' && *fp >= 'A' && *fp <= 'Z')
     {
       *fp += 'a' - 'A';
       fp += 2;
@@ -1332,7 +1399,10 @@ normalize_filename (register char *fp, c
 	{
 	  if (*fp == '/' || *fp == '\\')
 	    *fp = path_sep;
-	  fp++;
+	  if (!dbcs_p)
+	    fp++;
+	  else
+	    fp = CharNextExA (file_name_codepage, fp, 0);
 	}
       return;
     }
@@ -1355,13 +1425,20 @@ normalize_filename (register char *fp, c
 	if (elem && elem != fp)
 	  {
 	    *fp = 0;		/* temporary end of string */
-	    _strlwr (elem);	/* while we convert to lower case */
+	    _mbslwr (elem);	/* while we convert to lower case */
 	  }
 	*fp = sep;		/* convert (or restore) path separator */
 	elem = fp + 1;		/* next element starts after separator */
 	sep = path_sep;
       }
-  } while (*fp++);
+    if (*fp)
+      {
+	if (!dbcs_p)
+	  fp++;
+	else
+	  fp = CharNextExA (file_name_codepage, fp, 0);
+      }
+  } while (*fp);
 }
 
 /* Destructively turn backslashes into slashes.  */
@@ -2588,15 +2665,22 @@ readdir (DIR *dirp)
     strcpy (dir_static.d_name, dir_find_data.cFileName);
   dir_static.d_namlen = strlen (dir_static.d_name);
   if (dir_is_fat)
-    _strlwr (dir_static.d_name);
+    _mbslwr (dir_static.d_name);
   else if (downcase)
     {
       register char *p;
-      for (p = dir_static.d_name; *p; p++)
-	if (*p >= 'a' && *p <= 'z')
-	  break;
+      int dbcs_p = max_filename_mbslen () > 1;
+      for (p = dir_static.d_name; *p; )
+	{
+	  if (*p >= 'a' && *p <= 'z')
+	    break;
+	  if (dbcs_p)
+	    p = CharNextExA (file_name_codepage, p, 0);
+	  else
+	    p++;
+	}
       if (!*p)
-	_strlwr (dir_static.d_name);
+	_mbslwr (dir_static.d_name);
     }
 
   return &dir_static;






  reply	other threads:[~2013-01-22 13:27 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-21 13:48 bug#13515: 24.3.50; file-name operating functions are broken on Japanese Windows Kazuhiro Ito
2013-01-22 12:13 ` Eli Zaretskii
2013-01-22 13:27   ` Eli Zaretskii [this message]
2013-01-23  9:39     ` Kazuhiro Ito
2013-01-23 16:13       ` Eli Zaretskii
2013-01-23 16:54         ` Eli Zaretskii
2013-01-23  9:38   ` Kazuhiro Ito
2013-01-23 16:21     ` Eli Zaretskii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=831uddny9b.fsf@gnu.org \
    --to=eliz@gnu.org \
    --cc=13515@debbugs.gnu.org \
    --cc=kzhr@d1.dion.ne.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).