unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Eli Zaretskii <eliz@gnu.org>
To: "Márton Marczell" <dalokmarcinak@gmail.com>
Cc: 30755@debbugs.gnu.org
Subject: bug#30755: 25.3; Encoding of load-file-name wrong when path to working dir does not contain accented letter
Date: Thu, 17 May 2018 18:19:16 +0300	[thread overview]
Message-ID: <83tvr6ia2z.fsf@gnu.org> (raw)
In-Reply-To: <5afd97c2.1c69fb81.53c57.01cf@mx.google.com> (message from Márton Marczell on Thu, 17 May 2018 16:54:56 +0200)

> Cc: "30755@debbugs.gnu.org" <30755@debbugs.gnu.org>
> From: Márton Marczell <dalokmarcinak@gmail.com>
> Date: Thu, 17 May 2018 16:54:56 +0200
> 
> I cloned git.savannah.gnu.org/r/emacs.git and built emacs. I set the HOME envvar to
> ‘D:\Marci\Programozás\emacsdebug’ which contained the repro case from above. I still get wrong output
> when the load path is printed:

I haven't committed my changes yet, so it is expected that you still
see the problem.

Please apply the patch below, rebuild Emacs (by typing "make" at the
shell prompt in the top-level directory of the Emacs tree), and see if
the problem goes away.

Thanks.

--- src/fileio.c~0	2018-02-12 12:40:44.000000000 +0200
+++ src/fileio.c	2018-05-15 18:13:28.240161500 +0300
@@ -865,33 +865,71 @@ the root directory.  */)
       }
   }
   multibyte = STRING_MULTIBYTE (name);
-  if (multibyte != STRING_MULTIBYTE (default_directory))
+  bool defdir_multibyte = STRING_MULTIBYTE (default_directory);
+  if (multibyte != defdir_multibyte)
     {
+      /* We want to make both NAME and DEFAULT_DIRECTORY have the same
+	 multibyteness.  Strategy:
+	 . If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they
+	   can be converted to the multibyteness of the other one
+	   while keeping the same byte sequence.
+	 . If both are non-ASCII, the only safe conversion is to
+	   convert the multibyte one to be unibyte, because the
+	   reverse conversion potentially adds bytes while raw bytes
+	   are converted to their multibyte forms, which we will be
+	   unable to account for, since the information about the
+	   original multibyteness is lost.  If those additional bytes
+	   later leak to system APIs because they are not encoded or
+	   because they are converted to unibyte strings by keeping
+	   the data, file APIs will fail.  */
       if (multibyte)
 	{
-	  unsigned char *p = SDATA (name);
+	  bool name_ascii_p = SCHARS (name) == SBYTES (name);
+	  unsigned char *p = SDATA (default_directory);
 
-	  while (*p && ASCII_CHAR_P (*p))
-	    p++;
-	  if (*p == '\0')
+	  if (!name_ascii_p)
+	    while (*p && ASCII_CHAR_P (*p))
+	      p++;
+	  if (name_ascii_p || *p != '\0')
 	    {
-	      /* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is
-		 unibyte.  Do not convert DEFAULT_DIRECTORY to
-		 multibyte; instead, convert NAME to a unibyte string,
-		 so that the result of this function is also a unibyte
-		 string.  This is needed during bootstrapping and
-		 dumping, when Emacs cannot decode file names, because
-		 the locale environment is not set up.  */
+	      /* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII.
+		 Make a unibyte string out of NAME, and arrange for
+		 the result of this function to be a unibyte string.
+		 This is needed during bootstrapping and dumping, when
+		 Emacs cannot decode file names, because the locale
+		 environment is not set up.  */
 	      name = make_unibyte_string (SSDATA (name), SBYTES (name));
 	      multibyte = 0;
 	    }
 	  else
-	    default_directory = string_to_multibyte (default_directory);
+	    {
+	      /* NAME is non-ASCII and multibyte, and
+		 DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a
+		 multibyte string out of DEFAULT_DIRECTORY's data.  */
+	      default_directory =
+		make_multibyte_string (SSDATA (default_directory),
+				       SCHARS (default_directory),
+				       SCHARS (default_directory));
+	    }
 	}
       else
 	{
-	  name = string_to_multibyte (name);
-	  multibyte = 1;
+	  unsigned char *p = SDATA (name);
+
+	  while (*p && ASCII_CHAR_P (*p))
+	    p++;
+	  if (*p == '\0')
+	    {
+	      /* DEFAULT_DIRECTORY is multibyte and NAME is unibyte
+		 and pure-ASCII.  Make a multibyte string out of
+		 NAME's data.  */
+	      name = make_multibyte_string (SSDATA (name),
+					    SCHARS (name), SCHARS (name));
+	      multibyte = 1;
+	    }
+	  else
+	    default_directory = make_unibyte_string (SSDATA (default_directory),
+						     SBYTES (default_directory));
 	}
     }
 

--- lisp/startup.el~0	2018-03-14 06:40:04.000000000 +0200
+++ lisp/startup.el	2018-05-16 18:48:50.597482900 +0300
@@ -560,9 +560,17 @@
 	    (if default-directory
 		(setq default-directory
                       (if (eq system-type 'windows-nt)
-                          ;; Convert backslashes to forward slashes.
-                          (expand-file-name
-                           (decode-coding-string default-directory coding t))
+                          ;; We pass the decoded default-directory as
+                          ;; the 2nd arg to make sure expand-file-name
+                          ;; sees a multibyte string as the default
+                          ;; directory; this avoids the side effect of
+                          ;; returning a unibyte string from
+                          ;; expand-file-name because it still sees
+                          ;; the undecoded value of default-directory.
+                          (let ((defdir (decode-coding-string default-directory
+                                                              coding t)))
+                            ;; Convert backslashes to forward slashes.
+                            (expand-file-name defdir defdir))
                         (decode-coding-string default-directory coding t))))))
 
 	;; Decode all the important variables and directory lists, now





  reply	other threads:[~2018-05-17 15:19 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-09 11:22 bug#30755: 25.3; Encoding of load-file-name wrong when path to working dir does not contain accented letter Márton Marczell
2018-03-09 13:46 ` Eli Zaretskii
     [not found]   ` <5aa2ff79.c786df0a.cebf4.23cd@mx.google.com>
2018-03-11 16:09     ` Eli Zaretskii
2018-03-17 14:19       ` bug#30755: 25.3; Encoding of load-file-name wrong when path toworking " Márton Marczell
2018-03-17 14:58         ` Eli Zaretskii
     [not found]           ` <CAChNUDE1RMe2SM9m0UJ2QdVYgCuewo_MWVhbPWKadd6pPm65=Q@mail.gmail.com>
     [not found]             ` <CAChNUDE+Et1DtkgS_4-f3EKVvvxbo4PVxPVgGuGP=rScUH=qng@mail.gmail.com>
2018-05-09  9:06               ` Márton Marczell
2018-05-09 17:28             ` Eli Zaretskii
2018-05-10 22:23               ` bug#30755: 25.3; Encoding of load-file-name wrong when pathtoworking " Márton Marczell
2018-05-15 17:22                 ` Eli Zaretskii
2018-05-16  7:42                   ` bug#30755: 25.3; Encoding of load-file-name wrong whenpathtoworking " Márton Marczell
2018-05-16  8:07                     ` Eli Zaretskii
2018-05-17 14:54                       ` bug#30755: 25.3; Encoding of load-file-name wrongwhenpathtoworking " Márton Marczell
2018-05-17 15:19                         ` Eli Zaretskii [this message]
2018-05-18 13:06                           ` Márton Marczell
2018-05-18 13:38                             ` Eli Zaretskii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=83tvr6ia2z.fsf@gnu.org \
    --to=eliz@gnu.org \
    --cc=30755@debbugs.gnu.org \
    --cc=dalokmarcinak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).