unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
* bug#13589: 24.2.92; wrongly escaped call-process arguments
@ 2013-01-30 16:36 Shigeru Fukaya
  2013-01-30 17:55 ` Eli Zaretskii
  0 siblings, 1 reply; 4+ messages in thread
From: Shigeru Fukaya @ 2013-01-30 16:36 UTC (permalink / raw)
  To: 13589

Hi,

`call-process' wrongly treats those multibyte characters whose
2nd byte is 0x5c.
It is bacause sys_spawnve in w32proc.c does bad quoting process.

Bad example:

(let ((coding-system-for-write 'cp932))
  (call-process "sh" nil t nil "-c" "ls d:/tmp/表*"))
ls: cannot access d:/tmp/表*: No such file or directory
2

Correct example:

(let ((coding-system-for-write 'cp932))
  (call-process "sh" nil t nil "-c" "ls d:/tmp/表*"))
d:/tmp/表
d:/tmp/表1

d:/tmp/表表:
abc
0

I tried fixing it (after w32.c as a reference).
Additionally, I set uninitialized `escape_char' to 0.

Note that coding of command lines is different from coding of process inputs.

Regards,
Shigeru


*** w32proc.c	Thu Jan 31 01:06:10 2013
--- w32proc.old.c	Sun Jan 27 11:32:55 2013
***************
*** 1419,1488 ****
    *nptr = NULL;
  }
  
- 
- /* Code copied from max_filename_mbslen in w32.c */
- 
- /* Current codepage for encoding command lines.  */
- static int command_line_codepage;
- 
- /* Return the maximum length in bytes of a multibyte character
-    sequence encoded in the current ANSI codepage.  This is required to
-    correctly walk the encoded file names one character at a time.  */
- static int
- max_cmdline_mbslen (void)
- {
-   /* A simple cache to avoid calling GetCPInfo every time we need to
-      normalize a file name.  The file-name encoding is not supposed to
-      be changed too frequently, if ever.  */
-   static Lisp_Object last_cmdline_encoding;
-   static int last_max_mbslen;
-   Lisp_Object current_encoding;
- 
-   current_encoding = Vcoding_system_for_write;
- 
-   if (!EQ (last_cmdline_encoding, current_encoding))
-     {
-       CPINFO cp_info;
- 
-       last_cmdline_encoding = current_encoding;
-       /* Default to the current ANSI codepage.  */
-       command_line_codepage = w32_ansi_code_page;
-       if (!NILP (current_encoding))
- 	{
- 	  char *cpname = SDATA (SYMBOL_NAME (current_encoding));
- 	  char *cp = NULL, *end;
- 	  int cpnum;
- 
- 	  if (strncmp (cpname, "cp", 2) == 0)
- 	    cp = cpname + 2;
- 	  else if (strncmp (cpname, "windows-", 8) == 0)
- 	    cp = cpname + 8;
- 
- 	  if (cp)
- 	    {
- 	      end = cp;
- 	      cpnum = strtol (cp, &end, 10);
- 	      if (cpnum && *end == '\0' && end - cp >= 2)
- 		command_line_codepage = cpnum;
- 	    }
- 	}
- 
-       if (!command_line_codepage)
- 	command_line_codepage = CP_ACP; /* CP_ACP = 0, but let's not assume that */
- 
-       if (!GetCPInfo (command_line_codepage, &cp_info))
- 	{
- 	  command_line_codepage = CP_ACP;
- 	  if (!GetCPInfo (command_line_codepage, &cp_info))
- 	    emacs_abort ();
- 	}
-       last_max_mbslen = cp_info.MaxCharSize;
-     }
- 
-   return last_max_mbslen;
- }
- 
- 
  /* When a new child process is created we need to register it in our list,
     so intercept spawn requests.  */
  int
--- 1419,1424 ----
***************
*** 1495,1501 ****
    child_process *cp;
    int is_dos_app, is_cygnus_app, is_gui_app;
    int do_quoting = 0;
!   char escape_char = 0;
    /* We pass our process ID to our children by setting up an environment
       variable in their environment.  */
    char ppid_env_var_buffer[64];
--- 1431,1437 ----
    child_process *cp;
    int is_dos_app, is_cygnus_app, is_gui_app;
    int do_quoting = 0;
!   char escape_char;
    /* We pass our process ID to our children by setting up an environment
       variable in their environment.  */
    char ppid_env_var_buffer[64];
***************
*** 1508,1514 ****
       Some extra whitespace characters need quoting in Cygwin programs,
       so this list is conditionally modified below.  */
    char *sepchars = " \t*?";
-   int dbcs_p = max_cmdline_mbslen () > 1;
  
    /* We don't care about the other modes */
    if (mode != _P_NOWAIT)
--- 1444,1449 ----
***************
*** 1622,1628 ****
  
        if (*p == 0)
  	need_quotes = 1;
!       for ( ; *p; p = !dbcs_p ? p + 1 : CharNextExA (command_line_codepage, p, 0))
  	{
  	  if (escape_char == '"' && *p == '\\')
  	    /* If it's a Cygwin app, \ needs to be escaped.  */
--- 1557,1563 ----
  
        if (*p == 0)
  	need_quotes = 1;
!       for ( ; *p; p++)
  	{
  	  if (escape_char == '"' && *p == '\\')
  	    /* If it's a Cygwin app, \ needs to be escaped.  */
***************
*** 1674,1680 ****
  
        if (do_quoting)
  	{
! 	  for ( ; *p; p = !dbcs_p ? p + 1 : CharNextExA (command_line_codepage, p, 0))
  	    if ((strchr (sepchars, *p) != NULL) || *p == '"')
  	      need_quotes = 1;
  	}
--- 1609,1615 ----
  
        if (do_quoting)
  	{
! 	  for ( ; *p; p++)
  	    if ((strchr (sepchars, *p) != NULL) || *p == '"')
  	      need_quotes = 1;
  	}
***************
*** 1702,1708 ****
  	      *parg++ = *p++;
  	    }
  #else
! 	  while (*p)
  	    {
  	      if (*p == '"')
  		{
--- 1637,1643 ----
  	      *parg++ = *p++;
  	    }
  #else
! 	  for ( ; *p; p++)
  	    {
  	      if (*p == '"')
  		{
***************
*** 1717,1736 ****
  		}
  	      else if (escape_char == '"' && *p == '\\')
  		*parg++ = '\\';
  
  	      if (*p == escape_char && escape_char != '"')
  		escape_char_run++;
  	      else
  		escape_char_run = 0;
- 	      if (!dbcs_p)
- 		*parg++ = *p++;
- 	      else
- 		{
- 		  char *p1 = CharNextExA (command_line_codepage, p, 0);
- 
- 		  while (p < p1)
- 		    *parg++ = *p++;
- 		}
  	    }
  	  /* double escape chars before enclosing quote */
  	  while (escape_char_run > 0)
--- 1652,1663 ----
  		}
  	      else if (escape_char == '"' && *p == '\\')
  		*parg++ = '\\';
+ 	      *parg++ = *p;
  
  	      if (*p == escape_char && escape_char != '"')
  		escape_char_run++;
  	      else
  		escape_char_run = 0;
  	    }
  	  /* double escape chars before enclosing quote */
  	  while (escape_char_run > 0)





^ permalink raw reply	[flat|nested] 4+ messages in thread

* bug#13589: 24.2.92; wrongly escaped call-process arguments
  2013-01-30 16:36 bug#13589: 24.2.92; wrongly escaped call-process arguments Shigeru Fukaya
@ 2013-01-30 17:55 ` Eli Zaretskii
  2013-02-01  4:50   ` Shigeru Fukaya
  0 siblings, 1 reply; 4+ messages in thread
From: Eli Zaretskii @ 2013-01-30 17:55 UTC (permalink / raw)
  To: Shigeru Fukaya; +Cc: 13589

> From: Shigeru Fukaya <shigeru.fukaya@gmail.com>
> Date: Thu, 31 Jan 2013 01:36:35 +0900
> 
> `call-process' wrongly treats those multibyte characters whose
> 2nd byte is 0x5c.
> It is bacause sys_spawnve in w32proc.c does bad quoting process.
> 
> Bad example:
> 
> (let ((coding-system-for-write 'cp932))
>   (call-process "sh" nil t nil "-c" "ls d:/tmp/表*"))
> ls: cannot access d:/tmp/表*: No such file or directory
> 2
> 
> Correct example:
> 
> (let ((coding-system-for-write 'cp932))
>   (call-process "sh" nil t nil "-c" "ls d:/tmp/表*"))
> d:/tmp/表
> d:/tmp/表1
> 
> d:/tmp/表表:
> abc
> 0
> 
> I tried fixing it (after w32.c as a reference).
> Additionally, I set uninitialized `escape_char' to 0.

Thanks for the report and the patch.

Unfortunately, I don't think the solution you suggest is the right
one.  It is one thing to support only Windows codepages in encoding of
file names, as w32.c does: after all, that's the limitation of the OS
APIs we use.  But imposing the same limitation for encoding
command-line arguments to subprocesses is something entirely
different.  E.g., consider such trivial example as invoking a speller:
the encoding used to pass non-ASCII characters on the command line in
this case might have nothing at all to do with Windows codepages.  We
can no longer justify not supporting shift_jis in this case, for
example.  IOW, using CharNextExA here would impose an unreasonably bad
restriction on how subprocesses can be invoked from Emacs.

In addition, without also fixing cmdproxy, which does similar things,
this is not going to solve the problem, except in a few select cases.
E.g., I think it's enough for you to use shell-file-name instead of
"sh" to have the command be handed to cmdproxy by default, and then
you'll have the same problem there.

We need a better, less restricted and more thorough solution.  I don't
yet know which one.

Thanks.





^ permalink raw reply	[flat|nested] 4+ messages in thread

* bug#13589: 24.2.92; wrongly escaped call-process arguments
  2013-01-30 17:55 ` Eli Zaretskii
@ 2013-02-01  4:50   ` Shigeru Fukaya
  2013-02-01 10:12     ` Eli Zaretskii
  0 siblings, 1 reply; 4+ messages in thread
From: Shigeru Fukaya @ 2013-02-01  4:50 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: 13589

Thank you for your reply.

Why I used CharNextExA is I can't find such functions for encoded
strings in Emacs.  Possible alternative is to change the quoting
process out of binary to elisp code.  Or provide some hook for it?

And, anyway, could you please initialize 'escape_char', so that we can
safely set `w32-quote-process-args' to nil and do some quoting by
ourselves (using advise or something).

Shigeru

>> From: Shigeru Fukaya <shigeru.fukaya@gmail.com>
>> Date: Thu, 31 Jan 2013 01:36:35 +0900
>> 
>> `call-process' wrongly treats those multibyte characters whose
>> 2nd byte is 0x5c.
>> It is bacause sys_spawnve in w32proc.c does bad quoting process.
>> 
>> Bad example:
>> 
>> (let ((coding-system-for-write 'cp932))
>>   (call-process "sh" nil t nil "-c" "ls d:/tmp/表*"))
>> ls: cannot access d:/tmp/表*: No such file or directory
>> 2
>> 
>> Correct example:
>> 
>> (let ((coding-system-for-write 'cp932))
>>   (call-process "sh" nil t nil "-c" "ls d:/tmp/表*"))
>> d:/tmp/表
>> d:/tmp/表1
>> 
>> d:/tmp/表表:
>> abc
>> 0
>> 
>> I tried fixing it (after w32.c as a reference).
>> Additionally, I set uninitialized `escape_char' to 0.
>
>Thanks for the report and the patch.
>
>Unfortunately, I don't think the solution you suggest is the right
>one.  It is one thing to support only Windows codepages in encoding of
>file names, as w32.c does: after all, that's the limitation of the OS
>APIs we use.  But imposing the same limitation for encoding
>command-line arguments to subprocesses is something entirely
>different.  E.g., consider such trivial example as invoking a speller:
>the encoding used to pass non-ASCII characters on the command line in
>this case might have nothing at all to do with Windows codepages.  We
>can no longer justify not supporting shift_jis in this case, for
>example.  IOW, using CharNextExA here would impose an unreasonably bad
>restriction on how subprocesses can be invoked from Emacs.
>
>In addition, without also fixing cmdproxy, which does similar things,
>this is not going to solve the problem, except in a few select cases.
>E.g., I think it's enough for you to use shell-file-name instead of
>"sh" to have the command be handed to cmdproxy by default, and then
>you'll have the same problem there.
>
>We need a better, less restricted and more thorough solution.  I don't
>yet know which one.
>
>Thanks.





^ permalink raw reply	[flat|nested] 4+ messages in thread

* bug#13589: 24.2.92; wrongly escaped call-process arguments
  2013-02-01  4:50   ` Shigeru Fukaya
@ 2013-02-01 10:12     ` Eli Zaretskii
  0 siblings, 0 replies; 4+ messages in thread
From: Eli Zaretskii @ 2013-02-01 10:12 UTC (permalink / raw)
  To: Shigeru Fukaya; +Cc: 13589

> From: Shigeru Fukaya <shigeru.fukaya@gmail.com>
> Cc: 13589@debbugs.gnu.org
> Date: Fri, 01 Feb 2013 13:50:13 +0900
> 
> Why I used CharNextExA is I can't find such functions for encoded
> strings in Emacs.

There aren't any, except when the strings are encoded in UTF-8.

> Possible alternative is to change the quoting
> process out of binary to elisp code.  Or provide some hook for it?

We cannot move this to Lisp, I think, because C code calls
Fcall_process directly from C.

I think the right place for this processing is in Fcall_process,
around line 425 of callproc.c, where we encode the command-line
arguments and stuff them into new_argv[] array, to be passed to
child_setup.  Each argument should be quoted _before_ it is encoded,
e.g., by running it through a w32-specific function.  Since the Emacs
internal representation of characters is based on UTF-8, the problem
with backslashes incorrectly interpreted cannot happen if we do this
processing before encoding the result.  We can then remove most of the
code in sys_spawnve that quotes special characters, except for quoting
argv[0] if it includes whitespace.

> And, anyway, could you please initialize 'escape_char'

Done in revision 111212 on the emacs-24 branch.

> so that we can safely set `w32-quote-process-args' to nil and do
> some quoting by ourselves (using advise or something).

Note that, even if w32-quote-process-args is nil, the current code in
sys_spawnve can still quote some parts of the command arguments, e.g.,
if an argument is empty.  So initializing escape_char to zero is not
TRT, IMO; I initialized it to '\' instead.

Thanks.





^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-02-01 10:12 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-30 16:36 bug#13589: 24.2.92; wrongly escaped call-process arguments Shigeru Fukaya
2013-01-30 17:55 ` Eli Zaretskii
2013-02-01  4:50   ` Shigeru Fukaya
2013-02-01 10:12     ` Eli Zaretskii

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).