bug#8254: race condition in dired.c's scmp function

unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed

* bug#8254: race condition in dired.c's scmp function
@ 2011-03-15  6:16 Paul Eggert
  2011-03-15  7:06 ` Eli Zaretskii
  2011-03-17 16:56 ` Paul Eggert
  0 siblings, 2 replies; 13+ messages in thread
From: Paul Eggert @ 2011-03-15  6:16 UTC (permalink / raw)
  To: 8254

The following code in the Emacs trunk src/dired.c's scmp function has
undefined behavior:

      while (l
	     && (DOWNCASE ((unsigned char) *s1++)
		 == DOWNCASE ((unsigned char) *s2++)))
	l--;

Because the DOWNCASE macro assigns to the global variables case_temp1
and case_temp2, (DOWNCASE (x) == DOWNCASE (y)) is not valid, as the
assignments can collide and lead to a race condition.

This bug was found by gcc -Wsequence-point (GCC 4.5.2), which warns:

  dired.c:799:7: error: operation on 'case_temp2' may be undefined [-Wsequence-point]
  dired.c:799:7: error: operation on 'case_temp1' may be undefined [-Wsequence-point]

I plan to work around the problem with something like the following
patch.

----

Fix a race condition diagnosed by gcc -Wsequence-point.
The expression (DOWNCASE ((unsigned char) *s1++) ==
DOWNCASE ((unsigned char) *s2++)), found in dired.c's scmp
function, had undefined behavior.
* lisp.h (DOWNCASE_TABLE, UPCASE_TABLE, DOWNCASE, UPPERCASEP):
(NOCASEP, LOWERCASEP, UPCASE, UPCASE1): Move from here ...
* buffer.h: ... to here, because these macros use current_buffer,
and the new implementation with inline functions needs to have
current_buffer in scope now, rather than later when the macros
are used.
(downcase, upcase1): New static inline functions.
(DOWNCASE, UPCASE1): Reimplement using these functions.
This avoids undefined behavior in expressions like
DOWNCASE (x) == DOWNCASE (y), which previously suffered
from race conditions in accessing the global variables
case_temp1 and case_temp2.
* casetab.c (case_temp1, case_temp2): Remove; no longer needed.
* lisp.h (case_temp1, case_temp2): Remove their decls.
* character.h (ASCII_CHAR_P): Move from here ...
* lisp.h: ... to here, so that the inline functions mentioned
above can use them.
=== modified file 'src/buffer.h'
--- src/buffer.h	2011-03-13 22:25:16 +0000
+++ src/buffer.h	2011-03-15 05:52:48 +0000
@@ -1026,4 +1026,47 @@

 #define PER_BUFFER_VALUE(BUFFER, OFFSET) \
       (*(Lisp_Object *)((OFFSET) + (char *) (BUFFER)))
-
+\f
+/* Current buffer's map from characters to lower-case characters.  */
+
+#define DOWNCASE_TABLE BVAR (current_buffer, downcase_table)
+
+/* Current buffer's map from characters to upper-case characters.  */
+
+#define UPCASE_TABLE BVAR (current_buffer, upcase_table)
+
+/* Downcase a character, or make no change if that cannot be done.  */
+
+static inline EMACS_INT
+downcase (int ch)
+{
+  Lisp_Object down = CHAR_TABLE_REF (DOWNCASE_TABLE, ch);
+  return NATNUMP (down) ? XFASTINT (down) : ch;
+}
+#define DOWNCASE(CH) downcase (CH)
+
+/* 1 if CH is upper case.  */
+
+#define UPPERCASEP(CH) (DOWNCASE (CH) != (CH))
+
+/* 1 if CH is neither upper nor lower case.  */
+
+#define NOCASEP(CH) (UPCASE1 (CH) == (CH))
+
+/* 1 if CH is lower case.  */
+
+#define LOWERCASEP(CH) (!UPPERCASEP (CH) && !NOCASEP(CH))
+
+/* Upcase a character, or make no change if that cannot be done.  */
+
+#define UPCASE(CH) (!UPPERCASEP (CH) ? UPCASE1 (CH) : (CH))
+
+/* Upcase a character known to be not upper case.  */
+
+static inline EMACS_INT
+upcase1 (int ch)
+{
+  Lisp_Object up = CHAR_TABLE_REF (UPCASE_TABLE, ch);
+  return NATNUMP (up) ? XFASTINT (up) : ch;
+}
+#define UPCASE1(CH) upcase1 (CH)

=== modified file 'src/casetab.c'
--- src/casetab.c	2011-02-16 15:02:50 +0000
+++ src/casetab.c	2011-03-15 04:01:35 +0000
@@ -28,11 +28,6 @@
 Lisp_Object Vascii_downcase_table, Vascii_upcase_table;
 Lisp_Object Vascii_canon_table, Vascii_eqv_table;

-/* Used as a temporary in DOWNCASE and other macros in lisp.h.  No
-   need to mark it, since it is used only very temporarily.  */
-int case_temp1;
-Lisp_Object case_temp2;
-
 static void set_canon (Lisp_Object case_table, Lisp_Object range, Lisp_Object elt);
 static void set_identity (Lisp_Object table, Lisp_Object c, Lisp_Object elt);
 static void shuffle (Lisp_Object table, Lisp_Object c, Lisp_Object elt);
@@ -302,4 +297,3 @@
   defsubr (&Sset_case_table);
   defsubr (&Sset_standard_case_table);
 }
-

=== modified file 'src/character.h'
--- src/character.h	2011-03-08 04:37:19 +0000
+++ src/character.h	2011-03-15 05:52:52 +0000
@@ -128,9 +128,6 @@
     XSETCDR ((x), tmp);			\
   } while (0)

-/* Nonzero iff C is an ASCII character.  */
-#define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80)
-
 /* Nonzero iff C is a character of code less than 0x100.  */
 #define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)


=== modified file 'src/lisp.h'
--- src/lisp.h	2011-03-15 01:32:33 +0000
+++ src/lisp.h	2011-03-15 04:23:51 +0000
@@ -841,6 +841,9 @@

 #endif	/* not __GNUC__ */

+/* Nonzero iff C is an ASCII character.  */
+#define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80)
+
 /* Almost equivalent to Faref (CT, IDX) with optimization for ASCII
    characters.  Do not check validity of CT.  */
 #define CHAR_TABLE_REF(CT, IDX)					\
@@ -2041,50 +2044,6 @@

 #define QUITP (!NILP (Vquit_flag) && NILP (Vinhibit_quit))
 \f
-/* Variables used locally in the following case handling macros.  */
-extern int case_temp1;
-extern Lisp_Object case_temp2;
-
-/* Current buffer's map from characters to lower-case characters.  */
-
-#define DOWNCASE_TABLE BVAR (current_buffer, downcase_table)
-
-/* Current buffer's map from characters to upper-case characters.  */
-
-#define UPCASE_TABLE BVAR (current_buffer, upcase_table)
-
-/* Downcase a character, or make no change if that cannot be done.  */
-
-#define DOWNCASE(CH)						\
-  ((case_temp1 = (CH),						\
-    case_temp2 = CHAR_TABLE_REF (DOWNCASE_TABLE, case_temp1),	\
-    NATNUMP (case_temp2))					\
-   ? XFASTINT (case_temp2) : case_temp1)
-
-/* 1 if CH is upper case.  */
-
-#define UPPERCASEP(CH) (DOWNCASE (CH) != (CH))
-
-/* 1 if CH is neither upper nor lower case.  */
-
-#define NOCASEP(CH) (UPCASE1 (CH) == (CH))
-
-/* 1 if CH is lower case.  */
-
-#define LOWERCASEP(CH) (!UPPERCASEP (CH) && !NOCASEP(CH))
-
-/* Upcase a character, or make no change if that cannot be done.  */
-
-#define UPCASE(CH) (!UPPERCASEP (CH) ? UPCASE1 (CH) : (CH))
-
-/* Upcase a character known to be not upper case.  */
-
-#define UPCASE1(CH)						\
-  ((case_temp1 = (CH),						\
-    case_temp2 = CHAR_TABLE_REF (UPCASE_TABLE, case_temp1),	\
-    NATNUMP (case_temp2))					\
-   ? XFASTINT (case_temp2) : case_temp1)
-
 extern Lisp_Object Vascii_downcase_table, Vascii_upcase_table;
 extern Lisp_Object Vascii_canon_table, Vascii_eqv_table;
 \f






^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-15  6:16 bug#8254: race condition in dired.c's scmp function Paul Eggert
@ 2011-03-15  7:06 ` Eli Zaretskii
  2011-03-15  7:31   ` Paul Eggert
  2011-03-17 16:56 ` Paul Eggert
  1 sibling, 1 reply; 13+ messages in thread
From: Eli Zaretskii @ 2011-03-15  7:06 UTC (permalink / raw)
  To: Paul Eggert; +Cc: 8254

> Date: Mon, 14 Mar 2011 23:16:26 -0700
> From: Paul Eggert <eggert@cs.ucla.edu>
> Cc: 
> 
> The following code in the Emacs trunk src/dired.c's scmp function has
> undefined behavior:
> 
>       while (l
> 	     && (DOWNCASE ((unsigned char) *s1++)
> 		 == DOWNCASE ((unsigned char) *s2++)))
> 	l--;
> 
> Because the DOWNCASE macro assigns to the global variables case_temp1
> and case_temp2, (DOWNCASE (x) == DOWNCASE (y)) is not valid, as the
> assignments can collide and lead to a race condition.
> [...]
> I plan to work around the problem with something like the following
> patch.

Whew!  How about a much simpler fix:

  while (l
  	 && (c1 = DOWNCASE ((unsigned char) *s1++),
	     c2 = DOWNCASE ((unsigned char) *s2++),
	     c1 == c2))
    l--;

(with suitable declarations of c1 and c2)?  Will that fix the
undefined behavior?





^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-15  7:06 ` Eli Zaretskii
@ 2011-03-15  7:31   ` Paul Eggert
  2011-03-15 10:50     ` Eli Zaretskii
  2011-03-15 11:36     ` Juanma Barranquero
  0 siblings, 2 replies; 13+ messages in thread
From: Paul Eggert @ 2011-03-15  7:31 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: 8254

On 03/15/2011 12:06 AM, Eli Zaretskii wrote:
> How about a much simpler fix:
> 
>   while (l
>   	 && (c1 = DOWNCASE ((unsigned char) *s1++),
> 	     c2 = DOWNCASE ((unsigned char) *s2++),
> 	     c1 == c2))
>     l--;
> 
> (with suitable declarations of c1 and c2)?  Will that fix the
> undefined behavior?

Yes.  But surely it's better to fix the problem so that
usage of DOWNCASE is less error-prone.  Generally speaking,
code is easier to read and contains fewer errors
when function-like macros act like functions.





^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-15  7:31   ` Paul Eggert
@ 2011-03-15 10:50     ` Eli Zaretskii
  2011-03-15 16:53       ` Paul Eggert
  2011-03-15 11:36     ` Juanma Barranquero
  1 sibling, 1 reply; 13+ messages in thread
From: Eli Zaretskii @ 2011-03-15 10:50 UTC (permalink / raw)
  To: Paul Eggert; +Cc: 8254

> Date: Tue, 15 Mar 2011 00:31:51 -0700
> From: Paul Eggert <eggert@cs.ucla.edu>
> CC: 8254@debbugs.gnu.org
> 
> On 03/15/2011 12:06 AM, Eli Zaretskii wrote:
> > How about a much simpler fix:
> > 
> >   while (l
> >   	 && (c1 = DOWNCASE ((unsigned char) *s1++),
> > 	     c2 = DOWNCASE ((unsigned char) *s2++),
> > 	     c1 == c2))
> >     l--;
> > 
> > (with suitable declarations of c1 and c2)?  Will that fix the
> > undefined behavior?
> 
> Yes.  But surely it's better to fix the problem so that
> usage of DOWNCASE is less error-prone.  Generally speaking,
> code is easier to read and contains fewer errors
> when function-like macros act like functions.

Maybe, but I wonder if there's a better solution even if we decide to
make these macros functions: I don't like to have the same static
function in every file that includes buffer.h, on platforms that don't
support inline functions.

Anyway, this is Stefan's and Chong's call.  I just voiced my
astonishment that such a simple problem needs such a jumbo change.





^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-15  7:31   ` Paul Eggert
  2011-03-15 10:50     ` Eli Zaretskii
@ 2011-03-15 11:36     ` Juanma Barranquero
  2011-03-15 16:52       ` Paul Eggert
  2011-03-15 16:58       ` Paul Eggert
  1 sibling, 2 replies; 13+ messages in thread
From: Juanma Barranquero @ 2011-03-15 11:36 UTC (permalink / raw)
  To: Paul Eggert; +Cc: 8254

On Tue, Mar 15, 2011 at 08:31, Paul Eggert <eggert@cs.ucla.edu> wrote:

> Generally speaking,
> code is easier to read and contains fewer errors
> when function-like macros act like functions.

That's true, but then there's adding complexity when it is not needed.

In this case, it is hard to know whether it is needed or not. On one
hand, the only suspicious use of DOWNCASE is the one you pointed out.

On the other hand, DOWNCASE begat UPPERCASEP, who begat LOWERCASEP and
UPCASE (and also ISUPPER in regex.c). Not to mention UPCASE1, which
uses the same variables and begat UPCASE (with LOWERCASEP) and
NOCASEP... Though most of these cases use ?: and && and not ==, it is
certainly tedious to check every instance of these macros.

A (perhaps stupid) idea: would it be possible to define
-DENABLE-CHECKING alternate versions of DOWNCASE and UPCASE1 which do
some additional checking for side effects? That would allow finding
the possible bugs during development.

    Juanma

^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-15 11:36     ` Juanma Barranquero
@ 2011-03-15 16:52       ` Paul Eggert
  2011-03-15 16:58       ` Paul Eggert
  1 sibling, 0 replies; 13+ messages in thread
From: Paul Eggert @ 2011-03-15 16:52 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: 8254

On 03/15/2011 04:36 AM, Juanma Barranquero wrote:

> there's adding complexity when it is not needed.

The patch subtracts complexity in one place (by removing global
variables) and adds it in another (by creating static inline
functions).  Whether the overall effect is to decrease complexity,
or to increase it, is debatable.  Either way, it's not much of
a change in complexity.

There are efforts underway to make Emacs multithreaded.  If that
happens, a change like this will be needed, as the existing
code is obviously not thread-safe.  I don't see any real downside
to installing this change in the trunk now.

> A (perhaps stupid) idea: would it be possible to define
> -DENABLE-CHECKING alternate versions of DOWNCASE and UPCASE1 which do
> some additional checking for side effects?

I plan to implement that sort of suggestion, but in a different
way, by adding an --enable-gcc-warnings option to 'configure',
which will cause it to pass extra options to GCC to catch
this sort of problem.

This option is already in used in several other projects, such
as GNU coreutils, and Emacs would benefit from it as well.
The option will be disabled by default, though, so that the warnings
don't surprise people who don't expect them.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-15 10:50     ` Eli Zaretskii
@ 2011-03-15 16:53       ` Paul Eggert
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Eggert @ 2011-03-15 16:53 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: 8254

On 03/15/2011 03:50 AM, Eli Zaretskii wrote:

> I wonder if there's a better solution even if we decide to
> make these macros functions

We could move these macros to a new include file, and have it
be included only by .c files that need the macros.  That would
be easy to do; the only real cost is that of having another
include file to worry about.

> I don't like to have the same static function in every file that
> includes buffer.h, on platforms that don't support inline functions.

These days, it's routine for compilers to inline.  For
old fashioned compilers that don't inline, it's routine to optimize
away static functions that are never used.  So, from an optimization
viewpoint, this problem is relatively unimportant.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-15 11:36     ` Juanma Barranquero
  2011-03-15 16:52       ` Paul Eggert
@ 2011-03-15 16:58       ` Paul Eggert
  2011-03-15 19:13         ` Stefan Monnier
  2011-03-16 13:19         ` Richard Stallman
  1 sibling, 2 replies; 13+ messages in thread
From: Paul Eggert @ 2011-03-15 16:58 UTC (permalink / raw)
  To: Juanma Barranquero; +Cc: 8254

On 03/15/2011 04:36 AM, Juanma Barranquero wrote:
  
> On the other hand, DOWNCASE begat UPPERCASEP, who begat LOWERCASEP and
> UPCASE (and also ISUPPER in regex.c). Not to mention UPCASE1, which
> uses the same variables and begat UPCASE (with LOWERCASEP) and
> NOCASEP... Though most of these cases use ?: and&&  and not ==, it is
> certainly tedious to check every instance of these macros.

I agree; and not only is it tedious, it's error-prone.  It's better
to fix the macros so that there's no need to check, as follows.  While
we're at it, we should simply get rid of the macros, by replacing
every use of UPPERCASEP with uppercasep, etc.

=== modified file 'src/buffer.h'
--- src/buffer.h	2011-03-15 07:04:00 +0000
+++ src/buffer.h	2011-03-15 07:28:00 +0000
@@ -1047,19 +1047,12 @@
  
  /* 1 if CH is upper case.  */
  
-#define UPPERCASEP(CH) (DOWNCASE (CH) != (CH))
-
-/* 1 if CH is neither upper nor lower case.  */
-
-#define NOCASEP(CH) (UPCASE1 (CH) == (CH))
-
-/* 1 if CH is lower case.  */
-
-#define LOWERCASEP(CH) (!UPPERCASEP (CH) && !NOCASEP(CH))
-
-/* Upcase a character, or make no change if that cannot be done.  */
-
-#define UPCASE(CH) (!UPPERCASEP (CH) ? UPCASE1 (CH) : (CH))
+static inline int
+uppercasep (int ch)
+{
+  return downcase (ch) != ch;
+}
+#define UPPERCASEP(CH) uppercasep (CH)
  
  /* Upcase a character known to be not upper case.  */
  
@@ -1070,3 +1063,30 @@
    return NATNUMP (up) ? XFASTINT (up) : ch;
  }
  #define UPCASE1(CH) upcase1 (CH)
+
+/* 1 if CH is neither upper nor lower case.  */
+
+static inline int
+nocasep (int ch)
+{
+  return upcase1 (ch) == ch;
+}
+#define NOCASEP(CH) nocasep (CH)
+
+/* 1 if CH is lower case.  */
+
+static inline int
+lowercasep (int ch)
+{
+  return !uppercasep (ch) && !nocasep (ch);
+}
+#define LOWERCASEP(CH) lowercasep (CH)
+
+/* Upcase a character, or make no change if that cannot be done.  */
+
+static inline EMACS_INT
+upcase (int ch)
+{
+  return uppercasep (ch) ? ch : upcase1 (ch);
+}
+#define UPCASE(CH) upcase (CH)







^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-15 16:58       ` Paul Eggert
@ 2011-03-15 19:13         ` Stefan Monnier
  2011-03-15 21:27           ` Paul Eggert
  2011-03-16 13:19         ` Richard Stallman
  1 sibling, 1 reply; 13+ messages in thread
From: Stefan Monnier @ 2011-03-15 19:13 UTC (permalink / raw)
  To: Paul Eggert; +Cc: Juanma Barranquero, 8254

> I agree; and not only is it tedious, it's error-prone.  It's better
> to fix the macros so that there's no need to check, as follows.  While
> we're at it, we should simply get rid of the macros, by replacing
> every use of UPPERCASEP with uppercasep, etc.

If we're turning those macros into functions, then I indeed think we
should get rid of the macros.  I would also welcome those functions
being real one-liners, as in:

static inline int uppercasep (int ch) { return downcase (ch) != ch; }


        Stefan





^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-15 19:13         ` Stefan Monnier
@ 2011-03-15 21:27           ` Paul Eggert
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Eggert @ 2011-03-15 21:27 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Juanma Barranquero, 8254

On 03/15/2011 12:13 PM, Stefan Monnier wrote:

> If we're turning those macros into functions, then I indeed think we
> should get rid of the macros.  I would also welcome those functions
> being real one-liners

OK, thanks, then I'll add the following patch to my planned change:

=== modified file 'src/ChangeLog'
--- src/ChangeLog	2011-03-15 18:53:29 +0000
+++ src/ChangeLog	2011-03-15 21:23:54 +0000
@@ -1,5 +1,16 @@
  2011-03-15  Paul Eggert  <eggert@cs.ucla.edu>
  
+	Use functions, not macros, for up- and down-casing (Bug#8254).
+	* buffer.h (DOWNCASE_TABLE, UPCASE_TABLE, DOWNCASE, UPPERCASEP):
+	(NOCASEP, LOWERCASEP, UPCASE, UPCASE1): Remove.  All callers changed
+	to use the following functions instead of these macros.
+	(downcase): Adjust to lack of DOWNCASE_TABLE.  Return int, not
+	EMACS_INT, since callers assume the returned value fits in int.
+	(upcase1): Likewise, for UPCASE_TABLE.
+	(uppercasep, lowercasep, upcase): New static inline functions.
+	* editfns.c (Fchar_equal): Remove no-longer-needed workaround for
+	the race-condition problem in the old DOWNCASE.
+
  	* regex.c (CHARSET_LOOKUP_RANGE_TABLE_RAW, POP_FAILURE_REG_OR_COUNT):
  	Rename locals to avoid shadowing.
  	(regex_compile, re_match_2_internal): Move locals to avoid shadowing.

=== modified file 'src/buffer.h'
--- src/buffer.h	2011-03-15 07:04:00 +0000
+++ src/buffer.h	2011-03-15 21:14:06 +0000
@@ -1027,46 +1027,30 @@
  #define PER_BUFFER_VALUE(BUFFER, OFFSET) \
        (*(Lisp_Object *)((OFFSET) + (char *) (BUFFER)))
  \f
-/* Current buffer's map from characters to lower-case characters.  */
-
-#define DOWNCASE_TABLE BVAR (current_buffer, downcase_table)
-
-/* Current buffer's map from characters to upper-case characters.  */
-
-#define UPCASE_TABLE BVAR (current_buffer, upcase_table)
-
-/* Downcase a character, or make no change if that cannot be done.  */
-
-static inline EMACS_INT
-downcase (int ch)
-{
-  Lisp_Object down = CHAR_TABLE_REF (DOWNCASE_TABLE, ch);
-  return NATNUMP (down) ? XFASTINT (down) : ch;
-}
-#define DOWNCASE(CH) downcase (CH)
-
-/* 1 if CH is upper case.  */
-
-#define UPPERCASEP(CH) (DOWNCASE (CH) != (CH))
-
-/* 1 if CH is neither upper nor lower case.  */
-
-#define NOCASEP(CH) (UPCASE1 (CH) == (CH))
-
-/* 1 if CH is lower case.  */
-
-#define LOWERCASEP(CH) (!UPPERCASEP (CH) && !NOCASEP(CH))
-
-/* Upcase a character, or make no change if that cannot be done.  */
-
-#define UPCASE(CH) (!UPPERCASEP (CH) ? UPCASE1 (CH) : (CH))
-
-/* Upcase a character known to be not upper case.  */
-
-static inline EMACS_INT
-upcase1 (int ch)
-{
-  Lisp_Object up = CHAR_TABLE_REF (UPCASE_TABLE, ch);
-  return NATNUMP (up) ? XFASTINT (up) : ch;
-}
-#define UPCASE1(CH) upcase1 (CH)
+/* Downcase a character C, or make no change if that cannot be done.  */
+static inline int
+downcase (int c)
+{
+  Lisp_Object downcase_table = BVAR (current_buffer, downcase_table);
+  Lisp_Object down = CHAR_TABLE_REF (downcase_table, c);
+  return NATNUMP (down) ? XFASTINT (down) : c;
+}
+
+/* 1 if C is upper case.  */
+static inline int uppercasep (int c) { return downcase (c) != c; }
+
+/* Upcase a character C known to be not upper case.  */
+static inline int
+upcase1 (int c)
+{
+  Lisp_Object upcase_table = BVAR (current_buffer, upcase_table);
+  Lisp_Object up = CHAR_TABLE_REF (upcase_table, c);
+  return NATNUMP (up) ? XFASTINT (up) : c;
+}
+
+/* 1 if C is lower case.  */
+static inline int lowercasep (int c)
+{ return !uppercasep (c) && upcase1 (c) != c; }
+
+/* Upcase a character C, or make no change if that cannot be done.  */
+static inline int upcase (int c) { return uppercasep (c) ? c : upcase1 (c); }

=== modified file 'src/casefiddle.c'
--- src/casefiddle.c	2011-03-15 17:18:02 +0000
+++ src/casefiddle.c	2011-03-15 21:14:06 +0000
@@ -64,13 +64,13 @@
  	multibyte = 1;
        if (! multibyte)
  	MAKE_CHAR_MULTIBYTE (c1);
-      c = DOWNCASE (c1);
+      c = downcase (c1);
        if (inword)
  	XSETFASTINT (obj, c | flags);
        else if (c == (XFASTINT (obj) & ~flagbits))
  	{
  	  if (! inword)
-	    c = UPCASE1 (c1);
+	    c = upcase1 (c1);
  	  if (! multibyte)
  	    MAKE_CHAR_UNIBYTE (c);
  	  XSETFASTINT (obj, c | flags);
@@ -92,10 +92,10 @@
  	  MAKE_CHAR_MULTIBYTE (c);
  	  c1 = c;
  	  if (inword && flag != CASE_CAPITALIZE_UP)
-	    c = DOWNCASE (c);
-	  else if (!UPPERCASEP (c)
+	    c = downcase (c);
+	  else if (!uppercasep (c)
  		   && (!inword || flag != CASE_CAPITALIZE_UP))
-	    c = UPCASE1 (c1);
+	    c = upcase1 (c1);
  	  if ((int) flag >= (int) CASE_CAPITALIZE)
  	    inword = (SYNTAX (c) == Sword);
  	  if (c != c1)
@@ -133,10 +133,10 @@
  	    }
  	  c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
  	  if (inword && flag != CASE_CAPITALIZE_UP)
-	    c = DOWNCASE (c);
-	  else if (!UPPERCASEP (c)
+	    c = downcase (c);
+	  else if (!uppercasep (c)
  		   && (!inword || flag != CASE_CAPITALIZE_UP))
-	    c = UPCASE1 (c);
+	    c = upcase1 (c);
  	  if ((int) flag >= (int) CASE_CAPITALIZE)
  	    inword = (SYNTAX (c) == Sword);
  	  o += CHAR_STRING (c, o);
@@ -243,10 +243,10 @@
  	}
        c2 = c;
        if (inword && flag != CASE_CAPITALIZE_UP)
-	c = DOWNCASE (c);
-      else if (!UPPERCASEP (c)
+	c = downcase (c);
+      else if (!uppercasep (c)
  	       && (!inword || flag != CASE_CAPITALIZE_UP))
-	c = UPCASE1 (c);
+	c = upcase1 (c);
        if ((int) flag >= (int) CASE_CAPITALIZE)
  	inword = ((SYNTAX (c) == Sword)
  		  && (inword || !syntax_prefix_flag_p (c)));

=== modified file 'src/dired.c'
--- src/dired.c	2011-03-15 18:08:06 +0000
+++ src/dired.c	2011-03-15 21:14:06 +0000
@@ -790,8 +790,8 @@
    if (completion_ignore_case)
      {
        while (l
-	     && (DOWNCASE ((unsigned char) *s1++)
-		 == DOWNCASE ((unsigned char) *s2++)))
+	     && (downcase ((unsigned char) *s1++)
+		 == downcase ((unsigned char) *s2++)))
  	l--;
      }
    else

=== modified file 'src/editfns.c'
--- src/editfns.c	2011-03-13 06:27:18 +0000
+++ src/editfns.c	2011-03-15 21:23:02 +0000
@@ -1374,7 +1374,7 @@
        memcpy (r, p, q - p);
        r[q - p] = 0;
        strcat (r, SSDATA (login));
-      r[q - p] = UPCASE ((unsigned char) r[q - p]);
+      r[q - p] = upcase ((unsigned char) r[q - p]);
        strcat (r, q + 1);
        full = build_string (r);
      }
@@ -4213,7 +4213,7 @@
  {
    int i1, i2;
    /* Check they're chars, not just integers, otherwise we could get array
-     bounds violations in DOWNCASE.  */
+     bounds violations in downcase.  */
    CHECK_CHARACTER (c1);
    CHECK_CHARACTER (c2);
  
@@ -4222,9 +4222,6 @@
    if (NILP (BVAR (current_buffer, case_fold_search)))
      return Qnil;
  
-  /* Do these in separate statements,
-     then compare the variables.
-     because of the way DOWNCASE uses temp variables.  */
    i1 = XFASTINT (c1);
    if (NILP (BVAR (current_buffer, enable_multibyte_characters))
        && ! ASCII_CHAR_P (i1))
@@ -4237,9 +4234,7 @@
      {
        MAKE_CHAR_MULTIBYTE (i2);
      }
-  i1 = DOWNCASE (i1);
-  i2 = DOWNCASE (i2);
-  return (i1 == i2 ? Qt :  Qnil);
+  return (downcase (i1) == downcase (i2) ? Qt :  Qnil);
  }
  \f
  /* Transpose the markers in two regions of the current buffer, and

=== modified file 'src/fileio.c'
--- src/fileio.c	2011-03-15 03:17:20 +0000
+++ src/fileio.c	2011-03-15 21:14:06 +0000
@@ -178,7 +178,7 @@
  
  	    str = SSDATA (errstring);
  	    c = STRING_CHAR ((unsigned char *) str);
-	    Faset (errstring, make_number (0), make_number (DOWNCASE (c)));
+	    Faset (errstring, make_number (0), make_number (downcase (c)));
  	  }
  
  	xsignal (Qfile_error,

=== modified file 'src/keyboard.c'
--- src/keyboard.c	2011-03-15 17:13:02 +0000
+++ src/keyboard.c	2011-03-15 21:14:06 +0000
@@ -9836,7 +9836,7 @@
  	  && /* indec.start >= t && fkey.start >= t && */ keytran.start >= t
  	  && INTEGERP (key)
  	  && ((CHARACTERP (make_number (XINT (key) & ~CHAR_MODIFIER_MASK))
-	       && UPPERCASEP (XINT (key) & ~CHAR_MODIFIER_MASK))
+	       && uppercasep (XINT (key) & ~CHAR_MODIFIER_MASK))
  	      || (XINT (key) & shift_modifier)))
  	{
  	  Lisp_Object new_key;
@@ -9847,7 +9847,7 @@
  	  if (XINT (key) & shift_modifier)
  	    XSETINT (new_key, XINT (key) & ~shift_modifier);
  	  else
-	    XSETINT (new_key, (DOWNCASE (XINT (key) & ~CHAR_MODIFIER_MASK)
+	    XSETINT (new_key, (downcase (XINT (key) & ~CHAR_MODIFIER_MASK)
  			       | (XINT (key) & CHAR_MODIFIER_MASK)));
  
  	  /* We have to do this unconditionally, regardless of whether
@@ -9875,13 +9875,13 @@
  	      || (INTEGERP (key)
  		  && (KEY_TO_CHAR (key)
  		      < XCHAR_TABLE (BVAR (current_buffer, downcase_table))->size)
-		  && UPPERCASEP (KEY_TO_CHAR (key))))
+		  && uppercasep (KEY_TO_CHAR (key))))
  	    {
  	      Lisp_Object new_key
  		= (modifiers & shift_modifier
  		   ? apply_modifiers (modifiers & ~shift_modifier,
  				      XCAR (breakdown))
-		   : make_number (DOWNCASE (KEY_TO_CHAR (key)) | modifiers));
+		   : make_number (downcase (KEY_TO_CHAR (key)) | modifiers));
  
  	      original_uppercase = key;
  	      original_uppercase_position = t - 1;

=== modified file 'src/process.c'
--- src/process.c	2011-03-14 22:49:41 +0000
+++ src/process.c	2011-03-15 21:14:06 +0000
@@ -495,7 +495,7 @@
  	    string = (code_convert_string_norecord
  		      (string, Vlocale_coding_system, 0));
  	  c1 = STRING_CHAR (SDATA (string));
-	  c2 = DOWNCASE (c1);
+	  c2 = downcase (c1);
  	  if (c1 != c2)
  	    Faset (string, make_number (0), make_number (c2));
  	}

=== modified file 'src/regex.c'
--- src/regex.c	2011-03-15 18:53:29 +0000
+++ src/regex.c	2011-03-15 21:14:06 +0000
@@ -340,7 +340,7 @@
  		       || ((c) >= 'A' && (c) <= 'Z'))	\
  		    : SYNTAX (c) == Sword)
  
-# define ISLOWER(c) (LOWERCASEP (c))
+# define ISLOWER(c) lowercasep (c)
  
  # define ISPUNCT(c) (IS_REAL_ASCII (c)				\
  		    ? ((c) > ' ' && (c) < 0177			\
@@ -351,7 +351,7 @@
  
  # define ISSPACE(c) (SYNTAX (c) == Swhitespace)
  
-# define ISUPPER(c) (UPPERCASEP (c))
+# define ISUPPER(c) uppercasep (c)
  
  # define ISWORD(c) (SYNTAX (c) == Sword)
  

=== modified file 'src/search.c'
--- src/search.c	2011-03-15 18:13:15 +0000
+++ src/search.c	2011-03-15 21:14:06 +0000
@@ -2469,7 +2469,7 @@
  	  else
  	    FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
  
-	  if (LOWERCASEP (c))
+	  if (lowercasep (c))
  	    {
  	      /* Cannot be all caps if any original char is lower case */
  
@@ -2479,7 +2479,7 @@
  	      else
  		some_multiletter_word = 1;
  	    }
-	  else if (UPPERCASEP (c))
+	  else if (uppercasep (c))
  	    {
  	      some_uppercase = 1;
  	      if (SYNTAX (prevc) != Sword)






^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-15 16:58       ` Paul Eggert
  2011-03-15 19:13         ` Stefan Monnier
@ 2011-03-16 13:19         ` Richard Stallman
  2011-03-16 20:02           ` Paul Eggert
  1 sibling, 1 reply; 13+ messages in thread
From: Richard Stallman @ 2011-03-16 13:19 UTC (permalink / raw)
  To: Paul Eggert; +Cc: lekktu, 8254

    I agree; and not only is it tedious, it's error-prone.  It's better
    to fix the macros so that there's no need to check, as follows.  While
    we're at it, we should simply get rid of the macros, by replacing
    every use of UPPERCASEP with uppercasep, etc.

I see that uppercasep uses inline.  That works fine in GCC, but does
it work in all compilers anyone wants to use?  If not, we should leave
them as macros.  These are used in some loops so their speed makes a
difference.


-- 
Dr Richard Stallman
President, Free Software Foundation
51 Franklin St
Boston MA 02110
USA
www.fsf.org, www.gnu.org





^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-16 13:19         ` Richard Stallman
@ 2011-03-16 20:02           ` Paul Eggert
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Eggert @ 2011-03-16 20:02 UTC (permalink / raw)
  To: rms; +Cc: lekktu, 8254

On 03/16/2011 06:19 AM, Richard Stallman wrote:
> I see that uppercasep uses inline.  That works fine in GCC, but does
> it work in all compilers anyone wants to use?

I expect so, yes.  That is, for all compilers that anybody
wants to use,  I expect that typically the performance difference
will be so small that it will be hard to measure and will not be
worth worrying about.  I attempted to simulate this by compiling
without optimization, both without and with the change,
and measured roughly a 3% performance degradation on a simple
large test case (running emacs interactively to upcase an 80 MB
text file that was mostly lower case).

This is sort of the worst case, since it assumes no inlining.
In the normal case, when optimization is enabled, I observed
a 3% performance improvement (on the same benchmark) due to
the change.

These measurements are noisy and approximate, but they indicate
that there's not much performance impact either way in practice.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* bug#8254: race condition in dired.c's scmp function
  2011-03-15  6:16 bug#8254: race condition in dired.c's scmp function Paul Eggert
  2011-03-15  7:06 ` Eli Zaretskii
@ 2011-03-17 16:56 ` Paul Eggert
  1 sibling, 0 replies; 13+ messages in thread
From: Paul Eggert @ 2011-03-17 16:56 UTC (permalink / raw)
  To: 8254-done

I've committed a patch for this in bzr 103679.





^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2011-03-17 16:56 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-03-15  6:16 bug#8254: race condition in dired.c's scmp function Paul Eggert
2011-03-15  7:06 ` Eli Zaretskii
2011-03-15  7:31   ` Paul Eggert
2011-03-15 10:50     ` Eli Zaretskii
2011-03-15 16:53       ` Paul Eggert
2011-03-15 11:36     ` Juanma Barranquero
2011-03-15 16:52       ` Paul Eggert
2011-03-15 16:58       ` Paul Eggert
2011-03-15 19:13         ` Stefan Monnier
2011-03-15 21:27           ` Paul Eggert
2011-03-16 13:19         ` Richard Stallman
2011-03-16 20:02           ` Paul Eggert
2011-03-17 16:56 ` Paul Eggert

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).