unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Dmitry Antipov <dmantipov@yandex.ru>
To: emacs-devel@gnu.org
Subject: Old topic(s) again [was: Re: Proposal: immediate strings]
Date: Wed, 04 Jul 2012 12:27:26 +0400	[thread overview]
Message-ID: <4FF3FE6E.8080202@yandex.ru> (raw)
In-Reply-To: <83ipf4wk00.fsf@gnu.org>

[-- Attachment #1: Type: text/plain, Size: 89 bytes --]

This feature has not been abandoned, so I would like to get more comments on it.

Dmitry

[-- Attachment #2: immstr.patch --]
[-- Type: text/plain, Size: 22868 bytes --]

=== modified file 'configure.in'
--- configure.in	2012-07-04 08:07:26 +0000
+++ configure.in	2012-07-04 08:09:13 +0000
@@ -3080,6 +3080,29 @@
      declarations.  Define as empty for no equivalent.])
 fi
 
+dnl Determine the basic type of ptrdiff_t.
+AC_CHECK_SIZEOF([int])
+AC_CHECK_SIZEOF([long])
+AC_CHECK_SIZEOF([long long])
+AC_CHECK_SIZEOF([ptrdiff_t])
+AC_MSG_CHECKING([for the basic type of ptrdiff_t])
+if test $ac_cv_sizeof_int = $ac_cv_sizeof_ptrdiff_t; then
+   emacs_cv_type_ptrdiff_t="int"
+elif test $ac_cv_sizeof_long = $ac_cv_sizeof_ptrdiff_t; then
+   emacs_cv_type_ptrdiff_t="long"
+elif test $ac_cv_sizeof_long_long = $ac_cv_sizeof_ptrdiff_t; then
+   emacs_cv_type_ptrdiff_t="long long"
+else
+   emacs_cv_type_ptrdiff_t="unknown"
+fi
+AC_MSG_RESULT([$emacs_cv_type_ptrdiff_t])
+if test "$emacs_cv_type_ptrdiff_t" != "unknown"; then
+   AC_DEFINE_UNQUOTED([TYPE_PTRDIFF_T], [$emacs_cv_type_ptrdiff_t],
+                      [Define to the basic type of ptrdiff_t])
+else
+   AC_MSG_ERROR([Unable to find the basic type of ptrdiff_t.])
+fi
+
 dnl Fixme: AC_SYS_POSIX_TERMIOS should probably be used, but it's not clear
 dnl how the tty code is related to POSIX and/or other versions of termios.
 dnl The following looks like a useful start.

=== modified file 'src/alloc.c'
--- src/alloc.c	2012-07-03 16:35:53 +0000
+++ src/alloc.c	2012-07-04 08:09:13 +0000
@@ -147,20 +147,14 @@
 /* Mark, unmark, query mark bit of a Lisp string.  S must be a pointer
    to a struct Lisp_String.  */
 
-#define MARK_STRING(S)		((S)->size |= ARRAY_MARK_FLAG)
-#define UNMARK_STRING(S)	((S)->size &= ~ARRAY_MARK_FLAG)
-#define STRING_MARKED_P(S)	(((S)->size & ARRAY_MARK_FLAG) != 0)
+#define MARK_STRING(S)		((S)->u.imm.gcmarkbit = 1)
+#define UNMARK_STRING(S)	((S)->u.imm.gcmarkbit = 0)
+#define STRING_MARKED_P(S)	((S)->u.imm.gcmarkbit)
 
 #define VECTOR_MARK(V)		((V)->header.size |= ARRAY_MARK_FLAG)
 #define VECTOR_UNMARK(V)	((V)->header.size &= ~ARRAY_MARK_FLAG)
 #define VECTOR_MARKED_P(V)	(((V)->header.size & ARRAY_MARK_FLAG) != 0)
 
-/* Value is the number of bytes of S, a pointer to a struct Lisp_String.
-   Be careful during GC, because S->size contains the mark bit for
-   strings.  */
-
-#define GC_STRING_BYTES(S)	(STRING_BYTES (S))
-
 /* Global variables.  */
 struct emacs_globals globals;
 
@@ -394,6 +388,7 @@
 static void mark_stack (void);
 static int live_vector_p (struct mem_node *, void *);
 static int live_buffer_p (struct mem_node *, void *);
+static int live_string_data_p (struct Lisp_String *);
 static int live_string_p (struct mem_node *, void *);
 static int live_cons_p (struct mem_node *, void *);
 static int live_symbol_p (struct mem_node *, void *);
@@ -1709,6 +1704,37 @@
 
 static EMACS_INT total_string_size;
 
+#ifdef GC_STRING_STATS
+
+/* All of these counters are accurate only right after a GC,
+   because they are updated only during the sweep phase.  */
+
+/* Number of live immediate strings.  */
+
+static EMACS_INT total_imm_strings;
+
+/* Amount of data bytes used by them.  */
+
+static EMACS_INT total_imm_bytes;
+
+/* Number of intervals attached to them.  */
+
+static EMACS_INT total_imm_intervals;
+
+/* Number of live normal strings.  */
+
+static EMACS_INT total_dat_strings;
+
+/* Amount of data bytes used by them.  */
+
+static EMACS_INT total_dat_bytes;
+
+/* Number of intervals attached to them.  */
+
+static EMACS_INT total_dat_intervals;
+
+#endif /* GC_STRING_STATS */
+
 /* Given a pointer to a Lisp_String S which is on the free-list
    string_free_list, return a pointer to its successor in the
    free-list.  */
@@ -1720,7 +1746,8 @@
    a pointer to the `u.data' member of its sdata structure; the
    structure starts at a constant offset in front of that.  */
 
-#define SDATA_OF_STRING(S) ((struct sdata *) ((S)->data - SDATA_DATA_OFFSET))
+#define SDATA_OF_STRING(S) ((S)->u.imm.immbit ? (struct sdata *) NULL \
+  : ((struct sdata *) ((S)->u.dat.data - SDATA_DATA_OFFSET)))
 
 
 #ifdef GC_CHECK_STRING_OVERRUN
@@ -1797,26 +1824,32 @@
   empty_multibyte_string = make_pure_string ("", 0, 0, 1);
 }
 
-
 #ifdef GC_CHECK_STRING_BYTES
 
 static int check_string_bytes_count;
 
-#define CHECK_STRING_BYTES(S)	STRING_BYTES (S)
-
-
-/* Like GC_STRING_BYTES, but with debugging check.  */
+#define CHECK_STRING_BYTES(S) string_bytes (S)
 
 ptrdiff_t
 string_bytes (struct Lisp_String *s)
 {
-  ptrdiff_t nbytes =
-    (s->size_byte < 0 ? s->size & ~ARRAY_MARK_FLAG : s->size_byte);
-
-  if (!PURE_POINTER_P (s)
-      && s->data
-      && nbytes != SDATA_NBYTES (SDATA_OF_STRING (s)))
-    abort ();
+  ptrdiff_t nbytes;
+
+  if (s->u.imm.immbit)
+    {
+      nbytes = s->u.imm.size_byte == STRING_UNIBYTE_IMM_MARK ?
+	s->u.imm.size : s->u.imm.size_byte;
+      eassert (nbytes < STRING_IMM_SIZE);
+    }
+  else
+    {
+      nbytes = s->u.dat.size_byte == STRING_UNIBYTE_DAT_MARK ?
+	s->u.dat.size : s->u.dat.size_byte;
+      eassert (nbytes >= STRING_IMM_SIZE);
+      if (!PURE_POINTER_P (s) && s->u.dat.data)
+	eassert (nbytes == SDATA_NBYTES (SDATA_OF_STRING (s)));
+    }
+
   return nbytes;
 }
 
@@ -1841,7 +1874,7 @@
 	CHECK_STRING_BYTES (from->string);
 
       if (from->string)
-	nbytes = GC_STRING_BYTES (from->string);
+	nbytes = string_bytes (from->string);
       else
 	nbytes = SDATA_NBYTES (from);
 
@@ -1926,8 +1959,11 @@
       for (i = STRING_BLOCK_SIZE - 1; i >= 0; --i)
 	{
 	  s = b->strings + i;
-	  /* Every string on a free list should have NULL data pointer.  */
-	  s->data = NULL;
+	  /* A fresh free-list entry is immediate, unmarked, and filled
+	     with the 0xff pattern that live_string_data_p treats as dead.  */
+	  s->u.imm.immbit = 1;
+	  s->u.imm.gcmarkbit = 0;
+	  memset (s->u.imm.data, 0xff, STRING_IMM_SIZE);
 	  NEXT_FREE_LISP_STRING (s) = string_free_list;
 	  string_free_list = s;
 	}
@@ -2043,13 +2077,11 @@
   MALLOC_UNBLOCK_INPUT;
 
   data->string = s;
-  s->data = SDATA_DATA (data);
+  s->u.dat.data = SDATA_DATA (data);
 #ifdef GC_CHECK_STRING_BYTES
   SDATA_NBYTES (data) = nbytes;
 #endif
-  s->size = nchars;
-  s->size_byte = nbytes;
-  s->data[nbytes] = '\0';
+  s->u.dat.data[nbytes] = '\0';
 #ifdef GC_CHECK_STRING_OVERRUN
   memcpy ((char *) data + needed, string_overrun_cookie,
 	  GC_STRING_OVERRUN_COOKIE_SIZE);
@@ -2070,6 +2102,12 @@
   total_strings = total_free_strings = 0;
   total_string_size = 0;
 
+#ifdef GC_STRING_STATS
+  total_imm_strings = total_dat_strings = 0;
+  total_imm_bytes = total_dat_bytes = 0;
+  total_imm_intervals = total_dat_intervals = 0;
+#endif
+
   /* Scan strings_blocks, free Lisp_Strings that aren't marked.  */
   for (b = string_blocks; b; b = next)
     {
@@ -2082,49 +2120,64 @@
 	{
 	  struct Lisp_String *s = b->strings + i;
 
-	  if (s->data)
+	  if (STRING_MARKED_P (s))
+	    {	      
+	      /* String is live; unmark it and its intervals.  */
+	      UNMARK_STRING (s);
+
+	      if (!NULL_INTERVAL_P (s->intervals))
+		UNMARK_BALANCE_INTERVALS (s->intervals);
+
+	      ++total_strings;
+	      total_string_size += string_bytes (s);
+#ifdef GC_STRING_STATS
+	      if (s->u.imm.immbit)
+		{
+		  total_imm_strings++;
+		  total_imm_bytes += string_bytes (s);
+		  if (!NULL_INTERVAL_P (s->intervals))
+		    total_imm_intervals++;
+		}
+	      else
+		{
+		  total_dat_strings++;
+		  total_dat_bytes += string_bytes (s);
+		  if (!NULL_INTERVAL_P (s->intervals))
+		    total_dat_intervals++;
+		}
+#endif /* GC_STRING_STATS */
+	    }
+	  else
 	    {
-	      /* String was not on free-list before.  */
-	      if (STRING_MARKED_P (s))
-		{
-		  /* String is live; unmark it and its intervals.  */
-		  UNMARK_STRING (s);
-
-		  if (!NULL_INTERVAL_P (s->intervals))
-		    UNMARK_BALANCE_INTERVALS (s->intervals);
-
-		  ++total_strings;
-		  total_string_size += STRING_BYTES (s);
-		}
+	      if (s->u.imm.immbit)
+		/* Fill data with a special pattern, which GC
+		   uses to recognize dead immediate strings.  */
+		memset (s->u.imm.data, 0xff, STRING_IMM_SIZE);
 	      else
 		{
-		  /* String is dead.  Put it on the free-list.  */
-		  struct sdata *data = SDATA_OF_STRING (s);
+		  if (s->u.dat.data)
+		    {
+		      /* String is dead.  Put it on the free-list.  */
+		      struct sdata *data = SDATA_OF_STRING (s);
 
-		  /* Save the size of S in its sdata so that we know
-		     how large that is.  Reset the sdata's string
-		     back-pointer so that we know it's free.  */
+		      /* Save the size of S in its sdata so that we know
+			 how large that is.  Reset the sdata's string
+			 back-pointer so that we know it's free.  */
 #ifdef GC_CHECK_STRING_BYTES
-		  if (GC_STRING_BYTES (s) != SDATA_NBYTES (data))
-		    abort ();
+		      if (string_bytes (s) != SDATA_NBYTES (data))
+			abort ();
 #else
-		  data->u.nbytes = GC_STRING_BYTES (s);
+		      data->u.nbytes = string_bytes (s);
 #endif
-		  data->string = NULL;
-
-		  /* Reset the strings's `data' member so that we
-		     know it's free.  */
-		  s->data = NULL;
-
-		  /* Put the string on the free-list.  */
-		  NEXT_FREE_LISP_STRING (s) = string_free_list;
-		  string_free_list = s;
-		  ++nfree;
+		      data->string = NULL;
+
+		      /* Reset the strings's `data' member so that we
+			 know it's free.  */
+		      s->u.dat.data = NULL;
+		    }
 		}
-	    }
-	  else
-	    {
-	      /* S was on the free-list before.  Put it there again.  */
+
+	      /* Put the string on the free-list.  */
 	      NEXT_FREE_LISP_STRING (s) = string_free_list;
 	      string_free_list = s;
 	      ++nfree;
@@ -2216,12 +2269,12 @@
 	  /* Check that the string size recorded in the string is the
 	     same as the one recorded in the sdata structure. */
 	  if (from->string
-	      && GC_STRING_BYTES (from->string) != SDATA_NBYTES (from))
+	      && string_bytes (from->string) != SDATA_NBYTES (from))
 	    abort ();
 #endif /* GC_CHECK_STRING_BYTES */
 
 	  if (from->string)
-	    nbytes = GC_STRING_BYTES (from->string);
+	    nbytes = string_bytes (from->string);
 	  else
 	    nbytes = SDATA_NBYTES (from);
 
@@ -2257,7 +2310,7 @@
 		{
 		  eassert (tb != b || to < from);
 		  memmove (to, from, nbytes + GC_STRING_EXTRA);
-		  to->string->data = SDATA_DATA (to);
+		  to->string->u.dat.data = SDATA_DATA (to);
 		}
 
 	      /* Advance past the sdata we copied to.  */
@@ -2497,8 +2550,25 @@
     return empty_multibyte_string;
 
   s = allocate_string ();
+  s->u.imm.gcmarkbit = 0;
+
+  if (nbytes < STRING_IMM_SIZE)
+    {
+      s->u.imm.immbit = 1;
+      s->u.imm.data[nbytes] = '\0';
+      s->u.imm.size = nchars;
+      s->u.imm.size_byte = nbytes;
+    }
+  else
+    {
+      s->u.imm.immbit = 0;
+      s->u.dat.data = NULL;
+      s->u.dat.size = nchars;
+      s->u.dat.size_byte = nbytes;
+      allocate_string_data (s, nchars, nbytes);
+    }
+
   s->intervals = NULL_INTERVAL;
-  allocate_string_data (s, nchars, nbytes);
   XSETSTRING (string, s);
   string_chars_consed += nbytes;
   return string;
@@ -4196,6 +4266,22 @@
   x->color = MEM_BLACK;
 }
 
+/* Non-zero if data of S is valid.  */
+
+static inline int
+live_string_data_p (struct Lisp_String *s)
+{
+  if (s->u.imm.immbit)
+    {
+      unsigned char *p;
+
+      for (p = s->u.imm.data; p < s->u.imm.data + STRING_IMM_SIZE; p++)
+	if (*p != 0xff)
+	  return 1;
+      return 0;
+    }
+  return s->u.dat.data != NULL;
+}
 
 /* Value is non-zero if P is a pointer to a live Lisp string on
    the heap.  M is a pointer to the mem_block for P.  */
@@ -4213,7 +4299,7 @@
       return (offset >= 0
 	      && offset % sizeof b->strings[0] == 0
 	      && offset < (STRING_BLOCK_SIZE * sizeof b->strings[0])
-	      && ((struct Lisp_String *) p)->data != NULL);
+	      && live_string_data_p ((struct Lisp_String *) p));
     }
   else
     return 0;
@@ -5159,15 +5245,29 @@
   struct Lisp_String *s;
 
   s = (struct Lisp_String *) pure_alloc (sizeof *s, Lisp_String);
-  s->data = (unsigned char *) find_string_data_in_pure (data, nbytes);
-  if (s->data == NULL)
-    {
-      s->data = (unsigned char *) pure_alloc (nbytes + 1, -1);
-      memcpy (s->data, data, nbytes);
-      s->data[nbytes] = '\0';
-    }
-  s->size = nchars;
-  s->size_byte = multibyte ? nbytes : -1;
+
+  if (nbytes < STRING_IMM_SIZE)
+    {
+      memcpy (s->u.imm.data, data, nbytes);
+      s->u.imm.data[nbytes] = '\0';
+      s->u.imm.immbit = 1;
+      s->u.imm.size = nchars;
+      s->u.imm.size_byte = multibyte ? nbytes : STRING_UNIBYTE_IMM_MARK;
+    }
+  else
+    {
+      s->u.dat.data = (unsigned char *) find_string_data_in_pure (data, nbytes);
+      if (s->u.dat.data == NULL)
+	{
+	  s->u.dat.data = (unsigned char *) pure_alloc (nbytes + 1, -1);
+	  memcpy (s->u.dat.data, data, nbytes);
+	  s->u.dat.data[nbytes] = '\0';
+	}
+      s->u.imm.immbit = 0;
+      s->u.dat.size = nchars;
+      s->u.dat.size_byte = multibyte ? nbytes : STRING_UNIBYTE_DAT_MARK;
+    }
+
   s->intervals = NULL_INTERVAL;
   XSETSTRING (string, s);
   return string;
@@ -5184,9 +5284,23 @@
   ptrdiff_t nchars = strlen (data);
 
   s = (struct Lisp_String *) pure_alloc (sizeof *s, Lisp_String);
-  s->size = nchars;
-  s->size_byte = -1;
-  s->data = (unsigned char *) data;
+
+  if (nchars < STRING_IMM_SIZE)
+    {
+      memcpy (s->u.imm.data, data, nchars);
+      s->u.imm.data[nchars] = '\0';
+      s->u.imm.immbit = 1;
+      s->u.imm.size = nchars;
+      s->u.imm.size_byte = STRING_UNIBYTE_IMM_MARK;
+    }
+  else
+    {
+      s->u.dat.data = (unsigned char *) data;
+      s->u.imm.immbit = 0;
+      s->u.dat.size = nchars;
+      s->u.dat.size_byte = STRING_UNIBYTE_DAT_MARK;
+    }
+
   s->intervals = NULL_INTERVAL;
   XSETSTRING (string, s);
   return string;
@@ -6587,6 +6701,34 @@
   return Flist (8, consed);
 }
 
+#ifdef GC_STRING_STATS
+
+DEFUN ("string-stats", Fstring_stats, Sstring_stats, 0, 0, 0,
+       doc: /* Return a list of counters that measure how many
+strings of a particular internal structure are alive after the last
+garbage collection, and how many bytes are in them.
+The elements of the value are as follows:
+  (IMM-STRINGS IMM-BYTES IMM-INTERVALS DAT-STRINGS DAT-BYTES DAT-INTERVALS)
+where IMM-STRINGS is the number of immediate strings, IMM-BYTES is the total
+number of bytes in them, and IMM-INTERVALS is the number of immediate strings
+with non-nil text properties.  The remaining three numbers have the same
+meaning for normal strings, respectively.  */)
+  (void)
+{
+  Lisp_Object data[6];
+
+  data[0] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_strings));
+  data[1] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_bytes));
+  data[2] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_intervals));
+  data[3] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_strings));
+  data[4] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_bytes));
+  data[5] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_intervals));
+
+  return Flist (6, data);
+}
+
+#endif /* GC_STRING_STATS */
+
 /* Find at most FIND_MAX symbols which have OBJ as their value or
    function.  This is used in gdbinit's `xwhichsymbols' command.  */
 
@@ -6815,7 +6957,9 @@
   defsubr (&Sgarbage_collect);
   defsubr (&Smemory_limit);
   defsubr (&Smemory_use_counts);
-
+#ifdef GC_STRING_STATS
+  defsubr (&Sstring_stats);
+#endif
 #if GC_MARK_STACK == GC_USE_GCPROS_CHECK_ZOMBIES
   defsubr (&Sgc_status);
 #endif

=== modified file 'src/fns.c'
--- src/fns.c	2012-06-28 07:50:27 +0000
+++ src/fns.c	2012-07-04 08:09:13 +0000
@@ -2166,8 +2166,8 @@
 	  int len = CHAR_STRING (charval, str);
 	  ptrdiff_t size_byte = SBYTES (array);
 
-	  if (INT_MULTIPLY_OVERFLOW (SCHARS (array), len)
-	      || SCHARS (array) * len != size_byte)
+	  if (INT_MULTIPLY_OVERFLOW (size, len)
+	      || size * len != size_byte)
 	    error ("Attempt to change byte length of a string");
 	  for (idx = 0; idx < size_byte; idx++)
 	    *p++ = str[idx % len];

=== modified file 'src/lisp.h'
--- src/lisp.h	2012-07-03 20:34:47 +0000
+++ src/lisp.h	2012-07-04 08:09:13 +0000
@@ -69,7 +69,8 @@
     BITS_PER_SHORT     = CHAR_BIT * sizeof (short),
     BITS_PER_INT       = CHAR_BIT * sizeof (int),
     BITS_PER_LONG      = CHAR_BIT * sizeof (long int),
-    BITS_PER_EMACS_INT = CHAR_BIT * sizeof (EMACS_INT)
+    BITS_PER_EMACS_INT = CHAR_BIT * sizeof (EMACS_INT),
+    BITS_PER_PTRDIFF_T = CHAR_BIT * sizeof (ptrdiff_t)
   };
 
 /* printmax_t and uprintmax_t are types for printing large integers.
@@ -576,23 +577,6 @@
    eassert ((IDX) >= 0 && (IDX) < ASIZE (ARRAY)),	\
    AREF ((ARRAY), (IDX)) = (VAL))
 
-/* Convenience macros for dealing with Lisp strings.  */
-
-#define SDATA(string)		(XSTRING (string)->data + 0)
-#define SREF(string, index)	(SDATA (string)[index] + 0)
-#define SSET(string, index, new) (SDATA (string)[index] = (new))
-#define SCHARS(string)		(XSTRING (string)->size + 0)
-#define SBYTES(string)		(STRING_BYTES (XSTRING (string)) + 0)
-
-/* Avoid "differ in sign" warnings.  */
-#define SSDATA(x)  ((char *) SDATA (x))
-
-#define STRING_SET_CHARS(string, newsize) \
-    (XSTRING (string)->size = (newsize))
-
-#define STRING_COPYIN(string, index, new, count) \
-    memcpy (SDATA (string) + index, new, count)
-
 /* Type checking.  */
 
 #define CHECK_TYPE(ok, Qxxxp, x) \
@@ -678,24 +662,40 @@
 #define CDR_SAFE(c)				\
   (CONSP ((c)) ? XCDR ((c)) : Qnil)
 
+/* Convenience macros for dealing with Lisp strings.  */
+
+#define SDATA(string)		(XSTRING (string)->u.imm.immbit ? \
+				 (XSTRING (string)->u.imm.data) : \
+				 (XSTRING (string)->u.dat.data))
+#define SREF(string, index)	(SDATA (string)[index] + 0)
+#define SSET(string, index, new) (SDATA (string)[index] = (new))
+#define SCHARS(string)		(XSTRING (string)->u.imm.immbit ? \
+				 (XSTRING (string)->u.imm.size) : \
+				 (XSTRING (string)->u.dat.size))
+#define SBYTES(string)		(string_bytes (XSTRING (string)))
+
+/* Avoid "differ in sign" warnings.  */
+#define SSDATA(x)  ((char *) SDATA (x))
+
+#define STRING_SET_CHARS(string, newsize)	\
+  (XSTRING (string)->u.imm.immbit ?		\
+   (XSTRING (string)->u.imm.size = (newsize)) : \
+   (XSTRING (string)->u.dat.size = (newsize)))
+
+#define STRING_COPYIN(string, index, new, count)	\
+  memcpy (SDATA (string) + index, new, count)
+
+/* For a unibyte immediate string, the SIZE_BYTE field is always set to this.  */
+#define STRING_UNIBYTE_IMM_MARK ((1 << (BITS_PER_CHAR - 1)) - 1)
+
+/* For a unibyte normal string, SIZE_BYTE is always this (shift done in ptrdiff_t width).  */
+#define STRING_UNIBYTE_DAT_MARK (((unsigned TYPE_PTRDIFF_T) 1 << (BITS_PER_PTRDIFF_T - 1)) - 1)
+
 /* Nonzero if STR is a multibyte string.  */
-#define STRING_MULTIBYTE(STR)  \
-  (XSTRING (STR)->size_byte >= 0)
-
-/* Return the length in bytes of STR.  */
-
-#ifdef GC_CHECK_STRING_BYTES
-
-struct Lisp_String;
-extern ptrdiff_t string_bytes (struct Lisp_String *);
-#define STRING_BYTES(S) string_bytes ((S))
-
-#else /* not GC_CHECK_STRING_BYTES */
-
-#define STRING_BYTES(STR)  \
-  ((STR)->size_byte < 0 ? (STR)->size : (STR)->size_byte)
-
-#endif /* not GC_CHECK_STRING_BYTES */
+#define STRING_MULTIBYTE(string)					\
+  (XSTRING (string)->u.imm.immbit ?					\
+   (XSTRING (string)->u.imm.size_byte != STRING_UNIBYTE_IMM_MARK) :	\
+   (XSTRING (string)->u.dat.size_byte != STRING_UNIBYTE_DAT_MARK))
 
 /* An upper bound on the number of bytes in a Lisp string, not
    counting the terminating null.  This a tight enough bound to
@@ -708,21 +708,33 @@
    would expose alloc.c internal details that we'd rather keep
    private.  The cast to ptrdiff_t ensures that STRING_BYTES_BOUND is
    signed.  */
-#define STRING_BYTES_BOUND  \
-  min (MOST_POSITIVE_FIXNUM, (ptrdiff_t) min (SIZE_MAX, PTRDIFF_MAX) - 1)
+#define STRING_BYTES_BOUND						\
+  min (MOST_POSITIVE_FIXNUM,						\
+       (ptrdiff_t) min (SIZE_MAX, STRING_UNIBYTE_DAT_MARK - 1) - 1)
+
+/* Maximum number of bytes, including '\0', in an immediate string.  */
+#define STRING_IMM_SIZE (sizeof (void *) + 2 * sizeof (TYPE_PTRDIFF_T) - 2)
 
 /* Mark STR as a unibyte string.  */
 #define STRING_SET_UNIBYTE(STR)  \
-  do { if (EQ (STR, empty_multibyte_string))  \
-      (STR) = empty_unibyte_string;  \
-    else XSTRING (STR)->size_byte = -1; } while (0)
+  do { if (EQ (STR, empty_multibyte_string))			\
+      (STR) = empty_unibyte_string;				\
+    else if (XSTRING (STR)->u.imm.immbit)			\
+      XSTRING (STR)->u.imm.size_byte = STRING_UNIBYTE_IMM_MARK;	\
+    else							\
+      XSTRING (STR)->u.dat.size_byte = STRING_UNIBYTE_DAT_MARK;	\
+  } while (0)
 
 /* Mark STR as a multibyte string.  Assure that STR contains only
    ASCII characters in advance.  */
-#define STRING_SET_MULTIBYTE(STR)  \
-  do { if (EQ (STR, empty_unibyte_string))  \
-      (STR) = empty_multibyte_string;  \
-    else XSTRING (STR)->size_byte = XSTRING (STR)->size; } while (0)
+#define STRING_SET_MULTIBYTE(STR)				  \
+  do { if (EQ (STR, empty_unibyte_string))			  \
+      (STR) = empty_multibyte_string;				  \
+    else if (XSTRING (STR)->u.imm.immbit)			  \
+      XSTRING (STR)->u.imm.size_byte = XSTRING (STR)->u.imm.size; \
+    else							  \
+      XSTRING (STR)->u.dat.size_byte = XSTRING (STR)->u.dat.size; \
+  } while (0)
 
 /* Get text properties.  */
 #define STRING_INTERVALS(STR)  (XSTRING (STR)->intervals + 0)
@@ -730,15 +742,54 @@
 /* Set text properties.  */
 #define STRING_SET_INTERVALS(STR, INT) (XSTRING (STR)->intervals = (INT))
 
-/* In a string or vector, the sign bit of the `size' is the gc mark bit */
-
 struct Lisp_String
-  {
-    ptrdiff_t size;
-    ptrdiff_t size_byte;
-    INTERVAL intervals;		/* text properties in this string */
-    unsigned char *data;
-  };
+{
+  /* Text properties in this string.  Should be the first
+     member since NEXT_FREE_LISP_STRING from alloc.c uses it.  */
+  INTERVAL intervals;
+
+  union {
+    /* GC mark bit and subtype bit are accessed via IMM just by convention -
+       when IMMBIT is 0, all the other fields are taken from DAT.  */
+    struct {
+      unsigned immbit : 1;
+      unsigned size : BITS_PER_CHAR - 1;
+      unsigned char data[STRING_IMM_SIZE];
+      unsigned size_byte : BITS_PER_CHAR - 1;
+      unsigned gcmarkbit : 1;
+    } imm;
+
+    struct {
+      unsigned immbit : 1;
+      unsigned TYPE_PTRDIFF_T size : BITS_PER_PTRDIFF_T - 1;
+      unsigned char *data;
+      unsigned TYPE_PTRDIFF_T size_byte : BITS_PER_PTRDIFF_T - 1;
+      unsigned gcmarkbit : 1;
+    } dat;
+  } u;
+};
+
+/* Return the length in bytes of S, a pointer to struct Lisp_String.
+   STRING_BYTES is the macro spelling, available in every build.  */
+#define STRING_BYTES(S) string_bytes ((S))
+
+#ifdef GC_CHECK_STRING_BYTES
+/* Out-of-line version; only declared in this header.  */
+extern ptrdiff_t string_bytes (struct Lisp_String *);
+#else /* not GC_CHECK_STRING_BYTES */
+
+/* A unibyte mark in SIZE_BYTE means SIZE is also the byte count.  */
+static inline
+ptrdiff_t string_bytes (struct Lisp_String *s)
+{
+  if (s->u.imm.immbit)
+    return s->u.imm.size_byte == STRING_UNIBYTE_IMM_MARK ?
+      s->u.imm.size : s->u.imm.size_byte;
+  return s->u.dat.size_byte == STRING_UNIBYTE_DAT_MARK ?
+    s->u.dat.size : s->u.dat.size_byte;
+}
+
+#endif /* not GC_CHECK_STRING_BYTES */
 
 /* Header of vector-like objects.  This documents the layout constraints on
    vectors and pseudovectors other than struct Lisp_Subr.  It also prevents


  parent reply	other threads:[~2012-07-04  8:27 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-05-22  8:44 Proposal: immediate strings Dmitry Antipov
2012-05-22 20:51 ` Miles Bader
2012-05-22 22:13   ` Paul Eggert
2012-05-24  5:17 ` Stefan Monnier
2012-05-24  5:41   ` Ken Raeburn
2012-05-24  5:50     ` Miles Bader
2012-05-24  6:08   ` Paul Eggert
2012-05-24  7:14     ` Stefan Monnier
2012-05-24  7:52       ` Paul Eggert
2012-05-24 12:51         ` Stefan Monnier
2012-05-24 16:35           ` Paul Eggert
2012-05-25  6:43             ` Dmitry Antipov
2012-05-25  7:30               ` Paul Eggert
2012-05-28 11:32       ` Dmitry Antipov
2012-05-28 14:25         ` Stefan Monnier
2012-05-29  6:55   ` Dmitry Antipov
2012-05-29  7:38     ` Paul Eggert
2012-05-29 13:33       ` Dmitry Antipov
2012-05-29 15:24         ` Paul Eggert
2012-05-31  9:28           ` Dmitry Antipov
2012-05-31 16:34             ` Paul Eggert
2012-06-06  6:14               ` Dmitry Antipov
2012-06-06  6:41                 ` Paul Eggert
2012-06-06  7:29                   ` Dmitry Antipov
2012-06-06 15:14                     ` Eli Zaretskii
2012-06-06 21:44                       ` Paul Eggert
2012-07-04  8:27                       ` Dmitry Antipov [this message]
2012-07-04 13:08                         ` Old topic(s) again [was: Re: Proposal: immediate strings] Stefan Monnier
2012-07-04 19:32                           ` Paul Eggert
2012-05-29  7:38     ` Proposal: immediate strings Andreas Schwab

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4FF3FE6E.8080202@yandex.ru \
    --to=dmantipov@yandex.ru \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).