From: Dmitry Antipov <dmantipov@yandex.ru>
To: emacs-devel@gnu.org
Subject: Old topic(s) again [was: Re: Proposal: immediate strings]
Date: Wed, 04 Jul 2012 12:27:26 +0400 [thread overview]
Message-ID: <4FF3FE6E.8080202@yandex.ru> (raw)
In-Reply-To: <83ipf4wk00.fsf@gnu.org>
[-- Attachment #1: Type: text/plain, Size: 89 bytes --]
This feature is not thrown away, so I would like to get more comments around it.
Dmitry
[-- Attachment #2: immstr.patch --]
[-- Type: text/plain, Size: 22868 bytes --]
=== modified file 'configure.in'
--- configure.in 2012-07-04 08:07:26 +0000
+++ configure.in 2012-07-04 08:09:13 +0000
@@ -3080,6 +3080,29 @@
declarations. Define as empty for no equivalent.])
fi
+dnl Determine the basic type of ptrdiff_t.
+AC_CHECK_SIZEOF([int])
+AC_CHECK_SIZEOF([long])
+AC_CHECK_SIZEOF([long long])
+AC_CHECK_SIZEOF([ptrdiff_t])
+AC_MSG_CHECKING([for the basic type of ptrdiff_t])
+if test $ac_cv_sizeof_int = $ac_cv_sizeof_ptrdiff_t; then
+ emacs_cv_type_ptrdiff_t="int"
+elif test $ac_cv_sizeof_long = $ac_cv_sizeof_ptrdiff_t; then
+ emacs_cv_type_ptrdiff_t="long"
+elif test $ac_cv_sizeof_long_long = $ac_cv_sizeof_ptrdiff_t; then
+ emacs_cv_type_ptrdiff_t="long long"
+else
+ emacs_cv_type_ptrdiff_t="unknown"
+fi
+AC_MSG_RESULT([$emacs_cv_type_ptrdiff_t])
+if test $emacs_cv_type_ptrdiff_t != "unknown"; then
+ AC_DEFINE_UNQUOTED([TYPE_PTRDIFF_T], [$emacs_cv_type_ptrdiff_t],
+ [Define to the basic type of ptrdiff_t])
+else
+ AC_MSG_ERROR([Unable to find the basic type of ptrdiff_t.])
+fi
+
dnl Fixme: AC_SYS_POSIX_TERMIOS should probably be used, but it's not clear
dnl how the tty code is related to POSIX and/or other versions of termios.
dnl The following looks like a useful start.
=== modified file 'src/alloc.c'
--- src/alloc.c 2012-07-03 16:35:53 +0000
+++ src/alloc.c 2012-07-04 08:09:13 +0000
@@ -147,20 +147,14 @@
/* Mark, unmark, query mark bit of a Lisp string. S must be a pointer
to a struct Lisp_String. */
-#define MARK_STRING(S) ((S)->size |= ARRAY_MARK_FLAG)
-#define UNMARK_STRING(S) ((S)->size &= ~ARRAY_MARK_FLAG)
-#define STRING_MARKED_P(S) (((S)->size & ARRAY_MARK_FLAG) != 0)
+#define MARK_STRING(S) ((S)->u.imm.gcmarkbit = 1)
+#define UNMARK_STRING(S) ((S)->u.imm.gcmarkbit = 0)
+#define STRING_MARKED_P(S) ((S)->u.imm.gcmarkbit)
#define VECTOR_MARK(V) ((V)->header.size |= ARRAY_MARK_FLAG)
#define VECTOR_UNMARK(V) ((V)->header.size &= ~ARRAY_MARK_FLAG)
#define VECTOR_MARKED_P(V) (((V)->header.size & ARRAY_MARK_FLAG) != 0)
-/* Value is the number of bytes of S, a pointer to a struct Lisp_String.
- Be careful during GC, because S->size contains the mark bit for
- strings. */
-
-#define GC_STRING_BYTES(S) (STRING_BYTES (S))
-
/* Global variables. */
struct emacs_globals globals;
@@ -394,6 +388,7 @@
static void mark_stack (void);
static int live_vector_p (struct mem_node *, void *);
static int live_buffer_p (struct mem_node *, void *);
+static int live_string_data_p (struct Lisp_String *);
static int live_string_p (struct mem_node *, void *);
static int live_cons_p (struct mem_node *, void *);
static int live_symbol_p (struct mem_node *, void *);
@@ -1709,6 +1704,37 @@
static EMACS_INT total_string_size;
+#ifdef GC_STRING_STATS
+
+/* All of these counters are invalid between GCs
+ because they are updated at the sweep phase. */
+
+/* Number of live immediate strings. */
+
+static EMACS_INT total_imm_strings;
+
+/* Amount of data bytes used by them. */
+
+static EMACS_INT total_imm_bytes;
+
+/* Number of intervals attached to them. */
+
+static EMACS_INT total_imm_intervals;
+
+/* Number of live normal strings. */
+
+static EMACS_INT total_dat_strings;
+
+/* Amount of data bytes used by them. */
+
+static EMACS_INT total_dat_bytes;
+
+/* Number of intervals attached to them. */
+
+static EMACS_INT total_dat_intervals;
+
+#endif /* GC_STRING_STATS */
+
/* Given a pointer to a Lisp_String S which is on the free-list
string_free_list, return a pointer to its successor in the
free-list. */
@@ -1720,7 +1746,8 @@
a pointer to the `u.data' member of its sdata structure; the
structure starts at a constant offset in front of that. */
-#define SDATA_OF_STRING(S) ((struct sdata *) ((S)->data - SDATA_DATA_OFFSET))
+#define SDATA_OF_STRING(S) ((S)->u.imm.immbit ? (struct sdata *) NULL \
+ : ((struct sdata *) ((S)->u.dat.data - SDATA_DATA_OFFSET)))
#ifdef GC_CHECK_STRING_OVERRUN
@@ -1797,26 +1824,32 @@
empty_multibyte_string = make_pure_string ("", 0, 0, 1);
}
-
#ifdef GC_CHECK_STRING_BYTES
static int check_string_bytes_count;
-#define CHECK_STRING_BYTES(S) STRING_BYTES (S)
-
-
-/* Like GC_STRING_BYTES, but with debugging check. */
+#define CHECK_STRING_BYTES(S) string_bytes (S)
ptrdiff_t
string_bytes (struct Lisp_String *s)
{
- ptrdiff_t nbytes =
- (s->size_byte < 0 ? s->size & ~ARRAY_MARK_FLAG : s->size_byte);
-
- if (!PURE_POINTER_P (s)
- && s->data
- && nbytes != SDATA_NBYTES (SDATA_OF_STRING (s)))
- abort ();
+ ptrdiff_t nbytes;
+
+ if (s->u.imm.immbit)
+ {
+ nbytes = s->u.imm.size_byte == STRING_UNIBYTE_IMM_MARK ?
+ s->u.imm.size : s->u.imm.size_byte;
+ eassert (nbytes < STRING_IMM_SIZE);
+ }
+ else
+ {
+ nbytes = s->u.dat.size_byte == STRING_UNIBYTE_DAT_MARK ?
+ s->u.dat.size : s->u.dat.size_byte;
+ eassert (nbytes >= STRING_IMM_SIZE);
+ if (!PURE_POINTER_P (s) && s->u.dat.data)
+ eassert (nbytes == SDATA_NBYTES (SDATA_OF_STRING (s)));
+ }
+
return nbytes;
}
@@ -1841,7 +1874,7 @@
CHECK_STRING_BYTES (from->string);
if (from->string)
- nbytes = GC_STRING_BYTES (from->string);
+ nbytes = string_bytes (from->string);
else
nbytes = SDATA_NBYTES (from);
@@ -1926,8 +1959,9 @@
for (i = STRING_BLOCK_SIZE - 1; i >= 0; --i)
{
s = b->strings + i;
- /* Every string on a free list should have NULL data pointer. */
- s->data = NULL;
+ /* Every string on a free list is immediate. */
+ s->u.imm.immbit = 1;
+ s->u.imm.gcmarkbit = 0;
NEXT_FREE_LISP_STRING (s) = string_free_list;
string_free_list = s;
}
@@ -2043,13 +2077,11 @@
MALLOC_UNBLOCK_INPUT;
data->string = s;
- s->data = SDATA_DATA (data);
+ s->u.dat.data = SDATA_DATA (data);
#ifdef GC_CHECK_STRING_BYTES
SDATA_NBYTES (data) = nbytes;
#endif
- s->size = nchars;
- s->size_byte = nbytes;
- s->data[nbytes] = '\0';
+ s->u.dat.data[nbytes] = '\0';
#ifdef GC_CHECK_STRING_OVERRUN
memcpy ((char *) data + needed, string_overrun_cookie,
GC_STRING_OVERRUN_COOKIE_SIZE);
@@ -2070,6 +2102,12 @@
total_strings = total_free_strings = 0;
total_string_size = 0;
+#ifdef GC_STRING_STATS
+ total_imm_strings = total_dat_strings = 0;
+ total_imm_bytes = total_dat_bytes = 0;
+ total_imm_intervals = total_dat_intervals = 0;
+#endif
+
/* Scan strings_blocks, free Lisp_Strings that aren't marked. */
for (b = string_blocks; b; b = next)
{
@@ -2082,49 +2120,64 @@
{
struct Lisp_String *s = b->strings + i;
- if (s->data)
+ if (STRING_MARKED_P (s))
+ {
+ /* String is live; unmark it and its intervals. */
+ UNMARK_STRING (s);
+
+ if (!NULL_INTERVAL_P (s->intervals))
+ UNMARK_BALANCE_INTERVALS (s->intervals);
+
+ ++total_strings;
+ total_string_size += string_bytes (s);
+#ifdef GC_STRING_STATS
+ if (s->u.imm.immbit)
+ {
+ total_imm_strings++;
+ total_imm_bytes += string_bytes (s);
+ if (!NULL_INTERVAL_P (s->intervals))
+ total_imm_intervals++;
+ }
+ else
+ {
+ total_dat_strings++;
+ total_dat_bytes += string_bytes (s);
+ if (!NULL_INTERVAL_P (s->intervals))
+ total_dat_intervals++;
+ }
+#endif /* GC_STRING_STATS */
+ }
+ else
{
- /* String was not on free-list before. */
- if (STRING_MARKED_P (s))
- {
- /* String is live; unmark it and its intervals. */
- UNMARK_STRING (s);
-
- if (!NULL_INTERVAL_P (s->intervals))
- UNMARK_BALANCE_INTERVALS (s->intervals);
-
- ++total_strings;
- total_string_size += STRING_BYTES (s);
- }
+ if (s->u.imm.immbit)
+ /* Fill data with special pattern. Used
+ by GC to find dead immediate strings. */
+ memset (s->u.imm.data, 0xff, STRING_IMM_SIZE);
else
{
- /* String is dead. Put it on the free-list. */
- struct sdata *data = SDATA_OF_STRING (s);
+ if (s->u.dat.data)
+ {
+ /* String is dead. Put it on the free-list. */
+ struct sdata *data = SDATA_OF_STRING (s);
- /* Save the size of S in its sdata so that we know
- how large that is. Reset the sdata's string
- back-pointer so that we know it's free. */
+ /* Save the size of S in its sdata so that we know
+ how large that is. Reset the sdata's string
+ back-pointer so that we know it's free. */
#ifdef GC_CHECK_STRING_BYTES
- if (GC_STRING_BYTES (s) != SDATA_NBYTES (data))
- abort ();
+ if (string_bytes (s) != SDATA_NBYTES (data))
+ abort ();
#else
- data->u.nbytes = GC_STRING_BYTES (s);
+ data->u.nbytes = string_bytes (s);
#endif
- data->string = NULL;
-
- /* Reset the strings's `data' member so that we
- know it's free. */
- s->data = NULL;
-
- /* Put the string on the free-list. */
- NEXT_FREE_LISP_STRING (s) = string_free_list;
- string_free_list = s;
- ++nfree;
+ data->string = NULL;
+
+ /* Reset the string's `data' member so that we
+ know it's free. */
+ s->u.dat.data = NULL;
+ }
}
- }
- else
- {
- /* S was on the free-list before. Put it there again. */
+
+ /* Put the string on the free-list. */
NEXT_FREE_LISP_STRING (s) = string_free_list;
string_free_list = s;
++nfree;
@@ -2216,12 +2269,12 @@
/* Check that the string size recorded in the string is the
same as the one recorded in the sdata structure. */
if (from->string
- && GC_STRING_BYTES (from->string) != SDATA_NBYTES (from))
+ && string_bytes (from->string) != SDATA_NBYTES (from))
abort ();
#endif /* GC_CHECK_STRING_BYTES */
if (from->string)
- nbytes = GC_STRING_BYTES (from->string);
+ nbytes = string_bytes (from->string);
else
nbytes = SDATA_NBYTES (from);
@@ -2257,7 +2310,7 @@
{
eassert (tb != b || to < from);
memmove (to, from, nbytes + GC_STRING_EXTRA);
- to->string->data = SDATA_DATA (to);
+ to->string->u.dat.data = SDATA_DATA (to);
}
/* Advance past the sdata we copied to. */
@@ -2497,8 +2550,25 @@
return empty_multibyte_string;
s = allocate_string ();
+ s->u.imm.gcmarkbit = 0;
+
+ if (nbytes < STRING_IMM_SIZE)
+ {
+ s->u.imm.immbit = 1;
+ s->u.imm.data[nbytes] = '\0';
+ s->u.imm.size = nchars;
+ s->u.imm.size_byte = nbytes;
+ }
+ else
+ {
+ s->u.imm.immbit = 0;
+ s->u.dat.data = NULL;
+ s->u.dat.size = nchars;
+ s->u.dat.size_byte = nbytes;
+ allocate_string_data (s, nchars, nbytes);
+ }
+
s->intervals = NULL_INTERVAL;
- allocate_string_data (s, nchars, nbytes);
XSETSTRING (string, s);
string_chars_consed += nbytes;
return string;
@@ -4196,6 +4266,22 @@
x->color = MEM_BLACK;
}
+/* Non-zero if data of S is valid. */
+
+static inline int
+live_string_data_p (struct Lisp_String *s)
+{
+ if (s->u.imm.immbit)
+ {
+ unsigned char *p;
+
+ for (p = s->u.imm.data; p < s->u.imm.data + STRING_IMM_SIZE; p++)
+ if (*p != 0xff)
+ return 1;
+ return 0;
+ }
+ return s->u.dat.data != NULL;
+}
/* Value is non-zero if P is a pointer to a live Lisp string on
the heap. M is a pointer to the mem_block for P. */
@@ -4213,7 +4299,7 @@
return (offset >= 0
&& offset % sizeof b->strings[0] == 0
&& offset < (STRING_BLOCK_SIZE * sizeof b->strings[0])
- && ((struct Lisp_String *) p)->data != NULL);
+ && live_string_data_p ((struct Lisp_String *) p));
}
else
return 0;
@@ -5159,15 +5245,29 @@
struct Lisp_String *s;
s = (struct Lisp_String *) pure_alloc (sizeof *s, Lisp_String);
- s->data = (unsigned char *) find_string_data_in_pure (data, nbytes);
- if (s->data == NULL)
- {
- s->data = (unsigned char *) pure_alloc (nbytes + 1, -1);
- memcpy (s->data, data, nbytes);
- s->data[nbytes] = '\0';
- }
- s->size = nchars;
- s->size_byte = multibyte ? nbytes : -1;
+
+ if (nbytes < STRING_IMM_SIZE)
+ {
+ memcpy (s->u.imm.data, data, nbytes);
+ s->u.imm.data[nbytes] = '\0';
+ s->u.imm.immbit = 1;
+ s->u.imm.size = nchars;
+ s->u.imm.size_byte = multibyte ? nbytes : STRING_UNIBYTE_IMM_MARK;
+ }
+ else
+ {
+ s->u.dat.data = (unsigned char *) find_string_data_in_pure (data, nbytes);
+ if (s->u.dat.data == NULL)
+ {
+ s->u.dat.data = (unsigned char *) pure_alloc (nbytes + 1, -1);
+ memcpy (s->u.dat.data, data, nbytes);
+ s->u.dat.data[nbytes] = '\0';
+ }
+ s->u.imm.immbit = 0;
+ s->u.dat.size = nchars;
+ s->u.dat.size_byte = multibyte ? nbytes : STRING_UNIBYTE_DAT_MARK;
+ }
+
s->intervals = NULL_INTERVAL;
XSETSTRING (string, s);
return string;
@@ -5184,9 +5284,23 @@
ptrdiff_t nchars = strlen (data);
s = (struct Lisp_String *) pure_alloc (sizeof *s, Lisp_String);
- s->size = nchars;
- s->size_byte = -1;
- s->data = (unsigned char *) data;
+
+ if (nchars < STRING_IMM_SIZE)
+ {
+ memcpy (s->u.imm.data, data, nchars);
+ s->u.imm.data[nchars] = '\0';
+ s->u.imm.immbit = 1;
+ s->u.imm.size = nchars;
+ s->u.imm.size_byte = STRING_UNIBYTE_IMM_MARK;
+ }
+ else
+ {
+ s->u.dat.data = (unsigned char *) data;
+ s->u.imm.immbit = 0;
+ s->u.dat.size = nchars;
+ s->u.dat.size_byte = STRING_UNIBYTE_DAT_MARK;
+ }
+
s->intervals = NULL_INTERVAL;
XSETSTRING (string, s);
return string;
@@ -6587,6 +6701,34 @@
return Flist (8, consed);
}
+#ifdef GC_STRING_STATS
+
+DEFUN ("string-stats", Fstring_stats, Sstring_stats, 0, 0, 0,
+ doc: /* Return a list of counters that measure how many
+strings of a particular internal structure are alive after the last
+garbage collection, and how many bytes are in them.
+The elements of the value are as follows:
+ (IMM-STRINGS IMM-BYTES IMM-INTERVALS DAT-STRINGS DAT-BYTES DAT-INTERVALS)
+where IMM-STRINGS is the number of immediate strings, IMM-BYTES is the total
+number of bytes in them, and IMM-INTERVALS is the number of immediate strings
+with non-nil text properties. The remaining three numbers have the same
+meaning for normal strings, respectively. */)
+ (void)
+{
+ Lisp_Object data[6];
+
+ data[0] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_strings));
+ data[1] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_bytes));
+ data[2] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_intervals));
+ data[3] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_strings));
+ data[4] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_bytes));
+ data[5] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_intervals));
+
+ return Flist (6, data);
+}
+
+#endif /* GC_STRING_STATS */
+
/* Find at most FIND_MAX symbols which have OBJ as their value or
function. This is used in gdbinit's `xwhichsymbols' command. */
@@ -6815,7 +6957,9 @@
defsubr (&Sgarbage_collect);
defsubr (&Smemory_limit);
defsubr (&Smemory_use_counts);
-
+#ifdef GC_STRING_STATS
+ defsubr (&Sstring_stats);
+#endif
#if GC_MARK_STACK == GC_USE_GCPROS_CHECK_ZOMBIES
defsubr (&Sgc_status);
#endif
=== modified file 'src/fns.c'
--- src/fns.c 2012-06-28 07:50:27 +0000
+++ src/fns.c 2012-07-04 08:09:13 +0000
@@ -2166,8 +2166,8 @@
int len = CHAR_STRING (charval, str);
ptrdiff_t size_byte = SBYTES (array);
- if (INT_MULTIPLY_OVERFLOW (SCHARS (array), len)
- || SCHARS (array) * len != size_byte)
+ if (INT_MULTIPLY_OVERFLOW (size, len)
+ || size * len != size_byte)
error ("Attempt to change byte length of a string");
for (idx = 0; idx < size_byte; idx++)
*p++ = str[idx % len];
=== modified file 'src/lisp.h'
--- src/lisp.h 2012-07-03 20:34:47 +0000
+++ src/lisp.h 2012-07-04 08:09:13 +0000
@@ -69,7 +69,8 @@
BITS_PER_SHORT = CHAR_BIT * sizeof (short),
BITS_PER_INT = CHAR_BIT * sizeof (int),
BITS_PER_LONG = CHAR_BIT * sizeof (long int),
- BITS_PER_EMACS_INT = CHAR_BIT * sizeof (EMACS_INT)
+ BITS_PER_EMACS_INT = CHAR_BIT * sizeof (EMACS_INT),
+ BITS_PER_PTRDIFF_T = CHAR_BIT * sizeof (ptrdiff_t)
};
/* printmax_t and uprintmax_t are types for printing large integers.
@@ -576,23 +577,6 @@
eassert ((IDX) >= 0 && (IDX) < ASIZE (ARRAY)), \
AREF ((ARRAY), (IDX)) = (VAL))
-/* Convenience macros for dealing with Lisp strings. */
-
-#define SDATA(string) (XSTRING (string)->data + 0)
-#define SREF(string, index) (SDATA (string)[index] + 0)
-#define SSET(string, index, new) (SDATA (string)[index] = (new))
-#define SCHARS(string) (XSTRING (string)->size + 0)
-#define SBYTES(string) (STRING_BYTES (XSTRING (string)) + 0)
-
-/* Avoid "differ in sign" warnings. */
-#define SSDATA(x) ((char *) SDATA (x))
-
-#define STRING_SET_CHARS(string, newsize) \
- (XSTRING (string)->size = (newsize))
-
-#define STRING_COPYIN(string, index, new, count) \
- memcpy (SDATA (string) + index, new, count)
-
/* Type checking. */
#define CHECK_TYPE(ok, Qxxxp, x) \
@@ -678,24 +662,40 @@
#define CDR_SAFE(c) \
(CONSP ((c)) ? XCDR ((c)) : Qnil)
+/* Convenience macros for dealing with Lisp strings. */
+
+#define SDATA(string) (XSTRING (string)->u.imm.immbit ? \
+ (XSTRING (string)->u.imm.data) : \
+ (XSTRING (string)->u.dat.data))
+#define SREF(string, index) (SDATA (string)[index] + 0)
+#define SSET(string, index, new) (SDATA (string)[index] = (new))
+#define SCHARS(string) (XSTRING (string)->u.imm.immbit ? \
+ (XSTRING (string)->u.imm.size) : \
+ (XSTRING (string)->u.dat.size))
+#define SBYTES(string) (string_bytes (XSTRING (string)))
+
+/* Avoid "differ in sign" warnings. */
+#define SSDATA(x) ((char *) SDATA (x))
+
+#define STRING_SET_CHARS(string, newsize) \
+ (XSTRING (string)->u.imm.immbit ? \
+ (XSTRING (string)->u.imm.size = (newsize)) : \
+ (XSTRING (string)->u.dat.size = (newsize)))
+
+#define STRING_COPYIN(string, index, new, count) \
+ memcpy (SDATA (string) + index, new, count)
+
+/* For unibyte immediate string, SIZE_BYTE field is always set to this. */
+#define STRING_UNIBYTE_IMM_MARK ((1 << (BITS_PER_CHAR - 1)) - 1)
+
+/* For unibyte normal string, SIZE_BYTE field is always set to this. */
+#define STRING_UNIBYTE_DAT_MARK ((1UL << (BITS_PER_PTRDIFF_T - 1)) - 1)
+
/* Nonzero if STR is a multibyte string. */
-#define STRING_MULTIBYTE(STR) \
- (XSTRING (STR)->size_byte >= 0)
-
-/* Return the length in bytes of STR. */
-
-#ifdef GC_CHECK_STRING_BYTES
-
-struct Lisp_String;
-extern ptrdiff_t string_bytes (struct Lisp_String *);
-#define STRING_BYTES(S) string_bytes ((S))
-
-#else /* not GC_CHECK_STRING_BYTES */
-
-#define STRING_BYTES(STR) \
- ((STR)->size_byte < 0 ? (STR)->size : (STR)->size_byte)
-
-#endif /* not GC_CHECK_STRING_BYTES */
+#define STRING_MULTIBYTE(string) \
+ (XSTRING (string)->u.imm.immbit ? \
+ (XSTRING (string)->u.imm.size_byte != STRING_UNIBYTE_IMM_MARK) : \
+ (XSTRING (string)->u.dat.size_byte != STRING_UNIBYTE_DAT_MARK))
/* An upper bound on the number of bytes in a Lisp string, not
counting the terminating null. This a tight enough bound to
@@ -708,21 +708,33 @@
would expose alloc.c internal details that we'd rather keep
private. The cast to ptrdiff_t ensures that STRING_BYTES_BOUND is
signed. */
-#define STRING_BYTES_BOUND \
- min (MOST_POSITIVE_FIXNUM, (ptrdiff_t) min (SIZE_MAX, PTRDIFF_MAX) - 1)
+#define STRING_BYTES_BOUND \
+ min (MOST_POSITIVE_FIXNUM, \
+ (ptrdiff_t) min (SIZE_MAX, STRING_UNIBYTE_DAT_MARK - 1) - 1)
+
+/* Maximum number of bytes, including '\0', in an immediate string. */
+#define STRING_IMM_SIZE (sizeof (void *) + 2 * sizeof (TYPE_PTRDIFF_T) - 2)
/* Mark STR as a unibyte string. */
#define STRING_SET_UNIBYTE(STR) \
- do { if (EQ (STR, empty_multibyte_string)) \
- (STR) = empty_unibyte_string; \
- else XSTRING (STR)->size_byte = -1; } while (0)
+ do { if (EQ (STR, empty_multibyte_string)) \
+ (STR) = empty_unibyte_string; \
+ else if (XSTRING (STR)->u.imm.immbit) \
+ XSTRING (STR)->u.imm.size_byte = STRING_UNIBYTE_IMM_MARK; \
+ else \
+ XSTRING (STR)->u.dat.size_byte = STRING_UNIBYTE_DAT_MARK; \
+ } while (0)
/* Mark STR as a multibyte string. Assure that STR contains only
ASCII characters in advance. */
-#define STRING_SET_MULTIBYTE(STR) \
- do { if (EQ (STR, empty_unibyte_string)) \
- (STR) = empty_multibyte_string; \
- else XSTRING (STR)->size_byte = XSTRING (STR)->size; } while (0)
+#define STRING_SET_MULTIBYTE(STR) \
+ do { if (EQ (STR, empty_unibyte_string)) \
+ (STR) = empty_multibyte_string; \
+ else if (XSTRING (STR)->u.imm.immbit) \
+ XSTRING (STR)->u.imm.size_byte = XSTRING (STR)->u.imm.size; \
+ else \
+ XSTRING (STR)->u.dat.size_byte = XSTRING (STR)->u.dat.size; \
+ } while (0)
/* Get text properties. */
#define STRING_INTERVALS(STR) (XSTRING (STR)->intervals + 0)
@@ -730,15 +742,54 @@
/* Set text properties. */
#define STRING_SET_INTERVALS(STR, INT) (XSTRING (STR)->intervals = (INT))
-/* In a string or vector, the sign bit of the `size' is the gc mark bit */
-
struct Lisp_String
- {
- ptrdiff_t size;
- ptrdiff_t size_byte;
- INTERVAL intervals; /* text properties in this string */
- unsigned char *data;
- };
+{
+ /* Text properties in this string. Should be the first
+ member since NEXT_FREE_LISP_STRING from alloc.c uses it. */
+ INTERVAL intervals;
+
+ union {
+ /* GC mark bit and subtype bit are accessed via IMM just by convention;
+ DAT declares the same two bits, and when IMMBIT is 0 its other fields are used. */
+ struct {
+ unsigned immbit : 1;
+ unsigned size : BITS_PER_CHAR - 1;
+ unsigned char data[STRING_IMM_SIZE];
+ unsigned size_byte : BITS_PER_CHAR - 1;
+ unsigned gcmarkbit : 1;
+ } imm;
+
+ struct {
+ unsigned immbit : 1;
+ unsigned TYPE_PTRDIFF_T size : BITS_PER_PTRDIFF_T - 1;
+ unsigned char *data;
+ unsigned TYPE_PTRDIFF_T size_byte : BITS_PER_PTRDIFF_T - 1;
+ unsigned gcmarkbit : 1;
+ } dat;
+ } u;
+};
+
+/* Return the length in bytes of STR. */
+
+#ifdef GC_CHECK_STRING_BYTES
+
+struct Lisp_String;
+extern ptrdiff_t string_bytes (struct Lisp_String *);
+#define STRING_BYTES(S) string_bytes ((S))
+
+#else /* not GC_CHECK_STRING_BYTES */
+
+static inline
+ptrdiff_t string_bytes (struct Lisp_String *s)
+{
+ if (s->u.imm.immbit)
+ return s->u.imm.size_byte == STRING_UNIBYTE_IMM_MARK ?
+ s->u.imm.size : s->u.imm.size_byte;
+ return s->u.dat.size_byte == STRING_UNIBYTE_DAT_MARK ?
+ s->u.dat.size : s->u.dat.size_byte;
+}
+
+#endif /* not GC_CHECK_STRING_BYTES */
/* Header of vector-like objects. This documents the layout constraints on
vectors and pseudovectors other than struct Lisp_Subr. It also prevents
next prev parent reply other threads:[~2012-07-04 8:27 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-05-22 8:44 Proposal: immediate strings Dmitry Antipov
2012-05-22 20:51 ` Miles Bader
2012-05-22 22:13 ` Paul Eggert
2012-05-24 5:17 ` Stefan Monnier
2012-05-24 5:41 ` Ken Raeburn
2012-05-24 5:50 ` Miles Bader
2012-05-24 6:08 ` Paul Eggert
2012-05-24 7:14 ` Stefan Monnier
2012-05-24 7:52 ` Paul Eggert
2012-05-24 12:51 ` Stefan Monnier
2012-05-24 16:35 ` Paul Eggert
2012-05-25 6:43 ` Dmitry Antipov
2012-05-25 7:30 ` Paul Eggert
2012-05-28 11:32 ` Dmitry Antipov
2012-05-28 14:25 ` Stefan Monnier
2012-05-29 6:55 ` Dmitry Antipov
2012-05-29 7:38 ` Paul Eggert
2012-05-29 13:33 ` Dmitry Antipov
2012-05-29 15:24 ` Paul Eggert
2012-05-31 9:28 ` Dmitry Antipov
2012-05-31 16:34 ` Paul Eggert
2012-06-06 6:14 ` Dmitry Antipov
2012-06-06 6:41 ` Paul Eggert
2012-06-06 7:29 ` Dmitry Antipov
2012-06-06 15:14 ` Eli Zaretskii
2012-06-06 21:44 ` Paul Eggert
2012-07-04 8:27 ` Dmitry Antipov [this message]
2012-07-04 13:08 ` Old topic(s) again [was: Re: Proposal: immediate strings] Stefan Monnier
2012-07-04 19:32 ` Paul Eggert
2012-05-29 7:38 ` Proposal: immediate strings Andreas Schwab
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/emacs/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4FF3FE6E.8080202@yandex.ru \
--to=dmantipov@yandex.ru \
--cc=emacs-devel@gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.savannah.gnu.org/cgit/emacs.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).