1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
/* Declarations having to do with GNU Emacs syntax tables.
Copyright (C) 1985, 1993-1994, 1997-1998, 2001-2023 Free Software
Foundation, Inc.
This file is part of GNU Emacs.
GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#ifndef EMACS_SYNTAX_H
#define EMACS_SYNTAX_H
#include "buffer.h"
#include "lisp.h"
INLINE_HEADER_BEGIN
/* Routines that refresh gl_state; they are only declared here.
   Within this header, update_syntax_table is called as
   (CHARPOS, -1, false, gl_state.object) when moving backward, and
   update_syntax_table_forward as (CHARPOS, false, gl_state.object)
   when moving forward.  */
extern void update_syntax_table (ptrdiff_t, EMACS_INT, bool, Lisp_Object);
extern void update_syntax_table_forward (ptrdiff_t, bool, Lisp_Object);
/* The standard syntax table is stored where it will automatically
   be used in all new buffers: in the syntax_table slot of
   buffer_defaults.  */
#define Vstandard_syntax_table BVAR (&buffer_defaults, syntax_table)
/* A syntax table is a chartable whose elements are cons cells
   (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR can be nil if the char
   is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS are a code, as follows.  The codes are
   numbered consecutively from zero, in declaration order.  */
enum syntaxcode
{
Swhitespace, /* for a whitespace character */
Spunct, /* for random punctuation characters */
Sword, /* for a word constituent */
Ssymbol, /* symbol constituent but not word constituent */
Sopen, /* for a beginning delimiter */
Sclose, /* for an ending delimiter */
Squote, /* for a prefix character like Lisp ' */
Sstring, /* for a string-grouping character like Lisp " */
Smath, /* for delimiters like $ in TeX.  */
Sescape, /* for a character that begins a C-style escape */
Scharquote, /* for a character that quotes the following character */
Scomment, /* for a comment-starting character */
Sendcomment, /* for a comment-ending character */
Sinherit, /* use the standard syntax table for this character */
Scomment_fence, /* Starts/ends a comment which is delimited on the
   other side by any char with the same syntaxcode.  */
Sstring_fence, /* Starts/ends a string which is delimited on the
   other side by any char with the same syntaxcode.  */
Smax /* Upper bound on codes that are meaningful: one more than
   the last real code, so not itself a syntax code.  */
};
/* State kept globally (in gl_state) while scanning.  In the member
   comments below, "c_s_t" abbreviates current_syntax_table.  */
struct gl_state_s
{
Lisp_Object object; /* The object we are scanning.  */
ptrdiff_t start; /* FIXME: originally documented as "where to stop",
   exactly like `stop'; presumably the limit in the
   other scanning direction -- confirm.  */
ptrdiff_t stop; /* Where to stop.  */
bool use_global; /* Whether to use global_code (true)
   or c_s_t (false).  */
Lisp_Object global_code; /* Syntax code of current char.  */
Lisp_Object current_syntax_table; /* Syntax table for current pos.  */
Lisp_Object old_prop; /* Syntax-table prop at prev pos.  */
ptrdiff_t b_property; /* First index where c_s_t is valid.  */
ptrdiff_t e_property; /* First index where c_s_t is
   not valid.  */
bool e_property_truncated; /* True if e_property was truncated
   by parse_sexp_propertize_done.  */
INTERVAL forward_i; /* Where to start lookup on forward...  */
INTERVAL backward_i; /* ...or backward movement.  The
   data in c_s_t is valid
   between these intervals,
   and possibly at the
   intervals too.
   NOTE(review): the original
   comment ended with a dangling
   "depending on:"; whatever it
   introduced is not present in
   this struct.  */
/* The regexp engine prefers byte offsets over char positions, so
   store those to try and reduce the number of byte<->char conversions.
   This is only kept up to date when used from the regexp engine.  */
ptrdiff_t b_re_byte; /* First byte offset where c_s_t is valid.  */
ptrdiff_t e_re_byte; /* First byte offset where c_s_t is not valid.  */
};
/* The global instance read and written by the helpers in this header.  */
extern struct gl_state_s gl_state;
/* Return the syntax-table entry for character C.

   If VIA_PROPERTY is false, C is looked up in the current buffer's
   syntax table.  If VIA_PROPERTY is true, the globally kept data
   (gl_state) is consulted instead: the result is gl_state.global_code
   when gl_state.use_global is set, and otherwise the entry for C in
   gl_state.current_syntax_table.

   Does inheritance.  */
INLINE Lisp_Object
syntax_property_entry (int c, bool via_property)
{
  if (!via_property)
    return CHAR_TABLE_REF (BVAR (current_buffer, syntax_table), c);

  if (gl_state.use_global)
    return gl_state.global_code;

  return CHAR_TABLE_REF (gl_state.current_syntax_table, c);
}
/* Return the entry for character C in the current buffer's syntax
   table, without consulting the globally kept data (gl_state).  */
INLINE Lisp_Object
SYNTAX_ENTRY (int c)
{
  Lisp_Object entry = syntax_property_entry (c, false);
  return entry;
}
/* Return the CODE+FLAGS integer stored in the car of the syntax-table
   entry for character C, where the entry is fetched as by
   syntax_property_entry (C, VIA_PROPERTY).  An entry that is not a
   cons yields Swhitespace.  */
INLINE int
syntax_property_with_flags (int c, bool via_property)
{
  Lisp_Object entry = syntax_property_entry (c, via_property);

  /* No cons cell means no code was recorded for C.  */
  if (!CONSP (entry))
    return Swhitespace;

  return XFIXNUM (XCAR (entry));
}
/* Return the CODE+FLAGS integer for character C, taken from the
   current buffer's syntax table.  */
INLINE int
SYNTAX_WITH_FLAGS (int c)
{
  int code_and_flags = syntax_property_with_flags (c, false);
  return code_and_flags;
}
/* Return the syntax code of character C: the low 8 bits of the
   CODE+FLAGS value obtained as by
   syntax_property_with_flags (C, VIA_PROPERTY).  */
INLINE enum syntaxcode
syntax_property (int c, bool via_property)
{
  int code_and_flags = syntax_property_with_flags (c, via_property);

  /* Mask off the flag bits, keeping only the code.  */
  return code_and_flags & 0xff;
}
INLINE enum syntaxcode
SYNTAX (int c)
{
return syntax_property (c, false);
}
/* Whether the syntax of the character C has the prefix flag set.
   Only declared here; the definition lives outside this header.  */
extern bool syntax_prefix_flag_p (int c);
/* This array, indexed by a character less than 256, contains the
   syntax code which that character signifies (as an unsigned char).
   For example, syntax_spec_code['w'] == Sword.  The bound 0400 is
   octal, i.e. 256 entries.  */
extern unsigned char const syntax_spec_code[0400];
/* Translate BYTEOFFSET into a character position, for the object
   recorded in gl_state with RE_SETUP_SYNTAX_TABLE_FOR_OBJECT.

   - String object: converted with string_byte_to_char.
   - Buffer object: BYTEOFFSET is first shifted by that buffer's
     BUF_BEGV_BYTE, then converted with buf_bytepos_to_charpos.
   - nil: BYTEOFFSET is shifted by BEGV_BYTE, then converted with
     BYTE_TO_CHAR (presumably referring to the current buffer).
   - Anything else: BYTEOFFSET is returned unchanged.

   Asserts that parse_sexp_lookup_properties is set.  */
INLINE ptrdiff_t
RE_SYNTAX_TABLE_BYTE_TO_CHAR (ptrdiff_t byteoffset)
{
  eassert (parse_sexp_lookup_properties);

  if (STRINGP (gl_state.object))
    return string_byte_to_char (gl_state.object, byteoffset);

  if (BUFFERP (gl_state.object))
    return buf_bytepos_to_charpos
      (XBUFFER (gl_state.object),
       byteoffset + BUF_BEGV_BYTE (XBUFFER (gl_state.object)));

  if (NILP (gl_state.object))
    return BYTE_TO_CHAR (byteoffset + BEGV_BYTE);

  return byteoffset;
}
/* Translate the character position CHARPOS into a byte offset, for
   the object recorded in gl_state.  This is the inverse of
   RE_SYNTAX_TABLE_BYTE_TO_CHAR.

   - String object: converted with string_char_to_byte.
   - Buffer object: converted with buf_charpos_to_bytepos, then made
     relative by subtracting that buffer's BUF_BEGV_BYTE.
   - nil: converted with CHAR_TO_BYTE, then made relative by
     subtracting BEGV_BYTE.
   - Anything else: CHARPOS is returned unchanged.

   Asserts that parse_sexp_lookup_properties is set.  */
INLINE ptrdiff_t
RE_SYNTAX_TABLE_CHAR_TO_BYTE (ptrdiff_t charpos)
{
  eassert (parse_sexp_lookup_properties);

  if (STRINGP (gl_state.object))
    return string_char_to_byte (gl_state.object, charpos);

  if (BUFFERP (gl_state.object))
    return (buf_charpos_to_bytepos (XBUFFER (gl_state.object), charpos)
            - BUF_BEGV_BYTE (XBUFFER (gl_state.object)));

  if (NILP (gl_state.object))
    return CHAR_TO_BYTE (charpos) - BEGV_BYTE;

  return charpos;
}
/* Recompute gl_state.b_re_byte and gl_state.e_re_byte, the byte-offset
   bounds of the valid range, from the character bounds
   gl_state.b_property and gl_state.e_property.

   NOTE(review): this helper is `static' whereas every other function
   defined in this header is INLINE -- confirm that this is intended.  */
static void
re_update_byteoffsets (void)
{
  gl_state.b_re_byte = RE_SYNTAX_TABLE_CHAR_TO_BYTE (gl_state.b_property);
  eassert (gl_state.b_property
           == RE_SYNTAX_TABLE_BYTE_TO_CHAR (gl_state.b_re_byte));

  /* `e_property` is often set to EOB+1 (or to some value
     much further than `stop` in narrowed buffers).  In that case
     derive the end bound from `stop` instead.  */
  if (gl_state.e_property > gl_state.stop)
    gl_state.e_re_byte = 1 + RE_SYNTAX_TABLE_CHAR_TO_BYTE (gl_state.stop);
  else
    gl_state.e_re_byte = RE_SYNTAX_TABLE_CHAR_TO_BYTE (gl_state.e_property);

  eassert (gl_state.e_property > gl_state.stop
           ? gl_state.e_property
           >= 1 + RE_SYNTAX_TABLE_BYTE_TO_CHAR (gl_state.e_re_byte - 1)
           : gl_state.e_property
           == RE_SYNTAX_TABLE_BYTE_TO_CHAR (gl_state.e_re_byte));
}
/* The regexp-engine doesn't keep track of char positions, but instead
uses byteoffsets, so `syntax.c` uses `UPDATE_SYNTAX_TABLE_*` functions,
passing them `charpos`s whereas `regexp.c` uses `RE_UPDATE_SYNTAX_TABLE_*`
functions, passing them byteoffsets. */
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position before CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is set and CHARPOS
   has reached gl_state.e_property.  */
INLINE void
UPDATE_SYNTAX_TABLE_FORWARD (ptrdiff_t charpos)
{
  /* Performs just-in-time syntax-propertization.  */
  if (!parse_sexp_lookup_properties)
    return;

  /* Still inside the range for which the state is valid.  */
  if (charpos < gl_state.e_property)
    return;

  update_syntax_table_forward (charpos, false, gl_state.object);
}
/* Byte-offset counterpart of UPDATE_SYNTAX_TABLE_FORWARD: make
   gl_state good for BYTEOFFSET, assuming it is currently good for an
   earlier position.  Does nothing unless parse_sexp_lookup_properties
   is set and BYTEOFFSET has reached gl_state.e_re_byte.  */
INLINE void
RE_UPDATE_SYNTAX_TABLE_FORWARD (ptrdiff_t byteoffset)
{
  /* Performs just-in-time syntax-propertization.  */
  if (!parse_sexp_lookup_properties)
    return;

  eassert (gl_state.e_re_byte >= 0); /* gl_state.b_re_byte can be negative.  */

  /* Still inside the range for which the state is valid.  */
  if (byteoffset < gl_state.e_re_byte)
    return;

  ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (byteoffset);
  eassert (charpos >= gl_state.e_property);
  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
  /* Bring the cached byte offsets back in line with the new bounds.  */
  re_update_byteoffsets ();
}
/* Like RE_UPDATE_SYNTAX_TABLE_FORWARD, but make gl_state good for the
   character position just before the one at BYTEOFFSET.  Does nothing
   unless parse_sexp_lookup_properties is set and BYTEOFFSET is
   strictly beyond gl_state.e_re_byte.  */
INLINE void
RE_UPDATE_SYNTAX_TABLE_FORWARD_BEFORE (ptrdiff_t byteoffset)
{
  /* Performs just-in-time syntax-propertization.  */
  if (!parse_sexp_lookup_properties)
    return;

  eassert (gl_state.e_re_byte >= 0); /* gl_state.b_re_byte can be negative.  */

  /* The preceding character is still inside the valid range.  */
  if (byteoffset <= gl_state.e_re_byte)
    return;

  ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (byteoffset) - 1;
  eassert (charpos >= gl_state.e_property);
  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
  /* Bring the cached byte offsets back in line with the new bounds.  */
  re_update_byteoffsets ();
}
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position after CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is set and CHARPOS
   lies before gl_state.b_property.  */
INLINE void
UPDATE_SYNTAX_TABLE_BACKWARD (ptrdiff_t charpos)
{
  if (!parse_sexp_lookup_properties)
    return;

  /* Still inside the range for which the state is valid.  */
  if (charpos >= gl_state.b_property)
    return;

  update_syntax_table (charpos, -1, false, gl_state.object);
}
/* Byte-offset counterpart of UPDATE_SYNTAX_TABLE_BACKWARD: make
   gl_state good for BYTEOFFSET, assuming it is currently good for a
   later position.  Does nothing unless parse_sexp_lookup_properties
   is set and BYTEOFFSET lies before gl_state.b_re_byte.  */
INLINE void
RE_UPDATE_SYNTAX_TABLE_BACKWARD (ptrdiff_t byteoffset)
{
  if (!parse_sexp_lookup_properties)
    return;
  eassert (gl_state.e_re_byte >= 0); /* gl_state.b_re_byte can be negative.  */
  if (byteoffset < gl_state.b_re_byte)
    {
      ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (byteoffset);
      eassert (charpos < gl_state.b_property);
      /* CHARPOS lies before the valid range, so the state has to be
         moved backward.  The forward updater would be a no-op here:
         it only acts once CHARPOS >= gl_state.e_property.  */
      UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
      /* Bring the cached byte offsets back in line with the new bounds.  */
      re_update_byteoffsets ();
    }
}
/* Like RE_UPDATE_SYNTAX_TABLE_BACKWARD, but make gl_state good for the
   character position just before the one at BYTEOFFSET.  Does nothing
   unless parse_sexp_lookup_properties is set and BYTEOFFSET is at or
   before gl_state.b_re_byte.  */
INLINE void
RE_UPDATE_SYNTAX_TABLE_BACKWARD_BEFORE (ptrdiff_t byteoffset)
{
  if (!parse_sexp_lookup_properties)
    return;
  eassert (gl_state.e_re_byte >= 0); /* gl_state.b_re_byte can be negative.  */
  if (byteoffset <= gl_state.b_re_byte)
    {
      ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (byteoffset);
      eassert (charpos <= gl_state.b_property);
      /* CHARPOS - 1 lies before the valid range, so the state has to
         be moved backward.  The forward updater would be a no-op
         here: it only acts at or beyond gl_state.e_property.  */
      UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
      /* Bring the cached byte offsets back in line with the new bounds.  */
      re_update_byteoffsets ();
    }
}
/* Make syntax table good for CHARPOS, whichever side of the currently
   valid range CHARPOS lies on.  */
INLINE void
UPDATE_SYNTAX_TABLE (ptrdiff_t charpos)
{
/* Acts only when CHARPOS is before gl_state.b_property.  */
UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
/* Acts only when CHARPOS is at or beyond gl_state.e_property.  */
UPDATE_SYNTAX_TABLE_FORWARD (charpos);
}
/* Byte-offset counterpart of UPDATE_SYNTAX_TABLE: make syntax table
   good for BYTEOFFSET, trying the backward update first and then the
   forward one.  */
INLINE void
RE_UPDATE_SYNTAX_TABLE (ptrdiff_t byteoffset)
{
RE_UPDATE_SYNTAX_TABLE_BACKWARD (byteoffset);
RE_UPDATE_SYNTAX_TABLE_FORWARD (byteoffset);
}
/* Like RE_UPDATE_SYNTAX_TABLE, but using the *_BEFORE variants, which
   target the character position just before the one at BYTEOFFSET.
   The backward update is tried first, then the forward one.  */
INLINE void
RE_UPDATE_SYNTAX_TABLE_BEFORE (ptrdiff_t byteoffset)
{
RE_UPDATE_SYNTAX_TABLE_BACKWARD_BEFORE (byteoffset);
RE_UPDATE_SYNTAX_TABLE_FORWARD_BEFORE (byteoffset);
}
/* Set up the buffer-global syntax table: make the current buffer's
   syntax table the one used by gl_state, and clear the use_global and
   e_property_truncated flags.  */
INLINE void
SETUP_BUFFER_SYNTAX_TABLE (void)
{
  gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
  gl_state.e_property_truncated = false;
  gl_state.use_global = false;
}
/* Declared here, defined outside this header.  */
extern ptrdiff_t scan_words (ptrdiff_t, EMACS_INT);
/* Records in gl_state the object whose byte offsets
   RE_SYNTAX_TABLE_BYTE_TO_CHAR later converts.  The meaning of the
   ptrdiff_t argument is not visible from this header.  */
extern void RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object, ptrdiff_t);
INLINE_HEADER_END
#endif /* EMACS_SYNTAX_H */
|