From: npostavs@users.sourceforge.net
To: Eli Zaretskii <eliz@gnu.org>
Cc: sam.halliday@gmail.com, 24358@debbugs.gnu.org
Subject: bug#24358: 25.1.50; re-search-forward errors with "Variable binding depth exceeds max-specpdl-size"
Date: Mon, 24 Oct 2016 22:00:02 -0400 [thread overview]
Message-ID: <878ttdrxwd.fsf@users.sourceforge.net> (raw)
In-Reply-To: <8337jl4tes.fsf@gnu.org> (Eli Zaretskii's message of "Mon, 24 Oct 2016 19:13:15 +0300")
[-- Attachment #1: Type: text/plain, Size: 432 bytes --]
Eli Zaretskii <eliz@gnu.org> writes:
>> Oh, I see. Yes, I think you're right, the pointers stored in regstart,
>> regend, and fail_stack could become inconsistent. Hard to say what
>> kind of regex could trigger it, but it seems quite possible.
>
> Maybe we should simply call r_alloc_inhibit_buffer_relocation around
> any calls to regex.c functions, and remove the code you wrote to
> handle relocations internally.
How's this:
[-- Attachment #2: reverting patch --]
[-- Type: text/plain, Size: 12015 bytes --]
From c3e87ee1e1399f3687db02dc71b13830852a50eb Mon Sep 17 00:00:00 2001
From: Noam Postavsky <npostavs@gmail.com>
Date: Mon, 24 Oct 2016 19:54:29 -0400
Subject: [PATCH v4 1/2] Revert fixes to allocation of regex matching
The fix was not complete, and completing it was proving too complicated.
- Revert "* src/regex.c (re_search_2): Make new code safe for -Wjump-misses-init."
This reverts commit c2a17924a57483d14692c8913edbe8ad24b5ffbb.
- Revert "Port to GCC 6.2.1 + --enable-gcc-warnings"
This reverts commit f6134bbda259c115c06d4a9a3ab5c39340a15949.
- Revert "Fix handling of allocation in regex matching"
This reverts commit ad66b3fadb7ae22a4cbb82bb1507c39ceadf3897.
- Revert "Fix handling of buffer relocation in regex.c functions"
This reverts commit ee04aedc723b035eedaf975422d4eb242894121b.
---
src/dired.c | 4 +---
src/regex.c | 73 ------------------------------------------------------------
src/regex.h | 4 +---
src/search.c | 40 ++++++++++-----------------------
4 files changed, 14 insertions(+), 107 deletions(-)
diff --git a/src/dired.c b/src/dired.c
index 006f74c..dba575c 100644
--- a/src/dired.c
+++ b/src/dired.c
@@ -259,11 +259,9 @@ directory_files_internal (Lisp_Object directory, Lisp_Object full,
QUIT;
bool wanted = (NILP (match)
- || (re_match_object = name,
- re_search (bufp, SSDATA (name), len, 0, len, 0) >= 0));
+ || re_search (bufp, SSDATA (name), len, 0, len, 0) >= 0);
immediate_quit = 0;
- re_match_object = Qnil; /* Stop protecting name from GC. */
if (wanted)
{
diff --git a/src/regex.c b/src/regex.c
index b12e95b..56b18e6 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -1438,62 +1438,11 @@ WEAK_ALIAS (__re_set_syntax, re_set_syntax)
#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
#define TOP_FAILURE_HANDLE() fail_stack.frame
-#ifdef emacs
-# define STR_BASE_PTR(obj) \
- (NILP (obj) ? current_buffer->text->beg \
- : STRINGP (obj) ? SDATA (obj) \
- : NULL)
-#else
-# define STR_BASE_PTR(obj) NULL
-#endif
#define ENSURE_FAIL_STACK(space) \
while (REMAINING_AVAIL_SLOTS <= space) { \
- re_char *orig_base = STR_BASE_PTR (re_match_object); \
- bool might_relocate = orig_base != NULL; \
- ptrdiff_t string1_off, end1_off, end_match_1_off; \
- ptrdiff_t string2_off, end2_off, end_match_2_off; \
- ptrdiff_t d_off, dend_off, dfail_off; \
- if (might_relocate) \
- { \
- if (string1) \
- { \
- string1_off = string1 - orig_base; \
- end1_off = end1 - orig_base; \
- end_match_1_off = end_match_1 - orig_base; \
- } \
- if (string2) \
- { \
- string2_off = string2 - orig_base; \
- end2_off = end2 - orig_base; \
- end_match_2_off = end_match_2 - orig_base; \
- } \
- d_off = d - orig_base; \
- dend_off = dend - orig_base; \
- dfail_off = dfail - orig_base; \
- } \
if (!GROW_FAIL_STACK (fail_stack)) \
return -2; \
- /* In Emacs, GROW_FAIL_STACK might relocate string pointers. */ \
- if (might_relocate) \
- { \
- re_char *new_base = STR_BASE_PTR (re_match_object); \
- if (string1) \
- { \
- string1 = new_base + string1_off; \
- end1 = new_base + end1_off; \
- end_match_1 = new_base + end_match_1_off; \
- } \
- if (string2) \
- { \
- string2 = new_base + string2_off; \
- end2 = new_base + end2_off; \
- end_match_2 = new_base + end_match_2_off; \
- } \
- d = new_base + d_off; \
- dend = new_base + dend_off; \
- dfail = new_base + dfail_off; \
- } \
DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\
DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\
}
@@ -4380,10 +4329,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
/* Loop through the string, looking for a place to start matching. */
for (;;)
{
- ptrdiff_t offset1, offset2;
- re_char *orig_base;
- bool might_relocate;
-
/* If the pattern is anchored,
skip quickly past places we cannot match.
We don't bother to treat startpos == 0 specially
@@ -4500,17 +4445,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
&& !bufp->can_be_null)
return -1;
- /* re_match_2_internal may allocate, relocating the Lisp text
- object that we're searching. */
- IF_LINT (offset2 = 0); /* Work around GCC bug 78081. */
- orig_base = STR_BASE_PTR (re_match_object);
- might_relocate = orig_base != NULL;
- if (might_relocate)
- {
- if (string1) offset1 = string1 - orig_base;
- if (string2) offset2 = string2 - orig_base;
- }
-
val = re_match_2_internal (bufp, string1, size1, string2, size2,
startpos, regs, stop);
@@ -4520,13 +4454,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
if (val == -2)
return -2;
- if (might_relocate)
- {
- re_char *new_base = STR_BASE_PTR (re_match_object);
- if (string1) string1 = offset1 + new_base;
- if (string2) string2 = offset2 + new_base;
- }
-
advance:
if (!range)
break;
diff --git a/src/regex.h b/src/regex.h
index 61c771c..51f4424 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -169,9 +169,7 @@ extern reg_syntax_t re_syntax_options;
#ifdef emacs
# include "lisp.h"
/* In Emacs, this is the string or buffer in which we are matching.
- It is used for looking up syntax properties, and also to recompute
- pointers in case the object is relocated as a side effect of
- calling malloc (if it calls r_alloc_sbrk in ralloc.c).
+ It is used for looking up syntax properties.
If the value is a Lisp string object, we are matching text in that
string; if it's nil, we are matching text in the current buffer; if
diff --git a/src/search.c b/src/search.c
index b50e7f0..fa5ac44 100644
--- a/src/search.c
+++ b/src/search.c
@@ -287,10 +287,8 @@ looking_at_1 (Lisp_Object string, bool posix)
immediate_quit = 1;
QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
- /* Get pointers and sizes of the two strings that make up the
- visible portion of the buffer. Note that we can use pointers
- here, unlike in search_buffer, because we only call re_match_2
- once, after which we never use the pointers again. */
+ /* Get pointers and sizes of the two strings
+ that make up the visible portion of the buffer. */
p1 = BEGV_ADDR;
s1 = GPT_BYTE - BEGV_BYTE;
@@ -409,7 +407,6 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
(NILP (Vinhibit_changing_match_data)
? &search_regs : NULL));
immediate_quit = 0;
- re_match_object = Qnil; /* Stop protecting string from GC. */
/* Set last_thing_searched only when match data is changed. */
if (NILP (Vinhibit_changing_match_data))
@@ -480,7 +477,6 @@ fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
SBYTES (string), 0,
SBYTES (string), 0);
immediate_quit = 0;
- re_match_object = Qnil; /* Stop protecting string from GC. */
return val;
}
@@ -568,7 +564,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
pos_byte, NULL, limit_byte);
immediate_quit = 0;
- re_match_object = Qnil; /* Stop protecting string from GC. */
return len;
}
@@ -1183,8 +1178,8 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
{
- unsigned char *base;
- ptrdiff_t off1, off2, s1, s2;
+ unsigned char *p1, *p2;
+ ptrdiff_t s1, s2;
struct re_pattern_buffer *bufp;
bufp = compile_pattern (string,
@@ -1198,19 +1193,16 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
can take too long. */
QUIT; /* Do a pending quit right away,
to avoid paradoxical behavior */
- /* Get offsets and sizes of the two strings that make up the
- visible portion of the buffer. We compute offsets instead of
- pointers because re_search_2 may call malloc and therefore
- change the buffer text address. */
+ /* Get pointers and sizes of the two strings
+ that make up the visible portion of the buffer. */
- base = current_buffer->text->beg;
- off1 = BEGV_ADDR - base;
+ p1 = BEGV_ADDR;
s1 = GPT_BYTE - BEGV_BYTE;
- off2 = GAP_END_ADDR - base;
+ p2 = GAP_END_ADDR;
s2 = ZV_BYTE - GPT_BYTE;
if (s1 < 0)
{
- off2 = off1;
+ p2 = p1;
s2 = ZV_BYTE - BEGV_BYTE;
s1 = 0;
}
@@ -1225,16 +1217,12 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
{
ptrdiff_t val;
- val = re_search_2 (bufp,
- (char*) (base + off1), s1,
- (char*) (base + off2), s2,
+ val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
pos_byte - BEGV_BYTE, lim_byte - pos_byte,
(NILP (Vinhibit_changing_match_data)
? &search_regs : &search_regs_1),
/* Don't allow match past current point */
pos_byte - BEGV_BYTE);
- /* Update 'base' due to possible relocation inside re_search_2. */
- base = current_buffer->text->beg;
if (val == -2)
{
matcher_overflow ();
@@ -1274,15 +1262,11 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
{
ptrdiff_t val;
- val = re_search_2 (bufp,
- (char*) (base + off1), s1,
- (char*) (base + off2), s2,
- pos_byte - BEGV_BYTE, lim_byte - pos_byte,
+ val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
+ pos_byte - BEGV_BYTE, lim_byte - pos_byte,
(NILP (Vinhibit_changing_match_data)
? &search_regs : &search_regs_1),
lim_byte - BEGV_BYTE);
- /* Update 'base' due to possible relocation inside re_search_2. */
- base = current_buffer->text->beg;
if (val == -2)
{
matcher_overflow ();
--
2.9.3
[-- Attachment #3: patch --]
[-- Type: text/plain, Size: 2823 bytes --]
From 79806c03ad9f45d8ed016a06f5e24cf69df85ad2 Mon Sep 17 00:00:00 2001
From: Noam Postavsky <npostavs@gmail.com>
Date: Mon, 24 Oct 2016 21:22:07 -0400
Subject: [PATCH v4 2/2] Inhibit buffer relocation during regex searches
* src/search.c (looking_at_1, fast_looking_at, search_buffer): Prevent
relocation of buffer contents during calls to re_search_2. This ensures
the pointers into buffer text won't be invalidated by
r_alloc_sbrk (called from malloc with configurations where
REL_ALLOC=yes).
---
src/search.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/src/search.c b/src/search.c
index fa5ac44..15504be 100644
--- a/src/search.c
+++ b/src/search.c
@@ -308,12 +308,20 @@ looking_at_1 (Lisp_Object string, bool posix)
re_match_object = Qnil;
+#ifdef REL_ALLOC
+ /* Prevent ralloc.c from relocating the current buffer while
+ searching it. */
+ r_alloc_inhibit_buffer_relocation (1);
+#endif
i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
PT_BYTE - BEGV_BYTE,
(NILP (Vinhibit_changing_match_data)
? &search_regs : NULL),
ZV_BYTE - BEGV_BYTE);
immediate_quit = 0;
+#ifdef REL_ALLOC
+ r_alloc_inhibit_buffer_relocation (0);
+#endif
if (i == -2)
matcher_overflow ();
@@ -561,8 +569,16 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
immediate_quit = 1;
+#ifdef REL_ALLOC
+ /* Prevent ralloc.c from relocating the current buffer while
+ searching it. */
+ r_alloc_inhibit_buffer_relocation (1);
+#endif
len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
pos_byte, NULL, limit_byte);
+#ifdef REL_ALLOC
+ r_alloc_inhibit_buffer_relocation (0);
+#endif
immediate_quit = 0;
return len;
@@ -1213,6 +1229,12 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
}
re_match_object = Qnil;
+#ifdef REL_ALLOC
+ /* Prevent ralloc.c from relocating the current buffer while
+ searching it. */
+ r_alloc_inhibit_buffer_relocation (1);
+#endif
+
while (n < 0)
{
ptrdiff_t val;
@@ -1254,6 +1276,9 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
else
{
immediate_quit = 0;
+#ifdef REL_ALLOC
+ r_alloc_inhibit_buffer_relocation (0);
+#endif
return (n);
}
n++;
@@ -1296,11 +1321,17 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
else
{
immediate_quit = 0;
+#ifdef REL_ALLOC
+ r_alloc_inhibit_buffer_relocation (0);
+#endif
return (0 - n);
}
n--;
}
immediate_quit = 0;
+#ifdef REL_ALLOC
+ r_alloc_inhibit_buffer_relocation (0);
+#endif
return (pos);
}
else /* non-RE case */
--
2.9.3
[-- Attachment #4: Type: text/plain, Size: 336 bytes --]
I did 'make extraclean && . ../config-debug.sh && make' (all in one, to
ensure I didn't forget to clean) and it succeeded.
>
> I'm sorry I didn't look at this closer before, and thus caused you to
> work on all those changes, which we might now need to revert.
Yes, that's how hindsight works, you only get it when it's too late. ;)
next prev parent reply other threads:[~2016-10-25 2:00 UTC|newest]
Thread overview: 76+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-08-26 20:17 bug#24315: 25.1.50; re-search-forward errors with "Variable binding depth exceeds max-specpdl-size" Peder O. Klingenberg
2016-08-27 3:35 ` npostavs
2016-08-30 13:09 ` Peder O. Klingenberg
2016-09-02 1:58 ` npostavs
2016-09-02 13:45 ` Peder O. Klingenberg
2016-09-03 14:21 ` npostavs
2016-09-06 8:18 ` Peder O. Klingenberg
2016-09-07 23:27 ` npostavs
2016-09-03 15:43 ` bug#24358: " npostavs
2016-10-08 0:29 ` npostavs
2016-10-08 5:55 ` Eli Zaretskii
2016-10-08 13:45 ` npostavs
2016-10-08 14:39 ` Eli Zaretskii
2016-10-08 14:47 ` Eli Zaretskii
2016-10-08 16:57 ` npostavs
2016-10-08 17:23 ` Eli Zaretskii
2016-10-08 18:52 ` npostavs
2016-10-08 19:47 ` Eli Zaretskii
2016-10-08 20:55 ` npostavs
2016-10-09 6:52 ` Eli Zaretskii
2016-10-13 1:29 ` npostavs
2016-10-13 6:19 ` Eli Zaretskii
2016-10-14 2:19 ` npostavs
2016-10-14 7:02 ` Eli Zaretskii
2016-10-19 3:11 ` npostavs
2016-10-19 7:02 ` Eli Zaretskii
2016-10-19 12:29 ` npostavs
2016-10-19 14:37 ` Eli Zaretskii
2016-10-20 4:31 ` npostavs
2016-10-20 8:39 ` Eli Zaretskii
2016-10-21 1:22 ` npostavs
2016-10-21 7:17 ` Eli Zaretskii
2016-10-22 2:36 ` npostavs
2016-10-22 21:54 ` Sam Halliday
2016-10-22 22:46 ` npostavs
2016-10-23 6:41 ` Eli Zaretskii
2016-10-23 8:57 ` Sam Halliday
2016-10-23 9:19 ` Eli Zaretskii
2016-10-23 13:40 ` Sam Halliday
2016-10-23 14:07 ` Eli Zaretskii
2016-10-23 15:42 ` Sam Halliday
2016-10-23 15:48 ` Eli Zaretskii
2016-10-23 15:58 ` Sam Halliday
2016-10-23 15:58 ` Sam Halliday
2016-10-23 16:44 ` Eli Zaretskii
2016-10-23 17:19 ` Eli Zaretskii
2016-10-23 18:06 ` Eli Zaretskii
2016-10-23 18:14 ` Noam Postavsky
2016-10-23 19:18 ` Eli Zaretskii
2016-10-24 13:29 ` npostavs
2016-10-24 13:39 ` Eli Zaretskii
2016-10-24 15:33 ` Noam Postavsky
2016-10-24 16:13 ` Eli Zaretskii
2016-10-25 2:00 ` npostavs [this message]
2016-10-25 16:03 ` Eli Zaretskii
2016-10-26 0:16 ` npostavs
2016-10-24 13:43 ` Eli Zaretskii
2016-10-24 14:03 ` Eli Zaretskii
2016-10-24 20:13 ` Sam Halliday
2016-10-24 23:44 ` npostavs
2016-11-07 3:39 ` Eli Zaretskii
2016-11-07 3:56 ` Noam Postavsky
2016-11-07 15:10 ` Eli Zaretskii
2016-10-23 18:16 ` Sam Halliday
2016-10-23 19:10 ` Eli Zaretskii
2016-10-23 19:32 ` Eli Zaretskii
2016-10-23 20:15 ` Sam Halliday
2016-10-23 20:27 ` Eli Zaretskii
2016-10-23 20:18 ` Eli Zaretskii
2016-10-23 23:18 ` Noam Postavsky
2016-10-24 7:05 ` Eli Zaretskii
2016-10-24 8:40 ` Eli Zaretskii
2016-10-23 18:11 ` Sam Halliday
2016-10-18 8:16 ` bug#24358: 25.1.50; Sam Halliday
2016-10-18 8:56 ` Sam Halliday
2016-10-18 9:28 ` Eli Zaretskii
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/emacs/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=878ttdrxwd.fsf@users.sourceforge.net \
--to=npostavs@users.sourceforge.net \
--cc=24358@debbugs.gnu.org \
--cc=eliz@gnu.org \
--cc=sam.halliday@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.savannah.gnu.org/cgit/emacs.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).