* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures [not found] <mailman.12421.1623412982.2554.bug-gnu-emacs@gnu.org> @ 2021-06-14 15:10 ` fabrice nicol 2021-06-14 16:04 ` Eli Zaretskii 0 siblings, 1 reply; 13+ messages in thread From: fabrice nicol @ 2021-06-14 15:10 UTC (permalink / raw) To: 47408, Eli Zaretskii, Francesco Potortì [-- Attachment #1: Type: text/plain, Size: 331 bytes --] Hi, I'm sending the announced patch, which enables existentially-quantified procedures for both etags and ctags in Mercury etags/ctags support. Taking advantage of this to revise my prior contribution, I fixed an incidental issue (single-word declarations, which are very rare, were not tagged). I hope this works, Fabrice [-- Attachment #2: 0001-Fix-explicit-tag-issue-with-Mercury-etags-ctags-supp.patch --] [-- Type: text/x-patch, Size: 7259 bytes --] From 58e85042f31716a4ae607ae4f93ce75f8065b006 Mon Sep 17 00:00:00 2001 From: Fabrice Nicol <fabrnicol@gmail.com> Date: Mon, 14 Jun 2021 14:30:54 +0200 Subject: [PATCH] Fix explicit tag issue with Mercury etags/ctags support Redesign of prior fix, which did not handle type quantifiers. Fix omission of single-word declarations like ':- interface.' * lib-src/etags.c (mercury_pr): Pass the newly corrected NAME and NAMELEN arguments to 'make_tag'. --- lib-src/etags.c | 114 +++++++++++++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 41 deletions(-) diff --git a/lib-src/etags.c b/lib-src/etags.c index 9f20e44caf..b96a44ec7a 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -6081,10 +6081,10 @@ prolog_atom (char *s, size_t pos) pos++; if (s[pos] != '\'') break; - pos++; /* A double quote */ + pos++; /* A double quote */ } else if (s[pos] == '\0') - /* Multiline quoted atoms are ignored. */ + /* Multiline quoted atoms are ignored. */ return 0; else if (s[pos] == '\\') { @@ -6119,6 +6119,13 @@ prolog_atom (char *s, size_t pos) static bool is_mercury_type = false; static bool is_mercury_quantifier = false; static bool is_mercury_declaration = false; +typedef struct +{ + size_t pos; /* Position reached in parsing tag name. */ + size_t namelength; /* Length of tag name */ + size_t totlength; /* Total length of parsed tag: this field is currently + reserved for control and debugging. */ +} mercury_pos_t; /* * Objective-C and Mercury have identical file extension .m. @@ -6374,10 +6381,12 @@ mercury_skip_comment (linebuffer *plb, FILE *inf) "initialise", "finalise", "mutable", "module", "interface", "implementation", "import_module", "use_module", "include_module", "end_module", "some", "all"}; -static size_t +static mercury_pos_t mercury_decl (char *s, size_t pos) { - if (s == NULL) return 0; + mercury_pos_t null_pos = {0, 0, 0}; + + if (s == NULL) return null_pos; size_t origpos; origpos = pos; @@ -6398,7 +6407,8 @@ mercury_decl (char *s, size_t pos) if (is_mercury_quantifier) { if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax. */ - return 0; + return null_pos; + is_mercury_quantifier = false; /* Reset to base value. */ found_decl_tag = true; } @@ -6418,7 +6428,7 @@ mercury_decl (char *s, size_t pos) is_mercury_quantifier = true; } - break; /* Found declaration tag of rank j. */ + break; /* Found declaration tag of rank j. */ } else /* 'solver type' has a blank in the middle, @@ -6461,11 +6471,13 @@ mercury_decl (char *s, size_t pos) if (found_decl_tag) pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */ else - return 0; + return null_pos; } /* From now on it is the same as for Prolog except for module dots. */ + size_t start_of_name = pos; + if (c_islower (s[pos]) || s[pos] == '_' ) { /* The name is unquoted. @@ -6478,7 +6490,8 @@ mercury_decl (char *s, size_t pos) && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_'))) ++pos; - return pos - origpos; + mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos}; + return position; } else if (s[pos] == '\'') { @@ -6493,28 +6506,37 @@ mercury_decl (char *s, size_t pos) ++pos; /* A double quote. */ } else if (s[pos] == '\0') /* Multiline quoted atoms are ignored. */ - return 0; + return null_pos; else if (s[pos] == '\\') { if (s[pos+1] == '\0') - return 0; + return null_pos; pos += 2; } else ++pos; } - return pos - origpos; + + mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos}; + return position; } else if (is_mercury_quantifier && s[pos] == '[') /* :- some [T] pred/func. */ { for (++pos; s + pos != NULL && s[pos] != ']'; ++pos) {} - if (s + pos == NULL) return 0; + if (s + pos == NULL) return null_pos; ++pos; pos = skip_spaces (s + pos) - s; - return mercury_decl (s, pos) + pos - origpos; + mercury_pos_t position = mercury_decl (s, pos); + position.totlength += pos - origpos; + return position; + } + else if (s[pos] == '.') /* as in ':- interface.' */ + { + mercury_pos_t position = {pos, pos - origpos + 1, pos - origpos}; + return position; } else - return 0; + return null_pos; } static ptrdiff_t @@ -6523,6 +6545,7 @@ mercury_pr (char *s, char *last, ptrdiff_t lastlen) size_t len0 = 0; is_mercury_type = false; is_mercury_quantifier = false; + bool stop_at_rule = false; if (is_mercury_declaration) { @@ -6530,38 +6553,47 @@ mercury_pr (char *s, char *last, ptrdiff_t lastlen) len0 = skip_spaces (s + 2) - s; } - size_t len = mercury_decl (s, len0); - if (len == 0) return 0; - len += len0; + mercury_pos_t position = mercury_decl (s, len0); + size_t pos = position.pos; + int offset = 0; /* may be < 0 */ + if (pos == 0) return 0; + + /* Skip white space for rules in definitions before :- + and possibly multiline type definitions */ - if (( (s[len] == '.' /* This is a statement dot, not a module dot. */ - || (s[len] == '(' && (len += 1)) - || (s[len] == ':' /* Stopping in case of a rule. */ - && s[len + 1] == '-' - && (len += 2))) - && (lastlen != len || memcmp (s, last, len) != 0) + while (c_isspace (s[pos])) { ++pos; ++offset; } + + if (( ((s[pos] == '.' && (pos += 1)) /* case 1 + This is a statement dot, + not a module dot. */ + || (s[pos] == '(' && (pos += 1)) /* case 2 */ + || ((s[pos] == ':') /* case 3 */ + && s[pos + 1] == '-' && (stop_at_rule = true))) + && (lastlen != pos || memcmp (s, last, pos) != 0) ) /* Types are often declared on several lines so keeping just the first line. */ - || is_mercury_type) + + || is_mercury_type) /* When types are implemented. */ { - char *name = skip_non_spaces (s + len0); - size_t namelen; - if (name >= s + len) - { - name = s; - namelen = len; - } - else - { - name = skip_spaces (name); - namelen = len - (name - s); - } - /* Remove trailing non-name characters. */ - while (namelen > 0 && notinname (name[namelen - 1])) - namelen--; - make_tag (name, namelen, true, s, len, lineno, linecharno); - return len; + char *name = xnew (pos + 1, char); + size_t namelength = position.namelength; + if (stop_at_rule && offset) --offset; + + /* Left-trim type definitions. */ + + while (pos > namelength + offset + && c_isspace (s[pos - namelength - offset])) + --offset; + + memcpy (name, s + pos - namelength - offset, namelength); + + /* There is no need to correct namelength or call notinname. */ + name[namelength - 1] = '\0'; + + make_tag (name, namelength, true, s, pos, lineno, linecharno); + free (name); + return pos; } return 0; -- 2.31.1 ^ permalink raw reply related [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-14 15:10 ` bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures fabrice nicol @ 2021-06-14 16:04 ` Eli Zaretskii 2021-06-14 17:10 ` fabrice nicol 0 siblings, 1 reply; 13+ messages in thread From: Eli Zaretskii @ 2021-06-14 16:04 UTC (permalink / raw) To: fabrice nicol; +Cc: 47408 > From: fabrice nicol <fabrnicol@gmail.com> > Date: Mon, 14 Jun 2021 17:10:26 +0200 > > I'm sending the announced patch, which enables existentially-quantified > procedures for both etags and ctags in Mercury etags/ctags support. > > Taking advantage of this to revise my prior contribution, I fixed an > incidental issue (single-word declarations, which are very rare, were > not tagged). Thanks. I didn't yet try to apply and run the patch, but one aspect of the patch caused me to raise mu brow: > + char *name = xnew (pos + 1, char); > + size_t namelength = position.namelength; > + if (stop_at_rule && offset) --offset; > + > + /* Left-trim type definitions. */ > + > + while (pos > namelength + offset > + && c_isspace (s[pos - namelength - offset])) > + --offset; > + > + memcpy (name, s + pos - namelength - offset, namelength); > + > + /* There is no need to correct namelength or call notinname. */ > + name[namelength - 1] = '\0'; > + > + make_tag (name, namelength, true, s, pos, lineno, linecharno); > + free (name); Why do you copy the identifier's name into a newly-allocated buffer, instead of just passing 's + pos - namelength - offset' and 'namelength' as the first 2 arguments of make_tag? Isn't this xnew+memcpy+free dance here redundant? Or what did I miss? ^ permalink raw reply [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-14 16:04 ` Eli Zaretskii @ 2021-06-14 17:10 ` fabrice nicol 2021-06-14 17:42 ` Eli Zaretskii 0 siblings, 1 reply; 13+ messages in thread From: fabrice nicol @ 2021-06-14 17:10 UTC (permalink / raw) To: Eli Zaretskii; +Cc: 47408 Yes, I'm afraid that you missed something there this time around. If you take a look at other languages that implement explicit tags (like Fortran) you will see that there are buffers coming in to avoid having 'name' and 'linestart' (1st and 4th arguments to 'make_tag') share a same string pointer (here 's'). This is explained in the header comment to 'make_tag': " 2. LINESTART contains name as either a rightmost, or rightmost but * one character, substring;" which is a bit of a convoluted constraint: better protect oneself and bufferize from the ground up. In most cases (though perhaps with occasional exceptions), if I followed your suggestion, 'name' would be a substring with aon offset of at least +2 bytes from start of string s. When I applied you suggestion and tested, the TAGS base was as expected accordingly: wrong. Fabrice >> Thanks. I didn't yet try to apply and run the patch, but one aspect >> of the patch caused me to raise mu brow: >> >> + char *name = xnew (pos + 1, char); >> + size_t namelength = position.namelength; >> + if (stop_at_rule && offset) --offset; >> + >> + /* Left-trim type definitions. */ >> + >> + while (pos > namelength + offset >> + && c_isspace (s[pos - namelength - offset])) >> + --offset; >> + >> + memcpy (name, s + pos - namelength - offset, namelength); >> + >> + /* There is no need to correct namelength or call notinname. */ >> + name[namelength - 1] = '\0'; >> + >> + make_tag (name, namelength, true, s, pos, lineno, linecharno); >> + free (name); > Why do you copy the identifier's name into a newly-allocated buffer, > instead of just passing 's + pos - namelength - offset' and > 'namelength' as the first 2 arguments of make_tag? Isn't this > xnew+memcpy+free dance here redundant? Or what did I miss? ^ permalink raw reply [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-14 17:10 ` fabrice nicol @ 2021-06-14 17:42 ` Eli Zaretskii 2021-06-14 18:52 ` fabrice nicol 0 siblings, 1 reply; 13+ messages in thread From: Eli Zaretskii @ 2021-06-14 17:42 UTC (permalink / raw) To: fabrice nicol; +Cc: 47408 > Cc: 47408@debbugs.gnu.org, pot@gnu.org > From: fabrice nicol <fabrnicol@gmail.com> > Date: Mon, 14 Jun 2021 19:10:53 +0200 > > If you take a look at other languages that implement explicit tags (like > Fortran) you will see that there are buffers coming in to avoid having > 'name' and 'linestart' (1st and 4th arguments to 'make_tag') share a > same string pointer (here 's'). > > This is explained in the header comment to 'make_tag': > > " 2. LINESTART contains name as either a rightmost, or rightmost but > * one character, substring;" This is just a condition for generating "implicitly named" tags. There's nothing wrong with having explicitly named tags, if there are good reasons for that. > When I applied you suggestion and tested, the TAGS base was as expected > accordingly: wrong. "Wrong" in what sense? Can you show an example of such a wrong tag? And how does Mercury differ from Prolog in this sense? prolog_pr doesn't allocate new strings before calling make_tag. Thanks. ^ permalink raw reply [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-14 17:42 ` Eli Zaretskii @ 2021-06-14 18:52 ` fabrice nicol 2021-06-17 10:50 ` Eli Zaretskii 0 siblings, 1 reply; 13+ messages in thread From: fabrice nicol @ 2021-06-14 18:52 UTC (permalink / raw) To: Eli Zaretskii; +Cc: 47408 [-- Attachment #1: Type: text/plain, Size: 1518 bytes --] Agreed. (But you may have more systematically explicit tags because of this condition 2.) The attached patch was tested, builds and runs OK. Replaces the former one, is you stick to the option of modifying 's' by reference rather than creating a copy. Note: Prolog has a much simpler naming pattern, so differences are normal. No need for explicit tagging in Prolog, see the call to 'make_tag' in Prolog support. Fabrice Le 14/06/2021 à 19:42, Eli Zaretskii a écrit : >> Cc: 47408@debbugs.gnu.org, pot@gnu.org >> From: fabrice nicol <fabrnicol@gmail.com> >> Date: Mon, 14 Jun 2021 19:10:53 +0200 >> >> If you take a look at other languages that implement explicit tags (like >> Fortran) you will see that there are buffers coming in to avoid having >> 'name' and 'linestart' (1st and 4th arguments to 'make_tag') share a >> same string pointer (here 's'). >> >> This is explained in the header comment to 'make_tag': >> >> " 2. LINESTART contains name as either a rightmost, or rightmost but >> * one character, substring;" > This is just a condition for generating "implicitly named" tags. > There's nothing wrong with having explicitly named tags, if there are > good reasons for that. > >> When I applied you suggestion and tested, the TAGS base was as expected >> accordingly: wrong. > "Wrong" in what sense? Can you show an example of such a wrong tag? > And how does Mercury differ from Prolog in this sense? prolog_pr > doesn't allocate new strings before calling make_tag. > > Thanks. [-- Attachment #2: 0001-Fix-explicit-tag-issue-with-Mercury-etags-ctags-supp-v2.patch --] [-- Type: text/x-patch, Size: 7154 bytes --] From 882ba6a0f51a95893cac798c721e3ccd4ad4e0f6 Mon Sep 17 00:00:00 2001 From: Fabrice Nicol <fabrnicol@gmail.com> Date: Mon, 14 Jun 2021 14:30:54 +0200 Subject: [PATCH] Fix explicit tag issue with Mercury etags/ctags support Redesign of prior fix, which did not handle type quantifiers. Fix omission of single-word declarations like ':- interface.' * lib-src/etags.c (mercury_pr): Pass the newly corrected NAME and NAMELEN arguments to 'make_tag'. --- lib-src/etags.c | 110 ++++++++++++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 41 deletions(-) diff --git a/lib-src/etags.c b/lib-src/etags.c index 9f20e44caf..370e825111 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -6081,10 +6081,10 @@ prolog_atom (char *s, size_t pos) pos++; if (s[pos] != '\'') break; - pos++; /* A double quote */ + pos++; /* A double quote */ } else if (s[pos] == '\0') - /* Multiline quoted atoms are ignored. */ + /* Multiline quoted atoms are ignored. */ return 0; else if (s[pos] == '\\') { @@ -6119,6 +6119,13 @@ prolog_atom (char *s, size_t pos) static bool is_mercury_type = false; static bool is_mercury_quantifier = false; static bool is_mercury_declaration = false; +typedef struct +{ + size_t pos; /* Position reached in parsing tag name. */ + size_t namelength; /* Length of tag name */ + size_t totlength; /* Total length of parsed tag: this field is currently + reserved for control and debugging. */ +} mercury_pos_t; /* * Objective-C and Mercury have identical file extension .m. @@ -6374,10 +6381,12 @@ mercury_skip_comment (linebuffer *plb, FILE *inf) "initialise", "finalise", "mutable", "module", "interface", "implementation", "import_module", "use_module", "include_module", "end_module", "some", "all"}; -static size_t +static mercury_pos_t mercury_decl (char *s, size_t pos) { - if (s == NULL) return 0; + mercury_pos_t null_pos = {0, 0, 0}; + + if (s == NULL) return null_pos; size_t origpos; origpos = pos; @@ -6398,7 +6407,8 @@ mercury_decl (char *s, size_t pos) if (is_mercury_quantifier) { if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax. */ - return 0; + return null_pos; + is_mercury_quantifier = false; /* Reset to base value. */ found_decl_tag = true; } @@ -6418,7 +6428,7 @@ mercury_decl (char *s, size_t pos) is_mercury_quantifier = true; } - break; /* Found declaration tag of rank j. */ + break; /* Found declaration tag of rank j. */ } else /* 'solver type' has a blank in the middle, @@ -6461,11 +6471,13 @@ mercury_decl (char *s, size_t pos) if (found_decl_tag) pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */ else - return 0; + return null_pos; } /* From now on it is the same as for Prolog except for module dots. */ + size_t start_of_name = pos; + if (c_islower (s[pos]) || s[pos] == '_' ) { /* The name is unquoted. @@ -6478,7 +6490,8 @@ mercury_decl (char *s, size_t pos) && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_'))) ++pos; - return pos - origpos; + mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos}; + return position; } else if (s[pos] == '\'') { @@ -6493,28 +6506,37 @@ mercury_decl (char *s, size_t pos) ++pos; /* A double quote. */ } else if (s[pos] == '\0') /* Multiline quoted atoms are ignored. */ - return 0; + return null_pos; else if (s[pos] == '\\') { if (s[pos+1] == '\0') - return 0; + return null_pos; pos += 2; } else ++pos; } - return pos - origpos; + + mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos}; + return position; } else if (is_mercury_quantifier && s[pos] == '[') /* :- some [T] pred/func. */ { for (++pos; s + pos != NULL && s[pos] != ']'; ++pos) {} - if (s + pos == NULL) return 0; + if (s + pos == NULL) return null_pos; ++pos; pos = skip_spaces (s + pos) - s; - return mercury_decl (s, pos) + pos - origpos; + mercury_pos_t position = mercury_decl (s, pos); + position.totlength += pos - origpos; + return position; + } + else if (s[pos] == '.') /* as in ':- interface.' */ + { + mercury_pos_t position = {pos, pos - origpos + 1, pos - origpos}; + return position; } else - return 0; + return null_pos; } static ptrdiff_t @@ -6523,6 +6545,7 @@ mercury_pr (char *s, char *last, ptrdiff_t lastlen) size_t len0 = 0; is_mercury_type = false; is_mercury_quantifier = false; + bool stop_at_rule = false; if (is_mercury_declaration) { @@ -6530,38 +6553,43 @@ mercury_pr (char *s, char *last, ptrdiff_t lastlen) len0 = skip_spaces (s + 2) - s; } - size_t len = mercury_decl (s, len0); - if (len == 0) return 0; - len += len0; + mercury_pos_t position = mercury_decl (s, len0); + size_t pos = position.pos; + int offset = 0; /* may be < 0 */ + if (pos == 0) return 0; - if (( (s[len] == '.' /* This is a statement dot, not a module dot. */ - || (s[len] == '(' && (len += 1)) - || (s[len] == ':' /* Stopping in case of a rule. */ - && s[len + 1] == '-' - && (len += 2))) - && (lastlen != len || memcmp (s, last, len) != 0) + /* Skip white space for rules in definitions before :- + and possibly multiline type definitions */ + + while (c_isspace (s[pos])) { ++pos; ++offset; } + + if (( ((s[pos] == '.' && (pos += 1)) /* case 1 + This is a statement dot, + not a module dot. */ + || (s[pos] == '(' && (pos += 1)) /* case 2 */ + || ((s[pos] == ':') /* case 3 */ + && s[pos + 1] == '-' && (stop_at_rule = true))) + && (lastlen != pos || memcmp (s, last, pos) != 0) ) /* Types are often declared on several lines so keeping just the first line. */ - || is_mercury_type) + + || is_mercury_type) /* When types are implemented. */ { - char *name = skip_non_spaces (s + len0); - size_t namelen; - if (name >= s + len) - { - name = s; - namelen = len; - } - else - { - name = skip_spaces (name); - namelen = len - (name - s); - } - /* Remove trailing non-name characters. */ - while (namelen > 0 && notinname (name[namelen - 1])) - namelen--; - make_tag (name, namelen, true, s, len, lineno, linecharno); - return len; + size_t namelength = position.namelength; + if (stop_at_rule && offset) --offset; + + /* Left-trim type definitions. */ + + while (pos > namelength + offset + && c_isspace (s[pos - namelength - offset])) + --offset; + + /* There is no need to correct namelength or call notinname. */ + s[pos - offset - 1] = '\0'; + + make_tag (s + pos - namelength - offset, namelength, true, s, pos, lineno, linecharno); + return pos; } return 0; -- 2.31.1 ^ permalink raw reply related [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-14 18:52 ` fabrice nicol @ 2021-06-17 10:50 ` Eli Zaretskii 2021-06-17 11:19 ` Fabrice Nicol 0 siblings, 1 reply; 13+ messages in thread From: Eli Zaretskii @ 2021-06-17 10:50 UTC (permalink / raw) To: fabrice nicol; +Cc: 47408 > Cc: 47408@debbugs.gnu.org, pot@gnu.org > From: fabrice nicol <fabrnicol@gmail.com> > Date: Mon, 14 Jun 2021 20:52:54 +0200 > > Agreed. (But you may have more systematically explicit tags because of > this condition 2.) > > The attached patch was tested, builds and runs OK. Replaces the former > one, is you stick to the option of modifying 's' by reference rather > than creating a copy. Thanks. I confirm that this works, but I have 2 follow-up issues with this patch: 1. It adds tags for some identifiers that AFAUI are actually keywords, and shouldn't be in the TAGS tables. Examples: "interface" (e.g., on line 146 of accumulator.m) and "implementation" (e.g., on line 166). I guess this is unintended? If so, how to fix it? 2. It always produces "explicitly named" tags, which I think is unnecessary. AFAICT, this is related to the following snippet from mercury_pr: > + /* Left-trim type definitions. */ > + > + while (pos > namelength + offset > + && c_isspace (s[pos - namelength - offset])) > + --offset; > + > + /* There is no need to correct namelength or call notinname. */ > + s[pos - offset - 1] = '\0'; > + > + make_tag (s + pos - namelength - offset, namelength, true, s, pos, lineno, linecharno); > + return pos; I don't understand why you need to overwrite s[pos - offset -1] with the null byte: the same effect could be obtained by adjusting the POS argument passed to make_tag. Also, you in effect chop off the last character of NAME, but don't adjust NAMELENGTH accordingly. These factors together cause make_tag to decide that an explicitly-named tag is in order, because name[namelength-1] is a null byte, which is rejected as being "not-a-name" character. To fix this second issue, I propose the change below, which should be applied on top of your patches: diff --git a/lib-src/etags.c b/lib-src/etags.c index 370e825..2b0288e 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -6585,10 +6585,8 @@ mercury_pr (char *s, char *last, ptrdiff_t lastlen) && c_isspace (s[pos - namelength - offset])) --offset; - /* There is no need to correct namelength or call notinname. */ - s[pos - offset - 1] = '\0'; - - make_tag (s + pos - namelength - offset, namelength, true, s, pos, lineno, linecharno); + make_tag (s + pos - namelength - offset, namelength - 1, true, + s, pos - offset - 1, lineno, linecharno); return pos; } I've verified that etags after this change still produces the correct TAGS file, including for the file univ.m you sent up-thread. Do you agree with the changes I propose? If not, could you please explain what I miss here? ^ permalink raw reply related [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-17 10:50 ` Eli Zaretskii @ 2021-06-17 11:19 ` Fabrice Nicol 2021-06-17 11:42 ` Eli Zaretskii 0 siblings, 1 reply; 13+ messages in thread From: Fabrice Nicol @ 2021-06-17 11:19 UTC (permalink / raw) To: Eli Zaretskii; +Cc: 47408 [-- Attachment #1: Type: text/plain, Size: 3568 bytes --] Hi Eli, Thanks. I confirm that this works, but I have 2 follow-up issues with > this patch: > > 1. It adds tags for some identifiers that AFAUI are actually > keywords, and shouldn't be in the TAGS tables. Examples: > "interface" (e.g., on line 146 of accumulator.m) and > "implementation" (e.g., on line 166). I guess this is unintended? > If so, how to fix it? > This is intended. I commented this in the commit message (one-word declarations). I confirm that ':- implementation' and ':-interface' are *formally* declarations in Mercury as all others are. They were not included in the previous version bercause of an incomplete switch. It is quite useful from a practical point of view to add them to the tag base. When interfaces are big (they can reach a couple of thousand lines in real-world programming) it is sometimes useful to have a bookmark to jump to the start of the implementation section and back. I used to create an ad-hoc emacs bookmark for this. Tagging removes the need for this. Simply strike M-. at a blank line an select the interface/implementation tag. In the C family this is the same as striking M-. on an header include declaration and jumping to the header file and back. Some IDEs use F4 for this. Think of Mercury interfaces as C headers. > > > 2. It always produces "explicitly named" tags, which I think is > unnecessary. AFAICT, this is related to the following snippet from > mercury_pr: > > > + /* Left-trim type definitions. */ > > + > > + while (pos > namelength + offset > > + && c_isspace (s[pos - namelength - offset])) > > + --offset; > > + > > + /* There is no need to correct namelength or call notinname. */ > > + s[pos - offset - 1] = '\0'; > > + > > + make_tag (s + pos - namelength - offset, namelength, true, s, > pos, lineno, linecharno); > > + return pos; > > I don't understand why you need to overwrite s[pos - offset -1] > with the null byte: the same effect could be obtained by adjusting > the POS argument passed to make_tag. Also, you in effect chop off > the last character of NAME, but don't adjust NAMELENGTH > accordingly. These factors together cause make_tag to decide that > an explicitly-named tag is in order, because name[namelength-1] is > a null byte, which is rejected as being "not-a-name" character. > > To fix this second issue, I propose the change below, which should > be applied on top of your patches: > > diff --git a/lib-src/etags.c b/lib-src/etags.c > index 370e825..2b0288e 100644 > --- a/lib-src/etags.c > +++ b/lib-src/etags.c > @@ -6585,10 +6585,8 @@ mercury_pr (char *s, char *last, ptrdiff_t lastlen) > && c_isspace (s[pos - namelength - offset])) > --offset; > > - /* There is no need to correct namelength or call notinname. */ > - s[pos - offset - 1] = '\0'; > - > - make_tag (s + pos - namelength - offset, namelength, true, s, pos, > lineno, linecharno); > + make_tag (s + pos - namelength - offset, namelength - 1, true, > + s, pos - offset - 1, lineno, linecharno); > return pos; > } > > I've verified that etags after this change still produces the correct > TAGS file, including for the file univ.m you sent up-thread. > > Do you agree with the changes I propose? If not, could you please > explain what I miss here? > OK, this is another way of achieving an equivalent result. Please leave me until tomorrow to perform more tests so that I can formally confirm that this is fine. Best Fabrice > [-- Attachment #2: Type: text/html, Size: 4950 bytes --] ^ permalink raw reply [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-17 11:19 ` Fabrice Nicol @ 2021-06-17 11:42 ` Eli Zaretskii 2021-06-17 18:36 ` fabrice nicol 0 siblings, 1 reply; 13+ messages in thread From: Eli Zaretskii @ 2021-06-17 11:42 UTC (permalink / raw) To: Fabrice Nicol; +Cc: 47408 > From: Fabrice Nicol <fabrnicol@gmail.com> > Date: Thu, 17 Jun 2021 13:19:26 +0200 > Cc: Francesco Potortì <pot@gnu.org>, 47408@debbugs.gnu.org > > 1. It adds tags for some identifiers that AFAUI are actually > keywords, and shouldn't be in the TAGS tables. Examples: > "interface" (e.g., on line 146 of accumulator.m) and > "implementation" (e.g., on line 166). I guess this is unintended? > If so, how to fix it? > > This is intended. I commented this in the commit message (one-word declarations). Understood, thanks. > To fix this second issue, I propose the change below, which should > be applied on top of your patches: > > diff --git a/lib-src/etags.c b/lib-src/etags.c > index 370e825..2b0288e 100644 > --- a/lib-src/etags.c > +++ b/lib-src/etags.c > @@ -6585,10 +6585,8 @@ mercury_pr (char *s, char *last, ptrdiff_t lastlen) > && c_isspace (s[pos - namelength - offset])) > --offset; > > - /* There is no need to correct namelength or call notinname. */ > - s[pos - offset - 1] = '\0'; > - > - make_tag (s + pos - namelength - offset, namelength, true, s, pos, lineno, linecharno); > + make_tag (s + pos - namelength - offset, namelength - 1, true, > + s, pos - offset - 1, lineno, linecharno); > return pos; > } > > I've verified that etags after this change still produces the correct > TAGS file, including for the file univ.m you sent up-thread. > > Do you agree with the changes I propose? If not, could you please > explain what I miss here? > > OK, this is another way of achieving an equivalent result. Please leave me until tomorrow to perform more > tests so that I can formally confirm that this is fine. Thanks. I also plan on adding a few lines from univ.m to accumulator.m, because those few lines use a feature accumulator.m doesn't. Is this OK with you? ^ permalink raw reply [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-17 11:42 ` Eli Zaretskii @ 2021-06-17 18:36 ` fabrice nicol 2021-06-18 11:29 ` Eli Zaretskii 0 siblings, 1 reply; 13+ messages in thread From: fabrice nicol @ 2021-06-17 18:36 UTC (permalink / raw) To: Eli Zaretskii; +Cc: 47408 [-- Attachment #1: Type: text/plain, Size: 2336 bytes --] Hi, Eli, I could finalize my tests against the entire Mercury library code. All is OK. I applied your patch on top of mine. Also, I added two new corner-case fixes, which are mentioned in the commit message: 1. The first new fix is for 0-arity predicates and functions. Yes, Mercury has them. (They play the role of global immutable constants in other languages). They happened not to be caught by the previous code, now they are. 2. I also removed module names from within tag names. The point is that module name prefixing is optional in most cases, so if you leave the module prefix within the tag, you will fail to get to the declaration when striking M-. on a (non-prefixed) predicate name. It is better to remove the name altogether. This will automatically trigger an explicit tag. Fabrice >> This is intended. I commented this in the commit message (one-word declarations). > Understood, thanks. > >> To fix this second issue, I propose the change below, which should >> be applied on top of your patches: >> >> diff --git a/lib-src/etags.c b/lib-src/etags.c >> index 370e825..2b0288e 100644 >> --- a/lib-src/etags.c >> +++ b/lib-src/etags.c >> @@ -6585,10 +6585,8 @@ mercury_pr (char *s, char *last, ptrdiff_t lastlen) >> && c_isspace (s[pos - namelength - offset])) >> --offset; >> >> - /* There is no need to correct namelength or call notinname. */ >> - s[pos - offset - 1] = '\0'; >> - >> - make_tag (s + pos - namelength - offset, namelength, true, s, pos, lineno, linecharno); >> + make_tag (s + pos - namelength - offset, namelength - 1, true, >> + s, pos - offset - 1, lineno, linecharno); >> return pos; >> } >> >> I've verified that etags after this change still produces the correct >> TAGS file, including for the file univ.m you sent up-thread. >> >> Do you agree with the changes I propose? If not, could you please >> explain what I miss here? >> >> OK, this is another way of achieving an equivalent result. Please leave me until tomorrow to perform more >> tests so that I can formally confirm that this is fine. > Thanks. > > I also plan on adding a few lines from univ.m to accumulator.m, > because those few lines use a feature accumulator.m doesn't. Is this > OK with you? [-- Attachment #2: 0001-Fix-Mercury-support-notably-quantified-procedures.patch --] [-- Type: text/x-patch, Size: 8004 bytes --] From b4db1894e71b7aaa0be28b604a814f58bdabeef9 Mon Sep 17 00:00:00 2001 From: Fabrice Nicol <fabrnicol@gmail.com> Date: Thu, 17 Jun 2021 19:59:52 +0200 Subject: [PATCH] Fix Mercury support, notably quantified procedures. Correct the previous fix (did not correctly handle quantified types). Also fix the following issues: - remove module name (+ dot) from tags, as prefixing module name is often inconsistent in code and may cause tags to be too specific. - now tag 0-arity predicates and functions (':- func foo_14.') - now tag one-word declarations (':- interface.') * lib-src/etags.c (mercury_pr): Pass the correct NAME and NAMELEN arguments to 'make_tag'. --- lib-src/etags.c | 126 +++++++++++++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 43 deletions(-) diff --git a/lib-src/etags.c b/lib-src/etags.c index 9f20e44caf..bd57ede2f3 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -6081,10 +6081,10 @@ prolog_atom (char *s, size_t pos) pos++; if (s[pos] != '\'') break; - pos++; /* A double quote */ + pos++; /* A double quote */ } else if (s[pos] == '\0') - /* Multiline quoted atoms are ignored. */ + /* Multiline quoted atoms are ignored. */ return 0; else if (s[pos] == '\\') { @@ -6119,6 +6119,13 @@ prolog_atom (char *s, size_t pos) static bool is_mercury_type = false; static bool is_mercury_quantifier = false; static bool is_mercury_declaration = false; +typedef struct +{ + size_t pos; /* Position reached in parsing tag name. */ + size_t namelength; /* Length of tag name */ + size_t totlength; /* Total length of parsed tag: this field is currently + reserved for control and debugging. */ +} mercury_pos_t; /* * Objective-C and Mercury have identical file extension .m. @@ -6374,10 +6381,12 @@ mercury_skip_comment (linebuffer *plb, FILE *inf) "initialise", "finalise", "mutable", "module", "interface", "implementation", "import_module", "use_module", "include_module", "end_module", "some", "all"}; -static size_t +static mercury_pos_t mercury_decl (char *s, size_t pos) { - if (s == NULL) return 0; + mercury_pos_t null_pos = {0, 0, 0}; + + if (s == NULL) return null_pos; size_t origpos; origpos = pos; @@ -6398,7 +6407,8 @@ mercury_decl (char *s, size_t pos) if (is_mercury_quantifier) { if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax. */ - return 0; + return null_pos; + is_mercury_quantifier = false; /* Reset to base value. */ found_decl_tag = true; } @@ -6418,7 +6428,7 @@ mercury_decl (char *s, size_t pos) is_mercury_quantifier = true; } - break; /* Found declaration tag of rank j. */ + break; /* Found declaration tag of rank j. */ } else /* 'solver type' has a blank in the middle, @@ -6461,24 +6471,36 @@ mercury_decl (char *s, size_t pos) if (found_decl_tag) pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */ else - return 0; + return null_pos; } /* From now on it is the same as for Prolog except for module dots. */ + size_t start_of_name = pos; + if (c_islower (s[pos]) || s[pos] == '_' ) { /* The name is unquoted. Do not confuse module dots with end-of-declaration dots. */ + int module_dot_pos = 0; while (c_isalnum (s[pos]) || s[pos] == '_' || (s[pos] == '.' /* A module dot. */ && s + pos + 1 != NULL - && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_'))) + && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_') + && (module_dot_pos = pos))) /* Record module dot position. + Erase module from name. */ ++pos; - return pos - origpos; + if (module_dot_pos) + { + start_of_name = module_dot_pos + 2; + ++pos; + } + + mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos}; + return position; } else if (s[pos] == '\'') { @@ -6493,28 +6515,37 @@ mercury_decl (char *s, size_t pos) ++pos; /* A double quote. */ } else if (s[pos] == '\0') /* Multiline quoted atoms are ignored. */ - return 0; + return null_pos; else if (s[pos] == '\\') { if (s[pos+1] == '\0') - return 0; + return null_pos; pos += 2; } else ++pos; } - return pos - origpos; + + mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos}; + return position; } else if (is_mercury_quantifier && s[pos] == '[') /* :- some [T] pred/func. */ { for (++pos; s + pos != NULL && s[pos] != ']'; ++pos) {} - if (s + pos == NULL) return 0; + if (s + pos == NULL) return null_pos; ++pos; pos = skip_spaces (s + pos) - s; - return mercury_decl (s, pos) + pos - origpos; + mercury_pos_t position = mercury_decl (s, pos); + position.totlength += pos - origpos; + return position; + } + else if (s[pos] == '.') /* as in ':- interface.' */ + { + mercury_pos_t position = {pos, pos - origpos + 1, pos - origpos}; + return position; } else - return 0; + return null_pos; } static ptrdiff_t @@ -6523,6 +6554,7 @@ mercury_pr (char *s, char *last, ptrdiff_t lastlen) size_t len0 = 0; is_mercury_type = false; is_mercury_quantifier = false; + bool stop_at_rule = false; if (is_mercury_declaration) { @@ -6530,38 +6562,46 @@ mercury_pr (char *s, char *last, ptrdiff_t lastlen) len0 = skip_spaces (s + 2) - s; } - size_t len = mercury_decl (s, len0); - if (len == 0) return 0; - len += len0; - - if (( (s[len] == '.' /* This is a statement dot, not a module dot. */ - || (s[len] == '(' && (len += 1)) - || (s[len] == ':' /* Stopping in case of a rule. */ - && s[len + 1] == '-' - && (len += 2))) - && (lastlen != len || memcmp (s, last, len) != 0) + mercury_pos_t position = mercury_decl (s, len0); + size_t pos = position.pos; + int offset = 0; /* may be < 0 */ + if (pos == 0) return 0; + + /* Skip white space for: + a. rules in definitions before :- + b. 0-arity predicates with inlined modes. + c. possibly multiline type definitions */ + + while (c_isspace (s[pos])) { ++pos; ++offset; } + + if (( ((s[pos] == '.' && (pos += 1)) /* case 1 + This is a statement dot, + not a module dot. */ + || c_isalnum(s[pos]) /* 0-arity procedures */ + || (s[pos] == '(' && (pos += 1)) /* case 2: arity > 0 */ + || ((s[pos] == ':') /* case 3: rules */ + && s[pos + 1] == '-' && (stop_at_rule = true))) + && (lastlen != pos || memcmp (s, last, pos) != 0) ) /* Types are often declared on several lines so keeping just the first line. */ - || is_mercury_type) + + || is_mercury_type) /* When types are implemented. */ { - char *name = skip_non_spaces (s + len0); - size_t namelen; - if (name >= s + len) - { - name = s; - namelen = len; - } - else - { - name = skip_spaces (name); - namelen = len - (name - s); - } - /* Remove trailing non-name characters. */ - while (namelen > 0 && notinname (name[namelen - 1])) - namelen--; - make_tag (name, namelen, true, s, len, lineno, linecharno); - return len; + size_t namelength = position.namelength; + if (stop_at_rule && offset) --offset; + + /* Left-trim type definitions. */ + + while (pos > namelength + offset + && c_isspace (s[pos - namelength - offset])) + --offset; + + /* There is no need to correct namelength or call notinname. */ + + make_tag (s + pos - namelength - offset, namelength - 1, true, + s, pos - offset - 1, lineno, linecharno); + return pos; } return 0; -- 2.32.0 ^ permalink raw reply related [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-17 18:36 ` fabrice nicol @ 2021-06-18 11:29 ` Eli Zaretskii 2021-06-18 11:54 ` Francesco Potortì 0 siblings, 1 reply; 13+ messages in thread From: Eli Zaretskii @ 2021-06-18 11:29 UTC (permalink / raw) To: fabrice nicol; +Cc: 47408 > Cc: pot@gnu.org, 47408@debbugs.gnu.org > From: fabrice nicol <fabrnicol@gmail.com> > Date: Thu, 17 Jun 2021 20:36:56 +0200 > > I could finalize my tests against the entire Mercury library code. > > All is OK. I applied your patch on top of mine. > > Also, I added two new corner-case fixes, which are mentioned in the > commit message: > > 1. The first new fix is for 0-arity predicates and functions. Yes, > Mercury has them. (They play the role of global immutable constants in > other languages). > > They happened not to be caught by the previous code, now they are. > > 2. I also removed module names from within tag names. The point is that > module name prefixing is optional in most cases, so if you leave the > module prefix within the tag, you will fail to get to the declaration > when striking M-. on a (non-prefixed) predicate name. It is better to > remove the name altogether. This will automatically trigger an explicit tag. Thanks, installed. ^ permalink raw reply [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-18 11:29 ` Eli Zaretskii @ 2021-06-18 11:54 ` Francesco Potortì 2021-06-18 12:33 ` Eli Zaretskii 0 siblings, 1 reply; 13+ messages in thread From: Francesco Potortì @ 2021-06-18 11:54 UTC (permalink / raw) To: fabrice nicol; +Cc: 47408 > 2. I also removed module names from within tag names. The point is that > module name prefixing is optional in most cases I don't know about Mercurial, but this sounds similar to C++ classes: the fully-qualified class name is not always necessary. When possible, etags creates a fully-qualified name for the tag, like this: I used ^? and ^A for the non-printable separation characters ipc3dLinkControlSetup setup;^?CMultiChannelCSC19_3D::setup^A5,190 If that makes sense, maybe you could choose to do something similar for Mercurial ^ permalink raw reply [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-18 11:54 ` Francesco Potortì @ 2021-06-18 12:33 ` Eli Zaretskii 2021-06-18 13:53 ` fabrice nicol 0 siblings, 1 reply; 13+ messages in thread From: Eli Zaretskii @ 2021-06-18 12:33 UTC (permalink / raw) To: Francesco Potortì; +Cc: fabrnicol, 47408 > From: Francesco Potortì <pot@gnu.org> > Date: Fri, 18 Jun 2021 13:54:54 +0200 > Cc: 47408@debbugs.gnu.org, > Eli Zaretskii <eliz@gnu.org> > > > 2. I also removed module names from within tag names. The point is that > > module name prefixing is optional in most cases > > I don't know about Mercurial, but this sounds similar to C++ classes: > the fully-qualified class name is not always necessary. When possible, > etags creates a fully-qualified name for the tag, like this: Under the --class-qualify switch, yes. Maybe we should support that for Mercury as well. ^ permalink raw reply [flat|nested] 13+ messages in thread
* bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures 2021-06-18 12:33 ` Eli Zaretskii @ 2021-06-18 13:53 ` fabrice nicol 0 siblings, 0 replies; 13+ messages in thread From: fabrice nicol @ 2021-06-18 13:53 UTC (permalink / raw) To: Eli Zaretskii, Francesco Potortì; +Cc: 47408 Well, I suggest adding this to a todo-list, leaving things as they stand in the meantime. There are other features that might be added to this list too, possibly higher: - tag type non 0-arity type constructors (as in inbuilt Vim support); - tag embedded foreign code (at least for C): there is a lot of embedded C in real-world Mercury code; I'll keep this for the summer vacation. F. >>> 2. I also removed module names from within tag names. The point is that >>> module name prefixing is optional in most cases >> I don't know about Mercurial, but this sounds similar to C++ classes: >> the fully-qualified class name is not always necessary. When possible, >> etags creates a fully-qualified name for the tag, like this: > Under the --class-qualify switch, yes. Maybe we should support that > for Mercury as well. ^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2021-06-18 13:53 UTC | newest] Thread overview: 13+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- [not found] <mailman.12421.1623412982.2554.bug-gnu-emacs@gnu.org> 2021-06-14 15:10 ` bug#47408: [PATCH] Etags support for Mercury -- fix explicit tags for existentially-quantified procedures fabrice nicol 2021-06-14 16:04 ` Eli Zaretskii 2021-06-14 17:10 ` fabrice nicol 2021-06-14 17:42 ` Eli Zaretskii 2021-06-14 18:52 ` fabrice nicol 2021-06-17 10:50 ` Eli Zaretskii 2021-06-17 11:19 ` Fabrice Nicol 2021-06-17 11:42 ` Eli Zaretskii 2021-06-17 18:36 ` fabrice nicol 2021-06-18 11:29 ` Eli Zaretskii 2021-06-18 11:54 ` Francesco Potortì 2021-06-18 12:33 ` Eli Zaretskii 2021-06-18 13:53 ` fabrice nicol
Code repositories for project(s) associated with this external index https://git.savannah.gnu.org/cgit/emacs.git https://git.savannah.gnu.org/cgit/emacs/org-mode.git This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.