unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Shigeru Fukaya <shigeru.fukaya@gmail.com>
To: Stefan Monnier <monnier@iro.umontreal.ca>, 15998@debbugs.gnu.org
Subject: bug#15998: 24.3; forward-sexp (scan-sexps) doesn't do well with some SEXPs
Date: Tue, 03 Dec 2013 18:47:24 +0900	[thread overview]
Message-ID: <8CEF00CAB165Bshigeru.fukaya@gmail.com> (raw)
In-Reply-To: <jwv61r85mov.fsf-monnier+emacsbugs@gnu.org>

[-- Attachment #1: Type: text/plain, Size: 2152 bytes --]

>> 2. ^[...] ^^[...]   char table

Sorry, but it's #^[...] #^^[...]


>> 4. $!               beginning of executable file

>I don't know what 4 is.  Or do you mean "#!" as the first two chars of
>the file?  We could setup syntax-propertize-function to mark them as
>comments, indeed.

It is just the magic number of Unix files.

(read-from-string "#!/usr/local/bin/emacs\n(pwd)") --> ((pwd) . 28)

see read1 in lread.c.


And, my changes to the latest emacs are,

1) add an optional argument, `elisp-syntax', to `scan-sexps'.

2) pass `scan_lists' the optional argument.

3) `scan_lists' performs elisp-specific handling when the argument is set.

4) `forward-sexp' calls `scan-sexps' with a non-nil value for the optional
argument when the current buffer's major mode is an elisp-related mode.
Also, when moving backward, it additionally moves over labels of cyclic
objects, as is already done for prefixes.

(Is a change in the last sentence above unnecessary?)


As a result, it seems to behave as expected on an object such as the one below.

(a  b  #&3"d"  #&99"ZZZ"  #1=a  #2= b  #1# #3= #40= (c) #40# #2# #10= #&10"A"
   #s(dummy 1 2 3)  #^^[dummy 1 2 3]  #^[dummy 1 2 3]  ##  xyz)


Regards,
Shigeru


---------------------
ChangeLog

	Add support for elisp syntax of bool vector, label of cyclic object,
	hash table, char-table and empty symbol.
	* lisp.el (forward-sexp)

	* syntax.c (scan_lists, Fscan_lists, Fscan_sexps)



lisp.el

(defun forward-sexp (&optional arg)
  "Move forward across one balanced expression (sexp).
With ARG, do it that many times.  Negative arg -N means
move backward across N balanced expressions.
This command assumes point is not in a string or comment.
Calls `forward-sexp-function' to do the work, if that is non-nil.
Otherwise, when the current major mode is `emacs-lisp-mode',
`inferior-emacs-lisp-mode', `lisp-interaction-mode' or a mode
derived from one of them, `scan-sexps' is called with a non-nil
ELISP-SYNTAX argument, and moving backward also moves over any
cyclic object labels (\"#N=\") that precede the expression."
  (interactive "^p")
  (or arg (setq arg 1))
  (if forward-sexp-function
      (funcall forward-sexp-function arg)
    ;; Use `derived-mode-p' rather than comparing `major-mode' with a
    ;; fixed list, so that modes derived from the Emacs Lisp modes get
    ;; the same treatment.  `eshell-mode' is not included.
    (let ((elisp (derived-mode-p 'emacs-lisp-mode
                                 'inferior-emacs-lisp-mode
                                 'lisp-interaction-mode)))
      (goto-char (or (scan-sexps (point) arg elisp) (buffer-end arg)))
      (when (< arg 0)
        (when elisp
          ;; Step back over every label that ends at point, such as
          ;; "#3= #40= "; the trailing `\\=' anchors each match at
          ;; point, and the loop repeats until no label is left.
          (save-match-data
            (while (re-search-backward "#[0-9]+=\\s-*\\=" nil t))))
        ;; As before, finish by moving over prefix characters.
        (backward-prefix-chars)))))


[-- Attachment #2: syntax.patch --]
[-- Type: application/octet-stream, Size: 9577 bytes --]

*** syntax.c.orig	Mon Sep 23 15:37:54 2013
--- syntax.c	Tue Dec  3 07:45:17 2013
***************
*** 179,185 ****
  
  static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
  static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
! static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
  static void scan_sexps_forward (struct lisp_parse_state *,
                                  ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
                                  bool, Lisp_Object, int);
--- 179,185 ----
  
  static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
  static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
! static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool, bool);
  static void scan_sexps_forward (struct lisp_parse_state *,
                                  ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
                                  bool, Lisp_Object, int);
***************
*** 2562,2569 ****
    return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
  }
  
  static Lisp_Object
! scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
  {
    Lisp_Object val;
    ptrdiff_t stop = count > 0 ? ZV : BEGV;
--- 2562,2582 ----
    return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
  }
  
+ /* ELISPFLAG requests Emacs Lisp specific syntax handling.
+    ELISPFLAG is only honored when SEXPFLAG is also true.
+    When ELISPFLAG is true, special scanning, beyond what the syntax
+    table provides, is done for:
+     a. #N= -- label of cyclic object
+     b. #&N"S" -- bool vector
+     c. #s(...) -- hash table
+     d. #^[...] -- char-table
+     e. #^^[...] -- sub-char-table
+     f. ## -- empty symbol
+    where N is digits, "S" is a string, ... is sexps.
+    Note a label of a cyclic object is not a sexp, and is therefore skipped.  */
+ 
  static Lisp_Object
! scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag, bool elispflag)
  {
    Lisp_Object val;
    ptrdiff_t stop = count > 0 ? ZV : BEGV;
***************
*** 2593,2598 ****
--- 2606,2614 ----
    immediate_quit = 1;
    QUIT;
  
+   /* for insurance. elispflag implies sexpflag below */
+   if (!sexpflag) elispflag = 0;
+ 
    SETUP_SYNTAX_TABLE (from, count);
    while (count > 0)
      {
***************
*** 2633,2638 ****
--- 2649,2756 ----
  	  if (prefix)
  	    continue;
  
+ 	  /* check elisp special syntax starting with '#'.
+ 	     need at least one char. */
+ 	  if (c == '#' && !depth && elispflag && from < stop)
+ 	    {
+ 	      EMACS_INT f = from;
+ 	      ptrdiff_t b = from_byte;
+ 	      c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 	      switch (c)
+ 		{
+ 		case '0': case '1': case '2': case '3': case '4':
+ 		case '5': case '6': case '7': case '8': case '9':
+ 		  /* #N= */
+ 		  do
+ 		    {
+ 		      if (f == stop) break;
+ 		      INC_BOTH (f, b);
+ 		      c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 		    } while (c >= '0' && c <= '9');
+ 		  if (c == '=')
+ 		    {
+ 		      INC_BOTH (f, b);
+ 		      from = f;
+ 		      from_byte = b;
+ 		      continue;	/* skip this label */
+ 		    }
+ 		  break;
+ 
+ 		case '&':
+ 		  /* #&N"X" */
+ 		  if (f + 2 < stop)
+ 		    {
+ 		      INC_BOTH (f, b);
+ 		      c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 		      if (c >= '0' && c <= '9')
+ 			{
+ 			  do
+ 			    {
+ 			      if (f == stop) break;
+ 			      INC_BOTH (f, b);
+ 			      c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 			    } while (c >= '0' && c <= '9');
+ 			  if (c == '"')
+ 			    {
+ 			      code = Sstring;
+ 			      INC_BOTH (f, b);
+ 			      from = f;
+ 			      from_byte = b;
+ 			      /* next in Sstring */
+ 			    }
+ 			}
+ 		    }
+ 		  break;
+ 
+ 		case 's':
+ 		  /* #s(...) */
+ 		  if (f + 1 < stop)
+ 		    {
+ 		      INC_BOTH (f, b);
+ 		      c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 		      if (c == '(')
+ 			{
+ 			  code = Sopen;
+ 			  INC_BOTH (f, b);
+ 			  from = f;
+ 			  from_byte = b;
+ 			  /* next in Sopen */
+ 			}
+ 		    }
+ 		  break;
+ 
+ 		case '^':
+ 		  /* #^[...] #^^[...]*/
+ 		  if (f + 1 < stop)
+ 		    {
+ 		      INC_BOTH (f, b);
+ 		      c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 		      if (c == '^' && f + 1 < stop)
+ 			{
+ 			  INC_BOTH (f, b);
+ 			  c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 			}
+ 		      if (c == '[')
+ 			{
+ 			  code = Sopen;
+ 			  INC_BOTH (f, b);
+ 			  from = f;
+ 			  from_byte = b;
+ 			  /* next in Sopen */
+ 			}
+ 		    }
+ 		  break;
+ 
+ 		case '#':
+ 		  /* ## */
+ 		  INC_BOTH (f, b);
+ 		  from = f;
+ 		  from_byte = b;
+ 		  if (depth) continue;
+ 		  goto done;
+ 		}
+ 	    }
+ 
  	  switch (code)
  	    {
  	    case Sescape:
***************
*** 2805,2810 ****
--- 2923,2965 ----
  	    }
  	  else if (SYNTAX_FLAGS_PREFIX (syntax))
  	    continue;
+ 	  else if (!depth && elispflag && from > stop)
+ 	    {
+ 	      EMACS_INT f = from;
+ 	      ptrdiff_t b = from_byte;
+ 	      if (c == '=')
+ 		{
+ 		  /* #N= */
+ 		  DEC_BOTH (f, b);
+ 		  c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 		  if (c >= '0' && c <= '9')
+ 		    {
+ 		      do
+ 			{
+ 			  if (f == stop) break;
+ 			  DEC_BOTH (f, b);
+ 			  c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 			} while (c >= '0' && c <= '9');
+ 		      if (c == '#')
+ 			{
+ 			  from = f;
+ 			  from_byte = b;
+ 			  continue; /* skip this label */
+ 			}
+ 		    }
+ 		}
+ 	      else if (c == '#')
+ 		{
+ 		  /* ## */
+ 		  DEC_BOTH (f, b);
+ 		  c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 		  if (c == '#') {
+ 		    from = f;
+ 		    from_byte = b;
+ 		    goto done2;
+ 		  }
+ 		}
+ 	    }
  
  	  switch (code)
  	    {
***************
*** 2864,2869 ****
--- 3019,3069 ----
  	      break;
  
  	    case Sopen:
+ 	      /* scan back #s(, #^[, #^^[ as if prefixes.
+ 		 need at least 2 chars. */
+ 	      if (depth == 1 && elispflag && from + 1 > stop)
+ 		{
+ 		  EMACS_INT f = from;
+ 		  ptrdiff_t b = from_byte;
+ 		  if (c == '(')
+ 		    {
+ 		      /* #s(...) */
+ 		      DEC_BOTH (f, b);
+ 		      c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 		      if (c == 's')
+ 			{
+ 			  DEC_BOTH (f, b);
+ 			  c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 			  if (c == '#')
+ 			    {
+ 			      from = f;
+ 			      from_byte = b;
+ 			    }
+ 			}
+ 		    }
+ 		  else if (c == '[')
+ 		    {
+ 		      /* #^[...] or #^^[...] */
+ 		      DEC_BOTH (f, b);
+ 		      c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 		      if (c == '^')
+ 			{
+ 			  DEC_BOTH (f, b);
+ 			  c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 			  if (c == '^' && f > stop)
+ 			    {
+ 			      /* #^^[...] */
+ 			      DEC_BOTH (f, b);
+ 			      c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 			    }
+ 			  if (c == '#')
+ 			    {
+ 			      from = f;
+ 			      from_byte = b;
+ 			    }
+ 			}
+ 		    }
+ 		}
  	    open2:
  	      if (!--depth) goto done2;
  	      if (depth < min_depth)
***************
*** 2922,2927 ****
--- 3122,3156 ----
  			break;
  		    }
  		}
+ 
+ 	      /* #&N"STRING" */
+ 	      if (!depth && elispflag && from + 2 > stop)
+ 		{
+ 		  EMACS_INT f = from;
+ 		  ptrdiff_t b = from_byte;
+ 		  DEC_BOTH (f, b);
+ 		  c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 		  if (c >= '0' && c <= '9')
+ 		    {
+ 		      do
+ 			{
+ 			  if (f == stop) break;
+ 			  DEC_BOTH (f, b);
+ 			  c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 			} while (c >= '0' && c <= '9');
+ 		      if (c == '&' && f > stop)
+ 			{
+ 			  DEC_BOTH (f, b);
+ 			  c = FETCH_CHAR_AS_MULTIBYTE (b);
+ 			  if (c == '#')
+ 			    {
+ 			      from = f;
+ 			      from_byte = b;
+ 			    }
+ 			}
+ 		    }
+ 		}
+ 
  	      if (!depth && sexpflag) goto done2;
  	      break;
  	    default:
***************
*** 2977,2988 ****
    CHECK_NUMBER (count);
    CHECK_NUMBER (depth);
  
!   return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
  }
  
! DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
         doc: /* Scan from character number FROM by COUNT balanced expressions.
  If COUNT is negative, scan backwards.
  Returns the character number of the position thus found.
  
  Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
--- 3206,3219 ----
    CHECK_NUMBER (count);
    CHECK_NUMBER (depth);
  
!   return scan_lists (XINT (from), XINT (count), XINT (depth), 0, 0);
  }
  
! DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 3, 0,
         doc: /* Scan from character number FROM by COUNT balanced expressions.
  If COUNT is negative, scan backwards.
+ If optional ELISP-SYNTAX is non-nil, handle elisp specific syntax as bool vector,
+ label of cyclic object, hash table, char-table, empty symbol.
  Returns the character number of the position thus found.
  
  Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
***************
*** 2991,3002 ****
  in the middle of a parenthetical grouping, an error is signaled.
  If the beginning or end is reached between groupings
  but before count is used up, nil is returned.  */)
!   (Lisp_Object from, Lisp_Object count)
  {
    CHECK_NUMBER (from);
    CHECK_NUMBER (count);
  
!   return scan_lists (XINT (from), XINT (count), 0, 1);
  }
  
  DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
--- 3222,3233 ----
  in the middle of a parenthetical grouping, an error is signaled.
  If the beginning or end is reached between groupings
  but before count is used up, nil is returned.  */)
!   (Lisp_Object from, Lisp_Object count, Lisp_Object elisp_syntax)
  {
    CHECK_NUMBER (from);
    CHECK_NUMBER (count);
  
!   return scan_lists (XINT (from), XINT (count), 0, 1, !NILP (elisp_syntax));
  }
  
  DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,

  reply	other threads:[~2013-12-03  9:47 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-29 14:45 bug#15998: 24.3; forward-sexp (scan-sexps) doesn't do well with some SEXPs Shigeru Fukaya
2013-11-29 17:12 ` Stefan Monnier
2013-12-01 20:08   ` Shigeru Fukaya
2013-12-01 20:44     ` Stefan Monnier
2013-12-03  9:47       ` Shigeru Fukaya [this message]
2013-12-03 17:09         ` Stefan Monnier
2019-06-26 15:23           ` Lars Ingebrigtsen
2019-06-26 17:06             ` Stefan Monnier
2019-06-27 10:30               ` Lars Ingebrigtsen
2019-07-07  2:08               ` Noam Postavsky
2019-07-07 13:28                 ` Stefan Monnier
2019-07-07 13:47                   ` Noam Postavsky
2022-05-06 15:50                 ` bug#30132: 27.0.50; scan-sexps and ## Lars Ingebrigtsen
2022-05-06 16:30                   ` bug#15998: " Lars Ingebrigtsen
2022-05-07  0:08                     ` Michael Heerdegen
2022-05-07  4:05                       ` bug#15998: " Michael Heerdegen
2022-05-07 10:17                         ` Lars Ingebrigtsen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8CEF00CAB165Bshigeru.fukaya@gmail.com \
    --to=shigeru.fukaya@gmail.com \
    --cc=15998@debbugs.gnu.org \
    --cc=monnier@iro.umontreal.ca \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).