unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* Re: master a5eb9f6ad4e 2/3: Catch signals produced by PRED in tree-sitter search functions
       [not found] ` <20230413220905.CB635C13A82@vcs2.savannah.gnu.org>
@ 2023-04-13 23:50   ` Po Lu
  0 siblings, 0 replies; 6+ messages in thread
From: Po Lu @ 2023-04-13 23:50 UTC (permalink / raw)
  To: emacs-devel; +Cc: Yuan Fu

Yuan Fu <casouri@gmail.com> writes:

> +/** Cleanup function for cursor.  */
> +static void treesit_traverse_cleanup_cursor(void *cursor)
> +{
> +  ts_tree_cursor_delete ((TSTreeCursor *) cursor);
> +}

Please fix your coding style: place a newline after `void', and a space
after `treesit_traverse_cleanup_cursor'.

>  DEFUN ("treesit-search-subtree",
>         Ftreesit_search_subtree,
>         Streesit_search_subtree, 2, 5, 0,
> @@ -3288,12 +3294,18 @@ Return the first matched node, or nil if none matches.  */)
>    if (!treesit_cursor_helper (&cursor, XTS_NODE (node)->node, parser))
>      return return_value;
>  
> +  specpdl_ref count = SPECPDL_INDEX ();
> +  record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
> +
>    if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
>  			  NILP (all), the_limit, false))
>      {
>        TSNode node = ts_tree_cursor_current_node (&cursor);
>        return_value = make_treesit_node (parser, node);
>      }
> +
> +  unbind_to (count, Qnil);
> +
>    ts_tree_cursor_delete (&cursor);
>    return return_value;
>  }
> @@ -3345,12 +3357,18 @@ always traverse leaf nodes first, then upwards.  */)
>    if (!treesit_cursor_helper (&cursor, XTS_NODE (start)->node, parser))
>      return return_value;
>  
> +  specpdl_ref count = SPECPDL_INDEX ();
> +  record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
> +
>    if (treesit_search_forward (&cursor, predicate, parser,
>  			      NILP (backward), NILP (all)))
>      {
>        TSNode node = ts_tree_cursor_current_node (&cursor);
>        return_value = make_treesit_node (parser, node);
>      }
> +
> +  unbind_to (count, Qnil);
> +
>    ts_tree_cursor_delete (&cursor);
>    return return_value;
>  }
> @@ -3467,8 +3485,14 @@ a regexp.  */)
>       to use treesit_cursor_helper.  */
>    TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node);
>  
> +  specpdl_ref count = SPECPDL_INDEX ();
> +  record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
> +
>    treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
>  			     the_limit, parser);
> +
> +  unbind_to (count, Qnil);
> +
>    ts_tree_cursor_delete (&cursor);
>    Fsetcdr (parent, Fnreverse (Fcdr (parent)));
>    if (NILP (Fcdr (parent)))

Unless you remove the redundant call to `ts_tree_cursor_delete' after
`unbind_to', you get a double free.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: master 361c5fc2d8e 3/3: Support more predicates in tree-sitter search functions
       [not found] ` <20230413220905.E3C7BC13A84@vcs2.savannah.gnu.org>
@ 2023-04-13 23:55   ` Po Lu
  2023-04-14  1:57     ` Yuan Fu
  0 siblings, 1 reply; 6+ messages in thread
From: Po Lu @ 2023-04-13 23:55 UTC (permalink / raw)
  To: emacs-devel; +Cc: Yuan Fu

Yuan Fu <casouri@gmail.com> writes:

> +/* Validate the PRED passed to treesit_traverse_match_predicate.  If
> +   there's an error, set SIGNAL_DATA to something signal accepts, and
> +   return false, otherwise return true.  */
> +static bool
> +treesit_traverse_validate_predicate (Lisp_Object pred,
> +				     Lisp_Object *signal_data)
> +{
> +  if (STRINGP (pred))
> +    return true;
> +  /* We want to allow cl-labels-defined functions, so we allow
> +     symbols.  */
> +  else if (FUNCTIONP (pred) || SYMBOLP (pred))
> +    return true;
> +  else if (CONSP (pred))
> +    {
> +      Lisp_Object car = XCAR (pred);
> +      Lisp_Object cdr = XCDR (pred);
> +      if (EQ (car, Qnot))
> +	{
> +	  if (!CONSP (cdr))
> +	    {
> +	      *signal_data = list2 (build_string ("Invalide `not' "
> +						  "predicate"),
> +				    pred);
> +	      return false;
> +	    }
> +	  /* At this point CDR must be a cons.  */
> +	  if (XFIXNUM (Flength (cdr)) != 1)
> +	    {
> +	      *signal_data = list2 (build_string ("`not' can only "
> +						  "have one argument"),
> +				    pred);
> +	      return false;
> +	    }
> +	  return treesit_traverse_validate_predicate (XCAR (cdr),
> +						      signal_data);
> +	}
> +      else if (EQ (car, Qor))
> +	{
> +	  if (!CONSP (cdr) || NILP (cdr))
> +	    {
> +	      *signal_data = list2 (build_string ("`or' must have a list "
> +						  "of patterns as "
> +						  "arguments "),
> +				    pred);
> +	      return false;
> +	    }
> +	  FOR_EACH_TAIL (cdr)
> +	    {
> +	      if (!treesit_traverse_validate_predicate (XCAR (cdr),
> +							signal_data))
> +		return false;
> +	    }
> +	  return true;
> +	}
> +      /* We allow the function to be a symbol to support cl-label. */
> +      else if (STRINGP (car) && (FUNCTIONP (cdr) || SYMBOLP (cdr)))
> +	return true;
> +    }
> +  *signal_data = list2 (build_string ("Invalid predicate, see TODO for "
> +				      "valid forms of predicate"),
> +			pred);
> +  return false;
> +}

How is this function robust against deeply nested or circular predicate
structures?  Shouldn't there be a recursion limit?

> +/* Return true if the node at CURSOR matches PRED.  PRED can be a lot
> +   of things:
> +
> +   PRED := string | function | (string . function)
> +         | (or PRED...) | (not PRED)
> +
> +   See docstring of treesit-search-forward and friends for the meaning
> +   of each shape.
> +
> +   This function assumes PRED is in one of its valid forms.  If NAMED
> +   is true, also check that the node is named.
> +
> +   This function may signal if the predicate function signals.  */
>  static bool
>  treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
>  				  Lisp_Object parser, bool named)
> @@ -3156,24 +3230,63 @@ treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
>        const char *type = ts_node_type (node);
>        return fast_c_string_match (pred, type, strlen (type)) >= 0;
>      }
> -  else
> +  /* We want to allow cl-labels-defined functions, so we allow
> +     symbols.  */
> +  else if (FUNCTIONP (pred) || SYMBOLP (pred))
>      {
>        Lisp_Object lisp_node = make_treesit_node (parser, node);
>        return !NILP (CALLN (Ffuncall, pred, lisp_node));
>      }
> +  else if (CONSP (pred))
> +    {
> +      Lisp_Object car = XCAR (pred);
> +      Lisp_Object cdr = XCDR (pred);
> +
> +      if (EQ (car, Qnot))
> +	{
> +	  return !treesit_traverse_match_predicate (cursor, XCAR (cdr),
> +						    parser, named);
> +	}

Please remove the redundant braces.

> +      else if (EQ (car, Qor))
> +	{
> +	  FOR_EACH_TAIL (cdr)
> +	    {
> +	      if (treesit_traverse_match_predicate (cursor, XCAR (cdr),
> +						    parser, named))
> +		return true;
> +	    }
> +	  return false;
> +	}
> +      /* We want to allow cl-labels-defined functions, so we allow
> +	 symbols.  */
> +      else if (STRINGP (car) && (FUNCTIONP (cdr) || SYMBOLP (cdr)))
> +	{
> +	  /* A bit of code duplication here, but should be fine.  */
> +	  const char *type = ts_node_type (node);
> +	  if (!(fast_c_string_match (pred, type, strlen (type)) >= 0))
> +	    return false;
> +
> +	  Lisp_Object lisp_node = make_treesit_node (parser, node);
> +	  if (NILP (CALLN (Ffuncall, pred, lisp_node)))
> +	    return false;
> +
> +	  return true;
> +	}
> +    }
> +  /* Returning false is better than UB. */
> +  return false;
>  }
>  
> -/* Traverse the parse tree starting from CURSOR.  PRED can be a
> -   function (takes a node and returns nil/non-nil), or a string
> -   (treated as regexp matching the node's type, must be all single
> -   byte characters).  If the node satisfies PRED, leave CURSOR on that
> -   node and return true.  If no node satisfies PRED, move CURSOR back
> -   to starting position and return false.
> +/* Traverse the parse tree starting from CURSOR.  See TODO for the
> +   shapes PRED can have.  If the node satisfies PRED, leave CURSOR on
> +   that node and return true.  If no node satisfies PRED, move CURSOR
> +   back to starting position and return false.
>  
>     LIMIT is the number of levels we descend in the tree.  FORWARD
>     controls the direction in which we traverse the tree, true means
>     forward, false backward.  If SKIP_ROOT is true, don't match ROOT.
> -   */
> +
> +   This function may signal if the predicate function signals.  */
>  static bool
>  treesit_search_dfs (TSTreeCursor *cursor,
>  		    Lisp_Object pred, Lisp_Object parser,
> @@ -3209,7 +3322,9 @@ treesit_search_dfs (TSTreeCursor *cursor,
>     START.  PRED, PARSER, NAMED, FORWARD are the same as in
>     ts_search_subtree.  If a match is found, leave CURSOR at that node,
>     and return true, if no match is found, return false, and CURSOR's
> -   position is undefined.  */
> +   position is undefined.
> +
> +   This function may signal if the predicate function signals.  */
>  static bool
>  treesit_search_forward (TSTreeCursor *cursor,
>  			Lisp_Object pred, Lisp_Object parser,
> @@ -3272,11 +3387,13 @@ Return the first matched node, or nil if none matches.  */)
>     Lisp_Object all, Lisp_Object depth)
>  {
>    CHECK_TS_NODE (node);
> -  CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
> -	      list3 (Qor, Qstringp, Qfunctionp), predicate);
>    CHECK_SYMBOL (all);
>    CHECK_SYMBOL (backward);
>  
> +  Lisp_Object signal_data = Qnil;
> +  if (!treesit_traverse_validate_predicate (predicate, &signal_data))
> +    xsignal1 (Qtreesit_invalid_predicate, signal_data);
> +
>    /* We use a default limit of 1000.  See bug#59426 for the
>       discussion.  */
>    ptrdiff_t the_limit = treesit_recursion_limit;
> @@ -3344,11 +3461,13 @@ always traverse leaf nodes first, then upwards.  */)
>     Lisp_Object all)
>  {
>    CHECK_TS_NODE (start);
> -  CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
> -	      list3 (Qor, Qstringp, Qfunctionp), predicate);
>    CHECK_SYMBOL (all);
>    CHECK_SYMBOL (backward);
>  
> +  Lisp_Object signal_data = Qnil;
> +  if (!treesit_traverse_validate_predicate (predicate, &signal_data))
> +    xsignal1 (Qtreesit_invalid_predicate, signal_data);
> +
>    treesit_initialize ();
>  
>    Lisp_Object parser = XTS_NODE (start)->parser;
> @@ -3376,7 +3495,9 @@ always traverse leaf nodes first, then upwards.  */)
>  /* Recursively traverse the tree under CURSOR, and append the result
>     subtree to PARENT's cdr.  See more in Ftreesit_induce_sparse_tree.
>     Note that the top-level children list is reversed, because
> -   reasons.  */
> +   reasons.
> +
> +   This function may signal if the predicate function signals.  */
>  static void
>  treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent,
>  			   Lisp_Object pred, Lisp_Object process_fn,
> @@ -3462,8 +3583,10 @@ a regexp.  */)
>     Lisp_Object depth)
>  {
>    CHECK_TS_NODE (root);
> -  CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
> -	      list3 (Qor, Qstringp, Qfunctionp), predicate);
> +
> +  Lisp_Object signal_data = Qnil;
> +  if (!treesit_traverse_validate_predicate (predicate, &signal_data))
> +    xsignal1 (Qtreesit_invalid_predicate, signal_data);

ISTM that if `treesit_traverse_validate_predicate' returns false,
`signal_data' is always initialized.

The initialization of `signal_data' is thus redundant.  In fact, I'm
pretty sure you could have `treesit_traverse_validate_predicate' return
Qnil upon success, and a signal upon failure.

>    if (!NILP (process_fn))
>      CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
> @@ -3595,6 +3718,7 @@ syms_of_treesit (void)
>    DEFSYM (Qoutdated, "outdated");
>    DEFSYM (Qhas_error, "has-error");
>    DEFSYM (Qlive, "live");
> +  DEFSYM (Qnot, "not");
>  
>    DEFSYM (QCanchor, ":anchor");
>    DEFSYM (QCequal, ":equal");
> @@ -3619,6 +3743,7 @@ syms_of_treesit (void)
>  	  "user-emacs-directory");
>    DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted");
>    DEFSYM (Qtreesit_pattern_expand, "treesit-pattern-expand");
> +  DEFSYM (Qtreesit_invalid_predicate, "treesit-invalid-predicate");
>  
>    DEFSYM (Qor, "or");
>  
> @@ -3646,6 +3771,9 @@ syms_of_treesit (void)
>    define_error (Qtreesit_parser_deleted,
>  		"This parser is deleted and cannot be used",
>  		Qtreesit_error);
> +  define_error (Qtreesit_invalid_predicate,
> +		"Invalid predicate, see TODO for valid forms for a predicate",
> +		Qtreesit_error);

Shouldn't this be in actual documentation and not etc/TODO?



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: master 361c5fc2d8e 3/3: Support more predicates in tree-sitter search functions
  2023-04-13 23:55   ` master 361c5fc2d8e 3/3: Support more predicates " Po Lu
@ 2023-04-14  1:57     ` Yuan Fu
  2023-04-14  4:31       ` Po Lu
  0 siblings, 1 reply; 6+ messages in thread
From: Yuan Fu @ 2023-04-14  1:57 UTC (permalink / raw)
  To: Po Lu; +Cc: emacs-devel

It seems that you’ve fixes most of the things you mentioned, thanks.

> How is this function robust against deeply nested or circular predicate
> structures?  Shouldn't there be a recursion limit?

Ok, I can add a recursion limit check.

> Please remove the redundant braces.

Is it really necessary? IMHO having the braces is clearer when the statement has more than one line.
> 
> ISTM that if `treesit_traverse_validate_predicate' returns false,
> `signal_data' is always initialized.

I like to always initialize variables.

> 
> The initialization of `signal_data' is thus redundant.  In fact, I'm
> pretty sure you could have `treesit_traverse_validate_predicate' return
> Qnil upon success, and a signal upon failure.

Either way should be fine. I’ve been using the current style throughout treesit.c.

> 
>>   if (!NILP (process_fn))
>>     CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
>> @@ -3595,6 +3718,7 @@ syms_of_treesit (void)
>>   DEFSYM (Qoutdated, "outdated");
>>   DEFSYM (Qhas_error, "has-error");
>>   DEFSYM (Qlive, "live");
>> +  DEFSYM (Qnot, "not");
>> 
>>   DEFSYM (QCanchor, ":anchor");
>>   DEFSYM (QCequal, ":equal");
>> @@ -3619,6 +3743,7 @@ syms_of_treesit (void)
>>  "user-emacs-directory");
>>   DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted");
>>   DEFSYM (Qtreesit_pattern_expand, "treesit-pattern-expand");
>> +  DEFSYM (Qtreesit_invalid_predicate, "treesit-invalid-predicate");
>> 
>>   DEFSYM (Qor, "or");
>> 
>> @@ -3646,6 +3771,9 @@ syms_of_treesit (void)
>>   define_error (Qtreesit_parser_deleted,
>> "This parser is deleted and cannot be used",
>> Qtreesit_error);
>> +  define_error (Qtreesit_invalid_predicate,
>> + "Invalid predicate, see TODO for valid forms for a predicate",
>> + Qtreesit_error);
> 
> Shouldn't this be in actual documentation and not etc/TODO?

Oh, that’s not etc/TODO. I’m going to add a variable in treesit.el which would document the shape of predicates, then I’ll replace the TODO with the name of that variable. At least that’s the plan for now.

Yuan




^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: master 361c5fc2d8e 3/3: Support more predicates in tree-sitter search functions
  2023-04-14  1:57     ` Yuan Fu
@ 2023-04-14  4:31       ` Po Lu
  2023-04-15  0:05         ` Yuan Fu
  0 siblings, 1 reply; 6+ messages in thread
From: Po Lu @ 2023-04-14  4:31 UTC (permalink / raw)
  To: Yuan Fu; +Cc: emacs-devel

Yuan Fu <casouri@gmail.com> writes:

> Ok, I can add a recursion limit check.

Great, thanks.

> Is it really necessary? IMHO having the braces is clearer when the statement has more than one line.

We prefer to avoid redundant braces, yes.

> I like to always initialize variables.

OK, but this wastes cycles.

> Either way should be fine. I’ve been using the current style
> throughout treesit.c.

The compiler cannot perform automatic register allocation on
`signal_data' your way.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: master 361c5fc2d8e 3/3: Support more predicates in tree-sitter search functions
  2023-04-14  4:31       ` Po Lu
@ 2023-04-15  0:05         ` Yuan Fu
  2023-04-15  0:18           ` Po Lu
  0 siblings, 1 reply; 6+ messages in thread
From: Yuan Fu @ 2023-04-15  0:05 UTC (permalink / raw)
  To: Po Lu; +Cc: emacs-devel



> On Apr 13, 2023, at 9:31 PM, Po Lu <luangruo@yahoo.com> wrote:
> 
> Yuan Fu <casouri@gmail.com> writes:
> 
>> Ok, I can add a recursion limit check.
> 
> Great, thanks.
> 
>> Is it really necessary? IMHO having the braces is clearer when the statement has more than one line.
> 
> We prefer to avoid redundant braces, yes.

If you insist, ok.

>> I like to always initialize variables.
> 
> OK, but this wastes cycles.
> 
>> Either way should be fine. I’ve been using the current style
>> throughout treesit.c.
> 
> The compiler cannot perform automatic register allocation on
> `signal_data' your way.

While you are absolutely right, the performance benefit surely would be negligible, no?

Yuan


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: master 361c5fc2d8e 3/3: Support more predicates in tree-sitter search functions
  2023-04-15  0:05         ` Yuan Fu
@ 2023-04-15  0:18           ` Po Lu
  0 siblings, 0 replies; 6+ messages in thread
From: Po Lu @ 2023-04-15  0:18 UTC (permalink / raw)
  To: Yuan Fu; +Cc: emacs-devel

Yuan Fu <casouri@gmail.com> writes:

> While you are absolutely right, the performance benefit surely would
> be negligible, no?

Maybe if you only consider this specific case.  But it adds up over
time, so please don't write code like this.



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-04-15  0:18 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <168142374534.3804.10641447592850495150@vcs2.savannah.gnu.org>
     [not found] ` <20230413220905.CB635C13A82@vcs2.savannah.gnu.org>
2023-04-13 23:50   ` master a5eb9f6ad4e 2/3: Catch signals produced by PRED in tree-sitter search functions Po Lu
     [not found] ` <20230413220905.E3C7BC13A84@vcs2.savannah.gnu.org>
2023-04-13 23:55   ` master 361c5fc2d8e 3/3: Support more predicates " Po Lu
2023-04-14  1:57     ` Yuan Fu
2023-04-14  4:31       ` Po Lu
2023-04-15  0:05         ` Yuan Fu
2023-04-15  0:18           ` Po Lu

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).