tail-call elimination

unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed

* tail-call elimination
@ 2012-12-11  2:57 Chris Gray
  2012-12-11  3:17 ` Stefan Monnier
  2012-12-11  6:13 ` Daniel Colascione
  0 siblings, 2 replies; 9+ messages in thread
From: Chris Gray @ 2012-12-11  2:57 UTC (permalink / raw)
  To: emacs-devel


[-- Attachment #1.1: Type: text/plain, Size: 774 bytes --]

Hello,

I have attached a patch that implements tail-call elimination for a subset
of emacs lisp.  This will be helpful in allowing coding styles which
emphasize tail recursion, such as is usual in languages like Scheme.
The subset of emacs lisp that is targeted is compiled and lexically bound.

There are some downsides to tail-call elimination.  The most obvious is
that debugging will be more complicated.  Since the memory usage is not
allowed to grow because of a tail call, the debug stack will not show the
function that was called in the tail call.  I think the benefits outweigh
this, but I'm obviously biased. :)

I'm also not a regular contributor to emacs, so please let me know if there
is anything I need to do to the patch to get it accepted.

Cheers,
Chris

[-- Attachment #1.2: Type: text/html, Size: 825 bytes --]

[-- Attachment #2: tco.diff --]
[-- Type: application/octet-stream, Size: 5376 bytes --]

commit a112728e08b7ec114dc60107db994bd3e27cc4c6 (HEAD, refs/heads/tco-2)
Author: Chris Gray <chrismgray@gmail.com>
Date:   Mon Dec 10 18:37:08 2012 -0800

    Change bytecode interpreter to allow for tail-call elimination in some cases

	Modified src/bytecode.c
diff --git a/src/bytecode.c b/src/bytecode.c
index 4c5ac15..9544090 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -45,6 +45,8 @@ by Hallvard:
 #include "xterm.h"
 #endif
 
+Lisp_Object Ffetch_bytecode (Lisp_Object);
+
 /*
  * define BYTE_CODE_SAFE to enable some minor sanity checking (useful for
  * debugging the byte compiler...)
@@ -481,8 +483,8 @@ If the third argument is incorrect, Emacs may crash.  */)
    executing BYTESTR.  */
 
 Lisp_Object
-exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
-		Lisp_Object args_template, ptrdiff_t nargs, Lisp_Object *args)
+exec_byte_code (volatile Lisp_Object bytestr, volatile Lisp_Object vector, volatile Lisp_Object maxdepth,
+		volatile Lisp_Object args_template, volatile ptrdiff_t nargs, Lisp_Object *args)
 {
   ptrdiff_t count = SPECPDL_INDEX ();
 #ifdef BYTE_CODE_METER
@@ -498,8 +500,11 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
   ptrdiff_t bytestr_length;
 #endif
   struct byte_stack stack;
-  Lisp_Object *top;
+  Lisp_Object *top = NULL;
+  Lisp_Object *bottom = NULL;
   Lisp_Object result;
+  jmp_buf env;
+  volatile Lisp_Object *funargs;
 
 #if 0 /* CHECK_FRAME_FONT */
  {
@@ -511,9 +516,23 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
  }
 #endif
 
+ funargs = xmalloc (nargs * sizeof(Lisp_Object));
+ funargs = *(volatile Lisp_Object *) &funargs;
+  {
+    int i;
+    for (i = 0; i < nargs; i++)
+      {
+        funargs[i] = args[i];
+      }
+  }
+  stack.next = byte_stack_list;
+  byte_stack_list = &stack;
+
+  setjmp(env);
   CHECK_STRING (bytestr);
   CHECK_VECTOR (vector);
   CHECK_NATNUM (maxdepth);
+  args = funargs;
 
 #ifdef BYTE_CODE_SAFE
   const_length = ASIZE (vector);
@@ -538,12 +557,11 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
   if (MAX_ALLOCA / word_size <= XFASTINT (maxdepth))
     memory_full (SIZE_MAX);
   top = alloca ((XFASTINT (maxdepth) + 1) * sizeof *top);
+
 #if BYTE_MAINTAIN_TOP
   stack.bottom = top + 1;
   stack.top = NULL;
 #endif
-  stack.next = byte_stack_list;
-  byte_stack_list = &stack;
 
 #ifdef BYTE_CODE_SAFE
   stacke = stack.bottom - 1 + XFASTINT (maxdepth);
@@ -595,6 +613,8 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
       error ("Unknown args template!");
     }
 
+  xfree (funargs);
+
   while (1)
     {
 #ifdef BYTE_CODE_SAFE
@@ -894,6 +914,42 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
 		  }
 	      }
 #endif
+            /* If the next op is return, maybe we can eliminate the tail call */
+            if (*stack.pc == Breturn)
+              {
+                Lisp_Object fun, original_fun, syms_left;
+                fun = original_fun = TOP;
+                
+                if (SYMBOLP (fun) && !EQ (fun, Qunbound)
+                    && (fun = XSYMBOL (fun)->function, SYMBOLP (fun)))
+                  fun = indirect_function (fun);
+                if (COMPILEDP(fun))
+                  {
+                    syms_left = AREF (fun, COMPILED_ARGLIST);
+                    if (INTEGERP (syms_left))
+                      {
+                        int i;
+                        volatile Lisp_Object *funargs2;
+                        if (CONSP (AREF (fun, COMPILED_BYTECODE)))
+                          Ffetch_bytecode (fun);
+                        bytestr = AREF (fun, COMPILED_BYTECODE);
+                        vector = AREF (fun, COMPILED_CONSTANTS);
+                        maxdepth = AREF (fun, COMPILED_STACK_DEPTH);
+                        args_template = syms_left;
+                        nargs = op;
+                        funargs2 = xmalloc (nargs * sizeof(Lisp_Object));
+                        funargs = *(volatile Lisp_Object *) &funargs2;
+                        for (i = 0; i < nargs; i++) {
+                          funargs[i] = top[i + 1];
+                        }
+                        /* uses setjmp/longjmp rather than goto so that the emacs-lisp stack
+                           can be allocated on the CPU stack.  This is what the garbage collector
+                           assumes, so it is preferable to changing the garbage collector.
+                        */
+                        longjmp(env, 1);
+                      }
+                  }
+              }
 	    TOP = Ffuncall (op + 1, &TOP);
 	    AFTER_POTENTIAL_GC ();
 	    NEXT;
	Modified src/lisp.h
diff --git a/src/lisp.h b/src/lisp.h
index 5acb37f..2bb8e39 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -3401,8 +3401,8 @@ extern struct byte_stack *byte_stack_list;
 extern void mark_byte_stack (void);
 #endif
 extern void unmark_byte_stack (void);
-extern Lisp_Object exec_byte_code (Lisp_Object, Lisp_Object, Lisp_Object,
-				   Lisp_Object, ptrdiff_t, Lisp_Object *);
+extern Lisp_Object exec_byte_code (volatile Lisp_Object, volatile Lisp_Object, volatile Lisp_Object,
+				   volatile Lisp_Object, volatile ptrdiff_t, Lisp_Object *);
 
 /* Defined in macros.c.  */
 extern Lisp_Object Qexecute_kbd_macro;


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: tail-call elimination
  2012-12-11  2:57 tail-call elimination Chris Gray
@ 2012-12-11  3:17 ` Stefan Monnier
  2012-12-11  6:13 ` Daniel Colascione
  1 sibling, 0 replies; 9+ messages in thread
From: Stefan Monnier @ 2012-12-11  3:17 UTC (permalink / raw)
  To: Chris Gray; +Cc: emacs-devel

> + funargs = xmalloc (nargs * sizeof(Lisp_Object));
> + funargs = *(volatile Lisp_Object *) &funargs;
> +  {
> +    int i;
> +    for (i = 0; i < nargs; i++)
> +      {
> +        funargs[i] = args[i];
> +      }
> +  }
> +  stack.next = byte_stack_list;
> +  byte_stack_list = &stack;

Can you explain what this is doing?

> +   /* uses setjmp/longjmp rather than goto so that the emacs-lisp stack
> +      can be allocated on the CPU stack.  This is what the garbage collector
> +      assumes, so it is preferable to changing the garbage collector.
> +   */

Can you explain a bit more why `goto' wouldn't work?


        Stefan



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: tail-call elimination
  2012-12-11  2:57 tail-call elimination Chris Gray
  2012-12-11  3:17 ` Stefan Monnier
@ 2012-12-11  6:13 ` Daniel Colascione
  2012-12-11  6:45   ` Chris Gray
  2012-12-11 13:34   ` Stefan Monnier
  1 sibling, 2 replies; 9+ messages in thread
From: Daniel Colascione @ 2012-12-11  6:13 UTC (permalink / raw)
  To: Chris Gray; +Cc: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 857 bytes --]

On 12/10/2012 6:57 PM, Chris Gray wrote:
> Hello,
> 
> I have attached a patch that implements tail-call elimination for a subset of
> emacs lisp.  This will be helpful in allowing coding styles which emphasize tail
> recursion, such as is usual in languages like Scheme. 

Your patch eliminates tail calls only in byte compiled code. Until the
interpreter also supports guaranteed tail call elimination or we byte-compile
all forms before evaluating them, elisp developers cannot rely on the
optimization and cannot write idiomatic tail recursive code. As a purely
opportunistic optimization, not as a guaranteed language feature, I doubt tail
call elimination is worth the complexity.

I also don't particularly like code that relies on tail call elimination. I find
it difficult to read, and as you note, the results can be surprising.

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 258 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: tail-call elimination
  2012-12-11  6:13 ` Daniel Colascione
@ 2012-12-11  6:45   ` Chris Gray
  2012-12-11 13:34   ` Stefan Monnier
  1 sibling, 0 replies; 9+ messages in thread
From: Chris Gray @ 2012-12-11  6:45 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: emacs-devel

Daniel Colascione <dancol@dancol.org> writes:
> On 12/10/2012 6:57 PM, Chris Gray wrote:
>> Hello,
>> 
>> I have attached a patch that implements tail-call elimination for a subset of
>> emacs lisp.  This will be helpful in allowing coding styles which emphasize tail
>> recursion, such as is usual in languages like Scheme. 
>
> Your patch eliminates tail calls only in byte compiled code. Until the
> interpreter also supports guaranteed tail call elimination or we byte-compile
> all forms before evaluating them, elisp developers cannot rely on the
> optimization and cannot write idiomatic tail recursive code. As a purely
> opportunistic optimization, not as a guaranteed language feature, I doubt tail
> call elimination is worth the complexity.

I agree that the fact that the code must be byte-compiled makes it
harder to do interactive programming.  In many cases, however, small
tests can be done interactively to verify that the function is correct
before compiling it.  These would likely not blow up the stack, and thus
tail-call elimination would not be needed.  When the function has been
shown to work, it can be byte-compiled fairly simply.

I do, however, think that it's a bit inaccurate to say that this is a
purely opportunistic optimization.  It is guaranteed in the case that
your code is byte-compiled and lexically bound.  I would love to widen
that subset of the language to include things that aren't byte-compiled
(or to byte-compile automatically), but I've gotta start somewhere.

Cheers,
Chris

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: tail-call elimination
  2012-12-11  6:13 ` Daniel Colascione
  2012-12-11  6:45   ` Chris Gray
@ 2012-12-11 13:34   ` Stefan Monnier
  2012-12-11 14:30     ` Wolfgang Jenkner
  2012-12-31 18:16     ` Chris Gray
  1 sibling, 2 replies; 9+ messages in thread
From: Stefan Monnier @ 2012-12-11 13:34 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Chris Gray, emacs-devel

>> I have attached a patch that implements tail-call elimination for
>> a subset of emacs lisp.  This will be helpful in allowing coding
>> styles which emphasize tail recursion, such as is usual in languages
>> like Scheme.
> Your patch eliminates tail calls only in byte compiled code.  Until the
> interpreter also supports guaranteed tail call elimination or we byte-compile
> all forms before evaluating them, elisp developers cannot rely on the
> optimization and cannot write idiomatic tail recursive code.

I'm not too worried about that.  For one, you can argue that bumping
into max-lisp-eval-depth for lack of byte-compilation is similar to being
too slow for lack of byte-compilation.  And in any case we can hope that
non-byte-compiled code is on the way to extinction.

> As a purely opportunistic optimization, not as a guaranteed language
> feature, I doubt tail call elimination is worth the complexity.

If it speeds up execution, I think it can be worth the trouble.


        Stefan



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: tail-call elimination
  2012-12-11 13:34   ` Stefan Monnier
@ 2012-12-11 14:30     ` Wolfgang Jenkner
  2012-12-11 15:13       ` Stefan Monnier
  2012-12-31 18:16     ` Chris Gray
  1 sibling, 1 reply; 9+ messages in thread
From: Wolfgang Jenkner @ 2012-12-11 14:30 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Troels Nielsen, Daniel Colascione, Chris Gray, emacs-devel

On Tue, Dec 11 2012, Stefan Monnier wrote:

>>> I have attached a patch that implements tail-call elimination for
>>> a subset of emacs lisp.  This will be helpful in allowing coding
>>> styles which emphasize tail recursion, such as is usual in languages
>>> like Scheme.
> If it speeds up execution, I think it can be worth the trouble.

What about Troels Nielsen's implementation of the same feature?

http://permalink.gmane.org/gmane.emacs.devel/153399

Wolfgang



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: tail-call elimination
  2012-12-11 14:30     ` Wolfgang Jenkner
@ 2012-12-11 15:13       ` Stefan Monnier
  0 siblings, 0 replies; 9+ messages in thread
From: Stefan Monnier @ 2012-12-11 15:13 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Troels Nielsen, Chris Gray, emacs-devel

>>>> I have attached a patch that implements tail-call elimination for
>>>> a subset of emacs lisp.  This will be helpful in allowing coding
>>>> styles which emphasize tail recursion, such as is usual in languages
>>>> like Scheme.
>> If it speeds up execution, I think it can be worth the trouble.
> What about Troels Nielsen's implementation of the same feature?
> http://permalink.gmane.org/gmane.emacs.devel/153399

Very nice attempt as well, indeed.
Not sure why he hasn't followed up on this.


        Stefan



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: tail-call elimination
  2012-12-11 13:34   ` Stefan Monnier
  2012-12-11 14:30     ` Wolfgang Jenkner
@ 2012-12-31 18:16     ` Chris Gray
  2013-01-07 18:28       ` Stefan Monnier
  1 sibling, 1 reply; 9+ messages in thread
From: Chris Gray @ 2012-12-31 18:16 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 267 bytes --]

Hello,

Based on feedback by Stefan, I have updated the patch.  This is a
slightly less "pure" version of TCO than the original patch, but it
should be faster.  (And really, the difference between this and the
"pure" version is pretty much academic.)

Cheers,
Chris


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: tail-call.diff --]
[-- Type: text/x-diff, Size: 3812 bytes --]

diff --git a/src/bytecode.c b/src/bytecode.c
index 4c5ac15..16495ea 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -45,6 +45,8 @@ by Hallvard:
 #include "xterm.h"
 #endif
 
+Lisp_Object Ffetch_bytecode (Lisp_Object);
+
 /*
  * define BYTE_CODE_SAFE to enable some minor sanity checking (useful for
  * debugging the byte compiler...)
@@ -498,7 +500,8 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
   ptrdiff_t bytestr_length;
 #endif
   struct byte_stack stack;
-  Lisp_Object *top;
+  Lisp_Object *top = NULL;
+  Lisp_Object *bottom = NULL;
   Lisp_Object result;
 
 #if 0 /* CHECK_FRAME_FONT */
@@ -511,6 +514,9 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
  }
 #endif
 
+  stack.next = byte_stack_list;
+  byte_stack_list = &stack;
+
   CHECK_STRING (bytestr);
   CHECK_VECTOR (vector);
   CHECK_NATNUM (maxdepth);
@@ -532,18 +538,24 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
 #endif
   vectorp = XVECTOR (vector)->contents;
 
-  stack.byte_string = bytestr;
-  stack.pc = stack.byte_string_start = SDATA (bytestr);
-  stack.constants = vector;
   if (MAX_ALLOCA / word_size <= XFASTINT (maxdepth))
     memory_full (SIZE_MAX);
   top = alloca ((XFASTINT (maxdepth) + 1) * sizeof *top);
+  bottom = top;
+
+ tail_call:
+  CHECK_STRING (bytestr);
+  CHECK_VECTOR (vector);
+  CHECK_NATNUM (maxdepth);
+  stack.byte_string = bytestr;
+  stack.pc = stack.byte_string_start = SDATA (bytestr);
+  stack.constants = vector;
+  vectorp = XVECTOR (vector)->contents;
+  
 #if BYTE_MAINTAIN_TOP
   stack.bottom = top + 1;
   stack.top = NULL;
 #endif
-  stack.next = byte_stack_list;
-  byte_stack_list = &stack;
 
 #ifdef BYTE_CODE_SAFE
   stacke = stack.bottom - 1 + XFASTINT (maxdepth);
@@ -894,6 +906,45 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
 		  }
 	      }
 #endif
+            /* If the next op is return, maybe we can eliminate the tail call */
+            if (*stack.pc == Breturn)
+              {
+                Lisp_Object fun, original_fun, syms_left;
+                fun = original_fun = TOP;
+                
+                if (SYMBOLP (fun) && !EQ (fun, Qunbound)
+                    && (fun = XSYMBOL (fun)->function, SYMBOLP (fun)))
+                  fun = indirect_function (fun);
+                if (COMPILEDP(fun))
+                  {
+                    syms_left = AREF (fun, COMPILED_ARGLIST);
+                    if (INTEGERP (syms_left))
+                      {
+                        int i;
+                        int prev_maxdepth = XFASTINT(maxdepth);
+                        if (CONSP (AREF (fun, COMPILED_BYTECODE)))
+                          Ffetch_bytecode (fun);
+                        bytestr = AREF (fun, COMPILED_BYTECODE);
+                        vector = AREF (fun, COMPILED_CONSTANTS);
+                        maxdepth = AREF (fun, COMPILED_STACK_DEPTH);
+                        args_template = syms_left;
+                        nargs = op;
+                        args = top + 1;
+                        if (XFASTINT(maxdepth) > prev_maxdepth)
+                          {
+                              if (MAX_ALLOCA / word_size <= XFASTINT (maxdepth))
+                                memory_full (SIZE_MAX);
+                              top = alloca ((XFASTINT (maxdepth) + 1) * sizeof *top);
+                              bottom = top;
+                          }
+                        else
+                          {
+                            top = bottom;
+                          }
+                        goto tail_call;
+                      }
+                  }
+              }
 	    TOP = Ffuncall (op + 1, &TOP);
 	    AFTER_POTENTIAL_GC ();
 	    NEXT;

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: tail-call elimination
  2012-12-31 18:16     ` Chris Gray
@ 2013-01-07 18:28       ` Stefan Monnier
  0 siblings, 0 replies; 9+ messages in thread
From: Stefan Monnier @ 2013-01-07 18:28 UTC (permalink / raw)
  To: Chris Gray; +Cc: emacs-devel

> Based on feedback by Stefan, I have updated the patch.  This is a
> slightly less "pure" version of TCO than the original patch, but it
> should be faster.  (And really, the difference between this and the
> "pure" version is pretty much academic.)

It's looking pretty good.  I have a few remaining questions/comments/nitpicks:

> +  stack.next = byte_stack_list;
> +  byte_stack_list = &stack;
> +
>    CHECK_STRING (bytestr);
>    CHECK_VECTOR (vector);
>    CHECK_NATNUM (maxdepth);

IIUC These checks are "runtime redundant" with the checks you added
right after the "tail_call:" label.  Try to restructure the code to
remove this redundancy.

> +            /* If the next op is return, maybe we can eliminate the tail call */

Please add a "." at the end of that sentence and make sure it is
followed by 2 spaces.

> +                if (SYMBOLP (fun) && !EQ (fun, Qunbound)
> +                    && (fun = XSYMBOL (fun)->function, SYMBOLP (fun)))

You can drop the !EQ (fun, Qunbound) test.

> +                  fun = indirect_function (fun);
> +                if (COMPILEDP(fun))

Please add a space between COMPILEDP and the following open parenthesis.

> +                        int prev_maxdepth = XFASTINT(maxdepth);

Add the same space here too.

> +                        args_template = syms_left;

Not sure why you named the var "syms_left".

> +                        else
> +                          {
> +                            top = bottom;
> +                          }

You don't need that braces (tho they don't hurt), but you do need to add
a comment explaining that that elements that were on top are now
referenced by `args' and will be moved back to `top' and that this move
needs to be careful not to overwrite data before it reads it since the
two areas may (and often will) overlap.

One more thing: have you performed some performance tests?
I'd recommend you compare something like

   cd lisp; rm **/*.elc; time make compile

-- Stefan

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-01-07 18:28 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-12-11  2:57 tail-call elimination Chris Gray
2012-12-11  3:17 ` Stefan Monnier
2012-12-11  6:13 ` Daniel Colascione
2012-12-11  6:45   ` Chris Gray
2012-12-11 13:34   ` Stefan Monnier
2012-12-11 14:30     ` Wolfgang Jenkner
2012-12-11 15:13       ` Stefan Monnier
2012-12-31 18:16     ` Chris Gray
2013-01-07 18:28       ` Stefan Monnier

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).