request for review: Doing direct file I/O in Emacs Lisp

unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed

* request for review: Doing direct file I/O in Emacs Lisp
@ 2004-05-10  5:59 John Wiegley
  2004-05-10  6:52 ` Kim F. Storm
                   ` (4 more replies)
  0 siblings, 5 replies; 37+ messages in thread
From: John Wiegley @ 2004-05-10  5:59 UTC (permalink / raw)


The following patch implements a file-handle interface for Emacs Lisp,
which allows files to be directly opened and read/written to without
an intervening buffer.  Eshell can now use this, for example, to
greatly speed up output redirection (by several orders of magnitude).

It is a simple interface that reads in strings, given a length, and
writes strings by examining their length:

  (let ((handle (file-handle-open "/tmp/some-file" "w")))
    (file-handle-write handle "Test data\n")
    (file-handle-close handle)

    (setq handle (file-handle-open "/tmp/some-file" "r"))
    (message (file-handle-read handle 128))
    (file-handle-close handle))

Please post comments here, or mail them to johnw@gnu.org.

Thanks,
  John

----------------------------------------------------------------------
Index: src/ChangeLog
===================================================================
RCS file: /cvsroot/emacs/emacs/src/ChangeLog,v
retrieving revision 1.3671
diff -w -U3 -r1.3671 ChangeLog
--- src/ChangeLog	10 May 2004 04:15:14 -0000	1.3671
+++ src/ChangeLog	10 May 2004 05:51:30 -0000
@@ -3,6 +3,26 @@
 	* fns.c (count_combining): Delete it.
 	(concat): Don't check combining bytes.
 
+2004-05-09  John Wiegley  <johnw@gnu.org>
+
+	* lisp.h (enum pvec_type): Added PVEC_FILE_HANDLE type.  Added
+	Lisp_File_Handle structure, and several macros for dealing with
+	these types.
+
+	* fileio.c: Implemented several new functions: file-handle-p,
+	file-handle-open, file-handle-close, file-handle-read,
+	file-handle-write.
+	(syms_of_fileio): Declare these routines to the lisp interpreter.
+
+	* data.c: Added global Qfile_handle.
+	(Ftype_of): Check for file handles.
+	(syms_of_data): Intern the symbol "file-handle".
+	(syms_of_data): Setup the variable Qfile_handle.
+
+	* alloc.c (enum mem_type): Added MEM_TYPE_FILE_HANDLE.
+	(allocate_file_handle): New routine for allocating file handle
+	objects.
+
 2004-05-09  Jason Rumney  <jasonr@gnu.org>
 
 	* w32fns.c (Vw32_ansi_code_page): New Lisp variable.
Index: src/alloc.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/alloc.c,v
retrieving revision 1.333
diff -w -U3 -r1.333 alloc.c
--- src/alloc.c	26 Apr 2004 21:42:49 -0000	1.333
+++ src/alloc.c	10 May 2004 05:51:35 -0000
@@ -291,6 +291,7 @@
   MEM_TYPE_VECTOR,
   MEM_TYPE_PROCESS,
   MEM_TYPE_HASH_TABLE,
+  MEM_TYPE_FILE_HANDLE,
   MEM_TYPE_FRAME,
   MEM_TYPE_WINDOW
 };
@@ -2558,6 +2559,21 @@
     v->contents[i] = Qnil;
 
   return (struct Lisp_Hash_Table *) v;
+}
+
+
+struct Lisp_File_Handle *
+allocate_file_handle ()
+{
+  EMACS_INT len = VECSIZE (struct Lisp_File_Handle);
+  struct Lisp_Vector *v = allocate_vectorlike (len, MEM_TYPE_FILE_HANDLE);
+  EMACS_INT i;
+
+  for (i = 0; i < len; ++i)
+    v->contents[i] = Qnil;
+  v->size = len;
+
+  return (struct Lisp_File_Handle *) v;
 }
 
 
Index: src/data.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/data.c,v
retrieving revision 1.239
diff -w -U3 -r1.239 data.c
--- src/data.c	9 May 2004 00:49:06 -0000	1.239
+++ src/data.c	10 May 2004 05:51:49 -0000
@@ -93,7 +93,7 @@
 static Lisp_Object Qfloat, Qwindow_configuration, Qwindow;
 Lisp_Object Qprocess;
 static Lisp_Object Qcompiled_function, Qbuffer, Qframe, Qvector;
-static Lisp_Object Qchar_table, Qbool_vector, Qhash_table;
+static Lisp_Object Qchar_table, Qbool_vector, Qhash_table, Qfile_handle;
 static Lisp_Object Qsubrp, Qmany, Qunevalled;
 
 static Lisp_Object swap_in_symval_forwarding P_ ((Lisp_Object, Lisp_Object));
@@ -243,6 +243,8 @@
 	return Qframe;
       if (GC_HASH_TABLE_P (object))
 	return Qhash_table;
+      if (GC_FILE_HANDLEP (object))
+	return Qfile_handle;
       return Qvector;
 
     case Lisp_Float:
@@ -3227,6 +3229,7 @@
   Qchar_table = intern ("char-table");
   Qbool_vector = intern ("bool-vector");
   Qhash_table = intern ("hash-table");
+  Qfile_handle = intern ("file-handle");
 
   staticpro (&Qinteger);
   staticpro (&Qsymbol);
@@ -3246,6 +3249,7 @@
   staticpro (&Qchar_table);
   staticpro (&Qbool_vector);
   staticpro (&Qhash_table);
+  staticpro (&Qfile_handle);
 
   defsubr (&Sindirect_variable);
   defsubr (&Sinteractive_form);
Index: src/fileio.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/fileio.c,v
retrieving revision 1.503
diff -w -U3 -r1.503 fileio.c
--- src/fileio.c	4 May 2004 19:23:31 -0000	1.503
+++ src/fileio.c	10 May 2004 05:51:50 -0000
@@ -6365,6 +6365,152 @@
 }
 
 \f
+DEFUN ("file-handle-p", Ffile_handle_p, Sfile_handle_p, 1, 1, 0,
+       doc: /* Return t if OBJECT is a direct file handle.  */)
+     (object)
+     Lisp_Object object;
+{
+  if (FILE_HANDLEP (object))
+    return Qt;
+  return Qnil;
+}
+
+
+DEFUN ("file-handle-open", Ffile_handle_open, Sfile_handle_open,
+       2, 2, 0,
+       doc: /* Open a file handle for direct reading/writing. */)
+     (path, mode)
+     Lisp_Object path, mode;
+{
+  FILE *stream;
+  Lisp_Object handle, lispstream;
+  struct Lisp_File_Handle *lh;
+
+  if (! STRINGP (path) || ! STRINGP (mode))
+    return Qnil;
+
+  if (! Ffile_exists_p (path))
+    return Qnil;
+
+  stream = fopen(SDATA (path), SDATA (mode));
+  if (! stream)
+    return Qnil;
+
+  lh = allocate_file_handle ();
+
+  /* Arrange to close that file whether or not we get an error.
+     Also reset auto_saving to 0.  */
+  lispstream = Fcons (Qnil, Qnil);
+  XSETCARFASTINT (lispstream, (EMACS_UINT)stream >> 16);
+  XSETCDRFASTINT (lispstream, (EMACS_UINT)stream & 0xffff);
+
+  lh->handle = lispstream;
+
+  XSETFILE_HANDLE (handle, lh);
+  xassert (FILE_HANDLEP (handle));
+  xassert (XFILE_HANDLE (handle) == lh);
+
+  return handle;
+}
+
+DEFUN ("file-handle-close", Ffile_handle_close, Sfile_handle_close,
+       1, 1, 0,
+       doc: /* Close a direct file handle. */)
+     (handle)
+     Lisp_Object handle;
+{
+  FILE *stream;
+  Lisp_Object lispstream;
+  struct Lisp_File_Handle *lh;
+
+  if (! FILE_HANDLEP (handle))
+    return Qnil;
+
+  lh = XFILE_HANDLE(handle);
+
+  lispstream = lh->handle;
+  if (! CONSP(lispstream))
+    return Qnil;
+
+  stream = (FILE *) (XFASTINT (XCAR (lispstream)) << 16 |
+		     XFASTINT (XCDR (lispstream)));
+  lh->handle = Qnil;
+  if (! stream)
+    return Qnil;
+
+  fclose(stream);
+
+  return Qt;
+}
+
+DEFUN ("file-handle-read", Ffile_handle_read, Sfile_handle_read,
+       2, 2, 0,
+       doc: /* Close a direct file handle. */)
+     (handle, length)
+     Lisp_Object handle, length;
+{
+  FILE *stream;
+  Lisp_Object lispstream, data;
+  struct Lisp_File_Handle *lh;
+  unsigned char *buf;
+  int read;
+
+  if (! FILE_HANDLEP (handle))
+    return Qnil;
+
+  lh = XFILE_HANDLE(handle);
+
+  lispstream = lh->handle;
+  if (! CONSP(lispstream))
+    return Qnil;
+
+  stream = (FILE *) (XFASTINT (XCAR (lispstream)) << 16 |
+		     XFASTINT (XCDR (lispstream)));
+  if (! stream)
+    return Qnil;
+
+  buf = (unsigned char *) alloca (XFASTINT (length));
+  data = make_string (buf, XFASTINT (length));
+  read = fread(SDATA (data), 1, XFASTINT (length), stream);
+  if (read != XFASTINT (length))
+    return Fsubstring (data, make_number (0), make_number (read));
+
+  return data;
+}
+
+DEFUN ("file-handle-write", Ffile_handle_write, Sfile_handle_write,
+       2, 2, 0,
+       doc: /* Close a direct file handle. */)
+     (handle, data)
+     Lisp_Object handle, data;
+{
+  FILE *stream;
+  Lisp_Object lispstream;
+  struct Lisp_File_Handle *lh;
+  int wrote;
+
+  if (! FILE_HANDLEP (handle))
+    return Qnil;
+
+  lh = XFILE_HANDLE(handle);
+
+  lispstream = lh->handle;
+  if (! CONSP(lispstream))
+    return Qnil;
+
+  stream = (FILE *) (XFASTINT (XCAR (lispstream)) << 16 |
+		     XFASTINT (XCDR (lispstream)));
+  if (! stream)
+    return Qnil;
+
+  wrote = fwrite(SDATA (data), 1, SCHARS (data), stream);
+  if (wrote != SCHARS (data))
+    return Qnil;
+
+  return Qt;
+}
+
+\f
 void
 init_fileio_once ()
 {
@@ -6678,6 +6824,12 @@
 
   defsubr (&Sread_file_name_internal);
   defsubr (&Sread_file_name);
+
+  defsubr (&Sfile_handle_p);
+  defsubr (&Sfile_handle_open);
+  defsubr (&Sfile_handle_close);
+  defsubr (&Sfile_handle_read);
+  defsubr (&Sfile_handle_write);
 
 #ifdef unix
   defsubr (&Sunix_sync);
Index: src/lisp.h
===================================================================
RCS file: /cvsroot/emacs/emacs/src/lisp.h,v
retrieving revision 1.489
diff -w -U3 -r1.489 lisp.h
--- src/lisp.h	26 Apr 2004 21:26:17 -0000	1.489
+++ src/lisp.h	10 May 2004 05:51:54 -0000
@@ -267,7 +267,8 @@
   PVEC_BOOL_VECTOR = 0x10000,
   PVEC_BUFFER = 0x20000,
   PVEC_HASH_TABLE = 0x40000,
-  PVEC_TYPE_MASK = 0x7fe00
+  PVEC_FILE_HANDLE = 0x80000,
+  PVEC_TYPE_MASK = 0xffe00
 
 #if 0 /* This is used to make the value of PSEUDOVECTOR_FLAG available to
 	 GDB.  It doesn't work on OS Alpha.  Moved to a variable in
@@ -513,6 +514,16 @@
 #define XSETCHAR_TABLE(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_CHAR_TABLE))
 #define XSETBOOL_VECTOR(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_BOOL_VECTOR))
 
+struct Lisp_File_Handle
+  {
+    EMACS_INT size;
+    struct Lisp_Vector *v_next;
+    Lisp_Object handle;
+};
+
+#define XSETFILE_HANDLE(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_FILE_HANDLE))
+#define XFILE_HANDLE(a) ((struct Lisp_File_Handle *) XPNTR (a))
+
 /* Convenience macros for dealing with Lisp arrays.  */
 
 #define AREF(ARRAY, IDX)	XVECTOR ((ARRAY))->contents[IDX]
@@ -1421,6 +1432,8 @@
 #define GC_BOOL_VECTOR_P(x) GC_PSEUDOVECTORP (x, PVEC_BOOL_VECTOR)
 #define FRAMEP(x) PSEUDOVECTORP (x, PVEC_FRAME)
 #define GC_FRAMEP(x) GC_PSEUDOVECTORP (x, PVEC_FRAME)
+#define FILE_HANDLEP(x) PSEUDOVECTORP (x, PVEC_FILE_HANDLE)
+#define GC_FILE_HANDLEP(x) GC_PSEUDOVECTORP (x, PVEC_FILE_HANDLE)
 
 #define SUB_CHAR_TABLE_P(x) (CHAR_TABLE_P (x) && NILP (XCHAR_TABLE (x)->top))
 
@@ -2447,6 +2460,7 @@
 extern struct Lisp_Vector *allocate_vector P_ ((EMACS_INT));
 extern struct Lisp_Vector *allocate_other_vector P_ ((EMACS_INT));
 extern struct Lisp_Hash_Table *allocate_hash_table P_ ((void));
+extern struct Lisp_File_Handle *allocate_file_handle P_ ((void));
 extern struct window *allocate_window P_ ((void));
 extern struct frame *allocate_frame P_ ((void));
 extern struct Lisp_Process *allocate_process P_ ((void));
Index: src/print.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/print.c,v
retrieving revision 1.199
diff -w -U3 -r1.199 print.c
--- src/print.c	26 Apr 2004 21:56:26 -0000	1.199
+++ src/print.c	10 May 2004 05:51:57 -0000
@@ -1872,6 +1872,10 @@
 	  strout (buf, -1, -1, printcharfun, 0);
 	  PRINTCHAR ('>');
 	}
+      else if (FILE_HANDLEP (obj))
+	{
+	  strout ("#<file-handle>", -1, -1, printcharfun, 0);
+	}
       else if (BUFFERP (obj))
 	{
 	  if (NILP (XBUFFER (obj)->name))
Index: lisp/eshell/esh-io.el
===================================================================
RCS file: /cvsroot/emacs/emacs/lisp/eshell/esh-io.el,v
retrieving revision 1.8
diff -w -U3 -r1.8 esh-io.el
--- lisp/eshell/esh-io.el	1 Sep 2003 15:45:23 -0000	1.8
+++ lisp/eshell/esh-io.el	10 May 2004 05:51:57 -0000
@@ -260,6 +260,10 @@
 
    ;; If we were redirecting to a file, save the file and close the
    ;; buffer.
+   ((and (fboundp 'file-handle-p)
+	 (file-handle-p target))
+    (file-handle-close target))
+
    ((markerp target)
     (let ((buf (marker-buffer target)))
       (when buf                         ; somebody's already killed it!
@@ -337,6 +341,11 @@
 	 (if (nth 2 redir)
 	     (funcall (nth 1 redir) mode)
 	   (nth 1 redir))
+       (if (fboundp 'file-handle-open)
+	   (cond ((eq mode 'overwrite)
+		  (file-handle-open target "w"))
+		 ((eq mode 'append)
+		  (file-handle-open target "a")))
        (let* ((exists (get-file-buffer target))
 	      (buf (find-file-noselect target t)))
 	 (with-current-buffer buf
@@ -348,7 +357,7 @@
 		  (erase-buffer))
 		 ((eq mode 'append)
 		  (goto-char (point-max))))
-	   (point-marker))))))
+	     (point-marker)))))))
    ((or (bufferp target)
 	(and (boundp 'eshell-buffer-shorthand)
 	     (symbol-value 'eshell-buffer-shorthand)
@@ -461,6 +470,11 @@
   "Insert OBJECT into TARGET.
 Returns what was actually sent, or nil if nothing was sent."
   (cond
+   ((and (fboundp 'file-handle-p)
+	 (file-handle-p target))
+    (setq object (eshell-stringify object))
+    (file-handle-write target object))
+
    ((functionp target)
     (funcall target object))

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10  5:59 request for review: Doing direct file I/O in Emacs Lisp John Wiegley
@ 2004-05-10  6:52 ` Kim F. Storm
  2004-05-10  8:27 ` David Kastrup
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 37+ messages in thread
From: Kim F. Storm @ 2004-05-10  6:52 UTC (permalink / raw)
  Cc: emacs-devel

John Wiegley <johnw@gnu.org> writes:

> The following patch implements a file-handle interface for Emacs Lisp,
> which allows files to be directly opened and read/written to without
> an intervening buffer.  Eshell can now use this, for example, to
> greatly speed up output redirection (by several orders of magnitude).
> 
> It is a simple interface that reads in strings, given a length, and
> writes strings by examining their length:
> 
>   (let ((handle (file-handle-open "/tmp/some-file" "w")))
>     (file-handle-write handle "Test data\n")
>     (file-handle-close handle)
> 
>     (setq handle (file-handle-open "/tmp/some-file" "r"))
>     (message (file-handle-read handle 128))
>     (file-handle-close handle))
> 

This seems like a great idea, but it is not up to me to decide.

Some comments:

The doc string for -open needs improvement.  I'm not sure about using
"r", "w", etc for the mode; using things like 'read 'write and 'append
seems more Lisp-like.  But I don't prefer either.

The doc strings for -read and -write are bad.

Instead of an explicit CONS as the handle, I would rather declare
the file handle like this:

struct Lisp_File_Handle
  {
    EMACS_INT size;
    struct Lisp_Vector *v_next;
    Lisp_Object handle_hi;
    Lisp_Object handle_lo;
    Lisp_Object file_name;
    Lisp_Object open_mode;
};

and use handle_hi and handle_lo directly instead of all the CAR and
CDR'ing.

When you close the file handle, open_mode is set to nil.

The file_name and open_mode can be used to improve the printing of
the file handle as in

      else if (FILE_HANDLEP (obj))
	{
	  strout ("#<file-handle ", -1, -1, printcharfun, 0);
          if (!NILP (XFILE_HANDLE (obj)->open_mode))
            {
	      strout (XFILE_HANDLE (obj)->file_name, -1, -1, printcharfun, 0);
	      strout (" ", -1, -1, printcharfun, 0);
	      strout (XFILE_HANDLE (obj)->open_mode, -1, -1, printcharfun, 0);
            }
          else
            strout (" closed", -1, -1, printcharfun, 0);
          PRINTCHAR (')>');
	}

You don't address the issue of multibyte in -read and -write.
It seems that you always assume things to be unibyte.

I don't know what the right thing to do is, but maybe you should
at least signal an error if things are not unibyte ?  

Otherwise, you should associate a coding system with the file-handle
and use that for read and write.  As a first shot at this, you could
add the coding system as an optional third arg to -open, and assume
unibyte/binary if no coding system was specified.

A -seek operation would be nice I guess.
And a -position (aka ftell) would be nice too.

-- 
Kim F. Storm <storm@cua.dk> http://www.cua.dk

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10  5:59 request for review: Doing direct file I/O in Emacs Lisp John Wiegley
  2004-05-10  6:52 ` Kim F. Storm
@ 2004-05-10  8:27 ` David Kastrup
  2004-05-10 14:21   ` Stefan Monnier
  2004-05-11  9:23   ` John Wiegley
  2004-05-10  9:38 ` Andreas Schwab
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 37+ messages in thread
From: David Kastrup @ 2004-05-10  8:27 UTC (permalink / raw)
  Cc: emacs-devel

John Wiegley <johnw@gnu.org> writes:

> The following patch implements a file-handle interface for Emacs Lisp,
> which allows files to be directly opened and read/written to without
> an intervening buffer.  Eshell can now use this, for example, to
> greatly speed up output redirection (by several orders of magnitude).
> 
> It is a simple interface that reads in strings, given a length, and
> writes strings by examining their length:
> 
>   (let ((handle (file-handle-open "/tmp/some-file" "w")))
>     (file-handle-write handle "Test data\n")
>     (file-handle-close handle)
> 
>     (setq handle (file-handle-open "/tmp/some-file" "r"))
>     (message (file-handle-read handle 128))
>     (file-handle-close handle))
> 
> Please post comments here, or mail them to johnw@gnu.org.

I had posted a proposal to change start-process instead some time ago
that would have allowed for efficient I/O redirection under eshell.
Have you seen that proposal?  That would be, IMO, a much more Emacsy
solution.  The above looks like being quite ugly low level.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10  5:59 request for review: Doing direct file I/O in Emacs Lisp John Wiegley
  2004-05-10  6:52 ` Kim F. Storm
  2004-05-10  8:27 ` David Kastrup
@ 2004-05-10  9:38 ` Andreas Schwab
  2004-05-10 11:29   ` Eli Zaretskii
  2004-05-10 14:19 ` Stefan Monnier
  2004-05-10 17:54 ` Richard Stallman
  4 siblings, 1 reply; 37+ messages in thread
From: Andreas Schwab @ 2004-05-10  9:38 UTC (permalink / raw)
  Cc: emacs-devel

John Wiegley <johnw@gnu.org> writes:

> +DEFUN ("file-handle-open", Ffile_handle_open, Sfile_handle_open,
> +       2, 2, 0,
> +       doc: /* Open a file handle for direct reading/writing. */)
> +     (path, mode)
> +     Lisp_Object path, mode;
> +{
> +  FILE *stream;
> +  Lisp_Object handle, lispstream;
> +  struct Lisp_File_Handle *lh;
> +
> +  if (! STRINGP (path) || ! STRINGP (mode))
> +    return Qnil;
> +
> +  if (! Ffile_exists_p (path))
> +    return Qnil;
> +
> +  stream = fopen(SDATA (path), SDATA (mode));
> +  if (! stream)
> +    return Qnil;

You should signal an error instead of just returning nil.

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux AG, Maxfeldstraße 5, 90409 Nürnberg, Germany
Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10 11:29   ` Eli Zaretskii
@ 2004-05-10 11:23     ` Andreas Schwab
  2004-05-10 15:04       ` Eli Zaretskii
  0 siblings, 1 reply; 37+ messages in thread
From: Andreas Schwab @ 2004-05-10 11:23 UTC (permalink / raw)
  Cc: johnw, emacs-devel

Eli Zaretskii <eliz@gnu.org> writes:

>> From: Andreas Schwab <schwab@suse.de>
>> Date: Mon, 10 May 2004 11:38:09 +0200
>> 
>> You should signal an error instead of just returning nil.
>
> Why?  Shouldn't this be up to a higher level of code, the one that
> calls file-handle-open, and knows what it is going to do with the
> handle?

How can you find out which error happened?  There are already three
different cases where the function just returns nil, and the caller would
have to repeat the very same tests to find out which error path was taken.
Argument type checks should just use CHECK_FOO.

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux AG, Maxfeldstraße 5, 90409 Nürnberg, Germany
Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10  9:38 ` Andreas Schwab
@ 2004-05-10 11:29   ` Eli Zaretskii
  2004-05-10 11:23     ` Andreas Schwab
  0 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2004-05-10 11:29 UTC (permalink / raw)
  Cc: johnw, emacs-devel

> From: Andreas Schwab <schwab@suse.de>
> Date: Mon, 10 May 2004 11:38:09 +0200
> 
> You should signal an error instead of just returning nil.

Why?  Shouldn't this be up to a higher level of code, the one that
calls file-handle-open, and knows what it is going to do with the
handle?

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10  5:59 request for review: Doing direct file I/O in Emacs Lisp John Wiegley
                   ` (2 preceding siblings ...)
  2004-05-10  9:38 ` Andreas Schwab
@ 2004-05-10 14:19 ` Stefan Monnier
  2004-05-10 17:46   ` Oliver Scholz
  2004-05-10 17:54 ` Richard Stallman
  4 siblings, 1 reply; 37+ messages in thread
From: Stefan Monnier @ 2004-05-10 14:19 UTC (permalink / raw)
  Cc: emacs-devel

> The following patch implements a file-handle interface for Emacs Lisp,
> which allows files to be directly opened and read/written to without
> an intervening buffer.  Eshell can now use this, for example, to
> greatly speed up output redirection (by several orders of magnitude).

Doesn't seem like a bad idea, but I'd rather postpone it to
after-the-release.


        Stefan

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10  8:27 ` David Kastrup
@ 2004-05-10 14:21   ` Stefan Monnier
  2004-05-10 15:59     ` David Kastrup
  2004-05-11  9:23   ` John Wiegley
  1 sibling, 1 reply; 37+ messages in thread
From: Stefan Monnier @ 2004-05-10 14:21 UTC (permalink / raw)
  Cc: John Wiegley, emacs-devel

> I had posted a proposal to change start-process instead some time ago
> that would have allowed for efficient I/O redirection under eshell.

The two don't have to be competing.  Instead I expect they'd be
used together (by extending your proposal to allow a file-handle
as "output port").


        Stefan

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10 11:23     ` Andreas Schwab
@ 2004-05-10 15:04       ` Eli Zaretskii
  0 siblings, 0 replies; 37+ messages in thread
From: Eli Zaretskii @ 2004-05-10 15:04 UTC (permalink / raw)
  Cc: johnw, emacs-devel

> From: Andreas Schwab <schwab@suse.de>
> Date: Mon, 10 May 2004 13:23:12 +0200
> 
> How can you find out which error happened?  There are already three
> different cases where the function just returns nil, and the caller would
> have to repeat the very same tests to find out which error path was taken.

You are right, we do throw errors in other file-related primitives, so
I guess we should do that here as well.

> Argument type checks should just use CHECK_FOO.

Agreed.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10 14:21   ` Stefan Monnier
@ 2004-05-10 15:59     ` David Kastrup
  2004-05-10 16:36       ` Stefan Monnier
  0 siblings, 1 reply; 37+ messages in thread
From: David Kastrup @ 2004-05-10 15:59 UTC (permalink / raw)
  Cc: John Wiegley, emacs-devel

Stefan Monnier <monnier@iro.umontreal.ca> writes:

> > I had posted a proposal to change start-process instead some time ago
> > that would have allowed for efficient I/O redirection under eshell.
> 
> The two don't have to be competing.  Instead I expect they'd be
> used together (by extending your proposal to allow a file-handle
> as "output port").

There is not much of an incentive to have explicit file handles if
you can establish all of the necessary redirections without them.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10 15:59     ` David Kastrup
@ 2004-05-10 16:36       ` Stefan Monnier
  2004-05-10 17:00         ` David Kastrup
  0 siblings, 1 reply; 37+ messages in thread
From: Stefan Monnier @ 2004-05-10 16:36 UTC (permalink / raw)
  Cc: John Wiegley, emacs-devel

>> The two don't have to be competing.  Instead I expect they'd be
>> used together (by extending your proposal to allow a file-handle
>> as "output port").
> There is not much of an incentive to have explicit file handles if
> you can establish all of the necessary redirections without them.

Piping the output of one process to the input of another, maybe?


        Stefan

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10 16:36       ` Stefan Monnier
@ 2004-05-10 17:00         ` David Kastrup
  2004-05-10 17:22           ` Stefan Monnier
  0 siblings, 1 reply; 37+ messages in thread
From: David Kastrup @ 2004-05-10 17:00 UTC (permalink / raw)
  Cc: John Wiegley, emacs-devel

Stefan Monnier <monnier@iro.umontreal.ca> writes:

> >> The two don't have to be competing.  Instead I expect they'd be
> >> used together (by extending your proposal to allow a file-handle
> >> as "output port").
> > There is not much of an incentive to have explicit file handles if
> > you can establish all of the necessary redirections without them.
> 
> Piping the output of one process to the input of another, maybe?

Academical right now.  You know the complicated procedure of pipe(2),
fork, selective closes and/or dup2 calls needed to properly create a
working pipeline?  I am afraid that "file-handle" does not buy us
that at all.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10 17:00         ` David Kastrup
@ 2004-05-10 17:22           ` Stefan Monnier
  0 siblings, 0 replies; 37+ messages in thread
From: Stefan Monnier @ 2004-05-10 17:22 UTC (permalink / raw)
  Cc: John Wiegley, emacs-devel

>> >> The two don't have to be competing.  Instead I expect they'd be
>> >> used together (by extending your proposal to allow a file-handle
>> >> as "output port").
>> > There is not much of an incentive to have explicit file handles if
>> > you can establish all of the necessary redirections without them.
>> 
>> Piping the output of one process to the input of another, maybe?

> Academical right now.  You know the complicated procedure of pipe(2),
> fork, selective closes and/or dup2 calls needed to properly create a
> working pipeline?  I am afraid that "file-handle" does not buy us
> that at all.

Obviously, John's code won't give us pipes for free.
But without both file handles and your suggested changes, I don't see how
we'll ever get eshell to fully support pipes.  I.e. it's not sufficient but
it's necessary.


        Stefan

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10 14:19 ` Stefan Monnier
@ 2004-05-10 17:46   ` Oliver Scholz
  2004-05-10 18:21     ` Stefan Monnier
  2004-05-11 12:22     ` Richard Stallman
  0 siblings, 2 replies; 37+ messages in thread
From: Oliver Scholz @ 2004-05-10 17:46 UTC (permalink / raw)
  Cc: emacs-devel

Stefan Monnier <monnier@iro.umontreal.ca> writes:

>> The following patch implements a file-handle interface for Emacs Lisp,
>> which allows files to be directly opened and read/written to without
>> an intervening buffer.  Eshell can now use this, for example, to
>> greatly speed up output redirection (by several orders of magnitude).
>
> Doesn't seem like a bad idea, but I'd rather postpone it to
> after-the-release.
[...]

FWIW … while working on an RTF reader I found it very annoying that I
have to insert the entire RTF file into a buffer, then tokenize it
character-wise, then delete those parts of the buffer that are
recognized as markup, applying text properties to the remaining text
as appropriate.

Being able to fetch characters from a stream, and deal with them
through several layers of abstraction, and then inserting the actual
text with properties into a buffer would be much nicer and cleaner.

Thus I have a low-level feature request (for after-the-release):
provide an interface to the file visiting routines such that Lisp code
which is meant to deal with formatted text can handle the file’s
content as a character stream (of course this makes sense only if it
is efficient).

For example, add an optional CHARSTREAMP element to the elements of
‘format-alist’. If it is non-nil, FROM-FN and TO-FN get a file handler
as additional argument and take the full responsibility for writing to
the file/inserting the file’s content into the buffer.

    Oliver
-- 
Oliver Scholz               22 Floréal an 212 de la Révolution
Taunusstr. 25               Liberté, Egalité, Fraternité!
60329 Frankfurt a. M.       http://www.jungdemokratenhessen.de
Tel. (069) 97 40 99 42      http://www.jdjl.org

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10  5:59 request for review: Doing direct file I/O in Emacs Lisp John Wiegley
                   ` (3 preceding siblings ...)
  2004-05-10 14:19 ` Stefan Monnier
@ 2004-05-10 17:54 ` Richard Stallman
  2004-05-11  9:20   ` John Wiegley
  4 siblings, 1 reply; 37+ messages in thread
From: Richard Stallman @ 2004-05-10 17:54 UTC (permalink / raw)
  Cc: emacs-devel

I deliberately decided not to add this feature to Emacs because it
tends to lead to a demand for lots more features to "make it
complete", and that ultimately leads to a lot more complexity.  For
most purposes, reading files into buffers is just as good.

Why do you think we should add this feature now?

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10 17:46   ` Oliver Scholz
@ 2004-05-10 18:21     ` Stefan Monnier
  2004-05-10 22:40       ` Oliver Scholz
  2004-05-11 12:22     ` Richard Stallman
  1 sibling, 1 reply; 37+ messages in thread
From: Stefan Monnier @ 2004-05-10 18:21 UTC (permalink / raw)
  Cc: emacs-devel

> FWIW … while working on an RTF reader I found it very annoying that I
> have to insert the entire RTF file into a buffer, then tokenize it
> character-wise, then delete those parts of the buffer that are
> recognized as markup, applying text properties to the remaining text
> as appropriate.

> Being able to fetch characters from a stream, and deal with them
> through several layers of abstraction, and then inserting the actual
> text with properties into a buffer would be much nicer and cleaner.

While I like code to be functional, I must say that from a memory management
point of view, buffer modifications are significantly more efficient than
string manipulation.
So the other approach you suggest might indeed be cleaner, but it might
also turn out to stress Emacs's GC too hard and the performance might suck.


        Stefan

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10 18:21     ` Stefan Monnier
@ 2004-05-10 22:40       ` Oliver Scholz
  0 siblings, 0 replies; 37+ messages in thread
From: Oliver Scholz @ 2004-05-10 22:40 UTC (permalink / raw)
  Cc: emacs-devel

Stefan Monnier <monnier@iro.umontreal.ca> writes:

[...]
> While I like code to be functional, I must say that from a memory management
> point of view, buffer modifications are significantly more efficient than
> string manipulation.
> So the other approach you suggest might indeed be cleaner, but it might
> also turn out to stress Emacs's GC too hard and the performance might suck.
[...]

Ah, o.k. Too bad. Thank you for the clarification, anyways.

    Oliver
-- 
Oliver Scholz               23 Floréal an 212 de la Révolution
Ostendstr. 61               Liberté, Egalité, Fraternité!
60314 Frankfurt a. M.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10 17:54 ` Richard Stallman
@ 2004-05-11  9:20   ` John Wiegley
  2004-05-12 19:41     ` Richard Stallman
  0 siblings, 1 reply; 37+ messages in thread
From: John Wiegley @ 2004-05-11  9:20 UTC (permalink / raw)

Richard Stallman <rms@gnu.org> writes:

> I deliberately decided not to add this feature to Emacs because it
> tends to lead to a demand for lots more features to "make it
> complete", and that ultimately leads to a lot more complexity.  For
> most purposes, reading files into buffers is just as good.
>
> Why do you think we should add this feature now?

I understand that rationale, but in this case Eshell has a crying need
for being able to directly write to files.  It makes output
redirection thousands of times faster, and also makes it possible to
output to device files and named pipes.

At present, output redirection is so slow that I have to spawn zsh to
run commands which generate over 10,000 lines of output (such as
debugging printfs).  I have always deplored this, since Eshell
satisfies me in almost every other respect.

I don't mind even if we use a custom interface, strictly for Eshell's
needs; but the file handle API seemed so simple, I thought others
might be interested in it too.

John

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10  8:27 ` David Kastrup
  2004-05-10 14:21   ` Stefan Monnier
@ 2004-05-11  9:23   ` John Wiegley
  2004-05-11 10:22     ` David Kastrup
  1 sibling, 1 reply; 37+ messages in thread
From: John Wiegley @ 2004-05-11  9:23 UTC (permalink / raw)

David Kastrup <dak@gnu.org> writes:

> I had posted a proposal to change start-process instead some time
> ago that would have allowed for efficient I/O redirection under
> eshell.  Have you seen that proposal?  That would be, IMO, a much
> more Emacsy solution.  The above looks like being quite ugly low
> level.

I would be interested in seeing it, though it would not help the
output redirection case (since Eshell's "cat" is in Lisp).  And
actually, I don't get bitten by the speed of piping in Eshell --
though maybe I just didn't notice how slow it was.

As for the comments from others: thank you, I will be reading through
them in more detail soon.

John

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-11  9:23   ` John Wiegley
@ 2004-05-11 10:22     ` David Kastrup
  0 siblings, 0 replies; 37+ messages in thread
From: David Kastrup @ 2004-05-11 10:22 UTC (permalink / raw)
  Cc: emacs-devel

John Wiegley <johnw@gnu.org> writes:

> David Kastrup <dak@gnu.org> writes:
> 
> > I had posted a proposal to change start-process instead some time
> > ago that would have allowed for efficient I/O redirection under
> > eshell.  Have you seen that proposal?  That would be, IMO, a much
> > more Emacsy solution.  The above looks like being quite ugly low
> > level.
> 
> I would be interested in seeing it, though it would not help the
> output redirection case (since Eshell's "cat" is in Lisp).

Ok, that _is_ a consideration I was not thinking of.  Of course, you
can't solve that with start-process alone.

> And actually, I don't get bitten by the speed of piping in Eshell --
> though maybe I just didn't notice how slow it was.

What's much more relevant is that it will merge stdout and stderr:
that makes it almost useless.

> As for the comments from others: thank you, I will be reading
> through them in more detail soon.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-10 17:46   ` Oliver Scholz
  2004-05-10 18:21     ` Stefan Monnier
@ 2004-05-11 12:22     ` Richard Stallman
  1 sibling, 0 replies; 37+ messages in thread
From: Richard Stallman @ 2004-05-11 12:22 UTC (permalink / raw)
  Cc: monnier, emacs-devel

    FWIW … while working on an RTF reader I found it very annoying that I
    have to insert the entire RTF file into a buffer, then tokenize it
    character-wise, then delete those parts of the buffer that are
    recognized as markup, applying text properties to the remaining text
    as appropriate.

    Being able to fetch characters from a stream, and deal with them
    through several layers of abstraction, and then inserting the actual
    text with properties into a buffer would be much nicer and cleaner.

I expect this would be much slower, because you'd need to have Lisp
code that handles characters one by one.  Parsing them in the buffer
is much faster, when you can use one call to skip-chars-forward and
one call to buffer-substring to process a whole token at once, without
ever needing a Lisp-level loop to process the token's characters one
by one.

Also, character set detection works more reliably on the entire file.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-11  9:20   ` John Wiegley
@ 2004-05-12 19:41     ` Richard Stallman
  2004-05-13  7:59       ` Kai Grossjohann
  2004-05-13 22:50       ` John Wiegley
  0 siblings, 2 replies; 37+ messages in thread
From: Richard Stallman @ 2004-05-12 19:41 UTC (permalink / raw)
  Cc: emacs-devel

    I understand that rationale, but in this case Eshell has a crying need
    for being able to directly write to files.  It makes output
    redirection thousands of times faster, and also makes it possible to
    output to device files and named pipes.

Could you tell me more?  What data does eshell want to write into
files?  Where is the data coming from?  Is it coming from subprocesses
started by eshell, or is it generated from eshell itself?

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-12 19:41     ` Richard Stallman
@ 2004-05-13  7:59       ` Kai Grossjohann
  2004-05-14  9:21         ` Richard Stallman
  2004-05-13 22:50       ` John Wiegley
  1 sibling, 1 reply; 37+ messages in thread
From: Kai Grossjohann @ 2004-05-13  7:59 UTC (permalink / raw)

Richard Stallman <rms@gnu.org> writes:

>     I understand that rationale, but in this case Eshell has a crying need
>     for being able to directly write to files.  It makes output
>     redirection thousands of times faster, and also makes it possible to
>     output to device files and named pipes.
>
> Could you tell me more?  What data does eshell want to write into
> files?  Where is the data coming from?  Is it coming from subprocesses
> started by eshell, or is it generated from eshell itself?

Both.  Like any shell, eshell provides builtins and it is able to
invoke subprocesses.  For example, you can type "find-file foo" as a
command to eshell, and eshell will then invoke the builtin find-file.

Just as a random command example, perhaps somebody would like to type
"ls > foo" at a shell prompt.  Surely that person would also use the
same command from eshell.  In eshell, ls is a builtin command, so this
is an example of redirection of output generated from eshell itself.

As another random example, perhaps people type commands similar to
"awk '{print $1}' bar > bar.out".  awk is not a builtin in eshell, so
this is an example of redirection of output generated from a
subprocess.

I would very much like to combine input and output redirection with
builtin and external commands, just as it is possible in other shells:
if I type "x | y | z > a" at a shell, then I don't normally think
about whether x, y, z are builtin commands.

Kai

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-12 19:41     ` Richard Stallman
  2004-05-13  7:59       ` Kai Grossjohann
@ 2004-05-13 22:50       ` John Wiegley
  2004-05-14 21:02         ` Richard Stallman
  1 sibling, 1 reply; 37+ messages in thread
From: John Wiegley @ 2004-05-13 22:50 UTC (permalink / raw)

Richard Stallman <rms@gnu.org> writes:

> Could you tell me more?  What data does eshell want to write into
> files?  Where is the data coming from?  Is it coming from
> subprocesses started by eshell, or is it generated from eshell
> itself?

Like Kai said, output comes both from eshell (Lisp) and subprocesses.

Requiring all data to pass through a buffer, in order to save it to
disk, is difficult to handle efficiently.  Because the whole file must
be stored in the buffer before being saved, it consumes unnecessary
amounts of memory and processor time.

One could make the argument that Emacs shouldn't be doing the work of
a shell, and so shouldn't need this kind of efficiency.  But having a
command shell integrated with Lisp -- that works on all platforms --
has been so useful to me, that I'd really like it to be more efficient
if it's not too much trouble.

John

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-13  7:59       ` Kai Grossjohann
@ 2004-05-14  9:21         ` Richard Stallman
  2004-05-14 10:42           ` Kai Grossjohann
  2004-05-14 21:43           ` John Wiegley
  0 siblings, 2 replies; 37+ messages in thread
From: Richard Stallman @ 2004-05-14  9:21 UTC (permalink / raw)
  Cc: emacs-devel

    Both.  Like any shell, eshell provides builtins and it is able to
    invoke subprocesses.  For example, you can type "find-file foo" as a
    command to eshell, and eshell will then invoke the builtin find-file.

When eshell sees a redirect from a builtin, I think it would be most
efficient to produce the output in a buffer and write it with
write-region into the file.

When it is a matter of a subprocess, what could be useful is a feature
for start-process to specify files for additional descriptors.
That would be a completely different feature.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-14  9:21         ` Richard Stallman
@ 2004-05-14 10:42           ` Kai Grossjohann
  2004-05-15  8:53             ` Richard Stallman
  2004-05-14 21:43           ` John Wiegley
  1 sibling, 1 reply; 37+ messages in thread
From: Kai Grossjohann @ 2004-05-14 10:42 UTC (permalink / raw)
  Cc: emacs-devel

Richard Stallman <rms@gnu.org> writes:

>     Both.  Like any shell, eshell provides builtins and it is able to
>     invoke subprocesses.  For example, you can type "find-file foo" as a
>     command to eshell, and eshell will then invoke the builtin find-file.
>
> When eshell sees a redirect from a builtin, I think it would be most
> efficient to produce the output in a buffer and write it with
> write-region into the file.

What about "ls foo | grep bla"?  Then the output isn't a file.

> When it is a matter of a subprocess, what could be useful is a feature
> for start-process to specify files for additional descriptors.
> That would be a completely different feature.

That would be an interesting feature.  But I think there is also the
need to send output from a process to a Lisp command, and to send
output from a Lisp command to a process.  This way, one can do:

    shell_command | lisp_builtin | shell_command_2

WDYT?

But talk is cheap...
Kai

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-13 22:50       ` John Wiegley
@ 2004-05-14 21:02         ` Richard Stallman
  0 siblings, 0 replies; 37+ messages in thread
From: Richard Stallman @ 2004-05-14 21:02 UTC (permalink / raw)
  Cc: emacs-devel

    Requiring all data to pass through a buffer, in order to save it to
    disk, is difficult to handle efficiently.  Because the whole file must
    be stored in the buffer before being saved, 

write-region can append to a file.  That's probably the easiest
way to handle eshell builtins.

write-region can also append the contents of a string.  So you don't
have to construct the builtin's output in a buffer--you can do it
in a string.  However, a buffer will be more efficient in most
cases.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-14  9:21         ` Richard Stallman
  2004-05-14 10:42           ` Kai Grossjohann
@ 2004-05-14 21:43           ` John Wiegley
  2004-05-15 18:33             ` Richard Stallman
  1 sibling, 1 reply; 37+ messages in thread
From: John Wiegley @ 2004-05-14 21:43 UTC (permalink / raw)

Richard Stallman <rms@gnu.org> writes:

> When eshell sees a redirect from a builtin, I think it would be most
> efficient to produce the output in a buffer and write it with
> write-region into the file.

Below are the results of comparing write-region to the file-handle
API.  This test used Eshell's builtin cat to write 21MB to another
file.

  ~74s  save-buffer (the current method)
  ~82s  file-handle-write
 ~250s  write-region [with call to `message' disabled]
 ~318s  write-region

It is interesting that write-region is by far the most inefficient,
and that my file-handle API is not the fastest method.  I therefore
retract my earlier proposal.

However, the real slowdown is with start-process, as others mentioned.
Here are the times for doing the same cat, but using the system's cat:

 ~952s  file-handle-write
  ...s  save-buffer (the current method)

In this case, save-buffer took so long that after 20 minutes Emacs
hung (for some other reason), and I didn't have sufficient patience to
run the test again.

> When it is a matter of a subprocess, what could be useful is a
> feature for start-process to specify files for additional
> descriptors.  That would be a completely different feature.

This would be very useful.

John

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-14 10:42           ` Kai Grossjohann
@ 2004-05-15  8:53             ` Richard Stallman
  2004-05-15 16:27               ` Kai Grossjohann
  0 siblings, 1 reply; 37+ messages in thread
From: Richard Stallman @ 2004-05-15  8:53 UTC (permalink / raw)
  Cc: emacs-devel

    > When eshell sees a redirect from a builtin, I think it would be most
    > efficient to produce the output in a buffer and write it with
    > write-region into the file.

    What about "ls foo | grep bla"?  Then the output isn't a file.

Then you could put the ls output in a buffer and feed it to grep.
That would work fine.

    That would be an interesting feature.  But I think there is also the
    need to send output from a process to a Lisp command, and to send
    output from a Lisp command to a process.  This way, one can do:

	shell_command | lisp_builtin | shell_command_2

You can do this with the existing facilities of start-process.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-15  8:53             ` Richard Stallman
@ 2004-05-15 16:27               ` Kai Grossjohann
  2004-05-16 13:20                 ` Richard Stallman
  0 siblings, 1 reply; 37+ messages in thread
From: Kai Grossjohann @ 2004-05-15 16:27 UTC (permalink / raw)


Richard Stallman <rms@gnu.org> writes:

>     > When eshell sees a redirect from a builtin, I think it would be most
>     > efficient to produce the output in a buffer and write it with
>     > write-region into the file.
>
>     What about "ls foo | grep bla"?  Then the output isn't a file.
>
> Then you could put the ls output in a buffer and feed it to grep.
> That would work fine.

But then, you would have to wait until ls is finished.  It would be
very useful to be able to see partial results while the command is
running.  (In this particular case, it is unlikely that the builtin
takes a lot of time, but it is only an example.)

Kai

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-14 21:43           ` John Wiegley
@ 2004-05-15 18:33             ` Richard Stallman
  2004-05-15 21:36               ` John Wiegley
  0 siblings, 1 reply; 37+ messages in thread
From: Richard Stallman @ 2004-05-15 18:33 UTC (permalink / raw)
  Cc: emacs-devel

    Below are the results of comparing write-region to the file-handle
    API.  This test used Eshell's builtin cat to write 21MB to another
    file.

      ~74s  save-buffer (the current method)
      ~82s  file-handle-write
     ~250s  write-region [with call to `message' disabled]
     ~318s  write-region

    It is interesting that write-region is by far the most inefficient,
    and that my file-handle API is not the fastest method.

Since save-buffer calls write-region, both the fastest and the slowest
method use write-region.  What is the difference between these two
methods?

I wonder if it has to do with whether a coding system is being
used, or whether a coding system is being detected automatically.

    However, the real slowdown is with start-process, as others mentioned.
    Here are the times for doing the same cat, but using the system's cat:

Sorry, I do not understand what that means.

     ~952s  file-handle-write
      ...s  save-buffer (the current method)

I can't understand what alternatives you have compared.
Could you please explain them more concretely?

start-process takes a certain amount of time, but it should be a fixed
amount, and less than a second.  How many times did you start a
process here?

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-15 18:33             ` Richard Stallman
@ 2004-05-15 21:36               ` John Wiegley
  2004-05-15 22:13                 ` David Kastrup
  2004-05-17 11:04                 ` Richard Stallman
  0 siblings, 2 replies; 37+ messages in thread
From: John Wiegley @ 2004-05-15 21:36 UTC (permalink / raw)

Richard Stallman <rms@gnu.org> writes:

> Since save-buffer calls write-region, both the fastest and the
> slowest method use write-region.  What is the difference between
> these two methods?

Hmm.. that IS curious.  In the save-buffer case, I call
insert-and-inherit in a temp buffer thousands of times, then one call
to save-buffer.  In the write-region case, I am calling write-region
thousands of times.  I think the difference is the constant filesystem
access in the latter case.

>     However, the real slowdown is with start-process, as others
>     mentioned.  Here are the times for doing the same cat, but using
>     the system's cat:
>
> Sorry, I do not understand what that means.
>
>      ~952s  file-handle-write
>       ...s  save-buffer (the current method)
>
> I can't understand what alternatives you have compared.
> Could you please explain them more concretely?

If I start a process with start-process (/usr/bin/cat) and redirect
its output to a file, it is far slower than if I simply output the
same data to a file (eshell/cat) -- even though the resulting "output"
in both cases is the same.  Why is receiving output via a process
sentinel so slow?

> start-process takes a certain amount of time, but it should be a
> fixed amount, and less than a second.  How many times did you start
> a process here?

I am only starting the system cat once.

John

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-15 21:36               ` John Wiegley
@ 2004-05-15 22:13                 ` David Kastrup
  2004-05-16  6:41                   ` Eli Zaretskii
  2004-05-17 11:04                 ` Richard Stallman
  1 sibling, 1 reply; 37+ messages in thread
From: David Kastrup @ 2004-05-15 22:13 UTC (permalink / raw)
  Cc: emacs-devel

John Wiegley <johnw@gnu.org> writes:

> If I start a process with start-process (/usr/bin/cat) and redirect
> its output to a file, it is far slower than if I simply output the
> same data to a file (eshell/cat) -- even though the resulting
> "output" in both cases is the same.  Why is receiving output via a
> process sentinel so slow?

Use a multiprocessor machine.

Part of the reason is the same one for which
process-adaptive-read-buffering exists:

cat produces some output.  As soon as one line (or whatever unit) is
full, the operating system intervenes and schedules Emacs.  Emacs
processes one line of output, then checks whether it can run timers,
whatever.  If Emacs finally has decided it can't do anything more
useful, it puts itself to sleep, reading on the pipe.  Then the
operating system wakes up cat again, for another single line.

The main fault, in my opinion, lies with the "low-latency" operating
system that schedules away the CPU from the writing process as soon as
it has produced any output.  That makes pipes pretty inefficient.  If
you have a multiprocessor machine, a simple job like "cat" can easily
stuff the pipe completely while Emacs is processing the last chunk.
On a uniprocessor machine, this does not happen since cat does not
even get the tiny amount of CPU power necessary to fill the pipe.

The problem is that I/O using "select" is ready the moment a _single_
byte is available.

Perhaps one would need some nicer system calls for telling Linux "ok,
wake me up immediately if any pipe is _full_.  And wake me up
immediately if there is input on some of [list of files].  Other than
that, only wake me up if there is input and no other process is
wanting the CPU".  So we'd need to tell the operating system how
urgent we want what amount of data on what input to be scheduled for
processing.

process-adaptive-read-buffering tries to fudge around this problem.  I
have some feeling that it might still be buggy.  I seem to remember
some inconclusive reports where larger delays occurred, maybe under
MSWindows.

Basically, reports indicate that even with
process-adaptive-read-buffering one is maybe 30% slower than if the
command gets started with a trailing "|dd obs=8k" pipeline, but still
quite a bit faster than if you don't use process-adaptive-read-buffering.

A mess.

Anyway, it might be worth optimizing and profiling Emacs for good
typical filter routine performance even when small data chunks are
involved.  The more administrational overhead Emacs tries to do on
each little wakeup, the slower we get.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-15 22:13                 ` David Kastrup
@ 2004-05-16  6:41                   ` Eli Zaretskii
  2004-05-16 17:46                     ` David Kastrup
  0 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2004-05-16  6:41 UTC (permalink / raw)
  Cc: emacs-devel

> From: David Kastrup <dak@gnu.org>
> Date: 16 May 2004 00:13:57 +0200
> 
> The problem is that I/O using "select" is ready the moment a _single_
> byte is available.

Forgive me for a possibly stupid question, but with applications such
as `cat' with their stdout redirected to a pipe, shouldn't we have
`cat' write the pipe in relatively large blocks, as defined by the
default buffering of its stdout?

If such buffering does take place, then, for this specific scenario,
we should already have the OS reschedule only when a large chunk of
data is written to the pipe.

(I realize that, in general, the reschedule-on-every-byte is a
possibility, but I thought it happens mainly with `dd'-type
applications that write binary data.)

> Perhaps one would need some nicer system calls for telling Linux "ok,
> wake me up immediately if any pipe is _full_.

Almost full, you mean.  If the pipe is full, it's too late, since the
pipe could be written again before the pipe reader actually awakes and
runs, in which case you will lose characters at best and get SIGPIPE
at worst.

Also, this has complications when the pipe writer exits without
filling the pipe (which is the normal case).

> And wake me up
> immediately if there is input on some of [list of files].  Other than
> that, only wake me up if there is input and no other process is
> wanting the CPU".  So we'd need to tell the operating system how
> urgent we want what amount of data on what input to be scheduled for
> processing.

Sounds like a brutal intervention into the OS scheduler, which most
systems will not allow.

It's possible that a much simpler solution would be to have a separate
thread or a helper subprocess read the pipe until a certain amount of
data is available or a timeout expires, and only then present the data
to Emacs's process filter/sentinel.

Again, apologies if this is just line noise.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-15 16:27               ` Kai Grossjohann
@ 2004-05-16 13:20                 ` Richard Stallman
  0 siblings, 0 replies; 37+ messages in thread
From: Richard Stallman @ 2004-05-16 13:20 UTC (permalink / raw)
  Cc: emacs-devel

    >     What about "ls foo | grep bla"?  Then the output isn't a file.
    >
    > Then you could put the ls output in a buffer and feed it to grep.
    > That would work fine.

    But then, you would have to wait until ls is finished.  It would be
    very useful to be able to see partial results while the command is
    running.

You could feed the output to grep in whatever size chunk you like.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-16  6:41                   ` Eli Zaretskii
@ 2004-05-16 17:46                     ` David Kastrup
  0 siblings, 0 replies; 37+ messages in thread
From: David Kastrup @ 2004-05-16 17:46 UTC (permalink / raw)
  Cc: emacs-devel

Eli Zaretskii <eliz@gnu.org> writes:

> > From: David Kastrup <dak@gnu.org>
> > Date: 16 May 2004 00:13:57 +0200
> > 
> > The problem is that I/O using "select" is ready the moment a _single_
> > byte is available.
> 
> Forgive me for a possibly stupid question, but with applications such
> as `cat' with their stdout redirected to a pipe, shouldn't we have
> `cat' write the pipe in relatively large blocks, as defined by the
> default buffering of its stdout?

cat will probably write in lines when writing to a pseudo tty.

Anyway, it is not so much cat that I am concerned with: the most
common problem is applications with a small buffer size running in
comint mode.

Running TeX inside of an Emacs buffer is slow, easily 30% slower than
running it in an XTerm.  And it is the console I/O that causes this.

> > Perhaps one would need some nicer system calls for telling Linux
> > "ok, wake me up immediately if any pipe is _full_.
> 
> Almost full, you mean.  If the pipe is full, it's too late, since
> the pipe could be written again before the pipe reader actually
> awakes and runs, in which case you will lose characters at best and
> get SIGPIPE at worst.

I recommend that you look up the semantics of Unix pipes again.  You
seem to labor under the delusion that there is a constant race
condition going on.  If the pipe is filled, the writing process will
simply get blocked.  SIGPIPE is never raised unless the reading
process has _closed_ its end of the pipe, for example by exiting.

> Also, this has complications when the pipe writer exits without
> filling the pipe (which is the normal case).

Again, I don't know what kind of pipes you are talking of here.  A
read call on a pipe will block the reader until enough data is
available, or until the writer has closed its end of the pipe.

> > And wake me up immediately if there is input on some of [list of
> > files].  Other than that, only wake me up if there is input and no
> > other process is wanting the CPU".  So we'd need to tell the
> > operating system how urgent we want what amount of data on what
> > input to be scheduled for processing.
> 
> Sounds like a brutal intervention into the OS scheduler, which most
> systems will not allow.

Intervention?  Telling the scheduler where I expect interactive
response, and where I expect batch processing?  That is no
"intervention", it is information.

> It's possible that a much simpler solution would be to have a
> separate thread or a helper subprocess read the pipe until a certain
> amount of data is available or a timeout expires, and only then
> present the data to Emacs's process filter/sentinel.

Maybe.  If we had some fast preprocessor that would score only a
moderate penalty when the operating system schedules it all the time,
this might help quite a bit.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: request for review: Doing direct file I/O in Emacs Lisp
  2004-05-15 21:36               ` John Wiegley
  2004-05-15 22:13                 ` David Kastrup
@ 2004-05-17 11:04                 ` Richard Stallman
  1 sibling, 0 replies; 37+ messages in thread
From: Richard Stallman @ 2004-05-17 11:04 UTC (permalink / raw)
  Cc: emacs-devel

    Hmm.. that IS curious.  In the save-buffer case, I call
    insert-and-inherit in a temp buffer thousands of times, then one call
    to save-buffer.  In the write-region case, I am calling write-region
    thousands of times.  I think the difference is the constant filesystem
    access in the latter case.

Now that I see what the last alternative consists of, I am sure that
is true.

You could try calling write-region using a temporary buffer
after making sure that buffer-file-coding-system is set.
That might be somewhat faster than using save-buffer.

    > I can't understand what alternatives you have compared.
    > Could you please explain them more concretely?

    If I start a process with start-process (/usr/bin/cat) and redirect
    its output to a file, it is far slower than if I simply output the
    same data to a file (eshell/cat) -- even though the resulting "output"
    in both cases is the same.  Why is receiving output via a process
    sentinel so slow?

As far as I can see, neither of the two cases you described involves
receiving output from a subprocess at all, so neither one involves
the sentinel.  I suspect that the former case is slow because sending
input to the subprocess takes time.

Someone else suggested process switching overhead may be a big
factor in that.  It sounds plausible.

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2004-05-17 11:04 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-05-10  5:59 request for review: Doing direct file I/O in Emacs Lisp John Wiegley
2004-05-10  6:52 ` Kim F. Storm
2004-05-10  8:27 ` David Kastrup
2004-05-10 14:21   ` Stefan Monnier
2004-05-10 15:59     ` David Kastrup
2004-05-10 16:36       ` Stefan Monnier
2004-05-10 17:00         ` David Kastrup
2004-05-10 17:22           ` Stefan Monnier
2004-05-11  9:23   ` John Wiegley
2004-05-11 10:22     ` David Kastrup
2004-05-10  9:38 ` Andreas Schwab
2004-05-10 11:29   ` Eli Zaretskii
2004-05-10 11:23     ` Andreas Schwab
2004-05-10 15:04       ` Eli Zaretskii
2004-05-10 14:19 ` Stefan Monnier
2004-05-10 17:46   ` Oliver Scholz
2004-05-10 18:21     ` Stefan Monnier
2004-05-10 22:40       ` Oliver Scholz
2004-05-11 12:22     ` Richard Stallman
2004-05-10 17:54 ` Richard Stallman
2004-05-11  9:20   ` John Wiegley
2004-05-12 19:41     ` Richard Stallman
2004-05-13  7:59       ` Kai Grossjohann
2004-05-14  9:21         ` Richard Stallman
2004-05-14 10:42           ` Kai Grossjohann
2004-05-15  8:53             ` Richard Stallman
2004-05-15 16:27               ` Kai Grossjohann
2004-05-16 13:20                 ` Richard Stallman
2004-05-14 21:43           ` John Wiegley
2004-05-15 18:33             ` Richard Stallman
2004-05-15 21:36               ` John Wiegley
2004-05-15 22:13                 ` David Kastrup
2004-05-16  6:41                   ` Eli Zaretskii
2004-05-16 17:46                     ` David Kastrup
2004-05-17 11:04                 ` Richard Stallman
2004-05-13 22:50       ` John Wiegley
2004-05-14 21:02         ` Richard Stallman

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).