all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
* decompress.c now also compresses
@ 2020-03-29 15:52 Juan José García-Ripoll
  2020-03-29 15:57 ` Juan José García-Ripoll
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Juan José García-Ripoll @ 2020-03-29 15:52 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 1039 bytes --]

Hi,

I attach a patch that adds support for compressing buffers using
zlib. It is a minor extension to the file src/decompress.c but it may be
useful because of two reasons (i) in Windows, Emasc is shipped without
g[un]zip.exe, (ii) the whole process of compression takes about 20 times
less time than calling gzip.

(benchmark 1
  '(mapc 'simple-zlib-compress
      (directory-files  "~/emacs-build/git/emacs-27/lisp/" t ".*\\.el")))
;; => Elapsed time: 2.602588s (0.014894s in 1 GCs)

(benchmark 1
  '(mapc 'simple-gzip-compress
       (directory-files  "~/emacs-build/git/emacs-27/lisp/" t ".*\\.el")))
;; => Elapsed time: 61.986128s (0.039815s in 3 GCs)

I attach a patch that was produced against emacs-27 but also seems to
work against emacs-28 (at least the decompress.c part, I am unsure about
how NEWS should be edited).

I also attach a lisp file that test and benchmark the
compression/decompression using Emacs' lisp sources.

Best

Juanjo

-- 
Juan José García Ripoll
http://juanjose.garciaripoll.com
http://quinfog.hbar.es

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: decompress.diff --]
[-- Type: text/x-patch, Size: 20186 bytes --]

diff --git a/etc/NEWS b/etc/NEWS
index d3f27e3..96ddf5c 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -3656,6 +3656,10 @@ easier to undo immediately afterwards.
 ** When called interactively, 'next-buffer' and 'previous-buffer' now
 signal 'user-error' if there is no buffer to switch to.
 
+---
+** New function 'zlib-compress-region' compresses a unibyte buffer region using
+gzip's format, via the zlib library.
+
 \f
 * Changes in Emacs 27.1 on Non-Free Operating Systems
 
diff --git a/src/decompress.c b/src/decompress.c
index 5d24638..68178db 100644
--- a/src/decompress.c
+++ b/src/decompress.c
@@ -1,247 +1,394 @@
-/* Interface to zlib.
-   Copyright (C) 2013-2020 Free Software Foundation, Inc.
-
-This file is part of GNU Emacs.
-
-GNU Emacs is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or (at
-your option) any later version.
-
-GNU Emacs is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
-
-#include <config.h>
-
-#ifdef HAVE_ZLIB
-
-#include <zlib.h>
-
-#include "lisp.h"
-#include "buffer.h"
-#include "composite.h"
-
-#include <verify.h>
-
-#ifdef WINDOWSNT
-# include <windows.h>
-# include "w32common.h"
-# include "w32.h"
-
-DEF_DLL_FN (int, inflateInit2_,
-	    (z_streamp strm, int windowBits, const char *version,
-	     int stream_size));
-DEF_DLL_FN (int, inflate, (z_streamp strm, int flush));
-DEF_DLL_FN (int, inflateEnd, (z_streamp strm));
-
-static bool zlib_initialized;
-
-static bool
-init_zlib_functions (void)
-{
-  HMODULE library = w32_delayed_load (Qzlib);
-
-  if (!library)
-    return false;
-
-  LOAD_DLL_FN (library, inflateInit2_);
-  LOAD_DLL_FN (library, inflate);
-  LOAD_DLL_FN (library, inflateEnd);
-  return true;
-}
-
-# undef inflate
-# undef inflateEnd
-# undef inflateInit2_
-
-# define inflate fn_inflate
-# define inflateEnd fn_inflateEnd
-# define inflateInit2_ fn_inflateInit2_
-
-#endif	/* WINDOWSNT */
-
-\f
-struct decompress_unwind_data
-{
-  ptrdiff_t old_point, orig, start, nbytes;
-  z_stream *stream;
-};
-
-static void
-unwind_decompress (void *ddata)
-{
-  struct decompress_unwind_data *data = ddata;
-  inflateEnd (data->stream);
-
-  /* Delete any uncompressed data already inserted on error, but
-     without calling the change hooks.  */
-  if (data->start)
-    {
-      del_range_2 (data->start, data->start, /* byte, char offsets the same */
-                   data->start + data->nbytes, data->start + data->nbytes,
-                   0);
-      update_compositions (data->start, data->start, CHECK_HEAD);
-      /* "Balance" the before-change-functions call, which would
-         otherwise be left "hanging". */
-      signal_after_change (data->orig, data->start - data->orig,
-                           data->start - data->orig);
-    }
-  /* Put point where it was, or if the buffer has shrunk because the
-     compressed data is bigger than the uncompressed, at
-     point-max.  */
-  SET_PT (min (data->old_point, ZV));
-}
-
-DEFUN ("zlib-available-p", Fzlib_available_p, Szlib_available_p, 0, 0, 0,
-       doc: /* Return t if zlib decompression is available in this instance of Emacs.  */)
-     (void)
-{
-#ifdef WINDOWSNT
-  Lisp_Object found = Fassq (Qzlib, Vlibrary_cache);
-  if (CONSP (found))
-    return XCDR (found);
-  else
-    {
-      Lisp_Object status;
-      zlib_initialized = init_zlib_functions ();
-      status = zlib_initialized ? Qt : Qnil;
-      Vlibrary_cache = Fcons (Fcons (Qzlib, status), Vlibrary_cache);
-      return status;
-    }
-#else
-  return Qt;
-#endif
-}
-
-DEFUN ("zlib-decompress-region", Fzlib_decompress_region,
-       Szlib_decompress_region,
-       2, 3, 0,
-       doc: /* Decompress a gzip- or zlib-compressed region.
-Replace the text in the region by the decompressed data.
-
-If optional parameter ALLOW-PARTIAL is nil or omitted, then on
-failure, return nil and leave the data in place.  Otherwise, return
-the number of bytes that were not decompressed and replace the region
-text by whatever data was successfully decompressed (similar to gzip).
-If decompression is completely successful return t.
-
-This function can be called only in unibyte buffers.  */)
-  (Lisp_Object start, Lisp_Object end, Lisp_Object allow_partial)
-{
-  ptrdiff_t istart, iend, pos_byte;
-  z_stream stream;
-  int inflate_status;
-  struct decompress_unwind_data unwind_data;
-  ptrdiff_t count = SPECPDL_INDEX ();
-
-  validate_region (&start, &end);
-
-  if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
-    error ("This function can be called only in unibyte buffers");
-
-#ifdef WINDOWSNT
-  if (!zlib_initialized)
-    zlib_initialized = init_zlib_functions ();
-  if (!zlib_initialized)
-    {
-      message1 ("zlib library not found");
-      return Qnil;
-    }
-#endif
-
-  /* This is a unibyte buffer, so character positions and bytes are
-     the same.  */
-  istart = XFIXNUM (start);
-  iend = XFIXNUM (end);
-
-  /* Do the following before manipulating the gap. */
-  modify_text (istart, iend);
-
-  move_gap_both (iend, iend);
-
-  stream.zalloc = Z_NULL;
-  stream.zfree = Z_NULL;
-  stream.opaque = Z_NULL;
-  stream.avail_in = 0;
-  stream.next_in = Z_NULL;
-
-  /* The magic number 32 apparently means "autodetect both the gzip and
-     zlib formats" according to zlib.h.  */
-  if (inflateInit2 (&stream, MAX_WBITS + 32) != Z_OK)
-    return Qnil;
-
-  unwind_data.orig = istart;
-  unwind_data.start = iend;
-  unwind_data.stream = &stream;
-  unwind_data.old_point = PT;
-  unwind_data.nbytes = 0;
-  record_unwind_protect_ptr (unwind_decompress, &unwind_data);
-
-  /* Insert the decompressed data at the end of the compressed data.  */
-  SET_PT (iend);
-
-  pos_byte = istart;
-
-  /* Keep calling 'inflate' until it reports an error or end-of-input.  */
-  do
-    {
-      /* Maximum number of bytes that one 'inflate' call should read and write.
-	 Do not make avail_out too large, as that might unduly delay C-g.
-	 zlib requires that avail_in and avail_out not exceed UINT_MAX.  */
-      ptrdiff_t avail_in = min (iend - pos_byte, UINT_MAX);
-      int avail_out = 16 * 1024;
-      int decompressed;
-
-      if (GAP_SIZE < avail_out)
-	make_gap (avail_out - GAP_SIZE);
-      stream.next_in = BYTE_POS_ADDR (pos_byte);
-      stream.avail_in = avail_in;
-      stream.next_out = GPT_ADDR;
-      stream.avail_out = avail_out;
-      inflate_status = inflate (&stream, Z_NO_FLUSH);
-      pos_byte += avail_in - stream.avail_in;
-      decompressed = avail_out - stream.avail_out;
-      insert_from_gap (decompressed, decompressed, 0);
-      unwind_data.nbytes += decompressed;
-      maybe_quit ();
-    }
-  while (inflate_status == Z_OK);
-
-  Lisp_Object ret = Qt;
-  if (inflate_status != Z_STREAM_END)
-    {
-      if (!NILP (allow_partial))
-        ret = make_int (iend - pos_byte);
-      else
-        return unbind_to (count, Qnil);
-    }
-
-  unwind_data.start = 0;
-
-  /* Delete the compressed data.  */
-  del_range_2 (istart, istart, /* byte and char offsets are the same. */
-               iend, iend, 0);
-
-  signal_after_change (istart, iend - istart, unwind_data.nbytes);
-  update_compositions (istart, istart, CHECK_HEAD);
-
-  return unbind_to (count, ret);
-}
-
-\f
-/***********************************************************************
-			    Initialization
- ***********************************************************************/
-void
-syms_of_decompress (void)
-{
-  defsubr (&Szlib_decompress_region);
-  defsubr (&Szlib_available_p);
-}
-
-#endif /* HAVE_ZLIB */
+/* Interface to zlib.
+   Copyright (C) 2013-2020 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#ifdef HAVE_ZLIB
+
+#include <zlib.h>
+
+#include "lisp.h"
+#include "buffer.h"
+#include "composite.h"
+
+#include <verify.h>
+
+#ifdef WINDOWSNT
+# include <windows.h>
+# include "w32common.h"
+# include "w32.h"
+
+/* We import inflateInit2_ and deflateInit2_ because inflateInit and
+   deflateInit are macros defined on top of these symbols by zlib.h */
+DEF_DLL_FN (int, inflateInit2_,
+	    (z_streamp strm, int windowBits, const char *version,
+	     int stream_size));
+DEF_DLL_FN (int, inflate, (z_streamp strm, int flush));
+DEF_DLL_FN (int, inflateEnd, (z_streamp strm));
+DEF_DLL_FN (int, deflateInit2_,
+	    (z_streamp strm, int level, int method, int windowBits,
+             int memLevel, int strategy, const char *version,
+             int stream_size));
+DEF_DLL_FN (int, deflateInit2_,
+	    (z_streamp strm, int level, int method, int windowBits,
+             int memLevel, int strategy, const char *version,
+             int stream_size));
+DEF_DLL_FN (int, deflate, (z_streamp strm, int flush));
+DEF_DLL_FN (int, deflateEnd, (z_streamp strm));
+
+static bool zlib_initialized;
+
+static bool
+init_zlib_functions (void)
+{
+  HMODULE library = w32_delayed_load (Qzlib);
+
+  if (!library)
+    return false;
+
+  LOAD_DLL_FN (library, inflateInit2_);
+  LOAD_DLL_FN (library, inflate);
+  LOAD_DLL_FN (library, inflateEnd);
+  LOAD_DLL_FN (library, deflateInit2_);
+  LOAD_DLL_FN (library, deflate);
+  LOAD_DLL_FN (library, deflateEnd);
+  return true;
+}
+
+# undef inflate
+# undef inflateEnd
+# undef inflateInit2_
+# undef deflate
+# undef deflateEnd
+# undef deflateInit2_
+
+# define inflate fn_inflate
+# define inflateEnd fn_inflateEnd
+# define inflateInit2_ fn_inflateInit2_
+# define deflate fn_deflate
+# define deflateEnd fn_deflateEnd
+# define deflateInit2_ fn_deflateInit2_
+
+#endif	/* WINDOWSNT */
+
+\f
+struct decompress_unwind_data
+{
+  ptrdiff_t old_point, orig, start, nbytes;
+  z_stream *stream;
+  int deflating;
+};
+
+static void
+unwind_zlib (void *ddata)
+{
+  struct decompress_unwind_data *data = ddata;
+  (data->deflating? deflateEnd : inflateEnd) (data->stream);
+
+  /* Delete any uncompressed data already inserted on error, but
+     without calling the change hooks.  */
+  if (data->start)
+    {
+      del_range_2 (data->start, data->start, /* byte, char offsets the same */
+                   data->start + data->nbytes, data->start + data->nbytes,
+                   0);
+      update_compositions (data->start, data->start, CHECK_HEAD);
+      /* "Balance" the before-change-functions call, which would
+         otherwise be left "hanging". */
+      signal_after_change (data->orig, data->start - data->orig,
+                           data->start - data->orig);
+    }
+  /* Put point where it was, or if the buffer has shrunk because the
+     compressed data is bigger than the uncompressed, at
+     point-max.  */
+  SET_PT (min (data->old_point, ZV));
+}
+
+DEFUN ("zlib-available-p", Fzlib_available_p, Szlib_available_p, 0, 0, 0,
+       doc: /* Return t if zlib decompression is available in this instance of Emacs.  */)
+     (void)
+{
+#ifdef WINDOWSNT
+  Lisp_Object found = Fassq (Qzlib, Vlibrary_cache);
+  if (CONSP (found))
+    return XCDR (found);
+  else
+    {
+      Lisp_Object status;
+      zlib_initialized = init_zlib_functions ();
+      status = zlib_initialized ? Qt : Qnil;
+      Vlibrary_cache = Fcons (Fcons (Qzlib, status), Vlibrary_cache);
+      return status;
+    }
+#else
+  return Qt;
+#endif
+}
+
+DEFUN ("zlib-decompress-region", Fzlib_decompress_region,
+       Szlib_decompress_region,
+       2, 3, 0,
+       doc: /* Decompress a gzip- or zlib-compressed region.
+Replace the text in the region by the decompressed data.
+
+If optional parameter ALLOW-PARTIAL is nil or omitted, then on
+failure, return nil and leave the data in place.  Otherwise, return
+the number of bytes that were not decompressed and replace the region
+text by whatever data was successfully decompressed (similar to gzip).
+If decompression is completely successful return t.
+
+This function can be called only in unibyte buffers.  */)
+  (Lisp_Object start, Lisp_Object end, Lisp_Object allow_partial)
+{
+  ptrdiff_t istart, iend, pos_byte;
+  z_stream stream;
+  int inflate_status;
+  struct decompress_unwind_data unwind_data;
+  ptrdiff_t count = SPECPDL_INDEX ();
+
+  validate_region (&start, &end);
+
+  if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
+    error ("This function can be called only in unibyte buffers");
+
+#ifdef WINDOWSNT
+  if (!zlib_initialized)
+    zlib_initialized = init_zlib_functions ();
+  if (!zlib_initialized)
+    {
+      message1 ("zlib library not found");
+      return Qnil;
+    }
+#endif
+
+  /* This is a unibyte buffer, so character positions and bytes are
+     the same.  */
+  istart = XFIXNUM (start);
+  iend = XFIXNUM (end);
+
+  /* Do the following before manipulating the gap. */
+  modify_text (istart, iend);
+
+  move_gap_both (iend, iend);
+
+  stream.zalloc = Z_NULL;
+  stream.zfree = Z_NULL;
+  stream.opaque = Z_NULL;
+  stream.avail_in = 0;
+  stream.next_in = Z_NULL;
+
+  /* The magic number 32 apparently means "autodetect both the gzip and
+     zlib formats" according to zlib.h.  */
+  if (inflateInit2 (&stream, MAX_WBITS + 32) != Z_OK)
+    return Qnil;
+
+  unwind_data.orig = istart;
+  unwind_data.start = iend;
+  unwind_data.stream = &stream;
+  unwind_data.old_point = PT;
+  unwind_data.nbytes = 0;
+  unwind_data.deflating = 0;
+  record_unwind_protect_ptr (unwind_zlib, &unwind_data);
+
+  /* Insert the decompressed data at the end of the compressed data.  */
+  SET_PT (iend);
+
+  pos_byte = istart;
+
+  /* Keep calling 'inflate' until it reports an error or end-of-input.  */
+  do
+    {
+      /* Maximum number of bytes that one 'inflate' call should read and write.
+	 Do not make avail_out too large, as that might unduly delay C-g.
+	 zlib requires that avail_in and avail_out not exceed UINT_MAX.  */
+      ptrdiff_t avail_in = min (iend - pos_byte, UINT_MAX);
+      int avail_out = 16 * 1024;
+      int decompressed;
+
+      if (GAP_SIZE < avail_out)
+	make_gap (avail_out - GAP_SIZE);
+      stream.next_in = BYTE_POS_ADDR (pos_byte);
+      stream.avail_in = avail_in;
+      stream.next_out = GPT_ADDR;
+      stream.avail_out = avail_out;
+      inflate_status = inflate (&stream, Z_NO_FLUSH);
+      pos_byte += avail_in - stream.avail_in;
+      decompressed = avail_out - stream.avail_out;
+      insert_from_gap (decompressed, decompressed, 0);
+      unwind_data.nbytes += decompressed;
+      maybe_quit ();
+    }
+  while (inflate_status == Z_OK);
+
+  Lisp_Object ret = Qt;
+  if (inflate_status != Z_STREAM_END)
+    {
+      if (!NILP (allow_partial))
+        ret = make_int (iend - pos_byte);
+      else
+        return unbind_to (count, Qnil);
+    }
+
+  unwind_data.start = 0;
+
+  /* Delete the compressed data.  */
+  del_range_2 (istart, istart, /* byte and char offsets are the same. */
+               iend, iend, 0);
+
+  signal_after_change (istart, iend - istart, unwind_data.nbytes);
+  update_compositions (istart, istart, CHECK_HEAD);
+
+  return unbind_to (count, ret);
+}
+
+\f
+DEFUN ("zlib-compress-region", Fzlib_compress_region,
+       Szlib_compress_region,
+       2, 3, 0,
+       doc: /* Compress a region to a gzip or zlib stream.
+Replace the text in the region by the compressed data.
+
+If optional parameter NO-WRAPPER is nil or omitted, use the GZIP
+wrapper format; otherwise, output just a deflated stream of
+bytes. If decompression is completely successful return t.
+
+This function can be called only in unibyte buffers.*/)
+  (Lisp_Object start, Lisp_Object end, Lisp_Object zlib)
+{
+  ptrdiff_t istart, iend, pos_byte;
+  z_stream stream;
+  int deflate_status, flush;
+  struct decompress_unwind_data unwind_data;
+  ptrdiff_t count = SPECPDL_INDEX ();
+  bool gzipp = NILP (zlib);
+
+  validate_region (&start, &end);
+
+  if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
+    error ("This function can be called only in unibyte buffers");
+
+#ifdef WINDOWSNT
+  if (!zlib_initialized)
+    zlib_initialized = init_zlib_functions ();
+  if (!zlib_initialized)
+    {
+      message1 ("zlib library not found");
+      return Qnil;
+    }
+#endif
+
+  /* This is a unibyte buffer, so character positions and bytes are
+     the same.  */
+  istart = XFIXNUM (start);
+  iend = XFIXNUM (end);
+
+  /* Do the following before manipulating the gap. */
+  modify_text (istart, iend);
+
+  move_gap_both (iend, iend);
+
+  stream.zalloc = Z_NULL;
+  stream.zfree = Z_NULL;
+  stream.opaque = Z_NULL;
+  stream.avail_in = 0;
+  stream.next_in = Z_NULL;
+
+  /* Initiate the deflate() process, choosing the format, compression
+     strategy and level (9), and amount of memory used.  */
+  if (deflateInit2 (&stream, 9, Z_DEFLATED, MAX_WBITS + (gzipp? 16: 0),
+                    8, Z_DEFAULT_STRATEGY) != Z_OK)
+    return Qnil;
+
+  unwind_data.orig = istart;
+  unwind_data.start = iend;
+  unwind_data.stream = &stream;
+  unwind_data.old_point = PT;
+  unwind_data.nbytes = 0;
+  unwind_data.deflating = 1;
+  record_unwind_protect_ptr (unwind_zlib, &unwind_data);
+
+  /* Insert the decompressed data at the end of the compressed data.  */
+  SET_PT (iend);
+
+  pos_byte = istart;
+
+  /* Keep calling 'deflate' until it reports an error or end-of-input.  */
+  flush = Z_NO_FLUSH;
+  do
+    {
+      /* Maximum number of bytes that one 'deflate' call should read and write.
+	 Do not make avail_out too large, as that might unduly delay C-g.
+	 zlib requires that avail_in and avail_out not exceed UINT_MAX.  */
+      ptrdiff_t avail_in = min (iend - pos_byte, UINT_MAX);
+      int avail_out = 16 * 1024;
+      int compressed;
+
+      if (GAP_SIZE < avail_out)
+	make_gap (avail_out - GAP_SIZE);
+      stream.next_in = BYTE_POS_ADDR (pos_byte);
+      stream.avail_in = avail_in;
+      stream.next_out = GPT_ADDR;
+      stream.avail_out = avail_out;
+      deflate_status = deflate (&stream, flush);
+
+      pos_byte += avail_in - stream.avail_in;
+      compressed = avail_out - stream.avail_out;
+      insert_from_gap (compressed, compressed, 0);
+      unwind_data.nbytes += compressed;
+      if (deflate_status == Z_BUF_ERROR && flush == Z_NO_FLUSH) {
+        /* When we run out of input, zlib returns Z_BUF_ERROR.
+           We then have to flush all output. */
+        flush = Z_FINISH;
+        deflate_status = Z_OK;
+      }
+      maybe_quit ();
+    }
+  while (deflate_status == Z_OK);
+
+  Lisp_Object ret = Qt;
+  if (deflate_status != Z_STREAM_END)
+    {
+      /* When compression did not succeed, delete output. */
+      ret = make_int (iend - pos_byte);
+    }
+
+  unwind_data.start = 0;
+
+  /* Delete the uncompressed data.  */
+  del_range_2 (istart, istart, /* byte and char offsets are the same. */
+               iend, iend, 0);
+
+  signal_after_change (istart, iend - istart, unwind_data.nbytes);
+  update_compositions (istart, istart, CHECK_HEAD);
+
+  return unbind_to (count, ret);
+}
+
+\f
+/***********************************************************************
+			    Initialization
+ ***********************************************************************/
+void
+syms_of_decompress (void)
+{
+  defsubr (&Szlib_decompress_region);
+  defsubr (&Szlib_compress_region);
+  defsubr (&Szlib_available_p);
+}
+
+#endif /* HAVE_ZLIB */

[-- Attachment #3: test.lisp --]
[-- Type: application/octet-stream, Size: 3637 bytes --]

(switch-to-buffer "*Messages*")

(setq debug-on-error t)

(defun test-zlib-compress (filename &optional show-diff)
  "Test compression passing the output of zlib-compress-region to
gzip -d and comparing the output. If SHOW-DIFF is not nil, save the
failing files and output a diff.

Returns T on success, NIL on failure of the comparison."
  (with-temp-buffer
    (set-buffer-multibyte nil)
    (insert-file-contents-literally filename nil nil nil t)
    (let ((b-zlib (current-buffer))
          (s-orig (buffer-string))
          (coding buffer-file-coding-system))
      (unless (zlib-compress-region (point-min) (point-max))
        (error "Failed to compress %s" filename))
      (with-temp-buffer
        (let ((b-gunzipped (current-buffer)))
          (set-buffer-multibyte nil)
          (with-current-buffer b-zlib
            (let ((coding-system-for-write 'raw-text-unix)
                  (coding-system-for-read 'raw-text-unix))
              (call-process-region (point-min) (point-max)
                                   "gzip.exe" nil b-gunzipped nil
                                   "-d" "-c" "-q" "-"))
            (delete-region (point-min) (point-max))
            (insert-file filename)
            (let* ((s-uncomp (with-current-buffer b-gunzipped (buffer-string)))
                   (success (string-equal s-orig s-uncomp)))
              (when show-diff
                (message "Compression/decompression with %S %s "
                         filename
                         (if success "succeeded!" "failed!"))
                (unless success
                  (with-current-buffer b-zlib
                    (write-region (point-min) (point-max) "test-file-a"))
                  (with-current-buffer b-gunzipped
                    (write-region (point-min) (point-max) "test-file-b"))
                  (diff "test-file-a" "test-file-b")))
              success
              )))))))

(defun test-directory (dir pattern)
  "Runs TEST-ZLIB-COMPRESS on all files in DIR that match PATTERN.
Returns a list of files that fail the test."
  (let ((failures nil))
    (dolist (f (directory-files dir t pattern))
      (unless (test-zlib-compress f)
        (push f failures)))
    (message "Files that failed test: %S" failures)))

(test-directory "~/emacs-build/git/emacs-27/lisp/" ".*\\.el")

;; Run this if some file fails to pass
;; (test-zlib-compress "c:/Users/juanj/emacs-build/git/emacs-27/lisp/finder-inf.el" t)


(defun simple-zlib-compress (filename)
  "Compress the buffer directly with ZLIB-COMPRESS-REGION"
  (with-temp-buffer
    (set-buffer-multibyte nil)
    (insert-file-contents-literally filename nil nil nil t)
    (unless (zlib-compress-region (point-min) (point-max))
      (error "Failed to compress %s" filename))))

(defun simple-gzip-compress (filename)
  "Compress the buffer indirectly, passing its content through gzip."
  (with-temp-buffer
    (set-buffer-multibyte nil)
    (insert-file-contents-literally filename nil nil nil t)
    (let ((coding-system-for-write 'raw-text-unix)
          (coding-system-for-read 'raw-text-unix))
      (call-process-region (point-min) (point-max)
                           "gzip.exe" t t))))

(benchmark 1 '(mapc 'simple-zlib-compress
                    (directory-files  "~/emacs-build/git/emacs-27/lisp/" t ".*\\.el")))
;; => Elapsed time: 2.602588s (0.014894s in 1 GCs)

(benchmark 1 '(mapc 'simple-gzip-compress
                    (directory-files  "~/emacs-build/git/emacs-27/lisp/" t ".*\\.el")))
;; => Elapsed time: 61.986128s (0.039815s in 3 GCs)

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: decompress.c now also compresses
  2020-03-29 15:52 decompress.c now also compresses Juan José García-Ripoll
@ 2020-03-29 15:57 ` Juan José García-Ripoll
  2020-03-29 16:11 ` Eli Zaretskii
  2020-03-29 16:54 ` Stefan Monnier
  2 siblings, 0 replies; 6+ messages in thread
From: Juan José García-Ripoll @ 2020-03-29 15:57 UTC (permalink / raw)
  To: emacs-devel

[-- Attachment #1: Type: text/plain, Size: 232 bytes --]

Apologies for the corrupted patch: I used emacs' C-c C-v and it looked
good, but "git diff" failed because of cr/lf issues. Here it goes again.

-- 
Juan José García Ripoll
http://juanjose.garciaripoll.com
http://quinfog.hbar.es


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: decompress.diff --]
[-- Type: text/x-patch, Size: 6952 bytes --]

diff --git a/src/decompress.c b/src/decompress.c
index 5d24638..7f20cb0 100644
--- a/src/decompress.c
+++ b/src/decompress.c
@@ -33,11 +33,23 @@
 # include "w32common.h"
 # include "w32.h"

+/* We import inflateInit2_ and deflateInit2_ because inflateInit and
+   deflateInit are macros defined on top of these symbols by zlib.h */
 DEF_DLL_FN (int, inflateInit2_,
 	    (z_streamp strm, int windowBits, const char *version,
 	     int stream_size));
 DEF_DLL_FN (int, inflate, (z_streamp strm, int flush));
 DEF_DLL_FN (int, inflateEnd, (z_streamp strm));
+DEF_DLL_FN (int, deflateInit2_,
+	    (z_streamp strm, int level, int method, int windowBits,
+             int memLevel, int strategy, const char *version,
+             int stream_size));
+DEF_DLL_FN (int, deflateInit2_,
+	    (z_streamp strm, int level, int method, int windowBits,
+             int memLevel, int strategy, const char *version,
+             int stream_size));
+DEF_DLL_FN (int, deflate, (z_streamp strm, int flush));
+DEF_DLL_FN (int, deflateEnd, (z_streamp strm));

 static bool zlib_initialized;

@@ -52,16 +64,25 @@ init_zlib_functions (void)
   LOAD_DLL_FN (library, inflateInit2_);
   LOAD_DLL_FN (library, inflate);
   LOAD_DLL_FN (library, inflateEnd);
+  LOAD_DLL_FN (library, deflateInit2_);
+  LOAD_DLL_FN (library, deflate);
+  LOAD_DLL_FN (library, deflateEnd);
   return true;
 }

 # undef inflate
 # undef inflateEnd
 # undef inflateInit2_
+# undef deflate
+# undef deflateEnd
+# undef deflateInit2_

 # define inflate fn_inflate
 # define inflateEnd fn_inflateEnd
 # define inflateInit2_ fn_inflateInit2_
+# define deflate fn_deflate
+# define deflateEnd fn_deflateEnd
+# define deflateInit2_ fn_deflateInit2_

 #endif	/* WINDOWSNT */

@@ -70,13 +91,14 @@ init_zlib_functions (void)
 {
   ptrdiff_t old_point, orig, start, nbytes;
   z_stream *stream;
+  int deflating;
 };

 static void
-unwind_decompress (void *ddata)
+unwind_zlib (void *ddata)
 {
   struct decompress_unwind_data *data = ddata;
-  inflateEnd (data->stream);
+  (data->deflating? deflateEnd : inflateEnd) (data->stream);

   /* Delete any uncompressed data already inserted on error, but
      without calling the change hooks.  */
@@ -180,7 +202,8 @@ DEFUN ("zlib-decompress-region", Fzlib_decompress_region,
   unwind_data.stream = &stream;
   unwind_data.old_point = PT;
   unwind_data.nbytes = 0;
-  record_unwind_protect_ptr (unwind_decompress, &unwind_data);
+  unwind_data.deflating = 0;
+  record_unwind_protect_ptr (unwind_zlib, &unwind_data);

   /* Insert the decompressed data at the end of the compressed data.  */
   SET_PT (iend);
@@ -233,6 +256,129 @@ DEFUN ("zlib-decompress-region", Fzlib_decompress_region,
   return unbind_to (count, ret);
 }

+\f
+DEFUN ("zlib-compress-region", Fzlib_compress_region,
+       Szlib_compress_region,
+       2, 3, 0,
+       doc: /* Compress a region to a gzip or zlib stream.
+Replace the text in the region by the compressed data.
+
+If optional parameter NO-WRAPPER is nil or omitted, use the GZIP
+wrapper format; otherwise, output just a deflated stream of
+bytes. If decompression is completely successful return t.
+
+This function can be called only in unibyte buffers.*/)
+  (Lisp_Object start, Lisp_Object end, Lisp_Object zlib)
+{
+  ptrdiff_t istart, iend, pos_byte;
+  z_stream stream;
+  int deflate_status, flush;
+  struct decompress_unwind_data unwind_data;
+  ptrdiff_t count = SPECPDL_INDEX ();
+  bool gzipp = NILP (zlib);
+
+  validate_region (&start, &end);
+
+  if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
+    error ("This function can be called only in unibyte buffers");
+
+#ifdef WINDOWSNT
+  if (!zlib_initialized)
+    zlib_initialized = init_zlib_functions ();
+  if (!zlib_initialized)
+    {
+      message1 ("zlib library not found");
+      return Qnil;
+    }
+#endif
+
+  /* This is a unibyte buffer, so character positions and bytes are
+     the same.  */
+  istart = XFIXNUM (start);
+  iend = XFIXNUM (end);
+
+  /* Do the following before manipulating the gap. */
+  modify_text (istart, iend);
+
+  move_gap_both (iend, iend);
+
+  stream.zalloc = Z_NULL;
+  stream.zfree = Z_NULL;
+  stream.opaque = Z_NULL;
+  stream.avail_in = 0;
+  stream.next_in = Z_NULL;
+
+  /* Initiate the deflate() process, choosing the format, compression
+     strategy and level (9), and amount of memory used.  */
+  if (deflateInit2 (&stream, 9, Z_DEFLATED, MAX_WBITS + (gzipp? 16: 0),
+                    8, Z_DEFAULT_STRATEGY) != Z_OK)
+    return Qnil;
+
+  unwind_data.orig = istart;
+  unwind_data.start = iend;
+  unwind_data.stream = &stream;
+  unwind_data.old_point = PT;
+  unwind_data.nbytes = 0;
+  unwind_data.deflating = 1;
+  record_unwind_protect_ptr (unwind_zlib, &unwind_data);
+
+  /* Insert the decompressed data at the end of the compressed data.  */
+  SET_PT (iend);
+
+  pos_byte = istart;
+
+  /* Keep calling 'deflate' until it reports an error or end-of-input.  */
+  flush = Z_NO_FLUSH;
+  do
+    {
+      /* Maximum number of bytes that one 'deflate' call should read and write.
+	 Do not make avail_out too large, as that might unduly delay C-g.
+	 zlib requires that avail_in and avail_out not exceed UINT_MAX.  */
+      ptrdiff_t avail_in = min (iend - pos_byte, UINT_MAX);
+      int avail_out = 16 * 1024;
+      int compressed;
+
+      if (GAP_SIZE < avail_out)
+	make_gap (avail_out - GAP_SIZE);
+      stream.next_in = BYTE_POS_ADDR (pos_byte);
+      stream.avail_in = avail_in;
+      stream.next_out = GPT_ADDR;
+      stream.avail_out = avail_out;
+      deflate_status = deflate (&stream, flush);
+
+      pos_byte += avail_in - stream.avail_in;
+      compressed = avail_out - stream.avail_out;
+      insert_from_gap (compressed, compressed, 0);
+      unwind_data.nbytes += compressed;
+      if (deflate_status == Z_BUF_ERROR && flush == Z_NO_FLUSH) {
+        /* When we run out of input, zlib returns Z_BUF_ERROR.
+           We then have to flush all output. */
+        flush = Z_FINISH;
+        deflate_status = Z_OK;
+      }
+      maybe_quit ();
+    }
+  while (deflate_status == Z_OK);
+
+  Lisp_Object ret = Qt;
+  if (deflate_status != Z_STREAM_END)
+    {
+      /* When compression did not succeed, delete output. */
+      ret = make_int (iend - pos_byte);
+    }
+
+  unwind_data.start = 0;
+
+  /* Delete the uncompressed data.  */
+  del_range_2 (istart, istart, /* byte and char offsets are the same. */
+               iend, iend, 0);
+
+  signal_after_change (istart, iend - istart, unwind_data.nbytes);
+  update_compositions (istart, istart, CHECK_HEAD);
+
+  return unbind_to (count, ret);
+}
+
 \f
 /***********************************************************************
 			    Initialization
@@ -241,6 +387,7 @@ DEFUN ("zlib-decompress-region", Fzlib_decompress_region,
 syms_of_decompress (void)
 {
   defsubr (&Szlib_decompress_region);
+  defsubr (&Szlib_compress_region);
   defsubr (&Szlib_available_p);
 }

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: decompress.c now also compresses
  2020-03-29 15:52 decompress.c now also compresses Juan José García-Ripoll
  2020-03-29 15:57 ` Juan José García-Ripoll
@ 2020-03-29 16:11 ` Eli Zaretskii
  2020-03-29 16:48   ` Juan José García-Ripoll
  2020-03-29 16:54 ` Stefan Monnier
  2 siblings, 1 reply; 6+ messages in thread
From: Eli Zaretskii @ 2020-03-29 16:11 UTC (permalink / raw)
  To: Juan José García-Ripoll; +Cc: emacs-devel

> From: Juan José García-Ripoll
>  <juanjose.garciaripoll@gmail.com>
> Date: Sun, 29 Mar 2020 17:52:03 +0200
> 
> I attach a patch that adds support for compressing buffers using
> zlib. It is a minor extension to the file src/decompress.c but it may be
> useful because of two reasons (i) in Windows, Emasc is shipped without
> g[un]zip.exe, (ii) the whole process of compression takes about 20 times
> less time than calling gzip.
> 
> (benchmark 1
>   '(mapc 'simple-zlib-compress
>       (directory-files  "~/emacs-build/git/emacs-27/lisp/" t ".*\\.el")))
> ;; => Elapsed time: 2.602588s (0.014894s in 1 GCs)
> 
> (benchmark 1
>   '(mapc 'simple-gzip-compress
>        (directory-files  "~/emacs-build/git/emacs-27/lisp/" t ".*\\.el")))
> ;; => Elapsed time: 61.986128s (0.039815s in 3 GCs)

The timing might look very different on platforms other than Windows.

> I attach a patch that was produced against emacs-27 but also seems to
> work against emacs-28 (at least the decompress.c part, I am unsure about
> how NEWS should be edited).

Thanks.

Something is wrong with your Git installation, I think: for some
reason Git thinks that you are replacing the entire decompress.c file,
with nothing in common.  I suspect some end-of-line convention snafu.
Did you perhaps install Git with option other than "checkout as-is,
commit as-is"?  If so, please reinstall Git.  Or maybe you saved the
modified decompress.c with CRLF end-of-line format?

> --- a/etc/NEWS
> +++ b/etc/NEWS
> @@ -3656,6 +3656,10 @@ easier to undo immediately afterwards.
>  ** When called interactively, 'next-buffer' and 'previous-buffer' now
>  signal 'user-error' if there is no buffer to switch to.
>  
> +---
> +** New function 'zlib-compress-region' compresses a unibyte buffer region using
> +gzip's format, via the zlib library.

This should be +++, not ---, and we should document this primitive in
the Elisp manual, like we do with zlib-decompress-region.

We also request a ChangeLog-style commit log message to go with ach
contribution; please provide one.

Last, but not least: I'm not convinced that we would need such a
primitive (the decompression primitive was provided to support
decompression of payloads received via network protocols, but there's
no similar reason for the compression routine).  So before you invest
more work in this, let's hear opinions from others regarding the
necessity.

A couple more specific comments below:

> +If optional parameter NO-WRAPPER is nil or omitted, use the GZIP
> +wrapper format; otherwise, output just a deflated stream of
> +bytes. If decompression is completely successful return t.
        ^^
In doc strings, comments, and manuals, we use the US English
convention of leaving 2 spaces between sentences.

> +This function can be called only in unibyte buffers.*/)
                                                      ^^
Please leave two spaces between the end of the sentence and the
comment delimiter (here and elsewhere in the patch). 



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: decompress.c now also compresses
  2020-03-29 16:11 ` Eli Zaretskii
@ 2020-03-29 16:48   ` Juan José García-Ripoll
  0 siblings, 0 replies; 6+ messages in thread
From: Juan José García-Ripoll @ 2020-03-29 16:48 UTC (permalink / raw)
  To: emacs-devel

Eli Zaretskii <eliz@gnu.org> writes:
> Something is wrong with your Git installation, I think: for some
> reason Git thinks that you are replacing the entire decompress.c file,

Yes, apologies. I have two copies of git and emacs keeps finding the
right one, but my msys buffer does not.

> This should be +++, not ---, and we should document this primitive in
> the Elisp manual, like we do with zlib-decompress-region.
> We also request a ChangeLog-style commit log message to go with ach
> contribution; please provide one.

Thanks for this and the style comments.

> Last, but not least: I'm not convinced that we would need such a
> primitive (the decompression primitive was provided to support
> decompression of payloads received via network protocols, but there's
> no similar reason for the compression routine).  So before you invest
> more work in this, let's hear opinions from others regarding the
> necessity.

My reasoning was that (i) it was easy to do while I was watching
COVID-19 news and (ii) on top of this one can almost trivially build a
file handler for *.gz files that does not require gzip (as in the
automatic compression mode). Currently *-deps.zip ships all types of
compressors, including zlib, but not gzip. There may be other use cases,
such as the Emacs web server by E. Schulte (uses zlib-flate and
gzip). But I understand if you find it yet another maintenance hassle.

Cheers,

-- 
Juan José García Ripoll
http://juanjose.garciaripoll.com
http://quinfog.hbar.es




^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: decompress.c now also compresses
  2020-03-29 15:52 decompress.c now also compresses Juan José García-Ripoll
  2020-03-29 15:57 ` Juan José García-Ripoll
  2020-03-29 16:11 ` Eli Zaretskii
@ 2020-03-29 16:54 ` Stefan Monnier
  2020-03-29 19:27   ` Juan José García-Ripoll
  2 siblings, 1 reply; 6+ messages in thread
From: Stefan Monnier @ 2020-03-29 16:54 UTC (permalink / raw)
  To: Juan José García-Ripoll; +Cc: emacs-devel

> I attach a patch that adds support for compressing buffers using
> zlib. It is a minor extension to the file src/decompress.c but it may be
> useful because of two reasons (i) in Windows, Emasc is shipped without
> g[un]zip.exe,

This sounds great.  Does that mean that we can make jka-compr work with
.gz files without any external tools at all?


        Stefan "thinking of when that will also apply to .lz files"




^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: decompress.c now also compresses
  2020-03-29 16:54 ` Stefan Monnier
@ 2020-03-29 19:27   ` Juan José García-Ripoll
  0 siblings, 0 replies; 6+ messages in thread
From: Juan José García-Ripoll @ 2020-03-29 19:27 UTC (permalink / raw)
  To: emacs-devel

Stefan Monnier <monnier@iro.umontreal.ca> writes:

>> I attach a patch that adds support for compressing buffers using
>> zlib. It is a minor extension to the file src/decompress.c but it may be
>> useful because of two reasons (i) in Windows, Emasc is shipped without
>> g[un]zip.exe,
>
> This sounds great.  Does that mean that we can make jka-compr work with
> .gz files without any external tools at all?

One needs to replace jka-compr with a couple of functions that are
installed before it, because the way jka-compr works it assumes the
compressor/decompressor is an external program, not a lisp function.

Cheers,

-- 
Juan José García Ripoll
http://juanjose.garciaripoll.com
http://quinfog.hbar.es




^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-03-29 19:27 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-03-29 15:52 decompress.c now also compresses Juan José García-Ripoll
2020-03-29 15:57 ` Juan José García-Ripoll
2020-03-29 16:11 ` Eli Zaretskii
2020-03-29 16:48   ` Juan José García-Ripoll
2020-03-29 16:54 ` Stefan Monnier
2020-03-29 19:27   ` Juan José García-Ripoll

Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.