unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* More (de)compress?
@ 2013-08-19 12:17 Dmitry Antipov
  2013-08-19 16:13 ` Eli Zaretskii
  2013-08-19 16:41 ` Paul Eggert
  0 siblings, 2 replies; 8+ messages in thread
From: Dmitry Antipov @ 2013-08-19 12:17 UTC (permalink / raw)
  To: Emacs development discussions

[-- Attachment #1: Type: text/plain, Size: 278 bytes --]

Inspired by current decompress.c, I would like to propose the symmetric
compress function(s) as well as support for more compression methods.
This is what I have now, and basically it works; still needs MS-Windows
support, better error detection and a few other things.

Dmitry

[-- Attachment #2: compress.patch --]
[-- Type: text/plain, Size: 20817 bytes --]

=== modified file 'configure.ac'
--- configure.ac	2013-08-15 16:37:15 +0000
+++ configure.ac	2013-08-19 04:49:53 +0000
@@ -211,7 +211,9 @@
 OPTION_DEFAULT_ON([gsettings],[don't compile with GSettings support])
 OPTION_DEFAULT_ON([selinux],[don't compile with SELinux support])
 OPTION_DEFAULT_ON([gnutls],[don't use -lgnutls for SSL/TLS support])
-OPTION_DEFAULT_ON([zlib],[don't compile with zlib decompression support])
+OPTION_DEFAULT_ON([zlib],[don't compile with zlib compression support])
+OPTION_DEFAULT_ON([bzlib],[don't compile with bz2 compression support])
+OPTION_DEFAULT_ON([lzma],[don't compile with lzma compression support])
 
 AC_ARG_WITH([file-notification],[AS_HELP_STRING([--with-file-notification=LIB],
  [use a file notification library (LIB one of: yes, gfile, inotify, w32, no)])],
@@ -2962,6 +2964,43 @@
 fi
 AC_SUBST(LIBZ)
 
+HAVE_BZLIB=no
+LIBBZ2=
+if test "${with_bzlib}" != "no"; then
+  OLIBS=$LIBS
+  AC_SEARCH_LIBS([BZ2_bzCompress], [bz2], [HAVE_BZLIB=yes])
+  LIBS=$OLIBS
+  case $ac_cv_search_BZ2_bzCompress in
+    -*) LIBBZ2=$ac_cv_search_BZ2_bzCompress ;;
+  esac
+fi
+if test "${HAVE_BZLIB}" = "yes"; then
+  AC_DEFINE([HAVE_BZLIB], 1, [Define to 1 if you have the bzlib library (-lbz2).])
+  ### mingw32 doesn't use -lbz2, since it loads the library dynamically.
+  if test "${opsys}" = "mingw32"; then
+     LIBBZ2=
+  fi
+fi
+AC_SUBST(LIBBZ2)
+
+HAVE_LZMA=no
+LIBLZMA=
+if test "${with_lzma}" != "no"; then
+  OLIBS=$LIBS
+  AC_SEARCH_LIBS([lzma_lzma_preset], [lzma], [HAVE_LZMA=yes])
+  LIBS=$OLIBS
+  case $ac_cv_search_lzma_lzma_preset in
+    -*) LIBLZMA=$ac_cv_search_lzma_lzma_preset ;;
+  esac
+fi
+if test "${HAVE_LZMA}" = "yes"; then
+  AC_DEFINE([HAVE_LZMA], 1, [Define to 1 if you have the lzma library (-llzma).])
+  ### mingw32 doesn't use -llzma, since it loads the library dynamically.
+  if test "${opsys}" = "mingw32"; then
+     LIBLZMA=
+  fi
+fi
+AC_SUBST(LIBLZMA)
 
 ### Use -ltiff if available, unless `--with-tiff=no'.
 ### mingw32 doesn't use -ltiff, since it loads the library dynamically.
@@ -4818,6 +4857,8 @@
 echo "  Does Emacs use -lotf?                                   ${HAVE_LIBOTF}"
 echo "  Does Emacs use -lxft?                                   ${HAVE_XFT}"
 echo "  Does Emacs directly use zlib?                           ${HAVE_ZLIB}"
+echo "  Does Emacs directly use bzlib?                          ${HAVE_BZLIB}"
+echo "  Does Emacs directly use lzma?                           ${HAVE_LZMA}"
 
 echo "  Does Emacs use toolkit scroll bars?                     ${USE_TOOLKIT_SCROLL_BARS}"
 echo

=== modified file 'src/Makefile.in'
--- src/Makefile.in	2013-08-11 19:43:36 +0000
+++ src/Makefile.in	2013-08-19 04:44:28 +0000
@@ -244,6 +244,8 @@
 LIBXML2_CFLAGS = @LIBXML2_CFLAGS@
 
 LIBZ = @LIBZ@
+LIBBZ2 = @LIBBZ2@
+LIBLZMA = @LIBLZMA@
 
 XRANDR_LIBS = @XRANDR_LIBS@
 XRANDR_CFLAGS = @XRANDR_CFLAGS@
@@ -376,7 +378,7 @@
 	process.o gnutls.o callproc.o \
 	region-cache.o sound.o atimer.o \
 	doprnt.o intervals.o textprop.o composite.o xml.o $(NOTIFY_OBJ) \
-	profiler.o decompress.o \
+	profiler.o decompress.o compress.o \
 	$(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) \
 	$(W32_OBJ) $(WINDOW_SYSTEM_OBJ) $(XGSELOBJ)
 obj = $(base_obj) $(NS_OBJC_OBJ)
@@ -431,7 +433,7 @@
    $(LIBS_TERMCAP) $(GETLOADAVG_LIBS) $(SETTINGS_LIBS) $(LIBSELINUX_LIBS) \
    $(FREETYPE_LIBS) $(FONTCONFIG_LIBS) $(LIBOTF_LIBS) $(M17N_FLT_LIBS) \
    $(LIBGNUTLS_LIBS) $(LIB_PTHREAD) $(LIB_PTHREAD_SIGMASK) \
-   $(GFILENOTIFY_LIBS) $(LIB_MATH) $(LIBZ)
+   $(GFILENOTIFY_LIBS) $(LIB_MATH) $(LIBZ) $(LIBBZ2) $(LIBLZMA)
 
 all: emacs$(EXEEXT) $(OTHER_FILES)
 .PHONY: all

=== added file 'src/compress.c'
--- src/compress.c	1970-01-01 00:00:00 +0000
+++ src/compress.c	2013-08-19 12:07:04 +0000
@@ -0,0 +1,619 @@
+/* Interface to compression libraries.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include "lisp.h"
+#include "character.h"
+#include "buffer.h"
+
+/* Maximum number of bytes that one call to a compression or decompression
+   routine should write into an output buffer.  Making it too large may
+   delay C-g; on the other hand, it looks unlikely because we want to
+   read as much as possible, since most compression methods work better
+   if they can look through larger input sequences.  */
+
+#define OUTPUT_BLOCK_SIZE (16 * 1024)
+
+static Lisp_Object Qzlib, Qbzlib, Qlzma;
+
+/*
+ * ZLIB support
+ */
+
+#ifdef HAVE_ZLIB
+
+#include <zlib.h>
+
+struct z_unwind
+{
+  unsigned inflate : 1;
+  ptrdiff_t oldpt;
+  ptrdiff_t start;
+  z_stream *z;
+};
+
+static void
+zlib_unwind (void *arg)
+{
+  struct z_unwind *u = arg;
+
+  /* Finalize stream state.  */
+  if (u->inflate)
+    inflateEnd (u->z);
+  else
+    deflateEnd (u->z);
+
+  /* Delete any produced data already inserted.  */
+  if (u->start)
+    del_range (u->start, PT);
+
+  /* Put point where it was, or if the buffer has shrunk because
+     the produced data is bigger than the original, at point-max.  */
+  SET_PT (min (u->oldpt, ZV));
+}
+
+/* Return true if the current buffer probably has zlib-compressed data
+   starting at byte position B; E is the end of the region.  */
+
+static bool
+zlib_detect (ptrdiff_t b, ptrdiff_t e)
+{
+  /* Check for '\037\213' header.
+     FIXME: add more zlib-supported formats.  */
+  return ((e - b > 2)
+	  && (FETCH_BYTE (b) == 31)
+	  && (FETCH_BYTE (b + 1) == 139));
+}
+
+static Lisp_Object
+zlib_compress_region (ptrdiff_t b, ptrdiff_t e)
+{
+  int status;
+  z_stream z;
+  struct z_unwind u;
+  ptrdiff_t bytepos = b, count = SPECPDL_INDEX ();
+
+  memset (&z, 0, sizeof z);
+
+  /* Use MAX_WBITS + 16 to force gzip-compatible header.  */
+  if (deflateInit2 (&z, Z_DEFAULT_COMPRESSION, Z_DEFLATED, MAX_WBITS + 16,
+		    MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK)
+    return Qnil;
+
+  /* Setup call to zlib_unwind in case of error.  */
+  u.inflate = 0;
+  u.start = e;
+  u.z = &z;
+  u.oldpt = PT;
+  record_unwind_protect_ptr (zlib_unwind, &u);
+
+  /* Insert the compressed data at the end of the original data.  */
+  move_gap_both (e, e);
+  SET_PT (e);
+
+  /* Compress until we have an error or input ends.  */
+  do
+    {
+      ptrdiff_t avail_in = min (e - bytepos, UINT_MAX);
+      ptrdiff_t compressed, avail_out = OUTPUT_BLOCK_SIZE;
+
+      if (GAP_SIZE < avail_out)
+	make_gap (avail_out - GAP_SIZE);
+      z.next_in = BYTE_POS_ADDR (bytepos);
+      z.avail_in = avail_in;
+      z.next_out = GPT_ADDR;
+      z.avail_out = avail_out;
+      status = deflate (&z, bytepos + avail_in == e ? Z_FINISH : Z_NO_FLUSH);
+      bytepos += avail_in - z.avail_in;
+      compressed = avail_out - z.avail_out;
+      insert_from_gap (compressed, compressed, 0);
+      QUIT;
+    }
+  while (status == Z_OK);
+
+  if (status == Z_STREAM_END)
+    {
+      /* Delete the original data.  */
+      u.start = 0;
+      del_range (b, e);
+    }
+  return unbind_to (count, status == Z_STREAM_END ? Qt : Qnil);
+}
+
+static Lisp_Object
+zlib_decompress_region (ptrdiff_t b, ptrdiff_t e)
+{
+  int status;
+  z_stream z;
+  struct z_unwind u;
+  ptrdiff_t bytepos = b, count = SPECPDL_INDEX ();
+
+  if (!zlib_detect (b, e))
+    error ("Not a zlib-compressed data");
+
+  memset (&z, 0, sizeof z);
+
+  /* The magic number 32 apparently means "autodetect both
+     the gzip and zlib formats" according to zlib.h.  */
+  if (inflateInit2 (&z, MAX_WBITS + 32) != Z_OK)
+    return Qnil;
+
+  /* Setup call to zlib_unwind in case of error.  */
+  u.inflate = 1;
+  u.start = e;
+  u.z = &z;
+  u.oldpt = PT;
+  record_unwind_protect_ptr (zlib_unwind, &u);
+
+  /* Insert the decompressed data at the end of the compressed data.  */
+  move_gap_both (e, e);
+  SET_PT (e);
+
+  /* Decompress until we have an error or input ends.  */
+  do
+    {
+      ptrdiff_t avail_in = min (e - bytepos, UINT_MAX);
+      ptrdiff_t decompressed, avail_out = OUTPUT_BLOCK_SIZE;
+
+      if (GAP_SIZE < avail_out)
+	make_gap (avail_out - GAP_SIZE);
+      z.next_in = BYTE_POS_ADDR (bytepos);
+      z.avail_in = avail_in;
+      z.next_out = GPT_ADDR;
+      z.avail_out = avail_out;
+      status = inflate (&z, Z_NO_FLUSH);
+      bytepos += avail_in - z.avail_in;
+      decompressed = avail_out - z.avail_out;
+      insert_from_gap (decompressed, decompressed, 0);
+      QUIT;
+    }
+  while (status == Z_OK);
+
+  if (status == Z_STREAM_END)
+    {
+      /* Delete the compressed data.  */
+      u.start = 0;
+      del_range (b, e);
+    }
+  return unbind_to (count, status == Z_STREAM_END ? Qt : Qnil);
+}
+
+#endif /* HAVE_ZLIB */
+
+/*
+ * BZLIB support
+ */
+
+#ifdef HAVE_BZLIB
+
+#include <bzlib.h>
+
+struct bz_unwind
+{
+  unsigned compress : 1;
+  ptrdiff_t oldpt;
+  ptrdiff_t start;
+  bz_stream *bz;
+};
+
+static void
+bzlib_unwind (void *arg)
+{
+  struct bz_unwind *u = arg;
+
+  /* Finalize stream state.  */
+  if (u->compress)
+    BZ2_bzCompressEnd (u->bz);
+  else
+    BZ2_bzDecompressEnd (u->bz);
+
+  /* Delete any produced data already inserted.  */
+  if (u->start)
+    del_range (u->start, PT);
+
+  /* Put point where it was, or if the buffer has shrunk because
+     the produced data is bigger than the original, at point-max.  */
+  SET_PT (min (u->oldpt, ZV));
+}
+
+/* Return true if the current buffer probably has bzlib-compressed data
+   starting at byte position B; E is the end of the region.  */
+
+static bool
+bzlib_detect (ptrdiff_t b, ptrdiff_t e)
+{
+  /* Check for 'BZh' header.  */
+  return ((e - b > 3) && !memcmp (BYTE_POS_ADDR (b), "BZh", 3));
+}
+
+static Lisp_Object
+bzlib_compress_region (ptrdiff_t b, ptrdiff_t e)
+{
+  int status;
+  bz_stream bz;
+  struct bz_unwind u;
+  ptrdiff_t bytepos = b, count = SPECPDL_INDEX ();
+
+  memset (&bz, 0, sizeof bz);
+
+  /* Use 900k block size and defaults for others.  */
+  if (BZ2_bzCompressInit (&bz, 9, 0, 0) != BZ_OK)
+    return Qnil;
+
+  /* Setup call to bzlib_unwind in case of error.  */
+  u.compress = 1;
+  u.start = e;
+  u.bz = &bz;
+  u.oldpt = PT;
+  record_unwind_protect_ptr (bzlib_unwind, &u);
+
+  /* Insert the compressed data at the end of the original data.  */
+  move_gap_both (e, e);
+  SET_PT (e);
+
+  /* Compress until we have an error or input ends.  */
+  do
+    {
+      ptrdiff_t avail_in = min (e - bytepos, UINT_MAX);
+      ptrdiff_t compressed, avail_out = OUTPUT_BLOCK_SIZE;
+
+      if (GAP_SIZE < avail_out)
+	make_gap (avail_out - GAP_SIZE);
+      bz.next_in = (char *) BYTE_POS_ADDR (bytepos);
+      bz.avail_in = avail_in;
+      bz.next_out = (char *) GPT_ADDR;
+      bz.avail_out = avail_out;
+      status = BZ2_bzCompress (&bz, bytepos + avail_in == e ? BZ_FINISH : BZ_RUN);
+      bytepos += avail_in - bz.avail_in;
+      compressed = avail_out - bz.avail_out;
+      insert_from_gap (compressed, compressed, 0);
+      QUIT;
+    }
+  while (status == BZ_FINISH_OK);
+
+  if (status == BZ_STREAM_END)
+    {
+      /* Delete the original data.  */
+      u.start = 0;
+      del_range (b, e);
+    }
+  return unbind_to (count, status == BZ_STREAM_END ? Qt : Qnil);
+}
+
+static Lisp_Object
+bzlib_decompress_region (ptrdiff_t b, ptrdiff_t e)
+{
+  int status;
+  bz_stream bz;
+  struct bz_unwind u;
+  ptrdiff_t bytepos = b, count = SPECPDL_INDEX ();
+
+  if (!bzlib_detect (b, e))
+    error ("Not a bzlib-compressed data");
+
+  memset (&bz, 0, sizeof bz);
+
+  if (BZ2_bzDecompressInit (&bz, 0, 0) != BZ_OK)
+    return Qnil;
+
+  /* Setup call to bzlib_unwind in case of error.  */
+  u.compress = 0;
+  u.start = e;
+  u.bz = &bz;
+  u.oldpt = PT;
+  record_unwind_protect_ptr (bzlib_unwind, &u);
+
+  /* Insert the decompressed data at the end of the compressed data.  */
+  move_gap_both (e, e);
+  SET_PT (e);
+
+  /* Decompress until we have an error or input ends.  */
+  do
+    {
+      ptrdiff_t avail_in = min (e - bytepos, UINT_MAX);
+      ptrdiff_t decompressed, avail_out = OUTPUT_BLOCK_SIZE;
+
+      if (GAP_SIZE < avail_out)
+	make_gap (avail_out - GAP_SIZE);
+      bz.next_in = (char *) BYTE_POS_ADDR (bytepos);
+      bz.avail_in = avail_in;
+      bz.next_out = (char *) GPT_ADDR;
+      bz.avail_out = avail_out;
+      status = BZ2_bzDecompress (&bz);
+      bytepos += avail_in - bz.avail_in;
+      decompressed = avail_out - bz.avail_out;
+      insert_from_gap (decompressed, decompressed, 0);
+      QUIT;
+    }
+  while (status == BZ_OK);
+
+  if (status == BZ_STREAM_END)
+    {
+      /* Delete the compressed data.  */
+      u.start = 0;
+      del_range (b, e);
+    }
+  return unbind_to (count, status == BZ_STREAM_END ? Qt : Qnil);
+}
+
+#endif /* HAVE_BZLIB */
+
+/*
+ * LZMA support
+ */
+
+#ifdef HAVE_LZMA
+
+#include <lzma.h>
+
+struct lz_unwind
+{
+  ptrdiff_t oldpt;
+  ptrdiff_t start;
+  lzma_stream *lz;
+};
+
+static void
+lzma_unwind (void *arg)
+{
+  struct lz_unwind *u = arg;
+
+  /* Finalize stream state.  */
+  lzma_end (u->lz);
+
+  /* Delete any produced data already inserted.  */
+  if (u->start)
+    del_range (u->start, PT);
+
+  /* Put point where it was, or if the buffer has shrunk because
+     the produced data is bigger than the original, at point-max.  */
+  SET_PT (min (u->oldpt, ZV));
+}
+
+/* Return true if the current buffer probably has lzma-compressed data
+   starting at byte position B; E is the end of the region.  */
+
+static bool
+lzma_detect (ptrdiff_t b, ptrdiff_t e)
+{
+  /* Check for '\3757zXZ' header.  */
+  return ((e - b > 5) && (FETCH_BYTE (b) == 253)
+	  && !memcmp (BYTE_POS_ADDR (b + 1), "7zXZ", 4));
+}
+
+static Lisp_Object
+lzma_compress_region (ptrdiff_t b, ptrdiff_t e)
+{
+  int status;
+  struct lz_unwind u;
+  lzma_stream lz = LZMA_STREAM_INIT;
+  ptrdiff_t bytepos = b, count = SPECPDL_INDEX ();
+
+  if (lzma_easy_encoder (&lz, LZMA_PRESET_DEFAULT,
+			 LZMA_CHECK_CRC32) != LZMA_OK)
+    return Qnil;
+
+  /* Setup call to lzma_unwind in case of error.  */
+  u.start = e;
+  u.lz = &lz;
+  u.oldpt = PT;
+  record_unwind_protect_ptr (lzma_unwind, &u);
+
+  /* Insert the compressed data at the end of the original data.  */
+  move_gap_both (e, e);
+  SET_PT (e);
+
+  /* Compress until we have an error or input ends.  */
+  do
+    {
+      ptrdiff_t avail_in = min (e - bytepos, UINT_MAX);
+      ptrdiff_t compressed, avail_out = OUTPUT_BLOCK_SIZE;
+
+      if (GAP_SIZE < avail_out)
+	make_gap (avail_out - GAP_SIZE);
+      lz.next_in = BYTE_POS_ADDR (bytepos);
+      lz.avail_in = avail_in;
+      lz.next_out = GPT_ADDR;
+      lz.avail_out = avail_out;
+      status = lzma_code (&lz, bytepos + avail_in == e ? LZMA_FINISH : LZMA_RUN);
+      bytepos += avail_in - lz.avail_in;
+      compressed = avail_out - lz.avail_out;
+      insert_from_gap (compressed, compressed, 0);
+      QUIT;
+    }
+  while (status == LZMA_OK);
+
+  if (status == LZMA_STREAM_END)
+    {
+      /* Delete the original data.  */
+      u.start = 0;
+      del_range (b, e);
+    }
+  return unbind_to (count, status == LZMA_STREAM_END ? Qt : Qnil);
+}
+
+static Lisp_Object
+lzma_decompress_region (ptrdiff_t b, ptrdiff_t e)
+{
+  int status;
+  struct lz_unwind u;
+  lzma_stream lz = LZMA_STREAM_INIT;
+  ptrdiff_t bytepos = b, count = SPECPDL_INDEX ();
+
+  if (!lzma_detect (b, e))
+    error ("Not a lzma-compressed data");
+
+  if (lzma_auto_decoder (&lz, UINT64_MAX, 0) != LZMA_OK)
+    return Qnil;
+
+  /* Setup call to lzma_unwind in case of error.  */
+  u.start = e;
+  u.lz = &lz;
+  u.oldpt = PT;
+  record_unwind_protect_ptr (lzma_unwind, &u);
+
+  /* Insert the decompressed data at the end of the compressed data.  */
+  move_gap_both (e, e);
+  SET_PT (e);
+
+  /* Decompress until we have an error or input ends.  */
+  do
+    {
+      ptrdiff_t avail_in = min (e - bytepos, UINT_MAX);
+      ptrdiff_t decompressed, avail_out = OUTPUT_BLOCK_SIZE;
+
+      if (GAP_SIZE < avail_out)
+	make_gap (avail_out - GAP_SIZE);
+      lz.next_in = BYTE_POS_ADDR (bytepos);
+      lz.avail_in = avail_in;
+      lz.next_out = GPT_ADDR;
+      lz.avail_out = avail_out;
+      status = lzma_code (&lz, bytepos + avail_in == e ? LZMA_FINISH : LZMA_RUN);
+      bytepos += avail_in - lz.avail_in;
+      decompressed = avail_out - lz.avail_out;
+      insert_from_gap (decompressed, decompressed, 0);
+      QUIT;
+    }
+  while (status == LZMA_OK);
+
+  if (status == LZMA_STREAM_END)
+    {
+      /* Delete the compressed data.  */
+      u.start = 0;
+      del_range (b, e);
+    }
+  return unbind_to (count, status == LZMA_STREAM_END ? Qt : Qnil);
+}
+
+#endif /* HAVE_LZMA */
+
+/*
+ * Lisp interface
+ */
+
+DEFUN ("compression-available-p", Fcompression_available_p,
+       Scompression_available_p, 1, 1, 0,
+       doc: /* Return t if METHOD of compression and decompression is available.
+Valid METHOD should be one of `zlib', `bzlib' or `lzma'.  */)
+     (Lisp_Object method)
+{
+#ifdef HAVE_ZLIB
+  if (EQ (method, Qzlib))
+    return Qt;
+#endif
+#ifdef HAVE_BZLIB
+  if (EQ (method, Qbzlib))
+    return Qt;
+#endif
+#ifdef HAVE_LZMA
+  if (EQ (method, Qlzma))
+    return Qt;
+#endif
+  return Qnil;
+}
+
+#if defined HAVE_ZLIB || defined HAVE_BZLIB || defined HAVE_LZMA
+
+/* Try to detect compression method for contents of current buffer from byte
+   position B to E.  Return one of zlib, bzlib or lzma if compressed data of
+   appropriate format is supported and detected, or nil otherwise.  */
+
+static Lisp_Object
+detect_compressed_data (ptrdiff_t b, ptrdiff_t e)
+{
+#ifdef HAVE_ZLIB
+  if (zlib_detect (b, e))
+    return Qzlib;
+#endif
+#ifdef HAVE_BZLIB
+  if (bzlib_detect (b, e))
+    return Qbzlib;
+#endif
+#ifdef HAVE_LZMA
+  if (lzma_detect (b, e))
+    return Qlzma;
+#endif
+  return Qnil;
+}
+
+DEFUN ("compress-region", Fcompress_region, Scompress_region, 3, 3, 0,
+       doc: /* Compress region by using METHOD from START to END.  */)
+     (Lisp_Object method, Lisp_Object start, Lisp_Object end)
+{
+  validate_region (&start, &end);
+  if (!NILP (BVAR (current_buffer, enable_multibyte_characters)))
+    error ("This function can be called only in unibyte buffers");
+
+#ifdef HAVE_ZLIB
+  if (EQ (method, Qzlib))
+    return zlib_compress_region (XINT (start), XINT (end));
+#endif
+#ifdef HAVE_BZLIB
+  if (EQ (method, Qbzlib))
+    return bzlib_compress_region (XINT (start), XINT (end));
+#endif
+#ifdef HAVE_LZMA
+  if (EQ (method, Qlzma))
+    return lzma_compress_region (XINT (start), XINT (end));
+#endif
+  error ("Unsupported compression method");
+  return Qnil;
+}
+
+DEFUN ("decompress-region", Fdecompress_region, Sdecompress_region, 3, 3, 0,
+       doc: /* Decompress region by using METHOD from START to END.  */)
+     (Lisp_Object method, Lisp_Object start, Lisp_Object end)
+{
+  validate_region (&start, &end);
+  if (!NILP (BVAR (current_buffer, enable_multibyte_characters)))
+    error ("This function can be called only in unibyte buffers");
+
+  if (NILP (method))
+    method = detect_compressed_data (XINT (start), XINT (end));
+#ifdef HAVE_ZLIB
+  if (EQ (method, Qzlib))
+    return zlib_decompress_region (XINT (start), XINT (end));
+#endif
+#ifdef HAVE_BZLIB
+  if (EQ (method, Qbzlib))
+    return bzlib_decompress_region (XINT (start), XINT (end));
+#endif
+#ifdef HAVE_LZMA
+  if (EQ (method, Qlzma))
+    return lzma_decompress_region (XINT (start), XINT (end));
+#endif
+  error ("Unsupported decompression method");
+  return Qnil;
+}
+
+#endif /* HAVE_ZLIB || HAVE_BZLIB || HAVE_LZMA */
+
+void
+syms_of_compress (void)
+{
+  DEFSYM (Qzlib, "zlib");
+  DEFSYM (Qbzlib, "bzlib");
+  DEFSYM (Qlzma, "lzma");
+
+  defsubr (&Scompression_available_p);
+#if defined HAVE_ZLIB || defined HAVE_BZLIB || defined HAVE_LZMA
+  defsubr (&Scompress_region);
+  defsubr (&Sdecompress_region);
+#endif
+}

=== modified file 'src/emacs.c'
--- src/emacs.c	2013-08-15 16:37:15 +0000
+++ src/emacs.c	2013-08-19 04:44:28 +0000
@@ -1410,6 +1410,7 @@
 #ifdef HAVE_ZLIB
       syms_of_decompress ();
 #endif
+      syms_of_compress ();
 
       syms_of_menu ();
 

=== modified file 'src/lisp.h'
--- src/lisp.h	2013-08-15 14:52:53 +0000
+++ src/lisp.h	2013-08-19 04:44:28 +0000
@@ -4227,6 +4227,8 @@
 void syms_of_dbusbind (void);
 #endif
 
+/* Defined in compress.c.  */
+extern void syms_of_compress (void);
 
 /* Defined in profiler.c.  */
 extern bool profiler_memory_running;


[-- Attachment #3: compress-test.el --]
[-- Type: text/plain, Size: 459 bytes --]

(defun compress-test ()
  (interactive)
  (let ((sum (md5 (current-buffer) (point-min) (point-max))))
    (dolist (method '(zlib bzlib lzma))
      (message "Using %S for %d..%d" method (point-min) (point-max))
      (let ((test (progn (compress-region method (point-min) (point-max))
			 (decompress-region method (point-min) (point-max))
			 (md5 (current-buffer) (point-min) (point-max)))))
	(or (string-equal sum test) (error "%S test error" method))))))

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: More (de)compress?
  2013-08-19 12:17 More (de)compress? Dmitry Antipov
@ 2013-08-19 16:13 ` Eli Zaretskii
  2013-08-19 16:28   ` Lars Magne Ingebrigtsen
  2013-08-19 16:41 ` Paul Eggert
  1 sibling, 1 reply; 8+ messages in thread
From: Eli Zaretskii @ 2013-08-19 16:13 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: emacs-devel

> Date: Mon, 19 Aug 2013 16:17:46 +0400
> From: Dmitry Antipov <dmantipov@yandex.ru>
> 
> Inspired by current decompress.c, I would like to propose the symmetric
> compress function(s) as well as support for more compression methods.
> This is what I have now, and basically it works; still needs MS-Windows
> support, better error detection and a few other things.

What kind of Windows support do you think is needed?  Are you thinking
about dynamic loading (which is a nice-to-have feature), or about
something else?

> +  ### mingw32 doesn't use -lbz2, since it loads the library dynamically.
> +  if test "${opsys}" = "mingw32"; then
> +     LIBBZ2=
> +  fi

This will only work if you implement dynamic loading for libbz2.
Otherwise, Emacs will fail to link, because you call libbz2 functions,
but don't use -lbz2 on the link command line.

> +  ### mingw32 doesn't use -llzma, since it loads the library dynamically.
> +  if test "${opsys}" = "mingw32"; then
> +     LIBLZMA=
> +  fi

Likewise.

> -	profiler.o decompress.o \
> +	profiler.o decompress.o compress.o \

Why do we need both decompress.c and compress.c?  The latter covers
the same ground as the former, no?

More generally, if this is a real submission (as opposed to just a
POC), then I really don't understand where this is going.  E.g., do
you intend to use these primitives, when they are available, in
preference to jka-compr?  If so, why aren't there changes to that
package as part of the changeset?  Likewise for info.el, where it uses
external programs to decompress compressed Info files.

If you do not intend to bypass jka-compr etc., then I must ask what
are the use cases for these primitives, and whether they are different
from those of jka-compr?



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: More (de)compress?
  2013-08-19 16:13 ` Eli Zaretskii
@ 2013-08-19 16:28   ` Lars Magne Ingebrigtsen
  0 siblings, 0 replies; 8+ messages in thread
From: Lars Magne Ingebrigtsen @ 2013-08-19 16:28 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: Dmitry Antipov, emacs-devel

Eli Zaretskii <eliz@gnu.org> writes:

> If you do not intend to bypass jka-compr etc., then I must ask what
> are the use cases for these primitives, and whether they are different
> from those of jka-compr?

If the point is to add more C code just to add more C code just because,
then I don't really see the point.  Are we using bz2/xz decompression in
any performance-critical paths?  Are we compressing even anything in a
performance-critical path?

C code is more segfaulty than Lisp code, so unless we have a good use
case for the C code, I think we should avoid adding more.

-- 
(domestic pets only, the antidote for overdose, milk.)
  No Gnus T-Shirt for sale: http://ingebrigtsen.no/no.php
  and http://lars.ingebrigtsen.no/2013/08/twenty-years-of-september.html



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: More (de)compress?
  2013-08-19 12:17 More (de)compress? Dmitry Antipov
  2013-08-19 16:13 ` Eli Zaretskii
@ 2013-08-19 16:41 ` Paul Eggert
  2013-08-19 16:57   ` Eli Zaretskii
  2013-08-20  8:19   ` Dmitry Antipov
  1 sibling, 2 replies; 8+ messages in thread
From: Paul Eggert @ 2013-08-19 16:41 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: Emacs development discussions

Thanks for taking this on.  Some comments on the patch,
in addition to Eli's:

* It can be faster to compress using an external program,
  since the compression can be done in parallel.  Have you
  timed your compression approach on a multicore platform,
  and compared its real time to doing it with external
  compression?  (Similarly for decompression, though I
  expect there we won't find the external program faster.)
  You might try "pigz" for compression, since it's multicore
  internally.

* There seems to be quite a bit of repetition in configure.ac
  and in the C code -- each compression package does pretty
  much the same thing with respect to allocating buffers,
  saving point, etc.  Could this be factored out to simplify
  the code and make it easier to add future compression
  algorithms?

* bzlib_detect and lzma_detect mishandle the case where the
  buffer gap is located very near the start of the buffer.

* If the buffer contains random garbage,
  (decompress-region nil 1 100000)
  signals "Unsupported decompression method", which
  isn't very clear.  It should signal something like
  "Unknown compression format".

* The functions compress-region and decompress-region
  should be defined on all platforms, even those that
  lack all compression libraries.  They'll simply return
  nil on such platforms, since they can't compress or
  decompress anything.  This simplifies the C code and
  will simplify Lisp code too.




^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: More (de)compress?
  2013-08-19 16:41 ` Paul Eggert
@ 2013-08-19 16:57   ` Eli Zaretskii
  2013-08-20  8:19   ` Dmitry Antipov
  1 sibling, 0 replies; 8+ messages in thread
From: Eli Zaretskii @ 2013-08-19 16:57 UTC (permalink / raw)
  To: Paul Eggert; +Cc: dmantipov, emacs-devel

> Date: Mon, 19 Aug 2013 09:41:49 -0700
> From: Paul Eggert <eggert@cs.ucla.edu>
> Cc: Emacs development discussions <emacs-devel@gnu.org>
> 
> * The functions compress-region and decompress-region
>   should be defined on all platforms, even those that
>   lack all compression libraries.  They'll simply return
>   nil on such platforms, since they can't compress or
>   decompress anything.  This simplifies the C code and
>   will simplify Lisp code too.

I think this should somehow be integrated with jka-compr, so that
jka-compr could be the fallback.



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: More (de)compress?
  2013-08-19 16:41 ` Paul Eggert
  2013-08-19 16:57   ` Eli Zaretskii
@ 2013-08-20  8:19   ` Dmitry Antipov
  2013-08-20 14:32     ` Stefan Monnier
  2013-08-20 14:34     ` Stefan Monnier
  1 sibling, 2 replies; 8+ messages in thread
From: Dmitry Antipov @ 2013-08-20  8:19 UTC (permalink / raw)
  To: Paul Eggert; +Cc: Emacs development discussions

On 08/19/2013 08:41 PM, Paul Eggert wrote:

> * It can be faster to compress using an external program,
>    since the compression can be done in parallel.  Have you
>    timed your compression approach on a multicore platform,
>    and compared its real time to doing it with external
>    compression?  (Similarly for decompression, though I
>    expect there we won't find the external program faster.)
>    You might try "pigz" for compression, since it's multicore
>    internally.

It's faster because the buffer machinery is slower than external
compression program's input reader (at least, in case of gzip).
I tried to compress 959 small text files (~16Mb in total) with
'gzip *.txt' (0.67s), dired-compress-file (6.15s, but don't forget
about fork+exec overhead) and simple ad-hoc function using
compress-region and zlib method:

(defun compress-file (name method)
   (message "Compress %s" name)
   (let ((ext (cdr (assoc method '((zlib . "gz") (bzlib . "bz2") (lzma . "xz")))))
	(buffer (find-file-literally name)))
     (when (null ext) (error "Unsupported compression method '%S'" method))
     (save-excursion
       (set-buffer buffer)
       (compress-region method)
       (delete-file (buffer-file-name))
       (rename-buffer (concat (buffer-name) "." ext))
       (write-file (concat (buffer-file-name) "." ext))
       (kill-buffer))))

The latter version deliberately takes ~19s. Unfortunately internal
compression support can't replace calls to external programs, especially
in batch operations where we need to (de)compress multiple files at once.
But internal compression should have some advantages when we just need
to show the contents of compressed buffer (I didn't try to check this
yet, BTW).

> * There seems to be quite a bit of repetition in configure.ac
>    and in the C code -- each compression package does pretty
>    much the same thing with respect to allocating buffers,
>    saving point, etc.  Could this be factored out to simplify
>    the code and make it easier to add future compression
>    algorithms?

Yes.

> * bzlib_detect and lzma_detect mishandle the case where the
>    buffer gap is located very near the start of the buffer.

Argh, yes.

> * If the buffer contains random garbage,
>    (decompress-region nil 1 100000)
>    signals "Unsupported decompression method", which
>    isn't very clear.  It should signal something like
>    "Unknown compression format".
>
> * The functions compress-region and decompress-region
>    should be defined on all platforms, even those that
>    lack all compression libraries.  They'll simply return
>    nil on such platforms, since they can't compress or
>    decompress anything.  This simplifies the C code and
>    will simplify Lisp code too.

OK.

Dmitry




^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: More (de)compress?
  2013-08-20  8:19   ` Dmitry Antipov
@ 2013-08-20 14:32     ` Stefan Monnier
  2013-08-20 14:34     ` Stefan Monnier
  1 sibling, 0 replies; 8+ messages in thread
From: Stefan Monnier @ 2013-08-20 14:32 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: Paul Eggert, Emacs development discussions

>     (save-excursion
>       (set-buffer buffer)

You mean (with-current-buffer buffer


        Stefan



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: More (de)compress?
  2013-08-20  8:19   ` Dmitry Antipov
  2013-08-20 14:32     ` Stefan Monnier
@ 2013-08-20 14:34     ` Stefan Monnier
  1 sibling, 0 replies; 8+ messages in thread
From: Stefan Monnier @ 2013-08-20 14:34 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: Paul Eggert, Emacs development discussions

I'm not really interested in providing those functions to replace
existing uses.  IOW, if there's no new use for it (i.e. if it doesn't
enable new uses where running an external program is
impractical/impossible), then I think it's not worth the trouble.


        Stefan



^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2013-08-20 14:34 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-19 12:17 More (de)compress? Dmitry Antipov
2013-08-19 16:13 ` Eli Zaretskii
2013-08-19 16:28   ` Lars Magne Ingebrigtsen
2013-08-19 16:41 ` Paul Eggert
2013-08-19 16:57   ` Eli Zaretskii
2013-08-20  8:19   ` Dmitry Antipov
2013-08-20 14:32     ` Stefan Monnier
2013-08-20 14:34     ` Stefan Monnier

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).