all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: Lars Magne Ingebrigtsen <larsi@gnus.org>
To: emacs-devel@gnu.org
Subject: Re: Linking Emacs with libxml2
Date: Wed, 08 Sep 2010 22:58:05 +0200	[thread overview]
Message-ID: <m37hiw2bs2.fsf@quimbies.gnus.org> (raw)
In-Reply-To: m3bp882d23.fsf@quimbies.gnus.org

[-- Attachment #1: Type: text/plain, Size: 22 bytes --]

And here's take two:


[-- Attachment #2: libxml.diff-2 --]
[-- Type: application/octet-stream, Size: 11581 bytes --]

=== modified file 'ChangeLog'
--- ChangeLog	2010-09-04 07:30:14 +0000
+++ ChangeLog	2010-09-08 16:12:36 +0000
@@ -1,3 +1,7 @@
+2010-09-08  Lars Magne Ingebrigtsen  <larsi@gnus.org>
+
+	* configure.in: Check for libxml2/htmlReadMemory().
+
 2010-09-04  Eli Zaretskii  <eliz@gnu.org>
 
 	* config.bat: Produce lisp/gnus/_dir-locals.el from

=== modified file 'configure'
--- configure	2010-08-23 12:54:09 +0000
+++ configure	2010-09-08 20:50:02 +0000
@@ -660,6 +660,8 @@
 LIBS_MAIL
 liblockfile
 ALLOCA
+LIBXML2_LIBS
+LIBXML2_CFLAGS
 LIBXSM
 LIBGPM
 LIBGIF
@@ -11070,6 +11072,109 @@
 fi
 
 
+### Use libxml (-lxml2) if available
+
+  succeeded=no
+
+  # Extract the first word of "pkg-config", so it can be a program name with args.
+set dummy pkg-config; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_path_PKG_CONFIG+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $PKG_CONFIG in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  test -z "$ac_cv_path_PKG_CONFIG" && ac_cv_path_PKG_CONFIG="no"
+  ;;
+esac
+fi
+PKG_CONFIG=$ac_cv_path_PKG_CONFIG
+if test -n "$PKG_CONFIG"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5
+$as_echo "$PKG_CONFIG" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+
+  if test "$PKG_CONFIG" = "no" ; then
+     HAVE_LIBXML2=no
+  else
+     PKG_CONFIG_MIN_VERSION=0.9.0
+     if $PKG_CONFIG --atleast-pkgconfig-version $PKG_CONFIG_MIN_VERSION; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libxml-2.0 > 2.5.0" >&5
+$as_echo_n "checking for libxml-2.0 > 2.5.0... " >&6; }
+
+        if $PKG_CONFIG --exists "libxml-2.0 > 2.5.0" 2>&5; then
+            { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+            succeeded=yes
+
+            { $as_echo "$as_me:${as_lineno-$LINENO}: checking LIBXML2_CFLAGS" >&5
+$as_echo_n "checking LIBXML2_CFLAGS... " >&6; }
+            LIBXML2_CFLAGS=`$PKG_CONFIG --cflags "libxml-2.0 > 2.5.0"|sed -e 's,///*,/,g'`
+            { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIBXML2_CFLAGS" >&5
+$as_echo "$LIBXML2_CFLAGS" >&6; }
+
+            { $as_echo "$as_me:${as_lineno-$LINENO}: checking LIBXML2_LIBS" >&5
+$as_echo_n "checking LIBXML2_LIBS... " >&6; }
+            LIBXML2_LIBS=`$PKG_CONFIG --libs "libxml-2.0 > 2.5.0"|sed -e 's,///*,/,g'`
+            { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIBXML2_LIBS" >&5
+$as_echo "$LIBXML2_LIBS" >&6; }
+        else
+            { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+            LIBXML2_CFLAGS=""
+            LIBXML2_LIBS=""
+            ## If we have a custom action on failure, don't print errors, but
+            ## do set a variable so people can do so.
+            LIBXML2_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "libxml-2.0 > 2.5.0"`
+
+        fi
+
+
+
+     else
+        echo "*** Your version of pkg-config is too old. You need version $PKG_CONFIG_MIN_VERSION or newer."
+        echo "*** See http://www.freedesktop.org/software/pkgconfig"
+     fi
+  fi
+
+  if test $succeeded = yes; then
+     HAVE_LIBXML2=yes
+  else
+     HAVE_LIBXML2=no
+  fi
+
+
+
+if test "${HAVE_LIBXML2}" = "yes"; then
+
+$as_echo "#define HAVE_LIBXML2 1" >>confdefs.h
+
+fi
+
 # If netdb.h doesn't declare h_errno, we must declare it by hand.
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether netdb declares h_errno" >&5
 $as_echo_n "checking whether netdb declares h_errno... " >&6; }

=== modified file 'configure.in'
--- configure.in	2010-08-23 12:54:09 +0000
+++ configure.in	2010-09-08 20:50:00 +0000
@@ -2535,6 +2535,14 @@
 fi
 AC_SUBST(LIBXSM)
 
+### Use libxml (-lxml2) if available
+PKG_CHECK_MODULES(LIBXML2, libxml-2.0 > 2.5.0, HAVE_LIBXML2=yes, HAVE_LIBXML2=no)
+AC_SUBST(LIBXML2_LIBS)
+AC_SUBST(LIBXML2_CFLAGS)
+if test "${HAVE_LIBXML2}" = "yes"; then
+  AC_DEFINE(HAVE_LIBXML2, 1, [Define to 1 if you have the libxml2 library (-lxml2).])
+fi
+
 # If netdb.h doesn't declare h_errno, we must declare it by hand.
 AC_CACHE_CHECK(whether netdb declares h_errno,
 	       emacs_cv_netdb_declares_h_errno,

=== modified file 'src/ChangeLog'
--- src/ChangeLog	2010-09-05 02:06:39 +0000
+++ src/ChangeLog	2010-09-08 16:12:09 +0000
@@ -1,3 +1,9 @@
+2010-09-08  Lars Magne Ingebrigtsen  <larsi@gnus.org>
+
+	* xml.c: New file.
+	(Fhtml_parse_buffer): New function to interface to the libxml2
+	html parsing function.
+
 2010-09-05  Juanma Barranquero  <lekktu@gmail.com>
 
 	* biditype.h: Regenerate.

=== modified file 'src/Makefile.in'
--- src/Makefile.in	2010-08-17 21:19:11 +0000
+++ src/Makefile.in	2010-09-08 15:52:01 +0000
@@ -226,6 +226,9 @@
 IMAGEMAGICK_LIBS= @IMAGEMAGICK_LIBS@
 IMAGEMAGICK_CFLAGS= @IMAGEMAGICK_CFLAGS@
 
+LIBXML2_LIBS = @LIBXML2_LIBS@
+LIBXML2_CFLAGS = @LIBXML2_CFLAGS@
+
 
 ## widget.o if USE_X_TOOLKIT, otherwise empty.
 WIDGET_OBJ=@WIDGET_OBJ@
@@ -320,7 +323,8 @@
 ## FIXME? MYCPPFLAGS only referenced in etc/DEBUG.
 ALL_CFLAGS=-Demacs -DHAVE_CONFIG_H $(MYCPPFLAGS) -I. -I${srcdir} \
   ${C_SWITCH_MACHINE} ${C_SWITCH_SYSTEM} ${C_SWITCH_X_SITE} \
-  ${C_SWITCH_X_SYSTEM} ${CFLAGS_SOUND} ${RSVG_CFLAGS} ${IMAGEMAGICK_CFLAGS} ${DBUS_CFLAGS} \
+  ${C_SWITCH_X_SYSTEM} ${CFLAGS_SOUND} ${RSVG_CFLAGS} ${IMAGEMAGICK_CFLAGS} \
+  ${LIBXML2_CFLAGS} ${DBUS_CFLAGS} \
   ${GCONF_CFLAGS} ${FREETYPE_CFLAGS} ${FONTCONFIG_CFLAGS} \
   ${LIBOTF_CFLAGS} ${M17N_FLT_CFLAGS} ${DEPFLAGS} ${PROFILING_CFLAGS} \
   ${C_WARNINGS_SWITCH} ${CFLAGS}
@@ -349,7 +353,7 @@
 	syntax.o $(UNEXEC_OBJ) bytecode.o \
 	process.o callproc.o \
 	region-cache.o sound.o atimer.o \
-	doprnt.o strftime.o intervals.o textprop.o composite.o md5.o \
+	doprnt.o strftime.o intervals.o textprop.o composite.o md5.o xml.o \
 	$(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ)
 
 ## Object files used on some machine or other.
@@ -595,7 +599,8 @@
 ## duplicated symbols.  If the standard libraries were compiled
 ## with GCC, we might need LIB_GCC again after them.
 LIBES = $(LIBS) $(LIBX_BASE) $(LIBX_OTHER) $(LIBSOUND) \
-   $(RSVG_LIBS) ${IMAGEMAGICK_LIBS}  $(DBUS_LIBS) $(LIBGPM) $(LIBRESOLV) $(LIBS_SYSTEM) \
+   $(RSVG_LIBS) ${IMAGEMAGICK_LIBS} $(DBUS_LIBS) \
+   ${LIBXML2_LIBS} $(LIBGPM) $(LIBRESOLV) $(LIBS_SYSTEM) \
    $(LIBS_TERMCAP) $(GETLOADAVG_LIBS) ${GCONF_LIBS} ${LIBSELINUX_LIBS} \
    $(FREETYPE_LIBS) $(FONTCONFIG_LIBS) $(LIBOTF_LIBS) $(M17N_FLT_LIBS) \
    $(LIB_GCC) $(LIB_MATH) $(LIB_STANDARD) $(LIB_GCC)

=== modified file 'src/config.in'
--- src/config.in	2010-08-17 21:19:11 +0000
+++ src/config.in	2010-09-08 15:37:34 +0000
@@ -813,6 +813,9 @@
 /* Define to 1 if you have the SM library (-lSM). */
 #undef HAVE_X_SM
 
+/* Define to 1 if you have the libxml2 library (-lxml2). */
+#undef HAVE_LIBXML2
+
 /* Define to 1 if you want to use the X window system. */
 #undef HAVE_X_WINDOWS
 

=== modified file 'src/emacs.c'
--- src/emacs.c	2010-08-22 21:15:20 +0000
+++ src/emacs.c	2010-09-08 13:39:17 +0000
@@ -1543,6 +1543,10 @@
       syms_of_xselect ();
 #endif
 #endif /* HAVE_X_WINDOWS */
+      /* xml.c only defines syms_of_xml when libxml2 is available.  */
+#ifdef HAVE_LIBXML2
+      syms_of_xml ();
+#endif /* HAVE_LIBXML2 */
 
       syms_of_menu ();
 

=== modified file 'src/lisp.h'
--- src/lisp.h	2010-08-09 19:25:41 +0000
+++ src/lisp.h	2010-09-08 13:40:50 +0000
@@ -3559,6 +3559,9 @@
 /* Defined in xsmfns.c */
 extern void syms_of_xsmfns (void);
 
+/* Defined in xml.c */
+extern void syms_of_xml (void);
+
 /* Defined in xselect.c */
 EXFUN (Fx_send_client_event, 6);
 extern void syms_of_xselect (void);

=== added file 'src/xml.c'
--- src/xml.c	1970-01-01 00:00:00 +0000
+++ src/xml.c	2010-09-08 20:54:02 +0000
@@ -0,0 +1,152 @@
+/* Interface to libxml2.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#ifdef HAVE_LIBXML2
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <setjmp.h>
+#include <libxml/tree.h>
+#include <libxml/parser.h>
+#include <libxml/HTMLparser.h>
+
+#include "lisp.h"
+#include "systime.h"
+#include "sysselect.h"
+#include "frame.h"
+#include "buffer.h"
+
+/* Convert the libxml2 node NODE, and everything below it, into a Lisp
+   list of the form
+
+     (NAME (:ATTR . "value")... CHILD... (text . "content"))
+
+   where NAME is the node name interned as a symbol, each attribute
+   that has a value becomes a cons whose car is the attribute name with
+   a leading colon, each child is converted recursively, and the node's
+   own content, if any, comes last.  Return nil if NODE is NULL.  */
+
+Lisp_Object
+make_dom (xmlNode *node)
+{
+  Lisp_Object result;
+  xmlNode *child;
+  xmlAttr *property;
+
+  if (node == NULL)
+    return Qnil;
+
+  /* The list is accumulated back to front and reversed on return.  */
+  result = Fcons (intern ((const char *) node->name), Qnil);
+
+  for (property = node->properties; property != NULL;
+       property = property->next)
+    {
+      const char *name, *value;
+      char *key;
+
+      /* Attributes without a value are left out of the result.  */
+      if (property->children == NULL
+          || property->children->content == NULL)
+        continue;
+
+      name = (const char *) property->name;
+      value = (const char *) property->children->content;
+
+      /* Build ":NAME": one extra byte for the colon and one for the
+         terminating NUL.  */
+      key = xmalloc (strlen (name) + 2);
+      key[0] = ':';
+      strcpy (key + 1, name);
+      result = Fcons (Fcons (intern (key), build_string (value)), result);
+      xfree (key);
+    }
+
+  for (child = node->children; child != NULL; child = child->next)
+    result = Fcons (make_dom (child), result);
+
+  if (node->content != NULL)
+    result = Fcons (Fcons (intern ("text"),
+                           build_string ((const char *) node->content)),
+                    result);
+
+  return Fnreverse (result);
+}
+
+DEFUN ("html-parse-buffer", Fhtml_parse_buffer, Shtml_parse_buffer,
+       0, 1, 0,
+       doc: /* Parse the buffer as an HTML document and return the parse tree.
+OBJECT is the buffer to parse; if omitted or nil, use the current buffer.
+Only the accessible portion of the buffer is parsed.
+Return nil if libxml2 could not produce a document.  */)
+  (Lisp_Object object)
+{
+  xmlDoc *doc;
+  Lisp_Object result = Qnil, string;
+  int count = SPECPDL_INDEX ();
+
+  LIBXML_TEST_VERSION
+
+  /* Restore the current buffer on every exit path, including non-local
+     exits from the calls below.  */
+  record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
+
+  if (!NILP (object))
+    {
+      CHECK_BUFFER (object);
+      /* Unlike set_buffer_internal, Fset_buffer rejects a killed buffer.  */
+      Fset_buffer (object);
+    }
+
+  /* ZV, not Z, so that narrowing is honored at both ends.  */
+  string = make_buffer_string (BEGV, ZV, 0);
+
+  /* Parse without network access and without libxml2 writing
+     diagnostics to stderr.  */
+  doc = htmlReadMemory ((const char *) SDATA (string), SBYTES (string),
+                        "", "utf-8",
+                        HTML_PARSE_NONET | HTML_PARSE_NOERROR
+                        | HTML_PARSE_NOWARNING);
+
+  if (doc != NULL)
+    {
+      result = make_dom (xmlDocGetRootElement (doc));
+      xmlFreeDoc (doc);
+    }
+
+  /* xmlCleanupParser is deliberately not called: it tears down
+     library-wide state and is meant to be used once, when the process
+     is finished with libxml2, not after each parse.  */
+
+  return unbind_to (count, result);
+}
+
+\f
+/***********************************************************************
+			    Initialization
+ ***********************************************************************/
+/* Define the Lisp primitives implemented in this file.  */
+void
+syms_of_xml (void)
+{
+  defsubr (&Shtml_parse_buffer);
+}
+
+#endif /* HAVE_LIBXML2 */


[-- Attachment #3: Type: text/plain, Size: 103 bytes --]


-- 
(domestic pets only, the antidote for overdose, milk.)
  larsi@gnus.org * Lars Magne Ingebrigtsen

  reply	other threads:[~2010-09-08 20:58 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-06 15:21 Linking Emacs with libxml2 Lars Magne Ingebrigtsen
2010-09-06 15:54 ` Wojciech Meyer
2010-09-06 18:26 ` Chad Brown
2010-09-06 21:01   ` Lars Magne Ingebrigtsen
2010-09-06 18:44 ` Lennart Borgman
2010-09-06 18:56   ` Chad Brown
2010-09-06 19:08     ` Chong Yidong
2010-09-06 19:17     ` joakim
2010-09-07  0:36       ` Jason Rumney
2010-09-07  0:58         ` Lars Magne Ingebrigtsen
2010-09-08 14:10           ` Lars Magne Ingebrigtsen
2010-09-08 14:25             ` Andreas Schwab
2010-09-08 14:40             ` Stefan Monnier
2010-09-08 15:16               ` Lars Magne Ingebrigtsen
2010-09-08 16:15                 ` Lars Magne Ingebrigtsen
2010-09-08 18:17                   ` joakim
2010-09-08 18:19                     ` Lars Magne Ingebrigtsen
2010-09-08 19:10                   ` Andreas Schwab
2010-09-08 20:11                     ` Lars Magne Ingebrigtsen
2010-09-08 20:30                       ` Lars Magne Ingebrigtsen
2010-09-08 20:58                         ` Lars Magne Ingebrigtsen [this message]
2010-09-08 21:51                           ` Andreas Schwab
2010-09-08 21:54                             ` Lars Magne Ingebrigtsen
2010-09-09 17:00                             ` Stefan Monnier
2010-09-09 21:56                               ` Lars Magne Ingebrigtsen
2010-09-09 22:28                                 ` Stefan Monnier
2010-09-09 22:37                                   ` Lars Magne Ingebrigtsen
2010-09-10  8:14                                     ` Andreas Schwab
2010-09-10 10:46                                       ` Stefan Monnier
2010-09-10 10:56                                         ` Lars Magne Ingebrigtsen
2010-09-10 12:37                                           ` Lars Magne Ingebrigtsen
2010-09-10 16:47                                             ` Lars Magne Ingebrigtsen
2010-09-10 16:54                                               ` Lars Magne Ingebrigtsen
2010-09-10 17:05                                                 ` Ted Zlatanov
2010-09-10 17:14                                                   ` Lars Magne Ingebrigtsen
2010-09-10 17:34                                                 ` Glenn Morris
2010-09-10 17:41                                                   ` Glenn Morris
2010-09-10 17:44                                                     ` Lars Magne Ingebrigtsen
2010-09-10 18:39                                                       ` Ted Zlatanov
2010-09-12 16:56                                                       ` Andreas Schwab
2010-09-12 17:05                                                         ` Lars Magne Ingebrigtsen
2010-09-10 21:12                                               ` Chad Brown
2010-09-10 21:40                                                 ` Lars Magne Ingebrigtsen
2010-09-10 22:45                                                   ` chad
2010-09-10 23:19                                                     ` Lars Magne Ingebrigtsen
2010-09-11  7:18                                                       ` Andreas Schwab
2010-09-11 12:48                                                         ` Lars Magne Ingebrigtsen
2010-09-13 18:37                                                 ` Leo
2010-09-13 18:49                                                   ` Lars Magne Ingebrigtsen
2010-09-13 19:08                                                     ` Leo
2010-09-13 19:16                                                   ` Chad Brown
2010-09-13 19:23                                                     ` Chad Brown
2010-09-13 22:24                                                     ` Leo
2010-09-13 16:06                                               ` Christian Faulhammer
2010-09-10 11:37                                         ` Andreas Schwab
2010-09-10 14:12                                         ` Andrew W. Nosenko
2010-09-09  8:35                           ` Christian Faulhammer
2010-09-09 10:33                             ` Lars Magne Ingebrigtsen
2010-09-09 11:07                               ` Christian Faulhammer
2010-09-09 11:09                                 ` Lars Magne Ingebrigtsen
2010-09-06 19:19 ` Chong Yidong
2010-09-06 21:03   ` Lars Magne Ingebrigtsen
2010-09-15  0:55   ` Eric M. Ludlam
2010-09-15 15:52     ` Ted Zlatanov
2010-09-06 21:08 ` Stefan Monnier
2010-09-06 21:17   ` Lars Magne Ingebrigtsen
2010-09-06 21:30     ` joakim
2010-09-07  1:40     ` Chad Brown
2010-09-07  1:47       ` Lars Magne Ingebrigtsen
2010-09-06 21:18   ` Lennart Borgman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=m37hiw2bs2.fsf@quimbies.gnus.org \
    --to=larsi@gnus.org \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.