all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: joakim@verona.se
To: emacs-devel@gnu.org
Subject: Re: Linking Emacs with libxml2
Date: Wed, 08 Sep 2010 20:17:33 +0200	[thread overview]
Message-ID: <m3mxrs85he.fsf@verona.se> (raw)
In-Reply-To: <m3vd6ggqjm.fsf@quimbies.gnus.org> (Lars Magne Ingebrigtsen's message of "Wed, 08 Sep 2010 18:15:25 +0200")

Maybe make a Savannah bzr branch for this then?

Lars Magne Ingebrigtsen <larsi@gnus.org> writes:

> I did it the hard way:
>
>
> === modified file 'ChangeLog'
> --- ChangeLog	2010-09-04 07:30:14 +0000
> +++ ChangeLog	2010-09-08 16:12:36 +0000
> @@ -1,3 +1,7 @@
> +2010-09-08  Lars Magne Ingebrigtsen  <larsi@gnus.org>
> +
> +	* configure.in: Check for libxml2/htmlReadMemory().
> +
>  2010-09-04  Eli Zaretskii  <eliz@gnu.org>
>  
>  	* config.bat: Produce lisp/gnus/_dir-locals.el from
>
> === modified file 'configure'
> --- configure	2010-08-23 12:54:09 +0000
> +++ configure	2010-09-08 15:55:18 +0000
> @@ -660,6 +660,8 @@
>  LIBS_MAIL
>  liblockfile
>  ALLOCA
> +LIBXML2_CFLAGS
> +LIBXML2_LIBS
>  LIBXSM
>  LIBGPM
>  LIBGIF
> @@ -11070,6 +11072,74 @@
>  fi
>  
>  
> +### Use libxml2 (-lxml2) if available
> +HAVE_LIBXML2=no
> +LIBXML2_LIBS=
> +if test -n xml2-config; then
> +  LIBXML2_CFLAGS="`xml2-config --cflags`"
> +  SAVE_CFLAGS="$CFLAGS"
> +  CFLAGS="$LIBXML2_CFLAGS $CFLAGS"
> +  ac_fn_c_check_header_mongrel "$LINENO" "libxml/xmlexports.h" "ac_cv_header_libxml_xmlexports_h" "$ac_includes_default"
> +if test "x$ac_cv_header_libxml_xmlexports_h" = x""yes; then :
> +  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for htmlReadMemory in -lxml2" >&5
> +$as_echo_n "checking for htmlReadMemory in -lxml2... " >&6; }
> +if test "${ac_cv_lib_xml2_htmlReadMemory+set}" = set; then :
> +  $as_echo_n "(cached) " >&6
> +else
> +  ac_check_lib_save_LIBS=$LIBS
> +LIBS="-lxml2 -lxml2 $LIBS"
> +cat confdefs.h - <<_ACEOF >conftest.$ac_ext
> +/* end confdefs.h.  */
> +
> +/* Override any GCC internal prototype to avoid an error.
> +   Use char because int might match the return type of a GCC
> +   builtin and then its argument prototype would still apply.  */
> +#ifdef __cplusplus
> +extern "C"
> +#endif
> +char htmlReadMemory ();
> +int
> +main ()
> +{
> +return htmlReadMemory ();
> +  ;
> +  return 0;
> +}
> +_ACEOF
> +if ac_fn_c_try_link "$LINENO"; then :
> +  ac_cv_lib_xml2_htmlReadMemory=yes
> +else
> +  ac_cv_lib_xml2_htmlReadMemory=no
> +fi
> +rm -f core conftest.err conftest.$ac_objext \
> +    conftest$ac_exeext conftest.$ac_ext
> +LIBS=$ac_check_lib_save_LIBS
> +fi
> +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_xml2_htmlReadMemory" >&5
> +$as_echo "$ac_cv_lib_xml2_htmlReadMemory" >&6; }
> +if test "x$ac_cv_lib_xml2_htmlReadMemory" = x""yes; then :
> +  HAVE_LIBXML2=yes
> +fi
> +
> +fi
> +
> +
> +
> +  if test "${HAVE_LIBXML2}" = "yes"; then
> +
> +$as_echo "#define HAVE_LIBXML2 1" >>confdefs.h
> +
> +    LIBXML2_LIBS="-lxml2"
> +    case "$LIBS" in
> +      *-lxml2*) ;;
> +      *)      LIBS="$LIBXML2_LIBS $LIBS" ;;
> +    esac
> +  fi
> +  CFLAGS="$SAVE_CFLAGS"
> +fi
> +
> +
> +
>  # If netdb.h doesn't declare h_errno, we must declare it by hand.
>  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether netdb declares h_errno" >&5
>  $as_echo_n "checking whether netdb declares h_errno... " >&6; }
>
> === modified file 'configure.in'
> --- configure.in	2010-08-23 12:54:09 +0000
> +++ configure.in	2010-09-08 15:55:38 +0000
> @@ -2535,6 +2535,29 @@
>  fi
>  AC_SUBST(LIBXSM)
>  
> +### Use libxml2 (-lxml2) if available
> +HAVE_LIBXML2=no
> +LIBXML2_LIBS=
> +if test -n xml2-config; then
> +  LIBXML2_CFLAGS="`xml2-config --cflags`"
> +  SAVE_CFLAGS="$CFLAGS"
> +  CFLAGS="$LIBXML2_CFLAGS $CFLAGS"
> +  AC_CHECK_HEADER(libxml/xmlversion.h,
> +    [AC_CHECK_LIB(xml2, htmlReadMemory, HAVE_LIBXML2=yes, , -lxml2)])
> +
> +  if test "${HAVE_LIBXML2}" = "yes"; then
> +    AC_DEFINE(HAVE_LIBXML2, 1, [Define to 1 if you have the libxml2 library (-lxml2).])
> +    LIBXML2_LIBS="-lxml2"
> +    case "$LIBS" in
> +      *-lxml2*) ;;
> +      *)      LIBS="$LIBXML2_LIBS $LIBS" ;;
> +    esac
> +  fi
> +  CFLAGS="$SAVE_CFLAGS"
> +fi
> +AC_SUBST(LIBXML2_LIBS)
> +AC_SUBST(LIBXML2_CFLAGS)
> +
>  # If netdb.h doesn't declare h_errno, we must declare it by hand.
>  AC_CACHE_CHECK(whether netdb declares h_errno,
>  	       emacs_cv_netdb_declares_h_errno,
>
> === modified file 'src/ChangeLog'
> --- src/ChangeLog	2010-09-05 02:06:39 +0000
> +++ src/ChangeLog	2010-09-08 16:12:09 +0000
> @@ -1,3 +1,9 @@
> +2010-09-08  Lars Magne Ingebrigtsen  <larsi@gnus.org>
> +
> +	* xml.c: New file.
> +	(Fhtml_parse_buffer): New function to interface to the libxml2
> +	html parsing function.
> +
>  2010-09-05  Juanma Barranquero  <lekktu@gmail.com>
>  
>  	* biditype.h: Regenerate.
>
> === modified file 'src/Makefile.in'
> --- src/Makefile.in	2010-08-17 21:19:11 +0000
> +++ src/Makefile.in	2010-09-08 15:52:01 +0000
> @@ -226,6 +226,9 @@
>  IMAGEMAGICK_LIBS= @IMAGEMAGICK_LIBS@
>  IMAGEMAGICK_CFLAGS= @IMAGEMAGICK_CFLAGS@
>  
> +LIBXML2_LIBS = @LIBXML2_LIBS@
> +LIBXML2_CFLAGS = @LIBXML2_CFLAGS@
> +
>  
>  ## widget.o if USE_X_TOOLKIT, otherwise empty.
>  WIDGET_OBJ=@WIDGET_OBJ@
> @@ -320,7 +323,8 @@
>  ## FIXME? MYCPPFLAGS only referenced in etc/DEBUG.
>  ALL_CFLAGS=-Demacs -DHAVE_CONFIG_H $(MYCPPFLAGS) -I. -I${srcdir} \
>    ${C_SWITCH_MACHINE} ${C_SWITCH_SYSTEM} ${C_SWITCH_X_SITE} \
> -  ${C_SWITCH_X_SYSTEM} ${CFLAGS_SOUND} ${RSVG_CFLAGS} ${IMAGEMAGICK_CFLAGS} ${DBUS_CFLAGS} \
> +  ${C_SWITCH_X_SYSTEM} ${CFLAGS_SOUND} ${RSVG_CFLAGS} ${IMAGEMAGICK_CFLAGS} \
> +  ${LIBXML2_CFLAGS} ${DBUS_CFLAGS} \
>    ${GCONF_CFLAGS} ${FREETYPE_CFLAGS} ${FONTCONFIG_CFLAGS} \
>    ${LIBOTF_CFLAGS} ${M17N_FLT_CFLAGS} ${DEPFLAGS} ${PROFILING_CFLAGS} \
>    ${C_WARNINGS_SWITCH} ${CFLAGS}
> @@ -349,7 +353,7 @@
>  	syntax.o $(UNEXEC_OBJ) bytecode.o \
>  	process.o callproc.o \
>  	region-cache.o sound.o atimer.o \
> -	doprnt.o strftime.o intervals.o textprop.o composite.o md5.o \
> +	doprnt.o strftime.o intervals.o textprop.o composite.o md5.o xml.o \
>  	$(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ)
>  
>  ## Object files used on some machine or other.
> @@ -595,7 +599,8 @@
>  ## duplicated symbols.  If the standard libraries were compiled
>  ## with GCC, we might need LIB_GCC again after them.
>  LIBES = $(LIBS) $(LIBX_BASE) $(LIBX_OTHER) $(LIBSOUND) \
> -   $(RSVG_LIBS) ${IMAGEMAGICK_LIBS}  $(DBUS_LIBS) $(LIBGPM) $(LIBRESOLV) $(LIBS_SYSTEM) \
> +   $(RSVG_LIBS) ${IMAGEMAGICK_LIBS} $(DBUS_LIBS) \
> +   ${LIBXML2_LIBS} $(LIBGPM) $(LIBRESOLV) $(LIBS_SYSTEM) \
>     $(LIBS_TERMCAP) $(GETLOADAVG_LIBS) ${GCONF_LIBS} ${LIBSELINUX_LIBS} \
>     $(FREETYPE_LIBS) $(FONTCONFIG_LIBS) $(LIBOTF_LIBS) $(M17N_FLT_LIBS) \
>     $(LIB_GCC) $(LIB_MATH) $(LIB_STANDARD) $(LIB_GCC)
>
> === modified file 'src/config.in'
> --- src/config.in	2010-08-17 21:19:11 +0000
> +++ src/config.in	2010-09-08 15:37:34 +0000
> @@ -813,6 +813,9 @@
>  /* Define to 1 if you have the SM library (-lSM). */
>  #undef HAVE_X_SM
>  
> +/* Define to 1 if you have the libxml2 library (-lxml2). */
> +#undef HAVE_LIBXML2
> +
>  /* Define to 1 if you want to use the X window system. */
>  #undef HAVE_X_WINDOWS
>  
>
> === modified file 'src/emacs.c'
> --- src/emacs.c	2010-08-22 21:15:20 +0000
> +++ src/emacs.c	2010-09-08 13:39:17 +0000
> @@ -1543,6 +1543,7 @@
>        syms_of_xselect ();
>  #endif
>  #endif /* HAVE_X_WINDOWS */
> +      syms_of_xml ();
>  
>        syms_of_menu ();
>  
>
> === modified file 'src/lisp.h'
> --- src/lisp.h	2010-08-09 19:25:41 +0000
> +++ src/lisp.h	2010-09-08 13:40:50 +0000
> @@ -3559,6 +3559,9 @@
>  /* Defined in xsmfns.c */
>  extern void syms_of_xsmfns (void);
>  
> +/* Defined in xml.c */
> +extern void syms_of_xml (void);
> +
>  /* Defined in xselect.c */
>  EXFUN (Fx_send_client_event, 6);
>  extern void syms_of_xselect (void);
>
> === added file 'src/xml.c'
> --- src/xml.c	1970-01-01 00:00:00 +0000
> +++ src/xml.c	2010-09-08 16:10:36 +0000
> @@ -0,0 +1,131 @@
> +/* Interface to libxml2.
> +   Copyright (C) 2010 Free Software Foundation, Inc.
> +
> +This file is part of GNU Emacs.
> +
> +GNU Emacs is free software: you can redistribute it and/or modify
> +it under the terms of the GNU General Public License as published by
> +the Free Software Foundation, either version 3 of the License, or
> +(at your option) any later version.
> +
> +GNU Emacs is distributed in the hope that it will be useful,
> +but WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +GNU General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#include <config.h>
> +
> +#ifdef HAVE_LIBXML2
> +
> +#include <sys/param.h>
> +#include <stdio.h>
> +#include <setjmp.h>
> +#include <libxml/tree.h>
> +#include <libxml/parser.h>
> +#include <libxml/HTMLparser.h>
> +
> +#include "lisp.h"
> +#include "systime.h"
> +#include "sysselect.h"
> +#include "frame.h"
> +#include "buffer.h"
> +
> +Lisp_Object make_dom (xmlNode *node)
> +{
> +  Lisp_Object result = Qnil;
> +  xmlNode *child;
> +  xmlAttr *property;
> +
> +  if (node != NULL) {
> +    result = Fcons (Fintern (build_string (node->name),
> +			     Vobarray),
> +		    Qnil);
> +    property = node->properties;
> +    while (property != NULL) {
> +      if (property->children &&
> +	   property->children->content) {
> +	char *pname = xmalloc(strlen(property->name) + 2);
> +	*pname = ':';
> +	strcpy(pname + 1, property->name);
> +	result = Fcons (Fcons (Fintern (build_string (pname), Vobarray),
> +			       build_string(property->children->content)),
> +			result);
> +	xfree (pname);
> +      }
> +      property = property->next;
> +    }
> +    child = node->children;
> +    while (child != NULL) {
> +      result = Fcons (make_dom (child), result);
> +      child = child->next;
> +    }
> +    if (node->content)
> +      result = Fcons (Fcons (Fintern (build_string ("text"), Vobarray),
> +			     build_string(node->content)),
> +		      result);
> +  }
> +  return Fnreverse(result);
> +}
> +
> +DEFUN ("html-parse-buffer", Fhtml_parse_buffer, Shtml_parse_buffer,
> +       0, 1, 0,
> +       doc: /* Parse the buffer as an HTML document and return the parse tree.*/)
> +  (Lisp_Object object)
> +{
> +  xmlDoc *doc;
> +  struct buffer *buffer;
> +  xmlNode *node;
> +  unsigned char *string, *s;
> +  Lisp_Object result;
> +  int ibeg, iend;
> +
> +  LIBXML_TEST_VERSION
> +	
> +  if (NILP (object))
> +    buffer = current_buffer;
> +  else {
> +    CHECK_BUFFER (object);
> +    buffer = XBUFFER (object);
> +  }
> +
> +  ibeg = CHAR_TO_BYTE (XFASTINT (Fpoint_min ()));
> +  iend = CHAR_TO_BYTE (XFASTINT (Fpoint_max ()));
> +  move_gap_both (XFASTINT (Fpoint_min ()), ibeg);
> +  
> +  string = (unsigned char *) xmalloc (iend - ibeg + 1);
> +  s = string;
> +  
> +  while (ibeg < iend) {
> +    *s++ = *(BYTE_POS_ADDR (ibeg));
> +    ibeg++;
> +  }
> +  *s = 0;
> +  
> +  doc = htmlReadMemory (string, strlen(string), "", "utf-8", 0);
> +
> +  if (doc == NULL)
> +    return Qnil;
> +
> +  node = xmlDocGetRootElement (doc);
> +  result = make_dom (node);
> +  
> +  xmlFreeDoc(doc);
> +  xmlCleanupParser();
> +      
> +  return result;
> +}
> +
> +\f
> +/***********************************************************************
> +			    Initialization
> + ***********************************************************************/
> +void
> +syms_of_xml (void)
> +{
> +  defsubr (&Shtml_parse_buffer);
> +}
> +
> +#endif /* HAVE_LIBXML2 */
>
>
>
> This compiles and works for me, but I'm not really an Emacs internals
> expert.  Ahem.
>
> Or an autoconf one, for that matter.  ./configure finds the stuff it's
> looking for, but I get this warning:
>
> -------
> [larsi@quimbies ~/src/emacs/trunk]$ ./configure  | grep xml
> checking libxml/xmlversion.h usability... yes
> checking libxml/xmlversion.h presence... no
> configure: WARNING: libxml/xmlversion.h: accepted by the compiler, rejected by the preprocessor!
> configure: WARNING: libxml/xmlversion.h: proceeding with the compiler's result
> checking for libxml/xmlversion.h... yes
> checking for htmlReadMemory in -lxml2... yes
> -------
>
> I'm not sure what that means...

-- 
Joakim Verona



  reply	other threads:[~2010-09-08 18:17 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-06 15:21 Linking Emacs with libxml2 Lars Magne Ingebrigtsen
2010-09-06 15:54 ` Wojciech Meyer
2010-09-06 18:26 ` Chad Brown
2010-09-06 21:01   ` Lars Magne Ingebrigtsen
2010-09-06 18:44 ` Lennart Borgman
2010-09-06 18:56   ` Chad Brown
2010-09-06 19:08     ` Chong Yidong
2010-09-06 19:17     ` joakim
2010-09-07  0:36       ` Jason Rumney
2010-09-07  0:58         ` Lars Magne Ingebrigtsen
2010-09-08 14:10           ` Lars Magne Ingebrigtsen
2010-09-08 14:25             ` Andreas Schwab
2010-09-08 14:40             ` Stefan Monnier
2010-09-08 15:16               ` Lars Magne Ingebrigtsen
2010-09-08 16:15                 ` Lars Magne Ingebrigtsen
2010-09-08 18:17                   ` joakim [this message]
2010-09-08 18:19                     ` Lars Magne Ingebrigtsen
2010-09-08 19:10                   ` Andreas Schwab
2010-09-08 20:11                     ` Lars Magne Ingebrigtsen
2010-09-08 20:30                       ` Lars Magne Ingebrigtsen
2010-09-08 20:58                         ` Lars Magne Ingebrigtsen
2010-09-08 21:51                           ` Andreas Schwab
2010-09-08 21:54                             ` Lars Magne Ingebrigtsen
2010-09-09 17:00                             ` Stefan Monnier
2010-09-09 21:56                               ` Lars Magne Ingebrigtsen
2010-09-09 22:28                                 ` Stefan Monnier
2010-09-09 22:37                                   ` Lars Magne Ingebrigtsen
2010-09-10  8:14                                     ` Andreas Schwab
2010-09-10 10:46                                       ` Stefan Monnier
2010-09-10 10:56                                         ` Lars Magne Ingebrigtsen
2010-09-10 12:37                                           ` Lars Magne Ingebrigtsen
2010-09-10 16:47                                             ` Lars Magne Ingebrigtsen
2010-09-10 16:54                                               ` Lars Magne Ingebrigtsen
2010-09-10 17:05                                                 ` Ted Zlatanov
2010-09-10 17:14                                                   ` Lars Magne Ingebrigtsen
2010-09-10 17:34                                                 ` Glenn Morris
2010-09-10 17:41                                                   ` Glenn Morris
2010-09-10 17:44                                                     ` Lars Magne Ingebrigtsen
2010-09-10 18:39                                                       ` Ted Zlatanov
2010-09-12 16:56                                                       ` Andreas Schwab
2010-09-12 17:05                                                         ` Lars Magne Ingebrigtsen
2010-09-10 21:12                                               ` Chad Brown
2010-09-10 21:40                                                 ` Lars Magne Ingebrigtsen
2010-09-10 22:45                                                   ` chad
2010-09-10 23:19                                                     ` Lars Magne Ingebrigtsen
2010-09-11  7:18                                                       ` Andreas Schwab
2010-09-11 12:48                                                         ` Lars Magne Ingebrigtsen
2010-09-13 18:37                                                 ` Leo
2010-09-13 18:49                                                   ` Lars Magne Ingebrigtsen
2010-09-13 19:08                                                     ` Leo
2010-09-13 19:16                                                   ` Chad Brown
2010-09-13 19:23                                                     ` Chad Brown
2010-09-13 22:24                                                     ` Leo
2010-09-13 16:06                                               ` Christian Faulhammer
2010-09-10 11:37                                         ` Andreas Schwab
2010-09-10 14:12                                         ` Andrew W. Nosenko
2010-09-09  8:35                           ` Christian Faulhammer
2010-09-09 10:33                             ` Lars Magne Ingebrigtsen
2010-09-09 11:07                               ` Christian Faulhammer
2010-09-09 11:09                                 ` Lars Magne Ingebrigtsen
2010-09-06 19:19 ` Chong Yidong
2010-09-06 21:03   ` Lars Magne Ingebrigtsen
2010-09-15  0:55   ` Eric M. Ludlam
2010-09-15 15:52     ` Ted Zlatanov
2010-09-06 21:08 ` Stefan Monnier
2010-09-06 21:17   ` Lars Magne Ingebrigtsen
2010-09-06 21:30     ` joakim
2010-09-07  1:40     ` Chad Brown
2010-09-07  1:47       ` Lars Magne Ingebrigtsen
2010-09-06 21:18   ` Lennart Borgman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=m3mxrs85he.fsf@verona.se \
    --to=joakim@verona.se \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.