From mboxrd@z Thu Jan 1 00:00:00 1970 From: Eric Bavier Subject: [PATCH 3/4] gnu: Add Swish-e. Date: Tue, 23 Aug 2016 01:15:11 -0500 Message-ID: <20160823061512.13024-3-ericbavier@openmailbox.org> References: <20160823061512.13024-1-ericbavier@openmailbox.org> Return-path: Received: from eggs.gnu.org ([2001:4830:134:3::10]:39299) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bc4zx-0006ys-Ed for guix-devel@gnu.org; Tue, 23 Aug 2016 02:15:49 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1bc4zu-0005vv-It for guix-devel@gnu.org; Tue, 23 Aug 2016 02:15:44 -0400 Received: from mail2.openmailbox.org ([62.4.1.33]:34153) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bc4zu-0005ve-8w for guix-devel@gnu.org; Tue, 23 Aug 2016 02:15:42 -0400 In-Reply-To: <20160823061512.13024-1-ericbavier@openmailbox.org> List-Id: "Development of GNU Guix and the GNU System distribution." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: guix-devel-bounces+gcggd-guix-devel=m.gmane.org@gnu.org Sender: "Guix-devel" To: guix-devel@gnu.org From: Eric Bavier * gnu/packages/search.scm (swish-e): New variable. * gnu/packages/patches/swish-e-search.patch: New patch. * gnu/local.mk (dist_patch_DATA): Add it. --- gnu/local.mk | 2 + gnu/packages/patches/swish-e-search.patch | 43 ++++++++++++++++++++ gnu/packages/search.scm | 67 ++++++++++++++++++++++++++++++- 3 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 gnu/packages/patches/swish-e-search.patch diff --git a/gnu/local.mk b/gnu/local.mk index 02a7cc4..59f22d4 100644 --- a/gnu/local.mk +++ b/gnu/local.mk @@ -779,6 +779,8 @@ dist_patch_DATA = \ %D%/packages/patches/soprano-find-clucene.patch \ %D%/packages/patches/steghide-fixes.patch \ %D%/packages/patches/superlu-dist-scotchmetis.patch \ + %D%/packages/patches/swish-e-search.patch \ + %D%/packages/patches/swish-e-format-security.patch \ %D%/packages/patches/synfig-build-fix.patch \ %D%/packages/patches/t1lib-CVE-2010-2642.patch \ %D%/packages/patches/t1lib-CVE-2011-0764.patch \ diff --git a/gnu/packages/patches/swish-e-search.patch b/gnu/packages/patches/swish-e-search.patch new file mode 100644 index 0000000..2a57a31 --- /dev/null +++ b/gnu/packages/patches/swish-e-search.patch @@ -0,0 +1,43 @@ +From http://swish-e.org/archive/2015-09/13295.html + +--- a/src/compress.c ++++ a/src/compress.c +@@ -995,7 +995,7 @@ void remove_worddata_longs(unsigned char *worddata,int *sz_worddata) + progerr("Internal error in remove_worddata_longs"); + + /* dst may be smaller than src. So move the data */ +- memcpy(dst,src,data_len); ++ memmove(dst,src,data_len); + + /* Increase pointers */ + src += data_len; +--- a/src/headers.c ++++ a/src/headers.c +@@ -280,7 +280,7 @@ static SWISH_HEADER_VALUE fetch_single_header( IndexFILE *indexf, HEADER_MAP *he + + case SWISH_NUMBER: + case SWISH_BOOL: +- value.number = *(unsigned long *) data_pointer; ++ value.number = *(unsigned int *) data_pointer; + + /* $$$ Ugly hack alert! */ + /* correct for removed files */ +--- a/src/swishspider ++++ a/src/swishspider +@@ -27,6 +27,7 @@ use LWP::UserAgent; + use HTTP::Status; + use HTML::Parser 3.00; + use HTML::LinkExtor; ++use Encode; + + if (scalar(@ARGV) != 2) { + print STDERR "Usage: $0 localpath url\n"; +@@ -94,7 +95,7 @@ use HTML::LinkExtor; + # Don't allow links above the base + $URI::ABS_REMOTE_LEADING_DOTS = 1; + +- $p->parse( $$content_ref ); ++ $p->parse( decode_utf8 $$content_ref ); + close( LINKS ); + + exit; diff --git a/gnu/packages/search.scm b/gnu/packages/search.scm index 9a7bc76..60f902f 100644 --- a/gnu/packages/search.scm +++ b/gnu/packages/search.scm @@ -23,10 +23,14 @@ #:use-module (guix packages) #:use-module (guix download) #:use-module (guix build-system gnu) + #:use-module (gnu packages) #:use-module (gnu packages compression) #:use-module (gnu packages check) #:use-module (gnu packages databases) - #:use-module (gnu packages linux)) + #:use-module (gnu packages linux) + #:use-module (gnu packages perl) + #:use-module (gnu packages web) + #:use-module (gnu packages xml)) (define-public xapian (package @@ -171,4 +175,65 @@ with slocate, and attempts to be compatible to GNU locate when it does not conflict with slocate compatibility.") (license gpl2))) +(define-public swish-e + (package + (name "swish-e") + (version "2.4.7") + (source (origin + (method url-fetch) + (uri (list (string-append "http://swish-e.org/distribution/" + "swish-e-" version ".tar.gz") + ;; The upstream swish-e.org appears to be down... so + ;; use debian's copy as a fallback. + (string-append "http://http.debian.net/debian/pool/" + "main/s/swish-e/swish-e_" version + ".orig.tar.gz"))) + (file-name (string-append name "-" version ".tar.gz")) + (sha256 + (base32 + "0qkrk7z25yp9hynj21vxkyn7yi8gcagcfxnass5cgczcz0gm9pax")) + (patches (search-patches "swish-e-search.patch" + "swish-e-format-security.patch")))) + (build-system gnu-build-system) + ;; Several other packages and perl modules may be installed alongside + ;; swish-e to extend its features at runtime, but are not required for + ;; building: xpdf, catdoc, MP3::Tag, Spreadsheet::ParseExcel, + ;; HTML::Entities. + (inputs + `(("libxml" ,libxml2) + ("zlib" ,zlib) + ("perl" ,perl) + ("perl-uri" ,perl-uri) + ("perl-html-parser" ,perl-html-parser) + ("perl-html-tagset" ,perl-html-tagset) + ("perl-mime-types" ,perl-mime-types))) + (arguments + `(#:phases (modify-phases %standard-phases + (add-after 'install 'wrap-programs + (lambda* (#:key inputs outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out"))) + (for-each + (lambda (program) + (wrap-program program + `("PERL5LIB" ":" prefix + ,(map (lambda (i) + (string-append (assoc-ref inputs i) + "/lib/perl5/site_perl")) + ;; These perl modules have no propagated + ;; inputs, so no further analysis needed. + '("perl-uri" + "perl-html-parser" + "perl-html-tagset" + "perl-mime-types"))))) + (list (string-append out "/lib/swish-e/swishspider") + (string-append out "/bin/swish-filter-test"))) + #t)))))) + (home-page "http://swish-e.org") + (synopsis "Web indexing system") + (description + "Swish-e is Simple Web Indexing System for Humans - Enhanced. Swish-e +can quickly and easily index directories of files or remote web sites and +search the generated indexes.") + (license gpl2+))) ;with exception + ;;; search.scm ends here -- 2.9.2