From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:470:142:3::10]:55794) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1izWRe-0000Dl-As for guix-patches@gnu.org; Wed, 05 Feb 2020 20:59:08 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1izWRb-0001af-7T for guix-patches@gnu.org; Wed, 05 Feb 2020 20:59:06 -0500 Received: from debbugs.gnu.org ([209.51.188.43]:40993) by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16) (Exim 4.71) (envelope-from ) id 1izWRa-0001Yb-7k for guix-patches@gnu.org; Wed, 05 Feb 2020 20:59:03 -0500 Received: from Debian-debbugs by debbugs.gnu.org with local (Exim 4.84_2) (envelope-from ) id 1izWRa-0006kW-7k for guix-patches@gnu.org; Wed, 05 Feb 2020 20:59:02 -0500 Subject: [bug#39258] Faster guix search using an sqlite cache Resent-Message-ID: From: Arun Isaac In-Reply-To: References: Date: Thu, 06 Feb 2020 07:28:22 +0530 Message-ID: MIME-Version: 1.0 Content-Type: multipart/signed; boundary="==-=-="; micalg=pgp-sha256; protocol="application/pgp-signature" List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: guix-patches-bounces+kyle=kyleam.com@gnu.org Sender: "Guix-patches" To: zimoun Cc: 39258@debbugs.gnu.org --==-=-= Content-Type: multipart/mixed; boundary="=-=-=" --=-=-= Content-Type: text/plain >> Thank you, this was useful. I was able to catch and report the error. I > > Where have you reported the error? I reported the error to the derivation log. For example, if the derivation for the guix-package-cache derivation is /gnu/store/cyf2h3frcjxm147dii5qic8d6kpm39nq-guix-package-cache.drv, the log file will be at /var/log/guix/drvs/cy/f2h3frcjxm147dii5qic8d6kpm39nq-guix-package-cache.drv.bz2. Notice that the directory name under drvs is the first two letters of the hash, and the file name under that directory is the remaining letters. Also please find attached a dump of my code so far. 
>> This could be a permission error, or something to do with the existence >> or lack thereof of certain directories (such as /var) in the chroot of >> the build daemon. I'm still figuring it out. > > Hum? And this should explain why it is working with the REPL and not > with the CLI, right? This could explain it, but I am not sure if this is the correct explanation. >> I'm also in half a mind to get some guile xapian bindings ready so we >> can just do that instead of messing with sqlite here. But, let's >> see. :-P > > Cool! > Let me know if you push something somewhere. Sure, will let you know. --=-=-= Content-Type: text/x-patch Content-Disposition: attachment; filename=0001-fast-search.patch Content-Transfer-Encoding: quoted-printable From=204c883fcff1f44339b28df6ccdb2b10c906439e3d Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Tue, 21 Jan 2020 20:45:43 +0530 Subject: [PATCH] fast search =2D-- build-aux/build-self.scm | 5 + gnu/packages.scm | 234 +++++++++++++++++++++++++-------------- 2 files changed, 155 insertions(+), 84 deletions(-) diff --git a/build-aux/build-self.scm b/build-aux/build-self.scm index fc13032b73..c123ad3b11 100644 =2D-- a/build-aux/build-self.scm +++ b/build-aux/build-self.scm @@ -264,6 +264,9 @@ interface (FFI) of Guile.") (define fake-git (scheme-file "git.scm" #~(define-module (git)))) =20 + (define fake-sqlite3 + (scheme-file "sqlite3.scm" #~(define-module (sqlite3)))) + (with-imported-modules `(((guix config) =3D> ,(make-config.scm)) =20 @@ -278,6 +281,8 @@ interface (FFI) of Guile.") ;; (git) to placate it. 
((git) =3D> ,fake-git) =20 + ((sqlite3) =3D> ,fake-sqlite3) + ,@(source-module-closure `((guix store) (guix self) (guix derivations) diff --git a/gnu/packages.scm b/gnu/packages.scm index d22c992bb1..0ae5b84284 100644 =2D-- a/gnu/packages.scm +++ b/gnu/packages.scm @@ -43,6 +43,7 @@ #:use-module (srfi srfi-34) #:use-module (srfi srfi-35) #:use-module (srfi srfi-39) + #:use-module (sqlite3) #:export (search-patch search-patches search-auxiliary-file @@ -204,10 +205,8 @@ PROC is called along these lines: PROC can use #:allow-other-keys to ignore the bits it's not interested in. When a package cache is available, this procedure does not actually load a= ny package module." =2D (define cache =2D (load-package-cache (current-profile))) =2D =2D (if (and cache (cache-is-authoritative?)) + (if (and (cache-is-authoritative?) + (current-profile)) (vhash-fold (lambda (name vector result) (match vector (#(name version module symbol outputs @@ -220,7 +219,7 @@ package module." #:supported? supported? #:deprecated? deprecated?)))) init =2D cache) + (cache-lookup (current-profile))) (fold-packages (lambda (package result) (proc (package-name package) (package-version package) @@ -252,31 +251,7 @@ is guaranteed to never traverse the same package twice= ." =20 (define %package-cache-file ;; Location of the package cache. =2D "/lib/guix/package.cache") =2D =2D(define load-package-cache =2D (mlambda (profile) =2D "Attempt to load the package cache. On success return a vhash keyed= by =2Dpackage names. Return #f on failure." =2D (match profile =2D (#f #f) =2D (profile =2D (catch 'system-error =2D (lambda () =2D (define lst =2D (load-compiled (string-append profile %package-cache-file))) =2D (fold (lambda (item vhash) =2D (match item =2D (#(name version module symbol outputs =2D supported? deprecated? 
=2D file line column) =2D (vhash-cons name item vhash)))) =2D vlist-null =2D lst)) =2D (lambda args =2D (if (=3D ENOENT (system-error-errno args)) =2D #f =2D (apply throw args)))))))) + "/lib/guix/package-cache.sqlite") =20 (define find-packages-by-name/direct ;bypass the cache (let ((packages (delay @@ -297,25 +272,57 @@ decreasing version order." matching) matching))))) =20 =2D(define (cache-lookup cache name) +(define* (cache-lookup profile #:optional name) "Lookup package NAME in CACHE. Return a list sorted in increasing versi= on order." (define (package-version? (vector-ref v2 1) (vector-ref v1 1))) =20 =2D (sort (vhash-fold* cons '() name cache) =2D package-versionboolean n) + (case n + ((0) #f) + ((1) #t))) + + (define (string->list str) + (call-with-input-string str read)) + + (define select-statement + (string-append + "SELECT name, version, module, symbol, outputs, supported, superseded= , locationFile, locationLine, locationColumn from packages" + (if name " WHERE name =3D :name" ""))) + + (define cache-file + (string-append profile %package-cache-file)) + + (let* ((db (sqlite-open cache-file SQLITE_OPEN_READONLY)) + (statement (sqlite-prepare db select-statement))) + (when name + (sqlite-bind-arguments statement #:name name)) + (let ((result (sqlite-fold (lambda (v result) + (match v + (#(name version module symbol outputs s= upported superseded file line column) + (cons + (vector name + version + (string->list module) + (string->symbol symbol) + (string->list outputs) + (int->boolean supported) + (int->boolean superseded) + (list file line column)) + result)))) + '() statement))) + (sqlite-finalize statement) + (sqlite-close db) + (sort result package-versionint x) + (if x 1 0)) + + (define (list->string x) + (call-with-output-string (cut write x <>))) + + (define (insert-package db module symbol variable seen) (match (false-if-exception (variable-ref variable)) ((? package? package) =2D (match result+seen =2D ((result . 
seen) =2D (if (or (vhash-assq package seen) =2D (hidden-package? package)) =2D (cons result seen) =2D (cons (cons `#(,(package-name package) =2D ,(package-version package) =2D ,(module-name module) =2D ,symbol =2D ,(package-outputs package) =2D ,(->bool (supported-package? package)) =2D ,(->bool (package-superseded package)) =2D ,@(let ((loc (package-location package))) =2D (if loc =2D `(,(location-file loc) =2D ,(location-line loc) =2D ,(location-column loc)) =2D '(#f #f #f)))) =2D result) =2D (vhash-consq package #t seen)))))) =2D (_ =2D result+seen))) =2D =2D (define exp =2D (first =2D (fold-module-public-variables* expand-cache =2D (cons '() vlist-null) =2D (all-modules (%package-module-path) =2D #:warn =2D warn-about-load-error))= )) + (cond + ((or (vhash-assq package seen) + (hidden-package? package)) + seen) + (else + (let ((statement (sqlite-prepare db insert-statement))) + (sqlite-bind-arguments statement + #:name (package-name package) + #:version (package-version package) + #:module (list->string (module-name modu= le)) + #:symbol (symbol->string symbol) + #:outputs (list->string (package-outputs= package)) + #:supported (boolean->int (supported-pac= kage? 
package)) + #:superseded (boolean->int (package-supe= rseded package)) + #:locationfile (cond + ((package-location packa= ge) =3D> location-file) + (else #f)) + #:locationline (cond + ((package-location packa= ge) =3D> location-line) + (else #f)) + #:locationcolumn (cond + ((package-location pac= kage) =3D> location-column) + (else #f))) + (sqlite-fold cons '() statement) + (sqlite-finalize statement)) + (let ((statement (sqlite-prepare db insert-package-search-stateme= nt))) + (sqlite-bind-arguments statement + #:name (package-name package) + #:searchtext (package-description packag= e)) + (sqlite-fold cons '() statement) + (sqlite-finalize statement)) + (vhash-consq package #t seen)))) + (_ seen))) =20 (mkdir-p (dirname cache-file)) =2D (call-with-output-file cache-file =2D (lambda (port) =2D ;; Store the cache as a '.go' file. This makes loading fast and r= educes =2D ;; heap usage since some of the static data is directly mmapped. =2D (put-bytevector port =2D (compile `'(,@exp) =2D #:to 'bytecode =2D #:opts '(#:to-file? 
#t))))) + (let ((tmp (string-append (dirname cache-file) "/tmp"))) + (mkdir-p tmp) + (setenv "SQLITE_TMPDIR" tmp)) + (let ((db (sqlite-open cache-file))) + (sqlite-exec db schema) + (call-with-transaction db + (lambda () + (fold-module-public-variables* (cut insert-package db <> <> <> <= >) + vlist-null + (all-modules (%package-module-pat= h) + #:warn + warn-about-load-erro= r)))) + (sqlite-close db)) + cache-file) =20 =2D-=20 2.23.0 --=-=-=-- --==-=-= Content-Type: application/pgp-signature; name="signature.asc" -----BEGIN PGP SIGNATURE----- iQEzBAEBCAAdFiEEf3MDQ/Lwnzx3v3nTLiXui2GAK7MFAl47cr4ACgkQLiXui2GA K7OLPwf9FgQ/N8AazOSXRwuXn8N+1QZ0b0G1bq2ZpQYppHpBl0e8SgmTyYXpE4za U2tBfJEMTUXW7YUZD/36DKNpN5JUD1c4Hnsn/BDqRQFFXt4agzTE3PhtOV2gFLjC wYH/i9xtLoRM+7g36/ZYAmTfAZoiFro4A3kq1ndy8nQCcf++v/v3tQvFVWn3ywKT Fig63/wm8hXz7kP0+fi6MVKlhKe05VDBXxZCNfM7tSjmEw2qFNmwALUncWUyN9Sz uF0aU+/JzPUF6kvXMZR9cQxBI5ISZ+gxsdg7rikmwso9jvyxRbEDFAck4Etv9j32 drcNfNH4ZSUaZY/NcxIpzu/yI9dKKA== =M4NZ -----END PGP SIGNATURE----- --==-=-=--