all messages for Guix-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: Arun Isaac <arunisaac@systemreboot.net>
To: zimoun <zimon.toutoune@gmail.com>
Cc: 39258@debbugs.gnu.org
Subject: [bug#39258] Faster guix search using an sqlite cache
Date: Thu, 06 Feb 2020 07:28:22 +0530	[thread overview]
Message-ID: <cu7v9oka341.fsf@systemreboot.net> (raw)
In-Reply-To: <CAJ3okZ37rFh=SB-f6nqc0kMpCai=pw2D0-GFiakXY1p4FHHftA@mail.gmail.com>


[-- Attachment #1.1: Type: text/plain, Size: 1213 bytes --]


>> Thank you, this was useful. I was able to catch and report the error. I
>
> Where have you reported the error?

I reported the error to the derivation log. For example, if the
derivation for the guix-package-cache derivation is
/gnu/store/cyf2h3frcjxm147dii5qic8d6kpm39nq-guix-package-cache.drv, the
log file will be at
/var/log/guix/drvs/cy/f2h3frcjxm147dii5qic8d6kpm39nq-guix-package-cache.drv.bz2. Notice
that the directory name under drvs is the first two letters of the hash,
and the file name under that directory is the remaining letters.

Also please find attached a dump of my code so far.

>> This could be a permission error, or something to do with the existence
>> or lack thereof of certain directories (such as /var) in the chroot of
>> the build daemon. I'm still figuring it out.
>
> Hum? And this should explain why it is working with the REPL and not
> with the CLI, right?

This could expalin it, but I am not sure if this is the correct
explanation.

>> I'm also in half a mind to get some guile xapian bindings ready so we
>> can just do that instead of messing with sqlite here. But, let's
>> see. :-P
>
> Cool!
> Let me know if you push something somewhere.

Sure, will let you know.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.2: 0001-fast-search.patch --]
[-- Type: text/x-patch, Size: 14366 bytes --]

From 4c883fcff1f44339b28df6ccdb2b10c906439e3d Mon Sep 17 00:00:00 2001
From: Arun Isaac <arunisaac@systemreboot.net>
Date: Tue, 21 Jan 2020 20:45:43 +0530
Subject: [PATCH] fast search

---
 build-aux/build-self.scm |   5 +
 gnu/packages.scm         | 234 +++++++++++++++++++++++++--------------
 2 files changed, 155 insertions(+), 84 deletions(-)

diff --git a/build-aux/build-self.scm b/build-aux/build-self.scm
index fc13032b73..c123ad3b11 100644
--- a/build-aux/build-self.scm
+++ b/build-aux/build-self.scm
@@ -264,6 +264,9 @@ interface (FFI) of Guile.")
   (define fake-git
     (scheme-file "git.scm" #~(define-module (git))))
 
+  (define fake-sqlite3
+    (scheme-file "sqlite3.scm" #~(define-module (sqlite3))))
+
   (with-imported-modules `(((guix config)
                             => ,(make-config.scm))
 
@@ -278,6 +281,8 @@ interface (FFI) of Guile.")
                            ;; (git) to placate it.
                            ((git) => ,fake-git)
 
+                           ((sqlite3) => ,fake-sqlite3)
+
                            ,@(source-module-closure `((guix store)
                                                       (guix self)
                                                       (guix derivations)
diff --git a/gnu/packages.scm b/gnu/packages.scm
index d22c992bb1..0ae5b84284 100644
--- a/gnu/packages.scm
+++ b/gnu/packages.scm
@@ -43,6 +43,7 @@
   #:use-module (srfi srfi-34)
   #:use-module (srfi srfi-35)
   #:use-module (srfi srfi-39)
+  #:use-module (sqlite3)
   #:export (search-patch
             search-patches
             search-auxiliary-file
@@ -204,10 +205,8 @@ PROC is called along these lines:
 PROC can use #:allow-other-keys to ignore the bits it's not interested in.
 When a package cache is available, this procedure does not actually load any
 package module."
-  (define cache
-    (load-package-cache (current-profile)))
-
-  (if (and cache (cache-is-authoritative?))
+  (if (and (cache-is-authoritative?)
+           (current-profile))
       (vhash-fold (lambda (name vector result)
                     (match vector
                       (#(name version module symbol outputs
@@ -220,7 +219,7 @@ package module."
                              #:supported? supported?
                              #:deprecated? deprecated?))))
                   init
-                  cache)
+                  (cache-lookup (current-profile)))
       (fold-packages (lambda (package result)
                        (proc (package-name package)
                              (package-version package)
@@ -252,31 +251,7 @@ is guaranteed to never traverse the same package twice."
 
 (define %package-cache-file
   ;; Location of the package cache.
-  "/lib/guix/package.cache")
-
-(define load-package-cache
-  (mlambda (profile)
-    "Attempt to load the package cache.  On success return a vhash keyed by
-package names.  Return #f on failure."
-    (match profile
-      (#f #f)
-      (profile
-       (catch 'system-error
-         (lambda ()
-           (define lst
-             (load-compiled (string-append profile %package-cache-file)))
-           (fold (lambda (item vhash)
-                   (match item
-                     (#(name version module symbol outputs
-                             supported? deprecated?
-                             file line column)
-                      (vhash-cons name item vhash))))
-                 vlist-null
-                 lst))
-         (lambda args
-           (if (= ENOENT (system-error-errno args))
-               #f
-               (apply throw args))))))))
+  "/lib/guix/package-cache.sqlite")
 
 (define find-packages-by-name/direct              ;bypass the cache
   (let ((packages (delay
@@ -297,25 +272,57 @@ decreasing version order."
                     matching)
             matching)))))
 
-(define (cache-lookup cache name)
+(define* (cache-lookup profile #:optional name)
   "Lookup package NAME in CACHE.  Return a list sorted in increasing version
 order."
   (define (package-version<? v1 v2)
     (version>? (vector-ref v2 1) (vector-ref v1 1)))
 
-  (sort (vhash-fold* cons '() name cache)
-        package-version<?))
+  (define (int->boolean n)
+    (case n
+      ((0) #f)
+      ((1) #t)))
+
+  (define (string->list str)
+    (call-with-input-string str read))
+
+  (define select-statement
+    (string-append
+     "SELECT name, version, module, symbol, outputs, supported, superseded, locationFile, locationLine, locationColumn from packages"
+     (if name " WHERE name = :name" "")))
+
+  (define cache-file
+    (string-append profile %package-cache-file))
+
+  (let* ((db (sqlite-open cache-file SQLITE_OPEN_READONLY))
+         (statement (sqlite-prepare db select-statement)))
+    (when name
+      (sqlite-bind-arguments statement #:name name))
+    (let ((result (sqlite-fold (lambda (v result)
+                                 (match v
+                                   (#(name version module symbol outputs supported superseded file line column)
+                                    (cons
+                                     (vector name
+                                             version
+                                             (string->list module)
+                                             (string->symbol symbol)
+                                             (string->list outputs)
+                                             (int->boolean supported)
+                                             (int->boolean superseded)
+                                             (list file line column))
+                                     result))))
+                               '() statement)))
+      (sqlite-finalize statement)
+      (sqlite-close db)
+      (sort result package-version<?))))
 
 (define* (find-packages-by-name name #:optional version)
   "Return the list of packages with the given NAME.  If VERSION is not #f,
 then only return packages whose version is prefixed by VERSION, sorted in
 decreasing version order."
-  (define cache
-    (load-package-cache (current-profile)))
-
-  (if (and (cache-is-authoritative?) cache)
-      (match (cache-lookup cache name)
-        (#f #f)
+  (if (and (cache-is-authoritative?)
+           (current-profile))
+      (match (cache-lookup (current-profile) name)
         ((#(_ versions modules symbols _ _ _ _ _ _) ...)
          (fold (lambda (version* module symbol result)
                  (if (or (not version)
@@ -331,12 +338,9 @@ decreasing version order."
 (define* (find-package-locations name #:optional version)
   "Return a list of version/location pairs corresponding to each package
 matching NAME and VERSION."
-  (define cache
-    (load-package-cache (current-profile)))
-
-  (if (and cache (cache-is-authoritative?))
-      (match (cache-lookup cache name)
-        (#f '())
+  (if (and (cache-is-authoritative?)
+           (current-profile))
+      (match (cache-lookup (current-profile) name)
         ((#(name versions modules symbols outputs
                  supported? deprecated?
                  files lines columns) ...)
@@ -372,6 +376,33 @@ VERSION."
 ;; Prevent Guile 3 from inlining this procedure so we can mock it in tests.
 (set! find-best-packages-by-name find-best-packages-by-name)
 
+;; (generate-package-cache "/tmp/test")
+
+;; XXX: missing in guile-sqlite3@0.1.0
+(define SQLITE_BUSY 5)
+
+(define (call-with-transaction db proc)
+  "Start a transaction with DB (make as many attempts as necessary) and run
+PROC.  If PROC exits abnormally, abort the transaction, otherwise commit the
+transaction after it finishes."
+  (catch 'sqlite-error
+    (lambda ()
+      ;; We use begin immediate here so that if we need to retry, we
+      ;; figure that out immediately rather than because some SQLITE_BUSY
+      ;; exception gets thrown partway through PROC - in which case the
+      ;; part already executed (which may contain side-effects!) would be
+      ;; executed again for every retry.
+      (sqlite-exec db "begin immediate;")
+      (let ((result (proc)))
+        (sqlite-exec db "commit;")
+        result))
+    (lambda (key who error description)
+      (if (= error SQLITE_BUSY)
+          (call-with-transaction db proc)
+          (begin
+            (sqlite-exec db "rollback;")
+            (throw 'sqlite-error who error description))))))
+
 (define (generate-package-cache directory)
   "Generate under DIRECTORY a cache of all the available packages.
 
@@ -381,49 +412,84 @@ reducing the memory footprint."
   (define cache-file
     (string-append directory %package-cache-file))
 
-  (define (expand-cache module symbol variable result+seen)
+  (define schema
+    "CREATE TABLE packages (name text,
+version text,
+module text,
+symbol text,
+outputs text,
+supported int,
+superseded int,
+locationFile text,
+locationLine int,
+locationColumn int);
+CREATE VIRTUAL TABLE packageSearch USING fts5(name, searchText);")
+
+  (define insert-statement
+    "INSERT INTO packages(name, version, module, symbol, outputs, supported, superseded, locationFile, locationLine, locationColumn)
+VALUES(:name, :version, :module, :symbol, :outputs, :supported, :superseded, :locationfile, :locationline, :locationcolumn)")
+
+  (define insert-package-search-statement
+    "INSERT INTO packageSearch(name, searchText) VALUES(:name, :searchtext)")
+
+  (define (boolean->int x)
+    (if x 1 0))
+
+  (define (list->string x)
+    (call-with-output-string (cut write x <>)))
+
+  (define (insert-package db module symbol variable seen)
     (match (false-if-exception (variable-ref variable))
       ((? package? package)
-       (match result+seen
-         ((result . seen)
-          (if (or (vhash-assq package seen)
-                  (hidden-package? package))
-              (cons result seen)
-              (cons (cons `#(,(package-name package)
-                             ,(package-version package)
-                             ,(module-name module)
-                             ,symbol
-                             ,(package-outputs package)
-                             ,(->bool (supported-package? package))
-                             ,(->bool (package-superseded package))
-                             ,@(let ((loc (package-location package)))
-                                 (if loc
-                                     `(,(location-file loc)
-                                       ,(location-line loc)
-                                       ,(location-column loc))
-                                     '(#f #f #f))))
-                          result)
-                    (vhash-consq package #t seen))))))
-      (_
-       result+seen)))
-
-  (define exp
-    (first
-     (fold-module-public-variables* expand-cache
-                                    (cons '() vlist-null)
-                                    (all-modules (%package-module-path)
-                                                 #:warn
-                                                 warn-about-load-error))))
+       (cond
+        ((or (vhash-assq package seen)
+             (hidden-package? package))
+         seen)
+        (else
+         (let ((statement (sqlite-prepare db insert-statement)))
+           (sqlite-bind-arguments statement
+                                  #:name (package-name package)
+                                  #:version (package-version package)
+                                  #:module (list->string (module-name module))
+                                  #:symbol (symbol->string symbol)
+                                  #:outputs (list->string (package-outputs package))
+                                  #:supported (boolean->int (supported-package? package))
+                                  #:superseded (boolean->int (package-superseded package))
+                                  #:locationfile (cond
+                                                  ((package-location package) => location-file)
+                                                  (else #f))
+                                  #:locationline (cond
+                                                  ((package-location package) => location-line)
+                                                  (else #f))
+                                  #:locationcolumn (cond
+                                                    ((package-location package) => location-column)
+                                                    (else #f)))
+           (sqlite-fold cons '() statement)
+           (sqlite-finalize statement))
+         (let ((statement (sqlite-prepare db insert-package-search-statement)))
+           (sqlite-bind-arguments statement
+                                  #:name (package-name package)
+                                  #:searchtext (package-description package))
+           (sqlite-fold cons '() statement)
+           (sqlite-finalize statement))
+         (vhash-consq package #t seen))))
+      (_ seen)))
 
   (mkdir-p (dirname cache-file))
-  (call-with-output-file cache-file
-    (lambda (port)
-      ;; Store the cache as a '.go' file.  This makes loading fast and reduces
-      ;; heap usage since some of the static data is directly mmapped.
-      (put-bytevector port
-                      (compile `'(,@exp)
-                               #:to 'bytecode
-                               #:opts '(#:to-file? #t)))))
+  (let ((tmp (string-append (dirname cache-file) "/tmp")))
+    (mkdir-p tmp)
+    (setenv "SQLITE_TMPDIR" tmp))
+  (let ((db (sqlite-open cache-file)))
+    (sqlite-exec db schema)
+    (call-with-transaction db
+        (lambda ()
+          (fold-module-public-variables* (cut insert-package db <> <> <> <>)
+                                         vlist-null
+                                         (all-modules (%package-module-path)
+                                                      #:warn
+                                                      warn-about-load-error))))
+    (sqlite-close db))
+
   cache-file)
 
 \f
-- 
2.23.0


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 487 bytes --]

  reply	other threads:[~2020-02-06  1:59 UTC|newest]

Thread overview: 126+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-23 19:51 [bug#39258] Faster guix search using an sqlite cache Arun Isaac
2020-01-29 23:33 ` zimoun
2020-01-30 13:48   ` Arun Isaac
2020-01-31 12:48     ` zimoun
2020-02-02 21:16       ` Arun Isaac
2020-02-04 10:19         ` zimoun
2020-02-06  1:58           ` Arun Isaac [this message]
2020-02-11 16:29             ` Ludovic Courtès
2020-02-11 18:21               ` zimoun
2020-02-11 18:39                 ` Ludovic Courtès
2020-02-11 19:07                   ` Arun Isaac
2020-02-11 20:20                     ` zimoun
2020-02-15 14:50                     ` Arun Isaac
2020-02-11 20:13                   ` zimoun
2020-02-27 20:41 ` [bug#39258] [PATCH 0/4] Xapian for Guix package search Arun Isaac
2020-02-27 20:41   ` [bug#39258] [PATCH 1/4] gnu: Add guile-xapian Arun Isaac
2020-03-03 16:29     ` zimoun
2020-02-27 20:41   ` [bug#39258] [PATCH 2/4] build-self: Add guile-xapian to Guix dependencies Arun Isaac
2020-02-27 20:41   ` [bug#39258] [PATCH 3/4] gnu: Generate xapian package search index Arun Isaac
2020-02-28  8:04     ` Pierre Neidhardt
2020-03-05 20:26       ` Arun Isaac
2020-03-03 18:29     ` zimoun
2020-02-27 20:41   ` [bug#39258] [PATCH 4/4] gnu: Use xapian index for package search Arun Isaac
2020-02-28  8:11     ` Pierre Neidhardt
2020-03-03 19:21     ` zimoun
2020-03-03 19:51       ` zimoun
2020-02-28  8:13   ` [bug#39258] [PATCH 0/4] Xapian for Guix " Pierre Neidhardt
2020-02-28 12:39     ` zimoun
2020-02-28 12:49       ` Pierre Neidhardt
2020-02-28 15:36     ` Arun Isaac
2020-02-28 16:04       ` Arun Isaac
2020-03-02 18:37         ` zimoun
2020-03-02 19:13           ` zimoun
2020-03-03 20:04             ` zimoun
2020-02-29  8:25       ` Arun Isaac
2020-03-02 18:27         ` zimoun
2020-02-28 12:36   ` zimoun
2020-03-05 16:46   ` Ludovic Courtès
2020-03-07 13:31 ` [bug#39258] [PATCH v2 0/3] " Arun Isaac
2020-03-07 13:31   ` [bug#39258] [PATCH v2 1/3] build-self: Add guile-xapian to Guix dependencies Arun Isaac
2020-03-09 18:14     ` zimoun
2020-03-09 23:40     ` Jonathan Brielmaier
2020-03-10  5:24       ` Arun Isaac
2020-03-07 13:31   ` [bug#39258] [PATCH v2 2/3] gnu: Generate Xapian package search index Arun Isaac
2020-03-09 18:19     ` zimoun
2020-03-07 13:31   ` [bug#39258] [PATCH v2 3/3] gnu: Use Xapian index for package search Arun Isaac
2020-03-07 20:33   ` [bug#39258] [PATCH v2 0/3] Xapian for Guix " Ludovic Courtès
2020-03-08  9:01     ` Arun Isaac
2020-03-08 11:33       ` Ludovic Courtès
2020-03-08 20:27         ` Arun Isaac
2020-03-09  7:42           ` Pierre Neidhardt
2020-03-09 12:50             ` zimoun
2020-03-09 10:35           ` Ludovic Courtès
2020-03-10 14:17             ` Arun Isaac
2020-03-10 14:33               ` zimoun
2020-03-11 13:50               ` Ludovic Courtès
2020-03-13  5:37                 ` Arun Isaac
2020-03-15 20:40                   ` Ludovic Courtès
2020-03-09  7:50         ` Pierre Neidhardt
2020-03-09 10:28           ` Ludovic Courtès
2020-03-09 13:03             ` zimoun
2020-03-09 12:53           ` zimoun
2020-03-09 12:47         ` zimoun
2020-03-09 12:40       ` zimoun
2020-03-09 12:34     ` zimoun
2020-03-08 20:27   ` zimoun
2020-03-08 20:40     ` Arun Isaac
2020-03-09 12:28   ` zimoun
2020-03-27 16:26 ` [bug#39258] [PATCH v3 0/3] Package metadata cache for guix search Arun Isaac
2020-03-27 16:26   ` [bug#39258] [PATCH v3 1/3] guix: Generate package metadata cache Arun Isaac
2020-04-24 20:48     ` Ludovic Courtès
2020-04-26  9:48       ` zimoun
2020-04-26 14:35         ` Ludovic Courtès
2020-04-26 14:54           ` Pierre Neidhardt
2020-04-26 15:33             ` Ludovic Courtès
2020-04-26 15:05           ` zimoun
2020-03-27 16:26   ` [bug#39258] [PATCH v3 2/3] guix: Search " Arun Isaac
2020-04-24 20:58     ` Ludovic Courtès
2020-03-27 16:26   ` [bug#39258] [PATCH v3 3/3] guix: Use package metadata cache for package search Arun Isaac
2020-04-24 21:03     ` Ludovic Courtès
2020-04-05 14:08   ` [bug#39258] [PATCH v3 0/3] Package metadata cache for guix search Ludovic Courtès
2020-04-24 21:05   ` Ludovic Courtès
2020-04-26  3:54 ` [bug#39258] benchmark search: default vs v2 vs v3 zimoun
2020-04-26  7:29   ` Pierre Neidhardt
2020-04-26 15:49   ` Ludovic Courtès
2020-04-26 17:01     ` zimoun
2020-04-26 20:22       ` Ludovic Courtès
2020-04-30 13:10     ` zimoun
2020-05-03 15:01 ` [bug#39258] [PATCH v4 0/3] Faster cache generation (similar as v3) zimoun
2020-05-03 15:01   ` [bug#39258] [PATCH v4 1/3] DRAFT packages: Add fields to packages cache zimoun
2020-05-03 15:01   ` [bug#39258] [PATCH v4 2/3] DRAFT packages: Add new procedure 'fold-packages*' zimoun
2020-05-03 15:01   ` [bug#39258] [PATCH v4 3/3] DRAFT guix package: Use cache in 'find-packages-by-description' zimoun
2020-05-03 16:43   ` [bug#39258] [PATCH v4 0/3] Faster cache generation (similar as v3) Ludovic Courtès
2020-05-03 18:10     ` zimoun
2020-05-03 19:49       ` Ludovic Courtès
2020-06-01  0:00 ` [bug#39258] [PATCH 0/4] Optimize guix search Arun Isaac
2020-06-01  0:00   ` [bug#39258] [PATCH 1/4] ui: Cut off search early if any regexp does not match Arun Isaac
2020-06-09  8:29     ` Ludovic Courtès
2020-06-01  0:00   ` [bug#39258] [PATCH 2/4] ui: Use string matching with literal search strings Arun Isaac
2020-06-09  8:33     ` Ludovic Courtès
2020-06-09  9:55       ` zimoun
2020-06-13 12:37       ` Arun Isaac
2020-06-13 13:36         ` zimoun
2020-06-13 17:21           ` Arun Isaac
2020-06-14 19:14             ` zimoun
2020-06-13 19:32         ` Ludovic Courtès
2020-06-15 20:18           ` Arun Isaac
2020-06-01  0:00   ` [bug#39258] [PATCH 3/4] ui: Do not translate package synopsis a second time Arun Isaac
2020-06-09  8:33     ` Ludovic Courtès
2020-06-01  0:00   ` [bug#39258] [PATCH 4/4] ui: Use package-description-string Arun Isaac
2020-06-09  8:34     ` Ludovic Courtès
2020-06-01  1:25   ` [bug#39258] [PATCH v5 0/4] Optimize guix search zimoun
2020-06-01  2:24     ` Arun Isaac
2020-06-01 10:01     ` zimoun
2020-06-01 10:11 ` [bug#39258] KMP string search algorithm? zimoun
2020-06-01 22:24   ` Leo Famulari
2020-06-01 23:48     ` Arun Isaac
2020-06-02  8:49       ` Ludovic Courtès
2021-07-15  7:33 ` [bug#39258] [PATCH v6 0/2] DRAFT "guix search" performances zimoun
2021-07-15  7:33   ` [bug#39258] [PATCH v6 1/2] DRAFT packages: Add fields to packages cache zimoun
2021-07-17  8:31     ` Arun Isaac
2021-07-23 15:30       ` Ludovic Courtès
2021-08-17 14:03         ` zimoun
2021-07-15  7:33   ` [bug#39258] [PATCH v6 2/2] DRAFT scripts: package: Use cache in 'find-packages-by-description' zimoun
2021-07-23 15:43   ` [bug#39258] [PATCH v6 0/2] DRAFT "guix search" performances Ludovic Courtès
2021-08-20 15:42     ` zimoun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cu7v9oka341.fsf@systemreboot.net \
    --to=arunisaac@systemreboot.net \
    --cc=39258@debbugs.gnu.org \
    --cc=zimon.toutoune@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/guix.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.