(use-modules (gnu packages) (guix packages) (ice-9 match) (ice-9 time) (srfi srfi-1) (srfi srfi-26)) ;;; Implementation of sets using hash tables (define make-set make-hash-table) (define set-element-of? hash-ref) (define set-add! (cut hash-set! <> <> #t)) (define set->list (cut hash-map->list (lambda (element _) element) <>)) (define (set-intersection set1 set2) "Return the intersection of SET1 with SET2" (let ((result (make-set))) (hash-for-each (lambda (element _) (when (set-element-of? set2 element) (set-add! result element))) set1) result)) (define (trigrams str) "Return all trigrams in STR." (if (< (string-length str) 3) (list) (map (lambda (start) (substring str start (+ start 3))) (iota (- (string-length str) 2))))) ;; Inverted index (define index (make-hash-table)) (define (build-index!) "Return an inverted index of all trigrams in the descriptions of all packages." (fold-packages (lambda (package _) (for-each (lambda (trigram) (let ((set (hash-ref index trigram (make-set)))) (set-add! set (package-name package)) (hash-set! index trigram set))) (trigrams (package-description package)))) #f) index) (define (brute-force-retrieve query) "Return names of all packages whose descriptions contain the string QUERY. Search brute force by folding over all packages." (fold-packages (lambda (package result) (if (string-contains (package-description package) query) (cons (package-name package) result) result)) (list))) (define (inverted-index-retrieve query) "Return names of all packages whose descriptions contain the string QUERY. Search using the pre-built inverted index." (match (trigrams query) ((first-trigram . remaining-trigrams) (set->list (fold (lambda (trigram result) (set-intersection result (hash-ref index trigram))) (hash-ref index first-trigram) remaining-trigrams))) (() (brute-force-retrieve query)))) (define (repeat thunk times) (unless (zero? times) (thunk) (repeat thunk (1- times)))) (display "Building index") (newline) (time (build-index!)) (newline) (display "Brute force search (repeated 1000 times)") (newline) (time (repeat (cut brute-force-retrieve "strategy") 1000)) (newline) (display "Inverted index search (repeated 1000 times)") (newline) (time (repeat (cut inverted-index-retrieve "strategy") 1000)) (newline)