unofficial mirror of guile-user@gnu.org 
 help / color / mirror / Atom feed
* [ANN] guile-snowball-stemmer 0.1.0
@ 2019-05-07 13:28 amirouche
  2019-05-07 18:30 ` amirouche
  0 siblings, 1 reply; 3+ messages in thread
From: amirouche @ 2019-05-07 13:28 UTC (permalink / raw)
  To: guile-user gnu

[-- Attachment #1: Type: text/plain, Size: 1915 bytes --]

I am pleased to announce the immediate availability of 
guile-snowball-stemmer.

This is binding library that allows to compute the stem of words in 
various
languages. The list of supported language is available in the following 
REPL
run.

This is a binding library. The official website is at 
https://snowballstem.org/

It is mostly useful in the context of information retrieval.

The code is at https://git.sr.ht/~amz3/guile-snowball-stemmer

The libstemmer shared library path is hardcoded as guix path of the 
library.
A guix package definition of the C library is available in my guix 
channel at:

   https://git.sr.ht/~amz3/guix-amz3-channel

That said there is no guix package for the bindings. Just include the 
file
attached to this mail in you project.

Here is a demo:

scheme@(guile-user)> (import (snowball-stemmer))
stemmer/snowball-stemmer.scm.go

scheme@(guile-user)> (stemmers)
$1 = ("turkish" "swedish" "spanish" "russian" "romanian" "portuguese" 
"porter" "norwegian" "italian" "hungarian" "german" "french" "finnish" 
"english" "dutch" "danish")

scheme@(guile-user)> (make-stemmer "amazigh")
ERROR: In procedure scm-error:
ERROR: snowball-stemmer "Oops! Stemmer not found" "amazigh"

scheme@(guile-user)> (define english (make-stemmer "english"))
scheme@(guile-user)> (stem english "cycling")
$2 = "cycl"
scheme@(guile-user)> (stem english "ecology")
$3 = "ecolog"
scheme@(guile-user)> (stem english "library")
$4 = "librari"
scheme@(guile-user)> (stem english "virtual")
$5 = "virtual"
scheme@(guile-user)> (stem english "environment")
$6 = "environ"

scheme@(guile-user)> (define french (make-stemmer "french"))
scheme@(guile-user)> (stem french "environnement")
$7 = "environ"
scheme@(guile-user)> (stem french "bibliotheque")
$8 = "bibliothequ"
scheme@(guile-user)> (stem french "gazette")
$9 = "gazet"
scheme@(guile-user)> (stem french "constituant")
$10 = "constitu"


Happy hacking!

[-- Attachment #2: snowball-stemmer.scm --]
[-- Type: text/plain, Size: 3208 bytes --]

;; guile-snowball-stemmer
;; Copyright (C) 2019 Amirouche Boubekki <amirouche@hyper.dev>

;; This library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public
;; License as published by the Free Software Foundation; either
;; version 3 of the License, or (at your option) any later version.

;; This library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Lesser General Public License for more details.

;; You should have received a copy of the GNU Lesser General Public License
;; along with this library; if not, write to the Free Software Foundation,
;; Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
(define-module (snowball-stemmer))

(import (system foreign))
(import (only (rnrs bytevectors)
              bytevector-length
              string->utf8
              utf8->string))

(export stemmers make-stemmer stem)

;;; ffi helpers

(define NULL %null-pointer)
(define POINTER '*)

;; XXX: only use that procedure in your project if you don't need to
;; access static variables
(define (dynamic-link* library-name)
  (let ((shared-object (dynamic-link library-name)))
    (lambda (return-value function-name . arguments)
      (let ((function (dynamic-func function-name shared-object)))
        (pointer->procedure return-value function arguments)))))

;; bindings

(define snowball-stemmer
  (dynamic-link* "/gnu/store/rzvlish3vsidfmvv74f74s2854wn8yii-stemmer-0.0.0/lib/libstemmer.so"))

(define stemmers
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_list")))
    (lambda ()
      (let ((array (pointer-address (proc))))
        (let loop ((out '())
                   (index 0))
          (let ((pointer (dereference-pointer (make-pointer (+ array (* 8 index))))))
            (if (eq? pointer NULL)
                out
                (loop (cons (pointer->string pointer) out)
                      (+ index 1)))))))))

(define %stemmer-delete
  (let ((proc (snowball-stemmer void "sb_stemmer_delete" POINTER)))
    (lambda (stemmer)
      (proc stemmer))))

(define stemmers-guardian (make-guardian))

(define make-stemmer
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_new" POINTER POINTER)))
    (lambda (algorithm)
      (let ((out (proc (string->pointer algorithm) NULL)))
        (when(eq? out NULL)
          (error 'snowball-stemmer "Oops! Stemmer not found" algorithm))
        out))))

(define (reap-stemmers)
  (let loop ()
    (let ((stemmer (stemmers-guardian)))
      (when stemmer
        (%stemmer-delete stemmer)
        (loop)))))

(add-hook! after-gc-hook reap-stemmers)

(define %stemmer-length
  (let ((proc (snowball-stemmer int "sb_stemmer_length" POINTER)))
    (lambda (stemmer)
      (proc stemmer))))

(define stem
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_stem" POINTER POINTER int)))
    (lambda (stemmer word)
      (let ((bv (string->utf8 word)))
        (let ((pointer (proc stemmer (bytevector->pointer bv) (bytevector-length bv))))
          (utf8->string (pointer->bytevector pointer (%stemmer-length stemmer))))))))

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [ANN] guile-snowball-stemmer 0.1.0
  2019-05-07 13:28 [ANN] guile-snowball-stemmer 0.1.0 amirouche
@ 2019-05-07 18:30 ` amirouche
  2019-05-07 20:36   ` amirouche
  0 siblings, 1 reply; 3+ messages in thread
From: amirouche @ 2019-05-07 18:30 UTC (permalink / raw)
  To: guile-user gnu; +Cc: guile-user

[-- Attachment #1: Type: text/plain, Size: 2571 bytes --]

On 2019-05-07 15:28, amirouche@hyper.dev wrote:
> I am pleased to announce the immediate availability of 
> guile-snowball-stemmer.
> 
> This is binding library that allows to compute the stem of words in 
> various
> languages. The list of supported language is available in the following 
> REPL
> run.
> 
> This is a binding library. The official website is at 
> https://snowballstem.org/
> 
> It is mostly useful in the context of information retrieval.
> 
> The code is at https://git.sr.ht/~amz3/guile-snowball-stemmer
> 
> The libstemmer shared library path is hardcoded as guix path of the 
> library.
> A guix package definition of the C library is available in my guix 
> channel at:
> 
>   https://git.sr.ht/~amz3/guix-amz3-channel
> 
> That said there is no guix package for the bindings. Just include the 
> file
> attached to this mail in you project.
> 
> Here is a demo:
> 
> scheme@(guile-user)> (import (snowball-stemmer))
> 
> scheme@(guile-user)> (stemmers)
> $1 = ("turkish" "swedish" "spanish" "russian" "romanian" "portuguese"
> "porter" "norwegian" "italian" "hungarian" "german" "french" "finnish"
> "english" "dutch" "danish")
> 
> scheme@(guile-user)> (make-stemmer "amazigh")
> ERROR: In procedure scm-error:
> ERROR: snowball-stemmer "Oops! Stemmer not found" "amazigh"
> 
> scheme@(guile-user)> (define english (make-stemmer "english"))
> scheme@(guile-user)> (stem english "cycling")
> $2 = "cycl"
> scheme@(guile-user)> (stem english "ecology")
> $3 = "ecolog"
> scheme@(guile-user)> (stem english "library")
> $4 = "librari"
> scheme@(guile-user)> (stem english "virtual")
> $5 = "virtual"
> scheme@(guile-user)> (stem english "environment")
> $6 = "environ"
> 
> scheme@(guile-user)> (define french (make-stemmer "french"))
> scheme@(guile-user)> (stem french "environnement")
> $7 = "environ"
> scheme@(guile-user)> (stem french "bibliotheque")
> $8 = "bibliothequ"
> scheme@(guile-user)> (stem french "gazette")
> $9 = "gazet"
> scheme@(guile-user)> (stem french "constituant")
> $10 = "constitu"


Small update, I forgot to actually guard the stemmer.

Here is the patch:

diff --git a/snowball-stemmer.scm b/snowball-stemmer.scm
index b754808..603a97e 100644
--- a/snowball-stemmer.scm
+++ b/snowball-stemmer.scm
@@ -67,6 +67,7 @@
        (let ((out (proc (string->pointer algorithm) NULL)))
          (when(eq? out NULL)
            (error 'snowball-stemmer "Oops! Stemmer not found" 
algorithm))
+        (stemmers-guardian out)
          out))))

  (define (reap-stemmers)

You will find attached to this mail the fixed version.

[-- Attachment #2: snowball-stemmer.scm --]
[-- Type: text/plain, Size: 3241 bytes --]

;; guile-snowball-stemmer
;; Copyright (C) 2019 Amirouche Boubekki <amirouche@hyper.dev>

;; This library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public
;; License as published by the Free Software Foundation; either
;; version 3 of the License, or (at your option) any later version.

;; This library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Lesser General Public License for more details.

;; You should have received a copy of the GNU Lesser General Public License
;; along with this library; if not, write to the Free Software Foundation,
;; Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
(define-module (snowball-stemmer))

(import (system foreign))
(import (only (rnrs bytevectors)
              bytevector-length
              string->utf8
              utf8->string))

(export stemmers make-stemmer stem)

;;; ffi helpers

(define NULL %null-pointer)
(define POINTER '*)

;; XXX: only use that procedure in your project if you don't need to
;; access static variables
(define (dynamic-link* library-name)
  (let ((shared-object (dynamic-link library-name)))
    (lambda (return-value function-name . arguments)
      (let ((function (dynamic-func function-name shared-object)))
        (pointer->procedure return-value function arguments)))))

;; bindings

(define snowball-stemmer
  (dynamic-link* "/gnu/store/rzvlish3vsidfmvv74f74s2854wn8yii-stemmer-0.0.0/lib/libstemmer.so"))

(define stemmers
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_list")))
    (lambda ()
      (let ((array (pointer-address (proc))))
        (let loop ((out '())
                   (index 0))
          (let ((pointer (dereference-pointer (make-pointer (+ array (* 8 index))))))
            (if (eq? pointer NULL)
                out
                (loop (cons (pointer->string pointer) out)
                      (+ index 1)))))))))

(define %stemmer-delete
  (let ((proc (snowball-stemmer void "sb_stemmer_delete" POINTER)))
    (lambda (stemmer)
      (proc stemmer))))

(define stemmers-guardian (make-guardian))

(define make-stemmer
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_new" POINTER POINTER)))
    (lambda (algorithm)
      (let ((out (proc (string->pointer algorithm) NULL)))
        (when (eq? out NULL)
          (error 'snowball-stemmer "Oops! Stemmer not found" algorithm))
        (stemmers-guardian out)
        out))))

(define (reap-stemmers)
  (let loop ()
    (let ((stemmer (stemmers-guardian)))
      (when stemmer
        (%stemmer-delete stemmer)
        (loop)))))

(add-hook! after-gc-hook reap-stemmers)

(define %stemmer-length
  (let ((proc (snowball-stemmer int "sb_stemmer_length" POINTER)))
    (lambda (stemmer)
      (proc stemmer))))

(define stem
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_stem" POINTER POINTER int)))
    (lambda (stemmer word)
      (let ((bv (string->utf8 word)))
        (let ((pointer (proc stemmer (bytevector->pointer bv) (bytevector-length bv))))
          (utf8->string (pointer->bytevector pointer (%stemmer-length stemmer))))))))

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [ANN] guile-snowball-stemmer 0.1.0
  2019-05-07 18:30 ` amirouche
@ 2019-05-07 20:36   ` amirouche
  0 siblings, 0 replies; 3+ messages in thread
From: amirouche @ 2019-05-07 20:36 UTC (permalink / raw)
  To: guile-user gnu; +Cc: guile-user

[-- Attachment #1: Type: text/plain, Size: 1608 bytes --]

On 2019-05-07 20:30, amirouche@hyper.dev wrote:
> On 2019-05-07 15:28, amirouche@hyper.dev wrote:
>> I am pleased to announce the immediate availability of 
>> guile-snowball-stemmer.
>> 

I made (yet another toy) search engine. It is a small command
line tool that I attach to this mail. The code can be found at:

   https://git.sr.ht/~amz3/guile-gotofish

Here is an example run:

$ mkdir ~/.gotofish  # Database is stored there
$ guile -L . gotofish.scm search gnu guile  # Nothing yet!

# Let'index a couple of articles

$ curl https://en.wikipedia.org/wiki/GNU_Guile | html2text | guile -L . 
gotofish.scm index "GNU Guile"
Done!
$  curl https://en.wikipedia.org/wiki/Scheme_%28programming_language%29 
| html2text | guile -L . gotofish.scm index "Scheme"
Done!
$ curl https://en.wikipedia.org/wiki/GNU | html2text | guile -L . 
gotofish.scm index "GNU"
Done!
$ curl https://en.wikipedia.org/wiki/Tf%E2%80%93idf | html2text | guile 
-L . gotofish.scm index "tf-idf"
Done!

# Let's search

$ guile -L . gotofish.scm search gnu guile
** Scheme
** GNU Guile

$  guile -L . gotofish.scm search gnu
** GNU
** GNU Guile
** Scheme

$  guile -L . gotofish.scm search science
** GNU
** GNU Guile
** Scheme

$  guile -L . gotofish.scm search retrieval

# Even if the exact word "retrieval" is not in those pages,
# "retrieved" has the same stem as "retrieval" so all are
# matches

** GNU
** tf-idf
** GNU Guile
** Scheme

$ guile -L . gotofish.scm search idf
** tf-idf


Also one can use multiple words to do a lookup.

This is very primitive but hopefully it will help get going
tomorrow to build my great app!

[-- Attachment #2: gotofish.scm --]
[-- Type: text/plain, Size: 10245 bytes --]

;; guile-gotofish
;;
;; Copyright (C) 2019 Amirouche Boubekki <amirouche@hyper.dev>
;;
;; This library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public
;; License as published by the Free Software Foundation; either
;; version 3 of the License, or (at your option) any later version.
;;
;; This library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Lesser General Public License for more details.
;;
;; You should have received a copy of the GNU Lesser General Public License
;; along with this library; if not, write to the Free Software Foundation,
;; Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
(export string->stem)

(import (scheme base))
(import (scheme hash-table))
(import (scheme comparator))
(import (scheme generator))
(import (scheme process-context))
(import (scheme mapping hash))
(import (scheme set))

(import (ice-9 match))

(import (snowball-stemmer))
(import (wiredtiger okvs))
(import (wiredtiger nstore))


(define comparator (make-default-comparator))

(define punctuation (make-hash-table comparator))

(let loop ((chars (string->list "!\"#$%&\\'()*+,-./:;<=>?@[\\]^_`{|}~\n\t")))
  (unless (null? chars)
    (hash-table-set! punctuation (car chars) #t)
    (loop (cdr chars))))

(define (clean string)
  "Replace punctuation characters from STRING with a space character"
  (string-map
   (lambda (char) (if (hash-table-ref punctuation char (const #f)) #\space char)) string))

(define split (lambda (x) (string-split x #\space)))

;; setup stopwords

(define %stopwords "a
able
about
above
abst
accordance
according
accordingly
across
act
actually
added
adj
affected
affecting
affects
after
afterwards
again
against
ah
ain't
all
allow
allows
almost
alone
along
already
also
although
always
am
among
amongst
an
and
announce
another
any
anybody
anyhow
anymore
anyone
anything
anyway
anyways
anywhere
apart
apparently
appear
appreciate
appropriate
approximately
are
aren
arent
aren't
arise
around
as
a's
aside
ask
asking
associated
at
auth
available
away
awfully
b
back
be
became
because
become
becomes
becoming
been
before
beforehand
begin
beginning
beginnings
begins
behind
being
believe
below
beside
besides
best
better
between
beyond
biol
both
brief
briefly
but
by
c
ca
came
can
cannot
cant
can't
cause
causes
certain
certainly
changes
clearly
c'mon
co
com
come
comes
concerning
consequently
consider
considering
contain
containing
contains
corresponding
could
couldnt
couldn't
course
c's
currently
d
date
definitely
described
despite
did
didn't
different
do
does
doesn't
doing
done
don't
down
downwards
due
during
e
each
ed
edu
effect
eg
eight
eighty
either
else
elsewhere
end
ending
enough
entirely
especially
et
et-al
etc
even
ever
every
everybody
everyone
everything
everywhere
ex
exactly
example
except
f
far
few
ff
fifth
first
five
fix
followed
following
follows
for
former
formerly
forth
found
four
from
further
furthermore
g
gave
get
gets
getting
give
given
gives
giving
go
goes
going
gone
got
gotten
greetings
h
had
hadn't
happens
hardly
has
hasn't
have
haven't
having
he
hed
he'd
he'll
hello
hence
her
here
hereafter
hereby
herein
heres
here's
hereupon
hers
herself
hes
he's
hi
hid
him
himself
his
hither
home
hopefully
how
howbeit
however
how's
hundred
i
I
id
i'd
ie
if
ignored
i'll
im
i'm
immediate
immediately
importance
important
in
inasmuch
inc
indeed
index
indicate
indicated
indicates
information
inner
insofar
instead
into
invention
inward
is
isn't
it
itd
it'd
it'll
its
it's
itself
i've
j
just
k
keep
keep 	keeps
keeps
kept
kg
km
know
known
knows
l
largely
last
lately
later
latter
latterly
least
less
lest
let
lets
let's
like
liked
likely
line
little
'll
look
looking
looks
ltd
m
made
mainly
make
makes
many
may
maybe
me
mean
means
meantime
meanwhile
merely
mg
might
million
miss
ml
more
moreover
most
mostly
mr
mrs
much
mug
must
mustn't
my
myself
n
na
name
namely
nay
nd
near
nearly
necessarily
necessary
need
needs
neither
never
nevertheless
new
next
nine
ninety
no
nobody
non
none
nonetheless
noone
nor
normally
nos
not
noted
nothing
novel
now
nowhere
o
obtain
obtained
obviously
of
off
often
oh
ok
okay
old
omitted
on
once
one
ones
only
onto
or
ord
other
others
otherwise
ought
our
ours
ourselves
out
outside
over
overall
owing
own
p
page
pages
part
particular
particularly
past
per
perhaps
placed
please
plus
poorly
possible
possibly
potentially
pp
predominantly
present
presumably
previously
primarily
probably
promptly
proud
provides
put
q
que
quickly
quite
qv
r
ran
rather
rd
re
readily
really
reasonably
recent
recently
ref
refs
regarding
regardless
regards
related
relatively
research
respectively
resulted
resulting
results
right
run
s
said
same
saw
say
saying
says
sec
second
secondly
section
see
seeing
seem
seemed
seeming
seems
seen
self
selves
sensible
sent
serious
seriously
seven
several
shall
shan't
she
shed
she'd
she'll
shes
she's
should
shouldn't
show
showed
shown
showns
shows
significant
significantly
similar
similarly
since
six
slightly
so
some
somebody
somehow
someone
somethan
something
sometime
sometimes
somewhat
somewhere
soon
sorry
specifically
specified
specify
specifying
still
stop
strongly
sub
substantially
successfully
such
sufficiently
suggest
sup
sure
sure 	t
take
taken
taking
tell
tends
th
than
thank
thanks
thanx
that
that'll
thats
that's
that've
the
their
theirs
them
themselves
then
thence
there
thereafter
thereby
thered
therefore
therein
there'll
thereof
therere
theres
there's
thereto
thereupon
there've
these
they
theyd
they'd
they'll
theyre
they're
they've
think
third
this
thorough
thoroughly
those
thou
though
thoughh
thousand
three
throug
through
throughout
thru
thus
til
tip
to
together
too
took
toward
towards
tried
tries
truly
try
trying
ts
t's
twice
two
u
un
under
unfortunately
unless
unlike
unlikely
until
unto
up
upon
ups
us
use
used
useful
usefully
usefulness
uses
using
usually
v
value
various
've
very
via
viz
vol
vols
vs
w
want
wants
was
wasnt
wasn't
way
we
wed
we'd
welcome
well
we'll
went
were
we're
werent
weren't
we've
what
whatever
what'll
whats
what's
when
whence
whenever
when's
where
whereafter
whereas
whereby
wherein
wheres
where's
whereupon
wherever
whether
which
while
whim
whither
who
whod
whoever
whole
who'll
whom
whomever
whos
who's
whose
why
why's
widely
will
willing
wish
with
within
without
wonder
wont
won't
words
world
would
wouldnt
wouldn't
www
x
y
yes
yet
you
youd
you'd
you'll
your
youre
you're
yours
yourself
yourselves
you've
z
zero
")

(define stopwords (make-hash-table comparator))

(let loop ((words (split (clean %stopwords))))
  (unless (null? words)
    (hash-table-set! stopwords (car words) #t)
    (loop (cdr words))))

(define (filter-stopwords lst)
  (filter (lambda (token) (not (hash-table-ref stopwords token (const #f)))) lst))

;; string->stems

(define (sanitize words)
  "Only keep words that have length bigger than one"
  (filter (lambda (word) (< 1 (string-length word))) words))

(define english (make-stemmer "english"))

(define string->stems
  ;; TODO: uniquify
  (compose (lambda (words) (map (lambda (word) (stem english word)) words))
           filter-stopwords
           sanitize
           split
           string-downcase
           clean))

;;; helpers

(define-public (scm->string scm)
  (call-with-output-string
    (lambda (port)
      (write scm port))))

(define-public (string->scm string)
  (call-with-input-string string read))

;; okvs & nstore helpers

(define *bigish* (expt 2 256))

(define (some? transaction nstore collection uid)
  (let ((generator (nstore-from transaction nstore
                                (list collection
                                      uid
                                      (nstore-var 'key)
                                      (nstore-var 'value)))))
    (not (eof-object? (generator)))))

(define random-uid
  (lambda (transaction nstore collection)
    (let loop ((index 3))
      (if (zero? index)
          (raise (cons 'gotofish "Ooops! No more random uid!"))
          (let ((candidate (number->string (random *bigish*) 36)))
            (if (some? transaction nstore collection candidate)
                (loop (- index 1))
                candidate))))))

;; database setup

(define (triplestore)
  (let ((engine (nstore-engine okvs-ref okvs-set! okvs-rm! okvs-prefix)))
    (nstore engine #vu8(01) '(uid key value))))

;; TODO: mkdir directory
(define home (string-append (getenv "HOME") "/.gotofish"))
(define okvs (okvs `((home . ,home) (create? . #t) (wal? . #t))))
(define store (triplestore))

;; index

(define (read-lines)
  (let loop ((line (read-line))
             (out '()))
    (if (eof-object? line)
        (string-join out " ")
        (loop (read-line) (cons line out)))))

(define add!
  (okvs-transactional
   (lambda (transaction store uid stem)
     (nstore-add! transaction store (list uid 'stem stem)))))

(define (index uid)
  (let ((document (read-lines)))
    (let loop ((stems (string->stems document)))
      (unless (null? stems)
        (add! okvs store uid (car stems))
        (loop (cdr stems)))))
  (close (current-input-port))
  (format #t "Done!\n"))

;; search

(define %lookup
  (okvs-transactional
   (lambda (transaction store stem)
     (apply set comparator
            (map (lambda (x) (hashmap-ref x 'uid (const #f)))
                 (generator->list
                  (nstore-from transaction store
                               (list (nstore-var 'uid)
                                     'stem
                                     stem))))))))

(define (lookup keyword)
  (%lookup okvs store (stem english (string-downcase keyword))))

(define (search seed keywords)
  (let loop ((keywords keywords)
             (out (lookup seed)))
    (if (null? keywords)
        (for-each (lambda (x) (format #t "** ~a\n" x)) (set->list out))
        (loop (cdr keywords) (set-intersection (lookup (car keywords)) out)))))

;; exec

(match (cdr (command-line))
       (("index" uid) (index uid))
       (("search" keyword keywords ...) (search keyword keywords))
       (else (format #t "I do not understand what you want!\n")))

(okvs-close okvs)

[-- Attachment #3: README.md --]
[-- Type: text/plain, Size: 277 bytes --]

# guile-gotofish

Small command line utility to index and search plain text.

```
$ gotofish --help

  cat hello | gotofish index UNIQUE-IDENTIFIER

  curl https://en.wikipedia.org/wiki/GNU_Guile | html2text | gotofish index "GNU Guile"

  gotofish search +keyword -minus

```

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-05-07 20:36 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-07 13:28 [ANN] guile-snowball-stemmer 0.1.0 amirouche
2019-05-07 18:30 ` amirouche
2019-05-07 20:36   ` amirouche

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).