unofficial mirror of guile-user@gnu.org 
 help / color / mirror / Atom feed
* [ANN] guile-snowball-stemmer 0.1.0
@ 2019-05-07 13:28 amirouche
  2019-05-07 18:30 ` amirouche
  0 siblings, 1 reply; 3+ messages in thread
From: amirouche @ 2019-05-07 13:28 UTC (permalink / raw)
  To: guile-user gnu

[-- Attachment #1: Type: text/plain, Size: 1915 bytes --]

I am pleased to announce the immediate availability of 
guile-snowball-stemmer.

This is binding library that allows to compute the stem of words in 
various
languages. The list of supported language is available in the following 
REPL
run.

This is a binding library. The official website is at 
https://snowballstem.org/

It is mostly useful in the context of information retrieval.

The code is at https://git.sr.ht/~amz3/guile-snowball-stemmer

The libstemmer shared library path is hardcoded as guix path of the 
library.
A guix package definition of the C library is available in my guix 
channel at:

   https://git.sr.ht/~amz3/guix-amz3-channel

That said there is no guix package for the bindings. Just include the 
file
attached to this mail in you project.

Here is a demo:

scheme@(guile-user)> (import (snowball-stemmer))
stemmer/snowball-stemmer.scm.go

scheme@(guile-user)> (stemmers)
$1 = ("turkish" "swedish" "spanish" "russian" "romanian" "portuguese" 
"porter" "norwegian" "italian" "hungarian" "german" "french" "finnish" 
"english" "dutch" "danish")

scheme@(guile-user)> (make-stemmer "amazigh")
ERROR: In procedure scm-error:
ERROR: snowball-stemmer "Oops! Stemmer not found" "amazigh"

scheme@(guile-user)> (define english (make-stemmer "english"))
scheme@(guile-user)> (stem english "cycling")
$2 = "cycl"
scheme@(guile-user)> (stem english "ecology")
$3 = "ecolog"
scheme@(guile-user)> (stem english "library")
$4 = "librari"
scheme@(guile-user)> (stem english "virtual")
$5 = "virtual"
scheme@(guile-user)> (stem english "environment")
$6 = "environ"

scheme@(guile-user)> (define french (make-stemmer "french"))
scheme@(guile-user)> (stem french "environnement")
$7 = "environ"
scheme@(guile-user)> (stem french "bibliotheque")
$8 = "bibliothequ"
scheme@(guile-user)> (stem french "gazette")
$9 = "gazet"
scheme@(guile-user)> (stem french "constituant")
$10 = "constitu"


Happy hacking!

[-- Attachment #2: snowball-stemmer.scm --]
[-- Type: text/plain, Size: 3208 bytes --]

;; guile-snowball-stemmer
;; Copyright (C) 2019 Amirouche Boubekki <amirouche@hyper.dev>

;; This library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public
;; License as published by the Free Software Foundation; either
;; version 3 of the License, or (at your option) any later version.

;; This library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Lesser General Public License for more details.

;; You should have received a copy of the GNU Lesser General Public License
;; along with this library; if not, write to the Free Software Foundation,
;; Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
(define-module (snowball-stemmer))

(import (system foreign))
(import (only (rnrs bytevectors)
              bytevector-length
              string->utf8
              utf8->string))

(export stemmers make-stemmer stem)

;;; ffi helpers

(define NULL %null-pointer)
(define POINTER '*)

;; XXX: only use that procedure in your project if you don't need to
;; access static variables
(define (dynamic-link* library-name)
  (let ((shared-object (dynamic-link library-name)))
    (lambda (return-value function-name . arguments)
      (let ((function (dynamic-func function-name shared-object)))
        (pointer->procedure return-value function arguments)))))

;; bindings

(define snowball-stemmer
  (dynamic-link* "/gnu/store/rzvlish3vsidfmvv74f74s2854wn8yii-stemmer-0.0.0/lib/libstemmer.so"))

(define stemmers
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_list")))
    (lambda ()
      (let ((array (pointer-address (proc))))
        (let loop ((out '())
                   (index 0))
          (let ((pointer (dereference-pointer (make-pointer (+ array (* 8 index))))))
            (if (eq? pointer NULL)
                out
                (loop (cons (pointer->string pointer) out)
                      (+ index 1)))))))))

(define %stemmer-delete
  (let ((proc (snowball-stemmer void "sb_stemmer_delete" POINTER)))
    (lambda (stemmer)
      (proc stemmer))))

(define stemmers-guardian (make-guardian))

(define make-stemmer
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_new" POINTER POINTER)))
    (lambda (algorithm)
      (let ((out (proc (string->pointer algorithm) NULL)))
        (when(eq? out NULL)
          (error 'snowball-stemmer "Oops! Stemmer not found" algorithm))
        out))))

(define (reap-stemmers)
  (let loop ()
    (let ((stemmer (stemmers-guardian)))
      (when stemmer
        (%stemmer-delete stemmer)
        (loop)))))

(add-hook! after-gc-hook reap-stemmers)

(define %stemmer-length
  (let ((proc (snowball-stemmer int "sb_stemmer_length" POINTER)))
    (lambda (stemmer)
      (proc stemmer))))

(define stem
  (let ((proc (snowball-stemmer POINTER "sb_stemmer_stem" POINTER POINTER int)))
    (lambda (stemmer word)
      (let ((bv (string->utf8 word)))
        (let ((pointer (proc stemmer (bytevector->pointer bv) (bytevector-length bv))))
          (utf8->string (pointer->bytevector pointer (%stemmer-length stemmer))))))))

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-05-07 20:36 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-07 13:28 [ANN] guile-snowball-stemmer 0.1.0 amirouche
2019-05-07 18:30 ` amirouche
2019-05-07 20:36   ` amirouche

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).