unofficial mirror of guix-patches@gnu.org 
 help / color / mirror / code / Atom feed
From: Nicolas Graves via Guix-patches via <guix-patches@gnu.org>
To: 73115@debbugs.gnu.org
Cc: ngraves@ngraves.fr
Subject: [bug#73115] [PATCH] gnu: Add python-sentence-transformers.
Date: Sun,  8 Sep 2024 02:09:24 +0200	[thread overview]
Message-ID: <20240908000927.29091-1-ngraves@ngraves.fr> (raw)

* gnu/packages/machine-learning.scm (python-sentence-transformers): New variable.

Change-Id: Iedab56f6c2bdde12e654ba67695cd996122bdb0b
---
 gnu/packages/machine-learning.scm | 54 +++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index 42842d7d61..b2da07e8f0 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -1239,6 +1239,60 @@ (define-public python-sentencepiece
 unsupervised text tokenizer.")
     (license license:asl2.0)))
 
+(define-public python-sentence-transformers
+  (package
+    (name "python-sentence-transformers")
+    (version "3.0.1")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "sentence_transformers" version))
+       (sha256
+        (base32 "1xmzbyrlp6wa7adf42n67c544db17nz95b10ri603lf4gi9jqgca"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:test-flags `(list
+                     ;; These test files miss fixtures or train/test data,
+                     ;; or they require internet access.
+                     "--ignore=tests/test_sentence_transformer.py"
+                     "--ignore=tests/test_train_stsb.py"
+                     "--ignore=tests/test_compute_embeddings.py"
+                     "--ignore=tests/test_cross_encoder.py"
+                     "--ignore=tests/test_model_card_data.py"
+                     "--ignore=tests/test_multi_process.py"
+                     "--ignore=tests/test_pretrained_stsb.py"
+                     "-k" ,(string-append
+                            "not test_LabelAccuracyEvaluator"
+                            " and not test_ParaphraseMiningEvaluator"
+                            " and not test_cmnrl_same_grad"
+                            " and not test_paraphrase_mining"
+                            " and not test_simple_encode"))))
+    (propagated-inputs (list python-huggingface-hub
+                             python-numpy
+                             python-pillow
+                             python-scikit-learn
+                             python-scipy
+                             python-pytorch
+                             python-tqdm
+                             python-transformers))
+    (native-inputs (list python-pytest))
+    (home-page "https://www.SBERT.net")
+    (synopsis "Multilingual text embeddings")
+    (description "This framework provides an easy method to compute dense
+vector representations for sentences, paragraphs, and images.  The models are
+based on transformer networks like BERT / RoBERTa / XLM-RoBERTa and achieve
+state-of-the-art performance in various tasks.  Text is embedded in vector
+space such that similar texts are close together and can efficiently be found
+using cosine similarity.
+
+This package provides easy access to pretrained models for more than 100
+languages, fine-tuned for various use-cases.
+
+Further, this framework allows easy fine-tuning of custom embedding models,
+to achieve maximal performance on your specific task.")
+    (license license:asl2.0)))
+
 (define-public python-spacy-legacy
   (package
     (name "python-spacy-legacy")
-- 
2.45.2





                 reply	other threads:[~2024-09-08  0:10 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://guix.gnu.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240908000927.29091-1-ngraves@ngraves.fr \
    --to=guix-patches@gnu.org \
    --cc=73115@debbugs.gnu.org \
    --cc=ngraves@ngraves.fr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).