unofficial mirror of guix-patches@gnu.org 
 help / color / mirror / code / Atom feed
* [bug#73115] [PATCH] gnu: Add python-sentence-transformers.
@ 2024-09-08  0:09 Nicolas Graves via Guix-patches via
  0 siblings, 0 replies; only message in thread
From: Nicolas Graves via Guix-patches via @ 2024-09-08  0:09 UTC (permalink / raw)
  To: 73115; +Cc: ngraves

* gnu/packages/machine-learning.scm (python-sentence-transformers): New variable.

Change-Id: Iedab56f6c2bdde12e654ba67695cd996122bdb0b
---
 gnu/packages/machine-learning.scm | 54 +++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index 42842d7d61..b2da07e8f0 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -1239,6 +1239,60 @@ (define-public python-sentencepiece
 unsupervised text tokenizer.")
     (license license:asl2.0)))
 
+(define-public python-sentence-transformers
+  (package
+    (name "python-sentence-transformers")
+    (version "3.0.1")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "sentence_transformers" version))
+       (sha256
+        (base32 "1xmzbyrlp6wa7adf42n67c544db17nz95b10ri603lf4gi9jqgca"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:test-flags `(list
+                     ;; The ignored files and deselected tests below are missing
+                     ;; fixtures or train/test data, or require internet access.
+                     "--ignore=tests/test_sentence_transformer.py"
+                     "--ignore=tests/test_train_stsb.py"
+                     "--ignore=tests/test_compute_embeddings.py"
+                     "--ignore=tests/test_cross_encoder.py"
+                     "--ignore=tests/test_model_card_data.py"
+                     "--ignore=tests/test_multi_process.py"
+                     "--ignore=tests/test_pretrained_stsb.py"
+                     "-k" ,(string-append
+                            "not test_LabelAccuracyEvaluator"
+                            " and not test_ParaphraseMiningEvaluator"
+                            " and not test_cmnrl_same_grad"
+                            " and not test_paraphrase_mining"
+                            " and not test_simple_encode"))))
+    (propagated-inputs (list python-huggingface-hub
+                             python-numpy
+                             python-pillow
+                             python-scikit-learn
+                             python-scipy
+                             python-pytorch
+                             python-tqdm
+                             python-transformers))
+    (native-inputs (list python-pytest))
+    (home-page "https://www.SBERT.net")
+    (synopsis "Multilingual text embeddings")
+    (description "This framework provides an easy method to compute dense
+vector representations for sentences, paragraphs, and images.  The models are
+based on transformer networks like BERT / RoBERTa / XLM-RoBERTa and achieve
+state-of-the-art performance in various tasks.  Text is embedded in vector
+space such that similar texts are close together and can efficiently be found
+using cosine similarity.
+
+This package provides easy access to pretrained models for more than 100
+languages, fine-tuned for various use cases.
+
+Further, this framework allows easy fine-tuning of custom embedding
+models, to achieve maximal performance on your specific task.")
+    (license license:asl2.0)))
+
 (define-public python-spacy-legacy
   (package
     (name "python-spacy-legacy")
-- 
2.45.2





^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2024-09-08  0:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-09-08  0:09 [bug#73115] [PATCH] gnu: Add python-sentence-transformers Nicolas Graves via Guix-patches via

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).