unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Philip Kaludercic <philipk@posteo.net>
To: Sergey Kostyaev <sskostyaev@gmail.com>
Cc: emacs-devel@gnu.org
Subject: Re: Add elisa to GNU ELPA
Date: Tue, 16 Jul 2024 12:54:47 +0000	[thread overview]
Message-ID: <8734o9sdig.fsf@posteo.net> (raw)
In-Reply-To: <A34CBE88-91B5-494C-AAE1-B13F2ACD3C3F@gmail.com> (Sergey Kostyaev's message of "Fri, 12 Jul 2024 23:47:49 +0700")

[-- Attachment #1: Type: text/plain, Size: 777 bytes --]

Sergey Kostyaev <sskostyaev@gmail.com> writes:

> Hi all,
>
> Please add https://github.com/s-kostyaev/elisa to GNU ELPA. This
> package implements RAG (Retrieval Augmented Generation) for
> `ellama'. Today I have released 1.0.0 with info manuals, web search
> and local files support.
>
> Best regards,
> Sergey Kostyaev

I'm attaching my comments and suggestions to the end of this message.
Just one point at the beginning, so that you don't miss it: Despite
reading through the entire source code, I have no idea what you are
trying to do with this package.  It would be nice to have some more
context in the Commentary section and to elaborate on a number of
user-facing docstrings, unless this is intended to be expert software,
for people familiar with whatever the field is.


[-- Attachment #2: Type: text/plain, Size: 25207 bytes --]

diff --git a/elisa.el b/elisa.el
index 09cc28975d..0b79745bd5 100644
--- a/elisa.el
+++ b/elisa.el
@@ -28,7 +28,6 @@
 ;; ELISA (Emacs Lisp Information System Assistant) is a system designed
 ;; to provide informative answers to user queries by leveraging a
 ;; Retrieval Augmented Generation (RAG) approach.
-;;
 
 ;;; Code:
 (require 'ellama)
@@ -48,68 +47,61 @@
 					    (make-llm-ollama
 					     :embedding-model "nomic-embed-text"))
   "Embeddings provider to generate embeddings."
-  :group 'elisa
-  :type '(sexp :validate 'cl-struct-p))
+  :type '(sexp :validate 'cl-struct-p)) ;a more specific predicate here?
 
 (defcustom elisa-chat-provider (progn (require 'llm-ollama)
 				      (make-llm-ollama
 				       :chat-model "sskostyaev/openchat:8k-rag"
 				       :embedding-model "nomic-embed-text"))
   "Chat provider."
-  :group 'elisa
   :type '(sexp :validate 'cl-struct-p))
 
 (defcustom elisa-db-directory (file-truename
 			       (file-name-concat
 				user-emacs-directory "elisa"))
   "Directory for elisa database."
-  :group 'elisa
-  :type 'directory)
+  :type 'directory)                     ;is it necessary that it exists?
 
 (defcustom elisa-limit 5
   "Count quotes to pass into llm context for answer."
-  :group 'elisa
-  :type 'integer)
+  :type 'integer)                       ;or natnum?
 
-(defcustom elisa-find-executable "find"
+(defcustom elisa-find-executable find-program
   "Path to find executable."
-  :group 'elisa
   :type 'string)
 
 (defcustom elisa-tar-executable "tar"
   "Path to tar executable."
-  :group 'elisa
   :type 'string)
 
 (defcustom elisa-sqlite-vss-version "v0.1.2"
   "Sqlite VSS version."
-  :group 'elisa
   :type 'string)
 
 (defcustom elisa-sqlite-vss-path nil
   "Path to sqlite-vss extension."
-  :group 'elisa
   :type 'file)
 
 (defcustom elisa-sqlite-vector-path nil
   "Path to sqlite-vector extension."
-  :group 'elisa
   :type 'file)
 
-(defcustom elisa-semantic-split-function 'elisa-split-by-paragraph
+(defcustom elisa-semantic-split-function #'elisa-split-by-paragraph
   "Function for semantic text split."
-  :group 'elisa
   :type 'function)
 
 (defcustom elisa-prompt-rewriting-enabled t
   "Enable prompt rewriting for better retrieving."
-  :group 'elisa
   :type 'boolean)
 
-(defcustom elisa-chat-prompt-template "Answer user query based on context above. If you can answer it partially do it. Provide list of open questions if any. Say \"not enough data\" if you can't answer user query based on provided context. User query:
+(defcustom elisa-chat-prompt-template
+  "Answer user query based on context above. \
+If you can answer it partially do it. \
+Provide list of open questions if any. \
+Say \"not enough data\" if you can't answer user \
+query based on provided context. User query:
 %s"
-  "Chat prompt template."
-  :group 'elisa
+  "Chat prompt template."               ;some more explanation would be nice.
   :type 'string)
 
 (defcustom elisa-rewrite-prompt-template
@@ -130,81 +122,70 @@ How to buy a pony?
  User prompt:
 %s"
   "Prompt template for prompt rewriting."
-  :group 'elisa
   :type 'string)
 
 (defcustom elisa-searxng-url "http://localhost:8080/"
   "Searxng url for web search.  Json format should be enabled for this instance."
-  :group 'elisa
   :type 'string)
 
 (defcustom elisa-pandoc-executable "pandoc"
-  "Path to pandoc executable."
-  :group 'elisa
+  "Path to pandoc (https://pandoc.org/) executable."
   :type 'string)
 
-(defcustom elisa-webpage-extraction-function 'elisa-get-webpage-buffer
+(defcustom elisa-webpage-extraction-function #'elisa-get-webpage-buffer
   "Function to get buffer with webpage content."
-  :group 'elisa
   :type 'function)
 
-(defcustom elisa-web-search-function 'elisa-search-duckduckgo
+(defcustom elisa-web-search-function #'elisa-search-duckduckgo
   "Function to search the web.
 Function should get prompt and return list of urls."
-  :group 'elisa
   :type 'function)
 
 (defcustom elisa-web-pages-limit 10
   "Limit of web pages to parse during web search."
-  :group 'elisa
-  :type 'integer)
+  :type 'natnum)
 
 (defcustom elisa-breakpoint-threshold-amount 0.4
   "Breakpoint threshold amount.
 Increase it if you need decrease semantic split granularity."
-  :group 'elisa
-  :type 'float)
+  :type 'number)
 
 (defcustom elisa-reranker-enabled nil
   "Enable reranker to improve retrieving quality."
-  :group 'elisa
   :type 'boolean)
 
 (defcustom elisa-reranker-url "http://127.0.0.1:8787/"
-  "Reranker service url."
-  :group 'elisa
+  "Reranker service url."               ;here as well, this doesn't mean much to someone who doesn't already know what is going on.
   :type 'string)
 
 (defcustom elisa-reranker-similarity-threshold 0
   "Reranker similarity threshold.
 If set, all quotes with similarity less than threshold will be filtered out."
-  :group 'elisa
-  :type 'string)
+  :type 'string)                        ;wrong type?
 
 (defcustom elisa-reranker-limit 20
   "Number of quotes for send to reranker."
-  :group 'elisa
   :type 'integer)
 
 (defcustom elisa-ignore-patterns-files '(".gitignore" ".ignore" ".rgignore")
   "Files with patterns to ignore during file parsing."
-  :group 'elisa
-  :type '(list string))
+  :type '(repeat string))
 
 (defcustom elisa-ignore-invisible-files t
   "Ignore invisible files and directories during file parsing."
-  :group 'elisa
   :type 'boolean)
 
 (defcustom elisa-enabled-collections '("builtin manuals" "external manuals")
   "Enabled collections for elisa chat."
-  :group 'elisa
-  :type '(list string))
+  :type '(repeat string))
 
 (defun elisa-sqlite-vss-download-url ()
+  ;; It seems to be a general problem that your documentation strings
+  ;; are not giving any context, and just barely touching on what is
+  ;; going on...
   "Generate sqlite vss download url based on current system."
-  (cond  ((string-equal system-type "darwin")
-	  (if (string-prefix-p "aarch64" system-configuration)
+  (cond  ((eq system-type 'darwin)
+	  (if (string-prefix-p "aarch64" system-configuration) ;how robust is this?
 	      (format
 	       "https://github.com/asg017/sqlite-vss/releases/download/%s/sqlite-vss-%s-loadable-macos-aarch64.tar.gz"
 	       elisa-sqlite-vss-version
@@ -213,7 +194,7 @@ If set, all quotes with similarity less than threshold will be filtered out."
 	     "https://github.com/asg017/sqlite-vss/releases/download/%s/sqlite-vss-%s-loadable-macos-x86_64.tar.gz"
 	     elisa-sqlite-vss-version
 	     elisa-sqlite-vss-version)))
-	 ((string-equal system-type "gnu/linux")
+	 ((eq system-type 'gnu/linux)
 	  (format
 	   "https://github.com/asg017/sqlite-vss/releases/download/%s/sqlite-vss-%s-loadable-linux-x86_64.tar.gz"
 	   elisa-sqlite-vss-version
@@ -223,18 +204,14 @@ If set, all quotes with similarity less than threshold will be filtered out."
 (defun elisa--vss-path ()
   "Path to vss sqlite extension."
   (or elisa-sqlite-vss-path
-      (let* ((ext (if (string-equal system-type "darwin")
-		      "dylib"
-		    "so"))
+      (let* ((ext (if (eq system-type 'darwin) "dylib" "so"))
 	     (file (format "vss0.%s" ext)))
 	(file-name-concat elisa-db-directory file))))
 
 (defun elisa--vector-path ()
   "Path to vector sqlite extension."
   (or elisa-sqlite-vector-path
-      (let* ((ext (if (string-equal system-type "darwin")
-		      "dylib"
-		    "so"))
+      (let* ((ext (if (eq system-type 'darwin) "dylib" "so"))
 	     (file (format "vector0.%s" ext)))
 	(file-name-concat elisa-db-directory file))))
 
@@ -260,45 +237,45 @@ If set, all quotes with similarity less than threshold will be filtered out."
 
 (defun elisa-embeddings-create-table-sql ()
   "Generate sql for create embeddings table."
-  "drop table if exists elisa_embeddings;")
+  "DROP TABLE IF EXISTS elisa_embeddings;") ;just my personal taste, ignore if you disagree (I do it because there is no SQL syntax highlighting in Elisp strings)
 
 (defun elisa-data-embeddings-create-table-sql ()
   "Generate sql for create data embeddings table."
-  (format "create virtual table if not exists data_embeddings using vss0(embedding(%d));"
+  (format "CREATE VIRTUAL TABLE IF NOT EXISTS data_embeddings USING vss0(embedding(%d));"
 	  (elisa-get-embedding-size)))
 
 (defun elisa-data-fts-create-table-sql ()
   "Generate sql for create full text search table."
-  "create virtual table if not exists data_fts using fts5(data);")
+  "CREATE VIRTUAL TABLE IF NOT EXISTS data_fts USING FTS5(data);")
 
 (defun elisa-info-create-table-sql ()
   "Generate sql for create info table."
-  "drop table if exists info;")
+  "DROP TABLE IF EXISTS info;")
 
 (defun elisa-collections-create-table-sql ()
   "Generate sql for create collections table."
-  "create table if not exists collections (name text unique);")
+  "CREATE TABLE IF NOT EXISTS collections (name TEXT UNIQUE);")
 
 (defun elisa-kinds-create-table-sql ()
   "Generate sql for create kinds table."
-  "create table if not exists kinds (name text unique);")
+  "CREATE TABLE IF NOT EXISTS kinds (name TEXT UNIQUE);")
 
 (defun elisa-fill-kinds-sql ()
   "Generate sql for fill kinds table."
-  "insert into kinds (name) values ('web'), ('file'), ('info') on conflict do nothing;")
+  "INSERT INTO kinds (name) VALUES ('web'), ('file'), ('info') ON CONFLICT DO NOTHING;")
 
 (defun elisa-files-create-table-sql ()
   "Generate sql for create files table."
-  "create table if not exists files (path text unique, hash text)")
+  "CREATE TABLE IF NOT EXISTS files (path TEXT UNIQUE, hash TEXT)")
 
 (defun elisa-data-create-table-sql ()
   "Generate sql for create data table."
-  "create table if not exists data (
+  "CREATE TABLE IF NOT EXISTS data (
 kind_id INTEGER,
 collection_id INTEGER,
-path text,
-hash text,
-data text,
+path TEXT,
+hash TEXT,
+data TEXT,
 FOREIGN KEY(kind_id) REFERENCES kinds(rowid),
 FOREIGN KEY(collection_id) REFERENCES collections(rowid)
 );")
@@ -307,13 +284,9 @@ FOREIGN KEY(collection_id) REFERENCES collections(rowid)
   "Initialize elisa DB."
   (if (not (file-exists-p (elisa--vss-path)))
       (warn "Please run M-x `elisa-download-sqlite-vss' to use this package")
-    (sqlite-pragma db "PRAGMA journal_mode=WAL;")
-    (sqlite-load-extension
-     db
-     (elisa--vector-path))
-    (sqlite-load-extension
-     db
-     (elisa--vss-path))
+    (sqlite-pragma db "PRAGMA journal_mode=WAL;")
+    (sqlite-load-extension db (elisa--vector-path))
+    (sqlite-load-extension db (elisa--vss-path))
     (sqlite-execute db (elisa-embeddings-create-table-sql))
     (sqlite-execute db (elisa-info-create-table-sql))
     (sqlite-execute db (elisa-collections-create-table-sql))
@@ -324,44 +297,46 @@ FOREIGN KEY(collection_id) REFERENCES collections(rowid)
     (sqlite-execute db (elisa-data-embeddings-create-table-sql))
     (sqlite-execute db (elisa-data-fts-create-table-sql))))
 
-(defvar elisa-db (progn
-		   (make-directory elisa-db-directory t)
-		   (let ((db (sqlite-open (file-name-concat elisa-db-directory "elisa.sqlite"))))
-		     (elisa--init-db db)
-		     db)))
+(defvar elisa-db
+  (let ((_ (make-directory elisa-db-directory t))
+        (db (sqlite-open (file-name-concat elisa-db-directory "elisa.sqlite"))))
+    (elisa--init-db db)
+    db))
 
 (defun elisa-vector-to-sqlite (data)
   "Convert DATA to sqlite vector representation."
-  (format "vector_from_json(json('%s'))"
-	  (json-encode data)))
-
-(defun elisa-sqlite-escape (s)
-  "Escape single quotes in S for sqlite."
-  (thread-last
-    s
-    (string-replace "'" "''")
-    (string-replace "\\" "\\\\")
-    (string-replace "\0" "\n")))
+  (format "vector_from_json(json('%s'))" (json-encode data)))
+
+(defun elisa-sqlite-escape (string)
+  "Escape single quotes in STRING for sqlite."
+  (let ((reps '(("'" . "''")
+                ("\\" . "\\\\")
+                ("\0" . "\n"))))
+    (replace-regexp-in-string         ;simultaneous replacement
+     (regexp-opt (mapcar #'car reps)) ;is the last one really \0 or \\0?
+     (lambda (str) (alist-get str reps nil nil #'string=))
+     string nil t)))
 
 (defun elisa-sqlite-format-int-list (ids)
   "Convert list of integer IDS list to sqlite list representation."
   (format
    "(%s)"
-   (string-join (mapcar (lambda (id) (format "%d" id)) ids) ", ")))
+   (mapconcat (lambda (id) (format "%d" id)) ids ", ")))
 
 (defun elisa-sqlite-format-string-list (names)
   "Convert list of string NAMES list to sqlite list representation."
   (format
    "(%s)"
-   (string-join (mapcar (lambda (name)
-			  (format "'%s'"
-				  (elisa-sqlite-escape name))) names) ", ")))
+   (mapconcat (lambda (name)
+		(format "'%s'"
+			(elisa-sqlite-escape name)))
+              names ", ")))
 
-(defun elisa-avg (lst)
-  "Calculate arithmetic average value of LST."
-  (let ((len (length lst))
-	(sum (cl-reduce #'+ lst :initial-value 0.0)))
-    (/ sum len)))
+(defun elisa-avg (list)
+  "Calculate arithmetic average value of LIST."
+  (cl-loop for elem in list for count from 0
+           summing elem into sum
+           finally (return (/ sum (float count)))))
 
 (defun elisa-std-dev (lst)
   "Calculate standart deviation value of LST."
@@ -450,6 +425,11 @@ FOREIGN KEY(collection_id) REFERENCES collections(rowid)
 		     (setq continue nil))))
 	      (setq continue nil))))))))
 
+;; this is pretty verbose, do you think there might be a more readable
+;; way to express this?  It is pretty easy to create a small
+;; templating language in Elisp
+;; (e.g. https://git.savannah.gnu.org/cgit/emacs/elpa.git/tree/elpa-admin.el?h=elpa-admin#n1138),
+;; perhaps that could also be useful here.
 (defun elisa--find-similar (text collections)
   "Find similar to TEXT results in COLLECTIONS.
 Return sqlite query.  For asyncronous execution."
@@ -520,7 +500,7 @@ Evaluate ON-DONE with result."
 	(result nil))
     (save-excursion
       (goto-char (point-min))
-      (while (< (point) (point-max))
+      (while (not (eobp))
 	(funcall func)
 	(push (buffer-substring-no-properties pt (point)) result)
 	(setq pt (point)))
@@ -534,6 +514,7 @@ Evaluate ON-DONE with result."
   "Split buffer to list of paragraphs."
   (elisa--split-by #'forward-paragraph))
 
+;; a number of these functions seem like something that should be added to the core of Emacs or at least a common ELPA package...
 (defun elisa-dot-product (v1 v2)
   "Calculate the dot produce of vectors V1 and V2."
   (let ((result 0))
@@ -608,14 +589,12 @@ than T, it will be packed into single semantic chunk."
 	    (current (car chunks))
 	    (tail (cdr chunks)))
       (let* ((result nil))
-	(mapc
-	 (lambda (el)
-	   (if (<= el threshold)
+        (dolist (el distances)
+          (if (<= el threshold)
 	       (setq current (concat current (car tail)))
 	     (push current result)
 	     (setq current (car tail)))
 	   (setq tail (cdr tail)))
-	 distances)
 	(push current result)
 	(cl-remove-if
 	 #'string-empty-p
@@ -626,6 +605,7 @@ than T, it will be packed into single semantic chunk."
 		 (nreverse result))))
     (list (buffer-substring-no-properties (point-min) (point-max)))))
 
+;; why not use `wildcard-to-regexp'?
 (defun elisa--gitignore-to-elisp-regexp (pattern)
   "Convert a .gitignore PATTERN to an Emacs Lisp regexp."
   (let ((result "")
@@ -676,11 +656,11 @@ than T, it will be packed into single semantic chunk."
 
 (defun elisa--text-file-p (filename)
   "Check if FILENAME contain text."
-  (or (when (get-file-buffer filename) t) ;; if file opened assume it text
+  (or (and (get-file-buffer filename) t) ;; if file opened assume it text
       (with-current-buffer (find-file-noselect filename t t)
 	(prog1
 	    ;; if there is null byte in file, file is binary
-	    (not (re-search-forward "\0" nil t 1))
+	    (not (search-forward "\0" nil t 1))
 	  (kill-buffer)))))
 
 (defun elisa--file-list (directory)
@@ -727,41 +707,39 @@ When FORCE parse even if already parsed."
 	     (format "delete from files where path = '%s';"
 		     (elisa-sqlite-escape path))))
 	  ;; add new data
-	  (mapc
-	   (lambda (text)
-	     (let* ((hash (secure-hash 'sha256 text))
-		    (rowid
-		     (if-let ((rowid (caar (sqlite-select
-					    elisa-db
-					    (format "select rowid from data where kind_id = %s and collection_id = %s and path = '%s' and hash = '%s';"
-						    kind-id collection-id
-						    (elisa-sqlite-escape path) hash)))))
-			 (progn
-			   (push rowid row-ids)
-			   nil)
-		       (sqlite-execute
-			elisa-db
-			(format
-			 "insert into data(kind_id, collection_id, path, hash, data) values (%s, %s, '%s', '%s', '%s');"
-			 kind-id collection-id
-			 (elisa-sqlite-escape path) hash (elisa-sqlite-escape text)))
-		       (caar (sqlite-select
-			      elisa-db
-			      (format "select rowid from data where kind_id = %s and collection_id = %s and path = '%s' and hash = '%s';"
-				      kind-id collection-id
-				      (elisa-sqlite-escape path) hash))))))
-	       (when rowid
-		 (sqlite-execute
-		  elisa-db
-		  (format "insert into data_embeddings(rowid, embedding) values (%s, %s);"
-			  rowid (elisa-vector-to-sqlite
-				 (llm-embedding elisa-embeddings-provider text))))
-		 (sqlite-execute
-		  elisa-db
-		  (format "insert into data_fts(rowid, data) values (%s, '%s');"
-			  rowid (elisa-sqlite-escape text)))
-		 (push rowid row-ids))))
-	   chunks)
+          (dolist (text chunks)
+            (let* ((hash (secure-hash 'sha256 text))
+		   (rowid
+		    (if-let ((rowid (caar (sqlite-select
+					   elisa-db
+					   (format "select rowid from data where kind_id = %s and collection_id = %s and path = '%s' and hash = '%s';"
+						   kind-id collection-id
+						   (elisa-sqlite-escape path) hash)))))
+			(progn
+			  (push rowid row-ids)
+			  nil)
+		      (sqlite-execute
+		       elisa-db
+		       (format
+			"insert into data(kind_id, collection_id, path, hash, data) values (%s, %s, '%s', '%s', '%s');"
+			kind-id collection-id
+			(elisa-sqlite-escape path) hash (elisa-sqlite-escape text)))
+		      (caar (sqlite-select
+			     elisa-db
+			     (format "select rowid from data where kind_id = %s and collection_id = %s and path = '%s' and hash = '%s';"
+				     kind-id collection-id
+				     (elisa-sqlite-escape path) hash))))))
+	      (when rowid
+		(sqlite-execute
+		 elisa-db
+		 (format "insert into data_embeddings(rowid, embedding) values (%s, %s);"
+			 rowid (elisa-vector-to-sqlite
+				(llm-embedding elisa-embeddings-provider text))))
+		(sqlite-execute
+		 elisa-db
+		 (format "insert into data_fts(rowid, data) values (%s, '%s');"
+			 rowid (elisa-sqlite-escape text)))
+		(push rowid row-ids))))
 	  ;; remove old data
 	  (when row-ids
 	    (let ((delete-rows (cl-remove-if (lambda (id)
@@ -779,7 +757,7 @@ When FORCE parse even if already parsed."
 
 (defun elisa--delete-data (ids)
   "Delete data with IDS."
-  (sqlite-execute
+  (sqlite-execute                       ;perhaps it would be worth extracting the (sqlite-execute elisa-db ...) part into a separate function
    elisa-db
    (format "delete from data_fts where rowid in %s;"
 	   (elisa-sqlite-format-int-list ids)))
@@ -815,10 +793,9 @@ When FORCE parse even if already parsed."
 			collection-id
 			(elisa-sqlite-format-string-list files))))))
     (elisa--delete-data delete-ids)
-    (mapc (lambda (file)
-	    (message "parsing %s" file)
-	    (elisa-parse-file collection-id file))
-	  files)))
+    (dolist (file files)
+      (message "parsing %s" file)
+      (elisa-parse-file collection-id file))))
 
 ;;;###autoload
 (defun elisa-async-parse-directory (dir)
@@ -853,14 +830,14 @@ When FORCE parse even if already parsed."
 	  (libxml-parse-html-region
 	   (point) (point-max))
 	  'a))
-	:test 'string-equal)))))
+	:test #'string-equal)))))
 
 (defun elisa-search-searxng (prompt)
   "Search searxng for PROMPT and return list of urls.
 You can customize `elisa-searxng-url' to use non local instance."
   (let ((url (format "%s/search?format=json&q=%s" elisa-searxng-url (url-hexify-string prompt))))
     (thread-last
-      (plz 'get url :as 'json-read)
+      (plz 'get url :as 'json-read)     ;I am not familiar with the "plz" library, is the `json-read' a function?
       (alist-get 'results)
       (mapcar (lambda (el) (alist-get 'url el))))))
 
@@ -896,14 +873,13 @@ You can customize `elisa-searxng-url' to use non local instance."
     (with-current-buffer buffer-name
       (shell-command-on-region
        (point-min) (point-max)
-       (format "%s -f html --to plain"
-	       (executable-find elisa-pandoc-executable))
+       (format "%s --from html --to plain" elisa-pandoc-executable)
        buffer-name t)
       buffer-name)))
 
 (defun elisa-fts-query (prompt)
   "Return fts match query for PROMPT."
-  (thread-last
+  (thread-last                          ;I believe you can do all of this with a single regular expression...
     prompt
     (string-trim)
     (downcase)
@@ -938,7 +914,7 @@ You can customize `elisa-searxng-url' to use non local instance."
 		  :headers `(("Content-Type" . "application/json"))
 		  :body-type 'text
 		  :body (elisa--rerank-request prompt ids)
-		  :as #'json-read)))))
+		  :as #'json-read)))))  ;so it is a function!
 
 (defun elisa-rerank (prompt ids)
   "Rerank IDS according to PROMPT and return top `elisa-limit' IDS."
@@ -962,11 +938,10 @@ You can customize `elisa-searxng-url' to use non local instance."
 (defun elisa--parse-web-page (collection-id url)
   "Parse URL into collection with COLLECTION-ID."
   (let ((kind-id (caar (sqlite-select
-			elisa-db "select rowid from kinds where name = 'web';"))))
-    (message "collecting data from %s" url)
-    (mapc
-     (lambda (chunk)
-       (let* ((hash (secure-hash 'sha256 chunk))
+			elisa-db "SELECT rowid FROM kinds WHERE name = 'web';"))))
+    (message "collecting data from %S..." url)
+    (dolist (chunk (elisa-extact-webpage-chunks url))
+      (let* ((hash (secure-hash 'sha256 chunk))
 	      (embedding (llm-embedding elisa-embeddings-provider chunk))
 	      (rowid
 	       (if-let ((rowid (caar (sqlite-select
@@ -989,8 +964,7 @@ You can customize `elisa-searxng-url' to use non local instance."
 	   (sqlite-execute
 	    elisa-db
 	    (format "insert into data_fts(rowid, data) values (%s, '%s');"
-		    rowid (elisa-sqlite-escape chunk))))))
-     (elisa-extact-webpage-chunks url))))
+		    rowid (elisa-sqlite-escape chunk))))))))
 
 (defun elisa--web-search (prompt)
   "Search the web for PROMPT.
@@ -1007,11 +981,10 @@ Return sqlite query that extract data for adding to context."
 				(elisa-sqlite-escape prompt)))))
 	 (urls (funcall elisa-web-search-function prompt))
 	 (collected-pages 0))
-    (mapc (lambda (url)
-	    (when (<= collected-pages elisa-web-pages-limit)
-	      (elisa--parse-web-page collection-id url)
-	      (cl-incf collected-pages)))
-	  urls)))
+    (dolist (url urls)
+      (when (<= collected-pages elisa-web-pages-limit)
+	(elisa--parse-web-page collection-id url)
+	(cl-incf collected-pages)))))
 
 (defun elisa--rewrite-prompt (prompt action)
   "Rewrite PROMPT if `elisa-prompt-rewriting-enabled'.
@@ -1071,7 +1044,7 @@ WHERE d.rowid in %s;"
 		   (when-let ((kind (cl-first row))
 			      (path (cl-second row))
 			      (text (cl-third row)))
-		     (pcase kind
+		     (pcase kind        ;is this a `pcase-exhaustive'?
 		       ("web"
 			(ellama-context-add-webpage-quote-noninteractive path path text))
 		       ("file"
@@ -1096,15 +1069,16 @@ WHERE d.rowid in %s;"
   (mapcar
    #'file-name-base
    (cl-remove-if-not
-    (lambda (s) (or (string-suffix-p ".info" s)
-		    (string-suffix-p ".info.gz" s)))
+    (lambda (s)
+      (or (string-suffix-p ".info" s)
+	  (string-suffix-p ".info.gz" s)))
     (directory-files (with-temp-buffer
 		       (info "emacs" (current-buffer))
 		       (file-name-directory Info-current-file))))))
 
 (defun elisa-get-external-manuals ()
   "Get external manual names list."
-  (cl-remove-if
+  (cl-remove-if                         ;a `thread-last' might be nice here
    #'not
    (mapcar
     #'elisa--info-valid-p
@@ -1112,7 +1086,7 @@ WHERE d.rowid in %s;"
      (mapcar
       #'file-name-base
       (process-lines
-       (executable-find elisa-find-executable)
+       elisa-find-executable
        (file-truename
 	(file-name-concat user-emacs-directory "elpa"))
        "-name" "*.info"))))))
@@ -1207,7 +1181,7 @@ It does nothing if buffer file not inside one of existing collections."
   (when-let* ((collections (flatten-tree
 			    (sqlite-select
 			     elisa-db
-			     "select name from collections;")))
+			     "SELECT name FROM collections;")))
 	      (dirs (cl-remove-if-not #'file-directory-p collections))
 	      (file (buffer-file-name))
 	      (collection (cl-find-if (lambda (dir)
@@ -1283,7 +1257,7 @@ It does nothing if buffer file not inside one of existing collections."
   "Add webpage by URL to COLLECTION."
   (interactive
    (list
-    (if-let ((url (or (and (fboundp 'thing-at-point) (thing-at-point 'url))
+    (if-let ((url (or (and (fboundp 'thing-at-point) (thing-at-point 'url)) ;why not always use `thing-at-point'?
                       (shr-url-at-point nil))))
         url
       (read-string "Enter URL you want to summarize: "))

[-- Attachment #3: Type: text/plain, Size: 39 bytes --]




-- 
	Philip Kaludercic on peregrine

  reply	other threads:[~2024-07-16 12:54 UTC|newest]

Thread overview: 91+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-12 16:47 Add elisa to GNU ELPA Sergey Kostyaev
2024-07-16 12:54 ` Philip Kaludercic [this message]
2024-07-16 13:57   ` Sergey Kostyaev
2024-07-16 16:04     ` Philip Kaludercic
2024-07-16 16:35       ` Sergey Kostyaev
2024-07-17 13:21         ` Andrew Hyatt
2024-07-16 16:41       ` Sergey Kostyaev
2024-07-16 17:02         ` Philip Kaludercic
2024-07-16 17:47           ` Adding a generic mathematical library Philip Kaludercic
2024-07-16 22:06             ` Emanuel Berg
2024-07-17  2:54               ` Christopher Dimech
2024-07-17  5:58                 ` Emanuel Berg
2024-07-19 16:16               ` Richard Stallman
2024-07-19 17:38                 ` Christopher Dimech
2024-07-21  5:20                   ` Emanuel Berg
2024-07-20 12:45                 ` Max Nikulin
2024-07-20 13:53                   ` Christopher Dimech
2024-07-21  5:19                 ` Emanuel Berg
2024-07-21  6:15                   ` Emanuel Berg
2024-07-21  7:40                     ` Emanuel Berg
2024-07-21  8:45                       ` Emanuel Berg
2024-07-21  8:29                     ` Emanuel Berg
2024-07-21  7:27                   ` Christopher Dimech
2024-07-21  8:03                     ` Emanuel Berg
2024-07-21  9:14                       ` Christopher Dimech
2024-07-21  9:48                         ` Emanuel Berg
2024-07-21 11:20                           ` Emanuel Berg
2024-07-21 11:53                             ` Christopher Dimech
2024-07-21 12:10                               ` Emanuel Berg
2024-07-21 12:27                                 ` Emanuel Berg
2024-07-21 12:46                                   ` Emanuel Berg
2024-07-21 13:03                                   ` Christopher Dimech
2024-07-21 13:17                                     ` Emanuel Berg
2024-07-21 14:33                                       ` Eli Zaretskii
2024-07-21 14:41                                         ` Christopher Dimech
2024-07-21 14:49                                           ` Eli Zaretskii
2024-07-21 14:58                                             ` Christopher Dimech
2024-07-21 15:02                                               ` Eli Zaretskii
2024-07-21 15:18                                                 ` Christopher Dimech
2024-07-21 13:18                                     ` Christopher Dimech
2024-07-21 13:26                                       ` Emanuel Berg
2024-07-21 14:35                                         ` Christopher Dimech
2024-07-21 19:28                                           ` Emanuel Berg
2024-07-21 19:33                                           ` Emanuel Berg
2024-07-21 19:51                                           ` Emanuel Berg
2024-07-21 20:01                                           ` Emanuel Berg
2024-07-21 20:17                                           ` Emanuel Berg
2024-07-21 12:41                                 ` Christopher Dimech
2024-07-21 13:13                                   ` Emanuel Berg
2024-07-21 13:41                                   ` Emanuel Berg
2024-07-21 12:20                               ` Emanuel Berg
2024-07-21 12:04                             ` Emanuel Berg
2024-07-21 14:30                 ` hypotenuse (was: Re: Adding a generic mathematical library) Emanuel Berg
2024-07-21 14:44                   ` Eli Zaretskii
2024-07-21 15:00                     ` Eli Zaretskii
2024-07-21 15:12                       ` Christopher Dimech
2024-07-21 15:42                         ` Eli Zaretskii
2024-07-21 16:13                           ` Christopher Dimech
2024-07-21 15:54                         ` hypotenuse Max Nikulin
2024-07-21 16:12                           ` hypotenuse Eli Zaretskii
2024-07-21 16:17                             ` hypotenuse Christopher Dimech
2024-07-21 19:01                     ` hypotenuse (was: Re: Adding a generic mathematical library) Emanuel Berg
2024-07-21 19:13                     ` Emanuel Berg
2024-07-21 17:38                   ` tomas
2024-07-17  7:09             ` Adding a generic mathematical library Michael Heerdegen via Emacs development discussions.
2024-07-17  7:54               ` Philip Kaludercic
2024-07-17  7:56               ` Michael Heerdegen via Emacs development discussions.
2024-07-18  6:07                 ` Emanuel Berg
2024-07-18  6:45                   ` Christopher Dimech
2024-07-18  7:12                     ` Emanuel Berg
2024-07-18  7:49                       ` Christopher Dimech
2024-07-21  4:56                         ` Emanuel Berg
2024-07-18  7:29                   ` Eli Zaretskii
2024-07-18  7:57                     ` Emanuel Berg
2024-07-18  9:03                       ` Eli Zaretskii
2024-07-21  4:52                         ` Emanuel Berg
2024-07-18  8:15                     ` Emanuel Berg
2024-07-18  9:04                       ` Eli Zaretskii
2024-07-18  9:13                       ` Christopher Dimech
2024-07-21  4:59                         ` Emanuel Berg
2024-07-19 13:22                       ` Emanuel Berg
2024-07-19 16:12                         ` Christopher Dimech
2024-07-19 16:15                           ` Stefan Kangas
2024-07-19 16:29                             ` Christopher Dimech
2024-07-19 16:16               ` Richard Stallman
2024-07-19 18:00                 ` Christopher Dimech
2024-07-17 21:26   ` Add elisa to GNU ELPA Sergey Kostyaev
2024-07-17 22:12     ` Philip Kaludercic
2024-07-18  3:45       ` Sergey Kostyaev
2024-07-18 11:06       ` Sergey Kostyaev
     [not found] <D57DBB96-82DE-4697-A358-032B04190724@gmail.com>
2024-03-09  9:03 ` Add elisa to gnu elpa Philip Kaludercic

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8734o9sdig.fsf@posteo.net \
    --to=philipk@posteo.net \
    --cc=emacs-devel@gnu.org \
    --cc=sskostyaev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).