unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Philip Kaludercic <philipk@posteo.net>
To: Harald Judt <h.judt@gmx.at>
Cc: emacs-devel@gnu.org
Subject: Re: [ELPA] New package: dired-duplicates
Date: Tue, 31 Oct 2023 12:21:51 +0000	[thread overview]
Message-ID: <875y2nugm8.fsf@posteo.net> (raw)
In-Reply-To: <aa5b8dfd-ca39-4663-b3ee-ba2c825dd0de@gmx.at> (Harald Judt's message of "Tue, 31 Oct 2023 11:34:24 +0100")

[-- Attachment #1: Type: text/plain, Size: 1422 bytes --]

Harald Judt <h.judt@gmx.at> writes:

> Hi,
>
> I have developed the following package and assigned copyright for it
> to the FSF and would now like to publish it to GNU ELPA (it has
> previously been published to MELPA):
>
> URL: https://codeberg.org/hjudt/dired-duplicates
>
> Description:
> This package helps to find duplicate files on local and remote filesystems.
> It is similar to the fdupes command-line utility but written in Emacs Lisp
> and should also work on every remote filesystem that TRAMP supports and
> where executable commands can be called remotely.  The only external
> requirement is a checksum program like md5 or sha256sum that generates a
> hash value from the contents of a file used for comparison, because Emacs
> cannot do that in a performance-efficient way.
>
> dired-duplicates works by first searching files of the same size, then
> invoking the calculation of the checksum for these files, and presents the
> grouped results in a Dired buffer that the user can work with similarly to
> a regular Dired buffer.
> ----
>
> Could someone help me with the next steps?

Sure, I can take care of adding the package to the archive, you'll only
have to bump the "Version" header when you want the package to be
released (and in the future as well, the commit that touches the
"Version" header triggers a new release).

In the meantime, here is some quick and superficial feedback on the code:


[-- Attachment #2: Type: text/plain, Size: 6202 bytes --]

diff --git a/dired-duplicates.el b/dired-duplicates.el
index d62af02..4af37a3 100644
--- a/dired-duplicates.el
+++ b/dired-duplicates.el
@@ -54,7 +54,6 @@
 (defcustom dired-duplicates-separate-results
   t
   "Boolean value indicating whether to separate results with new-lines."
-  :group 'dired-duplicates
   :tag "Separate results"
   :type 'boolean)
 
@@ -64,7 +63,6 @@
 
 The checksums will be used for comparison of files of the same
 size."
-  :group 'dired-duplicates
   :tag "Checksum executable"
   :type 'string)
 
@@ -73,10 +71,9 @@ size."
   "The comparison function used for sorting grouped results.
 
 The sorting can be in ascending (<) or descending (>) order."
-  :group 'dired-duplicates
   :tag "Ascending or descending file size sort order"
-  :type '(choice (const :tag "Ascending" :value <)
-                 (const :tag "Descending" :value >)))
+  :type '(choice (const :tag "Ascending" <)
+                 (const :tag "Descending" >)))
 
 (defcustom dired-duplicates-file-filter-functions
   nil
@@ -84,14 +81,12 @@ The sorting can be in ascending (<) or descending (>) order."
 
 A filter function must accept as its single argument the file and
 return boolean t if the file matches a criteria, otherwise nil."
-  :group 'dired-duplicates
   :tag "File filter functions"
   :type 'hook)
 
 (defcustom dired-duplicates-search-directories-recursively
   t
   "Search directories recursively."
-  :group 'dired-duplicates
   :tag "Search directories recursively"
   :type 'boolean)
 
@@ -103,21 +98,22 @@ return boolean t if the file matches a criteria, otherwise nil."
 
 The executable used is defined by `dired-duplicates-checksum-exec'."
   (let* ((default-directory (file-name-directory (expand-file-name file)))
-         (exec (executable-find dired-duplicates-checksum-exec t)))
+         (exec (executable-find dired-duplicates-checksum-exec t))
+	 (file (expand-file-name (file-local-name file))))
     (unless exec
-      (user-error "Checksum program %s not found in exec-path" exec))
-    (car (split-string
-          (shell-command-to-string
-           (concat exec " \"" (expand-file-name (file-local-name file)) "\""))
-          nil
-          t))))
+      (user-error "Checksum program %s not found in `exec-path'" exec))
+    (with-temp-buffer
+      (unless (zerop (call-process exec nil t nil file))
+	(error "Failed to start checksum program %s" exec))
+      (goto-char (point-min))
+      (if (looking-at "\\`[[:alnum:]]+")
+	  (match-string 0)
+	(error "Unexpected output from checksum program %s" exec)))))
 
 (defun dired-duplicates--apply-file-filter-functions (files)
   "Apply file filter functions to FILES, returning the resulting list."
-  (if (and dired-duplicates-file-filter-functions files)
-      (dolist (filter-func dired-duplicates-file-filter-functions files)
-        (setf files (cl-delete-if-not filter-func files)))
-    files))
+  (dolist (filter-func dired-duplicates-file-filter-functions files)
+    (setf files (cl-delete-if-not filter-func files))))
 
 (defun dired-duplicates--find-and-filter-files (directories)
   "Search below DIRECTORIES for duplicate files.
@@ -140,14 +136,14 @@ duplicate files as values."
                     (append (gethash size same-size-table) (list f)))
            finally
            (cl-loop for same-size-files being the hash-values in same-size-table
-                    if (> (length same-size-files) 1) do
+                    if (> (length same-size-files) 1) do ;see length>
                     (cl-loop for f in same-size-files
                              for checksum = (dired-duplicates-checksum-file f)
                              do (setf (gethash checksum checksum-table)
                                       (append (gethash checksum checksum-table) (list f)))))
            (cl-loop for same-files being the hash-value in checksum-table using (hash-key checksum)
                     do
-                    (if (> (length same-files) 1)
+                    (if (cdr same-files) ;(> (length same-files) 1) in O(1)
                         (setf (gethash checksum checksum-table)
                               (cons (file-attribute-size (file-attributes (car same-files)))
                                     (sort same-files #'string<)))
@@ -180,7 +176,6 @@ Currently, this simply adds a new-line after each results group."
                for len in lengths
                do
                (forward-line len)
-               ;; (forward-line len)
                (let ((inhibit-read-only t))
                  (beginning-of-line)
                  (unless (= (point) (point-max))
@@ -225,7 +220,7 @@ This is the same as `dired-do-flagged-delete', but calls
 
 (defvar dired-duplicates-map
   (let ((map (make-sparse-keymap)))
-    ;; workaround for Emacs bug #57565
+    ;; workaround for Emacs bug#57565
     (when (< emacs-major-version 29)
       (define-key map (kbd "x") 'dired-duplicates--do-flagged-delete)
       (define-key map (kbd "D") 'dired-duplicates--do-delete))
@@ -248,7 +243,7 @@ The results will be shown in a Dired buffer."
                                                default-directory)))
   (unless directories
     (user-error "Specify one or more directories to search in"))
-  (let* ((directories (if (listp directories) directories (list directories)))
+  (let* ((directories (if (listp directories) directories (list directories))) ;see `ensure-list'
          (truncated-dirs (truncate-string-to-width (string-join directories ", ") 40 0 nil t)))
     (message "Finding duplicate files in %s..." truncated-dirs)
     (if-let ((default-directory "/")
@@ -257,7 +252,7 @@ The results will be shown in a Dired buffer."
           (message "Finding duplicate files in %s completed." truncated-dirs)
           (dired (cons "/" (flatten-list results)))
           (set-keymap-parent dired-duplicates-map (current-local-map))
-          (setf (current-local-map) dired-duplicates-map)
+	  (use-local-map dired-duplicates-map)
           (setq-local dired-duplicates-directories directories)
           (setq-local revert-buffer-function 'dired-duplicates-dired-revert)
           (dired-duplicates--post-process-dired-buffer results))

[-- Attachment #3: Type: text/plain, Size: 44 bytes --]


> Regards,
> Harald

-- 
Philip Kaludercic

  reply	other threads:[~2023-10-31 12:21 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-31 10:34 [ELPA] New package: dired-duplicates Harald Judt
2023-10-31 12:21 ` Philip Kaludercic [this message]
2023-10-31 21:05   ` Harald Judt
2023-11-01  2:14     ` Michael Heerdegen
2023-11-01 15:16       ` [External] : " Drew Adams
2023-11-01 15:45         ` Eli Zaretskii
2023-11-01 16:28           ` Drew Adams
2023-11-01 16:47             ` Eli Zaretskii
2023-11-01 16:33         ` Philip Kaludercic
2023-11-01 11:32     ` Philip Kaludercic
2023-11-01 20:04       ` Harald Judt
2023-11-01 21:29         ` Philip Kaludercic
2023-11-02  8:44           ` Harald Judt
2023-11-03  8:19             ` Philip Kaludercic
2023-11-03 20:19               ` Harald Judt
2023-11-04 15:27                 ` Philip Kaludercic
2023-11-06  9:33                   ` Harald Judt
2023-11-10  8:37                     ` Philip Kaludercic
2023-11-10 10:02                       ` Harald Judt
2023-11-23  6:12                         ` Philip Kaludercic
2023-10-31 21:24   ` Harald Judt
2023-11-01 17:40     ` Stefan Kangas
2023-11-01  3:30   ` Eli Zaretskii
2023-11-01 13:53     ` Visuwesh
2023-11-01 14:12       ` Eli Zaretskii
2023-11-01 17:57         ` Philip Kaludercic
2023-11-01 19:24           ` Eli Zaretskii
2023-11-01 20:09             ` Harald Judt
2023-11-02  5:55               ` Eli Zaretskii
2023-11-08 20:29                 ` Harald Judt
2023-11-09  5:52                   ` Eli Zaretskii
2023-11-09  8:00                     ` Harald Judt
2023-11-09  8:38                       ` tomas
2023-11-09  8:48                         ` Eli Zaretskii
2023-11-09  8:43                       ` Eli Zaretskii
2023-11-09  8:53                         ` tomas
2023-11-09  9:18                         ` Harald Judt
2023-11-09 14:52                           ` Harald Judt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=875y2nugm8.fsf@posteo.net \
    --to=philipk@posteo.net \
    --cc=emacs-devel@gnu.org \
    --cc=h.judt@gmx.at \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).