From: Arthur Miller <arthur.miller@live.com>
To: emacs-devel@gnu.org
Subject: empty-directory predicate, native implementation
Date: Tue, 13 Oct 2020 04:22:36 +0200 [thread overview]
Message-ID: <VI1PR06MB4526ACBABDE795DDD49A5A5896040@VI1PR06MB4526.eurprd06.prod.outlook.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1704 bytes --]
It is easy to check for an empty dir in elisp; we can just list files
and check if there is a list or not:
(null (directory-files directory-name nil nodots t)))
where nodots is just regex to omit dot files (from dired+).
But then this is quite inneficient. We are listing all files in each
dir since directory-files will return entire content of directory. Also
we are matching every filename to a regex just to eliminate first two.
Alternative would be to take length and see if it is > 2; but then we
would iterate whole list twice. So I can't see anything avialable in
dired/elisp and I think a predicate implemented in low-level is better solution.
We are really interested just to see if there is some file; so we can
just open dir, and read first few entries, if there is more then 2 files
(. and .. on *nix) we can just abort and return true.
I have tested an idea with getdents (Linux syscall) and I can see
difference. Attached is a patch for dired.c and a test file to play with
some benchmark.
In somewhat synthetic test where I just looped a "lisp" and "native"
predicate over a several hundred directories, I can see quite drammatic
difference in performance. On a directory with something about ~800
subdirs, native prediate takes ~0.002s while lisp predicate goes in ~0.01s.
I have made also a small test to mark empty dirs in dired, and there I
see some difference. On same directory, I get consistently
~2.4s for lisp version and ~2s for native version.
This isn't any kind of drammatic difference for most use; file I/O
is dominated by disk access anyway, but i still don't like to spend cpu
on unnecessary evaluations, so I wonder if we could get native predicate
in elisp?
[-- Attachment #2: dired-mark-empty.el --]
[-- Type: text/plain, Size: 2175 bytes --]
(require 'dired)
(defvar nodots "^\\([^.]\\|\\.\\([^.]\\|\\..\\)\\).*")
(defun dired-go-to-first ()
(interactive)
(goto-char (point-min))
(dired-next-line 1)
(skip-chars-forward " \n\t"))
(defun dired-go-to-last ()
(interactive)
(goto-char (point-max))
(dired-next-line -1)
(skip-chars-forward " \n\t"))
(defun dired-is-empty-p (directory-name)
(null (directory-files directory-name nil nodots t)))
(defun directory-number-files (directory-name &optional omit-filter)
(length (directory-files directory-name nil omit-filter t)))
(defun dired-mark-empty-dirs ()
(interactive)
(when (equal major-mode 'dired-mode)
(let ((curr-dir))
(save-excursion
(dired-go-to-first)
(while (not (eobp))
(setq curr-dir (dired-file-name-at-point))
(cond ((or (null curr-dir)
(string= curr-dir ".")
(string= curr-dir ".."))
;; do nothing here
)
((file-directory-p curr-dir)
(when (dired-is-empty-p curr-dir)
(dired-mark 1)
(dired-previous-line 1))))
(dired-next-line 1))))))
(defun dired-mark-empty-dirs-native ()
(interactive)
(when (equal major-mode 'dired-mode)
(let ((curr-dir))
(save-excursion
(dired-go-to-first)
(while (not (eobp))
(setq curr-dir (dired-file-name-at-point))
(cond ((or (null curr-dir)
(string= curr-dir ".")
(string= curr-dir ".."))
;; do nothing here
)
((file-directory-p curr-dir)
(when (directory-empty-p curr-dir)
(dired-mark 1)
(dired-previous-line 1))))
(dired-next-line 1))))))
;;(benchmark-run-compiled 10 (directory-empty-p "/some/directory/here"))
;;(benchmark-run-compiled 10 (dired-is-empty-p "/some/directory/here"))
;; must be in some dired buffer for this
;;(benchmark-run-compiled 10 (dired-mark-empty-dirs))
;;(benchmark-run-compiled 10 (dired-mark-empty-dirs-native))
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: dired.patch --]
[-- Type: text/x-patch, Size: 2749 bytes --]
--- src/dired.c 2020-10-13 04:08:36.028838472 +0200
+++ ../dired.c 2020-10-13 04:07:48.374572510 +0200
@@ -21,6 +21,7 @@
#include <config.h>
#include <sys/stat.h>
+#include <sys/syscall.h>
#ifdef HAVE_PWD_H
#include <pwd.h>
@@ -39,6 +40,7 @@
#include "systime.h"
#include "buffer.h"
#include "coding.h"
+#include "blockinput.h"
#ifdef MSDOS
#include "msdos.h" /* for fstatat */
@@ -929,7 +931,7 @@
struct stat s;
/* An array to hold the mode string generated by filemodestring,
- including its terminating space and null byte. */
+ including its terminating space and NUL byte. */
char modes[sizeof "-rwxr-xr-x "];
char *uname = NULL, *gname = NULL;
@@ -1078,6 +1080,50 @@
return groups;
}
+typedef struct dirent* pdirent;
+DEFUN ("directory-empty-p", Fdirectory_empty_p,
+ Sdirectory_empty_p, 1, 1, 0,
+ doc: /* Returns t if directory DIRNAME does not contain any
+ user files (special files . and .. are excluded
+ automatically), nil otherwise. */)
+(Lisp_Object dirname)
+{
+ #define BSIZE 1024
+ char buf[BSIZE];
+ const char* name;
+ int fd, n = 0, p = 0, c = 0;
+ pdirent d;
+
+ if(!STRINGP(dirname))
+ error("Directory name not a string object.");
+
+ dirname = Fexpand_file_name(dirname, Qnil);
+ name = SSDATA(dirname);
+
+ fd = open (name, O_RDONLY | O_DIRECTORY);
+
+ if( fd == -1 )
+ error("Can't open directory.");
+
+ //block_input();
+ /* 32-bit version of getdents should be good enough;
+ we are just looking at first 3 files*/
+ n = syscall(SYS_getdents,fd,buf, BSIZE);
+ if(n == -1)
+ error("Can't read directory data.");
+
+ while(p < n && c < 3) {
+ d = (pdirent) (buf + p);
+ p += d->d_reclen;
+ c++;
+ }
+ //unblock_input();
+
+ close(fd);
+ return (c > 2) ? Qnil : Qt;
+}
+
+
void
syms_of_dired (void)
{
@@ -1089,7 +1135,8 @@
DEFSYM (Qfile_attributes_lessp, "file-attributes-lessp");
DEFSYM (Qdefault_directory, "default-directory");
DEFSYM (Qdecomposed_characters, "decomposed-characters");
-
+ DEFSYM (Qdirectory_empty_p, "directory-empty-p")
+
defsubr (&Sdirectory_files);
defsubr (&Sdirectory_files_and_attributes);
defsubr (&Sfile_name_completion);
@@ -1098,6 +1145,7 @@
defsubr (&Sfile_attributes_lessp);
defsubr (&Ssystem_users);
defsubr (&Ssystem_groups);
+ defsubr (&Sdirectory_empty_p);
DEFVAR_LISP ("completion-ignored-extensions", Vcompletion_ignored_extensions,
doc: /* Completion ignores file names ending in any string in this list.
next reply other threads:[~2020-10-13 2:22 UTC|newest]
Thread overview: 71+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-13 2:22 Arthur Miller [this message]
2020-10-13 8:01 ` empty-directory predicate, native implementation Michael Albinus
2020-10-13 11:42 ` Arthur Miller
2020-10-13 13:16 ` Michael Albinus
2020-10-13 18:32 ` Arthur Miller
2020-10-13 18:39 ` Michael Albinus
2020-10-13 23:20 ` Arthur Miller
2020-10-14 9:19 ` Michael Albinus
2020-10-14 13:53 ` Arthur Miller
2020-10-13 14:48 ` Eli Zaretskii
2020-10-13 18:43 ` Arthur Miller
2020-10-13 19:12 ` Eli Zaretskii
2020-10-13 19:59 ` Arthur Miller
2020-10-14 14:08 ` Eli Zaretskii
2020-10-14 14:43 ` Arthur Miller
2020-10-13 18:44 ` Michael Albinus
2020-10-13 19:14 ` Eli Zaretskii
2020-10-13 20:08 ` Arthur Miller
2020-10-14 1:52 ` Arthur Miller
2020-10-14 9:21 ` Michael Albinus
2020-10-14 13:56 ` Arthur Miller
2020-10-14 14:41 ` Michael Albinus
2020-10-14 15:07 ` Arthur Miller
2020-10-14 15:53 ` Michael Albinus
2020-10-14 16:12 ` Eli Zaretskii
2020-10-14 16:21 ` Michael Albinus
2020-10-14 16:29 ` Eli Zaretskii
2020-10-15 5:53 ` Arthur Miller
2020-10-15 9:12 ` Michael Albinus
2020-10-15 11:33 ` Arthur Miller
2020-10-15 12:21 ` Michael Albinus
2020-10-15 13:29 ` Arthur Miller
2020-10-15 14:01 ` Arthur Miller
2020-10-15 14:41 ` Michael Albinus
2020-10-15 15:22 ` Arthur Miller
2020-10-16 23:31 ` Arthur Miller
2020-10-17 8:13 ` Michael Albinus
2020-10-17 19:03 ` Arthur Miller
2020-10-17 20:03 ` Drew Adams
2020-10-17 20:27 ` Arthur Miller
2020-10-17 21:18 ` Drew Adams
2020-10-17 22:06 ` Arthur Miller
2020-10-17 21:02 ` Arthur Miller
2020-10-17 21:27 ` Drew Adams
2020-10-17 21:58 ` Arthur Miller
2020-10-18 12:06 ` Michael Albinus
2020-10-18 2:47 ` Eli Zaretskii
2020-10-18 11:52 ` Michael Albinus
2020-10-18 16:15 ` Drew Adams
2020-10-18 16:43 ` Michael Albinus
2020-10-18 20:15 ` Stefan Monnier
2020-10-18 21:25 ` Drew Adams
2020-10-19 0:03 ` Arthur Miller
2020-10-18 22:21 ` Arthur Miller
2020-10-19 8:04 ` Michael Albinus
2020-10-19 14:01 ` Arthur Miller
2020-10-19 14:50 ` Michael Albinus
[not found] ` <VI1PR06MB45266BE5DFC72AEB27567A6C961E0@VI1PR06MB4526.eurprd06.prod.outlook.com>
[not found] ` <87a6wixoim.fsf@gmx.de>
[not found] ` <VI1PR06MB4526280D5B81531D06E58BC1961D0@VI1PR06MB4526.eurprd06.prod.outlook.com>
[not found] ` <87wnzev6i3.fsf@gmx.de>
[not found] ` <VI1PR06MB45264E1CB34EECE86672581C96100@VI1PR06MB4526.eurprd06.prod.outlook.com>
2020-11-02 17:02 ` Michael Albinus
2020-11-03 15:20 ` Arthur Miller
2020-10-15 13:38 ` Stefan Monnier
2020-10-16 23:33 ` Arthur Miller
2020-10-14 14:49 ` Arthur Miller
[not found] <<VI1PR06MB4526ACBABDE795DDD49A5A5896040@VI1PR06MB4526.eurprd06.prod.outlook.com>
[not found] ` <<83y2ka18t7.fsf@gnu.org>
[not found] ` <<87y2kaj799.fsf@gmx.de>
[not found] ` <<83blh60wgr.fsf@gnu.org>
[not found] ` <<VI1PR06MB452688C9C71D5463D9497A2A96050@VI1PR06MB4526.eurprd06.prod.outlook.com>
[not found] ` <<87h7qxjh7g.fsf@gmx.de>
[not found] ` <<VI1PR06MB45269B3924B44A555428F00596050@VI1PR06MB4526.eurprd06.prod.outlook.com>
[not found] ` <<878sc8kgy8.fsf@gmx.de>
[not found] ` <<VI1PR06MB4526FDD3D3EB4867AF837C8F96050@VI1PR06MB4526.eurprd06.prod.outlook.com>
[not found] ` <<87imbcls71.fsf@gmx.de>
[not found] ` <<83eem0zt0b.fsf@gnu.org>
[not found] ` <<87k0vsrd6m.fsf@gmx.de>
[not found] ` <<83a6wozs7h.fsf@gnu.org>
[not found] ` <<VI1PR06MB45267C7D83E77C3F307FF34E96020@VI1PR06MB4526.eurprd06.prod.outlook.com>
[not found] ` <<87sgafq2e2.fsf@gmx.de>
[not found] ` <<AM6PR06MB4518BCD25B93987390D7D6D596020@AM6PR06MB4518.eurprd06.prod.outlook.com>
[not found] ` <<87h7qvptm3.fsf@gmx.de>
[not found] ` <<VI1PR06MB452605D66CDE84BAA25A257696030@VI1PR06MB4526.eurprd06.prod.outlook.com>
[not found] ` <<871rhxp8we.fsf@gmx.de>
[not found] ` <<VI1PR06MB45261F3309D31EC7DEDE4C8B96000@VI1PR06MB4526.eurprd06.prod.outlook.com>
[not found] ` <<237bd21b-96c7-4433-a5bc-34b64a9f4250@default>
[not found] ` <<83ft6cs10u.fsf@gnu.org>
2020-10-18 4:05 ` Drew Adams
-- strict thread matches above, loose matches on Subject: below --
2020-10-18 21:13 Drew Adams
2020-10-18 22:15 ` Stefan Monnier
2020-10-19 7:54 ` Michael Albinus
2020-10-19 0:24 ` Arthur Miller
2020-10-19 0:37 ` Drew Adams
2020-10-19 2:15 ` Arthur Miller
2020-10-19 7:51 ` Michael Albinus
2020-10-19 15:25 ` Drew Adams
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/emacs/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=VI1PR06MB4526ACBABDE795DDD49A5A5896040@VI1PR06MB4526.eurprd06.prod.outlook.com \
--to=arthur.miller@live.com \
--cc=emacs-devel@gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.savannah.gnu.org/cgit/emacs.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).