all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
blob 51848fbae8c3e8d2c9d5b0dd8d166b21163f5f7b 9975 bytes (raw)
name: test/manual/regexp/regexp-perf.el 	 # note: path name is non-authoritative(*)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
 
;; -*- lexical-binding:t -*-

;; Copyright (C) 2024 Free Software Foundation, Inc.

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

;;; Code:
(require 'rx)

\f
;;; helpers
(defconst perf-buffer-base-len 100
  "Length multiple of temp buffer to generate for search tests.")

(defconst perf-string-base-len 100
  "Length multiple of string to generate for search tests.")

(defmacro perf-with-random-buffer (size &rest body)
  "Generate a temp buffer with SIZE random ASCII chars, executing BODY."
  (declare (indent 1) (debug (fixnum &rest form)))
  `(with-temp-buffer
     (perf-insert-random ,size)
     (goto-char (point-min))
     ,@body))

(defconst perf-regexp-short-base-len 2
  "Length of components for short regexps to generate for search tests.")

(defconst perf-regexp-long-base-len 40
  "Length of components for long regexps to generate for search tests.")

(defun perf-generate-random-alternation-pattern (case-len)
  (rx-to-string
   `(|
     (literal ,(perf-random-string case-len))
     (literal ,(perf-random-string case-len)))
   t))

(defun perf-generate-random-grouped-pattern (component-len)
  (rx-to-string
   `(:
     (literal ,(perf-random-string component-len))
     (? (group (| (literal ,(perf-random-string component-len))
                  (literal ,(perf-random-string component-len)))))
     (literal ,(perf-random-string component-len)))
   t))

(defun perf-generate-regexp-strings (n base-len kind)
  (cl-loop
   with v = (make-vector n nil)
   with pattern-fun =
   (cl-ecase kind
     (alternation #'perf-generate-random-alternation-pattern)
     (grouped #'perf-generate-random-grouped-pattern))
   for el across-ref v
   for r = (funcall pattern-fun base-len)
   do (setf el r)
   finally return v))

(defun perf-compile-regexps (regexp-strings)
  (cl-loop with v = (make-vector (length regexp-strings) nil)
           for el across-ref v
           for r across regexp-strings
           do (setf el (make-regexp r))
           finally return v))

(defconst perf-num-few-regexp-patterns-to-generate 4
  "A small number of regexp patterns to try one after another.")

(defconst perf-num-many-regexp-patterns-to-generate 30
  "A large number of regexp patterns to try one after another.")

(defconst perf-num-regexp-match-loops 60
  "Number of times to try matching each regexp in a search test.")

(defmacro perf-define-parameterized-regexp-strings-test
    (base-name args &optional doc &rest body)
  "Define a set of test cases with varying types of generated
regexp patterns."
  (declare (indent 2) (debug defun))
  (unless (and (consp args)
               (= (length args) 2))
    (error "Base function %s should accept exactly two arguments."
           base-name))
  (let ((all-variants
         '((short-patterns/few-patterns/alternation perf-regexp-short-base-len perf-num-few-regexp-patterns-to-generate 'alternation)
           (short-patterns/few-patterns/grouped perf-regexp-short-base-len perf-num-few-regexp-patterns-to-generate 'grouped)
           (short-patterns/many-patterns/alternation perf-regexp-short-base-len perf-num-many-regexp-patterns-to-generate 'alternation)
           (short-patterns/many-patterns/grouped perf-regexp-short-base-len perf-num-many-regexp-patterns-to-generate 'grouped)
           (long-patterns/few-patterns/alternation perf-regexp-long-base-len perf-num-few-regexp-patterns-to-generate 'alternation)
           (long-patterns/few-patterns/grouped perf-regexp-long-base-len perf-num-few-regexp-patterns-to-generate 'grouped)
           (long-patterns/many-patterns/alternation perf-regexp-long-base-len perf-num-many-regexp-patterns-to-generate 'alternation)
           (long-patterns/many-patterns/grouped perf-regexp-long-base-len perf-num-many-regexp-patterns-to-generate 'grouped))))
    `(progn
       ,@(cl-loop
          with base-str = (symbol-name base-name)
          with (rx-str-arg n-arg) = args
          for (ext-name regexp-len-sym num-patterns-sym kind) in all-variants
          for ext-str = (symbol-name ext-name)
          for full-name = (intern (format "%s/%s" base-str ext-str))
          collect `(perf-define-variable-test
                       ,full-name (,n-arg) ,doc
                       (let ((,rx-str-arg
                              (perf-generate-regexp-strings
                               ,num-patterns-sym
                               ,regexp-len-sym
                               ,kind)))
                         ,@body))))))

(defmacro perf-define-parameterized-compile-regexp-test
    (base-name args &optional doc &rest body)
  "Define a pair of test cases with pre-compiled regexp patterns as well
as raw strings (which get compiled and cached on the fly).

NB: compilation time via `perf-compile-regexps' ('compile-fun' in the
implementation) is *not* tracked in these generated benchmark tests,
while any just-in-time regex compilation from pattern strings *is*
tracked in these benchmark timings. This is intentional."
  (declare (indent 2) (debug defun))
  (unless (and (consp args)
               (= (length args) 2))
    (error "Base function %s should accept exactly two arguments."
           base-name))
  (let ((all-variants
         '((compiled . perf-compile-regexps)
           (no-compile . nil))))
    `(progn
       ,@(cl-loop
          with base-str = (symbol-name base-name)
          with (rx-str-arg n-arg) = args
          for (ext-name . maybe-compile-fun) in all-variants
          for ext-str = (symbol-name ext-name)
          for full-name = (intern (format "%s/%s" base-str ext-str))
          collect `(perf-define-parameterized-regexp-strings-test
                       ,full-name (,rx-str-arg ,n-arg) ,doc
                       ,@(if maybe-compile-fun
                             `((let ((,rx-str-arg
                                      (,maybe-compile-fun ,rx-str-arg)))
                                 ,@body))
                           body))))))

\f
;; +============================================================+
;; | Matching performance tests without recording any match data.
;; +============================================================+

(perf-define-parameterized-compile-regexp-test
    perf-match/no-match-data/buffer (regexps n)
  "Generate random regexps and repeatedly regex search a random buffer."
  (perf-with-random-buffer (* n perf-buffer-base-len)
    (benchmark-run perf-num-regexp-match-loops
      (cl-loop for r across regexps
               do (save-excursion
                    (re-search-forward r nil t nil t))))))

(perf-define-parameterized-compile-regexp-test
    perf-match/no-match-data/string (regexps n)
  "Generate random regexps and repeatedly regex search a random string."
  (let ((haystack (perf-random-string (* n perf-string-base-len))))
    (benchmark-run perf-num-regexp-match-loops
      (cl-loop for r across regexps
               do (string-match r haystack nil t)))))

\f
;; +============================================================+
;; | Match data manipulation.
;; +============================================================+
(defconst perf-num-match-data-loops 600
  "Number of times to extract and reset match data in a test.")

(defun perf-generate-simple-consecutive-groups-pattern (num-groups)
  "Create a regex pattern with NUM-GROUPS subexpressions, each matching
a single '.' (any character except newline)."
  (rx-to-string
   (cl-loop for i from 1 upto num-groups
            collecting '(group not-newline) into r
            finally return `(: ,@r))
   t))

(perf-define-variable-test perf-match/match-data/string/legacy (n)
  (let* ((haystack (perf-random-string (* n perf-string-base-len)))
         (num-groups (max n 4))
         (r (perf-generate-simple-consecutive-groups-pattern num-groups))
         (m '(0 0)))
    (benchmark-run perf-num-match-data-loops
      (cl-assert (string-match r haystack nil nil))
      (match-data t m nil)
      (cl-assert (and (= (cl-first m) 0)
                      (= (cl-second m) (1- (length haystack)))
                      (= (cl-third m) 0)
                      (= (cl-fourth m) 1)))
      (cl-assert (string-match "" haystack))
      (match-data t m nil)
      (cl-assert (and (= (cl-first m) 0)
                      (= (cl-second m) 0)
                      (null (cl-third m)))))))

(perf-define-variable-test perf-match/match-data/string/match-vectors (n)
  (let* ((haystack (perf-random-string (* n perf-string-base-len)))
         (num-groups (max n 4))
         (r (make-regexp
             (perf-generate-simple-consecutive-groups-pattern num-groups)))
         (r-blank (make-regexp ""))
         (starts (match-allocate-results r))
         (ends (match-allocate-results r)))
    (benchmark-run perf-num-match-data-loops
      (cl-assert (string-match r haystack nil nil))
      (match-extract-starts r starts)
      (match-extract-ends r ends)
      (cl-assert (= (length starts) (match-num-registers r)))
      (cl-assert (= (length ends) (match-num-registers r)))
      (cl-assert (and (= (aref starts 0) 0)
                      (> (aref ends 0) 0)
                      (> (aref starts 1) (aref ends 0))))
      (cl-assert (string-match r-blank haystack nil nil))
      (match-extract-starts r-blank starts)
      (match-extract-ends r-blank ends)
      (cl-assert (and (= (aref starts 0) 0)
                      (= (aref ends 0) 0))))))

debug log:

solving 51848fbae8c ...
found 51848fbae8c in https://yhetil.org/emacs/1VzhyYQmzkY8ZudqyPNciqngpUIHE7bpzCHQJQJhmJUvyq0OSOCMOKiAOS3s7FVO67oKaW-67ZrLQhvT_4Pth1TlHl8Z9XfGEvV8WD16m0c=@pm.me/ ||
	https://yhetil.org/emacs/obyPxjc1-gUH7aZNNaGIIzybT6m7sLarevWp3z6KXBhb4rCC9G_FzTOuPtDs2saC3vQOVoMNPO6Bn3UQFa5KvbgtbNUjefNA3ArTasTnDy4=@hypnicjerk.ai/ ||
	https://yhetil.org/emacs/2LOLmIp1X8w4CGbqq3qDrzmKVA0KzYNL1N9lBtWdB-MtEv9oCuYgJMYprG170wMPjYxeQImAmWOPatGTTl4KxZMlptNo9A9hnHt84vdN9EA=@hypnicjerk.ai/

applying [1/1] https://yhetil.org/emacs/1VzhyYQmzkY8ZudqyPNciqngpUIHE7bpzCHQJQJhmJUvyq0OSOCMOKiAOS3s7FVO67oKaW-67ZrLQhvT_4Pth1TlHl8Z9XfGEvV8WD16m0c=@pm.me/
diff --git a/test/manual/regexp/regexp-perf.el b/test/manual/regexp/regexp-perf.el
new file mode 100644
index 00000000000..51848fbae8c

Checking patch test/manual/regexp/regexp-perf.el...
Applied patch test/manual/regexp/regexp-perf.el cleanly.

skipping https://yhetil.org/emacs/obyPxjc1-gUH7aZNNaGIIzybT6m7sLarevWp3z6KXBhb4rCC9G_FzTOuPtDs2saC3vQOVoMNPO6Bn3UQFa5KvbgtbNUjefNA3ArTasTnDy4=@hypnicjerk.ai/ for 51848fbae8c
skipping https://yhetil.org/emacs/2LOLmIp1X8w4CGbqq3qDrzmKVA0KzYNL1N9lBtWdB-MtEv9oCuYgJMYprG170wMPjYxeQImAmWOPatGTTl4KxZMlptNo9A9hnHt84vdN9EA=@hypnicjerk.ai/ for 51848fbae8c
index at:
100644 51848fbae8c3e8d2c9d5b0dd8d166b21163f5f7b	test/manual/regexp/regexp-perf.el

(*) Git path names are given by the tree(s) the blob belongs to.
    Blobs themselves have no identifier aside from the hash of its contents.^

Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.