unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Richard Hansen <rhansen@rhansen.org>
To: Eli Zaretskii <eliz@gnu.org>
Cc: 56048@debbugs.gnu.org, monnier@iro.umontreal.ca
Subject: bug#56048: [PATCH] bindat (strz): Null terminate fixed-length strings if there is room
Date: Sat, 18 Jun 2022 18:57:23 -0400	[thread overview]
Message-ID: <aa00f619-cefb-a64b-22a2-212c589467e6@rhansen.org> (raw)
In-Reply-To: <83r13mo50s.fsf@gnu.org>


[-- Attachment #1.1.1: Type: text/plain, Size: 1171 bytes --]

On 2022-06-18 02:32, Eli Zaretskii wrote:
>> Subject: [PATCH v2 1/2] ; bindat (strz): Move all pack logic to pack function
> 
> What is the motivation/rationale for this refactoring?

The attached revision updates the commit message to explain:

     ; bindat (strz): Move all pack logic to pack function

     Motivation/rationale:
       * Improve code readability.  Now `bindat--pack-strz` is used for all
         `strz` packing, not just variable-length `strz` packing.
       * Make it easier to change the behavior of fixed-length `strz`
         packing without also affecting the behavior of `str` packing.  (A
         future commit will modify `strz` to write a null terminator if
         there is room.)

> 
>> +When packing, a null terminator is written after the packed string if
>> +the length of the input string is less than @var{len}.
> 
> Since "length of a string" is highly ambiguous in Emacs, please always
> make a point of saying "@var{len} bytes" explicitly.  Byte length is
> something rarely seen or used in Emacs, so people must be informed
> about that each time.

Done; see attached revision.

Thanks,
Richard

[-- Attachment #1.1.2: v3-0001-bindat-strz-Move-all-pack-logic-to-pack-function.patch --]
[-- Type: text/x-patch, Size: 4124 bytes --]

From cbd138671c371e9919a2ed345d3579a8a163abc8 Mon Sep 17 00:00:00 2001
From: Richard Hansen <rhansen@rhansen.org>
Date: Sun, 29 May 2022 21:23:57 -0400
Subject: [PATCH v3 1/2] ; bindat (strz): Move all pack logic to pack function

Motivation/rationale:
  * Improve code readability.  Now `bindat--pack-strz` is used for all
    `strz` packing, not just variable-length `strz` packing.
  * Make it easier to change the behavior of fixed-length `strz`
    packing without also affecting the behavior of `str` packing.  (A
    future commit will modify `strz` to write a null terminator if
    there is room.)
---
 lisp/emacs-lisp/bindat.el | 49 ++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 26 deletions(-)

diff --git a/lisp/emacs-lisp/bindat.el b/lisp/emacs-lisp/bindat.el
index 46e2a4901c..4a642bb9c5 100644
--- a/lisp/emacs-lisp/bindat.el
+++ b/lisp/emacs-lisp/bindat.el
@@ -440,20 +440,27 @@ bindat--pack-str
       (aset bindat-raw (+ bindat-idx i) (aref v i)))
     (setq bindat-idx (+ bindat-idx len))))
 
-(defun bindat--pack-strz (v)
+(defun bindat--pack-strz (len v)
   (let* ((v (string-to-unibyte v))
-         (len (length v)))
-    (dotimes (i len)
-      (when (= (aref v i) 0)
-        ;; Alternatively we could pretend that this was the end of
-        ;; the string and stop packing, but then bindat-length would
-        ;; need to scan the input string looking for a null byte.
-        (error "Null byte encountered in input strz string"))
-      (aset bindat-raw (+ bindat-idx i) (aref v i)))
-    ;; Explicitly write a null terminator in case the user provided a
-    ;; pre-allocated string to bindat-pack that wasn't zeroed first.
-    (aset bindat-raw (+ bindat-idx len) 0)
-    (setq bindat-idx (+ bindat-idx len 1))))
+         (vlen (length v)))
+    (if len
+        ;; When len is specified, behave the same as the str type
+        ;; since we don't actually add the terminating zero anyway
+        ;; (because we rely on the fact that `bindat-raw' was
+        ;; presumably initialized with all-zeroes before we started).
+        (bindat--pack-str len v)
+      (dotimes (i vlen)
+        (when (= (aref v i) 0)
+          ;; Alternatively we could pretend that this was the end of
+          ;; the string and stop packing, but then bindat-length would
+          ;; need to scan the input string looking for a null byte.
+          (error "Null byte encountered in input strz string"))
+        (aset bindat-raw (+ bindat-idx i) (aref v i)))
+      ;; Explicitly write a null terminator in case the user provided
+      ;; a pre-allocated string to `bindat-pack' that wasn't already
+      ;; zeroed.
+      (aset bindat-raw (+ bindat-idx vlen) 0)
+      (setq bindat-idx (+ bindat-idx vlen 1)))))
 
 (defun bindat--pack-bits (len v)
   (let ((bnum (1- (* 8 len))) j m)
@@ -482,7 +489,8 @@ bindat--pack-item
    ('u24r (bindat--pack-u24r v))
    ('u32r (bindat--pack-u32r v))
    ('bits (bindat--pack-bits len v))
-   ((or 'str 'strz) (bindat--pack-str len v))
+   ('str (bindat--pack-str len v))
+   ('strz (bindat--pack-strz len v))
    ('vec
     (let ((l (length v)) (vlen 1))
       (if (consp vectype)
@@ -699,18 +707,7 @@ bindat--type
                             ((numberp len) len)
                             ;; General expression support.
                             (t `(or ,len (1+ (length ,val)))))))
-    (`(pack . ,args)
-     ;; When len is specified, behave the same as the str type since we don't
-     ;; actually add the terminating zero anyway (because we rely on the fact
-     ;; that `bindat-raw' was presumably initialized with all-zeroes before we
-     ;; started).
-     (cond ; Same optimizations as 'length above.
-      ((null len) `(bindat--pack-strz . ,args))
-      ((numberp len) `(bindat--pack-str ,len . ,args))
-      (t (macroexp-let2 nil len len
-           `(if ,len
-                (bindat--pack-str ,len . ,args)
-              (bindat--pack-strz . ,args))))))))
+    (`(pack . ,args) `(bindat--pack-strz ,len . ,args))))
 
 (cl-defmethod bindat--type (op (_ (eql 'bits))  len)
   (bindat--pcase op
-- 
2.36.1


[-- Attachment #1.1.3: v3-0002-bindat-strz-Null-terminate-fixed-length-strings-i.patch --]
[-- Type: text/x-patch, Size: 5298 bytes --]

From cf65b56e537fb7bab2b8776dfa876fa56f1839bc Mon Sep 17 00:00:00 2001
From: Richard Hansen <rhansen@rhansen.org>
Date: Thu, 16 Jun 2022 15:21:57 -0400
Subject: [PATCH v3 2/2] bindat (strz): Null terminate fixed-length strings if
 there is room

* lisp/emacs-lisp/bindat.el (bindat--pack-strz): For fixed-length strz
fields, explicitly write a null terminator after the packed string if
there is room (bug#56048).
* doc/lispref/processes.texi (Bindat Types): Update documentation.
* test/lisp/emacs-lisp/bindat-tests.el (bindat-test--str-strz-prealloc):
Update tests.
---
 doc/lispref/processes.texi           | 31 ++++++++++++++--------------
 lisp/emacs-lisp/bindat.el            | 13 ++++++------
 test/lisp/emacs-lisp/bindat-tests.el | 12 +++++------
 3 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/doc/lispref/processes.texi b/doc/lispref/processes.texi
index b9200aedde..9e0bd98a54 100644
--- a/doc/lispref/processes.texi
+++ b/doc/lispref/processes.texi
@@ -3509,23 +3509,24 @@ Bindat Types
 (but excluding) the null byte that terminated the input string.
 
 If @var{len} is provided, @code{strz} behaves the same as @code{str},
-but with one difference: when unpacking, the first null byte
-encountered in the packed string is interpreted as the terminating
-byte, and it and all subsequent bytes are excluded from the result of
-the unpacking.
+but with a couple of differences:
+
+@itemize @bullet
+@item
+When packing, a null terminator is written after the packed input
+string if the number of characters in the input string is less than
+@var{len}.
+
+@item
+When unpacking, the first null byte encountered in the packed string
+is interpreted as the terminating byte, and it and all subsequent
+bytes are excluded from the result of the unpacking.
+@end itemize
 
 @quotation Caution
-The packed output will not be null-terminated unless one of the
-following is true:
-@itemize
-@item
-The input string is shorter than @var{len} bytes and either no pre-allocated
-string was provided to @code{bindat-pack} or the appropriate byte in
-the pre-allocated string was already null.
-@item
-The input string contains a null byte within the first @var{len}
-bytes.
-@end itemize
+The packed output will not be null-terminated unless the input string
+is shorter than @var{len} bytes or it contains a null byte within the
+first @var{len} bytes.
 @end quotation
 
 @item vec @var{len} [@var{type}]
diff --git a/lisp/emacs-lisp/bindat.el b/lisp/emacs-lisp/bindat.el
index 4a642bb9c5..0ecac3d52a 100644
--- a/lisp/emacs-lisp/bindat.el
+++ b/lisp/emacs-lisp/bindat.el
@@ -443,11 +443,14 @@ bindat--pack-str
 (defun bindat--pack-strz (len v)
   (let* ((v (string-to-unibyte v))
          (vlen (length v)))
+    ;; Explicitly write a null terminator (if there's room) in case
+    ;; the user provided a pre-allocated string to `bindat-pack' that
+    ;; wasn't already zeroed.
+    (when (or (null len) (< vlen len))
+      (aset bindat-raw (+ bindat-idx vlen) 0))
     (if len
         ;; When len is specified, behave the same as the str type
-        ;; since we don't actually add the terminating zero anyway
-        ;; (because we rely on the fact that `bindat-raw' was
-        ;; presumably initialized with all-zeroes before we started).
+        ;; (except for the null terminator possibly written above).
         (bindat--pack-str len v)
       (dotimes (i vlen)
         (when (= (aref v i) 0)
@@ -456,10 +459,6 @@ bindat--pack-strz
           ;; need to scan the input string looking for a null byte.
           (error "Null byte encountered in input strz string"))
         (aset bindat-raw (+ bindat-idx i) (aref v i)))
-      ;; Explicitly write a null terminator in case the user provided
-      ;; a pre-allocated string to `bindat-pack' that wasn't already
-      ;; zeroed.
-      (aset bindat-raw (+ bindat-idx vlen) 0)
       (setq bindat-idx (+ bindat-idx vlen 1)))))
 
 (defun bindat--pack-bits (len v)
diff --git a/test/lisp/emacs-lisp/bindat-tests.el b/test/lisp/emacs-lisp/bindat-tests.el
index cc223ad14e..0c03c51e2e 100644
--- a/test/lisp/emacs-lisp/bindat-tests.el
+++ b/test/lisp/emacs-lisp/bindat-tests.el
@@ -172,14 +172,14 @@ bindat-test--str-strz-prealloc
                 ((((x str 2)) ((x . "a"))) . "ax")
                 ((((x str 2)) ((x . "ab"))) . "ab")
                 ((((x str 2)) ((x . "abc"))) . "ab")
-                ((,(bindat-type strz 1) "") . "xx")
-                ((,(bindat-type strz 2) "") . "xx")
-                ((,(bindat-type strz 2) "a") . "ax")
+                ((,(bindat-type strz 1) "") . "\0x")
+                ((,(bindat-type strz 2) "") . "\0x")
+                ((,(bindat-type strz 2) "a") . "a\0")
                 ((,(bindat-type strz 2) "ab") . "ab")
                 ((,(bindat-type strz 2) "abc") . "ab")
-                ((((x strz 1)) ((x . ""))) . "xx")
-                ((((x strz 2)) ((x . ""))) . "xx")
-                ((((x strz 2)) ((x . "a"))) . "ax")
+                ((((x strz 1)) ((x . ""))) . "\0x")
+                ((((x strz 2)) ((x . ""))) . "\0x")
+                ((((x strz 2)) ((x . "a"))) . "a\0")
                 ((((x strz 2)) ((x . "ab"))) . "ab")
                 ((((x strz 2)) ((x . "abc"))) . "ab")
                 ((,(bindat-type strz) "") . "\0x")
-- 
2.36.1


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  reply	other threads:[~2022-06-18 22:57 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-17 22:52 bug#56048: [PATCH] bindat (strz): Null terminate fixed-length strings if there is room Richard Hansen
2022-06-18  3:02 ` Richard Hansen
2022-06-18  6:32   ` Eli Zaretskii
2022-06-18 22:57     ` Richard Hansen [this message]
2022-06-21 20:44   ` Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors
2022-06-22 13:57     ` Eli Zaretskii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aa00f619-cefb-a64b-22a2-212c589467e6@rhansen.org \
    --to=rhansen@rhansen.org \
    --cc=56048@debbugs.gnu.org \
    --cc=eliz@gnu.org \
    --cc=monnier@iro.umontreal.ca \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).