all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
* struct.el -- a package to encode/decode binary data
@ 2002-03-18 23:12 Kim F. Storm
  2002-03-19  0:25 ` Miles Bader
                   ` (3 more replies)
  0 siblings, 4 replies; 27+ messages in thread
From: Kim F. Storm @ 2002-03-18 23:12 UTC (permalink / raw)



While writing a package that sends and receives datagrams using the
new make-network-process functionality, I quickly found that I needed
to be able to encode and decode binary data structures, so I came up
with the following package (struct.el).

I'd like to hear if something like this already exists, or if others
find it should be added to emacs (with more complete documentation of
course).  [Also, the struct-pack function doesn't work with nested
data, but I'll fix that if there is an interest in this package].

++kfs

------------------------- struct.el --------------------
;;; struct.el --- basic data structure packing and unpacking.

;; Copyright (C) 2002 Free Software Foundation, Inc.

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;;  Packing and unpacking of (binary) data structures.
;;
;;  The data formats used in binary files and network protocols are
;;  often structed data which can be described by a C-style structure
;;  such as the one shown below.  Using the struct package, decoding
;;  and encoding binary data formats like these is made simple using a
;;  structure specification which closely resembles the C style
;;  structure declarations.
;;  
;;  Encoded (binary) data is stored in a unibyte string or vector,
;;  while the decoded data is stored in an alist with (FIELD . VALUE) 
;;  pairs.
;;

;;; Example:
  
;;  Consider the following C structures:
;;  
;;  struct header {
;;	unsigned long	dest_ip;
;;	unsigned long	src_ip;
;;	unsigned short	dest_port;
;;	unsigned short	src_port;
;;  };
;;  
;;  struct data {
;;	unsigned char	type;
;;	unsigned char	opcode;
;;	unsigned long	length;  /* In little endian order */
;;	unsigned char	id[8];   /* nul-terminated string  */
;;	unsigned char	data[/* (length + 3) & ~3 */];
;;  };
;;  
;;  struct packet {
;;	struct header	header;
;;	unsigned char	items;
;;	unsigned char   filler[3];
;;	struct data	item[/* items */];
;;  };
;;  
;;  The corresponding Lisp struct specification looks like this:
;;  
;;  (setq header-spec
;;    '((dest-ip   ip)
;;	(src-ip    ip)
;;	(dest-port u16)
;;	(src-port  u16)))
;;  
;;  (setq data-spec
;;    '((type      u8)
;;	(opcode	   u8)
;;	(length	   u16r)  ;; little endian order
;;	(id	   strz 8)
;;	(data	   vec (length))
;;	(align     4)))
;;  
;;  (setq packet-spec
;;    '((header    struct header-spec)
;;	(items	   u8)
;;	(fill 3)
;;	(item	   repeat (items)
;;		   ((struct data-spec)))))
;;  
;;
;;  A binary representation may look like
;;   [ 192 168 1 100 192 168 1 101 01 28 21 32 2 0 0 0  
;;     2 3 5 0 ?A ?B ?C ?D ?E ?F 0 0 1 2 3 4 5 0 0 0
;;     1 4 7 0 ?B ?C ?D ?E ?F ?G 0 0 6 7 8 9 10 11 12 0 ]
;;  
;;  The corresponding decoded structure looks like
;;
;;      ((header
;;        (dest-ip   . [192 168 1 100])
;;        (src-ip    . [192 168 1 101])
;;        (dest-port . 284)
;;        (src-port  . 5408))
;;       (items . 2)
;;       (item ((data . [1 2 3 4 5])
;;      	(id . "ABCDEF")
;;      	(length . 5)
;;      	(opcode . 3)
;;      	(type . 2))
;;             ((data . [6 7 8 9 10 11 12])
;;      	(id . "BCDEFG")
;;      	(length . 7)
;;      	(opcode . 4)
;;      	(type . 1))))

;;; Code:

;; Helper functions for structure unpacking.
;; Relies on dynamic binding of RAW-DATA and POS

(eval-when-compile
  (defvar raw-data)
  (defvar pos))

(defun struct--unpack-u8 ()
  (prog1
      (if (stringp raw-data)
	  (string-to-char (substring raw-data pos (1+ pos)))
	(aref raw-data pos))
    (setq pos (1+ pos))))
    
(defun struct--unpack-u16 ()
  (let* ((a (struct--unpack-u8)) (b (struct--unpack-u8)))
    (+ (* a 256) b)))

(defun struct--unpack-u24 ()
  (let* ((a (struct--unpack-u16)) (b (struct--unpack-u8)))
    (+ (* a 256) b)))

(defun struct--unpack-u32 ()
  (let* ((a (struct--unpack-u16)) (b (struct--unpack-u16)))
    (+ (* a 65536) b)))

(defun struct--unpack-u16r ()
  (let* ((a (struct--unpack-u8)) (b (struct--unpack-u8)))
    (+ (* b 256) a)))

(defun struct--unpack-u24r ()
  (let* ((a (struct--unpack-u16r)) (b (struct--unpack-u8)))
    (+ (* b 65536) a)))

(defun struct--unpack-u32r ()
  (let* ((a (struct--unpack-u16r)) (b (struct--unpack-u16r)))
    (+ (* b 65536) a)))

(defun struct--unpack-item (type len)
  (if (eq type 'ip)
      (setq type 'vec len 4))
  (cond
   ((memq type '(u8 byte))
    (struct--unpack-u8))
   ((memq type '(u16 word short))
    (struct--unpack-u16))
   ((eq type 'u24)
    (struct--unpack-u24))
   ((memq type '(u32 dword long))
    (struct--unpack-u32))
   ((eq type 'u16r)
    (struct--unpack-u16r))
   ((eq type 'u24r)
    (struct--unpack-u24r))
   ((eq type 'u32r)
    (struct--unpack-u32r))
   ((eq type 'str)
    (let ((s (substring raw-data pos (+ pos len))))
      (setq pos (+ pos len))
      (if (stringp s) s
	(string-make-unibyte (concat s)))))
   ((eq type 'strz)
    (let ((i 0) s)
      (while (and (< i len) (/= (aref raw-data (+ pos i)) 0))
	(setq i (1+ i)))
      (setq s (substring raw-data pos (+ pos i)))
      (setq pos (+ pos len))
      (if (stringp s) s
	(string-make-unibyte (concat s)))))
   ((eq type 'vec)
    (let ((v (make-vector len 0)) (i 0))
      (while (< i len)
	(aset v i (struct--unpack-u8))
	(setq i (1+ i)))
      v))
   (t nil)))

(defun struct--unpack-group (spec)
  (let (result)
    (while spec
      (let* ((item (car spec))
	     (field (car item))
	     (type (nth 1 item))
	     (len (nth 2 item))
	     data)
	(cond 
	 ((eq field 'fill)
	  (setq pos (+ pos type)))
	 ((eq field 'align)
	  (while (/= (% pos type) 0)
	    (setq pos (1+ pos))))
	 ((eq field 'struct)
	  (setq result (append (struct--unpack-group (eval type)) result)))
	 ((eq type 'struct)
	  (setq data (struct--unpack-group (eval len)))
	  (setq result (cons (cons field data) result)))
	 (t
	  (if (consp len)
	      (setq len (apply 'struct-field result len)))
	  (if (not len)
	      (setq len 1))
	  (if (eq type 'repeat)
	      (let ((i 0))
		(while (< i len)
		  (setq data (cons (struct--unpack-group (nth 3 item)) data))
		  (setq i (1+ i)))
		(setq data (reverse data)))
	    (setq data (struct--unpack-item type len)))
	  (setq result (cons (cons field data) result))))
	(setq spec (cdr spec))))
      (reverse result)))

(defun struct-unpack (raw-data spec)
  "Unpack RAW-DATA according to struct specification SPEC."
  (let ((pos 0))
    (struct--unpack-group spec)))

(defun struct-field (struct &rest field)
  (while (and struct field)
    (setq struct (if (integerp (car field))
		     (nth (car field) struct)
		   (let ((val (assq (car field) struct)))
		     (if (consp val) (cdr val)))))
    (setq field (cdr field)))
  struct)



(defun struct-ip-to-string (ip)
  (format "%d.%d.%d.%d"
	  (aref ip 0) (aref ip 1) (aref ip 2) (aref ip 3)))

(defun struct-vector-to-hex (v)
  (let ((i 0) (len (length v)) s)
    (while (< i len)
      (setq s (cons (format ":%02x" (aref v i)) s)
	    i (1+ i)))
    (setq s (reverse s))
    (substring (apply 'concat s) 1)))


;; Pack structured data into raw-data

(defun struct--pack-u8 (v)
  (if v
      (char-to-string v)
    [0]))
    
(defun struct--pack-u16 (v)
  (if v
      (vector (% (/ v 256) 256)
	      (% v 256))
    [0 0]))

(defun struct--pack-u24 (v)
  (if v
      (vector (% (/ v 65536) 256)
	      (% (/ v 256) 256)
	      (% v 256))
    [0 0 0]))

(defun struct--pack-u32 (v)
  (if v
      (vector (% (/ v 16777216) 256)
	      (% (/ v 65536) 256)
	      (% (/ v 256) 256)
	      (% v 256))
    [0 0 0 0]))

(defun struct--pack-u16r (v)
  (if v
      (vector (% v 256)
	      (% (/ v 256) 256))
    [0 0]))

(defun struct--pack-u24r (v)
  (if v
      (vector (% v 256)
	      (% (/ v 256) 256)
	      (% (/ v 65536) 256))
    [0 0 0]))

(defun struct--pack-u32r (v)
  (if v
      (vector (% v 256)
	      (% (/ v 256) 256)
	      (% (/ v 65536) 256)
	      (% (/ v 16777216) 256))
    [0 0 0 0]))

(defun struct--pack-item (v type len)
  (if (eq type 'ip)
      (setq type 'vec len 4))
  (cond
   ((memq type '(u8 byte))
    (struct--pack-u8 v))
   ((memq type '(u16 word short))
    (struct--pack-u16 v))
   ((eq type 'u24)
    (struct--pack-u24 v))
   ((memq type '(u32 dword long))
    (struct--pack-u32 v))
   ((eq type 'u16r)
    (struct--pack-u16r v))
   ((eq type 'u24r)
    (struct--pack-u24r v))
   ((eq type 'u32r)
    (struct--pack-u32r v))
   ((memq type '(str strz vec))
    (let ((l (length v)))
      (if (>= l len)
	  (substring v 0 len)
	(concat v (make-vector (- len l) 0)))))
   (t 
    (make-vector len 0))))

(defun struct--pack-group (struct spec offset)
  (let (result)
    (while spec
      (let* ((item (car spec))
	     (field (car item))
	     (type (nth 1 item))
	     (len (nth 2 item))
	     data)
	(cond 
	 ((eq field 'fill)
	  (setq data (make-vector type 0)))
	 ((eq field 'align)
	  (let ((extra (- type (% (+ (length result) offset) type))))
	    (setq data (if (> extra 0) (make-vector extra 0)))))
	 ((eq field 'struct)
	  (setq result
		(append result
			(struct--pack-group struct (eval type)
					    (length result)))))
	 ((eq type 'struct)
	  (setq result
		(append result
			(struct--pack-group (struct-field struct field)
					    (eval len) (length result)))))
	 (t
	  (if (consp len)
	      (setq len (apply 'struct-field result len)))
	  (if (not len)
	      (setq len 1))
	  (if (eq type 'repeat)
	      (let ((i 0))
		(while (< i len)
		  (setq result
			(append result
				(struct--pack-group struct (nth 3 item)
						    (length result))))
		  (setq i (1+ i))))
	    (setq data (struct--pack-item (struct-field struct field) type len)))))
	(if data
	    (setq result (append result (list data)))))
      (setq spec (cdr spec)))
    result))

(defun struct-pack (struct spec)
  "Pack STRUCT according to struct specification SPEC."
  (string-make-unibyte
   (apply 'concat (struct--pack-group struct spec 0))))


(provide 'struct)


_______________________________________________
Emacs-devel mailing list
Emacs-devel@gnu.org
http://mail.gnu.org/mailman/listinfo/emacs-devel


^ permalink raw reply	[flat|nested] 27+ messages in thread
[parent not found: <Pine.SUN.3.91.1020319133840.17982G-100000@is>]
* Re: struct.el -- a package to encode/decode binary data
@ 2002-03-22  1:25 Kenichi Handa
  2002-03-22  1:27 ` Stefan Monnier
  0 siblings, 1 reply; 27+ messages in thread
From: Kenichi Handa @ 2002-03-22  1:25 UTC (permalink / raw)
  Cc: eliz, monnier+gnu/emacs, storm, emacs-devel

"Stefan Monnier" <monnier+gnu/emacs@RUM.cs.yale.edu> writes:
> But now that I think about it, if 160-255 can be an eight-bit-graphic
> character, how does the code does with "backward-char" ?
> Looking at DEC_POS in charset.h I see that we do

> 	while (p > p_min && !CHAR_HEAD_P (*p)) p--;

Please don't skip the following four lines:

	len = pend + 1 - p;						\
	PARSE_MULTIBYTE_SEQ (p, len, bytes);				\
	if (bytes == len)						\
	  pos_byte -= len - 1;						\

which handles the above case.  When we at last reach a
char-head, PARSE_MULTIBYTE_SEQ checks how long the byte
sequence should be.  We update pos_byte only if the length
is the same as what we decreased.

---
Ken'ichi HANDA
handa@etl.go.jp

_______________________________________________
Emacs-devel mailing list
Emacs-devel@gnu.org
http://mail.gnu.org/mailman/listinfo/emacs-devel


^ permalink raw reply	[flat|nested] 27+ messages in thread
* Re: struct.el -- a package to encode/decode binary data
@ 2002-03-22  1:30 Kenichi Handa
  0 siblings, 0 replies; 27+ messages in thread
From: Kenichi Handa @ 2002-03-22  1:30 UTC (permalink / raw)
  Cc: eliz, monnier+gnu/emacs, storm, emacs-devel

I wrote:
> which handles the above case.  When we at last reach a
> char-head, PARSE_MULTIBYTE_SEQ checks how long the byte
> sequence should be.  We update pos_byte only if the length
> is the same as what we decreased.

Oops, it seems that I misunderstood what Stefan meant.  If
what he meant is about efficiency, yes, he is write.  It is
better that p_min is set to:
	max (p_min, p - MAX_MULTIBYTE_LENGTH)

---
Ken'ichi HANDA
handa@etl.go.jp

_______________________________________________
Emacs-devel mailing list
Emacs-devel@gnu.org
http://mail.gnu.org/mailman/listinfo/emacs-devel


^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2002-03-23  2:36 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-03-18 23:12 struct.el -- a package to encode/decode binary data Kim F. Storm
2002-03-19  0:25 ` Miles Bader
2002-03-19  1:38 ` Re[1]: " Eric M. Ludlam
2002-03-19 12:56   ` Kim F. Storm
2002-03-19 13:26     ` Re[3]: " Eric M. Ludlam
2002-03-19  7:02 ` Eli Zaretskii
2002-03-19 12:34   ` Stefan Monnier
2002-03-19 14:38     ` Eli Zaretskii
2002-03-19 14:33 ` Luke Gorrie
     [not found] <Pine.SUN.3.91.1020319133840.17982G-100000@is>
2002-03-19 14:34 ` Kim F. Storm
2002-03-19 21:27   ` Thien-Thi Nguyen
2002-03-21  9:04   ` Richard Stallman
2002-03-21 11:15     ` Eli Zaretskii
2002-03-21 13:31       ` Kim F. Storm
2002-03-21 14:56         ` Eli Zaretskii
2002-03-21 15:36           ` Kim F. Storm
2002-03-21 16:58             ` Stefan Monnier
2002-03-21 19:45               ` Eli Zaretskii
2002-03-22  1:05                 ` Stefan Monnier
2002-03-22 11:04                   ` Eli Zaretskii
2002-03-22  0:37           ` Miles Bader
2002-03-23  2:35           ` Richard Stallman
2002-03-21 16:53         ` Stefan Monnier
2002-03-23  2:36       ` Richard Stallman
  -- strict thread matches above, loose matches on Subject: below --
2002-03-22  1:25 Kenichi Handa
2002-03-22  1:27 ` Stefan Monnier
2002-03-22  1:30 Kenichi Handa

Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.