unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Alexander Pohoyda <alexander.pohoyda@gmx.net>
Subject: New mail-related routines
Date: Mon, 18 Oct 2004 23:57:45 +0200 (CEST)	[thread overview]
Message-ID: <200410182157.i9ILvjln000739@oak.pohoyda.family> (raw)

I've developed a list of functions which I find very useful.  These
are basic functions to deal with header fields in mail messages.  A
great deal of code in "mail" directory could eventually be simplified
using these functions.

I know that some functionality is very similar to one found in
lisp/mail/mailheader.el file, but my small library is more powerful
(it parses structured header fields) and is closer to normal text
manipulation routines (header field searching, sorting, other
processing, folding/unfolding).

As you can see, I have moved (and re-implemented) functions
`mail-text-start' and `mail-head-end' from sendmail.el file, and
function `rfc822-goto-eoh' from simple.el file.  I think they are
general-purpose mail functions are belong to mail-utils.el file.


I would very like to hear comments on this code.


Index: mail-utils.el
===================================================================
RCS file: /cvsroot/emacs/emacs/lisp/mail/mail-utils.el,v
retrieving revision 1.57
diff -u -r1.57 mail-utils.el
--- mail-utils.el	4 Mar 2004 17:02:13 -0000	1.57
+++ mail-utils.el	18 Oct 2004 21:15:03 -0000
@@ -352,7 +352,12 @@
 		    "\\|"
 		    (substring labels (match-end 0))))))
   labels)
+
 \f
+;;;
+;;; Date/Time
+;;;
+
 (defun mail-rfc822-time-zone (time)
   (let* ((sec (or (car (current-time-zone time)) 0))
 	 (absmin (/ (abs sec) 60)))
@@ -368,6 +373,353 @@
 	    (substring s (match-beginning 3) (match-end 3)) " "
 	    (mail-rfc822-time-zone time))))
 
+\f
+;;;
+;;; Some variables
+;;;
+
+;;; The -hf suffix means Header Field.
+
+(defconst mail-wsp-regexp "[\040\011]")
+(defconst mail-crlf-regexp "[\015]?[\012]")
+
+;; Header fields must be unfolded before using these regexps.  This
+;; agrees with the RFC 2822, section 2.2.3, last paragraph.
+
+;; Unstructured header fields
+(defconst mail-hf-name-regexp "[\041-\071\073-\176]+")
+(defconst mail-hf-body-regexp "[^\015\012]*")
+(defconst mail-hf-regexp
+  (format "^\\(%s\\)%s*:%s*\\(%s\\)%s*\\(%s\\)?"
+	  mail-hf-name-regexp mail-wsp-regexp mail-wsp-regexp
+	  mail-hf-body-regexp mail-wsp-regexp mail-crlf-regexp))
+
+;; Structured header fields
+(defconst mail-hf-value-itself-regexp "[^;\040\011]*")
+(defconst mail-hf-value-regexp
+  (format "\\(%s\\)%s*"
+	  mail-hf-value-itself-regexp mail-wsp-regexp))
+
+(defconst mail-hf-param-name-regexp "[^=]+")
+(defconst mail-hf-param-value-regexp "\"\\([^\"]*\\)\"\\|\\([^\";\040\011]*\\)")
+(defconst mail-hf-param-regexp
+  (format ";%s*\\(%s\\)=\\(%s\\)"
+	  mail-wsp-regexp
+	  mail-hf-param-name-regexp mail-hf-param-value-regexp))
+
+;; Not used
+(defconst mail-hf-structured-regexp
+  (format "^\\(%s\\)%s*:%s*\\(%s\\)%s*\\(%s\\)*\\(%s\\)?"
+	  mail-hf-name-regexp mail-wsp-regexp mail-wsp-regexp
+	  mail-hf-value-itself-regexp mail-wsp-regexp
+	  mail-hf-param-regexp mail-crlf-regexp))
+
+\f
+;;;
+;;; General-purpose mail functions
+;;;
+
+;; Moved from sendmail.el
+(defun mail-text-start ()
+  "Return the buffer location of the start of text, as a number."
+  (save-restriction
+    (widen)
+    (mail-body-start-position)))
+
+(defun mail-body-start-position (&optional from to)
+  "Return a position where the body of a message starts.
+
+If called without arguments, the current buffer is assumed to be
+narrowed to exactly one message.
+
+This function may also be used to get the body start position of
+a MIME entity in the region between FROM and TO."
+  (let ((from (or from (point-min)))
+	(to (or to (point-max))))
+    (save-excursion
+      (goto-char from)
+      (save-match-data
+	(if (or (search-forward (concat "\n" mail-header-separator "\n") to t)
+		(search-forward "\n\n" to t))
+	    (point)
+	  ;; TODO: Shouldn't we return nil instead?
+	  (message "This entity has no body")
+	  to)))))
+
+;; Moved from simple.el
+(defun rfc822-goto-eoh ()
+  "Go to header delimiter line in a mail message, following RFC822 rules."
+  (goto-char (mail-header-end-position)))
+
+(defalias 'mail-rfc822-goto-eoh 'rfc822-goto-eoh)
+
+;; Moved from sendmail.el
+(defun mail-header-end ()
+  "Return the buffer location of the end of headers, as a number."
+  (save-restriction
+    (widen)
+    (mail-header-end-position)))
+
+(defun mail-header-end-position (&optional from to)
+  "Return a position where the header of a message ends.
+
+If called without arguments, the current buffer is assumed to be
+narrowed to exactly one message.
+
+This function may also be used to get the header end position of
+a MIME entity in the region between FROM and TO."
+  (save-excursion
+    (goto-char (mail-body-start-position from to))
+    (forward-line -1)
+    (point)))
+
+;; TODO: to be refined and extended
+(defun mail-token-p (candidate)
+  "Return t if the CANDIDATE is a valid token."
+  (not (or (string-match mail-wsp-regexp candidate)
+	   (string-match "[=?]" candidate))))
+
+\f
+;;;
+;;; Header field functions
+;;;
+
+(defsubst mail-make-hf (name body)
+  "Return \"NAME: BODY\" string."
+  (when name (concat name ": " body)))
+
+(defsubst mail-insert-hf (header-field)
+  (when header-field (insert header-field "\n")))
+
+(defun mail-make-hf-param (attribute value)
+  "Return and \"ATTRIBUTE=VALUE\" string.
+The VALUE is quoted if it contains SPACE, CTLs, or TSPECIALs."
+  (if (mail-token-p attribute)
+      ;; valid ATTRIBUTE
+      (if (mail-token-p value)
+	  ;; the VALUE is a token
+	  (concat attribute "=" value)
+	;; the VALUE must be quoted
+	(concat attribute "=" (format "%S" value)))
+    ;; the ATTRIBUTE contains invalid characters
+    (error "Invalid attribute.")))
+
+(defun mail-parse-hf (header-field)
+  "Parse the HEADER-FIELD and return a list of type
+\(HF-NAME (HF-VALUE ((HF-ATTR1-NAME . HF-ATTR1-VALUE) (...))))
+if a header field is structured, or
+\(HF-NAME (HF-BODY nil))
+for unstructured header field."
+  (when header-field
+    (let ((name (mail-get-hf-name header-field))
+	  (body (mail-get-hf-body header-field)))
+      (when name
+	(list name
+	      (when (and body (string-match mail-hf-value-regexp body))
+		(list (match-string 1 body)
+		      (mail-parse-hf-parameters
+		       (substring body (match-end 1))))))))))
+
+(defun mail-parse-hf-parameters (header-field)
+  "Parse the HEADER-FIELD and return a list of type
+\((HF-ATTR1-NAME . HF-ATTR1-VALUE) (...))."
+  (when (and header-field
+	     (string-match mail-hf-param-regexp header-field))
+    (cons (cons (match-string 1 header-field)
+		(or (match-string 3 header-field)
+		    (match-string 2 header-field)))
+	  (mail-parse-hf-parameters
+	   (substring header-field (match-end 2))))))
+
+(defun mail-recreate-hf (hf-list)
+  "Return a header field recreated from the HF-LIST."
+  (when hf-list
+    (mail-make-hf
+     (car hf-list)
+     (let ((body (caar (cdr hf-list)))
+	   (hf-params (cadr (cadr hf-list))))
+       (dolist (part hf-params body)
+	 (let ((attribute (car-safe part))
+	       (value (cdr-safe part)))
+	   (setq body
+		 (concat body "; "
+			 (mail-make-hf-param attribute value)))))))))
+
+(defun mail-search-hf (name &optional from to)
+  "Find a header field named NAME in the message header.
+Set point at the beginning of the field found, and return point.
+If the header field is not found, do not move the point and return nil.
+
+The argument FROM defaults to `point-min' and the argument TO is
+set to be the message header end."
+  (let ((found nil)
+	(case-fold-search t)
+	(from (or from (point-min)))
+	(to (or to (mail-header-end-position from (point-max)))))
+    (save-excursion
+      (goto-char from)
+      (save-match-data
+	(when (re-search-forward (concat "^" name ":") to t)
+	  (setq found (point-at-bol)))))
+    (when found (goto-char found))))
+
+(defun mail-hf-body-position ()
+  "Return a position where the current header field body starts."
+  (save-excursion
+    (save-match-data
+      (re-search-forward (format ":\\(%s*\\)" mail-wsp-regexp) nil t))))
+
+(defun mail-hf-end-position ()
+  "Return a position where the current header field ends."
+  (save-excursion
+    (save-match-data
+      (while (progn
+	       (forward-line)
+	       (looking-at (format "%s+" mail-wsp-regexp))))
+      (point))))
+
+(defun mail-get-hf-at-point ()
+  "Return the header field at point."
+  (buffer-substring-no-properties (point) (mail-hf-end-position)))
+
+(defun mail-get-hf (name &optional from to)
+  "Return the whole header field called NAME as a string.
+
+The argument FROM defaults to `point-min' and the argument TO is
+set to be the message header end.
+
+The trailing CRLF is also included."
+  (save-excursion
+    (when (mail-search-hf name from to)
+      (mail-get-hf-at-point))))
+
+(defun mail-get-hf-name (header-field)
+  "Return the name of the HEADER-FIELD."
+  (when header-field
+    (save-match-data
+      (setq header-field (mail-unfold-hf header-field))
+      (when (string-match mail-hf-regexp header-field)
+	(match-string-no-properties 1 header-field)))))
+
+(defun mail-get-hf-body (header-field)
+  "Return the body of the HEADER-FIELD."
+  (when header-field
+    (save-match-data
+      (setq header-field (mail-unfold-hf header-field))
+      (when (string-match mail-hf-regexp header-field)
+	(match-string-no-properties 2 header-field)))))
+
+(defun mail-get-hf-value (header-field)
+  "Return the value of the HEADER-FIELD."
+  (when header-field
+    (caar (cdr (mail-parse-hf header-field)))))
+
+(defun mail-get-hf-attribute (header-field attr-name)
+  "Return the attribute ATTR-NAME from the HEADER-FIELD."
+  (when header-field
+    (let ((attribute-list (cadr (cadr (mail-parse-hf header-field))))
+	  attribute)
+      (while (and (setq attribute (car attribute-list))
+		  (not (string-equal (upcase attr-name)
+				     (upcase (car attribute)))))
+	(setq attribute-list (cdr attribute-list)))
+      (cdr attribute))))
+
+(defun mail-process-hfs-in-region (from to func)
+  "Enumerate all header fields in the region between FROM and TO and
+call FUNC on them."
+  (save-excursion
+    (goto-char from)
+    (save-restriction
+      (narrow-to-region from to)
+      ;; RFC 2822, section 2.2.3.
+      (while (re-search-forward "^[^ \t]+:" nil t)
+	(beginning-of-line)
+	;;(message "Processing `%s' header..."
+	;;	 (mail-get-hf-name (mail-get-hf-at-point)))
+	(funcall func (point) (mail-hf-end-position))
+	;; Goto next header field
+	(goto-char (mail-hf-end-position)))
+      (- (point-max) from))))
+
+(defun mail-sort-hfs-in-region (from to sort-list)
+  "Sort header fields in the region between FROM and TO, using
+SORT-LIST as a sequence."
+  (save-excursion
+    (goto-char from)
+    (save-restriction
+      (narrow-to-region from to)
+      ;; Do the job.
+      (let ((my-pos (point))
+	    my-hf)
+	(dolist (sorted-hf sort-list)
+	  ;;(message "Sorting `%s' header..." sorted-hf)
+	  (when (mail-search-hf sorted-hf)
+	    (setq my-hf (mail-get-hf-at-point))
+	    (delete-region (point) (mail-hf-end-position))
+	    (goto-char my-pos)
+	    (insert my-hf)
+	    (setq my-pos (point))))))))
+
+(defun mail-fold-hf (header-field)
+  (when header-field
+    (with-temp-buffer
+      ;;(message "Header to fold:\n%s" header-field)
+      (insert header-field)
+      (mail-fold-region (point-min) (point-max))
+      (buffer-string))))
+
+(defun mail-fold-region (from to &optional limit)
+  "Fold header fields in the region between FROM and TO,
+as defined by RFC 2822.
+LIMIT defaults to 76."
+  (save-excursion
+    (goto-char from)
+    (save-restriction
+      (narrow-to-region from to)
+      (let ((limit (or limit 76))
+	    start)
+	(while (not (eobp))
+	  (setq start (point))
+	  (goto-char (min (+ (point) (- limit (current-column)))
+			  (point-at-eol)))
+	  (if (and (>= (current-column) limit)
+		   (re-search-backward "[ \t]" start t)
+		   (not (looking-at "\n[ \t]")))
+	      ;; Insert line break
+	      (progn
+		(delete-char 1)
+		(insert-char ?\n 1)         ;; CRLF
+		(insert-char ?\t 1))        ;; WSP
+	    (if (re-search-backward "[ \t]" start t)
+		(forward-line)
+	      ;; Token is too long, so we skip it
+	      (re-search-forward "[ \t]" nil t)
+	      (backward-char)
+	      (delete-char 1)
+	      (insert-char ?\n 1)
+	      (insert-char ?\t 1))))))))
+
+(defun mail-unfold-hf (header-field)
+  (when header-field
+    (with-temp-buffer
+      ;;(message "Header to unfold:\n%s" header-field)
+      (insert header-field)
+      (mail-unfold-region (point-min) (point-max))
+      (buffer-string))))
+
+(defun mail-unfold-region (from to)
+  "Unfold header fields in the region between FROM and TO, 
+as defined by RFC 2822."
+  (save-excursion
+    (goto-char from)
+    (save-restriction
+      (narrow-to-region from to)
+      (save-match-data
+	(while (re-search-forward
+		(format "%s%s+" mail-crlf-regexp mail-wsp-regexp) nil t)
+	  (replace-match " " nil t))))))
+
 (provide 'mail-utils)
 
 ;;; arch-tag: b24aec2f-fd65-4ceb-9e39-3cc2827036fd



-- 
Alexander Pohoyda <alexander.pohoyda@gmx.net>
PGP Key fingerprint: 7F C9 CC 5A 75 CD 89 72  15 54 5F 62 20 23 C6 44

             reply	other threads:[~2004-10-18 21:57 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-10-18 21:57 Alexander Pohoyda [this message]
2004-10-18 22:12 ` New mail-related routines Stefan Monnier
2004-10-19  7:06   ` Alexander Pohoyda
2004-10-19 12:51     ` Stefan Monnier
2004-10-19 18:37     ` Alexander Pohoyda
2004-10-19 19:29       ` Stefan Monnier
2004-10-19 23:56         ` Alexander Pohoyda
2004-10-19 12:32 ` Reiner Steib
2004-10-19 17:47   ` Alexander Pohoyda
2004-10-19 20:02     ` Reiner Steib
2004-10-20  0:03       ` Alexander Pohoyda
2004-10-24 12:03 ` Simon Josefsson
2004-10-25 22:15   ` Alexander Pohoyda
2004-10-25 22:43   ` Alexander Pohoyda
2004-10-26 23:16     ` Kevin Rodgers
2004-10-27 16:04       ` Alexander Pohoyda

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200410182157.i9ILvjln000739@oak.pohoyda.family \
    --to=alexander.pohoyda@gmx.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).