;;; erc-d-i.el --- IRC helpers for ERC test server -*- lexical-binding: t -*-
;; Copyright (C) 2020-2021 Free Software Foundation, Inc.
;;
;; This file is part of GNU Emacs.
;;
;; This program is free software: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see
;; <https://www.gnu.org/licenses/>.
;;; Commentary:
;;; Code:
(require 'cl-lib)
;; Parsed form of one IRC message line, as filled in by
;; `erc-d-i--parse-message'.  Because of the `:conc-name' option the
;; accessors use a dot separator, e.g. `erc-d-i-message.command'.
(cl-defstruct (erc-d-i-message (:conc-name erc-d-i-message.))
"Identical to `erc-response'.
When member `compat' is nil, it means the raw message was decoded as
UTF-8 text before parsing, which is nonstandard."
;; The message string as given to the parser (after the optional UTF-8
;; decoding step), still including any leading tags section.
(unparsed "" :type string)
;; First token minus its leading ":" when that token starts with ":";
;; otherwise left as the empty string.
(sender "" :type string)
;; The token following the optional sender, or "" when there is none.
(command "" :type string)
;; All tokens after the command.  When the message has a " :" trailing
;; part, that part is the last element.
(command-args nil :type (list-of string))
;; Text following the first " :" in the message; left as "" when the
;; parser finds no such part.
(contents "" :type string)
;; Alist produced by `erc-d-i--validate-tags': keys are interned tag
;; names, values are unescaped strings or nil.
(tags nil :type (list-of (cons symbol string)))
;; The parser stores the negation of its DECODE argument here.
(compat t :type boolean))
(defvar erc-d-i--tag-escapes
  '((";"  . "\\:")
    (" "  . "\\s")
    ("\\" . "\\\\")
    ("\r" . "\\r")
    ("\n" . "\\n"))
  "Alist of (RAW . ESCAPED) strings used for tag values.
RAW is a one-character string that may not appear literally in a tag
value and ESCAPED is the two-character sequence standing in for it.")

;; XXX escaping and unescaping are not exact inverses of each other:
;; unescaping may degrade the original text by dropping stranded or
;; misplaced backslashes.

(defvar erc-d-i--tag-escaped-regexp
  (rx (any ";" " " "\\" "\r" "\n"))
  "Regexp matching a single character that needs escaping in a tag value.")

(defvar erc-d-i--tag-unescaped-regexp
  (rx "\\" (or (any ":" "s" "\\" "r" "n")
               string-end
               (not (any ":" "n" "r" "\\"))))
  "Regexp matching a backslash sequence in a raw tag value.
It matches a backslash followed by one of the known escape letters, a
backslash at the very end of the string, or a backslash followed by
any other character.")
(defun erc-d-i--unescape-tag-value (str)
  "Return raw tag value STR with its backslash sequences expanded.
Every match of `erc-d-i--tag-unescaped-regexp' is replaced by the raw
string paired with it in `erc-d-i--tag-escapes'.  A sequence with no
entry there is replaced by itself minus the leading backslash, so a
lone trailing backslash disappears."
  (let ((expand
         (lambda (seq)
           (let ((raw (car (rassoc seq erc-d-i--tag-escapes))))
             ;; Unknown sequence: keep whatever followed the backslash.
             (if raw raw (substring seq 1))))))
    ;; FIXEDCASE and LITERAL are both set so the replacement text is
    ;; inserted verbatim.
    (replace-regexp-in-string erc-d-i--tag-unescaped-regexp
                              expand str t t)))
(defun erc-d-i--escape-tag-value (str)
  "Return tag value STR with banned characters replaced by escapes.
Every match of `erc-d-i--tag-escaped-regexp' is replaced by the
escape sequence paired with it in `erc-d-i--tag-escapes'."
  (let ((encode
         (lambda (char-string)
           (alist-get char-string erc-d-i--tag-escapes nil nil #'equal))))
    ;; FIXEDCASE and LITERAL are both set so the backslashes in the
    ;; replacement are inserted verbatim.
    (replace-regexp-in-string erc-d-i--tag-escaped-regexp
                              encode str t t)))
(defvar erc-d-i--invalid-tag-regexp
  (rx (any "\0\7\r\n; "))
  "Regexp matching a character that is rejected in a raw tag value.")

;; This is `erc-v3-message-tags' with fatal errors.
(defun erc-d-i--validate-tags (raw)
  "Parse and validate the tags portion RAW of a message.
RAW must not have a leading \"@\" or a trailing space.  The spec says
validation shouldn't be performed on keys and that undecodeable values
or ones with illegal (unescaped) chars may be dropped.  This function
respects none of that and signals an error instead.

Return an alist of (KEY . VALUE) pairs in order of first appearance,
where KEY is the interned tag name and VALUE is the decoded, unescaped
value string, or nil when the tag has no value or an empty one.  A
repeated key keeps its original position and takes the later value.

Signal an error when RAW is longer than 4094 bytes, when a value
matches `erc-d-i--invalid-tag-regexp', or when a tag has an empty key."
  (when (<= 4095 (string-bytes raw))
    ;; 417 ERR_INPUTTOOLONG Input line was too long
    (error "Message tags exceed 4094 bytes: %S" raw))
  (let (alist)
    (dolist (item (split-string raw ";"))
      (let* ((eq-pos (string-search "=" item))
             (name (if eq-pos (substring item 0 eq-pos) item))
             (raw-value (and eq-pos (substring item (1+ eq-pos))))
             value)
        ;; The value is checked before the key, so a tag that is bad in
        ;; both ways reports the value problem.
        (when (and raw-value (not (string-equal raw-value "")))
          (when (string-match-p erc-d-i--invalid-tag-regexp raw-value)
            (error "Bad tag: %s" item))
          (setq value (erc-d-i--unescape-tag-value
                       (decode-coding-string raw-value 'utf-8 t))))
        (when (string-empty-p name)
          (error "Tag missing key: %S" item))
        ;; New keys are pushed onto the front; existing ones are
        ;; updated in place.
        (setf (alist-get (intern name) alist) value)))
    (nreverse alist)))
(defun erc-d-i--parse-message (s &optional decode)
  "Return a new `erc-d-i-message' built from message string S.
S must already have its trailing CRLF removed; otherwise an error is
signaled.  With DECODE non-nil, decode S as UTF-8 text before parsing
and record that by leaving the `compat' slot nil."
  (when (string-suffix-p "\r\n" s)
    (error "Unstripped message encountered"))
  (let* ((line (if decode (decode-coding-string s 'utf-8 t) s))
         (msg (make-erc-d-i-message :unparsed line :compat (not decode)))
         parts)
    ;; A leading "@" section running up to the first space holds the
    ;; tags: validate and store them, then strip them from the line.
    (when (and (not (string-empty-p line))
               (eq ?@ (aref line 0)))
      (let ((space (string-match " " line)))
        (when space
          (setf (erc-d-i-message.tags msg)
                (erc-d-i--validate-tags (substring line 1 space)))
          (setq line (substring line (1+ space))))))
    ;; Everything after the first " :" is a single trailing argument,
    ;; but only when at least one token precedes it; otherwise the
    ;; whole line is split on spaces.
    (let* ((colon (string-match " :" line))
           (head (and colon
                      (split-string (substring line 0 colon) " " t))))
      (if head
          (let ((trailing (substring line (+ 2 colon))))
            (setf (erc-d-i-message.contents msg) trailing)
            (setq parts (nconc head (list trailing))))
        (setq parts (split-string line " " t " "))))
    ;; A first token starting with ":" names the sender.
    (when (and parts (eq ?: (aref (car parts) 0)))
      (setf (erc-d-i-message.sender msg) (substring (car parts) 1))
      (setq parts (cdr parts)))
    (setf (erc-d-i-message.command msg) (or (car parts) ""))
    (setf (erc-d-i-message.command-args msg) (cdr parts))
    msg))
(provide 'erc-d-i)
;;; erc-d-i.el ends here