|
|
1.1 root 1: ;; Hairy rfc822 parser for mail and news and suchlike
2: ;; Copyright (C) 1986, 1987 Free Software Foundation, Inc.
3: ;; Author Richard Mlynarik.
4:
5: ;; This file is part of GNU Emacs.
6:
7: ;; GNU Emacs is distributed in the hope that it will be useful,
8: ;; but WITHOUT ANY WARRANTY. No author or distributor
9: ;; accepts responsibility to anyone for the consequences of using it
10: ;; or for whether it serves any particular purpose or works at all,
11: ;; unless he says so in writing. Refer to the GNU Emacs General Public
12: ;; License for full details.
13:
14: ;; Everyone is granted permission to copy, modify and redistribute
15: ;; GNU Emacs, but only under the conditions described in the
16: ;; GNU Emacs General Public License. A copy of this license is
17: ;; supposed to have been given to you along with GNU Emacs so you
18: ;; can know your rights and responsibilities. It should be in a
19: ;; file named COPYING. Among other things, the copyright notice
20: ;; and this notice must be preserved on all copies.
21:
22: (provide 'rfc822)
23:
24: ;; uses address-start free, throws to address
25: (defun rfc822-bad-address (reason)
26: (save-restriction
27: (insert "_^_")
28: (narrow-to-region address-start
29: (if (re-search-forward "[,;]" nil t)
30: (max (point-min) (1- (point)))
31: (point-max)))
32: ;; make the error string be suitable for inclusion in (...)
33: (let ((losers '("\\" "(" ")" "\n")))
34: (while losers
35: (goto-char (point-min))
36: (while (search-forward (car losers) nil t)
37: (backward-char 1)
38: (insert ?\\)
39: (forward-char 1))
40: (setq losers (cdr losers))))
41: (goto-char (point-min)) (insert "(Unparsable address -- "
42: reason
43: ":\n\t \"")
44: (goto-char (point-max)) (insert "\")"))
45: (rfc822-nuke-whitespace)
46: (throw 'address (buffer-substring address-start (point))))
47:
48: (defun rfc822-nuke-whitespace (&optional leave-space)
49: (let (ch)
50: (while (cond ((eobp)
51: nil)
52: ((= (setq ch (following-char)) ?\()
53: (forward-char 1)
54: (while (if (eobp)
55: (rfc822-bad-address "Unbalanced comment (...)")
56: (/= (setq ch (following-char)) ?\)))
57: (cond ((looking-at "[^()\\]+")
58: (replace-match ""))
59: ((= ch ?\()
60: (rfc822-nuke-whitespace))
61: ((< (point) (1- (point-max)))
62: (delete-char 2))
63: (t
64: (rfc822-bad-address "orphaned backslash"))))
65: ;; delete remaining "()"
66: (forward-char -1)
67: (delete-char 2)
68: t)
69: ((memq ch '(?\ ?\t ?\n))
70: (delete-region (point)
71: (progn (skip-chars-forward " \t\n") (point)))
72: t)
73: (t
74: nil)))
75: (or (not leave-space)
76: (eobp)
77: (bobp)
78: (= (preceding-char) ?\ )
79: (insert ?\ ))))
80:
81: (defun rfc822-looking-at (regex &optional leave-space)
82: (if (cond ((stringp regex)
83: (if (looking-at regex)
84: (progn (goto-char (match-end 0))
85: t)))
86: (t
87: (if (and (not (eobp))
88: (= (following-char) regex))
89: (progn (forward-char 1)
90: t))))
91: (let ((tem (match-data)))
92: (rfc822-nuke-whitespace leave-space)
93: (store-match-data tem)
94: t)))
95:
96: (defun rfc822-snarf-word ()
97: ;; word is atom | quoted-string
98: (cond ((= (following-char) ?\")
99: ;; quoted-string
100: (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
101: (rfc822-bad-address "Unterminated quoted string")))
102: ((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
103: ;; atom
104: )
105: (t
106: (rfc822-bad-address "Rubbish in address"))))
107:
108: (defun rfc822-snarf-words ()
109: (rfc822-snarf-word)
110: (while (rfc822-looking-at ?.)
111: (rfc822-snarf-word)))
112:
113: (defun rfc822-snarf-subdomain ()
114: ;; sub-domain is domain-ref | domain-literal
115: (cond ((= (following-char) ?\[)
116: ;; domain-ref
117: (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
118: (rfc822-bad-address "Unterminated domain literal [...]")))
119: ((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
120: ;; domain-literal = atom
121: )
122: (t
123: (rfc822-bad-address "Rubbish in host/domain specification"))))
124:
125: (defun rfc822-snarf-domain ()
126: (rfc822-snarf-subdomain)
127: (while (rfc822-looking-at ?.)
128: (rfc822-snarf-subdomain)))
129:
130: (defun rfc822-snarf-frob-list (name separator terminator snarfer
131: &optional return)
132: (let ((first t)
133: (list ())
134: tem)
135: (while (cond ((eobp)
136: (rfc822-bad-address
137: (format "End of addresses in middle of %s" name)))
138: ((rfc822-looking-at terminator)
139: nil)
140: ((rfc822-looking-at separator)
141: ;; multiple separators are allowed and do nothing.
142: (while (rfc822-looking-at separator))
143: t)
144: (first
145: t)
146: (t
147: (rfc822-bad-address
148: (format "Gubbish in middle of %s" name))))
149: (setq tem (funcall snarfer)
150: first nil)
151: (and return tem
152: (setq list (if (listp tem)
153: (nconc (reverse tem) list)
154: (cons tem list)))))
155: (nreverse list)))
156:
157: ;; return either an address (a string) or a list of addresses
158: (defun rfc822-addresses-1 (&optional allow-groups)
159: ;; Looking for an rfc822 `address'
160: ;; Either a group (1*word ":" [#mailbox] ";")
161: ;; or a mailbox (addr-spec | 1*word route-addr)
162: ;; addr-spec is (local-part "@" domain)
163: ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
164: ;; local-part is (word *("." word))
165: ;; word is (atom | quoted-string)
166: ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
167: ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+
168: ;; domain is sub-domain *("." sub-domain)
169: ;; sub-domain is domain-ref | domain-literal
170: ;; domain-literal is "[" *(dtext | quoted-pair) "]"
171: ;; dtext is "[^][\\n"
172: ;; domain-ref is atom
173: (let ((address-start (point))
174: (n 0))
175: (catch 'address
176: ;; optimize common cases:
177: ;; foo
178: ;; [email protected]
179: ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
180: ;; other common cases are:
181: ;; foo bar <[email protected]>
182: ;; "foo bar" <[email protected]>
183: ;; those aren't hacked yet.
184: (if (and (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\)" t)
185: (progn (or (eobp)
186: (rfc822-looking-at ?,))))
187: (progn
188: ;; rfc822-looking-at may have inserted a space
189: (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1))
190: ;; relying on the fact that rfc822-looking-at <char>
191: ;; doesn't mung match-data
192: (throw 'address (buffer-substring address-start (match-end 0)))))
193: (goto-char address-start)
194: (while t
195: (cond ((and (= n 1) (rfc822-looking-at ?@))
196: ;; local-part@domain
197: (rfc822-snarf-domain)
198: (throw 'address
199: (buffer-substring address-start (point))))
200: ((rfc822-looking-at ?:)
201: (cond ((not allow-groups)
202: (rfc822-bad-address "A group name may not appear here"))
203: ((= n 0)
204: (rfc822-bad-address "No name for :...; group")))
205: ;; group
206: (throw 'address
207: ;; return a list of addresses
208: (rfc822-snarf-frob-list ":...; group" ?\, ?\;
209: 'rfc822-addresses-1 t)))
210: ((rfc822-looking-at ?<)
211: (let ((start (point))
212: (strip t))
213: (cond ((rfc822-looking-at ?>)
214: ;; empty path
215: ())
216: ((and (not (eobp)) (= (following-char) ?\@))
217: ;; <@foo.bar,@baz:[email protected]>
218: (rfc822-snarf-frob-list "<...> address" ?\, ?\:
219: (function (lambda ()
220: (if (rfc822-looking-at ?\@)
221: (rfc822-snarf-domain)
222: (rfc822-bad-address
223: "Gubbish in route-addr")))))
224: (rfc822-snarf-words)
225: (or (rfc822-looking-at ?@)
226: (rfc822-bad-address "Malformed <..@..> address"))
227: (rfc822-snarf-domain)
228: (setq strip nil))
229: ((progn (rfc822-snarf-words) (rfc822-looking-at ?@))
230: ; allow <foo> (losing unix seems to do this)
231: (rfc822-snarf-domain)))
232: (let ((end (point)))
233: (if (rfc822-looking-at ?\>)
234: (throw 'address
235: (buffer-substring (if strip start (1- start))
236: (if strip end (1+ end))))
237: (rfc822-bad-address "Unterminated <...> address")))))
238: ((looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]")
239: ;; this allows "." to be part of the words preceding
240: ;; an addr-spec, since many broken mailers output
241: ;; "Hern K. Herklemeyer III
242: ;; <[email protected]>"
243: (or (= n 0)
244: (= (preceding-char) ?\ )
245: (insert ?\ ))
246: (rfc822-snarf-words)
247: (setq n (1+ n)))
248: ((= n 0)
249: (throw 'address nil))
250: ((= n 1) ; allow "foo" (losing unix seems to do this)
251: (throw 'address
252: (buffer-substring address-start (point))))
253: ((or (eobp) (looking-at ","))
254: (rfc822-bad-address "Missing comma or route-spec"))
255: (t
256: (rfc822-bad-address "Strange character or missing comma")))))))
257:
258:
259: (defun rfc822-addresses (header-text)
260: (if (string-match "\\`[ \t]*\\([^][\000-\037\177-\377 ()<>@,;:\\\".]+\\)[ \t]*\\'"
261: header-text)
262: ;; Make very simple case moderately fast.
263: (list (substring header-text (match-beginning 1) (match-end 1)))
264: (let ((buf (generate-new-buffer " rfc822")))
265: (unwind-protect
266: (save-excursion
267: (set-buffer buf)
268: (make-local-variable 'case-fold-search)
269: (setq case-fold-search nil) ;For speed(?)
270: (insert header-text)
271: ;; unfold continuation lines
272: (goto-char (point-min))
273:
274: (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
275: (replace-match "\\1 " t))
276:
277: (goto-char (point-min))
278: (rfc822-nuke-whitespace)
279: (let ((list ())
280: tem
281: address-start); this is for rfc822-bad-address
282: (while (not (eobp))
283: (setq address-start (point))
284: (setq tem
285: (catch 'address ; this is for rfc822-bad-address
286: (cond ((rfc822-looking-at ?\,)
287: nil)
288: ((looking-at "[][\000-\037\177-\377@;:\\.]")
289: (forward-char)
290: (rfc822-bad-address
291: (format "Strange character \\%c found"
292: (preceding-char))))
293: (t
294: (rfc822-addresses-1 t)))))
295: (cond ((null tem))
296: ((stringp tem)
297: (setq list (cons tem list)))
298: (t
299: (setq list (nconc (nreverse tem) list)))))
300: (nreverse list)))
301: (and buf (kill-buffer buf))))))
302:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.