|
|
1.1 root 1: ;; Hairy rfc822 parser for mail and news and suchlike
2: ;; Copyright (C) 1986, 1987 Free Software Foundation, Inc.
3: ;; Author Richard Mlynarik.
4:
5: ;; This file is part of GNU Emacs.
6:
7: ;; GNU Emacs is free software; you can redistribute it and/or modify
8: ;; it under the terms of the GNU General Public License as published by
9: ;; the Free Software Foundation; either version 1, or (at your option)
10: ;; any later version.
11:
12: ;; GNU Emacs is distributed in the hope that it will be useful,
13: ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14: ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15: ;; GNU General Public License for more details.
16:
17: ;; You should have received a copy of the GNU General Public License
18: ;; along with GNU Emacs; see the file COPYING. If not, write to
19: ;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20:
21: (provide 'rfc822)
22:
23: ;; uses address-start free, throws to address
24: (defun rfc822-bad-address (reason)
25: (save-restriction
26: (insert "_^_")
27: (narrow-to-region address-start
28: (if (re-search-forward "[,;]" nil t)
29: (max (point-min) (1- (point)))
30: (point-max)))
31: ;; make the error string be suitable for inclusion in (...)
32: (let ((losers '("\\" "(" ")" "\n")))
33: (while losers
34: (goto-char (point-min))
35: (while (search-forward (car losers) nil t)
36: (backward-char 1)
37: (insert ?\\)
38: (forward-char 1))
39: (setq losers (cdr losers))))
40: (goto-char (point-min)) (insert "(Unparsable address -- "
41: reason
42: ":\n\t \"")
43: (goto-char (point-max)) (insert "\")"))
44: (rfc822-nuke-whitespace)
45: (throw 'address (buffer-substring address-start (point))))
46:
47: (defun rfc822-nuke-whitespace (&optional leave-space)
48: (let (ch)
49: (while (cond ((eobp)
50: nil)
51: ((= (setq ch (following-char)) ?\()
52: (forward-char 1)
53: (while (if (eobp)
54: (rfc822-bad-address "Unbalanced comment (...)")
55: (/= (setq ch (following-char)) ?\)))
56: (cond ((looking-at "[^()\\]+")
57: (replace-match ""))
58: ((= ch ?\()
59: (rfc822-nuke-whitespace))
60: ((< (point) (1- (point-max)))
61: (delete-char 2))
62: (t
63: (rfc822-bad-address "orphaned backslash"))))
64: ;; delete remaining "()"
65: (forward-char -1)
66: (delete-char 2)
67: t)
68: ((memq ch '(?\ ?\t ?\n))
69: (delete-region (point)
70: (progn (skip-chars-forward " \t\n") (point)))
71: t)
72: (t
73: nil)))
74: (or (not leave-space)
75: (eobp)
76: (bobp)
77: (= (preceding-char) ?\ )
78: (insert ?\ ))))
79:
80: (defun rfc822-looking-at (regex &optional leave-space)
81: (if (cond ((stringp regex)
82: (if (looking-at regex)
83: (progn (goto-char (match-end 0))
84: t)))
85: (t
86: (if (and (not (eobp))
87: (= (following-char) regex))
88: (progn (forward-char 1)
89: t))))
90: (let ((tem (match-data)))
91: (rfc822-nuke-whitespace leave-space)
92: (store-match-data tem)
93: t)))
94:
95: (defun rfc822-snarf-word ()
96: ;; word is atom | quoted-string
97: (cond ((= (following-char) ?\")
98: ;; quoted-string
99: (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
100: (rfc822-bad-address "Unterminated quoted string")))
101: ((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
102: ;; atom
103: )
104: (t
105: (rfc822-bad-address "Rubbish in address"))))
106:
107: (defun rfc822-snarf-words ()
108: (rfc822-snarf-word)
109: (while (rfc822-looking-at ?.)
110: (rfc822-snarf-word)))
111:
112: (defun rfc822-snarf-subdomain ()
113: ;; sub-domain is domain-ref | domain-literal
114: (cond ((= (following-char) ?\[)
115: ;; domain-ref
116: (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
117: (rfc822-bad-address "Unterminated domain literal [...]")))
118: ((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
119: ;; domain-literal = atom
120: )
121: (t
122: (rfc822-bad-address "Rubbish in host/domain specification"))))
123:
124: (defun rfc822-snarf-domain ()
125: (rfc822-snarf-subdomain)
126: (while (rfc822-looking-at ?.)
127: (rfc822-snarf-subdomain)))
128:
129: (defun rfc822-snarf-frob-list (name separator terminator snarfer
130: &optional return)
131: (let ((first t)
132: (list ())
133: tem)
134: (while (cond ((eobp)
135: (rfc822-bad-address
136: (format "End of addresses in middle of %s" name)))
137: ((rfc822-looking-at terminator)
138: nil)
139: ((rfc822-looking-at separator)
140: ;; multiple separators are allowed and do nothing.
141: (while (rfc822-looking-at separator))
142: t)
143: (first
144: t)
145: (t
146: (rfc822-bad-address
147: (format "Gubbish in middle of %s" name))))
148: (setq tem (funcall snarfer)
149: first nil)
150: (and return tem
151: (setq list (if (listp tem)
152: (nconc (reverse tem) list)
153: (cons tem list)))))
154: (nreverse list)))
155:
156: ;; return either an address (a string) or a list of addresses
157: (defun rfc822-addresses-1 (&optional allow-groups)
158: ;; Looking for an rfc822 `address'
159: ;; Either a group (1*word ":" [#mailbox] ";")
160: ;; or a mailbox (addr-spec | 1*word route-addr)
161: ;; addr-spec is (local-part "@" domain)
162: ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
163: ;; local-part is (word *("." word))
164: ;; word is (atom | quoted-string)
165: ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
166: ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+
167: ;; domain is sub-domain *("." sub-domain)
168: ;; sub-domain is domain-ref | domain-literal
169: ;; domain-literal is "[" *(dtext | quoted-pair) "]"
170: ;; dtext is "[^][\\n"
171: ;; domain-ref is atom
172: (let ((address-start (point))
173: (n 0))
174: (catch 'address
175: ;; optimize common cases:
176: ;; foo
177: ;; [email protected]
178: ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
179: ;; other common cases are:
180: ;; foo bar <[email protected]>
181: ;; "foo bar" <[email protected]>
182: ;; those aren't hacked yet.
183: (if (and (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\)" t)
184: (progn (or (eobp)
185: (rfc822-looking-at ?,))))
186: (progn
187: ;; rfc822-looking-at may have inserted a space
188: (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1))
189: ;; relying on the fact that rfc822-looking-at <char>
190: ;; doesn't mung match-data
191: (throw 'address (buffer-substring address-start (match-end 0)))))
192: (goto-char address-start)
193: (while t
194: (cond ((and (= n 1) (rfc822-looking-at ?@))
195: ;; local-part@domain
196: (rfc822-snarf-domain)
197: (throw 'address
198: (buffer-substring address-start (point))))
199: ((rfc822-looking-at ?:)
200: (cond ((not allow-groups)
201: (rfc822-bad-address "A group name may not appear here"))
202: ((= n 0)
203: (rfc822-bad-address "No name for :...; group")))
204: ;; group
205: (throw 'address
206: ;; return a list of addresses
207: (rfc822-snarf-frob-list ":...; group" ?\, ?\;
208: 'rfc822-addresses-1 t)))
209: ((rfc822-looking-at ?<)
210: (let ((start (point))
211: (strip t))
212: (cond ((rfc822-looking-at ?>)
213: ;; empty path
214: ())
215: ((and (not (eobp)) (= (following-char) ?\@))
216: ;; <@foo.bar,@baz:[email protected]>
217: (rfc822-snarf-frob-list "<...> address" ?\, ?\:
218: (function (lambda ()
219: (if (rfc822-looking-at ?\@)
220: (rfc822-snarf-domain)
221: (rfc822-bad-address
222: "Gubbish in route-addr")))))
223: (rfc822-snarf-words)
224: (or (rfc822-looking-at ?@)
225: (rfc822-bad-address "Malformed <..@..> address"))
226: (rfc822-snarf-domain)
227: (setq strip nil))
228: ((progn (rfc822-snarf-words) (rfc822-looking-at ?@))
229: ; allow <foo> (losing unix seems to do this)
230: (rfc822-snarf-domain)))
231: (let ((end (point)))
232: (if (rfc822-looking-at ?\>)
233: (throw 'address
234: (buffer-substring (if strip start (1- start))
235: (if strip end (1+ end))))
236: (rfc822-bad-address "Unterminated <...> address")))))
237: ((looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]")
238: ;; this allows "." to be part of the words preceding
239: ;; an addr-spec, since many broken mailers output
240: ;; "Hern K. Herklemeyer III
241: ;; <[email protected]>"
242: (let ((again t))
243: (while again
244: (or (= n 0) (bobp) (= (preceding-char) ?\ )
245: (insert ?\ ))
246: (rfc822-snarf-words)
247: (setq n (1+ n))
248: (setq again (or (rfc822-looking-at ?.)
249: (looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]"))))))
250: ((= n 0)
251: (throw 'address nil))
252: ((= n 1) ; allow "foo" (losing unix seems to do this)
253: (throw 'address
254: (buffer-substring address-start (point))))
255: ((> n 1)
256: (rfc822-bad-address "Missing comma between addresses or badly-formatted address"))
257: ((or (eobp) (= (following-char) ?,))
258: (rfc822-bad-address "Missing comma or route-spec"))
259: (t
260: (rfc822-bad-address "Strange character or missing comma")))))))
261:
262:
263: (defun rfc822-addresses (header-text)
264: (if (string-match "\\`[ \t]*\\([^][\000-\037\177-\377 ()<>@,;:\\\".]+\\)[ \t]*\\'"
265: header-text)
266: ;; Make very simple case moderately fast.
267: (list (substring header-text (match-beginning 1) (match-end 1)))
268: (let ((buf (generate-new-buffer " rfc822")))
269: (unwind-protect
270: (save-excursion
271: (set-buffer buf)
272: (make-local-variable 'case-fold-search)
273: (setq case-fold-search nil) ;For speed(?)
274: (insert header-text)
275: ;; unfold continuation lines
276: (goto-char (point-min))
277:
278: (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
279: (replace-match "\\1 " t))
280:
281: (goto-char (point-min))
282: (rfc822-nuke-whitespace)
283: (let ((list ())
284: tem
285: address-start); this is for rfc822-bad-address
286: (while (not (eobp))
287: (setq address-start (point))
288: (setq tem
289: (catch 'address ; this is for rfc822-bad-address
290: (cond ((rfc822-looking-at ?\,)
291: nil)
292: ((looking-at "[][\000-\037\177-\377@;:\\.>)]")
293: (forward-char)
294: (rfc822-bad-address
295: (format "Strange character \\%c found"
296: (preceding-char))))
297: (t
298: (rfc822-addresses-1 t)))))
299: (cond ((null tem))
300: ((stringp tem)
301: (setq list (cons tem list)))
302: (t
303: (setq list (nconc (nreverse tem) list)))))
304: (nreverse list)))
305: (and buf (kill-buffer buf))))))
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.