|
|
1.1 ! root 1: ;; Hairy rfc822 parser for mail and news and suchlike ! 2: ;; Copyright (C) 1986, 1987 Free Software Foundation, Inc. ! 3: ;; Author Richard Mlynarik. ! 4: ! 5: ;; This file is part of GNU Emacs. ! 6: ! 7: ;; GNU Emacs is free software; you can redistribute it and/or modify ! 8: ;; it under the terms of the GNU General Public License as published by ! 9: ;; the Free Software Foundation; either version 1, or (at your option) ! 10: ;; any later version. ! 11: ! 12: ;; GNU Emacs is distributed in the hope that it will be useful, ! 13: ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ! 14: ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! 15: ;; GNU General Public License for more details. ! 16: ! 17: ;; You should have received a copy of the GNU General Public License ! 18: ;; along with GNU Emacs; see the file COPYING. If not, write to ! 19: ;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. ! 20: ! 21: (provide 'rfc822) ! 22: ! 23: ;; uses address-start free, throws to address ! 24: (defun rfc822-bad-address (reason) ! 25: (save-restriction ! 26: (insert "_^_") ! 27: (narrow-to-region address-start ! 28: (if (re-search-forward "[,;]" nil t) ! 29: (max (point-min) (1- (point))) ! 30: (point-max))) ! 31: ;; make the error string be suitable for inclusion in (...) ! 32: (let ((losers '("\\" "(" ")" "\n"))) ! 33: (while losers ! 34: (goto-char (point-min)) ! 35: (while (search-forward (car losers) nil t) ! 36: (backward-char 1) ! 37: (insert ?\\) ! 38: (forward-char 1)) ! 39: (setq losers (cdr losers)))) ! 40: (goto-char (point-min)) (insert "(Unparsable address -- " ! 41: reason ! 42: ":\n\t \"") ! 43: (goto-char (point-max)) (insert "\")")) ! 44: (rfc822-nuke-whitespace) ! 45: (throw 'address (buffer-substring address-start (point)))) ! 46: ! 47: (defun rfc822-nuke-whitespace (&optional leave-space) ! 48: (let (ch) ! 49: (while (cond ((eobp) ! 50: nil) ! 51: ((= (setq ch (following-char)) ?\() ! 52: (forward-char 1) ! 53: (while (if (eobp) ! 54: (rfc822-bad-address "Unbalanced comment (...)") ! 55: (/= (setq ch (following-char)) ?\))) ! 56: (cond ((looking-at "[^()\\]+") ! 57: (replace-match "")) ! 58: ((= ch ?\() ! 59: (rfc822-nuke-whitespace)) ! 60: ((< (point) (1- (point-max))) ! 61: (delete-char 2)) ! 62: (t ! 63: (rfc822-bad-address "orphaned backslash")))) ! 64: ;; delete remaining "()" ! 65: (forward-char -1) ! 66: (delete-char 2) ! 67: t) ! 68: ((memq ch '(?\ ?\t ?\n)) ! 69: (delete-region (point) ! 70: (progn (skip-chars-forward " \t\n") (point))) ! 71: t) ! 72: (t ! 73: nil))) ! 74: (or (not leave-space) ! 75: (eobp) ! 76: (bobp) ! 77: (= (preceding-char) ?\ ) ! 78: (insert ?\ )))) ! 79: ! 80: (defun rfc822-looking-at (regex &optional leave-space) ! 81: (if (cond ((stringp regex) ! 82: (if (looking-at regex) ! 83: (progn (goto-char (match-end 0)) ! 84: t))) ! 85: (t ! 86: (if (and (not (eobp)) ! 87: (= (following-char) regex)) ! 88: (progn (forward-char 1) ! 89: t)))) ! 90: (let ((tem (match-data))) ! 91: (rfc822-nuke-whitespace leave-space) ! 92: (store-match-data tem) ! 93: t))) ! 94: ! 95: (defun rfc822-snarf-word () ! 96: ;; word is atom | quoted-string ! 97: (cond ((= (following-char) ?\") ! 98: ;; quoted-string ! 99: (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"") ! 100: (rfc822-bad-address "Unterminated quoted string"))) ! 101: ((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+") ! 102: ;; atom ! 103: ) ! 104: (t ! 105: (rfc822-bad-address "Rubbish in address")))) ! 106: ! 107: (defun rfc822-snarf-words () ! 108: (rfc822-snarf-word) ! 109: (while (rfc822-looking-at ?.) ! 110: (rfc822-snarf-word))) ! 111: ! 112: (defun rfc822-snarf-subdomain () ! 113: ;; sub-domain is domain-ref | domain-literal ! 114: (cond ((= (following-char) ?\[) ! 115: ;; domain-ref ! 116: (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]") ! 117: (rfc822-bad-address "Unterminated domain literal [...]"))) ! 118: ((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+") ! 119: ;; domain-literal = atom ! 120: ) ! 121: (t ! 122: (rfc822-bad-address "Rubbish in host/domain specification")))) ! 123: ! 124: (defun rfc822-snarf-domain () ! 125: (rfc822-snarf-subdomain) ! 126: (while (rfc822-looking-at ?.) ! 127: (rfc822-snarf-subdomain))) ! 128: ! 129: (defun rfc822-snarf-frob-list (name separator terminator snarfer ! 130: &optional return) ! 131: (let ((first t) ! 132: (list ()) ! 133: tem) ! 134: (while (cond ((eobp) ! 135: (rfc822-bad-address ! 136: (format "End of addresses in middle of %s" name))) ! 137: ((rfc822-looking-at terminator) ! 138: nil) ! 139: ((rfc822-looking-at separator) ! 140: ;; multiple separators are allowed and do nothing. ! 141: (while (rfc822-looking-at separator)) ! 142: t) ! 143: (first ! 144: t) ! 145: (t ! 146: (rfc822-bad-address ! 147: (format "Gubbish in middle of %s" name)))) ! 148: (setq tem (funcall snarfer) ! 149: first nil) ! 150: (and return tem ! 151: (setq list (if (listp tem) ! 152: (nconc (reverse tem) list) ! 153: (cons tem list))))) ! 154: (nreverse list))) ! 155: ! 156: ;; return either an address (a string) or a list of addresses ! 157: (defun rfc822-addresses-1 (&optional allow-groups) ! 158: ;; Looking for an rfc822 `address' ! 159: ;; Either a group (1*word ":" [#mailbox] ";") ! 160: ;; or a mailbox (addr-spec | 1*word route-addr) ! 161: ;; addr-spec is (local-part "@" domain) ! 162: ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">") ! 163: ;; local-part is (word *("." word)) ! 164: ;; word is (atom | quoted-string) ! 165: ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)") ! 166: ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+ ! 167: ;; domain is sub-domain *("." sub-domain) ! 168: ;; sub-domain is domain-ref | domain-literal ! 169: ;; domain-literal is "[" *(dtext | quoted-pair) "]" ! 170: ;; dtext is "[^][\\n" ! 171: ;; domain-ref is atom ! 172: (let ((address-start (point)) ! 173: (n 0)) ! 174: (catch 'address ! 175: ;; optimize common cases: ! 176: ;; foo ! 177: ;; [email protected] ! 178: ;; followed by "\\'\\|,\\|([^()\\]*)\\'" ! 179: ;; other common cases are: ! 180: ;; foo bar <[email protected]> ! 181: ;; "foo bar" <[email protected]> ! 182: ;; those aren't hacked yet. ! 183: (if (and (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\)" t) ! 184: (progn (or (eobp) ! 185: (rfc822-looking-at ?,)))) ! 186: (progn ! 187: ;; rfc822-looking-at may have inserted a space ! 188: (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1)) ! 189: ;; relying on the fact that rfc822-looking-at <char> ! 190: ;; doesn't mung match-data ! 191: (throw 'address (buffer-substring address-start (match-end 0))))) ! 192: (goto-char address-start) ! 193: (while t ! 194: (cond ((and (= n 1) (rfc822-looking-at ?@)) ! 195: ;; local-part@domain ! 196: (rfc822-snarf-domain) ! 197: (throw 'address ! 198: (buffer-substring address-start (point)))) ! 199: ((rfc822-looking-at ?:) ! 200: (cond ((not allow-groups) ! 201: (rfc822-bad-address "A group name may not appear here")) ! 202: ((= n 0) ! 203: (rfc822-bad-address "No name for :...; group"))) ! 204: ;; group ! 205: (throw 'address ! 206: ;; return a list of addresses ! 207: (rfc822-snarf-frob-list ":...; group" ?\, ?\; ! 208: 'rfc822-addresses-1 t))) ! 209: ((rfc822-looking-at ?<) ! 210: (let ((start (point)) ! 211: (strip t)) ! 212: (cond ((rfc822-looking-at ?>) ! 213: ;; empty path ! 214: ()) ! 215: ((and (not (eobp)) (= (following-char) ?\@)) ! 216: ;; <@foo.bar,@baz:[email protected]> ! 217: (rfc822-snarf-frob-list "<...> address" ?\, ?\: ! 218: (function (lambda () ! 219: (if (rfc822-looking-at ?\@) ! 220: (rfc822-snarf-domain) ! 221: (rfc822-bad-address ! 222: "Gubbish in route-addr"))))) ! 223: (rfc822-snarf-words) ! 224: (or (rfc822-looking-at ?@) ! 225: (rfc822-bad-address "Malformed <..@..> address")) ! 226: (rfc822-snarf-domain) ! 227: (setq strip nil)) ! 228: ((progn (rfc822-snarf-words) (rfc822-looking-at ?@)) ! 229: ; allow <foo> (losing unix seems to do this) ! 230: (rfc822-snarf-domain))) ! 231: (let ((end (point))) ! 232: (if (rfc822-looking-at ?\>) ! 233: (throw 'address ! 234: (buffer-substring (if strip start (1- start)) ! 235: (if strip end (1+ end)))) ! 236: (rfc822-bad-address "Unterminated <...> address"))))) ! 237: ((looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]") ! 238: ;; this allows "." to be part of the words preceding ! 239: ;; an addr-spec, since many broken mailers output ! 240: ;; "Hern K. Herklemeyer III ! 241: ;; <[email protected]>" ! 242: (let ((again t)) ! 243: (while again ! 244: (or (= n 0) (bobp) (= (preceding-char) ?\ ) ! 245: (insert ?\ )) ! 246: (rfc822-snarf-words) ! 247: (setq n (1+ n)) ! 248: (setq again (or (rfc822-looking-at ?.) ! 249: (looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]")))))) ! 250: ((= n 0) ! 251: (throw 'address nil)) ! 252: ((= n 1) ; allow "foo" (losing unix seems to do this) ! 253: (throw 'address ! 254: (buffer-substring address-start (point)))) ! 255: ((> n 1) ! 256: (rfc822-bad-address "Missing comma between addresses or badly-formatted address")) ! 257: ((or (eobp) (= (following-char) ?,)) ! 258: (rfc822-bad-address "Missing comma or route-spec")) ! 259: (t ! 260: (rfc822-bad-address "Strange character or missing comma"))))))) ! 261: ! 262: ! 263: (defun rfc822-addresses (header-text) ! 264: (if (string-match "\\`[ \t]*\\([^][\000-\037\177-\377 ()<>@,;:\\\".]+\\)[ \t]*\\'" ! 265: header-text) ! 266: ;; Make very simple case moderately fast. ! 267: (list (substring header-text (match-beginning 1) (match-end 1))) ! 268: (let ((buf (generate-new-buffer " rfc822"))) ! 269: (unwind-protect ! 270: (save-excursion ! 271: (set-buffer buf) ! 272: (make-local-variable 'case-fold-search) ! 273: (setq case-fold-search nil) ;For speed(?) ! 274: (insert header-text) ! 275: ;; unfold continuation lines ! 276: (goto-char (point-min)) ! 277: ! 278: (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) ! 279: (replace-match "\\1 " t)) ! 280: ! 281: (goto-char (point-min)) ! 282: (rfc822-nuke-whitespace) ! 283: (let ((list ()) ! 284: tem ! 285: address-start); this is for rfc822-bad-address ! 286: (while (not (eobp)) ! 287: (setq address-start (point)) ! 288: (setq tem ! 289: (catch 'address ; this is for rfc822-bad-address ! 290: (cond ((rfc822-looking-at ?\,) ! 291: nil) ! 292: ((looking-at "[][\000-\037\177-\377@;:\\.>)]") ! 293: (forward-char) ! 294: (rfc822-bad-address ! 295: (format "Strange character \\%c found" ! 296: (preceding-char)))) ! 297: (t ! 298: (rfc822-addresses-1 t))))) ! 299: (cond ((null tem)) ! 300: ((stringp tem) ! 301: (setq list (cons tem list))) ! 302: (t ! 303: (setq list (nconc (nreverse tem) list))))) ! 304: (nreverse list))) ! 305: (and buf (kill-buffer buf))))))
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.