# -*- coding: utf-8 -*-
"""
    pygments.lexers.email
    ~~~~~~~~~~~~~~~~~~~~~

    Lexer for the raw E-mail.

    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from pygments.lexer import RegexLexer, DelegatingLexer, bygroups
from pygments.lexers.mime import MIMELexer
from pygments.token import Text, Keyword, Name, String, Number, Comment
from pygments.util import get_bool_opt

__all__ = ["EmailLexer"]


class EmailHeaderLexer(RegexLexer):
    """
    Sub-lexer for raw E-mail. This lexer only processes the header part of
    an e-mail.

    The boolean option ``highlight-X-header`` (default ``False``) selects
    whether the content of ``X-`` headers is lexed like any other header or
    emitted as low-lighted comment tokens.

    .. versionadded:: 2.5
    """

    def __init__(self, **options):
        super().__init__(**options)
        self.highlight_x = get_bool_opt(options, "highlight-X-header", False)

    def get_x_header_tokens(self, match):
        """
        Token callback for a complete ``X-`` header.

        ``match`` group 1 is the field name including the colon; group 2 is
        the header content up to and including the newline that ends the
        (possibly folded) header.

        Yields ``(index, tokentype, value)`` triples whose indices are
        positions in the text that ``match`` was applied to.
        """
        if self.highlight_x:
            # field
            yield match.start(1), Name.Tag, match.group(1)

            # content: lex it with the regular "header" rules.  The nested
            # call reports indices relative to the start of group 2, so
            # shift them to keep every yielded index relative to the same
            # text as ``match.start(1)`` above.
            content_offset = match.start(2)
            content_tokens = self.get_tokens_unprocessed(
                match.group(2), stack=("root", "header"))
            for index, token, value in content_tokens:
                yield content_offset + index, token, value
        else:
            # lowlight
            yield match.start(1), Comment.Special, match.group(1)
            yield match.start(2), Comment.Multiline, match.group(2)

    tokens = {
        "root": [
            (r"^(?:[A-WYZ]|X400)[\w\-]*:", Name.Tag, "header"),
            (r"^(X-(?:\w[\w\-]*:))([\s\S]*?\n)(?![ \t])", get_x_header_tokens),
        ],
        "header": [
            # folding
            (r"\n[ \t]", Text.Whitespace),
            (r"\n(?![ \t])", Text.Whitespace, "#pop"),

            # keywords
            (r"\bE?SMTPS?\b", Keyword),
            (r"\b(?:HE|EH)LO\b", Keyword),

            # mailbox
            (r"[\w\.\-\+=]+@[\w\.\-]+", Name.Label),
            (r"<[\w\.\-\+=]+@[\w\.\-]+>", Name.Label),

            # domain
            (r"\b(\w[\w\.-]*\.[\w\.-]*\w[a-zA-Z]+)\b", Name.Function),

            # IPv4
            (
                r"(?<=\b)(?:(?:25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(?:25[0"
                r"-5]|2[0-4][0-9]|1?[0-9][0-9]?)(?=\b)",
                Number.Integer,
            ),

            # IPv6
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,7}:(?!\b)", Number.Hex),
            (r"(?<=\b):((:[0-9a-fA-F]{1,4}){1,7}|:)(?=\b)", Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}(?=\b)", Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}(?=\b)", Number.Hex),
            (r"(?<=\b)[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})(?=\b)", Number.Hex),
            (r"(?<=\b)fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}(?=\b)", Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}(?=\b)", Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}(?=\b)",
             Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}(?=\b)",
             Number.Hex),
            (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}(?=\b)",
             Number.Hex),
            (
                r"(?<=\b)::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}"
                r"[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}"
                r"[0-9])(?=\b)",
                Number.Hex,
            ),
            (
                r"(?<=\b)([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-"
                r"9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-"
                r"9])(?=\b)",
                Number.Hex,
            ),

            # Date time
            (
                r"(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),\s+)?(0[1-9]|[1-2]?[0-9]|3["
                r"01])\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+("
                r"19[0-9]{2}|[2-9][0-9]{3})\s+(2[0-3]|[0-1][0-9]):([0-5][0-9])"
                r"(?::(60|[0-5][0-9]))?(?:\.\d{1,5})?\s+([-\+][0-9]{2}[0-5][0-"
                r"9]|\(?(?:UTC?|GMT|(?:E|C|M|P)(?:ST|ET|DT)|[A-IK-Z])\)?)",
                Name.Decorator,
            ),

            # RFC-2047 encoded string
            (
                r"(=\?)([\w-]+)(\?)([BbQq])(\?)([\[\w!\"#$%&\'()*+,-./:;<=>@[\\"
                r"\]^_`{|}~]+)(\?=)",
                bygroups(
                    String.Affix,
                    Name.Constant,
                    String.Affix,
                    Keyword.Constant,
                    String.Affix,
                    Number.Hex,
                    String.Affix
                )
            ),

            # others
            (r'[\s]+', Text.Whitespace),
            (r'[\S]', Text),
        ],
    }


class EmailLexer(DelegatingLexer):
    """
    Lexer for raw E-mail.

    Additional options accepted:

    `highlight-X-header`
        Highlight the fields of ``X-`` user-defined email header. (default:
        ``False``).

    .. versionadded:: 2.5
    """

    name = "E-mail"
    aliases = ["email", "eml"]
    filenames = ["*.eml"]
    mimetypes = ["message/rfc822"]

    def __init__(self, **options):
        # Combine the header lexer with MIMELexer, using Comment as the
        # token type that DelegatingLexer keys on; all options are passed
        # through unchanged.
        super().__init__(EmailHeaderLexer, MIMELexer, Comment, **options)