/**
 * @file
 * Manage precompiled / predefined regular expressions
 *
 * @authors
 * Copyright (C) 2020 Pietro Cerutti <gahr@gahr.ch>
 *
 * @copyright
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */
22 
23 #ifndef MUTT_LIB_PREX_H
24 #define MUTT_LIB_PREX_H
25 
26 #include "regex3.h"
27 
/**
 * enum Prex - Predefined list of regular expressions
 *
 * Each constant selects one predefined regex; it is the `which` argument of
 * mutt_prex_capture().
 *
 * @note The []s show the text matched by each regex
 */
enum Prex
{
  PREX_URL,                   ///< `[imaps://user:pass@example.com/INBOX?foo=bar]`
  PREX_URL_QUERY_KEY_VAL,     ///< `https://example.com/?[q=foo]`
  PREX_RFC2047_ENCODED_WORD,  ///< `[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]`
  PREX_GNUTLS_CERT_HOST_HASH, ///<\c [\#H foo.com A76D 954B EB79 1F49 5B3A 0A0E 0681 65B1]
  PREX_RFC5322_DATE,          ///< `[Mon, 16 Mar 2020 15:09:35 -0700]`
  PREX_RFC5322_DATE_LAX,      ///< `[Mon, (Comment) 16 Mar 2020 15:09:35 -0700]`
  PREX_IMAP_DATE,             ///< `[16-MAR-2020 15:09:35 -0700]`
  PREX_MBOX_FROM,             ///< `[From god@heaven.af.mil Sat Jan  3 01:05:34 1996]`
  PREX_MBOX_FROM_LAX,         ///< `[From god@heaven.af.mil Sat Jan  3 01:05:34 1996]` (lax mode)
  PREX_MAX                    ///< Number of predefined regexes; must stay last
};
44 
/**
 * enum PrexUrlSchemeMatch - Regex Matches for #PREX_URL
 *
 * @note The []s show the matching part of the URL
 */
enum PrexUrlSchemeMatch
{
  PREX_URL_MATCH_FULL,           ///< `[imaps://user:pass@host.com/Inbox?foo=bar]`
  PREX_URL_MATCH_SCHEME,         ///< `[imaps]://...`
  PREX_URL_MATCH_REST,           ///< `imaps:[//...]`
  PREX_URL_MATCH_AUTH_OR_PATH,   ///< `imaps:[somepath]|[//me@example.com/Inbox]?foo=bar`
  PREX_URL_MATCH_AUTHORITY_PATH, ///< `imaps:[//me@example.com/Inbox]?foo=bar`
  PREX_URL_MATCH_USERINFO,       ///< `...//[user:pass@]...`
  PREX_URL_MATCH_USER,           ///< `...//[user]:pass@...`
  PREX_URL_MATCH_COLONPASS,      ///< `...//user[:pass]@...`
  PREX_URL_MATCH_PASS,           ///< `...//user:[pass]@...`
  PREX_URL_MATCH_HOST,           ///< `imaps://...[host.com]...`
  PREX_URL_MATCH_HOSTNAME,       ///< `imaps://...[host.com]...`
  PREX_URL_MATCH_HOSTIPVX,       ///< `imaps://...[127.0.0.1]...`
  PREX_URL_MATCH_COLONPORT,      ///< `imaps://host.com[:993]/...`
  PREX_URL_MATCH_PORT,           ///< `imaps://host.com:[993]/...`
  PREX_URL_MATCH_SLASHPATH,      ///< `...:993[/Inbox]`
  PREX_URL_MATCH_PATH,           ///< `...:993/[Inbox]`
  PREX_URL_MATCH_PATH_ONLY,      ///< `mailto:[me@example.com]?foo=bar`
  PREX_URL_MATCH_QUESTIONQUERY,  ///< `...Inbox[?foo=bar&baz=value]`
  PREX_URL_MATCH_QUERY,          ///< `...Inbox?[foo=bar&baz=value]`
  PREX_URL_MATCH_MAX             ///< Number of match indices; must stay last
};
73 
/**
 * enum PrexUrlQueryKeyValMatch - Regex Matches for #PREX_URL_QUERY_KEY_VAL
 *
 * @note The []s show the matching part of the URL Query
 */
enum PrexUrlQueryKeyValMatch
{
  PREX_URL_QUERY_KEY_VAL_MATCH_FULL, ///< `[key=val]`
  PREX_URL_QUERY_KEY_VAL_MATCH_KEY,  ///< `[key]=val`
  PREX_URL_QUERY_KEY_VAL_MATCH_VAL,  ///< `key=[val]`
  PREX_URL_QUERY_KEY_VAL_MATCH_MAX   ///< Number of match indices; must stay last
};
86 
/**
 * enum PrexRfc2047EncodedWordMatch - Regex Matches for #PREX_RFC2047_ENCODED_WORD
 *
 * @note The []s show the matching part of the RFC2047-encoded word
 */
enum PrexRfc2047EncodedWordMatch
{
  PREX_RFC2047_ENCODED_WORD_MATCH_FULL,     ///< `[=?utf-8?Q?=E8=81...?=]`
  PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET,  ///< `=?[utf-8]?Q?=E8=81...?=`
  PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING, ///< `=?utf-8?[Q]?=E8=81...?=`
  PREX_RFC2047_ENCODED_WORD_MATCH_TEXT,     ///< `=?utf-8?Q?[=E8=81...]?=`
  PREX_RFC2047_ENCODED_WORD_MATCH_MAX       ///< Number of match indices; must stay last
};
100 
/**
 * enum PrexGnuTlsCertHostnameMatch - Regex Matches for #PREX_GNUTLS_CERT_HOST_HASH
 *
 * @note The []s show the matching part of the TLS Certificate Hostname line
 */
enum PrexGnuTlsCertHostnameMatch
{
  PREX_GNUTLS_CERT_HOST_HASH_MATCH_FULL,      ///<\c [\#H foo.com A76D ... 65B1]
  PREX_GNUTLS_CERT_HOST_HASH_MATCH_HOST,      ///<\c \#H [foo.com] A76D ... 65B1
  PREX_GNUTLS_CERT_HOST_HASH_MATCH_HASH,      ///<\c \#H foo.com [A76D ... 65B1]
  PREX_GNUTLS_CERT_HOST_HASH_MATCH_HASH_LAST, ///<\c \#H foo.com A76D ... [65B1]
  PREX_GNUTLS_CERT_HOST_HASH_MATCH_MAX        ///< Number of match indices; must stay last
};
114 
/**
 * enum PrexRfc5322Date - Regex Matches for #PREX_RFC5322_DATE
 *
 * @note The []s show the matching part of the RFC5322 date
 */
enum PrexRfc5322Date
{
  PREX_RFC5322_DATE_MATCH_FULL,        ///< `[Mon, 2 Mar 2020 14:32:55 +0200]`
  PREX_RFC5322_DATE_MATCH_MAYBE_DOW,   ///< `[Mon, ]2 Mar 2020 14:32:55 +0200`
  PREX_RFC5322_DATE_MATCH_DOW,         ///< `[Mon], 2 Mar 2020 14:32:55 +0200`
  PREX_RFC5322_DATE_MATCH_DAY,         ///< `Tue, [3] Mar 2020 14:32:55 +0200`
  PREX_RFC5322_DATE_MATCH_MONTH,       ///< `Tue, 3 [Mar] 2020 14:32:55 +0200`
  PREX_RFC5322_DATE_MATCH_YEAR,        ///< `Tue, 3 Mar [2020] 14:32:55 +0200`
  PREX_RFC5322_DATE_MATCH_HOUR,        ///< `Tue, 3 Mar 2020 [14]:32:55 +0200`
  PREX_RFC5322_DATE_MATCH_MINUTE,      ///< `Tue, 3 Mar 2020 14:[32]:55 +0200`
  PREX_RFC5322_DATE_MATCH_COLONSECOND, ///< `Tue, 3 Mar 2020 14:32[:55] +0200`
  PREX_RFC5322_DATE_MATCH_SECOND,      ///< `Tue, 3 Mar 2020 14:32:[55] +0200`
  PREX_RFC5322_DATE_MATCH_TZFULL,      ///< `Tue, 3 Mar 2020 14:32:55[CET]`
  PREX_RFC5322_DATE_MATCH_TZ,          ///< `Tue, 3 Mar 2020 14:32:55 [+0200]`
  PREX_RFC5322_DATE_MATCH_TZ_OBS,      ///< `Tue, 3 Mar 2020 14:32:55[UT]`
  PREX_RFC5322_DATE_MATCH_MAX          ///< Number of match indices; must stay last
};
137 
/**
 * enum PrexRfc5322DateLax - Regex Matches for #PREX_RFC5322_DATE_LAX, i.e. a
 * RFC5322 date including obsolete comments in parentheses
 *
 * The reason we provide an alternate regex for RFC5322 dates is that the
 * non-obsolete one is faster, while this one is more complete.
 *
 * @note The []s show the matching part of the RFC5322 date
 * @note The `*_CFWS*` constants match `()`d comments with whitespace; they are
 *       interleaved with the date fields in the order listed here
 */
enum PrexRfc5322DateLax
{
  PREX_RFC5322_DATE_LAX_MATCH_FULL,        ///< `[Mon, 2 Mar 2020 14:32:55 +0200]`
  PREX_RFC5322_DATE_LAX_MATCH_CFWS1,       ///< `()`d comment with whitespace
  PREX_RFC5322_DATE_LAX_MATCH_MAYBE_DOW,   ///< `[Mon, ]2 Mar 2020 14:32:55 +0200`
  PREX_RFC5322_DATE_LAX_MATCH_DOW,         ///< `[Mon], 2 Mar 2020 14:32:55 +0200`
  PREX_RFC5322_DATE_LAX_MATCH_CFWS2,       ///< `()`d comment with whitespace
  PREX_RFC5322_DATE_LAX_MATCH_CFWS3,       ///< `()`d comment with whitespace
  PREX_RFC5322_DATE_LAX_MATCH_DAY,         ///< `Tue, [3] Mar 2020 14:32:55 +0200`
  PREX_RFC5322_DATE_LAX_MATCH_CFWS4,       ///< `()`d comment with whitespace
  PREX_RFC5322_DATE_LAX_MATCH_MONTH,       ///< `Tue, 3 [Mar] 2020 14:32:55 +0200`
  PREX_RFC5322_DATE_LAX_MATCH_CFWS5,       ///< `()`d comment with whitespace
  PREX_RFC5322_DATE_LAX_MATCH_YEAR,        ///< `Tue, 3 Mar [2020] 14:32:55 +0200`
  PREX_RFC5322_DATE_LAX_MATCH_CFWS6,       ///< `()`d comment with whitespace
  PREX_RFC5322_DATE_LAX_MATCH_HOUR,        ///< `Tue, 3 Mar 2020 [14]:32:55 +0200`
  PREX_RFC5322_DATE_LAX_MATCH_CFWS7,       ///< `()`d comment with whitespace
  PREX_RFC5322_DATE_LAX_MATCH_MINUTE,      ///< `Tue, 3 Mar 2020 14:[32]:55 +0200`
  PREX_RFC5322_DATE_LAX_MATCH_CFWS8,       ///< `()`d comment with whitespace
  PREX_RFC5322_DATE_LAX_MATCH_COLONSECOND, ///< `Tue, 3 Mar 2020 14:32[:55] +0200`
  PREX_RFC5322_DATE_LAX_MATCH_CFWS9,       ///< `()`d comment with whitespace
  PREX_RFC5322_DATE_LAX_MATCH_SECOND,      ///< `Tue, 3 Mar 2020 14:32:[55] +0200`
  PREX_RFC5322_DATE_LAX_MATCH_CFWS10,      ///< `()`d comment with whitespace
  PREX_RFC5322_DATE_LAX_MATCH_TZFULL,      ///< `Tue, 3 Mar 2020 14:32:55[CET]`
  PREX_RFC5322_DATE_LAX_MATCH_TZ,          ///< `Tue, 3 Mar 2020 14:32:55 [+0200]`
  PREX_RFC5322_DATE_LAX_MATCH_TZ_OBS,      ///< `Tue, 3 Mar 2020 14:32:55[UT]`
  PREX_RFC5322_DATE_LAX_MATCH_MAX          ///< Number of match indices; must stay last
};
175 
/**
 * enum PrexImapDate - Regex Matches for #PREX_IMAP_DATE, an IMAP `INTERNALDATE`
 *
 * @note The []s show the matching part of the IMAP date
 */
enum PrexImapDate
{
  PREX_IMAP_DATE_MATCH_FULL,   ///< `[16-MAR-2020 15:09:35 -0700]`
  PREX_IMAP_DATE_MATCH_DAY,    ///< `[ 4]-MAR-2020 15:09:35 -0700`
  PREX_IMAP_DATE_MATCH_DAY1,   ///< ` [4]-MAR-2020 15:09:35 -0700`
  PREX_IMAP_DATE_MATCH_DAY2,   ///< `[15]-MAR-2020 15:09:35 -0700`
  PREX_IMAP_DATE_MATCH_MONTH,  ///< `15-[MAR]-2020 15:09:35 -0700`
  PREX_IMAP_DATE_MATCH_YEAR,   ///< `15-MAR-[2020] 15:09:35 -0700`
  PREX_IMAP_DATE_MATCH_TIME,   ///< `15-MAR-2020 [15:09:35] -0700`
  PREX_IMAP_DATE_MATCH_TZ,     ///< `15-MAR-2020 15:09:35 [-0700]`
  PREX_IMAP_DATE_MATCH_MAX     ///< Number of match indices; must stay last
};
193 
/**
 * enum PrexMboxFrom - Regex Matches for #PREX_MBOX_FROM, an mbox-style From line
 *
 * @note The []s show the matching part of the mbox From line
 */
enum PrexMboxFrom
{
  PREX_MBOX_FROM_MATCH_FULL,      ///< `[From god@heaven.af.mil Sat Jan  3 01:05:34 1996]`
  PREX_MBOX_FROM_MATCH_ENVSENDER, ///< `From [god@heaven.af.mil] Sat Jan  3 01:05:34 1996`
  PREX_MBOX_FROM_MATCH_DOW,       ///< `From god@heaven.af.mil [Sat] Jan  3 01:05:34 1996`
  PREX_MBOX_FROM_MATCH_MONTH,     ///< `From god@heaven.af.mil Sat [Jan]  3 01:05:34 1996`
  PREX_MBOX_FROM_MATCH_DAY,       ///< `From god@heaven.af.mil Sat Jan [ 3] 01:05:34 1996`
  PREX_MBOX_FROM_MATCH_DAY1,      ///< `From god@heaven.af.mil Sat Jan  [3] 01:05:34 1996`
  PREX_MBOX_FROM_MATCH_DAY2,      ///< `From god@heaven.af.mil Sat Jan [10] 01:05:34 1996`
  PREX_MBOX_FROM_MATCH_TIME,      ///< `From god@heaven.af.mil Sat Jan 10 [01:05:34] 1996`
  PREX_MBOX_FROM_MATCH_YEAR,      ///< `From god@heaven.af.mil Sat Jan 10 01:05:34 [1996]`
  PREX_MBOX_FROM_MATCH_MAX        ///< Number of match indices; must stay last
};
212 
/**
 * enum PrexMboxFromLax - Regex Matches for #PREX_MBOX_FROM_LAX, an mbox-style
 * From line, lax mode
 *
 * @note The []s show the matching part of the mbox From line
 */
enum PrexMboxFromLax
{
  PREX_MBOX_FROM_LAX_MATCH_FULL,            ///< `[From god@heaven.af.mil Sat Jan  3 01:05:34 1996]`
  PREX_MBOX_FROM_LAX_MATCH_ENVSENDER,       ///< `From [god at heaven.af.mil] Sat Jan  3 01:05:34 1996`
  PREX_MBOX_FROM_LAX_MATCH_ENVSENDER_PIPER, ///< `From [god@heaven.af.mil] Sat Jan  3 01:05:34 1996`
  PREX_MBOX_FROM_LAX_MATCH_DOW,             ///< `From god@heaven.af.mil [Sat] Jan  3 01:05:34 1996`
  PREX_MBOX_FROM_LAX_MATCH_MONTH,           ///< `From god@heaven.af.mil Sat [Jan]  3 01:05:34 1996`
  PREX_MBOX_FROM_LAX_MATCH_DAY,             ///< `From god@heaven.af.mil Sat Jan [ 3] 01:05:34 1996`
  PREX_MBOX_FROM_LAX_MATCH_DAY1,            ///< `From god@heaven.af.mil Sat Jan  [3] 01:05:34 1996`
  PREX_MBOX_FROM_LAX_MATCH_DAY2,            ///< `From god@heaven.af.mil Sat Jan [10] 01:05:34 1996`
  PREX_MBOX_FROM_LAX_MATCH_TIME,            ///< `From god@heaven.af.mil Sat Jan 10 [01:05:34] 1996`
  PREX_MBOX_FROM_LAX_MATCH_TIME_SEC,        ///< `From god@heaven.af.mil Sat Jan 10 [01:05:34] 1996`
  PREX_MBOX_FROM_LAX_MATCH_TIME_NOSEC,      ///< `From god@heaven.af.mil Sat Jan 10 [01:05] 1996`
  PREX_MBOX_FROM_LAX_MATCH_TZ,              ///< `From god@heaven.af.mil Sat Jan 10 01:05:34 [MET DST] 1996`
  PREX_MBOX_FROM_LAX_MATCH_YEAR,            ///< `From god@heaven.af.mil Sat Jan 10 01:05:34 [1996]`
  PREX_MBOX_FROM_LAX_MATCH_YEAR_4DIG,       ///< `From god@heaven.af.mil Sat Jan 10 01:05:34 [1996]`
  PREX_MBOX_FROM_LAX_MATCH_YEAR_2DIG,       ///< `From god@heaven.af.mil Sat Jan 10 01:05:34 [96]`
  PREX_MBOX_FROM_LAX_MATCH_MAX              ///< Number of match indices; must stay last
};
237 
/**
 * mutt_prex_capture - Match a string against one of the predefined regexes
 * @param which Which predefined regex to use, see #Prex
 * @param str   String to match
 * @retval ptr Match data as `regmatch_t`
 *
 * @note NOTE(review): the implementation is not part of this header.
 *       Presumably the result is an array indexed by the `*_MATCH_*` enum
 *       belonging to `which`; confirm there what is returned when `str` does
 *       not match and who owns the returned memory.
 */
regmatch_t *mutt_prex_capture(enum Prex which, const char *str);

/**
 * mutt_prex_free - Release the resources held for the predefined regexes
 *
 * @note NOTE(review): presumably invalidates pointers previously returned by
 *       mutt_prex_capture(); confirm against the implementation.
 */
void mutt_prex_free(void);
240 
241 #endif /* MUTT_LIB_PREX_H */
242