1 /*--------------------------------------------------------------------
2 *
3 * Copyright (c) 1991-2021 by the GMT Team (https://www.generic-mapping-tools.org/team.html)
4 * See LICENSE.TXT file for copying and redistribution conditions.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation; version 3 or any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * Contact info: www.generic-mapping-tools.org
16 *--------------------------------------------------------------------*/
17 /*
18 * Extended regular expression matching function.
19 *
20 * Author: Florian Wobbe
21 * Date: 11-AUG-2010
22 * Version: 5.x
23 *
24 * PUBLIC functions:
25 *
26 * gmtlib_regexp_match: Match a string against an extended regular expression
27 *
28 */
29
30 #include "gmt_dev.h"
31 #include "gmt_internals.h"
32
33 /*
34 * ERE pattern matching with PCRE2, PCRE or POSIX
35 */
36 #ifdef HAVE_PCRE
37 #include <pcre.h>
38 #define OVECCOUNT 30 /* should be a multiple of 3 */
39 #elif defined HAVE_PCRE2
40 #define PCRE2_CODE_UNIT_WIDTH 8
41 #include <pcre2.h>
42 #elif defined HAVE_POSIX_ERE
43 #include <regex.h>
44 #define MAX_ERR_LENGTH 80 /* max error message length */
45 #endif
46
gmtlib_regexp_match(struct GMT_CTRL * GMT,const char * subject,const char * pattern,bool caseless)47 int gmtlib_regexp_match (struct GMT_CTRL *GMT, const char *subject, const char *pattern, bool caseless) {
48 /* Match string against the extended regular expression in pattern. Return 1 for match, 0 for no match.
49 * Returns a negative value for fatal errors. */
50
51 #ifdef HAVE_PCRE
52
53 /* Use PCRE for matching
54 * Based on PCRE DEMONSTRATION PROGRAM pcredemo.c
55 */
56 pcre *re;
57 const char *error;
58 int erroffset;
59 int ovector[OVECCOUNT];
60 int rc;
61 int options = 0; /* default options */
62
63 /*************************************************************************
64 * Now we are going to compile the regular expression pattern, and handle *
65 * any errors that are detected. *
66 *************************************************************************/
67
68 if (caseless) options = options|PCRE_CASELESS; /* caseless matching */
69
70 re = pcre_compile(
71 pattern, /* the pattern */
72 options, /* options */
73 &error, /* for error message */
74 &erroffset, /* for error offset */
75 NULL); /* use default character tables */
76
77 /* Compilation failed: print the error message and exit */
78
79 if (re == NULL) {
80 GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: PCRE compilation failed at offset %d: %s.\n", erroffset, error);
81 return (-GMT_RUNTIME_ERROR);
82 }
83
84 /*************************************************************************
85 * If the compilation succeeded, we call PCRE again, in order to do a *
86 * pattern match against the subject string. This does just ONE match. If *
87 * further matching is needed, it will be done below. *
88 *************************************************************************/
89
90 rc = pcre_exec(
91 re, /* the compiled pattern */
92 NULL, /* no extra data - we didn't study the pattern */
93 subject, /* the subject string */
94 (int)strlen(subject), /* the length of the subject */
95 0, /* start at offset 0 in the subject */
96 0, /* default options */
97 ovector, /* output vector for substring information */
98 OVECCOUNT); /* number of elements in the output vector */
99
100 /* Matching failed: handle error cases */
101
102 pcre_free(re); /* Release memory used for the compiled pattern */
103 if (rc < 0) {
104 switch(rc) {
105 case PCRE_ERROR_NOMATCH: break;
106 /* Handle other special cases if you like */
107 default:
108 GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: PCRE matching error %d.\n", rc);
109 return (-GMT_RUNTIME_ERROR);
110 break;
111 }
112 return (0); /* Match failed */
113 }
114
115 return (1); /* Match succeeded */
116
117 #elif defined HAVE_PCRE2
118
119 /* Use PCRE2 for matching
120 * Based on PCRE2 DEMONSTRATION PROGRAM pcre2demo.c
121 */
122 pcre2_code *re;
123 PCRE2_SIZE erroffset;
124 pcre2_match_data *match_data;
125 int errornumber;
126 int rc;
127 int options = 0; /* default options */
128
129 /*************************************************************************
130 * Now we are going to compile the regular expression pattern, and handle *
131 * any errors that are detected. *
132 *************************************************************************/
133
134 if (caseless) options = options|PCRE2_CASELESS; /* caseless matching */
135
136 re = pcre2_compile(
137 (PCRE2_SPTR) pattern, /* the pattern */
138 PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
139 options, /* options */
140 &errornumber, /* for error number */
141 &erroffset, /* for error offset */
142 NULL); /* use default compile context */
143
144 /* Compilation failed: print the error message and exit */
145
146 if (re == NULL) {
147 PCRE2_UCHAR error[256];
148 pcre2_get_error_message(errornumber, error, sizeof(error));
149 GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: PCRE2 compilation failed at offset %d: %s.\n", erroffset, error);
150 return (-GMT_RUNTIME_ERROR);
151 }
152
153 /*************************************************************************
154 * If the compilation succeeded, we call PCRE again, in order to do a *
155 * pattern match against the subject string. This does just ONE match. If *
156 * further matching is needed, it will be done below. Before running the *
157 * match we must set up a match_data block for holding the result. *
158 *************************************************************************/
159
160 /* Using this function ensures that the block is exactly the right size for
161 the number of capturing parentheses in the pattern. */
162
163 match_data = pcre2_match_data_create_from_pattern(re, NULL);
164
165 rc = pcre2_match(
166 re, /* the compiled pattern */
167 (PCRE2_SPTR) subject, /* the subject string */
168 (int)strlen(subject), /* the length of the subject */
169 0, /* start at offset 0 in the subject */
170 0, /* default options */
171 match_data, /* block for storing the result */
172 NULL); /* use default matching context */
173
174 /* Matching failed: handle error cases */
175
176 pcre2_code_free(re); /* Release memory used for the compiled pattern */
177 pcre2_match_data_free(match_data); /* release memory for the match data */
178 if (rc < 0) {
179 switch(rc) {
180 case PCRE2_ERROR_NOMATCH: break;
181 /* Handle other special cases if you like */
182 default:
183 GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: PCRE2 matching error %d.\n", rc);
184 return (-GMT_RUNTIME_ERROR);
185 break;
186 }
187 return (0); /* Match failed */
188 }
189
190 return (1); /* Match succeeded */
191
192 #elif defined HAVE_POSIX_ERE
193
194 /* Use POSIX ERE for matching
195 * Based on the regcomp documentation */
196 regex_t re;
197 int cflags = REG_EXTENDED|REG_NOSUB;
198 int status;
199 char err_msg[MAX_ERR_LENGTH];
200
201 if ( caseless )
202 cflags = cflags|REG_ICASE; /* caseless matching */
203
204 /* compile the RE */
205 if ( (status = regcomp(&re, pattern, cflags)) != 0) {
206 regerror(status, &re, err_msg, MAX_ERR_LENGTH);
207 GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: POSIX ERE compilation failed: %s\n", err_msg);
208 return (-GMT_RUNTIME_ERROR);
209 }
210
211 /* execute the RE against the subject string */
212 status = regexec(&re, subject, 0U, NULL, 0);
213 regfree(&re); /* Release memory used for the compiled pattern */
214 if ( status == 0 )
215 return (1); /* Match succeeded */
216 else if ( status != REG_NOMATCH ) {
217 /* this is when errors have been encountered */
218 regerror(status, &re, err_msg, MAX_ERR_LENGTH);
219 GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: POSIX ERE matching error: %s\n", err_msg); /* Report error. */
220 return (-GMT_RUNTIME_ERROR);
221 }
222 return (0); /* No match */
223
224 #else
225
226 /* disable ERE support */
227 GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: this GMT version was compiled without regular expression support.\n");
228 return (-GMT_RUNTIME_ERROR);
229
230 #endif
231 }
232