1 /*--------------------------------------------------------------------
2  *
3  *	Copyright (c) 1991-2021 by the GMT Team (https://www.generic-mapping-tools.org/team.html)
4  *	See LICENSE.TXT file for copying and redistribution conditions.
5  *
6  *	This program is free software; you can redistribute it and/or modify
7  *	it under the terms of the GNU Lesser General Public License as published by
8  *	the Free Software Foundation; version 3 or any later version.
9  *
10  *	This program is distributed in the hope that it will be useful,
11  *	but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *	GNU Lesser General Public License for more details.
14  *
15  *	Contact info: www.generic-mapping-tools.org
16  *--------------------------------------------------------------------*/
17 /*
18  *  Extended regular expression matching function.
19  *
20  * Author:	Florian Wobbe
21  * Date:	11-AUG-2010
22  * Version:	5.x
23  *
24  * PUBLIC functions:
25  *
26  * gmtlib_regexp_match:	Match a string against an extended regular expression
27  *
28  */
29 
30 #include "gmt_dev.h"
31 #include "gmt_internals.h"
32 
33 /*
34  * ERE pattern matching with PCRE2, PCRE or POSIX
35  */
36 #ifdef HAVE_PCRE
37 #include <pcre.h>
38 #define OVECCOUNT 30        /* should be a multiple of 3 */
39 #elif defined HAVE_PCRE2
40 #define PCRE2_CODE_UNIT_WIDTH 8
41 #include <pcre2.h>
42 #elif defined HAVE_POSIX_ERE
43 #include <regex.h>
44 #define MAX_ERR_LENGTH 80   /* max error message length */
45 #endif
46 
gmtlib_regexp_match(struct GMT_CTRL * GMT,const char * subject,const char * pattern,bool caseless)47 int gmtlib_regexp_match (struct GMT_CTRL *GMT, const char *subject, const char *pattern, bool caseless) {
48 /* Match string against the extended regular expression in pattern. Return 1 for match, 0 for no match.
49  * Returns a negative value for fatal errors. */
50 
51 #ifdef HAVE_PCRE
52 
53 	/* Use PCRE for matching
54 	 * Based on PCRE DEMONSTRATION PROGRAM pcredemo.c
55 	 */
56 	pcre *re;
57 	const char *error;
58 	int erroffset;
59 	int ovector[OVECCOUNT];
60 	int rc;
61 	int options = 0; /* default options */
62 
63 	/*************************************************************************
64 	 * Now we are going to compile the regular expression pattern, and handle *
65 	 * any errors that are detected.                                          *
66 	 *************************************************************************/
67 
68 	if (caseless) options = options|PCRE_CASELESS;      /* caseless matching */
69 
70 	re = pcre_compile(
71 			pattern,              /* the pattern */
72 			options,              /* options */
73 			&error,               /* for error message */
74 			&erroffset,           /* for error offset */
75 			NULL);                /* use default character tables */
76 
77 	/* Compilation failed: print the error message and exit */
78 
79 	if (re == NULL) {
80 		GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: PCRE compilation failed at offset %d: %s.\n", erroffset, error);
81 		return (-GMT_RUNTIME_ERROR);
82 	}
83 
84 	/*************************************************************************
85  	* If the compilation succeeded, we call PCRE again, in order to do a     *
86  	* pattern match against the subject string. This does just ONE match. If *
87  	* further matching is needed, it will be done below.                     *
88  	*************************************************************************/
89 
90 	rc = pcre_exec(
91 			re,                   /* the compiled pattern */
92 			NULL,                 /* no extra data - we didn't study the pattern */
93 			subject,              /* the subject string */
94 			(int)strlen(subject), /* the length of the subject */
95 			0,                    /* start at offset 0 in the subject */
96 			0,                    /* default options */
97 			ovector,              /* output vector for substring information */
98 			OVECCOUNT);           /* number of elements in the output vector */
99 
100 	/* Matching failed: handle error cases */
101 
102 	pcre_free(re);	/* Release memory used for the compiled pattern */
103 	if (rc < 0) {
104 		switch(rc) {
105 			case PCRE_ERROR_NOMATCH: break;
106 			/* Handle other special cases if you like */
107 			default:
108 				 GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: PCRE matching error %d.\n", rc);
109 				 return (-GMT_RUNTIME_ERROR);
110 				 break;
111 		}
112 		return (0);	/* Match failed */
113 	}
114 
115 	return (1); /* Match succeeded */
116 
117 #elif defined HAVE_PCRE2
118 
119 	/* Use PCRE2 for matching
120 	 * Based on PCRE2 DEMONSTRATION PROGRAM pcre2demo.c
121 	 */
122 	pcre2_code *re;
123 	PCRE2_SIZE erroffset;
124 	pcre2_match_data *match_data;
125 	int errornumber;
126 	int rc;
127 	int options = 0; /* default options */
128 
129 	/*************************************************************************
130 	 * Now we are going to compile the regular expression pattern, and handle *
131 	 * any errors that are detected.                                          *
132 	 *************************************************************************/
133 
134 	if (caseless) options = options|PCRE2_CASELESS;      /* caseless matching */
135 
136 	re = pcre2_compile(
137 			(PCRE2_SPTR) pattern, /* the pattern */
138 			PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
139 			options,              /* options */
140 			&errornumber,         /* for error number */
141 			&erroffset,           /* for error offset */
142 			NULL);                /* use default compile context */
143 
144 	/* Compilation failed: print the error message and exit */
145 
146 	if (re == NULL) {
147 		PCRE2_UCHAR error[256];
148 		pcre2_get_error_message(errornumber, error, sizeof(error));
149 		GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: PCRE2 compilation failed at offset %d: %s.\n", erroffset, error);
150 		return (-GMT_RUNTIME_ERROR);
151 	}
152 
153 	/*************************************************************************
154  	* If the compilation succeeded, we call PCRE again, in order to do a     *
155  	* pattern match against the subject string. This does just ONE match. If *
156  	* further matching is needed, it will be done below. Before running the  *
157     * match we must set up a match_data block for holding the result.        *
158  	*************************************************************************/
159 
160 	/* Using this function ensures that the block is exactly the right size for
161 	the number of capturing parentheses in the pattern. */
162 
163 	match_data = pcre2_match_data_create_from_pattern(re, NULL);
164 
165 	rc = pcre2_match(
166 			re,                   /* the compiled pattern */
167 			(PCRE2_SPTR) subject, /* the subject string */
168 			(int)strlen(subject), /* the length of the subject */
169 			0,                    /* start at offset 0 in the subject */
170 			0,                    /* default options */
171 			match_data,           /* block for storing the result */
172 			NULL);                /* use default matching context */
173 
174 	/* Matching failed: handle error cases */
175 
176 	pcre2_code_free(re);                /* Release memory used for the compiled pattern */
177 	pcre2_match_data_free(match_data);  /* release memory for the match data */
178 	if (rc < 0) {
179 		switch(rc) {
180 			case PCRE2_ERROR_NOMATCH: break;
181 			/* Handle other special cases if you like */
182 			default:
183 				 GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: PCRE2 matching error %d.\n", rc);
184 				 return (-GMT_RUNTIME_ERROR);
185 				 break;
186 		}
187 		return (0);	/* Match failed */
188 	}
189 
190 	return (1); /* Match succeeded */
191 
192 #elif defined HAVE_POSIX_ERE
193 
194 	/* Use POSIX ERE for matching
195 	 * Based on the regcomp documentation */
196 	regex_t re;
197 	int cflags = REG_EXTENDED|REG_NOSUB;
198 	int status;
199 	char err_msg[MAX_ERR_LENGTH];
200 
201 	if ( caseless )
202 		cflags = cflags|REG_ICASE; /* caseless matching */
203 
204 	/* compile the RE */
205 	if ( (status = regcomp(&re, pattern, cflags)) != 0) {
206 		regerror(status, &re, err_msg, MAX_ERR_LENGTH);
207 		GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: POSIX ERE compilation failed: %s\n", err_msg);
208 		return (-GMT_RUNTIME_ERROR);
209 	}
210 
211 	/* execute the RE against the subject string */
212 	status = regexec(&re, subject, 0U, NULL, 0);
213 	regfree(&re);     /* Release memory used for the compiled pattern */
214 	if ( status == 0 )
215 		return (1); /* Match succeeded */
216 	else if ( status != REG_NOMATCH ) {
217 		/* this is when errors have been encountered */
218 		regerror(status, &re, err_msg, MAX_ERR_LENGTH);
219 		GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: POSIX ERE matching error: %s\n", err_msg); /* Report error. */
220 		return (-GMT_RUNTIME_ERROR);
221 	}
222 	return (0); /* No match */
223 
224 #else
225 
226 	/* disable ERE support */
227 	GMT_Report (GMT->parent, GMT_MSG_ERROR, "gmtlib_regexp_match: this GMT version was compiled without regular expression support.\n");
228 	return (-GMT_RUNTIME_ERROR);
229 
230 #endif
231 }
232