1 /*
2 ** Copyright (c) 2011 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7 
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
13 **   drh@hwaci.com
14 **   http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
** This file contains code used for pattern matching using "glob" syntax.
19 */
20 #include "config.h"
21 #include "glob.h"
22 #include <assert.h>
23 
24 /*
25 ** Construct and return a string which is an SQL expression that will
26 ** be TRUE if value zVal matches any of the GLOB expressions in the list
27 ** zGlobList.  For example:
28 **
29 **    zVal:       "x"
30 **    zGlobList:  "*.o,*.obj"
31 **
32 **    Result:     "(x GLOB '*.o' OR x GLOB '*.obj')"
33 **
34 ** Commas and whitespace are considered to be element delimters.  Each
35 ** element of the GLOB list may optionally be enclosed in either '...' or
36 ** "...".  This allows commas and/or whitespace to be used in the elements
37 ** themselves.
38 **
39 ** This routine makes no effort to free the memory space it uses, which
40 ** currently consists of a blob object and its contents.
41 */
glob_expr(const char * zVal,const char * zGlobList)42 char *glob_expr(const char *zVal, const char *zGlobList){
43   Blob expr;
44   const char *zSep = "(";
45   int nTerm = 0;
46   int i;
47   int cTerm;
48 
49   if( zGlobList==0 || zGlobList[0]==0 ) return fossil_strdup("0");
50   blob_zero(&expr);
51   while( zGlobList[0] ){
52     while( fossil_isspace(zGlobList[0]) || zGlobList[0]==',' ){
53       zGlobList++;  /* Skip leading commas, spaces, and newlines */
54     }
55     if( zGlobList[0]==0 ) break;
56     if( zGlobList[0]=='\'' || zGlobList[0]=='"' ){
57       cTerm = zGlobList[0];
58       zGlobList++;
59     }else{
60       cTerm = ',';
61     }
62     /* Find the next delimter (or the end of the string). */
63     for(i=0; zGlobList[i] && zGlobList[i]!=cTerm; i++){
64       if( cTerm!=',' ) continue; /* If quoted, keep going. */
65       if( fossil_isspace(zGlobList[i]) ) break; /* If space, stop. */
66     }
67     blob_appendf(&expr, "%s%s GLOB '%#q'", zSep, zVal, i, zGlobList);
68     zSep = " OR ";
69     if( cTerm!=',' && zGlobList[i] ) i++;
70     zGlobList += i;
71     if( zGlobList[0] ) zGlobList++;
72     nTerm++;
73   }
74   if( nTerm ){
75     blob_appendf(&expr, ")");
76     return blob_str(&expr);
77   }else{
78     return fossil_strdup("0");
79   }
80 }
81 
82 #if INTERFACE
/*
** A Glob object holds a set of glob patterns that are ready to be
** matched against a string.
*/
struct Glob {
  int nPattern;       /* Count of entries in azPattern[] */
  char **azPattern;   /* The patterns, one pointer per entry */
};
91 #endif /* INTERFACE */
92 
93 /*
94 ** zPatternList is a comma-separated list of glob patterns.  Parse up
95 ** that list and use it to create a new Glob object.
96 **
97 ** Elements of the glob list may be optionally enclosed in single our
98 ** double-quotes.  This allows a comma to be part of a glob pattern.
99 **
100 ** Leading and trailing spaces on unquoted glob patterns are ignored.
101 **
102 ** An empty or null pattern list results in a null glob, which will
103 ** match nothing.
104 */
glob_create(const char * zPatternList)105 Glob *glob_create(const char *zPatternList){
106   int nList;         /* Size of zPatternList in bytes */
107   int i;             /* Loop counters */
108   Glob *p;           /* The glob being created */
109   char *z;           /* Copy of the pattern list */
110   char delimiter;    /* '\'' or '\"' or 0 */
111 
112   if( zPatternList==0 || zPatternList[0]==0 ) return 0;
113   nList = strlen(zPatternList);
114   p = fossil_malloc( sizeof(*p) + nList+1 );
115   memset(p, 0, sizeof(*p));
116   z = (char*)&p[1];
117   memcpy(z, zPatternList, nList+1);
118   while( z[0] ){
119     while( fossil_isspace(z[0]) || z[0]==',' ){
120       z++;  /* Skip leading commas, spaces, and newlines */
121     }
122     if( z[0]==0 ) break;
123     if( z[0]=='\'' || z[0]=='"' ){
124       delimiter = z[0];
125       z++;
126     }else{
127       delimiter = ',';
128     }
129     p->azPattern = fossil_realloc(p->azPattern, (p->nPattern+1)*sizeof(char*) );
130     p->azPattern[p->nPattern++] = z;
131     /* Find the next delimter (or the end of the string). */
132     for(i=0; z[i] && z[i]!=delimiter; i++){
133       if( delimiter!=',' ) continue; /* If quoted, keep going. */
134       if( fossil_isspace(z[i]) ) break; /* If space, stop. */
135     }
136     if( z[i]==0 ) break;
137     z[i] = 0;
138     z += i+1;
139   }
140   return p;
141 }
142 
143 /*
144 ** Return true (non-zero) if zString matches any of the patterns in
145 ** the Glob.  The value returned is actually a 1-based index of the pattern
146 ** that matched.  Return 0 if none of the patterns match zString.
147 **
148 ** A NULL glob matches nothing.
149 */
glob_match(Glob * pGlob,const char * zString)150 int glob_match(Glob *pGlob, const char *zString){
151   int i;
152   if( pGlob==0 ) return 0;
153   for(i=0; i<pGlob->nPattern; i++){
154     if( sqlite3_strglob(pGlob->azPattern[i], zString)==0 ) return i+1;
155   }
156   return 0;
157 }
158 
159 /*
160 ** Free all memory associated with the given Glob object
161 */
glob_free(Glob * pGlob)162 void glob_free(Glob *pGlob){
163   if( pGlob ){
164     fossil_free(pGlob->azPattern);
165     fossil_free(pGlob);
166   }
167 }
168 
169 /*
170 ** Appends the given glob to the given buffer in the form of a
171 ** JS/JSON-compatible array. It requires that pDest have been
172 ** initialized. If pGlob is NULL or empty it emits [] (an empty
173 ** array).
174 */
glob_render_json_to_blob(Glob * pGlob,Blob * pDest)175 void glob_render_json_to_blob(Glob *pGlob, Blob *pDest){
176   int i = 0;
177   blob_append(pDest, "[", 1);
178   for( ; pGlob && i < pGlob->nPattern; ++i ){
179     if(i){
180       blob_append(pDest, ",", 1);
181     }
182     blob_appendf(pDest, "%!j", pGlob->azPattern[i]);
183   }
184   blob_append(pDest, "]", 1);
185 }
186 /*
187 ** Functionally equivalent to glob_render_json_to_blob()
188 ** but outputs via cgi_print().
189 */
glob_render_json_to_cgi(Glob * pGlob)190 void glob_render_json_to_cgi(Glob *pGlob){
191   int i = 0;
192   CX("[");
193   for( ; pGlob && i < pGlob->nPattern; ++i ){
194     if(i){
195       CX(",");
196     }
197     CX("%!j", pGlob->azPattern[i]);
198   }
199   CX("]");
200 }
201 
202 /*
203 ** COMMAND: test-glob
204 **
205 ** Usage:  %fossil test-glob PATTERN STRING...
206 **
207 ** PATTERN is a comma- and whitespace-separated list of optionally
208 ** quoted glob patterns.  Show which of the STRINGs that follow match
209 ** the PATTERN.
210 **
211 ** If PATTERN begins with "@" the rest of the pattern is understood
212 ** to be a setting name (such as binary-glob, crln-glob, or encoding-glob)
213 ** and the value of that setting is used as the actually glob pattern.
214 */
glob_test_cmd(void)215 void glob_test_cmd(void){
216   Glob *pGlob;
217   int i;
218   char *zPattern;
219   if( g.argc<4 ) usage("PATTERN STRING ...");
220   zPattern = g.argv[2];
221   if( zPattern[0]=='@' ){
222     db_find_and_open_repository(OPEN_ANY_SCHEMA,0);
223     zPattern = db_get(zPattern+1, 0);
224     if( zPattern==0 ) fossil_fatal("no such setting: %s", g.argv[2]+1);
225     fossil_print("GLOB pattern: %s\n", zPattern);
226   }
227   fossil_print("SQL expression: %s\n", glob_expr("x", zPattern));
228   pGlob = glob_create(zPattern);
229   for(i=0; i<pGlob->nPattern; i++){
230     fossil_print("pattern[%d] = [%s]\n", i, pGlob->azPattern[i]);
231   }
232   for(i=3; i<g.argc; i++){
233     fossil_print("%d %s\n", glob_match(pGlob, g.argv[i]), g.argv[i]);
234   }
235   glob_free(pGlob);
236 }
237