1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29 #include "ec_glob.h"
30
31 #include <stdlib.h>
32 #include <string.h>
33 #include <errno.h>
34
35 #include <regex.h>
36
37 struct numpair_s {
38 long min;
39 long max;
40 };
41
42 struct ec_glob_re {
43 char *str;
44 unsigned len;
45 unsigned capacity;
46 };
47
48 #ifndef EC_GLOB_STACK_CAPACITY
49 #define EC_GLOB_STACK_CAPACITY 64
50 #endif
51
52 static void ec_glob_pattern_increase_capacity(
struct ec_glob_re *re) {
53 unsigned newcap = re->capacity *
2;
54 char *newmem;
55 if (re->capacity ==
EC_GLOB_STACK_CAPACITY) {
56 newmem = malloc(newcap);
57 if (newmem ==
NULL) abort();
58 memcpy(newmem, re->str, re->len);
59 }
else {
60 newmem = realloc(re->str, newcap);
61 if (newmem ==
NULL) abort();
62 }
63 re->capacity = newcap;
64 re->str = newmem;
65 }
66
67 #define ec_glob_pattern_ensure_capacity(re, n) \
68 if (re.len + n > re.capacity) ec_glob_pattern_increase_capacity(&re)
69
70 #define ec_glob_cats(re, s) \
71 ec_glob_pattern_ensure_capacity(re,
sizeof(s) -
1); \
72 memcpy((re).str + (re).len, s,
sizeof(s) -
1); (re).len +=
sizeof(s)-
1
73
74 #define ec_glob_catc(re, c) \
75 ec_glob_pattern_ensure_capacity(re,
1); (re).str[(re).len++] = (c)
76
77
78 int ec_glob(
const char *pattern,
const char *string) {
79 char stack[
EC_GLOB_STACK_CAPACITY];
80 struct ec_glob_re re_pattern = {
81 stack,
1,
EC_GLOB_STACK_CAPACITY
82 };
83 re_pattern.str[
0] =
'^';
84
85 unsigned scanidx =
0;
86 unsigned inputlen = strlen(pattern);
87 char c;
88
89
90 const unsigned brace_stack_size =
32;
91 char brace_stack[brace_stack_size];
92 int depth_brace =
0;
93 _Bool braces_valid =
1;
94
95
96 for (
unsigned i =
0 ; i < inputlen ; i++) {
97
98 if (pattern[i] ==
'\\') {
99 i++;
100 }
else if (pattern[i] ==
'{') {
101 depth_brace++;
102 }
else if (pattern[i] ==
'}') {
103 if (depth_brace >
0) {
104 depth_brace--;
105 }
else {
106 braces_valid =
0;
107 break;
108 }
109 }
110 }
111 if (depth_brace >
0) {
112 braces_valid =
0;
113 depth_brace =
0;
114 }
115
116
117 const unsigned numrange_max =
32;
118 unsigned numrange_grp_count =
0;
119 unsigned numrange_grp_idx[numrange_max];
120 struct numpair_s numrange_pairs[numrange_max];
121 regmatch_t numrange_matches[numrange_max];
122
123
124
125 numrange_grp_idx[
0] =
0;
126
127
128 while (scanidx < inputlen) {
129 c = pattern[scanidx++];
130
131
132 if (c ==
'\\') {
133 if (strchr(
"?{}[]*\\-,", pattern[scanidx]) !=
NULL) {
134
135 if (strchr(
"?{}[]*\\", pattern[scanidx]) !=
NULL) {
136 ec_glob_catc(re_pattern,
'\\');
137 }
138 c = pattern[scanidx++];
139 ec_glob_catc(re_pattern, c);
140 }
141
142 else {
143 ec_glob_cats(re_pattern,
"\\\\");
144 }
145 }
146
147 else if (c ==
'*') {
148
149 if (pattern[scanidx] ==
'*') {
150 scanidx++;
151
152 if (pattern[scanidx] ==
'/' && scanidx >=
3
153 && pattern[scanidx -
3] ==
'/') {
154
155 scanidx++;
156 }
157 ec_glob_cats(re_pattern,
".*");
158 }
else {
159 ec_glob_cats(re_pattern,
"[^/]*");
160 }
161 }
162
163 else if (c ==
'?') {
164 ec_glob_catc(re_pattern,
'.');
165 }
166
167 else if (c ==
'{') {
168
169 if (!braces_valid) {
170 ec_glob_cats(re_pattern,
"\\{");
171 continue;
172 }
173
174
175 depth_brace++;
176 if (depth_brace > brace_stack_size) {
177
178 ec_glob_cats(re_pattern,
"\\{");
179 continue;
180 }
181
182
183 _Bool single =
1;
184 _Bool dotdot = strchr(
"+-0123456789", pattern[scanidx]) !=
NULL;
185 _Bool dotdot_seen =
0;
186 for (
unsigned fw = scanidx; fw < inputlen; fw++) {
187 if (pattern[fw] ==
',') {
188 single =
0;
189 dotdot =
0;
190 break;
191 }
192 else if (pattern[fw] ==
'}') {
193
194 if (dotdot) {
195 _Bool ok =
1;
196 unsigned ngc = numrange_grp_count;
197 char *chk;
198 errno =
0;
199 numrange_pairs[ngc].min = strtol(
200 &pattern[scanidx], &chk,
10);
201 ok &= *chk ==
'.' &&
0 == errno;
202 numrange_pairs[ngc].max = strtol(
203 strrchr(&pattern[scanidx],
'.')+
1, &chk,
10);
204 ok &= *chk ==
'}' &&
0 == errno;
205 if (ok) {
206
207 single =
0;
208
209 scanidx = fw+
1;
210 }
else {
211
212 dotdot =
0;
213 }
214 }
215 break;
216 }
else if (dotdot) {
217
218 if (pattern[fw] ==
'.') {
219 if (!dotdot_seen &&
220 fw +
2 < inputlen && pattern[fw +
1] ==
'.' &&
221 strchr(
"+-0123456789", pattern[fw +
2]) !=
NULL) {
222 fw +=
2;
223 dotdot_seen =
1;
224 }
else {
225 dotdot =
0;
226 }
227 }
228
229 else if (!strchr(
"0123456789", pattern[fw])) {
230 dotdot =
0;
231 }
232 }
233 }
234
235 if (single) {
236
237 ec_glob_cats(re_pattern,
"\\{");
238 brace_stack[depth_brace-
1] =
'}';
239 }
else {
240
241 ec_glob_catc(re_pattern,
'(');
242
243
244 numrange_grp_idx[numrange_grp_count]++;
245
246 if (dotdot) {
247
248 ec_glob_cats(re_pattern,
"[-+]?[0-9]+)");
249
250
251 numrange_grp_count++;
252 if (numrange_grp_count < numrange_max) {
253 numrange_grp_idx[numrange_grp_count] =
254 numrange_grp_idx[numrange_grp_count -
1];
255 }
256
257 depth_brace--;
258 }
else {
259
260 brace_stack[depth_brace -
1] =
')';
261 }
262 }
263 }
264
265 else if (depth_brace >
0 && c ==
'}') {
266 depth_brace--;
267 if (depth_brace < brace_stack_size
268 && brace_stack[depth_brace] ==
')') {
269 ec_glob_catc(re_pattern,
')');
270 }
else {
271 ec_glob_cats(re_pattern,
"\\}");
272 }
273 }
274
275 else if (depth_brace >
0 && c ==
',') {
276 ec_glob_catc(re_pattern,
'|');
277 }
278
279 else if (c ==
'[') {
280
281 _Bool valid =
0;
282 _Bool closing_bracket_literal =
0;
283 unsigned newidx;
284 for (
unsigned fw = scanidx ; fw < inputlen ; fw++) {
285 if (pattern[fw] ==
']') {
286
287 if (fw == scanidx) {
288
289 closing_bracket_literal =
1;
290 }
else {
291 valid =
1;
292 newidx = fw+
1;
293 break;
294 }
295 }
else if (pattern[fw] ==
'/') {
296
297
298 break;
299 }
else if (pattern[fw] ==
'\\') {
300
301 fw++;
302 closing_bracket_literal |= pattern[fw] ==
']';
303 }
304 }
305
306
307 if (valid) {
308
309 if (pattern[scanidx] ==
'!') {
310 scanidx++;
311 ec_glob_cats(re_pattern,
"[^");
312 }
else {
313 ec_glob_catc(re_pattern,
'[');
314 }
315
316
317 if (closing_bracket_literal) {
318 ec_glob_catc(re_pattern,
']');
319
320
321 if (pattern[scanidx] ==
'-') {
322 scanidx++;
323 }
324 }
325
326
327
328
329 for (
unsigned fw = scanidx ; ; fw++) {
330 if (pattern[fw] ==
'\\') {
331
332 continue;
333 }
334
335 else if (pattern[fw] ==
']') {
336 if (fw > scanidx && pattern[fw-
1] !=
'\\') {
337 break;
338 }
339 }
340
341 else {
342 if (strchr(
".(){}[]", pattern[fw]) !=
NULL) {
343 ec_glob_catc(re_pattern,
'\\');
344 }
345 ec_glob_catc(re_pattern, pattern[fw]);
346 }
347 }
348
349
350 if (pattern[scanidx-
1] ==
'-') {
351 ec_glob_cats(re_pattern,
"-]");
352 }
else {
353 ec_glob_catc(re_pattern,
']');
354 }
355 scanidx = newidx;
356 }
else {
357
358 ec_glob_cats(re_pattern,
"\\[");
359 }
360 }
361
362 else if (strchr(
".(){}[]", c) !=
NULL) {
363 ec_glob_catc(re_pattern,
'\\');
364 ec_glob_catc(re_pattern, c);
365 }
366
367 else {
368 ec_glob_catc(re_pattern, c);
369 }
370 }
371
372
373 ec_glob_catc(re_pattern,
'$');
374 ec_glob_catc(re_pattern,
'\0');
375
376
377
378 regex_t re;
379 int status;
380 int flags =
REG_EXTENDED;
381
382
383 if (numrange_grp_count ==
0) {
384 flags |=
REG_NOSUB;
385 }
386
387 if ((status = regcomp(&re, re_pattern.str, flags)) ==
0) {
388 status = regexec(&re, string, numrange_max, numrange_matches,
0);
389
390
391 for (
unsigned i =
0 ; status ==
0 && i < numrange_grp_count ; i++) {
392 regmatch_t nm = numrange_matches[numrange_grp_idx[i]];
393 int nmlen = nm.rm_eo-nm.rm_so;
394 char *nmatch = malloc(nmlen+
1);
395 memcpy(nmatch, string+nm.rm_so, nmlen);
396 nmatch[nmlen] =
'\0';
397 errno =
0;
398 char *chk;
399 long num = strtol(nmatch, &chk,
10);
400 if (*chk ==
'\0' &&
0 == errno) {
401
402 status |= !(numrange_pairs[i].min <= num
403 && num <= numrange_pairs[i].max);
404 }
else {
405
406 status =
1;
407 }
408 free(nmatch);
409 }
410 regfree(&re);
411 }
412
413 if (re_pattern.capacity >
EC_GLOB_STACK_CAPACITY) {
414 free(re_pattern.str);
415 }
416
417 return status;
418 }
419