1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45 #ifdef HAVE_CONFIG_H
46 #include "../config.h"
47 #endif
48
49 #include "regexConvert.h"
50 #include "../util/nedit_malloc.h"
51
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <ctype.h>
56 #include <limits.h>
57
58 #include <X11/Intrinsic.h>
59
60 #ifdef HAVE_DEBUG_H
61 #include "../debug.h"
62 #endif
63
64
65
66
/* Upper bound on the parenthesis counter Total_Paren (which starts at 1). */
#define NSUBEXP 50

/* Store error message `m' through Error_Ptr and make the enclosing function
   return 0.  Wrapped in do/while(0) so that the macro behaves as a single
   statement, e.g. in `if (x) CONVERT_FAIL (m); else ...'. */
#define CONVERT_FAIL(m) do {*Error_Ptr = (m); return 0;} while (0)

/* True when `c' is one of the quantifier characters `*', `+' or `?'. */
#define IS_QUANTIFIER(c) ((c) == '*' || (c) == '+' || (c) == '?')

/* Read the byte at `p' as an unsigned value. */
#define U_CHAR_AT(p) ((unsigned int) *(unsigned char *)(p))

/* Flag bits passed back up through the `flag_param' arguments. */
#define WORST 0      /* No flags set. */
#define HAS_WIDTH 1  /* Set by atoms that consume at least one character. */
#define SIMPLE 2     /* Set for `.', `[...]' and one-character literals. */

/* Values for the `paren' argument of chunk(). */
#define NO_PAREN 0   /* Top-level expression, terminated by NUL. */
#define PAREN 1      /* Parenthesized group, terminated by `)'. */

#define REG_ZERO 0UL
#define REG_ONE 1UL

/* State shared by the conversion routines. */

static unsigned char *Reg_Parse;      /* Read position in the input expression. */
static int Total_Paren;               /* Parenthesis counter, checked against
                                         NSUBEXP. */
static unsigned long Convert_Size;    /* Output bytes counted while sizing. */
static unsigned char *Code_Emit_Ptr;  /* Write position in the output, or
                                         &Compute_Size while only counting. */

/* Never written to; only its address is used, as the "count, do not store"
   marker value of Code_Emit_Ptr. */
static unsigned char Compute_Size;
static char **Error_Ptr;              /* Where CONVERT_FAIL stores its message. */

static char Error_Text [128];         /* Scratch buffer for formatted messages. */

/* Characters that end a run of literal characters in atom(). */
static unsigned char Meta_Char [] = ".*+?[(|)^<>$";

static unsigned char *Convert_Str;    /* Buffer holding the converted expression. */

/* Forward declarations of the routines private to this file. */

static int alternative (int *flag_param);
static int chunk (int paren, int *flag_param);
static void emit_convert_byte (unsigned char c);
static unsigned char literal_escape (unsigned char c, int action);
static int atom (int *flag_param);
static void reg_error (char *str);
static int piece (int *flag_param);
115
116
117
118
119
120
121
122
123
124
125
126 char * ConvertRE (
const char *exp,
char **errorText) {
127
128 int flags_local, pass;
129
130
131
132 Error_Ptr = errorText;
133 *Error_Ptr =
"";
134
135 if (exp ==
NULL)
CONVERT_FAIL (
"NULL argument to `ConvertRE\'");
136
137 Code_Emit_Ptr = &Compute_Size;
138 Convert_Size =
0UL;
139
140
141
142
143
144
145
146
147
148
149 for (pass =
1; pass <=
2; pass++) {
150
151
152
153
154
155 Reg_Parse = (
unsigned char *) exp;
156 Total_Paren =
1;
157
158 if (chunk (
NO_PAREN, &flags_local) ==
0)
return (
NULL);
159
160 emit_convert_byte (
'\0');
161
162 if (pass ==
1) {
163
164
165 Convert_Str =
166 (
unsigned char *) malloc(
sizeof (
unsigned char) * Convert_Size);
167
168 if (Convert_Str ==
NULL) {
169 CONVERT_FAIL (
"out of memory in `ConvertRE\'");
170 }
171
172 Code_Emit_Ptr = Convert_Str;
173 }
174 }
175
176 return (
char *) Convert_Str;
177 }
178
179
180
181
182
183
184
185
186
187 static int chunk (
int paren,
int *flag_param) {
188
189 register int this_branch;
190 int flags_local;
191
192 *flag_param =
HAS_WIDTH;
193
194
195
196 if (paren ==
PAREN) {
197 if (Total_Paren >=
NSUBEXP) {
198 sprintf (Error_Text,
"number of ()''s > %d", (
int)
NSUBEXP);
199 CONVERT_FAIL (Error_Text);
200 }
201
202 Total_Paren++;
203 }
204
205
206
207 do {
208 this_branch = alternative (&flags_local);
209
210 if (this_branch ==
0)
return 0;
211
212
213
214
215 if (!(flags_local &
HAS_WIDTH)) *flag_param &= ~
HAS_WIDTH;
216
217
218
219 if (*Reg_Parse !=
'|')
break;
220
221 emit_convert_byte (
'|');
222
223 Reg_Parse++;
224 }
while (
1);
225
226
227
228 if (paren !=
NO_PAREN && *Reg_Parse !=
')') {
229 CONVERT_FAIL (
"missing right parenthesis \')\'");
230
231 }
else if (paren !=
NO_PAREN) {
232 emit_convert_byte (
')');
233 Reg_Parse++;
234
235 }
else if (paren ==
NO_PAREN && *Reg_Parse !=
'\0') {
236 if (*Reg_Parse ==
')') {
237 CONVERT_FAIL (
"missing left parenthesis \'(\'");
238 }
else {
239 CONVERT_FAIL (
"junk on end");
240 }
241 }
242
243 return 1;
244 }
245
246
247
248
249
250 static int alternative (
int *flag_param) {
251
252 int ret_val;
253 int flags_local;
254
255 *flag_param =
WORST;
256
257
258
259
260 while (*Reg_Parse !=
'|' && *Reg_Parse !=
')' && *Reg_Parse !=
'\0') {
261 ret_val = piece (&flags_local);
262
263 if (ret_val ==
0)
return 0;
264
265 *flag_param |= flags_local &
HAS_WIDTH;
266 }
267
268 return 1;
269 }
270
271
272
273
274
275 static int piece (
int *flag_param) {
276
277 register int ret_val;
278 register unsigned char op_code;
279 unsigned long min_val =
REG_ZERO;
280 int flags_local;
281
282 ret_val = atom (&flags_local);
283
284 if (ret_val ==
0)
return 0;
285
286 op_code = *Reg_Parse;
287
288 if (!
IS_QUANTIFIER (op_code)) {
289 *flag_param = flags_local;
290
291 return (ret_val);
292 }
293
294 Reg_Parse++;
295
296 if (op_code ==
'+') min_val =
REG_ONE;
297
298
299
300
301 if (!(flags_local &
HAS_WIDTH) && min_val >
REG_ZERO) {
302 sprintf (Error_Text,
"%c operand could be empty", op_code);
303
304 CONVERT_FAIL (Error_Text);
305 }
306
307 *flag_param = (min_val >
REG_ZERO) ? (
WORST |
HAS_WIDTH) :
WORST;
308
309 if ( !((op_code ==
'*') || (op_code ==
'+') || (op_code ==
'?')) ) {
310
311
312
313 CONVERT_FAIL (
"internal error #2, `piece\'");
314 }
315
316 if (
IS_QUANTIFIER (*Reg_Parse)) {
317 sprintf (Error_Text,
"nested quantifiers, %c%c", op_code, *Reg_Parse);
318
319 CONVERT_FAIL (Error_Text);
320 }
321
322 emit_convert_byte (op_code);
323
324 return (ret_val);
325 }
326
327
328
329
330
331 static int atom (
int *flag_param) {
332 int ret_val =
1;
333 unsigned char test;
334 int flags_local;
335
336 *flag_param =
WORST;
337
338 switch (*Reg_Parse++) {
339 case '^':
340 emit_convert_byte (
'^');
341 break;
342
343 case '$':
344 emit_convert_byte (
'$');
345 break;
346
347 case '<':
348 emit_convert_byte (
'<');
349 break;
350
351 case '>':
352 emit_convert_byte (
'>');
353 break;
354
355 case '.':
356 emit_convert_byte (
'.');
357
358 *flag_param |= (
HAS_WIDTH |
SIMPLE);
break;
359
360 case '(':
361 emit_convert_byte (
'(');
362
363 ret_val = chunk (
PAREN, &flags_local);
364
365 if (ret_val ==
0)
return 0;
366
367
368
369 *flag_param |= flags_local &
HAS_WIDTH;
370
371 break;
372
373 case '\0':
374 case '|':
375 case ')':
376 CONVERT_FAIL (
"internal error #3, `atom\'");
377
378 case '?':
379 case '+':
380 case '*':
381 sprintf (Error_Text,
"%c follows nothing", *(Reg_Parse -
1));
382 CONVERT_FAIL (Error_Text);
383
384 case '{':
385 emit_convert_byte (
'\\');
386 emit_convert_byte (
'{');
387
388 break;
389
390 case '[':
391 {
392 register unsigned int last_value;
393 unsigned char last_emit =
0;
394 unsigned char buffer [
500];
395 int head =
0;
396 int negated =
0;
397 int do_brackets =
1;
398 int a_z_flag =
0;
399 int A_Z_flag =
0;
400 int zero_nine =
0;
401 int u_score_flag =
0;
402
403 buffer [
0] =
'\0';
404
405
406
407 if (*Reg_Parse ==
'^') {
408 negated =
1;
409
410 Reg_Parse++;
411 }
412
413 if (*Reg_Parse ==
']' || *Reg_Parse ==
'-') {
414
415
416
417 last_emit = *Reg_Parse;
418
419 if (head >=
498) {
420 CONVERT_FAIL (
"too much data in [] to convert.");
421 }
422
423 buffer [head++] =
'\\';
424 buffer [head++] = *Reg_Parse;
425
426 Reg_Parse++;
427 }
428
429
430
431 while (*Reg_Parse !=
'\0' && *Reg_Parse !=
']') {
432 if (*Reg_Parse ==
'-') {
433 Reg_Parse++;
434
435 if (*Reg_Parse ==
']' || *Reg_Parse ==
'\0') {
436
437
438
439
440 last_emit =
'-';
441
442 if (head >=
498) {
443 CONVERT_FAIL (
"too much data in [] to convert.");
444 }
445
446 buffer [head++] =
'\\';
447 buffer [head++] =
'-';
448
449 }
else {
450 if (*Reg_Parse ==
'\\') {
451
452
453 Reg_Parse++;
454
455 if ((test = literal_escape (*Reg_Parse,
0))) {
456
457 buffer [head++] =
'-';
458
459 if (*Reg_Parse !=
'\"') {
460 emit_convert_byte (
'\\');
461 }
462
463 buffer [head++] = *Reg_Parse;
464 last_value = (
unsigned int) test;
465 }
else {
466 sprintf (
467 Error_Text,
468 "\\%c is an invalid escape sequence(3)",
469 *Reg_Parse);
470
471 CONVERT_FAIL (Error_Text);
472 }
473 }
else {
474 last_value =
U_CHAR_AT (Reg_Parse);
475
476 if (last_emit ==
'0' && last_value ==
'9') {
477 zero_nine =
1;
478 head--;
479 }
else if (last_emit ==
'a' && last_value ==
'z') {
480 a_z_flag =
1;
481 head--;
482 }
else if (last_emit ==
'A' && last_value ==
'Z') {
483 A_Z_flag =
1;
484 head--;
485 }
else {
486 buffer [head++] =
'-';
487
488 if ((test = literal_escape (*Reg_Parse,
1))) {
489
490
491
492 if (head >=
495) {
493 CONVERT_FAIL (
494 "too much data in [] to convert.");
495 }
496
497 buffer [head++] =
'\\';
498
499 if (test ==
'0') {
500 test = *Reg_Parse;
501 buffer [head++] =
'0';
502 buffer [head++] = (
'0' + (test /
64));
503 test -= (test /
64) *
64;
504 buffer [head++] = (
'0' + (test /
8));
505 test -= (test /
8) *
8;
506 buffer [head++] = (
'0' + test);
507 }
else {
508 buffer [head++] = test;
509 }
510 }
else {
511 buffer [head++] = last_value;
512 }
513 }
514 }
515
516 if (last_emit > last_value) {
517 CONVERT_FAIL (
"invalid [] range");
518 }
519
520 last_emit = (
unsigned char) last_value;
521
522 Reg_Parse++;
523
524 }
525 }
else if (*Reg_Parse ==
'\\') {
526 Reg_Parse++;
527
528 if ((test = literal_escape (*Reg_Parse,
0)) !=
'\0') {
529 last_emit = test;
530
531 if (head >=
498) {
532 CONVERT_FAIL (
"too much data in [] to convert.");
533 }
534
535 if (*Reg_Parse !=
'\"') {
536 buffer [head++] =
'\\';
537 }
538
539 buffer [head++] = *Reg_Parse;
540
541 }
else {
542 sprintf (Error_Text,
543 "\\%c is an invalid escape sequence(1)",
544 *Reg_Parse);
545
546 CONVERT_FAIL (Error_Text);
547 }
548
549 Reg_Parse++;
550
551
552 }
else {
553 last_emit = *Reg_Parse;
554
555 if (*Reg_Parse ==
'_') {
556 u_score_flag =
1;
557
558 }
else if ((test = literal_escape (*Reg_Parse,
1))) {
559
560
561
562 if (head >=
495) {
563 CONVERT_FAIL (
"too much data in [] to convert.");
564 }
565
566 buffer [head++] =
'\\';
567
568 if (test ==
'0') {
569 test = *Reg_Parse;
570 buffer [head++] =
'0';
571 buffer [head++] = (
'0' + (test /
64));
572 test -= (test /
64) *
64;
573 buffer [head++] = (
'0' + (test /
8));
574 test -= (test /
8) *
8;
575 buffer [head++] = (
'0' + test);
576 }
else {
577 if (head >=
499) {
578 CONVERT_FAIL (
"too much data in [] to convert.");
579 }
580
581 buffer [head++] = test;
582 }
583 }
else {
584 if (head >=
499) {
585 CONVERT_FAIL (
"too much data in [] to convert.");
586 }
587
588 buffer [head++] = *Reg_Parse;
589 }
590
591 Reg_Parse++;
592 }
593 }
594
595 if (*Reg_Parse !=
']')
CONVERT_FAIL (
"missing right \']\'");
596
597 buffer [head] =
'\0';
598
599
600
601
602
603
604
605 Reg_Parse++; *flag_param |=
HAS_WIDTH |
SIMPLE;
606
607 if (head ==
0) {
608 if (( a_z_flag && A_Z_flag && zero_nine && u_score_flag) ||
609 ( a_z_flag && A_Z_flag && !zero_nine && !u_score_flag) ||
610 (!a_z_flag && !A_Z_flag && zero_nine && !u_score_flag)) {
611
612 do_brackets =
0;
613 }
614 }
615
616 if (do_brackets) {
617 emit_convert_byte (
'[');
618 if (negated) emit_convert_byte (
'^');
619 }
620
621
622
623 while (a_z_flag || A_Z_flag || zero_nine || u_score_flag) {
624 if (a_z_flag && A_Z_flag && zero_nine && u_score_flag) {
625 emit_convert_byte (
'\\');
626
627 if (negated && !do_brackets) {
628 emit_convert_byte (
'W');
629 }
else {
630 emit_convert_byte (
'w');
631 }
632
633 a_z_flag = A_Z_flag = zero_nine = u_score_flag =
0;
634 }
else if (a_z_flag && A_Z_flag) {
635 emit_convert_byte (
'\\');
636
637 if (negated && !do_brackets) {
638 emit_convert_byte (
'L');
639 }
else {
640 emit_convert_byte (
'l');
641 }
642
643 a_z_flag = A_Z_flag =
0;
644 }
else if (zero_nine) {
645 emit_convert_byte (
'\\');
646
647 if (negated && !do_brackets) {
648 emit_convert_byte (
'D');
649 }
else {
650 emit_convert_byte (
'd');
651 }
652
653 zero_nine =
0;
654 }
else if (a_z_flag) {
655 emit_convert_byte (
'a');
656 emit_convert_byte (
'-');
657 emit_convert_byte (
'z');
658
659 a_z_flag =
0;
660 }
else if (A_Z_flag) {
661 emit_convert_byte (
'A');
662 emit_convert_byte (
'-');
663 emit_convert_byte (
'Z');
664
665 A_Z_flag =
0;
666 }
else if (u_score_flag) {
667 emit_convert_byte (
'_');
668
669 u_score_flag =
0;
670 }
671 }
672
673
674
675 for (head =
0; buffer [head] !=
'\0'; head++) {
676 emit_convert_byte (buffer [head]);
677 }
678
679 if (do_brackets) {
680 emit_convert_byte (
']');
681 }
682 }
683
684 break;
685
686
687
688 default:
689 Reg_Parse--;
690
691 {
692 unsigned char *parse_save, *emit_save;
693 int emit_diff, len =
0;
694
695
696
697 for (; *Reg_Parse !=
'\0' &&
698 !strchr ((
char *) Meta_Char, (
int) *Reg_Parse);
699 len++) {
700
701
702
703
704 parse_save = Reg_Parse;
705 emit_save = Code_Emit_Ptr;
706
707 if (*Reg_Parse ==
'\\') {
708 if ((test = literal_escape (*(Reg_Parse +
1),
0))) {
709 if (*(Reg_Parse +
1) !=
'\"') {
710 emit_convert_byte (
'\\');
711 }
712
713 Reg_Parse++;
714 emit_convert_byte (*Reg_Parse);
715
716 }
else {
717 sprintf (Error_Text,
718 "\\%c is an invalid escape sequence(2)",
719 *(Reg_Parse +
1));
720
721 CONVERT_FAIL (Error_Text);
722 }
723
724 Reg_Parse++;
725 }
else {
726
727
728 if ((test = literal_escape (*Reg_Parse,
1))) {
729
730
731
732 emit_convert_byte (
'\\');
733
734 if (test ==
'0') {
735 test = *Reg_Parse;
736 emit_convert_byte (
'0');
737 emit_convert_byte (
'0' + (test /
64));
738 test -= (test /
64) *
64;
739 emit_convert_byte (
'0' + (test /
8));
740 test -= (test /
8) *
8;
741 emit_convert_byte (
'0' + test);
742 }
else {
743 emit_convert_byte (test);
744 }
745 }
else {
746 emit_convert_byte (*Reg_Parse);
747 }
748
749 Reg_Parse++;
750 }
751
752
753
754
755
756
757
758
759 if (
IS_QUANTIFIER (*Reg_Parse) && len >
0) {
760 Reg_Parse = parse_save;
761 emit_diff = (Code_Emit_Ptr - emit_save);
762
763 if (Code_Emit_Ptr == &Compute_Size) {
764 Convert_Size -= emit_diff;
765 }
else {
766 Code_Emit_Ptr = emit_save;
767 }
768
769 break;
770 }
771 }
772
773 if (len <=
0)
CONVERT_FAIL (
"internal error #4, `atom\'");
774
775 *flag_param |=
HAS_WIDTH;
776
777 if (len ==
1) *flag_param |=
SIMPLE;
778 }
779 }
780
781 return (ret_val);
782 }
783
784
785
786
787
788
789
790 static void emit_convert_byte (
unsigned char c) {
791
792 if (Code_Emit_Ptr == &Compute_Size) {
793 Convert_Size++;
794 }
else {
795 *Code_Emit_Ptr++ = c;
796 }
797 }
798
799
800
801
802
803
804
805
806
807
808
/*
 * literal_escape - look up a character in the escape tables.
 *
 * action == 0: `c' is the character following a backslash.  If it is a
 *              recognised escape, the character it stands for is returned;
 *              otherwise 0.
 * action == 1: `c' is a plain character.  If it is one of the listed control
 *              characters, the letter of its backslash escape is returned.
 *              Otherwise '0' is returned when `c' is not printable, and 0
 *              when it is.
 * Any other action yields 0.
 */
static unsigned char literal_escape (unsigned char c, int action) {

    /* Escape letters, index-aligned with ctrl_char below. */
    static unsigned char ctrl_letter [] = {
        'a', 'b',
        'e',
        'f', 'n', 'r', 't', 'v', '\0'
    };

    static unsigned char ctrl_char [] = {
        '\a', '\b',
#ifdef EBCDIC_CHARSET
        0x27,
#else
        0x1B,
#endif
        '\f', '\n', '\r', '\t', '\v', '\0'
    };

    /* Characters accepted after a backslash, index-aligned with esc_value. */
    static unsigned char esc_char [] = {
        'a', 'b', 'f', 'n', 'r', 't', 'v', '(', ')', '[',
        ']', '<', '>', '.', '\\', '|', '^', '$', '*', '+',
        '?', '&', '\"', '\0'
    };

    static unsigned char esc_value [] = {
        '\a', '\b', '\f', '\n', '\r', '\t', '\v', '(', ')', '[',
        ']', '<', '>', '.', '\\', '|', '^', '$', '*', '+',
        '?', '&', '\"', '\0'
    };

    const unsigned char *key;
    const unsigned char *result;

    /* Pick the table pair for the requested direction. */
    switch (action) {
        case 0:
            key = esc_char;
            result = esc_value;
            break;

        case 1:
            key = ctrl_char;
            result = ctrl_letter;
            break;

        default:
            return 0;
    }

    /* Both key tables end in NUL, so a NUL `c' never matches. */
    for (; *key != '\0'; key++, result++) {
        if (*key == c) {
            return *result;
        }
    }

    if (action == 1 && !isprint (c)) {
        return '0';
    }

    return 0;
}
862
863
864
865
866
/* Copy `count' bytes to *dst_ptr and advance it, provided they fit in front
   of `dst_end'.  Returns 0 (and copies nothing) when they do not. */
static int subst_emit (
    unsigned char **dst_ptr,
    const unsigned char *dst_end,
    const unsigned char *bytes,
    size_t count) {

    if ((size_t) (dst_end - *dst_ptr) < count) {
        return 0;
    }

    memcpy (*dst_ptr, bytes, count);
    *dst_ptr += count;

    return 1;
}

/*
 * ConvertSubstituteRE - copy the substitution string `source' to `dest',
 * rewriting it along the way:
 *
 *    \u \U \l \L      copied unchanged
 *    &                copied unchanged
 *    \0               becomes &
 *    \1 .. \9         copied unchanged
 *    \c               copied unchanged when literal_escape accepts `c';
 *                     otherwise the backslash is dropped
 *    trailing \       copied unchanged
 *    other characters copied, or written as a backslash escape when
 *                     literal_escape (c, 1) asks for one
 *
 * `max' is treated as the capacity of `dest' in bytes, including the
 * terminating NUL.  Every output token is written either completely or not
 * at all; conversion stops at the first token that does not fit, and the
 * result is always NUL terminated.  Nothing is written when `max' is less
 * than 1.
 */
void ConvertSubstituteRE (
    const char *source,
    char *dest,
    int max) {

    const unsigned char *src;
    const unsigned char *dst_end;
    unsigned char *dst;
    unsigned char out [5];   /* Longest token: backslash, '0', three digits. */
    size_t out_len;
    unsigned char c;
    unsigned char test;

    if (source == NULL || dest == NULL) {
        reg_error ("NULL parm to `ConvertSubstituteRE\'");

        return;
    }

    if (max < 1) {
        return;   /* Not even room for the terminator. */
    }

    src = (const unsigned char *) source;
    dst = (unsigned char *) dest;
    dst_end = dst + (max - 1);   /* Last slot is reserved for the NUL. */

    while ((c = *src++) != '\0') {

        if (c == '\\' &&
            (*src == 'u' || *src == 'U' || *src == 'l' || *src == 'L')) {

            /* Case modifier: pass it through, then convert the character
               that follows it within this same iteration. */
            out [0] = '\\';
            out [1] = *src++;

            if (!subst_emit (&dst, dst_end, out, 2)) {
                break;
            }

            c = *src++;

            if (c == '\0') {
                break;   /* The modifier was the last thing in `source'. */
            }
        }

        out_len = 0;

        if (c == '&') {
            out [out_len++] = '&';
        }
        else if (c == '\\') {
            if (*src == '0') {
                /* \0 turns into &. */
                out [out_len++] = '&';
                src++;
            }
            else if (('1' <= *src && *src <= '9') ||
                     literal_escape (*src, 0) != '\0') {
                /* Back-references and recognised escapes keep their
                   backslash. */
                out [out_len++] = '\\';
                out [out_len++] = *src++;
            }
            else if (*src == '\0') {
                /* Lone backslash at the end of the string. */
                out [out_len++] = '\\';
            }
            else {
                /* Unrecognised escape: keep only the character. */
                out [out_len++] = *src++;
            }
        }
        else if ((test = literal_escape (c, 1)) != 0) {
            out [out_len++] = '\\';

            if (test == '0') {
                /* Not printable: `0' followed by three octal digits. */
                out [out_len++] = '0';
                out [out_len++] = (unsigned char) ('0' + c / 64);
                out [out_len++] = (unsigned char) ('0' + (c % 64) / 8);
                out [out_len++] = (unsigned char) ('0' + c % 8);
            }
            else {
                out [out_len++] = test;
            }
        }
        else {
            out [out_len++] = c;
        }

        if (!subst_emit (&dst, dst_end, out, out_len)) {
            break;
        }
    }

    *dst = '\0';
}
966
967
968
969
970
/*
 * reg_error - write the internal-error message `str' to stderr, wrapped in
 * a fixed prefix.
 */
static void reg_error (char *str) {
    static const char format [] =
        "XNEdit: Internal error processing regular expression (%s)\n";

    fprintf (stderr, format, str);
}
978