#ifdef HAVE_CONFIG_H
#include "../config.h"
#endif
#include "regexConvert.h"
#include "../util/nedit_malloc.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <X11/Intrinsic.h>
#ifdef HAVE_DEBUG_H
#include "../debug.h"
#endif
/* Upper bound on parenthesised groups: chunk() rejects a pattern once
   Total_Paren has reached this value. */
#define NSUBEXP 50

/* Store message `m' through Error_Ptr and return 0 from the enclosing
   function.  Wrapped in do/while(0) so that `CONVERT_FAIL (x);' behaves as a
   single statement, including in an unbraced if/else. */
#define CONVERT_FAIL(m) do { *Error_Ptr = (m); return 0; } while (0)

/* True when `c' is one of the quantifier characters '*', '+' or '?'.
   Note: evaluates its argument up to three times. */
#define IS_QUANTIFIER(c) ((c) == '*' || (c) == '+' || (c) == '?')

/* Read the byte at `p' as an unsigned value. */
#define U_CHAR_AT(p) ((unsigned int) *(unsigned char *)(p))

/* Flag bits passed back through the `flag_param' arguments of the parser
   functions. */
#define WORST 0      /* no flags set */
#define HAS_WIDTH 1  /* set by atom() for '.', classes and literal runs */
#define SIMPLE 2     /* set by atom() for '.', classes and 1-char literals */

/* Values for the `paren' argument of chunk(). */
#define NO_PAREN 0   /* top-level call from ConvertRE() */
#define PAREN 1      /* call made by atom() after consuming '(' */

/* Minimum repeat counts used by piece(). */
#define REG_ZERO 0UL
#define REG_ONE 1UL
/* Shared parser/emitter state.  ConvertRE() (re)initialises these at the
   start of a conversion; they are file-scope statics, so the state is shared
   by every call. */

/* Current read position in the pattern being converted. */
static unsigned char *Reg_Parse;
/* Parenthesis counter; reset to 1 for each pass and incremented by chunk()
   for every PAREN group, which fails once it reaches NSUBEXP. */
static int Total_Paren;
/* Number of output bytes counted while Code_Emit_Ptr == &Compute_Size;
   used as the allocation size for Convert_Str. */
static unsigned long Convert_Size;
/* Output write position.  While it equals &Compute_Size, emit_convert_byte()
   only counts bytes instead of storing them. */
static unsigned char *Code_Emit_Ptr;
/* Never written through; its address is the sentinel that marks the
   size-counting pass. */
static unsigned char Compute_Size;
/* Where CONVERT_FAIL stores its message (set from ConvertRE's errorText). */
static char **Error_Ptr;
/* Scratch buffer for error messages built with sprintf. */
static char Error_Text [
128];
/* Characters that end a run of literal characters in atom(). */
static unsigned char Meta_Char [] =
".*+?[(|)^<>$";
/* Buffer allocated by ConvertRE() that receives the converted pattern. */
static unsigned char *Convert_Str;

/* Forward declarations of the recursive-descent parser and its helpers.
   The parser functions return 0 on failure and non-zero on success. */
static int alternative (
int *flag_param);
static int chunk (
int paren,
int *flag_param);
static void emit_convert_byte (
unsigned char c);
/* Second parameter is the `action' selector (see the definition). */
static unsigned char literal_escape (
unsigned char c,
int);
static int atom (
int *flag_param);
static void reg_error (
char *str);
static int piece (
int *flag_param);
/*
 * ConvertRE - rewrite the regular expression `exp' into a newly allocated
 * string.
 *
 * The pattern is parsed twice: the first pass only counts the bytes that
 * would be emitted (Code_Emit_Ptr points at the Compute_Size sentinel), then
 * a buffer of exactly that size is allocated and the second pass fills it.
 *
 * Parameters:
 *   exp        NUL-terminated pattern to convert.
 *   errorText  receives "" on entry and an error message on failure.  May be
 *              NULL, in which case messages are discarded (previously a NULL
 *              here was dereferenced).
 *
 * Returns the converted, NUL-terminated pattern, or NULL on failure.  The
 * result comes from malloc() and is never released here on success, so the
 * caller is presumably expected to free() it.  Failure messages may point
 * into a static buffer that the next conversion overwrites.
 */
char * ConvertRE (
    const char *exp,
    char **errorText) {
    static char *discarded_error; /* sink used when errorText is NULL */
    int flags_local, pass;

    Error_Ptr = (errorText != NULL) ? errorText : &discarded_error;
    *Error_Ptr = "";

    if (exp == NULL) {
        CONVERT_FAIL ("NULL argument to `ConvertRE\'");
    }

    /* Start in counting mode. */
    Code_Emit_Ptr = &Compute_Size;
    Convert_Size = 0UL;

    for (pass = 1; pass <= 2; pass++) {
        Reg_Parse = (unsigned char *) exp;
        Total_Paren = 1;

        if (chunk (NO_PAREN, &flags_local) == 0) {
            /* Pass 2 re-parses input that pass 1 accepted, so a failure here
               is not expected; if it happens anyway, do not leak the buffer
               allocated after pass 1. */
            if (pass == 2) {
                free (Convert_Str);
                Convert_Str = NULL;
            }
            return NULL;
        }

        emit_convert_byte ('\0');

        if (pass == 1) {
            /* Convert_Size is at least 1 because of the terminator above. */
            Convert_Str = malloc (Convert_Size);
            if (Convert_Str == NULL) {
                CONVERT_FAIL ("out of memory in `ConvertRE\'");
            }
            Code_Emit_Ptr = Convert_Str;
        }
    }

    return (char *) Convert_Str;
}
/*
 * chunk - convert a sequence of alternatives separated by '|'.
 *
 * `paren' is PAREN when called for a parenthesised group (the '(' has
 * already been consumed and emitted by the caller) and NO_PAREN for the whole
 * pattern.  For a group, the closing ')' is consumed and emitted here.
 *
 * *flag_param ends up with HAS_WIDTH set only if every alternative reported
 * HAS_WIDTH.  Returns 1 on success, 0 on failure with *Error_Ptr set.
 */
static int chunk (
    int paren,
    int *flag_param) {
    int branch_flags;

    *flag_param = HAS_WIDTH;

    if (paren == PAREN) {
        if (Total_Paren >= NSUBEXP) {
            sprintf (Error_Text, "number of ()''s > %d", (int) NSUBEXP);
            CONVERT_FAIL (Error_Text);
        }
        Total_Paren++;
    }

    /* One iteration per alternative; '|' separators are copied through. */
    for (;;) {
        if (alternative (&branch_flags) == 0) {
            return 0;
        }

        if ((branch_flags & HAS_WIDTH) == 0) {
            *flag_param &= ~HAS_WIDTH;
        }

        if (*Reg_Parse != '|') {
            break;
        }

        emit_convert_byte ('|');
        Reg_Parse++;
    }

    if (paren != NO_PAREN) {
        /* A group must be closed by ')'. */
        if (*Reg_Parse != ')') {
            CONVERT_FAIL ("missing right parenthesis \')\'");
        }
        emit_convert_byte (')');
        Reg_Parse++;
    } else if (*Reg_Parse != '\0') {
        /* At top level anything left over is an error. */
        if (*Reg_Parse == ')') {
            CONVERT_FAIL ("missing left parenthesis \'(\'");
        } else {
            CONVERT_FAIL ("junk on end");
        }
    }

    return 1;
}
/*
 * alternative - convert one branch of an alternation: a run of pieces that
 * ends at '|', ')' or the end of the pattern (none of which is consumed).
 *
 * *flag_param gets HAS_WIDTH if any piece reported it, otherwise WORST.
 * Returns 1 on success, 0 as soon as a piece fails.
 */
static int alternative (
    int *flag_param) {
    int piece_flags;

    *flag_param = WORST;

    for (;;) {
        unsigned char next = *Reg_Parse;

        if (next == '|' || next == ')' || next == '\0') {
            break;
        }

        if (piece (&piece_flags) == 0) {
            return 0;
        }

        *flag_param |= piece_flags & HAS_WIDTH;
    }

    return 1;
}
/*
 * piece - convert one atom together with an optional trailing quantifier
 * ('*', '+' or '?'), which is copied to the output unchanged.
 *
 * Fails when '+' is applied to an atom without HAS_WIDTH and when one
 * quantifier directly follows another.  Without a quantifier the atom's
 * flags are passed through; with one, *flag_param is HAS_WIDTH for '+' and
 * WORST otherwise.
 *
 * Returns the atom's (non-zero) status on success, 0 on failure.
 */
static int piece (
    int *flag_param) {
    int atom_flags;
    int status;
    unsigned char op_code;
    unsigned long min_val;

    status = atom (&atom_flags);
    if (status == 0) {
        return 0;
    }

    op_code = *Reg_Parse;

    if (!IS_QUANTIFIER (op_code)) {
        /* Bare atom: nothing more to do. */
        *flag_param = atom_flags;
        return status;
    }

    Reg_Parse++;

    /* Only '+' requires at least one occurrence. */
    min_val = (op_code == '+') ? REG_ONE : REG_ZERO;

    if (min_val > REG_ZERO && !(atom_flags & HAS_WIDTH)) {
        sprintf (Error_Text, "%c operand could be empty", op_code);
        CONVERT_FAIL (Error_Text);
    }

    if (min_val > REG_ZERO) {
        *flag_param = WORST | HAS_WIDTH;
    } else {
        *flag_param = WORST;
    }

    /* Sanity check; cannot trigger after the IS_QUANTIFIER test above. */
    if (op_code != '*' && op_code != '+' && op_code != '?') {
        CONVERT_FAIL ("internal error #2, `piece\'");
    }

    if (IS_QUANTIFIER (*Reg_Parse)) {
        sprintf (Error_Text, "nested quantifiers, %c%c", op_code, *Reg_Parse);
        CONVERT_FAIL (Error_Text);
    }

    emit_convert_byte (op_code);

    return status;
}
/* Size of the staging buffer used while rewriting a [...] class.  At most
   CLASS_BUF_SIZE - 1 bytes are ever staged. */
enum { CLASS_BUF_SIZE = 500 };

/* Append one byte to the class staging buffer.  Returns 1 on success; on
   overflow sets the error message and returns 0.  Every write to the buffer
   goes through here so that no path can run past its end. */
static int class_put (
    unsigned char *buffer,
    int *head,
    unsigned char c) {
    if (*head >= CLASS_BUF_SIZE - 1) {
        CONVERT_FAIL ("too much data in [] to convert.");
    }
    buffer [(*head)++] = c;
    return 1;
}

/* Stage the unescaped source byte `c': bytes that literal_escape(c, 1) maps
   to a control-escape letter become "\<letter>", other non-printing bytes
   become "\0" followed by three octal digits, everything else is copied.
   Returns 1 on success, 0 on buffer overflow. */
static int class_put_literal (
    unsigned char *buffer,
    int *head,
    unsigned char c) {
    unsigned char esc = literal_escape (c, 1);

    if (esc == '\0') {
        return class_put (buffer, head, c);
    }
    if (!class_put (buffer, head, '\\')) {
        return 0;
    }
    if (esc != '0') {
        return class_put (buffer, head, esc);
    }
    return class_put (buffer, head, '0')
        && class_put (buffer, head, (unsigned char) ('0' + (c / 64)))
        && class_put (buffer, head, (unsigned char) ('0' + ((c / 8) % 8)))
        && class_put (buffer, head, (unsigned char) ('0' + (c % 8)));
}

/* Same rewriting as class_put_literal(), but written straight to the
   output stream. */
static void emit_literal_byte (
    unsigned char c) {
    unsigned char esc = literal_escape (c, 1);

    if (esc == '\0') {
        emit_convert_byte (c);
        return;
    }
    emit_convert_byte ('\\');
    if (esc != '0') {
        emit_convert_byte (esc);
        return;
    }
    emit_convert_byte ('0');
    emit_convert_byte ((unsigned char) ('0' + (c / 64)));
    emit_convert_byte ((unsigned char) ('0' + ((c / 8) % 8)));
    emit_convert_byte ((unsigned char) ('0' + (c % 8)));
}

/* Emit a backslash followed by `lower', or by `upper' when `negate' is set. */
static void emit_class_shorthand (
    unsigned char lower,
    unsigned char upper,
    int negate) {
    emit_convert_byte ('\\');
    emit_convert_byte (negate ? upper : lower);
}

/*
 * Convert a bracket expression; Reg_Parse points just past the '['.
 *
 * The ranges 0-9, a-z and A-Z and a bare '_' are remembered as flags and
 * written as \d, \l (a-z together with A-Z) or \w (all four); everything
 * else is staged in a local buffer and copied out afterwards.  If the class
 * consists of exactly one such shorthand the brackets are dropped, and a
 * negated class then uses the upper-case letter (\D, \L, \W).
 *
 * Returns 1 on success, 0 on failure with *Error_Ptr set.
 */
static int convert_char_class (
    int *flag_param) {
    unsigned char buffer [CLASS_BUF_SIZE];
    unsigned int last_value = 0;
    unsigned char last_emit = 0;
    unsigned char test;
    int head = 0;
    int i;
    int negated = 0;
    int do_brackets = 1;
    int use_upper;
    int a_z_flag = 0;
    int A_Z_flag = 0;
    int zero_nine = 0;
    int u_score_flag = 0;

    if (*Reg_Parse == '^') {
        negated = 1;
        Reg_Parse++;
    }

    /* A leading ']' or '-' is a literal; it is written escaped. */
    if (*Reg_Parse == ']' || *Reg_Parse == '-') {
        last_emit = *Reg_Parse;
        if (!class_put (buffer, &head, '\\') ||
            !class_put (buffer, &head, *Reg_Parse)) {
            return 0;
        }
        Reg_Parse++;
    }

    while (*Reg_Parse != '\0' && *Reg_Parse != ']') {
        if (*Reg_Parse == '-') {
            Reg_Parse++;

            if (*Reg_Parse == ']' || *Reg_Parse == '\0') {
                /* Trailing '-' is a literal; the loop test ends the scan. */
                last_emit = '-';
                if (!class_put (buffer, &head, '\\') ||
                    !class_put (buffer, &head, '-')) {
                    return 0;
                }
                continue;
            }

            if (*Reg_Parse == '\\') {
                /* Range whose upper end is an escape sequence. */
                Reg_Parse++;
                test = literal_escape (*Reg_Parse, 0);
                if (test == '\0') {
                    sprintf (Error_Text,
                             "\\%c is an invalid escape sequence(3)",
                             *Reg_Parse);
                    CONVERT_FAIL (Error_Text);
                }
                if (!class_put (buffer, &head, '-')) {
                    return 0;
                }
                /* The backslash is staged with the rest of the class.  It
                   used to be emitted directly, which placed it in front of
                   the '[' in the output. */
                if (*Reg_Parse != '\"' && !class_put (buffer, &head, '\\')) {
                    return 0;
                }
                if (!class_put (buffer, &head, *Reg_Parse)) {
                    return 0;
                }
                last_value = (unsigned int) test;
            } else {
                last_value = U_CHAR_AT (Reg_Parse);

                /* For the three recognised ranges the start character is
                   the byte staged last; un-stage it and set a flag. */
                if (last_emit == '0' && last_value == '9') {
                    zero_nine = 1;
                    head--;
                } else if (last_emit == 'a' && last_value == 'z') {
                    a_z_flag = 1;
                    head--;
                } else if (last_emit == 'A' && last_value == 'Z') {
                    A_Z_flag = 1;
                    head--;
                } else {
                    if (!class_put (buffer, &head, '-') ||
                        !class_put_literal (buffer, &head, *Reg_Parse)) {
                        return 0;
                    }
                }
            }

            if (last_emit > last_value) {
                CONVERT_FAIL ("invalid [] range");
            }

            last_emit = (unsigned char) last_value;
            Reg_Parse++;
        } else if (*Reg_Parse == '\\') {
            /* Escape sequence outside a range. */
            Reg_Parse++;
            test = literal_escape (*Reg_Parse, 0);
            if (test == '\0') {
                sprintf (Error_Text,
                         "\\%c is an invalid escape sequence(1)",
                         *Reg_Parse);
                CONVERT_FAIL (Error_Text);
            }
            last_emit = test;
            /* An escaped double quote is written without its backslash. */
            if (*Reg_Parse != '\"' && !class_put (buffer, &head, '\\')) {
                return 0;
            }
            if (!class_put (buffer, &head, *Reg_Parse)) {
                return 0;
            }
            Reg_Parse++;
        } else {
            /* Ordinary character; '_' is only flagged, not staged. */
            last_emit = *Reg_Parse;
            if (*Reg_Parse == '_') {
                u_score_flag = 1;
            } else if (!class_put_literal (buffer, &head, *Reg_Parse)) {
                return 0;
            }
            Reg_Parse++;
        }
    }

    if (*Reg_Parse != ']') {
        CONVERT_FAIL ("missing right \']\'");
    }

    Reg_Parse++;
    *flag_param |= HAS_WIDTH | SIMPLE;

    /* Brackets are unnecessary when nothing is staged and the flags amount
       to exactly one shorthand. */
    if (head == 0 &&
        (( a_z_flag &&  A_Z_flag &&  zero_nine &&  u_score_flag) ||
         ( a_z_flag &&  A_Z_flag && !zero_nine && !u_score_flag) ||
         (!a_z_flag && !A_Z_flag &&  zero_nine && !u_score_flag))) {
        do_brackets = 0;
    }
    use_upper = (negated && !do_brackets);

    if (do_brackets) {
        emit_convert_byte ('[');
        if (negated) {
            emit_convert_byte ('^');
        }
    }

    /* Shorthands first, most general combination first. */
    if (a_z_flag && A_Z_flag && zero_nine && u_score_flag) {
        emit_class_shorthand ('w', 'W', use_upper);
        a_z_flag = A_Z_flag = zero_nine = u_score_flag = 0;
    }
    if (a_z_flag && A_Z_flag) {
        emit_class_shorthand ('l', 'L', use_upper);
        a_z_flag = A_Z_flag = 0;
    }
    if (zero_nine) {
        emit_class_shorthand ('d', 'D', use_upper);
    }
    if (a_z_flag) {
        emit_convert_byte ('a');
        emit_convert_byte ('-');
        emit_convert_byte ('z');
    }
    if (A_Z_flag) {
        emit_convert_byte ('A');
        emit_convert_byte ('-');
        emit_convert_byte ('Z');
    }
    if (u_score_flag) {
        emit_convert_byte ('_');
    }

    /* Staged bytes are never NUL, so copying by count emits all of them. */
    for (i = 0; i < head; i++) {
        emit_convert_byte (buffer [i]);
    }

    if (do_brackets) {
        emit_convert_byte (']');
    }

    return 1;
}

/*
 * Convert a run of literal characters starting at Reg_Parse.  The run ends
 * at the end of the pattern or at a character in Meta_Char.  If a quantifier
 * follows a run of two or more characters, the last character is backed out
 * so that the quantifier applies to it alone.
 *
 * Returns 1 on success, 0 on failure with *Error_Ptr set.
 */
static int convert_literal_run (
    int *flag_param) {
    unsigned char *parse_save;
    unsigned char *emit_save;
    unsigned long size_save;
    int len = 0;

    while (*Reg_Parse != '\0' &&
           !strchr ((char *) Meta_Char, (int) *Reg_Parse)) {
        /* Remember where this character started in case it is backed out. */
        parse_save = Reg_Parse;
        emit_save = Code_Emit_Ptr;
        size_save = Convert_Size;

        if (*Reg_Parse == '\\') {
            if (literal_escape (*(Reg_Parse + 1), 0) == '\0') {
                sprintf (Error_Text,
                         "\\%c is an invalid escape sequence(2)",
                         *(Reg_Parse + 1));
                CONVERT_FAIL (Error_Text);
            }
            /* An escaped double quote is written without its backslash. */
            if (*(Reg_Parse + 1) != '\"') {
                emit_convert_byte ('\\');
            }
            emit_convert_byte (*(Reg_Parse + 1));
            Reg_Parse += 2;
        } else {
            emit_literal_byte (*Reg_Parse);
            Reg_Parse++;
        }

        if (IS_QUANTIFIER (*Reg_Parse) && len > 0) {
            /* Undo this character in whichever mode is active.  In the
               counting pass the write pointer never moves, so the byte count
               itself has to be restored (it used to be left over-counted);
               in the emitting pass the write pointer is rewound. */
            Reg_Parse = parse_save;
            Code_Emit_Ptr = emit_save;
            Convert_Size = size_save;
            break;
        }

        len++;
    }

    if (len <= 0) {
        CONVERT_FAIL ("internal error #4, `atom\'");
    }

    *flag_param |= HAS_WIDTH;
    if (len == 1) {
        *flag_param |= SIMPLE;
    }

    return 1;
}

/*
 * atom - convert the lowest-level unit of a pattern:
 *   ^ $ < > .   copied through
 *   ( ... )     '(' is emitted, the body is handled by chunk()
 *   {           written as "\{"
 *   [ ... ]     see convert_char_class()
 *   other       a run of literal characters, see convert_literal_run()
 *
 * *flag_param receives HAS_WIDTH / SIMPLE as determined by the case taken.
 * Returns 1 on success, 0 on failure with *Error_Ptr set.
 *
 * Differences from the earlier single-function version: every write to the
 * class staging buffer is bounds-checked (some range paths were not), the
 * backslash of an escaped range end is kept inside the brackets, and the
 * counting pass no longer over-counts a backed-out literal.
 */
static int atom (
    int *flag_param) {
    int flags_local;
    unsigned char c;

    *flag_param = WORST;

    c = *Reg_Parse++;

    switch (c) {
    case '^':
    case '$':
    case '<':
    case '>':
        emit_convert_byte (c);
        break;

    case '.':
        emit_convert_byte ('.');
        *flag_param |= (HAS_WIDTH | SIMPLE);
        break;

    case '(':
        emit_convert_byte ('(');
        if (chunk (PAREN, &flags_local) == 0) {
            return 0;
        }
        *flag_param |= flags_local & HAS_WIDTH;
        break;

    case '\0':
    case '|':
    case ')':
        /* Callers stop before these characters. */
        CONVERT_FAIL ("internal error #3, `atom\'");

    case '?':
    case '+':
    case '*':
        sprintf (Error_Text, "%c follows nothing", c);
        CONVERT_FAIL (Error_Text);

    case '{':
        emit_convert_byte ('\\');
        emit_convert_byte ('{');
        break;

    case '[':
        return convert_char_class (flag_param);

    default:
        Reg_Parse--; /* the literal run starts with this character */
        return convert_literal_run (flag_param);
    }

    return 1;
}
/*
 * emit_convert_byte - output one byte of the converted pattern.
 *
 * While Code_Emit_Ptr points at the Compute_Size sentinel nothing is stored
 * and only Convert_Size is incremented; otherwise the byte is written at
 * Code_Emit_Ptr, which then advances.
 */
static void emit_convert_byte (
    unsigned char c) {
    if (Code_Emit_Ptr != &Compute_Size) {
        *Code_Emit_Ptr = c;
        Code_Emit_Ptr++;
        return;
    }

    Convert_Size++;
}
/*
 * literal_escape - classify a character for escape handling.
 *
 * action == 0: `c' is the character following a backslash.  Returns the
 *              byte the escape stands for (e.g. 'n' -> '\n', '(' -> '('),
 *              or 0 if "\c" is not a recognised escape.
 * action == 1: `c' is a raw byte.  Returns the escape letter for the control
 *              characters listed below (e.g. '\n' -> 'n', ESC -> 'e'),
 *              '0' for any other non-printing byte (per isprint), or 0 if
 *              the byte needs no escaping.
 * Any other action returns 0.
 */
static unsigned char literal_escape (
    unsigned char c,
    int action) {
    struct escape_pair {
        unsigned char letter; /* character written after the backslash */
        unsigned char byte;   /* byte value it represents */
    };

    /* Raw control bytes that have a letter escape (action 1). */
    static const struct escape_pair control_map [] = {
        {'a', '\a'},
        {'b', '\b'},
#ifdef EBCDIC_CHARSET
        {'e', 0x27},
#else
        {'e', 0x1B},
#endif
        {'f', '\f'},
        {'n', '\n'},
        {'r', '\r'},
        {'t', '\t'},
        {'v', '\v'}
    };

    /* Characters accepted after a backslash (action 0). */
    static const struct escape_pair escape_map [] = {
        {'a', '\a'},
        {'b', '\b'},
        {'f', '\f'},
        {'n', '\n'},
        {'r', '\r'},
        {'t', '\t'},
        {'v', '\v'},
        {'(', '('},
        {')', ')'},
        {'[', '['},
        {']', ']'},
        {'<', '<'},
        {'>', '>'},
        {'.', '.'},
        {'\\', '\\'},
        {'|', '|'},
        {'^', '^'},
        {'$', '$'},
        {'*', '*'},
        {'+', '+'},
        {'?', '?'},
        {'&', '&'},
        {'\"', '\"'}
    };

    unsigned int idx;

    if (action == 0) {
        for (idx = 0; idx < sizeof escape_map / sizeof escape_map [0]; idx++) {
            if (escape_map [idx].letter == c) {
                return escape_map [idx].byte;
            }
        }
        return 0;
    }

    if (action == 1) {
        for (idx = 0; idx < sizeof control_map / sizeof control_map [0]; idx++) {
            if (control_map [idx].byte == c) {
                return control_map [idx].letter;
            }
        }
        /* Remaining non-printing bytes are to be written in octal. */
        return isprint (c) ? 0 : '0';
    }

    return 0;
}
/*
 * ConvertSubstituteRE - rewrite the substitution string `source' into
 * `dest', which has room for `max' bytes including the terminating NUL.
 *
 * Rewriting rules:
 *   \u \U \l \L        copied through; the character after the prefix is
 *                      then converted by the rules below
 *   &                  copied through
 *   \0                 becomes &
 *   \1 .. \9           copied through
 *   \c                 with c accepted by literal_escape(c, 0): copied through
 *   \ at end of input  a lone backslash is written
 *   \c otherwise       the backslash is dropped and c is copied
 *   raw control or non-printing bytes become "\<letter>" or "\0ooo" (octal)
 *   anything else      copied through
 *
 * Every write is checked against `max' (previously only one branch was, and
 * that one could still write up to five bytes past its check).  Output is
 * truncated at a token boundary when it does not fit, so an escape sequence
 * is never cut in half.  If `max' is not positive nothing is written at all.
 * A source ending in one of the \u \U \l \L prefixes now stops at the
 * terminator instead of reading beyond it.
 *
 * A NULL `source' or `dest' is reported through reg_error() and otherwise
 * ignored.
 */
void ConvertSubstituteRE (
    const char *source,
    char *dest,
    int max) {
    const unsigned char *src;
    unsigned char *dst;
    unsigned char *limit;      /* last position, reserved for the NUL */
    unsigned char token [8];   /* longest token: 2-byte prefix + "\0ooo" */
    unsigned char c;
    unsigned char test;
    int n;
    int i;

    if (source == NULL || dest == NULL) {
        reg_error ("NULL parm to `ConvertSubstituteRE\'");
        return;
    }

    if (max <= 0) {
        return; /* no room even for the terminator */
    }

    src = (const unsigned char *) source;
    dst = (unsigned char *) dest;
    limit = dst + (max - 1);

    while ((c = *src++) != '\0') {
        n = 0;

        /* Case-conversion prefix: keep it and continue with the character
           that follows. */
        if (c == '\\' &&
            (*src == 'u' || *src == 'U' || *src == 'l' || *src == 'L')) {
            token [n++] = '\\';
            token [n++] = *src++;
            c = *src++;
        }

        if (c == '\0') {
            /* Input ended right after a prefix; only the prefix is output. */
        } else if (c == '&') {
            token [n++] = '&';
        } else if (c == '\\') {
            if (*src == '0') {
                token [n++] = '&';
                src++;
            } else if ('1' <= *src && *src <= '9') {
                token [n++] = '\\';
                token [n++] = *src++;
            } else if (literal_escape (*src, 0) != '\0') {
                token [n++] = '\\';
                token [n++] = *src++;
            } else if (*src == '\0') {
                token [n++] = '\\';
            } else {
                token [n++] = *src++;
            }
        } else {
            test = literal_escape (c, 1);
            if (test == '\0') {
                token [n++] = c;
            } else {
                token [n++] = '\\';
                if (test == '0') {
                    token [n++] = '0';
                    token [n++] = (unsigned char) ('0' + (c / 64));
                    token [n++] = (unsigned char) ('0' + ((c / 8) % 8));
                    token [n++] = (unsigned char) ('0' + (c % 8));
                } else {
                    token [n++] = test;
                }
            }
        }

        /* Stop rather than write a partial token. */
        if ((limit - dst) < n) {
            break;
        }
        for (i = 0; i < n; i++) {
            *dst++ = token [i];
        }

        if (c == '\0') {
            break; /* terminator was consumed while handling a prefix */
        }
    }

    *dst = '\0';
}
/*
 * reg_error - write an internal-error report containing `str' to stderr.
 */
static void reg_error (
    char *str) {
    fprintf (stderr,
             "XNEdit: Internal error processing regular expression (%s)\n",
             str);
}