/* UNIXworkcode */

%{
#ifdef HAVE_CONFIG_H
#include "../config.h"
#endif

#include "parse.h"
#include "textBuf.h"
#include "nedit.h"
#include "../util/rbTree.h"
#include "interpret.h"

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <X11/Intrinsic.h>
#include <Xm/Xm.h>
#ifdef VMS
#include "../util/VMSparam.h"
#else
#ifndef __MVS__
#include <sys/param.h>
#endif
#endif /*VMS*/

#ifdef HAVE_DEBUG_H
#include "../debug.h"
#endif

/* Macros to add error processing to AddOp and AddSym calls.  Each one calls
   the corresponding Add* routine, passing the address of ErrMsg, and makes
   the enclosing function return 1 when that routine returns zero.  They are
   wrapped in do { } while (0) so that each use is a single statement and
   cannot capture a following "else". */
#define ADD_OP(op) \
    do { if (!AddOp(op, &ErrMsg)) return 1; } while (0)
#define ADD_SYM(sym) \
    do { if (!AddSym(sym, &ErrMsg)) return 1; } while (0)
#define ADD_IMMED(val) \
    do { if (!AddImmediate(val, &ErrMsg)) return 1; } while (0)
#define ADD_BR_OFF(to) \
    do { if (!AddBranchOffset(to, &ErrMsg)) return 1; } while (0)

/* Store, in the instruction slot "from", the distance (in Inst units) from
   "from" to "to" */
#define SET_BR_OFF(from, to) \
    (((from)->value) = ((Inst *)(to)) - ((Inst *)(from)))

/* Max. length for a string constant (... there shouldn't be a maximum) */
#define MAX_STRING_CONST_LEN 5000

static int yyerror(char *s);
static int yylex(void);
int yyparse(void);
static int follow(char expect, int yes, int no);
static int follow2(char expect1, int yes1, char expect2, int yes2, int no);
static int follow_non_whitespace(char expect, int yes, int no);
static Symbol *matchesActionRoutine(char **inPtr);

static char *ErrMsg;            /* message for the most recent error */
static char *InPtr;             /* current read position in the input */
extern Inst *LoopStack[];       /* addresses of break, cont stmts */
extern Inst **LoopStackPtr;     /* to fill at the end of a loop */

%}

/* Semantic value carried by tokens and non-terminals */
%union {
    Symbol *sym;
    Inst *inst;
    int nArgs;
}
%token <sym> NUMBER STRING SYMBOL
%token DELETE ARG_LOOKUP
%token IF WHILE ELSE FOR BREAK CONTINUE RETURN
%type <nArgs> arglist
%type <inst> cond comastmts for while else and or arrayexpr
%type <sym> evalsym

%nonassoc IF_NO_ELSE
%nonassoc ELSE

%nonassoc SYMBOL ARG_LOOKUP
%right    '=' ADDEQ SUBEQ MULEQ DIVEQ MODEQ ANDEQ OREQ
%left     CONCAT
%left     OR
%left     AND
%left     '|'
%left     '&'
%left     GT GE LT LE EQ NE IN
%left     '+' '-'
%left     '*' '/' '%'
%nonassoc UNARY_MINUS NOT
%nonassoc DELETE
%nonassoc INCR DECR
%right    POW
%nonassoc '['
%nonassoc '('

%%      /* Rules */

/* A whole macro: bare statements or a single brace-enclosed block.  Every
   successful alternative appends OP_RETURN_NO_VAL and ends the parse with
   0; the error alternative ends it with 1. */
program:    blank stmts {
                ADD_OP(OP_RETURN_NO_VAL); return 0;
            }
            | blank '{' blank stmts '}' {
                ADD_OP(OP_RETURN_NO_VAL); return 0;
            }
            | blank '{' blank '}' {
                ADD_OP(OP_RETURN_NO_VAL); return 0;
            }
            | error {
                return 1;
            }
            ;
block:      '{' blank stmts '}' blank
            | '{' blank '}' blank
            | stmt
            ;
stmts:      stmt
            | stmts stmt
            ;
stmt:       simpstmt '\n' blank
            | IF '(' cond ')' blank block %prec IF_NO_ELSE {
                /* point the branch slot left by cond at the code
                   following the block */
                SET_BR_OFF($3, GetPC());
            }
            | IF '(' cond ')' blank block else blank block %prec ELSE {
                /* cond's branch goes to the slot after the one saved by
                   "else"; the branch emitted by "else" goes to the end */
                SET_BR_OFF($3, ($7+1)); SET_BR_OFF($7, GetPC());
            }
            | while '(' cond ')' blank block {
                /* branch back to the loop start saved in $1, then patch
                   cond's branch to the code after the loop */
                ADD_OP(OP_BRANCH); ADD_BR_OFF($1);
                SET_BR_OFF($3, GetPC()); FillLoopAddrs(GetPC(), $1);
            }
            | for '(' comastmts ';' cond ';' comastmts ')' blank block {
                FillLoopAddrs(GetPC()+2+($7-($5+1)), GetPC());
                SwapCode($5+1, $7, GetPC());
                ADD_OP(OP_BRANCH); ADD_BR_OFF($3); SET_BR_OFF($5, GetPC());
            }
            | for '(' SYMBOL IN arrayexpr ')' {
                Symbol *iterSym = InstallIteratorSymbol();
                ADD_OP(OP_BEGIN_ARRAY_ITER); ADD_SYM(iterSym);
                ADD_OP(OP_ARRAY_ITER); ADD_SYM($3); ADD_SYM(iterSym); ADD_BR_OFF(0);
            }
                blank block {
                /* $5 is the PC saved by arrayexpr; $5+2 and $5+5 address
                   slots emitted by the mid-rule action above */
                ADD_OP(OP_BRANCH); ADD_BR_OFF($5+2);
                SET_BR_OFF($5+5, GetPC());
                FillLoopAddrs(GetPC(), $5+2);
            }
            | BREAK '\n' blank {
                ADD_OP(OP_BRANCH); ADD_BR_OFF(0);
                if (AddBreakAddr(GetPC()-1)) {
                    yyerror("break outside loop"); YYERROR;
                }
            }
            | CONTINUE '\n' blank {
                ADD_OP(OP_BRANCH); ADD_BR_OFF(0);
                if (AddContinueAddr(GetPC()-1)) {
                    yyerror("continue outside loop"); YYERROR;
                }
            }
            | RETURN expr '\n' blank {
                ADD_OP(OP_RETURN);
            }
            | RETURN '\n' blank {
                ADD_OP(OP_RETURN_NO_VAL);
            }
            ;
/* Statements that may also appear in the clauses of a "for" header */
simpstmt:   SYMBOL '=' expr {
                ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            | evalsym ADDEQ expr {
                ADD_OP(OP_ADD); ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            | evalsym SUBEQ expr {
                ADD_OP(OP_SUB); ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            | evalsym MULEQ expr {
                ADD_OP(OP_MUL); ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            | evalsym DIVEQ expr {
                ADD_OP(OP_DIV); ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            | evalsym MODEQ expr {
                ADD_OP(OP_MOD); ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            | evalsym ANDEQ expr {
                ADD_OP(OP_BIT_AND); ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            | evalsym OREQ expr {
                ADD_OP(OP_BIT_OR); ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            | DELETE arraylv '[' arglist ']' {
                ADD_OP(OP_ARRAY_DELETE); ADD_IMMED($4);
            }
            | initarraylv '[' arglist ']' '=' expr {
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($3);
            }
            | initarraylv '[' arglist ']' ADDEQ expr {
                ADD_OP(OP_ARRAY_REF_ASSIGN_SETUP); ADD_IMMED(1); ADD_IMMED($3);
                ADD_OP(OP_ADD);
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($3);
            }
            | initarraylv '[' arglist ']' SUBEQ expr {
                ADD_OP(OP_ARRAY_REF_ASSIGN_SETUP); ADD_IMMED(1); ADD_IMMED($3);
                ADD_OP(OP_SUB);
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($3);
            }
            | initarraylv '[' arglist ']' MULEQ expr {
                ADD_OP(OP_ARRAY_REF_ASSIGN_SETUP); ADD_IMMED(1); ADD_IMMED($3);
                ADD_OP(OP_MUL);
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($3);
            }
            | initarraylv '[' arglist ']' DIVEQ expr {
                ADD_OP(OP_ARRAY_REF_ASSIGN_SETUP); ADD_IMMED(1); ADD_IMMED($3);
                ADD_OP(OP_DIV);
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($3);
            }
            | initarraylv '[' arglist ']' MODEQ expr {
                ADD_OP(OP_ARRAY_REF_ASSIGN_SETUP); ADD_IMMED(1); ADD_IMMED($3);
                ADD_OP(OP_MOD);
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($3);
            }
            | initarraylv '[' arglist ']' ANDEQ expr {
                ADD_OP(OP_ARRAY_REF_ASSIGN_SETUP); ADD_IMMED(1); ADD_IMMED($3);
                ADD_OP(OP_BIT_AND);
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($3);
            }
            | initarraylv '[' arglist ']' OREQ expr {
                ADD_OP(OP_ARRAY_REF_ASSIGN_SETUP); ADD_IMMED(1); ADD_IMMED($3);
                ADD_OP(OP_BIT_OR);
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($3);
            }
            | initarraylv '[' arglist ']' INCR {
                ADD_OP(OP_ARRAY_REF_ASSIGN_SETUP); ADD_IMMED(0); ADD_IMMED($3);
                ADD_OP(OP_INCR);
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($3);
            }
            | initarraylv '[' arglist ']' DECR {
                ADD_OP(OP_ARRAY_REF_ASSIGN_SETUP); ADD_IMMED(0); ADD_IMMED($3);
                ADD_OP(OP_DECR);
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($3);
            }
            | INCR initarraylv '[' arglist ']' {
                ADD_OP(OP_ARRAY_REF_ASSIGN_SETUP); ADD_IMMED(0); ADD_IMMED($4);
                ADD_OP(OP_INCR);
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($4);
            }
            | DECR initarraylv '[' arglist ']' {
                ADD_OP(OP_ARRAY_REF_ASSIGN_SETUP); ADD_IMMED(0); ADD_IMMED($4);
                ADD_OP(OP_DECR);
                ADD_OP(OP_ARRAY_ASSIGN); ADD_IMMED($4);
            }
            | SYMBOL '(' arglist ')' {
                ADD_OP(OP_SUBR_CALL);
                ADD_SYM(PromoteToGlobal($1)); ADD_IMMED($3);
            }
            | INCR SYMBOL {
                ADD_OP(OP_PUSH_SYM); ADD_SYM($2); ADD_OP(OP_INCR);
                ADD_OP(OP_ASSIGN); ADD_SYM($2);
            }
            | SYMBOL INCR {
                ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_INCR);
                ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            | DECR SYMBOL {
                ADD_OP(OP_PUSH_SYM); ADD_SYM($2); ADD_OP(OP_DECR);
                ADD_OP(OP_ASSIGN); ADD_SYM($2);
            }
            | SYMBOL DECR {
                ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_DECR);
                ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            ;
/* Target of a compound assignment: pushes the symbol and passes it on so
   that the simpstmt action can assign back to it */
evalsym:    SYMBOL {
                $$ = $1; ADD_OP(OP_PUSH_SYM); ADD_SYM($1);
            }
            ;
/* Comma-separated simple statements; the value is the PC after the last */
comastmts:  /* nothing */ {
                $$ = GetPC();
            }
            | simpstmt {
                $$ = GetPC();
            }
            | comastmts ',' simpstmt {
                $$ = GetPC();
            }
            ;
/* Comma-separated expressions; the value is the number of expressions */
arglist:    /* nothing */ {
                $$ = 0;
            }
            | expr {
                $$ = 1;
            }
            | arglist ',' expr {
                $$ = $1 + 1;
            }
            ;
/* Adjacent numexprs are joined with OP_CONCAT */
expr:       numexpr %prec CONCAT
            | expr numexpr %prec CONCAT {
                ADD_OP(OP_CONCAT);
            }
            ;

initarraylv: SYMBOL {
                ADD_OP(OP_PUSH_ARRAY_SYM); ADD_SYM($1); ADD_IMMED(1);
            }
            | initarraylv '[' arglist ']' {
                ADD_OP(OP_ARRAY_REF); ADD_IMMED($3);
            }
            ;
arraylv:    SYMBOL {
                ADD_OP(OP_PUSH_ARRAY_SYM); ADD_SYM($1); ADD_IMMED(0);
            }
            | arraylv '[' arglist ']' {
                ADD_OP(OP_ARRAY_REF); ADD_IMMED($3);
            }
            ;
arrayexpr:  numexpr {
                $$ = GetPC();
            }
            ;
numexpr:    NUMBER {
                ADD_OP(OP_PUSH_SYM); ADD_SYM($1);
            }
            | STRING {
                ADD_OP(OP_PUSH_SYM); ADD_SYM($1);
            }
            | SYMBOL {
                ADD_OP(OP_PUSH_SYM); ADD_SYM($1);
            }
            | SYMBOL '(' arglist ')' {
                ADD_OP(OP_SUBR_CALL);
                ADD_SYM(PromoteToGlobal($1)); ADD_IMMED($3);
                ADD_OP(OP_FETCH_RET_VAL);
            }
            | '(' expr ')'
            | ARG_LOOKUP '[' numexpr ']' {
                ADD_OP(OP_PUSH_ARG);
            }
            | ARG_LOOKUP '[' ']' {
                ADD_OP(OP_PUSH_ARG_COUNT);
            }
            | ARG_LOOKUP {
                ADD_OP(OP_PUSH_ARG_ARRAY);
            }
            | numexpr '[' arglist ']' {
                ADD_OP(OP_ARRAY_REF); ADD_IMMED($3);
            }
            | numexpr '+' numexpr {
                ADD_OP(OP_ADD);
            }
            | numexpr '-' numexpr {
                ADD_OP(OP_SUB);
            }
            | numexpr '*' numexpr {
                ADD_OP(OP_MUL);
            }
            | numexpr '/' numexpr {
                ADD_OP(OP_DIV);
            }
            | numexpr '%' numexpr {
                ADD_OP(OP_MOD);
            }
            | numexpr POW numexpr {
                ADD_OP(OP_POWER);
            }
            | '-' numexpr %prec UNARY_MINUS  {
                ADD_OP(OP_NEGATE);
            }
            | numexpr GT numexpr  {
                ADD_OP(OP_GT);
            }
            | numexpr GE numexpr  {
                ADD_OP(OP_GE);
            }
            | numexpr LT numexpr  {
                ADD_OP(OP_LT);
            }
            | numexpr LE numexpr  {
                ADD_OP(OP_LE);
            }
            | numexpr EQ numexpr  {
                ADD_OP(OP_EQ);
            }
            | numexpr NE numexpr  {
                ADD_OP(OP_NE);
            }
            | numexpr '&' numexpr {
                ADD_OP(OP_BIT_AND);
            }
            | numexpr '|' numexpr  {
                ADD_OP(OP_BIT_OR);
            }
            | numexpr and numexpr %prec AND {
                /* patch the short-circuit branch emitted by "and" */
                ADD_OP(OP_AND); SET_BR_OFF($2, GetPC());
            }
            | numexpr or numexpr %prec OR {
                /* patch the short-circuit branch emitted by "or" */
                ADD_OP(OP_OR); SET_BR_OFF($2, GetPC());
            }
            | NOT numexpr {
                ADD_OP(OP_NOT);
            }
            | INCR SYMBOL {
                ADD_OP(OP_PUSH_SYM); ADD_SYM($2); ADD_OP(OP_INCR);
                ADD_OP(OP_DUP); ADD_OP(OP_ASSIGN); ADD_SYM($2);
            }
            | SYMBOL INCR {
                ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_DUP);
                ADD_OP(OP_INCR); ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            | DECR SYMBOL {
                ADD_OP(OP_PUSH_SYM); ADD_SYM($2); ADD_OP(OP_DECR);
                ADD_OP(OP_DUP); ADD_OP(OP_ASSIGN); ADD_SYM($2);
            }
            | SYMBOL DECR {
                ADD_OP(OP_PUSH_SYM); ADD_SYM($1); ADD_OP(OP_DUP);
                ADD_OP(OP_DECR); ADD_OP(OP_ASSIGN); ADD_SYM($1);
            }
            | numexpr IN numexpr {
                ADD_OP(OP_IN_ARRAY);
            }
            ;
/* The non-terminals below exist to capture the PC (and emit branch
   placeholders) at the point where their keyword is seen */
while:      WHILE {
                $$ = GetPC(); StartLoopAddrList();
            }
            ;
for:        FOR {
                StartLoopAddrList(); $$ = GetPC();
            }
            ;
else:       ELSE {
                ADD_OP(OP_BRANCH); $$ = GetPC(); ADD_BR_OFF(0);
            }
            ;
cond:       /* nothing */ {
                ADD_OP(OP_BRANCH_NEVER); $$ = GetPC(); ADD_BR_OFF(0);
            }
            | numexpr {
                ADD_OP(OP_BRANCH_FALSE); $$ = GetPC(); ADD_BR_OFF(0);
            }
            ;
and:        AND {
                ADD_OP(OP_DUP); ADD_OP(OP_BRANCH_FALSE); $$ = GetPC();
                ADD_BR_OFF(0);
            }
            ;
or:         OR {
                ADD_OP(OP_DUP); ADD_OP(OP_BRANCH_TRUE); $$ = GetPC();
                ADD_BR_OFF(0);
            }
            ;
blank:      /* nothing */
            | blank '\n'
            ;

%% /* User Subroutines Section */


/*
** Parse a null terminated string and create a program from it (this is the
** parser entry point).  The program created by this routine can be
** executed using ExecuteProgram.  Returns program on success, or NULL
** on failure.  If the command failed, the error message is returned
** as a pointer to a static string in msg.  In both cases stoppedAt is set
** to the position in expr at which the lexer stopped reading.
*/
Program *ParseMacro(char *expr, char **msg, char **stoppedAt)
{
    Program *prog;

    BeginCreatingProgram();

    /* call yyparse to parse the string and check for success.  If the parse
       failed, return the error message and string position (the grammar
       aborts parsing at the first error) */
    InPtr = expr;
    if (yyparse()) {
        *msg = ErrMsg;
        *stoppedAt = InPtr;
        FreeProgram(FinishCreatingProgram());
        return NULL;
    }

    /* get the newly created program */
    prog = FinishCreatingProgram();

    /* parse succeeded */
    *msg = "";
    *stoppedAt = InPtr;
    return prog;
}


/*
** Lexical analyser called by yyparse.  Reads the next token starting at
** InPtr, advances InPtr past it, stores the associated symbol (if any) in
** yylval.sym and returns the token code, or 0 at end of input.
*/
static int yylex(void)
{
    int i, len;
    Symbol *s;
    static DataValue value = {NO_TAG, {0}};
    static char escape[] = "\\\"ntbrfave";
#ifdef EBCDIC_CHARSET
    static char replace[] = "\\\"\n\t\b\r\f\a\v\x27"; /* EBCDIC escape */
#else
    static char replace[] = "\\\"\n\t\b\r\f\a\v\x1B"; /* ASCII escape */
#endif

    /* skip whitespace, backslash-newline combinations, and comments, which are
       all considered whitespace */
    for (;;) {
        if (*InPtr == '\\' && *(InPtr + 1) == '\n') {
            InPtr += 2;
        } else if (*InPtr == ' ' || *InPtr == '\t') {
            InPtr++;
        } else if (*InPtr == '#') {
            while (*InPtr != '\n' && *InPtr != '\0') {
                /* Comments stop at escaped newlines */
                if (*InPtr == '\\' && *(InPtr + 1) == '\n') {
                    InPtr += 2;
                    break;
                }
                InPtr++;
            }
        } else {
            break;
        }
    }


    /* return end of input at the end of the string */
    if (*InPtr == '\0') {
        return 0;
    }

    /* process number tokens */
    if (isdigit((unsigned char)*InPtr))  { /* number */
        char name[28];
        sscanf(InPtr, "%d%n", &value.val.n, &len);
        sprintf(name, "const %d", value.val.n);
        InPtr += len;
        value.tag = INT_TAG;
        if ((yylval.sym=LookupSymbol(name)) == NULL)
            yylval.sym = InstallSymbol(name, CONST_SYM, value);
        return NUMBER;
    }

    /* process symbol tokens.  "define" is a special case not handled
       by this parser, considered end of input.  Another special case
       is action routine names which are allowed to contain '-' despite
       the ambiguity, handled in matchesActionRoutine. */
    if (isalpha((unsigned char)*InPtr) || *InPtr == '$') {
        if ((s=matchesActionRoutine(&InPtr)) == NULL) {
            char symName[MAX_SYM_LEN+1], *p = symName;
            *p++ = *InPtr++;
            while (isalnum((unsigned char)*InPtr) || *InPtr=='_') {
                /* characters beyond MAX_SYM_LEN are consumed but dropped */
                if (p >= symName + MAX_SYM_LEN)
                    InPtr++;
                else
                    *p++ = *InPtr++;
            }
            *p = '\0';
            if (!strcmp(symName, "while")) return WHILE;
            if (!strcmp(symName, "if")) return IF;
            if (!strcmp(symName, "else")) return ELSE;
            if (!strcmp(symName, "for")) return FOR;
            if (!strcmp(symName, "break")) return BREAK;
            if (!strcmp(symName, "continue")) return CONTINUE;
            if (!strcmp(symName, "return")) return RETURN;
            if (!strcmp(symName, "in")) return IN;
            if (!strcmp(symName, "$args")) return ARG_LOOKUP;
            if (!strcmp(symName, "delete") &&
                    follow_non_whitespace('(', SYMBOL, DELETE) == DELETE)
                return DELETE;
            if (!strcmp(symName, "define")) {
                /* un-read the keyword so the caller sees where it starts */
                InPtr -= 6;
                return 0;
            }
            if ((s=LookupSymbol(symName)) == NULL) {
                s = InstallSymbol(symName, symName[0]=='$' ?
                        (((symName[1] > '0' && symName[1] <= '9') &&
                          symName[2] == 0) ? ARG_SYM : GLOBAL_SYM) :
                        LOCAL_SYM, value);
                s->value.tag = NO_TAG;
            }
        }
        yylval.sym = s;
        return SYMBOL;
    }

    /* Process quoted strings with embedded escape sequences:
         For backslashes we recognise hexadecimal values with initial 'x' such
       as "\x1B"; octal value (upto 3 oct digits with a possible leading zero)
       such as "\33", "\033" or "\0033", and the C escapes: \", \', \n, \t, \b,
       \r, \f, \a, \v, and the added \e for the escape character, as for REs.
         Disallow hex/octal zero values (NUL): instead ignore the introductory
       backslash, eg "\x0xyz" becomes "x0xyz" and "\0000hello" becomes
       "0000hello". */

    if (*InPtr == '\"') {
        /* one extra byte so that the terminating NUL still fits when the
           constant reaches MAX_STRING_CONST_LEN characters */
        char string[MAX_STRING_CONST_LEN + 1], *p = string;
        char *backslash;
        InPtr++;
        while (*InPtr != '\0' && *InPtr != '\"' && *InPtr != '\n') {
            /* buffer full: keep scanning for the end, dropping the excess */
            if (p >= string + MAX_STRING_CONST_LEN) {
                InPtr++;
                continue;
            }
            if (*InPtr == '\\') {
                backslash = InPtr;
                InPtr++;
                if (*InPtr == '\n') {
                    InPtr++;
                    continue;
                }
                if (*InPtr == 'x') {
                    /* a hex introducer */
                    int hexValue = 0;
                    const char *hexDigits = "0123456789abcdef";
                    const char *hexD;
                    InPtr++;
                    if (*InPtr == '\0' ||
                        (hexD = strchr(hexDigits,
                                tolower((unsigned char)*InPtr))) == NULL) {
                        *p++ = 'x';
                    }
                    else {
                        hexValue = (int)(hexD - hexDigits);
                        InPtr++;
                        /* now do we have another digit? only accept one more */
                        if (*InPtr != '\0' &&
                            (hexD = strchr(hexDigits,
                                    tolower((unsigned char)*InPtr))) != NULL) {
                            hexValue = (int)(hexD - hexDigits) + (hexValue << 4);
                            InPtr++;
                        }
                        if (hexValue != 0) {
                            *p++ = (char)hexValue;
                        }
                        else {
                            InPtr = backslash + 1; /* just skip the backslash */
                        }
                    }
                    continue;
                }
                /* the RE documentation requires \0 as the octal introducer;
                   here you can start with any octal digit, but you are only
                   allowed up to three (or four if the first is '0'). */
                if ('0' <= *InPtr && *InPtr <= '7') {
                    if (*InPtr == '0') {
                        InPtr++;  /* octal introducer: don't count this digit */
                    }
                    if ('0' <= *InPtr && *InPtr <= '7') {
                        /* treat as octal - first digit */
                        char octD = *InPtr++;
                        int octValue = octD - '0';
                        if ('0' <= *InPtr && *InPtr <= '7') {
                            /* second digit */
                            octD = *InPtr++;
                            octValue = (octValue << 3) + octD - '0';
                            /* now do we have another digit? can we add it?
                               if value is going to be too big for char (greater
                               than 0377), stop converting now before adding the
                               third digit */
                            if ('0' <= *InPtr && *InPtr <= '7' &&
                                octValue <= 037) {
                                /* third digit is acceptable */
                                octD = *InPtr++;
                                octValue = (octValue << 3) + octD - '0';
                            }
                        }
                        if (octValue != 0) {
                            *p++ = (char)octValue;
                        }
                        else {
                            InPtr = backslash + 1; /* just skip the backslash */
                        }
                    }
                    else { /* \0 followed by non-digits: go back to 0 */
                        InPtr = backslash + 1; /* just skip the backslash */
                    }
                    continue;
                }
                for (i=0; escape[i]!='\0'; i++) {
                    if (escape[i] == *InPtr) {
                        *p++ = replace[i];
                        InPtr++;
                        break;
                    }
                }
                /* if we get here, we didn't recognise the character after
                   the backslash: just copy it next time round the loop */
            }
            else {
                *p++= *InPtr++;
            }
        }
        *p = '\0';
        /* step over the closing quote (or the newline that ended the
           string), but never past the end of the input */
        if (*InPtr != '\0')
            InPtr++;
        yylval.sym = InstallStringConstSymbol(string);
        return STRING;
    }

    /* process remaining two character tokens or return single char as token */
    switch(*InPtr++) {
    case '>':   return follow('=', GE, GT);
    case '<':   return follow('=', LE, LT);
    case '=':   return follow('=', EQ, '=');
    case '!':   return follow('=', NE, NOT);
    case '+':   return follow2('+', INCR, '=', ADDEQ, '+');
    case '-':   return follow2('-', DECR, '=', SUBEQ, '-');
    case '|':   return follow2('|', OR, '=', OREQ, '|');
    case '&':   return follow2('&', AND, '=', ANDEQ, '&');
    case '*':   return follow2('*', POW, '=', MULEQ, '*');
    case '/':   return follow('=', DIVEQ, '/');
    case '%':   return follow('=', MODEQ, '%');
    case '^':   return POW;
    default:    return *(InPtr-1);
    }
}

/*
** look ahead for >=, etc.  If the next input character is "expect", consume
** it and return "yes"; otherwise leave the input untouched and return "no".
*/
static int follow(char expect, int yes, int no)
{
    if (*InPtr++ == expect)
        return yes;
    InPtr--;
    return no;
}

/*
** As follow(), but with two candidate characters: returns yes1 if the next
** character is expect1, yes2 if it is expect2 (consuming it in both cases),
** and "no" without consuming anything otherwise.
*/
static int follow2(char expect1, int yes1, char expect2, int yes2, int no)
{
    char next = *InPtr++;
    if (next == expect1)
        return yes1;
    if (next == expect2)
        return yes2;
    InPtr--;
    return no;
}

/*
** Peek past spaces, tabs and backslash-newline pairs without moving InPtr.
** Returns "yes" if the first other character is "expect", else "no".
*/
static int follow_non_whitespace(char expect, int yes, int no)
{
    char *localInPtr = InPtr;

    while (1) {
        if (*localInPtr == ' ' || *localInPtr == '\t') {
            ++localInPtr;
        }
        else if (*localInPtr == '\\' && *(localInPtr + 1) == '\n') {
            localInPtr += 2;
        }
        else if (*localInPtr == expect) {
            return(yes);
        }
        else {
            return(no);
        }
    }
}

/*
** Look (way) ahead for hyphenated routine names which begin at inPtr.  A
** hyphenated name is allowed if it is pre-defined in the global symbol
** table.  If a matching name exists, returns the symbol, and update "inPtr".
** Returns NULL (leaving "inPtr" alone) if the name has no hyphen, is not
** in the symbol table, or is longer than MAX_SYM_LEN characters.
**
** I know this is horrible language design, but existing nedit action routine
** names contain hyphens.  Handling them here in the lexical analysis process
** is much easier than trying to deal with it in the parser itself.  (sorry)
*/
static Symbol *matchesActionRoutine(char **inPtr)
{
    char *c, *symPtr;
    int hasDash = False;
    char symbolName[MAX_SYM_LEN+1];
    Symbol *s;

    symPtr = symbolName;
    for (c = *inPtr; isalnum((unsigned char)*c) || *c=='_' ||
            ( *c=='-' && isalnum((unsigned char)(*(c+1)))); c++) {
        /* the name does not fit in symbolName: report no match rather than
           writing past the end of the buffer; yylex then handles the text
           as an ordinary identifier */
        if (symPtr >= symbolName + MAX_SYM_LEN)
            return NULL;
        if (*c == '-')
            hasDash = True;
        *symPtr++ = *c;
    }
    if (!hasDash)
        return NULL;
    *symPtr = '\0';
    s = LookupSymbol(symbolName);
    if (s != NULL)
        *inPtr = c;
    return s;
}

/*
** Called by yacc to report errors (just stores for returning when
** parsing is aborted.  The error token action is to immediate abort
** parsing, so this message is immediately reported to the caller
** of ParseMacro)
*/
static int yyerror(char *s)
{
    ErrMsg = s;
    return 0;
}