#include #include int regerr; #define INIT register char *sp = instring; #define GETC() (*sp++) #define PEEKC() (*sp) #define UNGETC(c) (--sp) #define RETURN(c) { regerr = 0; return('\0'); } #define ERROR(c) { remsg(c); return('\0'); } static void getrnge( char * ); static void remsg( int mval ) { regerr = mval; switch( mval ){ case 11 : fprintf( stderr, "Range endpoint too large.\n" ); break; case 16 : fprintf( stderr, "Bad number.\n" ); break; case 25 : fprintf( stderr, "``\\ digit'' out of range.\n" ); break; case 36 : fprintf( stderr, "Illegal or missing delimiter.\n" ); break; case 41 : fprintf( stderr, "No remembered search string.\n" ); break; case 42 : fprintf( stderr, "\\( \\) imbalance.\n" ); break; case 43 : fprintf( stderr, "Too many \\(.\n" ); break; case 44 : fprintf( stderr, "More than 2 numbers given in \\{ \\}.\n" ); break; case 46 : fprintf( stderr, "First number exceeds second in \\{ \\}.\n" ); break; case 49 : fprintf( stderr, "[ ] imbalance.\n" ); break; case 50 : fprintf( stderr, "Regular expression too long.\n" ); break; default : fprintf( stderr, "Unknown error %d\n", mval ); break; } } /* @(#)regexp.h 1.9 88/08/19 SMI; from S5R3.1 1.4.1.2 */ /* Copyright (c) 1984 AT&T */ /* All Rights Reserved */ /* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T */ /* The copyright notice above does not evidence any */ /* actual or intended publication of such source code. */ #ifndef _regexp_h #define _regexp_h #include #define CBRA 2 #define CCHR 4 #define CDOT 8 #define CCL 12 #define CXCL 16 #define CDOL 20 #define CCEOF 22 #define CKET 24 #define CBRC 28 #define CLET 30 #define CBACK 36 #define NCCL 40 #define STAR 01 #define RNGE 03 #define NBRA 9 #define PLACE(c) ep[c >> 3] |= bittab[c & 07] #define ISTHERE(c) (ep[c >> 3] & bittab[c & 07]) #define ecmp(s1, s2, n) (!strncmp(s1, s2, n)) static char *braslist[NBRA]; static char *braelist[NBRA]; int sed, nbra; char *loc1, *loc2, *locs; static int nodelim; int circf; static int low; static int size; static char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; int advance( char *lp, char *ep ) /* register char *lp, *ep; */ { register char *curlp; register int c; char *bbeg; register char neg; int ct; while(1) { neg = 0; switch(*ep++) { case CCHR: if(*ep++ == *lp++) continue; return(0); case CDOT: if(*lp++) continue; return(0); case CDOL: if(*lp == 0) continue; return(0); case CCEOF: loc2 = lp; return(1); case CXCL: c = (unsigned char)*lp++; if(ISTHERE(c)) { ep += 32; continue; } return(0); case NCCL: neg = 1; case CCL: c = *lp++; if(((c & 0200) == 0 && ISTHERE(c)) ^ neg) { ep += 16; continue; } return(0); case CBRA: braslist[*ep++] = lp; continue; case CKET: braelist[*ep++] = lp; continue; case CCHR | RNGE: c = *ep++; getrnge(ep); while(low--) if(*lp++ != c) return(0); curlp = lp; while(size--) if(*lp++ != c) break; if(size < 0) lp++; ep += 2; goto star; case CDOT | RNGE: getrnge(ep); while(low--) if(*lp++ == '\0') return(0); curlp = lp; while(size--) if(*lp++ == '\0') break; if(size < 0) lp++; ep += 2; goto star; case CXCL | RNGE: getrnge(ep + 32); while(low--) { c = (unsigned char)*lp++; if(!ISTHERE(c)) return(0); } curlp = lp; while(size--) { c = (unsigned char)*lp++; if(!ISTHERE(c)) break; } if(size < 0) lp++; ep += 34; /* 32 + 2 */ goto star; case NCCL | RNGE: neg = 1; case CCL | RNGE: getrnge(ep + 16); while(low--) { c = *lp++; if(((c & 0200) || !ISTHERE(c)) ^ neg) return(0); } curlp = lp; while(size--) { c = *lp++; if(((c & 0200) || !ISTHERE(c)) ^ neg) break; } if(size < 0) lp++; ep += 18; /* 16 + 2 */ goto star; case CBACK: bbeg = braslist[*ep]; ct = braelist[*ep++] - bbeg; if(ecmp(bbeg, lp, ct)) { lp += ct; continue; } return(0); case CBACK | STAR: bbeg = braslist[*ep]; ct = braelist[*ep++] - bbeg; curlp = lp; while(ecmp(bbeg, lp, ct)) lp += ct; while(lp >= curlp) { if(advance(lp, ep)) return(1); lp -= ct; } return(0); case CDOT | STAR: curlp = lp; while(*lp++); goto star; case CCHR | STAR: curlp = lp; while(*lp++ == *ep); ep++; goto star; case CXCL | STAR: curlp = lp; do { c = (unsigned char)*lp++; } while(ISTHERE(c)); ep += 32; goto star; case NCCL | STAR: neg = 1; case CCL | STAR: curlp = lp; do { c = *lp++; } while(((c & 0200) == 0 && ISTHERE(c)) ^ neg); ep += 16; goto star; star: if(--lp == curlp) { continue; } if(*ep == CCHR) { c = ep[1]; do { if(*lp != c) continue; if(advance(lp, ep)) return(1); } while(lp-- > curlp); return(0); } if(*ep == CBACK) { c = *(braslist[ep[1]]); do { if(*lp != c) continue; if(advance(lp, ep)) return(1); } while(lp-- > curlp); return(0); } do { if(lp == locs) break; if(advance(lp, ep)) return(1); } while(lp-- > curlp); return(0); #define uletter(c) (isalpha(c) || (c) == '_') case CBRC: if(lp == loc1) continue; c = (unsigned char)*lp; if(uletter(c) || isdigit(c)) { c = (unsigned char)lp[-1]; if(!uletter(c) && !isdigit(c)) continue; } return(0); case CLET: c = (unsigned char)*lp; if(!uletter(c) && !isdigit(c)) continue; return(0); #undef uletter } } /*NOTREACHED*/ } /*ARGSUSED*/ char * compile( char *instring, char *ep, char *endbuf, int seof) /* register char *ep; char *instring, *endbuf; */ { INIT /* Dependent declarations and initializations */ register int c; register int eof = seof; char *lastep; int cclcnt; char bracket[NBRA], *bracketp; int closed; int neg; int lc; int i, cflg; int iflag; /* used for non-ascii characters in brackets */ if((c = GETC()) == eof || c == '\n') { if(c == '\n') { UNGETC(c); nodelim = 1; } if(*ep == 0 && !sed) ERROR(41); RETURN(ep); } bracketp = bracket; circf = closed = nbra = 0; lastep = 0; if(c == '^') circf++; else UNGETC(c); while(1) { if(ep >= endbuf) ERROR(50); c = GETC(); if(c == eof) { *ep++ = CCEOF; if (bracketp != bracket) ERROR(42); RETURN(ep); } if(c != '*' && ((c != '\\') || (PEEKC() != '{'))) lastep = ep; switch(c) { case '.': *ep++ = CDOT; continue; case '\0': case '\n': if(!sed) { UNGETC(c); *ep++ = CCEOF; nodelim = 1; if(bracketp != bracket) ERROR(42); RETURN(ep); } else ERROR(36); case '*': if(lastep == 0 || *lastep == CBRA || *lastep == CKET || *lastep == CBRC || *lastep == CLET) goto defchar; *lastep |= STAR; continue; case '$': if(PEEKC() != eof && PEEKC() != '\n' && PEEKC() != '\0') goto defchar; *ep++ = CDOL; continue; case '[': if(&ep[17] >= endbuf) ERROR(50); *ep++ = CCL; lc = 0; for(i = 0; i < 16; i++) ep[i] = 0; neg = 0; if((c = GETC()) == '^') { neg = 1; c = GETC(); } iflag = 1; do { c &= 0377; if(c == '\0' || c == '\n') ERROR(49); if((c & 0200) && iflag) { iflag = 0; if(&ep[32] >= endbuf) ERROR(50); ep[-1] = CXCL; for(i = 16; i < 32; i++) ep[i] = 0; } if(c == '-' && lc != 0) { if((c = GETC()) == ']') { PLACE('-'); break; } if(sed && c == '\\') { switch(PEEKC()) { case 'n': (void) GETC(); c = '\n'; break; case '\\': (void) GETC(); c = '\\'; break; } } if((c & 0200) && iflag) { iflag = 0; if(&ep[32] >= endbuf) ERROR(50); ep[-1] = CXCL; for(i = 16; i < 32; i++) ep[i] = 0; } while(lc < c ) { PLACE(lc); lc++; } } else if(sed && c == '\\') { switch(PEEKC()) { case 'n': (void) GETC(); c = '\n'; break; case '\\': (void) GETC(); c = '\\'; break; } } lc = c; PLACE(c); } while((c = GETC()) != ']'); if(iflag) iflag = 16; else iflag = 32; if(neg) { if(iflag == 32) { for(cclcnt = 0; cclcnt < iflag; cclcnt++) ep[cclcnt] ^= 0377; ep[0] &= 0376; } else { ep[-1] = NCCL; /* make nulls match so test fails */ ep[0] |= 01; } } ep += iflag; continue; case '\\': switch(c = GETC()) { case '<': *ep++ = CBRC; continue; case '>': *ep++ = CLET; continue; case '(': if(nbra >= NBRA) ERROR(43); *bracketp++ = nbra; *ep++ = CBRA; *ep++ = nbra++; continue; case ')': if(bracketp <= bracket) ERROR(42); *ep++ = CKET; *ep++ = *--bracketp; closed++; continue; case '{': if(lastep == (char *) 0) goto defchar; *lastep |= RNGE; cflg = 0; nlim: c = GETC(); i = 0; do { if('0' <= c && c <= '9') i = 10 * i + c - '0'; else ERROR(16); } while(((c = GETC()) != '\\') && (c != ',')); if(i > 255) ERROR(11); *ep++ = i; if(c == ',') { if(cflg++) ERROR(44); if((c = GETC()) == '\\') *ep++ = 255; else { UNGETC(c); goto nlim; /* get 2'nd number */ } } if(GETC() != '}') ERROR(45); if(!cflg) /* one number */ *ep++ = i; else if((ep[-1] & 0377) < (ep[-2] & 0377)) ERROR(46); continue; case '\n': ERROR(36); case 'n': if(sed) c = '\n'; goto defchar; default: if(c >= '1' && c <= '9') { if((c -= '1') >= closed) ERROR(25); *ep++ = CBACK; *ep++ = c; continue; } } /* Drop through to default to use \ to turn off special chars */ defchar: default: lastep = ep; *ep++ = CCHR; *ep++ = c; } } /*NOTREACHED*/ } int step( char *p1, char *p2 ) /* register char *p1, *p2; */ { register int c; /* * Save the beginning of the string in "loc1", so that "advance" * knows when it's looking at the first character of the string; * it needs this to implement \<. */ loc1 = p1; if(circf) return(advance(p1, p2)); /* fast check for first character */ if(*p2 == CCHR) { c = p2[1]; do { if(*p1 != c) continue; if(advance(p1, p2)) { loc1 = p1; return(1); } } while(*p1++); return(0); } /* regular algorithm */ do { if(advance(p1, p2)) { loc1 = p1; return(1); } } while(*p1++); return(0); } static void getrnge( char *str ) /* register char *str; */ { register int sizecode; low = *str++ & 0377; sizecode = *str & 0377; if (sizecode == 255) size = 20000; else size = sizecode - low; } #endif /*!_regexp_h*/