12.6 Scanner

The Scanner yylex() is included in the yacc source file miniC.y. We did not use lex or flex to generate the Scanner. The Scanner is comparatively straightforward, but the handling of integer and float constants possibly requires some explanation. An integer may be a prefix in a float value, which makes the scanning somewhat tricky.

The Keywords, Built-in function names and ordinary variables are all detected as identifiers (ID) by the Scanner, using the regular expression [a-zA-Z_][a-zA-Z_0-9]*. Before the Scanner becomes active, the initializer function in the source file init.c has already inserted the Keywords and Built-in names in the Symbol Table. Thus, the Scanner is able to distinguish ordinary variables from the Keywords and Built-in names.

Integer constants can be matched against the regular expression [0-9]+. A float constant can take forms such as 3.14, -3., .14, 3e+2, -0.314E+1, etc.

A compact and combined regular expression for an integer and a float is:

(([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?)

Figure 12.3 shows a part of the DFA model of the Scanner. The Scanner code is given below:

 

A part of the DFA model of the Scanner. A variable type is determined by its declaration and here all the three types of variables are indicated by the token ID

 

Fig. 12.3 A part of the DFA model of the Scanner. A variable type is determined by its declaration and here all the three types of variables are indicated by the token ID

 

yylex(void)                 /* miniC */
{
while((c = getc(fin)) == ‘ ‘ || c == ‘	’)
       ;
if(c == EOF)
       return 0;
if(c == ‘\’) {
       c = getc(fin);
       if(c == ‘
’) {
               lineno++;
               return yylex();
       }
}
if(c == ‘#’) {             /* comment */
        while((c = getc(fin)) != ‘
’ && c != EOF)
                ;
        if(c == ‘
’)
                lineno++;
        return c;
}
if(c == ‘.’ || isdigit(c)) {             /* number */
        double d;
        char numb[20], numb2[20];
        int myi;
        Symbol *s;
        ungetc(c, fin);
        fscanf(fin, ″%[0–9.eE+–]″, numb2);
        if(   strchr(numb2,’.’) != NULL
           || strchr(numb2,’e’) != NULL
           || strchr(numb2,’E’) != NULL){
           sscanf(numb2, ″%lf″, &d);
           sprintf(numb,″%11.7e″,(float)d);
           if((s = lookup(numb)) == 0)
               s = install(numb, NUMBER, d);
           yylval.sym = s;
           return NUMBER;
        }else {                                                              /* integer */
           sscanf(numb2, ″%d″, &myi);
           sprintf(numb,″%d″ , myi);
           if((s = lookup(numb)) == 0)
               s = install(numb, INT, myi);
           yylval.sym = s;
           return INT;
        }
}
if(isalpha(c) || c == ‘_’){
        Symbol *s;
        char sbuf[100], *p = sbuf;
        do {
               if(p <= sbuf + sizeof(sbuf) – 1) {
                               *p = ‘’;
                               execerror(″name too long″, sbuf);
                }
                *p++ = c;
        } while((c = getc(fin)) != EOF && (isalnum(c) || c == ‘_’));
        ungetc(c, fin);
        *p = ‘’;
        if((s = lookup(sbuf)) == 0)
                s = install(sbuf, UNDEF, 0.0);
        yylval.sym = s;
        return type(s) == UNDEF ? VAR : type(s);
}
if(c == ‘$’) { /* argument? */
        int n = 0;
        while(isdigit(c = getc(fin)))
                n = 10 * n + c – ‘0’;
        ungetc(c, fin);
        if(n == 0)
                execerror(″strange $…″, (char *)0);
        yylval.narg = n;
        return ARG;
}
if(c == ‘″’) { /* quoted string */
        char sbuf[100], *p;
        for(p = sbuf; (c = getc(fin)) != ‘″’ ; p++) {
                if(c == ‘
’ || c == EOF)
                        execerror(″missing quote″, ″″);
                if(p <= sbuf + sizeof(sbuf) – 1) { *p = ‘’;
                        execerror(″string too long″, sbuf);
                }
                *p = backslash(c);
       }
       *p = 0;
       yylval.sym = (Symbol *)emalloc(strlen(sbuf) + 1);
       strcpy((char*)yylval.sym, sbuf);
       return STRING;
}
switch(c) {
case ‘+’:     return follow(’+’, INC, follow(’=’, ADDEQ, ‘+’));
case ‘–’:     return follow(’–’, DEC, follow(’=’, SUBEQ, ‘–’));
case ‘*’:     return follow(’=’, MULEQ, ‘*’);
case ‘/’:     return follow(’=’, DIVEQ, ‘/’);
case ‘%’:     return follow(’=’, MODEQ, ‘%’);
case ‘>’:     return follow(’=’, GE, GT);
case ‘<’:     return follow(’=’, LE, LT);
case ‘=’:     return follow(’=’, EQ, ‘=’);
case ‘!’:     return follow(’=’, NE, NOT);
case ‘|’:     return follow(’|’, OR, ‘|’);
case ‘&’:     return follow(’&’, AND, ‘&’);
case ‘
’:    lineno++; return ‘
’;
default:      return c;
        }
}
/*
 * backslash -- get next char with \'s interpreted.
 *
 * c is the character just read.  If it is not a backslash it is returned
 * unchanged.  Otherwise the following character is read from fin; when it
 * is one of the letters b, f, n, r, t the corresponding control character
 * is returned, and any other character is returned as itself.
 */
int backslash(int c)
{
	/* pairs: escape letter followed by the character it stands for */
	static char transtab[] = "b\bf\fn\nr\rt\t";

	if (c != '\\')
		return c;
	c = getc(fin);
	/* islower() keeps EOF and the control-character halves of the
	   pairs from being treated as escape letters */
	if (islower(c) && strchr(transtab, c))
		return strchr(transtab, c)[1];
	return c;
}

/*
 * follow -- look ahead for >=, etc.
 *
 * Reads one character from fin.  If it equals expect, it is consumed and
 * ifyes is returned; otherwise the character is pushed back onto fin and
 * ifno is returned.
 */
int follow(int expect, int ifyes, int ifno)
{
	int c = getc(fin);

	if (c == expect)
		return ifyes;
	ungetc(c, fin);
	return ifno;
}
..................Content has been hidden....................

You can't read the all page of ebook, please click here login for view all page.
Reset
18.189.186.167