词法解析re2c实例
编译阶段,PHP 分别使用 re2c 和 bison 来完成词法分析和语法分析,最终生成抽象语法树(AST)。re2c:词法分析器,将输入分割为一个个有意义的词块,称为 token。bison:语法分析器,确定词法分析器分割出的 token 是如何彼此关联的。
re2c官网地址:http://re2c.org
实例1 创建文件re2c_numbers.l
#include <stdio.h>
/* Classification that lex() assigns to one input string. */
typedef enum {
    ERR, /* input matched none of the number rules */
    BIN, /* binary literal */
    OCT, /* octal literal */
    DEC, /* decimal literal */
    HEX  /* hexadecimal literal */
} NUM_T;
/*
 * Classify a NUL-terminated string as a binary, octal, decimal or
 * hexadecimal integer literal.
 *
 * YYCURSOR points at the first character of the input. The body of this
 * function is the re2c block below; re2c replaces it with the generated
 * scanner code.
 *
 * Every number rule is followed by `end` (the NUL byte), so a rule only
 * fires when the whole string matches it; any other input falls to the
 * default `*` rule and returns ERR. The single-quoted prefixes '0b' and
 * '0x' are case-insensitive string literals in re2c syntax. YYFILL is
 * disabled, so the scanner relies on the terminating NUL instead of
 * refilling a buffer.
 */
static NUM_T lex(const char *YYCURSOR)
{
/* Backtrack position used by the code that re2c generates for the block below. */
const char *YYMARKER;
/*!re2c
re2c:define:YYCTYPE = char;
re2c:yyfill:enable = 0;
end = "\x00";
bin = '0b' [01]+;
oct = "0" [0-7]*;
dec = [1-9][0-9]*;
hex = '0x' [0-9a-fA-F]+;
* { return ERR; }
bin end { return BIN; }
oct end { return OCT; }
dec end { return DEC; }
hex end { return HEX; }
*/
}
int main(int argc, char **argv)
{
for (int i = 1; i < argc; ++i) {
switch (lex(argv[i])) {
case ERR: printf("error\n"); break;
case BIN: printf("binary\n"); break;
case OCT: printf("octal\n"); break;
case DEC: printf("decimal\n"); break;
case HEX: printf("hexadecimal\n"); break;
}
}
return 0;
}
执行命令 `re2c re2c_numbers.l -o re2c_numbers.c` 会生成文件 re2c_numbers.c,查看内容会发现原文件中注释里的 re2c 规则已经被替换成相应的 C 代码。
re2c_numbers.c内容如下
/* Generated by re2c 0.16 on Tue Mar 5 04:32:31 2019 */
#line 1 "re2c_test.l"
#include <stdio.h>
/* Classification that lex() assigns to one input string. */
typedef enum {
    ERR, /* input matched none of the number rules */
    BIN, /* binary literal */
    OCT, /* octal literal */
    DEC, /* decimal literal */
    HEX  /* hexadecimal literal */
} NUM_T;
/*
 * Scanner generated by re2c: a goto-based state machine that classifies the
 * NUL-terminated string at YYCURSOR.
 *
 * Returns BIN, OCT, DEC or HEX when the whole string, up to and including
 * the terminating NUL, matches the corresponding number form, and ERR
 * otherwise. The #line directives switch between positions in the rule
 * file and positions in this generated file.
 */
static NUM_T lex(const char *YYCURSOR)
{
/* Backtrack position: saved in yy4/yy5 and restored in yy10 when a match attempt fails. */
const char *YYMARKER;
#line 12 "re2c_test.c"
{
char yych;
/* Initial state: dispatch on the first character. */
yych = *YYCURSOR;
switch (yych) {
case '0': goto yy4;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': goto yy5;
default: goto yy2;
}
/* yy2: the first character cannot start any number; consume it and fall into the ERR action. */
yy2:
++YYCURSOR;
/* yy3: action of the default rule. */
yy3:
#line 18 "re2c_test.l"
{ return ERR; }
#line 34 "re2c_test.c"
/* yy4: a leading '0' was read; it may be octal "0", more octal digits, or a 0b / 0x prefix. */
yy4:
yych = *(YYMARKER = ++YYCURSOR);
switch (yych) {
case 0x00: goto yy6;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7': goto yy8;
case 'B':
case 'b': goto yy11;
case 'X':
case 'x': goto yy12;
default: goto yy3;
}
/* yy5: a leading digit 1-9 was read; expect more decimal digits or the terminating NUL. */
yy5:
yych = *(YYMARKER = ++YYCURSOR);
switch (yych) {
case 0x00: goto yy13;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': goto yy15;
default: goto yy3;
}
/* yy6: octal digits followed by NUL; consume the NUL and report OCT. */
yy6:
++YYCURSOR;
#line 20 "re2c_test.l"
{ return OCT; }
#line 73 "re2c_test.c"
/* yy8: loop over further octal digits. */
yy8:
++YYCURSOR;
yych = *YYCURSOR;
switch (yych) {
case 0x00: goto yy6;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7': goto yy8;
default: goto yy10;
}
/* yy10: match attempt failed; rewind to the saved marker and report ERR. */
yy10:
YYCURSOR = YYMARKER;
goto yy3;
/* yy11: "0b" prefix read; a NUL here means no digit follows, so fail (the <= test also */
/* rejects negative char values where char is signed); otherwise check for a binary digit. */
yy11:
yych = *++YYCURSOR;
if (yych <= 0x00) goto yy10;
goto yy18;
/* yy12: "0x" prefix read; same check as yy11, then continue with the hex digit test. */
yy12:
yych = *++YYCURSOR;
if (yych <= 0x00) goto yy10;
goto yy20;
/* yy13: decimal digits followed by NUL; consume the NUL and report DEC. */
yy13:
++YYCURSOR;
#line 21 "re2c_test.l"
{ return DEC; }
#line 104 "re2c_test.c"
/* yy15: loop over further decimal digits. */
yy15:
++YYCURSOR;
yych = *YYCURSOR;
switch (yych) {
case 0x00: goto yy13;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': goto yy15;
default: goto yy10;
}
/* yy17: advance to the next character of the binary digit run. */
yy17:
++YYCURSOR;
yych = *YYCURSOR;
/* yy18: test the current character: binary digit continues, NUL accepts, anything else fails. */
yy18:
switch (yych) {
case 0x00: goto yy21;
case '0':
case '1': goto yy17;
default: goto yy10;
}
/* yy19: advance to the next character of the hex digit run. */
yy19:
++YYCURSOR;
yych = *YYCURSOR;
/* yy20: test the current character: hex digit continues, NUL accepts, anything else fails. */
yy20:
switch (yych) {
case 0x00: goto yy23;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f': goto yy19;
default: goto yy10;
}
/* yy21: binary digits followed by NUL; consume the NUL and report BIN. */
yy21:
++YYCURSOR;
#line 19 "re2c_test.l"
{ return BIN; }
#line 166 "re2c_test.c"
/* yy23: hex digits followed by NUL; consume the NUL and report HEX. */
yy23:
++YYCURSOR;
#line 22 "re2c_test.l"
{ return HEX; }
#line 171 "re2c_test.c"
}
#line 23 "re2c_test.l"
}
int main(int argc, char **argv)
{
for (int i = 1; i < argc; ++i) {
switch (lex(argv[i])) {
case ERR: printf("error\n"); break;
case BIN: printf("binary\n"); break;
case OCT: printf("octal\n"); break;
case DEC: printf("decimal\n"); break;
case HEX: printf("hexadecimal\n"); break;
}
}
return 0;
}
编译re2c生成的c代码文件,生成可执行文件(a.out)并执行,可以看到返回结果,判定输入内容为十进制
gcc re2c_numbers.c
./a.out 2323
decimal