I’m implementing a lex version of a simple scanner I’ve coded in standard C. What I’m having trouble with is that I/O isn’t behaving the way I expect in the lex file. fscanf will not store strings and will store integers as 0 in the variables I indicate. fgetc returns characters that are not present in my test file. Is there anything (or something lacking) in my code below that jumps out as to why this is happening? Is how I’m using lex totally wrong?
scanner-lf.l:
%{
#include <stdio.h>
#include <stdlib.h>
extern int lineno;
extern int number;
extern char string[];
extern FILE *yyin;
#define INTEGER 1
#define FLOAT 2
#define READ 3
#define WRITE 4
#define ID 5
#define LPAREN 6
#define RPAREN 7
#define PLUS 8
#define MINUS 9
#define MULT 10
#define ASSIGN 11
#define DIV 12
#define COMMENT 13
#define ERROR 14
%}
%%
[ \t] {
printf("whitespace\n");
}
[0-9]* {
fscanf(yyin, "%d", &number);
printf("%d\n", number);
int c = fgetc(yyin);
printf("%c", c);
/*return INTEGER;*/
}
[a-zA-Z][a-zA-Z0-9]* {
fscanf(yyin, "%s", string);
printf("%s\n", string);
/*return ID;*/
}
scanner-lex.c:
#include <stdio.h>
#include <stdlib.h>
/* A couple of globals */
int lineno = 0;
int number;
char string[100];
FILE *yyin;
char charSet[] = { '(', ')', '+', '-', '*' };
#define INTEGER 1
#define FLOAT 2
#define READ 3
#define WRITE 4
#define ID 5
#define LPAREN 6
#define RPAREN 7
#define PLUS 8
#define MINUS 9
#define MULT 10
#define ASSIGN 11
#define DIV 12
#define COMMENT 13
#define ERROR 14
int yywrap(){
}
int main(int argc, char **argv){
int rc;
if (argc > 1){
yyin = fopen(argv[1], "r");
while ((rc=yylex())){
switch (rc){
case READ:
printf("found a read\n");
break;
case WRITE:
printf("found a write\n");
break;
case ID:
printf("found an id\n");
break;
case LPAREN:
printf("found an (\n");
break;
case RPAREN:
printf("found an )\n");
break;
case PLUS:
printf("found an +\n");
break;
case MINUS:
printf("found an -\n");
break;
case MULT:
printf("found an *\n");
break;
case ASSIGN:
printf("found an assign\n");
break;
case DIV:
printf("found a /\n");
break;
case INTEGER:
printf("found an integer\n");
break;
case FLOAT:
printf("found a float\n");
break;
case COMMENT:
printf("found a comment\n");
break;
case ERROR:
printf("Error\n");
break;
}
}
}
return 0;
}
This is wrong.
Lex reads from a file into its internal buffer, and then runs the specified action when the given pattern is maximally matched.
Let’s look at an example from the Flex manual:
As you can see,
The example pattern uses
+instead of*. You don’t want to try to match the empty string, so you should use+too.The contents of the matched pattern are stored in
yytext. You should not try to read the pattern from the file usingfscanf(), because the matched text has already been read by lex. So, simply usenumber = strtol(yytext, NULL, 10);in your action. (DO NOT useatoi(yytext)orsscanf(yytext, "%i", &number)— these will give bad output if your number has a leading zero.)