/*
* regexp-parser.c
*
* Created on: Jun 22, 2009
* Author: petergoodman
* Version: $Id$
*/
#include <p-regexp.h>
static char buffer[3],
curr_char;
static unsigned int line = 0,
column = 0,
in_char_class = 0,
first_char_in_class = 1,
got_char = 0;
static FILE *fp = NULL;
/* grammar terminals */
enum {
L_START_GROUP,
L_END_GROUP,
L_START_CLASS,
L_START_NEG_CLASS,
L_END_CLASS,
L_POSITIVE_CLOSURE,
L_KLEENE_CLOSURE,
L_OPTIONAL,
L_CARROT,
L_ANCHOR_START,
L_ANCHOR_END,
L_OR,
L_CHARACTER_RANGE,
L_CHARACTER,
L_SPACE,
L_NEW_LINE,
L_CARRIAGE_RETURN,
L_TAB,
L_ANY_CHAR
};
/* grammar non-terminals */
enum {
P_MACHINE,
P_EXPR,
P_CAT_EXPR,
P_FACTOR,
P_TERM,
P_OR_EXPR,
P_STRING,
P_CHAR,
P_CHAR_CLASS_STRING,
P_OPT_CHAR_CLASS_STRING,
P_CHAR_RANGE,
P_CHARACTER_CLASS,
P_NEGATED_CHARACTER_CLASS,
P_KLEENE_CLOSURE,
P_POSITIVE_CLOSURE,
P_OPTIONAL_TERM
};
/* data structure holding information to perform Thompson's construction while
* the parse tree is being traversed. */
typedef struct PThompsonsConstruction {
PNFA *nfa;
unsigned int state_stack[256];
int top_state;
} PThompsonsConstruction;
/**
* The top-level non-terminal action. This ties all of the information together
* for the NFA.
*/
static void Machine(PThompsonsConstruction *thompson,
unsigned char phrase,
unsigned int num_branches,
PParseTree *branches[]) {
unsigned int start, end;
PT_Terminal *term;
PParseTree *tree;
switch(num_branches) {
/* ... */
case 1:
if(branches[0]->type != PT_NON_TERMINAL) {
return;
}
start = thompson->state_stack[thompson->top_state--];
end = thompson->state_stack[thompson->top_state--];
break;
case 2:
/* ^ ... */
if(branches[0]->type == PT_TERMINAL) {
start = nfa_add_state(thompson->nfa);
/* ^ ... */
if(branches[1]->type != PT_TERMINAL) {
nfa_add_value_transition(
thompson->nfa,
start,
thompson->state_stack[thompson->top_state--],
'\n'
);
end = thompson->state_stack[thompson->top_state--];
/* ^$ */
} else {
end = nfa_add_state(thompson->nfa);
nfa_add_value_transition(thompson->nfa, start, end, '\n');
}
/* ... $ */
} else {
start = thompson->state_stack[thompson->top_state--];
end = nfa_add_state(thompson->nfa);
nfa_add_value_transition(
thompson->nfa,
thompson->state_stack[thompson->top_state--],
end,
'\n'
);
}
break;
/* ^ ... $ */
case 3:
start = nfa_add_state(thompson->nfa);
end = nfa_add_state(thompson->nfa);
nfa_add_value_transition(
thompson->nfa,
start,
thompson->state_stack[thompson->top_state--],
'\n'
);
nfa_add_value_transition(
thompson->nfa,
thompson->state_stack[thompson->top_state--],
end,
'\n'
);
break;
default:
start = nfa_add_state(thompson->nfa);
end = start;
break;
}
nfa_change_start_state(thompson->nfa, start);
nfa_add_accepting_state(thompson->nfa, end);
}
/**
* For two sub-expressions A and B, AB is turned into the following structure:
*
* >--(A)--()--(B)-->
*
* Unfortunately this adds in extra epsilon transitions to connect various
* NFA sub-machines. The construction of NFAs is such that it would be
* extremely annoying to attempt to merge two states.
*/
static void CatExpr(PThompsonsConstruction *thompson,
unsigned char phrase,
unsigned int num_branches,
PParseTree *branches[]) {
unsigned int start, prev, end, inter_start, inter_end;
PT_Terminal *character;
end = thompson->state_stack[thompson->top_state - 1];
if(num_branches > 1) {
for(; --num_branches; ) {
inter_start = thompson->state_stack[thompson->top_state];
inter_end = thompson->state_stack[thompson->top_state - 3];
nfa_add_epsilon_transition(
thompson->nfa,
inter_end,
inter_start
);
thompson->top_state -= 2;
}
}
thompson->state_stack[thompson->top_state - 1] = end;
}
/**
* For two sub-expression A and B, A|B is turned into the following structure:
*
* .-->--(A)-->--.
* >--(start)--| |--(end)-->
* `-->--(B)-->--'
*
*/
static void OrExpr(PThompsonsConstruction *thompson,
unsigned char phrase,
unsigned int num_branches,
PParseTree *branches[]) {
unsigned int a1_start, a1_end, a2_start, a2_end, start, end;
a1_start = thompson->state_stack[thompson->top_state--];
a1_end = thompson->state_stack[thompson->top_state--];
a2_start = thompson->state_stack[thompson->top_state--];
a2_end = thompson->state_stack[thompson->top_state--];
start = nfa_add_state(thompson->nfa);
end = nfa_add_state(thompson->nfa);
nfa_add_epsilon_transition(thompson->nfa, start, a1_start);
nfa_add_epsilon_transition(thompson->nfa, start, a2_start);
nfa_add_epsilon_transition(thompson->nfa, a1_end, end);
nfa_add_epsilon_transition(thompson->nfa, a2_end, end);
thompson->state_stack[++thompson->top_state] = end;
thompson->state_stack[++thompson->top_state] = start;
}
/**
* For two characters A and B, AB is turned into the following structure:
*
* >--(A)-->--(B)-->
*
*/
static void String(PThompsonsConstruction *thompson,
unsigned char phrase,
unsigned int num_branches,
PParseTree *branches[]) {
unsigned int start, prev, end, i;
PT_Terminal *character;
start = nfa_add_state(thompson->nfa);
prev = start;
for(i = 0; i < num_branches; ++i) {
character = (PT_Terminal *) branches[i];
end = nfa_add_state(thompson->nfa);
nfa_add_value_transition(
thompson->nfa,
prev,
end,
character->lexeme->str[0]
);
prev = end;
}
thompson->state_stack[++thompson->top_state] = end;
thompson->state_stack[++thompson->top_state] = start;
}
typedef void (R_set_elm_fnc_t)(PSet *, unsigned int);
static void R_char_class(PThompsonsConstruction *thompson,
unsigned char phrase,
unsigned int num_branches,
PParseTree *branches[],
PSet *set,
R_set_elm_fnc_t *fnc) {
unsigned int start, end, i;
unsigned char range_start, range_end;
PT_NonTerminal *range;
for(i = 0; i < num_branches; ++i) {
if(branches[i]->type == PT_NON_TERMINAL) {
range = (PT_NonTerminal *) branches[i];
range_start = ((PT_Terminal *) tree_get_branch(range, 0))->lexeme->str[0];
range_end = ((PT_Terminal *) tree_get_branch(range, 1))->lexeme->str[0];
for(; range_start <= range_end; ++range_start) {
fnc(set, range_start);
}
} else {
fnc(set,((PT_Terminal *) branches[i])->lexeme->str[0]);
}
}
start = nfa_add_state(thompson->nfa);
end = nfa_add_state(thompson->nfa);
nfa_add_set_transition(thompson->nfa, start, end, set);
thompson->state_stack[++thompson->top_state] = end;
thompson->state_stack[++thompson->top_state] = start;
}
/**
* For all characters c in some character class C, C is turned into the following
* structure:
*
* >--({c1, c2, ..., cN})-->
*
*/
static void CharClass(PThompsonsConstruction *thompson,
unsigned char phrase,
unsigned int num_branches,
PParseTree *branches[]) {
R_char_class(
thompson,
phrase,
num_branches,
branches,
set_alloc(),
&set_add_elm
);
}
/**
* For all characters c in some negated character class C, C is turned into the
* following structure:
*
* >--({c1, c2, ..., cN}^c)-->
*
*/
static void NegatedCharClass(PThompsonsConstruction *thompson,
unsigned char phrase,
unsigned int num_branches,
PParseTree *branches[]) {
R_char_class(
thompson,
phrase,
num_branches,
branches,
set_alloc_inverted(),
&set_remove_elm
);
}
/**
* For some sub-expression A, A* is turned into the following structure:
*
* .-->-->--.
* >--(start)--|--<-. |--(end)-->
* `->-(A)--'
*
*/
static void KleeneClosure(PThompsonsConstruction *thompson,
unsigned char phrase,
unsigned int num_branches,
PParseTree *branches[]) {
unsigned int c_start, c_end, start, end, loop_start, loop_end;
loop_start = thompson->state_stack[thompson->top_state--];
loop_end = thompson->state_stack[thompson->top_state--];
start = nfa_add_state(thompson->nfa);
end = nfa_add_state(thompson->nfa);
nfa_add_epsilon_transition(thompson->nfa, start, loop_start);
nfa_add_epsilon_transition(thompson->nfa, start, end);
nfa_add_epsilon_transition(thompson->nfa, loop_end, end);
nfa_add_epsilon_transition(thompson->nfa, loop_end, loop_start);
thompson->state_stack[++thompson->top_state] = end;
thompson->state_stack[++thompson->top_state] = start;
}
/**
* For some sub-expression A, A+ is turned into the following structure:
*
* >--(start)-->--<-. .--(end)-->
* `->-(A)--'
*
*/
static void PositiveClosure(PThompsonsConstruction *thompson,
unsigned char phrase,
unsigned int num_branches,
PParseTree *branches[]) {
unsigned int c_start, c_end, start, end, loop_start, loop_end;
loop_start = thompson->state_stack[thompson->top_state--];
loop_end = thompson->state_stack[thompson->top_state--];
start = nfa_add_state(thompson->nfa);
end = nfa_add_state(thompson->nfa);
nfa_add_epsilon_transition(thompson->nfa, start, loop_start);
nfa_add_epsilon_transition(thompson->nfa, loop_end, end);
nfa_add_epsilon_transition(thompson->nfa, loop_end, loop_start);
thompson->state_stack[++thompson->top_state] = end;
thompson->state_stack[++thompson->top_state] = start;
}
/**
* For some sub-expression A, A? is turned into the following structure:
*
* >--(start)-->-->--->--.--(end)-->
* `->-(A)--'
*
*/
static void OptionalTerm(PThompsonsConstruction *thompson,
unsigned char phrase,
unsigned int num_branches,
PParseTree *branches[]) {
unsigned int c_start, c_end, start, end, loop_start, loop_end;
loop_start = thompson->state_stack[thompson->top_state--];
loop_end = thompson->state_stack[thompson->top_state--];
start = nfa_add_state(thompson->nfa);
end = nfa_add_state(thompson->nfa);
nfa_add_epsilon_transition(thompson->nfa, start, loop_start);
nfa_add_epsilon_transition(thompson->nfa, start, end);
nfa_add_epsilon_transition(thompson->nfa, loop_end, end);
thompson->state_stack[++thompson->top_state] = end;
thompson->state_stack[++thompson->top_state] = start;
}
/**
* Scan the input letter-by-letter until a lexeme is matched. Stuff the lexeme
* into the 'token' variable along with any other information such as
* line/column number, etc.
*/
static int R_get_token(PScanner *scanner, PToken *token) {
do {
++column;
curr_char = fgetc(fp);
if(curr_char == '\n') {
++line;
column = 0;
}
} while(isspace(curr_char));
if(feof(fp)) {
return 0;
}
token->lexeme_length = 1;
token->line = line;
token->column = column;
token->lexeme = buffer;
buffer[1] = 0;
buffer[0] = curr_char;
if(in_char_class) {
goto any_char;
}
switch(curr_char) {
case '(': token->terminal = L_START_GROUP; break;
case ')': token->terminal = L_END_GROUP; break;
case '[':
token->terminal = L_START_CLASS;
in_char_class = 1;
first_char_in_class = 1;
curr_char = fgetc(fp);
got_char = 1;
++column;
if(feof(fp)) {
return 0;
}
if(curr_char == '^') {
buffer[0] = '[';
buffer[1] = '^';
buffer[2] = 0;
token->terminal = L_START_NEG_CLASS;
} else {
ungetc(curr_char, fp);
}
break;
case '+': token->terminal = L_POSITIVE_CLOSURE; break;
case '*': token->terminal = L_KLEENE_CLOSURE; break;
case '^': token->terminal = L_ANCHOR_START; break;
case '$': token->terminal = L_ANCHOR_END; break;
case '?': token->terminal = L_OPTIONAL; break;
case '|': token->terminal = L_OR; break;
case '.': token->terminal = L_ANY_CHAR; break;
default:
any_char:
switch(curr_char) {
case ']':
token->terminal = L_END_CLASS; in_char_class = 0;
break;
case '\\':
curr_char = fgetc(fp);
++column;
if(feof(fp)) {
return 0;
}
buffer[0] = curr_char;
switch(curr_char) {
case '-': token->terminal = L_CHARACTER; break;
case 'n': token->terminal = L_NEW_LINE; break;
case 's': token->terminal = L_SPACE; break;
case 't': token->terminal = L_TAB; break;
case 'r': token->terminal = L_CARRIAGE_RETURN; break;
default:
if(isspace(curr_char)) {
printf(
"Scanner Error: a space cannot follow a "
"backslash.\n"
);
exit(1);
}
goto all_chars;
}
break;
default:
if(in_char_class && curr_char == '-') {
token->terminal = L_CHARACTER_RANGE;
} else {
all_chars:
token->terminal = L_CHARACTER;
}
}
first_char_in_class = 0;
}
return 1;
}
/**
* Construct the grammar for recognizing a regular expression.
*/
static PGrammar *regex_grammar(void) {
PGrammar *G = grammar_alloc(
P_MACHINE, /* production to start matching with */
16, /* number of non-terminals */
19, /* number of terminals */
50, /* number of production phrases */
80 /* number of phrase symbols */
);
/*
* Machine
* : -<anchor_line_start> -<anchor_line_end>
* : -<anchor_line_start> ^Expr -<anchor_line_end>
* : -<anchor_line_start> ^Expr
* : ^Expr -<anchor_line_end>
* : ^Expr
* : <>
* ;
*/
grammar_add_terminal_symbol(G, L_ANCHOR_START, G_NON_EXCLUDABLE);
grammar_add_terminal_symbol(G, L_ANCHOR_END, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_terminal_symbol(G, L_ANCHOR_START, G_NON_EXCLUDABLE);
grammar_add_non_terminal_symbol(G, P_EXPR, G_RAISE_CHILDREN);
grammar_add_terminal_symbol(G, L_ANCHOR_END, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_terminal_symbol(G, L_ANCHOR_START, G_NON_EXCLUDABLE);
grammar_add_non_terminal_symbol(G, P_EXPR, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_EXPR, G_RAISE_CHILDREN);
grammar_add_terminal_symbol(G, L_ANCHOR_END, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_EXPR, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_epsilon_symbol(G, G_AUTO);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_MACHINE, (G_ProductionRuleFunc *) &Machine);
/*
* OrExpr
* : ^Expr <or> CatExpr
* ;
*/
grammar_add_non_terminal_symbol(G, P_EXPR, G_RAISE_CHILDREN);
grammar_add_terminal_symbol(G, L_OR, G_AUTO);
grammar_add_non_terminal_symbol(G, P_CAT_EXPR, G_AUTO);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_OR_EXPR, (G_ProductionRuleFunc *) &OrExpr);
/*
* Expr
* : OrExpr
* : CatExpr
* ;
*/
grammar_add_non_terminal_symbol(G, P_OR_EXPR, G_AUTO);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_CAT_EXPR, G_AUTO);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_EXPR, &grammar_null_action);
/*
* CatExpr
* : ^CatExpr ^Factor
* : ^Factor
* ;
*/
grammar_add_non_terminal_symbol(G, P_CAT_EXPR, G_RAISE_CHILDREN);
grammar_add_non_terminal_symbol(G, P_FACTOR, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_FACTOR, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_CAT_EXPR, (G_ProductionRuleFunc *) &CatExpr);
/*
* KleeneClosure : ^Term <kleene_closure> ;
*/
grammar_add_non_terminal_symbol(G, P_TERM, G_RAISE_CHILDREN);
grammar_add_terminal_symbol(G, L_KLEENE_CLOSURE, G_AUTO);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_KLEENE_CLOSURE, (G_ProductionRuleFunc *) &KleeneClosure);
/*
* PositiveClosure : ^Term <positive_closure> ;
*/
grammar_add_non_terminal_symbol(G, P_TERM, G_RAISE_CHILDREN);
grammar_add_terminal_symbol(G, L_POSITIVE_CLOSURE, G_AUTO);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_POSITIVE_CLOSURE, (G_ProductionRuleFunc *) &PositiveClosure);
/*
* OptionalTerm : ^Term <optional> ;
*/
grammar_add_non_terminal_symbol(G, P_TERM, G_RAISE_CHILDREN);
grammar_add_terminal_symbol(G, L_OPTIONAL, G_AUTO);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_OPTIONAL_TERM, (G_ProductionRuleFunc *) &OptionalTerm);
/*
* Factor
* : -KleeneClosure
* : -PositiveClosure
* : -OptionalTerm
* : ^Term
* ;
*/
grammar_add_non_terminal_symbol(G, P_KLEENE_CLOSURE, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_POSITIVE_CLOSURE, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_OPTIONAL_TERM, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_TERM, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_FACTOR, &grammar_null_action);
/*
* Term
* : -NegatedCharacterClass
* : -CharacterClass
* : -String
* : <start_group> ^Expr <end_group>
* ;
*/
grammar_add_non_terminal_symbol(G, P_NEGATED_CHARACTER_CLASS, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_CHARACTER_CLASS, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_STRING, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_terminal_symbol(G, L_START_GROUP, G_AUTO);
grammar_add_non_terminal_symbol(G, P_EXPR, G_RAISE_CHILDREN);
grammar_add_terminal_symbol(G, L_END_GROUP, G_AUTO);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_TERM, &grammar_null_action);
/*
* NegatedCharacterClass : <start_neg_class> ^CharClassString <end_class> ;
*/
grammar_add_terminal_symbol(G, L_START_NEG_CLASS, G_AUTO);
grammar_add_non_terminal_symbol(G, P_CHAR_CLASS_STRING, G_RAISE_CHILDREN);
grammar_add_terminal_symbol(G, L_END_CLASS, G_AUTO);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_NEGATED_CHARACTER_CLASS, (G_ProductionRuleFunc *) &NegatedCharClass);
/*
* CharRange
* : ^Char <character_range> ^Char
* ;
*/
grammar_add_non_terminal_symbol(G, P_CHAR, G_RAISE_CHILDREN);
grammar_add_terminal_symbol(G, L_CHARACTER_RANGE, G_AUTO);
grammar_add_non_terminal_symbol(G, P_CHAR, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_CHAR_RANGE, &grammar_null_action);
/*
* OptCharClassString
* : ^CharClassString
* : <>
* ;
*/
grammar_add_non_terminal_symbol(G, P_CHAR_CLASS_STRING, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_epsilon_symbol(G, G_AUTO);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_OPT_CHAR_CLASS_STRING, &grammar_null_action);
/*
* CharClassString
* : -CharRange ^OptCharClassString
* : ^Char ^CharClassString
* : ^Char
* ;
*/
grammar_add_non_terminal_symbol(G, P_CHAR_RANGE, G_NON_EXCLUDABLE);
grammar_add_non_terminal_symbol(G, P_OPT_CHAR_CLASS_STRING, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_CHAR, G_RAISE_CHILDREN);
grammar_add_non_terminal_symbol(G, P_CHAR_CLASS_STRING, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_CHAR, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_CHAR_CLASS_STRING, &grammar_null_action);
/*
* CharacterClass : <start_class> ^CharClassString <end_class> ;
*/
grammar_add_terminal_symbol(G, L_START_CLASS, G_AUTO);
grammar_add_non_terminal_symbol(G, P_CHAR_CLASS_STRING, G_RAISE_CHILDREN);
grammar_add_terminal_symbol(G, L_END_CLASS, G_AUTO);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_CHARACTER_CLASS, (G_ProductionRuleFunc *) &CharClass);
/*
* Char
* : -<characher>
* : -<any_char>
* : -<space>
* : -<tab>
* : -<new_line>
* : -<carriage_return>
* ;
*/
grammar_add_terminal_symbol(G, L_CHARACTER, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_terminal_symbol(G, L_ANY_CHAR, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_terminal_symbol(G, L_SPACE, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_terminal_symbol(G, L_TAB, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_terminal_symbol(G, L_NEW_LINE, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_terminal_symbol(G, L_CARRIAGE_RETURN, G_NON_EXCLUDABLE);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_CHAR, &grammar_null_action);
/*
* String
* : ^Char ^String
* : ^Char
* ;
*/
grammar_add_non_terminal_symbol(G, P_CHAR, G_RAISE_CHILDREN);
grammar_add_non_terminal_symbol(G, P_STRING, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_non_terminal_symbol(G, P_CHAR, G_RAISE_CHILDREN);
grammar_add_phrase(G);
grammar_add_production_rule(G, P_STRING, (G_ProductionRuleFunc *) &String);
return G;
}
/**
* Parse a regular expression, turn it into a NFA using Thompson's construction,
* then turn that NFA into a DFA using subset construction, then minimize the
* DFA.
*/
void parse_regexp(const char *file) {
PScanner *scanner = (PScanner *) 0x1; /* fake scanner */
PGrammar *grammar = regex_grammar();
PThompsonsConstruction thom, *thompson = &thom;
if(is_not_null(fp = fopen(file, "r"))) {
thompson->nfa = nfa_alloc();
thompson->top_state = -1;
parse_tokens(
grammar,
scanner,
(PScannerFunction *) &R_get_token,
(void *) thompson,
TREE_TRAVERSE_POSTORDER
);
printf("\nFreeing memory.. \n");
grammar_free(grammar);
nfa_print_dot(thompson->nfa);
nfa_free(thompson->nfa);
fclose(fp);
} else {
printf("Error: Could not open file '%s'.", file);
}
}