implement lexical analysis function for c-minus
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,2 +1,4 @@
|
|||||||
.idea
|
.idea
|
||||||
.vscode
|
.vscode
|
||||||
|
|
||||||
|
*.o
|
||||||
38
notes/1.md
38
notes/1.md
@@ -1,9 +1,41 @@
|
|||||||
컴파일러 1
|
# ISA & Compiler Basics
|
||||||
===
|
|
||||||
|
## Basic Computer
|
||||||
|
|
||||||
|
To get a task done by a general-purpose computer, we need:
|
||||||
|
|
||||||
|
* Program: A sequence of instructions
|
||||||
|
* Instruction set: A set of possible instructions
|
||||||
|
|
||||||
|
### Von Neumann Architecture
|
||||||
|
|
||||||
|
Both instructions and data are stored in memory.
|
||||||
|
Instructions dictate:
|
||||||
|
|
||||||
|
1. which data are manipulated, and how
|
||||||
|
2. which instruction should be next
|
||||||
|
|
||||||
|
The memory is independent of the CPU.
|
||||||
|
|
||||||
|
|
||||||
# Interpreter in Modern Processors
|
### How to load a program
|
||||||
|
```
|
||||||
|
*.c -(compiler)-> *.s: Assembly program
|
||||||
|
*.s -(assembler)-> *.o: Object file
|
||||||
|
*.o -(linker)-[with library(*.o)]-> *.exe: Executable
|
||||||
|
*.exe -(loader)-> loaded into memory
|
||||||
|
```
|
||||||
|
|
||||||
|
### Program
|
||||||
|
|
||||||
|
A computer is essentially a complex state machine.
|
||||||
|
**Programmer-visible state**:
|
||||||
|
|
||||||
|
* Memory
|
||||||
|
* Registers
|
||||||
|
* Program Counter
|
||||||
|
|
||||||
|
Instructions (the program) specify how to transform the values of the programmer-visible state.
|
||||||
|
|
||||||
# Compiler
|
# Compiler
|
||||||
|
|
||||||
|
|||||||
@@ -23,17 +23,17 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* MAXRESERVED = the number of reserved words */
|
/* MAXRESERVED = the number of reserved words */
|
||||||
#define MAXRESERVED 8
|
#define MAXRESERVED 6
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
/* book-keeping tokens */
|
/* book-keeping tokens */
|
||||||
{ENDFILE,ERROR,
|
{ENDFILE,ERROR,
|
||||||
/* reserved words */
|
/* reserved words */
|
||||||
IF,THEN,ELSE,END,REPEAT,UNTIL,READ,WRITE,
|
IF,ELSE,WHILE,RETURN,INT,VOID,
|
||||||
/* multicharacter tokens */
|
/* multicharacter tokens */
|
||||||
ID,NUM,
|
ID,NUM,
|
||||||
/* special symbols */
|
/* special symbols */
|
||||||
ASSIGN,EQ,LT,PLUS,MINUS,TIMES,OVER,LPAREN,RPAREN,SEMI
|
ASSIGN,EQ,NE,LT,LE,GT,GE,PLUS,MINUS,TIMES,OVER,LPAREN,RPAREN,LBRACE,RBRACE,LCURLY,RCURLY,SEMI,COMMA
|
||||||
} TokenType;
|
} TokenType;
|
||||||
|
|
||||||
extern FILE* source; /* source code text file */
|
extern FILE* source; /* source code text file */
|
||||||
|
|||||||
75
src/lex/tiny.l
Normal file
75
src/lex/tiny.l
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
/****************************************************/
|
||||||
|
/* File: tiny.l */
|
||||||
|
/* Lex specification for TINY */
|
||||||
|
/* Compiler Construction: Principles and Practice */
|
||||||
|
/* Kenneth C. Louden */
|
||||||
|
/****************************************************/
|
||||||
|
|
||||||
|
%{
|
||||||
|
#include "globals.h"
|
||||||
|
#include "util.h"
|
||||||
|
#include "scan.h"
|
||||||
|
/* lexeme of identifier or reserved word */
|
||||||
|
char tokenString[MAXTOKENLEN+1];
|
||||||
|
%}
|
||||||
|
|
||||||
|
digit [0-9]
|
||||||
|
number {digit}+
|
||||||
|
letter [a-zA-Z]
|
||||||
|
identifier {letter}+
|
||||||
|
newline \n
|
||||||
|
whitespace [ \t]+
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
"if" {return IF;}
|
||||||
|
"then" {return THEN;}
|
||||||
|
"else" {return ELSE;}
|
||||||
|
"end" {return END;}
|
||||||
|
"repeat" {return REPEAT;}
|
||||||
|
"until" {return UNTIL;}
|
||||||
|
"read" {return READ;}
|
||||||
|
"write" {return WRITE;}
|
||||||
|
":=" {return ASSIGN;}
|
||||||
|
"=" {return EQ;}
|
||||||
|
"<" {return LT;}
|
||||||
|
"+" {return PLUS;}
|
||||||
|
"-" {return MINUS;}
|
||||||
|
"*" {return TIMES;}
|
||||||
|
"/" {return OVER;}
|
||||||
|
"(" {return LPAREN;}
|
||||||
|
")" {return RPAREN;}
|
||||||
|
";" {return SEMI;}
|
||||||
|
{number} {return NUM;}
|
||||||
|
{identifier} {return ID;}
|
||||||
|
{newline} {lineno++;}
|
||||||
|
{whitespace} {/* skip whitespace */}
|
||||||
|
"{" { char c;
|
||||||
|
do
|
||||||
|
{ c = input();
|
||||||
|
if (c == EOF) break;
|
||||||
|
if (c == '\n') lineno++;
|
||||||
|
} while (c != '}');
|
||||||
|
}
|
||||||
|
. {return ERROR;}
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
TokenType getToken(void)
|
||||||
|
{ static int firstTime = TRUE;
|
||||||
|
TokenType currentToken;
|
||||||
|
if (firstTime)
|
||||||
|
{ firstTime = FALSE;
|
||||||
|
lineno++;
|
||||||
|
yyin = source;
|
||||||
|
yyout = listing;
|
||||||
|
}
|
||||||
|
currentToken = yylex();
|
||||||
|
strncpy(tokenString,yytext,MAXTOKENLEN);
|
||||||
|
if (TraceScan) {
|
||||||
|
fprintf(listing,"\t%d: ",lineno);
|
||||||
|
printToken(currentToken,tokenString);
|
||||||
|
}
|
||||||
|
return currentToken;
|
||||||
|
}
|
||||||
|
|
||||||
107
src/main.c
107
src/main.c
@@ -8,14 +8,14 @@
|
|||||||
#include "globals.h"
|
#include "globals.h"
|
||||||
|
|
||||||
/* set NO_PARSE to TRUE to get a scanner-only compiler */
|
/* set NO_PARSE to TRUE to get a scanner-only compiler */
|
||||||
#define NO_PARSE FALSE
|
#define NO_PARSE TRUE
|
||||||
/* set NO_ANALYZE to TRUE to get a parser-only compiler */
|
/* set NO_ANALYZE to TRUE to get a parser-only compiler */
|
||||||
#define NO_ANALYZE FALSE
|
#define NO_ANALYZE TRUE
|
||||||
|
|
||||||
/* set NO_CODE to TRUE to get a compiler that does not
|
/* set NO_CODE to TRUE to get a compiler that does not
|
||||||
* generate code
|
* generate code
|
||||||
*/
|
*/
|
||||||
#define NO_CODE FALSE
|
#define NO_CODE TRUE
|
||||||
|
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#if NO_PARSE
|
#if NO_PARSE
|
||||||
@@ -32,71 +32,70 @@
|
|||||||
|
|
||||||
/* allocate global variables */
|
/* allocate global variables */
|
||||||
int lineno = 0;
|
int lineno = 0;
|
||||||
FILE * source;
|
FILE *source;
|
||||||
FILE * listing;
|
FILE *listing;
|
||||||
FILE * code;
|
FILE *code;
|
||||||
|
|
||||||
/* allocate and set tracing flags */
|
/* allocate and set tracing flags */
|
||||||
int EchoSource = FALSE;
|
int EchoSource = FALSE;
|
||||||
int TraceScan = FALSE;
|
int TraceScan = TRUE;
|
||||||
int TraceParse = FALSE;
|
int TraceParse = FALSE;
|
||||||
int TraceAnalyze = FALSE;
|
int TraceAnalyze = FALSE;
|
||||||
int TraceCode = FALSE;
|
int TraceCode = FALSE;
|
||||||
|
|
||||||
int Error = FALSE;
|
int Error = FALSE;
|
||||||
|
|
||||||
main( int argc, char * argv[] )
|
main(int argc, char *argv[]) {
|
||||||
{ TreeNode * syntaxTree;
|
TreeNode *syntaxTree;
|
||||||
char pgm[120]; /* source code file name */
|
char pgm[120]; /* source code file name */
|
||||||
if (argc != 2)
|
if (argc != 2) {
|
||||||
{ fprintf(stderr,"usage: %s <filename>\n",argv[0]);
|
fprintf(stderr, "usage: %s <filename>\n", argv[0]);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
strcpy(pgm,argv[1]) ;
|
strcpy(pgm, argv[1]);
|
||||||
if (strchr (pgm, '.') == NULL)
|
if (strchr(pgm, '.') == NULL)
|
||||||
strcat(pgm,".tny");
|
strcat(pgm, ".tny");
|
||||||
source = fopen(pgm,"r");
|
source = fopen(pgm, "r");
|
||||||
if (source==NULL)
|
if (source == NULL) {
|
||||||
{ fprintf(stderr,"File %s not found\n",pgm);
|
fprintf(stderr, "File %s not found\n", pgm);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
listing = stdout; /* send listing to screen */
|
listing = stdout; /* send listing to screen */
|
||||||
fprintf(listing,"\nTINY COMPILATION: %s\n",pgm);
|
fprintf(listing, "\nC-MINUS COMPILATION: %s\n", pgm);
|
||||||
#if NO_PARSE
|
#if NO_PARSE
|
||||||
while (getToken()!=ENDFILE);
|
while (getToken() != ENDFILE);
|
||||||
#else
|
#else
|
||||||
syntaxTree = parse();
|
syntaxTree = parse();
|
||||||
if (TraceParse) {
|
if (TraceParse) {
|
||||||
fprintf(listing,"\nSyntax tree:\n");
|
fprintf(listing, "\nSyntax tree:\n");
|
||||||
printTree(syntaxTree);
|
printTree(syntaxTree);
|
||||||
}
|
}
|
||||||
#if !NO_ANALYZE
|
#if !NO_ANALYZE
|
||||||
if (! Error)
|
if (!Error) {
|
||||||
{ if (TraceAnalyze) fprintf(listing,"\nBuilding Symbol Table...\n");
|
if (TraceAnalyze) fprintf(listing, "\nBuilding Symbol Table...\n");
|
||||||
buildSymtab(syntaxTree);
|
buildSymtab(syntaxTree);
|
||||||
if (TraceAnalyze) fprintf(listing,"\nChecking Types...\n");
|
if (TraceAnalyze) fprintf(listing, "\nChecking Types...\n");
|
||||||
typeCheck(syntaxTree);
|
typeCheck(syntaxTree);
|
||||||
if (TraceAnalyze) fprintf(listing,"\nType Checking Finished\n");
|
if (TraceAnalyze) fprintf(listing, "\nType Checking Finished\n");
|
||||||
}
|
}
|
||||||
#if !NO_CODE
|
#if !NO_CODE
|
||||||
if (! Error)
|
if (!Error) {
|
||||||
{ char * codefile;
|
char *codefile;
|
||||||
int fnlen = strcspn(pgm,".");
|
int fnlen = strcspn(pgm, ".");
|
||||||
codefile = (char *) calloc(fnlen+4, sizeof(char));
|
codefile = (char *) calloc(fnlen + 4, sizeof(char));
|
||||||
strncpy(codefile,pgm,fnlen);
|
strncpy(codefile, pgm, fnlen);
|
||||||
strcat(codefile,".tm");
|
strcat(codefile, ".tm");
|
||||||
code = fopen(codefile,"w");
|
code = fopen(codefile, "w");
|
||||||
if (code == NULL)
|
if (code == NULL) {
|
||||||
{ printf("Unable to open %s\n",codefile);
|
printf("Unable to open %s\n", codefile);
|
||||||
exit(1);
|
exit(1);
|
||||||
|
}
|
||||||
|
codeGen(syntaxTree, codefile);
|
||||||
|
fclose(code);
|
||||||
}
|
}
|
||||||
codeGen(syntaxTree,codefile);
|
|
||||||
fclose(code);
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
fclose(source);
|
fclose(source);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
396
src/scan.c
396
src/scan.c
@@ -5,69 +5,81 @@
|
|||||||
/* Kenneth C. Louden */
|
/* Kenneth C. Louden */
|
||||||
/****************************************************/
|
/****************************************************/
|
||||||
|
|
||||||
|
#include "scan.h"
|
||||||
#include "globals.h"
|
#include "globals.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include "scan.h"
|
|
||||||
|
|
||||||
/* states in scanner DFA */
|
/* states in scanner DFA */
|
||||||
typedef enum
|
typedef enum { START,
|
||||||
{ START,INASSIGN,INCOMMENT,INNUM,INID,DONE }
|
INOVER,
|
||||||
StateType;
|
INCOMMENT,
|
||||||
|
ASTERCOMMENT,
|
||||||
|
INASSIGN,
|
||||||
|
INLT /* < */,
|
||||||
|
INGT /* > */,
|
||||||
|
INNE /* !*/,
|
||||||
|
INNUM,
|
||||||
|
INID,
|
||||||
|
DONE } StateType;
|
||||||
|
|
||||||
/* lexeme of identifier or reserved word */
|
/* lexeme of identifier or reserved word */
|
||||||
char tokenString[MAXTOKENLEN+1];
|
char tokenString[MAXTOKENLEN + 1];
|
||||||
|
|
||||||
/* BUFLEN = length of the input buffer for
|
/* BUFLEN = length of the input buffer for
|
||||||
source code lines */
|
source code lines */
|
||||||
#define BUFLEN 256
|
#define BUFLEN 256
|
||||||
|
|
||||||
static char lineBuf[BUFLEN]; /* holds the current line */
|
static char lineBuf[BUFLEN]; /* holds the current line */
|
||||||
static int linepos = 0; /* current position in LineBuf */
|
static int linepos = 0; /* current position in LineBuf */
|
||||||
static int bufsize = 0; /* current size of buffer string */
|
static int bufsize = 0; /* current size of buffer string */
|
||||||
static int EOF_flag = FALSE; /* corrects ungetNextChar behavior on EOF */
|
static int EOF_flag = FALSE; /* corrects ungetNextChar behavior on EOF */
|
||||||
|
|
||||||
/* getNextChar fetches the next non-blank character
|
/* getNextChar fetches the next non-blank character
|
||||||
from lineBuf, reading in a new line if lineBuf is
|
from lineBuf, reading in a new line if lineBuf is
|
||||||
exhausted */
|
exhausted */
|
||||||
static int getNextChar(void)
|
static int getNextChar(void) {
|
||||||
{ if (!(linepos < bufsize))
|
if (!(linepos < bufsize)) {
|
||||||
{ lineno++;
|
lineno++;
|
||||||
if (fgets(lineBuf,BUFLEN-1,source))
|
if (fgets(lineBuf, BUFLEN - 1, source)) {
|
||||||
{ if (EchoSource) fprintf(listing,"%4d: %s",lineno,lineBuf);
|
if (EchoSource) fprintf(listing, "%4d: %s", lineno, lineBuf);
|
||||||
bufsize = strlen(lineBuf);
|
bufsize = strlen(lineBuf);
|
||||||
linepos = 0;
|
linepos = 0;
|
||||||
return lineBuf[linepos++];
|
return lineBuf[linepos++];
|
||||||
}
|
} else {
|
||||||
else
|
EOF_flag = TRUE;
|
||||||
{ EOF_flag = TRUE;
|
return EOF;
|
||||||
return EOF;
|
}
|
||||||
}
|
} else
|
||||||
}
|
return lineBuf[linepos++];
|
||||||
else return lineBuf[linepos++];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ungetNextChar backtracks one character
|
/* ungetNextChar backtracks one character
|
||||||
in lineBuf */
|
in lineBuf */
|
||||||
static void ungetNextChar(void)
|
static void ungetNextChar(void) {
|
||||||
{ if (!EOF_flag) linepos-- ;}
|
if (!EOF_flag) linepos--;
|
||||||
|
}
|
||||||
|
|
||||||
/* lookup table of reserved words */
|
/* lookup table of reserved words */
|
||||||
static struct
|
static struct {// keywords
|
||||||
{ char* str;
|
char *str;
|
||||||
TokenType tok;
|
TokenType tok;
|
||||||
} reservedWords[MAXRESERVED]
|
} reservedWords[MAXRESERVED] = {
|
||||||
= {{"if",IF},{"then",THEN},{"else",ELSE},{"end",END},
|
{"if", IF},
|
||||||
{"repeat",REPEAT},{"until",UNTIL},{"read",READ},
|
{"else", ELSE},
|
||||||
{"write",WRITE}};
|
{"while", WHILE},
|
||||||
|
{"return", RETURN},
|
||||||
|
{"int", INT},
|
||||||
|
{"void", VOID},
|
||||||
|
};
|
||||||
|
|
||||||
/* lookup an identifier to see if it is a reserved word */
|
/* lookup an identifier to see if it is a reserved word */
|
||||||
/* uses linear search */
|
/* uses linear search */
|
||||||
static TokenType reservedLookup (char * s)
|
static TokenType reservedLookup(char *s) {
|
||||||
{ int i;
|
int i;
|
||||||
for (i=0;i<MAXRESERVED;i++)
|
for (i = 0; i < MAXRESERVED; i++)
|
||||||
if (!strcmp(s,reservedWords[i].str))
|
if (!strcmp(s, reservedWords[i].str))
|
||||||
return reservedWords[i].tok;
|
return reservedWords[i].tok;
|
||||||
return ID;
|
return ID;
|
||||||
}
|
}
|
||||||
|
|
||||||
/****************************************/
|
/****************************************/
|
||||||
@@ -76,128 +88,190 @@ static TokenType reservedLookup (char * s)
|
|||||||
/* function getToken returns the
|
/* function getToken returns the
|
||||||
* next token in source file
|
* next token in source file
|
||||||
*/
|
*/
|
||||||
TokenType getToken(void)
|
TokenType getToken(void) { /* index for storing into tokenString */
|
||||||
{ /* index for storing into tokenString */
|
int tokenStringIndex = 0;
|
||||||
int tokenStringIndex = 0;
|
/* holds current token to be returned */
|
||||||
/* holds current token to be returned */
|
TokenType currentToken;
|
||||||
TokenType currentToken;
|
/* current state - always begins at START */
|
||||||
/* current state - always begins at START */
|
StateType state = START;
|
||||||
StateType state = START;
|
/* flag to indicate save to tokenString */
|
||||||
/* flag to indicate save to tokenString */
|
int save;
|
||||||
int save;
|
while (state != DONE) {
|
||||||
while (state != DONE)
|
int c = getNextChar();
|
||||||
{ int c = getNextChar();
|
save = TRUE;
|
||||||
save = TRUE;
|
/* main char process (state transition)*/
|
||||||
switch (state)
|
switch (state) {
|
||||||
{ case START:
|
case START:
|
||||||
if (isdigit(c))
|
if (isdigit(c))
|
||||||
state = INNUM;
|
state = INNUM;
|
||||||
else if (isalpha(c))
|
else if (isalpha(c))
|
||||||
state = INID;
|
state = INID;
|
||||||
else if (c == ':')
|
else if ((c == ' ') || (c == '\t') || (c == '\n'))
|
||||||
state = INASSIGN;
|
save = FALSE;
|
||||||
else if ((c == ' ') || (c == '\t') || (c == '\n'))
|
else if (c == '=')
|
||||||
save = FALSE;
|
state = INASSIGN;
|
||||||
else if (c == '{')
|
else if (c == '<') {
|
||||||
{ save = FALSE;
|
state = INLT;
|
||||||
state = INCOMMENT;
|
} else if (c == '>') {
|
||||||
}
|
state = INGT;
|
||||||
else
|
} else if (c == '/') {// comment in or not
|
||||||
{ state = DONE;
|
save = FALSE;
|
||||||
switch (c)
|
state = INOVER;
|
||||||
{ case EOF:
|
} else {
|
||||||
save = FALSE;
|
state = DONE;
|
||||||
currentToken = ENDFILE;
|
switch (c) {
|
||||||
break;
|
case EOF:
|
||||||
case '=':
|
save = FALSE;
|
||||||
currentToken = EQ;
|
currentToken = ENDFILE;
|
||||||
break;
|
break;
|
||||||
case '<':
|
case '+':
|
||||||
currentToken = LT;
|
currentToken = PLUS;
|
||||||
break;
|
break;
|
||||||
case '+':
|
case '-':
|
||||||
currentToken = PLUS;
|
currentToken = MINUS;
|
||||||
break;
|
break;
|
||||||
case '-':
|
case '*':
|
||||||
currentToken = MINUS;
|
currentToken = TIMES;
|
||||||
break;
|
break;
|
||||||
case '*':
|
case '[':
|
||||||
currentToken = TIMES;
|
currentToken = LBRACE;
|
||||||
break;
|
break;
|
||||||
case '/':
|
case ']':
|
||||||
currentToken = OVER;
|
currentToken = RBRACE;
|
||||||
break;
|
break;
|
||||||
case '(':
|
case '{':
|
||||||
currentToken = LPAREN;
|
currentToken = LCURLY;
|
||||||
break;
|
break;
|
||||||
case ')':
|
case '}':
|
||||||
currentToken = RPAREN;
|
currentToken = RCURLY;
|
||||||
break;
|
break;
|
||||||
case ';':
|
case '(':
|
||||||
currentToken = SEMI;
|
currentToken = LPAREN;
|
||||||
break;
|
break;
|
||||||
default:
|
case ')':
|
||||||
currentToken = ERROR;
|
currentToken = RPAREN;
|
||||||
break;
|
break;
|
||||||
}
|
case ';':
|
||||||
}
|
currentToken = SEMI;
|
||||||
break;
|
break;
|
||||||
case INCOMMENT:
|
case ',':
|
||||||
save = FALSE;
|
currentToken = COMMA;
|
||||||
if (c == EOF)
|
break;
|
||||||
{ state = DONE;
|
default:
|
||||||
currentToken = ENDFILE;
|
currentToken = ERROR;
|
||||||
}
|
break;
|
||||||
else if (c == '}') state = START;
|
}
|
||||||
break;
|
}
|
||||||
case INASSIGN:
|
break;
|
||||||
state = DONE;
|
/* begin comment process */
|
||||||
if (c == '=')
|
case INCOMMENT:
|
||||||
currentToken = ASSIGN;
|
save = FALSE;
|
||||||
else
|
if (c == EOF) {
|
||||||
{ /* backup in the input */
|
state = DONE;
|
||||||
ungetNextChar();
|
currentToken = ENDFILE;
|
||||||
save = FALSE;
|
} else if (c == '*')// comment out
|
||||||
currentToken = ERROR;
|
state = ASTERCOMMENT;
|
||||||
}
|
break;
|
||||||
break;
|
case INOVER:
|
||||||
case INNUM:
|
if (c == '*') {
|
||||||
if (!isdigit(c))
|
save = FALSE;
|
||||||
{ /* backup in the input */
|
state = INCOMMENT;
|
||||||
ungetNextChar();
|
} else {
|
||||||
save = FALSE;
|
state = DONE;
|
||||||
state = DONE;
|
ungetNextChar();
|
||||||
currentToken = NUM;
|
save = FALSE;
|
||||||
}
|
currentToken = OVER;
|
||||||
break;
|
}
|
||||||
case INID:
|
break;
|
||||||
if (!isalpha(c))
|
case ASTERCOMMENT:
|
||||||
{ /* backup in the input */
|
save = FALSE;
|
||||||
ungetNextChar();
|
if (c == EOF) {
|
||||||
save = FALSE;
|
state = DONE;
|
||||||
state = DONE;
|
currentToken = ENDFILE;
|
||||||
currentToken = ID;
|
} else if (c == '/') {
|
||||||
}
|
state = START;
|
||||||
break;
|
}
|
||||||
case DONE:
|
|
||||||
default: /* should never happen */
|
|
||||||
fprintf(listing,"Scanner Bug: state= %d\n",state);
|
|
||||||
state = DONE;
|
|
||||||
currentToken = ERROR;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if ((save) && (tokenStringIndex <= MAXTOKENLEN))
|
|
||||||
tokenString[tokenStringIndex++] = (char) c;
|
|
||||||
if (state == DONE)
|
|
||||||
{ tokenString[tokenStringIndex] = '\0';
|
|
||||||
if (currentToken == ID)
|
|
||||||
currentToken = reservedLookup(tokenString);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (TraceScan) {
|
|
||||||
fprintf(listing,"\t%d: ",lineno);
|
|
||||||
printToken(currentToken,tokenString);
|
|
||||||
}
|
|
||||||
return currentToken;
|
|
||||||
} /* end getToken */
|
|
||||||
|
|
||||||
|
else {
|
||||||
|
state = INCOMMENT;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
/* end comment process */
|
||||||
|
case INASSIGN:
|
||||||
|
state = DONE;
|
||||||
|
if (c == '=')
|
||||||
|
currentToken = EQ;
|
||||||
|
else { /* backup in the input */
|
||||||
|
ungetNextChar();
|
||||||
|
save = FALSE;
|
||||||
|
currentToken = ASSIGN;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case INNE:
|
||||||
|
state = DONE;
|
||||||
|
if (c == '=') {
|
||||||
|
currentToken = NE;
|
||||||
|
} else {
|
||||||
|
ungetNextChar();
|
||||||
|
save = FALSE;
|
||||||
|
currentToken = ERROR;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case INLT:
|
||||||
|
state = DONE;
|
||||||
|
if (c == '=') {
|
||||||
|
currentToken = LE;
|
||||||
|
} else {
|
||||||
|
ungetNextChar();
|
||||||
|
save = FALSE;
|
||||||
|
currentToken = LT;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case INGT:
|
||||||
|
state = DONE;
|
||||||
|
if (c == '=') {
|
||||||
|
currentToken = GE;
|
||||||
|
} else {
|
||||||
|
ungetNextChar();
|
||||||
|
save = FALSE;
|
||||||
|
currentToken = GT;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case INNUM:
|
||||||
|
if (!isdigit(c)) { /* backup in the input */
|
||||||
|
ungetNextChar();
|
||||||
|
save = FALSE;
|
||||||
|
state = DONE;
|
||||||
|
currentToken = NUM;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case INID:
|
||||||
|
if (!isalpha(c)) { /* backup in the input */
|
||||||
|
ungetNextChar();
|
||||||
|
save = FALSE;
|
||||||
|
state = DONE;
|
||||||
|
currentToken = ID;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case DONE:
|
||||||
|
default: /* should never happen */
|
||||||
|
fprintf(listing, "Scanner Bug: state= %d\n", state);
|
||||||
|
state = DONE;
|
||||||
|
currentToken = ERROR;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if ((save) && (tokenStringIndex <= MAXTOKENLEN))
|
||||||
|
tokenString[tokenStringIndex++] = (char) c;
|
||||||
|
if (state == DONE) {
|
||||||
|
tokenString[tokenStringIndex] = '\0';
|
||||||
|
if (currentToken == ID)
|
||||||
|
currentToken = reservedLookup(tokenString);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (TraceScan) {
|
||||||
|
fprintf(listing, "\t%d: ", lineno);
|
||||||
|
printToken(currentToken, tokenString);
|
||||||
|
}
|
||||||
|
return currentToken;
|
||||||
|
} /* end getToken */
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#ifndef _SCAN_H_
|
#ifndef _SCAN_H_
|
||||||
#define _SCAN_H_
|
#define _SCAN_H_
|
||||||
|
#include "globals.h"
|
||||||
|
|
||||||
/* MAXTOKENLEN is the maximum size of a token */
|
/* MAXTOKENLEN is the maximum size of a token */
|
||||||
#define MAXTOKENLEN 40
|
#define MAXTOKENLEN 40
|
||||||
|
|||||||
16
src/test.cm
Normal file
16
src/test.cm
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
/* A program to perform Euclid's
|
||||||
|
Algorithm to compute gcd */
|
||||||
|
|
||||||
|
int gcd(int u, int v)
|
||||||
|
{
|
||||||
|
if(v == 0) return u;
|
||||||
|
else return gcd(v, u- u/v * v);
|
||||||
|
/* hello u-u/v*v == u mod v */
|
||||||
|
}
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
int x; int y;
|
||||||
|
x = input(); y = input();
|
||||||
|
print(gcd(x, y));
|
||||||
|
}
|
||||||
311
src/util.c
311
src/util.c
@@ -6,103 +6,151 @@
|
|||||||
/* Kenneth C. Louden */
|
/* Kenneth C. Louden */
|
||||||
/****************************************************/
|
/****************************************************/
|
||||||
|
|
||||||
#include "globals.h"
|
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
#include "globals.h"
|
||||||
|
|
||||||
/* Procedure printToken prints a token
|
/* Procedure printToken prints a token
|
||||||
* and its lexeme to the listing file
|
* and its lexeme to the listing file
|
||||||
*/
|
*/
|
||||||
void printToken( TokenType token, const char* tokenString )
|
void printToken(TokenType token, const char *tokenString) {
|
||||||
{ switch (token)
|
switch (token) {
|
||||||
{ case IF:
|
case IF:
|
||||||
case THEN:
|
case ELSE:
|
||||||
case ELSE:
|
case WHILE:
|
||||||
case END:
|
case RETURN:
|
||||||
case REPEAT:
|
case INT:
|
||||||
case UNTIL:
|
case VOID:
|
||||||
case READ:
|
fprintf(listing,
|
||||||
case WRITE:
|
"reserved word: %s\n", tokenString);
|
||||||
fprintf(listing,
|
break;
|
||||||
"reserved word: %s\n",tokenString);
|
case ASSIGN:
|
||||||
break;
|
fprintf(listing, "=\n");
|
||||||
case ASSIGN: fprintf(listing,":=\n"); break;
|
break;
|
||||||
case LT: fprintf(listing,"<\n"); break;
|
case EQ:
|
||||||
case EQ: fprintf(listing,"=\n"); break;
|
fprintf(listing, "==\n");
|
||||||
case LPAREN: fprintf(listing,"(\n"); break;
|
break;
|
||||||
case RPAREN: fprintf(listing,")\n"); break;
|
case NE:
|
||||||
case SEMI: fprintf(listing,";\n"); break;
|
fprintf(listing, "!=\n");
|
||||||
case PLUS: fprintf(listing,"+\n"); break;
|
break;
|
||||||
case MINUS: fprintf(listing,"-\n"); break;
|
case LT:
|
||||||
case TIMES: fprintf(listing,"*\n"); break;
|
fprintf(listing, "<\n");
|
||||||
case OVER: fprintf(listing,"/\n"); break;
|
break;
|
||||||
case ENDFILE: fprintf(listing,"EOF\n"); break;
|
case LE:
|
||||||
case NUM:
|
fprintf(listing, "<=\n");
|
||||||
fprintf(listing,
|
break;
|
||||||
"NUM, val= %s\n",tokenString);
|
case GT:
|
||||||
break;
|
fprintf(listing, ">\n");
|
||||||
case ID:
|
break;
|
||||||
fprintf(listing,
|
case GE:
|
||||||
"ID, name= %s\n",tokenString);
|
fprintf(listing, ">=\n");
|
||||||
break;
|
break;
|
||||||
case ERROR:
|
case PLUS:
|
||||||
fprintf(listing,
|
fprintf(listing, "+\n");
|
||||||
"ERROR: %s\n",tokenString);
|
break;
|
||||||
break;
|
case MINUS:
|
||||||
default: /* should never happen */
|
fprintf(listing, "-\n");
|
||||||
fprintf(listing,"Unknown token: %d\n",token);
|
break;
|
||||||
}
|
case TIMES:
|
||||||
|
fprintf(listing, "*\n");
|
||||||
|
break;
|
||||||
|
case OVER:
|
||||||
|
fprintf(listing, "/\n");
|
||||||
|
break;
|
||||||
|
case LPAREN:
|
||||||
|
fprintf(listing, "(\n");
|
||||||
|
break;
|
||||||
|
case RPAREN:
|
||||||
|
fprintf(listing, ")\n");
|
||||||
|
break;
|
||||||
|
case LBRACE:
|
||||||
|
fprintf(listing, "[\n");
|
||||||
|
break;
|
||||||
|
case RBRACE:
|
||||||
|
fprintf(listing, "]\n");
|
||||||
|
break;
|
||||||
|
case LCURLY:
|
||||||
|
fprintf(listing, "{\n");
|
||||||
|
break;
|
||||||
|
case RCURLY:
|
||||||
|
fprintf(listing, "}\n");
|
||||||
|
break;
|
||||||
|
case SEMI:
|
||||||
|
fprintf(listing, ";\n");
|
||||||
|
break;
|
||||||
|
case COMMA:
|
||||||
|
fprintf(listing, ",\n");
|
||||||
|
break;
|
||||||
|
case ENDFILE:
|
||||||
|
fprintf(listing, "EOF\n");
|
||||||
|
break;
|
||||||
|
case NUM:
|
||||||
|
fprintf(listing,
|
||||||
|
"NUM, val= %s\n", tokenString);
|
||||||
|
break;
|
||||||
|
case ID:
|
||||||
|
fprintf(listing,
|
||||||
|
"ID, name= %s\n", tokenString);
|
||||||
|
break;
|
||||||
|
case ERROR:
|
||||||
|
fprintf(listing,
|
||||||
|
"ERROR: %s\n", tokenString);
|
||||||
|
break;
|
||||||
|
default: /* should never happen */
|
||||||
|
fprintf(listing, "Unknown token: %d\n", token);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Function newStmtNode creates a new statement
|
/* Function newStmtNode creates a new statement
|
||||||
* node for syntax tree construction
|
* node for syntax tree construction
|
||||||
*/
|
*/
|
||||||
TreeNode * newStmtNode(StmtKind kind)
|
TreeNode *newStmtNode(StmtKind kind) {
|
||||||
{ TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode));
|
TreeNode *t = (TreeNode *) malloc(sizeof(TreeNode));
|
||||||
int i;
|
int i;
|
||||||
if (t==NULL)
|
if (t == NULL)
|
||||||
fprintf(listing,"Out of memory error at line %d\n",lineno);
|
fprintf(listing, "Out of memory error at line %d\n", lineno);
|
||||||
else {
|
else {
|
||||||
for (i=0;i<MAXCHILDREN;i++) t->child[i] = NULL;
|
for (i = 0; i < MAXCHILDREN; i++) t->child[i] = NULL;
|
||||||
t->sibling = NULL;
|
t->sibling = NULL;
|
||||||
t->nodekind = StmtK;
|
t->nodekind = StmtK;
|
||||||
t->kind.stmt = kind;
|
t->kind.stmt = kind;
|
||||||
t->lineno = lineno;
|
t->lineno = lineno;
|
||||||
}
|
}
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Function newExpNode creates a new expression
|
/* Function newExpNode creates a new expression
|
||||||
* node for syntax tree construction
|
* node for syntax tree construction
|
||||||
*/
|
*/
|
||||||
TreeNode * newExpNode(ExpKind kind)
|
TreeNode *newExpNode(ExpKind kind) {
|
||||||
{ TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode));
|
TreeNode *t = (TreeNode *) malloc(sizeof(TreeNode));
|
||||||
int i;
|
int i;
|
||||||
if (t==NULL)
|
if (t == NULL)
|
||||||
fprintf(listing,"Out of memory error at line %d\n",lineno);
|
fprintf(listing, "Out of memory error at line %d\n", lineno);
|
||||||
else {
|
else {
|
||||||
for (i=0;i<MAXCHILDREN;i++) t->child[i] = NULL;
|
for (i = 0; i < MAXCHILDREN; i++) t->child[i] = NULL;
|
||||||
t->sibling = NULL;
|
t->sibling = NULL;
|
||||||
t->nodekind = ExpK;
|
t->nodekind = ExpK;
|
||||||
t->kind.exp = kind;
|
t->kind.exp = kind;
|
||||||
t->lineno = lineno;
|
t->lineno = lineno;
|
||||||
t->type = Void;
|
t->type = Void;
|
||||||
}
|
}
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Function copyString allocates and makes a new
|
/* Function copyString allocates and makes a new
|
||||||
* copy of an existing string
|
* copy of an existing string
|
||||||
*/
|
*/
|
||||||
char * copyString(char * s)
|
char *copyString(char *s) {
|
||||||
{ int n;
|
int n;
|
||||||
char * t;
|
char *t;
|
||||||
if (s==NULL) return NULL;
|
if (s == NULL) return NULL;
|
||||||
n = strlen(s)+1;
|
n = strlen(s) + 1;
|
||||||
t = malloc(n);
|
t = malloc(n);
|
||||||
if (t==NULL)
|
if (t == NULL)
|
||||||
fprintf(listing,"Out of memory error at line %d\n",lineno);
|
fprintf(listing, "Out of memory error at line %d\n", lineno);
|
||||||
else strcpy(t,s);
|
else
|
||||||
return t;
|
strcpy(t, s);
|
||||||
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Variable indentno is used by printTree to
|
/* Variable indentno is used by printTree to
|
||||||
@@ -111,67 +159,66 @@ char * copyString(char * s)
|
|||||||
static indentno = 0;
|
static indentno = 0;
|
||||||
|
|
||||||
/* macros to increase/decrease indentation */
|
/* macros to increase/decrease indentation */
|
||||||
#define INDENT indentno+=2
|
#define INDENT indentno += 2
|
||||||
#define UNINDENT indentno-=2
|
#define UNINDENT indentno -= 2
|
||||||
|
|
||||||
/* printSpaces indents by printing spaces */
|
/* printSpaces indents by printing spaces */
|
||||||
static void printSpaces(void)
|
static void printSpaces(void) {
|
||||||
{ int i;
|
int i;
|
||||||
for (i=0;i<indentno;i++)
|
for (i = 0; i < indentno; i++)
|
||||||
fprintf(listing," ");
|
fprintf(listing, " ");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* procedure printTree prints a syntax tree to the
|
/* procedure printTree prints a syntax tree to the
|
||||||
* listing file using indentation to indicate subtrees
|
* listing file using indentation to indicate subtrees
|
||||||
*/
|
*/
|
||||||
void printTree( TreeNode * tree )
|
void printTree(TreeNode *tree) {
|
||||||
{ int i;
|
int i;
|
||||||
INDENT;
|
INDENT;
|
||||||
while (tree != NULL) {
|
while (tree != NULL) {
|
||||||
printSpaces();
|
printSpaces();
|
||||||
if (tree->nodekind==StmtK)
|
if (tree->nodekind == StmtK) {
|
||||||
{ switch (tree->kind.stmt) {
|
switch (tree->kind.stmt) {
|
||||||
case IfK:
|
case IfK:
|
||||||
fprintf(listing,"If\n");
|
fprintf(listing, "If\n");
|
||||||
break;
|
break;
|
||||||
case RepeatK:
|
case RepeatK:
|
||||||
fprintf(listing,"Repeat\n");
|
fprintf(listing, "Repeat\n");
|
||||||
break;
|
break;
|
||||||
case AssignK:
|
case AssignK:
|
||||||
fprintf(listing,"Assign to: %s\n",tree->attr.name);
|
fprintf(listing, "Assign to: %s\n", tree->attr.name);
|
||||||
break;
|
break;
|
||||||
case ReadK:
|
case ReadK:
|
||||||
fprintf(listing,"Read: %s\n",tree->attr.name);
|
fprintf(listing, "Read: %s\n", tree->attr.name);
|
||||||
break;
|
break;
|
||||||
case WriteK:
|
case WriteK:
|
||||||
fprintf(listing,"Write\n");
|
fprintf(listing, "Write\n");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(listing,"Unknown ExpNode kind\n");
|
fprintf(listing, "Unknown ExpNode kind\n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
} else if (tree->nodekind == ExpK) {
|
||||||
|
switch (tree->kind.exp) {
|
||||||
|
case OpK:
|
||||||
|
fprintf(listing, "Op: ");
|
||||||
|
printToken(tree->attr.op, "\0");
|
||||||
|
break;
|
||||||
|
case ConstK:
|
||||||
|
fprintf(listing, "Const: %d\n", tree->attr.val);
|
||||||
|
break;
|
||||||
|
case IdK:
|
||||||
|
fprintf(listing, "Id: %s\n", tree->attr.name);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fprintf(listing, "Unknown ExpNode kind\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
fprintf(listing, "Unknown node kind\n");
|
||||||
|
for (i = 0; i < MAXCHILDREN; i++)
|
||||||
|
printTree(tree->child[i]);
|
||||||
|
tree = tree->sibling;
|
||||||
}
|
}
|
||||||
else if (tree->nodekind==ExpK)
|
UNINDENT;
|
||||||
{ switch (tree->kind.exp) {
|
|
||||||
case OpK:
|
|
||||||
fprintf(listing,"Op: ");
|
|
||||||
printToken(tree->attr.op,"\0");
|
|
||||||
break;
|
|
||||||
case ConstK:
|
|
||||||
fprintf(listing,"Const: %d\n",tree->attr.val);
|
|
||||||
break;
|
|
||||||
case IdK:
|
|
||||||
fprintf(listing,"Id: %s\n",tree->attr.name);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
fprintf(listing,"Unknown ExpNode kind\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else fprintf(listing,"Unknown node kind\n");
|
|
||||||
for (i=0;i<MAXCHILDREN;i++)
|
|
||||||
printTree(tree->child[i]);
|
|
||||||
tree = tree->sibling;
|
|
||||||
}
|
|
||||||
UNINDENT;
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user