diff --git a/.gitignore b/.gitignore index d48c759..f86c04f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .idea -.vscode \ No newline at end of file +.vscode + +*.o \ No newline at end of file diff --git a/notes/1.md b/notes/1.md index cfa494d..9e7b04b 100644 --- a/notes/1.md +++ b/notes/1.md @@ -1,9 +1,41 @@ -컴파일러 1 -=== +# ISA & Compiler Basics + +## Basic Computer + +To get a task done by a general-purpose computer, we need: + +* Program: A sequence of instructions +* Instruction set: A set of possible instructions + +### Von Neumann Architecture + +Both instructions and data are stored in memory. +Instructions dictate: + +1. which data are manipulated, and how +2. which instruction is executed next + +The memory is independent of the CPU. -# Interpreter in Modern Processors +### How to load a program +``` +*.c -(compiler)-> *.s: Assembly program +*.s -(assembler)-> *.o: Object file +*.o -(linker)-[with library(*.o)]-> *.exe: Executable +*.exe -(loader)-> program image in memory +``` +### Program + +A computer is essentially a complex state machine. +**Programmer-visible state**: + +* Memory +* Registers +* Program Counter + +Instructions (the program) specify how to transform the values of the programmer-visible state. 
# Compiler diff --git a/src/globals.h b/src/globals.h index d3407b3..5d09545 100644 --- a/src/globals.h +++ b/src/globals.h @@ -23,17 +23,17 @@ #endif /* MAXRESERVED = the number of reserved words */ -#define MAXRESERVED 8 +#define MAXRESERVED 6 typedef enum /* book-keeping tokens */ {ENDFILE,ERROR, /* reserved words */ - IF,THEN,ELSE,END,REPEAT,UNTIL,READ,WRITE, + IF,ELSE,WHILE,RETURN,INT,VOID, /* multicharacter tokens */ ID,NUM, /* special symbols */ - ASSIGN,EQ,LT,PLUS,MINUS,TIMES,OVER,LPAREN,RPAREN,SEMI + ASSIGN,EQ,NE,LT,LE,GT,GE,PLUS,MINUS,TIMES,OVER,LPAREN,RPAREN,LBRACE,RBRACE,LCURLY,RCURLY,SEMI,COMMA } TokenType; extern FILE* source; /* source code text file */ diff --git a/src/lex/tiny.l b/src/lex/tiny.l new file mode 100644 index 0000000..aa96caa --- /dev/null +++ b/src/lex/tiny.l @@ -0,0 +1,75 @@ +/****************************************************/ +/* File: tiny.l */ +/* Lex specification for TINY */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. Louden */ +/****************************************************/ + +%{ +#include "globals.h" +#include "util.h" +#include "scan.h" +/* lexeme of identifier or reserved word */ +char tokenString[MAXTOKENLEN+1]; +%} + +digit [0-9] +number {digit}+ +letter [a-zA-Z] +identifier {letter}+ +newline \n +whitespace [ \t]+ + +%% + +"if" {return IF;} +"then" {return THEN;} +"else" {return ELSE;} +"end" {return END;} +"repeat" {return REPEAT;} +"until" {return UNTIL;} +"read" {return READ;} +"write" {return WRITE;} +":=" {return ASSIGN;} +"=" {return EQ;} +"<" {return LT;} +"+" {return PLUS;} +"-" {return MINUS;} +"*" {return TIMES;} +"/" {return OVER;} +"(" {return LPAREN;} +")" {return RPAREN;} +";" {return SEMI;} +{number} {return NUM;} +{identifier} {return ID;} +{newline} {lineno++;} +{whitespace} {/* skip whitespace */} +"{" { char c; + do + { c = input(); + if (c == EOF) break; + if (c == '\n') lineno++; + } while (c != '}'); + } +. 
{return ERROR;} + +%% + +TokenType getToken(void) +{ static int firstTime = TRUE; + TokenType currentToken; + if (firstTime) + { firstTime = FALSE; + lineno++; + yyin = source; + yyout = listing; + } + currentToken = yylex(); + strncpy(tokenString,yytext,MAXTOKENLEN); + if (TraceScan) { + fprintf(listing,"\t%d: ",lineno); + printToken(currentToken,tokenString); + } + return currentToken; +} + diff --git a/src/main.c b/src/main.c index 680cc7b..e685e43 100644 --- a/src/main.c +++ b/src/main.c @@ -8,14 +8,14 @@ #include "globals.h" /* set NO_PARSE to TRUE to get a scanner-only compiler */ -#define NO_PARSE FALSE +#define NO_PARSE TRUE /* set NO_ANALYZE to TRUE to get a parser-only compiler */ -#define NO_ANALYZE FALSE +#define NO_ANALYZE TRUE /* set NO_CODE to TRUE to get a compiler that does not * generate code */ -#define NO_CODE FALSE +#define NO_CODE TRUE #include "util.h" #if NO_PARSE @@ -32,71 +32,70 @@ /* allocate global variables */ int lineno = 0; -FILE * source; -FILE * listing; -FILE * code; +FILE *source; +FILE *listing; +FILE *code; /* allocate and set tracing flags */ int EchoSource = FALSE; -int TraceScan = FALSE; +int TraceScan = TRUE; int TraceParse = FALSE; int TraceAnalyze = FALSE; int TraceCode = FALSE; int Error = FALSE; -main( int argc, char * argv[] ) -{ TreeNode * syntaxTree; - char pgm[120]; /* source code file name */ - if (argc != 2) - { fprintf(stderr,"usage: %s \n",argv[0]); - exit(1); +main(int argc, char *argv[]) { + TreeNode *syntaxTree; + char pgm[120]; /* source code file name */ + if (argc != 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + exit(1); } - strcpy(pgm,argv[1]) ; - if (strchr (pgm, '.') == NULL) - strcat(pgm,".tny"); - source = fopen(pgm,"r"); - if (source==NULL) - { fprintf(stderr,"File %s not found\n",pgm); - exit(1); - } - listing = stdout; /* send listing to screen */ - fprintf(listing,"\nTINY COMPILATION: %s\n",pgm); + strcpy(pgm, argv[1]); + if (strchr(pgm, '.') == NULL) + strcat(pgm, ".tny"); + source = 
fopen(pgm, "r"); + if (source == NULL) { + fprintf(stderr, "File %s not found\n", pgm); + exit(1); + } + listing = stdout; /* send listing to screen */ + fprintf(listing, "\nC-MINUS COMPILATION: %s\n", pgm); #if NO_PARSE - while (getToken()!=ENDFILE); + while (getToken() != ENDFILE); #else - syntaxTree = parse(); - if (TraceParse) { - fprintf(listing,"\nSyntax tree:\n"); - printTree(syntaxTree); - } -#if !NO_ANALYZE - if (! Error) - { if (TraceAnalyze) fprintf(listing,"\nBuilding Symbol Table...\n"); - buildSymtab(syntaxTree); - if (TraceAnalyze) fprintf(listing,"\nChecking Types...\n"); - typeCheck(syntaxTree); - if (TraceAnalyze) fprintf(listing,"\nType Checking Finished\n"); - } -#if !NO_CODE - if (! Error) - { char * codefile; - int fnlen = strcspn(pgm,"."); - codefile = (char *) calloc(fnlen+4, sizeof(char)); - strncpy(codefile,pgm,fnlen); - strcat(codefile,".tm"); - code = fopen(codefile,"w"); - if (code == NULL) - { printf("Unable to open %s\n",codefile); - exit(1); + syntaxTree = parse(); + if (TraceParse) { + fprintf(listing, "\nSyntax tree:\n"); + printTree(syntaxTree); + } +#if !NO_ANALYZE + if (!Error) { + if (TraceAnalyze) fprintf(listing, "\nBuilding Symbol Table...\n"); + buildSymtab(syntaxTree); + if (TraceAnalyze) fprintf(listing, "\nChecking Types...\n"); + typeCheck(syntaxTree); + if (TraceAnalyze) fprintf(listing, "\nType Checking Finished\n"); + } +#if !NO_CODE + if (!Error) { + char *codefile; + int fnlen = strcspn(pgm, "."); + codefile = (char *) calloc(fnlen + 4, sizeof(char)); + strncpy(codefile, pgm, fnlen); + strcat(codefile, ".tm"); + code = fopen(codefile, "w"); + if (code == NULL) { + printf("Unable to open %s\n", codefile); + exit(1); + } + codeGen(syntaxTree, codefile); + fclose(code); } - codeGen(syntaxTree,codefile); - fclose(code); - } #endif #endif #endif - fclose(source); - return 0; + fclose(source); + return 0; } - diff --git a/src/scan.c b/src/scan.c index a9cfdde..5f1be06 100644 --- a/src/scan.c +++ b/src/scan.c @@ -5,69 
+5,81 @@ /* Kenneth C. Louden */ /****************************************************/ +#include "scan.h" #include "globals.h" #include "util.h" -#include "scan.h" /* states in scanner DFA */ -typedef enum - { START,INASSIGN,INCOMMENT,INNUM,INID,DONE } - StateType; +typedef enum { START, + INOVER /* after '/': a following '*' opens a comment, otherwise OVER */, + INCOMMENT /* inside a comment body, waiting for '*' */, + ASTERCOMMENT /* '*' seen inside a comment: '/' closes the comment */, + INASSIGN /* a following '=' yields EQ, otherwise ASSIGN */, + INLT /* a following '=' yields LE, otherwise LT */, + INGT /* a following '=' yields GE, otherwise GT */, + INNE /* a following '=' yields NE, otherwise ERROR */, + INNUM, + INID, + DONE } StateType; /* lexeme of identifier or reserved word */ -char tokenString[MAXTOKENLEN+1]; +char tokenString[MAXTOKENLEN + 1]; /* BUFLEN = length of the input buffer for source code lines */ #define BUFLEN 256 static char lineBuf[BUFLEN]; /* holds the current line */ -static int linepos = 0; /* current position in LineBuf */ -static int bufsize = 0; /* current size of buffer string */ +static int linepos = 0; /* current position in LineBuf */ +static int bufsize = 0; /* current size of buffer string */ static int EOF_flag = FALSE; /* corrects ungetNextChar behavior on EOF */ /* getNextChar fetches the next non-blank character from lineBuf, reading in a new line if lineBuf is exhausted */ -static int getNextChar(void) -{ if (!(linepos < bufsize)) - { lineno++; - if (fgets(lineBuf,BUFLEN-1,source)) - { if (EchoSource) fprintf(listing,"%4d: %s",lineno,lineBuf); - bufsize = strlen(lineBuf); - linepos = 0; - return lineBuf[linepos++]; - } - else - { EOF_flag = TRUE; - return EOF; - } - } - else return lineBuf[linepos++]; +static int getNextChar(void) { + if (!(linepos < bufsize)) { + lineno++; + if (fgets(lineBuf, BUFLEN - 1, source)) { + if (EchoSource) fprintf(listing, "%4d: %s", lineno, lineBuf); + bufsize = strlen(lineBuf); + linepos = 0; + return lineBuf[linepos++]; + } else { + EOF_flag = TRUE; + return EOF; + } + } else + return lineBuf[linepos++]; } /* ungetNextChar backtracks one character in lineBuf */ -static void ungetNextChar(void) -{ if (!EOF_flag) linepos-- ;} +static void ungetNextChar(void) { + if (!EOF_flag) linepos--; +} /* lookup table of 
reserved words */ -static struct - { char* str; - TokenType tok; - } reservedWords[MAXRESERVED] - = {{"if",IF},{"then",THEN},{"else",ELSE},{"end",END}, - {"repeat",REPEAT},{"until",UNTIL},{"read",READ}, - {"write",WRITE}}; +static struct {// keywords + char *str; + TokenType tok; +} reservedWords[MAXRESERVED] = { + {"if", IF}, + {"else", ELSE}, + {"while", WHILE}, + {"return", RETURN}, + {"int", INT}, + {"void", VOID}, +}; /* lookup an identifier to see if it is a reserved word */ /* uses linear search */ -static TokenType reservedLookup (char * s) -{ int i; - for (i=0;i') { + state = INGT; + } else if (c == '/') {// comment in or not + save = FALSE; + state = INOVER; + } else { + state = DONE; + switch (c) { + case EOF: + save = FALSE; + currentToken = ENDFILE; + break; + case '+': + currentToken = PLUS; + break; + case '-': + currentToken = MINUS; + break; + case '*': + currentToken = TIMES; + break; + case '[': + currentToken = LBRACE; + break; + case ']': + currentToken = RBRACE; + break; + case '{': + currentToken = LCURLY; + break; + case '}': + currentToken = RCURLY; + break; + case '(': + currentToken = LPAREN; + break; + case ')': + currentToken = RPAREN; + break; + case ';': + currentToken = SEMI; + break; + case ',': + currentToken = COMMA; + break; + default: + currentToken = ERROR; + break; + } + } + break; + /* begin comment process */ + case INCOMMENT: + save = FALSE; + if (c == EOF) { + state = DONE; + currentToken = ENDFILE; + } else if (c == '*')// comment out + state = ASTERCOMMENT; + break; + case INOVER: + if (c == '*') { + save = FALSE; + state = INCOMMENT; + } else { + state = DONE; + ungetNextChar(); + save = FALSE; + currentToken = OVER; + } + break; + case ASTERCOMMENT: + save = FALSE; + if (c == EOF) { + state = DONE; + currentToken = ENDFILE; + } else if (c == '/') { + state = START; + } + else { + state = INCOMMENT; + } + break; + /* end comment process */ + case INASSIGN: + state = DONE; + if (c == '=') + currentToken = EQ; + else { /* 
backup in the input */ + ungetNextChar(); + save = FALSE; + currentToken = ASSIGN; + } + break; + case INNE: + state = DONE; + if (c == '=') { + currentToken = NE; + } else { + ungetNextChar(); + save = FALSE; + currentToken = ERROR; + } + break; + + case INLT: + state = DONE; + if (c == '=') { + currentToken = LE; + } else { + ungetNextChar(); + save = FALSE; + currentToken = LT; + } + break; + case INGT: + state = DONE; + if (c == '=') { + currentToken = GE; + } else { + ungetNextChar(); + save = FALSE; + currentToken = GT; + } + break; + case INNUM: + if (!isdigit(c)) { /* backup in the input */ + ungetNextChar(); + save = FALSE; + state = DONE; + currentToken = NUM; + } + break; + case INID: + if (!isalpha(c)) { /* backup in the input */ + ungetNextChar(); + save = FALSE; + state = DONE; + currentToken = ID; + } + break; + case DONE: + default: /* should never happen */ + fprintf(listing, "Scanner Bug: state= %d\n", state); + state = DONE; + currentToken = ERROR; + break; + } + if ((save) && (tokenStringIndex <= MAXTOKENLEN)) + tokenString[tokenStringIndex++] = (char) c; + if (state == DONE) { + tokenString[tokenStringIndex] = '\0'; + if (currentToken == ID) + currentToken = reservedLookup(tokenString); + } + } + if (TraceScan) { + fprintf(listing, "\t%d: ", lineno); + printToken(currentToken, tokenString); + } + return currentToken; +} /* end getToken */ diff --git a/src/scan.h b/src/scan.h index 1761fda..5e3471f 100644 --- a/src/scan.h +++ b/src/scan.h @@ -7,6 +7,7 @@ #ifndef _SCAN_H_ #define _SCAN_H_ +#include "globals.h" /* MAXTOKENLEN is the maximum size of a token */ #define MAXTOKENLEN 40 diff --git a/src/test.cm b/src/test.cm new file mode 100644 index 0000000..93d7e2b --- /dev/null +++ b/src/test.cm @@ -0,0 +1,16 @@ +/* A program to perform Euclid's + Algorithm to compute gcd */ + +int gcd(int u, int v) +{ + if(v == 0) return u; + else return gcd(v, u- u/v * v); + /* hello u-u/v*v == u mod v */ +} + +void main() +{ + int x; int y; + x = input(); y = 
input(); + print(gcd(x, y)); +} \ No newline at end of file diff --git a/src/util.c b/src/util.c index cc45fcb..fe4904f 100644 --- a/src/util.c +++ b/src/util.c @@ -6,103 +6,151 @@ /* Kenneth C. Louden */ /****************************************************/ -#include "globals.h" #include "util.h" +#include "globals.h" /* Procedure printToken prints a token * and its lexeme to the listing file */ -void printToken( TokenType token, const char* tokenString ) -{ switch (token) - { case IF: - case THEN: - case ELSE: - case END: - case REPEAT: - case UNTIL: - case READ: - case WRITE: - fprintf(listing, - "reserved word: %s\n",tokenString); - break; - case ASSIGN: fprintf(listing,":=\n"); break; - case LT: fprintf(listing,"<\n"); break; - case EQ: fprintf(listing,"=\n"); break; - case LPAREN: fprintf(listing,"(\n"); break; - case RPAREN: fprintf(listing,")\n"); break; - case SEMI: fprintf(listing,";\n"); break; - case PLUS: fprintf(listing,"+\n"); break; - case MINUS: fprintf(listing,"-\n"); break; - case TIMES: fprintf(listing,"*\n"); break; - case OVER: fprintf(listing,"/\n"); break; - case ENDFILE: fprintf(listing,"EOF\n"); break; - case NUM: - fprintf(listing, - "NUM, val= %s\n",tokenString); - break; - case ID: - fprintf(listing, - "ID, name= %s\n",tokenString); - break; - case ERROR: - fprintf(listing, - "ERROR: %s\n",tokenString); - break; - default: /* should never happen */ - fprintf(listing,"Unknown token: %d\n",token); - } +void printToken(TokenType token, const char *tokenString) { + switch (token) { + case IF: + case ELSE: + case WHILE: + case RETURN: + case INT: + case VOID: + fprintf(listing, + "reserved word: %s\n", tokenString); + break; + case ASSIGN: + fprintf(listing, "=\n"); + break; + case EQ: + fprintf(listing, "==\n"); + break; + case NE: + fprintf(listing, "!=\n"); + break; + case LT: + fprintf(listing, "<\n"); + break; + case LE: + fprintf(listing, "<=\n"); + break; + case GT: + fprintf(listing, ">\n"); + break; + case GE: + fprintf(listing, 
">=\n"); + break; + case PLUS: + fprintf(listing, "+\n"); + break; + case MINUS: + fprintf(listing, "-\n"); + break; + case TIMES: + fprintf(listing, "*\n"); + break; + case OVER: + fprintf(listing, "/\n"); + break; + case LPAREN: + fprintf(listing, "(\n"); + break; + case RPAREN: + fprintf(listing, ")\n"); + break; + case LBRACE: + fprintf(listing, "[\n"); + break; + case RBRACE: + fprintf(listing, "]\n"); + break; + case LCURLY: + fprintf(listing, "{\n"); + break; + case RCURLY: + fprintf(listing, "}\n"); + break; + case SEMI: + fprintf(listing, ";\n"); + break; + case COMMA: + fprintf(listing, ",\n"); + break; + case ENDFILE: + fprintf(listing, "EOF\n"); + break; + case NUM: + fprintf(listing, + "NUM, val= %s\n", tokenString); + break; + case ID: + fprintf(listing, + "ID, name= %s\n", tokenString); + break; + case ERROR: + fprintf(listing, + "ERROR: %s\n", tokenString); + break; + default: /* should never happen */ + fprintf(listing, "Unknown token: %d\n", token); + } } /* Function newStmtNode creates a new statement * node for syntax tree construction */ -TreeNode * newStmtNode(StmtKind kind) -{ TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode)); - int i; - if (t==NULL) - fprintf(listing,"Out of memory error at line %d\n",lineno); - else { - for (i=0;ichild[i] = NULL; - t->sibling = NULL; - t->nodekind = StmtK; - t->kind.stmt = kind; - t->lineno = lineno; - } - return t; +TreeNode *newStmtNode(StmtKind kind) { + TreeNode *t = (TreeNode *) malloc(sizeof(TreeNode)); + int i; + if (t == NULL) + fprintf(listing, "Out of memory error at line %d\n", lineno); + else { + for (i = 0; i < MAXCHILDREN; i++) t->child[i] = NULL; + t->sibling = NULL; + t->nodekind = StmtK; + t->kind.stmt = kind; + t->lineno = lineno; + } + return t; } /* Function newExpNode creates a new expression * node for syntax tree construction */ -TreeNode * newExpNode(ExpKind kind) -{ TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode)); - int i; - if (t==NULL) - fprintf(listing,"Out of memory 
error at line %d\n",lineno); - else { - for (i=0;ichild[i] = NULL; - t->sibling = NULL; - t->nodekind = ExpK; - t->kind.exp = kind; - t->lineno = lineno; - t->type = Void; - } - return t; +TreeNode *newExpNode(ExpKind kind) { + TreeNode *t = (TreeNode *) malloc(sizeof(TreeNode)); + int i; + if (t == NULL) + fprintf(listing, "Out of memory error at line %d\n", lineno); + else { + for (i = 0; i < MAXCHILDREN; i++) t->child[i] = NULL; + t->sibling = NULL; + t->nodekind = ExpK; + t->kind.exp = kind; + t->lineno = lineno; + t->type = Void; + } + return t; } /* Function copyString allocates and makes a new * copy of an existing string */ -char * copyString(char * s) -{ int n; - char * t; - if (s==NULL) return NULL; - n = strlen(s)+1; - t = malloc(n); - if (t==NULL) - fprintf(listing,"Out of memory error at line %d\n",lineno); - else strcpy(t,s); - return t; +char *copyString(char *s) { + int n; + char *t; + if (s == NULL) return NULL; + n = strlen(s) + 1; + t = malloc(n); + if (t == NULL) + fprintf(listing, "Out of memory error at line %d\n", lineno); + else + strcpy(t, s); + return t; } /* Variable indentno is used by printTree to @@ -111,67 +159,66 @@ char * copyString(char * s) static indentno = 0; /* macros to increase/decrease indentation */ -#define INDENT indentno+=2 -#define UNINDENT indentno-=2 +#define INDENT indentno += 2 +#define UNINDENT indentno -= 2 /* printSpaces indents by printing spaces */ -static void printSpaces(void) -{ int i; - for (i=0;inodekind==StmtK) - { switch (tree->kind.stmt) { - case IfK: - fprintf(listing,"If\n"); - break; - case RepeatK: - fprintf(listing,"Repeat\n"); - break; - case AssignK: - fprintf(listing,"Assign to: %s\n",tree->attr.name); - break; - case ReadK: - fprintf(listing,"Read: %s\n",tree->attr.name); - break; - case WriteK: - fprintf(listing,"Write\n"); - break; - default: - fprintf(listing,"Unknown ExpNode kind\n"); - break; - } +void printTree(TreeNode *tree) { + int i; + INDENT; + while (tree != NULL) { + printSpaces(); 
+ if (tree->nodekind == StmtK) { + switch (tree->kind.stmt) { + case IfK: + fprintf(listing, "If\n"); + break; + case RepeatK: + fprintf(listing, "Repeat\n"); + break; + case AssignK: + fprintf(listing, "Assign to: %s\n", tree->attr.name); + break; + case ReadK: + fprintf(listing, "Read: %s\n", tree->attr.name); + break; + case WriteK: + fprintf(listing, "Write\n"); + break; + default: + fprintf(listing, "Unknown ExpNode kind\n"); + break; + } + } else if (tree->nodekind == ExpK) { + switch (tree->kind.exp) { + case OpK: + fprintf(listing, "Op: "); + printToken(tree->attr.op, "\0"); + break; + case ConstK: + fprintf(listing, "Const: %d\n", tree->attr.val); + break; + case IdK: + fprintf(listing, "Id: %s\n", tree->attr.name); + break; + default: + fprintf(listing, "Unknown ExpNode kind\n"); + break; + } + } else + fprintf(listing, "Unknown node kind\n"); + for (i = 0; i < MAXCHILDREN; i++) + printTree(tree->child[i]); + tree = tree->sibling; } - else if (tree->nodekind==ExpK) - { switch (tree->kind.exp) { - case OpK: - fprintf(listing,"Op: "); - printToken(tree->attr.op,"\0"); - break; - case ConstK: - fprintf(listing,"Const: %d\n",tree->attr.val); - break; - case IdK: - fprintf(listing,"Id: %s\n",tree->attr.name); - break; - default: - fprintf(listing,"Unknown ExpNode kind\n"); - break; - } - } - else fprintf(listing,"Unknown node kind\n"); - for (i=0;ichild[i]); - tree = tree->sibling; - } - UNINDENT; + UNINDENT; }