Compare commits

...

30 Commits

Author SHA1 Message Date
48cafc8815 add semantic paper 2025-12-05 02:29:14 +09:00
f524a9e165 fix parser and semantic v2.2 2025-12-05 02:09:12 +09:00
d46fd36bcb fix parse and semantic v2.1 2025-12-05 00:47:51 +09:00
2b6193a2c7 semi-complete semantic v2 2025-12-03 04:58:41 +09:00
90b7d8fcbe add child_count to scope 2025-12-03 01:57:47 +09:00
4e2fdf88f9 fix semantic for printing or sth 2025-12-03 01:53:05 +09:00
75407304f8 fix output format 2025-12-02 14:35:38 +09:00
f1fd203233 semi-complete semantic 2025-12-02 05:11:27 +09:00
f4501529df modify symtab.h struct naming 2025-12-01 16:55:56 +09:00
1840ea6b74 improve symtab.h signature 2025-12-01 15:22:05 +09:00
1204abb4f9 some changes to cminus for symtab 2025-12-01 03:35:09 +09:00
a9855535cb add P4.pdf 2025-11-28 21:11:58 +09:00
2d185ee8ee add syntax 2025-11-28 20:12:51 +09:00
1f7026bda8 add L11, L12 pdf 2025-11-27 09:40:52 +09:00
d14d440d14 modify L9 pdf
add L10 pdf
2025-11-20 09:15:17 +09:00
807ac03a6e complement in 11.13 2025-11-13 10:10:10 +09:00
d39f272563 complement in 11.11 2025-11-11 14:11:28 +09:00
d83964a190 add L8.pdf and complement in 11.06 2025-11-06 10:14:44 +09:00
9de6b3a02d complement notes in 11.04 2025-11-05 11:14:03 +09:00
0f3b235e99 add L6.pdf and complement in 10.30 2025-10-30 09:58:04 +09:00
2a7cb64862 update notes in midterm2 2025-10-27 17:19:12 +09:00
13b27cc21e update notes in midterm1 2025-10-27 04:25:46 +09:00
4a11a52f1a update style 2025-10-27 04:25:17 +09:00
13eaf5bdba add P3.pdf 2025-10-26 18:45:53 +09:00
517c03837b complement notes in 10.16 2025-10-16 10:05:22 +09:00
30027b6b4c complement notes 10.14 2025-10-14 14:00:50 +09:00
7b07687528 fix cminus.l 2025-10-03 21:06:09 +09:00
e955ec672b add crossnote and mv textfile 2025-10-03 19:17:46 +09:00
add7307893 input() return 0 when EOF after flex 2.6 2025-10-03 19:17:20 +09:00
cb19f33fef fix scan.c for ID accept non-alphabet characters and resolve some warning 2025-10-03 16:59:36 +09:00
62 changed files with 10426 additions and 1156 deletions

15
.crossnote/config.js Normal file
View File

@@ -0,0 +1,15 @@
// Renderer configuration for the .crossnote directory, written as one
// parenthesized object expression (the file's value is this object).
({
// KaTeX settings: no custom macros are defined.
katexConfig: {
"macros": {}
},
// MathJax settings: the tex, options and loader sections are all left empty.
mathjaxConfig: {
"tex": {},
"options": {},
"loader": {}
},
// Mermaid settings: startOnLoad is switched off.
mermaidConfig: {
"startOnLoad": false
},
})

6
.crossnote/head.html Normal file
View File

@@ -0,0 +1,6 @@
<!-- The content below will be included at the end of the <head> element. -->
<script type="text/javascript">
// Registers a DOMContentLoaded listener; the handler body is an empty
// placeholder, so nothing happens until code is added below.
document.addEventListener("DOMContentLoaded", function () {
// your code here
});
</script>

12
.crossnote/parser.js Normal file
View File

@@ -0,0 +1,12 @@
// Parser extension hooks, written as one parenthesized object expression.
({
// Please visit the URL below for more information:
// https://shd101wyy.github.io/markdown-preview-enhanced/#/extend-parser
// Receives the markdown text and resolves to it unchanged (identity hook).
// Judging by the name, it runs before the markdown is parsed.
onWillParseMarkdown: async function(markdown) {
return markdown;
},
// Receives the HTML string and resolves to it unchanged (identity hook).
// Judging by the name, it runs after the markdown has been parsed.
onDidParseMarkdown: async function(html) {
return html;
},
})

48
.crossnote/style.less Normal file
View File

@@ -0,0 +1,48 @@
/* Please visit the URL below for more information: */
/* https://shd101wyy.github.io/markdown-preview-enhanced/#/customize-css */
// Preview styling: fonts, base size and tightened vertical spacing.
// The selector repeats the class name, which raises its specificity.
.markdown-preview.markdown-preview {
// modify your style here
// Mermaid diagrams: white background and their own font.
.mermaid {
background-color: white;
font-family: NanumGothic;
}
// Base text size and font for the whole preview.
font-size: 10pt;
font-family: NanumMyeongjo;
// Code blocks tagged with the `scanres` language are rendered much smaller.
.language-scanres {
font-size: 6pt;
}
// Lists: no top margin, small bottom margin.
ol, ul {
margin-top: 0.0pt;
margin-bottom: 0.2rem;
}
p {
margin-bottom: 0.4rem;
}
// h1-h3 share the same margins.
h1 {
margin-top: 0.6rem;
margin-bottom: 0.4rem;
}
h2 {
margin-top: 0.6rem;
margin-bottom: 0.4rem;
}
h3 {
margin-top: 0.6rem;
margin-bottom: 0.4rem;
}
// h4-h5 use tighter margins than h1-h3.
h4 {
margin-top: 0.4rem;
margin-bottom: 0.2rem;
}
h5 {
margin-top: 0.4rem;
margin-bottom: 0.2rem;
}
// h6 has the smallest margins of all headings.
h6 {
margin-top: 0.2rem;
margin-bottom: 0.1rem;
}
}

2
.gitignore vendored
View File

@@ -3,3 +3,5 @@
*.o *.o
*.yy.c *.yy.c
node_modules

28
build.js Normal file
View File

@@ -0,0 +1,28 @@
const { Notebook } = require("crossnote");
const path = require("path");
const fs = require("fs");
/**
 * Exports the report markdown file through crossnote's Chrome exporter.
 *
 * Initializes a Notebook rooted at the current working directory
 * (`path.resolve('')`), obtains the markdown engine for the report file and
 * runs `chromeExport` with all code chunks executed first.
 *
 * @returns {Promise<void>} Resolves when the export call has finished;
 *   rejects with whatever error `Notebook.init` or `chromeExport` raises.
 */
async function main() {
  const notebook = await Notebook.init({
    notebookPath: path.resolve(''),
    config: {
      previewTheme: 'github-light.css',
      mathRenderingOption: 'KaTeX',
      codeBlockTheme: 'github.css',
      printBackground: true,
      enableScriptExecution: true,
      chromePath: '/usr/bin/google-chrome-stable',
    },
  });
  const file = "./src/2024062806.md";
  const engine = notebook.getNoteMarkdownEngine(file);
  await engine.chromeExport({ runAllCodeChunks: true });
}

// Do not leave the promise floating: report the failure and signal it to the
// caller (make, CI, shell) through a non-zero exit code.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -1,5 +1,4 @@
Lexical Analysis # Lexical Analysis
===
포트란은 모든 whitespace를 지움. 포트란은 모든 whitespace를 지움.

View File

@@ -1,30 +1,72 @@
# Syntax Analysis # Syntax Analysis
## Specification
## Context-Free Grammars (CFG) ### Context-Free Grammars (CFG)
## Parse Tree There are four main components of CFG
A tree representation of the derivation * Terminal Symbols
* Non-terminal Symbols
* Start Symbol $S$
* Production
Language generated by a CFG is a set of strings of terminals by repeatedly applying productions to the non-terminals: $L(G)$ indicates a language generated by the grammar $G$.
We can use CFGs to express the syntax of the target programming languages: Parser detects that the token stream from lexer is valid or invalid.
**We cannot rely on regex to specify the syntax:** Because regex is not expressive enough to describe valid syntax. (e.g. nested parenthesis)
## Recognition
### Parse Tree
A tree representation of the derivation.
parse tree has `terminals` at the leaves, `non-terminals` at the interior. parse tree has `terminals` at the leaves, `non-terminals` at the interior.
An in-order traversal of the leaves is the original input. An in-order traversal of the leaves is the original input.
We can apply productions for the non-terminals in any order:
* leftmost derivation * leftmost derivation
* rightmost derivation * rightmost derivation
### Ambiguity #### Ambiguity
should be removed A grammar $G$ is ambiguous if it produces different parse trees depending on the order.
for example: `A + B * C` should be resolved It should be **resolved** to construct a useful parser.
**removing ambiguity** **removing ambiguity**
Example of Ambiguity.
1. Precedence:
The production at higher levels will have operators with lower priorities (and vice versa).
we can insert non-terminals to enforce precedence.
2. Associativity:
We should determine where to place recursion depending on the associativity.
* left associative: place the recursion on the left
```txt
S -> S - T | T
T -> id
```
* right associative: place the recursion on the right
```txt
S -> T ^ S | T
T -> id
```
* non associative: do not use recursion
```txt
S -> A < A | A
A -> id
```
### AST (Abstract Syntax Tree) ### AST (Abstract Syntax Tree)
AST discards unneeded information for syntax analysis: removes non-terminals from parse tree.
### Error Handling ### Error Handling
@@ -32,24 +74,255 @@ One of the purposes of the compiler is error handling.
- to detect non-valid programs - to detect non-valid programs
- and to translate the non-valid to the valid - and to translate the non-valid to the valid
## Parsing so therefore, error handler should:
* report errors accurately and clearly
* recover quickly from an error
* not slow down compilation of valid code
* Top-down Parsing so Why?
* back in the day, the compiler was extremely slow.
* but nowadays, we do not need complex error handling procedure
* Quick recompilation
* Users tend to correct few errors at once
* Does not need a complex error recovery procedure
**Recursive Descent Parsing** ## Automation
by using backtracking How to generate parse tree from CFG?
* Predictive Parsing ### Top-Down Parsing
Construct a leftmost derivation of string while reading the token stream.
e.g.
```txt
S -> E + S | E
E -> num | (S)
```
Parsing Table: no need to backtrack. We can implement it as **recursive descent parsing**.
Recursive descent parsing tries out the rules in order and backtracks if the chosen production does not generate the expected token.
#### Predictive Parsing and LL(1)
But it needs **backtracking**.
So we introduce **predictive parsing**.
Predictive Parsing applies a single production without "backtracking". LL(1) grammar can apply **"at most a single production"**, which actually eliminates the multiple matches in top-down parsing(recursive-descent).
### Parser Implementation
#### Recursive Descent Parser by LL(1)
```text
S -> ES'
S' -> +ES' | e
E -> num | (S)
```
We can use the table to implement parsers.
| * | num | + | ( | ) | $(EOF) |
| --- | --- | --- | --- | --- | ------ |
| S | ES_ | | ES_ | | |
| S_ | | +ES_ | | e | e |
| E | num | | (S) | | |
```c
/* S -> E S' : both tokens that can start E (num and '(') select this rule. */
void parse_S() {
    if (token == num || token == '(') {
        parse_E();
        parse_S_();
        return;
    }
    /* Any other lookahead has no entry in the S row of the table. */
    ParseError();
}
/* S' -> + E S' | e
 * The '+' branch must parse the operand E before recursing into S';
 * without the parse_E() call the token after '+' is never consumed as E. */
void parse_S_() {
    switch (token) {
    case '+':                 /* S' -> + E S' */
        token = input.next(); /* consume '+' */
        parse_E();
        parse_S_();
        return;
    case ')':                 /* S' -> e : ')' and EOF are in Follow(S') */
    case EOF:
        return;
    default:
        ParseError();
    }
}
/* E -> num | ( S ) */
void parse_E() {
    if (token == num) {
        /* E -> num : consume the number. */
        token = input.next();
        return;
    }
    if (token == '(') {
        /* E -> ( S ) : consume '(', parse S, then require and consume ')'. */
        token = input.next();
        parse_S();
        if (token != ')')
            ParseError();
        token = input.next();
        return;
    }
    ParseError();
}
```
#### Parsing Tables
How do we construct a parsing table? There are three important properties used to generate parse tables.
* $x$ is nullable if it can derive an empty string.
* $\text{First}(x)$ is the set of terminals that can be derived in the first position from $x$.
* $\text{Follow}(x)$ is the set of terminals that can appear immediately after $x$ in at least one of the derivations.
Computing Nullable
1. Easy
Computing First
1. $\text{First}(t) = \set{t}$
2. For a production $x \to A_1 A_2 \dots A_n$ where $A_1 \dots A_{i-1}$ are nullable, then $\text{First}(x) += \text{First}(A_{i})$
Computing Follow
1. $\text{Follow}(S) = \set{ \$ }$, ($S$ is the start symbol)
2. For a production $x \to A_1 A_2 \dots A_n$ where $A_{i+1}\dots A_{n}$ are nullable, then $\text{Follow}(A_{i}) += \text{Follow}(x)$
3. For a production $x \to A_1 A_2 \dots A_n$ where $A_{i+1}A_{i+2}\dots A_{j-1}$ are nullable, $\text{Follow}(A_{i}) += \text{First}(A_{j})$
So Combine Them Together:
```py
S = symbols.start
for x in symbols:
First(x) = {}; Follow(x) = {}, Nullable(x) = false
Follow(S) = {$}
for t in terminals:
First(t) = {t}
Nullable(e) = True
while not (First.is_changed or Follow.is_changed or Nullable.is_changed):
for X, A in productions:
if all(A) is Nullable: Nullalbe(X) = True
if A[1..i-1] is Nullable: First(X) += First(A[i])
if A[i+1..n] is Nullable: Follow(A[i]) += Follow(X)
if A[i+1..j-1] is Nullable: Follow(A[i]) += First(A[j])
```
We can use the tables with `First`, `Follow`, `Nullable` to make actual **Parsing Tables** by combining.
### Bottom-Up Parsing
Bottom-up Parsing is more efficient than Top-down parsing by using **LR grammars**, where **L** means left-to-right scanning of the input and **R** means constructing a right-most derivation (in reverse).
It relies on **Shift-reduce Parsers**.
Shift-Reducing Parsing:
Bottom-up parser traces a rightmost derivation in reverse, we should scan the input terminals in a left-to-right manner.
So the parser splits a string into two parts: sequence of symbols to reduce(**stack**), remaining tokens(**input**).
And shift-reduce parsing requires two actions: **Reduce**, **Shift**.
What's the important challenge is "Action Selection Problem":
As there can be conflicts: For a given state(stack + input) there can be multiple possible actions:
* shift-reduce conflict
* reduce-reduce conflict
### LR Grammars
* LR(k): left-to-right scanning, right most derivation and $k$ symbol lookahead
* LR(0) Grammar
LR(0) indicates grammars that can determine actions without any lookahead.
There are **no reduce-reduce and shift-reduce conflicts**, because it should be determined by stacks.
### Parser Implement ### NFA Representations
We can represent shift-reduce parsing using an **NFA**, whose states are production with separator '`.`' on RHS. And we have an additional dummy production `S' -> S$` for a start and end state.
There are two types of transitions between the states:
* **shift transition**: transition by the shift actions.
* **$\epsilon$ transition**: that is the parser expand the expected list not consuming any input tokens. (transition to LHS of the production is equal to next of the current position)
#### NFA to DFA
NFA can be fully converted to DFA. by using DFA, the parser determine whether to shift or reduce: by using symbols in the stack to **traverse the state** and **determine whether to shift or reduce by destination state**.
### Parsing Table And LR(0)
DFA Traversal is implemented by simplifying DFA to **LR Parsing Table**.
Store the states along with the symbols in the stack: <`symbol`, `state`>
There are two different types of tables: `goto`, `action`.
* `goto`: determine the next state using the top state and an **input non-terminal**.
* `action`: determine the action using the top state and an input terminal.
And table consists of four different actions:
* `shift x`: push<a, x> on the stack (a is current input and x is a state)
* `reduce x -> a`: pop a from the stack and push <`x`, `goto[curr_state, x]`>
* accept(`S' -> S$.`) / Error
Also DFA states are converted to index of each rows.
But There is a limitation when there are multiple options to fill the parsing table, which should be solved with **lookahead**.
### SLR(1) Parsing
A simple extension of LR(0).
For each reduction `X -> b`, look at the next symbol `c` and then apply reduction **only if `c` is in `Follow(X)`** which is a lookahead.
### LR(1) Parsing
LR(1) uses lookahead more delicately. For them, it uses a more complex state like `X -> a.b,c`, which means:
1. `a` is already matched at top of the stack
2. next expect to see `b` followed by `c`
Also `X -> a.b,{x1,x2,...,xn}` indicates:
* forall i in `{x1,...,xn}`, `X -> a.b,i`
We extend the $\epsilon$-closure and `goto` operation.
LR(1) closure identification:
* start with `Closure(S) = S`
* foreach item: `[X -> a.Bb,c]` in `S`
* add `{B -> .y,First(bc)}`
* Initialize the state with `[S' -> .S,$]`
LR(1) `goto`:
Given an Item in the state I: `[X -> a.Bb,c]`, `Goto/Shift(I, B) = Closure([X -> aB.b,c])`
LR(1) Parsing Table is same as LR(0) except for **reductions**.
### LALR(1) Parsing
LR(1) has too many states. LALR(1) Parsing.
LALR(1) parsing is a **LookAhead LR**.
Construct LR(1) DFA and merges any two LR(1) states whose items have the same production rule, but different lookahead. It reduces the number of parser table entries, but theoretically less powerful than LR(1).
LALR(1) generally has the same number of states as SLR(1), which is far fewer than LR(1).
But we will not dive into the details of LALR(1).
### LL/LR Grammars
1. LL Parsing Tables
* Table[NT, T] = Production to apply
* Compute using First, Follow.
2. LR Parsing Tables
* Table[LR State, Term] = shift/reduce/error/accept
* Table[LR State, NT] = goto/err
* Computing using closure and goto operations on LR states
## Automatic Disambiguation
It is highly complex to propose unambiguous grammars: precedence, associativity. By defining precedence, using ambiguous grammars without shift-reduce conflicts: define precedence between terminals on the stack vs. terminals on the input.
## AST Data Structure
LL/LR parsing implicitly build AST.
* LL parsing: AST represented by the productions
* LR parsing: AST represented by the reduction
### AST Construction in LL
```cpp
// Builds the AST node for S -> E S' while parsing (LL, recursive descent).
// Returns the node for S built from the sub-trees of E and S'.
expr parse_S() {
    switch (token) {
    case num:
    case '(': {
        // The braces give child1/child2 their own scope; without them the
        // jump to `default:` would cross their initialization, which C++
        // rejects.
        expr child1 = parse_E();
        expr child2 = parse_S_();
        return new S(child1, child2);
    }
    default:
        // NOTE(review): presumably ParseError() does not return; otherwise
        // control falls off the end of a non-void function - confirm.
        ParseError();
    }
}
```
### AST Construction in LR
Construction mechanism:
* Store parts of the tree on the stack
* foreach nonterminal `X` on the stack, store the sub-tree for `X`
* After reduce operation for a production `X -> a`, create an AST node for `X`

View File

@@ -1,169 +1,144 @@
# Syntax Anlysis 2 # Semantic Analysis
## Bottom-Up Parsing Even after passing the lexical and syntax analysis, there can still be errors in the usage of variables, objects, and functions.
**Semantic Analysis** ensures that the program satisfies a set of rules regarding the usage of programming constructs.
Bottom-up Parsing is more efficient than Top-down parsing. There are two main categories of semantic analysis:
it uses LR grammars: Left-recursive and right-most derivation. * Scopes
* Types
Rely on **Shift-reduce Parsers**. ## Scope
example: Lexical scope is textual region in the program.
Scope of an identifier is the lexical scope its declaration refers to
### Symbol Tables
Semantic checks refer to properties of identifiers in the program; they need an environment to store identifier info: **symbol table**.
In symbol tables each entry contains name of an identifier and additional info.
### Implementing Symbol Tables
Five operations:
* `Insert Scope`
* `Exit Scope`
* `Find Symbol(x)`
* `Add Symbol(x)`
* `Check Scope(x)`
We can build the symbol tables during parsing or after constructing the AST. The symbol tables should be generated before semantic analysis.
### Function Declaration and Usage Types
* Declare the functions before usage(`cminus`)
* Can use functions before declaration(`py`)
* Separate body declaration(`C`)
### Scope Analysis
* Generate Symbol Table and do Scope Analysis
* Simultaneously
## Types
* Type Checking: A set of rules which ensures the type consistency of different construct in the program.
* Type inferencing: fill missing type info.
### Type Checking
Semantic checking to enforce the type safety of the program.
There are three types of types.
* Statically typed
* Dynamically typed
* Untyped
Static Type Checking does not require additional type checking instructions at runtime. It guarantees that the executions are safe at compile time. But modern languages require both static and dynamic type checking (union, void pointer).
A **type** is a description of a set of values and a set of allowed operations on those values.
**Type expression** is the description of the possible types in the program.
**Type System** defines types for language construct like nodes in AST.
Language usually have basic types aka primitive types. Using these types to build type expressions.
### Type Comparison Implementation
There are two options:
1. Implement a method `Equals(T1, T2)`.
It must compare type trees of T1 and T2. For OOP languages also need sub-types
### Type Checking Methodology
Type checking means verifying whether expressions are given proper types
1. Implement using Syntax-Directed Definitions(SDD)
2. First build the AST, then implement type checking by recursive traversal of the AST nodes
#### SDD
SDD associates semantic rules for the productions in the CFG. It checks types based on the semantic rules associated with the production rules.
#### AST Traversal
Type Checking During AST Traversal.
중간에 새로운 타입이 생길 수 있음.
By Recursive Traversal of the AST nodes, inferencing types of AST nodes.
### Inference
$$\frac{\vdash H_1 \vdash H_2 \vdash H_3}{\vdash \text{conclusion}}[\text{rule name}]$$
#### Soundness
항상 참이 되는 타입
$e : T$ means that $e$ is a sound expression of type $T$, that is $e$ is always the type $T$.
for non expression statements use special unit type (like void or empty type)
$S: \text{void}$
#### Proof Tree
$$\frac{
\frac{1 \text{ is a integer literal}}{\vdash 1: \text{int}} [\text{int}] \;
\frac{2 \text{ is an integer literal}}{\vdash 2: \text{int}} [\text{int}]
}{\vdash 1 + 2 : \text{int}} [\text{int add}]
$$
Proof Tree는 AST를 뒤집은 모양.
If-then-else는 가질 수 있는데, If-then은 타입을 가질 수 없음.
#### Type Environment
어떻게 $x$가 variable이라고 할때 어떤 타입을 가지는지 알 수가 없음.
Type Environment gives types for **free variables**.
$$O \vdash e: T$$
* $O$ is essentially **symbol table**.
Complex Example of Declaration:
```c ```c
E -> T | T + E for (T1 i = 0;;) {
T -> int | int * T | (E) exp
```
```c
// reduce
int * int + int | T -> int
int * T + int | T -> int * T
T + int | T -> int
T + T | E -> T
T + E | E -> T + E
E |
// view as reversed, it seems right-most derivation
```
### Shift-Reudce Parsing
```c
E -> T | T + E
T -> int | int * T | (E)
```
| Stack | Input | Applied Production |
| ----------- | ----------------- | --------------------- |
| | `int * int + int` | shift |
| `int` | `* int + int` | shift |
| `int *` | `int + int` | shift |
| `int * int` | `+ int` | reduce `T -> int` |
| `int * T` | `+ int` | reduce `T -> int * T` |
| `T` | `+ int` | shift |
| `T +` | `int` | shift |
| `T + int` | | reduce `T -> int` |
| `T + T` | | reduce `E -> T` |
| `T + E` | | reduce `E -> T + E` |
| `E` | | |
**Action Selection Problem**
Which action should we take?
when shift, when reduce, which production applied?
#### LR-Style Grammars
* LR(k): left-toright scanning, right most derivation and k symbol lookahead
* LR(0) Grammar
LR(0) indicates grammars that can determine actions without any lookahead: there are no reduce-reduce and shift-reduce conflicts when using **only the symbols in the stack**.
represent shift-reduce parsing using an **NFA**, whose states are production with separator '`.`' on RHS.
for example, a production `T -> (E)` has four states: `T -> .(E)`, `T -> (.E)`, `T -> (E.)`, `T -> (E).`.
before `.` means already in stack, next item means expecting item.
plus an additional dummy production `S' -> S$` for a start and end state.
there are two types of transitions between the stats
- shift transition
- $\epsilon$ transition:
example:
```
S -> (L) | id
L -> LS | L,S
```
```
S' -> S$
S -> (L) | id
L -> S | L,S
```
It can be represented as a NFA:
```python {cmd matplotlib hide}
import sys
import pymupdf
from PIL import Image
doc = pymupdf.open("../pdf/L4.pdf")
pix = doc[22].get_pixmap(dpi=360)
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
plt.imshow(img)
plt.axis('off')
plt.tight_layout()
plt.show()
```
* SLR(1) Parsing
* LR(1) Grammar
```python {cmd matplotlib hide}
import sys
import pymupdf
from PIL import Image
doc = pymupdf.open("../pdf/L4.pdf")
pix = doc[47].get_pixmap(dpi=360)
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
plt.imshow(img)
plt.axis('off')
plt.tight_layout()
plt.show()
```
LR(1) Parsing Table
is same as LR(0) parsing table construction except for reductions:
* LALR(1) Grammar
LALR(1) generally has the same number of states as SLR (much less than LR(1))
for Pascal language, SLR requires several hundred states, LR(1) requires several thousand states.
#### Ambiguous Grammar
Ambiguity is mainly from
* Precedence
* The production at higher levels will have operators with lower priorities (and vice versa).
* we can insert non-terminals to enforce precendence.
* Associativity
* we should determine where to place recursion depending on the associativity
for example: `if-then-else`
**Automatic Disambiguation**
We can define precedence to use ambiguous grammars w/o shift-reduce conflicts.
## AST
### AST Construction LL
```c
expr parse_S() {
switch(token) {
case num:
case '(':
expr child1 = parse_E();
expr child2 = parse_Sp();
return new S(child1, child2);
default:
parseError();
}
} }
``` ```
$$\frac{O[T1/i] \vdash \text{exp}: T2}{O\vdash \texttt{for}(i: T1) \set{\text{exp}: T2}}$$
### AST Construction LR Complex Example of Class Attrs
$$O_C(x) = T$$
* forall attrs $x: T$ in class C
$$O_C$$
Complex Example of Class Methods
### Subtyping

92
notes/5.md Normal file
View File

@@ -0,0 +1,92 @@
# Semantic Analysis
* after passing the lexical and syntax analysis, there are still errors. so correcting usage of variables, objects and function... are needed.
**Semantic Analysis** ensures the program satisfies a set of rules regarding the usage of programming constructs:
* identifiers declared before used
* types
* inheritance relationships
* single definition
There are two main categories of semantic analysis:
* Scopes
* Types
## Scope
**Lexical Scope**: textual region in the program
**Symbol Tables**
Semantic checks refer to properties of identifiers in the program; they need an environment to store identifier info: **symbol table**.
| name | kind | type |
| ---- | ---- | ------------ |
| foo | func | `int -> int` |
| m | arg | `int` |
| n | arg | `int` |
| tmp | var | char |
Each scope has symbol tables.
And the program has a hierarchy of symbol tables (scopes).
When an identifier is used, traverse the hierarchy of symbol tables upward from the current scope until an identifier with the same name is found; that entry is its declaration.
### Build a Symbol Table
there are five operations:
* insert scope
* exit scope
* find symbol(x)
* add symbol(x)
* check scope(x)
## Type
* Type checking
* Type inferencing
Three Language Types:
* Statically typed
* Dynamically typed
* Untyped(machine code)
**Static Type Checking**
Does not require additional type checking instructions at runtime.
and guarantees that the executions are safe at compile time.
modern languages require both static and dynamic type checking(union, void ptr)
So what is types?
A type indicates a description of a set of values and a set of allowed operations on those values.
* Type Expressions: Describe the possible types in the program, e.g., `int`, `char*`, `array[]`, `object`.
* Type System: Defines types for language constructs (think about nodes in AST)
### Type Comparison Implementation
1. Implement a method `Equals(T1, T2)`
* must compare type trees of `T1` and `T2`
2. Use unique objects for each distinct type
* each type expression resolved to same type object everywhere
* faster type comparison (use `==`)
* object-oriented: check subtyping of type objects
for option 1
### Type Checking Methodology
### Inference Rules
### Soundness
$$\frac{i\text{ is an integer literal}}{\vdash i: \texttt{int}}[\texttt{int}]$$
Some rules are sound but not necessary for a language: (not giving meanings)
$$\frac{i\text{ is an integer literal}}{\vdash i \text{ is object} }$$
$$\frac{e_1: \texttt{bool} \quad e_2: T}{\vdash \texttt{while} (e_1) \{ e_2 \} : \texttt{void}}[\texttt{while}]$$

68
notes/6.md Normal file
View File

@@ -0,0 +1,68 @@
# Code Generation 1
## Organization of Storage
* Code Area
* Static Area
* Stack Area
* Heap Area
### Stack Management using Register File
* Stack pointer (SP): points to the top of the frame
* Frame Pointer (FP, BP): points to the frame base
because stack frame size is not always known at compile time.
### Saving Registers
The callee may overwrite useful values in the registers.
So caller's useful values should be stored in stack when function-call (`push`), and restored when callee return (`pop`). This role is done by both callee and caller (`caller-saved`, `callee-saved`).
* Caller-saved registers
used to hold **temporary quantities** that need not be preserved across calls.
* Callee-saved registers
used to hold **long-lived values** that should be preserved across calls.
So Code before call:
* push the params.
* push caller-saved registers
* push return address (curr `PC`) and jump to callee code
Prologue of callee
* push dynamic link (`FP`)
* old stack pointer becomes new frame pointer
* push callee-saved registers
* push local var
Epilogue
* pop callee-saved registers
* store return value at appropriate place
* restore old stack pointer
* pop old frame pointer
* pop return addr and jump to that
Code after call
* pop caller-saved registers
* use return value from appropriate place
When accessing stack variable, use offsets from `FP`.
## Global Variables or Static Variables
Global variables have same references for all scope. They are assigned a **fixed address once** (**statically allocated**).
`Code > Static Data > Stack`
## Dynamic Allocation
The dynamically allocated value outlives the procedure creating it (unless deleted).
`Code > Static Data > Stack -> <- Heap`

96
notes/7.md Normal file
View File

@@ -0,0 +1,96 @@
# Code Generation 2
## Stack Machine
Consider two instructions
* `push i`
* `add i`
It is not efficient because all stack is considered as memory (which is slower than register).
### Utilizing Register Files
Keep the top of the stack in a register, so `add` requires only a single memory access.
* `acc <- i`
* `push acc`
* `pop`
* `add`
### Code Generation From Stack Machine
Assume that stack grows towards lower addresses.
## MIPS
32 regs
`$sp`, `$a0`, `$t1`
* `lw`
* `add`
* `sw`
* `addi`
* `li`
* `move`
Converting Stack to MIPS ISA
* `acc <- i`
* `li $a0 i`
### Optimizing
no
## Branch
`beq $1 $2 lbl`
`b lbl`
## Function
At Caller Side:
1. saves the `$fp` to the stack
2. saves the actual params in reverse order
3. saves the return address to `$ra`
At Callee Side:
1. set `$fp` to `$sp`
2. the callee saves the return address
3. ...
## Temp Var
Many various intermediate vars should be stored in the AR. But compiler can statically know how many temporary variables there are.
Let $NT(e)$ is defined recursively by:
$$NT(e1 + e2) = \max (NT(e1), NT(e2) + 1)$$
for example:
### CG using NT
add new args to the `cgen(e, nt)`
reduce number of decrease `$sp` (`addi $sp $sp -4`)
## Intermediate Representation (IR)
Before:
Each Languages need respective, different Optimizer.
Now:
Common IR optimizer.
IR is language-independent and machine-independent optimization.
### High-Level Assembly
It uses unlimited number of registers.
It uses assembly-like control structures (jmp and lbl).
opcodes but some are higher level
igen(e, t)

99
notes/8.md Normal file
View File

@@ -0,0 +1,99 @@
# Dataflow Analysis
Optimization means improving resource utilization not changing what the program computes.
Resource utilization means many things:
* **Execution time**
* Code size
* Network messages sent.
## Basic Block (BB)
A BB is a maximal sequence of instructions with **no labels** (except at the first instruction) and **no jumps** (except at the last instruction).
All instructions in a BB have a fixed control flow.
## Control Flow Graph
```mermaid
flowchart TD
Entry --> A
A[BB1] --> B[BB2]
A --> C[BB3]
B --> D[BB4]
C --> D
D --> E[BB5]
E --> G[BB7]
E --> F[BB6]
G --> Exit
F --> Exit
```
## Local Optimization
### Algebraic Simplification
x := x * 0 -> x := 0
y := y ** 2 -> y := y * y
x := x * 8 -> x := x << 3
x := x * 15 -> t := x << 4; x := t - x
### Constant Folding
x := 2 + 2 -> x := 4
if 2 < 0 jump L -> nop
if 2 > 0 jump L -> jump L
But Constant folding can be dangerous on cross-compilation (in precision).
### Unreachable Code
### Dead Code Elimination
## Global Optimization
It is not actually global but in control graph.
In Basic Block, there are a few instructions (4-8). So only local optimization, it is not quite optimizable. There are many cases where the optimization can be performed through the entire CFG.
## Dataflow Analysis
* Local Dataflow Analysis
* Global Dataflow Analysis
Analysis of Reaching Definition
Effect of an Instruction
`IN[b]` and `OUT[b]`
Meet Operator
`IN[b] = union(OUT[p1]...OUT[pn])`
```c
// init
OUT[entry] = {}
```
## Liveness Analysis
Liveness describes whether a variable's current value may be used in the future. It helps **eliminate dead code**.
Transfer function
* `USE[b]` set of variables used in `b`
* `DEF[b]` set of variables defined in `b`
so transfer function `f_b` for a basic block b:
```IN[b] = USE[b] + (OUT[b] - DEF[b])```
for reaching definition
```OUT[b] = union(INs)```
For supporting cyclic graphs, repeated computation is needed.

28
notes/9.md Normal file
View File

@@ -0,0 +1,28 @@
# Control Flow Analysis
Dominator
for a given CFG **a node `x` dominates a node `y`** if every path from the Entry block to `y` contains `x`.
* Each BB dominates itself
* If `x dom y` and `y dom z` then `x dom z`
* If `x dom z` and `y dom z` then either `x dom y` or `y dom x`
Dominator Tree
* initial node is a root
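* `x` is the parent of `y` when `x` is the immediate dominator of `y`; more generally, `x dom y` means that `y` is `x` itself or a descendant of `x`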
Natural Loops
How to Find Natural Loop?: Introduction Backedge
Important concepts in a loop
* Header and Loop BB
* Back Edges
* Exit Edges
* Preheader (Preloop)
Loop-Invariant Computation

379
out/lex.md Normal file
View File

@@ -0,0 +1,379 @@
# Lexical Analysis(Scanner) Report
* 주하진, 2024062806
## Compilation Environment and Method
주어진 `Makefile`을 이용하여 C파일을 컴파일함.
C파일은 `gcc`를 이용해서 컴파일한다.
`Makefile`에서 산출되는 실행파일은 `cminus_cimpl`과 `cminus_lex`가 있으며 각각 `main.c util.c scan.c`, `main.c util.c lex.yy.c`를 컴파일한 오브젝트 파일을 필요로 한다.
`lex.yy.c`는 `flex -o lex.yy.c cminus.l`을 통해 생성된다.
## C-Minus Language
C-Minus에서 필요한 토큰타입변수와 그에 대한 설명은 다음과 같다.
**특수 토큰**
* `ENDFILE`: 파일끝
* `ERROR`: 에러
**키워드 토큰**
* `IF`: `if`
* `THEN`: `then`
* `ELSE`: `else`
* `WHILE`: `while`
* `RETURN`: `return`
* `INT`: `int`
* `VOID`: `void`
**가변길이 토큰**
* `ID`: 식별자
* `NUM`: 숫자
**기호 토큰**
* `ASSIGN`: `=`
* `EQ`: `==`
* `NE`: `!=`
* `LT`: `<`
* `LE`: `<=`
* `GT`: `>`
* `GE`: `>=`
* `PLUS`: `+`
* `MINUS`: `-`
* `TIMES`: `*`
* `OVER`: `/`
* `LPAREN`: `(`
* `RPAREN`: `)`
* `LBRACE`: `[`
* `RBRACE`: `]`
* `LCURLY`: `{`
* `RCURLY`: `}`
* `SEMI`: `;`
* `COMMA`: `,`
**토큰에 포함되지 않는 스펙**
* `/*` - `*/`: 주석 (토큰에 포함하지 않음)
위와 같은 토큰 타입을 기반으로 토크나이징하는 것이 목적이다.
### Using `scan.c`
`scan.c`에서는 올바른 `getToken`을 작성해야 한다.
`getToken`을 작성하기에 앞서 전이가능한 `STATE`를 작성한다. 특히 `<`, `>`, `!`, `=`, `/`의 경우에는 단 한 글자만 받는게 아니라 그 다음 문자에 따라 산출할 토큰이 달라질 수 있으므로 그에 따른 `STATE`를 만든다.
결과적으로 필요한 STATE는 다음과 같다.
```
START, INOVER, INCOMMENT, ASTERCOMMENT, INASSIGN, INLT, INGT, INNE, INNUM, INID, DONE
```
이를 이용해 `getToken`의 DFA를 작성할 수 있다.
```mermaid
stateDiagram-v2
START
state comment {
INOVER
INCOMMENT
ASTERCOMMENT
}
INASSIGN
INLT
INGT
INNE
state multichar {
INNUM
INID
}
state done {
DONE
}
START --> INNUM: isdigit
INNUM --> INNUM: isdigit
INNUM --> DONE: else with unget
START --> INID: isalpha
INID --> INID: isalnum
INID --> DONE: else with unget
START --> INASSIGN: =
INASSIGN --> DONE: =
INASSIGN --> DONE: else with unget
START --> INLT: \<
INLT --> DONE: =
INLT --> DONE: else with unget
START --> INGT: \>
INGT --> DONE: =
INGT --> DONE: else with unget
START --> INNE: !
INNE --> DONE: =
INNE --> DONE: else with unget and<br/> return ERROR
START --> INOVER: /
INOVER --> INCOMMENT: \*
INCOMMENT --> ASTERCOMMENT: \*
ASTERCOMMENT --> INCOMMENT: else
ASTERCOMMENT --> START: /
```
이를 통해 `scan.c`를 작성하면 된다.
이때 `tokenString`은 항상 넣되 (하지만 NUM, ID 토큰에서만 필요함) comment때만 안 넣으면 된다. `unget`할때도 안넣어야 한다.
### Using Lex (cminus.l)
tiny의 lex파일처럼 간단하게 넣고 컴파일하면 된다.
하나 중요한 점은 comment를 구현할 때, `prev`와 `now`를 각 과정에서 계속 업데이트 해가면서 `now == '/' && prev == '*'` 일때까지 계속 `input()`을 받아주면 된다.
## Examples & Result
<table>
<tr>
<th>cminus file</th>
<th>result text file</th>
</tr>
<tr>
<td>
```c { .line-numbers }
/* A program to perform Euclid's
Algorithm to computer gcd */
int gcd (int u, int v)
{
if (v == 0) return u;
else return gcd(v,u-u/v*v);
/* u-u/v*v == u mod v */
}
void main(void)
{
int x; int y;
x = input(); y = input();
output(gcd(x,y));
}
```
</td>
<td>
```scanres
C-MINUS COMPILATION: ./test1.cm
4: reserved word: int
4: ID, name= gcd
4: (
4: reserved word: int
4: ID, name= u
4: ,
4: reserved word: int
4: ID, name= v
4: )
5: {
6: reserved word: if
6: (
6: ID, name= v
6: ==
6: NUM, val= 0
6: )
6: reserved word: return
6: ID, name= u
6: ;
7: reserved word: else
7: reserved word: return
7: ID, name= gcd
7: (
7: ID, name= v
7: ,
7: ID, name= u
7: -
7: ID, name= u
7: /
7: ID, name= v
7: *
7: ID, name= v
7: )
7: ;
9: }
11: reserved word: void
11: ID, name= main
11: (
11: reserved word: void
11: )
12: {
13: reserved word: int
13: ID, name= x
13: ;
13: reserved word: int
13: ID, name= y
13: ;
14: ID, name= x
14: =
14: ID, name= input
14: (
14: )
14: ;
14: ID, name= y
14: =
14: ID, name= input
14: (
14: )
14: ;
15: ID, name= output
15: (
15: ID, name= gcd
15: (
15: ID, name= x
15: ,
15: ID, name= y
15: )
15: )
15: ;
16: }
17: EOF
```
</td>
</tr>
<tr>
<td>
```c {.line-numbers}
void main(void)
{
int i; int x[5];
i = 0;
while( i < 5 )
{
x[i] = input();
i = i + 1;
}
i = 0;
while( i <= 4 )
{
if( x[i] != 0 )
{
output(x[i]);
}
}
}
```
</td>
<td>
```scanres
C-MINUS COMPILATION: ./test2.cm
1: reserved word: void
1: ID, name= main
1: (
1: reserved word: void
1: )
2: {
3: reserved word: int
3: ID, name= i
3: ;
3: reserved word: int
3: ID, name= x
3: [
3: NUM, val= 5
3: ]
3: ;
5: ID, name= i
5: =
5: NUM, val= 0
5: ;
6: reserved word: while
6: (
6: ID, name= i
6: <
6: NUM, val= 5
6: )
7: {
8: ID, name= x
8: [
8: ID, name= i
8: ]
8: =
8: ID, name= input
8: (
8: )
8: ;
10: ID, name= i
10: =
10: ID, name= i
10: +
10: NUM, val= 1
10: ;
11: }
13: ID, name= i
13: =
13: NUM, val= 0
13: ;
14: reserved word: while
14: (
14: ID, name= i
14: <=
14: NUM, val= 4
14: )
15: {
16: reserved word: if
16: (
16: ID, name= x
16: [
16: ID, name= i
16: ]
16: !=
16: NUM, val= 0
16: )
17: {
18: ID, name= output
18: (
18: ID, name= x
18: [
18: ID, name= i
18: ]
18: )
18: ;
19: }
20: }
21: }
22: EOF
```
</td>
</tr>
</table>

BIN
out/lex.pdf Normal file

Binary file not shown.

267
out/parse.md Normal file
View File

@@ -0,0 +1,267 @@
# Syntax Analysis (Parser) Report
* 주하진, 2024062806
## Compilation Environment and Method
주어진 `Makefile`을 이용해 C파일과 `cminus.l`, `cminus.y`를 변환함.
C파일은 `gcc`를 이용하고 `*.l`은 `flex`, `*.y`는 `yacc`을 이용함.
`Makefile`의 빌드 결과물 `cminus_parser`를 만들기 위해서 `main.c`, `util.c`, `lex.yy.o`, `y.tab.o`를 필요로 한다.
## C-Minus Parser Implementation
C-Minus Parser 구현을 위해 다음 세 파일의 큰 수정이 필요했다.
* `globals.h`
* `util.c`, `util.h`
* `cminus.y` (Important)
### `globals.h`
여러개의 Kind Enum을 추가하였다.
* NodeKind(큰 분류)
* StmtKind(Statement의 종류)
* ExpKind(Expression의 종류)
* DeclKind(Declaration의 종류)
* TypeKind(Declaration에서 Type을 구분하기 위해 사용, 실제로 파스트리에 들어가진 않음, var_declaration에서 참조하기 위한 목적.)
**StmtKind**
* IfK: if문
* IterK: while문
* ReturnK: return문
* CompK: 여러개 있는 중괄호(복합) 문
**ExpKind**
* AssignK: 할당문
* OpK: 연산자가 포함된 표현식
* ConstK: 상수
* IdK: 식별자
* ArrIdK: 배열 식별자
* CallK: 함수 호출
**DeclKind**
* FuncK: 함수 선언
* VarK: 변수 선언
* ArrVarK: 배열 변수 선언
* ArrParamK: 배열 매개변수
* NonArrParamK: 매개변수
**TypeKind**
* TypeNameK: 선언의 타입
-----
`TreeNode`를 추가하였다.
```c
typedef struct treeNode {
struct treeNode *child[MAXCHILDREN];
struct treeNode *sibling;
int lineno;
NodeKind nodekind;
union {
StmtKind stmt;
ExpKind exp;
DeclKind decl;
TypeKind type;
} kind;
union {
TokenType op;
int val;
char *name;
} attr;
ExpType type; /* for type checking of exps */
} TreeNode;
```
TreeNode 타입은 ParseTree의 노드를 나타내며, 자식 노드와 형제 노드를 가리키는 포인터 그리고 노드의 kind와 attr, type을 가진다.
### `util.c`, `util.h`
`newStmtNode`, `newExpNode`, `newDeclNode`, `newTypeNode` 함수를 추가 및 수정했다. 각각 Statement, Expression, Declaration, Type 노드를 생성하는 함수이다.
Type을 출력하기 위해 `printType` 함수를 추가하였다.
printTree는 TreeNode를 출력하는 함수이다. nodeKind에 따라 구분하여 출력한다. 이때 type이 필요한 node이면 type도 같이 출력한다.
### `cminus.y`(Important)
cminus.y에서 토큰의 선언은 다음과 같이 했다.
```yacc
%token IF ELSE WHILE RETURN INT VOID
%token EQ NE LT LE GT GE LPAREN RPAREN LBRACE LCURLY RBRACE RCURLY SEMI COMMA
%token ID NUM
%left PLUS MINUS
%left TIMES OVER
%right ASSIGN
%nonassoc THEN
%nonassoc ELSE
%token ERROR
```
나머지 부분은 제공된 grammar와 tiny.y의 많은 부분을 참고하여 작성했다.
이때 중요한 부분은 **dangling-else** 부분이다.
```yacc
selection_stmt : IF LPAREN expression RPAREN statement %prec THEN {
...
} | IF LPAREN expression RPAREN statement ELSE statement {
...
};
```
`single-if`문의 우선순위를 `ELSE`보다 낮은 `THEN`으로 지정하여 Shift/Reduce Conflict를 해결했다.
## Results
다음은 테스트 C-Minus 프로그램과 그에 대한 파스트리 출력 결과이다.
<table>
<tr>
<th>C-Minus Test Program</th>
<th>Parse Tree Output</th>
</tr><tr>
<td>
```c
void main(void)
{
int i; int x[5];
i = 0;
while( i < 5 )
{
x[i] = input();
i = i + 1;
}
i = 0;
while( i <= 4 )
{
if( x[i] != 0 )
{
output(x[i]);
}
}
}
```
</td><td>
```txt
C-MINUS COMPILATION: test.2.txt
Syntax tree:
Function Declaration: name = main, return type = void
Void Parameter
Compound Statement:
Variable Declaration: name = i, type = int
Variable Declaration: name = x, type = int[]
Const: 5
Assign:
Variable: name = i
Const: 0
While Statement:
Op: <
Variable: name = i
Const: 5
Compound Statement:
Assign:
Variable: name = x
Variable: name = i
Call: function name = input
Assign:
Variable: name = i
Op: +
Variable: name = i
Const: 1
Assign:
Variable: name = i
Const: 0
While Statement:
Op: <=
Variable: name = i
Const: 4
Compound Statement:
If Statement:
Op: !=
Variable: name = x
Variable: name = i
Const: 0
Compound Statement:
Call: function name = output
Variable: name = x
Variable: name = i
```
</td></tr>
<tr>
<td>
```c
int main(void){
int a;
int b;
a = (b = 4) + 3;
if(a==b+(c*b+d))
while(1)
if(1)
a=2;
else a=3;
}
```
</td><td>
```txt
C-MINUS COMPILATION: test.cm
Syntax tree:
Function Declaration: name = main, return type = int
Void Parameter
Compound Statement:
Variable Declaration: name = a, type = int
Variable Declaration: name = b, type = int
Assign:
Variable: name = a
Op: +
Assign:
Variable: name = b
Const: 4
Const: 3
If Statement:
Op: ==
Variable: name = a
Op: +
Variable: name = b
Op: +
Op: *
Variable: name = c
Variable: name = b
Variable: name = d
While Statement:
Const: 1
If-Else Statement:
Const: 1
Assign:
Variable: name = a
Const: 2
Assign:
Variable: name = a
Const: 3
```
</td>
</tr>
</table>

BIN
out/parse.pdf Normal file

Binary file not shown.

6933
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

5
package.json Normal file
View File

@@ -0,0 +1,5 @@
{
"dependencies": {
"crossnote": "^0.9.15"
}
}

BIN
pdf/L10.pdf Normal file

Binary file not shown.

BIN
pdf/L11.pdf Normal file

Binary file not shown.

BIN
pdf/L12.pdf Normal file

Binary file not shown.

BIN
pdf/L5.pdf Normal file

Binary file not shown.

BIN
pdf/L6.pdf Normal file

Binary file not shown.

BIN
pdf/L7.pdf Normal file

Binary file not shown.

BIN
pdf/L8.pdf Normal file

Binary file not shown.

BIN
pdf/L9.pdf Normal file

Binary file not shown.

BIN
pdf/P3.pdf Normal file

Binary file not shown.

BIN
pdf/P4.pdf Normal file

Binary file not shown.

BIN
src/2024062806.docx Normal file

Binary file not shown.

BIN
src/2024062806.pdf Normal file

Binary file not shown.

View File

@@ -1,37 +1,46 @@
# Makefile for C-Minus Scanner # Makefile for C-Minus
# ./lex/tiny.l --> ./cminus.l #
# ./lex/tiny.l --> ./cminus.l (from Project 1)
# ./yacc/tiny.y --> ./cminus.y (from Project 2)
# ./yacc/globals.h --> ./globals.h (from Project 2)
CC = gcc CC = gcc
CFLAGS = -W -Wall CFLAGS = -W -Wall -g
OBJS = main.o util.o scan.o OBJS = main.o util.o lex.yy.o y.tab.o symtab.o analyze.o
OBJS_LEX = main.o util.o lex.yy.o
.PHONY: all clean .PHONY: all clean
all: cminus_cimpl cminus_lex all: cminus_semantic
clean: clean:
-rm -vf cminus_cimpl cminus_lex *.o lex.yy.c rm -vf cminus_semantic *.o lex.yy.c y.tab.c y.tab.h y.output
cminus_cimpl: $(OBJS) cminus_semantic: $(OBJS)
$(CC) $(CFLAGS) -o $@ $(OBJS) $(CC) $(CFLAGS) $(OBJS) -o $@ -lfl
cminus_lex: $(OBJS_LEX) main.o: main.c globals.h util.h scan.h parse.h y.tab.h analyze.h
$(CC) $(CFLAGS) -o $@ $(OBJS_LEX) -lfl $(CC) $(CFLAGS) -c main.c
main.o: main.c globals.h util.h scan.h util.o: util.c util.h globals.h y.tab.h
$(CC) $(CFLAGS) -c -o $@ $< $(CC) $(CFLAGS) -c util.c
scan.o: scan.c globals.h util.h scan.h lex.yy.o: lex.yy.c scan.h globals.h y.tab.h util.h
$(CC) $(CFLAGS) -c -o $@ $< $(CC) $(CFLAGS) -c lex.yy.c
util.o: util.c globals.h util.h
$(CC) $(CFLAGS) -c -o $@ $<
lex.yy.o: lex.yy.c globals.h util.h scan.h
$(CC) $(CFLAGS) -c -o $@ $<
lex.yy.c: cminus.l lex.yy.c: cminus.l
flex -o $@ $< flex cminus.l
y.tab.h: y.tab.c
y.tab.o: y.tab.c parse.h
$(CC) $(CFLAGS) -c y.tab.c
y.tab.c: cminus.y
yacc -d -v cminus.y
analyze.o: analyze.c analyze.h globals.h y.tab.h symtab.h util.h
$(CC) $(CFLAGS) -c analyze.c
symtab.o: symtab.c symtab.h
$(CC) $(CFLAGS) -c symtab.c

View File

@@ -1,159 +1,416 @@
/****************************************************/ /****************************************************/
/* File: analyze.c */ /* File: analyze.c */
/* Semantic analyzer implementation */ /* Semantic analyzer implementation */
/* for the TINY compiler */ /* for the CMinus compiler */
/* Compiler Construction: Principles and Practice */ /* Yenru0 */
/* Kenneth C. Louden */
/****************************************************/ /****************************************************/
#include "analyze.h"
#include "globals.h" #include "globals.h"
#include "symtab.h" #include "symtab.h"
#include "analyze.h"
/* counter for variable memory locations */
static int location = 0; static BucketList func_entry = NULL;
static Scope func_scope;
static TreeNode *func_params[MAX_PARAM_COUNT];
static int func_param_count = 0;
void gen_random_hex_16(char *buffer) {
char *hex_chars = "0123456789abcdef";
for (int i = 0; i < 16; i++) {
buffer[i] = hex_chars[rand() % 16];
}
buffer[16] = '\0';
}
/* Procedure traverse is a generic recursive /* Procedure traverse is a generic recursive
* syntax tree traversal routine: * syntax tree traversal routine:
* it applies preProc in preorder and postProc * it applies preProc in preorder and postProc
* in postorder to tree pointed to by t * in postorder to tree pointed to by t
*/ */
static void traverse( TreeNode * t, static void traverse(TreeNode *t,
void (* preProc) (TreeNode *), void (*preProc)(TreeNode *),
void (* postProc) (TreeNode *) ) void (*postProc)(TreeNode *)) {
{ if (t != NULL) if (t != NULL) {
{ preProc(t); preProc(t);
{ int i; {
for (i=0; i < MAXCHILDREN; i++) int i;
traverse(t->child[i],preProc,postProc); for (i = 0; i < MAXCHILDREN; i++)
traverse(t->child[i], preProc, postProc);
}
postProc(t);
traverse(t->sibling, preProc, postProc);
} }
postProc(t);
traverse(t->sibling,preProc,postProc);
}
} }
/* nullProc is a do-nothing procedure to /* nullProc is a do-nothing procedure to
* generate preorder-only or postorder-only * generate preorder-only or postorder-only
* traversals from traverse * traversals from traverse
*/ */
static void nullProc(TreeNode * t) static void nullProc(TreeNode *t) {
{ if (t==NULL) return; if (t == NULL) return;
else return; else
return;
} }
/* Procedure insertNode inserts /* Procedure insertNode inserts
* identifiers stored in t into * identifiers stored in t into
* the symbol table * the symbol table
*/ */
static void insertNode( TreeNode * t) static void insertNode(TreeNode *t) {
{ switch (t->nodekind) switch (t->nodekind) {
{ case StmtK: case ExpK:
switch (t->kind.stmt) switch (t->kind.exp) {
{ case AssignK: case IdK:
case ReadK: case ArrIdK: {
if (st_lookup(t->attr.name) == -1) BucketList entry = st_lookup(t->attr.name);
/* not yet in table, so treat as new definition */ if (entry == NULL) {
st_insert(t->attr.name,t->lineno,location++); fprintf(listing, "Error: undeclared variable \"%s\" is used at line %d\n", t->attr.name, t->lineno);
else Error = TRUE;
/* already in table, so ignore location,
add line number of use only */ entry = st_try_insert(t->attr.name, SymbolVar, Undetermined, t->lineno);
st_insert(t->attr.name,t->lineno,0); } else {
break; st_entry_insert_line(entry, t->lineno);
}
t->scope = curr_scope();
} break;
case CallK: {
BucketList entry = st_lookup(t->attr.name);
if (entry == NULL) {
fprintf(listing, "Error: undeclared function \"%s\" is called at line %d\n", t->attr.name, t->lineno);
Error = TRUE;
entry = st_try_insert(t->attr.name, SymbolFunc, Undetermined, t->lineno);
entry->param_count = -1;
entry->returnType = Undetermined;
} else {
st_entry_insert_line(entry, t->lineno);
}
t->scope = curr_scope();
}
default:
break;
}
break;
case StmtK:
switch (t->kind.stmt) {
case CompK:
if (func_scope != NULL) {
push_scope(func_scope);
func_scope = NULL;
for (int i = 0; i < func_param_count; i++) {
TreeNode *param = func_params[i];
func_entry->param_types[func_entry->param_count] = param->type;
func_entry->param_names[func_entry->param_count] = param->attr.name;
func_entry->param_count++;
BucketList param_entry = st_lookup_current(param->attr.name);
if (param_entry != NULL) {
fprintf(listing, "Error: Symbol \"%s\" is redefined at line %d (already defined at line", func_entry->param_names[i], param->lineno);
LineList lines = param_entry->lines;
while (lines != NULL) {
fprintf(listing, " ");
fprintf(listing, "%d", lines->lineno);
lines = lines->next;
}
fprintf(listing, ")\n");
Error = TRUE;
st_entry_insert_line(param_entry, param->lineno);
} else {
st_try_insert(param->attr.name, SymbolParam, param->type, param->lineno);
}
}
func_entry = NULL;
func_scope = NULL;
func_param_count = 0;
} else {
char *parent_scope_name = curr_scope()->name;
char *new_name = (char *) malloc(255);
snprintf(new_name, 255, "%s.%d", parent_scope_name, curr_scope()->child_count);
push_scope(scope_new(new_name));
}
break;
default:
break;
}
break;
case DeclK:
switch (t->kind.decl) {
case FuncK: {
BucketList entry = st_lookup(t->attr.name);
if (entry != NULL) {
fprintf(listing, "Error: Symbol \"%s\" is redefined at line %d (already defined at line", t->attr.name, t->lineno);
LineList lines = entry->lines;
while (lines != NULL) {
fprintf(listing, " ");
fprintf(listing, "%d", lines->lineno);
lines = lines->next;
}
st_entry_insert_line(entry, t->lineno);
fprintf(listing, ")\n");
Error = TRUE;
char* random_name = (char *) calloc(1, 20);
gen_random_hex_16(random_name);
func_entry = st_try_insert(random_name, SymbolFunc, t->type, t->lineno);
t->scope = curr_scope();
func_scope = scope_new(random_name);
} else {
func_entry = st_try_insert(t->attr.name, SymbolFunc, t->type, t->lineno);
t->scope = curr_scope();
func_scope = scope_new(t->attr.name);
}
} break;
case ArrParamK:
case NonArrParamK: {
    /* Parameters are only collected while a function header is pending,
     * i.e. between its FuncK node and its body (func_scope is set by FuncK
     * and cleared when the body's CompK is entered). */
    if (func_scope == NULL)
        break;
    if (t->type == Void) {
        /* An unnamed void parameter is the `(void)` marker: nothing to record. */
        if (t->attr.name == NULL)
            break;
        fprintf(listing, "Error: The void-type variable is declared at line %d (name : \"%s\")\n", t->lineno, t->attr.name);
        Error = TRUE;
    }
    /* func_params has exactly MAX_PARAM_COUNT slots; refuse to write past it. */
    if (func_param_count >= MAX_PARAM_COUNT) {
        fprintf(listing, "Error: too many parameters at line %d (at most %d are supported)\n", t->lineno, MAX_PARAM_COUNT);
        Error = TRUE;
        break;
    }
    func_params[func_param_count++] = t;
} break;
case VarK:
case ArrVarK: {
BucketList entry = st_lookup_current(t->attr.name);
if (entry != NULL) {// ignore
fprintf(listing, "Error: Symbol \"%s\" is redefined at line %d (already defined at line", t->attr.name, t->lineno);
LineList lines = entry->lines;
while (lines != NULL) {
fprintf(listing, " ");
fprintf(listing, "%d", lines->lineno);
lines = lines->next;
}
fprintf(listing, ")\n");
st_entry_insert_line(entry, t->lineno);
Error = TRUE;
} else {
if (t->type == Void) {
fprintf(listing, "Error: The void-type variable is declared at line %d (name : \"%s\")\n", t->lineno, t->attr.name);
Error = TRUE;
}
t->scope = curr_scope();
st_try_insert(t->attr.name, SymbolVar, t->type, t->lineno);
}
} break;
default:
break;
}
break;
default: default:
break; break;
} }
break; }
case ExpK:
switch (t->kind.exp) static void afterNode(TreeNode *t) {
{ case IdK: if (t->nodekind == StmtK && t->kind.stmt == CompK) {
if (st_lookup(t->attr.name) == -1) pop_scope();
/* not yet in table, so treat as new definition */ }
st_insert(t->attr.name,t->lineno,location++);
else
/* already in table, so ignore location,
add line number of use only */
st_insert(t->attr.name,t->lineno,0);
break;
default:
break;
}
break;
default:
break;
}
} }
/* Function buildSymtab constructs the symbol /* Function buildSymtab constructs the symbol
* table by preorder traversal of the syntax tree * table by preorder traversal of the syntax tree
*/ */
void buildSymtab(TreeNode * syntaxTree) void buildSymtab(TreeNode *syntaxTree) {
{ traverse(syntaxTree,insertNode,nullProc); st_init();
if (TraceAnalyze) BucketList entry;
{ fprintf(listing,"\nSymbol table:\n\n"); entry = st_try_insert("input", SymbolFunc, Integer, 0);
printSymTab(listing); entry->param_count = 0;
} // entry->returnType = Integer; /* not know */
entry = st_try_insert("output", SymbolFunc, Void, 0);
entry->returnType = Void;
entry->param_types[0] = Integer;
entry->param_names[0] = "value";
entry->param_count = 1;
push_scope(scope_new("output"));
st_try_insert("value", SymbolParam, Integer, 0);
pop_scope();
traverse(syntaxTree, insertNode, afterNode);
if (TraceAnalyze) {
printSymTab(listing);
}
} }
static void typeError(TreeNode * t, char * message) static void typeError(TreeNode *t, char *message) {
{ fprintf(listing,"Type error at line %d: %s\n",t->lineno,message); fprintf(listing, "Type error at line %d: %s\n", t->lineno, message);
Error = TRUE; Error = TRUE;
} }
/* Procedure checkNode performs static void beforeCheckNode(TreeNode *t) {
* type checking at a single tree node if (t->nodekind == DeclK && t->kind.decl == FuncK) {
*/ func_entry = st_lookup(t->attr.name);
static void checkNode(TreeNode * t) }
{ switch (t->nodekind) }
{ case ExpK: static void checkNode(TreeNode *t) {
switch (t->kind.exp) switch (t->nodekind) {
{ case OpK: case ExpK:
if ((t->child[0]->type != Integer) || switch (t->kind.exp) {
(t->child[1]->type != Integer)) case OpK: {
typeError(t,"Op applied to non-integer"); TreeNode *left = t->child[0];
if ((t->attr.op == EQ) || (t->attr.op == LT)) TreeNode *right = t->child[1];
t->type = Boolean; if (left->type != Integer || right->type != Integer) {
else fprintf(listing, "Error: invalid operation at line %d\n", t->lineno);
t->type = Integer; Error = TRUE;
break; t->type = Undetermined;
case ConstK: } else {
case IdK: t->type = Integer;
t->type = Integer; }
break; } break;
default: case ConstK:
break; t->type = Integer;
} break;
break; case IdK: {
case StmtK: BucketList entry = st_lookup_from(t->attr.name, t->scope);
switch (t->kind.stmt) t->type = entry->type;
{ case IfK: } break;
if (t->child[0]->type == Integer) case ArrIdK: {
typeError(t->child[0],"if test is not Boolean"); BucketList entry = st_lookup_from(t->attr.name, t->scope);
break; if (entry->type != IntegerArray) {
case AssignK: fprintf(listing, "Error: Invalid array indexing at line %d (name : \"%s\"). indexing can only be allowed for int[] variables\n", t->lineno, t->attr.name);
if (t->child[0]->type != Integer) Error = TRUE;
typeError(t->child[0],"assignment of non-integer value"); }
break; if (t->child[0]->type != Integer) {
case WriteK: fprintf(listing, "Error: Invalid array indexing at line %d (name : \"%s\"). indicies should be integer\n", t->lineno, t->attr.name);
if (t->child[0]->type != Integer) Error = TRUE;
typeError(t->child[0],"write of non-integer value"); }
break; t->type = Integer;
case RepeatK: } break;
if (t->child[1]->type == Integer)
typeError(t->child[1],"repeat test is not Boolean");
break;
default:
break;
}
break;
default:
break;
} case AssignK: {
TreeNode *left = t->child[0];
TreeNode *right = t->child[1];
if (left->type == Integer && right->type == Integer) {
} else if (left->type == IntegerArray && right->type == IntegerArray) {
} else {
fprintf(listing, "Error: invalid assignment at line %d\n", t->lineno);
Error = TRUE;
}
t->type = right->type;
} break;
case CallK: {
BucketList entry = st_lookup_from(t->attr.name, t->scope);// not null
if (entry->symbolKind != SymbolFunc) {
fprintf(listing, "Error: Invalid function call at line %d (name : \"%s\")\n", t->lineno, t->attr.name);
Error = TRUE;
goto check_callk_after;
}
TreeNode *arg = t->child[0];
int i = 0;// 파라미터 인덱스
if (entry->param_count == -1) {
fprintf(listing, "Error: Invalid function call at line %d (name : \"%s\")\n", t->lineno, t->attr.name);
Error = TRUE;
goto check_callk_after;
}
while (arg != NULL && i < entry->param_count) {
if (arg->type != entry->param_types[i]) {
fprintf(listing, "Error: Invalid function call at line %d (name : \"%s\")\n", t->lineno, t->attr.name);
Error = TRUE;
goto check_callk_after;
}
arg = arg->sibling;
i++;
}
if (arg != NULL) {
fprintf(listing, "Error: Invalid function call at line %d (name : \"%s\")\n", t->lineno, t->attr.name);
Error = TRUE;
} else if (i < entry->param_count) {
fprintf(listing, "Error: Invalid function call at line %d (name : \"%s\")\n", t->lineno, t->attr.name);
Error = TRUE;
}
check_callk_after:
t->type = entry->returnType;
} break;
default:
break;
}
break;
case StmtK:
switch (t->kind.stmt) {
case ReturnK: {
    /* func_entry is set by beforeCheckNode when a FuncK node is entered and
     * reset when checkNode leaves that FuncK node. */
    if (func_entry == NULL) {
        fprintf(listing, "Error: Invalid return at line %d\n", t->lineno);
        Error = TRUE;
        break; /* no enclosing function to compare against; do not dereference NULL */
    }
    TreeNode *retval = t->child[0]; /* nullable: NULL for a bare `return;` */
    int invalid;
    if (func_entry->returnType == Void) {
        /* a void function must not return a value */
        invalid = (retval != NULL);
    } else {
        /* a non-void function must return a value of exactly its return type */
        invalid = (retval == NULL || retval->type != func_entry->returnType);
    }
    if (invalid) {
        fprintf(listing, "Error: Invalid return at line %d\n", t->lineno);
        Error = TRUE;
    }
} break;
case IterK: {
TreeNode *condition = t->child[0];
if (condition->type != Integer) {
fprintf(listing, "Error: invalid condition at line %d\n", t->child[0]->lineno);
Error = TRUE;
}
} break;
case IfK: {
TreeNode *condition = t->child[0];
if (condition->type != Integer) {
fprintf(listing, "Error: invalid condition at line %d\n", t->child[0]->lineno);
Error = TRUE;
}
} break;
default:
break;
}
break;
case DeclK:
switch (t->kind.decl) {
case FuncK:
func_entry = NULL;
break;
default:
break;
}
default:
break;
}
} }
/* Procedure typeCheck performs type checking /* Procedure typeCheck performs type checking
* by a postorder syntax tree traversal * by a postorder syntax tree traversal
*/ */
void typeCheck(TreeNode * syntaxTree) void typeCheck(TreeNode *syntaxTree) {
{ traverse(syntaxTree,nullProc,checkNode); traverse(syntaxTree, beforeCheckNode, checkNode);
} }

View File

@@ -16,7 +16,7 @@ char tokenString[MAXTOKENLEN+1];
digit [0-9] digit [0-9]
number {digit}+ number {digit}+
letter [a-zA-Z] letter [a-zA-Z]
identifier {letter}+ identifier {letter}({letter}|{digit})*
newline \n newline \n
whitespace [ \t]+ whitespace [ \t]+
@@ -55,7 +55,7 @@ whitespace [ \t]+
"/*" { char now, prev; "/*" { char now, prev;
do do
{ now = input(); { now = input();
if (now == EOF) break; if (now == 0) break;
else if (now == '\n') lineno++; else if (now == '\n') lineno++;
else if (now == '/' && prev == '*') break; else if (now == '/' && prev == '*') break;
prev = now; prev = now;

344
src/cminus.y Normal file
View File

@@ -0,0 +1,344 @@
/****************************************************/
/* File: cminus.y */
/* The C-MINUS Yacc/Bison specification file */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
%{
#define YYPARSER /* distinguishes Yacc output from other code files */
#include "globals.h"
#include "util.h"
#include "scan.h"
#include "parse.h"
/* Every grammar symbol carries a syntax-tree node pointer as its semantic value. */
#define YYSTYPE TreeNode *
static char * savedName; /* copy of the most recent identifier text, set by name_specifier */
static int savedNumber; /* value of the most recent NUM, set by number_specifier */
static int savedLineNo; /* line recorded by name_specifier / number_specifier */
static TreeNode * savedTree; /* stores syntax tree for later return */
static int yylex(void); // added 11/2/11 to ensure no conflict with lex
int yyerror(char * message);
%}
/* Reserved words. */
%token IF ELSE WHILE RETURN INT VOID
/* Relational operators and punctuation. */
%token EQ NE LT LE GT GE LPAREN RPAREN LBRACE LCURLY RBRACE RCURLY SEMI COMMA
/* Tokens whose text is read from tokenString by the grammar actions. */
%token ID NUM
%left PLUS MINUS
%left TIMES OVER
%right ASSIGN
/* THEN is never returned by the scanner: it is only referenced through %prec
 * on the else-less selection_stmt. ELSE is declared after it and therefore has
 * higher precedence, which resolves the dangling-else conflict by shifting ELSE. */
%nonassoc THEN
%nonassoc ELSE
%token ERROR
%% /* Grammar for C-MINUS */
/* program: start symbol; keeps the declaration list in savedTree, which parse() returns. */
program : declaration_list
{savedTree = $1;};
/* declaration_list: one or more declarations chained through the sibling
 * pointer; each new declaration is appended after the current last node. */
declaration_list : declaration_list declaration {
    if ($1 == NULL) {
        $$ = $2;
    } else {
        YYSTYPE tail;
        /* walk to the last node of the existing chain */
        for (tail = $1; tail->sibling != NULL; tail = tail->sibling)
            ;
        tail->sibling = $2;
        $$ = $1;
    }
}
| declaration { $$ = $1; }
;
/* declaration: either a variable declaration or a function declaration; the node is passed through unchanged. */
declaration : var_declaration {$$ = $1; } | func_declaration { $$ = $1; };
/* name_specifier: wraps ID so that the identifier text and its line are captured
 * in savedName / savedLineNo at the moment the token is reduced. It sets no $$. */
name_specifier : ID {
savedName = copyString(tokenString);
savedLineNo = lineno;
};
/* number_specifier: wraps NUM, storing its integer value in savedNumber.
 * Note that it also overwrites savedLineNo with the line of the number. */
number_specifier : NUM {
savedNumber = atoi(tokenString);
savedLineNo = lineno;
};
/* var_declaration: `type name ;` builds a VarK node, `type name [ NUM ] ;` builds
 * an ArrVarK node whose child[0] is a ConstK node holding the array size.
 * The temporary type node ($1) is freed once its type has been copied.
 * An array of a non-int element type is recorded with type Void. */
var_declaration : type_specifier name_specifier SEMI {
$$ = newDeclNode(VarK);
$$->lineno = savedLineNo;
$$->attr.name = savedName;
$$->type = $1->type;
free($1);
} | type_specifier name_specifier LBRACE number_specifier RBRACE SEMI {
$$ = newDeclNode(ArrVarK);
/* savedLineNo was last written by number_specifier, so this is the line of the size literal */
$$->lineno = savedLineNo;
if ($1->type == Integer)
$$->type = IntegerArray;
else
$$->type = Void;
$$->attr.name = savedName;
free($1);
$$->child[0] = newExpNode(ConstK);
$$->child[0]->type = Integer;
$$->child[0]->attr.val = savedNumber;
};
/* type_specifier: produces a temporary TypeNameK node that only carries the
 * declared type (Integer or Void) to the enclosing declaration rule. */
type_specifier : INT {
$$ = newTypeNode(TypeNameK);
$$->type = Integer;
}
| VOID {
$$ = newTypeNode(TypeNameK);
$$->type = Void;
};
/* func_declaration: `type name ( params ) compound_stmt`.
 * The FuncK node is created in a mid-rule action right after the name, while
 * savedName / savedLineNo still describe the function's identifier (parsing
 * the parameters and body overwrites them). The final action attaches the
 * parameter list as child[0] and the body as child[1]. */
func_declaration : type_specifier name_specifier {
    $$ = newDeclNode(FuncK);
    $$->lineno = savedLineNo;
    $$->attr.name = savedName;
    $$->type = $1->type; /* copy the return type directly */
} LPAREN params RPAREN compound_stmt {
    $$ = $3;             /* the node built by the mid-rule action */
    $$->child[0] = $5;   /* params */
    $$->child[1] = $7;   /* compound_stmt */
    free($1);            /* temporary type node is no longer needed (as in var_declaration) */
};
/* params: either a real parameter list, or the single keyword `void`, which is
 * represented by one unnamed NonArrParamK node of type Void. */
params : param_list { $$ = $1; } | VOID {
    $$ = newDeclNode(NonArrParamK);
    $$->type = Void;
    /* The semantic analyzer tells the `(void)` marker apart from a named void
     * parameter by testing attr.name against NULL, so set it explicitly rather
     * than relying on how newDeclNode initializes the attr union. */
    $$->attr.name = NULL;
};
/* param_list: comma-separated parameters chained through the sibling pointer
 * in source order. */
param_list : param_list COMMA param {
    if ($1 == NULL) {
        $$ = $3;
    } else {
        YYSTYPE last;
        /* find the current end of the chain and hang the new parameter there */
        for (last = $1; last->sibling != NULL; last = last->sibling)
            ;
        last->sibling = $3;
        $$ = $1;
    }
} | param {$$ = $1; };
/* param: `type name` builds a NonArrParamK node, `type name [ ]` builds an
 * ArrParamK node. An array parameter of a non-int element type is recorded
 * with type Void. As in var_declaration, the node takes the identifier's line
 * from savedLineNo and the temporary type node is released after use. */
param : type_specifier name_specifier {
    $$ = newDeclNode(NonArrParamK);
    $$->lineno = savedLineNo;
    $$->attr.name = savedName;
    $$->type = $1->type;
    free($1);
} | type_specifier name_specifier LBRACE RBRACE {
    $$ = newDeclNode(ArrParamK);
    $$->lineno = savedLineNo;
    $$->attr.name = savedName;
    if ($1->type == Integer)
        $$->type = IntegerArray;
    else
        $$->type = Void;
    free($1);
};
/* compound_stmt: `{ local_declarations statement_list }` builds a CompK node with
 * the declarations as child[0] and the statements as child[1] (either may be NULL).
 * lineno is the scanner's current line when the rule is reduced. */
compound_stmt : LCURLY local_declarations statement_list RCURLY {
$$ = newStmtNode(CompK);
$$->lineno = lineno;
$$->child[0] = $2;
$$->child[1] = $3;
};
/* local_declarations: zero or more variable declarations, chained through the
 * sibling pointer; the empty list is NULL. */
local_declarations : local_declarations var_declaration {
    if ($1 == NULL) {
        $$ = $2;
    } else {
        YYSTYPE last;
        /* append after the last declaration collected so far */
        for (last = $1; last->sibling != NULL; last = last->sibling)
            ;
        last->sibling = $2;
        $$ = $1;
    }
} | { $$ = NULL; };
/* statement_list: zero or more statements, chained through the sibling pointer;
 * the empty list is NULL. */
statement_list : statement_list statement {
    if ($1 == NULL) {
        $$ = $2;
    } else {
        YYSTYPE last;
        /* append after the last statement collected so far */
        for (last = $1; last->sibling != NULL; last = last->sibling)
            ;
        last->sibling = $2;
        $$ = $1;
    }
} | { $$ = NULL; };
/* statement: dispatches to one of the five statement forms and passes its node through. */
statement : expression_stmt { $$ = $1; }
| compound_stmt { $$ = $1; }
| selection_stmt { $$ = $1; }
| iteration_stmt { $$ = $1; }
| return_stmt { $$ = $1; }
;
/* expression_stmt: an expression followed by `;`, or a lone `;` (empty statement, yields NULL). */
expression_stmt : expression SEMI { $$ = $1; }
| SEMI { $$ = NULL; }
;
/* selection_stmt: both forms build an IfK node with the condition as child[0]
 * and the then-branch as child[1]; the if-else form additionally stores the
 * else-branch as child[2]. %prec THEN gives the else-less form a lower
 * precedence than the ELSE token, so a following ELSE is shifted and binds to
 * the nearest if (dangling-else resolution). */
selection_stmt : IF LPAREN expression RPAREN statement %prec THEN {
$$ = newStmtNode(IfK);
$$->lineno = lineno;
$$->child[0] = $3;
$$->child[1] = $5;
} | IF LPAREN expression RPAREN statement ELSE statement {
$$ = newStmtNode(IfK);
$$->lineno = lineno;
$$->child[0] = $3;
$$->child[1] = $5;
$$->child[2] = $7;
};
/* iteration_stmt: `while ( expression ) statement` builds an IterK node with the
 * condition as child[0] and the loop body as child[1]. */
iteration_stmt : WHILE LPAREN expression RPAREN statement {
$$ = newStmtNode(IterK);
$$->lineno = lineno;
$$->child[0] = $3;
$$->child[1] = $5;
};
/* return_stmt: builds a ReturnK node; child[0] is the returned expression for
 * `return expression ;` and is not assigned for a bare `return ;`. */
return_stmt : RETURN SEMI {
$$ = newStmtNode(ReturnK);
$$->lineno = lineno;
} | RETURN expression SEMI {
$$ = newStmtNode(ReturnK);
$$->lineno = lineno;
$$->child[0] = $2;
};
/* expression: `var = expression` builds an AssignK node (target in child[0],
 * value in child[1]) whose type is provisionally copied from the right-hand
 * side; the rule recurses on the right, so assignment chains nest to the right.
 * Anything else is a simple_expression passed through unchanged. */
expression : var ASSIGN expression {
$$ = newExpNode(AssignK);
$$->lineno = lineno;
$$->type = $3->type;
$$->child[0] = $1;
$$->child[1] = $3;
} | simple_expression { $$ = $1; };
/* var: a plain identifier builds an IdK node; `name [ expression ]` builds an
 * ArrIdK node with the index expression as child[0]. The ArrIdK node is created
 * in a mid-rule action, before the index expression is parsed, because parsing
 * the index may overwrite savedName. */
var : name_specifier {
$$ = newExpNode(IdK);
$$->attr.name = savedName;
} | name_specifier {
$$ = newExpNode(ArrIdK);
$$->attr.name = savedName;
} LBRACE expression RBRACE {
$$ = $2;
$$->child[0] = $4;
};
/* simple_expression: an optional single comparison. The OpK node produced by
 * relop becomes the root, with the operands as child[0] / child[1]; the result
 * of a comparison is typed Integer. The node keeps the line set by relop. */
simple_expression : additive_expression relop additive_expression {
    YYSTYPE cmp = $2;
    cmp->child[0] = $1;
    cmp->child[1] = $3;
    cmp->type = Integer;
    $$ = cmp;
} | additive_expression { $$ = $1; };
/* relop: builds a childless OpK node for one of the six comparison operators,
 * storing the token in attr.op and the scanner's current line in lineno.
 * The operands are attached later by simple_expression. */
relop : LE {
$$ = newExpNode(OpK);
$$->lineno = lineno;
$$->attr.op = LE;
} | LT {
$$ = newExpNode(OpK);
$$->lineno = lineno;
$$->attr.op = LT;
} | GT {
$$ = newExpNode(OpK);
$$->lineno = lineno;
$$->attr.op = GT;
} | GE {
$$ = newExpNode(OpK);
$$->lineno = lineno;
$$->attr.op = GE;
} | EQ {
$$ = newExpNode(OpK);
$$->lineno = lineno;
$$->attr.op = EQ;
} | NE {
$$ = newExpNode(OpK);
$$->lineno = lineno;
$$->attr.op = NE;
};
/* additive_expression: left-recursive chain of + / - terms. The OpK node
 * produced by addop becomes the root with the operands as child[0] / child[1],
 * keeping the line set by addop. */
additive_expression : additive_expression addop term {
    YYSTYPE sum = $2;
    sum->child[0] = $1;
    sum->child[1] = $3;
    $$ = sum;
} | term { $$ = $1; };
/* addop: builds a childless OpK node for + or -; operands are attached by additive_expression. */
addop : PLUS {
$$ = newExpNode(OpK);
$$->lineno = lineno;
$$->attr.op = PLUS;
} | MINUS {
$$ = newExpNode(OpK);
$$->lineno = lineno;
$$->attr.op = MINUS;
};
/* term: left-recursive chain of * and / factors. The OpK node produced by mulop
 * becomes the root with the operands as child[0] / child[1], keeping the line
 * set by mulop. */
term : term mulop factor {
    YYSTYPE product = $2;
    product->child[0] = $1;
    product->child[1] = $3;
    $$ = product;
} | factor { $$ = $1; };
/* mulop: builds a childless OpK node for * or /; operands are attached by term. */
mulop : TIMES {
$$ = newExpNode(OpK);
$$->lineno = lineno;
$$->attr.op = TIMES;
} | OVER {
$$ = newExpNode(OpK);
$$->lineno = lineno;
$$->attr.op = OVER;
};
/* factor: a parenthesized expression, a variable, a call, or an integer literal.
 * A literal becomes a ConstK node of type Integer whose value is parsed from
 * tokenString. */
factor : LPAREN expression RPAREN { $$ = $2; }
| var { $$ = $1; }
| call { $$ = $1; }
| NUM {
$$ = newExpNode(ConstK);
$$->lineno = lineno;
$$->type = Integer;
$$->attr.val = atoi(tokenString);
}
;
/* call: `name ( args )`. The CallK node is created in a mid-rule action right
 * after the name, before the arguments are parsed (they may overwrite
 * savedName); the argument list is then attached as child[0]. */
call : name_specifier {
$$ = newExpNode(CallK);
$$->lineno = lineno;
$$->attr.name = savedName;
} LPAREN args RPAREN {
$$ = $2;
$$->child[0] = $4;
};
/* args: the argument list of a call, or NULL when the call has no arguments. */
args : arg_list { $$ = $1; } | { $$ = NULL; } ;
/* arg_list: comma-separated argument expressions chained through the sibling
 * pointer in source order. */
arg_list : arg_list COMMA expression {
    if ($1 == NULL) {
        $$ = $3;
    } else {
        YYSTYPE last;
        /* append after the last argument collected so far */
        for (last = $1; last->sibling != NULL; last = last->sibling)
            ;
        last->sibling = $3;
        $$ = $1;
    }
} | expression { $$ = $1; } ;
%%
int yyerror(char * message)
{ fprintf(listing,"Syntax error at line %d: %s\n",lineno,message);
fprintf(listing,"Current token: ");
printToken(yychar,tokenString);
Error = TRUE;
return 0;
}
/* Adapter expected by the Yacc/Bison-generated parser: every request for
 * a token is forwarded to getToken(), which keeps the parser compatible
 * with earlier versions of the TINY scanner.
 */
static int yylex(void)
{
  int token = getToken();
  return token;
}
/* Run the generated parser over the input and hand back the syntax tree
 * it stored in savedTree. The status returned by yyparse() is not used.
 */
TreeNode * parse(void)
{
  TreeNode * tree;
  yyparse();
  tree = savedTree;
  return tree;
}

View File

@@ -9,11 +9,14 @@
#ifndef _GLOBALS_H_ #ifndef _GLOBALS_H_
#define _GLOBALS_H_ #define _GLOBALS_H_
#include <ctype.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <ctype.h>
#include <string.h> #include <string.h>
struct Scope;
typedef struct Scope *Scope;
#ifndef FALSE #ifndef FALSE
#define FALSE 0 #define FALSE 0
#endif #endif
@@ -22,23 +25,56 @@
#define TRUE 1 #define TRUE 1
#endif #endif
#ifndef YYPARSER
#include "y.tab.h"
#define ENDFILE 0
#endif
/* MAXRESERVED = the number of reserved words */ /* MAXRESERVED = the number of reserved words */
#define MAXRESERVED 6 #define MAXRESERVED 6
#if 0
typedef enum typedef enum
/* book-keeping tokens */ /* book-keeping tokens */
{ENDFILE,ERROR, { ENDFILE,
/* reserved words */ ERROR,
IF,ELSE,WHILE,RETURN,INT,VOID, /* reserved words */
/* multicharacter tokens */ IF,
ID,NUM, ELSE,
/* special symbols */ WHILE,
ASSIGN,EQ,NE,LT,LE,GT,GE,PLUS,MINUS,TIMES,OVER,LPAREN,RPAREN,LBRACE,RBRACE,LCURLY,RCURLY,SEMI,COMMA RETURN,
} TokenType; INT,
VOID,
/* multicharacter tokens */
ID,
NUM,
/* special symbols */
ASSIGN,
EQ,
NE,
LT,
LE,
GT,
GE,
PLUS,
MINUS,
TIMES,
OVER,
LPAREN,
RPAREN,
LBRACE,
RBRACE,
LCURLY,
RCURLY,
SEMI,
COMMA
} TokenType;
#endif
extern FILE* source; /* source code text file */ typedef int TokenType;
extern FILE* listing; /* listing output text file */
extern FILE* code; /* code text file for TM simulator */ extern FILE *source; /* source code text file */
extern FILE *listing; /* listing output text file */
extern FILE *code; /* code text file for TM simulator */
extern int lineno; /* source line number for listing */ extern int lineno; /* source line number for listing */
@@ -46,26 +82,61 @@ extern int lineno; /* source line number for listing */
/*********** Syntax tree for parsing ************/ /*********** Syntax tree for parsing ************/
/**************************************************/ /**************************************************/
typedef enum {StmtK,ExpK} NodeKind; typedef enum { StmtK,
typedef enum {IfK,RepeatK,AssignK,ReadK,WriteK} StmtKind; ExpK,
typedef enum {OpK,ConstK,IdK} ExpKind; DeclK,
TypeK } NodeKind;
typedef enum { CompK,
IfK,
IterK, /* WhileK*/
ReturnK } StmtKind;
typedef enum { AssignK,
OpK,
ConstK,
IdK,
ArrIdK,
CallK } ExpKind;
typedef enum { FuncK,
VarK,
ArrVarK,
ArrParamK,
NonArrParamK } DeclKind;
typedef enum { TypeNameK } TypeKind;
/* ExpType is used for type checking */ /* ExpType is used for type checking */
typedef enum {Void,Integer,Boolean} ExpType; typedef enum { Void,
Integer,
IntegerArray,
Undetermined
} ExpType;
typedef enum {
SymbolVar,
SymbolFunc,
SymbolParam
} SymbolKind;
#define MAXCHILDREN 3 #define MAXCHILDREN 3
typedef struct treeNode typedef struct treeNode {
{ struct treeNode * child[MAXCHILDREN]; struct treeNode *child[MAXCHILDREN];
struct treeNode * sibling; struct treeNode *sibling;
int lineno; int lineno;
NodeKind nodekind; NodeKind nodekind;
union { StmtKind stmt; ExpKind exp;} kind; union {
union { TokenType op; StmtKind stmt;
int val; ExpKind exp;
char * name; } attr; DeclKind decl;
ExpType type; /* for type checking of exps */ TypeKind type;
} TreeNode; } kind;
union {
TokenType op;
int val;
char *name;
} attr;
ExpType type; /* for type checking of exps */
Scope scope;
} TreeNode;
/**************************************************/ /**************************************************/
/*********** Flags for tracing ************/ /*********** Flags for tracing ************/

View File

@@ -1,75 +0,0 @@
/****************************************************/
/* File: tiny.l */
/* Lex specification for TINY */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
%{
#include "globals.h"
#include "util.h"
#include "scan.h"
/* lexeme of identifier or reserved word */
char tokenString[MAXTOKENLEN+1];
%}
/* Named patterns shared by the rules below: numbers are runs of digits,
 * identifiers are runs of letters only. */
digit [0-9]
number {digit}+
letter [a-zA-Z]
identifier {letter}+
newline \n
whitespace [ \t]+
%%
"if" {/* reserved words: listed before {identifier} so they win ties */ return IF;}
"then" {return THEN;}
"else" {return ELSE;}
"end" {return END;}
"repeat" {return REPEAT;}
"until" {return UNTIL;}
"read" {return READ;}
"write" {return WRITE;}
":=" {/* special symbols */ return ASSIGN;}
"=" {return EQ;}
"<" {return LT;}
"+" {return PLUS;}
"-" {return MINUS;}
"*" {return TIMES;}
"/" {return OVER;}
"(" {return LPAREN;}
")" {return RPAREN;}
";" {return SEMI;}
{number} {/* multi-character tokens; the lexeme is read from yytext by getToken */ return NUM;}
{identifier} {return ID;}
{newline} {lineno++; /* keep the listing line counter in step with the input */}
{whitespace} {/* skip whitespace */}
"{" { /* Skip a { ... } comment, still counting the lines it spans.
         The character is held in an int so it can be compared with EOF
         reliably, and 0 is also treated as end of input because flex 2.6+
         makes input() return 0 there; without this an unterminated
         comment would loop forever. */
      int c;
      do
      { c = input();
        if (c == EOF || c == 0) break;
        if (c == '\n') lineno++;
      } while (c != '}');
    }
. {return ERROR; /* any other character is not part of the language */}
%%
/* Return the next token from the flex scanner.
 * On the very first call the scanner is wired to the source and listing
 * files and lineno is advanced once. The lexeme is copied into tokenString
 * (at most MAXTOKENLEN characters), and when TraceScan is set the token is
 * echoed to the listing together with its line number.
 */
TokenType getToken(void)
{
  static int firstTime = TRUE;
  TokenType token;

  if (firstTime) {
    yyin = source;
    yyout = listing;
    lineno++;
    firstTime = FALSE;
  }

  token = yylex();
  strncpy(tokenString, yytext, MAXTOKENLEN);

  if (!TraceScan)
    return token;

  fprintf(listing, "\t%d: ", lineno);
  printToken(token, tokenString);
  return token;
}

View File

@@ -8,9 +8,9 @@
#include "globals.h" #include "globals.h"
/* set NO_PARSE to TRUE to get a scanner-only compiler */ /* set NO_PARSE to TRUE to get a scanner-only compiler */
#define NO_PARSE TRUE #define NO_PARSE FALSE
/* set NO_ANALYZE to TRUE to get a parser-only compiler */ /* set NO_ANALYZE to TRUE to get a parser-only compiler */
#define NO_ANALYZE TRUE #define NO_ANALYZE FALSE
/* set NO_CODE to TRUE to get a compiler that does not /* set NO_CODE to TRUE to get a compiler that does not
* generate code * generate code
@@ -38,14 +38,14 @@ FILE *code;
/* allocate and set tracing flags */ /* allocate and set tracing flags */
int EchoSource = FALSE; int EchoSource = FALSE;
int TraceScan = TRUE; int TraceScan = FALSE;
int TraceParse = FALSE; int TraceParse = FALSE;
int TraceAnalyze = FALSE; int TraceAnalyze = FALSE;
int TraceCode = FALSE; int TraceCode = FALSE;
int Error = FALSE; int Error = FALSE;
main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
TreeNode *syntaxTree; TreeNode *syntaxTree;
char pgm[120]; /* source code file name */ char pgm[120]; /* source code file name */
if (argc != 2) { if (argc != 2) {

47
src/res.txt Normal file
View File

@@ -0,0 +1,47 @@
C-MINUS COMPILATION: ./test.cm
Building Symbol Table...
Error: undeclared function "x" is called at line 3
< Symbol Table >
Symbol Name Symbol Kind Symbol Type Scope Name Location Line Numbers
------------- ----------- ------------- ------------ -------- ------------
main Function int global 2 1
input Function int global 0 0
output Function void global 1 0
value Variable int output 0 0
x Function undetermined main 0 3
< Functions >
Function Name Return Type Parameter Name Parameter Type
------------- ------------- -------------- --------------
main int void
input int void
output void
- - value int
x undetermined undetermined
< Global Symbols >
Symbol Name Symbol Kind Symbol Type
------------- ----------- -------------
main Function int
input Function int
output Function void
< Scopes >
Scope Name Nested Level Symbol Name Symbol Type
------------ ------------ ------------- -----------
output 1 value int
main 1 x void
Checking Types...
Error: Invalid function call at line 3 (name : "x")
Type Checking Finished

View File

@@ -1,73 +0,0 @@
C-MINUS COMPILATION: ./test.1.txt
4: reserved word: int
4: ID, name= gcd
4: (
4: reserved word: int
4: ID, name= u
4: ,
4: reserved word: int
4: ID, name= v
4: )
5: {
6: reserved word: if
6: (
6: ID, name= v
6: ==
6: NUM, val= 0
6: )
6: reserved word: return
6: ID, name= u
6: ;
7: reserved word: else
7: reserved word: return
7: ID, name= gcd
7: (
7: ID, name= v
7: ,
7: ID, name= u
7: -
7: ID, name= u
7: /
7: ID, name= v
7: *
7: ID, name= v
7: )
7: ;
9: }
11: reserved word: void
11: ID, name= main
11: (
11: reserved word: void
11: )
12: {
13: reserved word: int
13: ID, name= x
13: ;
13: reserved word: int
13: ID, name= y
13: ;
14: ID, name= x
14: =
14: ID, name= input
14: (
14: )
14: ;
14: ID, name= y
14: =
14: ID, name= input
14: (
14: )
14: ;
15: ID, name= output
15: (
15: ID, name= gcd
15: (
15: ID, name= x
15: ,
15: ID, name= y
15: )
15: )
15: ;
16: }
17: EOF

View File

@@ -1,77 +0,0 @@
C-MINUS COMPILATION: ./test.2.txt
1: reserved word: void
1: ID, name= main
1: (
1: reserved word: void
1: )
2: {
3: reserved word: int
3: ID, name= i
3: ;
3: reserved word: int
3: ID, name= x
3: [
3: NUM, val= 5
3: ]
3: ;
5: ID, name= i
5: =
5: NUM, val= 0
5: ;
6: reserved word: while
6: (
6: ID, name= i
6: <
6: NUM, val= 5
6: )
7: {
8: ID, name= x
8: [
8: ID, name= i
8: ]
8: =
8: ID, name= input
8: (
8: )
8: ;
10: ID, name= i
10: =
10: ID, name= i
10: +
10: NUM, val= 1
10: ;
11: }
13: ID, name= i
13: =
13: NUM, val= 0
13: ;
14: reserved word: while
14: (
14: ID, name= i
14: <=
14: NUM, val= 4
14: )
15: {
16: reserved word: if
16: (
16: ID, name= x
16: [
16: ID, name= i
16: ]
16: !=
16: NUM, val= 0
16: )
17: {
18: ID, name= output
18: (
18: ID, name= x
18: [
18: ID, name= i
18: ]
18: )
18: ;
19: }
20: }
21: }
22: EOF

View File

@@ -1,73 +0,0 @@
C-MINUS COMPILATION: ./res/test.1.txt
4: reserved word: int
4: ID, name= gcd
4: (
4: reserved word: int
4: ID, name= u
4: ,
4: reserved word: int
4: ID, name= v
4: )
5: {
6: reserved word: if
6: (
6: ID, name= v
6: ==
6: NUM, val= 0
6: )
6: reserved word: return
6: ID, name= u
6: ;
7: reserved word: else
7: reserved word: return
7: ID, name= gcd
7: (
7: ID, name= v
7: ,
7: ID, name= u
7: -
7: ID, name= u
7: /
7: ID, name= v
7: *
7: ID, name= v
7: )
7: ;
9: }
11: reserved word: void
11: ID, name= main
11: (
11: reserved word: void
11: )
12: {
13: reserved word: int
13: ID, name= x
13: ;
13: reserved word: int
13: ID, name= y
13: ;
14: ID, name= x
14: =
14: ID, name= input
14: (
14: )
14: ;
14: ID, name= y
14: =
14: ID, name= input
14: (
14: )
14: ;
15: ID, name= output
15: (
15: ID, name= gcd
15: (
15: ID, name= x
15: ,
15: ID, name= y
15: )
15: )
15: ;
16: }
17: EOF

View File

@@ -1,77 +0,0 @@
C-MINUS COMPILATION: ./res/test.2.txt
1: reserved word: void
1: ID, name= main
1: (
1: reserved word: void
1: )
2: {
3: reserved word: int
3: ID, name= i
3: ;
3: reserved word: int
3: ID, name= x
3: [
3: NUM, val= 5
3: ]
3: ;
5: ID, name= i
5: =
5: NUM, val= 0
5: ;
6: reserved word: while
6: (
6: ID, name= i
6: <
6: NUM, val= 5
6: )
7: {
8: ID, name= x
8: [
8: ID, name= i
8: ]
8: =
8: ID, name= input
8: (
8: )
8: ;
10: ID, name= i
10: =
10: ID, name= i
10: +
10: NUM, val= 1
10: ;
11: }
13: ID, name= i
13: =
13: NUM, val= 0
13: ;
14: reserved word: while
14: (
14: ID, name= i
14: <=
14: NUM, val= 4
14: )
15: {
16: reserved word: if
16: (
16: ID, name= x
16: [
16: ID, name= i
16: ]
16: !=
16: NUM, val= 0
16: )
17: {
18: ID, name= output
18: (
18: ID, name= x
18: [
18: ID, name= i
18: ]
18: )
18: ;
19: }
20: }
21: }
22: EOF

View File

@@ -1,16 +0,0 @@
/* A program to perform Euclid's
Algorithm to compute gcd */
int gcd(int u, int v)
{
if(v == 0) return u;
else return gcd(v, u- u/v * v);
/* hello u-u/v*v == u mod v */
}
void main()
{
int x; int y;
x = input(); y = input();
print(gcd(x, y));
}

56
src/result_1.txt Normal file
View File

@@ -0,0 +1,56 @@
C-MINUS COMPILATION: test.1.txt
Building Symbol Table...
< Symbol Table >
Symbol Name Symbol Kind Symbol Type Scope Name Location Line Numbers
------------- ----------- ------------- ------------ -------- ------------
main Function void global 3 11
input Function int global 0 0 14 14
output Function void global 1 0 15
gcd Function int global 2 4 7 15
value Variable int output 0 0
u Variable int gcd 0 4 6 7 7
v Variable int gcd 1 4 6 7 7 7
x Variable int main 0 13 14 15
y Variable int main 1 13 14 15
< Functions >
Function Name Return Type Parameter Name Parameter Type
------------- ------------- -------------- --------------
main void void
input int void
output void
- - value int
gcd int
- - u int
- - v int
< Global Symbols >
Symbol Name Symbol Kind Symbol Type
------------- ----------- -------------
main Function void
input Function int
output Function void
gcd Function int
< Scopes >
Scope Name Nested Level Symbol Name Symbol Type
------------ ------------ ------------- -----------
output 1 value int
gcd 1 u int
gcd 1 v int
main 1 x int
main 1 y int
Checking Types...
Type Checking Finished

46
src/result_2.txt Normal file
View File

@@ -0,0 +1,46 @@
C-MINUS COMPILATION: test.2.txt
Building Symbol Table...
< Symbol Table >
Symbol Name Symbol Kind Symbol Type Scope Name Location Line Numbers
------------- ----------- ------------- ------------ -------- ------------
main Function void global 2 1
input Function int global 0 0 8
output Function void global 1 0 18
value Variable int output 0 0
i Variable int main 0 3 5 6 8 10 10 13 14 16 18
x Variable int[] main 1 3 8 16 18
< Functions >
Function Name Return Type Parameter Name Parameter Type
------------- ------------- -------------- --------------
main void void
input int void
output void
- - value int
< Global Symbols >
Symbol Name Symbol Kind Symbol Type
------------- ----------- -------------
main Function void
input Function int
output Function void
< Scopes >
Scope Name Nested Level Symbol Name Symbol Type
------------ ------------ ------------- -----------
output 1 value int
main 1 i int
main 1 x int[]
Checking Types...
Type Checking Finished

56
src/result_3.txt Normal file
View File

@@ -0,0 +1,56 @@
C-MINUS COMPILATION: test.3.txt
Building Symbol Table...
< Symbol Table >
Symbol Name Symbol Kind Symbol Type Scope Name Location Line Numbers
------------- ----------- ------------- ------------ -------- ------------
main Function int global 3 6
input Function int global 0 0
output Function void global 1 0
x Function int global 2 1 12
value Variable int output 0 0
y Variable int x 0 1 3
a Variable int main 0 8 12
b Variable int main 1 9 12
c Variable int main 2 10 12
< Functions >
Function Name Return Type Parameter Name Parameter Type
------------- ------------- -------------- --------------
main int void
input int void
output void
- - value int
x int
- - y int
< Global Symbols >
Symbol Name Symbol Kind Symbol Type
------------- ----------- -------------
main Function int
input Function int
output Function void
x Function int
< Scopes >
Scope Name Nested Level Symbol Name Symbol Type
------------ ------------ ------------- -----------
output 1 value int
x 1 y int
main 1 a int
main 1 b int
main 1 c int
Checking Types...
Error: Invalid function call at line 12 (name : "x")
Type Checking Finished

45
src/result_4.txt Normal file
View File

@@ -0,0 +1,45 @@
C-MINUS COMPILATION: test.4.txt
Building Symbol Table...
< Symbol Table >
Symbol Name Symbol Kind Symbol Type Scope Name Location Line Numbers
------------- ----------- ------------- ------------ -------- ------------
main Function int global 2 1
input Function int global 0 0
output Function void global 1 0 4
value Variable int output 0 0
x Variable int[] main 0 3 4
< Functions >
Function Name Return Type Parameter Name Parameter Type
------------- ------------- -------------- --------------
main int void
input int void
output void
- - value int
< Global Symbols >
Symbol Name Symbol Kind Symbol Type
------------- ----------- -------------
main Function int
input Function int
output Function void
< Scopes >
Scope Name Nested Level Symbol Name Symbol Type
------------ ------------ ------------- -----------
output 1 value int
main 1 x int[]
Checking Types...
Error: Invalid array indexing at line 4 (name : "x"). indices should be integer
Type Checking Finished

44
src/sample.txt Normal file
View File

@@ -0,0 +1,44 @@
C-MINUS COMPILATION: ./testcase/3_Semantic_Makefile_Testcase/mytest.8.txt
Building Symbol Table...
< Symbol Table >
Symbol Name Symbol Kind Symbol Type Scope Name Location Line Numbers
------------- ----------- ------------- ------------ -------- ------------
main Function int global 2 1
input Function int global 0 0
output Function void global 1 0
value Variable int output 0 0
x Variable int main 0 2 3
< Functions >
Function Name Return Type Parameter Name Parameter Type
------------- ------------- -------------- --------------
main int void
input int void
output void
- - value int
< Global Symbols >
Symbol Name Symbol Kind Symbol Type
------------- ----------- -------------
main Function int
input Function int
output Function void
< Scopes >
Scope Name Nested Level Symbol Name Symbol Type
------------ ------------ ------------- -----------
output 1 value int
main 1 x int
Checking Types...
Error: Invalid function call at line 3 (name : "x")
Type Checking Finished

View File

@@ -219,7 +219,6 @@ TokenType getToken(void) { /* index for storing into tokenString */
currentToken = ERROR; currentToken = ERROR;
} }
break; break;
case INLT: case INLT:
state = DONE; state = DONE;
if (c == '=') { if (c == '=') {
@@ -249,7 +248,7 @@ TokenType getToken(void) { /* index for storing into tokenString */
} }
break; break;
case INID: case INID:
if (!isalpha(c)) { /* backup in the input */ if (!isalnum(c)) { /* backup in the input */
ungetNextChar(); ungetNextChar();
save = FALSE; save = FALSE;
state = DONE; state = DONE;

View File

@@ -4,119 +4,506 @@
/* (allows only one symbol table) */ /* (allows only one symbol table) */
/* Symbol table is implemented as a chained */ /* Symbol table is implemented as a chained */
/* hash table */ /* hash table */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/ /****************************************************/
#include "symtab.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "symtab.h"
/* SIZE is the size of the hash table */ Scope scope_stack[MAX_SCOPE_DEPTH];
#define SIZE 211 int scope_stack_top = -1;
Scope scope_global;// no sibling no parent
void st_init(void) {
scope_global = scope_new("global");
scope_global->depth = 0;
scope_stack_top = 0;
scope_stack[scope_stack_top] = scope_global;
}
/* SHIFT is the power of two used as multiplier /* SHIFT is the power of two used as multiplier
in hash function */ in hash function */
#define SHIFT 4 #define SHIFT 4
/* the hash function */ /* the hash function */
static int hash ( char * key ) static int hash(char *key) {
{ int temp = 0; int temp = 0;
int i = 0; int i = 0;
while (key[i] != '\0') while (key[i] != '\0') {
{ temp = ((temp << SHIFT) + key[i]) % SIZE; temp = ((temp << SHIFT) + key[i]) % SYMTAB_SIZE;
++i; ++i;
} }
return temp; return temp;
} }
/* the list of line numbers of the source Scope scope_new(char *scope_name) {// it
* code in which a variable is referenced Scope new_scope = (Scope) malloc(sizeof(struct Scope));
*/ new_scope->name = scope_name;
typedef struct LineListRec new_scope->depth = -1;
{ int lineno; new_scope->parent = NULL;
struct LineListRec * next; new_scope->child = NULL;
} * LineList; new_scope->child_last = NULL;
new_scope->next_sibling = NULL;
int i;
for (i = 0; i < SYMTAB_SIZE; ++i) {
new_scope->hashTable[i] = NULL;
}
new_scope->child_count = 0;
new_scope->location = 0;
return new_scope;
}
/* The record in the bucket lists for void pop_scope(void) {
* each variable, including name, if (scope_stack_top == -1) {// empty
* assigned memory location, and return;
* the list of line numbers in which } else {
* it appears in the source code scope_stack[scope_stack_top] = NULL;
*/ scope_stack_top--;
typedef struct BucketListRec }
{ char * name; }
LineList lines;
int memloc ; /* memory location for variable */
struct BucketListRec * next;
} * BucketList;
/* the hash table */ void push_scope(Scope scope) {
static BucketList hashTable[SIZE]; if (scope_stack_top == MAX_SCOPE_DEPTH - 1) {// full
return;
} else {
Scope before = curr_scope();
if (before->child == NULL) {
before->child = scope;
before->child_last = scope;
} else {
before->child_last->next_sibling = scope;
before->child_last = scope;
}
scope->parent = before;
scope->depth = before->depth + 1;
before->child_count++;
scope_stack_top++;
scope_stack[scope_stack_top] = scope;
}
}
/* Procedure st_insert inserts line numbers and Scope curr_scope(void) {
* memory locations into the symbol table if (scope_stack_top == -1) {
* loc = memory location is inserted only the return NULL;
* first time, otherwise ignored } else {
*/ return scope_stack[scope_stack_top];
void st_insert( char * name, int lineno, int loc ) }
{ int h = hash(name); }
BucketList l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->name) != 0)) BucketList st_try_insert(char *name, SymbolKind symbolkind, ExpType type, int lineno) {
l = l->next; int h = hash(name);
if (l == NULL) /* variable not yet in table */ Scope scope = curr_scope();
{ l = (BucketList) malloc(sizeof(struct BucketListRec)); BucketList *hashTable = scope->hashTable;
l->name = name; BucketList l = hashTable[h];
l->lines = (LineList) malloc(sizeof(struct LineListRec)); while ((l != NULL) && (strcmp(name, l->name) != 0))
l->lines->lineno = lineno; l = l->next;
l->memloc = loc; if (l == NULL) { /* variable not yet in table */
l->lines->next = NULL; l = (BucketList) malloc(sizeof(struct BucketListEntry));
l->next = hashTable[h]; l->name = name;
hashTable[h] = l; } l->symbolKind = symbolkind;
else /* found in table, so just add line number */ l->lines = (LineList) malloc(sizeof(struct LineListEntry));
{ LineList t = l->lines; l->lines->lineno = lineno;
while (t->next != NULL) t = t->next; l->symbolKind = symbolkind;
t->next = (LineList) malloc(sizeof(struct LineListRec)); if (symbolkind == SymbolFunc) {
l->type = Void;
l->returnType = type;
l->param_count = 0;
} else {
l->type = type;
l->returnType = type;
}
l->lines->next = NULL;
l->memloc = scope->location;
scope->location++;
l->next = hashTable[h];
hashTable[h] = l;
} else { /* found in table, so just add line number */
LineList t = l->lines;
while (t->next != NULL)
t = t->next;
t->next = (LineList) malloc(sizeof(struct LineListEntry));
t->next->lineno = lineno;
t->next->next = NULL;
}
return l;
}
void st_entry_insert_line(BucketList entry, int lineno) {
if (entry == NULL) return;
LineList t = entry->lines;
while (t->next != NULL)
t = t->next;
t->next = (LineList) malloc(sizeof(struct LineListEntry));
t->next->lineno = lineno; t->next->lineno = lineno;
t->next->next = NULL; t->next->next = NULL;
}
} /* st_insert */
/* Function st_lookup returns the memory
* location of a variable or -1 if not found
*/
int st_lookup ( char * name )
{ int h = hash(name);
BucketList l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->name) != 0))
l = l->next;
if (l == NULL) return -1;
else return l->memloc;
} }
/* Procedure printSymTab prints a formatted BucketList st_lookup_current(char *name) {
* listing of the symbol table contents int h = hash(name);
* to the listing file Scope scope = curr_scope();
*/ BucketList *hashTable = scope->hashTable;
void printSymTab(FILE * listing) BucketList l = hashTable[h];
{ int i; while ((l != NULL) && (strcmp(name, l->name) != 0))
fprintf(listing,"Variable Name Location Line Numbers\n");
fprintf(listing,"------------- -------- ------------\n");
for (i=0;i<SIZE;++i)
{ if (hashTable[i] != NULL)
{ BucketList l = hashTable[i];
while (l != NULL)
{ LineList t = l->lines;
fprintf(listing,"%-14s ",l->name);
fprintf(listing,"%-8d ",l->memloc);
while (t != NULL)
{ fprintf(listing,"%4d ",t->lineno);
t = t->next;
}
fprintf(listing,"\n");
l = l->next; l = l->next;
}
return l;
}
BucketList st_lookup(char *name) {
int h = hash(name);
Scope scope = curr_scope();
while (scope != NULL) {
BucketList *hashTable = scope->hashTable;
BucketList l = hashTable[h];
while ((l != NULL) && (strcmp(name, l->name) != 0))
l = l->next;
if (l != NULL) {
return l;
}
scope = scope->parent;
} }
} return NULL; /* not found */
} /* printSymTab */ }
/* Look `name` up starting in `scope` and continuing outward through the
 * parent links. Returns the first matching entry, or NULL when no scope on
 * that chain declares the name.
 */
BucketList st_lookup_from(char *name, Scope scope) {
    int bucket = hash(name);
    Scope s;

    for (s = scope; s != NULL; s = s->parent) {
        BucketList e;
        for (e = s->hashTable[bucket]; e != NULL; e = e->next) {
            if (strcmp(name, e->name) == 0) {
                return e;
            }
        }
    }
    return NULL; /* not found */
}
/* Print one row of the "< Symbol Table >" section for `entry`, which is
 * stored in `scope`: name, symbol kind, type (the return type for
 * functions), scope name, location, and every recorded line number.
 * A NULL entry prints nothing.
 */
static void printEntry(FILE *listing, Scope scope, BucketList entry) {
    const char *kind_text = NULL;
    const char *type_text = NULL;
    ExpType shown;
    LineList ref;

    if (entry == NULL) return;

    fprintf(listing, "%-13s", entry->name);
    fprintf(listing, " ");

    /* parameters are reported as plain variables */
    if (entry->symbolKind == SymbolFunc) {
        kind_text = "Function";
    } else if (entry->symbolKind == SymbolVar || entry->symbolKind == SymbolParam) {
        kind_text = "Variable";
    }
    if (kind_text != NULL) fprintf(listing, "%-11s", kind_text);
    fprintf(listing, " ");

    shown = (entry->symbolKind == SymbolFunc) ? entry->returnType : entry->type;
    if (shown == Void) {
        type_text = "void";
    } else if (shown == Integer) {
        type_text = "int";
    } else if (shown == IntegerArray) {
        type_text = "int[]";
    } else if (shown == Undetermined) {
        type_text = "undetermined";
    }
    if (type_text != NULL) fprintf(listing, "%-13s", type_text);

    fprintf(listing, " ");
    fprintf(listing, "%-12s", scope->name);

    fprintf(listing, " ");
    fprintf(listing, "%-8d", entry->memloc);
    fprintf(listing, " ");

    for (ref = entry->lines; ref != NULL; ref = ref->next) {
        fprintf(listing, "%3d", ref->lineno);
        fprintf(listing, " ");
    }
    fprintf(listing, "\n");
}
/* Print every symbol stored directly in `scope` (not its children), one
 * row per entry via printEntry. Rows come out in hash-bucket order, not in
 * declaration or location order. A NULL scope prints nothing.
 */
static void printScopeTable(FILE *listing, Scope scope) {
    if (scope == NULL) return;
    for (int i = 0; i < SYMTAB_SIZE; ++i) {
        for (BucketList l = scope->hashTable[i]; l != NULL; l = l->next) {
            printEntry(listing, scope, l);
        }
    }
}
/* Print a titled table for a single scope: a banner with the scope's name
 * and depth, a column header, then the scope's own symbols.
 * A NULL scope prints nothing.
 */
static void printScope(FILE *listing, Scope scope) {
    if (scope != NULL) {
        fprintf(listing, "Scope Name: %s, Depth: %d\n", scope->name, scope->depth);
        fputs("-----------------------------------------\n", listing);
        fputs("Variable Name Symbol Kind Type Location Line Numbers\n", listing);
        fputs("------------------------------------------------------------\n", listing);
        printScopeTable(listing, scope);
    }
}
/* Pre-order walk of the scope tree: print `scope` with printScope, then
 * each child scope in sibling order. A NULL scope prints nothing.
 */
static void printScopeRecursive(FILE *listing, Scope scope) {
    Scope kid;

    if (scope == NULL) return;
    printScope(listing, scope);
    for (kid = scope->child; kid != NULL; kid = kid->next_sibling) {
        printScopeRecursive(listing, kid);
    }
}
/* Print the whole scope tree, starting at the global scope.
 * Does nothing when the global scope has not been created.
 */
static void printScopeTree(FILE *listing) {
    if (scope_global != NULL) {
        printScopeRecursive(listing, scope_global);
    }
}
/* Pre-order walk of the scope tree that prints only symbol rows (no
 * per-scope banner): the symbols of `scope`, then those of each child
 * scope in sibling order. A NULL scope prints nothing.
 */
static void printScopeTableRecursive(FILE *listing, Scope scope) {
    Scope kid;

    if (scope == NULL) return;
    printScopeTable(listing, scope);
    for (kid = scope->child; kid != NULL; kid = kid->next_sibling) {
        printScopeTableRecursive(listing, kid);
    }
}
/* Print the "< Functions >" rows for every function symbol found in
 * `scope` and, recursively, in its child scopes.
 *
 * For each function the name and return type are printed first. The
 * parameter column then depends on param_count:
 *   -1 -> "undetermined" on the same line,
 *    0 -> "void" on the same line,
 *    otherwise the line is ended and one "- -" row is printed per
 *    parameter with its name and type.
 * A NULL scope prints nothing (same guard as the other recursive printers).
 */
static void printFunctionTableRecursive(FILE *listing, Scope scope) {
    if (scope == NULL) return;

    for (int i = 0; i < SYMTAB_SIZE; ++i) {
        for (BucketList entry = scope->hashTable[i]; entry != NULL; entry = entry->next) {
            if (entry->symbolKind != SymbolFunc) continue;

            fprintf(listing, "%-13s", entry->name);
            fprintf(listing, " ");
            switch (entry->returnType) {
                case Void:
                    fprintf(listing, "%-13s", "void");
                    break;
                case Integer:
                    fprintf(listing, "%-13s", "int");
                    break;
                case IntegerArray:
                    fprintf(listing, "%-13s", "int[]");
                    break;
                case Undetermined:
                    fprintf(listing, "%-13s", "undetermined");
                    break;
            }

            if (entry->param_count == -1) {
                /* signature unknown */
                fprintf(listing, " ");
                fprintf(listing, "%-14s", "undetermined");
                fprintf(listing, "\n");
            } else if (entry->param_count == 0) {
                fprintf(listing, " ");
                fprintf(listing, "%-14s", "void");
                fprintf(listing, "\n");
            } else {
                fprintf(listing, " \n");
                for (int j = 0; j < entry->param_count; j++) {
                    fprintf(listing, "- - ");
                    fprintf(listing, " %-14s", entry->param_names[j]);
                    fprintf(listing, " ");
                    switch (entry->param_types[j]) {
                        case Void:
                            fprintf(listing, "%-14s", "void");
                            break;
                        case Integer:
                            fprintf(listing, "%-14s", "int");
                            break;
                        case IntegerArray:
                            fprintf(listing, "%-14s", "int[]");
                            break;
                        case Undetermined:
                            fprintf(listing, "%-14s", "undetermined");
                            break;
                    }
                    fprintf(listing, "\n");
                }
            }
        }
    }

    for (Scope child = scope->child; child != NULL; child = child->next_sibling) {
        printFunctionTableRecursive(listing, child);
    }
}
/* Print the function table for the whole program, starting at the global
 * scope. Does nothing when the global scope has not been created.
 */
static void printFunctionTable(FILE *listing) {
    if (scope_global != NULL) {
        printFunctionTableRecursive(listing, scope_global);
    }
}
/* Print the "< Global Symbols >" rows: name, kind and type of every symbol
 * stored directly in the global scope, in hash-bucket order. Functions
 * show their return type; parameters are reported as variables.
 * Does nothing when the global scope has not been created.
 */
static void printGlobalSymbols(FILE *listing) {
    if (scope_global == NULL) return;

    for (int bucket = 0; bucket < SYMTAB_SIZE; ++bucket) {
        BucketList sym;
        for (sym = scope_global->hashTable[bucket]; sym != NULL; sym = sym->next) {
            const char *kind_text = NULL;
            const char *type_text = NULL;
            ExpType shown;

            fprintf(listing, "%-13s ", sym->name);

            if (sym->symbolKind == SymbolFunc) {
                kind_text = "Function";
            } else if (sym->symbolKind == SymbolVar || sym->symbolKind == SymbolParam) {
                kind_text = "Variable";
            }
            if (kind_text != NULL) fprintf(listing, "%-11s ", kind_text);

            shown = (sym->symbolKind == SymbolFunc) ? sym->returnType : sym->type;
            if (shown == Void) {
                type_text = "void";
            } else if (shown == Integer) {
                type_text = "int";
            } else if (shown == IntegerArray) {
                type_text = "int[]";
            } else if (shown == Undetermined) {
                type_text = "undetermined";
            }
            if (type_text != NULL) fprintf(listing, "%-13s", type_text);

            fprintf(listing, "\n");
        }
    }
}
/* Print the "< Scopes >" rows for `scope` and then for each of its child
 * scopes: one row per symbol giving scope name, nesting depth, symbol name
 * and the entry's `type` field. A blank line follows the scope's rows when
 * it contained at least one symbol. A NULL scope prints nothing.
 */
static void printScopeTableAdvancedRecursive(FILE *listing, Scope scope) {
    int printed_any = FALSE;

    if (scope == NULL) return;

    for (int bucket = 0; bucket < SYMTAB_SIZE; ++bucket) {
        BucketList sym;
        for (sym = scope->hashTable[bucket]; sym != NULL; sym = sym->next) {
            const char *type_text = NULL;

            printed_any = TRUE;
            fprintf(listing, "%-12s", scope->name);
            fprintf(listing, " ");
            fprintf(listing, "%-12d", scope->depth);
            fprintf(listing, " ");
            fprintf(listing, "%-13s", sym->name);
            fprintf(listing, " ");

            if (sym->type == Void) {
                type_text = "void";
            } else if (sym->type == Integer) {
                type_text = "int";
            } else if (sym->type == IntegerArray) {
                type_text = "int[]";
            } else if (sym->type == Undetermined) {
                type_text = "undetermined";
            }
            if (type_text != NULL) fprintf(listing, "%-11s", type_text);

            fprintf(listing, "\n");
        }
    }

    if (printed_any) fprintf(listing, "\n");

    for (Scope kid = scope->child; kid != NULL; kid = kid->next_sibling) {
        printScopeTableAdvancedRecursive(listing, kid);
    }
}
/* Print the "< Scopes >" rows for every scope below the global one; the
 * global scope's own symbols are not listed here.
 * Does nothing when the global scope has not been created.
 */
static void printScopeTableAdvanced(FILE *listing) {
    Scope top;

    if (scope_global == NULL) return;
    for (top = scope_global->child; top != NULL; top = top->next_sibling) {
        printScopeTableAdvancedRecursive(listing, top);
    }
}
/* Write the full symbol-table report to `listing` in four titled
 * sections, each with a column header followed by its rows:
 * symbol table, functions, global symbols, and nested scopes.
 */
void printSymTab(FILE *listing) {
    fputs("\n\n< Symbol Table >\n", listing);
    fputs(" Symbol Name Symbol Kind Symbol Type Scope Name Location Line Numbers\n", listing);
    fputs("------------- ----------- ------------- ------------ -------- ------------\n", listing);
    printScopeTableRecursive(listing, scope_global);

    fputs("\n\n< Functions >\n", listing);
    fputs("Function Name Return Type Parameter Name Parameter Type\n", listing);
    fputs("------------- ------------- -------------- --------------\n", listing);
    printFunctionTable(listing);

    fputs("\n\n< Global Symbols >\n", listing);
    fputs(" Symbol Name Symbol Kind Symbol Type\n", listing);
    fputs("------------- ----------- -------------\n", listing);
    printGlobalSymbols(listing);

    fputs("\n\n< Scopes >\n", listing);
    fputs(" Scope Name Nested Level Symbol Name Symbol Type\n", listing);
    fputs("------------ ------------ ------------- -----------\n", listing);
    printScopeTableAdvanced(listing);
}

View File

@@ -1,9 +1,7 @@
/****************************************************/ /****************************************************/
/* File: symtab.h */ /* File: symtab.h */
/* Symbol table interface for the TINY compiler */ /* Symbol table interface for the CMINUS COMPILER */
/* (allows only one symbol table) */ /* Modified by Yenru0 */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/ /****************************************************/
#ifndef _SYMTAB_H_ #ifndef _SYMTAB_H_
@@ -11,22 +9,130 @@
#include "globals.h" #include "globals.h"
/* Procedure st_insert inserts line numbers and /**
* memory locations into the symbol table * it is the size of the hash table
* loc = memory location is inserted only the */
* first time, otherwise ignored #define SYMTAB_SIZE 211
*/
void st_insert( char * name, int lineno, int loc );
/* Function st_lookup returns the memory #define MAX_SCOPE_DEPTH 1557
* location of a variable or -1 if not found
*/
int st_lookup ( char * name );
/* Procedure printSymTab prints a formatted #define MAX_PARAM_COUNT 32
* listing of the symbol table contents
* to the listing file /* the list of line numbers of the source
* code in which a variable is referenced
*/ */
void printSymTab(FILE * listing); typedef struct LineListEntry {
int lineno;
struct LineListEntry *next;
} * LineList;
/* The record in the bucket lists for
* each variable, including name,
* assigned memory location, and
* the list of line numbers in which
* it appears in the source code
*/
typedef struct BucketListEntry {
char *name; /* symbol name */
LineList lines; /* line numbers in which the symbol appears */
SymbolKind symbolKind; /* kind of symbol; SymbolKind is declared outside this header */
ExpType type; /* type of the symbol */
/* Parameter information: presumably only meaningful for function
 * symbols, with the first param_count slots in use -- TODO confirm
 * against symtab.c
 */
ExpType param_types[MAX_PARAM_COUNT];
char* param_names[MAX_PARAM_COUNT];
int param_count;
ExpType returnType; /* presumably the return type of a function symbol -- TODO confirm */
int memloc; /* memory location for variable */
struct BucketListEntry *next; /* next entry in the same bucket's chain */
} * BucketList;
/* One node of the scope tree; each scope carries its own hash table
 * of symbols.
 */
struct Scope {
char *name; /* scope name */
int depth; /* nesting level of the scope */
struct Scope *parent; /* enclosing scope (the global scope has none) */
struct Scope *child; /* first child scope; further children follow via next_sibling */
struct Scope *child_last; /* presumably the last child in the sibling list -- TODO confirm */
struct Scope *next_sibling; /* next scope under the same parent */
int child_count; /* presumably the number of child scopes -- TODO confirm */
int location; /* NOTE(review): use not visible in this header; confirm against symtab.c */
BucketList hashTable[SYMTAB_SIZE]; /* bucket chains holding this scope's symbols */
};
extern Scope scope_global;// no sibling no parent
extern Scope scope_stack[MAX_SCOPE_DEPTH];
extern int scope_stack_top;
/**
* before using the symbol table, initialize the global scope
*/
void st_init(void);
/**
* create a new scope with given name
* @note it does not link parent or insert into stack/list
* @param scope_name: name of the scope
* @return the created scope
*/
Scope scope_new(char *scope_name);
/**
* pop the current scope from the scope stack
*/
void pop_scope(void);
/**
* push a scope into the scope stack
* @note it does link the parent or siblings to construct tree
* @param scope: the scope to be pushed
*/
void push_scope(Scope scope);
/**
* get the top of the scope stack
* @return the current scope or NULL if the stack is empty
*/
Scope curr_scope(void);
/**
* insert a variable into the symbol table of the current scope
* or add a line number if it already exists
* @param name name of the variable
* @param symbolkind kind of the symbol
* @param type type of the variable
* @param lineno line number of the variable
* @return the bucket list entry of the symbol (failure value is not visible in this header; presumably NULL -- TODO confirm)
*/
BucketList st_try_insert(char *name, SymbolKind symbolkind, ExpType type, int lineno);
/**
* insert a line number into the variable's line list
* @param entry the bucket list entry of the variable
* @param lineno the line number to be inserted
*/
void st_entry_insert_line(BucketList entry, int lineno);
/**
* lookup a variable in the current scope
* @param name name of the variable to lookup
* @return the bucket list entry of the variable or NULL if not found
*/
BucketList st_lookup_current(char *name);
/**
* lookup a variable from the top scope to root
* @param name name of the variable to lookup
* @return the bucket list entry of the variable or NULL if not found
*/
BucketList st_lookup(char *name);
/**
* lookup a variable from the given scope to root
* @param name name of the variable to lookup
* @param scope the scope to start lookup from
* @return the bucket list entry of the variable or NULL if not found
*/
BucketList st_lookup_from(char *name, Scope scope);
void printSymTab(FILE *listing);
#endif #endif

42
src/test.cm Normal file
View File

@@ -0,0 +1,42 @@
int zero(void)
{
return 0;
}
int first(int data[])
{
return data[0];
}
void scopedemo(void)
{
int outer;
outer = 1;
{
int outer;
outer = outer + 2;
}
}
void output(void)
{
return;
}
int input;
void main(void)
{
int arr[2];
arr[0] = 5;
scopedemo();
first(arr);
zero();
zero(1);
{
int blockOnly;
blockOnly = arr[0];
}
blockOnly;
return;
}

View File

@@ -1,6 +1,3 @@
/* A program to perform Euclid's
Algorithm to compute gcd */
int gcd (int u, int v) int gcd (int u, int v)
{ {
if (v == 0) return u; if (v == 0) return u;
@@ -8,7 +5,7 @@ int gcd (int u, int v)
/* u-u/v*v == u mod v */ /* u-u/v*v == u mod v */
} }
void main(void) void main(void k)
{ {
int x; int y; int x; int y;
x = input(); y = input(); x = input(); y = input();

13
src/test_3.cm Normal file
View File

@@ -0,0 +1,13 @@
int x(int y)
{
return y + 1;
}
int main(void)
{
int a;
int b;
int c;
return x(a, b, c);
}

7
src/test_4.cm Normal file
View File

@@ -0,0 +1,7 @@
int main(void)
{
int x[5];
x[output(5)] = 3 + 5;
return 0;
}

View File

@@ -137,6 +137,36 @@ TreeNode *newExpNode(ExpKind kind) {
return t; return t;
} }
/* Function newDeclNode creates a new declaration
 * node for syntax tree construction.
 *
 * The node starts with all children and its sibling set to NULL,
 * nodekind DeclK, the given declaration kind, and the current value
 * of lineno. Its attr and type members are not initialized here.
 * If allocation fails, an error is written to the listing and NULL
 * is returned.
 */
TreeNode *newDeclNode(DeclKind kind) {
    TreeNode *node = (TreeNode *) malloc(sizeof(TreeNode));
    int slot = 0;

    if (node == NULL) {
        fprintf(listing, "Out of memory error at line %d\n", lineno);
        return NULL;
    }

    while (slot < MAXCHILDREN) {
        node->child[slot] = NULL;
        slot++;
    }
    node->sibling = NULL;
    node->nodekind = DeclK;
    node->kind.decl = kind;
    node->lineno = lineno;
    return node;
}
/* Function newTypeNode creates a new type
 * node for syntax tree construction.
 *
 * On success the node has nodekind TypeK, the given type kind, the
 * current value of lineno, and NULL children and sibling. Its attr
 * and type members are not initialized here.
 * If allocation fails, an error is written to the listing and NULL
 * is returned.
 */
TreeNode *newTypeNode(TypeKind kind) {
    TreeNode *fresh = (TreeNode *) malloc(sizeof(TreeNode));
    int idx;

    if (fresh != NULL) {
        fresh->nodekind = TypeK;
        fresh->kind.type = kind;
        fresh->lineno = lineno;
        fresh->sibling = NULL;
        for (idx = MAXCHILDREN - 1; idx >= 0; idx--)
            fresh->child[idx] = NULL;
        return fresh;
    }

    fprintf(listing, "Out of memory error at line %d\n", lineno);
    return NULL;
}
/* Function copyString allocates and makes a new /* Function copyString allocates and makes a new
* copy of an existing string * copy of an existing string
*/ */
@@ -156,7 +186,7 @@ char *copyString(char *s) {
/* Variable indentno is used by printTree to /* Variable indentno is used by printTree to
* store current number of spaces to indent * store current number of spaces to indent
*/ */
static indentno = 0; static int indentno = 0;
/* macros to increase/decrease indentation */ /* macros to increase/decrease indentation */
#define INDENT indentno += 2 #define INDENT indentno += 2
@@ -169,6 +199,22 @@ static void printSpaces(void) {
fprintf(listing, " "); fprintf(listing, " ");
} }
/* Procedure printType writes the textual form of a node's type
 * ("void", "int" or "int[]") to the listing file, without a
 * trailing newline. Any other type value prints nothing.
 */
void printType(TreeNode *tree) {
    const char *text;

    if (tree->type == Void)
        text = "void";
    else if (tree->type == Integer)
        text = "int";
    else if (tree->type == IntegerArray)
        text = "int[]";
    else
        return;

    fputs(text, listing);
}
/* procedure printTree prints a syntax tree to the /* procedure printTree prints a syntax tree to the
* listing file using indentation to indicate subtrees * listing file using indentation to indicate subtrees
*/ */
@@ -179,27 +225,28 @@ void printTree(TreeNode *tree) {
printSpaces(); printSpaces();
if (tree->nodekind == StmtK) { if (tree->nodekind == StmtK) {
switch (tree->kind.stmt) { switch (tree->kind.stmt) {
case CompK:
fprintf(listing, "Compound Statement:\n");
break;
case IfK: case IfK:
fprintf(listing, "If\n"); fprintf(listing, "%s:\n",
(tree->child[2] != NULL) ? "If-Else Statement" : "If Statement");
break; break;
case RepeatK: case IterK:
fprintf(listing, "Repeat\n"); fprintf(listing, "While Statement:\n");
break; break;
case AssignK: case ReturnK:
fprintf(listing, "Assign to: %s\n", tree->attr.name); fprintf(listing, "Return Statement:\n");
break;
case ReadK:
fprintf(listing, "Read: %s\n", tree->attr.name);
break;
case WriteK:
fprintf(listing, "Write\n");
break; break;
default: default:
fprintf(listing, "Unknown ExpNode kind\n"); fprintf(listing, "Unknown StmtNode kind\n");
break; break;
} }
} else if (tree->nodekind == ExpK) { } else if (tree->nodekind == ExpK) {
switch (tree->kind.exp) { switch (tree->kind.exp) {
case AssignK:
fprintf(listing, "Assign:\n");
break;
case OpK: case OpK:
fprintf(listing, "Op: "); fprintf(listing, "Op: ");
printToken(tree->attr.op, "\0"); printToken(tree->attr.op, "\0");
@@ -208,12 +255,55 @@ void printTree(TreeNode *tree) {
fprintf(listing, "Const: %d\n", tree->attr.val); fprintf(listing, "Const: %d\n", tree->attr.val);
break; break;
case IdK: case IdK:
fprintf(listing, "Id: %s\n", tree->attr.name); fprintf(listing, "Variable: name = %s\n", tree->attr.name);
break;
case ArrIdK:
fprintf(listing, "Variable: name = %s\n", tree->attr.name);
break;
case CallK:
fprintf(listing, "Call: function name = %s\n", tree->attr.name);
break; break;
default: default:
fprintf(listing, "Unknown ExpNode kind\n"); fprintf(listing, "Unknown ExpNode kind\n");
break; break;
} }
} else if (tree->nodekind == DeclK) {
switch (tree->kind.decl) {
case FuncK:
fprintf(listing, "Function Declaration: name = %s, return type = ", tree->attr.name);
printType(tree);
fprintf(listing, "\n");
break;
case VarK:
fprintf(listing, "Variable Declaration: name = %s, type = ", tree->attr.name);
printType(tree);
fprintf(listing, "\n");
break;
case ArrVarK:
fprintf(listing, "Variable Declaration: name = %s, type = ", tree->attr.name);
printType(tree);
fprintf(listing, "\n");
break;
case NonArrParamK:
if (tree->type == Void)
fprintf(listing, "Void Parameter\n");
else {
fprintf(listing, "Parameter: name = %s, type = ", tree->attr.name);
printType(tree);
fprintf(listing, "\n");
}
break;
case ArrParamK:
fprintf(listing, "Parameter: name = %s, type = ", tree->attr.name);
printType(tree);
fprintf(listing, "\n");
break;
default:
fprintf(listing, "Unknown DeclNode kind\n");
break;
}
} else if (tree->nodekind == TypeK) {
} else } else
fprintf(listing, "Unknown node kind\n"); fprintf(listing, "Unknown node kind\n");
for (i = 0; i < MAXCHILDREN; i++) for (i = 0; i < MAXCHILDREN; i++)

View File

@@ -25,6 +25,10 @@ TreeNode * newStmtNode(StmtKind);
*/ */
TreeNode * newExpNode(ExpKind); TreeNode * newExpNode(ExpKind);
TreeNode* newDeclNode(DeclKind);
TreeNode* newTypeNode(TypeKind);
/* Function copyString allocates and makes a new /* Function copyString allocates and makes a new
* copy of an existing string * copy of an existing string
*/ */

View File

@@ -1,120 +0,0 @@
/****************************************************/
/* File: globals.h */
/* Yacc/Bison Version */
/* Global types and vars for TINY compiler */
/* must come before other include files */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#ifndef _GLOBALS_H_
#define _GLOBALS_H_
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/* Yacc/Bison generates internally its own values
* for the tokens. Other files can access these values
* by including the tab.h file generated using the
* Yacc/Bison option -d ("generate header")
*
* The YYPARSER flag prevents inclusion of the tab.h
* into the Yacc/Bison output itself
*/
#ifndef YYPARSER
/* the name of the following file may change */
#include "y.tab.h"
/* ENDFILE is implicitly defined by Yacc/Bison,
* and not included in the tab.h file
*/
#define ENDFILE 0
#endif
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
/* MAXRESERVED = the number of reserved words */
#define MAXRESERVED 8
/* Yacc/Bison generates its own integer values
* for tokens
*/
typedef int TokenType;
extern FILE* source; /* source code text file */
extern FILE* listing; /* listing output text file */
extern FILE* code; /* code text file for TM simulator */
extern int lineno; /* source line number for listing */
/**************************************************/
/*********** Syntax tree for parsing ************/
/**************************************************/
typedef enum {StmtK,ExpK} NodeKind;
typedef enum {IfK,RepeatK,AssignK,ReadK,WriteK} StmtKind;
typedef enum {OpK,ConstK,IdK} ExpKind;
/* ExpType is used for type checking */
typedef enum {Void,Integer,Boolean} ExpType;
#define MAXCHILDREN 3
/* A node of the syntax tree built by the parser. */
typedef struct treeNode
{ struct treeNode * child[MAXCHILDREN]; /* sub-trees of this node; unused slots are NULL */
struct treeNode * sibling; /* next node in a sequence (statement lists are chained this way) */
int lineno; /* source line number recorded for the node */
NodeKind nodekind; /* StmtK or ExpK: selects which member of `kind` is valid */
union { StmtKind stmt; ExpKind exp;} kind;
/* attr holds the one attribute the node needs:
 * op for operator nodes, val for constants, name for identifiers
 */
union { TokenType op;
int val;
char * name; } attr;
ExpType type; /* for type checking of exps */
} TreeNode;
/**************************************************/
/*********** Flags for tracing ************/
/**************************************************/
/* EchoSource = TRUE causes the source program to
* be echoed to the listing file with line numbers
* during parsing
*/
extern int EchoSource;
/* TraceScan = TRUE causes token information to be
* printed to the listing file as each token is
* recognized by the scanner
*/
extern int TraceScan;
/* TraceParse = TRUE causes the syntax tree to be
* printed to the listing file in linearized form
* (using indents for children)
*/
extern int TraceParse;
/* TraceAnalyze = TRUE causes symbol table inserts
* and lookups to be reported to the listing file
*/
extern int TraceAnalyze;
/* TraceCode = TRUE causes comments to be written
* to the TM code file as code is generated
*/
extern int TraceCode;
/* Error = TRUE prevents further passes if an error occurs */
extern int Error;
#endif

View File

@@ -1,164 +0,0 @@
/****************************************************/
/* File: tiny.y */
/* The TINY Yacc/Bison specification file */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
%{
#define YYPARSER /* distinguishes Yacc output from other code files */
#include "globals.h"
#include "util.h"
#include "scan.h"
#include "parse.h"
#define YYSTYPE TreeNode *
static char * savedName; /* for use in assignments */
static int savedLineNo; /* ditto */
static TreeNode * savedTree; /* stores syntax tree for later return */
static int yylex(void); // added 11/2/11 to ensure no conflict with lex
%}
%token IF THEN ELSE END REPEAT UNTIL READ WRITE
%token ID NUM
%token ASSIGN EQ LT PLUS MINUS TIMES OVER LPAREN RPAREN SEMI
%token ERROR
%% /* Grammar for TINY */
program : stmt_seq
{ savedTree = $1;}
;
/* A statement sequence is kept as a sibling-linked list of statement
 * nodes; the value of the rule is the head of that list.
 */
stmt_seq : stmt_seq SEMI stmt
{ YYSTYPE t = $1;
if (t != NULL)
/* walk to the last node of the existing list and append the new statement */
{ while (t->sibling != NULL)
t = t->sibling;
t->sibling = $3;
$$ = $1; }
/* the list so far is empty (stmt yields NULL after a syntax error), so it starts at $3 */
else $$ = $3;
}
| stmt { $$ = $1; }
;
stmt : if_stmt { $$ = $1; }
| repeat_stmt { $$ = $1; }
| assign_stmt { $$ = $1; }
| read_stmt { $$ = $1; }
| write_stmt { $$ = $1; }
| error { $$ = NULL; }
;
if_stmt : IF exp THEN stmt_seq END
{ $$ = newStmtNode(IfK);
$$->child[0] = $2;
$$->child[1] = $4;
}
| IF exp THEN stmt_seq ELSE stmt_seq END
{ $$ = newStmtNode(IfK);
$$->child[0] = $2;
$$->child[1] = $4;
$$->child[2] = $6;
}
;
repeat_stmt : REPEAT stmt_seq UNTIL exp
{ $$ = newStmtNode(RepeatK);
$$->child[0] = $2;
$$->child[1] = $4;
}
;
/* The mid-rule action placed right after ID stores the identifier text
 * and its line number in savedName/savedLineNo before the rest of the
 * rule is parsed (presumably because tokenString and lineno will have
 * moved on by the time the final action runs -- TODO confirm against
 * the scanner).
 * The mid-rule action itself occupies position 2 of the rule, so ASSIGN
 * is component 3 and exp is component 4.
 */
assign_stmt : ID { savedName = copyString(tokenString);
savedLineNo = lineno; }
ASSIGN exp
{ $$ = newStmtNode(AssignK);
$$->child[0] = $4;
$$->attr.name = savedName;
$$->lineno = savedLineNo;
}
;
read_stmt : READ ID
{ $$ = newStmtNode(ReadK);
$$->attr.name =
copyString(tokenString);
}
;
write_stmt : WRITE exp
{ $$ = newStmtNode(WriteK);
$$->child[0] = $2;
}
;
exp : simple_exp LT simple_exp
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = LT;
}
| simple_exp EQ simple_exp
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = EQ;
}
| simple_exp { $$ = $1; }
;
simple_exp : simple_exp PLUS term
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = PLUS;
}
| simple_exp MINUS term
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = MINUS;
}
| term { $$ = $1; }
;
term : term TIMES factor
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = TIMES;
}
| term OVER factor
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = OVER;
}
| factor { $$ = $1; }
;
factor : LPAREN exp RPAREN
{ $$ = $2; }
| NUM
{ $$ = newExpNode(ConstK);
$$->attr.val = atoi(tokenString);
}
| ID { $$ = newExpNode(IdK);
$$->attr.name =
copyString(tokenString);
}
| error { $$ = NULL; }
;
%%
int yyerror(char * message)
{ fprintf(listing,"Syntax error at line %d: %s\n",lineno,message);
fprintf(listing,"Current token: ");
printToken(yychar,tokenString);
Error = TRUE;
return 0;
}
/* yylex calls getToken to make Yacc/Bison output
* compatible with ealier versions of the TINY scanner
*/
static int yylex(void)
{ return getToken(); }
/* Function parse runs the Yacc/Bison parser and returns the
 * syntax tree that the `program` rule stored in savedTree
 */
TreeNode *parse(void) {
    (void) yyparse();
    return savedTree;
}