diff --git a/README.md b/README.md index b942f64..93a6bf1 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ ## Syntax Spec -```spec +```c program := defn* defn := VAL type ID ; @@ -55,3 +55,56 @@ lambda := param_list compound compound := LCURLY (stmt)* expr? RCURLY ``` + +### AST Spec + +```c +NODE_PROGRAM: + token: PROGRAM + children: NODE_DEFN* + +NODE_DEFN: + token: VAL + children: NODE_TYPE, TOKEN_ID, (NODE_EXPR)? + children_count: 2 | 3 + +NODE_TYPE_SIMPLE: + token: ID + children: NODE_ID NODE_TYPE_STAR* + children_count: 1+ +NODE_TYPE_COMPLEX: + token: COMPLEX_TYPE + children: NODE_TYPE_PARAM NODE_TYPE_OUT +NODE_TYPE_PARAM + token: TYPE_PARAM + children: (NODE_TYPE | NODE_TYPE_COMPLEX)* + children_count: 0+ +NODE_TYPE_OUT + token: TYPE_OUT + children: (NODE_TYPE | NODE_TYPE_COMPLEX)? + +NODE_EXPR: + token: EXPR + children: (atom)+ +// atom definition +NODE_NUM: + token: NUM +NODE_STR: + token: STR +NODE_LAMBDA: + token: LAMBDA + children: NODE_PARAM_LIST NODE_COMPOUND +NODE_COMPOUND: + token: COMPOUND + children: (NODE_STMT)* (NODE_EXPR)? + children_count: 0+ + + +NODE_PARAM_LIST: + token: PARAM_LIST + children: NODE_PARAM* +NODE_PARAM: + token: PARAM + children: NODE_TYPE, TOKEN_ID + +``` \ No newline at end of file diff --git a/include/ast_util.h b/include/ast_util.h new file mode 100644 index 0000000..b051a0a --- /dev/null +++ b/include/ast_util.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include + +#include "globals.h" + + +void ast_node_add_child(ASTNode *parent, ASTNode *child); + +void ast_node_free(ASTNode *node); + +void ast_node_print(ASTNode *node, int depth); + +/* + NODE SPECIFIC FUNCTIONS +*/ + +ASTNode *ast_node_program(); + +ASTNode *ast_node_defn(Token tok_val, ASTNode *type, ASTNode *id, ASTNode *expr); + +ASTNode *ast_node_type_simple(Token tok_id); + +ASTNode *ast_node_type_complex(Token tok_bracket, ASTNode *type_param, ASTNode *type_out); + +ASTNode *ast_node_type_param(); + +ASTNode *ast_node_type_out(); + +ASTNode *ast_node_type_star(Token tok_star); + +ASTNode *ast_node_type_void(); + +ASTNode *ast_node_id(Token id); \ No newline at end of file diff --git a/include/globals.h b/include/globals.h index db5c8ec..336e565 100644 --- a/include/globals.h +++ b/include/globals.h @@ -31,7 +31,14 @@ typedef enum { ELSE, EOF_TOKEN, - ERROR + ERROR, + // for parser use + + PARSER_USE, + + VOID, + + } TokenType; typedef struct { @@ -48,8 +55,39 @@ typedef struct { /** * AST Node Definitions */ + +typedef enum { + NODE_PROGRAM, + NODE_DEFN, + + NODE_TYPE_SIMPLE, + NODE_TYPE_COMPLEX, + NODE_TYPE_PARAM, + NODE_TYPE_OUT, + NODE_TYPE_STAR, + + NODE_EXPR, + NODE_NUM, + NODE_ID, + + NODE_PARAM_LIST, + NODE_PARAM, + + NODE_LAMBDA, + NODE_COMPOUND, + + NODE_STMT_RETURN, + NODE_STMT_EXPR, + + + +} NodeKind; + typedef struct ASTNode { + NodeKind kind; Token token; + struct ASTNode **children; - size_t child_count; + size_t count; + size_t capacity; } ASTNode; \ No newline at end of file diff --git a/include/parse.h b/include/parse.h index 4d276e1..d217d69 100644 --- a/include/parse.h +++ b/include/parse.h @@ -1,8 +1,10 @@ #pragma once #include +#include #include "globals.h" #include "lex.h" +#include "ast_util.h" typedef struct Parser { Lexer *lexer; @@ -17,10 +19,6 @@ Parser *parser_new(Lexer *lexer); void parser_free(Parser *parser); -static void parser_next(Parser *parser); - -static void parser_expect(Parser *parser, TokenType type); - /* PARSER PARSE FUNCTIONS */ @@ -30,6 +28,10 @@ ASTNode *parser_parse_defn(Parser *parser); ASTNode *parser_parse_type(Parser *parser); +ASTNode *parser_parse_type_simple(Parser *parser); + +ASTNode *parser_parse_type_complex(Parser *parser); + ASTNode *parser_parse_expr(Parser *parser); ASTNode *parser_parse_atom(Parser *parser); diff --git a/src/ast_util.c b/src/ast_util.c new file mode 100644 index 0000000..6ff52f4 --- /dev/null +++ b/src/ast_util.c @@ -0,0 +1,148 @@ +#include "ast_util.h" +#include "util.h" + +static ASTNode *ast_node_new(NodeKind kind, Token token) { + ASTNode *node = malloc(sizeof(ASTNode)); + node->kind = kind; + node->token = token; + node->children = NULL; + node->count = 0; + node->capacity = 0; + return node; +} + +void ast_node_add_child(ASTNode *parent, ASTNode *child) { + if (parent->count >= parent->capacity) { + size_t new_capacity = parent->capacity == 0 ? 4 : parent->capacity * 2; + parent->children = realloc(parent->children, new_capacity * sizeof(ASTNode *)); + parent->capacity = new_capacity; + } + parent->children[parent->count++] = child; +} + +void ast_node_free(ASTNode *node) { + for (size_t i = 0; i < node->count; i++) { + ast_node_free(node->children[i]); + } + free(node->children); + free(node); +} + +void ast_node_print(ASTNode *node, int depth) { + for (int i = 0; i < depth; i++) { + printf(" "); + } + printf("NodeKind: "); + + switch (node->kind) { + case NODE_PROGRAM: + printf("NODE_PROGRAM"); + break; + case NODE_DEFN: + printf("NODE_DEFN"); + break; + case NODE_TYPE_SIMPLE: + printf("NODE_TYPE_SIMPLE"); + break; + case NODE_TYPE_COMPLEX: + printf("NODE_TYPE_COMPLEX"); + break; + case NODE_TYPE_PARAM: + printf("NODE_TYPE_PARAM"); + break; + case NODE_TYPE_OUT: + printf("NODE_TYPE_OUT"); + break; + case NODE_TYPE_STAR: + printf("NODE_TYPE_STAR"); + break; + case NODE_ID: + printf("NODE_ID"); + break; + default: + printf("UNKNOWN_NODE"); + break; + } + + printf(", Token: "); + print_token(node->token); + + printf("\n"); + for (size_t i = 0; i < node->count; i++) { + ast_node_print(node->children[i], depth + 1); + } +} + +/* + NODE SPECIFIC FUNCTIONS +*/ + +ASTNode *ast_node_program() { + Token tok = (Token) { + .type = EOF_TOKEN, + .line = 0, + .data = {0}}; + ASTNode *node = + ast_node_new(NODE_PROGRAM, tok); + + return node; +} + +ASTNode *ast_node_defn( + Token tok_val /*VAL token*/, ASTNode *type, ASTNode *id, ASTNode *expr) { + ASTNode *node = + ast_node_new(NODE_DEFN, tok_val); + ast_node_add_child(node, type); + ast_node_add_child(node, id); + + if (expr != NULL) { + ast_node_add_child(node, expr); + } + return node; +} + +ASTNode *ast_node_type_simple(Token tok_id) { + ASTNode *node = + ast_node_new(NODE_TYPE_SIMPLE, tok_id); + return node; +} + +ASTNode *ast_node_type_complex( + Token tok_bracket, ASTNode *type_param, ASTNode *type_out) { + ASTNode *node = ast_node_new(NODE_TYPE_COMPLEX, tok_bracket); + ast_node_add_child(node, type_param); + ast_node_add_child(node, type_out); + return node; +} + +ASTNode *ast_node_type_param() { + ASTNode *node = + ast_node_new(NODE_TYPE_PARAM, (Token) {0}); + return node; +} + +ASTNode *ast_node_type_out() { + ASTNode *node = + ast_node_new(NODE_TYPE_OUT, (Token) {0}); + return node; +} + +ASTNode *ast_node_type_star(Token tok_star) { + ASTNode *node = + ast_node_new(NODE_TYPE_STAR, tok_star); + return node; +} + + +ASTNode *ast_node_type_void() { + ASTNode *node = + ast_node_new(NODE_TYPE_SIMPLE, (Token) { .type = VOID, .line = 0, .data = {0} }); + return node; +} + + +ASTNode *ast_node_id(Token id) { + ASTNode *node = + ast_node_new(NODE_ID, id); + return node; +} \ No newline at end of file diff --git a/src/main.c b/src/main.c index ca9c220..65f21b8 100644 --- a/src/main.c +++ b/src/main.c @@ -1,11 +1,25 @@ #include "globals.h" #include "lex.h" #include "util.h" +#include "parse.h" -int main() { +#define SCAN 0 +#define PARSE 1 + +int main(int argc, char **argv) { Lexer *lexer = lexer_new(); + + FILE * f; - FILE *f = fopen("test.cval", "r"); + if (argc > 2) { + printf("Usage: %s [source_file]\n", argv[0]); + return 1; + } + else if (argc == 2) { + f = fopen(argv[1], "r"); + } else { + f = fopen("test.cval", "r"); + } if (f == NULL) { perror("Failed to open file"); @@ -14,14 +28,32 @@ int main() { lexer_set_source(lexer, f); + #if SCAN Token tok; do { tok = lexer_next_token(lexer); print_token(tok); + printf("\n"); } while(tok.type != EOF_TOKEN && tok.type != ERROR); + #endif + + #if PARSE + Parser *parser = parser_new(lexer); + + ASTNode *ast_root = parser_parse_program(parser); + + if (parser->flag_error) { + printf("Parsing failed due to errors.\n"); + } else { + printf("Parsing succeeded.\n"); + ast_node_print(ast_root, 0); + } + #endif + + fclose(f); return 0; -} \ No newline at end of file +} diff --git a/src/parse.c b/src/parse.c index 726a158..02cc25e 100644 --- a/src/parse.c +++ b/src/parse.c @@ -2,6 +2,10 @@ #include +static void parser_next(Parser *parser); + +static bool parser_expect(Parser *parser, TokenType type); + Parser *parser_new(Lexer *lexer) { Parser *parser = malloc(sizeof(Parser)); if (parser == NULL) { @@ -25,14 +29,163 @@ static void parser_next(Parser *parser) { parser->peek = lexer_next_token(parser->lexer); } -static void parser_expect(Parser *parser, TokenType type) { +static bool parser_expect(Parser *parser, TokenType type) { if (parser->current.type == type) { parser_next(parser); + return true; } else { parser->flag_error = 1; + return false; } } /* IMPL. PARSER PARSE FUNCTIONS -*/ \ No newline at end of file +*/ + +ASTNode *parser_parse_program(Parser *parser) { + ASTNode *root = ast_node_program(); + + while (parser->current.type == VAL) { + ASTNode *defn_node = parser_parse_defn(parser); + + if (defn_node == NULL) { + ast_node_free(root); + parser->flag_error = 1; + return NULL; + } + ast_node_add_child(root, defn_node); + printf(":%zu\n", root->capacity); + } + + return root; +} + +ASTNode *parser_parse_defn(Parser *parser) { + Token val_tok = parser->current; + if (!parser_expect(parser, VAL)) {// must start with VAL + return NULL; + } + + ASTNode *type_node = parser_parse_type(parser); + if (type_node == NULL) { + return NULL; + } + printf("%d", type_node->kind); + printf("rogally%d\n", parser->current.type); + if (parser->current.type != ID) { + parser->flag_error = 1; + return NULL; + } + Token id_tok = parser->current; + ASTNode *id_node = ast_node_id(id_tok); + parser_next(parser); + + printf("%d", id_node->kind); + + ASTNode *expr_node = NULL; + printf("rogally%d\n", parser->current.type); + if (parser->current.type != SEMI) { + perror("Parsing expression in definition not implemented yet.\n"); + //expr_node = parser_parse_expr(parser); + return NULL; + if (expr_node == NULL) { + return NULL; + } + } + printf("rogally\n"); + if (!parser_expect(parser, SEMI)) { + return NULL; + } + printf("rogally\n"); + + ASTNode *defn_node = ast_node_defn(val_tok, type_node, id_node, expr_node); + return defn_node; +} + +ASTNode *parser_parse_type(Parser *parser) { + ASTNode *type_node = NULL; + if (parser->current.type == ID) { + type_node = parser_parse_type_simple(parser); + } else if (parser->current.type == LBRACK) { + type_node = parser_parse_type_complex(parser); + } else { + parser->flag_error = 1; + return NULL; + } + return type_node; +} + +ASTNode *parser_parse_type_simple(Parser *parser) { + Token token = parser->current; + if (!parser_expect(parser, ID)) { + return NULL; + } + ASTNode *type_node = ast_node_type_simple(token); + while (parser->current.type == STAR) { + Token star_tok = parser->current; + parser_next(parser); + + ASTNode *type_star = ast_node_type_star(star_tok); + ast_node_add_child(type_node, type_star); + } + + return type_node; +} + +ASTNode *parser_parse_type_complex(Parser *parser) { + Token tok = parser->current; + parser_next(parser); + + ASTNode *types[256]; + ASTNode *type_ret = NULL; + size_t cnt = 0; + + ASTNode *ret = NULL; + + while (parser->current.type != RBRACK && parser->current.type != ARROW) { + ASTNode *type = parser_parse_type(parser); + if (type == NULL) { + return NULL; + } + + types[cnt++] = type; + } + + if (parser->current.type == ARROW) { + + parser_next(parser); + + if (parser->current.type != RBRACK) { + type_ret = parser_parse_type(parser); + if (type_ret == NULL) return NULL; + } else { + type_ret = ast_node_type_void(parser); + } + + ASTNode * type_param = ast_node_type_param(); + for (size_t i = 0; i < cnt; i++) { + ast_node_add_child(type_param, types[i]); + } + + ASTNode *type_out = ast_node_type_out(); + ast_node_add_child(type_out, type_ret); + ret = ast_node_type_complex(tok, type_param, type_out); + } else if (parser->current.type == RBRACK) { + + if (cnt >= 2) { + parser->flag_error = 1;// too many args + return NULL; + } + if (cnt == 1) { + ret = types[0]; + } else { + ret = ast_node_type_void(parser); + } + } else { + parser->flag_error = 1; + return NULL; + } + parser_next(parser); + return ret; +} \ No newline at end of file diff --git a/src/util.c b/src/util.c index 5c6f902..8bde049 100644 --- a/src/util.c +++ b/src/util.c @@ -103,5 +103,4 @@ void print_token(Token tok) { if (tok.type == ID || tok.type == NUM || tok.type == STRING_LITERAL) { printf("Data: %s", tok.data.string); } - printf("\n"); } \ No newline at end of file diff --git a/test2.cval b/test2.cval new file mode 100644 index 0000000..3244143 --- /dev/null +++ b/test2.cval @@ -0,0 +1,2 @@ +val [->int] main; +val int n; \ No newline at end of file