Compare commits

...

3 Commits

Author SHA1 Message Date
53e9cb2577 fix ast_util print for newly added node kind 2025-11-25 23:35:10 +09:00
9c94663045 minor implementation
basic expr parsing (not complex like compound, lambda)
2025-11-25 23:29:57 +09:00
d8c0b2a762 minor implementation for parser
(simple type and defn)
2025-11-23 22:07:04 +09:00
9 changed files with 698 additions and 13 deletions

View File

@@ -22,7 +22,7 @@
## Syntax Spec ## Syntax Spec
```spec ```c
program := defn* program := defn*
defn := VAL type ID ; defn := VAL type ID ;
@@ -55,3 +55,56 @@ lambda := param_list compound
compound := LCURLY (stmt)* expr? RCURLY compound := LCURLY (stmt)* expr? RCURLY
``` ```
### AST Spec
```c
NODE_PROGRAM:
token: PROGRAM
children: NODE_DEFN*
NODE_DEFN:
token: VAL
children: NODE_TYPE, TOKEN_ID, (NODE_EXPR)?
children_count: 2 | 3
NODE_TYPE_SIMPLE:
token: ID
children: NODE_ID NODE_TYPE_STAR*
children_count: 1+
NODE_TYPE_COMPLEX:
token: COMPLEX_TYPE
children: NODE_TYPE_PARAM NODE_TYPE_OUT
NODE_TYPE_PARAM:
token: TYPE_PARAM
children: (NODE_TYPE | NODE_TYPE_COMPLEX)*
children_count: 0+
NODE_TYPE_OUT:
token: TYPE_OUT
children: (NODE_TYPE | NODE_TYPE_COMPLEX)?
NODE_EXPR:
token: EXPR
children: (atom)+
// atom definition
NODE_NUM:
token: NUM
NODE_STR:
token: STR
NODE_LAMBDA:
token: LAMBDA
children: NODE_PARAM_LIST NODE_COMPOUND
NODE_COMPOUND:
token: COMPOUND
children: (NODE_STMT)* (NODE_EXPR)?
children_count: 0+
NODE_PARAM_LIST:
token: PARAM_LIST
children: NODE_PARAM*
NODE_PARAM:
token: PARAM
children: NODE_TYPE, TOKEN_ID
```

61
include/ast_util.h Normal file
View File

@@ -0,0 +1,61 @@
#pragma once
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include "globals.h"
/* Append `child` to `parent`'s child array, growing the array when it is full. */
void ast_node_add_child(ASTNode *parent, ASTNode *child);
/* Recursively free every child of `node`, then its child array, then `node` itself. */
void ast_node_free(ASTNode *node);
/* Print `node` and its whole subtree, one node per line; each level below
   the starting node is printed with `depth + 1` as its depth. */
void ast_node_print(ASTNode *node, int depth);
/*
NODE SPECIFIC FUNCTIONS
Each constructor returns a freshly allocated node. Child nodes passed in
are attached to the returned node (and are later released by ast_node_free
on that node).
*/
/* NODE_PROGRAM with no children; its token is a placeholder of type EOF_TOKEN. */
ASTNode *ast_node_program();
/* NODE_DEFN tagged with `tok_val`; children are `type`, `id`, and `expr` when it is not NULL. */
ASTNode *ast_node_defn(Token tok_val, ASTNode *type, ASTNode *id, ASTNode *expr);
/* Childless NODE_TYPE_SIMPLE tagged with `tok_id`. */
ASTNode *ast_node_type_simple(Token tok_id);
/* NODE_TYPE_COMPLEX with children `type_param` then `type_out`; the token of
   both children is overwritten with `tok_bracket`. */
ASTNode *ast_node_type_complex(Token tok_bracket, ASTNode *type_param, ASTNode *type_out);
/* Childless NODE_TYPE_PARAM with a zeroed token. */
ASTNode *ast_node_type_param();
/* Childless NODE_TYPE_OUT with a zeroed token. */
ASTNode *ast_node_type_out();
/* Childless NODE_TYPE_STAR tagged with `tok_star`. */
ASTNode *ast_node_type_star(Token tok_star);
/* NODE_TYPE_SIMPLE whose token has type VOID (line 0, zeroed data). */
ASTNode *ast_node_type_void();
/* Childless leaf nodes (NODE_ID, NODE_NUM, NODE_STR, NODE_STAR, NODE_ANDREF)
   tagged with the given token. */
ASTNode *ast_node_id(Token id);
ASTNode *ast_node_num(Token num);
ASTNode *ast_node_str(Token str);
ASTNode *ast_node_star(Token star);
ASTNode *ast_node_andref(Token andref);
/* Childless NODE_EXPR tagged with `caller`; children are added by the caller. */
ASTNode *ast_node_expr(Token caller);
/* Childless NODE_PARAM_LIST tagged with `tok_lparen`. */
ASTNode *ast_node_param_list(Token tok_lparen);
/* NODE_PARAM tagged with `id`; its single child is `type`. */
ASTNode *ast_node_param(Token id, ASTNode *type);
/* Childless NODE_COMPOUND tagged with `tok_lcurly`. */
ASTNode *ast_node_compound(Token tok_lcurly);
/* NODE_LAMBDA with children `param_list` then `body`; its token is copied from `param_list`. */
ASTNode *ast_node_lambda(ASTNode *param_list, ASTNode *body);
/* NODE_STMT_RETURN tagged with `tok_return`; `expr` is attached only when it is not NULL. */
ASTNode *ast_node_stmt_return(Token tok_return, ASTNode *expr);
/* NODE_STMT_EXPR wrapping `expr`; its token is copied from `expr`. */
ASTNode *ast_node_stmt_expr(ASTNode *expr);
/* NODE_STMT_IF with children `condition`, `then_branch`, and `else_branch` when it is not NULL. */
ASTNode *ast_node_stmt_if(Token tok_if, ASTNode *condition, ASTNode *then_branch, ASTNode *else_branch);
/* NODE_STMT_SET tagged with `tok_dollar`; children are `id` then `expr`. */
ASTNode *ast_node_stmt_set(Token tok_dollar, ASTNode *id, ASTNode *expr);

View File

@@ -31,7 +31,13 @@ typedef enum {
ELSE, ELSE,
EOF_TOKEN, EOF_TOKEN,
ERROR ERROR,
// for parser use
PARSER_USE,
VOID,
} TokenType; } TokenType;
typedef struct { typedef struct {
@@ -48,8 +54,43 @@ typedef struct {
/** /**
* AST Node Definitions * AST Node Definitions
*/ */
/* Discriminator stored in ASTNode.kind; one value per kind of AST node. */
typedef enum {
/* top-level structure */
NODE_PROGRAM,
NODE_DEFN,
/* type nodes */
NODE_TYPE_SIMPLE,
NODE_TYPE_COMPLEX,
NODE_TYPE_PARAM,
NODE_TYPE_OUT,
NODE_TYPE_STAR,
/* expressions and their atoms */
NODE_EXPR,
NODE_ID,
NODE_NUM,
NODE_STR,
NODE_STAR,
NODE_ANDREF,
/* lambda pieces */
NODE_PARAM_LIST,
NODE_PARAM,
NODE_LAMBDA,
NODE_COMPOUND,
/* statements */
NODE_STMT_RETURN,
NODE_STMT_EXPR,
NODE_STMT_SET,
NODE_STMT_IF,
} NodeKind;
typedef struct ASTNode { typedef struct ASTNode {
NodeKind kind;
Token token; Token token;
struct ASTNode **children; struct ASTNode **children;
size_t child_count; size_t count;
size_t capacity;
} ASTNode; } ASTNode;

View File

@@ -1,8 +1,10 @@
#pragma once #pragma once
#include <stdint.h> #include <stdint.h>
#include <stdbool.h>
#include "globals.h" #include "globals.h"
#include "lex.h" #include "lex.h"
#include "ast_util.h"
typedef struct Parser { typedef struct Parser {
Lexer *lexer; Lexer *lexer;
@@ -17,10 +19,6 @@ Parser *parser_new(Lexer *lexer);
void parser_free(Parser *parser); void parser_free(Parser *parser);
static void parser_next(Parser *parser);
static void parser_expect(Parser *parser, TokenType type);
/* /*
PARSER PARSE FUNCTIONS PARSER PARSE FUNCTIONS
*/ */
@@ -30,6 +28,10 @@ ASTNode *parser_parse_defn(Parser *parser);
ASTNode *parser_parse_type(Parser *parser); ASTNode *parser_parse_type(Parser *parser);
ASTNode *parser_parse_type_simple(Parser *parser);
ASTNode *parser_parse_type_complex(Parser *parser);
ASTNode *parser_parse_expr(Parser *parser); ASTNode *parser_parse_expr(Parser *parser);
ASTNode *parser_parse_atom(Parser *parser); ASTNode *parser_parse_atom(Parser *parser);

280
src/ast_util.c Normal file
View File

@@ -0,0 +1,280 @@
#include "ast_util.h"
#include "util.h"
/*
 * Allocate a childless AST node of the given kind carrying `token`.
 *
 * The node starts with no child array (children == NULL, count == 0,
 * capacity == 0); ast_node_add_child() grows it on demand.
 *
 * Allocation failure is fatal: the error is reported on stderr and the
 * process exits, so the returned pointer is never NULL.
 */
static ASTNode *ast_node_new(NodeKind kind, Token token) {
    ASTNode *node = malloc(sizeof *node);
    if (node == NULL) {
        perror("ast_node_new: malloc");
        exit(EXIT_FAILURE);
    }
    node->kind = kind;
    node->token = token;
    node->children = NULL;
    node->count = 0;
    node->capacity = 0;
    return node;
}
/*
 * Append `child` to the end of `parent`'s child array.
 *
 * The array starts at 4 slots and doubles whenever it is full. The
 * realloc result goes through a temporary so the existing array is not
 * lost on failure; running out of memory is fatal (reported on stderr,
 * then the process exits).
 */
void ast_node_add_child(ASTNode *parent, ASTNode *child) {
    if (parent->count >= parent->capacity) {
        size_t new_capacity = parent->capacity == 0 ? 4 : parent->capacity * 2;
        ASTNode **grown = realloc(parent->children, new_capacity * sizeof *grown);
        if (grown == NULL) {
            perror("ast_node_add_child: realloc");
            exit(EXIT_FAILURE);
        }
        parent->children = grown;
        parent->capacity = new_capacity;
    }
    parent->children[parent->count++] = child;
}
/*
 * Release `node` and its entire subtree.
 *
 * Children are freed first (recursively), then the child array, then the
 * node itself. Passing NULL is allowed and does nothing, mirroring free().
 */
void ast_node_free(ASTNode *node) {
    if (node == NULL) {
        return;
    }
    for (size_t i = 0; i < node->count; i++) {
        ast_node_free(node->children[i]);
    }
    free(node->children);
    free(node);
}
/*
 * Dump `node` and its subtree to stdout, one node per line.
 *
 * Each line is prefixed with `depth` indent units, followed by the node
 * kind name and the node's token (via print_token). Children are printed
 * with depth + 1. A kind value with no known name prints an empty name.
 */
void ast_node_print(ASTNode *node, int depth) {
    static const char *const kind_names[] = {
        [NODE_PROGRAM] = "NODE_PROGRAM",
        [NODE_DEFN] = "NODE_DEFN",
        [NODE_TYPE_SIMPLE] = "NODE_TYPE_SIMPLE",
        [NODE_TYPE_COMPLEX] = "NODE_TYPE_COMPLEX",
        [NODE_TYPE_PARAM] = "NODE_TYPE_PARAM",
        [NODE_TYPE_OUT] = "NODE_TYPE_OUT",
        [NODE_TYPE_STAR] = "NODE_TYPE_STAR",
        [NODE_EXPR] = "NODE_EXPR",
        [NODE_ID] = "NODE_ID",
        [NODE_NUM] = "NODE_NUM",
        [NODE_STR] = "NODE_STR",
        [NODE_STAR] = "NODE_STAR",
        [NODE_ANDREF] = "NODE_ANDREF",
        [NODE_PARAM_LIST] = "NODE_PARAM_LIST",
        [NODE_PARAM] = "NODE_PARAM",
        [NODE_LAMBDA] = "NODE_LAMBDA",
        [NODE_COMPOUND] = "NODE_COMPOUND",
        [NODE_STMT_RETURN] = "NODE_STMT_RETURN",
        [NODE_STMT_EXPR] = "NODE_STMT_EXPR",
        [NODE_STMT_SET] = "NODE_STMT_SET",
        [NODE_STMT_IF] = "NODE_STMT_IF",
    };
    const size_t known_kinds = sizeof kind_names / sizeof kind_names[0];

    for (int remaining = depth; remaining > 0; remaining--) {
        printf(" ");
    }

    printf("NodeKind: ");
    /* Out-of-range kinds print nothing here, same as falling through a switch. */
    size_t kind_index = (size_t) node->kind;
    if (kind_index < known_kinds && kind_names[kind_index] != NULL) {
        printf("%s", kind_names[kind_index]);
    }

    printf(", Token: ");
    print_token(node->token);
    printf("\n");

    size_t child_index = 0;
    while (child_index < node->count) {
        ast_node_print(node->children[child_index], depth + 1);
        child_index++;
    }
}
/*
NODE SPECIFIC FUNCTIONS
*/
ASTNode *ast_node_program() {
Token tok = (Token) {
.type = EOF_TOKEN,
.line = 0,
.data = {0}};
ASTNode *node =
ast_node_new(NODE_PROGRAM, tok);
return node;
}
/*
 * Build a NODE_DEFN tagged with the VAL token.
 * Children, in order: `type`, `id`, and `expr` when one was supplied.
 */
ASTNode *ast_node_defn(
    Token tok_val /*VAL token*/, ASTNode *type, ASTNode *id, ASTNode *expr) {
    ASTNode *defn = ast_node_new(NODE_DEFN, tok_val);

    ast_node_add_child(defn, type);
    ast_node_add_child(defn, id);
    if (expr) {
        /* the initializer is optional */
        ast_node_add_child(defn, expr);
    }
    return defn;
}
/* Build a childless NODE_TYPE_SIMPLE tagged with the type-name token. */
ASTNode *ast_node_type_simple(Token tok_id) {
    return ast_node_new(NODE_TYPE_SIMPLE, tok_id);
}
/*
 * Build a NODE_TYPE_COMPLEX from its parameter and output parts.
 * Both parts are re-tagged with `tok_bracket` and then attached as
 * children: `type_param` first, `type_out` second.
 */
ASTNode *ast_node_type_complex(
    Token tok_bracket, ASTNode *type_param, ASTNode *type_out) {
    ASTNode *complex_type = ast_node_new(NODE_TYPE_COMPLEX, tok_bracket);

    /* the parts are created with a zeroed token; stamp them with the bracket */
    type_param->token = tok_bracket;
    type_out->token = tok_bracket;

    ast_node_add_child(complex_type, type_param);
    ast_node_add_child(complex_type, type_out);
    return complex_type;
}
ASTNode *ast_node_type_param() {
ASTNode *node =
ast_node_new(NODE_TYPE_PARAM, (Token) {0});
return node;
}
ASTNode *ast_node_type_out() {
ASTNode *node =
ast_node_new(NODE_TYPE_OUT, (Token) {0});
return node;
}
/* Build a childless NODE_TYPE_STAR tagged with the star token. */
ASTNode *ast_node_type_star(Token tok_star) {
    return ast_node_new(NODE_TYPE_STAR, tok_star);
}
ASTNode *ast_node_type_void() {
ASTNode *node =
ast_node_new(NODE_TYPE_SIMPLE, (Token) {.type = VOID, .line = 0, .data = {0}});
return node;
}
/* Build a childless NODE_ID leaf tagged with the identifier token. */
ASTNode *ast_node_id(Token id) {
    return ast_node_new(NODE_ID, id);
}
/* Build a childless NODE_NUM leaf tagged with the number token. */
ASTNode *ast_node_num(Token num) {
    return ast_node_new(NODE_NUM, num);
}
/* Build a childless NODE_STR leaf tagged with the string token. */
ASTNode *ast_node_str(Token str) {
    return ast_node_new(NODE_STR, str);
}
/* Build a childless NODE_STAR leaf tagged with the star token. */
ASTNode *ast_node_star(Token star) {
    return ast_node_new(NODE_STAR, star);
}
/* Build a childless NODE_ANDREF leaf tagged with the given token. */
ASTNode *ast_node_andref(Token andref) {
    return ast_node_new(NODE_ANDREF, andref);
}
/* Build an empty NODE_EXPR tagged with `caller`; atoms are attached by the caller. */
ASTNode *ast_node_expr(Token caller) {
    return ast_node_new(NODE_EXPR, caller);
}
/* Build an empty NODE_PARAM_LIST tagged with the opening-paren token. */
ASTNode *ast_node_param_list(Token tok_lparen) {
    return ast_node_new(NODE_PARAM_LIST, tok_lparen);
}
/*
 * Build a NODE_PARAM. The parameter name lives in the node's own token
 * (`id`); the parameter's type is its only child.
 */
ASTNode *ast_node_param(Token id, ASTNode *type) {
    ASTNode *param = ast_node_new(NODE_PARAM, id);
    ast_node_add_child(param, type);
    return param;
}
/* Build an empty NODE_COMPOUND tagged with the opening-curly token. */
ASTNode *ast_node_compound(Token tok_lcurly) {
    return ast_node_new(NODE_COMPOUND, tok_lcurly);
}
/*
 * Build a NODE_LAMBDA. Its token is copied from `param_list`; children
 * are `param_list` followed by `body`.
 */
ASTNode *ast_node_lambda(ASTNode *param_list, ASTNode *body) {
    Token lambda_tok = param_list->token;
    ASTNode *lambda = ast_node_new(NODE_LAMBDA, lambda_tok);

    ast_node_add_child(lambda, param_list);
    ast_node_add_child(lambda, body);
    return lambda;
}
/*
 * Build a NODE_STMT_RETURN tagged with the return token. The returned
 * expression is optional: it becomes the only child when `expr` is not NULL.
 */
ASTNode *ast_node_stmt_return(Token tok_return, ASTNode *expr) {
    ASTNode *ret_stmt = ast_node_new(NODE_STMT_RETURN, tok_return);
    if (expr) {
        ast_node_add_child(ret_stmt, expr);
    }
    return ret_stmt;
}
/*
 * Wrap `expr` in a NODE_STMT_EXPR. The statement copies the expression's
 * token and takes the expression as its only child.
 */
ASTNode *ast_node_stmt_expr(ASTNode *expr) {
    Token stmt_tok = expr->token;
    ASTNode *stmt = ast_node_new(NODE_STMT_EXPR, stmt_tok);
    ast_node_add_child(stmt, expr);
    return stmt;
}
/*
 * Build a NODE_STMT_IF tagged with the if token.
 * Children, in order: `cond`, `then_branch`, and `else_branch` when one
 * was supplied (so the node has 2 or 3 children).
 */
ASTNode *ast_node_stmt_if(
    Token tok_if,
    ASTNode *cond, /* compound */
    ASTNode *then_branch, /* compound */
    ASTNode *else_branch /* compound */) {
    ASTNode *if_stmt = ast_node_new(NODE_STMT_IF, tok_if);

    ast_node_add_child(if_stmt, cond);
    ast_node_add_child(if_stmt, then_branch);
    if (else_branch) {
        ast_node_add_child(if_stmt, else_branch);
    }
    return if_stmt;
}
/*
 * Build a NODE_STMT_SET tagged with `dollar`.
 * Children, in order: the target `id` and the assigned `expr`.
 */
ASTNode *ast_node_stmt_set(Token dollar, ASTNode *id, ASTNode *expr) {
    ASTNode *set_stmt = ast_node_new(NODE_STMT_SET, dollar);

    ast_node_add_child(set_stmt, id);
    ast_node_add_child(set_stmt, expr);
    return set_stmt;
}

View File

@@ -1,11 +1,25 @@
#include "globals.h" #include "globals.h"
#include "lex.h" #include "lex.h"
#include "util.h" #include "util.h"
#include "parse.h"
int main() { #define SCAN 0
#define PARSE 1
int main(int argc, char **argv) {
Lexer *lexer = lexer_new(); Lexer *lexer = lexer_new();
FILE * f;
FILE *f = fopen("test.cval", "r"); if (argc > 2) {
printf("Usage: %s [source_file]\n", argv[0]);
return 1;
}
else if (argc == 2) {
f = fopen(argv[1], "r");
} else {
f = fopen("test.cval", "r");
}
if (f == NULL) { if (f == NULL) {
perror("Failed to open file"); perror("Failed to open file");
@@ -14,14 +28,32 @@ int main() {
lexer_set_source(lexer, f); lexer_set_source(lexer, f);
#if SCAN
Token tok; Token tok;
do { do {
tok = lexer_next_token(lexer); tok = lexer_next_token(lexer);
print_token(tok); print_token(tok);
printf("\n");
} while(tok.type != EOF_TOKEN && tok.type != ERROR); } while(tok.type != EOF_TOKEN && tok.type != ERROR);
#endif
#if PARSE
Parser *parser = parser_new(lexer);
ASTNode *ast_root = parser_parse_program(parser);
if (parser->flag_error) {
printf("Parsing failed due to errors.\n");
} else {
printf("Parsing succeeded.\n");
ast_node_print(ast_root, 0);
}
#endif
fclose(f); fclose(f);
return 0; return 0;
} }

View File

@@ -2,6 +2,10 @@
#include <stdlib.h> #include <stdlib.h>
static void parser_next(Parser *parser);
static bool parser_expect(Parser *parser, TokenType type);
Parser *parser_new(Lexer *lexer) { Parser *parser_new(Lexer *lexer) {
Parser *parser = malloc(sizeof(Parser)); Parser *parser = malloc(sizeof(Parser));
if (parser == NULL) { if (parser == NULL) {
@@ -25,14 +29,218 @@ static void parser_next(Parser *parser) {
parser->peek = lexer_next_token(parser->lexer); parser->peek = lexer_next_token(parser->lexer);
} }
static void parser_expect(Parser *parser, TokenType type) { static bool parser_expect(Parser *parser, TokenType type) {
if (parser->current.type == type) { if (parser->current.type == type) {
parser_next(parser); parser_next(parser);
return true;
} else { } else {
parser->flag_error = 1; parser->flag_error = 1;
return false;
} }
} }
/* /*
IMPL. PARSER PARSE FUNCTIONS IMPL. PARSER PARSE FUNCTIONS
*/ */
/*
 * Parse a whole program: a sequence of definitions, each starting with VAL.
 *
 * Returns the NODE_PROGRAM root holding one NODE_DEFN child per
 * definition. On failure the partial tree is freed, parser->flag_error
 * is set and NULL is returned.
 *
 * The input must be fully consumed: any token other than EOF_TOKEN left
 * after the last definition is a syntax error rather than being silently
 * ignored.
 */
ASTNode *parser_parse_program(Parser *parser) {
    ASTNode *root = ast_node_program();
    while (parser->current.type == VAL) {
        ASTNode *defn_node = parser_parse_defn(parser);
        if (defn_node == NULL) {
            ast_node_free(root);
            parser->flag_error = 1;
            return NULL;
        }
        ast_node_add_child(root, defn_node);
    }
    if (parser->current.type != EOF_TOKEN) {
        /* trailing input that does not start a definition */
        ast_node_free(root);
        parser->flag_error = 1;
        return NULL;
    }
    return root;
}
/*
 * Parse one definition: VAL type ID [expr] SEMI.
 *
 * Returns a NODE_DEFN owning the type, identifier and optional
 * initializer nodes. On any syntax error NULL is returned with
 * parser->flag_error set, and every node built so far is freed so a
 * failed parse does not leak.
 */
ASTNode *parser_parse_defn(Parser *parser) {
    ASTNode *type_node = NULL;
    ASTNode *id_node = NULL;
    ASTNode *expr_node = NULL;

    Token val_tok = parser->current;
    if (!parser_expect(parser, VAL)) { /* must start with VAL */
        return NULL;
    }

    type_node = parser_parse_type(parser);
    if (type_node == NULL) {
        goto fail;
    }

    if (parser->current.type != ID) {
        parser->flag_error = 1;
        goto fail;
    }
    id_node = ast_node_id(parser->current);
    parser_next(parser);

    /* anything before the semicolon is the initializer expression */
    if (parser->current.type != SEMI) {
        expr_node = parser_parse_expr(parser);
        if (expr_node == NULL) {
            parser->flag_error = 1;
            goto fail;
        }
    }

    if (!parser_expect(parser, SEMI)) {
        goto fail;
    }

    return ast_node_defn(val_tok, type_node, id_node, expr_node);

fail:
    /* only nodes that were actually created are released */
    if (expr_node != NULL) {
        ast_node_free(expr_node);
    }
    if (id_node != NULL) {
        ast_node_free(id_node);
    }
    if (type_node != NULL) {
        ast_node_free(type_node);
    }
    return NULL;
}
/*
PARSING TYPE
*/
/*
 * Parse a type, dispatching on the current token: ID starts a simple
 * type, LBRACK starts a bracketed (complex) type. Any other token sets
 * parser->flag_error and yields NULL.
 */
ASTNode *parser_parse_type(Parser *parser) {
    switch (parser->current.type) {
    case ID:
        return parser_parse_type_simple(parser);
    case LBRACK:
        return parser_parse_type_complex(parser);
    default:
        parser->flag_error = 1;
        return NULL;
    }
}
/*
 * Parse a simple type: an ID followed by zero or more STAR tokens.
 * Returns a NODE_TYPE_SIMPLE tagged with the ID, with one NODE_TYPE_STAR
 * child per star; returns NULL when the current token is not an ID.
 */
ASTNode *parser_parse_type_simple(Parser *parser) {
    Token name_tok = parser->current;
    if (!parser_expect(parser, ID)) {
        return NULL;
    }

    ASTNode *simple = ast_node_type_simple(name_tok);
    /* each trailing star becomes a child, then is consumed */
    for (; parser->current.type == STAR; parser_next(parser)) {
        ast_node_add_child(simple, ast_node_type_star(parser->current));
    }
    return simple;
}
/* Free the first `n` nodes of `nodes`; unwinds a partially parsed type list. */
static void parser_discard_nodes(ASTNode **nodes, size_t n) {
    for (size_t i = 0; i < n; i++) {
        ast_node_free(nodes[i]);
    }
}

/*
 * Parse a bracketed type. The current token is the opening bracket
 * (parser_parse_type dispatches here on LBRACK).
 *
 *   [T1 T2 ... -> R]  NODE_TYPE_COMPLEX(NODE_TYPE_PARAM(T...), NODE_TYPE_OUT(R))
 *   [T1 ... ->]       same, with a void output type
 *   [T]               just T (the brackets only group)
 *   []                the void type
 *
 * Returns NULL with parser->flag_error set on a syntax error: more than
 * one type without an arrow, a missing closing bracket, or more types
 * than the fixed-size buffer holds. Every node built before the error is
 * freed.
 */
ASTNode *parser_parse_type_complex(Parser *parser) {
    Token tok = parser->current;
    parser_next(parser);

    ASTNode *types[256];
    const size_t max_types = sizeof types / sizeof types[0];
    size_t cnt = 0;

    while (parser->current.type != RBRACK && parser->current.type != ARROW) {
        if (cnt == max_types) {
            /* refuse to write past the end of the buffer */
            parser->flag_error = 1;
            parser_discard_nodes(types, cnt);
            return NULL;
        }
        ASTNode *type = parser_parse_type(parser);
        if (type == NULL) {
            parser_discard_nodes(types, cnt);
            return NULL;
        }
        types[cnt++] = type;
    }

    ASTNode *ret = NULL;
    if (parser->current.type == ARROW) {
        parser_next(parser);

        ASTNode *type_ret = NULL;
        if (parser->current.type != RBRACK) {
            type_ret = parser_parse_type(parser);
            if (type_ret == NULL) {
                parser_discard_nodes(types, cnt);
                return NULL;
            }
        } else {
            /* "->" directly followed by "]": no output type */
            type_ret = ast_node_type_void();
        }

        ASTNode *type_param = ast_node_type_param();
        for (size_t i = 0; i < cnt; i++) {
            ast_node_add_child(type_param, types[i]);
        }
        ASTNode *type_out = ast_node_type_out();
        ast_node_add_child(type_out, type_ret);
        ret = ast_node_type_complex(tok, type_param, type_out);
    } else {
        /* The loop only stops on ARROW or RBRACK, so this is RBRACK. */
        if (cnt >= 2) {
            parser->flag_error = 1; /* too many args */
            parser_discard_nodes(types, cnt);
            return NULL;
        }
        ret = (cnt == 1) ? types[0] : ast_node_type_void();
    }

    /* The closing bracket is mandatory; do not swallow some other token. */
    if (parser->current.type != RBRACK) {
        parser->flag_error = 1;
        ast_node_free(ret);
        return NULL;
    }
    parser_next(parser);
    return ret;
}
/*
PARSING EXPR
*/
/*
 * Parse an expression: one or more consecutive atoms.
 *
 * Returns a NODE_EXPR tagged with the first atom's token, with every atom
 * as a child in source order. When no atom is present at all,
 * parser->flag_error is set and NULL is returned.
 *
 * Atoms are appended straight to the node, so the number of atoms in an
 * expression is not limited by a fixed-size buffer.
 */
ASTNode *parser_parse_expr(Parser *parser) {
    ASTNode *atom_head = parser_parse_atom(parser);
    if (atom_head == NULL) {
        parser->flag_error = 1;
        return NULL;
    }

    ASTNode *node = ast_node_expr(atom_head->token);
    ast_node_add_child(node, atom_head);

    /* keep taking atoms until the current token cannot start one */
    for (ASTNode *atom = parser_parse_atom(parser);
         atom != NULL;
         atom = parser_parse_atom(parser)) {
        ast_node_add_child(node, atom);
    }
    return node;
}
/*
 * Parse a single atom: NUM, ID, STRING_LITERAL, STAR or ANDREF.
 *
 * On a match the token is consumed and the corresponding leaf node is
 * returned. For any other token nothing is consumed, no error flag is
 * set, and NULL is returned.
 */
ASTNode *parser_parse_atom(Parser *parser) {
    ASTNode *(*make_leaf)(Token) = NULL;

    switch (parser->current.type) {
    case NUM:
        make_leaf = ast_node_num;
        break;
    case ID:
        make_leaf = ast_node_id;
        break;
    case STRING_LITERAL:
        make_leaf = ast_node_str;
        break;
    case STAR:
        make_leaf = ast_node_star;
        break;
    case ANDREF:
        make_leaf = ast_node_andref;
        break;
    default:
        return NULL;
    }

    /* remember the token before advancing past it */
    Token leaf_tok = parser->current;
    parser_next(parser);
    return make_leaf(leaf_tok);
}

View File

@@ -103,5 +103,4 @@ void print_token(Token tok) {
if (tok.type == ID || tok.type == NUM || tok.type == STRING_LITERAL) { if (tok.type == ID || tok.type == NUM || tok.type == STRING_LITERAL) {
printf("Data: %s", tok.data.string); printf("Data: %s", tok.data.string);
} }
printf("\n");
} }

9
test2.cval Normal file
View File

@@ -0,0 +1,9 @@
val [->int] main;
val char* line;
val size_t line get_line_ptr stdin line;
val char first *line;
val [int [int->size_t]->size_t] functor;