#include #include #include #include #include #include "oberon.h" #include "generator.h" enum { EOF_ = 0, IDENT, MODULE, SEMICOLON, END, DOT, VAR, COLON, BEGIN, ASSIGN, INTEGER, TRUE, FALSE, LPAREN, RPAREN, EQUAL, NEQ, LESS, LEQ, GREAT, GEQ, PLUS, MINUS, OR, STAR, SLASH, DIV, MOD, AND, NOT, PROCEDURE }; // ======================================================================= // UTILS // ======================================================================= void oberon_error(oberon_context_t * ctx, const char * fmt, ...) { va_list ptr; va_start(ptr, fmt); fprintf(stderr, "error: "); vfprintf(stderr, fmt, ptr); fprintf(stderr, "\n"); fprintf(stderr, " code_index = %i\n", ctx -> code_index); fprintf(stderr, " c = %c\n", ctx -> c); fprintf(stderr, " token = %i\n", ctx -> token); va_end(ptr); exit(1); } // ======================================================================= // TABLE // ======================================================================= static oberon_type_t * oberon_find_type(oberon_context_t * ctx, char * name) { oberon_type_t * x = ctx -> types; while(x -> next && strcmp(x -> next -> name, name) != 0) { x = x -> next; } return x -> next; } static oberon_var_t * oberon_find_var(oberon_context_t * ctx, char * name) { oberon_var_t * x = ctx -> mod -> vars; while(x -> next && strcmp(x -> next -> name, name) != 0) { x = x -> next; } return x -> next; } static void oberon_define_var(oberon_context_t * ctx, char * name, oberon_type_t * type) { oberon_var_t * x = ctx -> mod -> vars; while(x -> next && strcmp(x -> next -> name, name) != 0) { x = x -> next; } if(x -> next) { oberon_error(ctx, "already defined"); } oberon_var_t * newvar = malloc(sizeof *newvar); memset(newvar, 0, sizeof *newvar); newvar -> name = name; newvar -> type = type; oberon_generator_init_var(ctx, newvar); x -> next = newvar; } // ======================================================================= // SCANER // ======================================================================= static void oberon_get_char(oberon_context_t * ctx) { ctx -> code_index += 1; ctx -> c = ctx -> code[ctx -> code_index]; } static void oberon_init_scaner(oberon_context_t * ctx, const char * code) { ctx -> code = code; ctx -> code_index = 0; ctx -> c = ctx -> code[ctx -> code_index]; } static void oberon_read_ident(oberon_context_t * ctx) { int len = 0; int i = ctx -> code_index; int c = ctx -> code[i]; while(isalnum(c)) { i += 1; len += 1; c = ctx -> code[i]; } char * ident = malloc(len + 1); memcpy(ident, &ctx->code[ctx->code_index], len); ident[len] = 0; ctx -> code_index = i; ctx -> c = ctx -> code[i]; ctx -> string = ident; ctx -> token = IDENT; if(strcmp(ident, "MODULE") == 0) { ctx -> token = MODULE; } else if(strcmp(ident, "END") == 0) { ctx -> token = END; } else if(strcmp(ident, "VAR") == 0) { ctx -> token = VAR; } else if(strcmp(ident, "BEGIN") == 0) { ctx -> token = BEGIN; } else if(strcmp(ident, "TRUE") == 0) { ctx -> token = TRUE; } else if(strcmp(ident, "FALSE") == 0) { ctx -> token = FALSE; } else if(strcmp(ident, "OR") == 0) { ctx -> token = OR; } else if(strcmp(ident, "DIV") == 0) { ctx -> token = DIV; } else if(strcmp(ident, "MOD") == 0) { ctx -> token = MOD; } else if(strcmp(ident, "PROCEDURE") == 0) { ctx -> token = PROCEDURE; } } static void oberon_read_integer(oberon_context_t * ctx) { int len = 0; int i = ctx -> code_index; int c = ctx -> code[i]; while(isdigit(c)) { i += 1; len += 1; c = ctx -> code[i]; } char * ident = malloc(len + 2); memcpy(ident, &ctx->code[ctx->code_index], len); ident[len + 1] = 0; ctx -> code_index = i; ctx -> c = ctx -> code[i]; ctx -> string = ident; ctx -> integer = atoi(ident); ctx -> token = INTEGER; } static void oberon_skip_space(oberon_context_t * ctx) { while(isspace(ctx -> c)) { oberon_get_char(ctx); } } static void oberon_read_symbol(oberon_context_t * ctx) { int c = ctx -> c; switch(c) { case 0: ctx -> token = EOF_; break; case ';': ctx -> token = SEMICOLON; oberon_get_char(ctx); break; case ':': ctx -> token = COLON; oberon_get_char(ctx); if(ctx -> c == '=') { ctx -> token = ASSIGN; oberon_get_char(ctx); } break; case '.': ctx -> token = DOT; oberon_get_char(ctx); break; case '(': ctx -> token = LPAREN; oberon_get_char(ctx); break; case ')': ctx -> token = RPAREN; oberon_get_char(ctx); break; case '=': ctx -> token = EQUAL; oberon_get_char(ctx); break; case '#': ctx -> token = NEQ; oberon_get_char(ctx); break; case '<': ctx -> token = LESS; oberon_get_char(ctx); if(ctx -> c == '=') { ctx -> token = LEQ; oberon_get_char(ctx); } break; case '>': ctx -> token = GREAT; oberon_get_char(ctx); if(ctx -> c == '=') { ctx -> token = GEQ; oberon_get_char(ctx); } break; case '+': ctx -> token = PLUS; oberon_get_char(ctx); break; case '-': ctx -> token = MINUS; oberon_get_char(ctx); break; case '*': ctx -> token = STAR; oberon_get_char(ctx); break; case '/': ctx -> token = SLASH; oberon_get_char(ctx); break; case '&': ctx -> token = AND; oberon_get_char(ctx); break; case '~': ctx -> token = NOT; oberon_get_char(ctx); break; default: oberon_error(ctx, "invalid char"); break; } } static void oberon_read_token(oberon_context_t * ctx) { oberon_skip_space(ctx); int c = ctx -> c; if(isalpha(c)) { oberon_read_ident(ctx); } else if(isdigit(c)) { oberon_read_integer(ctx); } else { oberon_read_symbol(ctx); } } // ======================================================================= // EXPRESSION // ======================================================================= static void oberon_expect_token(oberon_context_t * ctx, int token); static oberon_expr_t * oberon_expr(oberon_context_t * ctx); static void oberon_assert_token(oberon_context_t * ctx, int token); static char * oberon_assert_ident(oberon_context_t * ctx); static oberon_expr_t * oberon_new_operator(int op, oberon_type_t * result, oberon_expr_t * left, oberon_expr_t * right) { oberon_oper_t * operator; operator = malloc(sizeof *operator); memset(operator, 0, sizeof *operator); operator -> is_item = 0; operator -> result = result; operator -> op = op; operator -> left = left; operator -> right = right; return (oberon_expr_t *) operator; } static oberon_expr_t * oberon_new_item(int mode, oberon_type_t * result) { oberon_item_t * item; item = malloc(sizeof *item); memset(item, 0, sizeof *item); item -> is_item = 1; item -> result = result; item -> mode = mode; return (oberon_expr_t *)item; } static oberon_expr_t * oberon_make_unary_op(oberon_context_t * ctx, int token, oberon_expr_t * a) { oberon_expr_t * expr; oberon_type_t * result; result = a -> result; if(token == MINUS) { if(result -> class != OBERON_TYPE_INTEGER) { oberon_error(ctx, "incompatible operator type"); } expr = oberon_new_operator(OP_UNARY_MINUS, result, a, NULL); } else if(token == NOT) { if(result -> class != OBERON_TYPE_BOOLEAN) { oberon_error(ctx, "incompatible operator type"); } expr = oberon_new_operator(OP_LOGIC_NOT, result, a, NULL); } else { oberon_error(ctx, "oberon_make_unary_op: wat"); } return expr; } static oberon_expr_t * oberon_factor(oberon_context_t * ctx) { char * name; oberon_var_t * var; oberon_expr_t * expr; switch(ctx -> token) { case IDENT: name = oberon_assert_ident(ctx); var = oberon_find_var(ctx, name); if(var == NULL) { oberon_error(ctx, "undefined variable %s", name); } expr = oberon_new_item(MODE_VAR, var -> type); expr -> item.var = var; break; case INTEGER: expr = oberon_new_item(MODE_INTEGER, ctx -> int_type); expr -> item.integer = ctx -> integer; oberon_assert_token(ctx, INTEGER); break; case TRUE: expr = oberon_new_item(MODE_BOOLEAN, ctx -> bool_type); expr -> item.boolean = 1; oberon_assert_token(ctx, TRUE); break; case FALSE: expr = oberon_new_item(MODE_BOOLEAN, ctx -> bool_type); expr -> item.boolean = 0; oberon_assert_token(ctx, FALSE); break; case LPAREN: oberon_assert_token(ctx, LPAREN); expr = oberon_expr(ctx); oberon_assert_token(ctx, RPAREN); break; case NOT: oberon_assert_token(ctx, NOT); expr = oberon_factor(ctx); expr = oberon_make_unary_op(ctx, NOT, expr); break; default: oberon_error(ctx, "invalid expression"); } return expr; } /* * oberon_autocast_binary_op автоматически переобразовывеат тип по след. правилам: * 1. Классы обоих типов должны быть одинаковы * 2. В качестве результата должен быть выбран больший тип. * 3. Если размер результат не должен быть меньше чем базовый int */ static void oberon_autocast_binary_op(oberon_context_t * ctx, oberon_type_t * a, oberon_type_t * b, oberon_type_t ** result) { if((a -> class) != (b -> class)) { oberon_error(ctx, "incompatible types"); } if((a -> size) > (b -> size)) { *result = a; } else { *result = b; } if(((*result) -> class) == OBERON_TYPE_INTEGER) { if(((*result) -> size) < (ctx -> int_type -> size)) { *result = ctx -> int_type; } } /* TODO: cast types */ } #define ITMAKESBOOLEAN(x) \ (((x) >= EQUAL && (x) <= GEQ) || ((x) == OR) || ((x) == AND)) #define ITUSEONLYINTEGER(x) \ ((x) >= LESS && (x) <= GEQ) #define ITUSEONLYBOOLEAN(x) \ (((x) == OR) || ((x) == AND)) static oberon_expr_t * oberon_make_bin_op(oberon_context_t * ctx, int token, oberon_expr_t * a, oberon_expr_t * b) { oberon_expr_t * expr; oberon_type_t * result; oberon_autocast_binary_op(ctx, a -> result, b -> result, &result); if(ITMAKESBOOLEAN(token)) { if(ITUSEONLYINTEGER(token)) { if(a -> result -> class != OBERON_TYPE_INTEGER && b -> result -> class != OBERON_TYPE_INTEGER) { oberon_error(ctx, "used only with integer types"); } } else if(ITUSEONLYBOOLEAN(token)) { if(a -> result -> class != OBERON_TYPE_BOOLEAN && b -> result -> class != OBERON_TYPE_BOOLEAN) { oberon_error(ctx, "used only with boolean type"); } } if(token == EQUAL) { expr = oberon_new_operator(OP_EQ, result, a, b); } else if(token == NEQ) { expr = oberon_new_operator(OP_NEQ, result, a, b); } else if(token == LESS) { expr = oberon_new_operator(OP_LSS, result, a, b); } else if(token == LEQ) { expr = oberon_new_operator(OP_LEQ, result, a, b); } else if(token == GREAT) { expr = oberon_new_operator(OP_GRT, result, a, b); } else if(token == GEQ) { expr = oberon_new_operator(OP_GEQ, result, a, b); } else if(token == OR) { expr = oberon_new_operator(OP_LOGIC_OR, result, a, b); } else if(token == AND) { expr = oberon_new_operator(OP_LOGIC_AND, result, a, b); } else { oberon_error(ctx, "oberon_make_bin_op: bool wat"); } } else { if(token == PLUS) { expr = oberon_new_operator(OP_ADD, result, a, b); } else if(token == MINUS) { expr = oberon_new_operator(OP_SUB, result, a, b); } else if(token == STAR) { expr = oberon_new_operator(OP_MUL, result, a, b); } else if(token == SLASH) { expr = oberon_new_operator(OP_DIV, result, a, b); } else if(token == DIV) { expr = oberon_new_operator(OP_DIV, result, a, b); } else if(token == MOD) { expr = oberon_new_operator(OP_MOD, result, a, b); } else { oberon_error(ctx, "oberon_make_bin_op: bin wat"); } } return expr; } #define ISMULOP(x) \ ((x) >= STAR && (x) <= AND) static oberon_expr_t * oberon_term_expr(oberon_context_t * ctx) { oberon_expr_t * expr; expr = oberon_factor(ctx); while(ISMULOP(ctx -> token)) { int token = ctx -> token; oberon_read_token(ctx); oberon_expr_t * inter = oberon_factor(ctx); expr = oberon_make_bin_op(ctx, token, expr, inter); } return expr; } #define ISADDOP(x) \ ((x) >= PLUS && (x) <= OR) static oberon_expr_t * oberon_simple_expr(oberon_context_t * ctx) { oberon_expr_t * expr; int minus = 0; if(ctx -> token == PLUS) { minus = 0; oberon_assert_token(ctx, PLUS); } else if(ctx -> token == MINUS) { minus = 1; oberon_assert_token(ctx, MINUS); } expr = oberon_term_expr(ctx); while(ISADDOP(ctx -> token)) { int token = ctx -> token; oberon_read_token(ctx); oberon_expr_t * inter = oberon_term_expr(ctx); expr = oberon_make_bin_op(ctx, token, expr, inter); } if(minus) { expr = oberon_make_unary_op(ctx, MINUS, expr); } return expr; } #define ISRELATION(x) \ ((x) >= EQUAL && (x) <= GEQ) static oberon_expr_t * oberon_expr(oberon_context_t * ctx) { oberon_expr_t * expr; expr = oberon_simple_expr(ctx); while(ISRELATION(ctx -> token)) { int token = ctx -> token; oberon_read_token(ctx); oberon_expr_t * inter = oberon_simple_expr(ctx); expr = oberon_make_bin_op(ctx, token, expr, inter); } return expr; } // ======================================================================= // PARSER // ======================================================================= static void oberon_statement_seq(oberon_context_t * ctx); static void oberon_expect_token(oberon_context_t * ctx, int token) { if(ctx -> token != token) { oberon_error(ctx, "unexpected token %i (%i)", ctx -> token, token); } } static void oberon_assert_token(oberon_context_t * ctx, int token) { oberon_expect_token(ctx, token); oberon_read_token(ctx); } static char * oberon_assert_ident(oberon_context_t * ctx) { oberon_expect_token(ctx, IDENT); char * ident = ctx -> string; oberon_read_token(ctx); return ident; } static oberon_type_t * oberon_type(oberon_context_t * ctx) { char * name = oberon_assert_ident(ctx); oberon_type_t * type = oberon_find_type(ctx, name); if(type == NULL) { oberon_error(ctx, "undefined type"); } return type; } static void oberon_var_decl(oberon_context_t * ctx) { char * name = oberon_assert_ident(ctx); oberon_assert_token(ctx, COLON); oberon_type_t * type = oberon_type(ctx); oberon_define_var(ctx, name, type); } static void oberon_make_procedure_begin(oberon_context_t * ctx, char * name) { } static void oberon_make_procedure_end(oberon_context_t * ctx) { } static void oberon_proc_decl(oberon_context_t * ctx) { oberon_assert_token(ctx, PROCEDURE); char * name; name = oberon_assert_ident(ctx); oberon_assert_token(ctx, SEMICOLON); oberon_make_procedure_begin(ctx, name); if(ctx -> token == BEGIN) { oberon_assert_token(ctx, BEGIN); oberon_statement_seq(ctx); } oberon_make_procedure_end(ctx); oberon_assert_token(ctx, END); char * name2 = oberon_assert_ident(ctx); if(strcmp(name2, name) != 0) { oberon_error(ctx, "procedure name not matched"); } } static void oberon_decl_seq(oberon_context_t * ctx) { if(ctx -> token == VAR) { oberon_assert_token(ctx, VAR); while(ctx -> token == IDENT) { oberon_var_decl(ctx); oberon_assert_token(ctx, SEMICOLON); } } if(ctx -> token == PROCEDURE) { oberon_proc_decl(ctx); oberon_assert_token(ctx, SEMICOLON); } } static void oberon_assign(oberon_context_t * ctx, oberon_expr_t * src, oberon_expr_t * dst) { if(src -> result -> class != dst -> result -> class) { oberon_error(ctx, "incompatible assignment types"); } if(dst -> result -> class == OBERON_TYPE_INTEGER) { if((dst -> result -> size) < (src -> result -> size)) { oberon_error(ctx, "incompatible assignment type size"); } } oberon_generate_assign(ctx, src, dst); } static void oberon_statement(oberon_context_t * ctx) { oberon_expr_t * item1; oberon_expr_t * item2; if(ctx -> token == IDENT) { item1 = oberon_expr(ctx); oberon_assert_token(ctx, ASSIGN); item2 = oberon_expr(ctx); oberon_assign(ctx, item2, item1); } } static void oberon_statement_seq(oberon_context_t * ctx) { oberon_statement(ctx); while(ctx -> token == SEMICOLON) { oberon_assert_token(ctx, SEMICOLON); oberon_statement(ctx); } } static void oberon_parse_module(oberon_context_t * ctx) { char *name1, *name2; oberon_read_token(ctx); oberon_assert_token(ctx, MODULE); name1 = oberon_assert_ident(ctx); oberon_assert_token(ctx, SEMICOLON); ctx -> mod -> name = name1; oberon_decl_seq(ctx); if(ctx -> token == BEGIN) { oberon_assert_token(ctx, BEGIN); oberon_generate_begin_module(ctx); oberon_statement_seq(ctx); oberon_generate_end_module(ctx); } oberon_assert_token(ctx, END); name2 = oberon_assert_ident(ctx); oberon_assert_token(ctx, DOT); if(strcmp(name1, name2) != 0) { oberon_error(ctx, "module name not matched"); } } // ======================================================================= // LIBRARY // ======================================================================= static oberon_type_t * oberon_register_global_type_ret(oberon_context_t * ctx, oberon_type_t * type) { oberon_type_t * x = ctx -> types; while(x -> next && strcmp(x -> next -> name, type -> name) != 0) { x = x -> next; } if(x -> next) { oberon_error(ctx, "already defined"); } // TODO: copy type name (not a pointer) oberon_type_t * newtype = malloc(sizeof *newtype); memcpy(newtype, type, sizeof *newtype); newtype -> next = NULL; oberon_generator_init_type(ctx, newtype); x -> next = newtype; return newtype; } static void register_default_types(oberon_context_t * ctx) { static oberon_type_t integer = { "INTEGER", OBERON_TYPE_INTEGER, sizeof(int) }; static oberon_type_t boolean = { "BOOLEAN", OBERON_TYPE_BOOLEAN, sizeof(int) }; ctx -> int_type = oberon_register_global_type_ret(ctx, &integer); ctx -> bool_type = oberon_register_global_type_ret(ctx, &boolean); } void oberon_register_global_type(oberon_context_t * ctx, oberon_type_t * type) { oberon_register_global_type_ret(ctx, type); } oberon_context_t * oberon_create_context() { oberon_context_t * ctx = malloc(sizeof *ctx); memset(ctx, 0, sizeof *ctx); oberon_type_t * types = malloc(sizeof *types); memset(types, 0, sizeof *types); ctx -> types = types; oberon_generator_init_context(ctx); register_default_types(ctx); return ctx; } void oberon_destroy_context(oberon_context_t * ctx) { oberon_generator_destroy_context(ctx); free(ctx); } oberon_module_t * oberon_compile_module(oberon_context_t * ctx, const char * code) { oberon_module_t * mod = malloc(sizeof *mod); memset(mod, 0, sizeof *mod); oberon_var_t * vars = malloc(sizeof *vars); memset(vars, 0, sizeof *vars); ctx -> mod = mod; ctx -> mod -> vars = vars; oberon_init_scaner(ctx, code); oberon_parse_module(ctx); oberon_generate_code(ctx); return mod; }