#include #include #include #include #include "tmpasm.h" #include "debug.h" #define is_space(c) (((c) == ' ') || ((c) == '\t')) #define is_sep(c) (((c) == '\n') || ((c) == '\r') || ((c) == ';')) #define is_addr(c) ( (((c) >= '0') && ((c) <= '9')) || (((c) >= 'a') && ((c) <= 'z')) || (((c) >= 'A') && ((c) <= 'Z')) || ((c) == '_') || ((c) == '?') || ((c) == '.') || ((c) == ',') || ((c) == ',') || ((c) == '-') || ((c) == '/') || ((c) == '&') ) /* this local copy is to make tmpasm compile independently */ static char *strclone(const char *str) { int l; char *ret; if (str == NULL) return NULL; l = strlen(str)+1; ret = malloc(l); memcpy(ret, str, l); return ret; } #define TOP ctx->st static const char *kw_names[] = {"-", "if", "then", "else", "end", "foreach", "in", "switch", "case", "default", "nop", NULL }; static tmpasm_kw_t kw_lookup(const char *str) { const char **k; tmpasm_kw_t i; /* slow linear search is enough: we have only a few keywords */ for(k = kw_names, i = KW_none; *k != NULL; k++,i++) if (strcmp(*k, str) == 0) return i; return KW_none; } tmpasm_exec_t *code_new(tmpasm_kw_t kw) { tmpasm_exec_t *c; c = calloc(sizeof(tmpasm_exec_t), 1); c->kw = kw; return c; } /*tmpasm_exec_t *code_end(tmpasm_exec_t *start) { while(start->next != NULL) start = start->next; return start; }*/ tmpasm_exec_t *code_append(tmpasm_t *ctx, tmpasm_kw_t kw) { tmpasm_exec_t *c; /* c = code_end(TOP->code);*/ c = TOP->last_code; if (TOP->last_code->kw != KW_NOP) { c->next = code_new(kw); return c->next; } c->kw = kw; return c; } static void error(tmpasm_t *ctx, char c, char *msg) { fprintf(stderr, "error: %s at %d:%d\n", msg, ctx->line, ctx->col); if (c != 0) fprintf(stderr, " character last seen: %c\n", c); ctx->dead = 1; } static void push(tmpasm_t *ctx, tmpasm_kw_t kw, tmpasm_state_t st, tmpasm_exec_t *code) { tmpasm_stack_t *new; new = calloc(sizeof(tmpasm_stack_t), 1); new->kw = kw; new->state = st; new->next = ctx->st; new->last_code = code; ctx->st = new; } static void pop_(tmpasm_t *ctx, int chk_underfl) { tmpasm_stack_t *old; old = ctx->st; ctx->st = old->next; /* stack underflow? */ if (chk_underfl) { if (TOP == NULL) { error(ctx, 0, "Excess \"end\""); TOP = old; return; } } if (old->argv != NULL) free(old->argv); if (old->argend != NULL) free(old->argend); if (old->arg_used != NULL) free(old->arg_used); if (old->arg_alloced != NULL) free(old->arg_alloced); free(old); } static void pop(tmpasm_t *ctx) { pop_(ctx, 1); } #define grow(arr, size) arr = realloc(arr, sizeof((arr)[0]) * size) static void arg_new(tmpasm_t *ctx, int is_addr) { if (TOP->args_used >= TOP->args_alloced) { TOP->args_alloced = TOP->args_alloced + 16; grow(TOP->argv, TOP->args_alloced); grow(TOP->argend, TOP->args_alloced); grow(TOP->arg_alloced, TOP->args_alloced); grow(TOP->arg_used, TOP->args_alloced); } TOP->arg_alloced[TOP->args_used] = 64; TOP->arg_used[TOP->args_used] = 0; TOP->argv[TOP->args_used] = malloc(TOP->arg_alloced[TOP->args_used]+sizeof(tmpasm_arg_t)); TOP->argv[TOP->args_used]->is_addr = is_addr; TOP->argv[TOP->args_used]->next = NULL; TOP->argend[TOP->args_used] = TOP->argv[TOP->args_used]; TOP->args_used++; } static void arg_append(tmpasm_t *ctx, char c) { int i = TOP->args_used - 1; if (TOP->arg_used[i] >= TOP->arg_alloced[i]) { tmpasm_arg_t *prev, *last; /* since argend[i] is also in the ->next pointer of the previous item in a block chain, we need to look it up */ for(prev = NULL, last = TOP->argv[i]; last->next != NULL; last = last->next) prev = last; TOP->arg_alloced[i] += 64; last = realloc(last, TOP->arg_alloced[i]+sizeof(tmpasm_arg_t)); if (prev == NULL) TOP->argv[i] = last; else prev->next = last; TOP->argend[i] = last; } TOP->argend[i]->data[TOP->arg_used[i]] = c; TOP->arg_used[i]++; } static void arg_free(tmpasm_arg_t *a) { tmpasm_arg_t *next; if (a == NULL) return; next = a->next; free(a); if (next != NULL) arg_free(next); } static void arg_new_next(tmpasm_t *ctx, int is_addr) { tmpasm_arg_t *a; int id; arg_append(ctx, '\0'); id = TOP->args_used - 1; assert(id>=0); TOP->arg_alloced[id] = 64; TOP->arg_used[id] = 0; a = malloc(TOP->arg_alloced[id]+sizeof(tmpasm_arg_t)); strcpy(a->data, "QWERT"); a->is_addr = is_addr; a->next = NULL; TOP->argend[id]->next = a; TOP->argend[id] = a; } static void arg_remove(tmpasm_t *ctx) { assert(TOP->args_used == 1); TOP->args_used = 0; TOP->argv[0] = NULL; TOP->argend[0] = NULL; TOP->arg_alloced[0] = 0; TOP->arg_used[0] = 0; } static int arg_is_addr(tmpasm_arg_t *a) { return (a->next == NULL) && (a->is_addr); } static void arg_end(tmpasm_t *ctx, int cmd_ctx) { tmpasm_arg_t *a; arg_append(ctx, '\0'); a = TOP->argv[TOP->args_used-1]; if (cmd_ctx) { /* when argument ends in a command context (not in a block inline), we may may need to switch back to command mode; example: after the cond of an "if cond then"*/ switch(TOP->kw) { case KW_IF: TOP->state = ST_PRECMD; break; case KW_FOREACH: if (!arg_is_addr(a)) { error(ctx, 0, "variable of a foreach must be an address"); return; } TOP->last_code->payload.fc_foreach.loop_var = strclone(a->data); arg_free(a); arg_remove(ctx); TOP->state = ST_PRECMD; break; case KW_IN: /* pop will free the argv[] array, but not the elements so "a" is safe to use after this line */ pop(ctx); /* in foreach context, after the IN-data */ TOP->last_code->payload.fc_foreach.data = a; /* we are in the body now, TOP is the foreach context, last_code is body */ TOP->last_code->payload.fc_foreach.code_body = code_new(KW_NOP); push(ctx, KW_none, ST_PRECMD, TOP->last_code->payload.fc_foreach.code_body); break; case KW_CASE: ctx->st->next->last_code->payload.fc_switch.last->data = a; arg_remove(ctx); push(ctx, KW_none, ST_PRECMD, TOP->last_code); break; case KW_SWITCH: TOP->last_code->payload.fc_switch.cond = a; arg_remove(ctx); TOP->state = ST_PRECMD; break; default: TOP->state = ST_PREDATA; } } } /* end of statement; update kw state for a composite control kw; for the rest just call the lib */ static void end_of_statement(tmpasm_t *ctx) { switch(TOP->kw) { case KW_none: case KW_THEN: case KW_ELSE: case KW_CASE: case KW_DEFAULT: TOP->last_code->payload.instr.argc = TOP->args_used; TOP->last_code->payload.instr.argv = TOP->argv; TOP->argv = NULL; free(TOP->argend); TOP->argend = NULL; TOP->args_used = 0; TOP->args_alloced = 0; break; default: /* don't mess with the payload */ ; } TOP->state = ST_PRECMD; } #define loc_update() \ do { \ TOP->last_code->line = TOP->kwline; \ TOP->last_code->col = TOP->kwcol; \ } while(0) static void got_kw(tmpasm_t *ctx, tmpasm_kw_t kw, int terminated) { switch(kw) { case KW_END: /* then-else threads have their own subcontext within the if subcontext; end needs to pop the innermost subcontext before terminating the if context */ if (TOP->kw == KW_IF) { error(ctx, 0, "unexpected \"end\" in \"if\" - expected \"then\""); goto bind_if_cond; } if ((TOP->kw == KW_ELSE) || (TOP->kw == KW_THEN)) pop(ctx); if (TOP->kw == KW_SWITCH) TOP->kw = TOP->old_kw; else { pop(ctx); if ((TOP->kw == KW_CASE) || (TOP->kw == KW_DEFAULT)) pop(ctx); } TOP->state = ST_PRECMD; /* have to restore context keyword after these */ if (TOP->kw == KW_FOREACH) TOP->kw = TOP->old_kw; break; case KW_IF: if (terminated) { error(ctx, 0, "unexpected end of if statement; expected a condition"); return; } TOP->last_code = code_append(ctx, KW_IF); TOP->last_code->payload.fc_if.code_then = code_new(KW_NOP); TOP->last_code->payload.fc_if.code_else = code_new(KW_NOP); loc_update(); TOP->state = ST_PRECMD; /* prepare for reading a condition */ push(ctx, KW_IF, ST_PREDATA, TOP->last_code); break; case KW_THEN: /* we are in an if context, right after reading a condition */ if (TOP->kw != KW_IF) { error(ctx, 0, "unexpected 'then' - must be in an 'if' after the condition"); return; } bind_if_cond:; TOP->last_code->payload.fc_if.cond = TOP->argv[0]; loc_update(); arg_remove(ctx); push(ctx, KW_THEN, ST_PRECMD, TOP->last_code->payload.fc_if.code_then); break; case KW_ELSE: /* we are in an if context, after and end */ if (TOP->kw != KW_THEN) { error(ctx, 0, "unexpected 'else' - must be in a 'then' block before an else"); return; } pop(ctx); /* that was the then branch */ push(ctx, KW_ELSE, ST_PRECMD, TOP->last_code->payload.fc_if.code_else); break; case KW_FOREACH: if (terminated) { error(ctx, 0, "unexpected end of if foreach statement; expected an address"); return; } TOP->last_code = code_append(ctx, KW_FOREACH); loc_update(); TOP->state = ST_PREDATA; TOP->old_kw = TOP->kw; TOP->kw = KW_FOREACH; break; case KW_IN: if (TOP->kw != KW_FOREACH) error(ctx, 0, "unexpected \"in\"; should be after the address in foreach"); else push(ctx, KW_IN, ST_PREDATA, NULL); break; case KW_SWITCH: if (terminated) { error(ctx, 0, "unexpected end of if switch statement; expected a data"); return; } TOP->last_code = code_append(ctx, KW_SWITCH); TOP->state = ST_PREDATA; TOP->old_kw = TOP->kw; TOP->kw = KW_SWITCH; loc_update(); break; case KW_CASE: case KW_DEFAULT: if (TOP->kw == KW_SWITCH) { tmpasm_case_t *c; c = malloc(sizeof(tmpasm_case_t)); c->body = code_new(KW_NOP); c->data = NULL; c->next = NULL; if (TOP->last_code->payload.fc_switch.last == NULL) { TOP->last_code->payload.fc_switch.first = c; TOP->last_code->payload.fc_switch.last = c; } else { TOP->last_code->payload.fc_switch.last->next = c; TOP->last_code->payload.fc_switch.last = c; } if (kw == KW_DEFAULT) { push(ctx, KW_DEFAULT, ST_PRECMD, c->body); push(ctx, KW_none, ST_PRECMD, c->body); c->data = NULL; } else push(ctx, KW_CASE, ST_PREDATA, c->body); } else error(ctx, 0, "unexpected \"case\" or \"default\"; should be in a switch (is the last case terminated by an \"end\"?)"); break; default: TOP->last_code = code_append(ctx, KW_none); TOP->last_code->payload.instr.call_name = strclone(TOP->cmd_buff); if (TOP->last_code->payload.instr.call_name != NULL) { TOP->last_code->payload.instr.call = ctx->cb->resolve(ctx, TOP->last_code->payload.instr.call_name); loc_update(); } if (terminated) TOP->state = ST_PRECMD; else TOP->state = ST_PREDATA; } } static void comment_start(tmpasm_t *ctx) { push(ctx, KW_none, ST_COMMENT, NULL); } int tmpasm_gotchar(tmpasm_t *ctx, char c) { if (ctx->dead) return -1; switch(TOP->state) { case ST_COMMENT: if ((c == '\n') || (c == '\r')) { pop(ctx); if (TOP->state == ST_PREDATA) end_of_statement(ctx); } break; case ST_PRECMD: if (c == '#') { comment_start(ctx); break; } if (is_space(c) || is_sep(c)) break; TOP->cmdi = 0; TOP->state = ST_CMD; TOP->kwline = ctx->line; TOP->kwcol = ctx->col; /* fall thru */ case ST_CMD: /* end of command or keyword */ if (is_space(c) || is_sep(c)) { TOP->cmd_buff[TOP->cmdi] = '\0'; got_kw(ctx, kw_lookup(TOP->cmd_buff), is_sep(c)); } else { TOP->cmd_buff[TOP->cmdi] = c; TOP->cmdi++; if (TOP->cmdi >= sizeof(TOP->cmd_buff)) error(ctx, 0, "keyword or instruction name is too long"); } break; case ST_PREDATA: if (c == '#') { comment_start(ctx); break; } if (is_space(c)) break; if (is_sep(c)) end_of_statement(ctx); else if (c == '{') { TOP->state = ST_STRING; arg_new(ctx, 0); } else if (c == '[') { TOP->state = ST_PREBLOCKSEP; arg_new(ctx, 0); } else if (is_addr(c)) { TOP->state = ST_ADDRESS; arg_new(ctx, 1); arg_append(ctx, c); } else error(ctx, c, "unexpected character; expected '{' for starting a string or an address"); break; case ST_PREBLOCKSEP: TOP->block_sep = c; TOP->state = ST_BLOCK; break; case ST_BLOCK: if (c == TOP->block_sep) TOP->state = ST_BLOCKSEP; else arg_append(ctx, c); break; case ST_BLOCKSEP: if (c != ']') { arg_new_next(ctx, 1); arg_append(ctx, c); TOP->state = ST_BLOCK_INLINE; } else arg_end(ctx, 1); break; case ST_BLOCK_INLINE: if (c == TOP->block_sep) { arg_new_next(ctx, 0); TOP->state = ST_BLOCK; } else arg_append(ctx, c); break; case ST_STRING: if (c == '}') arg_end(ctx, 1); else if (c == '\\') TOP->state = ST_STRING_ESCAPE; else arg_append(ctx, c); break; case ST_STRING_ESCAPE: { char co; switch(c) { case 'n': co = '\n'; break; case 'r': co = '\r'; break; case 't': co = '\t'; break; case '\\': co = '\\'; break; case 'o': co = '{'; break; case 'c': co = '}'; break; default: co = c; } arg_append(ctx, co); TOP->state = ST_STRING; } break; case ST_ADDRESS: if (is_space(c)) arg_end(ctx, 1); else if (is_sep(c)) { arg_end(ctx, 1); end_of_statement(ctx); } else if (is_addr(c)) arg_append(ctx, c); else error(ctx, c, "unexpected character; expected next character of the address"); break; } if (c == '\n') { ctx->line++; ctx->col = 1; } else ctx->col++; return 0; } tmpasm_t *tmpasm_init(const tmpasm_cb_t *cb) { tmpasm_t *ctx; ctx = calloc(sizeof(tmpasm_t), 1); ctx->line = 1; ctx->col = 1; ctx->code = code_new(KW_NOP); ctx->cb = cb; push(ctx, KW_none, ST_PRECMD, ctx->code); return ctx; } static void free_exec(tmpasm_exec_t *e) { int n; tmpasm_case_t *c, *c_next; tmpasm_exec_t *e_next; for(; e != NULL; e = e_next) { e_next = e->next; switch(e->kw) { case KW_none: if (e->payload.instr.call_name != NULL) free(e->payload.instr.call_name); for(n = 0; n < e->payload.instr.argc; n++) arg_free(e->payload.instr.argv[n]); free(e->payload.instr.argv); break; case KW_IF: arg_free(e->payload.fc_if.cond); free_exec(e->payload.fc_if.code_then); free_exec(e->payload.fc_if.code_else); break; case KW_FOREACH: free(e->payload.fc_foreach.loop_var); arg_free(e->payload.fc_foreach.data); free_exec(e->payload.fc_foreach.code_body); break; case KW_SWITCH: arg_free(e->payload.fc_switch.cond); for(c = e->payload.fc_switch.first; c != NULL; c = c_next) { c_next = c->next; if (c->data != NULL) arg_free(c->data); free_exec(c->body); free(c); } break; default:; } free(e); } } void tmpasm_uninit(tmpasm_t *ctx) { free_exec(ctx->code); while (ctx->st != NULL) pop_(ctx, 0); if (ctx->runtime_error_data != NULL) free(ctx->runtime_error_data); free(ctx); } /****************** runtime ********************/ static const char *tmpasm_runtime_error_fmts[] = { "success %s", "variable '%s' does not exist", "empty argument (broken AST)%s", "compilation error: control block without an \"end\"; premature end of script%s", "attempt to call unresolved instruction '%s'", NULL }; void tmpasm_runtime_error(tmpasm_t *ctx, int code, const char *data) { ctx->runtime_error = code; if (ctx->runtime_error_data != NULL) free(ctx->runtime_error_data); ctx->runtime_error_data = strclone(data); if (ctx->executing != NULL) { ctx->runtime_error_line = ctx->executing->line; ctx->runtime_error_col = ctx->executing->col; } else { ctx->runtime_error_line = 0; ctx->runtime_error_col = 0; } } const char *tmpasm_runtime_error_fmt(tmpasm_t *ctx) { if (ctx->runtime_error == 0) return NULL; if ((ctx->runtime_error < 0) && (ctx->cb->runtime_error_fmt != NULL)) { const char *fmt; fmt = ctx->cb->runtime_error_fmt(ctx); if (fmt != NULL) return fmt; } if ((ctx->runtime_error < 0) || ((size_t)ctx->runtime_error > (sizeof(tmpasm_runtime_error_fmts)/sizeof(char *)))) return "invalid error code %s"; return tmpasm_runtime_error_fmts[ctx->runtime_error]; } char *tmpasm_arg2str(tmpasm_t *ctx, tmpasm_arg_t *a, int keep_addr) { if (a == NULL) { tmpasm_runtime_error(ctx, 2, NULL); return strclone(""); } if (a->next != NULL) { /* block mode */ int alloced = 0, used = 0; char *s = NULL; const char *i; for(;a != NULL; a = a->next) { int l; if (a->is_addr) { i = ctx->cb->get(ctx, a->data); if (i == NULL) { i = ""; tmpasm_runtime_error(ctx, 1, strclone(a->data)); } } else i = a->data; l = strlen(i); if (used + l >= alloced) { alloced = used + l + 256; s = realloc(s, alloced); } memcpy(s+used, i, l); used += l; } s[used] = '\0'; return s; } /* non-block */ if (a->is_addr) { const char *i; if (keep_addr) i = a->data; else i = ctx->cb->get(ctx, a->data); if (i == NULL) { i = ""; tmpasm_runtime_error(ctx, 1, strclone(a->data)); } return strclone(i); } return strclone(a->data); } static void execute(tmpasm_t *ctx, tmpasm_exec_t *e) { tmpasm_case_t *c; void *state; char *cond, *list; const char *i; while((e != NULL) && (ctx->runtime_error == 0) && (ctx->halt == 0)) { ctx->executing = e; switch(e->kw) { case KW_none: if (e->payload.instr.call != NULL) e->payload.instr.call(ctx, e->payload.instr.call_name, e->payload.instr.argc, e->payload.instr.argv); else tmpasm_runtime_error(ctx, 4, e->payload.instr.call_name); break; case KW_IF: cond = tmpasm_arg2str(ctx, e->payload.fc_if.cond, 0); if (ctx->cb->is_true(ctx, cond)) execute(ctx, e->payload.fc_if.code_then); else execute(ctx, e->payload.fc_if.code_else); free(cond); break; case KW_FOREACH: list = tmpasm_arg2str(ctx, e->payload.fc_foreach.data, 0); for(i = ctx->cb->first(ctx, &state, list); i != NULL; i = ctx->cb->next(ctx, &state)) { ctx->cb->set(ctx, e->payload.fc_foreach.loop_var, i); execute(ctx, e->payload.fc_foreach.code_body); } free(list); break; case KW_SWITCH: cond = tmpasm_arg2str(ctx, e->payload.fc_switch.cond, 0); for(c = e->payload.fc_switch.first; c != NULL; c = c->next) { char *cv = NULL; if (c->data != NULL) cv = tmpasm_arg2str(ctx, c->data, 0); if ((c->data == NULL) || (ctx->cb->match(ctx, cond, cv))) { execute(ctx, c->body); if (cv != NULL) free(cv); break; } if (cv != NULL) free(cv); } free(cond); break; default:; } e = e->next; } } void tmpasm_execute(tmpasm_t *ctx) { if (TOP->next != NULL) { ctx->executing = TOP->next->last_code; tmpasm_runtime_error(ctx, 3, NULL); return; } if ((TOP->state != ST_PRECMD) || (TOP->kw != KW_none)) { ctx->executing = TOP->last_code; tmpasm_runtime_error(ctx, 3, NULL); return; } ctx->halt = 0; ctx->runtime_error = 0; if (ctx->runtime_error_data != NULL) { free(ctx->runtime_error_data); ctx->runtime_error_data = NULL; } if (ctx->cb->preexec != NULL) ctx->cb->preexec(ctx); execute(ctx, ctx->code); if (ctx->cb->postexec != NULL) ctx->cb->postexec(ctx); }