From: Lukas Krickl Date: Fri, 20 Mar 2026 05:54:30 +0000 (+0100) Subject: scripting: reworked lexter to not depend on the vm X-Git-Url: https://git.krickl.dev/?a=commitdiff_plain;h=d655558f04346985aee41a30ae36b099c6aec29b;p=lrts%2F.git scripting: reworked lexter to not depend on the vm --- diff --git a/src/l_lsl.c b/src/l_lsl.c index 5efae9c..6e280d7 100644 --- a/src/l_lsl.c +++ b/src/l_lsl.c @@ -13,12 +13,15 @@ struct l_lsl_vm l_lsl_vm_init(void) { return v; } -int l_lsl_err(struct l_lsl_vm *v, enum l_lsl_error err, const char *fmt, ...) { +/* lexing errors and warnings */ +int l_lsl_lex_err(struct l_lsl_vm *v, + struct l_lsl_lex *lex, + enum l_lsl_error err, const char *fmt, ...) { int res; va_list args; va_start(args, fmt); - u_fprintf(u_stderr, "error at %s:%d: ", v->path, v->line_num); + u_fprintf(u_stderr, "error at %s:%d: ", lex->path, lex->line_num); res = u_vfprintf(u_stderr, fmt, args); va_end(args); @@ -26,14 +29,16 @@ int l_lsl_err(struct l_lsl_vm *v, enum l_lsl_error err, const char *fmt, ...) { return res; } -int l_lsl_warn(struct l_lsl_vm *v, enum l_lsl_error err, const char *fmt, ...) { +int l_lsl_lex_warn(struct l_lsl_vm *v, + struct l_lsl_lex *lex, + enum l_lsl_error err, const char *fmt, ...) { int res; va_list args; LRTS_UNUSED(v); LRTS_UNUSED(err); va_start(args, fmt); - u_fprintf(u_stderr, "warning at %s:%d: ", v->path, v->line_num); + u_fprintf(u_stderr, "warning at %s:%d: ", lex->path, lex->line_num); res = u_vfprintf(u_stderr, fmt, args); va_end(args); return res; @@ -82,16 +87,16 @@ lrts_bool l_lsl_isterm(char c) { } /* peeks current token */ -#define l_lsl_lex_peek(v, code) \ - (code)[v->lex_pos] +#define l_lsl_lex_peek(lex) \ + (lex->code)[lex->pos] /* peeks current token and then advances cursor */ -#define l_lsl_lex_consume(v, code)\ - (code)[v->lex_pos++] +#define l_lsl_lex_consume(lex)\ + (lex->code)[lex->pos++] /* advances cursor and then peeks current token */ -#define l_lsl_lex_advance(v, code) \ - (code)[++v->lex_pos] +#define l_lsl_lex_advance(v) \ + (lex->code)[++lex->pos] /* writes a char to token and advances token_len */ #define l_lsl_lex_write_token(t, c) \ @@ -102,21 +107,22 @@ lrts_bool l_lsl_isterm(char c) { t.val[t.len] = '\0'; char tokbuf[L_LSL_TOK_MAX]; -struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, const char *code) { +struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, struct l_lsl_lex *lex) { char c = 0; + char string_term = '\''; struct l_lsl_token t; u_memset(&t, 0, sizeof(t)); t.val = tokbuf; - c = l_lsl_lex_peek(v, code); + c = l_lsl_lex_peek(lex); /* consume all spaces */ while (l_lsl_isspace(c)) { if (c == '\n') { - v->line_num++; + lex->line_num++; } - c = l_lsl_lex_advance(v, code); + c = l_lsl_lex_advance(lex); } if (l_lsl_isnum(c)) { @@ -124,35 +130,35 @@ struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, const char *code) { if (c == '0') { /* special base */ /* specific base number */ - c = l_lsl_lex_advance(v, code); + c = l_lsl_lex_advance(lex); if (l_lsl_isnum(c)) { /* octal */ while (l_lsl_isoct(c)) { l_lsl_lex_write_token(t, c); - c = l_lsl_lex_advance(v, code); + c = l_lsl_lex_advance(lex); } - c = l_lsl_lex_advance(v, code); + c = l_lsl_lex_advance(lex); t.base = 8; } else if (c == 'x') { /* hex */ - c = l_lsl_lex_advance(v, code); + c = l_lsl_lex_advance(lex); while (l_lsl_ishexnum(c)) { l_lsl_lex_write_token(t, c); - c = l_lsl_lex_advance(v, code); + c = l_lsl_lex_advance(lex); } t.base = 16; } else if (c == 'b') { /* bin */ - c = l_lsl_lex_advance(v, code); + c = l_lsl_lex_advance(lex); while (l_lsl_isbin(c)) { l_lsl_lex_write_token(t, c); - c = l_lsl_lex_advance(v, code); + c = l_lsl_lex_advance(lex); } - c = l_lsl_lex_advance(v, code); + c = l_lsl_lex_advance(lex); t.base = 2; } else { - l_lsl_err(v, L_LSL_ERR_INVALID_NUMBER_BASE, + l_lsl_lex_err(v, lex, L_LSL_ERR_INVALID_NUMBER_BASE, "unknown number base: 0%c\n", c); t.type = L_LSL_TOK_NONE; } @@ -160,23 +166,31 @@ struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, const char *code) { /* decimal number */ while (l_lsl_isnum(c)) { l_lsl_lex_write_token(t, c); - c = l_lsl_lex_advance(v, code); + c = l_lsl_lex_advance(lex); } t.base = 10; } } else if (l_lsl_isident(c)) { /* identifier */ + } else if (c == '\'' || c == '\"') { + /* string literal */ + /* consume initial term */ + c = l_lsl_lex_advance(lex); + string_term = c; + while (c && c != string_term) { + c = l_lsl_lex_advance(lex); + } } else { - l_lsl_err(v, L_LSL_ERR_LEX, "unknown char: %c\n", c); + l_lsl_lex_err(v, lex, L_LSL_ERR_LEX, "unknown char: %c\n", c); t.type = L_LSL_TOK_NONE; } l_lsl_lex_term_token(t); /* end of a token must be a term */ - c = l_lsl_lex_peek(v, code); + c = l_lsl_lex_peek(lex); if (!l_lsl_isterm(c)) { - l_lsl_err(v, L_LSL_ERR_LEX, "unexpected end of token\n", c); + l_lsl_lex_err(v, lex, L_LSL_ERR_LEX, "unexpected end of token\n", c); t.type = L_LSL_TOK_NONE; } @@ -202,15 +216,21 @@ void l_lsl_value_free(struct l_lsl_vm *v, v->free(val); } -struct l_lsl_value* l_lsl_compile(struct l_lsl_vm *v, const char *code, +struct l_lsl_value* l_lsl_compile(struct l_lsl_vm *v, + const char *file_path, + const char *code, u32 len) { struct l_lsl_value *val = LRTS_NULL; struct l_lsl_token tok; - /* TODO: length check */ - LRTS_UNUSED(len); + struct l_lsl_lex lex; + lex.code = code; + lex.code_len = len; + lex.pos = 0; + lex.line_num = 1; + lex.path = file_path; - tok = l_lsl_next_token(v, code); + tok = l_lsl_next_token(v, &lex); switch (tok.type) { case L_LSL_TOK_INT: @@ -229,27 +249,16 @@ struct l_lsl_value* l_lsl_compile_file(struct l_lsl_vm *v, const char *path) { const char *src = u_file_read(path); struct l_lsl_value *val = LRTS_NULL; - u32 prev_lex_pos = v->lex_pos; - const char *prev_file_name = v->path; - u32 prev_line_num = v->line_num; - - v->lex_pos = 0; - v->path = path; - v->line_num = 1; - if (src == LRTS_NULL) { u_log(U_LOG_CRIT, "%s: No such file or directory\n", path); v->err = L_LSL_ERR_FILE_NOT_FOUND; return val; } - val = l_lsl_compile(v, src, u_strnlen(src, L_LSL_SOURCE_MAX)); + val = l_lsl_compile(v, path, src, u_strnlen(src, L_LSL_SOURCE_MAX)); u_free((void*)src); - - v->lex_pos = prev_lex_pos; - v->path = prev_file_name; - v->line_num = prev_line_num; + return val; } diff --git a/src/l_lsl.h b/src/l_lsl.h index 861b530..64da415 100644 --- a/src/l_lsl.h +++ b/src/l_lsl.h @@ -54,7 +54,8 @@ enum l_lsl_error { L_LSL_ERR_OK = 0, L_LSL_ERR_FILE_NOT_FOUND, L_LSL_ERR_INVALID_NUMBER_BASE, - L_LSL_ERR_LEX + L_LSL_ERR_LEX, + L_LSL_ERR_UNTERM_STRING }; struct l_lsl_table; @@ -85,14 +86,19 @@ struct l_lsl_gc { void *ptrs; }; +struct l_lsl_lex { + const char *path; + u32 pos; + u32 line_num; + + const char *code; + u32 code_len; +}; + struct l_lsl_vm { enum l_lsl_error err; enum l_lsl_flags flags; - const char *path; - u32 lex_pos; - u32 line_num; - struct l_lsl_gc gc; u_malloc_fp malloc; @@ -117,12 +123,13 @@ void l_lsl_value_free(struct l_lsl_vm *v, * the result token is placed into an internal string buffer and should not be modified * or saved. */ -struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, const char *code); +struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, struct l_lsl_lex *lex); /* compiles a program * returns the resulting program's head as a list */ -struct l_lsl_value* l_lsl_compile(struct l_lsl_vm *v, const char *code, u32 len); +struct l_lsl_value* l_lsl_compile(struct l_lsl_vm *v, const char *file_path, + const char *code, u32 len); /* runs a program from a given entry point function * if the entry point is NULL the global scope is executed */