scripting: reworked lexer to not depend on the vm
authorLukas Krickl <lukas@krickl.dev>
Fri, 20 Mar 2026 05:54:30 +0000 (06:54 +0100)
committerLukas Krickl <lukas@krickl.dev>
Fri, 20 Mar 2026 05:54:30 +0000 (06:54 +0100)
src/l_lsl.c
src/l_lsl.h

index 5efae9c899e53401bdb0c12c6cbb5cdb11c88874..6e280d7e86336fbb859ab6606fb7d0cc4d9b48cd 100644 (file)
@@ -13,12 +13,15 @@ struct l_lsl_vm l_lsl_vm_init(void) {
        return v;
 }
 
-int l_lsl_err(struct l_lsl_vm *v, enum l_lsl_error err, const char *fmt, ...) {
+/* lexing errors and warnings */
+int l_lsl_lex_err(struct l_lsl_vm *v, 
+               struct l_lsl_lex *lex,
+               enum l_lsl_error err, const char *fmt, ...) {
        int res;
        va_list args;
 
        va_start(args, fmt);
-       u_fprintf(u_stderr, "error at %s:%d: ", v->path, v->line_num);
+       u_fprintf(u_stderr, "error at %s:%d: ", lex->path, lex->line_num);
        res = u_vfprintf(u_stderr, fmt, args);
        va_end(args);
 
@@ -26,14 +29,16 @@ int l_lsl_err(struct l_lsl_vm *v, enum l_lsl_error err, const char *fmt, ...) {
        return res;
 }
 
-int l_lsl_warn(struct l_lsl_vm *v, enum l_lsl_error err, const char *fmt, ...) {
+int l_lsl_lex_warn(struct l_lsl_vm *v, 
+               struct l_lsl_lex *lex,
+               enum l_lsl_error err, const char *fmt, ...) {
        int res;
        va_list args;
        LRTS_UNUSED(v);
        LRTS_UNUSED(err);
 
        va_start(args, fmt);
-       u_fprintf(u_stderr, "warning at %s:%d: ", v->path, v->line_num);
+       u_fprintf(u_stderr, "warning at %s:%d: ", lex->path, lex->line_num);
        res = u_vfprintf(u_stderr, fmt, args);
        va_end(args);
        return res;
@@ -82,16 +87,16 @@ lrts_bool l_lsl_isterm(char c) {
 }
 
 /* peeks current token */
-#define l_lsl_lex_peek(v, code) \
-       (code)[v->lex_pos]
+#define l_lsl_lex_peek(lex) \
+       (lex->code)[lex->pos]
 
 /* peeks current token and then advances cursor */
-#define l_lsl_lex_consume(v, code)\
-       (code)[v->lex_pos++]
+#define l_lsl_lex_consume(lex)\
+       (lex->code)[lex->pos++]
 
 /* advances cursor and then peeks current token */
-#define l_lsl_lex_advance(v, code) \
-       (code)[++v->lex_pos]
+#define l_lsl_lex_advance(v) \
+       (lex->code)[++lex->pos]
 
 /* writes a char to token and advances token_len */
 #define l_lsl_lex_write_token(t, c) \
@@ -102,21 +107,22 @@ lrts_bool l_lsl_isterm(char c) {
                t.val[t.len] = '\0';
 
 char tokbuf[L_LSL_TOK_MAX];
-struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, const char *code) {
+struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, struct l_lsl_lex *lex) {
        char c = 0;
+       char string_term = '\'';
        struct l_lsl_token t;
        u_memset(&t, 0, sizeof(t));
        t.val = tokbuf;
 
 
        
-       c = l_lsl_lex_peek(v, code);
+       c = l_lsl_lex_peek(lex);
        /* consume all spaces */
        while (l_lsl_isspace(c)) {
                if (c == '\n') {
-                       v->line_num++;
+                       lex->line_num++;
                }
-               c = l_lsl_lex_advance(v, code);
+               c = l_lsl_lex_advance(lex);
        }
 
        if (l_lsl_isnum(c)) {
@@ -124,35 +130,35 @@ struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, const char *code) {
                if (c == '0') {
                        /* special base */
                        /* specific base number */
-                       c = l_lsl_lex_advance(v, code);
+                       c = l_lsl_lex_advance(lex);
 
                        if (l_lsl_isnum(c)) {
                                /* octal */
                                while (l_lsl_isoct(c)) {
                                        l_lsl_lex_write_token(t, c);
-                                       c = l_lsl_lex_advance(v, code);
+                                       c = l_lsl_lex_advance(lex);
                                }
-                               c = l_lsl_lex_advance(v, code);
+                               c = l_lsl_lex_advance(lex);
                                t.base = 8;
                        } else if (c == 'x') {
                                /* hex */
-                               c = l_lsl_lex_advance(v, code);
+                               c = l_lsl_lex_advance(lex);
                                while (l_lsl_ishexnum(c)) {
                                        l_lsl_lex_write_token(t, c);
-                                       c = l_lsl_lex_advance(v, code);
+                                       c = l_lsl_lex_advance(lex);
                                }
                                t.base = 16;
                        } else if (c == 'b') {
                                /* bin */
-                               c = l_lsl_lex_advance(v, code);
+                               c = l_lsl_lex_advance(lex);
                                while (l_lsl_isbin(c)) {
                                        l_lsl_lex_write_token(t, c);
-                                       c = l_lsl_lex_advance(v, code);
+                                       c = l_lsl_lex_advance(lex);
                                }
-                               c = l_lsl_lex_advance(v, code);
+                               c = l_lsl_lex_advance(lex);
                                t.base = 2;
                        } else {
-                               l_lsl_err(v, L_LSL_ERR_INVALID_NUMBER_BASE, 
+                               l_lsl_lex_err(v, lex, L_LSL_ERR_INVALID_NUMBER_BASE, 
                                                "unknown number base: 0%c\n", c);
                                t.type = L_LSL_TOK_NONE;
                        }
@@ -160,23 +166,31 @@ struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, const char *code) {
                        /* decimal number */
                        while (l_lsl_isnum(c)) {
                                l_lsl_lex_write_token(t, c);
-                               c = l_lsl_lex_advance(v, code);
+                               c = l_lsl_lex_advance(lex);
                        }
                        t.base = 10;
                }
        } else if (l_lsl_isident(c)) {
                /* identifier */
+       } else if (c == '\'' || c == '\"') {
+               /* string literal: remember the opening quote before stepping
+                * past it, so the scan below stops at the matching quote */
+               string_term = c;
+               c = l_lsl_lex_advance(lex);
+               while (c && c != string_term) {
+                       c = l_lsl_lex_advance(lex);
+               }
        } else {
-                       l_lsl_err(v, L_LSL_ERR_LEX, "unknown char: %c\n", c);
+                       l_lsl_lex_err(v, lex, L_LSL_ERR_LEX, "unknown char: %c\n", c);
                        t.type = L_LSL_TOK_NONE;
        }
                                
        l_lsl_lex_term_token(t);
 
        /* end of a token must be a term */
-       c = l_lsl_lex_peek(v, code);
+       c = l_lsl_lex_peek(lex);
        if (!l_lsl_isterm(c)) {
-               l_lsl_err(v, L_LSL_ERR_LEX, "unexpected end of token\n", c);
+               l_lsl_lex_err(v, lex, L_LSL_ERR_LEX, "unexpected end of token\n", c);
                t.type = L_LSL_TOK_NONE;
        }
 
@@ -202,15 +216,21 @@ void l_lsl_value_free(struct l_lsl_vm *v,
        v->free(val);
 }
 
-struct l_lsl_value* l_lsl_compile(struct l_lsl_vm *v, const char *code, 
+struct l_lsl_value* l_lsl_compile(struct l_lsl_vm *v, 
+               const char *file_path,
+               const char *code, 
                u32 len) {      
        struct l_lsl_value *val = LRTS_NULL;
        struct l_lsl_token tok;
-       /* TODO: length check */
-       LRTS_UNUSED(len);
+       struct l_lsl_lex lex;
        
+       lex.code = code;
+       lex.code_len = len;
+       lex.pos = 0;
+       lex.line_num = 1;
+       lex.path = file_path;
 
-       tok = l_lsl_next_token(v, code);
+       tok = l_lsl_next_token(v, &lex);
 
        switch (tok.type) {
                case L_LSL_TOK_INT:
@@ -229,27 +249,16 @@ struct l_lsl_value* l_lsl_compile_file(struct l_lsl_vm *v, const char *path) {
        const char *src = u_file_read(path);
        struct l_lsl_value *val = LRTS_NULL;
 
-       u32 prev_lex_pos = v->lex_pos;
-       const char *prev_file_name = v->path;
-       u32 prev_line_num = v->line_num;
-
-       v->lex_pos = 0;
-       v->path = path;
-       v->line_num = 1;
-
        if (src == LRTS_NULL) {
                u_log(U_LOG_CRIT, "%s: No such file or directory\n", path);
                v->err = L_LSL_ERR_FILE_NOT_FOUND;
                return val;
        }
 
-       val = l_lsl_compile(v, src, u_strnlen(src, L_LSL_SOURCE_MAX));
+       val = l_lsl_compile(v, path, src, u_strnlen(src, L_LSL_SOURCE_MAX));
 
        u_free((void*)src);
-
-       v->lex_pos = prev_lex_pos;
-       v->path = prev_file_name;
-       v->line_num = prev_line_num;
+       
        return val;
 }
 
index 861b5309b38a671fe6a0ed72087383fa3bf9b7ac..64da41506a2283d3c716b6ec222180f3e18deca3 100644 (file)
@@ -54,7 +54,8 @@ enum l_lsl_error {
        L_LSL_ERR_OK = 0,
        L_LSL_ERR_FILE_NOT_FOUND,
        L_LSL_ERR_INVALID_NUMBER_BASE,
-       L_LSL_ERR_LEX
+       L_LSL_ERR_LEX,
+       L_LSL_ERR_UNTERM_STRING
 };
 
 struct l_lsl_table;
@@ -85,14 +86,19 @@ struct l_lsl_gc {
        void *ptrs;
 };
 
+struct l_lsl_lex { /* per-source lexer state, passed to l_lsl_next_token */
+       const char *path; /* file name printed in lexer error/warning messages */
+       u32 pos; /* index into code of the current character */
+       u32 line_num; /* current line; bumped for each '\n' skipped as whitespace */
+
+       const char *code; /* source text being tokenized */
+       u32 code_len; /* length passed to l_lsl_compile; not read by the lexer code shown in this diff */
+};
+
 struct l_lsl_vm {
        enum l_lsl_error err;
        enum l_lsl_flags flags;
 
-       const char *path;
-       u32 lex_pos;
-       u32 line_num;
-
        struct l_lsl_gc gc;
 
        u_malloc_fp malloc;
@@ -117,12 +123,13 @@ void l_lsl_value_free(struct l_lsl_vm *v,
  * the result token is placed into an internal string buffer and should not be modified 
  * or saved.
  */
-struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, const char *code);
+struct l_lsl_token l_lsl_next_token(struct l_lsl_vm *v, struct l_lsl_lex *lex);
 
 /* compiles a program 
  * returns the resulting program's head as a list
  */
-struct l_lsl_value* l_lsl_compile(struct l_lsl_vm *v, const char *code, u32 len);
+struct l_lsl_value* l_lsl_compile(struct l_lsl_vm *v, const char *file_path, 
+               const char *code, u32 len);
 
 /* runs a program from a given entry point function
  * if the entry point is NULL the global scope is executed */