From 352a67f9d9b8b5895f7d1b5919661a89b6ebbc9a Mon Sep 17 00:00:00 2001
From: Lukas Krickl
Date: Wed, 15 Nov 2023 17:29:41 +0100
Subject: [PATCH] WIP: tokenizer

---
 include/ulas.h |  6 +++++-
 src/ulas.c     | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/include/ulas.h b/include/ulas.h
index 2fdabda..6931ee2 100644
--- a/include/ulas.h
+++ b/include/ulas.h
@@ -89,7 +89,8 @@ struct ulas_str {
  * Tokens
  */
 
-enum ulas_toks { ULAS_TOKLITERAL, ULAS_TOKINT, ULAS_TOKCHAR, ULAS_TOKSTRING };
+// any token before 256 is just the literal char value
+enum ulas_toks { ULAS_TOKLITERAL = 256, ULAS_TOKINT, ULAS_TOKCHAR, ULAS_TOKSTRING };
 
 // primitive data types
 enum ulas_type { ULAS_INT, ULAS_STR };
@@ -278,6 +279,9 @@ char *ulas_strndup(const char *src, size_t n);
 // returns 0 when no more tokens can be read
 int ulas_tok(struct ulas_str *dst, const char **out_line, size_t n);
 
+// converts a token string to a token struct
+struct ulas_tok ulas_totok(const char *buf, size_t n, int *rc);
+
 int ulas_tokuntil(struct ulas_str *dst, char c, const char **out_line,
                   size_t n);
 
diff --git a/src/ulas.c b/src/ulas.c
index b8e853d..54e6c0d 100644
--- a/src/ulas.c
+++ b/src/ulas.c
@@ -217,6 +217,40 @@ int ulas_tokuntil(struct ulas_str *dst, char c, const char **out_line,
   return i;
 }
 
+// Converts the token text in buf (n bytes) to a token struct.
+// ';' yields a token whose type is the character value itself.
+// '"' (string) is not implemented yet and yields a zeroed token.
+// Any other first character, or n == 0, sets *rc to -1 and returns a
+// zeroed token. *rc is not written on success.
+struct ulas_tok ulas_totok(const char *buf, size_t n, int *rc) {
+  struct ulas_tok tok;
+  memset(&tok, 0, sizeof(tok));
+
+  if (n == 0) {
+    *rc = -1;
+    goto end;
+  }
+
+  unsigned char first = buf[0];
+
+  switch (first) {
+  case ';':
+    tok.type = first;
+    goto end;
+  case '"':
+    // string
+    break;
+  default:
+    // buf is only known to hold n bytes, so bound the print
+    ULASERR("Unexpected token: %.*s\n", (int)n, buf);
+    *rc = -1;
+    goto end;
+  }
+
+end:
+  return tok;
+}
+
 #undef WELD_TOKCOND
 #undef WLED_TOKISTERM
 
@@ -773,7 +807,26 @@ void ulas_tokbuffree(struct ulas_tokbuf *tb) { free(tb->buf); }
  * Assembly step
  */
 
-int ulas_intexpr(const char **line, size_t n, int *rc) { return -1; }
+// Reads tokens from *line via ulas_tok until it stops returning a
+// positive value. WIP: tokens are not interpreted yet, so the result
+// is always -1. Sets *rc to -1 when ulas_tok returns a negative value.
+int ulas_intexpr(const char **line, size_t n, int *rc) {
+  // read tokens until the next token is end of line, ; or ,
+
+  int tokrc = 0;
+  // the assignment must be parenthesized: '>' binds tighter than '=',
+  // so without it tokrc would only ever hold 0 or 1
+  while ((tokrc = ulas_tok(&ulas.tok, line, n)) > 0) {
+    // interpret the token
+  }
+
+  // NOTE(review): assumes ulas_tok reports errors as a negative value
+  if (tokrc < 0) {
+    *rc = -1;
+  }
+
+  return -1;
+}
 
 int ulas_asmline(FILE *dst, FILE *src, const char *line, size_t n) {
   const char *start = line;
-- 
2.30.2