From 5cb259efee68275813114d19ef670e889ca5bcfe Mon Sep 17 00:00:00 2001 From: Lukas Krickl Date: Wed, 15 Nov 2023 19:36:46 +0100 Subject: [PATCH] WIP: tokenizer --- include/ulas.h | 6 +++--- src/ulas.c | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/ulas.h b/include/ulas.h index d4c30fa..f5a87c2 100644 --- a/include/ulas.h +++ b/include/ulas.h @@ -89,8 +89,8 @@ struct ulas_str { * Tokens */ -// any token before 256 is just the literal char value -enum ulas_toks { ULAS_TOKLITERAL = 256, ULAS_TOKINT, ULAS_TOKCHAR, ULAS_TOKSTRING }; +// any token before 256 is just the literal char value +enum ulas_toks { ULAS_TOKLITERAL = 256, ULAS_TOKSYMBOL }; // primitive data types enum ulas_type { ULAS_INT, ULAS_STR }; @@ -280,7 +280,7 @@ char *ulas_strndup(const char *src, unsigned long n); int ulas_tok(struct ulas_str *dst, const char **out_line, unsigned long n); // converts a token string to a token struct -struct ulas_tok ulas_totok(const char *buf, unsigned long n, int *rc); +struct ulas_tok ulas_totok(char *buf, unsigned long n, int *rc); int ulas_tokuntil(struct ulas_str *dst, char c, const char **out_line, unsigned long n); diff --git a/src/ulas.c b/src/ulas.c index 5a14180..c43dbaa 100644 --- a/src/ulas.c +++ b/src/ulas.c @@ -1,6 +1,7 @@ #include "ulas.h" #include #include +#include #include #include @@ -217,7 +218,7 @@ int ulas_tokuntil(struct ulas_str *dst, char c, const char **out_line, return i; } -struct ulas_tok ulas_totok(const char *buf, unsigned long n, int *rc) { +struct ulas_tok ulas_totok(char *buf, unsigned long n, int *rc) { struct ulas_tok tok; memset(&tok, 0, sizeof(tok)); @@ -227,6 +228,7 @@ struct ulas_tok ulas_totok(const char *buf, unsigned long n, int *rc) { } unsigned char first = buf[0]; + buf++; switch (first) { case ';': @@ -238,10 +240,20 @@ struct ulas_tok ulas_totok(const char *buf, unsigned long n, int *rc) { default: if (isdigit(first)) { // integer + tok.type = ULAS_TOKLITERAL; + tok.lit.type 
= ULAS_INT; + tok.lit.val.int_value = (int)strtol(buf, &buf, 0); + } else if (n == 3 && first == '\'') { + tok.type = ULAS_TOKLITERAL; + tok.lit.type = ULAS_INT; + // TODO: read char value between ' and ' and unescape } else if (ulas_isname(buf, n)) { // literal. we can resolve it now // because literals need to be able to be resolved // for every line, unless they are a label! + // TODO: read and unescape string between " and " + tok.type = ULAS_TOKSYMBOL; + tok.lit.type = ULAS_STR; } else { ULASERR("Unexpected token: %s\n", buf); @@ -252,6 +264,11 @@ struct ulas_tok ulas_totok(const char *buf, unsigned long n, int *rc) { } end: + // did we consume the entire token? + if (buf[0] != '\0') { + *rc = -1; + } + return tok; } -- 2.30.2