From: Lukas Krickl
Date: Fri, 17 Nov 2023 06:38:01 +0000 (+0100)
Subject: WIP: tokenizer
X-Git-Url: https://git.krickl.dev/?a=commitdiff_plain;h=50292cedf2593e65fe1caef4545867375bd965f6;p=ulas%2F.git

WIP: tokenizer
---

Review notes (not part of the original commit):
 - ulas_unescape: fixed the "esxcape" typo in the error message, added the
   missing \t and \v escapes and documented the rc/return contract.
 - Hunk headers for src/ulas.c were recomputed for the added lines; the
   post-image blob id on its index line was not recomputed.
 - Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch; verify whitespace against the tree before applying.

diff --git a/src/test.c b/src/test.c
index d4f6368..2433618 100644
--- a/src/test.c
+++ b/src/test.c
@@ -142,6 +142,11 @@ void test_preproc(void) {
   ulascfg.preproc_only = 0;
 }
 
+void test_totok(void) {
+  TESTBEGIN("totok");
+  TESTEND("totok");
+}
+
 int main(int arc, char **argv) {
   ulas_init(ulas_cfg_from_env());
 
@@ -152,6 +157,7 @@ int main(int arc, char **argv) {
   test_tok();
   test_strbuf();
   test_preproc();
+  test_totok();
 
   ulas_free();
 
diff --git a/src/ulas.c b/src/ulas.c
index c43dbaa..5bb481d 100644
--- a/src/ulas.c
+++ b/src/ulas.c
@@ -218,6 +218,44 @@ int ulas_tokuntil(struct ulas_str *dst, char c, const char **out_line,
   return i;
 }
 
+// Maps the character that follows a backslash to the value it denotes.
+// Supports the simple escapes \' \\ \" \n \a \b \f \r \t \v \? and \0.
+// On any other character an error is logged, *rc is set to -1 and '\0' is
+// returned; *rc is not written on success. Because \0 is itself a valid
+// escape, callers must check *rc rather than the return value for failure.
+int ulas_unescape(char c, int *rc) {
+  switch (c) {
+  case '\'':
+  case '\\':
+  case '"':
+    return c;
+  case 'n':
+    return '\n';
+  case 'a':
+    return '\a';
+  case 'b':
+    return '\b';
+  case 'f':
+    return '\f';
+  case 'r':
+    return '\r';
+  case 't':
+    return '\t';
+  case 'v':
+    return '\v';
+  case '?':
+    return '\?';
+  case '0':
+    return '\0';
+  default:
+    ULASERR("Unexpected escape sequence: \\%c\n", c);
+    *rc = -1;
+    break;
+  }
+
+  return '\0';
+}
+
 struct ulas_tok ulas_totok(char *buf, unsigned long n, int *rc) {
   struct ulas_tok tok;
   memset(&tok, 0, sizeof(tok));
@@ -243,10 +281,22 @@ struct ulas_tok ulas_totok(char *buf, unsigned long n, int *rc) {
       tok.type = ULAS_TOKLITERAL;
       tok.lit.type = ULAS_INT;
       tok.lit.val.int_value = (int)strtol(buf, &buf, 0);
-    } else if (n == 3 && first == '\'') {
+    } else if (first == '\'') {
       tok.type = ULAS_TOKLITERAL;
       tok.lit.type = ULAS_INT;
-      // TODO: read char value between ' and ' and unescape
+      buf++;
+      if (*buf == '\\') {
+        buf++;
+        tok.lit.val.int_value = ulas_unescape(*buf, rc);
+      } else {
+        tok.lit.val.int_value = (int)*buf;
+      }
+      buf++;
+      if (*buf != '\'') {
+        *rc = -1;
+        ULASERR("Unterminated character sequence\n");
+      }
+      break;
     } else if (ulas_isname(buf, n)) {
       // literal. we can resolve it now
       // because literals need to be able to be resolved