From 958c9f0d6986f915c69129167a57abc23d4b0202 Mon Sep 17 00:00:00 2001 From: Lukas Krickl Date: Sat, 18 Nov 2023 18:25:48 +0100 Subject: [PATCH] Simplified token struct --- include/ulas.h | 29 +++++++++++------------------ src/test.c | 19 ++++++++----------- src/ulas.c | 42 +++++++++++++++++++----------------------- 3 files changed, 38 insertions(+), 52 deletions(-) diff --git a/include/ulas.h b/include/ulas.h index 1798b3f..ac48b3f 100644 --- a/include/ulas.h +++ b/include/ulas.h @@ -90,32 +90,25 @@ struct ulas_str { */ // any token before 256 is just the literal char value -enum ulas_toks { - ULAS_TOKLITERAL = 256, - ULAS_TOKSYMBOL, +// primitive data types +enum ulas_type { + ULAS_SYMBOL = 256, + ULAS_INT, + ULAS_STR }; -// primitive data types -enum ulas_type { ULAS_INT, ULAS_STR }; - // data type value union ulas_val { int intv; char *strv; }; -// literal value -struct ulas_lit { +struct ulas_tok { enum ulas_type type; union ulas_val val; }; -struct ulas_tok { - enum ulas_toks type; - struct ulas_lit lit; -}; - // the token buffer is a dynamically allocated token store struct ulas_tokbuf { struct ulas_tok *buf; @@ -205,7 +198,7 @@ struct ulas_preproc { struct ulas_sym { char *name; - struct ulas_lit lit; + struct ulas_tok tok; }; /** @@ -236,8 +229,8 @@ struct ulas_explit { }; struct ulas_expgrp { - // points to the first expression - // in this group + // points to the first expression + // in this group long expr; // how many expressions belong to the group long len; @@ -356,9 +349,9 @@ char *ulas_preprocexpand(struct ulas_preproc *pp, const char *raw_line, */ // convert literal to its int value -int ulas_litint(struct ulas_lit *lit, int *rc); +int ulas_litint(struct ulas_tok *lit, int *rc); // convert literal to its char value -char *ulas_litchar(struct ulas_lit *lit, int *rc); +char *ulas_litchar(struct ulas_tok *lit, int *rc); struct ulas_tokbuf ulas_tokbuf(void); void ulas_tokbufpush(struct ulas_tokbuf *tb, struct ulas_tok tok); diff --git a/src/test.c b/src/test.c index e70d3a0..65589b9 100644 --- a/src/test.c +++ b/src/test.c @@ -147,9 +147,8 @@ void test_preproc(void) { int rc = 0; \ struct ulas_tok tok = ulas_totok((token), strlen(token), &rc); \ assert((expected_rc) == rc); \ - assert(tok.type == ULAS_TOKLITERAL); \ - assert(tok.lit.type == ULAS_INT); \ - assert(tok.lit.val.intv == (expected_val)); \ + assert(tok.type == ULAS_INT); \ + assert(tok.val.intv == (expected_val)); \ } #define ASSERT_STR_TOTOK(expected_val, expected_rc, token) \ @@ -157,10 +156,9 @@ void test_preproc(void) { int rc = 0; \ struct ulas_tok tok = ulas_totok((token), strlen(token), &rc); \ assert((expected_rc) == rc); \ - assert(tok.type == ULAS_TOKLITERAL); \ - assert(tok.lit.type == ULAS_STR); \ - assert(strcmp((expected_val), tok.lit.val.strv) == 0); \ - free(tok.lit.val.strv); \ + assert(tok.type == ULAS_STR); \ + assert(strcmp((expected_val), tok.val.strv) == 0); \ + free(tok.val.strv); \ } #define ASSERT_SYMBOL_TOTOK(expected_val, expected_rc, token) \ @@ -168,10 +166,9 @@ void test_preproc(void) { int rc = 0; \ struct ulas_tok tok = ulas_totok((token), strlen(token), &rc); \ assert((expected_rc) == rc); \ - assert(tok.type == ULAS_TOKSYMBOL); \ - assert(tok.lit.type == ULAS_STR); \ - assert(strcmp((expected_val), tok.lit.val.strv) == 0); \ - free(tok.lit.val.strv); \ + assert(tok.type == ULAS_SYMBOL); \ + assert(strcmp((expected_val), tok.val.strv) == 0); \ + free(tok.val.strv); \ } #define ASSERT_UNEXPECTED_TOTOK(expected_rc, token) \ diff --git a/src/ulas.c b/src/ulas.c index c75190f..486fc06 100644 --- a/src/ulas.c +++ b/src/ulas.c @@ -278,26 +278,25 @@ struct ulas_tok ulas_totok(char *buf, unsigned long n, int *rc) { goto end; case '"': // string - tok.type = ULAS_TOKLITERAL; - tok.lit.type = ULAS_STR; + tok.type = ULAS_STR; // FIXME: this likely mallocs a few extra bytes // but honestly its probably fine - tok.lit.val.strv = malloc(n * sizeof(char) + 1); - memset(tok.lit.val.strv, 0, n); + tok.val.strv = malloc(n * sizeof(char) + 1); + memset(tok.val.strv, 0, n); long i = 0; while (*buf && *buf != '\"') { if (*buf == '\\') { buf++; - tok.lit.val.strv[i] = ulas_unescape(*buf, rc); + tok.val.strv[i] = ulas_unescape(*buf, rc); } else { - tok.lit.val.strv[i] = *buf; + tok.val.strv[i] = *buf; } i++; buf++; } - tok.lit.val.strv[i] = '\0'; + tok.val.strv[i] = '\0'; if (*buf != '\"') { *rc = -1; @@ -309,24 +308,22 @@ struct ulas_tok ulas_totok(char *buf, unsigned long n, int *rc) { default: if (isdigit(first)) { // integer - tok.type = ULAS_TOKLITERAL; - tok.lit.type = ULAS_INT; + tok.type = ULAS_INT; // 0b prefix is not supported in strtol... so we implement it by hand if (*buf == 'b') { buf++; - tok.lit.val.intv = (int)strtol(buf, &buf, 2); + tok.val.intv = (int)strtol(buf, &buf, 2); } else { - tok.lit.val.intv = (int)strtol(buf - 1, &buf, 0); + tok.val.intv = (int)strtol(buf - 1, &buf, 0); } } else if (first == '\'') { - tok.type = ULAS_TOKLITERAL; - tok.lit.type = ULAS_INT; + tok.type = ULAS_INT; if (*buf == '\\') { buf++; - tok.lit.val.intv = ulas_unescape(*buf, rc); + tok.val.intv = ulas_unescape(*buf, rc); } else { - tok.lit.val.intv = (int)*buf; + tok.val.intv = (int)*buf; } buf++; if (*buf != '\'') { @@ -339,9 +336,8 @@ struct ulas_tok ulas_totok(char *buf, unsigned long n, int *rc) { } else if (ulas_isname(buf - 1, n)) { // literal token // we resolve it later, will need to malloc here for now - tok.type = ULAS_TOKSYMBOL; - tok.lit.type = ULAS_STR; - tok.lit.val.strv = strndup(buf - 1, n); + tok.type = ULAS_SYMBOL; + tok.val.strv = strndup(buf - 1, n); buf += n - 1; } else { ULASERR("Unexpected token: %s\n", buf); @@ -861,7 +857,7 @@ fail: * Literals, tokens and expressions */ -int ulas_litint(struct ulas_lit *lit, int *rc) { +int ulas_litint(struct ulas_tok *lit, int *rc) { if (lit->type != ULAS_INT) { *rc = -1; return 0; @@ -870,7 +866,7 @@ int ulas_litint(struct ulas_lit *lit, int *rc) { return lit->val.intv; } -char *ulas_litchar(struct ulas_lit *lit, int *rc) { +char *ulas_litchar(struct ulas_tok *lit, int *rc) { if (lit->type != ULAS_STR) { *rc = -1; return NULL; @@ -908,9 +904,9 @@ void ulas_tokbufpush(struct ulas_tokbuf *tb, struct ulas_tok tok) { void ulas_tokbufclear(struct ulas_tokbuf *tb) { for (long i = 0; i < tb->len; i++) { struct ulas_tok *t = &tb->buf[i]; - if ((t->type == ULAS_TOKLITERAL || t->type == ULAS_TOKSYMBOL) && - t->lit.type == ULAS_STR) { - free(t->lit.val.strv); + if (t->type == ULAS_SYMBOL || + t->type == ULAS_STR) { + free(t->val.strv); } } tb->len = 0; -- 2.30.2