From 9e23f856fa9ba195541468bd11192fea05ddc5c0 Mon Sep 17 00:00:00 2001 From: Lukas Krickl Date: Tue, 7 Nov 2023 08:06:10 +0100 Subject: [PATCH] Refactored tokenizer to use a dynamic string buffer --- include/preproc.h | 48 ----------------- include/ulas.h | 75 +++++++++++++++++++++++++- makefile | 2 +- src/preproc.c | 100 ---------------------------------- src/test.c | 36 +++++++++---- src/ulas.c | 134 +++++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 229 insertions(+), 166 deletions(-) delete mode 100644 include/preproc.h delete mode 100644 src/preproc.c diff --git a/include/preproc.h b/include/preproc.h deleted file mode 100644 index 24a04dd..0000000 --- a/include/preproc.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef PREPROC_H_ -#define PREPROC_H_ - -#include -#include - -/* - * Preprocessor - */ - -enum ulas_ppdirs { - ULAS_PPDIR_NONE = 0, - ULAS_PPDIR_DEF, - ULAS_PPDIR_MACRO, - ULAS_PPDIR_ENDMACRO, - ULAS_PPDIR_IFDEF, - ULAS_PPDIR_IFNDEF, - ULAS_PPDIR_ENDIF -}; - -enum ulas_ppdefs { - ULAS_PP_DEF, - ULAS_PP_MACRO, -}; - -struct ulas_ppdef { - enum ulas_ppdefs type; - bool undef; -}; - -struct ulas_preproc { - struct ulas_ppdef *defs; - size_t defslen; - - const char *srcname; - const char *dstname; -}; - -/** - * Tokenize and apply the preprocessor - */ -int ulas_preproc(FILE *dst, const char *dstname, FILE *src, - const char *srcname); - -// expand preproc into dst line -char *ulas_preprocexpand(char *line, size_t linemax, const char *raw_line, size_t *n); - -#endif diff --git a/include/ulas.h b/include/ulas.h index 10d5e0a..ab60854 100644 --- a/include/ulas.h +++ b/include/ulas.h @@ -75,6 +75,14 @@ struct ulas_config { bool verbose; }; +/** + * str + */ +struct ulas_str { + char *buf; + size_t maxlen; +}; + /** * Assembly context */ @@ -84,6 +92,41 @@ struct ulas { size_t strslen; }; +/** + * Preproc + */ + +enum ulas_ppdirs { + ULAS_PPDIR_NONE = 0, + ULAS_PPDIR_DEF, + ULAS_PPDIR_MACRO, + ULAS_PPDIR_ENDMACRO, + ULAS_PPDIR_IFDEF, + ULAS_PPDIR_IFNDEF, + ULAS_PPDIR_ENDIF +}; + +enum ulas_ppdefs { + ULAS_PP_DEF, + ULAS_PP_MACRO, +}; + +struct ulas_ppdef { + enum ulas_ppdefs type; + bool undef; +}; + +struct ulas_preproc { + struct ulas_ppdef *defs; + size_t defslen; + + const char *srcname; + const char *dstname; + + struct ulas_str tok; + struct ulas_str line; +}; + /** * Tokens */ @@ -174,9 +217,37 @@ typedef int (*ulas_tokrule)(int current); // returns the amount of bytes of line that were // consumed or -1 on error // returns 0 when no more tokens can be read -int ulas_tok(char *dst, const char *line, size_t n, ulas_tokrule rule); +int ulas_tok(struct ulas_str *dst, const char *line, size_t n, + ulas_tokrule rule); // smae as ulas_tok but modifies line -int ulas_tokline(char *dst, const char **line, size_t n, ulas_tokrule rule); +int ulas_tokline(struct ulas_str *dst, const char **line, size_t n, + ulas_tokrule rule); + +/** + * str + */ + +// create a string buffer +struct ulas_str ulas_str(size_t n); + +// ensure the string buffer is at least n bytes long, if not realloc +struct ulas_str ulas_strensr(struct ulas_str *s, size_t maxlen); + +void ulas_strfree(struct ulas_str *s); + +/* + * Preprocessor + */ + +/** + * Tokenize and apply the preprocessor + */ +int ulas_preproc(FILE *dst, const char *dstname, FILE *src, + const char *srcname); + +// expand preproc into dst line +char *ulas_preprocexpand(struct ulas_preproc *pp, const char *raw_line, + size_t *n); #endif diff --git a/makefile b/makefile index aa03e42..336b96f 100644 --- a/makefile +++ b/makefile @@ -20,7 +20,7 @@ TEST_BNAME=testulas BIN_INSTALL_DIR=/usr/local/bin MAN_INSTALL_DIR=/usr/local/man -_OBJ = $(MAIN) ulas.o preproc.o +_OBJ = $(MAIN) ulas.o OBJ = $(patsubst %,$(ODIR)/%,$(_OBJ)) all: bin test diff --git a/src/preproc.c b/src/preproc.c deleted file mode 100644 index 6ccc1d8..0000000 --- a/src/preproc.c +++ /dev/null @@ -1,100 +0,0 @@ -#include "preproc.h" -#include "ulas.h" -#include -#include -#include -#include - -char *ulas_preprocexpand(char *line, size_t linemax, const char *raw_line, - size_t *n) { - assert(*n <= linemax); - const char *praw_line = raw_line; - - char tok[ULAS_TOKMAX]; - - // go through all tokens, see if a define matches the token, - // if so expand it - // only expand macros if they match toks[0] though! - // otherwise memcpy the read bytes 1:1 into the new string - while (ulas_tokline(tok, &praw_line, ULAS_TOKMAX, isalnum)) { - } - - // TODO: actually expand here... - strncpy(line, raw_line, *n); - *n = strlen(line); - return line; -} - -int ulas_preprocline(struct ulas_preproc *pp, FILE *dst, const char *raw_line, - size_t n) { - if (n > ULAS_LINEMAX) { - ULASERR("%s: line exceeds %d (LINEMAX)\n", raw_line, ULAS_LINEMAX); - return -1; - } - assert(n <= ULAS_LINEMAX); - char line[ULAS_LINEMAX]; - const char *pline = line; - - ulas_preprocexpand(line, ULAS_LINEMAX, raw_line, &n); - const char *dirstrs[] = {"#define", "#macro", "#ifdef", "#ifndef", - "#endif", "#endmacro", NULL}; - enum ulas_ppdirs dirs[] = {ULAS_PPDIR_DEF, ULAS_PPDIR_MACRO, - ULAS_PPDIR_IFDEF, ULAS_PPDIR_IFNDEF, - ULAS_PPDIR_ENDIF, ULAS_PPDIR_ENDMACRO}; - - enum ulas_ppdirs found_dir = ULAS_PPDIR_NONE; - - char tok[ULAS_TOKMAX]; - - // check if the first token is any of the valid preproc directives - if (ulas_tokline(tok, &pline, ULAS_TOKMAX, isspace)) { - // not a preproc directive... - if (tok[0] != ULAS_TOK_PREPROC_BEGIN) { - goto found; - } - for (size_t i = 0; dirstrs[i]; i++) { - if (strncmp(dirstrs[i], tok, ULAS_TOKMAX) == 0) { - found_dir = dirs[i]; - goto found; - } - } - - ULASPANIC("Unknown preprocessor directive: %s\n", line); - return -1; - } -found: - - if (found_dir) { - // TODO: process directive - printf("%s preproc directive!\n", tok); - fputc('\0', dst); - } else { - assert(fwrite(line, 1, n, dst) == n); - } - - return 0; -} - -int ulas_preproc(FILE *dst, const char *dstname, FILE *src, - const char *srcname) { - char buf[ULAS_LINEMAX]; - memset(buf, 0, ULAS_LINEMAX); - int rc = 0; - - if (!dst || !src) { - ULASERR("[%s] Unable to read from dst or write to src!\n", srcname); - return -1; - } - - struct ulas_preproc pp = {NULL, 0, srcname, dstname}; - - while (fgets(buf, ULAS_LINEMAX, src) != NULL) { - if (ulas_preprocline(&pp, dst, buf, strlen(buf)) == -1) { - rc = -1; - goto fail; - } - } - -fail: - return rc; -} diff --git a/src/test.c b/src/test.c index a5a627c..bb88598 100644 --- a/src/test.c +++ b/src/test.c @@ -2,32 +2,33 @@ #include #include #include -#include "preproc.h" #define TESTBEGIN(name) printf("[test %s]\n", (name)); #define TESTEND(name) printf("[%s ok]\n", (name)); #define assert_tok(expected_tok, expected_ret, line, rule) \ { \ - char buf[ULAS_TOKMAX]; \ - memset(buf, 0, ULAS_TOKMAX); \ - assert(ulas_tok(buf, (line), ULAS_TOKMAX, (rule)) == (expected_ret)); \ - assert(strcmp(buf, expected_tok) == 0); \ + struct ulas_str dst = ulas_str(ULAS_TOKMAX); \ + memset(dst.buf, 0, ULAS_TOKMAX); \ + assert(ulas_tok(&dst, (line), ULAS_TOKMAX, (rule)) == (expected_ret)); \ + assert(strcmp(dst.buf, expected_tok) == 0); \ + ulas_strfree(&dst); \ } #define assert_tokline(expected_n, line, rule, ...) \ { \ const char *expect[] = __VA_ARGS__; \ size_t n = ULAS_TOKMAX; \ - char buf[n]; \ - memset(buf, 0, n); \ + struct ulas_str dst = ulas_str(n); \ + memset(dst.buf, 0, n); \ int i = 0; \ const char *pline = line; \ - while (ulas_tokline(buf, &pline, n, rule)) { \ - assert(strcmp(buf, expect[i]) == 0); \ + while (ulas_tokline(&dst, &pline, n, rule)) { \ + assert(strcmp(dst.buf, expect[i]) == 0); \ i++; \ } \ assert(i == expected_n); \ + ulas_strfree(&dst); \ } void test_tok(void) { @@ -45,6 +46,22 @@ void test_tok(void) { TESTEND("tok"); } +void test_strbuf(void) { + TESTBEGIN("strbuf"); + + struct ulas_str s = ulas_str(5); + assert(s.maxlen == 5); + assert(s.buf); + + s = ulas_strensr(&s, 10); + assert(s.maxlen == 10); + assert(s.buf); + + ulas_strfree(&s); + + TESTEND("strbuf"); +} + #define assert_preproc(expect_dst, expect_ret, input) \ { \ char dstbuf[ULAS_LINEMAX]; \ @@ -75,6 +92,7 @@ int main(int arc, char **argv) { }*/ test_tok(); + test_strbuf(); test_preproc(); return 0; diff --git a/src/ulas.c b/src/ulas.c index a6576ca..fbea85d 100644 --- a/src/ulas.c +++ b/src/ulas.c @@ -2,6 +2,7 @@ #include #include #include +#include FILE *ulasin = NULL; FILE *ulasout = NULL; @@ -49,15 +50,17 @@ int ulas_main(struct ulas_config cfg) { return 0; } -int ulas_tok(char *dst, const char *line, size_t n, ulas_tokrule rule) { - if (!dst || !line || n == 0) { +int ulas_tok(struct ulas_str *dst, const char *line, size_t n, + ulas_tokrule rule) { + if (!dst->buf || !line || n == 0) { return -1; } + ulas_strensr(dst, n + 1); int i = 0; int write = 0; -#define weld_tokcond (i < n - 1 && write < n - 1 && line[i]) +#define weld_tokcond (i < n && write < n && line[i]) // always skip leading terminators while (weld_tokcond && rule(line[i])) { @@ -68,17 +71,18 @@ int ulas_tok(char *dst, const char *line, size_t n, ulas_tokrule rule) { if (rule(line[i])) { break; } - dst[write] = line[i]; + dst->buf[write] = line[i]; i++; write++; } #undef weld_tokcond - dst[write] = '\0'; + dst->buf[write] = '\0'; return i; } -int ulas_tokline(char *dst, const char **line, size_t n, ulas_tokrule rule) { +int ulas_tokline(struct ulas_str *dst, const char **line, size_t n, + ulas_tokrule rule) { int rc = ulas_tok(dst, *line, n, rule); if (rc == -1) { return -1; @@ -86,3 +90,121 @@ int ulas_tokline(char *dst, const char **line, size_t n, ulas_tokrule rule) { *line += rc; return rc; } + +struct ulas_str ulas_str(size_t n) { + struct ulas_str str = {malloc(n), n}; + return str; +} + +struct ulas_str ulas_strensr(struct ulas_str *s, size_t maxlen) { + if (s->maxlen < maxlen) { + char *c = realloc(s->buf, maxlen); + if (!c) { + ULASPANIC("%s\n", strerror(errno)); + } + s->maxlen = maxlen; + s->buf = c; + } + + return *s; +} + +void ulas_strfree(struct ulas_str *s) { + if (s->buf) { + free(s->buf); + } +} + +char *ulas_preprocexpand(struct ulas_preproc *pp, const char *raw_line, + size_t *n) { + const char *praw_line = raw_line; + ulas_strensr(&pp->line, (*n) + 1); + + // go through all tokens, see if a define matches the token, + // if so expand it + // only expand macros if they match toks[0] though! + // otherwise memcpy the read bytes 1:1 into the new string + while (ulas_tokline(&pp->tok, &praw_line, ULAS_TOKMAX, isalnum)) { + } + + // TODO: actually expand here... + strncpy(pp->line.buf, raw_line, (*n) + 1); + *n = strlen(pp->line.buf); + return pp->line.buf; +} + +int ulas_preprocline(struct ulas_preproc *pp, FILE *dst, const char *raw_line, + size_t n) { + if (n > ULAS_LINEMAX) { + ULASERR("%s: line exceeds %d (LINEMAX)\n", raw_line, ULAS_LINEMAX); + return -1; + } + + char *line = ulas_preprocexpand(pp, raw_line, &n); + const char *pline = line; + + const char *dirstrs[] = {"#define", "#macro", "#ifdef", "#ifndef", + "#endif", "#endmacro", NULL}; + enum ulas_ppdirs dirs[] = {ULAS_PPDIR_DEF, ULAS_PPDIR_MACRO, + ULAS_PPDIR_IFDEF, ULAS_PPDIR_IFNDEF, + ULAS_PPDIR_ENDIF, ULAS_PPDIR_ENDMACRO}; + + enum ulas_ppdirs found_dir = ULAS_PPDIR_NONE; + + // check if the first token is any of the valid preproc directives + if (ulas_tokline(&pp->tok, &pline, ULAS_TOKMAX, isspace)) { + // not a preproc directive... + if (pp->tok.buf[0] != ULAS_TOK_PREPROC_BEGIN) { + goto found; + } + for (size_t i = 0; dirstrs[i]; i++) { + if (strncmp(dirstrs[i], pp->tok.buf, ULAS_TOKMAX) == 0) { + found_dir = dirs[i]; + goto found; + } + } + + ULASPANIC("Unknown preprocessor directive: %s\n", line); + return -1; + } +found: + + if (found_dir) { + // TODO: process directive + printf("%s preproc directive!\n", pp->tok.buf); + fputc('\0', dst); + } else { + assert(fwrite(line, 1, n, dst) == n); + } + + return 0; +} + +int ulas_preproc(FILE *dst, const char *dstname, FILE *src, + const char *srcname) { + char buf[ULAS_LINEMAX]; + memset(buf, 0, ULAS_LINEMAX); + int rc = 0; + + if (!dst || !src) { + ULASERR("[%s] Unable to read from dst or write to src!\n", srcname); + return -1; + } + + struct ulas_preproc pp = {NULL, 0, srcname, dstname}; + + pp.line = ulas_str(1); + pp.tok = ulas_str(1); + + while (fgets(buf, ULAS_LINEMAX, src) != NULL) { + if (ulas_preprocline(&pp, dst, buf, strlen(buf)) == -1) { + rc = -1; + goto fail; + } + } + +fail: + ulas_strfree(&pp.line); + ulas_strfree(&pp.tok); + return rc; +} -- 2.30.2