From: Lukas Krickl
Date: Mon, 6 Nov 2023 05:38:08 +0000 (+0100)
Subject: WIP: tokenizer
X-Git-Url: https://git.krickl.dev/?a=commitdiff_plain;h=291b51f36dca3552d943e0c8229c6ba69aab7efc;p=ulas%2F.git

WIP: tokenizer
---

diff --git a/include/preproc.h b/include/preproc.h
index c5bbfc6..7d069a9 100644
--- a/include/preproc.h
+++ b/include/preproc.h
@@ -1,6 +1,7 @@
 #ifndef PREPROC_H_
 #define PREPROC_H_
 
+#include <stdbool.h>
 #include <stdio.h>
 
 /*
@@ -14,11 +15,20 @@ enum ulas_ppdefs {
 
 struct ulas_ppdef {
   enum ulas_ppdefs type;
+  bool undef;
+};
+
+struct ulas_preproc {
+  struct ulas_ppdef *defs;
+  size_t defslen;
+
+  const char *srcname;
+  const char *dstname;
 };
 
 /**
  * Tokenize and apply the preprocessor
  */
-int ulas_preproc(FILE *dst, FILE *src);
+int ulas_preproc(FILE *dst, const char *dstname, FILE *src, const char *srcname);
 
 #endif
diff --git a/include/ulas.h b/include/ulas.h
index ba8c24c..ef7148b 100644
--- a/include/ulas.h
+++ b/include/ulas.h
@@ -8,6 +8,7 @@
 
 #define ULAS_PATHMAX 4096
 #define ULAS_LINEMAX 4096
+#define ULAS_TOKMAX 512
 
 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
@@ -142,4 +143,12 @@ bool ulas_tokrulespace(char current);
 // returns 0 when no more tokens can be read
 int ulas_tok(char *dst, const char *line, size_t n, ulas_tokrule rule);
 
+// tokenizes an entire line
+// returns an array of *n tokens that must be freed with ulas_toklinefree
+// returns NULL and sets *n to 0 if there are no tokens or an error occurred
+char **ulas_tokline(const char *line, size_t *n, ulas_tokrule rule);
+
+// frees the token array returned by ulas_tokline
+void ulas_toklinefree(char **data, size_t n);
+
 #endif
diff --git a/src/preproc.c b/src/preproc.c
index a469713..9d6d619 100644
--- a/src/preproc.c
+++ b/src/preproc.c
@@ -1,17 +1,50 @@
 #include "preproc.h"
 #include "ulas.h"
+#include <stdio.h>
+#include <string.h>
 
-int ulas_preproc(FILE *dst, FILE *src) {
+
+// Processes a single source line of length n and writes the result to dst.
+// Directive handling is not implemented yet, so the line is copied verbatim.
+// Returns 0 on success and -1 if the line could not be written in full.
+int ulas_preprocline(struct ulas_preproc *pp, FILE *dst, const char *line,
+                     size_t n) {
+
+  // check if the first token is any of the valid preproc directives
+
+  // the write must not live inside assert(): NDEBUG builds would drop it
+  if (fwrite(line, 1, n, dst) != n) {
+    ULASERR("[%s] Unable to write to dst!\n", pp->srcname);
+    return -1;
+  }
+
+  return 0;
+}
+
+// Reads src line by line and passes every line to ulas_preprocline.
+// srcname and dstname are stored in the preprocessor state; srcname also
+// prefixes error messages.
+// Returns 0 on success and -1 on error.
+int ulas_preproc(FILE *dst, const char *dstname, FILE *src,
+                 const char *srcname) {
   char buf[ULAS_LINEMAX];
   memset(buf, 0, ULAS_LINEMAX);
+  int rc = 0;
 
   if (!dst || !src) {
-    ULASERR("[preproc] Unable to read from dst or write to src!\n");
+    ULASERR("[%s] Unable to read from src or write to dst!\n", srcname);
     return -1;
   }
 
-  while (fgets(buf, ULAS_LINEMAX, src) == NULL) {
+  struct ulas_preproc pp = {NULL, 0, srcname, dstname};
+
+  while (fgets(buf, ULAS_LINEMAX, src) != NULL) {
+    if (ulas_preprocline(&pp, dst, buf, strlen(buf)) == -1) {
+      rc = -1;
+      goto fail;
+    }
   }
 
-  return 0;
+fail:
+  return rc;
 }
diff --git a/src/test.c b/src/test.c
index e17777c..280dd44 100644
--- a/src/test.c
+++ b/src/test.c
@@ -8,12 +8,15 @@
 
 #define assert_tok(expected_tok, expected_ret, line, rule)                     \
   {                                                                            \
-    char buf[256];                                                             \
-    memset(buf, 0, 256);                                                       \
-    assert(ulas_tok(buf, (line), 256, (rule)) == (expected_ret));              \
+    char buf[ULAS_TOKMAX];                                                     \
+    memset(buf, 0, ULAS_TOKMAX);                                               \
+    assert(ulas_tok(buf, (line), ULAS_TOKMAX, (rule)) == (expected_ret));      \
     assert(strcmp(buf, expected_tok) == 0);                                    \
   }
 
+#define assert_tokline(expected_toks, expected_n, line, rule)                  \
+  {}
+
 void test_tok(void) {
   TESTBEGIN("tok");
 
@@ -23,6 +26,9 @@ void test_tok(void) {
   assert_tok("", 0, "", ulas_tokrulespace);
   assert_tok("", -1, NULL, ulas_tokrulespace);
 
+  assert_tokline(({"test", "tokens", "with", "line"}), 4,
+                 " test tokens with line", ulas_tokrulespace);
+
   TESTEND("tok");
 }
 
@@ -30,18 +36,18 @@ void test_tok(void) {
   {                                                                            \
     char dstbuf[ULAS_LINEMAX];                                                 \
     memset(dstbuf, 0, ULAS_LINEMAX);                                           \
-    FILE *src = fmemopen((input), strlen((input)), "r");                       \
-    FILE *dst = fmemopen(dstbuf, ULAS_LINEMAX, "w");                           \
-    assert(ulas_preproc(dst, src) == (expect_ret));                            \
-    assert(strcmp(dstbuf, (expect_dst)) == 0);                                 \
+    FILE *src = fmemopen((input), strlen((input)), "re");                      \
+    FILE *dst = fmemopen(dstbuf, ULAS_LINEMAX, "we");                          \
+    assert(ulas_preproc(dst, "testdst", src, "testsrc") == (expect_ret));      \
     fclose(src);                                                               \
     fclose(dst);                                                               \
+    assert(strcmp(dstbuf, (expect_dst)) == 0);                                 \
   }
 
 void test_preproc(void) {
   TESTBEGIN("preproc");
 
-  assert_preproc("", 0, "test");
+  assert_preproc("test", 0, "test");
 
   TESTEND("preproc");
 }
diff --git a/src/ulas.c b/src/ulas.c
index 3464a9b..a80853c 100644
--- a/src/ulas.c
+++ b/src/ulas.c
@@ -86,3 +86,58 @@ int ulas_tok(char *dst, const char *line, size_t n, ulas_tokrule rule) {
   dst[write + 1] = '\0';
   return i;
 }
+
+// Splits line into tokens by calling ulas_tok repeatedly.
+// Returns an array of *n heap-allocated strings that has to be released with
+// ulas_toklinefree. Returns NULL and sets *n to 0 when the line holds no
+// tokens, ulas_tok reports an error or an allocation fails.
+char **ulas_tokline(const char *line, size_t *n, ulas_tokrule rule) {
+  char buf[ULAS_TOKMAX];
+
+  char **dst = NULL;
+  *n = 0;
+
+  int tokrc = 0;
+  size_t read = 0;
+  while ((tokrc = ulas_tok(buf, line + read, ULAS_TOKMAX, rule)) > 0) {
+    read += (size_t)tokrc;
+
+    // only count the new slot once it holds a token so the fail path never
+    // frees an uninitialized pointer
+    char **newdst = realloc(dst, sizeof(char *) * (*n + 1));
+    if (!newdst) {
+      goto fail;
+    }
+    dst = newdst;
+
+    dst[*n] = strndup(buf, ULAS_TOKMAX);
+    if (!dst[*n]) {
+      goto fail;
+    }
+    *n = *n + 1;
+  }
+
+  // the loop also ends on a negative return, which is an ulas_tok error
+  if (tokrc < 0) {
+    goto fail;
+  }
+
+  return dst;
+fail:
+  ulas_toklinefree(dst, *n);
+  *n = 0;
+  return NULL;
+}
+
+// Releases a token array created by ulas_tokline together with its n tokens.
+// Passing NULL is allowed and does nothing.
+void ulas_toklinefree(char **data, size_t n) {
+  if (!data) {
+    return;
+  }
+  for (size_t i = 0; i < n; i++) {
+    free(data[i]);
+  }
+
+  free(data);
+}