From a4d0eed527d5e1b3c382e88350592dafcc1b2b89 Mon Sep 17 00:00:00 2001 From: Lukas Krickl Date: Sun, 5 Nov 2023 19:27:24 +0100 Subject: [PATCH] WIP: added simple tokenizer with tests --- doc/ulas.man | 15 ++++++++++++--- include/preproc.h | 24 ++++++++++++++++++++++++ include/ulas.h | 25 +++++++++++++------------ makefile | 2 +- src/preproc.c | 4 ++++ src/test.c | 26 ++++++++++++++++++++++---- src/ulas.c | 24 +++++++++++++++++++++++- 7 files changed, 99 insertions(+), 21 deletions(-) create mode 100644 include/preproc.h create mode 100644 src/preproc.c diff --git a/doc/ulas.man b/doc/ulas.man index d168766..4951e2c 100644 --- a/doc/ulas.man +++ b/doc/ulas.man @@ -22,6 +22,12 @@ The preprocessor is a text based way of modifying the input before the assembly step. +Preprocessor directives start with a # and have to be the first non-space character in a line. +All lines that are not a preprocessor directive, or are not part of a preprocessor block (macro, or if) +will be output as-is. If a line contains the name of a preprocessor define or macro, +surrounded by token delimiters (any character that is not alphanumeric or an underscore), +the define or macro will be expanded. 
+ Withing preprocessor macros - $0 will expand to the literal argument string - $1 to $9 will expand to the comma separated arguments of the macro @@ -30,14 +36,17 @@ Withing preprocessor macros Macros: -#macro +#macro my_macro label$$: adc $1, $2 -#endm +#endmacro + +my_macro a, b -.SH ASSEMBLY SYNTAX +.SH ASSEMBLY SYNTAX + .SH EXAMPLES diff --git a/include/preproc.h b/include/preproc.h new file mode 100644 index 0000000..c5bbfc6 --- /dev/null +++ b/include/preproc.h @@ -0,0 +1,24 @@ +#ifndef PREPROC_H_ +#define PREPROC_H_ + +#include + +/* + * Preprocessor + */ + +enum ulas_ppdefs { + ULAS_PP_DEF, + ULAS_PP_MACRO, +}; + +struct ulas_ppdef { + enum ulas_ppdefs type; +}; + +/** + * Tokenize and apply the preprocessor + */ +int ulas_preproc(FILE *dst, FILE *src); + +#endif diff --git a/include/ulas.h b/include/ulas.h index 870fa2b..e50ca2e 100644 --- a/include/ulas.h +++ b/include/ulas.h @@ -72,15 +72,6 @@ struct ulas_tok { union ulas_tokdat dat; }; -enum ulas_ppdirs { - ULAS_PP_DEF, - ULAS_PP_MACRO, -}; - -struct ulas_ppdir { - enum ulas_ppdirs type; -}; - /** * Symbols */ @@ -130,11 +121,21 @@ void ulas_init(struct ulas_config cfg); int ulas_main(struct ulas_config cfg); +char *ulas_strndup(const char *src, size_t n); + /** - * Tokenize and apply the preprocessor + * A token rule returns true when a token should end + * otherwise returns false */ -int ulas_preproc(FILE *dst, FILE *src); +typedef bool (*ulas_tokrule)(char current, char prev); -char *ulas_strndup(const char *src, size_t n); +// simple tokenizer at any space char +bool ulas_tokrulespace(char current, char prev); + +// tokenize according to pre-defined rules +// returns the number of bytes of line that were +// consumed or -1 on error +// returns 0 when no more tokens can be read +int ulas_tok(char *dst, const char *line, size_t n, ulas_tokrule rule); #endif diff --git a/makefile b/makefile index 0c0e66a..3465ca5 100644 --- a/makefile +++ b/makefile @@ -20,7 +20,7 @@ TEST_BNAME=testulas 
/*
 * Token-rule callback: returns true when `current` ends the token being read.
 * `prev` is the previously inspected character ('\0' for the first one).
 * Same type as the ulas_tokrule typedef in ulas.h; repeated here so that the
 * tokenizer unit is self-contained (an identical typedef redeclaration is
 * permitted since C11).
 */
typedef bool (*ulas_tokrule)(char current, char prev);

/**
 * Token rule that ends a token at any whitespace character.
 *
 * @param current character being inspected
 * @param prev    previously inspected character (unused by this rule)
 * @return true if `current` is whitespace according to isspace()
 */
bool ulas_tokrulespace(char current, char prev) {
  (void)prev;
  /* <ctype.h> functions are only defined for unsigned char values and EOF,
   * so a plain (possibly negative) char must be converted first. */
  return isspace((unsigned char)current) != 0;
}

/**
 * Copy the leading token of `line` into `dst`.
 *
 * Characters are copied until `rule` reports the end of the token, the end
 * of `line` is reached, or `n - 1` characters were copied. `dst` is always
 * NUL-terminated on success and never written beyond `dst[n - 1]`.
 *
 * NOTE: leading separator characters are not skipped; a line that starts
 * with a character rejected by `rule` yields an empty token and returns 0.
 *
 * @param dst  destination buffer of at least `n` bytes
 * @param line NUL-terminated input
 * @param n    size of `dst` in bytes, including room for the terminator
 * @param rule callback deciding where the token ends
 * @return number of bytes of `line` copied into `dst`, 0 if no token could
 *         be read, or -1 if `dst`, `line` or `rule` is NULL or `n` is 0
 */
int ulas_tok(char *dst, const char *line, size_t n, ulas_tokrule rule) {
  if (!dst || !line || !rule || n == 0) {
    return -1;
  }

  size_t i = 0;
  char prev = '\0';
  for (i = 0; i < n - 1 && line[i]; i++) {
    char current = line[i];
    if (rule(current, prev)) {
      break;
    }
    dst[i] = current;
    prev = current;
  }

  /* Terminate directly after the last copied byte. i <= n - 1 holds here,
   * so the write stays inside the buffer. */
  dst[i] = '\0';
  return (int)i;
}