From c68241b648927d317c1a0030a0986c6365489d27 Mon Sep 17 00:00:00 2001
From: Lukas Krickl
Date: Sun, 5 Nov 2023 19:42:40 +0100
Subject: [PATCH] The tokenizer now skips leading terminator characters

---
Review notes (free-form text in this position is not part of the commit):
- ulas_tok: the terminating NUL is now stored at dst[write]. Storing it at
  dst[write + 1] left dst[write] unwritten and, if n is the capacity of dst,
  could write to dst[n] once write reached n - 1.
- ulas_tokrulespace: the argument is converted to unsigned char before
  isspace(), whose behavior is undefined for negative char values.
- test_tok: the new input needs two leading blanks to produce the expected
  return value of 6 (two skipped blanks plus the four bytes of "test").

 include/ulas.h |  4 ++--
 src/test.c     |  1 +
 src/ulas.c     | 19 ++++++++++++-------
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/ulas.h b/include/ulas.h
index e50ca2e..b7ff99b 100644
--- a/include/ulas.h
+++ b/include/ulas.h
@@ -127,10 +127,10 @@ char *ulas_strndup(const char *src, size_t n);
  * A token rule returns true when a token should end
  * otherwise returns false
  */
-typedef bool (*ulas_tokrule)(char current, char prev);
+typedef bool (*ulas_tokrule)(char current);
 
 // simple tokenizer at any space char
-bool ulas_tokrulespace(char current, char prev);
+bool ulas_tokrulespace(char current);
 
 // tokenisze according to pre-defined rules
 // returns the amount of bytes of line that were
diff --git a/src/test.c b/src/test.c
index 11eab36..da84d42 100644
--- a/src/test.c
+++ b/src/test.c
@@ -17,6 +17,7 @@ void test_tok(void) {
   TESTBEGIN("tok");
 
   assert_tok("test", 4, "test tokens", ulas_tokrulespace);
+  assert_tok("test", 6, "  test tokens", ulas_tokrulespace);
   assert_tok("tokens", 6, "tokens", ulas_tokrulespace);
   assert_tok("", 0, "", ulas_tokrulespace);
   assert_tok("", -1, NULL, ulas_tokrulespace);
diff --git a/src/ulas.c b/src/ulas.c
index b060a9e..6adbe18 100644
--- a/src/ulas.c
+++ b/src/ulas.c
@@ -56,7 +56,7 @@ int ulas_main(struct ulas_config cfg) {
   return 0;
 }
 
-bool ulas_tokrulespace(char current, char prev) { return isspace(current); }
+bool ulas_tokrulespace(char current) { return isspace((unsigned char)current); }
 
 int ulas_tok(char *dst, const char *line, size_t n, ulas_tokrule rule) {
   if (!dst || !line || n == 0) {
@@ -64,17 +64,22 @@ int ulas_tok(char *dst, const char *line, size_t n, ulas_tokrule rule) {
   }
 
   int i = 0;
-  char prev = '\0';
+  int write = 0;
   char current = '\0';
-  for (i = 0; i < n - 1 && line[i]; i++) {
-    prev = current;
+
+  // always skip leading terminators
+  while (line[i] && i < n - 1 && rule(line[i])) {
+    i++;
+  }
+
+  for (; i < n - 1 && write < n - 1 && line[i]; i++, write++) {
     current = line[i];
-    if (rule(current, prev)) {
+    if (rule(current)) {
       break;
     }
-    dst[i] = current;
+    dst[write] = current;
   }
-  dst[i + 1] = '\0';
+  dst[write] = '\0';
   return i;
 }
 
-- 
2.30.2