Added tests for string tokens and escaping
author Lukas Krickl <lukas@krickl.dev>
Fri, 17 Nov 2023 16:07:54 +0000 (17:07 +0100)
committer Lukas Krickl <lukas@krickl.dev>
Fri, 17 Nov 2023 16:07:54 +0000 (17:07 +0100)
src/test.c
src/ulas.c

index 77900832287cc37a9c3960d3f2f18489ee39c6b4..5295c94190a1c5ee177b2b7fc47a8d300c7e600c 100644 (file)
@@ -152,6 +152,17 @@ void test_preproc(void) {
     assert(tok.lit.val.intv == (expected_val));                                \
   }
 
+#define ASSERT_STR_TOTOK(expected_val, expected_rc, token)                     \
+  { /* tokenize `token`; expect rc and a string literal == expected_val */     \
+    int rc = 0;                                                                \
+    struct ulas_tok tok = ulas_totok((token), strlen(token), &rc);             \
+    assert((expected_rc) == rc);                                               \
+    assert(tok.type == ULAS_TOKLITERAL);                                       \
+    assert(tok.lit.type == ULAS_STR);                                          \
+    assert(strcmp((expected_val), tok.lit.val.strv) == 0);                     \
+    free(tok.lit.val.strv); /* strv is heap-allocated by ulas_totok */         \
+  }
+
 void test_totok(void) {
   TESTBEGIN("totok");
 
@@ -171,6 +182,11 @@ void test_totok(void) {
   // unterminated escape
   ASSERT_INT_TOTOK('\n', -1, "'\\n");
 
+  // string token
+  ASSERT_STR_TOTOK("test", 0, "\"test\"");
+  
+  ASSERT_STR_TOTOK("test\n\"123\"", 0, "\"test\\n\\\"123\\\"\"");
+
   TESTEND("totok");
 }
 
index 763079b23f91f795150224433a84e767018e4d5a..5ddb9bd3adfecd38d18da87a241b409bc550b397 100644 (file)
@@ -258,11 +258,53 @@ struct ulas_tok ulas_totok(char *buf, unsigned long n, int *rc) {
   buf++;
 
   switch (first) {
+  case '+':
+  case '-':
+  case '*':
+  case '/':
+  case '!':
+  case '~':
+  case '|':
+  case '&':
+  case '%':
+  case '(':
+  case ')':
+  case '[':
+  case ']':
+  case ',':
   case ';':
+    // single char tokens
     tok.type = first;
     goto end;
   case '"':
     // string
+    tok.type = ULAS_TOKLITERAL;
+    tok.lit.type = ULAS_STR;
+
+    // FIXME: this likely mallocs a few extra bytes
+    // but honestly its probably fine
+    tok.lit.val.strv = malloc(n * sizeof(char) + 1);
+    memset(tok.lit.val.strv, 0, n);
+
+    long i = 0;
+    while (*buf && *buf != '\"') {
+      if (*buf == '\\') {
+        buf++;
+        tok.lit.val.strv[i] = ulas_unescape(*buf, rc);
+      } else {
+        tok.lit.val.strv[i] = *buf;
+      }
+      i++;
+      buf++;
+    }
+    tok.lit.val.strv[i] = '\0';
+
+    if (*buf != '\"') {
+      *rc = -1;
+      ULASERR("Unterminated string sequence\n");
+      goto end;
+    }
+    buf++;
     break;
   default:
     if (isdigit(first)) {