1 files changed, 46 insertions, 11 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 30ca6bd..8cf9b31 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -1,3 +1,4 @@
+// https://dev.w3.org/html5/spec-LC/tokenization.html#tokenization
 #include "tokenizer.hpp"
 #include "code_point.h"
 #include "html_token.hpp"
@@ -37,14 +38,30 @@ namespace CodePoints {
     const code_point_t UPPERCASE_X = 0x0058;
     const code_point_t DIGIT_ZERO = 0x0030;
     const code_point_t DIGIT_NINE = 0x0039;
+
+    inline bool is_decimal(code_point_t c) { // true when c lies in [DIGIT_ZERO, DIGIT_NINE]
+        return DIGIT_ZERO <= c && c <= DIGIT_NINE;
+    }
+
+    inline bool is_hex(code_point_t c) { // true for the digit, LOWERCASE_A..F and UPPERCASE_A..F ranges
+        if (is_decimal(c)) return true; // DIGIT_ZERO..DIGIT_NINE
+        if (LOWERCASE_A <= c && c <= LOWERCASE_F) return true;
+        return UPPERCASE_A <= c && c <= UPPERCASE_F;
+    }
 };
 
-// https://dev.w3.org/html5/spec-LC/tokenization.html#tokenization
+
+// Helpers
+
 
 inline void consume_next(Tokenizer* tokenizer) {
     tokenizer->ptr++;
 }
 
+inline void unconsume_previous(Tokenizer* tokenizer) { // inverse of consume_next: step the cursor back by one
+    --tokenizer->ptr;
+}
+
 
 /// https://dev.w3.org/html5/spec-LC/tokenization.html#data-state
 inline void data_state(Tokenizer* tokenizer) {
@@ -202,19 +219,37 @@ inline void try_consume_character_reference(Tokenizer* tokenizer) {
         consume_next(tokenizer);
         c = *tokenizer->ptr;
 
-        bool is_hex = false; // If set to true, we should interpret within the range 0 to F as hex, otherwise 0 to 9 as decimal
-        switch (c) {
-        case CodePoints::UPPERCASE_X:
-        case CodePoints::LOWERCASE_X: {
+        bool none_match_range = false;
+        code_point_t value = 0x0000;
+        if (c == CodePoints::UPPERCASE_X || c == CodePoints::LOWERCASE_X) {
             consume_next(tokenizer);
-            is_hex = true;
-            break;
-        }
-        default: {
-            is_hex = false;
-            break;
+            c = *tokenizer->ptr;
+
+            if (!CodePoints::is_hex(c)) {
+                unconsume_previous(tokenizer); // X
+                unconsume_previous(tokenizer); // Number sign
+                // TODO: @Error parse error
+                return;
+            }
+
+            while (CodePoints::is_hex(c)) {
+                consume_next(tokenizer);
+                c = *tokenizer->ptr;
+            }
         }
+        else {
+            if (!CodePoints::is_decimal(c)) {
+                unconsume_previous(tokenizer); // Number sign
+                // TODO: @Error parse error
+                return;
+            }
+            
+            while (CodePoints::is_decimal(c)) {
+                consume_next(tokenizer);
+                c = *tokenizer->ptr;
+            }
         }
+
         break;
     }
     }