summary refs log tree commit diff
path: root/src/tokenizer.hpp
diff options
context:
space:
mode:
author mattkae <mattkae@protonmail.com> 2023-04-23 20:23:54 -0400
committer mattkae <mattkae@protonmail.com> 2023-04-23 20:23:54 -0400
commit 4058f9b1704322f8185136c2558c2ab96a4d835c (patch)
tree f764007c2cdd0f41372d66dcf02ccde26509b839 /src/tokenizer.hpp
Initial commit with a working parser
Diffstat (limited to 'src/tokenizer.hpp')
-rw-r--r-- src/tokenizer.hpp 41
1 files changed, 41 insertions, 0 deletions
diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp
new file mode 100644
index 0000000..4978bfb
--- /dev/null
+++ b/src/tokenizer.hpp
@@ -0,0 +1,41 @@
+#ifndef TOKENIZER_HPP
+#define TOKENIZER_HPP
+
+#include "code_point.h"
+#include "html_token.hpp"
+
+enum TokenizerFlag { // Flag values for the tokenizer; Tokenizer::flag (an int) defaults to TokenizerFlag_None.
+ TokenizerFlag_None = 0, // No flag set.
+ TokenizerFlag_Emit = 1, // NOTE(review): presumably marks that a token is ready to be emitted -- confirm in the implementation.
+ TokenizerFlag_IncrementPtr = 2 // NOTE(review): presumably asks for Tokenizer::ptr to be advanced -- confirm. 1 and 2 are distinct bits, so the flags look OR-able.
+};
+
+enum TokenizerState { // States of the tokenizer state machine; values are implicit and follow declaration order.
+ TokenizerState_None, // Implicitly 0; not the default -- Tokenizer::state starts at TokenizerState_Data.
+ TokenizerState_Data, // Initial state of a default-constructed Tokenizer.
+ TokenizerState_CharacterReferenceInData, // NOTE(review): names appear to mirror the HTML tokenization states -- confirm against the spec.
+ TokenizerState_TagOpen,
+ TokenizerState_MarkupDeclarationOpen,
+ TokenizerState_EndTagOpen,
+ TokenizerState_TagName,
+ TokenizerState_BogusComment,
+ TokenizerState_CommentState, // Only enumerator carrying a "State" suffix; renaming would affect users of this name.
+ TokenizerState_BeforeAttribute,
+ TokenizerState_SelfClosingStartTag
+};
+
+struct Tokenizer { // Plain aggregate holding tokenizer state; every member has a default initializer.
+ code_point_t* ptr = nullptr; // NOTE(review): presumably the current read position in the input code points; ownership is not visible here -- confirm.
+ size_t length = 0; // NOTE(review): unit/meaning not visible in this header (input length? consumed count?) -- confirm.
+
+ TokenizerState state = TokenizerState_Data; // Current state-machine state; starts in the Data state.
+ HtmlToken last; // NOTE(review): presumably the most recently produced/in-progress token -- confirm.
+ int flag = TokenizerFlag_None; // Holds TokenizerFlag values; typed int rather than the enum.
+};
+
+
+Tokenizer create(code_point_t*); // Returns a Tokenizer by value from a code_point_t pointer. NOTE(review): presumably stores the pointer as the input; definition not visible here -- confirm.
+HtmlToken read_next(Tokenizer*); // Takes a mutable Tokenizer and returns an HtmlToken by value. NOTE(review): presumably advances the tokenizer to the next token; null handling unknown -- confirm.
+
+
+#endif