From 4058f9b1704322f8185136c2558c2ab96a4d835c Mon Sep 17 00:00:00 2001 From: mattkae Date: Sun, 23 Apr 2023 20:23:54 -0400 Subject: Initial commit with a working parser --- src/tokenizer.hpp | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 src/tokenizer.hpp (limited to 'src/tokenizer.hpp') diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp new file mode 100644 index 0000000..4978bfb --- /dev/null +++ b/src/tokenizer.hpp @@ -0,0 +1,41 @@ +#ifndef TOKENIZER_HPP +#define TOKENIZER_HPP + +#include "code_point.h" +#include "html_token.hpp" + +enum TokenizerFlag { + TokenizerFlag_None = 0, + TokenizerFlag_Emit = 1, + TokenizerFlag_IncrementPtr = 2 +}; + +enum TokenizerState { + TokenizerState_None, + TokenizerState_Data, + TokenizerState_CharacterReferenceInData, + TokenizerState_TagOpen, + TokenizerState_MarkupDeclarationOpen, + TokenizerState_EndTagOpen, + TokenizerState_TagName, + TokenizerState_BogusComment, + TokenizerState_CommentState, + TokenizerState_BeforeAttribute, + TokenizerState_SelfClosingStartTag +}; + +struct Tokenizer { + code_point_t* ptr = nullptr; + size_t length = 0; + + TokenizerState state = TokenizerState_Data; + HtmlToken last; + int flag = TokenizerFlag_None; +}; + + +Tokenizer create(code_point_t*); +HtmlToken read_next(Tokenizer*); + + +#endif -- cgit v1.2.1