diff options
author | mattkae <mattkae@protonmail.com> | 2023-04-23 20:23:54 -0400 |
---|---|---|
committer | mattkae <mattkae@protonmail.com> | 2023-04-23 20:23:54 -0400 |
commit | 4058f9b1704322f8185136c2558c2ab96a4d835c (patch) | |
tree | f764007c2cdd0f41372d66dcf02ccde26509b839 /src/tokenizer.hpp |
Initial commit with a working parser
Diffstat (limited to 'src/tokenizer.hpp')
-rw-r--r-- | src/tokenizer.hpp | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp
new file mode 100644
index 0000000..4978bfb
--- /dev/null
+++ b/src/tokenizer.hpp
@@ -0,0 +1,41 @@
+#ifndef TOKENIZER_HPP
+#define TOKENIZER_HPP
+// NOTE(review): size_t is used below but no standard header is included here; presumably it arrives via the includes below - confirm.
+#include "code_point.h" // NOTE(review): presumably declares code_point_t - confirm
+#include "html_token.hpp" // NOTE(review): presumably declares HtmlToken - confirm
+// Flag values for Tokenizer::flag, which is initialized to TokenizerFlag_None. NOTE(review): 0/1/2 look like combinable bits - confirm in the implementation.
+enum TokenizerFlag {
+    TokenizerFlag_None = 0,
+    TokenizerFlag_Emit = 1,
+    TokenizerFlag_IncrementPtr = 2
+};
+// Values held by Tokenizer::state. NOTE(review): the names appear to follow the HTML tokenization states - confirm against the implementation.
+enum TokenizerState {
+    TokenizerState_None, // implicitly 0; not the default, Tokenizer::state starts at TokenizerState_Data
+    TokenizerState_Data,
+    TokenizerState_CharacterReferenceInData,
+    TokenizerState_TagOpen,
+    TokenizerState_MarkupDeclarationOpen,
+    TokenizerState_EndTagOpen,
+    TokenizerState_TagName,
+    TokenizerState_BogusComment,
+    TokenizerState_CommentState,
+    TokenizerState_BeforeAttribute,
+    TokenizerState_SelfClosingStartTag
+};
+// Tokenizer bookkeeping, returned by create() and passed by pointer to read_next(). Every member except 'last' has a default initializer in this header.
+struct Tokenizer {
+    code_point_t* ptr = nullptr; // NOTE(review): presumably the current position in the input code points - confirm
+    size_t length = 0; // NOTE(review): presumably a code point count; total vs. remaining is not visible here
+
+    TokenizerState state = TokenizerState_Data; // a default-constructed Tokenizer starts in the Data state
+    HtmlToken last; // NOTE(review): presumably the most recently produced token - confirm
+    int flag = TokenizerFlag_None; // declared int rather than TokenizerFlag; NOTE(review): presumably so flags can be OR-ed together - confirm
+};
+
+// Declarations only; the definitions are not part of this header.
+Tokenizer create(code_point_t*); // returns a Tokenizer by value; NOTE(review): no length parameter, so how 'length' gets set is not visible here
+HtmlToken read_next(Tokenizer*); // takes the Tokenizer by pointer and returns an HtmlToken by value
+
+
+#endif |