diff options
Diffstat (limited to 'src/tokenizer.hpp')
-rw-r--r-- | src/tokenizer.hpp | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp new file mode 100644 index 0000000..4978bfb --- /dev/null +++ b/src/tokenizer.hpp @@ -0,0 +1,41 @@ +#ifndef TOKENIZER_HPP +#define TOKENIZER_HPP + +#include "code_point.h" +#include "html_token.hpp" + +enum TokenizerFlag { + TokenizerFlag_None = 0, + TokenizerFlag_Emit = 1, + TokenizerFlag_IncrementPtr = 2 +}; + +enum TokenizerState { + TokenizerState_None, + TokenizerState_Data, + TokenizerState_CharacterReferenceInData, + TokenizerState_TagOpen, + TokenizerState_MarkupDeclarationOpen, + TokenizerState_EndTagOpen, + TokenizerState_TagName, + TokenizerState_BogusComment, + TokenizerState_CommentState, + TokenizerState_BeforeAttribute, + TokenizerState_SelfClosingStartTag +}; + +struct Tokenizer { + code_point_t* ptr = nullptr; + size_t length = 0; + + TokenizerState state = TokenizerState_Data; + HtmlToken last; + int flag = TokenizerFlag_None; +}; + + +Tokenizer create(code_point_t*); +HtmlToken read_next(Tokenizer*); + + +#endif |