#ifndef TOKENIZER_HPP #define TOKENIZER_HPP #include "code_point.h" #include "html_token.hpp" enum TokenizerFlag { TokenizerFlag_None = 0, /// When set, the tokenizer will emit the HtmlToken stored in "last". TokenizerFlag_Emit = 1, /// When set, the tokenizer will not increment the pointer when it attempts its next read. TokenizerFlag_NoIncrement = 2 }; enum TokenizerState { TokenizerState_None, TokenizerState_Data, TokenizerState_CharacterReferenceInData, TokenizerState_TagOpen, TokenizerState_MarkupDeclarationOpen, TokenizerState_EndTagOpen, TokenizerState_TagName, TokenizerState_BogusComment, TokenizerState_CommentState, TokenizerState_BeforeAttributeName, TokenizerState_AttributeNameState, TokenizerState_AfterAttributeNameState, TokenizerState_BeforeAttributeValueState, TokenizerState_AttributeValueUnquoted, TokenizerState_AttributeValueDoubleQuoted, TokenizerState_AttributeValueSingleQuoted, TokenizerState_AfterAttributeValueQuoted, TokenizerState_SelfClosingStartTag }; struct Tokenizer { code_point_t* ptr = nullptr; size_t length = 0; TokenizerState state = TokenizerState_Data; HtmlToken last; int flag = TokenizerFlag_None; }; Tokenizer create(code_point_t*); HtmlToken read_next(Tokenizer*); #endif