1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
#ifndef TOKENIZER_HPP
#define TOKENIZER_HPP
#include "code_point.h"
#include "html_token.hpp"
/// Control flags for the tokenizer. Tokenizer::flag is initialised to
/// TokenizerFlag_None. The non-zero values occupy distinct bits, so they can
/// presumably be OR-ed together -- confirm against the implementation.
enum TokenizerFlag {
    /// No flag is set.
    TokenizerFlag_None        = 0x0,

    /// The tokenizer will emit the HtmlToken stored in "last".
    TokenizerFlag_Emit        = 0x1,

    /// The tokenizer will leave the pointer where it is (no increment) when it
    /// attempts its next read.
    TokenizerFlag_NoIncrement = 0x2
};
/// States the tokenizer can be in. The enumerator names appear to mirror the
/// tokenization states of the HTML specification (NOTE(review): confirm).
/// Ordinals are written out explicitly; they match the implicit numbering
/// (0, 1, 2, ...) the enumerators would otherwise receive.
enum TokenizerState {
    TokenizerState_None                       = 0,

    // Character data.
    TokenizerState_Data                       = 1,
    TokenizerState_CharacterReferenceInData   = 2,

    // Tags and markup declarations.
    TokenizerState_TagOpen                    = 3,
    TokenizerState_MarkupDeclarationOpen      = 4,
    TokenizerState_EndTagOpen                 = 5,
    TokenizerState_TagName                    = 6,

    // Comments.
    TokenizerState_BogusComment               = 7,
    TokenizerState_CommentState               = 8,

    // Attribute names.
    TokenizerState_BeforeAttributeName        = 9,
    TokenizerState_AttributeNameState         = 10,
    TokenizerState_AfterAttributeNameState    = 11,

    // Attribute values.
    TokenizerState_BeforeAttributeValueState  = 12,
    TokenizerState_AttributeValueUnquoted     = 13,
    TokenizerState_AttributeValueDoubleQuoted = 14,
    TokenizerState_AttributeValueSingleQuoted = 15,
    TokenizerState_AfterAttributeValueQuoted  = 16,

    // Self-closing start tag.
    TokenizerState_SelfClosingStartTag        = 17
};
struct Tokenizer {
code_point_t* ptr = nullptr;
size_t length = 0;
TokenizerState state = TokenizerState_Data;
HtmlToken last;
int flag = TokenizerFlag_None;
};
/// Returns a Tokenizer by value, given a pointer to code points.
/// NOTE(review): presumably the returned tokenizer reads from "input"; no
/// length is passed, so how the end of the input is determined is not visible
/// here -- confirm against the implementation.
Tokenizer create(code_point_t* input);
/// Returns an HtmlToken by value for the given tokenizer. The tokenizer is
/// passed by non-const pointer, so the call may modify it.
/// NOTE(review): presumably this reads the next token from the tokenizer's
/// input; behaviour for a null "tokenizer" or at the end of the input is not
/// visible here -- confirm against the implementation.
HtmlToken read_next(Tokenizer* tokenizer);
#endif
|