// path: root/src/tokenizer.hpp
// blob: e2c17f999bf8a6a3d6b3947e4bec3e504e7baca8
#ifndef TOKENIZER_HPP
#define TOKENIZER_HPP

#include "code_point.h"
#include "html_token.hpp"

/// Control flags for the tokenizer.
///
/// Each non-zero flag occupies its own bit. Tokenizer::flag is a plain int,
/// so presumably several flags can be OR-ed together -- confirm against the
/// implementation.
enum TokenizerFlag {
    /// No flag is set.
    TokenizerFlag_None = 0,

    /// Emit the HtmlToken currently held in Tokenizer::last.
    TokenizerFlag_Emit = 1 << 0,

    /// Do not increment the pointer on the tokenizer's next read attempt.
    TokenizerFlag_NoIncrement = 1 << 1
};

/// States of the tokenizer's state machine.
///
/// Enumerators are numbered consecutively from zero in declaration order, so
/// inserting or reordering entries changes their numeric values.
/// NOTE(review): the names appear to mirror the tokenization states of the
/// HTML specification -- confirm against the implementation.
enum TokenizerState {
    TokenizerState_None = 0,

    // Character data.
    TokenizerState_Data,  // initial state of a default-constructed Tokenizer
    TokenizerState_CharacterReferenceInData,

    // Tags and markup declarations.
    TokenizerState_TagOpen,
    TokenizerState_MarkupDeclarationOpen,
    TokenizerState_EndTagOpen,
    TokenizerState_TagName,

    // Comments.
    TokenizerState_BogusComment,
    TokenizerState_CommentState,

    // Attribute names.
    TokenizerState_BeforeAttributeName,
    TokenizerState_AttributeNameState,
    TokenizerState_AfterAttributeNameState,

    // Attribute values.
    TokenizerState_BeforeAttributeValueState,
    TokenizerState_AttributeValueUnquoted,
    TokenizerState_AttributeValueDoubleQuoted,
    TokenizerState_AttributeValueSingleQuoted,
    TokenizerState_AfterAttributeValueQuoted,

    // Self-closing tags.
    TokenizerState_SelfClosingStartTag
};

struct Tokenizer {
    code_point_t* ptr = nullptr;
    size_t length = 0;

    TokenizerState state = TokenizerState_Data;
    HtmlToken last;
    int flag = TokenizerFlag_None;
};


/// Returns a Tokenizer for the given code point pointer.
/// NOTE(review): no length is passed, so the input is presumably terminated
/// by a sentinel or measured inside the implementation -- confirm. Ownership
/// of the buffer is not expressed here; presumably it stays with the caller.
Tokenizer create(code_point_t*);
/// Returns the next HtmlToken produced from the given tokenizer.
/// NOTE(review): takes a non-const pointer, so it presumably updates the
/// tokenizer's state; end-of-input behaviour is not visible in this header.
HtmlToken read_next(Tokenizer*);


#endif