summaryrefslogtreecommitdiff
path: root/src/tokenizer.hpp
blob: 4978bfbc33e0285e2dffee961168606a2a213ada (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#ifndef TOKENIZER_HPP
#define TOKENIZER_HPP

#include <cstddef>

#include "code_point.h"
#include "html_token.hpp"

// Flag values for Tokenizer::flag, which is an int that defaults to
// TokenizerFlag_None.
// NOTE(review): the non-zero values are distinct single bits, so they can
// presumably be OR-ed together - confirm against the tokenizer implementation.
enum TokenizerFlag {
    TokenizerFlag_None         = 0,
    TokenizerFlag_Emit         = 1 << 0,
    TokenizerFlag_IncrementPtr = 1 << 1
};

// States of the tokenizer's state machine. A Tokenizer starts in
// TokenizerState_Data (see the default initializer of Tokenizer::state).
// The numeric values are spelled out explicitly; they match the values the
// compiler would assign implicitly, starting from 0.
// NOTE(review): the names appear to follow the tokenization states of the
// HTML parsing specification - confirm against the implementation.
enum TokenizerState {
    TokenizerState_None                     = 0,
    TokenizerState_Data                     = 1,
    TokenizerState_CharacterReferenceInData = 2,
    TokenizerState_TagOpen                  = 3,
    TokenizerState_MarkupDeclarationOpen    = 4,
    TokenizerState_EndTagOpen               = 5,
    TokenizerState_TagName                  = 6,
    TokenizerState_BogusComment             = 7,
    TokenizerState_CommentState             = 8,
    TokenizerState_BeforeAttribute          = 9,
    TokenizerState_SelfClosingStartTag      = 10
};

// State carried by the tokenizer. Every member except `last` has a default
// initializer, so a default-constructed Tokenizer has a null `ptr`, a zero
// `length`, is in TokenizerState_Data, and has no flags set.
struct Tokenizer {
    // NOTE(review): presumably the read position within the input code points
    // and the size of that input; ownership of the pointed-to memory is not
    // visible from this header - confirm against the implementation.
    code_point_t* ptr = nullptr;
    // Spelled std::size_t (declared by <cstddef>) so this header does not
    // depend on another header happening to declare size_t.
    std::size_t length = 0;

    // Current state of the state machine; starts in the Data state.
    TokenizerState state = TokenizerState_Data;
    // No initializer here, so initialization is whatever HtmlToken's own
    // default initialization does.
    // NOTE(review): presumably the most recently produced token - confirm.
    HtmlToken last;
    // Holds TokenizerFlag values; stored as a plain int rather than as the
    // enum type.
    int flag = TokenizerFlag_None;
};


// Returns a Tokenizer by value; only declared here, defined elsewhere.
// NOTE(review): presumably `code_points` is the input the returned tokenizer
// will read from; no length is passed, so how the end of the input is
// determined is not visible from this header - confirm.
Tokenizer create(code_point_t* code_points);
// Returns an HtmlToken by value for the given tokenizer; only declared here,
// defined elsewhere.
// NOTE(review): presumably advances `tokenizer` and yields the next token of
// its input; null-pointer handling is not visible from this header - confirm.
HtmlToken read_next(Tokenizer* tokenizer);


#endif