summaryrefslogtreecommitdiff
path: root/src/html_token.hpp
blob: 09a5c985cce723d6ea77295460243549df838c0d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#ifndef HTML_TOKEN_HPP
#define HTML_TOKEN_HPP

#include <string>
#include "code_point.h"
#include <vector>

/// Kind tag stored in HtmlToken::type.
/// Unscoped enum; the enumerators are numbered consecutively starting at zero.
enum HtmlTokenType {
    HtmlTokenType_None      = 0,
    HtmlTokenType_StartTag  = 1,
    HtmlTokenType_EndTag    = 2,
    HtmlTokenType_EOF       = 3,
    HtmlTokenType_Character = 4,
    // Last enumerator; presumably serves as the number of kinds above -- confirm with callers.
    HtmlTokenType_Length    = 5
};

/// One attribute as a pair of wide strings.
/// HtmlToken stores these in its `attributes` vector and appends to
/// `name` / `value` one code point at a time.
struct HtmlAttribute {
    std::wstring name;   // attribute name
    std::wstring value;  // attribute value
};

struct HtmlToken {
    HtmlTokenType type;

    // TODO: @Performance
    char character_token;
    std::wstring tag_name;
    std::wstring code_entity;
    std::vector<HtmlAttribute> attributes;

    HtmlAttribute* active_attribute;
    
    code_point_t entity;

    void append_to_tag_name(code_point_t c) {
        tag_name += c;
    }

    void add_to_attribute_name(code_point_t c) {
        active_attribute->name += c;
    }

    void add_to_attribute_value(code_point_t c) {
        active_attribute->value += c;
    }

    void start_attribute() {
        auto length = attributes.size();
        attributes.push_back(HtmlAttribute());
        active_attribute = &attributes[length];
    }

    /// Transforms the code_entity into a usable value.
    /// Note that we are assuming that the code_entity is
    /// valid at this point in time.
    void set_code_entity_to_value(const std::wstring& code_entity, bool is_hex) {
        code_point_t value = 0x0000;
        if (is_hex) {
            int multiplier = 1;
            for (size_t i = code_entity.size() - 1; i > 0; i--) {
                auto c = code_entity[i];
                if (c >= CodePoints::LOWERCASE_A) { // [a, z]
                    c = 10 + c - CodePoints::LOWERCASE_A;
                }
                else if (c >= CodePoints::UPPERCASE_A) { // [A, Z]
                    c = 10 + c - CodePoints::UPPERCASE_A;
                }
                else { // [0, 9]
                    c = c - CodePoints::DIGIT_ZERO; // Now it is between 0 and 9
                }

                // Now we have c in decimal, let's convert it to the final value.
                c = c * multiplier;
                value += c;
                multiplier *= 16;
            }
        }
        else {
            int multiplier = 1;
            for (int i = code_entity.size() - 1; i >= 0; i--) {
                auto c = code_entity[i];
                c = c - CodePoints::DIGIT_ZERO; // Now it is between 0 and 9
                value += c * multiplier;
                multiplier *= 10;
            }
        }
        
        entity = value;
    }

    void print();
    void reset();
};

#endif