#ifndef HTML_TOKEN_HPP #define HTML_TOKEN_HPP #include #include "code_point.h" #include enum HtmlTokenType { HtmlTokenType_None = 0, HtmlTokenType_StartTag, HtmlTokenType_EndTag, HtmlTokenType_EOF, HtmlTokenType_Character, HtmlTokenType_Length }; struct HtmlAttribute { std::wstring name; std::wstring value; }; struct HtmlToken { HtmlTokenType type; // TODO: @Performance char character_token; std::wstring tag_name; std::wstring code_entity; std::vector attributes; HtmlAttribute* active_attribute; code_point_t entity; void append_to_tag_name(code_point_t c) { tag_name += c; } void add_to_attribute_name(code_point_t c) { active_attribute->name += c; } void add_to_attribute_value(code_point_t c) { active_attribute->value += c; } void start_attribute() { auto length = attributes.size(); attributes.push_back(HtmlAttribute()); active_attribute = &attributes[length]; } /// Transforms the code_entity into a usable value. /// Note that we are assuming that the code_entity is /// valid at this point in time. void set_code_entity_to_value(const std::wstring& code_entity, bool is_hex) { code_point_t value = 0x0000; if (is_hex) { int multiplier = 1; for (size_t i = code_entity.size() - 1; i > 0; i--) { auto c = code_entity[i]; if (c >= CodePoints::LOWERCASE_A) { // [a, z] c = 10 + c - CodePoints::LOWERCASE_A; } else if (c >= CodePoints::UPPERCASE_A) { // [A, Z] c = 10 + c - CodePoints::UPPERCASE_A; } else { // [0, 9] c = c - CodePoints::DIGIT_ZERO; // Now it is between 0 and 9 } // Now we have c in decimal, let's convert it to the final value. c = c * multiplier; value += c; multiplier *= 16; } } else { int multiplier = 1; for (int i = code_entity.size() - 1; i >= 0; i--) { auto c = code_entity[i]; c = c - CodePoints::DIGIT_ZERO; // Now it is between 0 and 9 value += c * multiplier; multiplier *= 10; } } entity = value; } void print(); void reset(); }; #endif