#ifndef HTML_TOKEN_HPP #define HTML_TOKEN_HPP #include #include "code_point.h" enum HtmlTokenType { HtmlTokenType_None = 0, HtmlTokenType_StartTag, HtmlTokenType_EndTag, HtmlTokenType_Attribute, HtmlTokenType_EOF, HtmlTokenType_Character, HtmlTokenType_Length }; struct HtmlToken { HtmlTokenType type; // TODO: @Performance char character_token; std::wstring tag_name; std::wstring code_entity; void append_to_tag_name(code_point_t c) { tag_name += c; } void append_to_code_entity(code_point_t c) { code_entity += c; } /// Transforms the code_entity into a usable value. /// Note that we are assuming that the code_entity is /// valid at this point in time. code_point_t code_entity_to_value(bool is_hex) { code_point_t value = 0x0000; if (is_hex) { int multiplier = 1; for (size_t i = code_entity.size() - 1; i > 0; i--) { auto c = code_entity[i]; if (c >= CodePoints::LOWERCASE_A) { // [a, z] c = 10 + c - CodePoints::LOWERCASE_A; } else if (c >= CodePoints::UPPERCASE_A) { // [A, Z] c = 10 + c - CodePoints::UPPERCASE_A; } else { // [0, 9] c = c - CodePoints::DIGIT_ZERO; // Now it is between 0 and 9 } // Now we have c in decimal, let's convert it to the final value. c = c * multiplier; value += c; multiplier *= 16; } } else { int multiplier = 1; for (int i = code_entity.size() - 1; i >= 0; i--) { auto c = code_entity[i]; c = c - CodePoints::DIGIT_ZERO; // Now it is between 0 and 9 value += c * multiplier; multiplier *= 10; } } return value; } void print(); void reset(); }; #endif