From 4feb59d831d395369aa21d77e9b9d293125421d1 Mon Sep 17 00:00:00 2001 From: mattkae Date: Fri, 23 Jun 2023 10:25:52 -0400 Subject: Able to parse double quoted HTML attributes --- src/html_token.hpp | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'src/html_token.hpp') diff --git a/src/html_token.hpp b/src/html_token.hpp index e691d21..09a5c98 100644 --- a/src/html_token.hpp +++ b/src/html_token.hpp @@ -3,17 +3,22 @@ #include #include "code_point.h" +#include <vector> enum HtmlTokenType { HtmlTokenType_None = 0, HtmlTokenType_StartTag, HtmlTokenType_EndTag, - HtmlTokenType_Attribute, HtmlTokenType_EOF, HtmlTokenType_Character, HtmlTokenType_Length }; +struct HtmlAttribute { + std::wstring name; + std::wstring value; +}; + struct HtmlToken { HtmlTokenType type; @@ -21,19 +26,34 @@ struct HtmlToken { char character_token; std::wstring tag_name; std::wstring code_entity; + std::vector<HtmlAttribute> attributes; + + HtmlAttribute* active_attribute; + + code_point_t entity; void append_to_tag_name(code_point_t c) { tag_name += c; } - void append_to_code_entity(code_point_t c) { - code_entity += c; + void add_to_attribute_name(code_point_t c) { + active_attribute->name += c; + } + + void add_to_attribute_value(code_point_t c) { + active_attribute->value += c; + } + + void start_attribute() { + auto length = attributes.size(); + attributes.push_back(HtmlAttribute()); + active_attribute = &attributes[length]; } /// Transforms the code_entity into a usable value. /// Note that we are assuming that the code_entity is /// valid at this point in time. - code_point_t code_entity_to_value(bool is_hex) { + void set_code_entity_to_value(const std::wstring& code_entity, bool is_hex) { code_point_t value = 0x0000; if (is_hex) { int multiplier = 1; @@ -64,7 +84,8 @@ struct HtmlToken { multiplier *= 10; } } - return value; + + entity = value; } void print(); -- cgit v1.2.1