summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/html_token.cpp 2
-rw-r--r-- src/html_token.hpp 2
-rw-r--r-- src/tokenizer.cpp 19
-rw-r--r-- src/tokenizer.hpp 2
4 files changed, 16 insertions, 9 deletions
diff --git a/src/html_token.cpp b/src/html_token.cpp
index 8589ba8..1d0952d 100644
--- a/src/html_token.cpp
+++ b/src/html_token.cpp
@@ -18,7 +18,7 @@ void HtmlToken::print() {
break;
case HtmlTokenType_StartTag:
case HtmlTokenType_EndTag:
- logger_info("%s, %s", name, tag_name.c_str());
+ logger_info("%s, %S", name, tag_name.c_str());
break;
default:
logger_info("%s", name);
diff --git a/src/html_token.hpp b/src/html_token.hpp
index 3d848d9..e691d21 100644
--- a/src/html_token.hpp
+++ b/src/html_token.hpp
@@ -37,7 +37,7 @@ struct HtmlToken {
code_point_t value = 0x0000;
if (is_hex) {
int multiplier = 1;
- for (size_t i = code_entity.size() - 1; i >= 0; i--) {
+ for (size_t i = code_entity.size() - 1; i > 0; i--) {
auto c = code_entity[i];
if (c >= CodePoints::LOWERCASE_A) { // [a, z]
c = 10 + c - CodePoints::LOWERCASE_A;
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 9931d59..dc0b8d7 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -28,6 +28,12 @@ inline void unconsume_previous(Tokenizer* tokenizer) {
tokenizer->ptr--;
}
+inline void emit_character(Tokenizer* tokenizer, code_point_t c) {
+ tokenizer->last.type = HtmlTokenType_Character;
+ tokenizer->last.character_token = c;
+ tokenizer->flag |= TokenizerFlag_Emit;
+}
+
/// https://dev.w3.org/html5/spec-LC/tokenization.html#data-state
inline void data_state(Tokenizer* tokenizer) {
@@ -45,9 +51,7 @@ inline void data_state(Tokenizer* tokenizer) {
break;
default:
// TODO: @Error If null, throw an error
- tokenizer->last.type = HtmlTokenType_Character;
- tokenizer->last.character_token = *tokenizer->ptr;
- tokenizer->flag |= TokenizerFlag_Emit;
+ emit_character(tokenizer, *tokenizer->ptr);
break;
}
}
@@ -80,8 +84,11 @@ inline void tag_open_state(Tokenizer* tokenizer) {
tokenizer->state = TokenizerState_BogusComment; // TODO:
}
else {
+ emit_character(tokenizer, CodePoints::LESS_THAN_SIGN);
tokenizer->state = TokenizerState_Data;
+ tokenizer->flag = tokenizer->flag | TokenizerFlag_NoIncrement;
}
+ break;
}
}
@@ -114,7 +121,7 @@ inline void tag_name_state(Tokenizer* tokenizer) {
else if (c == EOF) {
// TODO: @Error
tokenizer->state = TokenizerState_Data;
- tokenizer->flag = tokenizer->flag & TokenizerFlag_DecrementPtr;
+ tokenizer->flag = tokenizer->flag | TokenizerFlag_NoIncrement;
}
else {
tokenizer->last.append_to_tag_name(c);
@@ -151,7 +158,7 @@ inline void end_tag_open_state(Tokenizer* tokenizer) {
tokenizer->last.type = HtmlTokenType_Character;
tokenizer->last.character_token = CodePoints::SOLIDUS;
- tokenizer->flag = tokenizer->flag & TokenizerFlag_DecrementPtr;
+ tokenizer->flag = tokenizer->flag | TokenizerFlag_NoIncrement;
}
else if (c == CodePoints::NULL_CHAR) {
// TODO: @Error
@@ -286,7 +293,7 @@ HtmlToken read_next(Tokenizer* tokenizer) {
exit(1);
}
- if ((tokenizer->flag & TokenizerFlag_DecrementPtr) == 0) {
+ if ((tokenizer->flag & TokenizerFlag_NoIncrement) == 0) {
tokenizer->ptr++;
}
diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp
index 4cd9245..8b844cd 100644
--- a/src/tokenizer.hpp
+++ b/src/tokenizer.hpp
@@ -7,7 +7,7 @@
enum TokenizerFlag {
TokenizerFlag_None = 0,
TokenizerFlag_Emit = 1,
- TokenizerFlag_DecrementPtr = 2
+ TokenizerFlag_NoIncrement = 2
};
enum TokenizerState {