summary | refs | log | tree | commit | diff
path: root/src/tokenizer.cpp
diff options
context:
space:
mode:
author: mattkae <mattkae@protonmail.com> 2023-06-22 13:48:11 -0400
committer: mattkae <mattkae@protonmail.com> 2023-06-22 13:48:11 -0400
commit: 29e03ef74a814cb31a0ae53192e25cc75b638256 (patch)
tree: e992398b4fba704859653f615ba40aab6e4eee94 /src/tokenizer.cpp
parent: d53f2e7107cf63669b705c3abf08c129eeb0315e (diff)
Handling less than signs in plain text
Diffstat (limited to 'src/tokenizer.cpp')
-rw-r--r-- src/tokenizer.cpp 19
1 files changed, 13 insertions, 6 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 9931d59..dc0b8d7 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -28,6 +28,12 @@ inline void unconsume_previous(Tokenizer* tokenizer) {
tokenizer->ptr--;
}
+inline void emit_character(Tokenizer* tokenizer, code_point_t c) {
+ tokenizer->last.type = HtmlTokenType_Character;
+ tokenizer->last.character_token = c;
+ tokenizer->flag |= TokenizerFlag_Emit;
+}
+
/// https://dev.w3.org/html5/spec-LC/tokenization.html#data-state
inline void data_state(Tokenizer* tokenizer) {
@@ -45,9 +51,7 @@ inline void data_state(Tokenizer* tokenizer) {
break;
default:
// TODO: @Error If null, throw an error
- tokenizer->last.type = HtmlTokenType_Character;
- tokenizer->last.character_token = *tokenizer->ptr;
- tokenizer->flag |= TokenizerFlag_Emit;
+ emit_character(tokenizer, *tokenizer->ptr);
break;
}
}
@@ -80,8 +84,11 @@ inline void tag_open_state(Tokenizer* tokenizer) {
tokenizer->state = TokenizerState_BogusComment; // TODO:
}
else {
+ emit_character(tokenizer, CodePoints::LESS_THAN_SIGN);
tokenizer->state = TokenizerState_Data;
+ tokenizer->flag = tokenizer->flag | TokenizerFlag_NoIncrement;
}
+ break;
}
}
@@ -114,7 +121,7 @@ inline void tag_name_state(Tokenizer* tokenizer) {
else if (c == EOF) {
// TODO: @Error
tokenizer->state = TokenizerState_Data;
- tokenizer->flag = tokenizer->flag & TokenizerFlag_DecrementPtr;
+ tokenizer->flag = tokenizer->flag | TokenizerFlag_NoIncrement;
}
else {
tokenizer->last.append_to_tag_name(c);
@@ -151,7 +158,7 @@ inline void end_tag_open_state(Tokenizer* tokenizer) {
tokenizer->last.type = HtmlTokenType_Character;
tokenizer->last.character_token = CodePoints::SOLIDUS;
- tokenizer->flag = tokenizer->flag & TokenizerFlag_DecrementPtr;
+ tokenizer->flag = tokenizer->flag | TokenizerFlag_NoIncrement;
}
else if (c == CodePoints::NULL_CHAR) {
// TODO: @Error
@@ -286,7 +293,7 @@ HtmlToken read_next(Tokenizer* tokenizer) {
exit(1);
}
- if ((tokenizer->flag & TokenizerFlag_DecrementPtr) == 0) {
+ if ((tokenizer->flag & TokenizerFlag_NoIncrement) == 0) {
tokenizer->ptr++;
}