From 29e03ef74a814cb31a0ae53192e25cc75b638256 Mon Sep 17 00:00:00 2001 From: mattkae Date: Thu, 22 Jun 2023 13:48:11 -0400 Subject: Handling less than signs in plain text --- src/tokenizer.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'src/tokenizer.cpp') diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 9931d59..dc0b8d7 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -28,6 +28,12 @@ inline void unconsume_previous(Tokenizer* tokenizer) { tokenizer->ptr--; } +inline void emit_character(Tokenizer* tokenizer, code_point_t c) { + tokenizer->last.type = HtmlTokenType_Character; + tokenizer->last.character_token = c; + tokenizer->flag |= TokenizerFlag_Emit; +} + /// https://dev.w3.org/html5/spec-LC/tokenization.html#data-state inline void data_state(Tokenizer* tokenizer) { @@ -45,9 +51,7 @@ inline void data_state(Tokenizer* tokenizer) { break; default: // TODO: @Error If null, throw an error - tokenizer->last.type = HtmlTokenType_Character; - tokenizer->last.character_token = *tokenizer->ptr; - tokenizer->flag |= TokenizerFlag_Emit; + emit_character(tokenizer, *tokenizer->ptr); break; } } @@ -80,8 +84,11 @@ inline void tag_open_state(Tokenizer* tokenizer) { tokenizer->state = TokenizerState_BogusComment; // TODO: } else { + emit_character(tokenizer, CodePoints::LESS_THAN_SIGN); tokenizer->state = TokenizerState_Data; + tokenizer->flag = tokenizer->flag | TokenizerFlag_NoIncrement; } + break; } } @@ -114,7 +121,7 @@ inline void tag_name_state(Tokenizer* tokenizer) { else if (c == EOF) { // TODO: @Error tokenizer->state = TokenizerState_Data; - tokenizer->flag = tokenizer->flag & TokenizerFlag_DecrementPtr; + tokenizer->flag = tokenizer->flag | TokenizerFlag_NoIncrement; } else { tokenizer->last.append_to_tag_name(c); @@ -151,7 +158,7 @@ inline void end_tag_open_state(Tokenizer* tokenizer) { tokenizer->last.type = HtmlTokenType_Character; tokenizer->last.character_token = CodePoints::SOLIDUS; - tokenizer->flag = tokenizer->flag & TokenizerFlag_DecrementPtr; + tokenizer->flag = tokenizer->flag | TokenizerFlag_NoIncrement; } else if (c == CodePoints::NULL_CHAR) { // TODO: @Error @@ -286,7 +293,7 @@ HtmlToken read_next(Tokenizer* tokenizer) { exit(1); } - if ((tokenizer->flag & TokenizerFlag_DecrementPtr) == 0) { + if ((tokenizer->flag & TokenizerFlag_NoIncrement) == 0) { tokenizer->ptr++; } -- cgit v1.2.1