v/vlib/net/html/parser.v

module html

import os
import strings

struct LexicalAttributes {
mut:
	current_tag      &Tag
	open_tag         bool
	open_code        bool
	open_string      int
	open_comment     bool
	is_attribute     bool
	opened_code_type string
	line_count       int
	lexeme_builder   strings.Builder = strings.new_builder(100)
	code_tags        map[string]bool = {
		'script': true
		'style':  true
	}
}

// Parser is responsible for reading the HTML strings and converting them into a `DocumentObjectModel`.
pub struct Parser {
mut:
	dom                DocumentObjectModel
	lexical_attributes LexicalAttributes = LexicalAttributes{
		current_tag: &Tag{}
	}
	filename    string = 'direct-parse'
	initialized bool
	tags        []&Tag
	debug_file  os.File
}

// add_code_tag registers a tag name whose content the parser should skip over.
// For example, if an HTML or XML document contains a custom tag that behaves like
// `<script>`, calling `add_code_tag('script')` makes the parser jump over the content
// of every `script` tag: the content is still kept, but its `>` and `<` characters
// no longer confuse the parser.
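// Example (a minimal usage sketch; `math` is just an arbitrary custom tag name,
// not something this module defines):
//
//	mut parser := Parser{}
//	parser.add_code_tag('math') // content of <math> tags is now skipped, like <script>/<style>
//	parser.parse_html('<div><math>1 < 2 && 3 > 2</math></div>')
//	dom := parser.get_dom()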
pub fn (mut parser Parser) add_code_tag(name string) {
	if name.len <= 0 {
		return
	}
	parser.lexical_attributes.code_tags[name] = true
}

[inline]
fn (parser Parser) builder_str() string {
	return parser.lexical_attributes.lexeme_builder.after(0)
}

[if debug_html ?]
fn (mut parser Parser) print_debug(data string) {
	if data.len > 0 {
		parser.debug_file.writeln(data) or { panic(err) }
	}
}

fn (mut parser Parser) verify_end_comment(remove bool) bool {
	lexeme := parser.builder_str()
	last := lexeme[lexeme.len - 1]
	penultimate := lexeme[lexeme.len - 2]
	is_end_comment := last == `-` && penultimate == `-`
	if is_end_comment && remove {
		parser.lexical_attributes.lexeme_builder.go_back(2)
	}
	return is_end_comment
}

fn blank_string(data string) bool {
	mut count := 0
	for chr in data {
		if chr == 9 || chr == 32 { // tab or space
			count++
		}
	}
	return count == data.len
}

// init initializes the parser.
fn (mut parser Parser) init() {
	if parser.initialized {
		return
	}
	parser.dom = DocumentObjectModel{
		debug_file: parser.debug_file
		root: &Tag{}
	}
	parser.add_code_tag('')
	parser.tags = []&Tag{}
	parser.dom.close_tags['/!document'] = true
	parser.lexical_attributes.current_tag = &Tag{}
	parser.initialized = true
}

fn (mut parser Parser) generate_tag() {
	if parser.lexical_attributes.open_tag {
		return
	}
	if parser.lexical_attributes.current_tag.name.len > 0
		|| parser.lexical_attributes.current_tag.content.len > 0 {
		parser.tags << parser.lexical_attributes.current_tag
	}
	parser.lexical_attributes.current_tag = &Tag{}
}

// split_parse parses the HTML fragment `data`. It can be called repeatedly with
// successive pieces of a document, since the parser keeps its lexical state between calls.
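// Example (a minimal sketch of feeding a document in pieces; the fragments shown
// are arbitrary):
//
//	mut parser := Parser{}
//	parser.split_parse('<html><body><h1>Hello')
//	parser.split_parse(' world!</h1></body></html>')
//	parser.finalize() // flush the last pending tag
//	dom := parser.get_dom()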
pub fn (mut parser Parser) split_parse(data string) {
	parser.init()
	for chr in data {
		// is_quote is true when the byte is a double or single quote
		is_quote := chr == `"` || chr == `'`
		string_code := match chr {
			`"` { 1 } // "
			`'` { 2 } // '
			else { 0 }
		}
		if parser.lexical_attributes.open_code { // inside a code tag (e.g. <script>): check whether it ends and track strings inside it
			parser.lexical_attributes.lexeme_builder.write_u8(chr)
			if parser.lexical_attributes.open_string > 0
				&& parser.lexical_attributes.open_string == string_code {
				parser.lexical_attributes.open_string = 0
			} else if is_quote {
				parser.lexical_attributes.open_string = string_code
			} else if chr == `>` { // a `>` may end the code tag: check whether the builder ends with the matching close tag
				name_close_tag := '</$parser.lexical_attributes.opened_code_type>'
				if parser.builder_str().to_lower().ends_with(name_close_tag) {
					parser.lexical_attributes.open_code = false
					// rewind lexeme_builder so the code text is stored as content in the next iteration (and does not break the DOM)
					parser.lexical_attributes.lexeme_builder.go_back(name_close_tag.len)
					parser.lexical_attributes.current_tag.closed = true
					parser.lexical_attributes.current_tag.close_type = .new_tag
				}
			}
		} else if parser.lexical_attributes.open_comment {
			if chr == `>` && parser.verify_end_comment(false) { // '-->' ends the comment
				// parser.print_debug(parser.builder_str() + " >> " + parser.lexical_attributes.line_count.str())
				parser.lexical_attributes.lexeme_builder.go_back_to(0)
				parser.lexical_attributes.open_comment = false
				parser.lexical_attributes.open_tag = false
			} else {
				parser.lexical_attributes.lexeme_builder.write_u8(chr)
			}
		} else if parser.lexical_attributes.open_string > 0 {
			if parser.lexical_attributes.open_string == string_code {
				parser.lexical_attributes.open_string = 0
				parser.lexical_attributes.lexeme_builder.write_u8(chr)
				temp_lexeme := parser.builder_str()
				if parser.lexical_attributes.current_tag.last_attribute != '' {
					lattr := parser.lexical_attributes.current_tag.last_attribute
					nval := temp_lexeme.substr(1, temp_lexeme.len - 1)
					// parser.print_debug(lattr + " = " + temp_lexeme)
					parser.lexical_attributes.current_tag.attributes[lattr] = nval
					parser.lexical_attributes.current_tag.last_attribute = ''
				} else {
					parser.lexical_attributes.current_tag.attributes[temp_lexeme.to_lower()] = ''
					// parser.print_debug(temp_lexeme)
				}
				parser.lexical_attributes.lexeme_builder.go_back_to(0)
			} else {
				parser.lexical_attributes.lexeme_builder.write_u8(chr)
			}
		} else if parser.lexical_attributes.open_tag {
			if parser.lexical_attributes.lexeme_builder.len == 0 && is_quote {
				parser.lexical_attributes.open_string = string_code
				parser.lexical_attributes.lexeme_builder.write_u8(chr)
			} else if chr == `>` { // `>` closes the tag
				complete_lexeme := parser.builder_str().to_lower()
				parser.lexical_attributes.current_tag.closed = (complete_lexeme.len > 0
					&& complete_lexeme[complete_lexeme.len - 1] == `/`) // the tag is self-closed if the lexeme ends with '/'
				if complete_lexeme.len > 0 && complete_lexeme[0] == `/` {
					parser.dom.close_tags[complete_lexeme] = true
				}
				/*
				else if complete_lexeme.len > 0 && complete_lexeme[complete_lexeme.len - 1] == 47 { // if end tag like "/>"
					parser.lexical_attributes.current_tag.closed = true
				}
				*/
				if parser.lexical_attributes.current_tag.name == '' {
					parser.lexical_attributes.current_tag.name = complete_lexeme
				} else if complete_lexeme != '/' {
					parser.lexical_attributes.current_tag.attributes[complete_lexeme] = ''
				}
				parser.lexical_attributes.open_tag = false
				parser.lexical_attributes.lexeme_builder.go_back_to(0)
				// if the tag is a registered code tag, skip over its content from here on
				if parser.lexical_attributes.current_tag.name in parser.lexical_attributes.code_tags {
					parser.lexical_attributes.open_code = true
					parser.lexical_attributes.opened_code_type = parser.lexical_attributes.current_tag.name
				}
				// parser.print_debug(parser.lexical_attributes.current_tag.name)
			} else if chr !in [u8(9), ` `, `=`, `\n`] { // any byte except tab, space, `=` and newline extends the current lexeme
				parser.lexical_attributes.lexeme_builder.write_u8(chr)
			} else if chr != 10 { // tab, space or `=` (newlines are ignored): the current lexeme is complete
				complete_lexeme := parser.builder_str().to_lower()
				if parser.lexical_attributes.current_tag.name == '' {
					parser.lexical_attributes.current_tag.name = complete_lexeme
				} else {
					parser.lexical_attributes.current_tag.attributes[complete_lexeme] = ''
					parser.lexical_attributes.current_tag.last_attribute = ''
					if chr == `=` { // a `=` means this lexeme is an attribute name awaiting its value
						parser.lexical_attributes.current_tag.last_attribute = complete_lexeme
					}
				}
				parser.lexical_attributes.lexeme_builder.go_back_to(0)
			}
			if parser.builder_str() == '!--' { // '<!--' starts a comment
				parser.lexical_attributes.open_comment = true
			}
		} else if chr == `<` { // `<` opens a new tag
			temp_string := parser.builder_str()
			if parser.lexical_attributes.lexeme_builder.len >= 1 {
				// 47 == `/`: if the previous tag was a closing tag, store this text as a standalone 'text' tag
				if parser.lexical_attributes.current_tag.name.len > 1
					&& parser.lexical_attributes.current_tag.name[0] == 47
					&& !blank_string(temp_string) {
					parser.tags << &Tag{
						name: 'text'
						content: temp_string
					}
				} else {
					parser.lexical_attributes.current_tag.content = temp_string // TODO: verify later which tag should own this content
				}
			}
			// parser.print_debug(parser.lexical_attributes.current_tag.str())
			parser.lexical_attributes.lexeme_builder.go_back_to(0)
			parser.generate_tag()
			parser.lexical_attributes.open_tag = true
		} else {
			parser.lexical_attributes.lexeme_builder.write_u8(chr)
		}
	}
}

// parse_html parses the given HTML string `data` and constructs the DOM from it.
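// Example (a minimal sketch; the markup is arbitrary):
//
//	mut parser := Parser{}
//	parser.parse_html('<html><body><p class="greeting">Hi</p></body></html>')
//	dom := parser.get_dom()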
pub fn (mut parser Parser) parse_html(data string) {
	parser.init()
	lines := data.split_into_lines()
	for line in lines {
		parser.lexical_attributes.line_count++
		parser.split_parse(line)
	}
	parser.generate_tag()
	parser.dom.debug_file = parser.debug_file
	parser.dom.construct(parser.tags)
}

// finalize finishes the parsing stage.
[inline]
pub fn (mut parser Parser) finalize() {
	parser.generate_tag()
}

// get_dom returns the parser's current DOM representation.
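// Example (a minimal sketch; note that get_dom generates any pending tag and
// constructs the DOM itself, so an explicit finalize call is not required):
//
//	mut parser := Parser{}
//	parser.split_parse('<p>partial input</p>')
//	dom := parser.get_dom()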
pub fn (mut parser Parser) get_dom() DocumentObjectModel {
	if !parser.dom.constructed {
		parser.generate_tag()
		parser.dom.construct(parser.tags)
	}
	return parser.dom
}