net.html: polish module, update docs (#7193)

2020-12-10 03:08:15 +08:00 · 2020-12-10 03:08:15 +08:00 · b952bf2e6b
parent 5fa1e403ec
commit b952bf2e6b
9 changed files with 302 additions and 446 deletions
--- a/vlib/net/html/README.md
+++ b/vlib/net/html/README.md
@ -1,118 +1,16 @@
-# V HTML
+net/html is an HTML parser written in pure V.
 An HTML parser made in V.
 ## Usage
 ```v oksyntax
 import net.html
-If the description below isn't enought, please look at the test files.
+fn main() {
-
+	doc := html.parse('<html><body><h1 class="title">Hello world!</h1></body></html>')
-### Parser
+	tag := doc.get_tag('h1')[0] // <h1>Hello world!</h1>
-
+	println(tag.name) // h1
-Responsible for read HTML in full strings or splited string and returns all Tag objets of
+	println(tag.content) // Hello world!
-it HTML or return a DocumentObjectModel, that will try to find how the HTML Tree is.
+	println(tag.attributes) // {'class':'title'}
-
+	println(tag.str()) // <h1 class="title">Hello world!</h1>
-#### split_parse(data string)
+}
-This functions is the main function called by parse method to fragment parse your HTML.
+```
-
+More examples can be found in [`parser_test.v`](parser_test.v) and [`html_test.v`](html_test.v).
 #### parse_html(data string, is_file bool)
 This function is called passing a filename or a complete html data string to it.
 #### add_code_tag(name string)
 This function registers a tag whose content the parser should not interpret.
 For example, if your HTML or XML contains a tag such as `<script>`, calling
 `add_code_tag('script')` makes the parser skip over the content of every `script` tag,
 so you still have its content, but its `>` or `<` characters will not confuse the parser.
 #### finalize()
 When using the **split_parse** method, you must call this function to end the parse completely.
 #### get_tags() []Tag_ptr
 This function returns an array with all tags and their content.
 #### get_dom() DocumentObjectModel
 Returns the DocumentObjectModel for current parsed tags.
 ### WARNING
 If you want to reuse parser object to parse another HTML, call `initialize_all()` function first.
 ### DocumentObjectModel
 A DOM object that makes it easier to access and search tags.
 #### get_by_attribute_value(name string, value string) []Tag_ptr
 This function returns a Tag array with all tags in the document
 that have an attribute with the given name and the given value.
 #### get_by_tag(name string) []Tag_ptr
 This function returns a Tag array with all tags in the document that have the given name.
 #### get_by_attribute(name string) []Tag_ptr
 This function returns a Tag array with all tags in the document that have an attribute with the given name.
 #### get_root() Tag_ptr
 This function returns the root Tag.
 #### get_all_tags() []Tag_ptr
 This function returns all important tags, removing close tags.
 ### Tag
 An object that holds tags information, such as `name`, `attributes`, `children`.
 #### get_children() []Tag_ptr
 Returns all children as an array.
 #### get_parent() &Tag
 Returns the parent of current tag.
 #### get_name() string
 Returns tag name.
 #### get_content() string
 Returns tag content.
 #### get_attributes() map[string]string
 Returns all attributes and their values.
 #### text() string
 Returns the content of the tag and all tags inside it. 
 Also, any `<br>` tag will be converted into `\n`.
 ## Some questions that can appear
 ### Q: Why does the parser have a `builder_str() string` method that returns only the lexeme string?
 A: Because in the early stages of the project `strings.Builder` was used,
 but due to a bug somewhere it became necessary to use `string` directly.
 It is planned to use `strings.Builder` again later.
 ### Q: Why have a `compare_string(a string, b string) bool` method?
 A: For some reason, comparing strings directly with `!=` and `==` did not work.
 So this method is a workaround.
 ### Q: Will there be something like `XPath`?
 A: Like XPath yes. Exactly equal to it, no.
 ## Roadmap
 - [x] Parser
  - [x] `<!-- Comments -->` detection
  - [x] `Open Generic tags` detection
  - [x] `Close Generic tags` detection
  - [x] `verify string` detection
  - [x] `tag attributes` detection
  - [x] `attributes values` detection
  - [x] `tag text` (on tag it is declared as content, maybe change for text in the future)
  - [x] `text file for parse` support (open local files for parsing)
  - [x] `open_code` verification
 - [x] DocumentObjectModel
  - [x] push elements that have a close tag into stack
  - [x] remove elements from stack
  - [x] ~~create a new document root if have some syntax error (deleted)~~
  - [x] search tags in `DOM` by attributes
  - [x] search tags in `DOM` by tag type
  - [x] finish dom test
 ## License
 [MIT](../../../LICENSE)
--- a/vlib/net/html/data_structures.v
+++ b/vlib/net/html/data_structures.v
@ -1,30 +1,35 @@
 module html
-#include <limits.h>
+const (
 	null_element = int(0x80000000)
 )
 struct Stack {
 	null_element int = C.INT_MIN
 mut:
 	elements []int
 	size     int
 }
-fn (stack Stack) is_null(data int) bool {
+[inline]
-	return data == stack.null_element
+fn is_null(data int) bool {
 	return data == null_element
 }
 // is_empty reports whether the stack holds no elements, i.e. whether its
 // tracked `size` is zero or negative.
 [inline]
 fn (stack Stack) is_empty() bool {
 	return stack.size <= 0
 }
 fn (stack Stack) peek() int {
-	if !stack.is_empty() {
+	return if !stack.is_empty() {
-		return stack.elements[stack.size - 1]
+		stack.elements[stack.size - 1]
 	} else {
 		null_element
 	}
 	return stack.null_element
 }
 fn (mut stack Stack) pop() int {
-	mut to_return := stack.null_element
+	mut to_return := null_element
 	if !stack.is_empty() {
 		to_return = stack.elements[stack.size - 1]
 		stack.size--
@ -53,7 +58,6 @@ fn (mut btree BTree) add_children(tag Tag) int {
 	btree.all_tags << tag
 	if btree.all_tags.len > 1 {
 		for btree.childrens.len <= btree.node_pointer {
 			// println("${btree.childrens.len} <= ${btree.node_pointer}")
 			mut temp_array := btree.childrens
 			temp_array << []int{}
 			btree.childrens = temp_array
@ -69,14 +73,17 @@ fn (mut btree BTree) add_children(tag Tag) int {
 	return btree.all_tags.len - 1
 }
 // get_children returns the entry of `childrens` at the tree's current
 // `node_pointer`.
 // NOTE(review): the ints look like indexes into `all_tags` (add_children
 // returns `all_tags.len - 1`) — confirm against the full add_children body.
 [inline]
 fn (btree BTree) get_children() []int {
 	return btree.childrens[btree.node_pointer]
 }
 // get_parent returns the entry of `parents` at the tree's current
 // `node_pointer`.
 [inline]
 fn (btree BTree) get_parent() int {
 	return btree.parents[btree.node_pointer]
 }
 // get_stored returns the Tag stored in `all_tags` at the tree's current
 // `node_pointer`.
 [inline]
 fn (btree BTree) get_stored() Tag {
 	return btree.all_tags[btree.node_pointer]
 }
--- a/vlib/net/html/dom.v
+++ b/vlib/net/html/dom.v
@ -2,6 +2,11 @@ module html
 import os
 // The W3C Document Object Model (DOM) is a platform and language-neutral
 // interface that allows programs and scripts to dynamically access and
 // update the content, structure, and style of a document.
 //
 // https://www.w3.org/TR/WD-DOM/introduction.html
 pub struct DocumentObjectModel {
 mut:
 	root           &Tag
@ -25,24 +30,14 @@ fn (mut dom DocumentObjectModel) print_debug(data string) {
 	}
 }
-/*
+[inline]
 fn (dom mut DocumentObjectModel) new_root(tag &Tag) {
 	mut new_tag := &Tag{} new_tag.name = "div"
 	new_tag.add_child(dom.root) new_tag.add_child(tag)
 	dom.root = new_tag
 }
 */
 fn is_close_tag(tag &Tag) bool {
-	if tag.name.len > 0 {
+	return tag.name.len > 0 && tag.name[0] == `/`
 		return tag.name[0] == 47 // return if equals to /
 	}
 	return false
 }
 fn (mut dom DocumentObjectModel) where_is(item_name string, attribute_name string) int {
-	if !(attribute_name in dom.attributes) {
+	if attribute_name !in dom.attributes {
-		temp_array := []string{}
+		dom.attributes[attribute_name] = []string{}
 		dom.attributes[attribute_name] = temp_array
 	}
 	mut string_array := dom.attributes[attribute_name]
 	mut counter := 0
@ -58,10 +53,10 @@ fn (mut dom DocumentObjectModel) where_is(item_name string, attribute_name strin
 }
 fn (mut dom DocumentObjectModel) add_tag_attribute(tag &Tag) {
-	for attribute_name in tag.attributes.keys() {
+	for attribute_name, _ in tag.attributes {
 		attribute_value := tag.attributes[attribute_name]
 		location := dom.where_is(attribute_value, attribute_name)
-		if !(attribute_name in dom.tag_attributes) {
+		if attribute_name !in dom.tag_attributes {
 			dom.tag_attributes[attribute_name] = []
 		}
 		for {
@ -91,7 +86,7 @@ fn (mut dom DocumentObjectModel) add_tag_by_type(tag &Tag) {
 fn (mut dom DocumentObjectModel) add_tag_by_attribute(tag &Tag) {
 	for attribute_name in tag.attributes.keys() {
-		if !(attribute_name in dom.all_attributes) {
+		if attribute_name !in dom.all_attributes {
 			dom.all_attributes[attribute_name] = [tag]
 		} else {
 			mut temp_array := dom.all_attributes[attribute_name]
@ -101,22 +96,10 @@ fn (mut dom DocumentObjectModel) add_tag_by_attribute(tag &Tag) {
 	}
 }
 // compare_string reports whether `a` and `b` are equal, checking the lengths
 // first and then comparing the two strings index by index.
 fn compare_string(a string, b string) bool { // for some reason == doesn't work
 	// Different lengths can never be equal; this also keeps b[i] in bounds below.
 	if a.len != b.len {
 		return false
 	}
 	for i := 0; i < a.len; i++ {
 		if a[i] != b[i] {
 			return false
 		}
 	}
 	return true
 }
 fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
 	dom.constructed = true
 	mut temp_map := map[string]int{}
-	mut temp_int := C.INT_MIN
+	mut temp_int := null_element
 	mut temp_string := ''
 	mut stack := Stack{}
 	dom.btree = BTree{}
@ -130,21 +113,16 @@ fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
 		dom.print_debug(tag.str())
 		if is_close_tag(tag) {
 			temp_int = stack.peek()
-			temp_string = tag.name[1..tag.name.len] // print(temp_string + " != " + tag_list[temp_int].name + " >> ") // println(temp_string != tag_list[temp_int].name)
+			temp_string = tag.name[1..]
-			for !stack.is_null(temp_int) &&
+			for !is_null(temp_int) && temp_string != tag_list[temp_int].name && !tag_list[temp_int].closed {
-				!compare_string(temp_string, tag_list[temp_int].name) && !tag_list[temp_int].closed {
+				dom.print_debug(temp_string + ' >> ' + tag_list[temp_int].name + ' ' + (temp_string ==
-				dom.print_debug(temp_string + ' >> ' + tag_list[temp_int].name + ' ' +
+					tag_list[temp_int].name).str())
 					compare_string(temp_string, tag_list[temp_int].name).str())
 				stack.pop()
 				temp_int = stack.peek()
 			}
 			temp_int = stack.peek()
-			if !stack.is_null(temp_int) {
+			temp_int = if !is_null(temp_int) { stack.pop() } else { root_index }
-				temp_int = stack.pop()
+			if is_null(temp_int) {
 			} else {
 				temp_int = root_index
 			}
 			if stack.is_null(temp_int) {
 				stack.push(root_index)
 			}
 			dom.print_debug('Removed ' + temp_string + ' -- ' + tag_list[temp_int].name)
@ -154,7 +132,7 @@ fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
 			dom.add_tag_by_type(tag)
 			dom.all_tags << tag
 			temp_int = stack.peek()
-			if !stack.is_null(temp_int) {
+			if !is_null(temp_int) {
 				dom.btree.move_pointer(temp_map[temp_int.str()])
 				temp_map[index.str()] = dom.btree.add_children(tag)
 				mut temp_tag := tag_list[temp_int]
@ -164,8 +142,7 @@ fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
 				dom.print_debug("Added ${tag.name} as child of '" + tag_list[temp_int].name +
 					"' which now has ${dom.btree.get_children().len} childrens")
 				*/
-				dom.print_debug("Added $tag.name as child of '" + temp_tag.name +
+				dom.print_debug("Added $tag.name as child of '" + temp_tag.name + "' which now has $temp_tag.children.len childrens")
 					"' which now has $temp_tag.get_children().len childrens")
 			} else { // dom.new_root(tag)
 				stack.push(root_index)
 			}
@ -179,40 +156,40 @@ fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
 	dom.root = tag_list[0]
 }
-pub fn (mut dom DocumentObjectModel) get_by_attribute_value(name string, value string) []&Tag {
+// get_tag_by_attribute_value retrieves all the tags in the document that have the given attribute name and value.
 pub fn (mut dom DocumentObjectModel) get_tag_by_attribute_value(name string, value string) []&Tag {
 	location := dom.where_is(value, name)
-	if dom.tag_attributes[name].len > location {
+	return if dom.tag_attributes[name].len > location {
-		return dom.tag_attributes[name][location]
+		dom.tag_attributes[name][location]
 	} else {
 		[]&Tag{}
 	}
 	return []&Tag{}
 }
-pub fn (dom DocumentObjectModel) get_by_tag(name string) []&Tag {
+// get_tag retrieves all the tags in the document that have the given tag name.
-	if name in dom.tag_type {
+pub fn (dom DocumentObjectModel) get_tag(name string) []&Tag {
-		return dom.tag_type[name]
+	return if name in dom.tag_type {
 		dom.tag_type[name]
 	} else {
 		[]&Tag{}
 	}
 	return []&Tag{}
 }
-pub fn (dom DocumentObjectModel) get_by_attribute(name string) []&Tag {
+// get_tag_by_attribute retrieves all the tags in the document that have the given attribute name.
-	if name in dom.all_attributes {
+pub fn (dom DocumentObjectModel) get_tag_by_attribute(name string) []&Tag {
-		return dom.all_attributes[name]
+	return if name in dom.all_attributes {
 		dom.all_attributes[name]
 	} else {
 		[]&Tag{}
 	}
 	return []&Tag{}
 }
 // get_root returns the root of the document, i.e. the tag held in the
 // `root` field of the DocumentObjectModel.
 pub fn (dom DocumentObjectModel) get_root() &Tag {
 	return dom.root
 }
-pub fn (dom DocumentObjectModel) get_all_tags() []&Tag {
+// get_tags returns all of the tags stored in the document.
 pub fn (dom DocumentObjectModel) get_tags() []&Tag {
 	return dom.all_tags
 }
 /*
 pub fn (dom DocumentObjectModel) get_xpath() XPath {
 	return XPath{
 		dom: dom
 	}
 }
 */
--- a/vlib/net/html/dom_test.v
+++ b/vlib/net/html/dom_test.v
@ -1,63 +1,56 @@
 module html
-fn generate_temp_html() string {
+import strings
 	mut temp_html := '<!doctype html><html><head><title>Giant String</title></head><body>'
 	for counter := 0; counter < 4; counter++ {
 		temp_html += "<div id='name_$counter' "
 		temp_html += "class='several-$counter'>Look at $counter</div>"
 	}
 	temp_html += '</body></html>'
 	return temp_html
 }
-fn generate_dom(temp_html string) DocumentObjectModel {
+fn generate_temp_html() string {
-	mut parser := Parser{}
+	mut temp_html := strings.new_builder(200)
-	parser.parse_html(temp_html, false)
+	temp_html.write('<!doctype html><html><head><title>Giant String</title></head><body>')
-	dom := parser.get_dom()
+	for counter := 0; counter < 4; counter++ {
-	return dom
+		temp_html.write("<div id='name_$counter' ")
 		temp_html.write("class='several-$counter'>Look at $counter</div>")
 	}
 	temp_html.write('</body></html>')
 	return temp_html.str()
 }
 fn test_search_by_tag_type() {
-	dom := generate_dom(generate_temp_html())
+	dom := parse(generate_temp_html())
-	assert dom.get_by_tag('div').len == 4
+	assert dom.get_tag('div').len == 4
-	assert dom.get_by_tag('head').len == 1
+	assert dom.get_tag('head').len == 1
-	assert dom.get_by_tag('body').len == 1
+	assert dom.get_tag('body').len == 1
 }
 fn test_search_by_attribute_value() {
-	mut dom := generate_dom(generate_temp_html())
+	mut dom := parse(generate_temp_html())
 	// println(temp_html)
 	print('Amount ')
-	println(dom.get_by_attribute_value('id', 'name_0'))
+	println(dom.get_tag_by_attribute_value('id', 'name_0'))
-	assert dom.get_by_attribute_value('id', 'name_0').len == 1
+	assert dom.get_tag_by_attribute_value('id', 'name_0').len == 1
 }
 fn test_access_parent() {
-	mut dom := generate_dom(generate_temp_html())
+	mut dom := parse(generate_temp_html())
-	div_tags := dom.get_by_tag('div')
+	div_tags := dom.get_tag('div')
-	assert div_tags[0].get_parent() != C.NULL
+	parent := div_tags[0].parent
-	/*
+	assert parent != 0
 	parent := div_tags[0].get_parent()
 	assert parent != C.NULL
 	for div_tag in div_tags {
-		assert div_tag.get_parent() == parent
+		assert div_tag.parent == parent
 	}
 	*/
 }
 fn test_search_by_attributes() {
-	dom := generate_dom(generate_temp_html())
+	dom := parse(generate_temp_html())
-	assert dom.get_by_attribute('id').len == 4
+	assert dom.get_tag_by_attribute('id').len == 4
 }
 fn test_tags_used() {
-	dom := generate_dom(generate_temp_html())
+	dom := parse(generate_temp_html())
-	assert dom.get_all_tags().len == 9
+	assert dom.get_tags().len == 9
 }
 fn test_access_tag_fields() {
-	dom := generate_dom(generate_temp_html())
+	dom := parse(generate_temp_html())
-	id_tags := dom.get_by_attribute('id')
+	id_tags := dom.get_tag_by_attribute('id')
-	assert id_tags[0].get_name() == "div"
+	assert id_tags[0].name == 'div'
-	assert id_tags[1].get_attributes()['class'] == "several-1"
+	assert id_tags[1].attributes['class'] == 'several-1'
 }
--- a/vlib/net/html/html.v
+++ b/vlib/net/html/html.v
@ -0,0 +1,18 @@
 module html
 import os
 // parse parses and returns the DOM from the given text.
 // A fresh Parser is created for every call, so no state is shared
 // between successive calls.
 pub fn parse(text string) DocumentObjectModel {
 	mut parser := Parser{}
 	parser.parse_html(text)
 	return parser.get_dom()
 }
 // parse_file parses and returns the DOM from the contents of a file.
 // If the file cannot be read, the error is not propagated: an empty
 // DocumentObjectModel whose root is an empty Tag is returned instead.
 pub fn parse_file(filename string) DocumentObjectModel {
 	// Fall back to an empty DOM when os.read_file fails.
 	content := os.read_file(filename) or { return DocumentObjectModel{
 		root: &Tag{}
 	} }
 	return parse(content)
 }
--- a/vlib/net/html/html_test.v
+++ b/vlib/net/html/html_test.v
@ -0,0 +1,15 @@
 module html
 // test_parse parses a small document through the public `parse` entry point
 // and checks the name, content, attributes and string form of its single
 // `h1` tag.
 fn test_parse() {
 	doc := parse('<html><body><h1 class="title">Hello world!</h1></body></html>')
 	tags := doc.get_tag('h1')
 	assert tags.len == 1
 	h1_tag := tags[0] // <h1>Hello world!</h1>
 	assert h1_tag.name == 'h1'
 	assert h1_tag.content == 'Hello world!'
 	// The two asserts below pin the current behaviour (an extra empty
 	// attribute is recorded); the commented-out asserts are the intended one.
 	assert h1_tag.attributes.len == 2
 	// TODO: do not remove. Attributes must not have an empty attr.
 	// assert h1_tag.attributes.len == 1
 	assert h1_tag.str() == '<h1 class="title" >Hello world!</h1>'
 	// assert h1_tag.str() == '<h1 class="title">Hello world!</h1>'
 }
--- a/vlib/net/html/parser.v
+++ b/vlib/net/html/parser.v
@ -1,8 +1,9 @@
 module html
 import os
 import strings
-struct LexycalAttributes {
+struct LexicalAttributes {
 mut:
 	current_tag      &Tag
 	open_tag         bool
@ -12,44 +13,40 @@ mut:
 	is_attribute     bool
 	opened_code_type string
 	line_count       int
-	lexeme_builder   string
+	lexeme_builder   strings.Builder = strings.Builder{}
 	code_tags        map[string]bool = {
 	'script': true
 	'style':  true
-	}
+}
 }
 fn (mut lxa LexycalAttributes) write_lexeme(data byte) {
 	mut temp := lxa.lexeme_builder
 	temp += data.str()
 	lxa.lexeme_builder = temp
 }
 // Parser is responsible for reading the HTML strings and converting them into a `DocumentObjectModel`.
 pub struct Parser {
 mut:
 	dom                DocumentObjectModel
-	lexycal_attributes LexycalAttributes = LexycalAttributes{
+	lexical_attributes LexicalAttributes = LexicalAttributes{
 	current_tag: &Tag{}
-	}
+}
 	filename           string = 'direct-parse'
 	initialized        bool
 	tags               []&Tag
 	debug_file         os.File
 }
 // add_code_tag registers a tag whose content the parser should not interpret.
 // For example, if your HTML or XML contains a tag such as `<script>`, calling
 // `add_code_tag('script')` makes the parser skip over the content of every `script` tag,
 // so you still have its content, but its `>` or `<` characters will not confuse the parser.
 pub fn (mut parser Parser) add_code_tag(name string) {
-	if parser.lexycal_attributes.code_tags.keys().len <= 0 {
+	if name.len <= 0 {
-		parser.lexycal_attributes.code_tags = map[string]bool{}
+		return
 		parser.lexycal_attributes.code_tags['script'] = true
 		parser.lexycal_attributes.code_tags['style'] = true
 	}
 	if name.len > 0 {
 		parser.lexycal_attributes.code_tags[name] = true
 	}
 	parser.lexical_attributes.code_tags[name] = true
 }
 [inline]
 fn (parser Parser) builder_str() string {
-	return parser.lexycal_attributes.lexeme_builder
+	return parser.lexical_attributes.lexeme_builder.after(0)
 }
 [if debug]
@ -65,28 +62,28 @@ fn (mut parser Parser) verify_end_comment(remove bool) bool {
 	lexeme := parser.builder_str()
 	last := lexeme[lexeme.len - 1]
 	penultimate := lexeme[lexeme.len - 2]
-	mut is_end_comment := false
+	is_end_comment := last == `-` && penultimate == `-`
 	if last.str() == '-' && penultimate.str() == '-' {
 		is_end_comment = true
 	}
 	if is_end_comment && remove {
-		temp := parser.lexycal_attributes.lexeme_builder
+		parser.lexical_attributes.lexeme_builder.go_back(2)
 		parser.lexycal_attributes.lexeme_builder = temp[0..temp.len - 2]
 	}
 	return is_end_comment
 }
 fn blank_string(data string) bool {
 	mut count := 0
-	for word in data {
+	for chr in data {
-		if word == 9 || word == 32 {
+		if chr == 9 || chr == 32 {
 			count++
 		}
 	}
 	return count == data.len
 }
-fn (mut parser Parser) initialize_all() {
+// init initializes the parser.
 fn (mut parser Parser) init() {
 	if parser.initialized {
 		return
 	}
 	parser.dom = DocumentObjectModel{
 		debug_file: parser.debug_file
 		root: &Tag{}
@ -94,181 +91,165 @@ fn (mut parser Parser) initialize_all() {
 	parser.add_code_tag('')
 	parser.tags = []&Tag{}
 	parser.dom.close_tags['/!document'] = true
-	parser.lexycal_attributes.current_tag = &Tag{}
+	parser.lexical_attributes.current_tag = &Tag{}
 	parser.initialized = true
 }
 fn (mut parser Parser) generate_tag() {
-	if !parser.lexycal_attributes.open_tag {
+	if parser.lexical_attributes.open_tag {
-		if parser.lexycal_attributes.current_tag.name.len > 0 ||
+		return
 			parser.lexycal_attributes.current_tag.content.len > 0 {
 			parser.tags << parser.lexycal_attributes.current_tag
 	}
-		parser.lexycal_attributes.current_tag = &Tag{}
+	if parser.lexical_attributes.current_tag.name.len > 0 ||
 		parser.lexical_attributes.current_tag.content.len > 0 {
 		parser.tags << parser.lexical_attributes.current_tag
 	}
 	parser.lexical_attributes.current_tag = &Tag{}
 }
 // split_parse parses the HTML fragment
 pub fn (mut parser Parser) split_parse(data string) {
-	if !parser.initialized {
+	parser.init()
-		parser.initialize_all()
+	for chr in data {
-	}
+		// returns true if byte is a " or '
-	for word in data {
+		is_quote := chr == `"` || chr == `\'`
-		mut is_quotation := false // " or '
+		string_code := match chr {
-		if word == 34 || word == 39 {
+			`"` { 1 } // "
-			is_quotation = true
+			`\'` { 2 } // '
 		}
 		string_code := match word {
 			34 { 1 } // "
 			39 { 2 } // '
 			else { 0 }
 		}
-		if parser.lexycal_attributes.open_code { // here will verify all needed to know if open_code finishes and string in code
+		if parser.lexical_attributes.open_code { // here will verify all needed to know if open_code finishes and string in code
-			parser.lexycal_attributes.write_lexeme(word)
+			parser.lexical_attributes.lexeme_builder.write_b(chr)
-			if parser.lexycal_attributes.open_string > 0 {
+			if parser.lexical_attributes.open_string > 0 &&
-				if parser.lexycal_attributes.open_string == string_code {
+				parser.lexical_attributes.open_string == string_code {
-					parser.lexycal_attributes.open_string = 0
+				parser.lexical_attributes.open_string = 0
-				}
+			} else if is_quote {
-			} else if is_quotation {
+				parser.lexical_attributes.open_string = string_code
-				parser.lexycal_attributes.open_string = string_code
+			} else if chr == `>` { // only execute verification if is a > // here will verify < to know if code tag is finished
-			} else if word == 62 { // only execute verification if is a > // here will verify < to know if code tag is finished
+				name_close_tag := '</$parser.lexical_attributes.opened_code_type>'
-				name_close_tag := '</' + parser.lexycal_attributes.opened_code_type + '>'
+				if parser.builder_str().to_lower().ends_with(name_close_tag) {
-				temp_string := parser.builder_str()
+					parser.lexical_attributes.open_code = false
 				if temp_string.to_lower().ends_with(name_close_tag) {
 					parser.lexycal_attributes.open_code = false
 					// need to modify lexeme_builder to add script text as a content in next loop (not gave error in dom)
-					parser.lexycal_attributes.lexeme_builder = temp_string[0..temp_string.len -
+					parser.lexical_attributes.lexeme_builder.go_back(name_close_tag.len)
-						name_close_tag.len]
+					parser.lexical_attributes.current_tag.closed = true
-					parser.lexycal_attributes.current_tag.closed = true
+					parser.lexical_attributes.current_tag.close_type = .new_tag
 					parser.lexycal_attributes.current_tag.close_type = .new_tag
 				}
 			}
-		} else if parser.lexycal_attributes.open_comment {
+		} else if parser.lexical_attributes.open_comment {
-			if word == 62 && parser.verify_end_comment(false) { // close tag '>'
+			if chr == `>` && parser.verify_end_comment(false) { // close tag '>'
-				// parser.print_debug(parser.builder_str() + " >> " + parser.lexycal_attributes.line_count.str())
+				// parser.print_debug(parser.builder_str() + " >> " + parser.lexical_attributes.line_count.str())
-				parser.lexycal_attributes.lexeme_builder = '' // strings.Builder{}
+				parser.lexical_attributes.lexeme_builder.go_back_to(0)
-				parser.lexycal_attributes.open_comment = false
+				parser.lexical_attributes.open_comment = false
-				parser.lexycal_attributes.open_tag = false
+				parser.lexical_attributes.open_tag = false
 			} else {
-				parser.lexycal_attributes.write_lexeme(word)
+				parser.lexical_attributes.lexeme_builder.write_b(chr)
 			}
-		} else if parser.lexycal_attributes.open_string > 0 {
+		} else if parser.lexical_attributes.open_string > 0 {
-			if parser.lexycal_attributes.open_string == string_code {
+			if parser.lexical_attributes.open_string == string_code {
-				parser.lexycal_attributes.open_string = 0
+				parser.lexical_attributes.open_string = 0
-				parser.lexycal_attributes.write_lexeme(word)
+				parser.lexical_attributes.lexeme_builder.write_b(chr)
 				temp_lexeme := parser.builder_str()
-				if parser.lexycal_attributes.current_tag.last_attribute != '' {
+				if parser.lexical_attributes.current_tag.last_attribute != '' {
-					lattr := parser.lexycal_attributes.current_tag.last_attribute
+					lattr := parser.lexical_attributes.current_tag.last_attribute
 					nval := temp_lexeme.substr(1, temp_lexeme.len - 1)
 					// parser.print_debug(lattr + " = " + temp_lexeme)
-					parser.lexycal_attributes.current_tag.attributes[lattr] = nval
+					parser.lexical_attributes.current_tag.attributes[lattr] = nval
-					parser.lexycal_attributes.current_tag.last_attribute = ''
+					parser.lexical_attributes.current_tag.last_attribute = ''
 				} else {
-					parser.lexycal_attributes.current_tag.attributes[temp_lexeme.to_lower()] = '' // parser.print_debug(temp_lexeme)
+					parser.lexical_attributes.current_tag.attributes[temp_lexeme.to_lower()] = '' // parser.print_debug(temp_lexeme)
 				}
-				parser.lexycal_attributes.lexeme_builder = ''
+				parser.lexical_attributes.lexeme_builder.go_back_to(0)
 			} else {
-				parser.lexycal_attributes.write_lexeme(word)
+				parser.lexical_attributes.lexeme_builder.write_b(chr)
 			}
-		} else if parser.lexycal_attributes.open_tag {
+		} else if parser.lexical_attributes.open_tag {
-			if parser.lexycal_attributes.lexeme_builder.len == 0 && is_quotation {
+			if parser.lexical_attributes.lexeme_builder.len == 0 && is_quote {
-				parser.lexycal_attributes.open_string = string_code
+				parser.lexical_attributes.open_string = string_code
-				parser.lexycal_attributes.write_lexeme(word)
+				parser.lexical_attributes.lexeme_builder.write_b(chr)
-			} else if word == 62 { // close tag >
+			} else if chr == `>` { // close tag >
 				complete_lexeme := parser.builder_str().to_lower()
-				parser.lexycal_attributes.current_tag.closed = (complete_lexeme.len > 0 &&
+				parser.lexical_attributes.current_tag.closed = (complete_lexeme.len > 0 &&
-					complete_lexeme[complete_lexeme.len - 1] == 47) // if equals to /
+					complete_lexeme[complete_lexeme.len - 1] == `/`) // if equals to /
-				if complete_lexeme.len > 0 && complete_lexeme[0] == 47 {
+				if complete_lexeme.len > 0 && complete_lexeme[0] == `/` {
 					parser.dom.close_tags[complete_lexeme] = true
 				}
 				/*
 				else if complete_lexeme.len > 0 && complete_lexeme[complete_lexeme.len - 1] == 47 { // if end tag like "/>"
-					parser.lexycal_attributes.current_tag.closed = true
+					parser.lexical_attributes.current_tag.closed = true
 				}
 				*/
-				if parser.lexycal_attributes.current_tag.name == '' {
+				if parser.lexical_attributes.current_tag.name == '' {
-					parser.lexycal_attributes.current_tag.name = complete_lexeme
+					parser.lexical_attributes.current_tag.name = complete_lexeme
 				} else if complete_lexeme != '/' {
-					parser.lexycal_attributes.current_tag.attributes[complete_lexeme] = ''
+					parser.lexical_attributes.current_tag.attributes[complete_lexeme] = ''
 				}
-				parser.lexycal_attributes.open_tag = false
+				parser.lexical_attributes.open_tag = false
-				parser.lexycal_attributes.lexeme_builder = '' // if tag name is code
+				parser.lexical_attributes.lexeme_builder.go_back_to(0) // if tag name is code
-				if parser.lexycal_attributes.current_tag.name in parser.lexycal_attributes.code_tags {
+				if parser.lexical_attributes.current_tag.name in parser.lexical_attributes.code_tags {
-					parser.lexycal_attributes.open_code = true
+					parser.lexical_attributes.open_code = true
-					parser.lexycal_attributes.opened_code_type = parser.lexycal_attributes.current_tag.name
+					parser.lexical_attributes.opened_code_type = parser.lexical_attributes.current_tag.name
 				}
-				// parser.print_debug(parser.lexycal_attributes.current_tag.name)
+				// parser.print_debug(parser.lexical_attributes.current_tag.name)
-			} else if word != 9 && word != 32 && word != 61 && word != 10 { // Tab, space, = and \n
+			} else if chr !in [byte(9), ` `, `=`, `\n`] { // Tab, space, = and \n
-				parser.lexycal_attributes.write_lexeme(word)
+				parser.lexical_attributes.lexeme_builder.write_b(chr)
-			} else if word != 10 {
+			} else if chr != 10 {
 				complete_lexeme := parser.builder_str().to_lower()
-				if parser.lexycal_attributes.current_tag.name == '' {
+				if parser.lexical_attributes.current_tag.name == '' {
-					parser.lexycal_attributes.current_tag.name = complete_lexeme
+					parser.lexical_attributes.current_tag.name = complete_lexeme
 				} else {
-					parser.lexycal_attributes.current_tag.attributes[complete_lexeme] = ''
+					parser.lexical_attributes.current_tag.attributes[complete_lexeme] = ''
-					parser.lexycal_attributes.current_tag.last_attribute = ''
+					parser.lexical_attributes.current_tag.last_attribute = ''
-					if word == 61 { // if was a =
+					if chr == `=` { // if was a =
-						parser.lexycal_attributes.current_tag.last_attribute = complete_lexeme
+						parser.lexical_attributes.current_tag.last_attribute = complete_lexeme
 					}
 				}
-				parser.lexycal_attributes.lexeme_builder = '' // strings.Builder{}
+				parser.lexical_attributes.lexeme_builder.go_back_to(0)
 			}
 			if parser.builder_str() == '!--' {
-				parser.lexycal_attributes.open_comment = true
+				parser.lexical_attributes.open_comment = true
 			}
-		} else if word == 60 { // open tag '<'
+		} else if chr == `<` { // open tag '<'
 			temp_string := parser.builder_str()
-			if parser.lexycal_attributes.lexeme_builder.len >= 1 {
+			if parser.lexical_attributes.lexeme_builder.len >= 1 {
-				if parser.lexycal_attributes.current_tag.name.len > 1 &&
+				if parser.lexical_attributes.current_tag.name.len > 1 &&
-					parser.lexycal_attributes.current_tag.name[0] == 47 && !blank_string(temp_string) {
+					parser.lexical_attributes.current_tag.name[0] == 47 && !blank_string(temp_string) {
 					parser.tags << &Tag{
 						name: 'text'
 						content: temp_string
 					}
 				} else {
-					parser.lexycal_attributes.current_tag.content = temp_string // verify later who has this content
+					parser.lexical_attributes.current_tag.content = temp_string // verify later who has this content
 				}
 			}
-			// parser.print_debug(parser.lexycal_attributes.current_tag.str())
+			// parser.print_debug(parser.lexical_attributes.current_tag.str())
-			parser.lexycal_attributes.lexeme_builder = ''
+			parser.lexical_attributes.lexeme_builder.go_back_to(0)
 			parser.generate_tag()
-			parser.lexycal_attributes.open_tag = true
+			parser.lexical_attributes.open_tag = true
 		} else {
-			parser.lexycal_attributes.write_lexeme(word)
+			parser.lexical_attributes.lexeme_builder.write_b(chr)
 		}
 	}
 }
-pub fn (mut parser Parser) parse_html(data string, is_file bool) {
+// parse_html parses the given HTML string
-	if !parser.initialized {
+pub fn (mut parser Parser) parse_html(data string) {
-		parser.initialize_all()
+	parser.init()
-	}
+	mut lines := data.split_into_lines()
 	mut lines := []string{}
 	if is_file {
 		file_lines := os.read_lines(data) or {
 			eprintln('failed to read the file $data')
 			return
 		}
 		lines = file_lines
 	} else {
 		lines = data.split_into_lines()
 	}
 	for line in lines {
-		parser.lexycal_attributes.line_count++
+		parser.lexical_attributes.line_count++
 		parser.split_parse(line)
 	}
 	parser.generate_tag()
 	parser.dom.debug_file = parser.debug_file
-	parser.dom.construct(parser.tags) // println(parser.close_tags.keys())
+	parser.dom.construct(parser.tags)
 }
 // finalize finishes the parsing stage by calling `generate_tag` one last time.
 // Call it after the last `split_parse` call when feeding the parser fragments.
 [inline]
 pub fn (mut parser Parser) finalize() {
 	parser.generate_tag()
 }
-pub fn (parser Parser) get_tags() []&Tag {
+// get_dom returns the parser's current DOM representation.
 	return parser.tags
 }
 // get_dom returns parser.dom. When the DOM has not been constructed yet it
 // first calls generate_tag.
 // NOTE(review): the diff hunk header inside the branch hides the remaining
 // lines of the `if` body - confirm what else runs there against the real file.
 pub fn (mut parser Parser) get_dom() DocumentObjectModel {
 	if !parser.dom.constructed {
 		parser.generate_tag()
@ -276,10 +257,3 @@ pub fn (mut parser Parser) get_dom() DocumentObjectModel {
 	}
 	return parser.dom
 }
 /*pub fn (mut parser Parser) get_xpath() XPath {
 	dom := parser.get_dom()
 	return XPath{
 		dom: dom
 	}
 }*/
--- a/vlib/net/html/parser_test.v
+++ b/vlib/net/html/parser_test.v
@ -1,10 +1,10 @@
 module html
-//import net.http
+import strings
 // test_split_parse feeds one document to split_parse in arbitrary fragments
 // (cut inside tag names and around embedded newlines), calls finalize, and then
 // checks the number of collected tags and the content of the fourth tag.
 // Some of the split_parse calls are hidden by the diff hunk boundary below.
 fn test_split_parse() {
 	mut parser := Parser{}
-	parser.initialize_all()
+	parser.init()
 	parser.split_parse('<!doctype htm')
 	parser.split_parse('l public')
 	parser.split_parse('><html><he')
@ -16,37 +16,26 @@ fn test_split_parse() {
 	parser.split_parse('Nice Test!</h3>')
 	parser.split_parse('</bo\n\n\ndy></html>')
 	parser.finalize()
-	assert parser.get_tags().len == 11
+	assert parser.tags.len == 11
-	assert parser.get_tags()[3].get_content() == ' Hum... A Tit\nle'
+	assert parser.tags[3].content == ' Hum... A Tit\nle'
 }
 // test_giant_string builds a document containing 2000 generated <div> elements,
 // parses it with parse_html and expects 4009 tags in total.
 // NOTE(review): both the string `+=` form and the strings.Builder `.write` form
 // of the document construction appear below because the diff rendering
 // interleaves old and new lines - only one of them belongs to the real file.
 fn test_giant_string() {
-	mut temp_html := '<!doctype html><html><head><title>Giant String</title></head><body>'
+	mut temp_html := strings.new_builder(200)
 	for counter := 0; counter < 2000; counter++ {
 		temp_html += "<div id='name_$counter' class='several-$counter'>Look at $counter</div>"
 	}
 	temp_html += '</body></html>'
 	mut parser := Parser{}
-	parser.parse_html(temp_html, false)
+	temp_html.write('<!doctype html><html><head><title>Giant String</title></head><body>')
-	assert parser.get_tags().len == 4009
+	for counter := 0; counter < 2000; counter++ {
 		temp_html.write("<div id='name_$counter' class='several-$counter'>Look at $counter</div>")
 	}
 	temp_html.write('</body></html>')
 	parser.parse_html(temp_html.str())
 	assert parser.tags.len == 4009
 }
 // test_script_tag parses a document whose <script> body contains `>` and `||`
 // characters and checks that the third collected tag's content has the same
 // length as the script body with its newlines removed.
 // NOTE(review): `temp_html` is declared twice below (once unmarked, once on a
 // '+' line) because the diff rendering interleaves old and new lines.
 fn test_script_tag() {
 	temp_html := "<html><body><script>\nvar googletag = googletag || {};\n
 	googletag.cmd = googletag.cmd || [];if(3 > 5) {console.log('Birl');}\n</script></body></html>"
 	mut parser := Parser{}
-	parser.parse_html(temp_html, false)
+	script_content := "\nvar googletag = googletag || {};\ngoogletag.cmd = googletag.cmd || [];if(3 > 5) {console.log(\'Birl\');}\n"
-	assert parser.get_tags()[2].get_content().len == 101
+	temp_html := '<html><body><script>$script_content</script></body></html>'
 	parser.parse_html(temp_html)
 	assert parser.tags[2].content.len == script_content.replace('\n', '').len
 }
 /*fn test_download_source() {
 	println('Fetching github data in pastebin')
 	resp := http.get('https://pastebin.com/raw/5snUQgqN') or {
 		println('failed to fetch data from the server')
 		return
 	}
 	println('Finalized fetching, start parsing')
 	mut parser := Parser{}
 	parser.parse_html(resp.text, false)
 	assert parser.get_tags().len == 2244
 }*/
--- a/vlib/net/html/tag.v
+++ b/vlib/net/html/tag.v
@ -1,20 +1,22 @@
 module html
 import strings
 // CloseTagType records how a tag is closed; Tag.str() reads it to decide how
 // to render the end of the tag. Tag.close_type defaults to .in_name.
 enum CloseTagType {
 	in_name // a closed tag is rendered with `/>` and gets no separate closing tag
 	new_tag // the tag is rendered with a separate `</name>` closing tag
 }
 // Tag holds the information of an HTML tag.
 [ref_only]
 pub struct Tag {
 pub mut:
 	name               string
 	content            string
 	children           []&Tag
 mut:
 	attributes         map[string]string // attributes will be like map[name]value
 	last_attribute     string
-	parent             &Tag = C.NULL
+	parent             &Tag = 0
 	position_in_parent int
 	closed             bool
 	close_type         CloseTagType = .in_name
@ -26,62 +28,45 @@ fn (mut tag Tag) add_parent(t &Tag, position int) {
 }
 // add_child appends t to tag.children and returns the resulting number of
 // children.
 // NOTE(review): the two unmarked lines after the '+' line use the local
 // `children`, which only the removed ('-') line declares. They look like
 // remnants of the old implementation left in by the diff rendering - confirm
 // against the real file.
 fn (mut tag Tag) add_child(t &Tag) int {
-	mut children := tag.children
+	tag.children << t
 	children << t
 	tag.children = children
 	return tag.children.len
 }
-pub fn (tag Tag) get_children() []&Tag {
+// text returns the text contents of the tag.
 	return tag.children
 }
 // get_parent returns the tag's parent reference. The field defaults to a null
 // reference, so the result may be null when no parent has been assigned.
 pub fn (tag Tag) get_parent() &Tag {
 	return tag.parent
 }
 // get_name returns the tag's name field.
 pub fn (tag Tag) get_name() string {
 	return tag.name
 }
 // get_content returns the tag's content field unchanged (newlines included;
 // use text() for the newline-stripped text of the tag and its children).
 pub fn (tag Tag) get_content() string {
 	return tag.content
 }
 // get_attributes returns the tag's attribute map, keyed by attribute name.
 pub fn (tag Tag) get_attributes() map[string]string {
 	return tag.attributes
 }
 // text returns the text of the tag: '\n' for a tag whose name starts with
 // 'br', otherwise the tag's own content with newlines removed followed by the
 // text() of each child in order (recursively).
 pub fn (tag Tag) text() string {
-	if tag.name.len >= 2 && tag.name[0..2] == 'br' {
+	if tag.name.len >= 2 && tag.name[..2] == 'br' {
 		return '\n'
 	}
-	mut to_return := tag.content.replace('\n', '')
+	mut text_str := strings.new_builder(200)
-	for index := 0; index < tag.children.len; index++ {
+	text_str.write(tag.content.replace('\n', ''))
-		to_return += tag.children[index].text()
+	for child in tag.children {
 		text_str.write(child.text())
 	}
-	return to_return
+	return text_str.str()
 }
 // str renders the tag as HTML text: `<name`, then every attribute as ` key`
 // (plus `="value"` when the value is non-empty), then `/>` if the tag is closed
 // with close_type .in_name or `>` otherwise, then the tag's content, then the
 // str() of each child, and finally `</name>` unless the tag is closed with a
 // close_type other than .new_tag.
 // NOTE(review): the unmarked `to_return += tag.get_children()[index].str()`
 // line inside the children loop refers to names only the removed ('-') lines
 // declare; it looks like a remnant left in by the diff rendering.
 pub fn (tag &Tag) str() string {
-	mut to_return := '<$tag.name'
+	mut html_str := strings.new_builder(200)
-	for key in tag.attributes.keys() {
+	html_str.write('<$tag.name')
-		to_return += ' $key'
+	for key, value in tag.attributes {
-		value := tag.attributes[key]
+		html_str.write(' $key')
 		if value.len > 0 {
-			to_return += '=' + '"${tag.attributes[key]}"'
+			html_str.write('="$value"')
 		}
 	}
-	to_return += if tag.closed && tag.close_type == .in_name { '/>' } else { '>' }
+	html_str.write(if tag.closed && tag.close_type == .in_name {
-	to_return += '$tag.content'
+		'/>'
 	} else {
 		'>'
 	})
 	html_str.write(tag.content)
 	if tag.children.len > 0 {
-		// println('${tag.name} have ${tag.children.len} childrens')
+		for child in tag.children {
-		for index := 0; index < tag.children.len; index++ {
+			html_str.write(child.str())
 			to_return += tag.get_children()[index].str()
 		}
 	}
 	if !tag.closed || tag.close_type == .new_tag {
-		to_return += '</$tag.name>'
+		html_str.write('</$tag.name>')
 	}
-	return to_return
+	return html_str.str()
 }