From b952bf2e6b6f7bdc76d366b2851bc4df0edf7262 Mon Sep 17 00:00:00 2001 From: Ned Palacios Date: Thu, 10 Dec 2020 03:08:15 +0800 Subject: [PATCH] net.html: polish module, update docs (#7193) --- vlib/net/html/README.md | 128 ++------------- vlib/net/html/data_structures.v | 29 ++-- vlib/net/html/dom.v | 107 +++++-------- vlib/net/html/dom_test.v | 67 ++++---- vlib/net/html/html.v | 18 +++ vlib/net/html/html_test.v | 15 ++ vlib/net/html/parser.v | 272 +++++++++++++++----------------- vlib/net/html/parser_test.v | 43 ++--- vlib/net/html/tag.v | 69 ++++---- 9 files changed, 302 insertions(+), 446 deletions(-) create mode 100644 vlib/net/html/html.v create mode 100644 vlib/net/html/html_test.v diff --git a/vlib/net/html/README.md b/vlib/net/html/README.md index 42f725b08e..a92a6e6dc7 100644 --- a/vlib/net/html/README.md +++ b/vlib/net/html/README.md @@ -1,118 +1,16 @@ -# V HTML - -A HTML parser made in V. +net/html is an HTML parser written in pure V. ## Usage +```v oksyntax +import net.html -If the description below isn't enought, please look at the test files. - -### Parser - -Responsible for read HTML in full strings or splited string and returns all Tag objets of -it HTML or return a DocumentObjectModel, that will try to find how the HTML Tree is. - -#### split_parse(data string) -This functions is the main function called by parse method to fragment parse your HTML. - -#### parse_html(data string, is_file bool) -This function is called passing a filename or a complete html data string to it. - -#### add_code_tag(name string) -This function is used to add a tag for the parser ignore it's content. 
-For example, if you have an html or XML with a custom tag, like `" mut parser := Parser{} - parser.parse_html(temp_html, false) - assert parser.get_tags()[2].get_content().len == 101 + script_content := "\nvar googletag = googletag || {};\ngoogletag.cmd = googletag.cmd || [];if(3 > 5) {console.log(\'Birl\');}\n" + temp_html := '' + parser.parse_html(temp_html) + assert parser.tags[2].content.len == script_content.replace('\n', '').len } - -/*fn test_download_source() { - println('Fetching github data in pastebin') - resp := http.get('https://pastebin.com/raw/5snUQgqN') or { - println('failed to fetch data from the server') - return - } - println('Finalized fetching, start parsing') - mut parser := Parser{} - parser.parse_html(resp.text, false) - assert parser.get_tags().len == 2244 -}*/ diff --git a/vlib/net/html/tag.v b/vlib/net/html/tag.v index 50e30e94ab..89917a562d 100644 --- a/vlib/net/html/tag.v +++ b/vlib/net/html/tag.v @@ -1,20 +1,22 @@ module html +import strings + enum CloseTagType { in_name new_tag } +// Tag holds the information of an HTML tag. [ref_only] pub struct Tag { pub mut: name string content string children []&Tag -mut: attributes map[string]string // attributes will be like map[name]value last_attribute string - parent &Tag = C.NULL + parent &Tag = 0 position_in_parent int closed bool close_type CloseTagType = .in_name @@ -26,62 +28,45 @@ fn (mut tag Tag) add_parent(t &Tag, position int) { } fn (mut tag Tag) add_child(t &Tag) int { - mut children := tag.children - children << t - tag.children = children + tag.children << t return tag.children.len } -pub fn (tag Tag) get_children() []&Tag { - return tag.children -} - -pub fn (tag Tag) get_parent() &Tag { - return tag.parent -} - -pub fn (tag Tag) get_name() string { - return tag.name -} - -pub fn (tag Tag) get_content() string { - return tag.content -} - -pub fn (tag Tag) get_attributes() map[string]string { - return tag.attributes -} - +// text returns the text contents of the tag. 
pub fn (tag Tag) text() string { - if tag.name.len >= 2 && tag.name[0..2] == 'br' { + if tag.name.len >= 2 && tag.name[..2] == 'br' { return '\n' } - mut to_return := tag.content.replace('\n', '') - for index := 0; index < tag.children.len; index++ { - to_return += tag.children[index].text() + mut text_str := strings.new_builder(200) + text_str.write(tag.content.replace('\n', '')) + for child in tag.children { + text_str.write(child.text()) } - return to_return + return text_str.str() } pub fn (tag &Tag) str() string { - mut to_return := '<$tag.name' - for key in tag.attributes.keys() { - to_return += ' $key' - value := tag.attributes[key] + mut html_str := strings.new_builder(200) + html_str.write('<$tag.name') + for key, value in tag.attributes { + html_str.write(' $key') if value.len > 0 { - to_return += '=' + '"${tag.attributes[key]}"' + html_str.write('="$value"') } } - to_return += if tag.closed && tag.close_type == .in_name { '/>' } else { '>' } - to_return += '$tag.content' + html_str.write(if tag.closed && tag.close_type == .in_name { + '/>' + } else { + '>' + }) + html_str.write(tag.content) if tag.children.len > 0 { - // println('${tag.name} have ${tag.children.len} childrens') - for index := 0; index < tag.children.len; index++ { - to_return += tag.get_children()[index].str() + for child in tag.children { + html_str.write(child.str()) } } if !tag.closed || tag.close_type == .new_tag { - to_return += '' + html_str.write('') } - return to_return + return html_str.str() }