From a849d52d4a7bb39e94501817e925feb73d34f93e Mon Sep 17 00:00:00 2001 From: ScriptBoy2077 <84058466+ScriptBoy2077@users.noreply.github.com> Date: Fri, 14 May 2021 16:09:21 +0800 Subject: [PATCH] examples: add web_crawler and get_weather (#10084) --- examples/file_list.v | 18 ++++++++++ examples/get_weather/README.md | 23 +++++++++++++ examples/get_weather/get_weather.v | 55 ++++++++++++++++++++++++++++++ examples/web_crawler/README.md | 22 ++++++++++++ examples/web_crawler/web_crawler.v | 31 +++++++++++++++++ 5 files changed, 149 insertions(+) create mode 100644 examples/file_list.v create mode 100644 examples/get_weather/README.md create mode 100644 examples/get_weather/get_weather.v create mode 100644 examples/web_crawler/README.md create mode 100644 examples/web_crawler/web_crawler.v diff --git a/examples/file_list.v b/examples/file_list.v new file mode 100644 index 0000000000..b1dc2958f9 --- /dev/null +++ b/examples/file_list.v @@ -0,0 +1,18 @@ +import os + +fn main() { + files := os.ls('.') or { + println(err) + return + } + mut f := os.create('file_list.txt') or { + println(err) + return + } + for file in files { + if os.is_file(file) { + f.write_string(file + '\r\n') or { println(err) } + } + } + f.close() +} diff --git a/examples/get_weather/README.md b/examples/get_weather/README.md new file mode 100644 index 0000000000..be2695d759 --- /dev/null +++ b/examples/get_weather/README.md @@ -0,0 +1,23 @@ +# get_weather +get_weather is a very simple web crawler. +Its goal is to get a weather forecast from caiyunapp.com. + +# Compile and Run + +Use this to generate an executable and then launch the web crawler. +```bash +v get_weather.v +./get_weather +``` + +As a convenience, you can also compile and launch the web crawler directly. +```bash +v run get_weather.v +``` + +In this project we use http.fetch() to get an http.Response, with a +custom user-agent and then we use json.decode() to decode the JSON +response into a struct. 
+We also use a `[skip]` attribute to skip certain fields in the response, +that we don't need and use a `[json: result]` attribute to specify that +our struct field is named differently from the incoming json response. diff --git a/examples/get_weather/get_weather.v b/examples/get_weather/get_weather.v new file mode 100644 index 0000000000..3156590738 --- /dev/null +++ b/examples/get_weather/get_weather.v @@ -0,0 +1,55 @@ +import json +import rand +import net.http + +struct Weather { + status string [skip] // drop this field + api_version string [skip] + api_status string [skip] + lang string [skip] + unit string [skip] + tzshift int [skip] + timezone string [skip] + server_time u32 [skip] + location []f32 [skip] + result Result //[json: result] if the field name is different in JSON, it can be specified +} + +struct Result { + realtime Realtime [skip] + minutely Minutely [skip] + hourly Hourly [skip] + daily Daily [skip] + primary int [skip] + forecast_keypoint string +} + +struct Realtime {} + +struct Minutely {} + +struct Hourly {} + +struct Daily {} + +fn main() { + config := http.FetchConfig{ + user_agent: 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0' + } + + rnd := rand.f32() + url := 'https://api.caiyunapp.com/v2.5/96Ly7wgKGq6FhllM/116.391912,40.010711/weather.jsonp?hourlysteps=120&random=$rnd' + // println(url) + + resp := http.fetch(url, config) or { + println('failed to fetch data from the server') + return + } + + weather := json.decode(Weather, resp.text) or { + println('failed to decode weather json') + return + } + + println('未来两小时天气:\n${weather.result.forecast_keypoint}.') +} diff --git a/examples/web_crawler/README.md b/examples/web_crawler/README.md new file mode 100644 index 0000000000..c8a741ff4d --- /dev/null +++ b/examples/web_crawler/README.md @@ -0,0 +1,22 @@ +# web_crawler +web_crawler is a very simple web crawler. 
+This web crawler fetches news from tuicool.com, +(a Chinese site similar to hacker-news.firebaseio.com). + +# Compile and Run + +Use this to generate an executable, and then launch the web crawler: +```bash +v web_crawler.v +./web_crawler +``` + +And this to compile and launch the web crawler directly: +```bash +v run web_crawler.v +``` + +This project shows how to use http.fetch() to get http.Response, +and then html.parse() to parse the returned HTML. + +It's easy, isn't it? diff --git a/examples/web_crawler/web_crawler.v b/examples/web_crawler/web_crawler.v new file mode 100644 index 0000000000..122aed5d9e --- /dev/null +++ b/examples/web_crawler/web_crawler.v @@ -0,0 +1,31 @@ +import net.http +import net.html + +fn main() { + /* + user_agent = 'v.http' + resp := http.get('https://tuicool.com') or { + println('failed to fetch data from the server') + return + } + */ + // http.fetch() sends an HTTP request to the URL with the given method and configurations. + config := http.FetchConfig{ + user_agent: 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0' + } + resp := http.fetch('https://tuicool.com', config) or { + println('failed to fetch data from the server') + return + } + // html.parse() parses and returns the DOM from the given text. + mut doc := html.parse(resp.text) + // html.DocumentObjectModel.get_tag_by_attribute_value() retrieves all the tags in the document that have the given attribute name and value. + tags := doc.get_tag_by_attribute_value('class', 'list_article_item') + for tag in tags { + href := tag.children[0].attributes['href'] or { panic('key not found') } + title := tag.children[0].attributes['title'] or { panic('key not found') } + println('href: $href') + println('title: $title') + println('') + } +}