examples: add web_crawler and get_weather (#10084)
parent
0139836412
commit
a849d52d4a
|
@ -0,0 +1,18 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
// main lists the entries of the current directory and writes the names of
// all regular files (directories are skipped) to `file_list.txt`,
// one name per line, CRLF-terminated.
fn main() {
	files := os.ls('.') or {
		println(err)
		return
	}
	mut f := os.create('file_list.txt') or {
		println(err)
		return
	}
	// Close the output file on every exit path, not just the happy one.
	defer {
		f.close()
	}
	for file in files {
		if os.is_file(file) {
			// Best effort: report a failed write but keep going.
			f.write_string(file + '\r\n') or { println(err) }
		}
	}
}
|
|
@ -0,0 +1,23 @@
|
||||||
|
# get_weather

get_weather is a very simple web client.

Its goal is to get a weather forecast from caiyunapp.com.
|
||||||
|
|
||||||
|
# Compile and Run

Use this to generate an executable, and then launch it:

```bash
v get_weather.v
./get_weather
```
|
||||||
|
|
||||||
|
As a convenience, you can also compile and launch it directly:

```bash
v run get_weather.v
```
|
||||||
|
|
||||||
|
In this project we use http.fetch() to get an http.Response with a
custom user agent, and then we use json.decode() to decode the JSON
response into a struct.

We also use a `[skip]` attribute to skip certain fields in the response
that we don't need, and a `[json: result]` attribute to specify that
our struct field is named differently from the incoming JSON response.
|
|
@ -0,0 +1,55 @@
|
||||||
|
import json
|
||||||
|
import rand
|
||||||
|
import net.http
|
||||||
|
|
||||||
|
// Weather maps the top-level JSON object returned by the caiyunapp.com API.
// Every envelope field is marked [skip] so json.decode ignores it;
// only `result` is actually decoded.
struct Weather {
	status      string [skip] // drop this field
	api_version string [skip]
	api_status  string [skip]
	lang        string [skip]
	unit        string [skip]
	tzshift     int    [skip]
	timezone    string [skip]
	server_time u32    [skip]
	location    []f32  [skip]
	result      Result // [json: result] if the field name is different in JSON, it can be specified
}
|
||||||
|
|
||||||
|
// Result holds the `result` object of the API response. The nested
// forecast sections are skipped; only the textual summary
// `forecast_keypoint` is decoded and used.
struct Result {
	realtime          Realtime [skip]
	minutely          Minutely [skip]
	hourly            Hourly   [skip]
	daily             Daily    [skip]
	primary           int      [skip]
	forecast_keypoint string
}
|
||||||
|
|
||||||
|
// Placeholder types for the skipped forecast sections of the response.
// They exist only so the [skip]-annotated field types resolve;
// their contents are never decoded.
struct Realtime {}

struct Minutely {}

struct Hourly {}

struct Daily {}
|
||||||
|
|
||||||
|
// main queries the caiyunapp.com weather API for hard-coded coordinates
// (116.391912, 40.010711 — Beijing) and prints the two-hour forecast summary.
fn main() {
	// Send a browser-like user agent; the default one may be rejected.
	config := http.FetchConfig{
		user_agent: 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0'
	}
	// Random query parameter to defeat intermediate caching.
	rnd := rand.f32()
	url := 'https://api.caiyunapp.com/v2.5/96Ly7wgKGq6FhllM/116.391912,40.010711/weather.jsonp?hourlysteps=120&random=$rnd'
	// println(url)
	resp := http.fetch(url, config) or {
		println('failed to fetch data from the server')
		return
	}
	// Fail early on a non-OK status instead of a confusing decode error.
	if resp.status_code != 200 {
		println('the server responded with status code $resp.status_code')
		return
	}
	weather := json.decode(Weather, resp.text) or {
		println('failed to decode weather json')
		return
	}
	println('未来两小时天气:\n${weather.result.forecast_keypoint}.')
}
|
|
@ -0,0 +1,22 @@
|
||||||
|
# web_crawler

web_crawler is a very simple web crawler.

It fetches news from tuicool.com
(a Chinese site similar to hacker-news.firebaseio.com).
|
||||||
|
|
||||||
|
# Compile and Run

Use this to generate an executable, and then launch the web crawler:

```bash
v web_crawler.v
./web_crawler
```
|
||||||
|
|
||||||
|
And this to compile and launch the web crawler directly:

```bash
v run web_crawler.v
```
|
||||||
|
|
||||||
|
This project shows how to use http.fetch() to get an http.Response,
and then html.parse() to parse the returned HTML.

It's easy, isn't it?
|
|
@ -0,0 +1,31 @@
|
||||||
|
import net.http
|
||||||
|
import net.html
|
||||||
|
|
||||||
|
// main downloads the tuicool.com front page and prints the link (href)
// and title of every article item found in the returned HTML.
fn main() {
	/*
	user_agent = 'v.http'
	resp := http.get('https://tuicool.com') or {
		println('failed to fetch data from the server')
		return
	}
	*/
	// http.fetch() sends an HTTP request to the URL with the given
	// configuration; a browser-like user agent avoids being rejected.
	config := http.FetchConfig{
		user_agent: 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0'
	}
	resp := http.fetch('https://tuicool.com', config) or {
		println('failed to fetch data from the server')
		return
	}
	// Build a DOM from the fetched page text.
	mut dom := html.parse(resp.text)
	// Select every tag carrying the article-item class.
	items := dom.get_tag_by_attribute_value('class', 'list_article_item')
	for item in items {
		// The first child is the anchor element holding link and title.
		href := item.children[0].attributes['href'] or { panic('key not found') }
		title := item.children[0].attributes['title'] or { panic('key not found') }
		println('href: $href')
		println('title: $title')
		println('')
	}
}
|
Loading…
Reference in New Issue