vweb: refactor form parsing and add tests (#9051)

pull/9118/head
Miccah 2021-03-04 10:00:03 -06:00 committed by GitHub
parent 7f830fac86
commit 709d7460de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 157 additions and 101 deletions

View File

@ -1,10 +1,11 @@
module vweb
import io
import strings
import net.http
import net.urllib
pub fn parse_request(mut reader io.BufferedReader) ?http.Request {
fn parse_request(mut reader io.BufferedReader) ?http.Request {
// request line
mut line := reader.read_line() ?
method, target, version := parse_request_line(line) ?
@ -68,21 +69,92 @@ fn parse_header(s string) ?(string, string) {
return error('missing colon in header')
}
words := s.split_nth(':', 2)
if !is_token(words[0]) {
return error('invalid character in header name')
}
// TODO: parse quoted text according to the RFC
return words[0], words[1].trim_left(' \t')
}
// token_chars holds the bytes that is_token accepts; parse_header relies on
// is_token to reject header names containing any other character.
// TODO: use map for faster lookup (untested)
const token_chars = r"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!#$%&'*+-.^_`|~".bytes()
// parse_form decodes a URL encoded `key=value&key=value` form body into a map.
// Each pair is split on its first `=` only, so a value may itself contain `=`.
// Pairs without any `=`, and pairs whose key or value cannot be URL-unescaped,
// are skipped. When a key occurs more than once, the last value wins.
// TODO: parse form-data and application/json
fn parse_form(body string) map[string]string {
	mut result := map[string]string{}
	for pair in body.split('&') {
		parts := pair.split_nth('=', 2)
		if parts.len == 2 {
			name := urllib.query_unescape(parts[0]) or { continue }
			value := urllib.query_unescape(parts[1]) or { continue }
			result[name] = value
		}
	}
	return result
}
fn is_token(s string) bool {
for c in s {
if c !in vweb.token_chars {
return false
// parse_multipart_form splits a multipart form `body` on `boundary` and returns
// two maps: plain fields (name -> value) and uploaded files (name -> list of
// FileData, one entry per section carrying a `filename`).
// Sections that are too short, have no `name` in their Content-Disposition
// line, or declare a `filename` without a following Content-Type line are
// skipped. A body that does not contain the boundary yields two empty maps.
// Values are rebuilt with lines_to_string, so each ends with a newline.
fn parse_multipart_form(body string, boundary string) (map[string]string, map[string][]FileData) {
	mut form := map[string]string{}
	mut files := map[string][]FileData{}
	sections := body.split(boundary)
	// A body without the boundary splits into a single section; slicing
	// `sections[1..0]` would panic, so bail out early on such malformed input.
	if sections.len < 2 {
		return form, files
	}
	// Ignore whatever precedes the first boundary and follows the last one.
	fields := sections[1..sections.len - 1]
	for field in fields {
		// TODO: do not split into lines; do same parsing for HTTP body
		all_lines := field.split_into_lines()
		// The first line is what remains of the boundary line. Without at least
		// one more line there is no Content-Disposition header to read.
		if all_lines.len < 2 {
			continue
		}
		lines := all_lines[1..]
		disposition := parse_disposition(lines[0])
		// Sections without a name cannot be stored anywhere.
		name := disposition['name'] or { continue }
		// Parse files
		// TODO: filename*
		if 'filename' in disposition {
			filename := disposition['filename']
			// Parse Content-Type header
			if lines.len == 1 || !lines[1].to_lower().starts_with('content-type:') {
				continue
			}
			mut ct := lines[1].split_nth(':', 2)[1]
			ct = ct.trim_left(' \t')
			// Data starts at index 3: after the disposition line, the content-type
			// line and one further line (presumably the blank separator). The last
			// line is excluded (presumably the dashes preceding the next boundary).
			data := lines_to_string(field.len, lines, 3, lines.len - 1)
			files[name] << FileData{
				filename: filename
				content_type: ct
				data: data
			}
			continue
		}
		// Plain field: data starts at index 2, after the disposition line and one
		// further line (presumably the blank separator).
		data := lines_to_string(field.len, lines, 2, lines.len - 1)
		form[name] = data
	}
	return form, files
}
// parse_disposition parses the Content-Disposition header of a multipart form
// and returns a map of its key="value" pairs.
// The line is split on `;`; segments without `=` (such as the leading
// `Content-Disposition: form-data`) are ignored. Keys are lower-cased and have
// leading spaces/tabs removed. A value enclosed in double quotes is stored
// without the quotes; any other value is stored unchanged.
// Example: parse_disposition('Content-Disposition: form-data; name="a"; filename="b"') == {'name': 'a', 'filename': 'b'}
fn parse_disposition(line string) map[string]string {
	mut data := map[string]string{}
	for word in line.split(';') {
		kv := word.split_nth('=', 2)
		if kv.len != 2 {
			continue
		}
		key, value := kv[0].to_lower().trim_left(' \t'), kv[1]
		// A lone `"` both starts and ends with a quote; require two characters so
		// the slice below never becomes the out-of-range `[1..0]`.
		if value.len >= 2 && value.starts_with('"') && value.ends_with('"') {
			data[key] = value[1..value.len - 1]
		} else {
			data[key] = value
		}
	}
	return data
}
// lines_to_string concatenates lines[start..end] (end exclusive) into a single
// string, writing a newline after every line, the last one included.
// `len` is passed to the string builder as its initial capacity.
// The builder is freed by hand because the function is marked [manualfree].
[manualfree]
fn lines_to_string(len int, lines []string, start int, end int) string {
	mut builder := strings.new_builder(len)
	mut idx := start
	for idx < end {
		builder.writeln(lines[idx])
		idx++
	}
	joined := builder.str()
	unsafe { builder.free() }
	return joined
}

View File

@ -76,3 +76,62 @@ fn test_parse_request_line() {
assert target.str() == '/target'
assert version == .v1_1
}
// test_parse_form checks that parse_form separates pairs on `&`, splits each
// pair on its first `=` only, URL-unescapes keys and values, and keeps
// whitespace in keys and values untouched.
fn test_parse_form() {
	// Two ordinary pairs.
	plain := parse_form('foo=bar&bar=baz')
	assert plain == map{
		'foo': 'bar'
		'bar': 'baz'
	}
	// A literal `=` after the first one belongs to the value.
	literal_eq := parse_form('foo=bar=&bar=baz')
	assert literal_eq == map{
		'foo': 'bar='
		'bar': 'baz'
	}
	// `%3D` decodes to `=`.
	escaped_eq := parse_form('foo=bar%3D&bar=baz')
	assert escaped_eq == map{
		'foo': 'bar='
		'bar': 'baz'
	}
	// `%26` decodes to `&` without starting a new pair.
	escaped_amp := parse_form('foo=b%26ar&bar=baz')
	assert escaped_amp == map{
		'foo': 'b&ar'
		'bar': 'baz'
	}
	// Leading whitespace in a key is preserved.
	spaced_key := parse_form('a=b& c=d')
	assert spaced_key == map{
		'a': 'b'
		' c': 'd'
	}
	// Surrounding whitespace in a value is preserved.
	spaced_value := parse_form('a=b&c= d ')
	assert spaced_value == map{
		'a': 'b'
		'c': ' d '
	}
}
// test_parse_multipart_form feeds parse_multipart_form a body with two
// sections, one carrying a filename and a Content-Type and one plain field,
// and checks that the first lands in `files` and the second in `form`.
fn test_parse_multipart_form() {
boundary := '6844a625b1f0b299'
names := ['foo', 'fooz']
file := 'bar.v'
ct := 'application/octet-stream'
contents := ['baz', 'buzz']
// The body below is a single multi-line string literal; its exact line layout
// is what the parser sees, so it must not be reformatted.
data := '--------------------------$boundary
Content-Disposition: form-data; name=\"${names[0]}\"; filename=\"$file\"
Content-Type: $ct
${contents[0]}
--------------------------$boundary
Content-Disposition: form-data; name=\"${names[1]}\"
${contents[1]}
--------------------------$boundary--
'
form, files := parse_multipart_form(data, boundary)
// The expected values carry a trailing '\n' that the parser currently appends.
// TODO: remove newlines
assert files == map{
names[0]: [FileData{
filename: file
content_type: ct
data: contents[0] + '\n'
}]
}
assert form == map{
names[1]: contents[1] + '\n'
}
}

View File

@ -13,10 +13,11 @@ import time
pub const (
methods_with_form = [http.Method.post, .put, .patch]
methods_without_first = ['ost', 'ut', 'et', 'atch', 'ptions', 'elete', 'ead'] // needed for method checking as method parameter
header_server = 'Server: VWeb\r\n'
header_connection_close = 'Connection: close\r\n'
headers_close = '$header_server$header_connection_close\r\n'
// TODO: use http.response structs
http_400 = 'HTTP/1.1 400 Bad Request\r\nContent-Type: text/plain\r\nContent-Length: 15\r\n${headers_close}400 Bad Request'
http_404 = 'HTTP/1.1 404 Not Found\r\nContent-Type: text/plain\r\nContent-Length: 13\r\n${headers_close}404 Not Found'
http_500 = 'HTTP/1.1 500 Internal Server Error\r\nContent-Type: text/plain\r\n${headers_close}500 Internal Server Error'
mime_types = map{
@ -318,14 +319,23 @@ fn handle_conn<T>(mut conn net.TcpConn, mut app T) {
}
if req.method in vweb.methods_with_form {
if 'multipart/form-data' in req.lheaders['content-type'].split('; ') {
boundary := req.lheaders['content-type'].split('; ').filter(it.starts_with('boundary '))
boundary := req.lheaders['content-type'].split('; ').filter(it.starts_with('boundary='))
if boundary.len != 1 {
// TODO: send 400 error
send_string(mut conn, vweb.http_400) or { }
return
}
app.parse_multipart_form(req.data, boundary[0][9..])
form, files := parse_multipart_form(req.data, boundary[0][9..])
for k, v in form {
app.form[k] = v
}
for k, v in files {
app.files[k] = v
}
} else {
app.parse_form(req.data)
form := parse_form(req.data)
for k, v in form {
app.form[k] = v
}
}
}
// Serve a static file if it is one
@ -491,86 +501,6 @@ fn serve_static<T>(mut app T, url urllib.URL) bool {
return true
}
// vweb intern function
// parse_form parses the URL encoded `key=value&key=value` string `s` and stores
// every decoded pair in ctx.form. It returns without doing anything when the
// request method is not in vweb.methods_with_form.
// `+` is turned into a space, each pair is trimmed of surrounding whitespace
// and split on its first `=` only, so values may themselves contain `=`.
// Pairs without `=`, or whose key or value cannot be URL-unescaped, are skipped.
// todo: parse form-data and application/json
pub fn (mut ctx Context) parse_form(s string) {
	if ctx.req.method !in vweb.methods_with_form {
		return
	}
	str_form := s.replace('+', ' ')
	words := str_form.split('&')
	for word in words {
		$if debug {
			println('parse form keyval="$word"')
		}
		// Split on the first `=` only; a plain split('=') yields more than two
		// parts for values such as `a=b=c`, and the pair would be dropped.
		keyval := word.trim_space().split_nth('=', 2)
		if keyval.len != 2 {
			continue
		}
		key := urllib.query_unescape(keyval[0]) or { continue }
		val := urllib.query_unescape(keyval[1]) or { continue }
		$if debug {
			println('http form "$key" => "$val"')
		}
		ctx.form[key] = val
	}
}
// vweb intern function
// parse_multipart_form splits the body `s` on the boundary `b` and stores each
// section either in ctx.files (when its first header line has more than one
// `; `-separated parameter) or in ctx.form. It returns without doing anything
// when the request method is not in vweb.methods_with_form.
// NOTE(review): parameters are extracted with fixed offsets, so this appears to
// assume the exact `...; name="x"; filename="y"` layout — confirm before reuse.
[manualfree]
pub fn (mut ctx Context) parse_multipart_form(s string, b string) {
if ctx.req.method !in vweb.methods_with_form {
return
}
// Drop the part before the first boundary, then the part after the last one.
a := s.split('$b')[1..]
fields := a[..a.len - 1]
for field in fields {
// Skip the first line of the section (the remainder of the boundary line).
lines := field.split_into_lines()[1..]
// l indexes the header line currently being read.
mut l := 0
// Parse name
// Everything after the first `; ` on the first header line.
disposition_data := lines[l].split('; ')[1..]
l++
// Skips 6 leading characters and the last one — presumably `name="` and the closing quote.
name := disposition_data[0][6..disposition_data[0].len - 1]
// Parse files
if disposition_data.len > 1 {
// Skips 10 leading characters and the last one — presumably `filename="` and the closing quote.
filename := disposition_data[1][10..disposition_data[1].len - 1]
// The next header line is taken to be `Content-Type: <value>`.
ct := lines[l].split(': ')[1]
l++
if name !in ctx.files {
ctx.files[name] = []FileData{}
}
mut sb := strings.new_builder(field.len)
// Starts one line past the headers (presumably skipping a blank separator)
// and leaves out the section's last line.
for i in l + 1 .. lines.len - 1 {
sb.writeln(lines[i])
}
ctx.files[name] << FileData{
filename: filename
content_type: ct
data: sb.str()
}
// The function is [manualfree], so the builder is released by hand.
unsafe {
sb.free()
}
continue
}
// Plain field: same line range as above, relative to the single header line.
mut sb := strings.new_builder(field.len)
for i in l + 1 .. lines.len - 1 {
sb.writeln(lines[i])
}
ctx.form[name] = sb.str()
// Manual cleanup of the temporaries created for this section.
unsafe {
disposition_data.free()
name.free()
sb.free()
}
}
}
fn (mut ctx Context) scan_static_directory(directory_path string, mount_path string) {
files := os.ls(directory_path) or { panic(err) }
if files.len > 0 {
@ -647,11 +577,6 @@ pub fn (mut ctx Context) error(s string) {
ctx.form_error = s
}
// strip removes leading and trailing carriage returns and line feeds from s,
// e.g. strip('\nabc\r\n') == 'abc'.
fn strip(s string) string {
	trimmed := s.trim('\r\n')
	return trimmed
}
// Returns an empty result
pub fn not_found() Result {
return Result{}