From 7e08e84bc1caab016b08fad1c9c09b17efedbe13 Mon Sep 17 00:00:00 2001 From: Miccah Date: Sat, 27 Feb 2021 16:18:25 -0600 Subject: [PATCH] vweb: refactor HTTP request parsing (#8874) --- cmd/tools/vtest-self.v | 5 ++ examples/vweb/vweb_example.v | 5 ++ vlib/net/http/http.v | 7 +- vlib/net/http/version.v | 30 +++++++ vlib/vweb/request.v | 85 ++++++++++++++++++ vlib/vweb/request_test.v | 78 ++++++++++++++++ vlib/vweb/vweb.v | 166 ++++------------------------------- 7 files changed, 225 insertions(+), 151 deletions(-) create mode 100644 vlib/net/http/version.v create mode 100644 vlib/vweb/request.v create mode 100644 vlib/vweb/request_test.v diff --git a/cmd/tools/vtest-self.v b/cmd/tools/vtest-self.v index 2d68091088..5a704f1d87 100644 --- a/cmd/tools/vtest-self.v +++ b/cmd/tools/vtest-self.v @@ -18,6 +18,7 @@ const ( 'vlib/sqlite/sqlite_test.v', 'vlib/v/tests/orm_sub_struct_test.v', 'vlib/vweb/tests/vweb_test.v', + 'vlib/vweb/request_test.v', 'vlib/x/websocket/websocket_test.v', ] skip_with_fsanitize_address = [ @@ -135,6 +136,7 @@ const ( 'vlib/v/tests/unsafe_test.v', 'vlib/v/tests/working_with_an_empty_struct_test.v', 'vlib/vweb/tests/vweb_test.v', + 'vlib/vweb/request_test.v', 'vlib/x/json2/any_test.v', 'vlib/x/json2/decoder_test.v', 'vlib/x/json2/json2_test.v', @@ -308,6 +310,7 @@ const ( 'vlib/v/tests/working_with_an_empty_struct_test.v', 'vlib/v/vcache/vcache_test.v', 'vlib/vweb/tests/vweb_test.v', + 'vlib/vweb/request_test.v', 'vlib/v/compiler_errors_test.v', 'vlib/v/tests/map_enum_keys_test.v', 'vlib/v/tests/tmpl_test.v', @@ -336,6 +339,7 @@ const ( 'vlib/v/tests/orm_sub_struct_test.v', 'vlib/clipboard/clipboard_test.v', 'vlib/vweb/tests/vweb_test.v', + 'vlib/vweb/request_test.v', 'vlib/x/websocket/websocket_test.v', 'vlib/net/http/http_httpbin_test.v', 'vlib/net/http/header_test.v', @@ -351,6 +355,7 @@ const ( 'vlib/net/unix/unix_test.v', 'vlib/x/websocket/websocket_test.v', 'vlib/vweb/tests/vweb_test.v', + 'vlib/vweb/request_test.v', ] skip_on_non_windows 
= []string{} skip_on_macos = []string{} diff --git a/examples/vweb/vweb_example.v b/examples/vweb/vweb_example.v index 965b0679f8..9cfbf83803 100644 --- a/examples/vweb/vweb_example.v +++ b/examples/vweb/vweb_example.v @@ -43,3 +43,8 @@ pub fn (mut app App) cookie() vweb.Result { app.set_cookie(name: 'cookie', value: 'test') return app.text('Headers: $app.headers') } + +[post] +pub fn (mut app App) post() vweb.Result { + return app.text('Post body: $app.req.data') +} diff --git a/vlib/net/http/http.v b/vlib/net/http/http.v index c54731927e..3fad6bf143 100644 --- a/vlib/net/http/http.v +++ b/vlib/net/http/http.v @@ -16,8 +16,10 @@ const ( pub struct Request { pub mut: + version Version = .v1_1 method Method - headers map[string]string + headers map[string]string // original request headers + lheaders map[string]string // same as headers, but with normalized lowercased keys (for received requests) cookies map[string]string data string url string @@ -340,7 +342,8 @@ fn (req &Request) build_request_headers(method Method, host_name string, path st uheaders << '$key: $val\r\n' } uheaders << req.build_request_cookies_header() - return '$method $path HTTP/1.1\r\n' + uheaders.join('') + 'Connection: close\r\n\r\n' + req.data + version := if req.version == .unknown { Version.v1_1 } else { req.version } + return '$method $path $version\r\n' + uheaders.join('') + 'Connection: close\r\n\r\n' + req.data } fn (req &Request) build_request_cookies_header() string { diff --git a/vlib/net/http/version.v b/vlib/net/http/version.v new file mode 100644 index 0000000000..95a13bf2b7 --- /dev/null +++ b/vlib/net/http/version.v @@ -0,0 +1,30 @@ +// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. +module http + +// The versions listed here are the most common ones. 
+pub enum Version { + unknown + v1_1 + v2_0 + v1_0 +} + +pub fn (v Version) str() string { + return match v { + .v1_1 { 'HTTP/1.1' } + .v2_0 { 'HTTP/2.0' } + .v1_0 { 'HTTP/1.0' } + .unknown { 'unknown' } + } +} + +pub fn version_from_str(v string) Version { + return match v.to_lower() { + 'http/1.1' { Version.v1_1 } + 'http/2.0' { Version.v2_0 } + 'http/1.0' { Version.v1_0 } + else { Version.unknown } + } +} diff --git a/vlib/vweb/request.v b/vlib/vweb/request.v new file mode 100644 index 0000000000..9485bf7465 --- /dev/null +++ b/vlib/vweb/request.v @@ -0,0 +1,85 @@ +module vweb + +import io +import net.http +import net.urllib + +pub fn parse_request(mut reader io.BufferedReader) ?http.Request { + // request line + mut line := reader.read_line() ? + method, target, version := parse_request_line(line) ? + + // headers + mut headers := map[string][]string{} + line = reader.read_line() ? + for line != '' { + key, values := parse_header(line) ? + headers[key] << values + line = reader.read_line() ? + } + + mut http_headers := map[string]string{} + mut http_lheaders := map[string]string{} + for k, v in headers { + values := v.join('; ') + http_headers[k] = values + http_lheaders[k.to_lower()] = values + } + + // body + mut body := [byte(0)] + if 'content-length' in http_lheaders { + n := http_lheaders['content-length'].int() + body = []byte{len: n, cap: n + 1} + reader.read(mut body) or { } + body << 0 + } + + return http.Request{ + method: method + url: target.str() + headers: http_headers + lheaders: http_lheaders + data: string(body) + version: version + } +} + +fn parse_request_line(s string) ?(http.Method, urllib.URL, http.Version) { + words := s.split(' ') + if words.len != 3 { + return error('malformed request line') + } + method := http.method_from_str(words[0]) + target := urllib.parse(words[1]) ? 
+ version := http.version_from_str(words[2]) + if version == .unknown { + return error('unsupported version') + } + + return method, target, version +} + +fn parse_header(s string) ?(string, []string) { + if ':' !in s { + return error('missing colon in header') + } + words := s.split_nth(':', 2) + if !is_token(words[0]) { + return error('invalid character in header name') + } + // TODO: parse quoted text according to the RFC + return words[0], words[1].trim_left(' \t').split(';').map(it.trim_space()) +} + +// TODO: use map for faster lookup (untested) +const token_chars = r"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!#$%&'*+-.^_`|~".bytes() + +fn is_token(s string) bool { + for c in s { + if c !in vweb.token_chars { + return false + } + } + return true +} diff --git a/vlib/vweb/request_test.v b/vlib/vweb/request_test.v new file mode 100644 index 0000000000..130f0b9886 --- /dev/null +++ b/vlib/vweb/request_test.v @@ -0,0 +1,78 @@ +module vweb + +import io + +struct StringReader { + text string +mut: + place int +} + +fn (mut s StringReader) read(mut buf []byte) ?int { + if s.place >= s.text.len { + return none + } + n := copy(buf, s.text[s.place..].bytes()) + s.place += n + return n +} + +fn reader(s string) &io.BufferedReader { + return io.new_buffered_reader(reader: io.make_reader(&StringReader{ text: s })) +} + +fn test_parse_request_not_http() { + parse_request(mut reader('hello')) or { return } + panic('should not have parsed') +} + +fn test_parse_request_no_headers() { + req := parse_request(mut reader('GET / HTTP/1.1\r\n\r\n')) or { panic('did not parse: $err') } + assert req.method == .get + assert req.url == '/' + assert req.version == .v1_1 +} + +fn test_parse_request_two_headers() { + req := parse_request(mut reader('GET / HTTP/1.1\r\nTest1: a\r\nTest2: B\r\n\r\n')) or { + panic('did not parse: $err') + } + assert req.headers == map{ + 'Test1': 'a' + 'Test2': 'B' + } + assert req.lheaders == map{ + 'test1': 'a' + 'test2': 'B' + } +} + 
+fn test_parse_request_two_header_values() { + req := parse_request(mut reader('GET / HTTP/1.1\r\nTest1: a; b\r\nTest2: c\r\nTest2: d\r\n\r\n')) or { + panic('did not parse: $err') + } + assert req.headers == map{ + 'Test1': 'a; b' + 'Test2': 'c; d' + } + assert req.lheaders == map{ + 'test1': 'a; b' + 'test2': 'c; d' + } +} + +fn test_parse_request_body() { + req := parse_request(mut reader('GET / HTTP/1.1\r\nTest1: a\r\nTest2: b\r\nContent-Length: 4\r\n\r\nbodyabc')) or { + panic('did not parse: $err') + } + assert req.data == 'body' +} + +fn test_parse_request_line() { + method, target, version := parse_request_line('GET /target HTTP/1.1') or { + panic('did not parse: $err') + } + assert method == .get + assert target.str() == '/target' + assert version == .v1_1 +} diff --git a/vlib/vweb/vweb.v b/vlib/vweb/vweb.v index 0265848a70..268169846b 100644 --- a/vlib/vweb/vweb.v +++ b/vlib/vweb/vweb.v @@ -269,12 +269,9 @@ pub fn (mut ctx Context) add_header(key string, val string) { // Returns the header data from the key pub fn (ctx &Context) get_header(key string) string { - return ctx.req.headers[key] + return ctx.req.lheaders[key.to_lower()] } -// fn handle_conn(conn net.Socket) { -// println('handle') -// } pub fn run(port int) { mut app := T{} run_app(mut app, port) @@ -292,31 +289,10 @@ pub fn run_app(mut app T, port int) { // check routes for validity } } - // app.reset() for { mut conn := l.accept() or { panic('accept() failed') } - go handle_conn(mut conn, mut app) - // app.vweb.page_gen_time = time.ticks() - t - // eprintln('handle conn() took ${time.ticks()-t}ms') - // message := readall(conn) - // println(message) - /* - if message.len > max_http_post_size { - println('message.len = $message.len > max_http_post_size') - conn.send_string(http_500) or {} - conn.close() or {} - continue - } - */ - // lines := message.split_into_lines() - // println(lines) - /* - if lines.len < 2 { - conn.send_string(http_500) or {} - conn.close() or {} - continue - } - */ + // 
TODO: running handle_conn concurrently results in a race-condition + handle_conn(mut conn, mut app) } } @@ -326,98 +302,9 @@ fn handle_conn(mut conn net.TcpConn, mut app T) { defer { conn.close() or { } } - // fn handle_conn(conn net.Socket, app_ T) T { - // mut app := app_ - // first_line := strip(lines[0]) mut reader := io.new_buffered_reader(reader: io.make_reader(conn)) page_gen_start := time.ticks() - first_line := reader.read_line() or { - $if debug { - eprintln('Failed to read first_line') // show this only in debug mode, because it always would be shown after a chromium user visits the site - } - return - } - $if debug { - eprintln('firstline="$first_line"') - } - // Parse the first line - // "GET / HTTP/1.1" - // first_line := s.all_before('\n') - vals := first_line.split(' ') - if vals.len < 2 { - println('no vals for http') - send_string(mut conn, vweb.http_500) or { } - return - } - mut headers := []string{} - mut body := '' - mut in_headers := true - mut len := 0 - // File receive stuff - mut ct := 'text/plain' - mut boundary := '' - // for line in lines[1..] { - for lindex in 0 .. 100 { - // println(j) - line := reader.read_line() or { - println('Failed read_line $lindex') - break - } - sline := strip(line) - // Parse content type - if sline.len >= 14 && sline[..14].to_lower() == 'content-type: ' { - args := sline[14..].split('; ') - ct = args[0] - if args.len > 1 { - boundary = args[1][9..] 
- } - } - if sline == '' { - // if in_headers { - // End of headers, no body => exit - if len == 0 { - break - } - //} //else { - // End of body - // break - //} - // read body - mut read_body := []byte{len: len} - // read just the amount of content len if there is no content there is nothing more to read here - reader.read(mut read_body) or { println('reader.read failed with err: $err') } - body += read_body.bytestr() - break - } - if ct == 'multipart/form-data' && sline == boundary { - body += boundary - read_body := io.read_all(reader: reader) or { []byte{} } - body += read_body.bytestr() - break - } - if in_headers { - headers << sline - if sline.to_lower().starts_with('content-length') { - len = sline.all_after(': ').int() - // println('GOT CL=$len') - } - } - } - req := http.Request{ - headers: http.parse_headers(headers) // s.split_into_lines()) - data: strip(body) - ws_func: 0 - user_ptr: 0 - method: http.method_from_str(vals[0]) - url: vals[1] - } - $if debug { - println('req.headers = ') - println(req.headers) - println('req.data="$req.data"') - // println('vweb action = "$action"') - } - // mut app := T{ + req := parse_request(mut reader) or { return } app.Context = Context{ req: req conn: conn @@ -426,23 +313,22 @@ fn handle_conn(mut conn net.TcpConn, mut app T) { static_mime_types: app.static_mime_types page_gen_start: page_gen_start } - // } if req.method in vweb.methods_with_form { - if ct == 'multipart/form-data' { - app.parse_multipart_form(body, boundary) + if 'multipart/form-data' in req.lheaders['content-type'].split('; ') { + boundary := req.lheaders['content-type'].split('; ').filter(it.starts_with('boundary ')) + if boundary.len != 1 { + // TODO: send 400 error + return + } + app.parse_multipart_form(req.data, boundary[0][9..]) } else { app.parse_form(req.data) } } - if vals.len < 2 { - $if debug { - println('no vals for http') - } - return - } // Serve a static file if it is one // TODO: handle url parameters properly - for now, ignore them 
mut static_file_name := app.req.url + // TODO: use urllib methods instead of manually parsing if static_file_name.contains('?') { static_file_name = static_file_name.all_before('?') } @@ -462,12 +348,9 @@ fn handle_conn(mut conn net.TcpConn, mut app T) { $if debug { println('route matching...') } - // t := time.ticks() - // mut action := '' mut route_words_a := [][]string{} - // mut url_words := vals[1][1..].split('/').filter(it != '') - x := vals[1][1..].split('/') - mut url_words := x.filter(it != '') + // TODO: use urllib methods instead of manually parsing + mut url_words := req.url.split('/').filter(it != '') // Parse URL query if url_words.len > 0 && url_words.last().contains('?') { words := url_words.last().after('?').split('&') @@ -754,10 +637,11 @@ pub fn (mut ctx Context) serve_static(url string, file_path string, mime_type st // Returns the ip address from the current user pub fn (ctx &Context) ip() string { - mut ip := ctx.req.headers['X-Forwarded-For'] + mut ip := ctx.req.lheaders['x-forwarded-for'] if ip == '' { - ip = ctx.req.headers['X-Real-IP'] + ip = ctx.req.lheaders['x-real-ip'] } + if ip.contains(',') { ip = ip.all_before(',') } @@ -772,22 +656,6 @@ pub fn (mut ctx Context) error(s string) { ctx.form_error = s } -/* -fn readall(conn net.Socket) string { - // read all message from socket - //printf("waitall=%d\n", C.MSG_WAITALL) - mut message := '' - buf := [1024]byte - for { - n := C.recv(conn.sockfd, buf, 1024, 0) - m := conn.crecv(buf, 1024) - message += unsafe { byteptr(buf).vstring_with_len(m) } - if message.len > max_http_post_size { break } - if n == m { break } - } - return message -} -*/ fn strip(s string) string { // strip('\nabc\r\n') => 'abc' return s.trim('\r\n')