From 0acb84d5a59276207657df64d349b4189a2e8f6d Mon Sep 17 00:00:00 2001 From: Miccah Date: Sat, 24 Jul 2021 03:31:33 -0500 Subject: [PATCH] net: add parse_headers function and handle header line folding (#10936) Closes https://github.com/vlang/v/issues/10930 --- vlib/net/http/header.v | 39 ++++++++++++++++++++++++++++++ vlib/net/http/header_test.v | 42 ++++++++++++++++++++++++++++++--- vlib/net/http/request.v | 9 ------- vlib/net/http/response.v | 47 +++++++++++++++++++------------------ 4 files changed, 102 insertions(+), 35 deletions(-) diff --git a/vlib/net/http/header.v b/vlib/net/http/header.v index e7804bc6d5..e96563ec5e 100644 --- a/vlib/net/http/header.v +++ b/vlib/net/http/header.v @@ -644,6 +644,14 @@ fn is_valid(header string) ? { }) } } + if header.len == 0 { + return IError(HeaderKeyError{ + msg: "Invalid header key: '$header'" + code: 2 + header: header + invalid_char: 0 + }) + } } // is_token checks if the byte is valid for a header token @@ -659,3 +667,34 @@ fn is_token(b byte) bool { pub fn (h Header) str() string { return h.render(version: .v1_1) } + +// parse_headers parses a newline delimited string into a Header struct +fn parse_headers(s string) ?Header { + mut h := new_header() + mut last_key := '' + mut last_value := '' + for line in s.split_into_lines() { + if line.len == 0 { + break + } + // handle header fold + if line[0] == ` ` || line[0] == `\t` { + last_value += ' ${line.trim(' \t')}' + continue + } else if last_key != '' { + h.add_custom(last_key, last_value) ? + } + last_key, last_value = parse_header(line) ? + } + h.add_custom(last_key, last_value) ? + return h +} + +fn parse_header(s string) ?(string, string) { + if !s.contains(':') { + return error('missing colon in header') + } + words := s.split_nth(':', 2) + // TODO: parse quoted text according to the RFC + return words[0], words[1].trim(' \t') +} diff --git a/vlib/net/http/header_test.v b/vlib/net/http/header_test.v index 573e58a0e2..3740d8afcd 100644 --- a/vlib/net/http/header_test.v +++ b/vlib/net/http/header_test.v @@ -1,7 +1,7 @@ module http fn test_header_new() { - h := new_header({ key: .accept, value: 'nothing' }, + h := new_header(HeaderConfig{ key: .accept, value: 'nothing' }, key: .expires value: 'yesterday' ) @@ -37,7 +37,7 @@ fn test_header_get() ? { } fn test_header_set() ? { - mut h := new_header({ key: .dnt, value: 'one' }, + mut h := new_header(HeaderConfig{ key: .dnt, value: 'one' }, key: .dnt value: 'two' ) @@ -47,7 +47,7 @@ fn test_header_set() ? { } fn test_header_delete() { - mut h := new_header({ key: .dnt, value: 'one' }, + mut h := new_header(HeaderConfig{ key: .dnt, value: 'one' }, key: .dnt value: 'two' ) @@ -323,3 +323,39 @@ fn test_header_join() ? { assert h3.contains_custom('Server') assert h3.contains_custom('foo') } + +fn parse_headers_test(s string, expected map[string]string) ? { + assert parse_headers(s) ? == new_custom_header_from_map(expected) ? +} + +fn test_parse_headers() ? { + parse_headers_test('foo: bar', map{ + 'foo': 'bar' + }) ? + parse_headers_test('foo: \t bar', map{ + 'foo': 'bar' + }) ? + parse_headers_test('foo: bar\r\n\tbaz', map{ + 'foo': 'bar baz' + }) ? + parse_headers_test('foo: bar \r\n\tbaz\r\n buzz', map{ + 'foo': 'bar baz buzz' + }) ? + parse_headers_test('foo: bar\r\nbar:baz', map{ + 'foo': 'bar' + 'bar': 'baz' + }) ? + parse_headers_test('foo: bar\r\nbar:baz\r\n', map{ + 'foo': 'bar' + 'bar': 'baz' + }) ? + parse_headers_test('foo: bar\r\nbar:baz\r\n\r\n', map{ + 'foo': 'bar' + 'bar': 'baz' + }) ? + assert parse_headers('foo: bar\r\nfoo:baz') ?.custom_values('foo') == ['bar', 'baz'] + + if x := parse_headers(' oops: oh no') { + return error('should have errored, but got $x') + } +} diff --git a/vlib/net/http/request.v b/vlib/net/http/request.v index 4cd2034a88..453b30b98b 100644 --- a/vlib/net/http/request.v +++ b/vlib/net/http/request.v @@ -217,15 +217,6 @@ fn parse_request_line(s string) ?(Method, urllib.URL, Version) { return method, target, version } -fn parse_header(s string) ?(string, string) { - if !s.contains(':') { - return error('missing colon in header') - } - words := s.split_nth(':', 2) - // TODO: parse quoted text according to the RFC - return words[0], words[1].trim_left(' \t') -} - // Parse URL encoded key=value&key=value forms fn parse_form(body string) map[string]string { words := body.split('&') diff --git a/vlib/net/http/response.v b/vlib/net/http/response.v index 31517827c8..48543d0ddf 100644 --- a/vlib/net/http/response.v +++ b/vlib/net/http/response.v @@ -34,8 +34,7 @@ pub fn (resp Response) bytestr() string { } // Parse a raw HTTP response into a Response object -pub fn parse_response(resp string) Response { - mut header := new_header() +pub fn parse_response(resp string) ?Response { // TODO: Cookie data type mut cookies := map[string]string{} first_header := resp.all_before('\n') @@ -44,28 +43,10 @@ pub fn parse_response(resp string) Response { val := first_header.find_between(' ', ' ') status_code = val.int() } - mut text := '' // Build resp header map and separate the body - mut nl_pos := 3 - mut i := 1 - for { - old_pos := nl_pos - nl_pos = resp.index_after('\n', nl_pos + 1) - if nl_pos == -1 { - break - } - h := resp[old_pos + 1..nl_pos] - // End of headers - if h.len <= 1 { - text = resp[nl_pos + 1..] - break - } - i++ - pos := h.index(':') or { continue } - mut key := h[..pos] - val := h[pos + 2..].trim_space() - header.add_custom(key, val) or { eprintln('$err; skipping header') } - } + start_idx, end_idx := find_headers_range(resp) ? + header := parse_headers(resp.substr(start_idx, end_idx)) ? + mut text := resp.substr(end_idx, resp.len) // set cookies for cookie in header.values(.set_cookie) { parts := cookie.split_nth('=', 2) @@ -81,3 +62,23 @@ pub fn parse_response(resp string) Response { text: text } } + +// find_headers_range returns the start (inclusive) and end (exclusive) +// index of the headers in the string, including the trailing newlines. This +// helper function expects the first line in `data` to be the HTTP status line +// (HTTP/1.1 200 OK). +fn find_headers_range(data string) ?(int, int) { + start_idx := data.index('\n') or { return error('no start index found') } + 1 + mut count := 0 + for i := start_idx; i < data.len; i++ { + if data[i] == `\n` { + count++ + } else if data[i] != `\r` { + count = 0 + } + if count == 2 { + return start_idx, i + 1 + } + } + return error('no end index found') +}