net: add parse_headers function and handle header line folding (#10936)

Closes https://github.com/vlang/v/issues/10930
pull/10935/head
Miccah 2021-07-24 03:31:33 -05:00 committed by GitHub
parent 304f26edeb
commit 0acb84d5a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 102 additions and 35 deletions

View File

@ -644,6 +644,14 @@ fn is_valid(header string) ? {
}) })
} }
} }
if header.len == 0 {
return IError(HeaderKeyError{
msg: "Invalid header key: '$header'"
code: 2
header: header
invalid_char: 0
})
}
} }
// is_token checks if the byte is valid for a header token // is_token checks if the byte is valid for a header token
@ -659,3 +667,34 @@ fn is_token(b byte) bool {
pub fn (h Header) str() string { pub fn (h Header) str() string {
return h.render(version: .v1_1) return h.render(version: .v1_1)
} }
// parse_headers parses a newline delimited string into a Header struct.
// Parsing stops at the first empty line. A line that starts with a space or
// a tab is a folded continuation: it is trimmed of spaces and tabs and
// appended to the value of the header before it, separated by one space.
// When the input holds no header lines, the Header created by new_header()
// is returned with nothing added to it.
// An error is returned when a continuation line appears before any header,
// when parse_header fails on a line, or when add_custom fails.
fn parse_headers(s string) ?Header {
	mut h := new_header()
	mut last_key := ''
	mut last_value := ''
	// `pending` is true while last_key/last_value hold a parsed header that has
	// not been added to `h` yet. Tracking this explicitly (instead of testing
	// `last_key != ''`) means a header line with an empty name is handed to
	// add_custom like any other line instead of being silently dropped.
	mut pending := false
	for line in s.split_into_lines() {
		if line.len == 0 {
			break
		}
		// handle header fold
		if line[0] == ` ` || line[0] == `\t` {
			if !pending {
				return error('header continuation line without a preceding header')
			}
			last_value += ' ${line.trim(' \t')}'
			continue
		}
		// a new header starts here, so the previous one is complete
		if pending {
			h.add_custom(last_key, last_value) ?
		}
		last_key, last_value = parse_header(line) ?
		pending = true
	}
	// flush the last header, if there was one
	if pending {
		h.add_custom(last_key, last_value) ?
	}
	return h
}
// parse_header splits a single `key: value` line at its first colon.
// It returns the text before the colon unchanged as the key, and the text
// after the colon with surrounding spaces and tabs removed as the value.
// A line without any colon is an error.
fn parse_header(s string) ?(string, string) {
	colon := s.index(':') or { return error('missing colon in header') }
	key := s[..colon]
	raw_value := s[colon + 1..]
	// TODO: parse quoted text according to the RFC
	return key, raw_value.trim(' \t')
}

View File

@ -1,7 +1,7 @@
module http module http
fn test_header_new() { fn test_header_new() {
h := new_header({ key: .accept, value: 'nothing' }, h := new_header(HeaderConfig{ key: .accept, value: 'nothing' },
key: .expires key: .expires
value: 'yesterday' value: 'yesterday'
) )
@ -37,7 +37,7 @@ fn test_header_get() ? {
} }
fn test_header_set() ? { fn test_header_set() ? {
mut h := new_header({ key: .dnt, value: 'one' }, mut h := new_header(HeaderConfig{ key: .dnt, value: 'one' },
key: .dnt key: .dnt
value: 'two' value: 'two'
) )
@ -47,7 +47,7 @@ fn test_header_set() ? {
} }
fn test_header_delete() { fn test_header_delete() {
mut h := new_header({ key: .dnt, value: 'one' }, mut h := new_header(HeaderConfig{ key: .dnt, value: 'one' },
key: .dnt key: .dnt
value: 'two' value: 'two'
) )
@ -323,3 +323,39 @@ fn test_header_join() ? {
assert h3.contains_custom('Server') assert h3.contains_custom('Server')
assert h3.contains_custom('foo') assert h3.contains_custom('foo')
} }
// parse_headers_test parses `s` with parse_headers and asserts that the
// result equals the Header built from `expected` by
// new_custom_header_from_map. Errors from either call are propagated.
fn parse_headers_test(s string, expected map[string]string) ? {
	parsed := parse_headers(s) ?
	wanted := new_custom_header_from_map(expected) ?
	assert parsed == wanted
}
// test_parse_headers exercises parse_headers with single headers, folded
// (continuation) lines, several headers, trailing newlines, a repeated key,
// and an input that must be rejected.
fn test_parse_headers() ? {
// a single header
parse_headers_test('foo: bar', map{
'foo': 'bar'
}) ?
// spaces and tabs around the value are not part of the expected value
parse_headers_test('foo: \t bar', map{
'foo': 'bar'
}) ?
// a line starting with a tab continues the previous value
parse_headers_test('foo: bar\r\n\tbaz', map{
'foo': 'bar baz'
}) ?
// several continuation lines, starting with a tab or a space
parse_headers_test('foo: bar \r\n\tbaz\r\n buzz', map{
'foo': 'bar baz buzz'
}) ?
// two headers; no space is required after the colon
parse_headers_test('foo: bar\r\nbar:baz', map{
'foo': 'bar'
'bar': 'baz'
}) ?
// a trailing line break does not change the result
parse_headers_test('foo: bar\r\nbar:baz\r\n', map{
'foo': 'bar'
'bar': 'baz'
}) ?
// neither does a trailing empty line
parse_headers_test('foo: bar\r\nbar:baz\r\n\r\n', map{
'foo': 'bar'
'bar': 'baz'
}) ?
// a repeated key is expected to keep both values, in input order
assert parse_headers('foo: bar\r\nfoo:baz') ?.custom_values('foo') == ['bar', 'baz']
// input that starts with a space is expected to fail, so the test fails
// if a value comes back
if x := parse_headers(' oops: oh no') {
return error('should have errored, but got $x')
}
}

View File

@ -217,15 +217,6 @@ fn parse_request_line(s string) ?(Method, urllib.URL, Version) {
return method, target, version return method, target, version
} }
// parse_header splits a single `key: value` line at its first colon.
// The key is the text before the colon, returned unchanged. The value is
// the text after the colon with leading spaces and tabs removed; trailing
// whitespace is kept. A line without any colon is an error.
fn parse_header(s string) ?(string, string) {
	pos := s.index(':') or { return error('missing colon in header') }
	name := s.substr(0, pos)
	rest := s.substr(pos + 1, s.len)
	// TODO: parse quoted text according to the RFC
	return name, rest.trim_left(' \t')
}
// Parse URL encoded key=value&key=value forms // Parse URL encoded key=value&key=value forms
fn parse_form(body string) map[string]string { fn parse_form(body string) map[string]string {
words := body.split('&') words := body.split('&')

View File

@ -34,8 +34,7 @@ pub fn (resp Response) bytestr() string {
} }
// Parse a raw HTTP response into a Response object // Parse a raw HTTP response into a Response object
pub fn parse_response(resp string) Response { pub fn parse_response(resp string) ?Response {
mut header := new_header()
// TODO: Cookie data type // TODO: Cookie data type
mut cookies := map[string]string{} mut cookies := map[string]string{}
first_header := resp.all_before('\n') first_header := resp.all_before('\n')
@ -44,28 +43,10 @@ pub fn parse_response(resp string) Response {
val := first_header.find_between(' ', ' ') val := first_header.find_between(' ', ' ')
status_code = val.int() status_code = val.int()
} }
mut text := ''
// Build resp header map and separate the body // Build resp header map and separate the body
mut nl_pos := 3 start_idx, end_idx := find_headers_range(resp) ?
mut i := 1 header := parse_headers(resp.substr(start_idx, end_idx)) ?
for { mut text := resp.substr(end_idx, resp.len)
old_pos := nl_pos
nl_pos = resp.index_after('\n', nl_pos + 1)
if nl_pos == -1 {
break
}
h := resp[old_pos + 1..nl_pos]
// End of headers
if h.len <= 1 {
text = resp[nl_pos + 1..]
break
}
i++
pos := h.index(':') or { continue }
mut key := h[..pos]
val := h[pos + 2..].trim_space()
header.add_custom(key, val) or { eprintln('$err; skipping header') }
}
// set cookies // set cookies
for cookie in header.values(.set_cookie) { for cookie in header.values(.set_cookie) {
parts := cookie.split_nth('=', 2) parts := cookie.split_nth('=', 2)
@ -81,3 +62,23 @@ pub fn parse_response(resp string) Response {
text: text text: text
} }
} }
// find_headers_range returns the start (inclusive) and end (exclusive)
// index of the headers in the string, including the trailing newlines. This
// helper function expects the first line in `data` to be the HTTP status line
// (HTTP/1.1 200 OK).
// The headers end at the first blank line, i.e. two `\n` bytes with nothing
// but `\r` bytes between them. A blank line directly after the status line
// (no header fields at all) is matched as well; the returned range then
// covers only that blank line.
// An error is returned when `data` contains no `\n`, or when no blank line
// follows the status line.
fn find_headers_range(data string) ?(int, int) {
	status_end := data.index('\n') or { return error('no start index found') }
	start_idx := status_end + 1
	// The newline that terminates the status line counts as the first of the
	// two consecutive newlines. Starting at 0 would make a response without
	// any header fields fail with 'no end index found'.
	mut count := 1
	for i := start_idx; i < data.len; i++ {
		if data[i] == `\n` {
			count++
		} else if data[i] != `\r` {
			// any other byte means the current line is not blank
			count = 0
		}
		if count == 2 {
			return start_idx, i + 1
		}
	}
	return error('no end index found')
}