From 553ecf63e73b26bad664d66f59267081fb988294 Mon Sep 17 00:00:00 2001 From: Emily Hudson Date: Wed, 16 Dec 2020 17:22:26 +0000 Subject: [PATCH] vlib/io: fix reader bugs, make read_all take a config struct (#7361) --- examples/net_raw_http.v | 2 +- vlib/builtin/array.v | 10 ++- vlib/io/buffered_reader.v | 104 ++++++++++++++++++------------- vlib/io/reader.v | 50 +++++++++++---- vlib/io/reader_test.v | 118 ++++++++++++++++++++++++++++++++---- vlib/io/readerwriter.v | 10 +-- vlib/net/http/http.v | 2 +- vlib/net/smtp/smtp.v | 2 +- vlib/vweb/tests/vweb_test.v | 2 +- vlib/vweb/vweb.v | 2 +- 10 files changed, 227 insertions(+), 75 deletions(-) diff --git a/examples/net_raw_http.v b/examples/net_raw_http.v index 4ec25fd3c6..4acff57c7e 100644 --- a/examples/net_raw_http.v +++ b/examples/net_raw_http.v @@ -8,7 +8,7 @@ fn main() { // Simple http HEAD request for a file conn.write_str('HEAD /index.html HTTP/1.0\r\n\r\n')? // Read all the data that is waiting - result := io.read_all(conn)? + result := io.read_all(reader: conn)? // Cast to string and print result println(result.bytestr()) } diff --git a/vlib/builtin/array.v b/vlib/builtin/array.v index 5054a121db..a3844df3e3 100644 --- a/vlib/builtin/array.v +++ b/vlib/builtin/array.v @@ -649,9 +649,15 @@ pub fn (a []int) reduce(iter fn (int, int) int, accum_start int) int { return accum_ } -// grow grows the array's capacity by `amount` elements. -pub fn (mut a array) grow(amount int) { +// grow_cap grows the array's capacity by `amount` elements. +pub fn (mut a array) grow_cap(amount int) { + a.ensure_cap(a.cap + amount) +} + +// grow_len ensures that an array has a.len + amount of length +pub fn (mut a array) grow_len(amount int) { a.ensure_cap(a.len + amount) + a.len += amount } // array_eq checks if two arrays contain all the same elements in the same order. diff --git a/vlib/io/buffered_reader.v b/vlib/io/buffered_reader.v index 2652dac130..6cb0f7238e 100644 --- a/vlib/io/buffered_reader.v +++ b/vlib/io/buffered_reader.v @@ -3,27 +3,28 @@ module io // BufferedReader provides a buffered interface for a reader struct BufferedReader { mut: - reader Reader - buf []byte + reader Reader + buf []byte // current offset in the buffer - offset int - len int + offset int + len int + // Whether we reached the end of the upstream reader + end_of_stream bool } // BufferedReaderConfig are options that can be given to a reader pub struct BufferedReaderConfig { - reader Reader - buf_cap int = 128 * 1024 // large for fast reading of big(ish) files + reader Reader + cap int = 128 * 1024 // large for fast reading of big(ish) files } // new_buffered_reader creates new BufferedReader pub fn new_buffered_reader(o BufferedReaderConfig) &BufferedReader { - assert o.buf_cap >= 2 - + assert o.cap >= 2 // create r := &BufferedReader{ reader: o.reader - buf: []byte{len: o.buf_cap, cap: o.buf_cap} + buf: []byte{len: o.cap, cap: o.cap} offset: 0 } return r @@ -31,46 +32,70 @@ pub fn new_buffered_reader(o BufferedReaderConfig) &BufferedReader { // read fufills the Reader interface pub fn (mut r BufferedReader) read(mut buf []byte) ?int { - // read data out of the buffer if we dont have any - if r.offset >= r.len-1 { - r.fill_buffer()? + if r.end_of_stream { + return none + } + // read data out of the buffer if we dont have any + if r.needs_fill() { + if !r.fill_buffer() { + // end of stream + return none + } } - read := copy(buf, r.buf[r.offset..r.len]) r.offset += read - return read } -// fill buffer attempts to refill the internal buffer -fn (mut r BufferedReader) fill_buffer() ? { - // TODO we should keep track of when we get an end of stream - // from the upstream reader so that we dont have to keep - // trying to call this - r.offset = 0 - new_len := r.reader.read(mut r.buf) or { - if errcode != 0 || err.len != 0 { - eprintln('>> BufferedReader.reader.read err: $err | errcode: $errcode') - } - 0 +// fill_buffer attempts to refill the internal buffer +// and returns whether it got any data +fn (mut r BufferedReader) fill_buffer() bool { + if r.end_of_stream { + // we know we have already reached the end of stream + // so return early + return false } - r.len = new_len + r.offset = 0 + r.len = 0 + r.len = r.reader.read(mut r.buf) or { + // end of stream was reached + r.end_of_stream = true + return false + } + // we got some data + return true } -// read_line reads a line from the buffered reader +// needs_fill returns whether the buffer needs refilling +fn (r BufferedReader) needs_fill() bool { + return r.offset >= r.len - 1 +} + +// end_of_stream returns whether the end of the stream was reached +pub fn (r BufferedReader) end_of_stream() bool { + return r.end_of_stream +} + +// read_line attempts to read a line from the buffered reader +// it will read until it finds a new line character (\n) or +// the end of stream pub fn (mut r BufferedReader) read_line() ?string { + if r.end_of_stream { + return none + } mut line := []byte{} for { - if r.offset >= (r.len-1) { + if r.needs_fill() { // go fetch some new data - r.fill_buffer()? + if !r.fill_buffer() { + // We are at the end of the stream + if line.len == 0 { + // we had nothing so return nothing + return none + } + return line.bytestr() + } } - - if r.len == 0 { - // if we have no data then return nothing - return none - } - // try and find a newline character mut i := r.offset for ; i < r.len; i++ { @@ -78,22 +103,17 @@ pub fn (mut r BufferedReader) read_line() ?string { if c == `\n` { // great, we hit something // do some checking for whether we hit \r\n or just \n - - if i != 0 && r.buf[i-1] == `\r` { - x := i-1 + if i != 0 && r.buf[i - 1] == `\r` { + x := i - 1 line << r.buf[r.offset..x] } else { line << r.buf[r.offset..i] } - r.offset = i + 1 - return line.bytestr() } } - line << r.buf[r.offset..i] - r.offset = i } } diff --git a/vlib/io/reader.v b/vlib/io/reader.v index 99ea10b1a2..ccbbb22e5c 100644 --- a/vlib/io/reader.v +++ b/vlib/io/reader.v @@ -4,30 +4,58 @@ module io pub interface Reader { // read reads up to buf.len bytes and places // them into buf. - // A struct that implements this should return + // A type that implements this should return // `none` on end of stream (EOF) instead of just returning 0 read(mut buf []byte) ?int } -// make_reader is a temp that converts a struct to a reader +// make_reader is a temp that converts a type to a reader // (e.g. for use in struct initialisation) +// (this shouldnt need to be a thing but until coercion gets made better +// it is required) pub fn make_reader(r Reader) Reader { return r } -pub const ( - eof_code = -1 - eof = error_with_code('EOF', eof_code) -) - const ( read_all_len = 10 * 1024 read_all_grow_len = 1024 ) -// read_all reads all available bytes from a reader -pub fn read_all(r Reader) ?[]byte { - mut b := []byte{len:read_all_len} +// ReadAllConfig allows options to be passed for the behaviour +// of read_all +pub struct ReadAllConfig { + reader Reader + read_to_end_of_stream bool +} + +// read_all reads all bytes from a reader until either a 0 length read +// or if read_to_end_of_stream is true then the end of the stream (`none`) +pub fn read_all(config ReadAllConfig) ?[]byte { + r := config.reader + read_till_eof := config.read_to_end_of_stream + + mut b := []byte{len: read_all_len} + mut read := 0 + for { + new_read := r.read(mut b[read..]) or { + break + } + read += new_read + if !read_till_eof && read == 0 { + break + } + if b.len == read { + b.grow_len(read_all_grow_len) + } + } + return b[..read] +} + +// read_any reads any available bytes from a reader +// (until the reader returns a read of 0 length) +pub fn read_any(r Reader) ?[]byte { + mut b := []byte{len: read_all_len} mut read := 0 for { new_read := r.read(mut b[read..]) or { @@ -38,7 +66,7 @@ pub fn read_all(r Reader) ?[]byte { break } if b.len == read { - b.grow(read_all_grow_len) + b.grow_len(read_all_grow_len) } } return b[..read] diff --git a/vlib/io/reader_test.v b/vlib/io/reader_test.v index 7667286683..9702950402 100644 --- a/vlib/io/reader_test.v +++ b/vlib/io/reader_test.v @@ -9,9 +9,9 @@ mut: fn (mut b Buf) read(mut buf []byte) ?int { if !(b.i < b.bytes.len) { - return eof + return none } - n := copy(buf, b.bytes[b.i..b.bytes.len]) + n := copy(buf, b.bytes[b.i..]) b.i += n return n } @@ -20,21 +20,117 @@ fn test_read_all() { buf := Buf{ bytes: '123'.repeat(10).bytes() } - res := read_all(buf) or { + res := read_all(reader: buf) or { assert false ''.bytes() } assert res == '123'.repeat(10).bytes() } -/* -TODO: This test failed by a bug of read_all fn test_read_all_huge() { - buf := Buf{bytes: "123".repeat(100000).bytes()} - res := read_all(buf) or { - assert false - "".bytes() + buf := Buf{ + bytes: '123'.repeat(100000).bytes() } - assert res == "123".repeat(100000).bytes() + res := read_all(reader: buf) or { + assert false + ''.bytes() + } + assert res == '123'.repeat(100000).bytes() +} + +struct StringReader { + text string +mut: + place int +} + +fn (mut s StringReader) read(mut buf []byte) ?int { + if s.place >= s.text.len { + return none + } + read := copy(buf, s.text[s.place..].bytes()) + s.place += read + return read +} + +const ( + newline_count = 100000 +) + +fn test_stringreader() { + text := '12345\n'.repeat(newline_count) + mut s := StringReader{ + text: text + } + mut r := new_buffered_reader({ + reader: make_reader(s) + }) + for i := 0; true; i++ { + if _ := r.read_line() { + } else { + assert i == newline_count + break + } + } + if _ := r.read_line() { + assert false + } + leftover := read_all(reader: r) or { + assert false + panic('bad') + } + if leftover.len > 0 { + assert false + } +} + +fn test_stringreader2() { + text := '12345\r\n'.repeat(newline_count) + mut s := StringReader{ + text: text + } + mut r := new_buffered_reader({ + reader: make_reader(s) + }) + for i := 0; true; i++ { + if _ := r.read_line() { + } else { + assert i == newline_count + break + } + } + if _ := r.read_line() { + assert false + } + leftover := read_all(reader: io.make_reader(r)) or { + assert false + panic('bad') + } + if leftover.len > 0 { + assert false + } +} + +fn test_leftover() { + text := 'This is a test\r\nNice try!' + mut s := StringReader{ + text: text + } + mut r := new_buffered_reader({ + reader: make_reader(s) + }) + _ := r.read_line() or { + assert false + panic('bad') + } + line2 := r.read_line() or { + assert false + panic('bad') + } + assert line2 == 'Nice try!' + if _ := r.read_line() { + assert false + panic('bad') + } + assert r.end_of_stream() } -*/ diff --git a/vlib/io/readerwriter.v b/vlib/io/readerwriter.v index 31965eb85a..c7c4ae62f1 100644 --- a/vlib/io/readerwriter.v +++ b/vlib/io/readerwriter.v @@ -4,12 +4,11 @@ module io pub interface ReaderWriter { // from Reader read(mut buf []byte) ?int - - // from Writer + // from Writer write(buf []byte) ?int } -// ReaderWriterImpl is a ReaderWriter that can be made from +// ReaderWriterImpl is a ReaderWriter that can be made from // a seperate reader and writer (see fn make_readerwriter) struct ReaderWriterImpl { r Reader @@ -27,7 +26,10 @@ pub fn (mut r ReaderWriterImpl) write(buf []byte) ?int { // make_readerwriter takes a rstream and a wstream and makes // an rwstream with them pub fn make_readerwriter(r Reader, w Writer) ReaderWriterImpl { - return {r: r, w: w} + return { + r: r + w: w + } } struct Zzz_CoerceInterfaceTableGeneration { diff --git a/vlib/net/http/http.v b/vlib/net/http/http.v index 9c5210e0d9..0b118975c6 100644 --- a/vlib/net/http/http.v +++ b/vlib/net/http/http.v @@ -398,7 +398,7 @@ fn (req &Request) http_do(host string, method Method, path string) ?Response { mut client := net.dial_tcp(host)? // TODO this really needs to be exposed somehow client.write(s.bytes())? - mut bytes := io.read_all(client)? + mut bytes := io.read_all(reader: client)? client.close() return parse_response(bytes.bytestr()) } diff --git a/vlib/net/smtp/smtp.v b/vlib/net/smtp/smtp.v index 9953039220..afa592d3ae 100644 --- a/vlib/net/smtp/smtp.v +++ b/vlib/net/smtp/smtp.v @@ -95,7 +95,7 @@ pub fn (mut c Client) quit() ? { // expect_reply checks if the SMTP server replied with the expected reply code fn (c Client) expect_reply(expected ReplyCode) ? { - bytes := io.read_all(c.conn)? + bytes := io.read_all(reader: c.conn)? str := bytes.bytestr().trim_space() $if smtp_debug? { diff --git a/vlib/vweb/tests/vweb_test.v b/vlib/vweb/tests/vweb_test.v index d88107f8ba..8c6699546a 100644 --- a/vlib/vweb/tests/vweb_test.v +++ b/vlib/vweb/tests/vweb_test.v @@ -259,7 +259,7 @@ $config.content' eprintln('sending:\n$message') } client.write(message.bytes()) ? - read := io.read_all(client) ? + read := io.read_all(reader: client) ? $if debug_net_socket_client ? { eprintln('received:\n$read') } diff --git a/vlib/vweb/vweb.v b/vlib/vweb/vweb.v index 54bf89f112..e855b4cee8 100644 --- a/vlib/vweb/vweb.v +++ b/vlib/vweb/vweb.v @@ -285,7 +285,7 @@ fn handle_conn(mut conn net.TcpConn, mut app T) { //} // read body - read_body := io.read_all(reader) or { []byte{} } + read_body := io.read_all(reader: reader) or { []byte{} } body += read_body.bytestr() break