v/vlib/encoding/csv/reader.v

// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.

module csv

// Once interfaces are further along, the idea is to have something similar to
// Go's io.Reader and bufio.Reader rather than reading the whole file into a string;
// this reader would then satisfy that interface. It is designed to be easily adapted.

// Error values returned by the csv reader:
// - err_comment_is_delim: read_record found the reader's comment byte equal to its delimiter
// - err_invalid_delim: the delimiter was rejected by valid_delim, or read_record could not
//   extract any field from a line that starts with an unterminated quote
// - err_eof: read_line has consumed all of the data
// - err_invalid_le: the data contains neither '\n' nor '\r', so no line could be delimited
const (
	err_comment_is_delim = error('encoding.csv: comment cannot be the same as delimiter')
	err_invalid_delim    = error('encoding.csv: invalid delimiter')
	err_eof              = error('encoding.csv: end of file')
	err_invalid_le       = error('encoding.csv: could not find any valid line endings')
)


// Reader parses csv records, one line at a time, out of an in-memory string.
struct Reader  {
	// not used yet
	// has_header        bool
	// headings          []string
	// the complete csv text; read_line slices successive lines out of it
	data              string
pub mut:
	// byte that separates fields within a line (new_reader sets it to `,`)
	delimiter         byte
	// lines whose first byte equals this are skipped (new_reader sets it to `#`)
	comment           byte
	// set by read_line when the data has no '\n' but does contain '\r';
	// from then on '\r' is used as the line terminator
	is_mac_pre_osx_le bool
	// byte offset into data at which the next line starts
	row_pos           int
}

// new_reader returns a Reader positioned at the start of `data`,
// using `,` as the field delimiter and `#` as the comment marker.
// Both can be changed afterwards through the public mutable fields.
pub fn new_reader(data string) &Reader {
	return &Reader{
		data: data,
		comment: `#`,
		delimiter: `,`
	}
}

// read returns the fields of the next record in the csv data.
// Any error reported by read_record (including end of file) is passed
// on to the caller unchanged.
pub fn (r mut Reader) read() ?[]string {
	record := r.read_record() or {
		return error(err)
	}
	return record
}

// Once we have multi dimensional array
// pub fn (r mut Reader) read_all() ?[][]string {
// 	mut records := []string
// 	for {
// 		record := r.read_record() or {
// 			if error(err).error == err_eof.error {
// 				return records
// 			} else {
// 				return error(err)
// 			}
// 		}
// 		records << record
// 	}
// 	return records
// }

// read_line returns the next line of `r.data` without its line terminator
// and advances `r.row_pos` past that line.
// Lines end with '\n'; a '\r' directly before it (windows endings) is removed.
// If the data contains no '\n' at all but does contain '\r', the reader
// switches to '\r' as terminator (pre OS X mac endings) for all further lines.
// A final line without a terminator is returned in full.
// Errors: err_eof once all data has been consumed, err_invalid_le when the
// data contains neither '\n' nor '\r'.
fn (r mut Reader) read_line() ?string {
	// all data consumed; `>=` because row_pos is left at data.len+1 after an
	// unterminated final line has been handed out
	if r.row_pos >= r.data.len {
		return err_eof
	}
	le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
	mut i := r.data.index_after(le, r.row_pos)
	if i == -1 {
		if r.row_pos == 0 {
			// check for pre osx mac line endings
			i = r.data.index_after('\r', r.row_pos)
			if i != -1 {
				r.is_mac_pre_osx_le = true
			} else {
				// no valid line endings found
				return err_invalid_le
			}
		} else {
			// No line ending on file: the last line runs to the end of the data.
			// `i` is used as an exclusive slice end below, so it must be
			// data.len (data.len-1 would cut off the final byte).
			i = r.data.len
		}
	}
	mut line := r.data[r.row_pos..i]
	r.row_pos = i+1
	// normalize win line endings (remove extra \r)
	if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len-1] == `\r`) {
		line = line[..line.len-1]
	}
	return line
}

// read_record parses the next data line into a list of fields.
// Blank lines and lines whose first byte is `r.comment` are skipped.
// Fields are separated by `r.delimiter`; a field wrapped in double quotes
// is returned without the quotes. A line ending in a delimiter yields an
// empty last field.
// Errors: err_comment_is_delim when comment and delimiter are the same byte,
// err_invalid_delim when the delimiter is rejected by valid_delim or when no
// field could be extracted from a line starting with an unterminated quote,
// and any error reported by read_line (e.g. err_eof).
fn (r mut Reader) read_record() ?[]string {
	if r.delimiter == r.comment {
		return err_comment_is_delim
	}
	if !valid_delim(r.delimiter) {
		return err_invalid_delim
	}
	mut line := ''
	for {
		l := r.read_line() or {
			return error(err)
		}
		line = l
		// skip blank and commented lines; the length check comes first
		// because line[0] is out of range on an empty line
		if line.len == 0 || line[0] == r.comment {
			continue
		}
		break
	}
	mut fields := []string
	mut i := -1
	for {
		// nothing left after a delimiter (e.g. `a,b,`): the record ends
		// with an empty field
		if line.len == 0 {
			fields << ''
			break
		}
		// not quoted
		if line[0] != `"` {
			j := line.index(r.delimiter.str()) or {
				// no further delimiter: the rest of the line is the last field
				fields << line[..line.len]
				break
			}
			i = j
			fields << line[..i]
			line = line[i+1..]
			continue
		}
		// quoted
		else {
			// drop the opening quote
			line = line[1..]
			if j := line.index('"') {
				if j+1 == line.len {
					// closing quote is the last byte of the line: last field
					fields << line[..j]
					break
				}
				next := line[j+1]
				if next == r.delimiter {
					fields << line[..j]
					// keep the closing quote at the front of the remainder; the
					// next pass removes it like an opening quote and the slice
					// below then removes the delimiter that follows it
					line = line[j..]
					continue
				}
			}
			// guard the slice: line is empty here when the opening quote was
			// the last byte of the line
			if line.len > 0 {
				line = line[1..]
			}
		}
		if i <= -1 && fields.len == 0 {
			return err_invalid_delim
		}
	}
	return fields
}

// valid_delim reports whether `b` may be used as a field delimiter.
// The NUL byte, the double quote and the two line-ending bytes are rejected.
fn valid_delim(b byte) bool {
	if b == 0 || b == `"` || b == `\r` || b == `\n` {
		return false
	}
	return true
}
update copyright years 2020-02-03 05:00:36 +01:00			`// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.`
encoding.csv module 2019-08-14 08:45:56 +02:00			`// Use of this source code is governed by an MIT license`
			`// that can be found in the LICENSE file.`

encoding.csv: add write support 2019-08-17 14:51:20 +02:00			`module csv`

encoding.csv module 2019-08-14 08:45:56 +02:00			`// Once interfaces are further along the idea would be to have something similar to`
			`// go's io.reader & bufio.reader rather than reading the whole file into string, this`
			`// would then satisfy that interface. I designed it this way to be easily adapted.`

			`const (`
			`err_comment_is_delim = error('encoding.csv: comment cannot be the same as delimiter')`
			`err_invalid_delim = error('encoding.csv: invalid delimiter')`
			`err_eof = error('encoding.csv: end of file')`
compiler/vlib: add error for no new vars in loop ("_,_") & remove "." from errors 2019-09-25 16:59:50 +02:00			`err_invalid_le = error('encoding.csv: could not find any valid line endings')`
encoding.csv module 2019-08-14 08:45:56 +02:00			`)`


			`struct Reader {`
			`// not used yet`
			`// has_header bool`
			`// headings []string`
			`data string`
access modifiers: update tests/examples 2019-12-13 18:09:11 +01:00			`pub mut:`
encoding.csv module 2019-08-14 08:45:56 +02:00			`delimiter byte`
			`comment byte`
			`is_mac_pre_osx_le bool`
			`row_pos int`
			`}`

fix bugs breaking tests 2019-09-03 13:57:04 +02:00			`pub fn new_reader(data string) &Reader {`
encoding.csv module 2019-08-14 08:45:56 +02:00			`return &Reader{`
			delimiter: `,`,
			comment: `#`,
			`data: data`
			`}`
			`}`

			`// read() reads one row from the csv file`
			`pub fn (r mut Reader) read() ?[]string {`
			`l := r.read_record() or {`
			`return error(err)`
			`}`
			`return l`
			`}`

			`// Once we have multi dimensional array`
			`// pub fn (r mut Reader) read_all() ?[][]string {`
			`// mut records := []string`
			`// for {`
			`// record := r.read_record() or {`
			`// if error(err).error == err_eof.error {`
			`// return records`
			`// } else {`
			`// return error(err)`
			`// }`
			`// }`
			`// records << record`
			`// }`
			`// return records`
			`// }`

			`fn (r mut Reader) read_line() ?string {`
			`// last record`
			`if r.row_pos == r.data.len {`
			`return err_eof`
			`}`
			`le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }`
			`mut i := r.data.index_after(le, r.row_pos)`
			`if i == -1 {`
			`if r.row_pos == 0 {`
			`// check for pre osx mac line endings`
			`i = r.data.index_after('\r', r.row_pos)`
			`if i != -1 {`
			`r.is_mac_pre_osx_le = true`
			`} else {`
			`// no valid line endings found`
			`return err_invalid_le`
			`}`
csv: handle missing line ending 2020-04-21 00:02:55 +02:00			`} else {`
			`// No line ending on file`
			`i = r.data.len-1`
encoding.csv module 2019-08-14 08:45:56 +02:00			`}`
			`}`
compiler/vlib: replace substr/left/right with `[start..end]` everywhere 2019-10-27 08:03:15 +01:00			`mut line := r.data[r.row_pos..i]`
encoding.csv module 2019-08-14 08:45:56 +02:00			`r.row_pos = i+1`
			`// normalize win line endings (remove extra \r)`
			if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len-1] == `\r`) {
compiler/vlib: replace substr/left/right with `[start..end]` everywhere 2019-10-27 08:03:15 +01:00			`line = line[..line.len-1]`
encoding.csv module 2019-08-14 08:45:56 +02:00			`}`
			`return line`
			`}`

			`fn (r mut Reader) read_record() ?[]string {`
			`if r.delimiter == r.comment {`
			`return err_comment_is_delim`
			`}`
encoding.csv: add write support 2019-08-17 14:51:20 +02:00			`if !valid_delim(r.delimiter) {`
			`return err_invalid_delim`
			`}`
encoding.csv module 2019-08-14 08:45:56 +02:00			`mut line := ''`
			`for {`
			`l := r.read_line() or {`
			`return error(err)`
			`}`
			`line = l`
			`// skip commented lines`
			`if line[0] == r.comment {`
			`continue`
			`}`
			`break`
			`}`
			`mut fields := []string`
			`mut i := -1`
			`for {`
			`// not quoted`
			if line[0] != `"` {
cgen: fix returning optional consts; fix csv test 2020-04-08 17:21:36 +02:00			`// QTODO i = ...`
			`j := line.index(r.delimiter.str()) or {`
encoding.csv module 2019-08-14 08:45:56 +02:00			`// last`
csv: fix missing last column 2020-04-20 21:49:05 +02:00			`fields << line[..line.len]`
encoding.csv module 2019-08-14 08:45:56 +02:00			`break`
			`}`
cgen: fix returning optional consts; fix csv test 2020-04-08 17:21:36 +02:00			`i = j`
compiler/vlib: replace substr/left/right with `[start..end]` everywhere 2019-10-27 08:03:15 +01:00			`fields << line[..i]`
			`line = line[i+1..]`
encoding.csv module 2019-08-14 08:45:56 +02:00			`continue`
			`}`
			`// quoted`
			`else {`
compiler/vlib: replace substr/left/right with `[start..end]` everywhere 2019-10-27 08:03:15 +01:00			`line = line[1..]`
fix csv test 2019-12-07 23:03:35 +01:00			`if j := line.index('"') {`
			`if j+1 == line.len {`
encoding.csv: add write support 2019-08-17 14:51:20 +02:00			`// last record`
fix csv test 2019-12-07 23:03:35 +01:00			`fields << line[..j]`
encoding.csv: add write support 2019-08-17 14:51:20 +02:00			`break`
			`}`
fix csv test 2019-12-07 23:03:35 +01:00			`next := line[j+1]`
encoding.csv: add write support 2019-08-17 14:51:20 +02:00			`if next == r.delimiter {`
fix csv test 2019-12-07 23:03:35 +01:00			`fields << line[..j]`
			`line = line[j..]`
encoding.csv: add write support 2019-08-17 14:51:20 +02:00			`continue`
			`}`
encoding.csv module 2019-08-14 08:45:56 +02:00			`}`
compiler/vlib: replace substr/left/right with `[start..end]` everywhere 2019-10-27 08:03:15 +01:00			`line = line[1..]`
encoding.csv module 2019-08-14 08:45:56 +02:00			`}`
			`if i <= -1 && fields.len == 0 {`
			`return err_invalid_delim`
			`}`
			`}`
			`return fields`
			`}`
encoding.csv: add write support 2019-08-17 14:51:20 +02:00
			`fn valid_delim(b byte) bool {`
			`return b != 0 &&`
			b != `"` &&
			b != `\r` &&
			b != `\n`
			`}`