all: byte.str() => byte.ascii_str()

pull/7902/head
Alexander Medvednikov 2021-01-05 19:14:35 +01:00
parent fdb6f1ab50
commit 5a70eba8e1
5 changed files with 61 additions and 97 deletions

View File

@ -5,7 +5,7 @@ fn test_clone() {
assert b[0] == 0 assert b[0] == 0
assert b[1] == 1 assert b[1] == 1
assert b[2] == 2 assert b[2] == 2
println(b[1].str() ) println(b[1].ascii_str())
println(typeof(`A`)) println(typeof(`A`))
x := rune(`A`) x := rune(`A`)
assert x.str() == 'A' assert x.str() == 'A'
@ -15,5 +15,4 @@ fn test_clone() {
assert typeof(y) == 'rune' assert typeof(y) == 'rune'
assert y.str() == 'Z' assert y.str() == 'Z'
// assert b[1].str() == '1' TODO // assert b[1].str() == '1' TODO
} }

View File

@ -1,13 +1,11 @@
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved. // Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license // Use of this source code is governed by an MIT license
// that can be found in the LICENSE file. // that can be found in the LICENSE file.
module csv module csv
// Once interfaces are further along the idea would be to have something similar to // Once interfaces are further along the idea would be to have something similar to
// go's io.reader & bufio.reader rather than reading the whole file into string, this // go's io.reader & bufio.reader rather than reading the whole file into string, this
// would then satisfy that interface. I designed it this way to be easily adapted. // would then satisfy that interface. I designed it this way to be easily adapted.
const ( const (
err_comment_is_delim = error('encoding.csv: comment cannot be the same as delimiter') err_comment_is_delim = error('encoding.csv: comment cannot be the same as delimiter')
err_invalid_delim = error('encoding.csv: invalid delimiter') err_invalid_delim = error('encoding.csv: invalid delimiter')
@ -15,8 +13,7 @@ const (
err_invalid_le = error('encoding.csv: could not find any valid line endings') err_invalid_le = error('encoding.csv: could not find any valid line endings')
) )
struct Reader {
struct Reader {
// not used yet // not used yet
// has_header bool // has_header bool
// headings []string // headings []string
@ -31,8 +28,8 @@ pub mut:
// new_reader initializes a Reader with string data to parse // new_reader initializes a Reader with string data to parse
pub fn new_reader(data string) &Reader { pub fn new_reader(data string) &Reader {
return &Reader{ return &Reader{
delimiter: `,`, delimiter: `,`
comment: `#`, comment: `#`
data: data data: data
} }
} }
@ -40,7 +37,7 @@ pub fn new_reader(data string) &Reader {
// read reads a row from the CSV data. // read reads a row from the CSV data.
// If successful, the result holds an array of each column's data. // If successful, the result holds an array of each column's data.
pub fn (mut r Reader) read() ?[]string { pub fn (mut r Reader) read() ?[]string {
l := r.read_record()? l := r.read_record() ?
return l return l
} }
@ -59,7 +56,6 @@ pub fn (mut r Reader) read() ?[]string {
// } // }
// return records // return records
// } // }
fn (mut r Reader) read_line() ?string { fn (mut r Reader) read_line() ?string {
// last record // last record
if r.row_pos == r.data.len { if r.row_pos == r.data.len {
@ -79,14 +75,14 @@ fn (mut r Reader) read_line() ?string {
} }
} else { } else {
// No line ending on file // No line ending on file
i = r.data.len-1 i = r.data.len - 1
} }
} }
mut line := r.data[r.row_pos..i] mut line := r.data[r.row_pos..i]
r.row_pos = i+1 r.row_pos = i + 1
// normalize win line endings (remove extra \r) // normalize win line endings (remove extra \r)
if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len-1] == `\r`) { if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len - 1] == `\r`) {
line = line[..line.len-1] line = line[..line.len - 1]
} }
return line return line
} }
@ -103,47 +99,51 @@ fn (mut r Reader) read_record() ?[]string {
mut line := '' mut line := ''
mut fields := []string{} mut fields := []string{}
mut i := -1 mut i := -1
for { for {
if need_read { if need_read {
l := r.read_line()? l := r.read_line() ?
if l.len <= 0 { if l.len <= 0 {
if keep_raw { line += '\n'} if keep_raw {
line += '\n'
}
continue continue
} else if l[0] == r.comment { } else if l[0] == r.comment {
if keep_raw { line += '\n' + l } if keep_raw {
line += '\n' + l
}
continue continue
} else { } else {
if keep_raw { line += '\n'} if keep_raw {
line += '\n'
}
line += l line += l
} }
need_read = false need_read = false
keep_raw = false keep_raw = false
} }
if line[0] != `"` { // not quoted
if line[0] != `"` { // not quoted j := line.index(r.delimiter.ascii_str()) or {
j := line.index(r.delimiter.str()) or {
// last // last
fields << line[..line.len] fields << line[..line.len]
break break
} }
i = j i = j
fields << line[..i] fields << line[..i]
line = line[i+1..] line = line[i + 1..]
continue continue
} else { // quoted } else { // quoted
j := line[1..].index('"') or { j := line[1..].index('"') or {
need_read = true need_read = true
keep_raw = true keep_raw = true
continue continue
} }
line = line[1..] line = line[1..]
if j+1 == line.len { if j + 1 == line.len {
// last record // last record
fields << line[..j] fields << line[..j]
break break
} }
next := line[j+1] next := line[j + 1]
if next == r.delimiter { if next == r.delimiter {
fields << line[..j] fields << line[..j]
line = line[j..] line = line[j..]
@ -159,8 +159,5 @@ fn (mut r Reader) read_record() ?[]string {
} }
fn valid_delim(b byte) bool { fn valid_delim(b byte) bool {
return b != 0 && return b != 0 && b != `"` && b != `\r` && b != `\n`
b != `"` &&
b != `\r` &&
b != `\n`
} }

View File

@ -1,22 +1,21 @@
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved. // Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license // Use of this source code is governed by an MIT license
// that can be found in the LICENSE file. // that can be found in the LICENSE file.
module csv module csv
import strings import strings
struct Writer { struct Writer {
mut: mut:
sb strings.Builder sb strings.Builder
pub mut: pub mut:
use_crlf bool use_crlf bool
delimiter byte delimiter byte
} }
pub fn new_writer() &Writer { pub fn new_writer() &Writer {
return &Writer{ return &Writer{
delimiter: `,`, delimiter: `,`
sb: strings.new_builder(200) sb: strings.new_builder(200)
} }
} }
@ -30,34 +29,25 @@ pub fn (mut w Writer) write(record []string) ?bool {
for n, field_ in record { for n, field_ in record {
mut field := field_ mut field := field_
if n > 0 { if n > 0 {
w.sb.write(w.delimiter.str()) w.sb.write(w.delimiter.ascii_str())
} }
if !w.field_needs_quotes(field) { if !w.field_needs_quotes(field) {
w.sb.write(field) w.sb.write(field)
continue continue
} }
w.sb.write('"') w.sb.write('"')
for field.len > 0 { for field.len > 0 {
mut i := field.index_any('"\r\n') mut i := field.index_any('"\r\n')
if i < 0 { if i < 0 {
i = field.len i = field.len
} }
w.sb.write(field[..i]) w.sb.write(field[..i])
field = field[i..] field = field[i..]
if field.len > 0 { if field.len > 0 {
z := field[0] z := field[0]
match z { match z {
`"` { `"` { w.sb.write('""') }
w.sb.write('""') `\r`, `\n` { w.sb.write(le) }
}
`\r`, `\n` {
w.sb.write(le)
}
else {} else {}
} }
field = field[1..] field = field[1..]
@ -65,7 +55,6 @@ pub fn (mut w Writer) write(record []string) ?bool {
} }
w.sb.write('"') w.sb.write('"')
} }
w.sb.write(le) w.sb.write(le)
return true return true
} }
@ -76,12 +65,11 @@ pub fn (mut w Writer) write(record []string) ?bool {
// w.write(record) // w.write(record)
// } // }
// } // }
fn (w &Writer) field_needs_quotes(field string) bool { fn (w &Writer) field_needs_quotes(field string) bool {
if field == '' { if field == '' {
return false return false
} }
if field.contains(w.delimiter.str()) || (field.index_any('"\r\n') != -1) { if field.contains(w.delimiter.ascii_str()) || (field.index_any('"\r\n') != -1) {
return true return true
} }
return false return false

View File

@ -210,7 +210,7 @@ fn unescape(s_ string, mode EncodingMode) ?string {
x := s[i] x := s[i]
match x { match x {
`%` { `%` {
t.write(((unhex(s[i + 1]) << byte(4)) | unhex(s[i + 2])).str()) t.write(((unhex(s[i + 1]) << byte(4)) | unhex(s[i + 2])).ascii_str())
i += 2 i += 2
} }
`+` { `+` {
@ -221,7 +221,7 @@ fn unescape(s_ string, mode EncodingMode) ?string {
} }
} }
else { else {
t.write(s[i].str()) t.write(s[i].ascii_str())
} }
} }
} }
@ -315,14 +315,14 @@ fn escape(s string, mode EncodingMode) string {
pub struct URL { pub struct URL {
pub mut: pub mut:
scheme string scheme string
opaque string // encoded opaque data opaque string // encoded opaque data
user &Userinfo // username and password information user &Userinfo // username and password information
host string // host or host:port host string // host or host:port
path string // path (relative paths may omit leading slash) path string // path (relative paths may omit leading slash)
raw_path string // encoded path hint (see escaped_path method) raw_path string // encoded path hint (see escaped_path method)
force_query bool // append a query ('?') even if raw_query is empty force_query bool // append a query ('?') even if raw_query is empty
raw_query string // encoded query values, without '?' raw_query string // encoded query values, without '?'
fragment string // fragment for references, without '#' fragment string // fragment for references, without '#'
} }
// user returns a Userinfo containing the provided username // user returns a Userinfo containing the provided username
@ -402,9 +402,7 @@ fn split_by_scheme(rawurl string) ?[]string {
} }
fn get_scheme(rawurl string) ?string { fn get_scheme(rawurl string) ?string {
split := split_by_scheme(rawurl) or { split := split_by_scheme(rawurl) or { return err }
return err
}
return split[0] return split[0]
} }
@ -431,15 +429,11 @@ fn split(s string, sep byte, cutc bool) (string, string) {
pub fn parse(rawurl string) ?URL { pub fn parse(rawurl string) ?URL {
// Cut off #frag // Cut off #frag
u, frag := split(rawurl, `#`, true) u, frag := split(rawurl, `#`, true)
mut url := parse_url(u, false) or { mut url := parse_url(u, false) or { return error(error_msg(err_msg_parse, u)) }
return error(error_msg(err_msg_parse, u))
}
if frag == '' { if frag == '' {
return url return url
} }
f := unescape(frag, .encode_fragment) or { f := unescape(frag, .encode_fragment) or { return error(error_msg(err_msg_parse, u)) }
return error(error_msg(err_msg_parse, u))
}
url.fragment = f url.fragment = f
return url return url
} }
@ -501,12 +495,8 @@ fn parse_url(rawurl string, via_request bool) ?URL {
// RFC 3986, §3.3: // RFC 3986, §3.3:
// In addition, a URI reference (Section 4.1) may be a relative-path reference, // In addition, a URI reference (Section 4.1) may be a relative-path reference,
// in which case the first path segment cannot contain a colon (':') character. // in which case the first path segment cannot contain a colon (':') character.
colon := rest.index(':') or { colon := rest.index(':') or { return error('there should be a : in the URL') }
return error('there should be a : in the URL') slash := rest.index('/') or { return error('there should be a / in the URL') }
}
slash := rest.index('/') or {
return error('there should be a / in the URL')
}
if colon >= 0 && (slash < 0 || colon < slash) { if colon >= 0 && (slash < 0 || colon < slash) {
// First path segment has colon. Not allowed in relative URL. // First path segment has colon. Not allowed in relative URL.
return error(error_msg('parse_url: first path segment in URL cannot contain colon', return error(error_msg('parse_url: first path segment in URL cannot contain colon',
@ -534,9 +524,7 @@ struct ParseAuthorityRes {
} }
fn parse_authority(authority string) ?ParseAuthorityRes { fn parse_authority(authority string) ?ParseAuthorityRes {
i := authority.last_index('@') or { i := authority.last_index('@') or { -1 }
-1
}
mut host := '' mut host := ''
mut zuser := user('') mut zuser := user('')
if i < 0 { if i < 0 {
@ -595,15 +583,9 @@ fn parse_host(host string) ?string {
// We do impose some restrictions on the zone, to avoid stupidity // We do impose some restrictions on the zone, to avoid stupidity
// like newlines. // like newlines.
if zone := host[..i].index('%25') { if zone := host[..i].index('%25') {
host1 := unescape(host[..zone], .encode_host) or { host1 := unescape(host[..zone], .encode_host) or { return err }
return err host2 := unescape(host[zone..i], .encode_zone) or { return err }
} host3 := unescape(host[i..], .encode_host) or { return err }
host2 := unescape(host[zone..i], .encode_zone) or {
return err
}
host3 := unescape(host[i..], .encode_host) or {
return err
}
return host1 + host2 + host3 return host1 + host2 + host3
} }
if idx := host.last_index(':') { if idx := host.last_index(':') {
@ -614,9 +596,7 @@ fn parse_host(host string) ?string {
} }
} }
} }
h := unescape(host, .encode_host) or { h := unescape(host, .encode_host) or { return err }
return err
}
return h return h
// host = h // host = h
// return host // return host
@ -654,9 +634,7 @@ pub fn (mut u URL) set_path(p string) ?bool {
// reading u.raw_path directly. // reading u.raw_path directly.
fn (u &URL) escaped_path() string { fn (u &URL) escaped_path() string {
if u.raw_path != '' && valid_encoded_path(u.raw_path) { if u.raw_path != '' && valid_encoded_path(u.raw_path) {
unescape(u.raw_path, .encode_path) or { unescape(u.raw_path, .encode_path) or { return '' }
return ''
}
return u.raw_path return u.raw_path
} }
if u.path == '*' { if u.path == '*' {
@ -883,9 +861,7 @@ fn resolve_path(base string, ref string) string {
if ref == '' { if ref == '' {
full = base full = base
} else if ref[0] != `/` { } else if ref[0] != `/` {
i := base.last_index('/') or { i := base.last_index('/') or { -1 }
-1
}
full = base[..i + 1] + ref full = base[..i + 1] + ref
} else { } else {
full = ref full = ref
@ -1050,8 +1026,12 @@ pub fn valid_userinfo(s string) bool {
continue continue
} }
match r { match r {
`-`, `.`, `_`, `:`, `~`, `!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `%`, `@` { continue } `-`, `.`, `_`, `:`, `~`, `!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `%`, `@` {
else { return false } continue
}
else {
return false
}
} }
} }
return true return true

View File

@ -1224,7 +1224,7 @@ pub fn (mut c Checker) call_method(mut call_expr ast.CallExpr) table.Type {
} }
/* /*
if left_type == table.byte_type && method_name == 'str' { if left_type == table.byte_type && method_name == 'str' {
c.warn('byte str', call_expr.pos) c.error('byte str', call_expr.pos)
} }
*/ */
// TODO: remove this for actual methods, use only for compiler magic // TODO: remove this for actual methods, use only for compiler magic