From ffcff9ebd4a4701ae1c8e16cde2be9ea2e82f4b8 Mon Sep 17 00:00:00 2001 From: Henrixounez <30901439+Henrixounez@users.noreply.github.com> Date: Fri, 20 Sep 2019 18:07:38 +0200 Subject: [PATCH] builtins: ustring comparisons, concatenation and other functions --- compiler/fn.v | 10 +++- compiler/parser.v | 29 ++++++++-- vlib/builtin/string.v | 115 ++++++++++++++++++++++++++++++++++++- vlib/builtin/string_test.v | 33 +++++++++++ 4 files changed, 178 insertions(+), 9 deletions(-) diff --git a/compiler/fn.v b/compiler/fn.v index 1143d79173..41e4ea2e00 100644 --- a/compiler/fn.v +++ b/compiler/fn.v @@ -861,12 +861,18 @@ fn (p mut Parser) fn_call_args(f mut Fn) &Fn { } } p.expected_type = arg.typ - typ := p.bool_expression() + mut typ := p.bool_expression() // Optimize `println`: replace it with `printf` to avoid extra allocations and // function calls. // `println(777)` => `printf("%d\n", 777)` // (If we don't check for void, then V will compile `println(func())`) - if i == 0 && (f.name == 'println' || f.name == 'print') && typ != 'string' && typ != 'void' { + if i == 0 && (f.name == 'println' || f.name == 'print') && typ == 'ustring' { + if typ == 'ustring' { + p.gen('.s') + } + typ = 'string' + } + if i == 0 && (f.name == 'println' || f.name == 'print') && typ != 'string' && typ != 'ustring' && typ != 'void' { T := p.table.find_type(typ) $if !windows { $if !js { diff --git a/compiler/parser.v b/compiler/parser.v index 043d525ac0..f616f7bf44 100644 --- a/compiler/parser.v +++ b/compiler/parser.v @@ -1262,6 +1262,7 @@ fn ($v.name mut $v.typ) $p.cur_fn.name (...) { p.mark_var_changed(v) } is_str := v.typ == 'string' + is_ustr := v.typ == 'ustring' switch tok { case Token.assign: if !is_map && !p.is_empty_c_struct_init { @@ -1271,6 +1272,9 @@ fn ($v.name mut $v.typ) $p.cur_fn.name (...) { if is_str && !p.is_js { p.gen('= string_add($v.name, ')// TODO can't do `foo.bar += '!'` } + else if is_ustr { + p.gen('= ustring_add($v.name, ') + } else { p.gen(' += ') } @@ -1297,7 +1301,7 @@ fn ($v.name mut $v.typ) $p.cur_fn.name (...) { p.scanner.line_nr-- p.error('cannot use type `$expr_type` as type `$p.assigned_type` in assignment') } - if is_str && tok == .plus_assign && !p.is_js { + if (is_str || is_ustr) && tok == .plus_assign && !p.is_js { p.gen(')') } // p.assigned_var = '' @@ -1393,11 +1397,12 @@ fn (p mut Parser) bterm() string { mut typ := p.expression() p.expected_type = typ is_str := typ=='string' && !p.is_sql + is_ustr := typ=='ustring' tok := p.tok // if tok in [ .eq, .gt, .lt, .le, .ge, .ne] { if tok == .eq || tok == .gt || tok == .lt || tok == .le || tok == .ge || tok == .ne { p.fgen(' ${p.tok.str()} ') - if is_str && !p.is_js { + if (is_str || is_ustr) && !p.is_js { p.gen(',') } else if p.is_sql && tok == .eq { @@ -1440,6 +1445,17 @@ fn (p mut Parser) bterm() string { Token.lt => p.cgen.set_placeholder(ph, 'string_lt(') */ } + if is_ustr { + p.gen(')') + switch tok { + case Token.eq: p.cgen.set_placeholder(ph, 'ustring_eq(') + case Token.ne: p.cgen.set_placeholder(ph, 'ustring_ne(') + case Token.le: p.cgen.set_placeholder(ph, 'ustring_le(') + case Token.ge: p.cgen.set_placeholder(ph, 'ustring_ge(') + case Token.gt: p.cgen.set_placeholder(ph, 'ustring_gt(') + case Token.lt: p.cgen.set_placeholder(ph, 'ustring_lt(') + } + } } return typ } @@ -2072,6 +2088,7 @@ fn (p mut Parser) expression() string { ph := p.cgen.add_placeholder() mut typ := p.term() is_str := typ=='string' + is_ustr := typ=='ustring' // `a << b` ==> `array_push(&a, b)` if p.tok == .left_shift { if typ.contains('array_') { @@ -2153,6 +2170,10 @@ fn (p mut Parser) expression() string { p.cgen.set_placeholder(ph, 'string_add(') p.gen(',') } + else if is_ustr && tok_op == .plus { + p.cgen.set_placeholder(ph, 'ustring_add(') + p.gen(',') + } // 3 + 4 else if is_num || p.is_js { if typ == 'void*' { @@ -2172,11 +2193,11 @@ fn (p mut Parser) expression() string { } } p.check_types(p.term(), typ) - if is_str && tok_op == .plus && !p.is_js { + if (is_str || is_ustr) && tok_op == .plus && !p.is_js { p.gen(')') } // Make sure operators are used with correct types - if !p.pref.translated && !is_str && !is_num { + if !p.pref.translated && !is_str && !is_ustr && !is_num { T := p.table.find_type(typ) if tok_op == .plus { if T.has_method('+') { diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index 6a8f45dfc9..9f25da34a0 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -725,22 +725,128 @@ pub fn (s string) ustring_tmp() ustring { return res } +fn (u ustring) eq(a ustring) bool { + if u.len != a.len || u.s != a.s { + return false + } + return true +} + +fn (u ustring) ne(a ustring) bool { + return !u.eq(a) +} + +fn (u ustring) lt(a ustring) bool { + return u.s < a.s +} + +fn (u ustring) le(a ustring) bool { + return u.lt(a) || u.eq(a) +} + +fn (u ustring) gt(a ustring) bool { + return !u.le(a) +} + +fn (u ustring) ge(a ustring) bool { + return !u.lt(a) +} + +fn (u ustring) add(a ustring) ustring { + mut res := ustring { + s: u.s + a.s + runes: new_array(0, u.s.len + a.s.len, sizeof(int)) + } + mut j := 0 + for i := 0; i < u.s.len; i++ { + char_len := utf8_char_len(u.s.str[i]) + res.runes << j + i += char_len - 1 + j += char_len + res.len++ + } + for i := 0; i < a.s.len; i++ { + char_len := utf8_char_len(a.s.str[i]) + res.runes << j + i += char_len - 1 + j += char_len + res.len++ + } + return res +} + +pub fn (u ustring) index_after(p ustring, start int) int { + if p.len > u.len { + return -1 + } + mut strt := start + if start < 0 { + strt = 0 + } + if start > u.len { + return -1 + } + mut i := strt + for i < u.len { + mut j := 0 + mut ii := i + for j < p.len && u.at(ii) == p.at(j) { + j++ + ii++ + } + if j == p.len { + return i + } + i++ + } + return -1 +} + +// counts occurrences of substr in s +pub fn (u ustring) count(substr ustring) int { + if u.len == 0 || substr.len == 0 { + return 0 + } + if substr.len > u.len { + return 0 + } + mut n := 0 + mut i := 0 + for { + i = u.index_after(substr, i) + if i == -1 { + return n + } + i += substr.len + n++ + } + return 0 // TODO can never get here - v doesn't know that +} + pub fn (u ustring) substr(_start, _end int) string { - start := u.runes[_start] - end := if _end >= u.runes.len { + if _start > _end || _start > u.len || _end > u.len || _start < 0 || _end < 0 { + panic('substr($_start, $_end) out of bounds (len=$u.len)') + } + end := if _end >= u.len { u.s.len } else { u.runes[_end] } - return u.s.substr(start, end) + return u.s.substr(u.runes[_start], end) } pub fn (u ustring) left(pos int) string { + if pos >= u.len { + return u.s + } return u.substr(0, pos) } pub fn (u ustring) right(pos int) string { + if pos >= u.len { + return '' + } return u.substr(pos, u.len) } @@ -752,6 +858,9 @@ fn (s string) at(idx int) byte { } pub fn (u ustring) at(idx int) string { + if idx < 0 || idx >= u.len { + panic('string index out of range: $idx / $u.runes.len') + } return u.substr(idx, idx + 1) } diff --git a/vlib/builtin/string_test.v b/vlib/builtin/string_test.v index eff83f40cb..6e467efc56 100644 --- a/vlib/builtin/string_test.v +++ b/vlib/builtin/string_test.v @@ -423,3 +423,36 @@ fn test_quote() { a := `'` assert a.str() == '\'' } + +fn test_ustring_comparisons() { + assert ('h€llô !'.ustring() == 'h€llô !'.ustring()) == true + assert ('h€llô !'.ustring() == 'h€llô'.ustring()) == false + assert ('h€llô !'.ustring() == 'h€llo !'.ustring()) == false + + assert ('h€llô !'.ustring() != 'h€llô !'.ustring()) == false + assert ('h€llô !'.ustring() != 'h€llô'.ustring()) == true + + assert ('h€llô'.ustring() < 'h€llô!'.ustring()) == true + assert ('h€llô'.ustring() < 'h€llo'.ustring()) == false + assert ('h€llo'.ustring() < 'h€llô'.ustring()) == true + + assert ('h€llô'.ustring() <= 'h€llô!'.ustring()) == true + assert ('h€llô'.ustring() <= 'h€llô'.ustring()) == true + assert ('h€llô!'.ustring() <= 'h€llô'.ustring()) == false + + assert ('h€llô!'.ustring() > 'h€llô'.ustring()) == true + assert ('h€llô'.ustring() > 'h€llô'.ustring()) == false + + assert ('h€llô!'.ustring() >= 'h€llô'.ustring()) == true + assert ('h€llô'.ustring() >= 'h€llô'.ustring()) == true + assert ('h€llô'.ustring() >= 'h€llô!'.ustring()) == false +} + +fn test_ustring_count() { + a := 'h€llôﷰ h€llô ﷰ'.ustring() + assert (a.count('l'.ustring())) == 4 + assert (a.count('€'.ustring())) == 2 + assert (a.count('h€llô'.ustring())) == 2 + assert (a.count('ﷰ'.ustring())) == 2 + assert (a.count('a'.ustring())) == 0 +}