From ffcff9ebd4a4701ae1c8e16cde2be9ea2e82f4b8 Mon Sep 17 00:00:00 2001
From: Henrixounez <30901439+Henrixounez@users.noreply.github.com>
Date: Fri, 20 Sep 2019 18:07:38 +0200
Subject: [PATCH] builtins: ustring comparisons, concatenation and other
 functions

---
 compiler/fn.v              |  10 +++-
 compiler/parser.v          |  29 ++++++++--
 vlib/builtin/string.v      | 115 ++++++++++++++++++++++++++++++++++++-
 vlib/builtin/string_test.v |  33 +++++++++++
 4 files changed, 178 insertions(+), 9 deletions(-)

diff --git a/compiler/fn.v b/compiler/fn.v
index 1143d79173..41e4ea2e00 100644
--- a/compiler/fn.v
+++ b/compiler/fn.v
@@ -861,12 +861,18 @@ fn (p mut Parser) fn_call_args(f mut Fn) &Fn {
 			}
 		}
 		p.expected_type = arg.typ
-		typ := p.bool_expression()
+		mut typ := p.bool_expression()
 		// Optimize `println`: replace it with `printf` to avoid extra allocations and
 		// function calls.
 		// `println(777)` => `printf("%d\n", 777)`
 		// (If we don't check for void, then V will compile `println(func())`)
-		if i == 0 && (f.name == 'println' || f.name == 'print')  && typ != 'string' && typ != 'void' {
+		if i == 0 && (f.name == 'println' || f.name == 'print') && typ == 'ustring' {
+			if typ == 'ustring' {
+				p.gen('.s')
+			}
+			typ = 'string'
+		}
+		if i == 0 && (f.name == 'println' || f.name == 'print')  && typ != 'string' && typ != 'ustring' && typ != 'void' {
 			T := p.table.find_type(typ)
 			$if !windows {
 			$if !js {
diff --git a/compiler/parser.v b/compiler/parser.v
index 043d525ac0..f616f7bf44 100644
--- a/compiler/parser.v
+++ b/compiler/parser.v
@@ -1262,6 +1262,7 @@ fn ($v.name mut $v.typ) $p.cur_fn.name (...) {
 		p.mark_var_changed(v)
 	}
 	is_str := v.typ == 'string'
+	is_ustr := v.typ == 'ustring'
 	switch tok {
 	case Token.assign:
 		if !is_map && !p.is_empty_c_struct_init {
@@ -1271,6 +1272,9 @@ fn ($v.name mut $v.typ) $p.cur_fn.name (...) {
 		if is_str && !p.is_js {
 			p.gen('= string_add($v.name, ')// TODO can't do `foo.bar += '!'`
 		}
+		else if is_ustr {
+			p.gen('= ustring_add($v.name, ')
+		}
 		else {
 			p.gen(' += ')
 		}
@@ -1297,7 +1301,7 @@ fn ($v.name mut $v.typ) $p.cur_fn.name (...) {
 		p.scanner.line_nr--
 		p.error('cannot use type `$expr_type` as type `$p.assigned_type` in assignment')
 	}
-	if is_str && tok == .plus_assign && !p.is_js {
+	if (is_str || is_ustr) && tok == .plus_assign && !p.is_js {
 		p.gen(')')
 	}
 	// p.assigned_var = ''
@@ -1393,11 +1397,12 @@ fn (p mut Parser) bterm() string {
 	mut typ := p.expression()
 	p.expected_type = typ
 	is_str := typ=='string'  &&   !p.is_sql
+	is_ustr := typ=='ustring'
 	tok := p.tok
 	// if tok in [ .eq, .gt, .lt, .le, .ge, .ne] {
 	if tok == .eq || tok == .gt || tok == .lt || tok == .le || tok == .ge || tok == .ne {
 		p.fgen(' ${p.tok.str()} ')
-		if is_str && !p.is_js {
+		if (is_str || is_ustr) && !p.is_js {
 			p.gen(',')
 		}
 		else if p.is_sql && tok == .eq {
@@ -1440,6 +1445,17 @@ fn (p mut Parser) bterm() string {
 			 Token.lt => p.cgen.set_placeholder(ph, 'string_lt(')
 */
 		}
+		if is_ustr {
+			p.gen(')')
+			switch tok {
+			case Token.eq: p.cgen.set_placeholder(ph, 'ustring_eq(')
+			case Token.ne: p.cgen.set_placeholder(ph, 'ustring_ne(')
+			case Token.le: p.cgen.set_placeholder(ph, 'ustring_le(')
+			case Token.ge: p.cgen.set_placeholder(ph, 'ustring_ge(')
+			case Token.gt: p.cgen.set_placeholder(ph, 'ustring_gt(')
+			case Token.lt: p.cgen.set_placeholder(ph, 'ustring_lt(')
+			}
+		}
 	}
 	return typ
 }
@@ -2072,6 +2088,7 @@ fn (p mut Parser) expression() string {
 	ph := p.cgen.add_placeholder()
 	mut typ := p.term()
 	is_str := typ=='string'
+	is_ustr := typ=='ustring'
 	// `a << b` ==> `array_push(&a, b)`
 	if p.tok == .left_shift {
 		if typ.contains('array_') {
@@ -2153,6 +2170,10 @@ fn (p mut Parser) expression() string {
 			p.cgen.set_placeholder(ph, 'string_add(')
 			p.gen(',')
 		}
+		else if is_ustr && tok_op == .plus {
+			p.cgen.set_placeholder(ph, 'ustring_add(')
+			p.gen(',')
+		}
 		// 3 + 4
 		else if is_num || p.is_js {
 			if typ == 'void*' {
@@ -2172,11 +2193,11 @@ fn (p mut Parser) expression() string {
 			}
 		}
 		p.check_types(p.term(), typ)
-		if is_str && tok_op == .plus && !p.is_js {
+		if (is_str || is_ustr) && tok_op == .plus && !p.is_js {
 			p.gen(')')
 		}
 		// Make sure operators are used with correct types
-		if !p.pref.translated && !is_str && !is_num {
+		if !p.pref.translated && !is_str && !is_ustr && !is_num {
 			T := p.table.find_type(typ)
 			if tok_op == .plus {
 				if T.has_method('+') {
diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v
index 6a8f45dfc9..9f25da34a0 100644
--- a/vlib/builtin/string.v
+++ b/vlib/builtin/string.v
@@ -725,22 +725,128 @@ pub fn (s string) ustring_tmp() ustring {
 	return res
 }
 
+// Comparison helpers for `ustring`; every one of them is built on `eq`
+// and `lt`, which in turn compare the wrapped strings `u.s` and `a.s`.
+// `eq` looks at the rune counts first and at the contents second.
+fn (u ustring) eq(a ustring) bool {
+	return u.len == a.len && u.s == a.s
+}
+
+fn (u ustring) ne(a ustring) bool {
+	return !u.eq(a)
+}
+
+fn (u ustring) lt(a ustring) bool {
+	return u.s < a.s
+}
+
+fn (u ustring) le(a ustring) bool {
+	return u.lt(a) || u.eq(a)
+}
+
+fn (u ustring) gt(a ustring) bool {
+	return !u.lt(a) && !u.eq(a)
+}
+
+fn (u ustring) ge(a ustring) bool {
+	return !u.lt(a)
+}
+
+// add returns a new ustring made of the text of `u` followed by the text of `a`.
+fn (u ustring) add(a ustring) ustring {
+	mut res := ustring {
+		s: u.s + a.s
+		runes: new_array(0, u.s.len + a.s.len, sizeof(int))
+	}
+	mut off := 0 // byte offset, inside res.s, of the rune being recorded
+	for off < u.s.len {
+		res.runes << off
+		off += utf8_char_len(u.s.str[off])
+		res.len++
+	}
+	mut k := 0 // byte position inside a.s
+	for k < a.s.len {
+		step := utf8_char_len(a.s.str[k])
+		res.runes << off
+		k += step
+		off += step
+		res.len++
+	}
+	return res
+}
+
+// index_after returns the rune index of the first occurrence of `p` in `u`
+// at or after rune position `start`, or -1 when there is none.
+// A negative `start` is treated as 0.
+pub fn (u ustring) index_after(p ustring, start int) int {
+	if p.len > u.len || start > u.len {
+		return -1
+	}
+	mut i := start
+	if i < 0 {
+		i = 0
+	}
+	for i < u.len {
+		mut j := 0
+		// `i + j < u.len` stops a partial match that reaches the end of `u`
+		// from calling `u.at` past the last rune, which would panic
+		// instead of moving on to report "not found".
+		for j < p.len && i + j < u.len && u.at(i + j) == p.at(j) {
+			j++
+		}
+		if j == p.len {
+			return i
+		}
+		i++
+	}
+	return -1
+}
+
+// count returns how many times `substr` occurs in `u`.
+// Occurrences are looked up left to right with `index_after` and do not
+// overlap.
+// An empty `u`, an empty `substr`, or a `substr` longer than `u` gives 0.
+pub fn (u ustring) count(substr ustring) int {
+	if u.len == 0 || substr.len == 0 {
+		return 0
+	}
+	if substr.len > u.len {
+		return 0
+	}
+	mut total := 0
+	mut pos := u.index_after(substr, 0)
+	for pos != -1 {
+		total++
+		// resume right behind the runes that were just matched
+		pos = u.index_after(substr, pos + substr.len)
+	}
+	return total
+}
+
 pub fn (u ustring) substr(_start, _end int) string {
-	start := u.runes[_start]
-	end := if _end >= u.runes.len {
+	if _start > _end || _start > u.len || _end > u.len || _start < 0 || _end < 0 { // reject reversed, negative or too-large rune indices
+		panic('substr($_start, $_end) out of bounds (len=$u.len)')
+	}
+	end := if _end >= u.len { // an end at or past the last rune maps to the byte length of `u.s`
 		u.s.len
 	}
 	else {
 		u.runes[_end]
 	}
-	return u.s.substr(start, end)
+	return u.s.substr(u.runes[_start], end) // NOTE(review): `_start == u.len` passes the check above but then indexes `u.runes[u.len]`, presumably one past its last entry — confirm
 }
 
 pub fn (u ustring) left(pos int) string {
+	if pos >= u.len { // asking for at least as many runes as `u` holds returns the whole string
+		return u.s
+	}
 	return u.substr(0, pos)
 }
 
 pub fn (u ustring) right(pos int) string {
+	if pos >= u.len { // a start position at or past the end leaves nothing to return
+		return ''
+	}
 	return u.substr(pos, u.len)
 }
 
@@ -752,6 +858,9 @@ fn (s string) at(idx int) byte {
 }
 
 pub fn (u ustring) at(idx int) string {
+	if idx < 0 || idx >= u.len { // only rune indices 0 .. u.len-1 are accepted
+		panic('string index out of range: $idx / $u.runes.len')
+	}
 	return u.substr(idx, idx + 1)
 }
 
diff --git a/vlib/builtin/string_test.v b/vlib/builtin/string_test.v
index eff83f40cb..6e467efc56 100644
--- a/vlib/builtin/string_test.v
+++ b/vlib/builtin/string_test.v
@@ -423,3 +423,36 @@ fn test_quote() {
 	a := `'`
 	assert a.str() == '\''
 }
+
+fn test_ustring_comparisons() { // exercises ==, !=, <, <=, > and >= on ustring values
+	assert ('h€llô !'.ustring() == 'h€llô !'.ustring()) == true
+	assert ('h€llô !'.ustring() == 'h€llô'.ustring()) == false
+	assert ('h€llô !'.ustring() == 'h€llo !'.ustring()) == false
+	// `!=` gives the opposite answers to `==`
+	assert ('h€llô !'.ustring() != 'h€llô !'.ustring()) == false
+	assert ('h€llô !'.ustring() != 'h€llô'.ustring()) == true
+	// `<`: a proper prefix sorts first, and `o` sorts before `ô`
+	assert ('h€llô'.ustring() < 'h€llô!'.ustring()) == true
+	assert ('h€llô'.ustring() < 'h€llo'.ustring()) == false
+	assert ('h€llo'.ustring() < 'h€llô'.ustring()) == true
+	// `<=` also holds for equal strings
+	assert ('h€llô'.ustring() <= 'h€llô!'.ustring()) == true
+	assert ('h€llô'.ustring() <= 'h€llô'.ustring()) == true
+	assert ('h€llô!'.ustring() <= 'h€llô'.ustring()) == false
+	// `>` is strict: equal strings are not greater
+	assert ('h€llô!'.ustring() > 'h€llô'.ustring()) == true
+	assert ('h€llô'.ustring() > 'h€llô'.ustring()) == false
+	// `>=` also holds for equal strings
+	assert ('h€llô!'.ustring() >= 'h€llô'.ustring()) == true
+	assert ('h€llô'.ustring() >= 'h€llô'.ustring()) == true
+	assert ('h€llô'.ustring() >= 'h€llô!'.ustring()) == false
+}
+
+fn test_ustring_count() { // counts single-rune and multi-rune patterns in one sample string
+	a := 'h€llôﷰ h€llô ﷰ'.ustring()
+	assert (a.count('l'.ustring())) == 4 // two `l` in each of the two `h€llô`
+	assert (a.count('€'.ustring())) == 2
+	assert (a.count('h€llô'.ustring())) == 2 // multi-rune pattern
+	assert (a.count('ﷰ'.ustring())) == 2
+	assert (a.count('a'.ustring())) == 0 // pattern that does not occur
+}