all: remove ustring (#10630)

2021-07-03 19:14:09 +02:00 · 2021-07-03 19:14:09 +02:00 · 0f9537ece5
parent 5a4a1997e7
commit 0f9537ece5
10 changed files with 56 additions and 354 deletions
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -8,7 +8,7 @@
 - [ ] [C2V translator](https://github.com/vlang/v/issues/6985)
 - [ ] doom.v
 - [x] rune type
-- [ ] replace `ustring` with `[]rune`
+- [x] replace `ustring` with `[]rune`
 - [x] fix `byte.str()`
 - [x] maps with non-string keys
 - [x] iOS/Android support
@@ -25,4 +25,4 @@
 - [ ] merge v.c and v_win.c
 - [ ] more advanced errors, not just `error('message')`
 - [ ] VLS for autocomplete, refactoring, go to definition etc
-- [ ] Recursive structs via optionals: `struct Node { next ?Node }`
+- [ ] Recursive structs via optionals: `struct Node { next ?Node }`
--- a/vlib/builtin/string.v
+++ b/vlib/builtin/string.v
@@ -49,19 +49,6 @@ mut:
 	is_lit int
 }

-// NB string.is_lit is an enumeration of the following:
-// .is_lit == 0 => a fresh string, should be freed by autofree
-// .is_lit == 1 => a literal string from .rodata, should NOT be freed
-// .is_lit == -98761234 => already freed string, protects against double frees.
-// ---------> ^^^^^^^^^ calling free on these is a bug.
-// Any other value means that the string has been corrupted.
-pub struct ustring {
-pub mut:
-	s     string
-	runes []int
-	len   int
-}
-
 // vstrlen returns the V length of the C string `s` (0 terminator is not counted).
 [unsafe]
 pub fn vstrlen(s &byte) int {
@@ -1257,177 +1244,6 @@ pub fn (s string) str() string {
 	return s.clone()
 }

-// str returns the string itself.
-pub fn (s ustring) str() string {
-	return s.s
-}
-
-// ustring converts the string to a unicode string.
-pub fn (s string) ustring() ustring {
-	mut res := ustring{
-		s: s // runes will have at least s.len elements, save reallocations
-		// TODO use VLA for small strings?
-	}
-	$if gcboehm_opt ? {
-		res.runes = __new_array_noscan(0, s.len, int(sizeof(int)))
-	} $else {
-		res.runes = __new_array(0, s.len, int(sizeof(int)))
-	}
-	for i := 0; i < s.len; i++ {
-		char_len := utf8_char_len(unsafe { s.str[i] })
-		res.runes << i
-		i += char_len - 1
-		res.len++
-	}
-	return res
-}
-
-// A hack that allows to create ustring without allocations.
-// It's called from functions like draw_text() where we know that the string is going to be freed
-// right away. Uses global buffer for storing runes []int array.
-__global (
-	g_ustring_runes []int
-)
-
-pub fn (s string) ustring_tmp() ustring {
-	if g_ustring_runes.len == 0 {
-		$if gcboehm_opt ? {
-			g_ustring_runes = __new_array_noscan(0, 128, int(sizeof(int)))
-		} $else {
-			g_ustring_runes = __new_array(0, 128, int(sizeof(int)))
-		}
-	}
-	mut res := ustring{
-		s: s
-	}
-	res.runes = g_ustring_runes
-	res.runes.len = s.len
-	mut j := 0
-	for i := 0; i < s.len; i++ {
-		char_len := utf8_char_len(unsafe { s.str[i] })
-		res.runes[j] = i
-		j++
-		i += char_len - 1
-		res.len++
-	}
-	return res
-}
-
-fn (u ustring) == (a ustring) bool {
-	return u.s == a.s
-}
-
-fn (u ustring) < (a ustring) bool {
-	return u.s < a.s
-}
-
-fn (u ustring) + (a ustring) ustring {
-	mut res := ustring{
-		s: u.s + a.s
-	}
-	$if gcboehm_opt ? {
-		res.runes = __new_array_noscan(0, u.s.len + a.s.len, int(sizeof(int)))
-	} $else {
-		res.runes = __new_array(0, u.s.len + a.s.len, int(sizeof(int)))
-	}
-	mut j := 0
-	for i := 0; i < u.s.len; i++ {
-		char_len := utf8_char_len(unsafe { u.s.str[i] })
-		res.runes << j
-		i += char_len - 1
-		j += char_len
-		res.len++
-	}
-	for i := 0; i < a.s.len; i++ {
-		char_len := utf8_char_len(unsafe { a.s.str[i] })
-		res.runes << j
-		i += char_len - 1
-		j += char_len
-		res.len++
-	}
-	return res
-}
-
-// index_after returns the position of the input string, starting search from `start` position.
-pub fn (u ustring) index_after(p ustring, start int) int {
-	if p.len > u.len {
-		return -1
-	}
-	mut strt := start
-	if start < 0 {
-		strt = 0
-	}
-	if start > u.len {
-		return -1
-	}
-	mut i := strt
-	for i < u.len {
-		mut j := 0
-		mut ii := i
-		for j < p.len && u.at(ii) == p.at(j) {
-			j++
-			ii++
-		}
-		if j == p.len {
-			return i
-		}
-		i++
-	}
-	return -1
-}
-
-// count returns the number of occurrences of `substr` in the string.
-// count returns -1 if no `substr` could be found.
-pub fn (u ustring) count(substr ustring) int {
-	if u.len == 0 || substr.len == 0 {
-		return 0
-	}
-	if substr.len > u.len {
-		return 0
-	}
-	mut n := 0
-	mut i := 0
-	for {
-		i = u.index_after(substr, i)
-		if i == -1 {
-			return n
-		}
-		i += substr.len
-		n++
-	}
-	return 0 // TODO can never get here - v doesn't know that
-}
-
-// substr returns the string between index positions `_start` and `_end`.
-// Example: assert 'ABCD'.substr(1,3) == 'BC'
-pub fn (u ustring) substr(_start int, _end int) string {
-	$if !no_bounds_checking ? {
-		if _start > _end || _start > u.len || _end > u.len || _start < 0 || _end < 0 {
-			panic('substr($_start, $_end) out of bounds (len=$u.len)')
-		}
-	}
-	end := if _end >= u.len { u.s.len } else { u.runes[_end] }
-	return u.s.substr(u.runes[_start], end)
-}
-
-// left returns the `n`th leftmost characters of the ustring.
-// Example: assert 'hello'.left(2) == 'he'
-pub fn (u ustring) left(pos int) string {
-	if pos >= u.len {
-		return u.s
-	}
-	return u.substr(0, pos)
-}
-
-// right returns the `n`th rightmost characters of the ustring.
-// Example: assert 'hello'.right(2) == 'lo'
-pub fn (u ustring) right(pos int) string {
-	if pos >= u.len {
-		return ''
-	}
-	return u.substr(pos, u.len)
-}
-
 // at returns the byte at index `idx`.
 // Example: assert 'ABC'.at(1) == byte(`B`)
 fn (s string) at(idx int) byte {
@@ -1441,29 +1257,6 @@ fn (s string) at(idx int) byte {
 	}
 }

-// at returns the string at index `idx`.
-// Example: assert 'ABC'.at(1) == 'B'
-pub fn (u ustring) at(idx int) string {
-	$if !no_bounds_checking ? {
-		if idx < 0 || idx >= u.len {
-			panic('string index out of range: $idx / $u.runes.len')
-		}
-	}
-	return u.substr(idx, idx + 1)
-}
-
-// free allows for manually freeing the memory occupied by the unicode string.
-[unsafe]
-fn (u &ustring) free() {
-	$if prealloc {
-		return
-	}
-	unsafe {
-		u.runes.free()
-		u.s.free()
-	}
-}
-
 // is_space returns `true` if the byte is a white space character.
 // The following list is considered white space characters: ` `, `\t`, `\n`, `\v`, `\f`, `\r`, 0x85, 0xa0
 // Example: assert byte(` `).is_space() == true
@@ -1689,11 +1482,11 @@ pub fn (s string) reverse() string {
 // 'hello'.limit(2) => 'he'
 // 'hi'.limit(10) => 'hi'
 pub fn (s string) limit(max int) string {
-	u := s.ustring()
+	u := s.runes()
 	if u.len <= max {
 		return s.clone()
 	}
-	return u.substr(0, max)
+	return u[0..max].string()
 }

 // hash returns an integer hash of the string.
--- a/vlib/builtin/string_test.v
+++ b/vlib/builtin/string_test.v
@@ -361,7 +361,7 @@ fn test_runes() {
 	assert s.len == 12
 	s2 := 'privet'
 	assert s2.len == 6
-	u := s.ustring()
+	u := s.runes()
 	assert u.len == 6
 	assert s2.substr(1, 4).len == 3
 	assert s2.substr(1, 4) == 'riv'
@@ -371,30 +371,16 @@ fn test_runes() {
 	assert s2[..4] == 'priv'
 	assert s2[2..].len == 4
 	assert s2[2..] == 'ivet'
-	assert u.substr(1, 4).len == 6
-	assert u.substr(1, 4) == 'рив'
+	assert u[1..4].string().len == 6
+	assert u[1..4].string() == 'рив'
 	assert s2.substr(1, 2) == 'r'
-	assert u.substr(1, 2) == 'р'
-	assert s2.ustring().at(1) == 'r'
-	assert u.at(1) == 'р'
-	first := u.at(0)
-	last := u.at(u.len - 1)
-	assert first.len == 2
-	assert last.len == 2
-}
-
-fn test_left_right() {
-	s := 'ALOHA'
-	assert s[..3] == 'ALO'
-	assert s[..0] == ''
-	assert s[..5] == s
-	assert s[3..] == 'HA'
-	// assert s.right(6) == ''
-	u := s.ustring()
-	assert u.left(3) == 'ALO'
-	assert u.left(0) == ''
-	assert u.right(3) == 'HA'
-	assert u.right(6) == ''
+	assert u[1..2].string() == 'р'
+	assert s2.runes()[1] == `r`
+	assert u[1] == `р`
+	first := u[0]
+	last := u[u.len - 1]
+	assert first.str().len == 2
+	assert last.str().len == 2
 }

 fn test_contains() {
@@ -672,42 +658,6 @@ fn test_quote() {
 	assert a.str() == "'"
 }

-fn test_ustring_comparisons() {
-	/*
-	QTODO
-	assert ('h€llô !'.ustring() == 'h€llô !'.ustring()) == true
-	assert ('h€llô !'.ustring() == 'h€llô'.ustring()) == false
-	assert ('h€llô !'.ustring() == 'h€llo !'.ustring()) == false
-
-	assert ('h€llô !'.ustring() != 'h€llô !'.ustring()) == false
-	assert ('h€llô !'.ustring() != 'h€llô'.ustring()) == true
-
-	assert ('h€llô'.ustring() < 'h€llô!'.ustring()) == true
-	assert ('h€llô'.ustring() < 'h€llo'.ustring()) == false
-	assert ('h€llo'.ustring() < 'h€llô'.ustring()) == true
-
-	assert ('h€llô'.ustring() <= 'h€llô!'.ustring()) == true
-	assert ('h€llô'.ustring() <= 'h€llô'.ustring()) == true
-	assert ('h€llô!'.ustring() <= 'h€llô'.ustring()) == false
-
-	assert ('h€llô!'.ustring() > 'h€llô'.ustring()) == true
-	assert ('h€llô'.ustring() > 'h€llô'.ustring()) == false
-
-	assert ('h€llô!'.ustring() >= 'h€llô'.ustring()) == true
-	assert ('h€llô'.ustring() >= 'h€llô'.ustring()) == true
-	assert ('h€llô'.ustring() >= 'h€llô!'.ustring()) == false
-	*/
-}
-
-fn test_ustring_count() {
-	a := 'h€llôﷰ h€llô ﷰ'.ustring()
-	assert (a.count('l'.ustring())) == 4
-	assert (a.count('€'.ustring())) == 2
-	assert (a.count('h€llô'.ustring())) == 2
-	assert (a.count('ﷰ'.ustring())) == 2
-	assert (a.count('a'.ustring())) == 0
-}
-
 fn test_limit() {
 	s := 'hello'
 	assert s.limit(2) == 'he'
--- a/vlib/encoding/utf8/utf8_util.v
+++ b/vlib/encoding/utf8/utf8_util.v
@@ -33,11 +33,6 @@ pub fn len(s string) int {
 	return count
 }

-// u_len return the length as number of unicode chars from a ustring
-pub fn u_len(s ustring) int {
-	return len(s.s)
-}
-
 // char_len calculate the length in bytes of a utf8 char
 [deprecated: 'use builtin utf8_char_len']
 pub fn char_len(b byte) int {
@@ -134,23 +129,11 @@ pub fn to_upper(s string) string {
 	return up_low(s, true)
 }

-// u_to_upper return an uppercase string from a ustring
-pub fn u_to_upper(s ustring) ustring {
-	tmp := up_low(s.s, true)
-	return tmp.ustring()
-}
-
 // to_lower return an lowercase string from a string
 pub fn to_lower(s string) string {
 	return up_low(s, false)
 }

-// u_to_lower return an lowercase string from a ustring
-pub fn u_to_lower(s ustring) ustring {
-	tmp := up_low(s.s, false)
-	return tmp.ustring()
-}
-
 /*
 Punctuation functions

--- a/vlib/encoding/utf8/utf8_util_test.v
+++ b/vlib/encoding/utf8/utf8_util_test.v
@@ -13,22 +13,11 @@ fn test_utf8_util() {
 	assert utf8.to_upper('абвёabc12｛') == 'АБВЁABC12｛'
 	assert utf8.to_lower('АБВЁABC12｛') == 'абвёabc12｛'

-	// ustring test
-	src1 := src.ustring()
-	upper1 := utf8.u_to_upper(src1)
-	lower1 := utf8.u_to_lower(src1)
-	assert upper1 == (src_upper.ustring())
-	assert lower1 == (src_lower.ustring())
-
 	// test len function
 	assert utf8.len('') == 0
 	assert utf8.len('pippo') == 5
 	assert utf8.len(src) == 15 // 29
 	assert src.len == 24 // 49
-	// test u_len function
-	assert utf8.u_len(''.ustring()) == 0
-	assert utf8.u_len(src1) == 15 // 29
-	assert utf8.u_len('pippo'.ustring()) == 5

 	// western punctuation
 	a := '.abc?abcòàè.'
--- a/vlib/v/ast/types.v
+++ b/vlib/v/ast/types.v
@@ -353,17 +353,16 @@ pub const (
 	bool_type_idx          = 16
 	none_type_idx          = 17
 	string_type_idx        = 18
-	ustring_type_idx       = 19
-	rune_type_idx          = 20
-	array_type_idx         = 21
-	map_type_idx           = 22
-	chan_type_idx          = 23
-	size_t_type_idx        = 24
-	any_type_idx           = 25
-	float_literal_type_idx = 26
-	int_literal_type_idx   = 27
-	thread_type_idx        = 28
-	error_type_idx         = 29
+	rune_type_idx          = 19
+	array_type_idx         = 20
+	map_type_idx           = 21
+	chan_type_idx          = 22
+	size_t_type_idx        = 23
+	any_type_idx           = 24
+	float_literal_type_idx = 25
+	int_literal_type_idx   = 26
+	thread_type_idx        = 27
+	error_type_idx         = 28
 )

 pub const (
@@ -376,7 +375,7 @@ pub const (
 		byte_type_idx, u16_type_idx, u32_type_idx, u64_type_idx, f32_type_idx, f64_type_idx,
 		int_literal_type_idx, float_literal_type_idx, rune_type_idx]
 	pointer_type_idxs          = [voidptr_type_idx, byteptr_type_idx, charptr_type_idx]
-	string_type_idxs           = [string_type_idx, ustring_type_idx]
+	string_type_idxs           = [string_type_idx]
 )

 pub const (
@@ -399,7 +398,6 @@ pub const (
 	bool_type          = new_type(bool_type_idx)
 	none_type          = new_type(none_type_idx)
 	string_type        = new_type(string_type_idx)
-	ustring_type       = new_type(ustring_type_idx)
 	rune_type          = new_type(rune_type_idx)
 	array_type         = new_type(array_type_idx)
 	map_type           = new_type(map_type_idx)
@@ -425,9 +423,9 @@ pub fn merge_types(params ...[]Type) []Type {

 pub const (
 	builtin_type_names = ['void', 'voidptr', 'charptr', 'byteptr', 'i8', 'i16', 'int', 'i64', 'u16',
-		'u32', 'u64', 'int_literal', 'f32', 'f64', 'float_literal', 'string', 'ustring', 'char',
-		'byte', 'bool', 'none', 'array', 'array_fixed', 'map', 'chan', 'any', 'struct', 'mapnode',
-		'size_t', 'rune', 'thread', 'Error']
+		'u32', 'u64', 'int_literal', 'f32', 'f64', 'float_literal', 'string', 'char', 'byte', 'bool',
+		'none', 'array', 'array_fixed', 'map', 'chan', 'any', 'struct', 'mapnode', 'size_t', 'rune',
+		'thread', 'Error']
 )

 pub struct MultiReturn {
@@ -472,7 +470,6 @@ pub enum Kind {
 	bool
 	none_
 	string
-	ustring
 	array
 	array_fixed
 	map
@@ -604,7 +601,6 @@ pub fn (mut t Table) register_builtin_type_symbols() {
 	t.register_type_symbol(kind: .bool, name: 'bool', cname: 'bool', mod: 'builtin')
 	t.register_type_symbol(kind: .none_, name: 'none', cname: 'none', mod: 'builtin')
 	t.register_type_symbol(kind: .string, name: 'string', cname: 'string', mod: 'builtin')
-	t.register_type_symbol(kind: .ustring, name: 'ustring', cname: 'ustring', mod: 'builtin')
 	t.register_type_symbol(kind: .rune, name: 'rune', cname: 'rune', mod: 'builtin')
 	t.register_type_symbol(kind: .array, name: 'array', cname: 'array', mod: 'builtin')
 	t.register_type_symbol(kind: .map, name: 'map', cname: 'map', mod: 'builtin')
@@ -656,7 +652,7 @@ pub fn (t &TypeSymbol) is_float() bool {

 [inline]
 pub fn (t &TypeSymbol) is_string() bool {
-	return t.kind in [.string, .ustring]
+	return t.kind == .string
 }

 [inline]
@@ -711,7 +707,6 @@ pub fn (k Kind) str() string {
 		.any { 'any' }
 		.function { 'function' }
 		.interface_ { 'interface' }
-		.ustring { 'ustring' }
 		.generic_struct_inst { 'generic_struct_inst' }
 		.rune { 'rune' }
 		.aggregate { 'aggregate' }
--- a/vlib/v/checker/checker.v
+++ b/vlib/v/checker/checker.v
@@ -6658,9 +6658,9 @@ fn (mut c Checker) check_index(typ_sym &ast.TypeSymbol, index ast.Expr, index_ty
 	// println('index expr left=$typ_sym.name $node.pos.line_nr')
 	// if typ_sym.kind == .array && (!(ast.type_idx(index_type) in ast.number_type_idxs) &&
 	// index_type_sym.kind != .enum_) {
-	if typ_sym.kind in [.array, .array_fixed, .string, .ustring] {
+	if typ_sym.kind in [.array, .array_fixed, .string] {
 		if !(index_type.is_int() || index_type_sym.kind == .enum_) {
-			type_str := if typ_sym.kind in [.string, .ustring] {
+			type_str := if typ_sym.kind == .string {
 				'non-integer string index `$index_type_sym.name`'
 			} else {
 				'non-integer index `$index_type_sym.name` (array type `$typ_sym.name`)'
@@ -6679,7 +6679,7 @@ fn (mut c Checker) check_index(typ_sym &ast.TypeSymbol, index ast.Expr, index_ty
 			}
 		}
 		if index_type.has_flag(.optional) {
-			type_str := if typ_sym.kind in [.string, .ustring] {
+			type_str := if typ_sym.kind == .string {
 				'(type `$typ_sym.name`)'
 			} else {
 				'(array type `$typ_sym.name`)'
--- a/vlib/v/gen/c/cgen.v
+++ b/vlib/v/gen/c/cgen.v
@@ -5704,7 +5704,7 @@ fn (mut g Gen) type_default(typ_ ast.Type) string {
 				for field in info.fields {
 					field_sym := g.table.get_type_symbol(field.typ)
 					if field.has_default_expr
-						|| field_sym.kind in [.array, .map, .string, .ustring, .bool, .alias, .size_t, .i8, .i16, .int, .i64, .byte, .u16, .u32, .u64, .char, .voidptr, .byteptr, .charptr, .struct_] {
+						|| field_sym.kind in [.array, .map, .string, .bool, .alias, .size_t, .i8, .i16, .int, .i64, .byte, .u16, .u32, .u64, .char, .voidptr, .byteptr, .charptr, .struct_] {
 						field_name := c_name(field.name)
 						if field.has_default_expr {
 							expr_str := g.expr_string(field.default_expr)
--- a/vlib/v/gen/js/builtin_types.v
+++ b/vlib/v/gen/js/builtin_types.v
@@ -136,7 +136,7 @@ pub fn (mut g JsGen) typ(t ast.Type) string {
 		.none_ {
 			styp = 'undefined'
 		}
-		.string, .ustring, .char {
+		.string, .char {
 			styp = '${g.sym_to_js_typ(sym)}'
 		}
 		// 'array_array_int' => 'number[][]'
--- a/vlib/v/markused/markused.v
+++ b/vlib/v/markused/markused.v
@@ -72,39 +72,31 @@ pub fn mark_used(mut table ast.Table, pref &pref.Preferences, ast_files []&ast.F
 		'18.le',
 		'18.ge',
 		'fast_string_eq',
-		// ustring. ==, !=, etc...
-		'19.eq',
-		'19.ne',
-		'19.lt',
-		'19.gt',
-		'19.le',
-		'19.ge',
-		'19.add',
 		// other array methods
-		'21.get',
-		'21.set',
-		'21.get_unsafe',
-		'21.set_unsafe',
-		'21.get_with_check' /* used for `x := a[i] or {}` */,
-		'21.clone_static_to_depth',
-		'21.clone_to_depth',
-		'21.first',
-		'21.last',
-		'21.pointers' /* TODO: handle generic methods calling array primitives more precisely in pool_test.v */,
-		'21.reverse',
-		'21.repeat_to_depth',
-		'21.slice',
-		'21.slice2',
+		'20.get',
+		'20.set',
+		'20.get_unsafe',
+		'20.set_unsafe',
+		'20.get_with_check' /* used for `x := a[i] or {}` */,
+		'20.clone_static_to_depth',
+		'20.clone_to_depth',
+		'20.first',
+		'20.last',
+		'20.pointers' /* TODO: handle generic methods calling array primitives more precisely in pool_test.v */,
+		'20.reverse',
+		'20.repeat_to_depth',
+		'20.slice',
+		'20.slice2',
 		'59.get',
 		'59.set',
-		'65557.last',
-		'65557.pop',
-		'65557.push',
-		'65557.insert_many',
-		'65557.prepend_many',
-		'65557.reverse',
-		'65557.set',
-		'65557.set_unsafe',
+		'65556.last',
+		'65556.pop',
+		'65556.push',
+		'65556.insert_many',
+		'65556.prepend_many',
+		'65556.reverse',
+		'65556.set',
+		'65556.set_unsafe',
 		// TODO: process the _vinit const initializations automatically too
 		'json__decode_string',
 		'os.getwd',