From 0f9537ece544b7fda31cadf4dc95fd4b552f94be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20D=C3=A4schle?= Date: Sat, 3 Jul 2021 19:14:09 +0200 Subject: [PATCH] all: remove ustring (#10630) --- ROADMAP.md | 4 +- vlib/builtin/string.v | 211 +--------------------------- vlib/builtin/string_test.v | 70 ++------- vlib/encoding/utf8/utf8_util.v | 17 --- vlib/encoding/utf8/utf8_util_test.v | 11 -- vlib/v/ast/types.v | 35 ++--- vlib/v/checker/checker.v | 6 +- vlib/v/gen/c/cgen.v | 2 +- vlib/v/gen/js/builtin_types.v | 2 +- vlib/v/markused/markused.v | 52 +++---- 10 files changed, 56 insertions(+), 354 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index efdac05d62..101d5ed796 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -8,7 +8,7 @@ - [ ] [C2V translator](https://github.com/vlang/v/issues/6985) - [ ] doom.v - [x] rune type -- [ ] replace `ustring` with `[]rune` +- [x] replace `ustring` with `[]rune` - [x] fix `byte.str()` - [x] maps with non-string keys - [x] iOS/Android support @@ -25,4 +25,4 @@ - [ ] merge v.c and v_win.c - [ ] more advanced errors, not just `error('message')` - [ ] VLS for autocomplete, refactoring, go to definition etc -- [ ] Recursive structs via optionals: `struct Node { next ?Node }` \ No newline at end of file +- [ ] Recursive structs via optionals: `struct Node { next ?Node }` diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index 85434f1bb4..108394545d 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -49,19 +49,6 @@ mut: is_lit int } -// NB string.is_lit is an enumeration of the following: -// .is_lit == 0 => a fresh string, should be freed by autofree -// .is_lit == 1 => a literal string from .rodata, should NOT be freed -// .is_lit == -98761234 => already freed string, protects against double frees. -// ---------> ^^^^^^^^^ calling free on these is a bug. -// Any other value means that the string has been corrupted. -pub struct ustring { -pub mut: - s string - runes []int - len int -} - // vstrlen returns the V length of the C string `s` (0 terminator is not counted). [unsafe] pub fn vstrlen(s &byte) int { @@ -1257,177 +1244,6 @@ pub fn (s string) str() string { return s.clone() } -// str returns the string itself. -pub fn (s ustring) str() string { - return s.s -} - -// ustring converts the string to a unicode string. -pub fn (s string) ustring() ustring { - mut res := ustring{ - s: s // runes will have at least s.len elements, save reallocations - // TODO use VLA for small strings? - } - $if gcboehm_opt ? { - res.runes = __new_array_noscan(0, s.len, int(sizeof(int))) - } $else { - res.runes = __new_array(0, s.len, int(sizeof(int))) - } - for i := 0; i < s.len; i++ { - char_len := utf8_char_len(unsafe { s.str[i] }) - res.runes << i - i += char_len - 1 - res.len++ - } - return res -} - -// A hack that allows to create ustring without allocations. -// It's called from functions like draw_text() where we know that the string is going to be freed -// right away. Uses global buffer for storing runes []int array. -__global ( - g_ustring_runes []int -) - -pub fn (s string) ustring_tmp() ustring { - if g_ustring_runes.len == 0 { - $if gcboehm_opt ? { - g_ustring_runes = __new_array_noscan(0, 128, int(sizeof(int))) - } $else { - g_ustring_runes = __new_array(0, 128, int(sizeof(int))) - } - } - mut res := ustring{ - s: s - } - res.runes = g_ustring_runes - res.runes.len = s.len - mut j := 0 - for i := 0; i < s.len; i++ { - char_len := utf8_char_len(unsafe { s.str[i] }) - res.runes[j] = i - j++ - i += char_len - 1 - res.len++ - } - return res -} - -fn (u ustring) == (a ustring) bool { - return u.s == a.s -} - -fn (u ustring) < (a ustring) bool { - return u.s < a.s -} - -fn (u ustring) + (a ustring) ustring { - mut res := ustring{ - s: u.s + a.s - } - $if gcboehm_opt ? { - res.runes = __new_array_noscan(0, u.s.len + a.s.len, int(sizeof(int))) - } $else { - res.runes = __new_array(0, u.s.len + a.s.len, int(sizeof(int))) - } - mut j := 0 - for i := 0; i < u.s.len; i++ { - char_len := utf8_char_len(unsafe { u.s.str[i] }) - res.runes << j - i += char_len - 1 - j += char_len - res.len++ - } - for i := 0; i < a.s.len; i++ { - char_len := utf8_char_len(unsafe { a.s.str[i] }) - res.runes << j - i += char_len - 1 - j += char_len - res.len++ - } - return res -} - -// index_after returns the position of the input string, starting search from `start` position. -pub fn (u ustring) index_after(p ustring, start int) int { - if p.len > u.len { - return -1 - } - mut strt := start - if start < 0 { - strt = 0 - } - if start > u.len { - return -1 - } - mut i := strt - for i < u.len { - mut j := 0 - mut ii := i - for j < p.len && u.at(ii) == p.at(j) { - j++ - ii++ - } - if j == p.len { - return i - } - i++ - } - return -1 -} - -// count returns the number of occurrences of `substr` in the string. -// count returns -1 if no `substr` could be found. -pub fn (u ustring) count(substr ustring) int { - if u.len == 0 || substr.len == 0 { - return 0 - } - if substr.len > u.len { - return 0 - } - mut n := 0 - mut i := 0 - for { - i = u.index_after(substr, i) - if i == -1 { - return n - } - i += substr.len - n++ - } - return 0 // TODO can never get here - v doesn't know that -} - -// substr returns the string between index positions `_start` and `_end`. -// Example: assert 'ABCD'.substr(1,3) == 'BC' -pub fn (u ustring) substr(_start int, _end int) string { - $if !no_bounds_checking ? { - if _start > _end || _start > u.len || _end > u.len || _start < 0 || _end < 0 { - panic('substr($_start, $_end) out of bounds (len=$u.len)') - } - } - end := if _end >= u.len { u.s.len } else { u.runes[_end] } - return u.s.substr(u.runes[_start], end) -} - -// left returns the `n`th leftmost characters of the ustring. -// Example: assert 'hello'.left(2) == 'he' -pub fn (u ustring) left(pos int) string { - if pos >= u.len { - return u.s - } - return u.substr(0, pos) -} - -// right returns the `n`th rightmost characters of the ustring. -// Example: assert 'hello'.right(2) == 'lo' -pub fn (u ustring) right(pos int) string { - if pos >= u.len { - return '' - } - return u.substr(pos, u.len) -} - // at returns the byte at index `idx`. // Example: assert 'ABC'.at(1) == byte(`B`) fn (s string) at(idx int) byte { @@ -1441,29 +1257,6 @@ fn (s string) at(idx int) byte { } } -// at returns the string at index `idx`. -// Example: assert 'ABC'.at(1) == 'B' -pub fn (u ustring) at(idx int) string { - $if !no_bounds_checking ? { - if idx < 0 || idx >= u.len { - panic('string index out of range: $idx / $u.runes.len') - } - } - return u.substr(idx, idx + 1) -} - -// free allows for manually freeing the memory occupied by the unicode string. -[unsafe] -fn (u &ustring) free() { - $if prealloc { - return - } - unsafe { - u.runes.free() - u.s.free() - } -} - // is_space returns `true` if the byte is a white space character. // The following list is considered white space characters: ` `, `\t`, `\n`, `\v`, `\f`, `\r`, 0x85, 0xa0 // Example: assert byte(` `).is_space() == true @@ -1689,11 +1482,11 @@ pub fn (s string) reverse() string { // 'hello'.limit(2) => 'he' // 'hi'.limit(10) => 'hi' pub fn (s string) limit(max int) string { - u := s.ustring() + u := s.runes() if u.len <= max { return s.clone() } - return u.substr(0, max) + return u[0..max].string() } // hash returns an integer hash of the string. diff --git a/vlib/builtin/string_test.v b/vlib/builtin/string_test.v index 64a46abe4f..8a5ee6659b 100644 --- a/vlib/builtin/string_test.v +++ b/vlib/builtin/string_test.v @@ -361,7 +361,7 @@ fn test_runes() { assert s.len == 12 s2 := 'privet' assert s2.len == 6 - u := s.ustring() + u := s.runes() assert u.len == 6 assert s2.substr(1, 4).len == 3 assert s2.substr(1, 4) == 'riv' @@ -371,30 +371,16 @@ fn test_runes() { assert s2[..4] == 'priv' assert s2[2..].len == 4 assert s2[2..] == 'ivet' - assert u.substr(1, 4).len == 6 - assert u.substr(1, 4) == 'рив' + assert u[1..4].string().len == 6 + assert u[1..4].string() == 'рив' assert s2.substr(1, 2) == 'r' - assert u.substr(1, 2) == 'р' - assert s2.ustring().at(1) == 'r' - assert u.at(1) == 'р' - first := u.at(0) - last := u.at(u.len - 1) - assert first.len == 2 - assert last.len == 2 -} - -fn test_left_right() { - s := 'ALOHA' - assert s[..3] == 'ALO' - assert s[..0] == '' - assert s[..5] == s - assert s[3..] == 'HA' - // assert s.right(6) == '' - u := s.ustring() - assert u.left(3) == 'ALO' - assert u.left(0) == '' - assert u.right(3) == 'HA' - assert u.right(6) == '' + assert u[1..2].string() == 'р' + assert s2.runes()[1] == `r` + assert u[1] == `р` + first := u[0] + last := u[u.len - 1] + assert first.str().len == 2 + assert last.str().len == 2 } fn test_contains() { @@ -672,42 +658,6 @@ fn test_quote() { assert a.str() == "'" } -fn test_ustring_comparisons() { - /* - QTODO - assert ('h€llô !'.ustring() == 'h€llô !'.ustring()) == true - assert ('h€llô !'.ustring() == 'h€llô'.ustring()) == false - assert ('h€llô !'.ustring() == 'h€llo !'.ustring()) == false - - assert ('h€llô !'.ustring() != 'h€llô !'.ustring()) == false - assert ('h€llô !'.ustring() != 'h€llô'.ustring()) == true - - assert ('h€llô'.ustring() < 'h€llô!'.ustring()) == true - assert ('h€llô'.ustring() < 'h€llo'.ustring()) == false - assert ('h€llo'.ustring() < 'h€llô'.ustring()) == true - - assert ('h€llô'.ustring() <= 'h€llô!'.ustring()) == true - assert ('h€llô'.ustring() <= 'h€llô'.ustring()) == true - assert ('h€llô!'.ustring() <= 'h€llô'.ustring()) == false - - assert ('h€llô!'.ustring() > 'h€llô'.ustring()) == true - assert ('h€llô'.ustring() > 'h€llô'.ustring()) == false - - assert ('h€llô!'.ustring() >= 'h€llô'.ustring()) == true - assert ('h€llô'.ustring() >= 'h€llô'.ustring()) == true - assert ('h€llô'.ustring() >= 'h€llô!'.ustring()) == false - */ -} - -fn test_ustring_count() { - a := 'h€llôﷰ h€llô ﷰ'.ustring() - assert (a.count('l'.ustring())) == 4 - assert (a.count('€'.ustring())) == 2 - assert (a.count('h€llô'.ustring())) == 2 - assert (a.count('ﷰ'.ustring())) == 2 - assert (a.count('a'.ustring())) == 0 -} - fn test_limit() { s := 'hello' assert s.limit(2) == 'he' diff --git a/vlib/encoding/utf8/utf8_util.v b/vlib/encoding/utf8/utf8_util.v index bfdc99092c..8ab736634b 100644 --- a/vlib/encoding/utf8/utf8_util.v +++ b/vlib/encoding/utf8/utf8_util.v @@ -33,11 +33,6 @@ pub fn len(s string) int { return count } -// u_len return the length as number of unicode chars from a ustring -pub fn u_len(s ustring) int { - return len(s.s) -} - // char_len calculate the length in bytes of a utf8 char [deprecated: 'use builtin utf8_char_len'] pub fn char_len(b byte) int { @@ -134,23 +129,11 @@ pub fn to_upper(s string) string { return up_low(s, true) } -// u_to_upper return an uppercase string from a ustring -pub fn u_to_upper(s ustring) ustring { - tmp := up_low(s.s, true) - return tmp.ustring() -} - // to_lower return an lowercase string from a string pub fn to_lower(s string) string { return up_low(s, false) } -// u_to_lower return an lowercase string from a ustring -pub fn u_to_lower(s ustring) ustring { - tmp := up_low(s.s, false) - return tmp.ustring() -} - /* Punctuation functions diff --git a/vlib/encoding/utf8/utf8_util_test.v b/vlib/encoding/utf8/utf8_util_test.v index 1724604889..f09cb76bbb 100644 --- a/vlib/encoding/utf8/utf8_util_test.v +++ b/vlib/encoding/utf8/utf8_util_test.v @@ -13,22 +13,11 @@ fn test_utf8_util() { assert utf8.to_upper('абвёabc12{') == 'АБВЁABC12{' assert utf8.to_lower('АБВЁABC12{') == 'абвёabc12{' - // ustring test - src1 := src.ustring() - upper1 := utf8.u_to_upper(src1) - lower1 := utf8.u_to_lower(src1) - assert upper1 == (src_upper.ustring()) - assert lower1 == (src_lower.ustring()) - // test len function assert utf8.len('') == 0 assert utf8.len('pippo') == 5 assert utf8.len(src) == 15 // 29 assert src.len == 24 // 49 - // test u_len function - assert utf8.u_len(''.ustring()) == 0 - assert utf8.u_len(src1) == 15 // 29 - assert utf8.u_len('pippo'.ustring()) == 5 // western punctuation a := '.abc?abcòàè.' diff --git a/vlib/v/ast/types.v b/vlib/v/ast/types.v index 410a009606..ec5ecc5ea7 100644 --- a/vlib/v/ast/types.v +++ b/vlib/v/ast/types.v @@ -353,17 +353,16 @@ pub const ( bool_type_idx = 16 none_type_idx = 17 string_type_idx = 18 - ustring_type_idx = 19 - rune_type_idx = 20 - array_type_idx = 21 - map_type_idx = 22 - chan_type_idx = 23 - size_t_type_idx = 24 - any_type_idx = 25 - float_literal_type_idx = 26 - int_literal_type_idx = 27 - thread_type_idx = 28 - error_type_idx = 29 + rune_type_idx = 19 + array_type_idx = 20 + map_type_idx = 21 + chan_type_idx = 22 + size_t_type_idx = 23 + any_type_idx = 24 + float_literal_type_idx = 25 + int_literal_type_idx = 26 + thread_type_idx = 27 + error_type_idx = 28 ) pub const ( @@ -376,7 +375,7 @@ pub const ( byte_type_idx, u16_type_idx, u32_type_idx, u64_type_idx, f32_type_idx, f64_type_idx, int_literal_type_idx, float_literal_type_idx, rune_type_idx] pointer_type_idxs = [voidptr_type_idx, byteptr_type_idx, charptr_type_idx] - string_type_idxs = [string_type_idx, ustring_type_idx] + string_type_idxs = [string_type_idx] ) pub const ( @@ -399,7 +398,6 @@ pub const ( bool_type = new_type(bool_type_idx) none_type = new_type(none_type_idx) string_type = new_type(string_type_idx) - ustring_type = new_type(ustring_type_idx) rune_type = new_type(rune_type_idx) array_type = new_type(array_type_idx) map_type = new_type(map_type_idx) @@ -425,9 +423,9 @@ pub fn merge_types(params ...[]Type) []Type { pub const ( builtin_type_names = ['void', 'voidptr', 'charptr', 'byteptr', 'i8', 'i16', 'int', 'i64', 'u16', - 'u32', 'u64', 'int_literal', 'f32', 'f64', 'float_literal', 'string', 'ustring', 'char', - 'byte', 'bool', 'none', 'array', 'array_fixed', 'map', 'chan', 'any', 'struct', 'mapnode', - 'size_t', 'rune', 'thread', 'Error'] + 'u32', 'u64', 'int_literal', 'f32', 'f64', 'float_literal', 'string', 'char', 'byte', 'bool', + 'none', 'array', 'array_fixed', 'map', 'chan', 'any', 'struct', 'mapnode', 'size_t', 'rune', + 'thread', 'Error'] ) pub struct MultiReturn { @@ -472,7 +470,6 @@ pub enum Kind { bool none_ string - ustring array array_fixed map @@ -604,7 +601,6 @@ pub fn (mut t Table) register_builtin_type_symbols() { t.register_type_symbol(kind: .bool, name: 'bool', cname: 'bool', mod: 'builtin') t.register_type_symbol(kind: .none_, name: 'none', cname: 'none', mod: 'builtin') t.register_type_symbol(kind: .string, name: 'string', cname: 'string', mod: 'builtin') - t.register_type_symbol(kind: .ustring, name: 'ustring', cname: 'ustring', mod: 'builtin') t.register_type_symbol(kind: .rune, name: 'rune', cname: 'rune', mod: 'builtin') t.register_type_symbol(kind: .array, name: 'array', cname: 'array', mod: 'builtin') t.register_type_symbol(kind: .map, name: 'map', cname: 'map', mod: 'builtin') @@ -656,7 +652,7 @@ pub fn (t &TypeSymbol) is_float() bool { [inline] pub fn (t &TypeSymbol) is_string() bool { - return t.kind in [.string, .ustring] + return t.kind == .string } [inline] @@ -711,7 +707,6 @@ pub fn (k Kind) str() string { .any { 'any' } .function { 'function' } .interface_ { 'interface' } - .ustring { 'ustring' } .generic_struct_inst { 'generic_struct_inst' } .rune { 'rune' } .aggregate { 'aggregate' } diff --git a/vlib/v/checker/checker.v b/vlib/v/checker/checker.v index 2e45b5d2b5..52b0bff691 100644 --- a/vlib/v/checker/checker.v +++ b/vlib/v/checker/checker.v @@ -6658,9 +6658,9 @@ fn (mut c Checker) check_index(typ_sym &ast.TypeSymbol, index ast.Expr, index_ty // println('index expr left=$typ_sym.name $node.pos.line_nr') // if typ_sym.kind == .array && (!(ast.type_idx(index_type) in ast.number_type_idxs) && // index_type_sym.kind != .enum_) { - if typ_sym.kind in [.array, .array_fixed, .string, .ustring] { + if typ_sym.kind in [.array, .array_fixed, .string] { if !(index_type.is_int() || index_type_sym.kind == .enum_) { - type_str := if typ_sym.kind in [.string, .ustring] { + type_str := if typ_sym.kind == .string { 'non-integer string index `$index_type_sym.name`' } else { 'non-integer index `$index_type_sym.name` (array type `$typ_sym.name`)' @@ -6679,7 +6679,7 @@ fn (mut c Checker) check_index(typ_sym &ast.TypeSymbol, index ast.Expr, index_ty } } if index_type.has_flag(.optional) { - type_str := if typ_sym.kind in [.string, .ustring] { + type_str := if typ_sym.kind == .string { '(type `$typ_sym.name`)' } else { '(array type `$typ_sym.name`)' diff --git a/vlib/v/gen/c/cgen.v b/vlib/v/gen/c/cgen.v index 1832bd604d..81f588993e 100644 --- a/vlib/v/gen/c/cgen.v +++ b/vlib/v/gen/c/cgen.v @@ -5704,7 +5704,7 @@ fn (mut g Gen) type_default(typ_ ast.Type) string { for field in info.fields { field_sym := g.table.get_type_symbol(field.typ) if field.has_default_expr - || field_sym.kind in [.array, .map, .string, .ustring, .bool, .alias, .size_t, .i8, .i16, .int, .i64, .byte, .u16, .u32, .u64, .char, .voidptr, .byteptr, .charptr, .struct_] { + || field_sym.kind in [.array, .map, .string, .bool, .alias, .size_t, .i8, .i16, .int, .i64, .byte, .u16, .u32, .u64, .char, .voidptr, .byteptr, .charptr, .struct_] { field_name := c_name(field.name) if field.has_default_expr { expr_str := g.expr_string(field.default_expr) diff --git a/vlib/v/gen/js/builtin_types.v b/vlib/v/gen/js/builtin_types.v index 4576919682..9b84abd1f9 100644 --- a/vlib/v/gen/js/builtin_types.v +++ b/vlib/v/gen/js/builtin_types.v @@ -136,7 +136,7 @@ pub fn (mut g JsGen) typ(t ast.Type) string { .none_ { styp = 'undefined' } - .string, .ustring, .char { + .string, .char { styp = '${g.sym_to_js_typ(sym)}' } // 'array_array_int' => 'number[][]' diff --git a/vlib/v/markused/markused.v b/vlib/v/markused/markused.v index 3bb1922280..d2f5325248 100644 --- a/vlib/v/markused/markused.v +++ b/vlib/v/markused/markused.v @@ -72,39 +72,31 @@ pub fn mark_used(mut table ast.Table, pref &pref.Preferences, ast_files []&ast.F '18.le', '18.ge', 'fast_string_eq', - // ustring. ==, !=, etc... - '19.eq', - '19.ne', - '19.lt', - '19.gt', - '19.le', - '19.ge', - '19.add', // other array methods - '21.get', - '21.set', - '21.get_unsafe', - '21.set_unsafe', - '21.get_with_check' /* used for `x := a[i] or {}` */, - '21.clone_static_to_depth', - '21.clone_to_depth', - '21.first', - '21.last', - '21.pointers' /* TODO: handle generic methods calling array primitives more precisely in pool_test.v */, - '21.reverse', - '21.repeat_to_depth', - '21.slice', - '21.slice2', + '20.get', + '20.set', + '20.get_unsafe', + '20.set_unsafe', + '20.get_with_check' /* used for `x := a[i] or {}` */, + '20.clone_static_to_depth', + '20.clone_to_depth', + '20.first', + '20.last', + '20.pointers' /* TODO: handle generic methods calling array primitives more precisely in pool_test.v */, + '20.reverse', + '20.repeat_to_depth', + '20.slice', + '20.slice2', '59.get', '59.set', - '65557.last', - '65557.pop', - '65557.push', - '65557.insert_many', - '65557.prepend_many', - '65557.reverse', - '65557.set', - '65557.set_unsafe', + '65556.last', + '65556.pop', + '65556.push', + '65556.insert_many', + '65556.prepend_many', + '65556.reverse', + '65556.set', + '65556.set_unsafe', // TODO: process the _vinit const initializations automatically too 'json__decode_string', 'os.getwd',