all: remove ustring (#10630)

pull/10661/head
Daniel Däschle 2021-07-03 19:14:09 +02:00 committed by GitHub
parent 5a4a1997e7
commit 0f9537ece5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 56 additions and 354 deletions

View File

@ -8,7 +8,7 @@
- [ ] [C2V translator](https://github.com/vlang/v/issues/6985)
- [ ] doom.v
- [x] rune type
- [ ] replace `ustring` with `[]rune`
- [x] replace `ustring` with `[]rune`
- [x] fix `byte.str()`
- [x] maps with non-string keys
- [x] iOS/Android support
@ -25,4 +25,4 @@
- [ ] merge v.c and v_win.c
- [ ] more advanced errors, not just `error('message')`
- [ ] VLS for autocomplete, refactoring, go to definition etc
- [ ] Recursive structs via optionals: `struct Node { next ?Node }`
- [ ] Recursive structs via optionals: `struct Node { next ?Node }`

View File

@ -49,19 +49,6 @@ mut:
is_lit int
}
// NB string.is_lit is an enumeration of the following:
// .is_lit == 0 => a fresh string, should be freed by autofree
// .is_lit == 1 => a literal string from .rodata, should NOT be freed
// .is_lit == -98761234 => already freed string, protects against double frees.
// ---------> ^^^^^^^^^ calling free on these is a bug.
// Any other value means that the string has been corrupted.
// ustring pairs a string with an index of where each of its UTF-8 characters starts.
// NOTE(review): the field descriptions below follow how `string.ustring()` fills them in.
pub struct ustring {
pub mut:
s string // the underlying string
runes []int // byte offset in `s` at which each character starts
len int // number of characters (not bytes)
}
// vstrlen returns the V length of the C string `s` (0 terminator is not counted).
[unsafe]
pub fn vstrlen(s &byte) int {
@ -1257,177 +1244,6 @@ pub fn (s string) str() string {
return s.clone()
}
// str returns the underlying string `s.s` as is, without cloning it.
pub fn (s ustring) str() string {
return s.s
}
// ustring converts the string to a unicode string: it records the byte offset
// at which every UTF-8 character of `s` starts, so that characters can later be
// addressed by index.
pub fn (s string) ustring() ustring {
	mut out := ustring{
		s: s
	}
	// Reserve room for `s.len` offsets up front to save reallocations while appending.
	// TODO use VLA for small strings?
	$if gcboehm_opt ? {
		out.runes = __new_array_noscan(0, s.len, int(sizeof(int)))
	} $else {
		out.runes = __new_array(0, s.len, int(sizeof(int)))
	}
	mut pos := 0
	for pos < s.len {
		out.runes << pos
		// skip the whole character; its byte width is derived from its first byte
		pos += utf8_char_len(unsafe { s.str[pos] })
	}
	out.len = out.runes.len
	return out
}
// ustring_tmp is a hack that creates a ustring without allocating a new `runes` array
// on every call. It's called from functions like draw_text() where we know that the
// string is going to be freed right away.
// The rune offsets are stored in the global buffer `g_ustring_runes`, so the returned
// value is overwritten by the next call to ustring_tmp().
__global (
	g_ustring_runes []int
)

pub fn (s string) ustring_tmp() ustring {
	// (Re)allocate when the buffer is missing or too small for `s`.
	// `cap` is tested instead of `len`: only the copy held in `res.runes` gets its `len`
	// changed below, so the global's `len` stays 0 and a `len == 0` test would allocate
	// a fresh buffer on every call.
	// Growing the buffer also keeps strings longer than the initial 128 entries from
	// writing past the end of the allocation.
	// The previous buffer is intentionally not freed here: an earlier result may still
	// point at it.
	if g_ustring_runes.cap == 0 || g_ustring_runes.cap < s.len {
		wanted := if s.len > 128 { s.len } else { 128 }
		$if gcboehm_opt ? {
			g_ustring_runes = __new_array_noscan(0, wanted, int(sizeof(int)))
		} $else {
			g_ustring_runes = __new_array(0, wanted, int(sizeof(int)))
		}
	}
	mut res := ustring{
		s: s
	}
	res.runes = g_ustring_runes
	// one slot per byte is the worst case (pure ASCII); `res.len` holds the real count
	res.runes.len = s.len
	mut j := 0
	for i := 0; i < s.len; i++ {
		char_len := utf8_char_len(unsafe { s.str[i] })
		res.runes[j] = i
		j++
		i += char_len - 1
		res.len++
	}
	return res
}
// == reports whether the underlying strings of `u` and `a` are equal.
fn (u ustring) == (a ustring) bool {
return u.s == a.s
}
// < orders `u` before `a` using the `<` operator of their underlying strings.
fn (u ustring) < (a ustring) bool {
return u.s < a.s
}
// + concatenates two unicode strings. The character offsets of the result are
// rebuilt by walking `u.s` and then `a.s`, while `offset` keeps track of the
// position inside the joined string.
fn (u ustring) + (a ustring) ustring {
	mut joined := ustring{
		s: u.s + a.s
	}
	total_bytes := u.s.len + a.s.len
	$if gcboehm_opt ? {
		joined.runes = __new_array_noscan(0, total_bytes, int(sizeof(int)))
	} $else {
		joined.runes = __new_array(0, total_bytes, int(sizeof(int)))
	}
	mut offset := 0
	// characters coming from the left operand
	mut pos := 0
	for pos < u.s.len {
		width := utf8_char_len(unsafe { u.s.str[pos] })
		joined.runes << offset
		pos += width
		offset += width
	}
	// characters coming from the right operand
	pos = 0
	for pos < a.s.len {
		width := utf8_char_len(unsafe { a.s.str[pos] })
		joined.runes << offset
		pos += width
		offset += width
	}
	joined.len = joined.runes.len
	return joined
}
// index_after returns the character index of the first occurrence of `p` in `u`
// at or after character index `start`, or -1 if there is none.
// A negative `start` is treated as 0; a `start` beyond the end of `u` yields -1.
pub fn (u ustring) index_after(p ustring, start int) int {
	if p.len > u.len || start > u.len {
		return -1
	}
	mut i := if start < 0 { 0 } else { start }
	for i < u.len {
		mut j := 0
		mut ii := i
		// `ii < u.len` keeps a partial match at the very end of `u` (e.g. searching
		// 'bc' in 'ab') from calling `u.at()` past the last character, which panics.
		for j < p.len && ii < u.len && u.at(ii) == p.at(j) {
			j++
			ii++
		}
		if j == p.len {
			return i
		}
		i++
	}
	return -1
}
// count returns the number of non-overlapping occurrences of `substr` in the string.
// count returns 0 if `substr` is empty, longer than the string, or not found.
pub fn (u ustring) count(substr ustring) int {
	if u.len == 0 || substr.len == 0 || substr.len > u.len {
		return 0
	}
	mut hits := 0
	mut pos := u.index_after(substr, 0)
	for pos != -1 {
		hits++
		// resume right after the match that was just counted
		pos = u.index_after(substr, pos + substr.len)
	}
	return hits
}
// substr returns the string between character index positions `_start` and `_end`
// (`_end` is exclusive). It panics on out of range indexes, unless bounds checking
// is disabled with `no_bounds_checking`.
// Example: assert 'ABCD'.ustring().substr(1,3) == 'BC'
pub fn (u ustring) substr(_start int, _end int) string {
	$if !no_bounds_checking ? {
		if _start > _end || _start > u.len || _end > u.len || _start < 0 || _end < 0 {
			panic('substr($_start, $_end) out of bounds (len=$u.len)')
		}
	}
	// An index equal to `u.len` is accepted by the check above but has no entry in
	// `u.runes`; it maps to the end of the underlying string. Previously only `_end`
	// was handled this way, so `substr(u.len, u.len)` indexed `u.runes` out of range.
	start := if _start >= u.len { u.s.len } else { u.runes[_start] }
	end := if _end >= u.len { u.s.len } else { u.runes[_end] }
	return u.s.substr(start, end)
}
// left returns the first `pos` characters of the ustring, or the whole
// underlying string when it has `pos` characters or fewer.
// Example: assert 'hello'.ustring().left(2) == 'he'
pub fn (u ustring) left(pos int) string {
	return if pos >= u.len { u.s } else { u.substr(0, pos) }
}
// right returns the characters of the ustring from character index `pos` to the end,
// or an empty string when `pos` is at or beyond the end.
// Example: assert 'hello'.ustring().right(2) == 'llo'
pub fn (u ustring) right(pos int) string {
	return if pos >= u.len { '' } else { u.substr(pos, u.len) }
}
// at returns the byte at index `idx`.
// Example: assert 'ABC'.at(1) == byte(`B`)
fn (s string) at(idx int) byte {
@ -1441,29 +1257,6 @@ fn (s string) at(idx int) byte {
}
}
// at returns the character at character index `idx`, as a string.
// It panics when `idx` is out of range, unless bounds checking is disabled
// with `no_bounds_checking`.
// Example: assert 'ABC'.ustring().at(1) == 'B'
pub fn (u ustring) at(idx int) string {
	$if !no_bounds_checking ? {
		if idx < 0 || idx >= u.len {
			// Report `u.len`, the bound that is actually checked; `u.runes.len` can
			// differ from it (ustring_tmp() sets it to the byte length of the string).
			panic('string index out of range: $idx / $u.len')
		}
	}
	return u.substr(idx, idx + 1)
}
// free allows for manually freeing the memory occupied by the unicode string:
// both the `runes` array and the underlying string `s` are freed.
// It does nothing when compiled with `prealloc`.
// NOTE(review): for a value made by ustring_tmp(), `runes` appears to share the global
// `g_ustring_runes` buffer - confirm that free() is never called on such values.
[unsafe]
fn (u &ustring) free() {
$if prealloc {
return
}
unsafe {
u.runes.free()
u.s.free()
}
}
// is_space returns `true` if the byte is a white space character.
// The following list is considered white space characters: ` `, `\t`, `\n`, `\v`, `\f`, `\r`, 0x85, 0xa0
// Example: assert byte(` `).is_space() == true
@ -1689,11 +1482,11 @@ pub fn (s string) reverse() string {
// 'hello'.limit(2) => 'he'
// 'hi'.limit(10) => 'hi'
pub fn (s string) limit(max int) string {
u := s.ustring()
u := s.runes()
if u.len <= max {
return s.clone()
}
return u.substr(0, max)
return u[0..max].string()
}
// hash returns an integer hash of the string.

View File

@ -361,7 +361,7 @@ fn test_runes() {
assert s.len == 12
s2 := 'privet'
assert s2.len == 6
u := s.ustring()
u := s.runes()
assert u.len == 6
assert s2.substr(1, 4).len == 3
assert s2.substr(1, 4) == 'riv'
@ -371,30 +371,16 @@ fn test_runes() {
assert s2[..4] == 'priv'
assert s2[2..].len == 4
assert s2[2..] == 'ivet'
assert u.substr(1, 4).len == 6
assert u.substr(1, 4) == 'рив'
assert u[1..4].string().len == 6
assert u[1..4].string() == 'рив'
assert s2.substr(1, 2) == 'r'
assert u.substr(1, 2) == 'р'
assert s2.ustring().at(1) == 'r'
assert u.at(1) == 'р'
first := u.at(0)
last := u.at(u.len - 1)
assert first.len == 2
assert last.len == 2
}
fn test_left_right() {
s := 'ALOHA'
assert s[..3] == 'ALO'
assert s[..0] == ''
assert s[..5] == s
assert s[3..] == 'HA'
// assert s.right(6) == ''
u := s.ustring()
assert u.left(3) == 'ALO'
assert u.left(0) == ''
assert u.right(3) == 'HA'
assert u.right(6) == ''
assert u[1..2].string() == 'р'
assert s2.runes()[1] == `r`
assert u[1] == `р`
first := u[0]
last := u[u.len - 1]
assert first.str().len == 2
assert last.str().len == 2
}
fn test_contains() {
@ -672,42 +658,6 @@ fn test_quote() {
assert a.str() == "'"
}
// test_ustring_comparisons is meant to cover the ustring comparison operators.
// All of its assertions are currently commented out (see the QTODO marker below),
// so the test passes without checking anything.
fn test_ustring_comparisons() {
/*
QTODO
assert ('hllô !'.ustring() == 'hllô !'.ustring()) == true
assert ('hllô !'.ustring() == 'hllô'.ustring()) == false
assert ('hllô !'.ustring() == 'hllo !'.ustring()) == false
assert ('hllô !'.ustring() != 'hllô !'.ustring()) == false
assert ('hllô !'.ustring() != 'hllô'.ustring()) == true
assert ('hllô'.ustring() < 'hllô!'.ustring()) == true
assert ('hllô'.ustring() < 'hllo'.ustring()) == false
assert ('hllo'.ustring() < 'hllô'.ustring()) == true
assert ('hllô'.ustring() <= 'hllô!'.ustring()) == true
assert ('hllô'.ustring() <= 'hllô'.ustring()) == true
assert ('hllô!'.ustring() <= 'hllô'.ustring()) == false
assert ('hllô!'.ustring() > 'hllô'.ustring()) == true
assert ('hllô'.ustring() > 'hllô'.ustring()) == false
assert ('hllô!'.ustring() >= 'hllô'.ustring()) == true
assert ('hllô'.ustring() >= 'hllô'.ustring()) == true
assert ('hllô'.ustring() >= 'hllô!'.ustring()) == false
*/
}
// test_ustring_count checks ustring.count() for single characters, multi-character
// needles, an absent needle and the empty needle.
fn test_ustring_count() {
	a := 'hllô hllô '.ustring()
	assert (a.count('l'.ustring())) == 4
	// the text contains exactly two spaces
	assert (a.count(' '.ustring())) == 2
	assert (a.count('hllô'.ustring())) == 2
	// count() returns 0 for an empty needle, it does not count gaps between characters
	assert (a.count(''.ustring())) == 0
	assert (a.count('a'.ustring())) == 0
}
fn test_limit() {
s := 'hello'
assert s.limit(2) == 'he'

View File

@ -33,11 +33,6 @@ pub fn len(s string) int {
return count
}
// u_len returns the length of a ustring as a number of unicode chars,
// computed by calling `len` on its underlying string
pub fn u_len(s ustring) int {
return len(s.s)
}
// char_len calculates the length in bytes of a utf8 char
[deprecated: 'use builtin utf8_char_len']
pub fn char_len(b byte) int {
@ -134,23 +129,11 @@ pub fn to_upper(s string) string {
return up_low(s, true)
}
// u_to_upper returns an uppercase ustring from a ustring:
// the underlying string is converted with `up_low` and indexed again.
pub fn u_to_upper(s ustring) ustring {
	return up_low(s.s, true).ustring()
}
// to_lower returns a lowercase string from a string
pub fn to_lower(s string) string {
return up_low(s, false)
}
// u_to_lower returns a lowercase ustring from a ustring:
// the underlying string is converted with `up_low` and indexed again.
pub fn u_to_lower(s ustring) ustring {
	return up_low(s.s, false).ustring()
}
/*
Punctuation functions

View File

@ -13,22 +13,11 @@ fn test_utf8_util() {
assert utf8.to_upper('абвёabc12') == 'АБВЁABC12'
assert utf8.to_lower('АБВЁABC12') == 'абвёabc12'
// ustring test
src1 := src.ustring()
upper1 := utf8.u_to_upper(src1)
lower1 := utf8.u_to_lower(src1)
assert upper1 == (src_upper.ustring())
assert lower1 == (src_lower.ustring())
// test len function
assert utf8.len('') == 0
assert utf8.len('pippo') == 5
assert utf8.len(src) == 15 // 29
assert src.len == 24 // 49
// test u_len function
assert utf8.u_len(''.ustring()) == 0
assert utf8.u_len(src1) == 15 // 29
assert utf8.u_len('pippo'.ustring()) == 5
// western punctuation
a := '.abc?abcòàè.'

View File

@ -353,17 +353,16 @@ pub const (
bool_type_idx = 16
none_type_idx = 17
string_type_idx = 18
ustring_type_idx = 19
rune_type_idx = 20
array_type_idx = 21
map_type_idx = 22
chan_type_idx = 23
size_t_type_idx = 24
any_type_idx = 25
float_literal_type_idx = 26
int_literal_type_idx = 27
thread_type_idx = 28
error_type_idx = 29
rune_type_idx = 19
array_type_idx = 20
map_type_idx = 21
chan_type_idx = 22
size_t_type_idx = 23
any_type_idx = 24
float_literal_type_idx = 25
int_literal_type_idx = 26
thread_type_idx = 27
error_type_idx = 28
)
pub const (
@ -376,7 +375,7 @@ pub const (
byte_type_idx, u16_type_idx, u32_type_idx, u64_type_idx, f32_type_idx, f64_type_idx,
int_literal_type_idx, float_literal_type_idx, rune_type_idx]
pointer_type_idxs = [voidptr_type_idx, byteptr_type_idx, charptr_type_idx]
string_type_idxs = [string_type_idx, ustring_type_idx]
string_type_idxs = [string_type_idx]
)
pub const (
@ -399,7 +398,6 @@ pub const (
bool_type = new_type(bool_type_idx)
none_type = new_type(none_type_idx)
string_type = new_type(string_type_idx)
ustring_type = new_type(ustring_type_idx)
rune_type = new_type(rune_type_idx)
array_type = new_type(array_type_idx)
map_type = new_type(map_type_idx)
@ -425,9 +423,9 @@ pub fn merge_types(params ...[]Type) []Type {
pub const (
builtin_type_names = ['void', 'voidptr', 'charptr', 'byteptr', 'i8', 'i16', 'int', 'i64', 'u16',
'u32', 'u64', 'int_literal', 'f32', 'f64', 'float_literal', 'string', 'ustring', 'char',
'byte', 'bool', 'none', 'array', 'array_fixed', 'map', 'chan', 'any', 'struct', 'mapnode',
'size_t', 'rune', 'thread', 'Error']
'u32', 'u64', 'int_literal', 'f32', 'f64', 'float_literal', 'string', 'char', 'byte', 'bool',
'none', 'array', 'array_fixed', 'map', 'chan', 'any', 'struct', 'mapnode', 'size_t', 'rune',
'thread', 'Error']
)
pub struct MultiReturn {
@ -472,7 +470,6 @@ pub enum Kind {
bool
none_
string
ustring
array
array_fixed
map
@ -604,7 +601,6 @@ pub fn (mut t Table) register_builtin_type_symbols() {
t.register_type_symbol(kind: .bool, name: 'bool', cname: 'bool', mod: 'builtin')
t.register_type_symbol(kind: .none_, name: 'none', cname: 'none', mod: 'builtin')
t.register_type_symbol(kind: .string, name: 'string', cname: 'string', mod: 'builtin')
t.register_type_symbol(kind: .ustring, name: 'ustring', cname: 'ustring', mod: 'builtin')
t.register_type_symbol(kind: .rune, name: 'rune', cname: 'rune', mod: 'builtin')
t.register_type_symbol(kind: .array, name: 'array', cname: 'array', mod: 'builtin')
t.register_type_symbol(kind: .map, name: 'map', cname: 'map', mod: 'builtin')
@ -656,7 +652,7 @@ pub fn (t &TypeSymbol) is_float() bool {
[inline]
pub fn (t &TypeSymbol) is_string() bool {
return t.kind in [.string, .ustring]
return t.kind == .string
}
[inline]
@ -711,7 +707,6 @@ pub fn (k Kind) str() string {
.any { 'any' }
.function { 'function' }
.interface_ { 'interface' }
.ustring { 'ustring' }
.generic_struct_inst { 'generic_struct_inst' }
.rune { 'rune' }
.aggregate { 'aggregate' }

View File

@ -6658,9 +6658,9 @@ fn (mut c Checker) check_index(typ_sym &ast.TypeSymbol, index ast.Expr, index_ty
// println('index expr left=$typ_sym.name $node.pos.line_nr')
// if typ_sym.kind == .array && (!(ast.type_idx(index_type) in ast.number_type_idxs) &&
// index_type_sym.kind != .enum_) {
if typ_sym.kind in [.array, .array_fixed, .string, .ustring] {
if typ_sym.kind in [.array, .array_fixed, .string] {
if !(index_type.is_int() || index_type_sym.kind == .enum_) {
type_str := if typ_sym.kind in [.string, .ustring] {
type_str := if typ_sym.kind == .string {
'non-integer string index `$index_type_sym.name`'
} else {
'non-integer index `$index_type_sym.name` (array type `$typ_sym.name`)'
@ -6679,7 +6679,7 @@ fn (mut c Checker) check_index(typ_sym &ast.TypeSymbol, index ast.Expr, index_ty
}
}
if index_type.has_flag(.optional) {
type_str := if typ_sym.kind in [.string, .ustring] {
type_str := if typ_sym.kind == .string {
'(type `$typ_sym.name`)'
} else {
'(array type `$typ_sym.name`)'

View File

@ -5704,7 +5704,7 @@ fn (mut g Gen) type_default(typ_ ast.Type) string {
for field in info.fields {
field_sym := g.table.get_type_symbol(field.typ)
if field.has_default_expr
|| field_sym.kind in [.array, .map, .string, .ustring, .bool, .alias, .size_t, .i8, .i16, .int, .i64, .byte, .u16, .u32, .u64, .char, .voidptr, .byteptr, .charptr, .struct_] {
|| field_sym.kind in [.array, .map, .string, .bool, .alias, .size_t, .i8, .i16, .int, .i64, .byte, .u16, .u32, .u64, .char, .voidptr, .byteptr, .charptr, .struct_] {
field_name := c_name(field.name)
if field.has_default_expr {
expr_str := g.expr_string(field.default_expr)

View File

@ -136,7 +136,7 @@ pub fn (mut g JsGen) typ(t ast.Type) string {
.none_ {
styp = 'undefined'
}
.string, .ustring, .char {
.string, .char {
styp = '${g.sym_to_js_typ(sym)}'
}
// 'array_array_int' => 'number[][]'

View File

@ -72,39 +72,31 @@ pub fn mark_used(mut table ast.Table, pref &pref.Preferences, ast_files []&ast.F
'18.le',
'18.ge',
'fast_string_eq',
// ustring. ==, !=, etc...
'19.eq',
'19.ne',
'19.lt',
'19.gt',
'19.le',
'19.ge',
'19.add',
// other array methods
'21.get',
'21.set',
'21.get_unsafe',
'21.set_unsafe',
'21.get_with_check' /* used for `x := a[i] or {}` */,
'21.clone_static_to_depth',
'21.clone_to_depth',
'21.first',
'21.last',
'21.pointers' /* TODO: handle generic methods calling array primitives more precisely in pool_test.v */,
'21.reverse',
'21.repeat_to_depth',
'21.slice',
'21.slice2',
'20.get',
'20.set',
'20.get_unsafe',
'20.set_unsafe',
'20.get_with_check' /* used for `x := a[i] or {}` */,
'20.clone_static_to_depth',
'20.clone_to_depth',
'20.first',
'20.last',
'20.pointers' /* TODO: handle generic methods calling array primitives more precisely in pool_test.v */,
'20.reverse',
'20.repeat_to_depth',
'20.slice',
'20.slice2',
'59.get',
'59.set',
'65557.last',
'65557.pop',
'65557.push',
'65557.insert_many',
'65557.prepend_many',
'65557.reverse',
'65557.set',
'65557.set_unsafe',
'65556.last',
'65556.pop',
'65556.push',
'65556.insert_many',
'65556.prepend_many',
'65556.reverse',
'65556.set',
'65556.set_unsafe',
// TODO: process the _vinit const initializations automatically too
'json__decode_string',
'os.getwd',