builtin: add a `byterune` method on `[]byte` (#13145)

pull/13164/head
jeffmikels 2022-01-13 15:26:17 -05:00 committed by GitHub
parent 155f897270
commit 4189b7e280
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 53 additions and 23 deletions

View File

@ -546,6 +546,15 @@ pub fn (b []byte) bytestr() string {
} }
} }
// byterune decodes the bytes in `b` from UTF-8 into a single
// UTF-32 value and returns that value as a rune.
// The decoding itself is delegated to `utf8_to_utf32`; any error
// it reports (for example, when the array holds more than four
// bytes) is propagated to the caller.
pub fn (b []byte) byterune() ?rune {
	code_point := b.utf8_to_utf32() ?
	return rune(code_point)
}
// repeat returns a new string with `count` number of copies of the byte it was called on. // repeat returns a new string with `count` number of copies of the byte it was called on.
pub fn (b byte) repeat(count int) string { pub fn (b byte) repeat(count int) string {
if count < 0 { if count < 0 {

View File

@ -67,27 +67,48 @@ pub fn utf32_decode_to_buffer(code u32, buf &byte) int {
} }
// Convert utf8 to utf32 // Convert utf8 to utf32
// The original implementation did not check that the string
// contained valid UTF-8, and could produce values outside
// the range allowed by the UTF-32 spec.
// It has been replaced by `utf8_to_utf32`, which has an
// optional return type.
//
// This function is kept for backward compatibility:
// it is used in vlib/builtin/string.v
// and also in vlib/v/gen/c/cgen.v.
pub fn (_rune string) utf32_code() int { pub fn (_rune string) utf32_code() int {
if _rune.len == 0 { return int(_rune.bytes().utf8_to_utf32() or {
// error('more than one utf-8 rune found in this string')
rune(0)
})
}
// convert array of utf8 bytes to single utf32 value
// will error if more than 4 bytes are submitted
pub fn (_bytes []byte) utf8_to_utf32() ?rune {
if _bytes.len == 0 {
return 0 return 0
} }
// save ASC symbol as is // return ASCII unchanged
if _rune.len == 1 { if _bytes.len == 1 {
return int(_rune[0]) return rune(_bytes[0])
} }
mut b := byte(int(_rune[0])) if _bytes.len > 4 {
// TODO should be return error('attempted to decode too many bytes, utf-8 is limited to four bytes maximum')
// res := int( rune[0] << rune.len) }
b = b << _rune.len
mut res := u32(b) mut b := byte(int(_bytes[0]))
mut shift := 6 - _rune.len
for i := 1; i < _rune.len; i++ { b = b << _bytes.len
c := u32(_rune[i]) mut res := rune(b)
res = u32(res) << shift mut shift := 6 - _bytes.len
for i := 1; i < _bytes.len; i++ {
c := rune(_bytes[i])
res = rune(res) << shift
res |= c & 63 // 0x3f res |= c & 63 // 0x3f
shift = 6 shift = 6
} }
return int(res) return res
} }
// Calculate length to read from the first byte // Calculate length to read from the first byte

View File

@ -1,5 +1,5 @@
vlib/v/checker/tests/fn_type_mismatch.vv:11:15: error: invalid array element: expected `fn (int, int) f32`, not `fn (f32, f32) f32` vlib/v/checker/tests/fn_type_mismatch.vv:11:15: error: invalid array element: expected `fn (int, int) f32`, not `fn (f32, f32) f32`
9 | 9 |
10 | fn main() { 10 | fn main() {
11 | fns := [add, div] 11 | fns := [add, div]
| ~~~ | ~~~

View File

@ -1,5 +1,5 @@
vlib/v/checker/tests/int_modulo_by_zero_err.vv:2:17: error: modulo by zero vlib/v/checker/tests/int_modulo_by_zero_err.vv:2:17: error: modulo by zero
1 | fn main() { 1 | fn main() {
2 | println(3 % 0) 2 | println(3 % 0)
| ^ | ^
3 | } 3 | }

View File

@ -6,9 +6,9 @@ vlib/v/checker/tests/modify_const_with_ref.vv:11:11: error: `constant` is immuta
12 | c.value = 200 12 | c.value = 200
13 | } 13 | }
vlib/v/checker/tests/modify_const_with_ref.vv:9:6: error: unused variable: `unused_var` vlib/v/checker/tests/modify_const_with_ref.vv:9:6: error: unused variable: `unused_var`
7 | 7 |
8 | fn main() { 8 | fn main() {
9 | mut unused_var := Foo{} 9 | mut unused_var := Foo{}
| ~~~~~~~~~~ | ~~~~~~~~~~
10 | unused_var = Foo{} 10 | unused_var = Foo{}
11 | mut c := &constant 11 | mut c := &constant

View File

@ -2,4 +2,4 @@ vlib/v/parser/tests/module_multiple_names_err.vv:1:13: error: `module main`, you
1 | module main os 1 | module main os
| ~~ | ~~
2 | fn main() { 2 | fn main() {
3 | println('hello, world') 3 | println('hello, world')