strconv: fix `atoi` returning 0 on large strings (#10635)

pull/10646/head
Flinner 2021-07-02 10:39:57 +03:00 committed by GitHub
parent abbf71c794
commit 1486258591
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 62 additions and 52 deletions

View File

@ -465,22 +465,22 @@ pub fn (s string) bool() bool {
// int returns the value of the string as an integer `'1'.int() == 1`.
pub fn (s string) int() int {
return int(strconv.common_parse_int(s, 0, 32, false, false))
return int(strconv.common_parse_int(s, 0, 32, false, false) or { 0 })
}
// i64 returns the value of the string as i64 `'1'.i64() == i64(1)`.
pub fn (s string) i64() i64 {
return strconv.common_parse_int(s, 0, 64, false, false)
return strconv.common_parse_int(s, 0, 64, false, false) or { 0 }
}
// i8 returns the value of the string as i8 `'1'.i8() == i8(1)`.
pub fn (s string) i8() i8 {
return i8(strconv.common_parse_int(s, 0, 8, false, false))
return i8(strconv.common_parse_int(s, 0, 8, false, false) or { 0 })
}
// i16 returns the value of the string as i16 `'1'.i16() == i16(1)`.
pub fn (s string) i16() i16 {
return i16(strconv.common_parse_int(s, 0, 16, false, false))
return i16(strconv.common_parse_int(s, 0, 16, false, false) or { 0 })
}
// f32 returns the value of the string as f32 `'1.0'.f32() == f32(1)`.
@ -497,17 +497,17 @@ pub fn (s string) f64() f64 {
// u16 returns the value of the string as u16 `'1'.u16() == u16(1)`.
pub fn (s string) u16() u16 {
return u16(strconv.common_parse_uint(s, 0, 16, false, false))
return u16(strconv.common_parse_uint(s, 0, 16, false, false) or { 0 })
}
// u32 returns the value of the string as u32 `'1'.u32() == u32(1)`.
pub fn (s string) u32() u32 {
return u32(strconv.common_parse_uint(s, 0, 32, false, false))
return u32(strconv.common_parse_uint(s, 0, 32, false, false) or { 0 })
}
// u64 returns the value of the string as u64 `'1'.u64() == u64(1)`.
pub fn (s string) u64() u64 {
return strconv.common_parse_uint(s, 0, 64, false, false)
return strconv.common_parse_uint(s, 0, 64, false, false) or { 0 }
}
[direct_array_access]

View File

@ -17,12 +17,16 @@ pub fn byte_to_lower(c byte) byte {
}
// common_parse_uint is called by parse_uint and allows the parsing
// to stop on non or invalid digit characters and return the result so far
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) u64 {
result, error := common_parse_uint2(s, _base, _bit_size)
if error != 0 {
if error > 0 && (error_on_non_digit || error_on_high_digit) {
return u64(0)
// to stop on non or invalid digit characters and return with an error
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) ?u64 {
result, err := common_parse_uint2(s, _base, _bit_size)
// TODO: error_on_non_digit and error_on_high_digit have no difference
if err != 0 && (error_on_non_digit || error_on_high_digit) {
match err {
-1 { return error('common_parse_uint: wrong base $_base for $s') }
-2 { return error('common_parse_uint: wrong bit size $_bit_size for $s') }
-3 { return error('common_parse_uint: integer overflow $s') }
else { return error('common_parse_uint: syntax error $s') }
}
}
return result
@ -114,13 +118,13 @@ pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
}
// parse_uint is like parse_int but for unsigned numbers.
pub fn parse_uint(s string, _base int, _bit_size int) u64 {
pub fn parse_uint(s string, _base int, _bit_size int) ?u64 {
return common_parse_uint(s, _base, _bit_size, true, true)
}
// common_parse_int is called by parse int and allows the parsing
// to stop on non or invalid digit characters and return the result so far
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) i64 {
// to stop on non or invalid digit characters and return with an error
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) ?i64 {
mut s := _s
mut bit_size := _bit_size
if s.len < 1 {
@ -139,7 +143,7 @@ pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit b
// un := parse_uint(s, base, bit_size) or {
// return i64(0)
// }
un := common_parse_uint(s, base, bit_size, error_on_non_digit, error_on_high_digit)
un := common_parse_uint(s, base, bit_size, error_on_non_digit, error_on_high_digit) ?
if un == 0 {
return i64(0)
}
@ -171,7 +175,7 @@ pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit b
// that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
// correspond to int, int8, int16, int32, and int64.
// If bitSize is below 0 or above 64, an error is returned.
pub fn parse_int(_s string, base int, _bit_size int) i64 {
pub fn parse_int(_s string, base int, _bit_size int) ?i64 {
return common_parse_int(_s, base, _bit_size, true, true)
}
@ -203,7 +207,7 @@ pub fn atoi(s string) ?int {
return if s[0] == `-` { -n } else { n }
}
// Slow path for invalid, big, or underscored integers.
int64 := parse_int(s, 10, 0)
int64 := parse_int(s, 10, 0) ?
return int(int64)
}

View File

@ -1,27 +1,23 @@
import strconv
fn test_atoi() {
if x := strconv.atoi('16') {
assert x == 16
} else {
assert false
}
if x := strconv.atoi('+16') {
assert x == 16
} else {
assert false
}
if x := strconv.atoi('-16') {
assert x == -16
} else {
assert false
}
fn test_atoi() ? {
assert strconv.atoi('16') ? == 16
assert strconv.atoi('+16') ? == 16
assert strconv.atoi('-16') ? == -16
// invalid strings
if x := strconv.atoi('str') {
println(x)
assert false
} else {
assert true
}
if x := strconv.atoi('string_longer_than_10_chars') {
println(x)
assert false
} else {
assert true
}
if x := strconv.atoi('') {
println(x)
assert false
@ -30,23 +26,34 @@ fn test_atoi() {
}
}
fn test_parse_int() {
fn test_parse_int() ? {
// Different bases
assert strconv.parse_int('16', 16, 0) == 0x16
assert strconv.parse_int('16', 8, 0) == 0o16
assert strconv.parse_int('11', 2, 0) == 3
assert strconv.parse_int('16', 16, 0) ? == 0x16
assert strconv.parse_int('16', 8, 0) ? == 0o16
assert strconv.parse_int('11', 2, 0) ? == 3
// Different bit sizes
assert strconv.parse_int('127', 10, 8) == 127
assert strconv.parse_int('128', 10, 8) == 127
assert strconv.parse_int('32767', 10, 16) == 32767
assert strconv.parse_int('32768', 10, 16) == 32767
assert strconv.parse_int('2147483647', 10, 32) == 2147483647
assert strconv.parse_int('2147483648', 10, 32) == 2147483647
assert strconv.parse_int('9223372036854775807', 10, 64) == 9223372036854775807
assert strconv.parse_int('9223372036854775808', 10, 64) == 9223372036854775807
assert strconv.parse_int('127', 10, 8) ? == 127
assert strconv.parse_int('128', 10, 8) ? == 127
assert strconv.parse_int('32767', 10, 16) ? == 32767
assert strconv.parse_int('32768', 10, 16) ? == 32767
assert strconv.parse_int('2147483647', 10, 32) ? == 2147483647
assert strconv.parse_int('2147483648', 10, 32) ? == 2147483647
assert strconv.parse_int('9223372036854775807', 10, 64) ? == 9223372036854775807
assert strconv.parse_int('9223372036854775808', 10, 64) ? == 9223372036854775807
assert strconv.parse_int('baobab', 36, 64) ? == 683058467
// Invalid bit sizes
assert strconv.parse_int('123', 10, 65) == 0
assert strconv.parse_int('123', 10, -1) == 0
if x := strconv.parse_int('123', 10, -1) {
println(x)
assert false
} else {
assert true
}
if x := strconv.parse_int('123', 10, 65) {
println(x)
assert false
} else {
assert true
}
}
fn test_common_parse_uint2() {

View File

@ -34,6 +34,5 @@ fn test_format_uint() {
assert strconv.format_int(255, 16) == 'ff'
assert strconv.format_uint(18446744073709551615, 2) == '1111111111111111111111111111111111111111111111111111111111111111'
assert strconv.format_uint(18446744073709551615, 16) == 'ffffffffffffffff'
assert strconv.parse_int('baobab', 36, 64) == 683058467
assert strconv.format_uint(683058467, 36) == 'baobab'
}

View File

@ -1221,7 +1221,7 @@ fn decode_u_escapes(s string, start int, escapes_pos []int) string {
for i, pos in escapes_pos {
idx := pos - start
end_idx := idx + 6 // "\uXXXX".len == 6
ss << utf32_to_str(u32(strconv.parse_uint(s[idx + 2..end_idx], 16, 32)))
ss << utf32_to_str(u32(strconv.parse_uint(s[idx + 2..end_idx], 16, 32) or { 0 }))
if i + 1 < escapes_pos.len {
ss << s[end_idx..escapes_pos[i + 1] - start]
} else {

View File

@ -159,7 +159,7 @@ fn (mut s Scanner) text_scan() Token {
if codepoint.len != 4 {
return s.error('unicode escape must have 4 hex digits')
}
val := u32(strconv.parse_uint(codepoint.bytestr(), 16, 32))
val := u32(strconv.parse_uint(codepoint.bytestr(), 16, 32) or { 0 })
converted := utf32_to_str(val)
converted_bytes := converted.bytes()
chrs << converted_bytes