strconv: fix `atoi` returning 0 on large strings (#10635)

pull/10646/head
Flinner 2021-07-02 10:39:57 +03:00 committed by GitHub
parent abbf71c794
commit 1486258591
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 62 additions and 52 deletions

View File

@ -465,22 +465,22 @@ pub fn (s string) bool() bool {
// int returns the value of the string as an integer `'1'.int() == 1`. // int returns the value of the string as an integer `'1'.int() == 1`.
pub fn (s string) int() int { pub fn (s string) int() int {
return int(strconv.common_parse_int(s, 0, 32, false, false)) return int(strconv.common_parse_int(s, 0, 32, false, false) or { 0 })
} }
// i64 returns the value of the string as i64 `'1'.i64() == i64(1)`. // i64 returns the value of the string as i64 `'1'.i64() == i64(1)`.
pub fn (s string) i64() i64 { pub fn (s string) i64() i64 {
return strconv.common_parse_int(s, 0, 64, false, false) return strconv.common_parse_int(s, 0, 64, false, false) or { 0 }
} }
// i8 returns the value of the string as i8 `'1'.i8() == i8(1)`. // i8 returns the value of the string as i8 `'1'.i8() == i8(1)`.
pub fn (s string) i8() i8 { pub fn (s string) i8() i8 {
return i8(strconv.common_parse_int(s, 0, 8, false, false)) return i8(strconv.common_parse_int(s, 0, 8, false, false) or { 0 })
} }
// i16 returns the value of the string as i16 `'1'.i16() == i16(1)`. // i16 returns the value of the string as i16 `'1'.i16() == i16(1)`.
pub fn (s string) i16() i16 { pub fn (s string) i16() i16 {
return i16(strconv.common_parse_int(s, 0, 16, false, false)) return i16(strconv.common_parse_int(s, 0, 16, false, false) or { 0 })
} }
// f32 returns the value of the string as f32 `'1.0'.f32() == f32(1)`. // f32 returns the value of the string as f32 `'1.0'.f32() == f32(1)`.
@ -497,17 +497,17 @@ pub fn (s string) f64() f64 {
// u16 returns the value of the string as u16 `'1'.u16() == u16(1)`. // u16 returns the value of the string as u16 `'1'.u16() == u16(1)`.
pub fn (s string) u16() u16 { pub fn (s string) u16() u16 {
return u16(strconv.common_parse_uint(s, 0, 16, false, false)) return u16(strconv.common_parse_uint(s, 0, 16, false, false) or { 0 })
} }
// u32 returns the value of the string as u32 `'1'.u32() == u32(1)`. // u32 returns the value of the string as u32 `'1'.u32() == u32(1)`.
pub fn (s string) u32() u32 { pub fn (s string) u32() u32 {
return u32(strconv.common_parse_uint(s, 0, 32, false, false)) return u32(strconv.common_parse_uint(s, 0, 32, false, false) or { 0 })
} }
// u64 returns the value of the string as u64 `'1'.u64() == u64(1)`. // u64 returns the value of the string as u64 `'1'.u64() == u64(1)`.
pub fn (s string) u64() u64 { pub fn (s string) u64() u64 {
return strconv.common_parse_uint(s, 0, 64, false, false) return strconv.common_parse_uint(s, 0, 64, false, false) or { 0 }
} }
[direct_array_access] [direct_array_access]

View File

@ -17,12 +17,16 @@ pub fn byte_to_lower(c byte) byte {
} }
// common_parse_uint is called by parse_uint and allows the parsing // common_parse_uint is called by parse_uint and allows the parsing
// to stop on non-digit or invalid digit characters and return the result so far // to stop on non-digit or invalid digit characters and return with an error
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) u64 { pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) ?u64 {
result, error := common_parse_uint2(s, _base, _bit_size) result, err := common_parse_uint2(s, _base, _bit_size)
if error != 0 { // TODO: error_on_non_digit and error_on_high_digit have no difference
if error > 0 && (error_on_non_digit || error_on_high_digit) { if err != 0 && (error_on_non_digit || error_on_high_digit) {
return u64(0) match err {
-1 { return error('common_parse_uint: wrong base $_base for $s') }
-2 { return error('common_parse_uint: wrong bit size $_bit_size for $s') }
-3 { return error('common_parse_uint: integer overflow $s') }
else { return error('common_parse_uint: syntax error $s') }
} }
} }
return result return result
@ -114,13 +118,13 @@ pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
} }
// parse_uint is like parse_int but for unsigned numbers. // parse_uint is like parse_int but for unsigned numbers.
pub fn parse_uint(s string, _base int, _bit_size int) u64 { pub fn parse_uint(s string, _base int, _bit_size int) ?u64 {
return common_parse_uint(s, _base, _bit_size, true, true) return common_parse_uint(s, _base, _bit_size, true, true)
} }
// common_parse_int is called by parse_int and allows the parsing // common_parse_int is called by parse_int and allows the parsing
// to stop on non-digit or invalid digit characters and return the result so far // to stop on non-digit or invalid digit characters and return with an error
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) i64 { pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) ?i64 {
mut s := _s mut s := _s
mut bit_size := _bit_size mut bit_size := _bit_size
if s.len < 1 { if s.len < 1 {
@ -139,7 +143,7 @@ pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit b
// un := parse_uint(s, base, bit_size) or { // un := parse_uint(s, base, bit_size) or {
// return i64(0) // return i64(0)
// } // }
un := common_parse_uint(s, base, bit_size, error_on_non_digit, error_on_high_digit) un := common_parse_uint(s, base, bit_size, error_on_non_digit, error_on_high_digit) ?
if un == 0 { if un == 0 {
return i64(0) return i64(0)
} }
@ -171,7 +175,7 @@ pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit b
// that the result must fit into. Bit sizes 0, 8, 16, 32, and 64 // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
// correspond to int, int8, int16, int32, and int64. // correspond to int, int8, int16, int32, and int64.
// If bitSize is below 0 or above 64, an error is returned. // If bitSize is below 0 or above 64, an error is returned.
pub fn parse_int(_s string, base int, _bit_size int) i64 { pub fn parse_int(_s string, base int, _bit_size int) ?i64 {
return common_parse_int(_s, base, _bit_size, true, true) return common_parse_int(_s, base, _bit_size, true, true)
} }
@ -203,7 +207,7 @@ pub fn atoi(s string) ?int {
return if s[0] == `-` { -n } else { n } return if s[0] == `-` { -n } else { n }
} }
// Slow path for invalid, big, or underscored integers. // Slow path for invalid, big, or underscored integers.
int64 := parse_int(s, 10, 0) int64 := parse_int(s, 10, 0) ?
return int(int64) return int(int64)
} }

View File

@ -1,27 +1,23 @@
import strconv import strconv
fn test_atoi() { fn test_atoi() ? {
if x := strconv.atoi('16') { assert strconv.atoi('16') ? == 16
assert x == 16 assert strconv.atoi('+16') ? == 16
} else { assert strconv.atoi('-16') ? == -16
assert false
} // invalid strings
if x := strconv.atoi('+16') {
assert x == 16
} else {
assert false
}
if x := strconv.atoi('-16') {
assert x == -16
} else {
assert false
}
if x := strconv.atoi('str') { if x := strconv.atoi('str') {
println(x) println(x)
assert false assert false
} else { } else {
assert true assert true
} }
if x := strconv.atoi('string_longer_than_10_chars') {
println(x)
assert false
} else {
assert true
}
if x := strconv.atoi('') { if x := strconv.atoi('') {
println(x) println(x)
assert false assert false
@ -30,23 +26,34 @@ fn test_atoi() {
} }
} }
fn test_parse_int() { fn test_parse_int() ? {
// Different bases // Different bases
assert strconv.parse_int('16', 16, 0) == 0x16 assert strconv.parse_int('16', 16, 0) ? == 0x16
assert strconv.parse_int('16', 8, 0) == 0o16 assert strconv.parse_int('16', 8, 0) ? == 0o16
assert strconv.parse_int('11', 2, 0) == 3 assert strconv.parse_int('11', 2, 0) ? == 3
// Different bit sizes // Different bit sizes
assert strconv.parse_int('127', 10, 8) == 127 assert strconv.parse_int('127', 10, 8) ? == 127
assert strconv.parse_int('128', 10, 8) == 127 assert strconv.parse_int('128', 10, 8) ? == 127
assert strconv.parse_int('32767', 10, 16) == 32767 assert strconv.parse_int('32767', 10, 16) ? == 32767
assert strconv.parse_int('32768', 10, 16) == 32767 assert strconv.parse_int('32768', 10, 16) ? == 32767
assert strconv.parse_int('2147483647', 10, 32) == 2147483647 assert strconv.parse_int('2147483647', 10, 32) ? == 2147483647
assert strconv.parse_int('2147483648', 10, 32) == 2147483647 assert strconv.parse_int('2147483648', 10, 32) ? == 2147483647
assert strconv.parse_int('9223372036854775807', 10, 64) == 9223372036854775807 assert strconv.parse_int('9223372036854775807', 10, 64) ? == 9223372036854775807
assert strconv.parse_int('9223372036854775808', 10, 64) == 9223372036854775807 assert strconv.parse_int('9223372036854775808', 10, 64) ? == 9223372036854775807
assert strconv.parse_int('baobab', 36, 64) ? == 683058467
// Invalid bit sizes // Invalid bit sizes
assert strconv.parse_int('123', 10, 65) == 0 if x := strconv.parse_int('123', 10, -1) {
assert strconv.parse_int('123', 10, -1) == 0 println(x)
assert false
} else {
assert true
}
if x := strconv.parse_int('123', 10, 65) {
println(x)
assert false
} else {
assert true
}
} }
fn test_common_parse_uint2() { fn test_common_parse_uint2() {

View File

@ -34,6 +34,5 @@ fn test_format_uint() {
assert strconv.format_int(255, 16) == 'ff' assert strconv.format_int(255, 16) == 'ff'
assert strconv.format_uint(18446744073709551615, 2) == '1111111111111111111111111111111111111111111111111111111111111111' assert strconv.format_uint(18446744073709551615, 2) == '1111111111111111111111111111111111111111111111111111111111111111'
assert strconv.format_uint(18446744073709551615, 16) == 'ffffffffffffffff' assert strconv.format_uint(18446744073709551615, 16) == 'ffffffffffffffff'
assert strconv.parse_int('baobab', 36, 64) == 683058467
assert strconv.format_uint(683058467, 36) == 'baobab' assert strconv.format_uint(683058467, 36) == 'baobab'
} }

View File

@ -1221,7 +1221,7 @@ fn decode_u_escapes(s string, start int, escapes_pos []int) string {
for i, pos in escapes_pos { for i, pos in escapes_pos {
idx := pos - start idx := pos - start
end_idx := idx + 6 // "\uXXXX".len == 6 end_idx := idx + 6 // "\uXXXX".len == 6
ss << utf32_to_str(u32(strconv.parse_uint(s[idx + 2..end_idx], 16, 32))) ss << utf32_to_str(u32(strconv.parse_uint(s[idx + 2..end_idx], 16, 32) or { 0 }))
if i + 1 < escapes_pos.len { if i + 1 < escapes_pos.len {
ss << s[end_idx..escapes_pos[i + 1] - start] ss << s[end_idx..escapes_pos[i + 1] - start]
} else { } else {

View File

@ -159,7 +159,7 @@ fn (mut s Scanner) text_scan() Token {
if codepoint.len != 4 { if codepoint.len != 4 {
return s.error('unicode escape must have 4 hex digits') return s.error('unicode escape must have 4 hex digits')
} }
val := u32(strconv.parse_uint(codepoint.bytestr(), 16, 32)) val := u32(strconv.parse_uint(codepoint.bytestr(), 16, 32) or { 0 })
converted := utf32_to_str(val) converted := utf32_to_str(val)
converted_bytes := converted.bytes() converted_bytes := converted.bytes()
chrs << converted_bytes chrs << converted_bytes