strconv: optimize functions
also make it the responsibility if the caller to properly trim whitespacepull/2404/head
parent
c355e967ad
commit
6dea2359ab
vlib/strconv
|
@ -8,112 +8,111 @@ module strconv
|
||||||
|
|
||||||
|
|
||||||
const(
|
const(
|
||||||
// int_size is the size in bits of an int or uint value.
|
// int_size is the size in bits of an int or uint value.
|
||||||
// int_size = 32 << (~u32(0) >> 63)
|
// int_size = 32 << (~u32(0) >> 63)
|
||||||
// max_u64 = u64(u64(1 << 63) - 1)
|
// max_u64 = u64(u64(1 << 63) - 1)
|
||||||
int_size = 32
|
int_size = 32
|
||||||
max_u64 = u64(C.UINT64_MAX) // use this until we add support
|
max_u64 = u64(C.UINT64_MAX) // use this until we add support
|
||||||
)
|
)
|
||||||
|
|
||||||
fn byte_to_lower(c byte) byte {
|
fn byte_to_lower(c byte) byte {
|
||||||
return c | (`x` - `X`)
|
return c | (`x` - `X`)
|
||||||
}
|
}
|
||||||
|
|
||||||
// parse_uint is like parse_int but for unsigned numbers.
|
// parse_uint is like parse_int but for unsigned numbers.
|
||||||
pub fn parse_uint(_s string, _base int, _bit_size int) u64 {
|
pub fn parse_uint(s string, _base int, _bit_size int) u64 {
|
||||||
mut s := _s.trim_space()
|
mut bit_size := _bit_size
|
||||||
mut bit_size := _bit_size
|
mut base := _base
|
||||||
mut base := _base
|
|
||||||
|
|
||||||
if s == "" || !underscore_ok(s) {
|
if s.len < 1 || !underscore_ok(s) {
|
||||||
// return error('parse_uint: syntax error $s')
|
// return error('parse_uint: syntax error $s')
|
||||||
return u64(0)
|
return u64(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
base0 := base == 0
|
base0 := base == 0
|
||||||
s0 := s
|
mut start_index := 0
|
||||||
if 2 <= base && base <= 36 {
|
if 2 <= base && base <= 36 {
|
||||||
// valid base; nothing to do
|
// valid base; nothing to do
|
||||||
} else if base == 0 {
|
} else if base == 0 {
|
||||||
// Look for octal, hex prefix.
|
// Look for octal, hex prefix.
|
||||||
base = 10
|
base = 10
|
||||||
if s[0] == `0` {
|
if s[0] == `0` {
|
||||||
if s.len >= 3 && byte_to_lower(s[1]) == `b` {
|
if s.len >= 3 && byte_to_lower(s[1]) == `b` {
|
||||||
base = 2
|
base = 2
|
||||||
s = s.right(2)
|
start_index += 2
|
||||||
}
|
}
|
||||||
else if s.len >= 3 && byte_to_lower(s[1]) == `o` {
|
else if s.len >= 3 && byte_to_lower(s[1]) == `o` {
|
||||||
base = 8
|
base = 8
|
||||||
s = s.right(2)
|
start_index += 2
|
||||||
}
|
}
|
||||||
else if s.len >= 3 && byte_to_lower(s[1]) == `x` {
|
else if s.len >= 3 && byte_to_lower(s[1]) == `x` {
|
||||||
base = 16
|
base = 16
|
||||||
s = s.right(2)
|
start_index += 2
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
base = 8
|
base = 8
|
||||||
s = s.right(1)
|
start_index++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// return error('parse_uint: base error $s0 - $base')
|
// return error('parse_uint: base error $s - $base')
|
||||||
return u64(0)
|
return u64(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
if bit_size == 0 {
|
if bit_size == 0 {
|
||||||
bit_size = int(int_size)
|
bit_size = int(int_size)
|
||||||
} else if bit_size < 0 || bit_size > 64 {
|
} else if bit_size < 0 || bit_size > 64 {
|
||||||
// return error('parse_uint: bitsize error $s0 - $bit_size')
|
// return error('parse_uint: bitsize error $s - $bit_size')
|
||||||
return u64(0)
|
return u64(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cutoff is the smallest number such that cutoff*base > maxUint64.
|
// Cutoff is the smallest number such that cutoff*base > maxUint64.
|
||||||
// Use compile-time constants for common cases.
|
// Use compile-time constants for common cases.
|
||||||
cutoff := u64(max_u64/u64(base)) + u64(1)
|
cutoff := u64(max_u64/u64(base)) + u64(1)
|
||||||
max_val := if bit_size == 64 {
|
max_val := if bit_size == 64 {
|
||||||
// TODO: investigate
|
|
||||||
// u64(1)<<64(bit_size) - u64(1)
|
|
||||||
max_u64
|
max_u64
|
||||||
} else {
|
} else {
|
||||||
u64(u32(1)<<u32(bit_size - u32(1)))
|
u64(u64(1)<<u64(bit_size))-u64(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
mut underscores := false
|
mut underscores := false
|
||||||
mut n := u64(0)
|
mut n := u64(0)
|
||||||
for _, c in s {
|
for i in start_index..s.len {
|
||||||
|
c := s[i]
|
||||||
|
cl := byte_to_lower(c)
|
||||||
mut d := byte(0)
|
mut d := byte(0)
|
||||||
cl := byte_to_lower(c)
|
|
||||||
if c == `_` && base0 {
|
if c == `_` && base0 {
|
||||||
// underscore_ok already called
|
// underscore_ok already called
|
||||||
underscores = true
|
underscores = true
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
else if `0` <= c && c <= `9` { d = c - `0` }
|
else if `0` <= c && c <= `9` { d = c - `0` }
|
||||||
else if `a` <= cl && cl <= `z` { d = cl - `a` + 10 }
|
else if `a` <= cl && cl <= `z` { d = cl - `a` + 10 }
|
||||||
else {
|
else {
|
||||||
// return error('parse_uint: syntax error $s0')
|
// return error('parse_uint: syntax error $s')
|
||||||
return u64(0)
|
return u64(0)
|
||||||
}
|
}
|
||||||
if d >= byte(base) {
|
if d >= byte(base) {
|
||||||
// return error('parse_uint: syntax error $s0')
|
// return error('parse_uint: syntax error $s')
|
||||||
return u64(0)
|
return u64(0)
|
||||||
}
|
}
|
||||||
if n >= cutoff {
|
if n >= cutoff {
|
||||||
// n*base overflows
|
// n*base overflows
|
||||||
// return error('parse_uint: range error $s0')
|
// return error('parse_uint: range error $s')
|
||||||
return max_val
|
return max_val
|
||||||
}
|
}
|
||||||
n *= u64(base)
|
n *= u64(base)
|
||||||
n1 := n + u64(d)
|
n1 := n + u64(d)
|
||||||
if n1 < n || n1 > u64(max_val) {
|
if n1 < n || n1 > max_val {
|
||||||
// n+v overflows
|
// n+v overflows
|
||||||
// return error('parse_uint: range error $s0')
|
// return error('parse_uint: range error $s')
|
||||||
return max_val
|
return max_val
|
||||||
}
|
}
|
||||||
n = n1
|
n = n1
|
||||||
}
|
}
|
||||||
if underscores && !underscore_ok(s0) {
|
if underscores && !underscore_ok(s) {
|
||||||
// return error('parse_uint: syntax error $s0')
|
// return error('parse_uint: syntax error $s')
|
||||||
return u64(0)
|
return u64(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
return n
|
return n
|
||||||
|
@ -133,14 +132,13 @@ pub fn parse_uint(_s string, _base int, _bit_size int) u64 {
|
||||||
// If bitSize is below 0 or above 64, an error is returned.
|
// If bitSize is below 0 or above 64, an error is returned.
|
||||||
pub fn parse_int(_s string, base int, _bit_size int) i64 {
|
pub fn parse_int(_s string, base int, _bit_size int) i64 {
|
||||||
mut s := _s
|
mut s := _s
|
||||||
mut bit_size := _bit_size
|
mut bit_size := _bit_size
|
||||||
|
|
||||||
if s == '' {
|
if s.len < 1 {
|
||||||
// return error('parse_int: syntax error $s')
|
// return error('parse_int: syntax error $s')
|
||||||
return i64(0)
|
return i64(0)
|
||||||
}
|
}
|
||||||
// Pick off leading sign.
|
// Pick off leading sign.
|
||||||
s0 := s
|
|
||||||
mut neg := false
|
mut neg := false
|
||||||
if s[0] == `+` {
|
if s[0] == `+` {
|
||||||
s = s.right(1)
|
s = s.right(1)
|
||||||
|
@ -151,8 +149,8 @@ pub fn parse_int(_s string, base int, _bit_size int) i64 {
|
||||||
|
|
||||||
// Convert unsigned and check range.
|
// Convert unsigned and check range.
|
||||||
// un := parse_uint(s, base, bit_size) or {
|
// un := parse_uint(s, base, bit_size) or {
|
||||||
// return i64(0)
|
// return i64(0)
|
||||||
// }
|
// }
|
||||||
un := parse_uint(s, base, bit_size)
|
un := parse_uint(s, base, bit_size)
|
||||||
if un == 0 {
|
if un == 0 {
|
||||||
return i64(0)
|
return i64(0)
|
||||||
|
@ -178,32 +176,30 @@ pub fn parse_int(_s string, base int, _bit_size int) i64 {
|
||||||
|
|
||||||
|
|
||||||
// atoi is equivalent to parse_int(s, 10, 0), converted to type int.
|
// atoi is equivalent to parse_int(s, 10, 0), converted to type int.
|
||||||
pub fn atoi(_s string) int {
|
pub fn atoi(s string) int {
|
||||||
mut s := _s
|
|
||||||
|
|
||||||
if (int_size == 32 && (0 < s.len && s.len < 10)) ||
|
if (int_size == 32 && (0 < s.len && s.len < 10)) ||
|
||||||
(int_size == 64 && (0 < s.len && s.len < 19)) {
|
(int_size == 64 && (0 < s.len && s.len < 19)) {
|
||||||
// Fast path for small integers that fit int type.
|
// Fast path for small integers that fit int type.
|
||||||
s0 := s
|
mut start_idx := 0
|
||||||
if s[0] == `-` || s[0] == `+` {
|
if s[0] == `-` || s[0] == `+` {
|
||||||
s = s.right(1)
|
start_idx++
|
||||||
if s.len < 1 {
|
if s.len-start_idx < 1 {
|
||||||
// return 0, &NumError{fnAtoi, s0, ErrSyntax}
|
// return 0, &NumError{fnAtoi, s0, ErrSyntax}
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mut n := 0
|
mut n := 0
|
||||||
for _, ch0 in s {
|
for i in start_idx..s.len {
|
||||||
ch := ch0 - `0`
|
ch := s[i] - `0`
|
||||||
if ch > 9 {
|
if ch > 9 {
|
||||||
// return 0, &NumError{fnAtoi, s0, ErrSyntax}
|
// return 0, &NumError{fnAtoi, s0, ErrSyntax}
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
n = n*10 + int(ch)
|
n = n*10 + int(ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
return if s0[0] == `-` { -n } else { n }
|
return if s[0] == `-` { -n } else { n }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slow path for invalid, big, or underscored integers.
|
// Slow path for invalid, big, or underscored integers.
|
||||||
|
@ -215,8 +211,7 @@ pub fn atoi(_s string) int {
|
||||||
// underscore_ok reports whether the underscores in s are allowed.
|
// underscore_ok reports whether the underscores in s are allowed.
|
||||||
// Checking them in this one function lets all the parsers skip over them simply.
|
// Checking them in this one function lets all the parsers skip over them simply.
|
||||||
// Underscore must appear only between digits or between a base prefix and a digit.
|
// Underscore must appear only between digits or between a base prefix and a digit.
|
||||||
fn underscore_ok(_s string) bool {
|
fn underscore_ok(s string) bool {
|
||||||
mut s := _s
|
|
||||||
// saw tracks the last character (class) we saw:
|
// saw tracks the last character (class) we saw:
|
||||||
// ^ for beginning of number,
|
// ^ for beginning of number,
|
||||||
// 0 for a digit or base prefix,
|
// 0 for a digit or base prefix,
|
||||||
|
@ -227,21 +222,23 @@ fn underscore_ok(_s string) bool {
|
||||||
|
|
||||||
// Optional sign.
|
// Optional sign.
|
||||||
if s.len >= 1 && (s[0] == `-` || s[0] == `+`) {
|
if s.len >= 1 && (s[0] == `-` || s[0] == `+`) {
|
||||||
s = s.right(1)
|
i++
|
||||||
}
|
}
|
||||||
|
|
||||||
// Optional base prefix.
|
// Optional base prefix.
|
||||||
mut hex := false
|
mut hex := false
|
||||||
if s.len >= 2 && s[0] == `0` && (byte_to_lower(s[1]) == `b` || byte_to_lower(s[1]) == `o` || byte_to_lower(s[1]) == `x`) {
|
if s.len-i >= 2 && s[i] == `0` &&
|
||||||
i = 2
|
(byte_to_lower(s[i+1]) == `b` || byte_to_lower(s[i+1]) == `o` || byte_to_lower(s[i+1]) == `x`) {
|
||||||
saw = `0` // base prefix counts as a digit for "underscore as digit separator"
|
saw = `0` // base prefix counts as a digit for "underscore as digit separator"
|
||||||
hex = byte_to_lower(s[1]) == `x`
|
hex = byte_to_lower(s[i+1]) == `x`
|
||||||
|
i+=2
|
||||||
}
|
}
|
||||||
|
|
||||||
// Number proper.
|
// Number proper.
|
||||||
for ; i < s.len; i++ {
|
for ; i < s.len; i++ {
|
||||||
// Digits are always okay.
|
// Digits are always okay.
|
||||||
if (`0` <= s[i] && s[i] <= `9`) || (hex && `a` <= byte_to_lower(s[i]) && byte_to_lower(s[i]) <= `f`) {
|
if (`0` <= s[i] && s[i] <= `9`) ||
|
||||||
|
(hex && `a` <= byte_to_lower(s[i]) && byte_to_lower(s[i]) <= `f`) {
|
||||||
saw = `0`
|
saw = `0`
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -262,3 +259,4 @@ fn underscore_ok(_s string) bool {
|
||||||
}
|
}
|
||||||
return saw != `_`
|
return saw != `_`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue