strconv: optimize functions

also make it the responsibility if the caller to properly trim whitespace
pull/2404/head
joe-conigliaro 2019-10-18 16:20:03 +11:00 committed by Alexander Medvednikov
parent c355e967ad
commit 6dea2359ab
1 changed files with 72 additions and 74 deletions

View File

@ -8,112 +8,111 @@ module strconv
const( const(
// int_size is the size in bits of an int or uint value. // int_size is the size in bits of an int or uint value.
// int_size = 32 << (~u32(0) >> 63) // int_size = 32 << (~u32(0) >> 63)
// max_u64 = u64(u64(1 << 63) - 1) // max_u64 = u64(u64(1 << 63) - 1)
int_size = 32 int_size = 32
max_u64 = u64(C.UINT64_MAX) // use this until we add support max_u64 = u64(C.UINT64_MAX) // use this until we add support
) )
fn byte_to_lower(c byte) byte { fn byte_to_lower(c byte) byte {
return c | (`x` - `X`) return c | (`x` - `X`)
} }
// parse_uint is like parse_int but for unsigned numbers. // parse_uint is like parse_int but for unsigned numbers.
pub fn parse_uint(_s string, _base int, _bit_size int) u64 { pub fn parse_uint(s string, _base int, _bit_size int) u64 {
mut s := _s.trim_space() mut bit_size := _bit_size
mut bit_size := _bit_size mut base := _base
mut base := _base
if s == "" || !underscore_ok(s) { if s.len < 1 || !underscore_ok(s) {
// return error('parse_uint: syntax error $s') // return error('parse_uint: syntax error $s')
return u64(0) return u64(0)
} }
base0 := base == 0 base0 := base == 0
s0 := s mut start_index := 0
if 2 <= base && base <= 36 { if 2 <= base && base <= 36 {
// valid base; nothing to do // valid base; nothing to do
} else if base == 0 { } else if base == 0 {
// Look for octal, hex prefix. // Look for octal, hex prefix.
base = 10 base = 10
if s[0] == `0` { if s[0] == `0` {
if s.len >= 3 && byte_to_lower(s[1]) == `b` { if s.len >= 3 && byte_to_lower(s[1]) == `b` {
base = 2 base = 2
s = s.right(2) start_index += 2
} }
else if s.len >= 3 && byte_to_lower(s[1]) == `o` { else if s.len >= 3 && byte_to_lower(s[1]) == `o` {
base = 8 base = 8
s = s.right(2) start_index += 2
} }
else if s.len >= 3 && byte_to_lower(s[1]) == `x` { else if s.len >= 3 && byte_to_lower(s[1]) == `x` {
base = 16 base = 16
s = s.right(2) start_index += 2
} }
else { else {
base = 8 base = 8
s = s.right(1) start_index++
} }
} }
} else { } else {
// return error('parse_uint: base error $s0 - $base') // return error('parse_uint: base error $s - $base')
return u64(0) return u64(0)
} }
if bit_size == 0 { if bit_size == 0 {
bit_size = int(int_size) bit_size = int(int_size)
} else if bit_size < 0 || bit_size > 64 { } else if bit_size < 0 || bit_size > 64 {
// return error('parse_uint: bitsize error $s0 - $bit_size') // return error('parse_uint: bitsize error $s - $bit_size')
return u64(0) return u64(0)
} }
// Cutoff is the smallest number such that cutoff*base > maxUint64. // Cutoff is the smallest number such that cutoff*base > maxUint64.
// Use compile-time constants for common cases. // Use compile-time constants for common cases.
cutoff := u64(max_u64/u64(base)) + u64(1) cutoff := u64(max_u64/u64(base)) + u64(1)
max_val := if bit_size == 64 { max_val := if bit_size == 64 {
// TODO: investigate
// u64(1)<<64(bit_size) - u64(1)
max_u64 max_u64
} else { } else {
u64(u32(1)<<u32(bit_size - u32(1))) u64(u64(1)<<u64(bit_size))-u64(1)
} }
mut underscores := false mut underscores := false
mut n := u64(0) mut n := u64(0)
for _, c in s { for i in start_index..s.len {
c := s[i]
cl := byte_to_lower(c)
mut d := byte(0) mut d := byte(0)
cl := byte_to_lower(c)
if c == `_` && base0 { if c == `_` && base0 {
// underscore_ok already called // underscore_ok already called
underscores = true underscores = true
continue continue
} }
else if `0` <= c && c <= `9` { d = c - `0` } else if `0` <= c && c <= `9` { d = c - `0` }
else if `a` <= cl && cl <= `z` { d = cl - `a` + 10 } else if `a` <= cl && cl <= `z` { d = cl - `a` + 10 }
else { else {
// return error('parse_uint: syntax error $s0') // return error('parse_uint: syntax error $s')
return u64(0) return u64(0)
} }
if d >= byte(base) { if d >= byte(base) {
// return error('parse_uint: syntax error $s0') // return error('parse_uint: syntax error $s')
return u64(0) return u64(0)
} }
if n >= cutoff { if n >= cutoff {
// n*base overflows // n*base overflows
// return error('parse_uint: range error $s0') // return error('parse_uint: range error $s')
return max_val return max_val
} }
n *= u64(base) n *= u64(base)
n1 := n + u64(d) n1 := n + u64(d)
if n1 < n || n1 > u64(max_val) { if n1 < n || n1 > max_val {
// n+v overflows // n+v overflows
// return error('parse_uint: range error $s0') // return error('parse_uint: range error $s')
return max_val return max_val
} }
n = n1 n = n1
} }
if underscores && !underscore_ok(s0) { if underscores && !underscore_ok(s) {
// return error('parse_uint: syntax error $s0') // return error('parse_uint: syntax error $s')
return u64(0) return u64(0)
} }
return n return n
@ -133,14 +132,13 @@ pub fn parse_uint(_s string, _base int, _bit_size int) u64 {
// If bitSize is below 0 or above 64, an error is returned. // If bitSize is below 0 or above 64, an error is returned.
pub fn parse_int(_s string, base int, _bit_size int) i64 { pub fn parse_int(_s string, base int, _bit_size int) i64 {
mut s := _s mut s := _s
mut bit_size := _bit_size mut bit_size := _bit_size
if s == '' { if s.len < 1 {
// return error('parse_int: syntax error $s') // return error('parse_int: syntax error $s')
return i64(0) return i64(0)
} }
// Pick off leading sign. // Pick off leading sign.
s0 := s
mut neg := false mut neg := false
if s[0] == `+` { if s[0] == `+` {
s = s.right(1) s = s.right(1)
@ -151,8 +149,8 @@ pub fn parse_int(_s string, base int, _bit_size int) i64 {
// Convert unsigned and check range. // Convert unsigned and check range.
// un := parse_uint(s, base, bit_size) or { // un := parse_uint(s, base, bit_size) or {
// return i64(0) // return i64(0)
// } // }
un := parse_uint(s, base, bit_size) un := parse_uint(s, base, bit_size)
if un == 0 { if un == 0 {
return i64(0) return i64(0)
@ -178,32 +176,30 @@ pub fn parse_int(_s string, base int, _bit_size int) i64 {
// atoi is equivalent to parse_int(s, 10, 0), converted to type int. // atoi is equivalent to parse_int(s, 10, 0), converted to type int.
pub fn atoi(_s string) int { pub fn atoi(s string) int {
mut s := _s
if (int_size == 32 && (0 < s.len && s.len < 10)) || if (int_size == 32 && (0 < s.len && s.len < 10)) ||
(int_size == 64 && (0 < s.len && s.len < 19)) { (int_size == 64 && (0 < s.len && s.len < 19)) {
// Fast path for small integers that fit int type. // Fast path for small integers that fit int type.
s0 := s mut start_idx := 0
if s[0] == `-` || s[0] == `+` { if s[0] == `-` || s[0] == `+` {
s = s.right(1) start_idx++
if s.len < 1 { if s.len-start_idx < 1 {
// return 0, &NumError{fnAtoi, s0, ErrSyntax} // return 0, &NumError{fnAtoi, s0, ErrSyntax}
return 0 return 0
} }
} }
mut n := 0 mut n := 0
for _, ch0 in s { for i in start_idx..s.len {
ch := ch0 - `0` ch := s[i] - `0`
if ch > 9 { if ch > 9 {
// return 0, &NumError{fnAtoi, s0, ErrSyntax} // return 0, &NumError{fnAtoi, s0, ErrSyntax}
return 0 return 0
} }
n = n*10 + int(ch) n = n*10 + int(ch)
} }
return if s0[0] == `-` { -n } else { n } return if s[0] == `-` { -n } else { n }
} }
// Slow path for invalid, big, or underscored integers. // Slow path for invalid, big, or underscored integers.
@ -215,8 +211,7 @@ pub fn atoi(_s string) int {
// underscore_ok reports whether the underscores in s are allowed. // underscore_ok reports whether the underscores in s are allowed.
// Checking them in this one function lets all the parsers skip over them simply. // Checking them in this one function lets all the parsers skip over them simply.
// Underscore must appear only between digits or between a base prefix and a digit. // Underscore must appear only between digits or between a base prefix and a digit.
fn underscore_ok(_s string) bool { fn underscore_ok(s string) bool {
mut s := _s
// saw tracks the last character (class) we saw: // saw tracks the last character (class) we saw:
// ^ for beginning of number, // ^ for beginning of number,
// 0 for a digit or base prefix, // 0 for a digit or base prefix,
@ -227,21 +222,23 @@ fn underscore_ok(_s string) bool {
// Optional sign. // Optional sign.
if s.len >= 1 && (s[0] == `-` || s[0] == `+`) { if s.len >= 1 && (s[0] == `-` || s[0] == `+`) {
s = s.right(1) i++
} }
// Optional base prefix. // Optional base prefix.
mut hex := false mut hex := false
if s.len >= 2 && s[0] == `0` && (byte_to_lower(s[1]) == `b` || byte_to_lower(s[1]) == `o` || byte_to_lower(s[1]) == `x`) { if s.len-i >= 2 && s[i] == `0` &&
i = 2 (byte_to_lower(s[i+1]) == `b` || byte_to_lower(s[i+1]) == `o` || byte_to_lower(s[i+1]) == `x`) {
saw = `0` // base prefix counts as a digit for "underscore as digit separator" saw = `0` // base prefix counts as a digit for "underscore as digit separator"
hex = byte_to_lower(s[1]) == `x` hex = byte_to_lower(s[i+1]) == `x`
i+=2
} }
// Number proper. // Number proper.
for ; i < s.len; i++ { for ; i < s.len; i++ {
// Digits are always okay. // Digits are always okay.
if (`0` <= s[i] && s[i] <= `9`) || (hex && `a` <= byte_to_lower(s[i]) && byte_to_lower(s[i]) <= `f`) { if (`0` <= s[i] && s[i] <= `9`) ||
(hex && `a` <= byte_to_lower(s[i]) && byte_to_lower(s[i]) <= `f`) {
saw = `0` saw = `0`
continue continue
} }
@ -262,3 +259,4 @@ fn underscore_ok(_s string) bool {
} }
return saw != `_` return saw != `_`
} }