2020-02-03 05:00:36 +01:00
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
2019-10-17 18:37:55 +02:00
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
// TODO: use optionals, or some way to return default with error.
module strconv
2019-12-19 22:29:37 +01:00
const (
// int_size is the size in bits of an int or uint value.
// It is hard-coded to 32 below; the commented-out line is a computed alternative that is not in use:
// int_size = 32 << (~u32(0) >> 63)
// Earlier, commented-out definition of max_u64 (not in use):
// max_u64 = u64(u64(1 << 63) - 1)
2019-10-17 18:37:55 +02:00
int_size = 32
2020-02-07 14:49:14 +01:00
// max_u64 is taken from the C macro UINT64_MAX and is used by the parsers
// below as the upper bound when checking for u64 overflow.
max_u64 = u64 ( C . UINT64_MAX ) // as u64 // use this until we add support
2019-10-17 18:37:55 +02:00
)
2020-05-18 22:54:08 +02:00
// byte_to_lower returns c with the ASCII case bit (the difference between
// `x` and `X`) set. For ASCII letters this yields the lower-case letter;
// other bytes simply get that bit OR-ed in, so callers must range-check
// the result themselves.
pub fn byte_to_lower(c byte) byte {
	lowered := c | (`x` - `X`)
	return lowered
}
2019-11-28 07:46:10 +01:00
// common_parse_uint is called by parse_uint. It wraps common_parse_uint2 and
// decides what to return when parsing stops early on a character:
// - error_on_non_digit: when true, stopping on a character that is neither a
//   decimal digit nor an ASCII letter yields 0 instead of the value parsed so far.
// - error_on_high_digit: when true, stopping on a digit or letter whose value
//   is not valid in the selected base yields 0 instead of the value parsed so far.
// When the relevant flag is false, the value accumulated before the offending
// character is returned.
// For non-positive codes from common_parse_uint2 (success, wrong base, wrong
// bit size, overflow) the value it produced is returned unchanged.
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) u64 {
	result, code := common_parse_uint2(s, _base, _bit_size)
	if code <= 0 {
		return result
	}
	// A positive code is the index of the offending character + 1.
	pos := code - 1
	if pos >= s.len {
		// Empty input: there is no character to classify, result is already 0.
		return result
	}
	c := s[pos]
	is_alnum := (c >= `0` && c <= `9`) || (c >= `a` && c <= `z`) || (c >= `A` && c <= `Z`)
	if is_alnum {
		// A digit or letter that stops the parse is one that is too high for the base.
		if error_on_high_digit {
			return u64(0)
		}
	} else if error_on_non_digit {
		return u64(0)
	}
	return result
}
// common_parse_uint2 parses s as an unsigned integer in the given base
// (0, or 2 to 36) that must fit in _bit_size bits (0 to 64; 0 selects int_size).
// With base 0 the base is taken from the prefix of s ("0b", "0o", "0x",
// otherwise decimal) and underscores between digits are skipped.
//
// The first returned value contains the parsed value. When parsing stops on
// an unparseable character it is the value accumulated so far; on overflow it
// is the largest value that fits in the requested bit size.
// The second returned value contains the error code:
//    0 = OK,
//   >0 = index of the first non-parseable character + 1 (1 is also returned
//        for an empty string or for misplaced underscores),
//   -1 = wrong base,
//   -2 = wrong bit size,
//   -3 = overflow.
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
	mut bit_size := _bit_size
	mut base := _base
	if s.len < 1 || !underscore_ok(s) {
		// return error('parse_uint: syntax error $s')
		return u64(0), 1
	}
	base0 := base == 0
	mut start_index := 0
	if 2 <= base && base <= 36 {
		// valid base; nothing to do
	} else if base == 0 {
		// Look for binary, octal, hex prefix.
		base = 10
		if s[0] == `0` {
			if s.len >= 3 && byte_to_lower(s[1]) == `b` {
				base = 2
				start_index += 2
			} else if s.len >= 3 && byte_to_lower(s[1]) == `o` {
				base = 8
				start_index += 2
			} else if s.len >= 3 && byte_to_lower(s[1]) == `x` {
				base = 16
				start_index += 2
			} else if s.len >= 2 && (s[1] >= `0` && s[1] <= `9`) {
				// manage leading zeros in decimal base's numbers
				base = 10
				start_index++
			} else {
				base = 8
				start_index++
			}
		}
	} else {
		// return error('parse_uint: base error $s - $base')
		return u64(0), -1
	}
	if bit_size == 0 {
		bit_size = int_size
	} else if bit_size < 0 || bit_size > 64 {
		// return error('parse_uint: bitsize error $s - $bit_size')
		return u64(0), -2
	}
	// cutoff is the smallest number such that cutoff*base > max_u64.
	cutoff := max_u64 / u64(base) + u64(1)
	// max_val is the largest value representable in bit_size bits.
	max_val := if bit_size == 64 { max_u64 } else { (u64(1) << u64(bit_size)) - u64(1) }
	mut n := u64(0)
	for i in start_index .. s.len {
		c := s[i]
		cl := byte_to_lower(c)
		mut d := byte(0)
		if c == `_` && base0 {
			// underscore placement was already validated by underscore_ok
			continue
		} else if `0` <= c && c <= `9` {
			d = c - `0`
		} else if `a` <= cl && cl <= `z` {
			d = cl - `a` + 10
		} else {
			// not a digit or letter: stop and report its position
			return n, i + 1
		}
		if d >= byte(base) {
			// digit value too high for this base: stop and report its position
			return n, i + 1
		}
		if n >= cutoff {
			// n*base overflows
			// return error('parse_uint: range error $s')
			return max_val, -3
		}
		n *= u64(base)
		n1 := n + u64(d)
		if n1 < n || n1 > max_val {
			// n+d overflows u64 or exceeds the requested bit size
			// return error('parse_uint: range error $s')
			return max_val, -3
		}
		n = n1
	}
	return n, 0
}
2019-10-17 18:37:55 +02:00
2019-11-28 07:46:10 +01:00
// parse_uint is the unsigned counterpart of parse_int: it forwards to
// common_parse_uint with both error flags enabled.
pub fn parse_uint(s string, _base int, _bit_size int) u64 {
	strict := true
	return common_parse_uint(s, _base, _bit_size, strict, strict)
}
2019-11-28 07:46:10 +01:00
// common_parse_int is the shared implementation behind parse_int.
// It strips one optional leading `+` or `-`, parses the remainder with
// common_parse_uint (forwarding both error flags), and then clamps the
// magnitude to the signed range of the requested bit size (0 selects int_size).
// An empty string, or a parsed magnitude of 0, yields 0.
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) i64 {
	if _s.len < 1 {
		// return error('parse_int: syntax error $s')
		return i64(0)
	}
	// Pick off the leading sign, if any.
	first := _s[0]
	negative := first == `-`
	digits := if negative || first == `+` { _s[1..] } else { _s }
	// Parse the unsigned magnitude; the bit size is passed through as given.
	magnitude := common_parse_uint(digits, base, _bit_size, error_on_non_digit, error_on_high_digit)
	if magnitude == 0 {
		return i64(0)
	}
	width := if _bit_size == 0 { int_size } else { _bit_size }
	// TODO: check should u64(bit_size-1) be size of int (32)?
	// limit is 2^(width-1): one past the largest positive value, and the
	// magnitude of the most negative value.
	limit := u64(1) << u64(width - 1)
	if negative {
		if magnitude > limit {
			// out of range: clamp to the most negative value
			return -i64(limit)
		}
		return -i64(magnitude)
	}
	if magnitude >= limit {
		// out of range: clamp to the largest positive value
		return i64(limit - u64(1))
	}
	return i64(magnitude)
}
2019-12-19 22:29:37 +01:00
2019-11-28 07:46:10 +01:00
// parse_int interprets the string _s in the given base (0, 2 to 36) and
// bit size (0 to 64) and returns the corresponding value.
//
// If base is 0, the real base is implied by the prefix that follows the
// optional sign: 2 for "0b", 8 for "0o", 16 for "0x", and 10 otherwise
// (a leading zero followed by a decimal digit is parsed as decimal).
// For base 0 only, underscore characters between digits are skipped.
//
// _bit_size gives the width the result must fit into; 0 selects int_size.
// Values outside the signed range of that width are clamped to its limits.
// No error is reported: an empty string, an unparseable character, an
// invalid base or an invalid bit size all make the function return 0.
pub fn parse_int(_s string, base int, _bit_size int) i64 {
	strict := true
	return common_parse_int(_s, base, _bit_size, strict, strict)
}
2019-10-17 18:37:55 +02:00
// atoi converts the decimal string s to an int, like parse_int(s, 10, 0)
// followed by a conversion to int. Short inputs are handled by a direct
// digit loop; everything else goes through parse_int.
// Returns 0 when s is not a valid number.
pub fn atoi(s string) int {
	// Inputs shorter than this many bytes cannot overflow an int in the digit loop.
	max_fast_len := if int_size == 32 { 10 } else if int_size == 64 { 19 } else { 0 }
	if s.len == 0 || s.len >= max_fast_len {
		// Slow path for invalid, big, or underscored integers.
		return int(parse_int(s, 10, 0))
	}
	// Fast path for small integers that fit int type.
	has_sign := s[0] == `-` || s[0] == `+`
	if has_sign && s.len == 1 {
		// a lone sign is not a number
		return 0
	}
	first_digit := if has_sign { 1 } else { 0 }
	mut acc := 0
	for pos in first_digit .. s.len {
		// byte subtraction wraps, so anything below `0` also ends up > 9
		digit := s[pos] - `0`
		if digit > 9 {
			return 0
		}
		acc = acc * 10 + int(digit)
	}
	if s[0] == `-` {
		return -acc
	}
	return acc
}
// underscore_ok reports whether every underscore in s is in an allowed place.
// An underscore is allowed only directly after a digit or a base prefix, and
// it must be directly followed by a digit. Validating this once up front lets
// the parsers skip underscores without further checks.
fn underscore_ok(s string) bool {
	// prev records the class of the previous character:
	// ^ for beginning of number,
	// 0 for a digit or base prefix,
	// _ for an underscore,
	// ! for none of the above.
	mut prev := `^`
	mut pos := 0
	// Optional sign.
	if s.len >= 1 && (s[0] == `-` || s[0] == `+`) {
		pos++
	}
	// Optional base prefix.
	mut is_hex := false
	if s.len - pos >= 2 && s[pos] == `0` {
		marker := byte_to_lower(s[pos + 1])
		if marker == `b` || marker == `o` || marker == `x` {
			// base prefix counts as a digit for "underscore as digit separator"
			prev = `0`
			is_hex = marker == `x`
			pos += 2
		}
	}
	// Number proper.
	for pos < s.len {
		ch := s[pos]
		pos++
		lower := byte_to_lower(ch)
		is_digit := (`0` <= ch && ch <= `9`) || (is_hex && `a` <= lower && lower <= `f`)
		if is_digit {
			// Digits are always okay.
			prev = `0`
		} else if ch == `_` {
			// Underscore must follow digit.
			if prev != `0` {
				return false
			}
			prev = `_`
		} else if prev == `_` {
			// Underscore must also be followed by digit.
			return false
		} else {
			// Saw non-digit, non-underscore.
			prev = `!`
		}
	}
	// A trailing underscore is not allowed.
	return prev != `_`
}
2019-10-18 07:20:03 +02:00