280 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			V
		
	
	
			
		
		
	
	
			280 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			V
		
	
	
| // Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
 | |
| // Use of this source code is governed by an MIT license
 | |
| // that can be found in the LICENSE file.
 | |
| 
 | |
| // TODO: use optionals, or some way to return default with error.
 | |
| 
 | |
| module strconv
 | |
| 
 | |
| 
 | |
| const(
 | |
| 	// int_size is the size in bits of an int or uint value.
 | |
| 	// int_size = 32 << (~u32(0) >> 63)
 | |
| 	// max_u64 = u64(u64(1 << 63) - 1)
 | |
| 	int_size = 32
 | |
| 	max_u64  = u64(C.UINT64_MAX) // use this until we add support
 | |
| )
 | |
| 
 | |
| fn byte_to_lower(c byte) byte {
 | |
| 	return c | (`x` - `X`)
 | |
| }
 | |
| 
 | |
| // common_parse_uint is called by parse_uint and allows the parsing
 | |
| // to stop on non or invalid digit characters and return the result so far
 | |
| pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) u64 {
 | |
| 	mut bit_size := _bit_size
 | |
| 	mut base := _base
 | |
| 
 | |
| 	if s.len < 1 || !underscore_ok(s) {
 | |
| 		// return error('parse_uint: syntax error $s')
 | |
| 		return u64(0)
 | |
| 	}
 | |
| 
 | |
| 	base0 := base == 0
 | |
| 	mut start_index := 0
 | |
| 	if 2 <= base && base <= 36 {
 | |
| 		// valid base; nothing to do
 | |
| 	} else if base == 0 {
 | |
| 		// Look for octal, hex prefix.
 | |
| 		base = 10
 | |
| 		if s[0] == `0` {
 | |
| 			if s.len >= 3 && byte_to_lower(s[1]) == `b` {
 | |
| 				base = 2
 | |
| 				start_index += 2
 | |
| 			}
 | |
| 			else if s.len >= 3 && byte_to_lower(s[1]) == `o` {
 | |
| 				base = 8
 | |
| 				start_index += 2
 | |
| 			}
 | |
| 			else if s.len >= 3 && byte_to_lower(s[1]) == `x` {
 | |
| 				base = 16
 | |
| 				start_index += 2
 | |
| 			}
 | |
| 			else {
 | |
| 				base = 8
 | |
| 				start_index++
 | |
| 			}
 | |
| 		}
 | |
| 	} else {
 | |
| 		// return error('parse_uint: base error $s - $base')
 | |
| 		return u64(0)
 | |
| 	}
 | |
| 
 | |
| 	if bit_size == 0 {
 | |
| 		bit_size = int_size
 | |
| 	} else if bit_size < 0 || bit_size > 64 {
 | |
| 		// return error('parse_uint: bitsize error $s - $bit_size')
 | |
| 		return u64(0)
 | |
| 	}
 | |
| 
 | |
| 	// Cutoff is the smallest number such that cutoff*base > maxUint64.
 | |
| 	// Use compile-time constants for common cases.
 | |
| 	cutoff := max_u64/u64(base) + u64(1)
 | |
| 	max_val := if bit_size == 64 {
 | |
| 		max_u64
 | |
| 	} else {
 | |
| 		(u64(1)<<u64(bit_size))-u64(1)
 | |
| 	}
 | |
| 
 | |
| 	mut underscores := false
 | |
| 	mut n := u64(0)
 | |
| 	for i in start_index..s.len {
 | |
| 		c := s[i]
 | |
| 		cl := byte_to_lower(c)
 | |
| 		mut d := byte(0)
 | |
| 		if c == `_` && base0 {
 | |
| 			// underscore_ok already called
 | |
| 			underscores = true
 | |
| 			continue
 | |
| 		}
 | |
| 		else if `0` <= c && c <= `9`   { d = c - `0` }
 | |
| 		else if `a` <= cl && cl <= `z` { d = cl - `a` + 10 }
 | |
| 		else {
 | |
| 			if error_on_non_digit {
 | |
| 				// return error('parse_uint: syntax error $s')
 | |
| 				return u64(0)
 | |
| 			} else {
 | |
| 				break
 | |
| 			}
 | |
| 		}
 | |
| 		if d >= byte(base) {
 | |
| 			if error_on_high_digit {
 | |
| 				// return error('parse_uint: syntax error $s')
 | |
| 				return u64(0)
 | |
| 			}	else {
 | |
| 				break
 | |
| 			}
 | |
| 		}
 | |
| 		if n >= cutoff {
 | |
| 			// n*base overflows
 | |
| 			// return error('parse_uint: range error $s')
 | |
| 			return max_val
 | |
| 		}
 | |
| 		n *= u64(base)
 | |
| 		n1 := n + u64(d)
 | |
| 		if n1 < n || n1 > max_val {
 | |
| 			// n+v overflows
 | |
| 			// return error('parse_uint: range error $s')
 | |
| 			return max_val
 | |
| 		}
 | |
| 		n = n1
 | |
| 	}
 | |
| 	if underscores && !underscore_ok(s) {
 | |
| 		// return error('parse_uint: syntax error $s')
 | |
| 		return u64(0)
 | |
| 	}
 | |
| 	return n
 | |
| }
 | |
| 
 | |
| // parse_uint is like parse_int but for unsigned numbers.
 | |
| pub fn parse_uint(s string, _base int, _bit_size int) u64 {
 | |
| 	return common_parse_uint(s, _base, _bit_size, true, true)
 | |
| }
 | |
| 
 | |
| // common_parse_int is called by parse int and allows the parsing
 | |
| // to stop on non or invalid digit characters and return the result so far
 | |
| pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) i64 {
 | |
| 	mut s := _s
 | |
| 	mut bit_size := _bit_size
 | |
| 
 | |
| 	if s.len < 1 {
 | |
| 		// return error('parse_int: syntax error $s')
 | |
| 		return i64(0)
 | |
| 	}
 | |
| 	// Pick off leading sign.
 | |
| 	mut neg := false
 | |
| 	if s[0] == `+` {
 | |
| 		s = s[1..]
 | |
| 	} else if s[0] == `-` {
 | |
| 		neg = true
 | |
| 		s = s[1..]
 | |
| 	}
 | |
| 
 | |
| 	// Convert unsigned and check range.
 | |
| 	// un := parse_uint(s, base, bit_size) or {
 | |
| 	//     return i64(0)
 | |
| 	// }
 | |
| 	un := common_parse_uint(s, base, bit_size, error_on_non_digit, error_on_high_digit)
 | |
| 	if un == 0 {
 | |
| 		return i64(0)
 | |
| 	}
 | |
| 
 | |
| 	if bit_size == 0 {
 | |
| 		bit_size = int_size
 | |
| 	}
 | |
| 
 | |
| 	// TODO: check should u64(bit_size-1) be size of int (32)?
 | |
| 	cutoff := u64(1) << u64(bit_size-1)
 | |
| 	if !neg && un >= cutoff {
 | |
| 		// return error('parse_int: range error $s0')
 | |
|         return i64(cutoff - u64(1))
 | |
| 	}
 | |
| 	if neg && un > cutoff {
 | |
| 		// return error('parse_int: range error $s0')
 | |
| 		return -i64(cutoff)
 | |
| 	}
 | |
| 
 | |
| 	return if neg { -i64(un) } else { i64(un) }
 | |
| }
 | |
| // parse_int interprets a string s in the given base (0, 2 to 36) and
 | |
| // bit size (0 to 64) and returns the corresponding value i.
 | |
| //
 | |
| // If the base argument is 0, the true base is implied by the string's
 | |
| // prefix: 2 for "0b", 8 for "0" or "0o", 16 for "0x", and 10 otherwise.
 | |
| // Also, for argument base 0 only, underscore characters are permitted
 | |
| // as defined by the Go syntax for integer literals.
 | |
| //
 | |
| // The bitSize argument specifies the integer type
 | |
| // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
 | |
| // correspond to int, int8, int16, int32, and int64.
 | |
| // If bitSize is below 0 or above 64, an error is returned.
 | |
| pub fn parse_int(_s string, base int, _bit_size int) i64 {
 | |
| 	return common_parse_int(_s, base, _bit_size, true, true)
 | |
| }
 | |
| 
 | |
| // atoi is equivalent to parse_int(s, 10, 0), converted to type int.
 | |
| pub fn atoi(s string) int {
 | |
| 	if (int_size == 32 && (0 < s.len && s.len < 10)) ||
 | |
| 		(int_size == 64 && (0 < s.len && s.len < 19)) {
 | |
| 		// Fast path for small integers that fit int type.
 | |
| 		mut start_idx := 0
 | |
| 		if s[0] == `-` || s[0] == `+` {
 | |
| 			start_idx++
 | |
| 			if s.len-start_idx < 1 {
 | |
| 				// return 0, &NumError{fnAtoi, s0, ErrSyntax}
 | |
| 				return 0
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		mut n := 0
 | |
| 		for i in start_idx..s.len {
 | |
| 			ch :=  s[i] - `0`
 | |
| 			if ch > 9 {
 | |
| 				// return 0, &NumError{fnAtoi, s0, ErrSyntax}
 | |
| 				return 0
 | |
| 			}
 | |
| 			n = n*10 + int(ch)
 | |
| 		}
 | |
| 
 | |
| 		return if s[0] == `-` { -n } else { n }
 | |
| 	}
 | |
| 
 | |
| 	// Slow path for invalid, big, or underscored integers.
 | |
| 	int64 := parse_int(s, 10, 0)
 | |
| 
 | |
| 	return int(int64)
 | |
| }
 | |
| 
 | |
| // underscore_ok reports whether the underscores in s are allowed.
 | |
| // Checking them in this one function lets all the parsers skip over them simply.
 | |
| // Underscore must appear only between digits or between a base prefix and a digit.
 | |
| fn underscore_ok(s string) bool {
 | |
| 	// saw tracks the last character (class) we saw:
 | |
| 	// ^ for beginning of number,
 | |
| 	// 0 for a digit or base prefix,
 | |
| 	// _ for an underscore,
 | |
| 	// ! for none of the above.
 | |
| 	mut saw := `^`
 | |
| 	mut i := 0
 | |
| 
 | |
| 	// Optional sign.
 | |
| 	if s.len >= 1 && (s[0] == `-` || s[0] == `+`) {
 | |
| 		i++
 | |
| 	}
 | |
| 
 | |
| 	// Optional base prefix.
 | |
| 	mut hex := false
 | |
| 	if s.len-i >= 2 && s[i] == `0` &&
 | |
| 		(byte_to_lower(s[i+1]) == `b` || byte_to_lower(s[i+1]) == `o` || byte_to_lower(s[i+1]) == `x`) {
 | |
| 		saw = `0` // base prefix counts as a digit for "underscore as digit separator"
 | |
| 		hex = byte_to_lower(s[i+1]) == `x`
 | |
| 		i+=2
 | |
| 	}
 | |
| 
 | |
| 	// Number proper.
 | |
| 	for ; i < s.len; i++ {
 | |
| 		// Digits are always okay.
 | |
| 		if (`0` <= s[i] && s[i] <= `9`) ||
 | |
| 			(hex && `a` <= byte_to_lower(s[i]) && byte_to_lower(s[i]) <= `f`) {
 | |
| 			saw = `0`
 | |
| 			continue
 | |
| 		}
 | |
| 		// Underscore must follow digit.
 | |
| 		if s[i] == `_` {
 | |
| 			if saw != `0` {
 | |
| 				return false
 | |
| 			}
 | |
| 			saw = `_`
 | |
| 			continue
 | |
| 		}
 | |
| 		// Underscore must also be followed by digit.
 | |
| 		if saw == `_` {
 | |
| 			return false
 | |
| 		}
 | |
| 		// Saw non-digit, non-underscore.
 | |
| 		saw = `!`
 | |
| 	}
 | |
| 	return saw != `_`
 | |
| }
 | |
| 
 |