strconv module + use it in builtin/string instead of C functions

2019-10-18 03:37:55 +11:00 · 2019-10-18 03:37:55 +11:00 · 8d16762f03
parent 270934441c
commit 8d16762f03
2 changed files with 270 additions and 13 deletions
--- a/vlib/builtin/string.v
+++ b/vlib/builtin/string.v
@ -41,6 +41,8 @@ NB: A V string should be/is immutable from the point of view of
    when used with modules using C functions (for example os and so on).
 */

+import strconv
+
 struct string {
 //mut:
 	//hash_cache int
@ -180,12 +182,12 @@ pub fn (s string) replace(rep, with string) string {
 }

 pub fn (s string) int() int {
-	return C.atoi(*char(s.str))
+	return strconv.parse_int(s, 0, 32)
 }


 pub fn (s string) i64() i64 {
-	return C.atoll(*char(s.str))
+	return strconv.parse_int(s, 0, 64)
 }

 pub fn (s string) f32() f32 {
@ -197,20 +199,11 @@ pub fn (s string) f64() f64 {
 }

 pub fn (s string) u32() u32 {
-	//$if tinyc {
-		//return u32(s.int()) // TODO
-	//} $else {
-		return C.strtoul(*char(s.str), 0, 0)
-	//}
+	return strconv.parse_uint(s, 0, 32)
 }

 pub fn (s string) u64() u64 {
-	//$if tinyc {
-		//return u64(s.i64()) // TODO
-	//} $else {
-		return C.strtoull(*char(s.str), 0, 0)
-	//}
-	//return C.atoll(s.str) // temporary fix for tcc on windows.
+	return strconv.parse_uint(s, 0, 64)
 }

 // ==
--- a/vlib/strconv/atoi.v
+++ b/vlib/strconv/atoi.v
@ -0,0 +1,264 @@
+// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+
+// TODO: use optionals, or some way to return default with error.
+
+module strconv
+
+
+const(
+	// int_size is the bit width assumed for an int or uint value; hard-coded to 32 for now.
+	// Disabled width detection: int_size = 32 << (~u32(0) >> 63)
+	// Disabled, and wrong as written (2^63 - 1 is the i64 maximum): max_u64 = u64(u64(1 << 63) - 1)
+	int_size = 32
+	max_u64  = u64(C.UINT64_MAX) // largest u64 value, borrowed from C until we add support
+)
+
+// byte_to_lower forces the ASCII case bit of c on, which maps `A`..`Z` to
+// `a`..`z`. It is not a general lowercase conversion: any byte whose case bit
+// is clear is changed too (for example `_` becomes 127), so the result should
+// only be compared against lowercase letters.
+fn byte_to_lower(c byte) byte {
+	// The distance between a lowercase letter and its uppercase form is the case bit.
+	case_bit := `x` - `X`
+	return c | case_bit
+}
+
+// parse_uint is like parse_int but for unsigned numbers.
+//
+// Leading and trailing whitespace is ignored. base must be 0 or in 2..36.
+// With base 0 the base is taken from the prefix ("0b", "0o", "0x", or a bare
+// leading "0" for octal; 10 otherwise) and underscores between digits are
+// skipped. bit_size must be in 0..64, where 0 selects int_size.
+//
+// There is no error reporting yet: syntax, base and bit size errors return 0,
+// and a value that does not fit in bit_size bits returns the largest value
+// that does.
+pub fn parse_uint(_s string, _base int, _bit_size int) u64 {
+	mut s := _s.trim_space()
+	mut bit_size := _bit_size
+	mut base := _base
+
+	// underscore_ok is checked once, up front, for every base; the digit loop
+	// below can therefore skip underscores without re-validating them.
+	if s == "" || !underscore_ok(s) {
+		// return error('parse_uint: syntax error $s')
+		return u64(0)
+	}
+	base0 := base == 0
+	if 2 <= base && base <= 36 {
+		// valid base; nothing to do
+	} else if base == 0 {
+		// Look for a binary, octal or hex prefix.
+		base = 10
+		if s[0] == `0` {
+			if s.len >= 3 && byte_to_lower(s[1]) == `b` {
+				base = 2
+				s = s.right(2)
+			} else if s.len >= 3 && byte_to_lower(s[1]) == `o` {
+				base = 8
+				s = s.right(2)
+			} else if s.len >= 3 && byte_to_lower(s[1]) == `x` {
+				base = 16
+				s = s.right(2)
+			} else {
+				// A bare leading zero means octal.
+				base = 8
+				s = s.right(1)
+			}
+		}
+	} else {
+		// return error('parse_uint: base error $s - $base')
+		return u64(0)
+	}
+
+	if bit_size == 0 {
+		bit_size = int(int_size)
+	} else if bit_size < 0 || bit_size > 64 {
+		// return error('parse_uint: bitsize error $s - $bit_size')
+		return u64(0)
+	}
+
+	// cutoff is the smallest number such that cutoff*base > max_u64.
+	cutoff := u64(max_u64/u64(base)) + u64(1)
+	// max_val is the largest value representable in bit_size bits, that is
+	// 2^bit_size - 1. The shift is done in u64 and skipped for 64 bits, where
+	// shifting by the full width would overflow.
+	max_val := if bit_size == 64 {
+		max_u64
+	} else {
+		u64(u64(1) << u64(bit_size)) - u64(1)
+	}
+
+	mut n := u64(0)
+	for _, c in s {
+		mut d := byte(0)
+		cl := byte_to_lower(c)
+		if c == `_` && base0 {
+			// Placement was already validated by underscore_ok above.
+			continue
+		} else if `0` <= c && c <= `9` {
+			d = c - `0`
+		} else if `a` <= cl && cl <= `z` {
+			d = cl - `a` + 10
+		} else {
+			// return error('parse_uint: syntax error $s')
+			return u64(0)
+		}
+		if d >= byte(base) {
+			// Digit is not valid in this base.
+			// return error('parse_uint: syntax error $s')
+			return u64(0)
+		}
+		if n >= cutoff {
+			// n*base overflows
+			// return error('parse_uint: range error $s')
+			return max_val
+		}
+		n *= u64(base)
+		n1 := n + u64(d)
+		if n1 < n || n1 > u64(max_val) {
+			// n+d overflows u64 or exceeds bit_size bits
+			// return error('parse_uint: range error $s')
+			return max_val
+		}
+		n = n1
+	}
+
+	return n
+}
+
+// parse_int interprets a string s in the given base (0, 2 to 36) and
+// bit size (0 to 64) and returns the corresponding value i.
+//
+// Leading and trailing whitespace is ignored, and a single leading `+` or `-`
+// sign is accepted.
+//
+// If the base argument is 0, the true base is implied by the string's
+// prefix: 2 for "0b", 8 for "0" or "0o", 16 for "0x", and 10 otherwise.
+// Also, for argument base 0 only, underscore characters are permitted
+// between digits.
+//
+// The bit_size argument specifies the integer type that the result must
+// fit into; 0 selects int_size.
+//
+// There is no error reporting yet: an empty or malformed string, or anything
+// else parse_uint rejects, yields 0. A value outside the range of a signed
+// bit_size-bit integer is clamped to the nearest bound of that range.
+pub fn parse_int(_s string, base int, _bit_size int) i64 {
+	// Trim before looking for the sign. parse_uint trims its own input, so
+	// without this ' 5' would parse while ' -5' would be rejected.
+	mut s := _s.trim_space()
+	mut bit_size := _bit_size
+
+	if s == '' {
+		// return error('parse_int: syntax error $s')
+		return i64(0)
+	}
+	// Pick off leading sign.
+	mut neg := false
+	if s[0] == `+` {
+		s = s.right(1)
+	} else if s[0] == `-` {
+		neg = true
+		s = s.right(1)
+	}
+
+	// Convert unsigned and check range.
+	// un := parse_uint(s, base, bit_size) or {
+	//     return i64(0)
+	// }
+	un := parse_uint(s, base, bit_size)
+	if un == 0 {
+		return i64(0)
+	}
+
+	if bit_size == 0 {
+		bit_size = int(int_size)
+	}
+
+	// cutoff is 2^(bit_size-1): the magnitude of the most negative value and
+	// one more than the largest positive value.
+	// TODO: check should u64(bit_size-1) be size of int (32)?
+	cutoff := u64(u64(1) << u64(bit_size-1))
+	if !neg && un >= cutoff {
+		// return error('parse_int: range error $_s')
+		return i64(cutoff - u64(1))
+	}
+	if neg && un > cutoff {
+		// return error('parse_int: range error $_s')
+		return -i64(cutoff)
+	}
+
+	return if neg { -i64(un) } else { i64(un) }
+}
+
+
+// atoi converts the decimal string _s to an int.
+//
+// Short inputs (fewer than 10 bytes when int_size is 32, fewer than 19 when
+// it is 64) are converted directly: an optional sign followed by decimal
+// digits only, with 0 returned for anything else. All other inputs,
+// including the empty string, are handed to parse_int(s, 10, 0) and the
+// result is converted to int.
+pub fn atoi(_s string) int {
+	short_for_32 := int_size == 32 && (0 < _s.len && _s.len < 10)
+	short_for_64 := int_size == 64 && (0 < _s.len && _s.len < 19)
+	if !(short_for_32 || short_for_64) {
+		// Slow path for empty or long inputs.
+		wide := parse_int(_s, 10, 0)
+		return int(wide)
+	}
+
+	// Fast path: the digit count guarantees the value fits an int.
+	negative := _s[0] == `-`
+	mut digits := _s
+	if negative || _s[0] == `+` {
+		digits = _s.right(1)
+		if digits.len < 1 {
+			// A sign with nothing after it.
+			return 0
+		}
+	}
+
+	mut total := 0
+	for _, raw in digits {
+		// Bytes below `0` wrap around to a large value, so one comparison
+		// rejects everything that is not a decimal digit.
+		value := raw - `0`
+		if value > 9 {
+			return 0
+		}
+		total = total*10 + int(value)
+	}
+
+	if negative {
+		return -total
+	}
+	return total
+}
+
+// underscore_ok reports whether every underscore in _s is in a permitted
+// position: directly after a digit or base prefix, and directly before a digit.
+// An optional leading sign is ignored. Doing the check here, in one place,
+// lets the parsers simply skip underscores while converting.
+fn underscore_ok(_s string) bool {
+	// Drop an optional sign.
+	mut text := _s
+	if text.len >= 1 && (text[0] == `-` || text[0] == `+`) {
+		text = text.right(1)
+	}
+
+	// prev is the class of the previous character:
+	// ^ beginning of number,
+	// 0 digit or base prefix,
+	// _ underscore,
+	// ! anything else.
+	mut prev := `^`
+	mut pos := 0
+
+	// Optional base prefix; it counts as a digit, so "0x_1" is accepted.
+	mut is_hex := false
+	if text.len >= 2 && text[0] == `0` {
+		marker := byte_to_lower(text[1])
+		if marker == `b` || marker == `o` || marker == `x` {
+			pos = 2
+			prev = `0`
+			is_hex = marker == `x`
+		}
+	}
+
+	// Number proper.
+	for ; pos < text.len; pos++ {
+		ch := text[pos]
+		lower := byte_to_lower(ch)
+		is_digit := (`0` <= ch && ch <= `9`) || (is_hex && `a` <= lower && lower <= `f`)
+		if is_digit {
+			// Digits are always okay.
+			prev = `0`
+		} else if ch == `_` {
+			// An underscore must follow a digit.
+			if prev != `0` {
+				return false
+			}
+			prev = `_`
+		} else {
+			// An underscore must also be followed by a digit.
+			if prev == `_` {
+				return false
+			}
+			prev = `!`
+		}
+	}
+	// A trailing underscore is not allowed.
+	return prev != `_`
+}