From 96554fad71f6454b00f3ff63bb4f3c0212dd814c Mon Sep 17 00:00:00 2001
From: Larpon <Larpon@users.noreply.github.com>
Date: Thu, 18 Nov 2021 18:46:19 +0100
Subject: [PATCH] toml: improve number conversion (#12509)

---
 vlib/toml/ast/types.v                       | 21 +++++++++++++++++++++
 vlib/toml/checker/checker.v                 | 20 ++------------------
 vlib/toml/tests/burntsushi.toml-test_test.v | 19 +++++++++++--------
 vlib/toml/toml.v                            | 12 +++++++-----
 4 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/vlib/toml/ast/types.v b/vlib/toml/ast/types.v
index ef7f3aad82..f3412194f6 100644
--- a/vlib/toml/ast/types.v
+++ b/vlib/toml/ast/types.v
@@ -4,6 +4,7 @@
 module ast
 
 import toml.token
+import strconv
 
 // Key is a sumtype representing all types of keys that
 // can be found in a TOML document.
@@ -168,6 +169,26 @@ pub fn (n Number) str() string {
 	return str
 }
 
+// i64 returns the `n Number` as an `i64` value, or `0` when `strconv.parse_int` reports an error for `n.text`.
+pub fn (n Number) i64() i64 {
+	if n.text.starts_with('0x') {
+		hex := n.text.all_after('0x').to_upper().replace('_', '') // drop the prefix and `_` separators
+		return strconv.parse_int(hex, 16, 64) or { i64(0) }
+	} else if n.text.starts_with('0o') {
+		oct := n.text.all_after('0o').replace('_', '')
+		return strconv.parse_int(oct, 8, 64) or { i64(0) }
+	} else if n.text.starts_with('0b') {
+		bin := n.text.all_after('0b').replace('_', '')
+		return strconv.parse_int(bin, 2, 64) or { i64(0) }
+	}
+	return strconv.parse_int(n.text, 0, 64) or { i64(0) } // 64 bit like the branches above, a bit size of 0 selects the range of `int`
+}
+
+// f64 returns the `n Number` as an `f64` value; `_` separators in `n.text` are removed before the conversion.
+pub fn (n Number) f64() f64 {
+	return n.text.replace('_', '').f64()
+}
+
 // Date is the data representation of a TOML date type (`YYYY-MM-DD`).
 // Date types can appear both as keys and values in TOML documents.
 // Keys named like dates e.g. `1980-12-29` are considered Bare key types.
diff --git a/vlib/toml/checker/checker.v b/vlib/toml/checker/checker.v
index 834e211c02..a437c8fee2 100644
--- a/vlib/toml/checker/checker.v
+++ b/vlib/toml/checker/checker.v
@@ -10,6 +10,7 @@ import toml.token
 import toml.scanner
 import encoding.utf8
 import time
+import strconv
 
 pub const allowed_basic_escape_chars = [`u`, `U`, `b`, `t`, `n`, `f`, `r`, `"`, `\\`]
 
@@ -464,28 +465,11 @@ fn (c Checker) check_utf8_validity(q ast.Quoted) ? {
 	}
 }
 
-// hex2int returns the value of `hex` as `int`.
-// NOTE that the code assumes `hex` to be in uppercase A-F.
-// It does not work if the length of the input string is beyond the max value of `int`.
-// Also and there is no error trapping for illegal hex characters.
-fn hex2int(hex string) int {
-	// Adapted from https://stackoverflow.com/a/130552/1904615
-	mut val := 0
-	for i := 0; i < hex.len; i++ {
-		if hex[i] <= 57 {
-			val += (hex[i] - 48) * (1 << (4 * (hex.len - 1 - i)))
-		} else {
-			val += (hex[i] - 55) * (1 << (4 * (hex.len - 1 - i)))
-		}
-	}
-	return val
-}
-
 // validate_utf8_codepoint_string returns an error if `str` is not a valid Unicode code point.
 // `str` is expected to be a `string` containing *only* hex values.
 // Any preludes or prefixes like `0x` could pontentially yield wrong results.
 fn validate_utf8_codepoint_string(str string) ? {
-	int_val := hex2int(str)
+	int_val := strconv.parse_int(str, 16, 64) or { i64(-1) }
 	if int_val > checker.utf8_max || int_val < 0 {
 		return error('Unicode code point `$str` is outside the valid Unicode scalar value ranges.')
 	}
diff --git a/vlib/toml/tests/burntsushi.toml-test_test.v b/vlib/toml/tests/burntsushi.toml-test_test.v
index 4399544b32..4a0dcf1033 100644
--- a/vlib/toml/tests/burntsushi.toml-test_test.v
+++ b/vlib/toml/tests/burntsushi.toml-test_test.v
@@ -22,11 +22,8 @@ const (
 		'string/escape-tricky.toml',
 		'string/multiline.toml',
 		// Integer
-		'integer/literals.toml',
 		'integer/long.toml',
 		// Float
-		'float/exponent.toml',
-		'float/underscore.toml',
 		'float/inf-and-nan.toml',
 		// Comment
 		'comment/tricky.toml',
@@ -241,12 +238,18 @@ fn to_burntsushi(value ast.Value) string {
 			return '{ "type": "null", "value": "$json_text" }'
 		}
 		ast.Number {
-			if value.text.contains('.') || value.text.to_lower().contains('e') {
-				json_text := value.text.f64()
-				return '{ "type": "float", "value": "$json_text" }'
+			if value.text.contains('inf') || value.text.contains('nan') {
+				return '{ "type": "float", "value": "$value.text" }'
 			}
-			i64_ := strconv.parse_int(value.text, 0, 0) or { i64(0) }
-			return '{ "type": "integer", "value": "$i64_" }'
+			if !value.text.starts_with('0x')
+				&& (value.text.contains('.') || value.text.to_lower().contains('e')) {
+				mut val := '$value.f64()'.replace('.e+', '.0e') // json notation
+				if !val.contains('.') && val != '0' { // json notation
+					val += '.0'
+				}
+				return '{ "type": "float", "value": "$val" }'
+			}
+			return '{ "type": "integer", "value": "$value.i64()" }'
 		}
 		map[string]ast.Value {
 			mut str := '{ '
diff --git a/vlib/toml/toml.v b/vlib/toml/toml.v
index 3f4cf809c2..d86aa09de8 100644
--- a/vlib/toml/toml.v
+++ b/vlib/toml/toml.v
@@ -7,7 +7,6 @@ import toml.ast
 import toml.input
 import toml.scanner
 import toml.parser
-import strconv
 
 // Null is used in sumtype checks as a "default" value when nothing else is possible.
 pub struct Null {
@@ -199,11 +198,14 @@ pub fn (d Doc) ast_to_any(value ast.Value) Any {
 			return Any(value.text)
 		}
 		ast.Number {
-			if value.text.contains('.') || value.text.to_lower().contains('e') {
-				return Any(value.text.f64())
+			// if value.text.contains('inf') || value.text.contains('nan') {
+			// return Any() // TODO: `inf` and `nan` are not converted yet, they currently fall through to `value.i64()` below
+			//}
+			if !value.text.starts_with('0x')
+				&& (value.text.contains('.') || value.text.to_lower().contains('e')) {
+				return Any(value.f64())
 			}
-			v := strconv.parse_int(value.text, 0, 0) or { i64(0) }
-			return Any(v)
+			return Any(value.i64())
 		}
 		ast.Bool {
 			str := (value as ast.Bool).text