From 96554fad71f6454b00f3ff63bb4f3c0212dd814c Mon Sep 17 00:00:00 2001 From: Larpon Date: Thu, 18 Nov 2021 18:46:19 +0100 Subject: [PATCH] toml: improve number conversion (#12509) --- vlib/toml/ast/types.v | 21 +++++++++++++++++++++ vlib/toml/checker/checker.v | 20 ++------------------ vlib/toml/tests/burntsushi.toml-test_test.v | 19 +++++++++++-------- vlib/toml/toml.v | 12 +++++++----- 4 files changed, 41 insertions(+), 31 deletions(-) diff --git a/vlib/toml/ast/types.v b/vlib/toml/ast/types.v index ef7f3aad82..f3412194f6 100644 --- a/vlib/toml/ast/types.v +++ b/vlib/toml/ast/types.v @@ -4,6 +4,7 @@ module ast import toml.token +import strconv // Key is a sumtype representing all types of keys that // can be found in a TOML document. @@ -168,6 +169,26 @@ pub fn (n Number) str() string { return str } +// i64 returns the `n Number` as an `i64` value. Text prefixed with `0x`, `0o` or `0b` is parsed in base 16, 8 or 2 after `_` separators are removed; any other text is handed to `strconv.parse_int` with base 0. Every branch requests a 64 bit result and yields 0 when parsing fails. +pub fn (n Number) i64() i64 { + if n.text.starts_with('0x') { + hex := n.text.all_after('0x').to_upper().replace('_', '') + return strconv.parse_int(hex, 16, 64) or { i64(0) } + } else if n.text.starts_with('0o') { + oct := n.text.all_after('0o').replace('_', '') + return strconv.parse_int(oct, 8, 64) or { i64(0) } + } else if n.text.starts_with('0b') { + bin := n.text.all_after('0b').replace('_', '') + return strconv.parse_int(bin, 2, 64) or { i64(0) } + } + return strconv.parse_int(n.text, 0, 64) or { i64(0) } +} + +// f64 returns the `n Number` as an `f64` value. `_` separators are removed from the text before it is converted. +pub fn (n Number) f64() f64 { + return n.text.replace('_', '').f64() +} + // Date is the data representation of a TOML date type (`YYYY-MM-DD`). // Date types can appear both as keys and values in TOML documents. // Keys named like dates e.g. `1980-12-29` are considered Bare key types. 
diff --git a/vlib/toml/checker/checker.v b/vlib/toml/checker/checker.v index 834e211c02..a437c8fee2 100644 --- a/vlib/toml/checker/checker.v +++ b/vlib/toml/checker/checker.v @@ -10,6 +10,7 @@ import toml.token import toml.scanner import encoding.utf8 import time +import strconv pub const allowed_basic_escape_chars = [`u`, `U`, `b`, `t`, `n`, `f`, `r`, `"`, `\\`] @@ -464,28 +465,11 @@ fn (c Checker) check_utf8_validity(q ast.Quoted) ? { } } -// hex2int returns the value of `hex` as `int`. -// NOTE that the code assumes `hex` to be in uppercase A-F. -// It does not work if the length of the input string is beyond the max value of `int`. -// Also and there is no error trapping for illegal hex characters. -fn hex2int(hex string) int { - // Adapted from https://stackoverflow.com/a/130552/1904615 - mut val := 0 - for i := 0; i < hex.len; i++ { - if hex[i] <= 57 { - val += (hex[i] - 48) * (1 << (4 * (hex.len - 1 - i))) - } else { - val += (hex[i] - 55) * (1 << (4 * (hex.len - 1 - i))) - } - } - return val -} - // validate_utf8_codepoint_string returns an error if `str` is not a valid Unicode code point. // `str` is expected to be a `string` containing *only* hex values. // Any preludes or prefixes like `0x` could pontentially yield wrong results. fn validate_utf8_codepoint_string(str string) ? 
{ - int_val := hex2int(str) + int_val := strconv.parse_int(str, 16, 64) or { i64(-1) } if int_val > checker.utf8_max || int_val < 0 { return error('Unicode code point `$str` is outside the valid Unicode scalar value ranges.') } diff --git a/vlib/toml/tests/burntsushi.toml-test_test.v b/vlib/toml/tests/burntsushi.toml-test_test.v index 4399544b32..4a0dcf1033 100644 --- a/vlib/toml/tests/burntsushi.toml-test_test.v +++ b/vlib/toml/tests/burntsushi.toml-test_test.v @@ -22,11 +22,8 @@ const ( 'string/escape-tricky.toml', 'string/multiline.toml', // Integer - 'integer/literals.toml', 'integer/long.toml', // Float - 'float/exponent.toml', - 'float/underscore.toml', 'float/inf-and-nan.toml', // Comment 'comment/tricky.toml', @@ -241,12 +238,18 @@ fn to_burntsushi(value ast.Value) string { return '{ "type": "null", "value": "$json_text" }' } ast.Number { - if value.text.contains('.') || value.text.to_lower().contains('e') { - json_text := value.text.f64() - return '{ "type": "float", "value": "$json_text" }' + if value.text.contains('inf') || value.text.contains('nan') { + return '{ "type": "float", "value": "$value.text" }' } - i64_ := strconv.parse_int(value.text, 0, 0) or { i64(0) } - return '{ "type": "integer", "value": "$i64_" }' + if !value.text.starts_with('0x') + && (value.text.contains('.') || value.text.to_lower().contains('e')) { + mut val := '$value.f64()'.replace('.e+', '.0e') // json notation + if !val.contains('.') && val != '0' { // json notation + val += '.0' + } + return '{ "type": "float", "value": "$val" }' + } + return '{ "type": "integer", "value": "$value.i64()" }' } map[string]ast.Value { mut str := '{ ' diff --git a/vlib/toml/toml.v b/vlib/toml/toml.v index 3f4cf809c2..d86aa09de8 100644 --- a/vlib/toml/toml.v +++ b/vlib/toml/toml.v @@ -7,7 +7,6 @@ import toml.ast import toml.input import toml.scanner import toml.parser -import strconv // Null is used in sumtype checks as a "default" value when nothing else is possible. 
pub struct Null { @@ -199,11 +198,14 @@ pub fn (d Doc) ast_to_any(value ast.Value) Any { return Any(value.text) } ast.Number { - if value.text.contains('.') || value.text.to_lower().contains('e') { - return Any(value.text.f64()) + // if value.text.contains('inf') || value.text.contains('nan') { + // return Any() // TODO + //} + if !value.text.starts_with('0x') + && (value.text.contains('.') || value.text.to_lower().contains('e')) { + return Any(value.f64()) } - v := strconv.parse_int(value.text, 0, 0) or { i64(0) } - return Any(v) + return Any(value.i64()) } ast.Bool { str := (value as ast.Bool).text