toml: improve number conversion (#12509)

pull/12511/head
Larpon 2021-11-18 18:46:19 +01:00 committed by GitHub
parent 3caeadfa0d
commit 96554fad71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 31 deletions

View File

@ -4,6 +4,7 @@
module ast module ast
import toml.token import toml.token
import strconv
// Key is a sumtype representing all types of keys that // Key is a sumtype representing all types of keys that
// can be found in a TOML document. // can be found in a TOML document.
@ -168,6 +169,26 @@ pub fn (n Number) str() string {
return str return str
} }
// i64 returns the `n Number` as an `i64` value.
// Literals prefixed with `0x`, `0o` or `0b` are parsed as hexadecimal, octal
// or binary respectively, after any `_` separators have been removed.
// Any other text is handed to `strconv.parse_int` with base 0.
// If the text can not be parsed, `0` is returned.
pub fn (n Number) i64() i64 {
	if n.text.starts_with('0x') {
		hex := n.text.all_after('0x').to_upper().replace('_', '')
		return strconv.parse_int(hex, 16, 64) or { i64(0) }
	} else if n.text.starts_with('0o') {
		oct := n.text.all_after('0o').replace('_', '')
		return strconv.parse_int(oct, 8, 64) or { i64(0) }
	} else if n.text.starts_with('0b') {
		bin := n.text.all_after('0b').replace('_', '')
		return strconv.parse_int(bin, 2, 64) or { i64(0) }
	}
	// Ask for a 64-bit result explicitly, like the prefixed branches above.
	// NOTE(review): a bit size of 0 presumably selects the width of `int`
	// instead of 64 bits, which would limit large TOML integers - confirm
	// against the `strconv.parse_int` documentation.
	return strconv.parse_int(n.text, 0, 64) or { i64(0) }
}
// f64 converts the text of `n Number` to an `f64` value.
// All `_` characters are removed from the text before it is converted.
pub fn (n Number) f64() f64 {
	digits := n.text.replace('_', '')
	return digits.f64()
}
// Date is the data representation of a TOML date type (`YYYY-MM-DD`). // Date is the data representation of a TOML date type (`YYYY-MM-DD`).
// Date types can appear both as keys and values in TOML documents. // Date types can appear both as keys and values in TOML documents.
// Keys named like dates e.g. `1980-12-29` are considered Bare key types. // Keys named like dates e.g. `1980-12-29` are considered Bare key types.

View File

@ -10,6 +10,7 @@ import toml.token
import toml.scanner import toml.scanner
import encoding.utf8 import encoding.utf8
import time import time
import strconv
pub const allowed_basic_escape_chars = [`u`, `U`, `b`, `t`, `n`, `f`, `r`, `"`, `\\`] pub const allowed_basic_escape_chars = [`u`, `U`, `b`, `t`, `n`, `f`, `r`, `"`, `\\`]
@ -464,28 +465,11 @@ fn (c Checker) check_utf8_validity(q ast.Quoted) ? {
} }
} }
// hex2int converts the hexadecimal digits in `hex` to an `int`.
// NOTE: letters are expected to be uppercase `A`-`F`; lowercase digits give wrong results.
// The result is not meaningful when `hex` has more digits than fit in an `int`.
// Illegal hex characters are not detected and no error is reported for them.
fn hex2int(hex string) int {
	// Adapted from https://stackoverflow.com/a/130552/1904615
	mut total := 0
	last := hex.len - 1
	for idx, ch in hex {
		// Each digit is weighted by 16^(position from the right), i.e. a shift of 4 bits per position.
		if ch > 57 {
			// Above ASCII `9` (57): treated as a letter, where `A` (65) maps to 10.
			total += (ch - 55) * (1 << (4 * (last - idx)))
		} else {
			// ASCII `0` (48) maps to 0.
			total += (ch - 48) * (1 << (4 * (last - idx)))
		}
	}
	return total
}
// validate_utf8_codepoint_string returns an error if `str` is not a valid Unicode code point. // validate_utf8_codepoint_string returns an error if `str` is not a valid Unicode code point.
// `str` is expected to be a `string` containing *only* hex values. // `str` is expected to be a `string` containing *only* hex values.
// Any preludes or prefixes like `0x` could potentially yield wrong results. // Any preludes or prefixes like `0x` could potentially yield wrong results.
fn validate_utf8_codepoint_string(str string) ? { fn validate_utf8_codepoint_string(str string) ? {
int_val := hex2int(str) int_val := strconv.parse_int(str, 16, 64) or { i64(-1) }
if int_val > checker.utf8_max || int_val < 0 { if int_val > checker.utf8_max || int_val < 0 {
return error('Unicode code point `$str` is outside the valid Unicode scalar value ranges.') return error('Unicode code point `$str` is outside the valid Unicode scalar value ranges.')
} }

View File

@ -22,11 +22,8 @@ const (
'string/escape-tricky.toml', 'string/escape-tricky.toml',
'string/multiline.toml', 'string/multiline.toml',
// Integer // Integer
'integer/literals.toml',
'integer/long.toml', 'integer/long.toml',
// Float // Float
'float/exponent.toml',
'float/underscore.toml',
'float/inf-and-nan.toml', 'float/inf-and-nan.toml',
// Comment // Comment
'comment/tricky.toml', 'comment/tricky.toml',
@ -241,12 +238,18 @@ fn to_burntsushi(value ast.Value) string {
return '{ "type": "null", "value": "$json_text" }' return '{ "type": "null", "value": "$json_text" }'
} }
ast.Number { ast.Number {
if value.text.contains('.') || value.text.to_lower().contains('e') { if value.text.contains('inf') || value.text.contains('nan') {
json_text := value.text.f64() return '{ "type": "float", "value": "$value.text" }'
return '{ "type": "float", "value": "$json_text" }'
} }
i64_ := strconv.parse_int(value.text, 0, 0) or { i64(0) } if !value.text.starts_with('0x')
return '{ "type": "integer", "value": "$i64_" }' && (value.text.contains('.') || value.text.to_lower().contains('e')) {
mut val := '$value.f64()'.replace('.e+', '.0e') // json notation
if !val.contains('.') && val != '0' { // json notation
val += '.0'
}
return '{ "type": "float", "value": "$val" }'
}
return '{ "type": "integer", "value": "$value.i64()" }'
} }
map[string]ast.Value { map[string]ast.Value {
mut str := '{ ' mut str := '{ '

View File

@ -7,7 +7,6 @@ import toml.ast
import toml.input import toml.input
import toml.scanner import toml.scanner
import toml.parser import toml.parser
import strconv
// Null is used in sumtype checks as a "default" value when nothing else is possible. // Null is used in sumtype checks as a "default" value when nothing else is possible.
pub struct Null { pub struct Null {
@ -199,11 +198,14 @@ pub fn (d Doc) ast_to_any(value ast.Value) Any {
return Any(value.text) return Any(value.text)
} }
ast.Number { ast.Number {
if value.text.contains('.') || value.text.to_lower().contains('e') { // if value.text.contains('inf') || value.text.contains('nan') {
return Any(value.text.f64()) // return Any() // TODO
//}
if !value.text.starts_with('0x')
&& (value.text.contains('.') || value.text.to_lower().contains('e')) {
return Any(value.f64())
} }
v := strconv.parse_int(value.text, 0, 0) or { i64(0) } return Any(value.i64())
return Any(v)
} }
ast.Bool { ast.Bool {
str := (value as ast.Bool).text str := (value as ast.Bool).text