From 278c08704c5a89b2839307406005eec397f26d31 Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Wed, 22 Dec 2021 14:34:02 +0100 Subject: [PATCH] all: support slices with negative indexes `#[start..end]` (gated arrays) (#12914) --- vlib/builtin/array.v | 51 +++++++++++++++++ vlib/builtin/gated_array_string_test.v | 78 ++++++++++++++++++++++++++ vlib/builtin/string.v | 54 ++++++++++++++++++ vlib/v/ast/ast.v | 2 + vlib/v/checker/checker.v | 14 +++-- vlib/v/fmt/fmt.v | 5 ++ vlib/v/fmt/tests/gated_array_keep.vv | 5 ++ vlib/v/gen/c/index.v | 19 ++++++- vlib/v/parser/expr.v | 11 +++- vlib/v/parser/parse_type.v | 10 ++-- vlib/v/parser/parser.v | 8 ++- vlib/v/scanner/scanner.v | 6 ++ vlib/v/token/token.v | 3 + 13 files changed, 250 insertions(+), 16 deletions(-) create mode 100644 vlib/builtin/gated_array_string_test.v create mode 100644 vlib/v/fmt/tests/gated_array_keep.vv diff --git a/vlib/builtin/array.v b/vlib/builtin/array.v index ff1b12a213..96ce76239e 100644 --- a/vlib/builtin/array.v +++ b/vlib/builtin/array.v @@ -384,6 +384,57 @@ fn (a array) slice(start int, _end int) array { return res } +// slice_ni returns an array using the same buffer as original array +// but starting from the `start` element and ending with the element before +// the `end` element of the original array. +// This function can use negative indexes `a.slice_ni(-3, a.len)` +// that get the last 3 elements of the array otherwise it return an empty array. +// This function always return a valid array. +fn (a array) slice_ni(_start int, _end int) array { + mut end := _end + mut start := _start + + if start < 0 { + start = a.len + start + if start < 0 { + start = 0 + } + } + + if end < 0 { + end = a.len + end + if end < 0 { + end = 0 + } + } + if end >= a.len { + end = a.len + } + + if start >= a.len || start > end { + res := array{ + element_size: a.element_size + data: a.data + offset: 0 + len: 0 + cap: 0 + } + return res + } + + offset := start * a.element_size + data := unsafe { &byte(a.data) + offset } + l := end - start + res := array{ + element_size: a.element_size + data: data + offset: a.offset + offset + len: l + cap: l + } + return res +} + // used internally for [2..4] fn (a array) slice2(start int, _end int, end_max bool) array { end := if end_max { a.len } else { _end } diff --git a/vlib/builtin/gated_array_string_test.v b/vlib/builtin/gated_array_string_test.v new file mode 100644 index 0000000000..394d5dc990 --- /dev/null +++ b/vlib/builtin/gated_array_string_test.v @@ -0,0 +1,78 @@ +fn test_gated_arrays() { + a := [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + assert a#[-1..] == [9] + assert a#[..-9] == [0] + assert a#[-9..-7] == [1, 2] + assert a#[-2..] == [8, 9] + + // fixed array + a1 := [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]! + assert a1#[-1..] == [9] + assert a1#[..-9] == [0] + assert a1#[-9..-7] == [1, 2] + assert a1#[-2..] == [8, 9] + + // empty array + assert a#[-3..-4] == [] // start > end + assert a#[20..] == [] // start > array.len + assert a#[-20..-10] == [] // start+len < 0 + assert a#[20..-9] == [] // start > end && start > end +} + +fn test_gated_strings() { + a := '0123456789' + assert a#[-1..] == '9' + assert a#[..-9] == '0' + assert a#[-9..-7] == '12' + assert a#[-2..] == '89' + + // empty string + assert a#[-3..-4] == '' // start > end + assert a#[20..] == '' // start > array.len + assert a#[-20..-10] == '' // start+len < 0 + assert a#[20..-9] == '' // start > end && start > end + + // + // test negative indexes in slices from github discussion + // + s := '0123456789' + + // normal behaviour + assert s#[1..3] == '12' + assert s#[..3] == '012' + assert s#[8..] == '89' + + // negative indexes behaviour + assert s#[-2..] == '89' + assert s#[..-8] == '01' + assert s#[2..-2] == '234567' + assert s#[-12..-16] == '' + assert s#[-8..-2] == '234567' + + // out of bound both indexes + assert s#[12..14] == '' + assert s#[-12..16] == '0123456789' +} + +fn test_gated_mixed_strings() { + // + // test negative indexes in slices + // + a := [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + + // normal behaviour + assert a#[1..3].str() == '[1, 2]' + assert a#[..3].str() == '[0, 1, 2]' + assert a#[8..].str() == '[8, 9]' + + // negative indexes behaviour + assert a#[-2..].str() == '[8, 9]' + assert a#[..-8].str() == '[0, 1]' + assert a#[2..-2].str() == '[2, 3, 4, 5, 6, 7]' + assert a#[-12..-16].str() == '[]' + assert a#[-8..-2].str() == '[2, 3, 4, 5, 6, 7]' + + // out of bound both indexes + assert a#[12..14].str() == '[]' + assert a#[-12..16].str() == a.str() +} diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index d3d4c60a9b..cf5c654b48 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -788,6 +788,60 @@ pub fn (s string) substr(start int, end int) string { return res } +// substr_ni returns the string between index positions `start` and `end` allowing negative indexes +// This function always return a valid string. +[direct_array_access] +pub fn (s string) substr_ni(_start int, _end int) string { + mut start := _start + mut end := _end + + // borders math + if start < 0 { + start = s.len + start + if start < 0 { + start = 0 + } + } + + if end < 0 { + end = s.len + end + if end < 0 { + end = 0 + } + } + if end >= s.len { + end = s.len + } + + if start > s.len || end < start { + mut res := string{ + str: unsafe { malloc_noscan(1) } + len: 0 + } + unsafe { + res.str[0] = 0 + } + return res + } + + len := end - start + + // string copy + mut res := string{ + str: unsafe { malloc_noscan(len + 1) } + len: len + } + for i in 0 .. len { + unsafe { + res.str[i] = s.str[start + i] + } + } + unsafe { + res.str[len] = 0 + } + return res +} + // index returns the position of the first character of the input string. // It will return `-1` if the input string can't be found. [direct_array_access] diff --git a/vlib/v/ast/ast.v b/vlib/v/ast/ast.v index eeeadfc4b9..52ffb87f9b 100644 --- a/vlib/v/ast/ast.v +++ b/vlib/v/ast/ast.v @@ -822,6 +822,7 @@ pub mut: is_farray bool is_option bool // IfGuard is_direct bool // Set if the underlying memory can be safely accessed + is_gated bool // #[] gated array } pub struct IfExpr { @@ -1207,6 +1208,7 @@ pub: has_high bool has_low bool pos token.Position + is_gated bool // #[] gated array } pub struct CastExpr { diff --git a/vlib/v/checker/checker.v b/vlib/v/checker/checker.v index 1ec850bc85..e80a05dce5 100644 --- a/vlib/v/checker/checker.v +++ b/vlib/v/checker/checker.v @@ -4331,7 +4331,7 @@ pub fn (mut c Checker) prefix_expr(mut node ast.PrefixExpr) ast.Type { return right_type } -fn (mut c Checker) check_index(typ_sym &ast.TypeSymbol, index ast.Expr, index_type ast.Type, pos token.Position, range_index bool) { +fn (mut c Checker) check_index(typ_sym &ast.TypeSymbol, index ast.Expr, index_type ast.Type, pos token.Position, range_index bool, is_gated bool) { index_type_sym := c.table.sym(index_type) // println('index expr left=$typ_sym.name $node.pos.line_nr') // if typ_sym.kind == .array && (!(ast.type_idx(index_type) in ast.number_type_idxs) && @@ -4345,7 +4345,7 @@ fn (mut c Checker) check_index(typ_sym &ast.TypeSymbol, index ast.Expr, index_ty } c.error('$type_str', pos) } - if index is ast.IntegerLiteral { + if index is ast.IntegerLiteral && !is_gated { if index.val[0] == `-` { c.error('negative index `$index.val`', index.pos) } else if typ_sym.kind == .array_fixed { @@ -4428,11 +4428,11 @@ pub fn (mut c Checker) index_expr(mut node ast.IndexExpr) ast.Type { if mut node.index is ast.RangeExpr { // [1..2] if node.index.has_low { index_type := c.expr(node.index.low) - c.check_index(typ_sym, node.index.low, index_type, node.pos, true) + c.check_index(typ_sym, node.index.low, index_type, node.pos, true, node.is_gated) } if node.index.has_high { index_type := c.expr(node.index.high) - c.check_index(typ_sym, node.index.high, index_type, node.pos, true) + c.check_index(typ_sym, node.index.high, index_type, node.pos, true, node.is_gated) } // array[1..2] => array // fixed_array[1..2] => array @@ -4460,7 +4460,11 @@ pub fn (mut c Checker) index_expr(mut node ast.IndexExpr) ast.Type { } } else { index_type := c.expr(node.index) - c.check_index(typ_sym, node.index, index_type, node.pos, false) + // for [1] case #[1] is not allowed! + if node.is_gated == true { + c.error('`#[]` allowed only for ranges', node.pos) + } + c.check_index(typ_sym, node.index, index_type, node.pos, false, false) } value_type := c.table.value_type(typ) if value_type != ast.void_type { diff --git a/vlib/v/fmt/fmt.v b/vlib/v/fmt/fmt.v index 2c346ab3cb..d613e35c9d 100644 --- a/vlib/v/fmt/fmt.v +++ b/vlib/v/fmt/fmt.v @@ -1898,6 +1898,11 @@ pub fn (mut f Fmt) if_guard_expr(node ast.IfGuardExpr) { pub fn (mut f Fmt) index_expr(node ast.IndexExpr) { f.expr(node.left) + if node.index is ast.RangeExpr { + if node.index.is_gated { + f.write('#') + } + } f.write('[') f.expr(node.index) f.write(']') diff --git a/vlib/v/fmt/tests/gated_array_keep.vv b/vlib/v/fmt/tests/gated_array_keep.vv new file mode 100644 index 0000000000..b834048577 --- /dev/null +++ b/vlib/v/fmt/tests/gated_array_keep.vv @@ -0,0 +1,5 @@ +a := [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +assert a#[-1..] == [9] +assert a#[..-9] == [0] +assert a#[-9..-7] == [1, 2] +assert a#[-2..] == [8, 9] diff --git a/vlib/v/gen/c/index.v b/vlib/v/gen/c/index.v index 2658a3c5c0..baab24346d 100644 --- a/vlib/v/gen/c/index.v +++ b/vlib/v/gen/c/index.v @@ -60,10 +60,18 @@ fn (mut g Gen) index_expr(node ast.IndexExpr) { fn (mut g Gen) range_expr(node ast.IndexExpr, range ast.RangeExpr) { sym := g.table.final_sym(node.left_type) if sym.kind == .string { - g.write('string_substr(') + if node.is_gated { + g.write('string_substr_ni(') + } else { + g.write('string_substr(') + } g.expr(node.left) } else if sym.kind == .array { - g.write('array_slice(') + if node.is_gated { + g.write('array_slice_ni(') + } else { + g.write('array_slice(') + } if node.left_type.is_ptr() { g.write('*') } @@ -72,7 +80,12 @@ fn (mut g Gen) range_expr(node ast.IndexExpr, range ast.RangeExpr) { // Convert a fixed array to V array when doing `fixed_arr[start..end]` info := sym.info as ast.ArrayFixed noscan := g.check_noscan(info.elem_type) - g.write('array_slice(new_array_from_c_array${noscan}(') + if node.is_gated { + g.write('array_slice_ni(') + } else { + g.write('array_slice(') + } + g.write('new_array_from_c_array${noscan}(') g.write('$info.size') g.write(', $info.size') g.write(', sizeof(') diff --git a/vlib/v/parser/expr.v b/vlib/v/parser/expr.v index c4bdd1b58b..8e8a84fd54 100644 --- a/vlib/v/parser/expr.v +++ b/vlib/v/parser/expr.v @@ -361,8 +361,15 @@ pub fn (mut p Parser) expr_with_left(left ast.Expr, precedence int, is_stmt_iden return node } p.is_stmt_ident = is_stmt_ident - } else if p.tok.kind == .lsbr && (p.inside_fn || p.tok.line_nr == p.prev_tok.line_nr) { - node = p.index_expr(node) + } else if p.tok.kind in [.lsbr, .nilsbr] + && (p.inside_fn || p.tok.line_nr == p.prev_tok.line_nr) { + // node = p.index_expr(node) + if p.tok.kind == .nilsbr { + node = p.index_expr(node, true) + } else { + node = p.index_expr(node, false) + } + p.is_stmt_ident = is_stmt_ident if p.tok.kind == .lpar && p.tok.line_nr == p.prev_tok.line_nr && node is ast.IndexExpr { p.next() diff --git a/vlib/v/parser/parse_type.v b/vlib/v/parser/parse_type.v index 0ea2c20942..606492a858 100644 --- a/vlib/v/parser/parse_type.v +++ b/vlib/v/parser/parse_type.v @@ -8,8 +8,8 @@ import v.ast import v.util import v.token -pub fn (mut p Parser) parse_array_type() ast.Type { - p.check(.lsbr) +pub fn (mut p Parser) parse_array_type(expecting token.Kind) ast.Type { + p.check(expecting) // fixed array if p.tok.kind in [.number, .name] { mut fixed_size := 0 @@ -88,7 +88,7 @@ pub fn (mut p Parser) parse_array_type() ast.Type { mut nr_dims := 1 // detect attr not_attr := p.peek_tok.kind != .name && p.peek_token(2).kind !in [.semicolon, .rsbr] - for p.tok.kind == .lsbr && not_attr { + for p.tok.kind == expecting && not_attr { p.next() p.check(.rsbr) nr_dims++ @@ -408,9 +408,9 @@ pub fn (mut p Parser) parse_any_type(language ast.Language, is_ptr bool, check_d // func return p.parse_fn_type('') } - .lsbr { + .lsbr, .nilsbr { // array - return p.parse_array_type() + return p.parse_array_type(p.tok.kind) } .lpar { // multiple return diff --git a/vlib/v/parser/parser.v b/vlib/v/parser/parser.v index 3900309170..138825ad41 100644 --- a/vlib/v/parser/parser.v +++ b/vlib/v/parser/parser.v @@ -2354,7 +2354,7 @@ pub fn (mut p Parser) name_expr() ast.Expr { return node } -fn (mut p Parser) index_expr(left ast.Expr) ast.IndexExpr { +fn (mut p Parser) index_expr(left ast.Expr, is_gated bool) ast.IndexExpr { // left == `a` in `a[0]` start_pos := p.tok.position() p.next() // [ @@ -2379,7 +2379,9 @@ fn (mut p Parser) index_expr(left ast.Expr) ast.IndexExpr { high: high has_high: has_high pos: pos + is_gated: is_gated } + is_gated: is_gated } } expr := p.expr(0) // `[expr]` or `[expr..` @@ -2403,7 +2405,9 @@ fn (mut p Parser) index_expr(left ast.Expr) ast.IndexExpr { has_high: has_high has_low: has_low pos: pos + is_gated: is_gated } + is_gated: is_gated } } // [expr] @@ -2433,6 +2437,7 @@ fn (mut p Parser) index_expr(left ast.Expr) ast.IndexExpr { stmts: or_stmts pos: or_pos } + is_gated: is_gated } } // `a[i] ?` @@ -2451,6 +2456,7 @@ fn (mut p Parser) index_expr(left ast.Expr) ast.IndexExpr { stmts: or_stmts pos: or_pos } + is_gated: is_gated } } diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index 1dae5ff435..39e281a16e 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -913,6 +913,12 @@ fn (mut s Scanner) text_scan() token.Token { return s.new_token(.dot, '', 1) } `#` { + // manage gated arrays/strings + if nextc == `[` { + s.pos++ + return s.new_token(.nilsbr, '', 2) + } + start := s.pos + 1 s.ignore_line() if nextc == `!` { diff --git a/vlib/v/token/token.v b/vlib/v/token/token.v index 27a6a874b0..7aadb2bb9a 100644 --- a/vlib/v/token/token.v +++ b/vlib/v/token/token.v @@ -69,6 +69,7 @@ pub enum Kind { lpar // ( rpar // ) lsbr // [ + nilsbr // #[ rsbr // ] eq // == ne // != @@ -243,6 +244,7 @@ fn build_token_str() []string { s[Kind.lpar] = '(' s[Kind.rpar] = ')' s[Kind.lsbr] = '[' + s[Kind.nilsbr] = '#[' s[Kind.rsbr] = ']' s[Kind.eq] = '==' s[Kind.ne] = '!=' @@ -379,6 +381,7 @@ pub enum Precedence { pub fn build_precedences() []Precedence { mut p := []Precedence{len: int(Kind._end_)} p[Kind.lsbr] = .index + p[Kind.nilsbr] = .index p[Kind.dot] = .call // `++` | `--` | `?` p[Kind.inc] = .postfix