v.scanner: fix ambiguity of two-level generics and shift-right (#11540)

pull/11550/head
Ruofan XU 2021-09-19 21:22:28 +08:00 committed by GitHub
parent 76f70d51f3
commit b343f19bec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 108 additions and 7 deletions

View File

@ -1274,6 +1274,13 @@ pub fn (c byte) is_letter() bool {
return (c >= `a` && c <= `z`) || (c >= `A` && c <= `Z`)
}
// is_alnum returns `true` if the byte is in range a-z, A-Z, 0-9 and `false` otherwise.
// It is the union of `is_letter` and `is_digit`.
// Example: assert byte(`V`).is_alnum() == true
[inline]
pub fn (c byte) is_alnum() bool {
	return c.is_letter() || c.is_digit()
}
// free allows for manually freeing the memory occupied by the string
[manualfree; unsafe]
pub fn (s &string) free() {

View File

@ -1534,7 +1534,7 @@ pub fn (mut f Fmt) call_expr(node ast.CallExpr) {
f.comments(arg.comments)
}
if node.is_method {
if node.name in ['map', 'filter'] {
if node.name in ['map', 'filter', 'all', 'any'] {
f.inside_lambda = true
defer {
f.inside_lambda = false

View File

@ -11,6 +11,7 @@ import v.pref
import v.util
import v.vet
import v.errors
import v.ast
const (
single_quote = `'`
@ -36,6 +37,7 @@ pub mut:
is_inter_end bool
is_enclosed_inter bool
line_comment string
last_lt int = -1 // position of latest <
// prev_tok TokenKind
is_started bool
is_print_line_on_error bool
@ -917,12 +919,49 @@ fn (mut s Scanner) text_scan() token.Token {
return s.new_token(.ge, '', 2)
} else if nextc == `>` {
if s.pos + 2 < s.text.len {
if s.text[s.pos + 2] == `=` {
s.pos += 2
return s.new_token(.right_shift_assign, '', 3)
} else if s.text[s.pos + 2] in [`(`, `)`, `{`, `>`, `,`] {
// multi-level generics such as Foo<Bar<baz>>{ }, func<Bar<baz>>( ), etc
return s.new_token(.gt, '', 1)
// first eat the possible spaces eg `>> (` => `>>(`
mut non_space_pos := s.pos + 2
for non_space_pos < s.text.len && s.text[non_space_pos].is_space() {
non_space_pos++
}
match s.text[non_space_pos] {
`=` {
s.pos += 2
return s.new_token(.right_shift_assign, '', 3)
}
// definite generic cases such as Foo<Bar<int>>{}
`)`, `{`, `}`, `,`, `>`, `[`, `]` {
return s.new_token(.gt, '', 1)
}
// note: a two-level generic call and a right shift share the remaining patterns,
// such as `foo<Baz, Bar<int>>(a)` vs `a, b := Foo{}<Foo{}, bar>>(baz)`,
// which are hard to tell apart; the heuristic below discriminates between them.
// ping @SleepyRoy if you have a smarter algorithm :-)
else {
// heuristic (almost always correct): the last <T> of a 2-level generic call cannot be extremely long;
// the distance from the latest `<` is limited to 100 here, which should be enough for real cases
if s.last_lt >= 0 && s.pos - s.last_lt < 100 {
// ...Bar<int, []Foo, [20]f64, map[string][]bool>> =>
// int, []Foo, [20]f64, map[string][]bool =>
// int, Foo, f64, bool
typs := s.text[s.last_lt + 1..s.pos].trim_right('>').split(',').map(it.trim_space().trim_right('>').after(']'))
// if any typ is neither a builtin type name nor a capitalized alphanumeric name, this is not a generic: emit a right shift
for typ in typs {
if typ.len == 0 {
s.pos++
return s.new_token(.right_shift, '', 2)
}
if typ !in ast.builtin_type_names && !(typ[0].is_capital()
&& typ[1..].bytes().all(it.is_alnum())) {
s.pos++
return s.new_token(.right_shift, '', 2)
}
}
return s.new_token(.gt, '', 1)
}
s.pos++
return s.new_token(.right_shift, '', 2)
}
}
}
s.pos++
@ -946,6 +985,7 @@ fn (mut s Scanner) text_scan() token.Token {
s.pos++
return s.new_token(.arrow, '', 2)
} else {
s.last_lt = s.pos
return s.new_token(.lt, '', 1)
}
}

View File

@ -510,6 +510,21 @@ fn test_multi_level_generics() {
two) == 20
}
// Empty is a field-less struct used as an operand (`Empty{} < Empty{}`) and as a
// generic type argument in test_generic_detection.
struct Empty {}
// < overloads the less-than operator for Empty; it ignores both operands and
// always returns true, so `Empty{} < Empty{}` is a valid boolean expression.
fn (e1 Empty) < (e2 Empty) bool {
return true
}
// TandU is a generic struct with two type parameters, holding one value of each.
struct TandU<T, U> {
// t holds the value of the first type parameter
t T
// u holds the value of the second type parameter
u U
}
// boring_function is a generic function that accepts a value of any type T,
// ignores it and always returns true.
fn boring_function<T>(t T) bool {
return true
}
fn test_generic_detection() {
v1, v2 := -1, 1
@ -530,4 +545,43 @@ fn test_generic_detection() {
assert multi_generic_args<[]int, int>([]int{}, 0)
assert multi_generic_args<map[int]int, int>(map[int]int{}, 0)
assert 0 < return_one<int>(10, 0)
// "the hardest cases"
foo, bar, baz := 1, 2, 16
res1, res2 := foo < bar, baz >> (foo + 1 - 1)
assert res1
assert res2 == 8
res3, res4 := Empty{} < Empty{}, baz >> (foo + 1 - 1)
assert res3
assert res4 == 8
assert boring_function<TandU<Empty, int>>(TandU<Empty, int>{
t: Empty{}
u: 10
})
assert boring_function<MultiLevel<MultiLevel<int>>>(MultiLevel<MultiLevel<int>>{
foo: MultiLevel<int>{
foo: 10
}
})
assert boring_function<TandU<MultiLevel<int>, []int>>(TandU<MultiLevel<int>, []int>{
t: MultiLevel<int>{
foo: 10
}
u: [10]
})
// this final case challenges your scanner :-)
assert boring_function<TandU<TandU<int,MultiLevel<Empty>>, map[string][]int>>(TandU<TandU<int,MultiLevel<Empty>>, map[string][]int>{
t: TandU<int,MultiLevel<Empty>>{
t: 20
u: MultiLevel<Empty>{
foo: Empty{}
}
}
u: {
'bar': [40]
}
})
}