scanner: cleanup scanning of `>` (#11558)

pull/11565/head
Ruofan XU 2021-09-21 05:57:05 +08:00 committed by GitHub
parent d51f8ed878
commit 1430ebc5e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 24 additions and 41 deletions

View File

@ -919,56 +919,39 @@ fn (mut s Scanner) text_scan() token.Token {
return s.new_token(.ge, '', 2)
} else if nextc == `>` {
if s.pos + 2 < s.text.len {
// first eat the possible spaces eg `>> (` => `>>(`
mut non_space_pos := s.pos + 2
for non_space_pos < s.text.len && s.text[non_space_pos].is_space() {
non_space_pos++
}
match s.text[non_space_pos] {
`=` {
s.pos += 2
return s.new_token(.right_shift_assign, '', 3)
}
// definite generic cases such as Foo<Bar<int>>{}
`)`, `{`, `}`, `,`, `>`, `[`, `]` {
return s.new_token(.gt, '', 1)
}
// note: a two-level generic call and a right shift share all the remaining patterns
if s.text[s.pos + 2] == `=` {
s.pos += 2
return s.new_token(.right_shift_assign, '', 3)
} else if s.last_lt >= 0 && s.pos - s.last_lt < 100 {
// an algorithm to discriminate two-level generic call and shift-right patterns
// such as `foo<Baz, Bar<int>>(a)` vs `a, b := Foo{}<Foo{}, bar>>(baz)`
// which is hard but could be discriminated by my following algorithm
// @SleepyRoy if you have smarter algorithm :-)
else {
// heuristic (almost always correct): the last `<T>` of a two-level generic call cannot be extremely long,
// so we only consider a `<` found fewer than 100 positions back, which should be ample for real code
if s.last_lt >= 0 && s.pos - s.last_lt < 100 {
// ...Bar<int, []Foo, [20]f64, map[string][]bool>> =>
// int, []Foo, [20]f64, map[string][]bool =>
// int, Foo, f64, bool
typs := s.text[s.last_lt + 1..s.pos].trim_right('>').split(',').map(it.trim_space().trim_right('>').after(']'))
// if any typ is neither builtin nor Type, then the case is not generics
for typ in typs {
if typ.len == 0 {
s.pos++
return s.new_token(.right_shift, '', 2)
}
if typ !in ast.builtin_type_names && !(typ[0].is_capital()
&& typ[1..].bytes().all(it.is_alnum())) {
s.pos++
return s.new_token(.right_shift, '', 2)
}
}
return s.new_token(.gt, '', 1)
// heuristic (almost always correct): the last `<T>` of a two-level generic call cannot be extremely long,
// so we only consider a `<` found fewer than 100 positions back, which should be ample for real code
// ...Bar<int, []Foo, [20]f64, map[string][]bool>> =>
// int, []Foo, [20]f64, map[string][]bool =>
// int, Foo, f64, bool
typs := s.text[s.last_lt + 1..s.pos].split(',').map(it.trim_space().trim_right('>').after(']'))
// if any typ is neither Type nor builtin, then the case is shift-right
for typ in typs {
// TODO: combine two ifs once logic shortcut with `.all()` is fixed
if typ.len == 0 {
s.pos++
return s.new_token(.right_shift, '', 2)
}
if !(typ[0].is_capital() && typ[1..].bytes().all(it.is_alnum()))
&& typ !in ast.builtin_type_names {
s.pos++
return s.new_token(.right_shift, '', 2)
}
s.pos++
return s.new_token(.right_shift, '', 2)
}
return s.new_token(.gt, '', 1)
}
}
s.pos++
return s.new_token(.right_shift, '', 2)
} else {
return s.new_token(.gt, '', 1)
}
return s.new_token(.gt, '', 1)
}
`<` {
if nextc == `=` {