compiler: detect typos in function/variable/module names

pull/1955/head 0.1.19
joe-conigliaro 2019-09-13 21:10:24 +10:00 committed by Alexander Medvednikov
parent 5055ac4b23
commit 41734affb3
4 changed files with 147 additions and 0 deletions

View File

@ -1023,3 +1023,23 @@ fn (f &Fn) str_args(table &Table) string {
} }
return s return s
} }
// find_misspelled_local_var returns the local variable of `f` whose
// module-qualified name (`<f.mod>.<var.name>`) is most similar to `name`.
// Similarity is the value returned by strings.dice_coefficient.
// Returns the qualified name of the best candidate when its score is at
// least `min_match`, otherwise an empty string.
fn (f &Fn) find_misspelled_local_var(name string, min_match f64) string {
	// this check does not depend on the loop variable: when it fails,
	// every candidate would be skipped, so bail out once up front
	if !name.starts_with(f.mod) { return '' }
	mut closest := f64(0)
	mut closest_var := ''
	for var in f.local_vars {
		if var.name == '' { continue }
		n := '${f.mod}.$var.name'
		// skip candidates whose length differs from `name` by more than 3
		if n.len - name.len > 3 || name.len - n.len > 3 { continue }
		p := strings.dice_coefficient(name, n)
		// no debug output here: this runs inside the compiler and must not
		// write to stdout
		if p > closest {
			closest = p
			closest_var = n
		}
	}
	if closest >= min_match {
		return closest_var
	}
	return ''
}

View File

@ -1648,6 +1648,11 @@ fn (p mut Parser) name_expr() string {
f = p.table.find_fn(name) f = p.table.find_fn(name)
} }
if f.name == '' { if f.name == '' {
// check for misspelled function / variable / module
suggested := p.table.identify_typo(name, p.cur_fn, p.import_table)
if suggested != '' {
p.error('undefined: `$name`. did you mean:$suggested')
}
// If orig_name is a mod, then printing undefined: `mod` tells us nothing // If orig_name is a mod, then printing undefined: `mod` tells us nothing
// if p.table.known_mod(orig_name) { // if p.table.known_mod(orig_name) {
if p.table.known_mod(orig_name) || p.import_table.known_alias(orig_name) { if p.table.known_mod(orig_name) || p.import_table.known_alias(orig_name) {

View File

@ -926,3 +926,66 @@ fn (t &Type) contains_field_type(typ string) bool {
} }
return false return false
} }
// identify_typo looks for a function, local variable or imported module
// whose name closely resembles `name`.
// It returns a string holding one '\n * <kind>: `<match>`' entry per
// category that produced a match, or an empty string when nothing matched
// or `name` is shorter than 2 characters.
fn (table &Table) identify_typo(name string, current_fn &Fn, fit &FileImportTable) string {
	// names this short are not checked
	if name.len < 2 { return '' }
	// minimum dice coefficient (0.0 - 1.0) a candidate must reach
	min_match := 0.8
	// NOTE(review): presumably converts the internal `__` / `_dot_`
	// separators back to the dotted form - confirm against the name mangling
	name_orig := name.replace('__', '.').replace('_dot_', '.')
	mut output := ''
	// functions
	fname := table.find_misspelled_fn(name_orig, min_match)
	if fname != '' {
		output += '\n * function: `$fname`'
	}
	// local variables of the current function
	vname := current_fn.find_misspelled_local_var(name_orig, min_match)
	if vname != '' {
		output += '\n * variable: `$vname`'
	}
	// modules imported by the current file
	mname := table.find_misspelled_imported_mod(name_orig, fit, min_match)
	if mname != '' {
		output += '\n * module: `$mname`'
	}
	return output
}
// find_misspelled_fn scans every function in `table.fns` and returns the
// module-qualified name (`<mod>.<name>`) that scores highest against
// `name` with strings.dice_coefficient.
// An empty string is returned when the best score is below `min_match`.
fn (table &Table) find_misspelled_fn(name string, min_match f64) string {
	mut best_score := f64(0)
	mut best_name := ''
	for _, f in table.fns {
		// only consider functions when `name` starts with their module name
		if !name.starts_with(f.mod) { continue }
		candidate := '${f.mod}.$f.name'
		// lengths must be within 3 characters of each other
		if candidate.len - name.len > 3 { continue }
		if name.len - candidate.len > 3 { continue }
		score := strings.dice_coefficient(name, candidate)
		if score > best_score {
			best_score = score
			best_name = candidate
		}
	}
	result := if best_score >= min_match { best_name } else { '' }
	return result
}
// find_misspelled_imported_mod compares `name` against every import alias
// in `fit.imports`, qualified as `<fit.module_name>.<alias>`, using
// strings.dice_coefficient.
// It returns '<alias> (<mod>)' for the best-scoring import when that score
// is at least `min_match`, otherwise an empty string.
fn (table &Table) find_misspelled_imported_mod(name string, fit &FileImportTable, min_match f64) string {
	mut best_score := f64(0)
	mut best_mod := ''
	for alias, mod in fit.imports {
		// `name` has to start with the importing file's module name
		if !name.starts_with(fit.module_name) { continue }
		candidate := '${fit.module_name}.$alias'
		// lengths must be within 3 characters of each other
		if candidate.len - name.len > 3 { continue }
		if name.len - candidate.len > 3 { continue }
		score := strings.dice_coefficient(name, candidate)
		if score > best_score {
			best_score = score
			best_mod = '$alias ($mod)'
		}
	}
	result := if best_score >= min_match { best_mod } else { '' }
	return result
}

View File

@ -0,0 +1,59 @@
module strings
// levenshtein_distance returns the Levenshtein edit distance between
// `a` and `b`: the minimum number of single-byte insertions, deletions
// and substitutions needed to turn one into the other (lower is closer).
// Strings are compared byte by byte.
pub fn levenshtein_distance(a, b string) int {
	// f holds one row of the dynamic-programming table.
	mut f := [int(0); b.len+1]
	// Row 0: turning the empty prefix of `a` into the first j bytes of `b`
	// costs j insertions. Without this the results are too small.
	for j := 0; j < f.len; j++ {
		f[j] = j
	}
	for ca in a {
		mut j := 1
		// fj1 keeps the previous row's value at column j-1 (the diagonal)
		mut fj1 := f[0]
		// column 0: deleting one more byte of `a`
		f[0]++
		for cb in b {
			// cheapest of deletion (f[j]+1) and insertion (f[j-1]+1)
			mut mn := if f[j]+1 <= f[j-1]+1 { f[j]+1 } else { f[j-1]+1 }
			if cb != ca {
				// substitution costs 1 on top of the diagonal
				mn = if mn <= fj1+1 { mn } else { fj1+1 }
			} else {
				// matching bytes: the diagonal carries over for free
				mn = if mn <= fj1 { mn } else { fj1 }
			}
			fj1 = f[j]
			f[j] = mn
			j++
		}
	}
	return f[f.len-1]
}
// levenshtein_distance_percentage expresses how similar `a` and `b` are
// as a percentage derived from levenshtein_distance (higher is closer):
// (1 - distance / length of the longer string) * 100.
// Two empty strings are identical and yield 100.0.
pub fn levenshtein_distance_percentage(a, b string) f64 {
	l := if a.len >= b.len { a.len } else { b.len }
	// both strings empty: avoid 0/0, which would produce NaN
	if l == 0 { return 100.00 }
	d := levenshtein_distance(a, b)
	return (1.00 - f64(d)/f64(l)) * 100.00
}
// dice_coefficient is an implementation of the Sørensen–Dice coefficient.
// It measures the similarity of two strings from the 2-byte sequences
// (bigrams) they share: 2 * shared / (bigrams in s1 + bigrams in s2).
// Returns an f64 between 0.0 (not similar) and 1.0 (exact match).
// Strings shorter than 2 bytes have no bigrams and score 0.0 unless equal.
pub fn dice_coefficient(s1, s2 string) f64 {
	if s1.len == 0 || s2.len == 0 { return 0.0 }
	if s1 == s2 { return 1.0 }
	if s1.len < 2 || s2.len < 2 { return 0.0 }
	// count how often each bigram occurs in s1
	mut first_bigrams := map[string]int
	for i := 0; i < s1.len-1; i++ {
		// key on the actual 2-byte substring; summing the two bytes would
		// make distinct bigrams such as "ab" and "ba" collide
		bigram := s1.substr(i, i+2)
		first_bigrams[bigram] = if bigram in first_bigrams { first_bigrams[bigram]+1 } else { 1 }
	}
	mut intersection_size := 0
	for i := 0; i < s2.len-1; i++ {
		bigram := s2.substr(i, i+2)
		count := if bigram in first_bigrams { first_bigrams[bigram] } else { 0 }
		if count > 0 {
			// consume the matched occurrence so a repeated bigram in s2 is
			// not counted more often than it occurs in s1 (keeps result <= 1.0)
			first_bigrams[bigram] = count - 1
			intersection_size++
		}
	}
	return (2.0 * f64(intersection_size)) / (f64(s1.len) + f64(s2.len) - 2)
}