2020-02-03 05:00:36 +01:00
|
|
|
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
|
2019-06-23 04:21:30 +02:00
|
|
|
// Use of this source code is governed by an MIT license
|
|
|
|
// that can be found in the LICENSE file.
|
|
|
|
|
2019-06-22 20:20:28 +02:00
|
|
|
import os
|
|
|
|
|
|
|
|
// main prints how many times each word occurs in a text file, one
// `word => count` line per distinct word, in sorted key order.
// The file is taken from os.args[1] when exactly one argument is passed;
// otherwise a usage hint is printed and 'cinderella.txt' is used.
fn main() {
	mut path := 'cinderella.txt'
	if os.args.len == 2 {
		path = os.args[1]
	} else {
		println('usage: word_counter [text_file]')
		println('using $path')
	}
	// The path is trimmed only for the read; the message shows it as given.
	text := os.read_file(path.trim_space()) or {
		println('failed to open $path')
		return
	}
	// Tally every extracted word.
	mut counts := map[string]int
	for w in extract_words(text) {
		counts[w]++
	}
	// Sort the keys so the report order is stable.
	mut sorted_keys := counts.keys()
	sorted_keys.sort()
	// Print the map
	for key in sorted_keys {
		val := counts[key]
		println('$key => $val')
	}
}
|
|
|
|
|
2019-10-16 01:52:37 +02:00
|
|
|
// extract_words lowercases `contents`, splits it into whitespace-separated
// tokens and returns the non-empty result of filter_word for each token,
// in the order the tokens appear.
fn extract_words(contents string) []string {
	// Fold every separator into a plain space so a single split is enough.
	// Tabs and carriage returns are included on purpose: filter_word keeps
	// only the first run of letters of a token, so a token such as
	// 'foo\tbar' would otherwise lose 'bar'.
	normalized := contents.to_lower().replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
	mut results := []string{}
	for token in normalized.split(' ') {
		word := filter_word(token)
		// Consecutive separators produce empty tokens; skip them.
		if word != '' {
			results << word
		}
	}
	return results
}
|
|
|
|
|
2019-06-22 20:20:28 +02:00
|
|
|
// filter_word strips surrounding punctuation from `word`: it skips leading
// bytes for which is_letter() is false and returns the first contiguous run
// of letters that follows. Returns '' when `word` contains no letters.
fn filter_word(word string) string {
	if word.len == 0 || word == ' ' {
		return ''
	}
	// Advance `start` to the first letter (or to the end of the string).
	mut start := 0
	for start < word.len {
		if word[start].is_letter() {
			break
		}
		start++
	}
	// Advance `end` past the run of letters beginning at `start`.
	mut end := start
	for end < word.len {
		if !word[end].is_letter() {
			break
		}
		end++
	}
	return word[start..end]
}
|