builtin: add string.split_by_whitespace()

2020-12-29 08:46:46 +02:00 · 2020-12-29 08:46:46 +02:00 · 84fc9cec4b
parent b87283e970
commit 84fc9cec4b
2 changed files with 38 additions and 0 deletions
--- a/vlib/builtin/string.v
+++ b/vlib/builtin/string.v
@@ -1651,3 +1651,34 @@ pub fn (s string) strip_margin_custom(del byte) string {
 		return ret.vstring_with_len(count)
 	}
 }
+
+// split_by_whitespace - extract only the non whitespace tokens/words from the given string `s`.
+// A word is a maximal run of bytes that are not a space, `\t` or `\n`; leading, trailing and
+// repeated whitespace never produces empty entries, and an empty or all-whitespace string
+// gives an empty array.
+// example: '  sss   ssss'.split_by_whitespace() => ['sss', 'ssss']
+pub fn (s string) split_by_whitespace() []string {
+	mut res := []string{}
+	// start index of the word currently being scanned; only meaningful while is_in_word
+	mut word_start := 0
+	mut is_in_word := false
+	for i, c in s {
+		// only space, tab and newline are treated as whitespace
+		is_space := c in [` `, `\t`, `\n`]
+		if !is_in_word && !is_space {
+			// first byte of a new word
+			word_start = i
+			is_in_word = true
+		} else if is_in_word && is_space {
+			// the word ended right before this whitespace byte
+			res << s[word_start .. i]
+			is_in_word = false
+		}
+	}
+	if is_in_word {
+		// collect the remainder word at the end; it may begin at index 0 (e.g. 'abc'),
+		// so `word_start` must not be required to be > 0 here
+		res << s[word_start .. s.len]
+	}
+	return res
+}
--- a/vlib/builtin/string_test.v
+++ b/vlib/builtin/string_test.v
@@ -902,3 +902,10 @@ fn test_sorter() {
 	assert arr[2].s == 'ccc'
 	assert arr[2].i == 102
 }
+
+fn test_split_by_whitespace() {
+	assert ''.split_by_whitespace() == [] // empty input yields no tokens
+	assert 'a bcde'.split_by_whitespace() == ['a', 'bcde'] // one separating space
+	assert '  sss \t  ssss '.split_by_whitespace() == ['sss', 'ssss'] // leading and trailing whitespace
+	assert '\n xyz \t abc   def'.split_by_whitespace() == ['xyz', 'abc', 'def'] // mixed whitespace kinds
+}