From 84fc9cec4ba2a704751453c8ab50d958d22addc6 Mon Sep 17 00:00:00 2001
From: Delyan Angelov
Date: Tue, 29 Dec 2020 08:46:46 +0200
Subject: [PATCH] builtin: add string.split_by_whitespace()

---
 vlib/builtin/string.v      | 29 +++++++++++++++++++++++++++++
 vlib/builtin/string_test.v | 11 +++++++++++
 2 files changed, 40 insertions(+)

diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v
index bcf5c1a78d..8cd02dd860 100644
--- a/vlib/builtin/string.v
+++ b/vlib/builtin/string.v
@@ -1651,3 +1651,32 @@ pub fn (s string) strip_margin_custom(del byte) string {
 		return ret.vstring_with_len(count)
 	}
 }
+
+// split_by_whitespace - extract only the non whitespace tokens/words from the given string `s`.
+// Only space, tab (`\t`) and newline (`\n`) are treated as whitespace. A run of them acts as a
+// single separator, and leading/trailing whitespace never produces empty tokens.
+// An empty string, or one made only of whitespace, results in an empty array.
+// example: ' sss ssss'.split_by_whitespace() => ['sss', 'ssss']
+pub fn (s string) split_by_whitespace() []string {
+	mut res := []string{}
+	mut word_start := 0
+	mut is_in_word := false
+	for i, c in s {
+		is_space := c in [` `, `\t`, `\n`]
+		if !is_in_word && !is_space {
+			// first character of a new word
+			word_start = i
+			is_in_word = true
+		} else if is_in_word && is_space {
+			// the current word ended right before this whitespace character
+			res << s[word_start..i]
+			is_in_word = false
+		}
+	}
+	if is_in_word {
+		// collect the remainder word at the end; `word_start` is 0 when the string
+		// begins with that word, so only `is_in_word` may be used as the guard here
+		res << s[word_start..s.len]
+	}
+	return res
+}
diff --git a/vlib/builtin/string_test.v b/vlib/builtin/string_test.v
index 24eff12fd4..31a5a51a0a 100644
--- a/vlib/builtin/string_test.v
+++ b/vlib/builtin/string_test.v
@@ -902,3 +902,13 @@ fn test_sorter() {
 	assert arr[2].s == 'ccc'
 	assert arr[2].i == 102
 }
+
+fn test_split_by_whitespace() {
+	assert 'a bcde'.split_by_whitespace() == ['a', 'bcde']
+	assert ' sss \t ssss '.split_by_whitespace() == ['sss', 'ssss']
+	assert '\n xyz \t abc def'.split_by_whitespace() == ['xyz', 'abc', 'def']
+	assert ''.split_by_whitespace() == []
+	// a single word starting at index 0 and running to the end of the string
+	assert 'abc'.split_by_whitespace() == ['abc']
+	assert 'x'.split_by_whitespace() == ['x']
+	assert ' \t\n'.split_by_whitespace() == []
+}