diff --git a/doc/docs.md b/doc/docs.md index 631f3fe518..33aa6ac989 100644 --- a/doc/docs.md +++ b/doc/docs.md @@ -505,6 +505,18 @@ s := '42' n := s.int() // 42 ``` +### Runes +A `rune` represents a single Unicode code point and is an alias for `u32`. A rune literal is written between backticks: +```v +x := `🚀` +``` + +A string can be converted to an array of runes with the `.runes()` method. +```v +hello := 'Hello World 👋' +hello_runes := hello.runes() // [`H`, `e`, `l`, `l`, `o`, ` `, `W`, `o`, `r`, `l`, `d`, ` `, `👋`] +``` + ### String interpolation Basic interpolation syntax is pretty simple - use `$` before a variable name. diff --git a/vlib/builtin/rune.v b/vlib/builtin/rune.v index 7e7cc65ff1..f756452696 100644 --- a/vlib/builtin/rune.v +++ b/vlib/builtin/rune.v @@ -28,6 +28,15 @@ pub fn (c rune) str() string { */ } +// string converts a rune array to a string +pub fn (ra []rune) string() string { + mut res := '' + for r in ra { + res += r.str() + } + return res +} + // Define this on byte as well, so that we can do `s[0].is_capital()` pub fn (c byte) is_capital() bool { return c >= `A` && c <= `Z` @@ -42,7 +51,7 @@ pub fn (b []byte) clone() []byte { return res } -// TODO remove this once runes are implemented +// TODO: remove this once runes are implemented pub fn (b []byte) bytestr() string { unsafe { buf := malloc_noscan(b.len + 1) diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index fecdd6eff8..73f81644ca 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -73,7 +73,8 @@ pub fn (s string) runes() []rune { for i := 0; i < s.len; i++ { char_len := utf8_char_len(unsafe { s.str[i] }) if char_len > 1 { - mut r := unsafe { s[i..i + char_len] } + end := if s.len - 1 >= i + char_len { i + char_len } else { s.len } + mut r := unsafe { s[i..end] } runes << r.utf32_code() i += char_len - 1 } else { diff --git a/vlib/os/notify/notify_test.v b/vlib/os/notify/notify_test.v index c2cddaaecc..253c94fd6e 100644 --- a/vlib/os/notify/notify_test.v +++ b/vlib/os/notify/notify_test.v 
@@ -1,6 +1,5 @@ import os import os.notify -import time // make a pipe and return the (read, write) file descriptors fn make_pipe() ?(int, int) { diff --git a/vlib/os/os.v b/vlib/os/os.v index 587fba6579..1a2b9afabf 100644 --- a/vlib/os/os.v +++ b/vlib/os/os.v @@ -90,18 +90,6 @@ pub fn read_lines(path string) ?[]string { return res } -// read_ulines reads the file in `path` into an array of ustring lines. -fn read_ulines(path string) ?[]ustring { - lines := read_lines(path) ? - // mut ulines := new_array(0, lines.len, sizeof(ustring)) - mut ulines := []ustring{} - for myline in lines { - // ulines[i] = ustr - ulines << myline.ustring() - } - return ulines -} - // sigint_to_signal_name will translate `si` signal integer code to it's string code representation. pub fn sigint_to_signal_name(si int) string { // POSIX signals: diff --git a/vlib/readline/readline.v b/vlib/readline/readline.v index 2e22a38208..4f0ebf7c40 100644 --- a/vlib/readline/readline.v +++ b/vlib/readline/readline.v @@ -33,13 +33,13 @@ pub struct Readline { mut: is_raw bool orig_termios Termios // Linux - current ustring // Line being edited + current []rune // Line being edited cursor int // Cursor position overwrite bool cursor_row_offset int prompt string prompt_offset int - previous_lines []ustring + previous_lines [][]rune search_index int is_tty bool } diff --git a/vlib/readline/readline_default.c.v b/vlib/readline/readline_default.c.v index e78c0be3d4..fcbbbb6eab 100644 --- a/vlib/readline/readline_default.c.v +++ b/vlib/readline/readline_default.c.v @@ -17,58 +17,58 @@ import os // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `ustring` or +// read_line_utf8 returns the complete input line as a `[]rune` (Unicode code points) or // an error if the line is empty. 
// The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. -pub fn (mut r Readline) read_line_utf8(prompt string) ?ustring { - r.current = ''.ustring() +pub fn (mut r Readline) read_line_utf8(prompt string) ?[]rune { + r.current = []rune{} r.cursor = 0 r.prompt = prompt r.search_index = 0 if r.previous_lines.len <= 1 { - r.previous_lines << ''.ustring() - r.previous_lines << ''.ustring() + r.previous_lines << []rune{} + r.previous_lines << []rune{} } else { - r.previous_lines[0] = ''.ustring() + r.previous_lines[0] = []rune{} } print(r.prompt) line := os.get_raw_line() if line.len >= 0 { - r.current = line.ustring() + r.current = line.runes() } - r.previous_lines[0] = ''.ustring() + r.previous_lines[0] = []rune{} r.search_index = 0 - if r.current.s == '' { + if r.current.len == 0 { return error('empty line') } return r.current } // read_line does the same as `read_line_utf8` but returns user input as a `string`. -// (As opposed to `ustring` returned by `read_line_utf8`). +// (As opposed to `[]rune` returned by `read_line_utf8`). pub fn (mut r Readline) read_line(prompt string) ?string { s := r.read_line_utf8(prompt) ? - return s.s + return s.string() } // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `ustring` or +// read_line_utf8 returns the complete input line as a `[]rune` (Unicode code points) or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. // NOTE that this version of `read_line_utf8` is a standalone function without // persistent functionalities (e.g. history). 
-pub fn read_line_utf8(prompt string) ?ustring { +pub fn read_line_utf8(prompt string) ?[]rune { mut r := Readline{} s := r.read_line_utf8(prompt) ? return s } // read_line does the same as `read_line_utf8` but returns user input as a `string`. -// (As opposed to `ustring` as returned by `read_line_utf8`). +// (As opposed to `[]rune` as returned by `read_line_utf8`). // NOTE that this version of `read_line` is a standalone function without // persistent functionalities (e.g. history). pub fn read_line(prompt string) ?string { diff --git a/vlib/readline/readline_linux.c.v b/vlib/readline/readline_linux.c.v index d52ac6469f..f96b613cad 100644 --- a/vlib/readline/readline_linux.c.v +++ b/vlib/readline/readline_linux.c.v @@ -101,21 +101,21 @@ pub fn (r Readline) read_char() int { // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `ustring` or +// read_line_utf8 returns the complete input line as a `[]rune` (Unicode code points) or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. 
-pub fn (mut r Readline) read_line_utf8(prompt string) ?ustring { - r.current = ''.ustring() +pub fn (mut r Readline) read_line_utf8(prompt string) ?[]rune { + r.current = []rune{} r.cursor = 0 r.prompt = prompt r.search_index = 0 r.prompt_offset = get_prompt_offset(prompt) if r.previous_lines.len <= 1 { - r.previous_lines << ''.ustring() - r.previous_lines << ''.ustring() + r.previous_lines << []rune{} + r.previous_lines << []rune{} } else { - r.previous_lines[0] = ''.ustring() + r.previous_lines[0] = []rune{} } if !r.is_raw { r.enable_raw_mode() @@ -129,39 +129,39 @@ pub fn (mut r Readline) read_line_utf8(prompt string) ?ustring { break } } - r.previous_lines[0] = ''.ustring() + r.previous_lines[0] = []rune{} r.search_index = 0 r.disable_raw_mode() - if r.current.s == '' { + if r.current.len == 0 { return error('empty line') } return r.current } // read_line does the same as `read_line_utf8` but returns user input as a `string`. -// (As opposed to `ustring` returned by `read_line_utf8`). +// (As opposed to `[]rune` returned by `read_line_utf8`). pub fn (mut r Readline) read_line(prompt string) ?string { s := r.read_line_utf8(prompt) ? - return s.s + return s.string() } // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `ustring` or +// read_line_utf8 returns the complete input line as a `[]rune` (Unicode code points) or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. // NOTE that this version of `read_line_utf8` is a standalone function without // persistent functionalities (e.g. history). -pub fn read_line_utf8(prompt string) ?ustring { +pub fn read_line_utf8(prompt string) ?[]rune { mut r := Readline{} s := r.read_line_utf8(prompt) ? 
return s } // read_line does the same as `read_line_utf8` but returns user input as a `string`. -// (As opposed to `ustring` as returned by `read_line_utf8`). +// (As opposed to `[]rune` as returned by `read_line_utf8`). // NOTE that this version of `read_line` is a standalone function without // persistent functionalities (e.g. history). pub fn read_line(prompt string) ?string { @@ -373,14 +373,14 @@ fn (mut r Readline) refresh_line() { mut end_of_input := [0, 0] end_of_input = calculate_screen_position(r.prompt.len, 0, get_screen_columns(), r.current.len, end_of_input) - end_of_input[1] += r.current.count('\n'.ustring()) + end_of_input[1] += r.current.filter(it == `\n`).len mut cursor_pos := [0, 0] cursor_pos = calculate_screen_position(r.prompt.len, 0, get_screen_columns(), r.cursor, cursor_pos) shift_cursor(0, -r.cursor_row_offset) term.erase_toend() print(r.prompt) - print(r.current) + print(r.current.string()) if end_of_input[0] == 0 && end_of_input[1] > 0 { print('\n') } @@ -401,11 +401,9 @@ fn (mut r Readline) eof() bool { // insert_character inserts the character `c` at current cursor position. 
fn (mut r Readline) insert_character(c int) { if !r.overwrite || r.cursor == r.current.len { - r.current = r.current.left(r.cursor).ustring() + utf32_to_str(u32(c)).ustring() + - r.current.right(r.cursor).ustring() + r.current.insert(r.cursor, c) } else { - r.current = r.current.left(r.cursor).ustring() + utf32_to_str(u32(c)).ustring() + - r.current.right(r.cursor + 1).ustring() + r.current[r.cursor] = rune(c) } r.cursor++ // Refresh the line to add the new character @@ -420,7 +418,7 @@ fn (mut r Readline) delete_character() { return } r.cursor-- - r.current = r.current.left(r.cursor).ustring() + r.current.right(r.cursor + 1).ustring() + r.current.delete(r.cursor) r.refresh_line() } @@ -429,15 +427,14 @@ fn (mut r Readline) suppr_character() { if r.cursor > r.current.len { return } - r.current = r.current.left(r.cursor).ustring() + r.current.right(r.cursor + 1).ustring() + r.current.delete(r.cursor) r.refresh_line() } // commit_line adds a line break and then stops the main loop. fn (mut r Readline) commit_line() bool { r.previous_lines.insert(1, r.current) - a := '\n'.ustring() - r.current += a + r.current << `\n` r.cursor = r.current.len if r.is_tty { r.refresh_line() @@ -483,9 +480,9 @@ fn (r Readline) is_break_character(c string) bool { // move_cursor_word_left moves the cursor relative one word length worth to the left. fn (mut r Readline) move_cursor_word_left() { if r.cursor > 0 { - for ; r.cursor > 0 && r.is_break_character(r.current.at(r.cursor - 1)); r.cursor-- { + for ; r.cursor > 0 && r.is_break_character(r.current[r.cursor - 1].str()); r.cursor-- { } - for ; r.cursor > 0 && !r.is_break_character(r.current.at(r.cursor - 1)); r.cursor-- { + for ; r.cursor > 0 && !r.is_break_character(r.current[r.cursor - 1].str()); r.cursor-- { } r.refresh_line() } @@ -494,9 +491,9 @@ fn (mut r Readline) move_cursor_word_left() { // move_cursor_word_right moves the cursor relative one word length worth to the right. 
fn (mut r Readline) move_cursor_word_right() { if r.cursor < r.current.len { - for ; r.cursor < r.current.len && r.is_break_character(r.current.at(r.cursor)); r.cursor++ { + for ; r.cursor < r.current.len && r.is_break_character(r.current[r.cursor].str()); r.cursor++ { } - for ; r.cursor < r.current.len && !r.is_break_character(r.current.at(r.cursor)); r.cursor++ { + for ; r.cursor < r.current.len && !r.is_break_character(r.current[r.cursor].str()); r.cursor++ { } r.refresh_line() } diff --git a/vlib/readline/readline_windows.c.v b/vlib/readline/readline_windows.c.v index 68fbc8aa23..887efdb912 100644 --- a/vlib/readline/readline_windows.c.v +++ b/vlib/readline/readline_windows.c.v @@ -16,55 +16,55 @@ import os // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `ustring` or +// read_line_utf8 returns the complete input line as a `[]rune` (Unicode code points) or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. 
-pub fn (mut r Readline) read_line_utf8(prompt string) ?ustring { - r.current = ''.ustring() +pub fn (mut r Readline) read_line_utf8(prompt string) ?[]rune { + r.current = []rune{} r.cursor = 0 r.prompt = prompt r.search_index = 0 if r.previous_lines.len <= 1 { - r.previous_lines << ''.ustring() - r.previous_lines << ''.ustring() + r.previous_lines << []rune{} + r.previous_lines << []rune{} } else { - r.previous_lines[0] = ''.ustring() + r.previous_lines[0] = []rune{} } print(r.prompt) - r.current = os.get_raw_line().ustring() - r.previous_lines[0] = ''.ustring() + r.current = os.get_raw_line().runes() + r.previous_lines[0] = []rune{} r.search_index = 0 - if r.current.s == '' { + if r.current.len == 0 { return error('empty line') } return r.current } // read_line does the same as `read_line_utf8` but returns user input as a `string`. -// (As opposed to `ustring` returned by `read_line_utf8`). +// (As opposed to `[]rune` returned by `read_line_utf8`). pub fn (mut r Readline) read_line(prompt string) ?string { s := r.read_line_utf8(prompt) ? - return s.s + return s.string() } // read_line_utf8 blocks execution in a loop and awaits user input // characters from a terminal until `EOF` or `Enter` key is encountered // in the input stream. -// read_line_utf8 returns the complete input line as an UTF-8 encoded `ustring` or +// read_line_utf8 returns the complete input line as a `[]rune` (Unicode code points) or // an error if the line is empty. // The `prompt` `string` is output as a prefix text for the input capturing. // read_line_utf8 is the main method of the `readline` module and `Readline` struct. // NOTE that this version of `read_line_utf8` is a standalone function without // persistent functionalities (e.g. history). -pub fn read_line_utf8(prompt string) ?ustring { +pub fn read_line_utf8(prompt string) ?[]rune { mut r := Readline{} s := r.read_line_utf8(prompt) ? return s } // read_line does the same as `read_line_utf8` but returns user input as a `string`. 
-// (As opposed to `ustring` as returned by `read_line_utf8`). +// (As opposed to `[]rune` as returned by `read_line_utf8`). // NOTE that this version of `read_line` is a standalone function without // persistent functionalities (e.g. history). pub fn read_line(prompt string) ?string { diff --git a/vlib/v/fmt/tests/structs_expected.vv b/vlib/v/fmt/tests/structs_expected.vv index ee5b9c36d9..ca3fb0cab9 100644 --- a/vlib/v/fmt/tests/structs_expected.vv +++ b/vlib/v/fmt/tests/structs_expected.vv @@ -1,6 +1,6 @@ struct User { - name string // name - name2 ustring // name2 + name string // name + name2 []rune // name2 very_long_field bool age int // age very_long_type_field1 very_looooog_type // long diff --git a/vlib/v/fmt/tests/structs_input.vv b/vlib/v/fmt/tests/structs_input.vv index dd74dfc4f4..c17259a310 100644 --- a/vlib/v/fmt/tests/structs_input.vv +++ b/vlib/v/fmt/tests/structs_input.vv @@ -1,6 +1,6 @@ struct User { name string // name - name2 ustring // name2 + name2 []rune // name2 very_long_field bool age int // age very_long_type_field1 very_looooog_type // long diff --git a/vlib/v/gen/c/cgen.v b/vlib/v/gen/c/cgen.v index bf630b426a..9f4605d74a 100644 --- a/vlib/v/gen/c/cgen.v +++ b/vlib/v/gen/c/cgen.v @@ -3149,7 +3149,7 @@ fn (mut g Gen) expr(node ast.Expr) { } else { // TODO: optimize use L-char instead of u32 when possible if utf8_str_len(node.val) < node.val.len { - g.write('((u32)0x$node.val.utf32_code().hex())') + g.write('((rune)0x$node.val.utf32_code().hex() /* `$node.val` */)') } else { g.write("'$node.val'") } diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index 96dd6b14ed..1314adb908 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -1270,7 +1270,7 @@ fn (mut s Scanner) ident_char() string { len-- c := s.text[start + 1..s.pos] if len != 1 { - u := c.ustring() + u := c.runes() if u.len != 1 { s.error('invalid character literal (more than one character)\n' + 'use quotes for strings, backticks for characters') 
diff --git a/vlib/v/tests/sumtype_equality_test.v b/vlib/v/tests/sumtype_equality_test.v index beac8dae99..27f37b59f5 100644 --- a/vlib/v/tests/sumtype_equality_test.v +++ b/vlib/v/tests/sumtype_equality_test.v @@ -1,4 +1,4 @@ -type Str = string | ustring +type Str = rune | string struct Foo { v int @@ -13,8 +13,8 @@ type FooBar = Bar | Foo fn test_sumtype_equality() { s1 := Str('s') s2 := Str('s2') - u1 := Str('s1'.ustring()) - u2 := Str('s2'.ustring()) + u1 := Str(`A`) + u2 := Str(`B`) assert s1 == s1 assert u1 == u1 assert s1 != s2