From 3e005074a34e3bf8c26a0aa1c16b5e95c375468e Mon Sep 17 00:00:00 2001 From: vitalyster Date: Sun, 21 Jul 2019 14:29:32 +0300 Subject: [PATCH] Windows Unicode I/O --- compiler/main.v | 15 +++- vlib/builtin/builtin.v | 12 ++- vlib/builtin/utf8.v | 40 +++++++++ vlib/math/math.v | 1 + vlib/os/os.v | 190 ++++++++++++++++++++++++++++------------- vlib/os/os_win.v | 11 +-- 6 files changed, 199 insertions(+), 70 deletions(-) diff --git a/compiler/main.v b/compiler/main.v index 3cb30f7a39..e3a7fb5176 100644 --- a/compiler/main.v +++ b/compiler/main.v @@ -200,8 +200,12 @@ fn (v mut V) compile() { #ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN #include -//#include +#include // _waccess +#include // _O_U8TEXT +#include // _wgetcwd +//#include #endif //================================== TYPEDEFS ================================*/ @@ -360,7 +364,7 @@ string _STR_TMP(const char *fmt, ...) { // It can be skipped in single file programs if v.pref.is_script { //println('Generating main()...') - cgen.genln('int main() { init_consts(); $cgen.fn_main; return 0; }') + cgen.genln('int main() { \n#ifdef _WIN32\n _setmode(_fileno(stdout), _O_U8TEXT); \n#endif\n init_consts(); $cgen.fn_main; return 0; }') } else { println('panic: function `main` is undeclared in the main module') @@ -369,7 +373,7 @@ string _STR_TMP(const char *fmt, ...) { } // Generate `main` which calls every single test function else if v.pref.is_test { - cgen.genln('int main() { init_consts();') + cgen.genln('int main() { \n#ifdef _WIN32\n _setmode(_fileno(stdout), _O_U8TEXT); \n#endif\n init_consts();') for key, f in v.table.fns { if f.name.starts_with('test_') { cgen.genln('$f.name();') @@ -500,7 +504,7 @@ fn (c &V) cc_windows_cross() { obj_name = obj_name.replace('.exe', '') obj_name = obj_name.replace('.o.o', '.o') mut include := '-I $winroot/include ' - cmd := 'clang -o $obj_name -w $include -m32 -c -target x86_64-win32 $ModPath/$c.out_name_c' + cmd := 'clang -o $obj_name -w $include -DUNICODE -D_UNICODE -m32 -c -target x86_64-win32 $ModPath/$c.out_name_c' if c.pref.show_c_cmd { println(cmd) } @@ -631,6 +635,9 @@ mut args := '' a << ' -ldl ' } } + if v.os == .windows { + a << '-DUNICODE -D_UNICODE' + } // Find clang executable //fast_clang := '/usr/local/Cellar/llvm/8.0.0/bin/clang' args := a.join(' ') diff --git a/vlib/builtin/builtin.v b/vlib/builtin/builtin.v index 342c886331..7925e7785d 100644 --- a/vlib/builtin/builtin.v +++ b/vlib/builtin/builtin.v @@ -37,7 +37,11 @@ pub fn println(s string) { if isnil(s.str) { panic('println(NIL)') } - C.printf('%.*s\n', s.len, s.str) + $if windows { + C._putws(s.to_wide()) + } $else { + C.printf('%.*s\n', s.len, s.str) + } } pub fn eprintln(s string) { @@ -54,7 +58,11 @@ pub fn eprintln(s string) { } pub fn print(s string) { - C.printf('%.*s', s.len, s.str) + $if windows { + C.wprintf(s.to_wide()) + } $else { + C.printf('%.*s', s.len, s.str) + } } __global total_m i64 = 0 diff --git a/vlib/builtin/utf8.v b/vlib/builtin/utf8.v index b87d328ff1..520be56091 100644 --- a/vlib/builtin/utf8.v +++ b/vlib/builtin/utf8.v @@ -91,3 +91,43 @@ pub fn (_rune string) utf32_code() int { return res } +const ( + CP_UTF8 = 65001 + ) + +pub fn (_str string) to_wide() &u16 { + $if windows { + num_chars := int(C.MultiByteToWideChar(CP_UTF8, 0, _str.str, _str.len, 0, 0)) + mut wstr := &u16(malloc((num_chars + 1) * 2)) // sizeof(wchar_t) + if wstr > 0 { + C.MultiByteToWideChar(CP_UTF8, 0, _str.str, _str.len, wstr, num_chars) + C.memset(&byte(wstr) + num_chars * 2, 0, 2) + } + return wstr + } $else { + return 0 + } +} + +pub fn string_from_wide(_wstr &u16) string { + $if windows { + wstr_len := int(C.wcslen(_wstr)) + return string_from_wide2(_wstr, wstr_len) + } $else { + return '' + } +} + +pub fn string_from_wide2(_wstr &u16, len int) string { + $if windows { + num_chars := int(C.WideCharToMultiByte(CP_UTF8, 0, _wstr, len, 0, 0, 0, 0)) + mut str_to := &byte(malloc(num_chars + 1)) + if str_to > 0 { + C.WideCharToMultiByte(CP_UTF8, 0, _wstr, len, str_to, num_chars, 0, 0) + C.memset(&byte(str_to) + num_chars, 0, 1) + } + return tos2(str_to) + } $else { + return '' + } +} diff --git a/vlib/math/math.v b/vlib/math/math.v index 0bd03e8974..1d15f736bb 100644 --- a/vlib/math/math.v +++ b/vlib/math/math.v @@ -4,6 +4,7 @@ module math +#include // NOTE // When adding a new function, please make sure it's in the right place. diff --git a/vlib/os/os.v b/vlib/os/os.v index 0523c0bbd2..06e5913c96 100644 --- a/vlib/os/os.v +++ b/vlib/os/os.v @@ -33,9 +33,10 @@ const ( import const ( FILE_ATTRIBUTE_DIRECTORY INVALID_FILE_ATTRIBUTES -) +) -struct FILE { +struct C.FILE { + } struct File { @@ -105,9 +106,14 @@ fn parse_windows_cmd_line(cmd byteptr) []string { // read_file reads the file in `path` and returns the contents. pub fn read_file(path string) ?string { - mut mode := 'rb' - cpath := path.cstr() - fp := C.fopen(cpath, mode.cstr()) + mode := 'rb' + mut fp := &C.FILE{} + $if windows { + fp = C._wfopen(path.to_wide(), mode.to_wide()) + } $else { + cpath := path.cstr() + fp = C.fopen(cpath, mode.cstr()) + } if isnil(fp) { return error('failed to open file "$path"') } @@ -124,13 +130,21 @@ pub fn read_file(path string) ?string { // file_size returns the size of the file located in `path`. pub fn file_size(path string) int { - s := C.stat{} - C.stat(path.str, &s) + mut s := C.stat{} + $if windows { + C._wstat(path.to_wide(), &s) + } $else { + C.stat(path.str, &s) + } return s.st_size } pub fn mv(old, new string) { - C.rename(old.cstr(), new.cstr()) + $if windows { + C._wrename(old.to_wide(), new.to_wide()) + } $else { + C.rename(old.cstr(), new.cstr()) + } } // read_lines reads the file in `path` into an array of lines. @@ -138,8 +152,14 @@ pub fn mv(old, new string) { pub fn read_lines(path string) []string { mut res := []string mut buf := [1000]byte - cpath := path.cstr() - fp := C.fopen(cpath, 'rb') + mode := 'rb' + mut fp := &C.FILE{} + $if windows { + fp = C._wfopen(path.to_wide(), mode.to_wide()) + } $else { + cpath := path.cstr() + fp = C.fopen(cpath, mode.cstr()) + } if isnil(fp) { // TODO // return error('failed to open file "$path"') @@ -171,9 +191,18 @@ fn read_ulines(path string) []ustring { } pub fn open(path string) ?File { - cpath := path.cstr() - file := File { - cfile: C.fopen(cpath, 'rb') + mut file := File{} + $if windows { + wpath := path.to_wide() + mode := 'rb' + file = File { + cfile: C._wfopen(wpath, mode.to_wide()) + } + } $else { + cpath := path.cstr() + file = File { + cfile: C.fopen(cpath, 'rb') + } } if isnil(file.cfile) { return error('failed to open file "$path"') @@ -183,9 +212,18 @@ pub fn open(path string) ?File { // create creates a file at a specified location and returns a writable `File` object. pub fn create(path string) ?File { - cpath := path.cstr() - file := File { - cfile: C.fopen(cpath, 'wb') + mut file := File{} + $if windows { + wpath := path.replace('/', '\\').to_wide() + mode := 'wb' + file = File { + cfile: C._wfopen(wpath, mode.to_wide()) + } + } $else { + cpath := path.cstr() + file = File { + cfile: C.fopen(cpath, 'wb') + } } if isnil(file.cfile) { return error('failed to create file "$path"') @@ -194,12 +232,21 @@ pub fn create(path string) ?File { } pub fn open_append(path string) ?File { - cpath := path.cstr() - file := File { - cfile: C.fopen(cpath, 'ab') + mut file := File{} + $if windows { + wpath := path.replace('/', '\\').to_wide() + mode := 'ab' + file = File { + cfile: C._wfopen(wpath, mode.to_wide()) + } + } $else { + cpath := path.cstr() + file = File { + cfile: C.fopen(cpath, 'ab') + } } if isnil(file.cfile) { - return error('failed to create file "$path"') + return error('failed to create(append) file "$path"') } return file } @@ -243,7 +290,12 @@ pub fn (f File) close() { // system starts the specified command, waits for it to complete, and returns its code. pub fn system(cmd string) int { - ret := C.system(cmd.cstr()) + mut ret := int(0) + $if windows { + ret = C._wsystem(cmd.to_wide()) + } $else { + ret = C.system(cmd.cstr()) + } if ret == -1 { os.print_c_errno() } @@ -251,11 +303,13 @@ pub fn system(cmd string) int { } fn popen(path string) *FILE { - cpath := path.cstr() $if windows { - return C._popen(cpath, 'r') + mode := string('rb') + wpath := path.to_wide() + return C._wpopen(wpath, mode.to_wide()) } $else { + cpath := path.cstr() return C.popen(cpath, 'r') } } @@ -263,7 +317,7 @@ fn popen(path string) *FILE { // exec starts the specified command, waits for it to complete, and returns its output. pub fn exec(cmd string) string { cmd = '$cmd 2>&1' - f := popen(cmd) + f := popen(cmd) if isnil(f) { // TODO optional or error code println('popen $cmd failed') @@ -278,12 +332,20 @@ pub fn exec(cmd string) string { } // `getenv` returns the value of the environment variable named by the key. -pub fn getenv(key string) string { - s := C.getenv(key.cstr()) - if isnil(s) { - return '' +pub fn getenv(key string) string { + $if windows { + s := C._wgetenv(key.to_wide()) + if isnil(s) { + return '' + } + return string_from_wide(s) + } $else { + s := C.getenv(key.cstr()) + if isnil(s) { + return '' + } + return string(s) } - return string(s) } pub fn setenv(name string, value string, overwrite bool) int { @@ -315,14 +377,17 @@ pub fn unsetenv(name string) int { // `file_exists` returns true if `path` exists. pub fn file_exists(path string) bool { $if windows { - return C._access( path.str, 0 ) != -1 + path = path.replace('/', '\\') + return C._waccess( path.to_wide(), 0 ) != -1 + } $else { + return C.access( path.str, 0 ) != -1 } - return C.access( path.str, 0 ) != -1 } pub fn dir_exists(path string) bool { $if windows { - attr := int(C.GetFileAttributes(path.cstr())) + path = path.replace('/', '\\') + attr := int(C.GetFileAttributes(path.to_wide())) if attr == INVALID_FILE_ATTRIBUTES { return false } @@ -345,7 +410,7 @@ pub fn dir_exists(path string) bool { pub fn mkdir(path string) { $if windows { path = path.replace('/', '\\') - C.CreateDirectory(path.cstr(), 0) + C._wmkdir(path.to_wide()) } $else { C.mkdir(path.cstr(), 511)// S_IRWXU | S_IRWXG | S_IRWXO @@ -354,7 +419,12 @@ pub fn mkdir(path string) { // rm removes file in `path`. pub fn rm(path string) { - C.remove(path.cstr()) + $if windows { + C._wremove(path.to_wide()) + } + $else { + C.remove(path.cstr()) + } // C.unlink(path.cstr()) } @@ -365,7 +435,7 @@ pub fn rmdir(path string) { C.rmdir(path.cstr()) } $else { - C.RemoveDirectoryA(path.cstr()) + C.RemoveDirectoryW(path.to_wide()) } } @@ -435,20 +505,18 @@ pub fn get_line() string { // get_raw_line returns a one-line string from stdin along with '\n' if there is any pub fn get_raw_line() string { $if windows { - max := 256 - buf := malloc(max) + max := 512 // MAX_PATH * sizeof(wchar_t) + buf := &u16(malloc(max)) h_input := C.GetStdHandle(STD_INPUT_HANDLE) if h_input == INVALID_HANDLE_VALUE { panic('get_raw_line() error getting input handle.') } - nr_chars := 0 - // NOTE: Once we have UTF8 encode function to - // convert utf16 to utf8, change to ReadConsoleW + mut nr_chars := 0 C.ReadConsole(h_input, buf, max, &nr_chars, 0) if nr_chars == 0 { return '' } - return tos(buf, nr_chars) + return string_from_wide2(buf, nr_chars) } $else { //u64 is used because C.getline needs a size_t as second argument @@ -536,8 +604,8 @@ fn on_segfault(f voidptr) { } pub fn executable() string { - mut result := malloc(MAX_PATH) $if linux { + mut result := malloc(MAX_PATH) count := int(C.readlink('/proc/self/exe', result, MAX_PATH )) if count < 0 { panic('error reading /proc/self/exe to get exe path') @@ -545,10 +613,12 @@ pub fn executable() string { return string(result, count) } $if windows { - ret := int(C.GetModuleFileName( 0, result, MAX_PATH )) - return string( result, ret) + mut result := &u16(malloc(512)) // MAX_PATH * sizeof(wchar_t) + len := int(C.GetModuleFileName( 0, result, MAX_PATH )) + return string_from_wide2(result, len) } $if mac { + mut result := malloc(MAX_PATH) pid := C.getpid() ret := C.proc_pidpath (pid, result, MAX_PATH) if ret <= 0 { @@ -558,6 +628,7 @@ pub fn executable() string { return string(result) } $if freebsd { + mut result := malloc(MAX_PATH) mut mib := [1 /* CTL_KERN */, 14 /* KERN_PROC */, 12 /* KERN_PROC_PATHNAME */, -1]!! size := MAX_PATH C.sysctl(mib, 4, result, &size, 0, 0) @@ -569,6 +640,7 @@ pub fn executable() string { return os.args[0] } $if netbsd { + mut result := malloc(MAX_PATH) count := int(C.readlink('/proc/curproc/exe', result, MAX_PATH )) if count < 0 { panic('error reading /proc/curproc/exe to get exe path') @@ -576,6 +648,7 @@ pub fn executable() string { return string(result, count) } $if dragonfly { + mut result := malloc(MAX_PATH) count := int(C.readlink('/proc/curproc/file', result, MAX_PATH )) if count < 0 { panic('error reading /proc/curproc/file to get exe path') @@ -587,9 +660,9 @@ pub fn executable() string { pub fn is_dir(path string) bool { $if windows { - val := int(C.GetFileAttributes(path.cstr())) + val := int(C.GetFileAttributes(path.to_wide())) // Note: this return is broke (wrong). we have dir_exists already how will this differ? - return val &FILE_ATTRIBUTE_DIRECTORY > 0 + return (val &FILE_ATTRIBUTE_DIRECTORY) > 0 } $else { statbuf := C.stat{} @@ -603,26 +676,29 @@ pub fn is_dir(path string) bool { pub fn chdir(path string) { $if windows { - C._chdir(path.cstr()) + C._wchdir(path.to_wide()) } $else { C.chdir(path.cstr()) } } -pub fn getwd() string { - buf := malloc(512) +pub fn getwd() string { $if windows { - if C._getcwd(buf, 512) == 0 { + max := 512 // MAX_PATH * sizeof(wchar_t) + buf := &u16(malloc(max)) + if C._wgetcwd(buf, max) == 0 { return '' } + return string_from_wide(buf) } - $else { + $else { + buf := malloc(512) if C.getcwd(buf, 512) == 0 { return '' } - } - return string(buf) + return string(buf) + } } // win: FILETIME @@ -633,7 +709,7 @@ struct filetime { } // win: WIN32_FIND_DATA -// https://docs.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-_win32_find_dataa +// https://docs.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-_win32_find_dataw struct win32finddata { mut: dwFileAttributes u32 @@ -670,13 +746,13 @@ pub fn ls(path string) []string { path_files := '$path\\*' // NOTE:TODO: once we have a way to convert utf16 wide character to utf8 // we should use FindFirstFileW and FindNextFileW - h_find_files := C.FindFirstFile(path_files.cstr(), &find_file_data) - first_filename := tos(&find_file_data.cFileName, strlen(find_file_data.cFileName)) + h_find_files := C.FindFirstFile(path_files.to_wide(), &find_file_data) + first_filename := string_from_wide(&u16(find_file_data.cFileName)) if first_filename != '.' && first_filename != '..' { dir_files << first_filename } for C.FindNextFile(h_find_files, &find_file_data) { - filename := tos(&find_file_data.cFileName, strlen(find_file_data.cFileName)) + filename := string_from_wide(&u16(find_file_data.cFileName)) if filename != '.' && filename != '..' { dir_files << filename.clone() } diff --git a/vlib/os/os_win.v b/vlib/os/os_win.v index 9a0296b59c..385bd8e1ec 100644 --- a/vlib/os/os_win.v +++ b/vlib/os/os_win.v @@ -15,7 +15,7 @@ type HANDLE voidptr // get_file_handle retrieves the operating-system file handle that is associated with the specified file descriptor. pub fn get_file_handle(path string) HANDLE { mode := 'rb' - _fd := C.fopen(path.cstr(), mode.cstr()) + _fd := C._wfopen(path.to_wide(), mode.cstr()) if _fd == 0 { return HANDLE(INVALID_HANDLE_VALUE) } @@ -27,17 +27,14 @@ pub fn get_file_handle(path string) HANDLE { // get_module_filename retrieves the fully qualified path for the file that contains the specified module. // The module must have been loaded by the current process. pub fn get_module_filename(handle HANDLE) ?string { - mut sz := int(1024) // Optimized length - mut buf := [byte(0); sz] // Not work for GetModuleFileNameW :( + mut sz := int(4096) // Optimized length + mut buf := &u16(malloc(4096)) for { status := C.GetModuleFileName(handle, &buf, sz) switch status { case SUCCESS: - _filename := tos(buf.data, sz) + _filename := string_from_wide2(buf, sz) return _filename - case ERROR_INSUFFICIENT_BUFFER: - sz += 1024 // increment buffer cluster by 1024 - buf = [byte(0); sz] // clear buffer default: // Must handled with GetLastError and converted by FormatMessage return error('Cannot get file name from handle.')