From fcb1f211e3f41effb867798ac417b098ccd071e6 Mon Sep 17 00:00:00 2001 From: vitalyster Date: Wed, 24 Jul 2019 13:16:45 +0300 Subject: [PATCH] Windows Unicode I/O --- .travis.yml | 3 +- compiler/main.v | 12 ++- vlib/builtin/builtin.v | 12 ++- vlib/builtin/utf8.v | 40 +++++++++ vlib/math/math.v | 1 + vlib/os/os.v | 183 +++++++++++++++++++++++++++++------------ vlib/os/os_win.v | 11 +-- 7 files changed, 198 insertions(+), 64 deletions(-) diff --git a/.travis.yml b/.travis.yml index eb7aa45c54..31436f2551 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,7 +35,8 @@ script: if [[ "${TRAVIS_OS_NAME}" == "windows" ]]; then echo "Running make.bat..." ./make.bat - ./v.exe -o v2.exe compiler + ./v.exe -debug -o v2.exe compiler + gcc -Ithirdparty/microsoft_craziness -DUNICODE -D_UNICODE .v2.exe.c thirdparty/microsoft_craziness/microsoft_craziness.c -o v2.exe -lole32 -ladvapi32 -loleaut32 ./v2.exe -o v3.exe compiler fi - | diff --git a/compiler/main.v b/compiler/main.v index 68b6cd2bba..3977757374 100644 --- a/compiler/main.v +++ b/compiler/main.v @@ -193,6 +193,9 @@ fn (v mut V) compile() { #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN #include +#include // _waccess +#include // _O_U8TEXT +#include // _wgetcwd //#include #ifdef _MSC_VER // On MSVC these are the same (as long as /volatile:ms is passed) @@ -385,7 +388,7 @@ string _STR_TMP(const char *fmt, ...) { // It can be skipped in single file programs if v.pref.is_script { //println('Generating main()...') - cgen.genln('int main() { init_consts(); $cgen.fn_main; return 0; }') + cgen.genln('int main() { \n#ifdef _WIN32\n _setmode(_fileno(stdout), _O_U8TEXT); \n#endif\n init_consts(); $cgen.fn_main; return 0; }') } else { println('panic: function `main` is undeclared in the main module') @@ -394,7 +397,7 @@ string _STR_TMP(const char *fmt, ...) { } // Generate `main` which calls every single test function else if v.pref.is_test { - cgen.genln('int main() { init_consts();') + cgen.genln('int main() { \n#ifdef _WIN32\n _setmode(_fileno(stdout), _O_U8TEXT); \n#endif\n init_consts();') for key, f in v.table.fns { if f.name.starts_with('test_') { cgen.genln('$f.name();') @@ -622,7 +625,7 @@ fn (c &V) cc_windows_cross() { obj_name = obj_name.replace('.exe', '') obj_name = obj_name.replace('.o.o', '.o') mut include := '-I $winroot/include ' - cmd := 'clang -o $obj_name -w $include -m32 -c -target x86_64-win32 $ModPath/$c.out_name_c' + cmd := 'clang -o $obj_name -w $include -DUNICODE -D_UNICODE -m32 -c -target x86_64-win32 $ModPath/$c.out_name_c' if c.pref.show_c_cmd { println(cmd) } @@ -757,6 +760,9 @@ mut args := '' a << ' -ldl ' } } + if v.os == .windows { + a << '-DUNICODE -D_UNICODE' + } // Find clang executable //fast_clang := '/usr/local/Cellar/llvm/8.0.0/bin/clang' args := a.join(' ') diff --git a/vlib/builtin/builtin.v b/vlib/builtin/builtin.v index 342c886331..7925e7785d 100644 --- a/vlib/builtin/builtin.v +++ b/vlib/builtin/builtin.v @@ -37,7 +37,11 @@ pub fn println(s string) { if isnil(s.str) { panic('println(NIL)') } - C.printf('%.*s\n', s.len, s.str) + $if windows { + C._putws(s.to_wide()) + } $else { + C.printf('%.*s\n', s.len, s.str) + } } pub fn eprintln(s string) { @@ -54,7 +58,11 @@ pub fn eprintln(s string) { } pub fn print(s string) { - C.printf('%.*s', s.len, s.str) + $if windows { + C.wprintf(s.to_wide()) + } $else { + C.printf('%.*s', s.len, s.str) + } } __global total_m i64 = 0 diff --git a/vlib/builtin/utf8.v b/vlib/builtin/utf8.v index b87d328ff1..520be56091 100644 --- a/vlib/builtin/utf8.v +++ b/vlib/builtin/utf8.v @@ -91,3 +91,43 @@ pub fn (_rune string) utf32_code() int { return res } +const ( + CP_UTF8 = 65001 + ) + +pub fn (_str string) to_wide() &u16 { + $if windows { + num_chars := int(C.MultiByteToWideChar(CP_UTF8, 0, _str.str, _str.len, 0, 0)) + mut wstr := &u16(malloc((num_chars + 1) * 2)) // sizeof(wchar_t) + if wstr > 0 { + C.MultiByteToWideChar(CP_UTF8, 0, _str.str, _str.len, wstr, num_chars) + C.memset(&byte(wstr) + num_chars * 2, 0, 2) + } + return wstr + } $else { + return 0 + } +} + +pub fn string_from_wide(_wstr &u16) string { + $if windows { + wstr_len := int(C.wcslen(_wstr)) + return string_from_wide2(_wstr, wstr_len) + } $else { + return '' + } +} + +pub fn string_from_wide2(_wstr &u16, len int) string { + $if windows { + num_chars := int(C.WideCharToMultiByte(CP_UTF8, 0, _wstr, len, 0, 0, 0, 0)) + mut str_to := &byte(malloc(num_chars + 1)) + if str_to > 0 { + C.WideCharToMultiByte(CP_UTF8, 0, _wstr, len, str_to, num_chars, 0, 0) + C.memset(&byte(str_to) + num_chars, 0, 1) + } + return tos2(str_to) + } $else { + return '' + } +} diff --git a/vlib/math/math.v b/vlib/math/math.v index 8a617651f4..e5639a3d33 100644 --- a/vlib/math/math.v +++ b/vlib/math/math.v @@ -4,6 +4,7 @@ module math +#include // NOTE // When adding a new function, please make sure it's in the right place. diff --git a/vlib/os/os.v b/vlib/os/os.v index 1f94314f83..73664e9a83 100644 --- a/vlib/os/os.v +++ b/vlib/os/os.v @@ -33,9 +33,10 @@ const ( import const ( FILE_ATTRIBUTE_DIRECTORY INVALID_FILE_ATTRIBUTES -) +) -struct FILE { +struct C.FILE { + } struct File { @@ -105,8 +106,14 @@ fn parse_windows_cmd_line(cmd byteptr) []string { // read_file reads the file in `path` and returns the contents. pub fn read_file(path string) ?string { - mut mode := 'rb' - fp := C.fopen(path.str, mode.str) + mode := 'rb' + mut fp := &C.FILE{} + $if windows { + fp = C._wfopen(path.to_wide(), mode.to_wide()) + } $else { + cpath := path.str + fp = C.fopen(cpath, mode.str) + } if isnil(fp) { return error('failed to open file "$path"') } @@ -123,13 +130,21 @@ pub fn read_file(path string) ?string { // file_size returns the size of the file located in `path`. pub fn file_size(path string) int { - s := C.stat{} - C.stat(path.str, &s) + mut s := C.stat{} + $if windows { + C._wstat(path.to_wide(), &s) + } $else { + C.stat(path.str, &s) + } return s.st_size } pub fn mv(old, new string) { - C.rename(old.str, new.str) + $if windows { + C._wrename(old.to_wide(), new.to_wide()) + } $else { + C.rename(old.str, new.str) + } } // read_lines reads the file in `path` into an array of lines. @@ -137,7 +152,14 @@ pub fn mv(old, new string) { pub fn read_lines(path string) []string { mut res := []string mut buf := [1000]byte - fp := C.fopen(path.str, 'rb') + mode := 'rb' + mut fp := &C.FILE{} + $if windows { + fp = C._wfopen(path.to_wide(), mode.to_wide()) + } $else { + cpath := path.str + fp = C.fopen(cpath, mode.str) + } if isnil(fp) { // TODO // return error('failed to open file "$path"') @@ -169,8 +191,18 @@ fn read_ulines(path string) []ustring { } pub fn open(path string) ?File { - file := File { - cfile: C.fopen(path.str, 'rb') + mut file := File{} + $if windows { + wpath := path.to_wide() + mode := 'rb' + file = File { + cfile: C._wfopen(wpath, mode.to_wide()) + } + } $else { + cpath := path.str + file = File { + cfile: C.fopen(cpath, 'rb') + } } if isnil(file.cfile) { return error('failed to open file "$path"') @@ -180,8 +212,18 @@ pub fn open(path string) ?File { // create creates a file at a specified location and returns a writable `File` object. pub fn create(path string) ?File { - file := File { - cfile: C.fopen(path.str, 'wb') + mut file := File{} + $if windows { + wpath := path.replace('/', '\\').to_wide() + mode := 'wb' + file = File { + cfile: C._wfopen(wpath, mode.to_wide()) + } + } $else { + cpath := path.str + file = File { + cfile: C.fopen(cpath, 'wb') + } } if isnil(file.cfile) { return error('failed to create file "$path"') @@ -190,11 +232,21 @@ pub fn create(path string) ?File { } pub fn open_append(path string) ?File { - file := File { - cfile: C.fopen(path.str, 'ab') + mut file := File{} + $if windows { + wpath := path.replace('/', '\\').to_wide() + mode := 'ab' + file = File { + cfile: C._wfopen(wpath, mode.to_wide()) + } + } $else { + cpath := path.str + file = File { + cfile: C.fopen(cpath, 'ab') + } } if isnil(file.cfile) { - return error('failed to create file "$path"') + return error('failed to create(append) file "$path"') } return file } @@ -238,7 +290,12 @@ pub fn (f File) close() { // system starts the specified command, waits for it to complete, and returns its code. pub fn system(cmd string) int { - ret := C.system(cmd.str) + mut ret := int(0) + $if windows { + ret = C._wsystem(cmd.to_wide()) + } $else { + ret = C.system(cmd.str) + } if ret == -1 { os.print_c_errno() } @@ -246,11 +303,13 @@ pub fn system(cmd string) int { } fn popen(path string) *FILE { - cpath := path.str $if windows { - return C._popen(cpath, 'r') + mode := 'rb' + wpath := path.to_wide() + return C._wpopen(wpath, mode.to_wide()) } $else { + cpath := path.str return C.popen(cpath, 'r') } } @@ -258,7 +317,7 @@ fn popen(path string) *FILE { // exec starts the specified command, waits for it to complete, and returns its output. pub fn exec(cmd string) string { cmd = '$cmd 2>&1' - f := popen(cmd) + f := popen(cmd) if isnil(f) { // TODO optional or error code println('popen $cmd failed') @@ -273,12 +332,20 @@ pub fn exec(cmd string) string { } // `getenv` returns the value of the environment variable named by the key. -pub fn getenv(key string) string { - s := C.getenv(key.str) - if isnil(s) { - return '' +pub fn getenv(key string) string { + $if windows { + s := C._wgetenv(key.to_wide()) + if isnil(s) { + return '' + } + return string_from_wide(s) + } $else { + s := C.getenv(key.str) + if isnil(s) { + return '' + } + return string(s) } - return string(s) } pub fn setenv(name string, value string, overwrite bool) int { @@ -310,14 +377,17 @@ pub fn unsetenv(name string) int { // `file_exists` returns true if `path` exists. pub fn file_exists(path string) bool { $if windows { - return C._access( path.str, 0 ) != -1 + path = path.replace('/', '\\') + return C._waccess( path.to_wide(), 0 ) != -1 + } $else { + return C.access( path.str, 0 ) != -1 } - return C.access( path.str, 0 ) != -1 } pub fn dir_exists(path string) bool { $if windows { - attr := int(C.GetFileAttributes(path.str)) + path = path.replace('/', '\\') + attr := int(C.GetFileAttributes(path.to_wide())) if attr == INVALID_FILE_ATTRIBUTES { return false } @@ -345,7 +415,7 @@ pub fn mkdir(path string) { if path.last_index('\\') != -1 { mkdir(path.all_before_last('\\')) } - C.CreateDirectory(path.str, 0) + C.CreateDirectory(path.to_wide(), 0) } $else { C.mkdir(path.str, 511)// S_IRWXU | S_IRWXG | S_IRWXO @@ -354,8 +424,13 @@ pub fn mkdir(path string) { // rm removes file in `path`. pub fn rm(path string) { - C.remove(path.str) - // C.unlink(path.str) + $if windows { + C._wremove(path.to_wide()) + } + $else { + C.remove(path.str) + } + // C.unlink(path.cstr()) } @@ -365,7 +440,7 @@ pub fn rmdir(path string) { C.rmdir(path.str) } $else { - C.RemoveDirectoryA(path.str) + C.RemoveDirectory(path.to_wide()) } } @@ -435,20 +510,18 @@ pub fn get_line() string { // get_raw_line returns a one-line string from stdin along with '\n' if there is any pub fn get_raw_line() string { $if windows { - max := 256 - buf := malloc(max) + max := 512 // MAX_PATH * sizeof(wchar_t) + buf := &u16(malloc(max)) h_input := C.GetStdHandle(STD_INPUT_HANDLE) if h_input == INVALID_HANDLE_VALUE { panic('get_raw_line() error getting input handle.') } - nr_chars := 0 - // NOTE: Once we have UTF8 encode function to - // convert utf16 to utf8, change to ReadConsoleW + mut nr_chars := 0 C.ReadConsole(h_input, buf, max, &nr_chars, 0) if nr_chars == 0 { return '' } - return tos(buf, nr_chars) + return string_from_wide2(buf, nr_chars) } $else { //u64 is used because C.getline needs a size_t as second argument @@ -539,8 +612,8 @@ fn on_segfault(f voidptr) { } pub fn executable() string { - mut result := malloc(MAX_PATH) $if linux { + mut result := malloc(MAX_PATH) count := int(C.readlink('/proc/self/exe', result, MAX_PATH )) if count < 0 { panic('error reading /proc/self/exe to get exe path') @@ -548,10 +621,12 @@ pub fn executable() string { return string(result, count) } $if windows { - ret := int(C.GetModuleFileName( 0, result, MAX_PATH )) - return string( result, ret) + mut result := &u16(malloc(512)) // MAX_PATH * sizeof(wchar_t) + len := int(C.GetModuleFileName( 0, result, MAX_PATH )) + return string_from_wide2(result, len) } $if mac { + mut result := malloc(MAX_PATH) pid := C.getpid() ret := C.proc_pidpath (pid, result, MAX_PATH) if ret <= 0 { @@ -561,6 +636,7 @@ pub fn executable() string { return string(result) } $if freebsd { + mut result := malloc(MAX_PATH) mut mib := [1 /* CTL_KERN */, 14 /* KERN_PROC */, 12 /* KERN_PROC_PATHNAME */, -1]!! size := MAX_PATH C.sysctl(mib, 4, result, &size, 0, 0) @@ -572,6 +648,7 @@ pub fn executable() string { return os.args[0] } $if netbsd { + mut result := malloc(MAX_PATH) count := int(C.readlink('/proc/curproc/exe', result, MAX_PATH )) if count < 0 { panic('error reading /proc/curproc/exe to get exe path') @@ -579,6 +656,7 @@ pub fn executable() string { return string(result, count) } $if dragonfly { + mut result := malloc(MAX_PATH) count := int(C.readlink('/proc/curproc/file', result, MAX_PATH )) if count < 0 { panic('error reading /proc/curproc/file to get exe path') @@ -607,26 +685,29 @@ pub fn is_dir(path string) bool { pub fn chdir(path string) { $if windows { - C._chdir(path.str) + C._wchdir(path.to_wide()) } $else { C.chdir(path.str) } } -pub fn getwd() string { - buf := malloc(512) +pub fn getwd() string { $if windows { - if C._getcwd(buf, 512) == 0 { + max := 512 // MAX_PATH * sizeof(wchar_t) + buf := &u16(malloc(max)) + if C._wgetcwd(buf, max) == 0 { return '' } + return string_from_wide(buf) } - $else { + $else { + buf := malloc(512) if C.getcwd(buf, 512) == 0 { return '' } - } - return string(buf) + return string(buf) + } } // win: FILETIME @@ -637,7 +718,7 @@ struct filetime { } // win: WIN32_FIND_DATA -// https://docs.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-_win32_find_dataa +// https://docs.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-_win32_find_dataw struct win32finddata { mut: dwFileAttributes u32 @@ -674,13 +755,13 @@ pub fn ls(path string) []string { path_files := '$path\\*' // NOTE:TODO: once we have a way to convert utf16 wide character to utf8 // we should use FindFirstFileW and FindNextFileW - h_find_files := C.FindFirstFile(path_files.str, &find_file_data) - first_filename := tos(&find_file_data.cFileName, strlen(find_file_data.cFileName)) + h_find_files := C.FindFirstFile(path_files.to_wide(), &find_file_data) + first_filename := string_from_wide(&u16(find_file_data.cFileName)) if first_filename != '.' && first_filename != '..' { dir_files << first_filename } for C.FindNextFile(h_find_files, &find_file_data) { - filename := tos(&find_file_data.cFileName, strlen(find_file_data.cFileName)) + filename := string_from_wide(&u16(find_file_data.cFileName)) if filename != '.' && filename != '..' { dir_files << filename.clone() } diff --git a/vlib/os/os_win.v b/vlib/os/os_win.v index 02a49b6e88..75b0034c75 100644 --- a/vlib/os/os_win.v +++ b/vlib/os/os_win.v @@ -15,7 +15,7 @@ type HANDLE voidptr // get_file_handle retrieves the operating-system file handle that is associated with the specified file descriptor. pub fn get_file_handle(path string) HANDLE { mode := 'rb' - _fd := C.fopen(path.str, mode.str) + _fd := C._wfopen(path.to_wide(), mode.to_wide()) if _fd == 0 { return HANDLE(INVALID_HANDLE_VALUE) } @@ -27,17 +27,14 @@ pub fn get_file_handle(path string) HANDLE { // get_module_filename retrieves the fully qualified path for the file that contains the specified module. // The module must have been loaded by the current process. pub fn get_module_filename(handle HANDLE) ?string { - mut sz := int(1024) // Optimized length - mut buf := [byte(0); sz] // Not work for GetModuleFileNameW :( + mut sz := int(4096) // Optimized length + mut buf := &u16(malloc(4096)) for { status := C.GetModuleFileName(handle, &buf, sz) switch status { case SUCCESS: - _filename := tos(buf.data, sz) + _filename := string_from_wide2(buf, sz) return _filename - case ERROR_INSUFFICIENT_BUFFER: - sz += 1024 // increment buffer cluster by 1024 - buf = [byte(0); sz] // clear buffer default: // Must handled with GetLastError and converted by FormatMessage return error('Cannot get file name from handle.')