diff --git a/builtin/utf8.v b/builtin/utf8.v index 46bd0c6fa8..21d135ac2a 100644 --- a/builtin/utf8.v +++ b/builtin/utf8.v @@ -251,71 +251,64 @@ fn (s string) runes() []string { // Convert utf32 to utf8 // utf32 == Codepoint pub fn utf32_to_str(code u32) string { - // println('code = $code') - buffer := malloc(5) - # if (code <= 0x7F) { - // println('!!!!!!!1') - # buffer[0] = code; - # return tos(buffer, 1); - # } - # if (code <= 0x7FF) { - // println('!!!!!!!2') - # buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */ - # buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */ - # return tos(buffer, 2); - # } - # if (code <= 0xFFFF) { - // println('!!!!!!!3') - # buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */ - # buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ - # buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */ - # return tos(buffer, 3); - # } - # if (code <= 0x10FFFF) { - # buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */ - # buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */ - # buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ - # buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */ - # return tos(buffer, 4); - # } + icode := int(code) //Prevents doing casts everywhere + mut buffer := malloc(5) + if icode <= 127 /* 0x7F */ { + buffer[0] = icode + return tos(buffer, 1) + } + if (icode <= 2047 /* 0x7FF */) { + buffer[0] = 192 /*0xC0*/ | (icode >> 6) /* 110xxxxx */ + buffer[1] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ + return tos(buffer, 2) + } + if (icode <= 65535 /* 0xFFFF */) { + buffer[0] = 224 /*0xE0*/ | (icode >> 12) /* 1110xxxx */ + buffer[1] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */ + buffer[2] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ + return tos(buffer, 3) + } + if (icode <= 1114111 /* 0x10FFFF */) { + buffer[0] = 240 /*0xF0*/ | (icode >> 18) /* 11110xxx */ + buffer[1] = 128 /*0x80*/ | ((icode >> 12) & 63 /*0x3F*/) /* 10xxxxxx */ + buffer[2] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */ + buffer[3] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ + return tos(buffer, 4) + } return '' } // TODO copypasta pub fn utf32_to_str_no_malloc(code u32, buf voidptr) string { - // println('code = $code') - # char* buffer = buf; - # if (code <= 0x7F) { - // println('!!!!!!!1') - # buffer[0] = code; - # return tos(buffer, 1); - # } - # if (code <= 0x7FF) { - // println('!!!!!!!2') - # buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */ - # buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */ - # return tos(buffer, 2); - # } - # if (code <= 0xFFFF) { - // println('!!!!!!!3') - # buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */ - # buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ - # buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */ - # return tos(buffer, 3); - # } - # if (code <= 0x10FFFF) { - # buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */ - # buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */ - # buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ - # buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */ - # return tos(buffer, 4); - # } + icode := int(code) //Prevents doing casts everywhere + mut buffer := byteptr(buf) + if icode <= 127 /* 0x7F */ { + buffer[0] = icode + return tos(buffer, 1) + } + if (icode <= 2047 /* 0x7FF */) { + buffer[0] = 192 /*0xC0*/ | (icode >> 6) /* 110xxxxx */ + buffer[1] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ + return tos(buffer, 2) + } + if (icode <= 65535 /* 0xFFFF */) { + buffer[0] = 224 /*0xE0*/ | (icode >> 12) /* 1110xxxx */ + buffer[1] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */ + buffer[2] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ + return tos(buffer, 3) + } + if (icode <= 1114111 /* 0x10FFFF */) { + buffer[0] = 240 /*0xF0*/ | (icode >> 18) /* 11110xxx */ + buffer[1] = 128 /*0x80*/ | ((icode >> 12) & 63 /*0x3F*/) /* 10xxxxxx */ + buffer[2] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */ + buffer[3] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ + return tos(buffer, 4) + } return '' } // Convert utf8 to utf32 pub fn (_rune string) utf32_code() int { - // println('utf 32 of $rune len=$rune.len') if _rune.len == 0 { return 0 } @@ -323,20 +316,18 @@ pub fn (_rune string) utf32_code() int { if _rune.len == 1 { return int(_rune[0]) } - b := byte(int(_rune[0])) + mut b := byte(int(_rune[0])) // TODO should be // res := int( rune[0] << rune.len) - # b <<= _rune.len; - res := int(b) + b = b << _rune.len + mut res := int(b) mut shift := 6 - _rune.len for i := 1; i < _rune.len; i++ { - // println('c=$res') c := int(_rune[i]) - # res <<= shift; - # res |= c & 0x3f; + res = res << shift + res |= c & 63 /* 0x3f */ shift = 6 } - // println('!!!!!!!! utf32 $rune res = $res') return res } diff --git a/compiler/scanner.v b/compiler/scanner.v index d3a8a1dfa4..286470bdc4 100644 --- a/compiler/scanner.v +++ b/compiler/scanner.v @@ -138,7 +138,7 @@ fn (s mut Scanner) get_var_name(pos int) string { // CAO stands for Compound Assignment Operators (e.g '+=' ) fn (s mut Scanner) cao_change(operator string) { - s.text = s.text.substr(0, s.pos - 1) + ' = ' + s.get_var_name(s.pos - 1) + ' ' + operator + ' ' + s.text.substr(s.pos + 1, s.text.len) + s.text = s.text.substr(0, s.pos - operator.len) + ' = ' + s.get_var_name(s.pos - operator.len) + ' ' + operator + ' ' + s.text.substr(s.pos + 1, s.text.len) } fn (s mut Scanner) scan() ScanRes {