From b9011804fc87b762ac99840e604f18121e9759fd Mon Sep 17 00:00:00 2001
From: Alexander Medvednikov
Date: Thu, 27 Jun 2019 02:12:28 +0200
Subject: [PATCH] Revert "removed bits C code inside utf8 builtin"

This reverts commit f6a401aa025c603b58056a3d008fe03827796a34.
---
Review notes (free text between the "---" separator and the diffstat; not part
of the commit message):

- This patch swaps the pure-V bodies of utf32_to_str, utf32_to_str_no_malloc
  and utf32_code (decimal constants with the hex value in a trailing comment)
  back to `#`-prefixed inline-C statements that use the hex constants
  directly.
- utf32_to_str_no_malloc changes its second parameter from `buffer byteptr`
  to `buf voidptr`; the restored body re-binds it with `char* buffer = buf;`.
- `mut` is dropped from `buffer`, `b` and `res`; presumably because every
  write to them now happens inside `#` lines -- confirm against the compiler
  rules of that revision.
- In utf32_to_str the malloc(5) call precedes the range checks on both sides
  of the diff, so a code above 0x10FFFF falls through to `return ''` without
  the buffer being used.
- Every println line added by this patch is commented out.
- NOTE(review): the indentation and the blank context lines below were
  reconstructed so that the hunk line counts agree with the @@ headers and
  the diffstat; verify the whitespace against commit b9011804 before applying.

 builtin/utf8.v | 116 +++++++++++++++++++++++++++----------------------
 1 file changed, 63 insertions(+), 53 deletions(-)

diff --git a/builtin/utf8.v b/builtin/utf8.v
index 9da970dc67..d6fc0dfd2c 100644
--- a/builtin/utf8.v
+++ b/builtin/utf8.v
@@ -251,63 +251,71 @@ fn (s string) runes() []string {
 // Convert utf32 to utf8
 // utf32 == Codepoint
 fn utf32_to_str(code u32) string {
-	icode := int(code) //Prevents doing casts everywhere
-	mut buffer := malloc(5)
-	if icode <= 127 /* 0x7F */ {
-		buffer[0] = icode
-		return tos(buffer, 1)
-	}
-	if (icode <= 2047 /* 0x7FF */) {
-		buffer[0] = 192 /*0xC0*/ | (icode >> 6) /* 110xxxxx */
-		buffer[1] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */
-		return tos(buffer, 2)
-	}
-	if (icode <= 65535 /* 0xFFFF */) {
-		buffer[0] = 224 /*0xE0*/ | (icode >> 12) /* 1110xxxx */
-		buffer[1] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */
-		buffer[2] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */
-		return tos(buffer, 3)
-	}
-	if (icode <= 1114111 /* 0x10FFFF */) {
-		buffer[0] = 240 /*0xF0*/ | (icode >> 18) /* 11110xxx */
-		buffer[1] = 128 /*0x80*/ | ((icode >> 12) & 63 /*0x3F*/) /* 10xxxxxx */
-		buffer[2] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */
-		buffer[3] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */
-		return tos(buffer, 4)
-	}
+	// println('code = $code')
+	buffer := malloc(5)
+	# if (code <= 0x7F) {
+	// println('!!!!!!!1')
+	# buffer[0] = code;
+	# return tos(buffer, 1);
+	# }
+	# if (code <= 0x7FF) {
+	// println('!!!!!!!2')
+	# buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */
+	# buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */
+	# return tos(buffer, 2);
+	# }
+	# if (code <= 0xFFFF) {
+	// println('!!!!!!!3')
+	# buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */
+	# buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */
+	# buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */
+	# return tos(buffer, 3);
+	# }
+	# if (code <= 0x10FFFF) {
+	# buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */
+	# buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */
+	# buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */
+	# buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */
+	# return tos(buffer, 4);
+	# }
 	return ''
 }
 
 // TODO copypasta
-fn utf32_to_str_no_malloc(code u32, buffer byteptr) string {
-	icode := int(code) //Prevents doing casts everywhere
-	if icode <= 127 /* 0x7F */ {
-		buffer[0] = icode
-		return tos(buffer, 1)
-	}
-	if (icode <= 2047 /* 0x7FF */) {
-		buffer[0] = 192 /*0xC0*/ | (icode >> 6) /* 110xxxxx */
-		buffer[1] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */
-		return tos(buffer, 2)
-	}
-	if (icode <= 65535 /* 0xFFFF */) {
-		buffer[0] = 224 /*0xE0*/ | (icode >> 12) /* 1110xxxx */
-		buffer[1] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */
-		buffer[2] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */
-		return tos(buffer, 3)
-	}
-	if (icode <= 1114111 /* 0x10FFFF */) {
-		buffer[0] = 240 /*0xF0*/ | (icode >> 18) /* 11110xxx */
-		buffer[1] = 128 /*0x80*/ | ((icode >> 12) & 63 /*0x3F*/) /* 10xxxxxx */
-		buffer[2] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */
-		buffer[3] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */
-		return tos(buffer, 4)
-	}
+fn utf32_to_str_no_malloc(code u32, buf voidptr) string {
+	// println('code = $code')
+	# char* buffer = buf;
+	# if (code <= 0x7F) {
+	// println('!!!!!!!1')
+	# buffer[0] = code;
+	# return tos(buffer, 1);
+	# }
+	# if (code <= 0x7FF) {
+	// println('!!!!!!!2')
+	# buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */
+	# buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */
+	# return tos(buffer, 2);
+	# }
+	# if (code <= 0xFFFF) {
+	// println('!!!!!!!3')
+	# buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */
+	# buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */
+	# buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */
+	# return tos(buffer, 3);
+	# }
+	# if (code <= 0x10FFFF) {
+	# buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */
+	# buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */
+	# buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */
+	# buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */
+	# return tos(buffer, 4);
+	# }
 	return ''
 }
 
 // Convert utf8 to utf32
 fn (_rune string) utf32_code() int {
+	// println('utf 32 of $rune len=$rune.len')
 	if _rune.len == 0 {
 		return 0
 	}
@@ -315,18 +323,20 @@ fn (_rune string) utf32_code() int {
 	if _rune.len == 1 {
 		return int(_rune[0])
 	}
-	mut b := byte(int(_rune[0]))
+	b := byte(int(_rune[0]))
 	// TODO should be
 	// res := int( rune[0] << rune.len)
-	b <<= _rune.len
-	mut res := int(b)
+	# b <<= _rune.len;
+	res := int(b)
 	mut shift := 6 - _rune.len
 	for i := 1; i < _rune.len; i++ {
+		// println('c=$res')
 		c := int(_rune[i])
-		res <<= shift
-		res |= c & 63 /* 0x3f */
+		# res <<= shift;
+		# res |= c & 0x3f;
 		shift = 6
 	}
+	// println('!!!!!!!! utf32 $rune res = $res')
 	return res
 }
 