From b61d2ac346a16cc3c3a65225752e617833a85e19 Mon Sep 17 00:00:00 2001 From: Alexander Medvednikov Date: Thu, 27 Jun 2019 12:06:32 +0200 Subject: [PATCH] Revert "compiler & builtin: bitshifts CAO fix and C code removal in utf8" This reverts commit 223c35ffb90240e5a17bd4e1ea280847922c44e4. --- builtin/utf8.v | 116 ++++++++++++++++++++++++--------------------- compiler/scanner.v | 2 +- 2 files changed, 64 insertions(+), 54 deletions(-) diff --git a/builtin/utf8.v b/builtin/utf8.v index 9da970dc67..d6fc0dfd2c 100644 --- a/builtin/utf8.v +++ b/builtin/utf8.v @@ -251,63 +251,71 @@ fn (s string) runes() []string { // Convert utf32 to utf8 // utf32 == Codepoint fn utf32_to_str(code u32) string { - icode := int(code) //Prevents doing casts everywhere - mut buffer := malloc(5) - if icode <= 127 /* 0x7F */ { - buffer[0] = icode - return tos(buffer, 1) - } - if (icode <= 2047 /* 0x7FF */) { - buffer[0] = 192 /*0xC0*/ | (icode >> 6) /* 110xxxxx */ - buffer[1] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ - return tos(buffer, 2) - } - if (icode <= 65535 /* 0xFFFF */) { - buffer[0] = 224 /*0xE0*/ | (icode >> 12) /* 1110xxxx */ - buffer[1] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */ - buffer[2] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ - return tos(buffer, 3) - } - if (icode <= 1114111 /* 0x10FFFF */) { - buffer[0] = 240 /*0xF0*/ | (icode >> 18) /* 11110xxx */ - buffer[1] = 128 /*0x80*/ | ((icode >> 12) & 63 /*0x3F*/) /* 10xxxxxx */ - buffer[2] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */ - buffer[3] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ - return tos(buffer, 4) - } + // println('code = $code') + buffer := malloc(5) + # if (code <= 0x7F) { + // println('!!!!!!!1') + # buffer[0] = code; + # return tos(buffer, 1); + # } + # if (code <= 0x7FF) { + // println('!!!!!!!2') + # buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */ + # buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + # return tos(buffer, 2); + # } + # if (code <= 0xFFFF) { + // println('!!!!!!!3') + # buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */ + # buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + # buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + # return tos(buffer, 3); + # } + # if (code <= 0x10FFFF) { + # buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */ + # buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */ + # buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + # buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + # return tos(buffer, 4); + # } return '' } // TODO copypasta -fn utf32_to_str_no_malloc(code u32, buffer byteptr) string { - icode := int(code) //Prevents doing casts everywhere - if icode <= 127 /* 0x7F */ { - buffer[0] = icode - return tos(buffer, 1) - } - if (icode <= 2047 /* 0x7FF */) { - buffer[0] = 192 /*0xC0*/ | (icode >> 6) /* 110xxxxx */ - buffer[1] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ - return tos(buffer, 2) - } - if (icode <= 65535 /* 0xFFFF */) { - buffer[0] = 224 /*0xE0*/ | (icode >> 12) /* 1110xxxx */ - buffer[1] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */ - buffer[2] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ - return tos(buffer, 3) - } - if (icode <= 1114111 /* 0x10FFFF */) { - buffer[0] = 240 /*0xF0*/ | (icode >> 18) /* 11110xxx */ - buffer[1] = 128 /*0x80*/ | ((icode >> 12) & 63 /*0x3F*/) /* 10xxxxxx */ - buffer[2] = 128 /*0x80*/ | ((icode >> 6) & 63 /*0x3F*/) /* 10xxxxxx */ - buffer[3] = 128 /*0x80*/ | (icode & 63 /*0x3F*/) /* 10xxxxxx */ - return tos(buffer, 4) - } +fn utf32_to_str_no_malloc(code u32, buf voidptr) string { + // println('code = $code') + # char* buffer = buf; + # if (code <= 0x7F) { + // println('!!!!!!!1') + # buffer[0] = code; + # return tos(buffer, 1); + # } + # if (code <= 0x7FF) { + // println('!!!!!!!2') + # buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */ + # buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + # return tos(buffer, 2); + # } + # if (code <= 0xFFFF) { + // println('!!!!!!!3') + # buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */ + # buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + # buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + # return tos(buffer, 3); + # } + # if (code <= 0x10FFFF) { + # buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */ + # buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */ + # buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + # buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + # return tos(buffer, 4); + # } return '' } // Convert utf8 to utf32 fn (_rune string) utf32_code() int { + // println('utf 32 of $rune len=$rune.len') if _rune.len == 0 { return 0 } @@ -315,18 +323,20 @@ fn (_rune string) utf32_code() int { if _rune.len == 1 { return int(_rune[0]) } - mut b := byte(int(_rune[0])) + b := byte(int(_rune[0])) // TODO should be // res := int( rune[0] << rune.len) - b <<= _rune.len - mut res := int(b) + # b <<= _rune.len; + res := int(b) mut shift := 6 - _rune.len for i := 1; i < _rune.len; i++ { + // println('c=$res') c := int(_rune[i]) - res <<= shift - res |= c & 63 /* 0x3f */ + # res <<= shift; + # res |= c & 0x3f; shift = 6 } + // println('!!!!!!!! utf32 $rune res = $res') return res } diff --git a/compiler/scanner.v b/compiler/scanner.v index 286470bdc4..d3a8a1dfa4 100644 --- a/compiler/scanner.v +++ b/compiler/scanner.v @@ -138,7 +138,7 @@ fn (s mut Scanner) get_var_name(pos int) string { // CAO stands for Compound Assignment Operators (e.g '+=' ) fn (s mut Scanner) cao_change(operator string) { - s.text = s.text.substr(0, s.pos - operator.len) + ' = ' + s.get_var_name(s.pos - operator.len) + ' ' + operator + ' ' + s.text.substr(s.pos + 1, s.text.len) + s.text = s.text.substr(0, s.pos - 1) + ' = ' + s.get_var_name(s.pos - 1) + ' ' + operator + ' ' + s.text.substr(s.pos + 1, s.text.len) } fn (s mut Scanner) scan() ScanRes {