builtin: correct error underline for unicode wide chars (#9010)
							parent
							
								
									e937d6249c
								
							
						
					
					
						commit
						ce115dcbe0
					
				| 
						 | 
					@ -22,26 +22,28 @@ pub fn utf32_to_str_no_malloc(code u32, buf voidptr) string {
 | 
				
			||||||
	mut res := ''
 | 
						mut res := ''
 | 
				
			||||||
	unsafe {
 | 
						unsafe {
 | 
				
			||||||
		mut buffer := byteptr(buf)
 | 
							mut buffer := byteptr(buf)
 | 
				
			||||||
		if icode <= 127 { /* 0x7F */
 | 
							if icode <= 127 {
 | 
				
			||||||
 | 
								// 0x7F
 | 
				
			||||||
			buffer[0] = byte(icode)
 | 
								buffer[0] = byte(icode)
 | 
				
			||||||
			res = tos(buffer, 1)
 | 
								res = tos(buffer, 1)
 | 
				
			||||||
		}
 | 
							} else if icode <= 2047 {
 | 
				
			||||||
		else if icode <= 2047 { /* 0x7FF */
 | 
								// 0x7FF
 | 
				
			||||||
			buffer[0] = 192 | byte(icode>>6)  /* 0xC0 - 110xxxxx */
 | 
								buffer[0] = 192 | byte(icode >> 6) // 0xC0 - 110xxxxx
 | 
				
			||||||
			buffer[1] = 128 | byte(icode & 63) /* 0x80 - 0x3F - 10xxxxxx */
 | 
								buffer[1] = 128 | byte(icode & 63) // 0x80 - 0x3F - 10xxxxxx
 | 
				
			||||||
			res = tos(buffer, 2)
 | 
								res = tos(buffer, 2)
 | 
				
			||||||
		}
 | 
							} else if icode <= 65535 {
 | 
				
			||||||
		else if icode <= 65535 { /* 0xFFFF */
 | 
								// 0xFFFF
 | 
				
			||||||
			buffer[0] = 224 | byte(icode>>12)/* 0xE0 - 1110xxxx */
 | 
								buffer[0] = 224 | byte(icode >> 12) // 0xE0 - 1110xxxx
 | 
				
			||||||
			buffer[1] = 128 | (byte(icode>>6) & 63) /* 0x80 - 0x3F - 10xxxxxx */
 | 
								buffer[1] = 128 | (byte(icode >> 6) & 63) // 0x80 - 0x3F - 10xxxxxx
 | 
				
			||||||
			buffer[2] = 128 | byte(icode & 63) /* 0x80 - 0x3F - 10xxxxxx */
 | 
								buffer[2] = 128 | byte(icode & 63) // 0x80 - 0x3F - 10xxxxxx
 | 
				
			||||||
			res = tos(buffer, 3)
 | 
								res = tos(buffer, 3)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		else if icode <= 1114111/* 0x10FFFF */ {
 | 
							// 0x10FFFF
 | 
				
			||||||
			buffer[0] = 240 | byte(icode>>18)  /* 0xF0 - 11110xxx */
 | 
							else if icode <= 1114111 {
 | 
				
			||||||
			buffer[1] = 128 | (byte(icode>>12) & 63) /* 0x80 - 0x3F - 10xxxxxx */
 | 
								buffer[0] = 240 | byte(icode >> 18) // 0xF0 - 11110xxx
 | 
				
			||||||
			buffer[2] = 128 | (byte(icode>>6) & 63) /* 0x80 - 0x3F - 10xxxxxx */
 | 
								buffer[1] = 128 | (byte(icode >> 12) & 63) // 0x80 - 0x3F - 10xxxxxx
 | 
				
			||||||
			buffer[3] = 128 | byte(icode & 63) /* 0x80 - 0x3F - 10xxxxxx */
 | 
								buffer[2] = 128 | (byte(icode >> 6) & 63) // 0x80 - 0x3F - 10xxxxxx
 | 
				
			||||||
 | 
								buffer[3] = 128 | byte(icode & 63) // 0x80 - 0x3F - 10xxxxxx
 | 
				
			||||||
			res = tos(buffer, 4)
 | 
								res = tos(buffer, 4)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -80,15 +82,13 @@ fn utf8_len(c byte) int {
 | 
				
			||||||
	if (x & 240) != 0 {
 | 
						if (x & 240) != 0 {
 | 
				
			||||||
		// 0xF0
 | 
							// 0xF0
 | 
				
			||||||
		x >>= 4
 | 
							x >>= 4
 | 
				
			||||||
	}
 | 
						} else {
 | 
				
			||||||
	else {
 | 
					 | 
				
			||||||
		b += 4
 | 
							b += 4
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (x & 12) != 0 {
 | 
						if (x & 12) != 0 {
 | 
				
			||||||
		// 0x0C
 | 
							// 0x0C
 | 
				
			||||||
		x >>= 2
 | 
							x >>= 2
 | 
				
			||||||
	}
 | 
						} else {
 | 
				
			||||||
	else {
 | 
					 | 
				
			||||||
		b += 2
 | 
							b += 2
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (x & 2) == 0 {
 | 
						if (x & 2) == 0 {
 | 
				
			||||||
| 
						 | 
					@ -114,7 +114,9 @@ fn utf8_str_len(s string) int {
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Calculate string length for formatting, i.e. number of "characters"
 | 
					// Calculate string length for formatting, i.e. number of "characters"
 | 
				
			||||||
fn utf8_str_visible_length(s string) int {
 | 
					// This is a simplified implementation. If you need specification-compliant width,
 | 
				
			||||||
 | 
					// use utf8.east_asian.display_width.
 | 
				
			||||||
 | 
					pub fn utf8_str_visible_length(s string) int {
 | 
				
			||||||
	mut l := 0
 | 
						mut l := 0
 | 
				
			||||||
	mut ul := 1
 | 
						mut ul := 1
 | 
				
			||||||
	for i := 0; i < s.len; i += ul {
 | 
						for i := 0; i < s.len; i += ul {
 | 
				
			||||||
| 
						 | 
					@ -129,24 +131,58 @@ fn utf8_str_visible_length(s string) int {
 | 
				
			||||||
			return l
 | 
								return l
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		l++
 | 
							l++
 | 
				
			||||||
		// recognize combining characters
 | 
							// recognize combining characters and wide characters
 | 
				
			||||||
		if c == 0xcc || c == 0xcd {
 | 
							match ul {
 | 
				
			||||||
			r := (u16(c) << 8) | unsafe {s.str[i+1]}
 | 
								2 {
 | 
				
			||||||
			if r >= 0xcc80 && r < 0xcdb0 { // diacritical marks
 | 
									r := u64((u16(c) << 8) | unsafe { s.str[i + 1] })
 | 
				
			||||||
 | 
									if r >= 0xcc80 && r < 0xcdb0 {
 | 
				
			||||||
 | 
										// diacritical marks
 | 
				
			||||||
					l--
 | 
										l--
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
		} else if c == 0xe1 || c == 0xe2 || c == 0xef {
 | 
								}
 | 
				
			||||||
			r := (u32(c) << 16) | unsafe {(u32(s.str[i+1]) << 8) | s.str[i+2]}
 | 
								3 {
 | 
				
			||||||
			// diacritical marks extended 0xe1aab0 - 0xe1ac80
 | 
									r := u64((u32(c) << 16) | unsafe { (u32(s.str[i + 1]) << 8) | s.str[i + 2] })
 | 
				
			||||||
			// diacritical marks supplement 0xe1b780 - 0xe1b880
 | 
									// diacritical marks extended
 | 
				
			||||||
			// diacritical marks for symbols 0xe28390 - 0xe28480
 | 
									// diacritical marks supplement
 | 
				
			||||||
			// half marks 0xefb8a0 - 0xefb8b0
 | 
									// diacritical marks for symbols
 | 
				
			||||||
			if (r >= 0xe1aab0 && r < 0xe1ac80)
 | 
									if (r >= 0xe1aab0 && r <= 0xe1ac7f)
 | 
				
			||||||
			|| (r >= 0xe1b780 && r < 0xe1b880)
 | 
										|| (r >= 0xe1b780 && r <= 0xe1b87f)
 | 
				
			||||||
			|| (r >= 0xe28390 && r < 0xe28480)
 | 
										|| (r >= 0xe28390 && r <= 0xe2847f)
 | 
				
			||||||
			|| (r >= 0xefb8a0 && r < 0xefb8b0) {
 | 
										|| (r >= 0xefb8a0 && r <= 0xefb8af) {
 | 
				
			||||||
 | 
										// diacritical marks
 | 
				
			||||||
					l--
 | 
										l--
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
 | 
									// Hangul
 | 
				
			||||||
 | 
									// CJK Unified Ideographs
 | 
				
			||||||
 | 
									// Hangul
 | 
				
			||||||
 | 
									// CJK
 | 
				
			||||||
 | 
									else if (r >= 0xe18480 && r <= 0xe1859f)
 | 
				
			||||||
 | 
										|| (r >= 0xe2ba80 && r <= 0xe2bf95)
 | 
				
			||||||
 | 
										|| (r >= 0xe38080 && r <= 0xe4b77f)
 | 
				
			||||||
 | 
										|| (r >= 0xe4b880 && r <= 0xea807f)
 | 
				
			||||||
 | 
										|| (r >= 0xeaa5a0 && r <= 0xeaa79f)
 | 
				
			||||||
 | 
										|| (r >= 0xeab080 && r <= 0xed9eaf)
 | 
				
			||||||
 | 
										|| (r >= 0xefa480 && r <= 0xefac7f)
 | 
				
			||||||
 | 
										|| (r >= 0xefb8b8 && r <= 0xefb9af) {
 | 
				
			||||||
 | 
										// wide characters: count one extra column
 | 
				
			||||||
 | 
										l++
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								4 {
 | 
				
			||||||
 | 
									r := u64((u32(c) << 24) | unsafe {
 | 
				
			||||||
 | 
										(u32(s.str[i + 1]) << 16) | (u32(s.str[i + 2]) << 8) | s.str[i + 3]
 | 
				
			||||||
 | 
									})
 | 
				
			||||||
 | 
									// Enclosed Ideographic Supplement
 | 
				
			||||||
 | 
									// Emoji
 | 
				
			||||||
 | 
									// CJK Unified Ideographs Extension B-G
 | 
				
			||||||
 | 
									if (r >= 0x0f9f8880 && r <= 0xf09f8a8f)
 | 
				
			||||||
 | 
										|| (r >= 0xf09f8c80 && r <= 0xf09f9c90)
 | 
				
			||||||
 | 
										|| (r >= 0xf09fa490 && r <= 0xf09fa7af)
 | 
				
			||||||
 | 
										|| (r >= 0xff0a08080 && r <= 0xf180807f) {
 | 
				
			||||||
 | 
										l++
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								else {}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return l
 | 
						return l
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4407,15 +4407,15 @@ fn (mut c Checker) match_exprs(mut node ast.MatchExpr, cond_type_sym table.TypeS
 | 
				
			||||||
		for expr in branch.exprs {
 | 
							for expr in branch.exprs {
 | 
				
			||||||
			mut key := ''
 | 
								mut key := ''
 | 
				
			||||||
			if expr is ast.RangeExpr {
 | 
								if expr is ast.RangeExpr {
 | 
				
			||||||
				mut low := 0
 | 
									mut low := i64(0)
 | 
				
			||||||
				mut high := 0
 | 
									mut high := i64(0)
 | 
				
			||||||
				c.expected_type = node.expected_type
 | 
									c.expected_type = node.expected_type
 | 
				
			||||||
				low_expr := expr.low
 | 
									low_expr := expr.low
 | 
				
			||||||
				high_expr := expr.high
 | 
									high_expr := expr.high
 | 
				
			||||||
				if low_expr is ast.IntegerLiteral {
 | 
									if low_expr is ast.IntegerLiteral {
 | 
				
			||||||
					if high_expr is ast.IntegerLiteral {
 | 
										if high_expr is ast.IntegerLiteral {
 | 
				
			||||||
						low = low_expr.val.int()
 | 
											low = low_expr.val.i64()
 | 
				
			||||||
						high = high_expr.val.int()
 | 
											high = high_expr.val.i64()
 | 
				
			||||||
					} else {
 | 
										} else {
 | 
				
			||||||
						c.error('mismatched range types', low_expr.pos)
 | 
											c.error('mismatched range types', low_expr.pos)
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
| 
						 | 
					@ -4430,6 +4430,11 @@ fn (mut c Checker) match_exprs(mut node ast.MatchExpr, cond_type_sym table.TypeS
 | 
				
			||||||
					typ := c.table.type_to_str(c.expr(expr.low))
 | 
										typ := c.table.type_to_str(c.expr(expr.low))
 | 
				
			||||||
					c.error('cannot use type `$typ` in match range', branch.pos)
 | 
										c.error('cannot use type `$typ` in match range', branch.pos)
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
 | 
									high_low_cutoff := 1000
 | 
				
			||||||
 | 
									if high - low > high_low_cutoff {
 | 
				
			||||||
 | 
										c.warn('more than $high_low_cutoff possibilities ($low ... $high) in match range',
 | 
				
			||||||
 | 
											branch.pos)
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
				for i in low .. high + 1 {
 | 
									for i in low .. high + 1 {
 | 
				
			||||||
					key = i.str()
 | 
										key = i.str()
 | 
				
			||||||
					val := if key in branch_exprs { branch_exprs[key] } else { 0 }
 | 
										val := if key in branch_exprs { branch_exprs[key] } else { 0 }
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,55 @@
 | 
				
			||||||
 | 
					vlib/v/checker/tests/error_with_unicode.vv:5:17: error: cannot use `int literal` as `string` in argument 2 to `f1`
 | 
				
			||||||
 | 
					    3 |
 | 
				
			||||||
 | 
					    4 | fn main() {
 | 
				
			||||||
 | 
					    5 |     f1('🐀🐈', 0)
 | 
				
			||||||
 | 
					      |                ^
 | 
				
			||||||
 | 
					    6 |     f2(0, '🐟🐧')
 | 
				
			||||||
 | 
					    7 |     mut n := 0
 | 
				
			||||||
 | 
					vlib/v/checker/tests/error_with_unicode.vv:6:8: error: cannot use `string` as `int` in argument 2 to `f2`
 | 
				
			||||||
 | 
					    4 | fn main() {
 | 
				
			||||||
 | 
					    5 |     f1('🐀🐈', 0)
 | 
				
			||||||
 | 
					    6 |     f2(0, '🐟🐧')
 | 
				
			||||||
 | 
					      |           ~~~~~~
 | 
				
			||||||
 | 
					    7 |     mut n := 0
 | 
				
			||||||
 | 
					    8 |     n = '漢字'
 | 
				
			||||||
 | 
					vlib/v/checker/tests/error_with_unicode.vv:8:6: error: cannot assign to `n`: expected `int`, not `string`
 | 
				
			||||||
 | 
					    6 |     f2(0, '🐟🐧')
 | 
				
			||||||
 | 
					    7 |     mut n := 0
 | 
				
			||||||
 | 
					    8 |     n = '漢字'
 | 
				
			||||||
 | 
					      |         ~~~~~~
 | 
				
			||||||
 | 
					    9 |     n = 'ひらがな'
 | 
				
			||||||
 | 
					   10 |     n = '简体字'
 | 
				
			||||||
 | 
					vlib/v/checker/tests/error_with_unicode.vv:9:6: error: cannot assign to `n`: expected `int`, not `string`
 | 
				
			||||||
 | 
					    7 |     mut n := 0
 | 
				
			||||||
 | 
					    8 |     n = '漢字'
 | 
				
			||||||
 | 
					    9 |     n = 'ひらがな'
 | 
				
			||||||
 | 
					      |         ~~~~~~~~~~
 | 
				
			||||||
 | 
					   10 |     n = '简体字'
 | 
				
			||||||
 | 
					   11 |     n = '繁體字'
 | 
				
			||||||
 | 
					vlib/v/checker/tests/error_with_unicode.vv:10:6: error: cannot assign to `n`: expected `int`, not `string`
 | 
				
			||||||
 | 
					    8 |     n = '漢字'
 | 
				
			||||||
 | 
					    9 |     n = 'ひらがな'
 | 
				
			||||||
 | 
					   10 |     n = '简体字'
 | 
				
			||||||
 | 
					      |         ~~~~~~~~
 | 
				
			||||||
 | 
					   11 |     n = '繁體字'
 | 
				
			||||||
 | 
					   12 |     n = '한글'
 | 
				
			||||||
 | 
					vlib/v/checker/tests/error_with_unicode.vv:11:6: error: cannot assign to `n`: expected `int`, not `string`
 | 
				
			||||||
 | 
					    9 |     n = 'ひらがな'
 | 
				
			||||||
 | 
					   10 |     n = '简体字'
 | 
				
			||||||
 | 
					   11 |     n = '繁體字'
 | 
				
			||||||
 | 
					      |         ~~~~~~~~
 | 
				
			||||||
 | 
					   12 |     n = '한글'
 | 
				
			||||||
 | 
					   13 |     n = 'Кириллица'
 | 
				
			||||||
 | 
					vlib/v/checker/tests/error_with_unicode.vv:12:6: error: cannot assign to `n`: expected `int`, not `string`
 | 
				
			||||||
 | 
					   10 |     n = '简体字'
 | 
				
			||||||
 | 
					   11 |     n = '繁體字'
 | 
				
			||||||
 | 
					   12 |     n = '한글'
 | 
				
			||||||
 | 
					      |         ~~~~~~
 | 
				
			||||||
 | 
					   13 |     n = 'Кириллица'
 | 
				
			||||||
 | 
					   14 | }
 | 
				
			||||||
 | 
					vlib/v/checker/tests/error_with_unicode.vv:13:6: error: cannot assign to `n`: expected `int`, not `string`
 | 
				
			||||||
 | 
					   11 |     n = '繁體字'
 | 
				
			||||||
 | 
					   12 |     n = '한글'
 | 
				
			||||||
 | 
					   13 |     n = 'Кириллица'
 | 
				
			||||||
 | 
					      |         ~~~~~~~~~~~
 | 
				
			||||||
 | 
					   14 | }
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,14 @@
 | 
				
			||||||
 | 
					fn f1(_ string, _ string) {}
 | 
				
			||||||
 | 
					fn f2(_ int, _ int) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn main() {
 | 
				
			||||||
 | 
						f1('🐀🐈', 0)
 | 
				
			||||||
 | 
						f2(0, '🐟🐧')
 | 
				
			||||||
 | 
						mut n := 0
 | 
				
			||||||
 | 
						n = '漢字'
 | 
				
			||||||
 | 
						n = 'ひらがな'
 | 
				
			||||||
 | 
						n = '简体字'
 | 
				
			||||||
 | 
						n = '繁體字'
 | 
				
			||||||
 | 
						n = '한글'
 | 
				
			||||||
 | 
						n = 'Кириллица'
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -4,6 +4,7 @@
 | 
				
			||||||
module util
 | 
					module util
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
 | 
					import strings
 | 
				
			||||||
import term
 | 
					import term
 | 
				
			||||||
import v.token
 | 
					import v.token
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -132,14 +133,22 @@ pub fn source_context(kind string, source string, column int, pos token.Position
 | 
				
			||||||
			// line, so that it prints the ^ character exactly on the *same spot*
 | 
								// line, so that it prints the ^ character exactly on the *same spot*
 | 
				
			||||||
			// where it is needed. That is the reason we can not just
 | 
								// where it is needed. That is the reason we can not just
 | 
				
			||||||
			// use strings.repeat(` `, col) to form it.
 | 
								// use strings.repeat(` `, col) to form it.
 | 
				
			||||||
			mut pointerline := ''
 | 
								mut pointerline_builder := strings.new_builder(sline.len)
 | 
				
			||||||
			for bchar in sline[..start_column] {
 | 
								for i := 0; i < start_column; {
 | 
				
			||||||
				x := if bchar.is_space() { bchar } else { ` ` }
 | 
									if sline[i].is_space() {
 | 
				
			||||||
				pointerline += x.ascii_str()
 | 
										pointerline_builder.write_b(sline[i])
 | 
				
			||||||
 | 
										i++
 | 
				
			||||||
 | 
									} else {
 | 
				
			||||||
 | 
										char_len := utf8_char_len(sline[i])
 | 
				
			||||||
 | 
										spaces := ' '.repeat(utf8_str_visible_length(sline[i..i + char_len]))
 | 
				
			||||||
 | 
										pointerline_builder.write_string(spaces)
 | 
				
			||||||
 | 
										i += char_len
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			underline := if pos.len > 1 { '~'.repeat(end_column - start_column) } else { '^' }
 | 
								}
 | 
				
			||||||
			pointerline += bold(color(kind, underline))
 | 
								underline_len := utf8_str_visible_length(sline[start_column..end_column])
 | 
				
			||||||
			clines << '      | ' + pointerline.replace('\t', tab_spaces)
 | 
								underline := if underline_len > 1 { '~'.repeat(underline_len) } else { '^' }
 | 
				
			||||||
 | 
								pointerline_builder.write_string(bold(color(kind, underline)))
 | 
				
			||||||
 | 
								clines << '      | ' + pointerline_builder.str().replace('\t', tab_spaces)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return clines
 | 
						return clines
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue