toml: check for illegal characters, fix all related skipped tests (#12270)
							parent
							
								
									eb364f0301
								
							
						
					
					
						commit
						8273c0582b
					
				|  | @ -175,7 +175,7 @@ pub fn (mut s Scanner) scan() ?token.Token { | |||
| 			} | ||||
| 			`#` { | ||||
| 				start := s.pos //+ 1
 | ||||
| 				s.ignore_line() | ||||
| 				s.ignore_line() ? | ||||
| 				hash := s.text[start..s.pos] | ||||
| 				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified comment hash "$hash" ($hash.len)') | ||||
| 				return s.new_token(.hash, hash, hash.len + 1) | ||||
|  | @ -318,9 +318,14 @@ fn (mut s Scanner) new_token(kind token.Kind, lit string, len int) token.Token { | |||
| 
 | ||||
| // ignore_line forwards the scanner to the end of the current line.
 | ||||
| [direct_array_access; inline] | ||||
| fn (mut s Scanner) ignore_line() { | ||||
| fn (mut s Scanner) ignore_line() ? { | ||||
| 	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, ' ignoring until EOL') | ||||
| 	for c := s.at(); c != -1 && c != `\n`; c = s.at() { | ||||
| 		// Check for control characters (allow TAB)
 | ||||
| 		if util.is_illegal_ascii_control_character(c) { | ||||
| 			return error(@MOD + '.' + @STRUCT + '.' + @FN + | ||||
| 				' control character `$c.hex()` is not allowed ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...') | ||||
| 		} | ||||
| 		s.next() | ||||
| 		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "${byte(c).ascii_str()}"') | ||||
| 		continue | ||||
|  | @ -394,6 +399,11 @@ fn (mut s Scanner) extract_string() ?string { | |||
| 				continue | ||||
| 			} | ||||
| 		} | ||||
| 		// Check for control characters (allow TAB)
 | ||||
| 		if util.is_illegal_ascii_control_character(c) { | ||||
| 			return error(@MOD + '.' + @STRUCT + '.' + @FN + | ||||
| 				' control character `$c.hex()` is not allowed at $start ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...') | ||||
| 		} | ||||
| 
 | ||||
| 		if c == quote { | ||||
| 			s.pos++ | ||||
|  | @ -457,6 +467,11 @@ fn (mut s Scanner) extract_multiline_string() ?string { | |||
| 				continue | ||||
| 			} | ||||
| 		} | ||||
| 		// Check for control characters (allow TAB)
 | ||||
| 		if util.is_illegal_ascii_control_character(c) { | ||||
| 			return error(@MOD + '.' + @STRUCT + '.' + @FN + | ||||
| 				' control character `$c.hex()` is not allowed at $start ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...') | ||||
| 		} | ||||
| 
 | ||||
| 		if c == quote { | ||||
| 			if s.peek(1) == quote && s.peek(2) == quote { | ||||
|  |  | |||
|  | @ -42,28 +42,7 @@ const ( | |||
| 		'datetime/impossible-date.toml', | ||||
| 		'datetime/no-leads-with-milli.toml', | ||||
| 		'datetime/no-leads.toml', | ||||
| 		// Control
 | ||||
| 		'control/string-us.toml', | ||||
| 		'control/comment-lf.toml', | ||||
| 		'control/multi-us.toml', | ||||
| 		'control/rawstring-del.toml', | ||||
| 		'control/rawmulti-del.toml', | ||||
| 		'control/rawstring-us.toml', | ||||
| 		'control/string-bs.toml', | ||||
| 		'control/multi-null.toml', | ||||
| 		'control/rawstring-lf.toml', | ||||
| 		'control/rawmulti-null.toml', | ||||
| 		'control/comment-null.toml', | ||||
| 		'control/multi-lf.toml', | ||||
| 		'control/comment-del.toml', | ||||
| 		'control/rawstring-null.toml', | ||||
| 		'control/rawmulti-lf.toml', | ||||
| 		'control/multi-del.toml', | ||||
| 		'control/string-del.toml', | ||||
| 		'control/rawmulti-us.toml', | ||||
| 		'control/comment-us.toml', | ||||
| 		'control/string-lf.toml', | ||||
| 		'control/string-null.toml', | ||||
| 		// Inline table
 | ||||
| 		'inline-table/empty.toml', | ||||
| 		'inline-table/double-comma.toml', | ||||
| 		'inline-table/trailing-comma.toml', | ||||
|  |  | |||
|  | @ -8,6 +8,19 @@ pub fn is_key_char(c byte) bool { | |||
| 	return (c >= `a` && c <= `z`) || (c >= `A` && c <= `Z`) // || c == `_`  || c == `-` <- these are identified when tokenizing
 | ||||
| } | ||||
| 
 | ||||
// is_ascii_control_character returns true if `byte_char` is an ASCII control character,
// i.e. it lies in the range 0x00..0x1f (inclusive) or is DEL (0x7f).
[inline]
pub fn is_ascii_control_character(byte_char byte) bool {
	// `byte` is unsigned, so a lower-bound check against 0 would always be true.
	return byte_char <= 0x1f || byte_char == 0x7f
}
| 
 | ||||
// is_illegal_ascii_control_character returns true if `byte_char` is an ASCII control
// character that is considered "illegal" in TOML. TAB (0x09) is the one control
// character this function lets through.
[inline]
pub fn is_illegal_ascii_control_character(byte_char byte) bool {
	// TAB is explicitly allowed, so it is never reported as illegal.
	if byte_char == 0x09 {
		return false
	}
	return is_ascii_control_character(byte_char)
}
| 
 | ||||
| [if trace_toml ?] | ||||
| pub fn printdbg(id string, message string) { | ||||
| 	eprintln(id + ' ' + message) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue