strings: add find_between_pair (#13468)
							parent
							
								
									d25652fbcf
								
							
						
					
					
						commit
						80444c8ec4
					
				|  | @ -11,3 +11,118 @@ pub fn random(n int) string { | |||
| 	return tos(buf) | ||||
| } | ||||
| */ | ||||
| 
 | ||||
// find_between_pair_byte returns the text enclosed by the first pair of
// `start` and `end` marks found in `input`.
// As opposed to the `find_between`, `all_after*`, `all_before*` methods defined on the
// `string` type, this function can extract content between *nested* marks in `input`.
// If `start` and `end` marks are nested in `input`, the characters
// between the *outermost* mark pair are returned. It is expected that `start` and `end`
// marks are *balanced*, meaning that the number of `start` marks equals the
// number of `end` marks in the `input`. An empty string is returned when no `start`
// mark is found, or when the outermost pair is never closed.
// The comparison is done byte by byte, so both marks must be single-byte characters.
// Using two identical marks as `start` and `end` results in undefined output behavior.
// find_between_pair_byte is the fastest in the find_between_pair_* family of functions.
// Example: assert strings.find_between_pair_byte('(V) (NOT V)',`(`,`)`) == 'V'
// Example: assert strings.find_between_pair_byte('s {X{Y}} s',`{`,`}`) == 'X{Y}'
pub fn find_between_pair_byte(input string, start byte, end byte) string {
	// Number of currently open `start` marks.
	mut depth := 0
	// Byte index right after the outermost `start` mark, -1 until one is seen.
	mut content_begin := -1
	for pos, ch in input {
		if ch == start {
			// Only the first opening mark decides where the content begins.
			if content_begin == -1 {
				content_begin = pos + 1
			}
			depth++
		} else if content_begin > 0 && ch == end {
			depth--
			// The outermost pair has just been closed.
			if depth == 0 {
				return input[content_begin..pos]
			}
		}
	}
	return ''
}
| 
 | ||||
// find_between_pair_rune returns the text enclosed by the first pair of
// `start` and `end` marks found in `input`.
// As opposed to the `find_between`, `all_after*`, `all_before*` methods defined on the
// `string` type, this function can extract content between *nested* marks in `input`.
// If `start` and `end` marks are nested in `input`, the characters
// between the *outermost* mark pair are returned. It is expected that `start` and `end`
// marks are *balanced*, meaning that the number of `start` marks equals the
// number of `end` marks in the `input`. An empty string is returned when no `start`
// mark is found, or when the outermost pair is never closed.
// The input is converted to runes first, so multi-byte characters can be used as marks.
// Using two identical marks as `start` and `end` results in undefined output behavior.
// find_between_pair_rune is inbetween the fastest and slowest in the find_between_pair_* family of functions.
// Example: assert strings.find_between_pair_rune('(V) (NOT V)',`(`,`)`) == 'V'
// Example: assert strings.find_between_pair_rune('s {X{Y}} s',`{`,`}`) == 'X{Y}'
pub fn find_between_pair_rune(input string, start rune, end rune) string {
	chars := input.runes()
	// Number of currently open `start` marks.
	mut depth := 0
	// Rune index right after the outermost `start` mark, -1 until one is seen.
	mut content_begin := -1
	for pos, ch in chars {
		if ch == start {
			// Only the first opening mark decides where the content begins.
			if content_begin == -1 {
				content_begin = pos + 1
			}
			depth++
		} else if content_begin > 0 && ch == end {
			depth--
			// The outermost pair has just been closed.
			if depth == 0 {
				return chars[content_begin..pos].string()
			}
		}
	}
	return ''
}
| 
 | ||||
// find_between_pair_string returns the text enclosed by the first pair of
// `start` and `end` marks found in `input`.
// As opposed to the `find_between`, `all_after*`, `all_before*` methods defined on the
// `string` type, this function can extract content between *nested* marks in `input`.
// If `start` and `end` marks are nested in `input`, the characters
// between the *outermost* mark pair are returned. It is expected that `start` and `end`
// marks are *balanced*, meaning that the number of `start` marks equals the
// number of `end` marks in the `input`. An empty string is returned when no `start`
// mark is found, or when the outermost pair is never closed.
// An empty string is also returned when `start` or `end` is itself empty.
// Using two identical marks as `start` and `end` results in undefined output behavior.
// find_between_pair_string is the slowest in the find_between_pair_* function family.
// Example: assert strings.find_between_pair_string('/*V*/ /*NOT V*/','/*','*/') == 'V'
// Example: assert strings.find_between_pair_string('s {{X{{Y}}}} s','{{','}}') == 'X{{Y}}'
pub fn find_between_pair_string(input string, start string, end string) string {
	start_runes := start.runes()
	end_runes := end.runes()
	// An empty mark matches at every position. With an empty `start` the loop
	// below would step `i` back by one on each match and never terminate, and
	// an empty `end` can not delimit anything either, so bail out early.
	if start_runes.len == 0 || end_runes.len == 0 {
		return ''
	}
	// Rune index right after the outermost `start` mark, -1 until one is seen.
	mut start_index := -1
	// Number of currently open `start` marks.
	mut marks := 0
	runes := input.runes()
	mut i := 0
	for ; i < runes.len; i++ {
		// The gated slice `#[..]` is used because `i + len` may run past the end of `runes`.
		start_slice := runes#[i..i + start_runes.len]
		if start_slice == start_runes {
			// Jump to the last rune of the mark; the loop increment moves past it.
			i = i + start_runes.len - 1
			if start_index < 0 {
				start_index = i + 1
			}
			marks++
			continue
		}
		if start_index > 0 {
			end_slice := runes#[i..i + end_runes.len]
			if end_slice == end_runes {
				marks--
				// The outermost pair has just been closed; `i` is the first rune of the `end` mark.
				if marks == 0 {
					return runes[start_index..i].string()
				}
				// Skip the remaining runes of a nested `end` mark.
				i = i + end_runes.len - 1
				continue
			}
		}
	}
	return ''
}
|  |  | |||
|  | @ -12,3 +12,85 @@ fn test_repeat_string() { | |||
| 	assert strings.repeat_string('abc', 0) == '' | ||||
| 	assert strings.repeat_string('', 200) == '' | ||||
| } | ||||
| 
 | ||||
// Inputs shared by the rune and byte variants, using `[` and `]` as marks.
// They cover flat pairs, nested pairs, an empty pair, a lone closing mark,
// a lone opening mark, two consecutive pairs, an unclosed pair and content
// that spans a line break.
const test_rune_and_byte = [
	'xxx[ok1]xxx',
	'xxx[[ok2]okok]',
	'xxx[ok3[[[ok]okok]]]',
	'yyy[ok4]',
	'[]',
	']',
	'[',
	'yyy[ok5][]zzz',
	'yyy[xxx',
	'xxx[xxx
	xxx]',
]

// Inputs for the string variant; the same scenarios as above, in the same
// order, using the two-character marks `/*` and `*/`.
const test_strings = [
	'xxx/*ok1*/xxx',
	'xxx/*/*ok2*/okok*/',
	'xxx/*ok3/*/*/*ok*/okok*/*/*/',
	'yyy/*ok4*/',
	'/**/',
	'*/',
	'/*',
	'yyy/*ok5*//**/zzz',
	'yyy/*xxx',
	'xxx/*xxx
	xxx*/xxx',
]

// Expected results for `test_rune_and_byte`, matched by index.
const expected_rune_and_byte_outputs = [
	'ok1',
	'[ok2]okok',
	'ok3[[[ok]okok]]',
	'ok4',
	'',
	'',
	'',
	'ok5',
	'',
	'xxx
	xxx',
]

// Expected results for `test_strings`, matched by index.
const expected_string_outputs = [
	'ok1',
	'/*ok2*/okok',
	'ok3/*/*/*ok*/okok*/*/',
	'ok4',
	'',
	'',
	'',
	'ok5',
	'',
	'xxx
	xxx',
]
| 
 | ||||
// test_find_between_pair_family exercises the rune, byte and string variants
// of `strings.find_between_pair_*`, first with a few hand-written cases and
// then against the shared input/expectation tables.
fn test_find_between_pair_family() {
	// One flat pair per variant; the rune case uses multi-byte marks.
	assert strings.find_between_pair_rune('xx♡ok❦yy', `♡`, `❦`) == 'ok'
	assert strings.find_between_pair_byte('xx{ok}yy', `{`, `}`) == 'ok'
	assert strings.find_between_pair_string('xx/*ok*/yy', '/*', '*/') == 'ok'
	// Marks that overlap with the surrounding text: the first `xxx` opens
	// and the first `yyy` closes.
	assert strings.find_between_pair_string('xxxxokyyyy', 'xxx', 'yyy') == 'xok'

	// Every input needs an expectation; fail here with a clear message rather
	// than with an out of range index inside the loops below.
	assert test_rune_and_byte.len == expected_rune_and_byte_outputs.len
	assert test_strings.len == expected_string_outputs.len

	// The rune and byte variants must agree on the same single-byte marks.
	for i, tstr in test_rune_and_byte {
		expected := expected_rune_and_byte_outputs[i]
		assert strings.find_between_pair_rune(tstr, `[`, `]`) == expected
		assert strings.find_between_pair_byte(tstr, `[`, `]`) == expected
	}

	for i, tstr in test_strings {
		expected := expected_string_outputs[i]
		assert strings.find_between_pair_string(tstr, '/*', '*/') == expected
	}
}
|  |  | |||
		Loading…
	
		Reference in New Issue