strings: add find_between_pair (#13468)
							parent
							
								
									d25652fbcf
								
							
						
					
					
						commit
						80444c8ec4
					
				| 
						 | 
					@ -11,3 +11,118 @@ pub fn random(n int) string {
 | 
				
			||||||
	return tos(buf)
 | 
						return tos(buf)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// find_between_pair_byte returns the part of `input` enclosed by the first
					// *outermost* pair of `start` and `end` marks.
					// Unlike the `find_between`, `all_after*` and `all_before*` methods defined on the
					// `string` type, this function keeps track of *nested* marks: every `start`
					// mark opens a level, every `end` mark closes one, and the text between the
					// first `start` mark and the `end` mark that closes it is returned.
					// The marks are expected to be *balanced*, meaning that the number of `start`
					// marks equals the number of `end` marks in `input`. An empty string is
					// returned when no closing `end` mark is found for the first `start` mark.
					// Using two identical marks as `start` and `end` results in undefined output behavior.
					// find_between_pair_byte is the fastest in the find_between_pair_* family of functions.
					// Example: assert strings.find_between_pair_byte('(V) (NOT V)',`(`,`)`) == 'V'
					// Example: assert strings.find_between_pair_byte('s {X{Y}} s',`{`,`}`) == 'X{Y}'
					pub fn find_between_pair_byte(input string, start byte, end byte) string {
						// nesting depth of currently open `start` marks
						mut depth := 0
						// index of the first byte after the first `start` mark; -1 until one is seen
						mut content_begin := -1
						for pos, ch in input {
							if ch == start {
								depth++
								if content_begin < 0 {
									content_begin = pos + 1
								}
							} else if content_begin > 0 && ch == end {
								// `end` marks seen before any `start` mark are ignored
								depth--
								if depth == 0 {
									return input[content_begin..pos]
								}
							}
						}
						return ''
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// find_between_pair_rune returns the part of `input` enclosed by the first
					// *outermost* pair of `start` and `end` marks.
					// Unlike the `find_between`, `all_after*` and `all_before*` methods defined on the
					// `string` type, this function keeps track of *nested* marks: every `start`
					// mark opens a level, every `end` mark closes one, and the text between the
					// first `start` mark and the `end` mark that closes it is returned.
					// The marks are expected to be *balanced*, meaning that the number of `start`
					// marks equals the number of `end` marks in `input`. An empty string is
					// returned when no closing `end` mark is found for the first `start` mark.
					// Using two identical marks as `start` and `end` results in undefined output behavior.
					// find_between_pair_rune sits between the fastest and the slowest of the find_between_pair_* family of functions.
					// Example: assert strings.find_between_pair_rune('(V) (NOT V)',`(`,`)`) == 'V'
					// Example: assert strings.find_between_pair_rune('s {X{Y}} s',`{`,`}`) == 'X{Y}'
					pub fn find_between_pair_rune(input string, start rune, end rune) string {
						// work on runes, so that multi-byte marks compare as single units
						chars := input.runes()
						// nesting depth of currently open `start` marks
						mut depth := 0
						// rune index right after the first `start` mark; -1 until one is seen
						mut content_begin := -1
						for pos, ch in chars {
							if ch == start {
								depth++
								if content_begin < 0 {
									content_begin = pos + 1
								}
							} else if content_begin > 0 && ch == end {
								// `end` marks seen before any `start` mark are ignored
								depth--
								if depth == 0 {
									return chars[content_begin..pos].string()
								}
							}
						}
						return ''
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// find_between_pair_string returns the part of `input` enclosed by the first
					// *outermost* pair of `start` and `end` marks.
					// Unlike the `find_between`, `all_after*` and `all_before*` methods defined on the
					// `string` type, this function keeps track of *nested* marks: every `start`
					// mark opens a level, every `end` mark closes one, and the text between the
					// first `start` mark and the `end` mark that closes it is returned.
					// The marks are expected to be *balanced*, meaning that the number of `start`
					// marks equals the number of `end` marks in `input`. An empty string is
					// returned when no closing `end` mark is found for the first `start` mark,
					// and also when `start` is empty.
					// Using two identical marks as `start` and `end` results in undefined output behavior.
					// find_between_pair_string is the slowest in the find_between_pair_* function family.
					// Example: assert strings.find_between_pair_string('/*V*/ /*NOT V*/','/*','*/') == 'V'
					// Example: assert strings.find_between_pair_string('s {{X{{Y}}}} s','{{','}}') == 'X{{Y}}'
					pub fn find_between_pair_string(input string, start string, end string) string {
						// An empty `start` mark matches at every position without consuming any
						// input, so the scan below would never advance. Return early instead.
						if start.len == 0 {
							return ''
						}
						// rune index right after the first `start` mark; -1 until one is seen
						mut start_index := -1
						// nesting depth of currently open `start` marks
						mut marks := 0
						start_runes := start.runes()
						end_runes := end.runes()
						runes := input.runes()
						mut i := 0
						for ; i < runes.len; i++ {
							// gated slices (`#[..]`) do not panic when the window runs past the end
							start_slice := runes#[i..i + start_runes.len]
							if start_slice == start_runes {
								// move to the last rune of the mark; the loop's `i++` steps past it
								i = i + start_runes.len - 1
								if start_index < 0 {
									start_index = i + 1
								}
								marks++
								continue
							}
							// `end` marks seen before any `start` mark are ignored
							if start_index > 0 {
								end_slice := runes#[i..i + end_runes.len]
								if end_slice == end_runes {
									marks--
									if marks == 0 {
										return runes[start_index..i].string()
									}
									// skip the rest of an inner `end` mark
									i = i + end_runes.len - 1
									continue
								}
							}
						}
						return ''
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -12,3 +12,85 @@ fn test_repeat_string() {
 | 
				
			||||||
	assert strings.repeat_string('abc', 0) == ''
 | 
						assert strings.repeat_string('abc', 0) == ''
 | 
				
			||||||
	assert strings.repeat_string('', 200) == ''
 | 
						assert strings.repeat_string('', 200) == ''
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const test_rune_and_byte = [
 | 
				
			||||||
 | 
						'xxx[ok1]xxx',
 | 
				
			||||||
 | 
						'xxx[[ok2]okok]',
 | 
				
			||||||
 | 
						'xxx[ok3[[[ok]okok]]]',
 | 
				
			||||||
 | 
						'yyy[ok4]',
 | 
				
			||||||
 | 
						'[]',
 | 
				
			||||||
 | 
						']',
 | 
				
			||||||
 | 
						'[',
 | 
				
			||||||
 | 
						'yyy[ok5][]zzz',
 | 
				
			||||||
 | 
						'yyy[xxx',
 | 
				
			||||||
 | 
						'xxx[xxx
 | 
				
			||||||
 | 
						xxx]',
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const test_strings = [
 | 
				
			||||||
 | 
						'xxx/*ok1*/xxx',
 | 
				
			||||||
 | 
						'xxx/*/*ok2*/okok*/',
 | 
				
			||||||
 | 
						'xxx/*ok3/*/*/*ok*/okok*/*/*/',
 | 
				
			||||||
 | 
						'yyy/*ok4*/',
 | 
				
			||||||
 | 
						'/**/',
 | 
				
			||||||
 | 
						'*/',
 | 
				
			||||||
 | 
						'/*',
 | 
				
			||||||
 | 
						'yyy/*ok5*//**/zzz',
 | 
				
			||||||
 | 
						'yyy/*xxx',
 | 
				
			||||||
 | 
						'xxx/*xxx
 | 
				
			||||||
 | 
						xxx*/xxx',
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const expected_rune_and_byte_outputs = [
 | 
				
			||||||
 | 
						'ok1',
 | 
				
			||||||
 | 
						'[ok2]okok',
 | 
				
			||||||
 | 
						'ok3[[[ok]okok]]',
 | 
				
			||||||
 | 
						'ok4',
 | 
				
			||||||
 | 
						'',
 | 
				
			||||||
 | 
						'',
 | 
				
			||||||
 | 
						'',
 | 
				
			||||||
 | 
						'ok5',
 | 
				
			||||||
 | 
						'',
 | 
				
			||||||
 | 
						'xxx
 | 
				
			||||||
 | 
						xxx',
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const expected_string_outputs = [
 | 
				
			||||||
 | 
						'ok1',
 | 
				
			||||||
 | 
						'/*ok2*/okok',
 | 
				
			||||||
 | 
						'ok3/*/*/*ok*/okok*/*/',
 | 
				
			||||||
 | 
						'ok4',
 | 
				
			||||||
 | 
						'',
 | 
				
			||||||
 | 
						'',
 | 
				
			||||||
 | 
						'',
 | 
				
			||||||
 | 
						'ok5',
 | 
				
			||||||
 | 
						'',
 | 
				
			||||||
 | 
						'xxx
 | 
				
			||||||
 | 
						xxx',
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// test_find_between_pair_family checks the byte, rune and string variants of
					// find_between_pair_* against a few inline cases and against the shared
					// input/expected tables declared above.
					fn test_find_between_pair_family() {
						// one direct check per variant, including a multi-byte rune mark
						assert strings.find_between_pair_rune('xx♡ok❦yy', `♡`, `❦`) == 'ok'
						assert strings.find_between_pair_byte('xx{ok}yy', `{`, `}`) == 'ok'
						assert strings.find_between_pair_string('xx/*ok*/yy', '/*', '*/') == 'ok'
						// marks that overlap with the surrounding text
						assert strings.find_between_pair_string('xxxxokyyyy', 'xxx', 'yyy') == 'xok'

						// the tables are indexed in parallel, so they must have matching lengths
						assert test_rune_and_byte.len == expected_rune_and_byte_outputs.len
						assert test_strings.len == expected_string_outputs.len

						// the rune and byte variants share the same inputs and expected outputs
						for i, tstr in test_rune_and_byte {
							expected := expected_rune_and_byte_outputs[i]
							assert strings.find_between_pair_rune(tstr, `[`, `]`) == expected
							assert strings.find_between_pair_byte(tstr, `[`, `]`) == expected
						}

						for i, tstr in test_strings {
							expected := expected_string_outputs[i]
							assert strings.find_between_pair_string(tstr, '/*', '*/') == expected
						}
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue