net.urllib: update enum / const style

pull/1487/head
joe-conigliaro 2019-08-06 21:01:05 +10:00 committed by Alexander Medvednikov
parent 347e59d604
commit 69932758db
1 changed file with 53 additions and 53 deletions

View File

@ -18,18 +18,18 @@ module urllib
import strings import strings
enum EncodingMode { enum EncodingMode {
EncodePath encode_path
EncodePathSegment encode_path_segment
EncodeHost encode_host
EncodeZone encode_zone
EncodeUserPassword encode_user_password
EncodeQueryComponent encode_query_component
EncodeFragment encode_fragment
} }
const ( const (
EscapeError = 'invalid URL escape' err_msg_escape = 'invalid URL escape'
ParseError = 'error parsing url' err_msg_parse = 'error parsing url'
) )
fn error_msg(message, val string) string { fn error_msg(message, val string) string {
@ -49,7 +49,7 @@ fn should_escape(c byte, mode EncodingMode) bool {
return false return false
} }
if mode == .EncodeHost || mode == .EncodeZone { if mode == .encode_host || mode == .encode_zone {
// §3.2.2 host allows // §3.2.2 host allows
// sub-delims = `!` / `$` / `&` / `'` / `(` / `)` / `*` / `+` / `,` / `;` / `=` // sub-delims = `!` / `$` / `&` / `'` / `(` / `)` / `*` / `+` / `,` / `;` / `=`
// as part of reg-name. // as part of reg-name.
@ -73,37 +73,37 @@ fn should_escape(c byte, mode EncodingMode) bool {
// Different sections of the URL allow a few of // Different sections of the URL allow a few of
// the reserved characters to appear unescaped. // the reserved characters to appear unescaped.
switch mode { switch mode {
case EncodingMode.EncodePath: // §3.3 case EncodingMode.encode_path: // §3.3
// The RFC allows : @ & = + $ but saves / ; , for assigning // The RFC allows : @ & = + $ but saves / ; , for assigning
// meaning to individual path segments. This package // meaning to individual path segments. This package
// only manipulates the path as a whole, so we allow those // only manipulates the path as a whole, so we allow those
// last three as well. That leaves only ? to escape. // last three as well. That leaves only ? to escape.
return c == `?` return c == `?`
case EncodingMode.EncodePathSegment: // §3.3 case EncodingMode.encode_path_segment: // §3.3
// The RFC allows : @ & = + $ but saves / ; , for assigning // The RFC allows : @ & = + $ but saves / ; , for assigning
// meaning to individual path segments. // meaning to individual path segments.
return c == `/` || c == `;` || c == `,` || c == `?` return c == `/` || c == `;` || c == `,` || c == `?`
case EncodingMode.EncodeUserPassword: // §3.2.1 case EncodingMode.encode_user_password: // §3.2.1
// The RFC allows `;`, `:`, `&`, `=`, `+`, `$`, and `,` in // The RFC allows `;`, `:`, `&`, `=`, `+`, `$`, and `,` in
// userinfo, so we must escape only `@`, `/`, and `?`. // userinfo, so we must escape only `@`, `/`, and `?`.
// The parsing of userinfo treats `:` as special so we must escape // The parsing of userinfo treats `:` as special so we must escape
// that too. // that too.
return c == `@` || c == `/` || c == `?` || c == `:` return c == `@` || c == `/` || c == `?` || c == `:`
case EncodingMode.EncodeQueryComponent: // §3.4 case EncodingMode.encode_query_component: // §3.4
// The RFC reserves (so we must escape) everything. // The RFC reserves (so we must escape) everything.
return true return true
case EncodingMode.EncodeFragment: // §4.1 case EncodingMode.encode_fragment: // §4.1
// The RFC text is silent but the grammar allows // The RFC text is silent but the grammar allows
// everything, so escape nothing. // everything, so escape nothing.
return false return false
} }
} }
if mode == .EncodeFragment { if mode == .encode_fragment {
// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
// included in reserved from RFC 2396 §2.2. The remaining sub-delims do not // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
// need to be escaped. To minimize potential breakage, we apply two restrictions: // need to be escaped. To minimize potential breakage, we apply two restrictions:
@ -126,7 +126,7 @@ fn should_escape(c byte, mode EncodingMode) bool {
// It returns an error if any % is not followed by two hexadecimal // It returns an error if any % is not followed by two hexadecimal
// digits. // digits.
pub fn query_unescape(s string) ?string { pub fn query_unescape(s string) ?string {
return unescape(s, .EncodeQueryComponent) return unescape(s, .encode_query_component)
} }
// path_unescape does the inverse transformation of path_escape, // path_unescape does the inverse transformation of path_escape,
@ -137,7 +137,7 @@ pub fn query_unescape(s string) ?string {
// path_unescape is identical to query_unescape except that it does not // path_unescape is identical to query_unescape except that it does not
// unescape '+' to ' ' (space). // unescape '+' to ' ' (space).
pub fn path_unescape(s string) ?string { pub fn path_unescape(s string) ?string {
return unescape(s, .EncodePathSegment) return unescape(s, .encode_path_segment)
} }
// unescape unescapes a string; the mode specifies // unescape unescapes a string; the mode specifies
@ -159,7 +159,7 @@ fn unescape(s string, mode EncodingMode) ?string {
if s.len > 3 { if s.len > 3 {
s = s.left(3) s = s.left(3)
} }
return error(error_msg(EscapeError, s)) return error(error_msg(err_msg_escape, s))
} }
// Per https://tools.ietf.org/html/rfc3986#page-21 // Per https://tools.ietf.org/html/rfc3986#page-21
// in the host component %-encoding can only be used // in the host component %-encoding can only be used
@ -167,10 +167,10 @@ fn unescape(s string, mode EncodingMode) ?string {
// But https://tools.ietf.org/html/rfc6874#section-2 // But https://tools.ietf.org/html/rfc6874#section-2
// introduces %25 being allowed to escape a percent sign // introduces %25 being allowed to escape a percent sign
// in IPv6 scoped-address literals. Yay. // in IPv6 scoped-address literals. Yay.
if mode == .EncodeHost && unhex(s[i+1]) < 8 && s.substr(i, i+3) != '%25' { if mode == .encode_host && unhex(s[i+1]) < 8 && s.substr(i, i+3) != '%25' {
return error(error_msg(EscapeError, s.substr(i, i+3))) return error(error_msg(err_msg_escape, s.substr(i, i+3)))
} }
if mode == .EncodeZone { if mode == .encode_zone {
// RFC 6874 says basically 'anything goes' for zone identifiers // RFC 6874 says basically 'anything goes' for zone identifiers
// and that even non-ASCII can be redundantly escaped, // and that even non-ASCII can be redundantly escaped,
// but it seems prudent to restrict %-escaped bytes here to those // but it seems prudent to restrict %-escaped bytes here to those
@ -179,16 +179,16 @@ fn unescape(s string, mode EncodingMode) ?string {
// to introduce bytes you couldn't just write directly. // to introduce bytes you couldn't just write directly.
// But Windows puts spaces here! Yay. // But Windows puts spaces here! Yay.
v := byte(unhex(s[i+1])<<byte(4) | unhex(s[i+2])) v := byte(unhex(s[i+1])<<byte(4) | unhex(s[i+2]))
if s.substr(i, i+3) != '%25' && v != ` ` && should_escape(v, .EncodeHost) { if s.substr(i, i+3) != '%25' && v != ` ` && should_escape(v, .encode_host) {
error(error_msg(EscapeError, s.substr(i, i+3))) error(error_msg(err_msg_escape, s.substr(i, i+3)))
} }
} }
i += 3 i += 3
case `+`: case `+`:
has_plus = mode == .EncodeQueryComponent has_plus = mode == .encode_query_component
i++ i++
default: default:
if (mode == .EncodeHost || mode == .EncodeZone) && s[i] < 0x80 && should_escape(s[i], mode) { if (mode == .encode_host || mode == .encode_zone) && s[i] < 0x80 && should_escape(s[i], mode) {
error(error_msg('invalid character in host name', s.substr(i, i+1))) error(error_msg('invalid character in host name', s.substr(i, i+1)))
} }
i++ i++
@ -207,7 +207,7 @@ fn unescape(s string, mode EncodingMode) ?string {
t.write( byte(unhex(s[i+1])<<byte(4) | unhex(s[i+2])).str() ) t.write( byte(unhex(s[i+1])<<byte(4) | unhex(s[i+2])).str() )
i += 2 i += 2
case `+`: case `+`:
if mode == .EncodeQueryComponent { if mode == .encode_query_component {
t.write(' ') t.write(' ')
} else { } else {
t.write('+') t.write('+')
@ -222,13 +222,13 @@ fn unescape(s string, mode EncodingMode) ?string {
// query_escape escapes the string so it can be safely placed // query_escape escapes the string so it can be safely placed
// inside a URL query. // inside a URL query.
pub fn query_escape(s string) string { pub fn query_escape(s string) string {
return escape(s, .EncodeQueryComponent) return escape(s, .encode_query_component)
} }
// path_escape escapes the string so it can be safely placed inside a URL path segment, // path_escape escapes the string so it can be safely placed inside a URL path segment,
// replacing special characters (including /) with %XX sequences as needed. // replacing special characters (including /) with %XX sequences as needed.
pub fn path_escape(s string) string { pub fn path_escape(s string) string {
return escape(s, .EncodePathSegment) return escape(s, .encode_path_segment)
} }
fn escape(s string, mode EncodingMode) string { fn escape(s string, mode EncodingMode) string {
@ -238,7 +238,7 @@ fn escape(s string, mode EncodingMode) string {
for i := 0; i < s.len; i++ { for i := 0; i < s.len; i++ {
c = s[i] c = s[i]
if should_escape(c, mode) { if should_escape(c, mode) {
if c == ` ` && mode == .EncodeQueryComponent { if c == ` ` && mode == .encode_query_component {
space_count++ space_count++
} else { } else {
hex_count++ hex_count++
@ -270,17 +270,17 @@ fn escape(s string, mode EncodingMode) string {
return string(t) return string(t)
} }
ctab := '0123456789ABCDEF'
mut j := 0 mut j := 0
for i := 0; i < s.len; i++ { for i := 0; i < s.len; i++ {
c1 := s[i] c1 := s[i]
if c1 == ` ` && mode == .EncodeQueryComponent { if c1 == ` ` && mode == .encode_query_component {
t[j] = `+` t[j] = `+`
j++ j++
} else if should_escape(c1, mode) { } else if should_escape(c1, mode) {
t[j] = `%` t[j] = `%`
x := '0123456789ABCDEF' t[j+1] = ctab[c1>>4]
t[j+1] = x[c1>>4] t[j+2] = ctab[c1&15]
t[j+2] = x[c1&15]
j += 3 j += 3
} else { } else {
t[j] = s[i] t[j] = s[i]
@ -364,9 +364,9 @@ fn (u &Userinfo) string() string {
if u.empty() { if u.empty() {
return '' return ''
} }
mut s := escape(u.username, .EncodeUserPassword) mut s := escape(u.username, .encode_user_password)
if u.password_set { if u.password_set {
s += ':' + escape(u.password, .EncodeUserPassword) s += ':' + escape(u.password, .encode_user_password)
} }
return s return s
} }
@ -433,13 +433,13 @@ pub fn parse(rawurl string) ?URL {
u := p[0] u := p[0]
frag := p[1] frag := p[1]
mut url := _parse(u, false) or { mut url := _parse(u, false) or {
return error(error_msg(ParseError, u)) return error(error_msg(err_msg_parse, u))
} }
if frag == '' { if frag == '' {
return url return url
} }
f := unescape(frag, .EncodeFragment) or { f := unescape(frag, .encode_fragment) or {
return error(error_msg(ParseError, u)) return error(error_msg(err_msg_parse, u))
} }
url.fragment = f url.fragment = f
return url return url
@ -564,7 +564,7 @@ fn parse_authority(authority string) ?ParseAuthorityRes {
return error(error_msg('invalid userinfo', '')) return error(error_msg('invalid userinfo', ''))
} }
if !userinfo.contains(':') { if !userinfo.contains(':') {
u := unescape(userinfo, .EncodeUserPassword) or { u := unescape(userinfo, .encode_user_password) or {
return error(err) return error(err)
} }
userinfo = u userinfo = u
@ -573,11 +573,11 @@ fn parse_authority(authority string) ?ParseAuthorityRes {
parts := split(userinfo, ':', true) parts := split(userinfo, ':', true)
mut username := parts[0] mut username := parts[0]
mut password := parts[1] mut password := parts[1]
u := unescape(username, .EncodeUserPassword) or { u := unescape(username, .encode_user_password) or {
return error(err) return error(err)
} }
username = u username = u
p := unescape(password, .EncodeUserPassword) or { p := unescape(password, .encode_user_password) or {
return error(err) return error(err)
} }
password = p password = p
@ -612,20 +612,20 @@ fn parse_host(host string) ?string {
// like newlines. // like newlines.
zone := host.left(i).index('%25') zone := host.left(i).index('%25')
if zone >= 0 { if zone >= 0 {
host1 := unescape(host.left(zone), .EncodeHost) or { host1 := unescape(host.left(zone), .encode_host) or {
return err return err
} }
host2 := unescape(host.substr(zone, i), .EncodeZone) or { host2 := unescape(host.substr(zone, i), .encode_zone) or {
return err return err
} }
host3 := unescape(host.right(i), .EncodeHost) or { host3 := unescape(host.right(i), .encode_host) or {
return err return err
} }
return host1 + host2 + host3 return host1 + host2 + host3
} }
} }
h := unescape(host, .EncodeHost) or { h := unescape(host, .encode_host) or {
return err return err
} }
host = h host = h
@ -641,11 +641,11 @@ fn parse_host(host string) ?string {
// set_path will return an error only if the provided path contains an invalid // set_path will return an error only if the provided path contains an invalid
// escaping. // escaping.
fn (u &URL) set_path(p string) ?bool { fn (u &URL) set_path(p string) ?bool {
path := unescape(p, .EncodePath) or { path := unescape(p, .encode_path) or {
return error(err) return error(err)
} }
u.path = path u.path = path
escp := escape(path, .EncodePath) escp := escape(path, .encode_path)
if p == escp { if p == escp {
// Default encoding is fine. // Default encoding is fine.
u.raw_path = '' u.raw_path = ''
@ -666,13 +666,13 @@ fn (u &URL) set_path(p string) ?bool {
// reading u.raw_path directly. // reading u.raw_path directly.
fn (u &URL) escaped_path() string { fn (u &URL) escaped_path() string {
if u.raw_path != '' && valid_encoded_path(u.raw_path) { if u.raw_path != '' && valid_encoded_path(u.raw_path) {
p := unescape(u.raw_path, .EncodePath) p := unescape(u.raw_path, .encode_path)
return u.raw_path return u.raw_path
} }
if u.path == '*' { if u.path == '*' {
return '*' // don't escape (Issue 11202) return '*' // don't escape (Issue 11202)
} }
return escape(u.path, .EncodePath) return escape(u.path, .encode_path)
} }
// valid_encoded_path reports whether s is a valid encoded path. // valid_encoded_path reports whether s is a valid encoded path.
@ -693,7 +693,7 @@ fn valid_encoded_path(s string) bool {
case `%`: case `%`:
// ok - percent encoded, will decode // ok - percent encoded, will decode
default: default:
if should_escape(s[i], .EncodePath) { if should_escape(s[i], .encode_path) {
return false return false
} }
} }
@ -757,7 +757,7 @@ pub fn (u &URL) str() string {
buf.write('@') buf.write('@')
} }
if u.host != '' { if u.host != '' {
buf.write(escape(u.host, .EncodeHost)) buf.write(escape(u.host, .encode_host))
} }
} }
path := u.escaped_path() path := u.escaped_path()
@ -784,7 +784,7 @@ pub fn (u &URL) str() string {
} }
if u.fragment != '' { if u.fragment != '' {
buf.write('#') buf.write('#')
buf.write(escape(u.fragment, .EncodeFragment)) buf.write(escape(u.fragment, .encode_fragment))
} }
return buf.str() return buf.str()
} }