net.http: fix multipart/form-data file upload handling

2021-11-10 17:31:41 +02:00 · 2021-11-10 17:31:41 +02:00 · 466ced2876
parent 66e53279c2
commit 466ced2876
3 changed files with 69 additions and 30 deletions
--- a/examples/vweb/file_upload/index.html
+++ b/examples/vweb/file_upload/index.html
@ -1,6 +1,15 @@
 <h2>File Upload</h2>
 File form:
 <form method="POST" enctype="multipart/form-data" action="/upload">
-	<input type="file" name="upfile" multiple>
+	<input type="hidden" name="file_id" value="aafueB5Quie" />
-    <input type="submit" value="Press">
+	<input type="file" name="upfile" multiple/>
 	<input type="submit" value="Upload"/>
 </form>
 Text form:
 <form method="POST" enctype="multipart/form-data" action="/submit">
 	<input type="text" name="content1" value="abc" />
 	<input type="text" name="content2" value="xyz" />
 	<input type="submit" value="Submit content">
 </form>
--- a/examples/vweb/file_upload/upload.html
+++ b/examples/vweb/file_upload/upload.html
@ -5,6 +5,7 @@ File amount: @fdata.len
@for i, data in fdata
 <h2>Filename: @data.filename</h2>
 <h2>Length: @data.data.len</h2>
 <h2>Type: @data.content_type</h2>
 <p>@{files[i]}</p>
--- a/vlib/net/http/request.v
+++ b/vlib/net/http/request.v
@ -300,44 +300,85 @@ fn multipart_form_body(form map[string]string, files map[string][]FileData) (str
 	return sb.str(), boundary
 }
 struct LineSegmentIndexes {
 mut:
 	start int
 	end   int
 }
 // parse_multipart_form parses an http request body, given a boundary string
 // For more details about multipart forms, see:
 //   https://datatracker.ietf.org/doc/html/rfc2183
 //   https://datatracker.ietf.org/doc/html/rfc2388
 //   https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition
 pub fn parse_multipart_form(body string, boundary string) (map[string]string, map[string][]FileData) {
-	sections := body.split(boundary)
+	// dump(body)
-	fields := sections[1..sections.len - 1]
+	// dump(boundary)
 	mut form := map[string]string{}
 	mut files := map[string][]FileData{}
-
+	// TODO: do not use split, but only indexes, to reduce copying of potentially large data
 	sections := body.split(boundary)
 	fields := sections[1..sections.len - 1]
 	for field in fields {
-		// TODO: do not split into lines; do same parsing for HTTP body
+		mut line_segments := []LineSegmentIndexes{cap: 100}
-		lines := field.split_into_lines()[1..]
+		mut line_idx, mut line_start := 0, 0
-		disposition := parse_disposition(lines[0])
+		for cidx, c in field {
 			if line_idx >= 6 {
 				// no need to scan further
 				break
 			}
 			if c == `\n` {
 				line_segments << LineSegmentIndexes{line_start, cidx}
 				line_start = cidx + 1
 				line_idx++
 			}
 		}
 		line_segments << LineSegmentIndexes{line_start, field.len}
 		line1 := field[line_segments[1].start..line_segments[1].end]
 		line2 := field[line_segments[2].start..line_segments[2].end]
 		disposition := parse_disposition(line1.trim_space())
 		// Grab everything between the double quotes
 		name := disposition['name'] or { continue }
 		// Parse files
-		// TODO: filename*
+		// TODO: handle `filename*`, see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition
 		if filename := disposition['filename'] {
-			// Parse Content-Type header
+			// reject early broken content
-			if lines.len == 1 || !lines[1].to_lower().starts_with('content-type:') {
+			if line_segments.len < 5 {
 				continue
 			}
-			mut ct := lines[1].split_nth(':', 2)[1]
+			// reject early non Content-Type headers
-			ct = ct.trim_left(' \t')
+			if !line2.to_lower().starts_with('content-type:') {
-			data := lines_to_string(field.len, lines, 3, lines.len - 1)
+				continue
 			}
 			content_type := line2.split_nth(':', 2)[1].trim_space()
 			// line1: Content-Disposition: form-data; name="upfile"; filename="photo123.jpg"
 			// line2: Content-Type: image/jpeg
 			// line3:
 			// line4: DATA
 			// ...
 			// lineX: --
 			data := field[line_segments[4].start..field.len - 4] // each multipart field ends with \r\n--
 			// dump(data.limit(20).bytes())
 			// dump(data.len)
 			files[name] << FileData{
 				filename: filename
-				content_type: ct
+				content_type: content_type
 				data: data
 			}
 			continue
 		}
-		data := lines_to_string(field.len, lines, 2, lines.len - 1)
+		if line_segments.len < 4 {
-		form[name] = data
+			continue
 		}
 		form[name] = field[line_segments[3].start..field.len - 4]
 	}
 	// dump(form)
 	return form, files
 }
 // Parse the Content-Disposition header of a multipart form
 // Returns a map of the key="value" pairs
-// Example: parse_disposition('Content-Disposition: form-data; name="a"; filename="b"') == {'name': 'a', 'filename': 'b'}
+// Example: assert parse_disposition('Content-Disposition: form-data; name="a"; filename="b"') == {'name': 'a', 'filename': 'b'}
 fn parse_disposition(line string) map[string]string {
 	mut data := map[string]string{}
 	for word in line.split(';') {
@ -354,15 +395,3 @@ fn parse_disposition(line string) map[string]string {
 	}
 	return data
 }
 [manualfree]
 fn lines_to_string(len int, lines []string, start int, end int) string {
 	mut sb := strings.new_builder(len)
 	for i in start .. end {
 		sb.writeln(lines[i])
 	}
 	sb.cut_last(1) // last newline
 	res := sb.str()
 	unsafe { sb.free() }
 	return res
 }