// Source listing metadata: 627 lines, 18 KiB, JavaScript.
const assert = require('assert')
const { atob } = require('buffer')
const { isomorphicDecode } = require('./util')

// Shared UTF-8 encoder; stringPercentDecode uses it to turn a string into bytes.
const encoder = new TextEncoder()

/**
 * Matches a non-empty string made up solely of HTTP token code points.
 * The hyphen is escaped on purpose: an unescaped `+-.` is a character range
 * (U+002B..U+002E) and would wrongly accept U+002C (,).
 * @see https://mimesniff.spec.whatwg.org/#http-token-code-point
 */
const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+\-.^_|~A-Za-z0-9]+$/

/**
 * Matches when the tested text contains LF, CR, TAB or SPACE. It is not
 * anchored, so it is only meaningful when tested against a single character.
 */
const HTTP_WHITESPACE_REGEX = /(\u000A|\u000D|\u0009|\u0020)/ // eslint-disable-line

/**
 * Matches a non-empty string made up solely of HTTP quoted-string token code
 * points (TAB, U+0020..U+007E, U+0080..U+00FF). Anchored so that a single
 * valid character is not enough to accept an otherwise invalid value.
 * @see https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point
 */
const HTTP_QUOTED_STRING_TOKENS = /^[\u0009\u0020-\u007E\u0080-\u00FF]+$/ // eslint-disable-line

// https://fetch.spec.whatwg.org/#data-url-processor
/**
 * Splits a `data:` URL into its MIME type record and decoded body, following
 * the numbered steps of the fetch spec's data: URL processor.
 *
 * @param {URL} dataURL URL whose protocol must be `data:` (asserted).
 * @returns {'failure' | { mimeType: object, body: Uint8Array }} the string
 *   'failure' when the URL has no comma or its base64 payload is invalid,
 *   otherwise the parsed MIME type record and the body bytes.
 */
function dataURLProcessor (dataURL) {
  // 1. Assert: dataURL's scheme is "data".
  assert(dataURL.protocol === 'data:')

  // 2. Serialize the URL without its fragment.
  let input = URLSerializer(dataURL, true)

  // 3. Remove the leading "data:" string from input.
  input = input.slice(5)

  // 4. Let position point at the start of input.
  const position = { position: 0 }

  // 5. Collect everything up to (not including) the first U+002C (,).
  let mimeType = collectASequenceOfCodePointsFast(',', input, position)

  // 6. Strip leading and trailing ASCII whitespace from mimeType.
  //    This does not move position, so the body offset below is unaffected.
  mimeType = removeASCIIWhitespace(mimeType, true, true)

  // 7. If position is past the end of input (no comma found), return failure.
  if (position.position >= input.length) {
    return 'failure'
  }

  // 8. Advance position by 1 to skip the comma.
  position.position++

  // 9. Let encodedBody be the remainder of input.
  const encodedBody = input.slice(position.position)

  // 10. Let body be the percent-decoding of encodedBody.
  let body = stringPercentDecode(encodedBody)

  // 11. If mimeType ends with ";", optional spaces, then "base64"
  //     (ASCII case-insensitive):
  if (/;(\u0020){0,}base64$/i.test(mimeType)) {
    // 11.1 Let stringBody be the isomorphic decode of body.
    const stringBody = isomorphicDecode(body)

    // 11.2 Set body to the forgiving-base64 decode of stringBody.
    body = forgivingBase64(stringBody)

    // 11.3 If body is failure, then return failure.
    if (body === 'failure') {
      return 'failure'
    }

    // 11.4 Remove the last 6 code points ("base64") from mimeType.
    mimeType = mimeType.slice(0, -6)

    // 11.5 Remove trailing U+0020 SPACE code points from mimeType, if any.
    mimeType = mimeType.replace(/(\u0020)+$/, '')

    // 11.6 Remove the last U+003B (;) code point from mimeType.
    mimeType = mimeType.slice(0, -1)
  }

  // 12. If mimeType starts with U+003B (;), prepend "text/plain".
  if (mimeType.startsWith(';')) {
    mimeType = 'text/plain' + mimeType
  }

  // 13. Let mimeTypeRecord be the result of parsing mimeType.
  let mimeTypeRecord = parseMIMEType(mimeType)

  // 14. On failure fall back to text/plain;charset=US-ASCII.
  if (mimeTypeRecord === 'failure') {
    mimeTypeRecord = parseMIMEType('text/plain;charset=US-ASCII')
  }

  // 15. Return a new data: URL struct.
  // https://fetch.spec.whatwg.org/#data-url-struct
  return { mimeType: mimeTypeRecord, body }
}

// https://url.spec.whatwg.org/#concept-url-serializer
/**
 * Serializes a URL, optionally leaving out its fragment.
 *
 * @param {URL} url
 * @param {boolean} excludeFragment when true, the `#fragment` part (including
 *   a bare trailing `#` for an empty fragment) is removed from the result.
 * @returns {string}
 */
function URLSerializer (url, excludeFragment = false) {
  const href = url.href

  if (!excludeFragment) {
    return href
  }

  const hashLength = url.hash.length

  if (hashLength > 0) {
    return href.substring(0, href.length - hashLength)
  }

  // An empty fragment ("...#") makes `url.hash` the empty string while `href`
  // still ends with "#". That "#" belongs to the fragment and must be dropped.
  return href.endsWith('#') ? href.slice(0, -1) : href
}

// https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
/**
 * Collects characters from `input`, starting at `position.position`, for as
 * long as `condition` returns true. `position.position` is advanced past the
 * collected characters.
 *
 * @param {(char: string) => boolean} condition
 * @param {string} input
 * @param {{ position: number }} position mutated in place
 * @returns {string} the collected run (possibly empty)
 */
function collectASequenceOfCodePoints (condition, input, position) {
  const start = position.position
  let end = start

  // Find the end of the matching run first, then take it with a single slice
  // instead of growing a string one character at a time.
  while (end < input.length && condition(input[end])) {
    end++
  }

  position.position = end
  return input.slice(start, end)
}

/**
 * A faster collectASequenceOfCodePoints for the common case "collect until
 * this one character". Returns the text between `position.position` and the
 * next occurrence of `char`, leaving `position.position` on that occurrence,
 * or at `input.length` when `char` does not occur again.
 *
 * @param {string} char
 * @param {string} input
 * @param {{ position: number }} position mutated in place
 * @returns {string}
 */
function collectASequenceOfCodePointsFast (char, input, position) {
  const start = position.position
  const idx = input.indexOf(char, start)

  // Not found: everything up to the end of input is collected.
  position.position = idx === -1 ? input.length : idx

  return input.slice(start, position.position)
}

// https://url.spec.whatwg.org/#string-percent-decode
/**
 * Percent-decodes a string: the string is UTF-8 encoded and the resulting
 * bytes are run through percentDecode.
 *
 * @param {string} input
 * @returns {Uint8Array} the decoded bytes
 */
function stringPercentDecode (input) {
  // 1. Let bytes be the UTF-8 encoding of input.
  // 2. Return the percent-decoding of bytes.
  return percentDecode(encoder.encode(input))
}

// https://url.spec.whatwg.org/#percent-decode
/**
 * Value of an ASCII hexadecimal digit given as a byte.
 *
 * @param {number | undefined} byte
 * @returns {number} 0-15 for 0-9 / A-F / a-f, otherwise -1 (also for
 *   `undefined`, i.e. when reading past the end of the input).
 */
function hexDigitValue (byte) {
  if (byte >= 0x30 && byte <= 0x39) {
    return byte - 0x30
  }
  if (byte >= 0x41 && byte <= 0x46) {
    return byte - 0x41 + 10
  }
  if (byte >= 0x61 && byte <= 0x66) {
    return byte - 0x61 + 10
  }
  return -1
}

/**
 * Percent-decodes a byte sequence. A `%` followed by two hex digits becomes
 * the byte they encode; any other `%` is copied through unchanged.
 *
 * @param {Uint8Array} input
 * @returns {Uint8Array} a new array holding the decoded bytes
 */
function percentDecode (input) {
  const length = input.length

  // The output can never be longer than the input, so write into a buffer of
  // the same size and trim it at the end.
  const output = new Uint8Array(length)
  let written = 0

  for (let i = 0; i < length; i++) {
    const byte = input[i]

    // 1. If byte is not 0x25 (%), then append byte to output.
    if (byte !== 0x25) {
      output[written++] = byte
      continue
    }

    const high = hexDigitValue(input[i + 1])
    const low = hexDigitValue(input[i + 2])

    // 2. A "%" that is not followed by two hex digits is kept as-is.
    if (high === -1 || low === -1) {
      output[written++] = 0x25
      continue
    }

    // 3. Otherwise append the byte the two hex digits encode and skip them.
    output[written++] = (high << 4) | low
    i += 2
  }

  return output.slice(0, written)
}

// https://mimesniff.spec.whatwg.org/#parse-a-mime-type
/**
 * Parses a MIME type string following the numbered steps of the mimesniff
 * spec's "parse a MIME type" algorithm.
 *
 * @param {string} input
 * @returns {'failure' | { type: string, subtype: string, parameters: Map<string, string>, essence: string }}
 *   the string 'failure' when the type or subtype is missing or contains
 *   non-token characters, otherwise the MIME type record. `type`, `subtype`
 *   and parameter names are lowercased; the first occurrence of a parameter
 *   name wins.
 */
function parseMIMEType (input) {
  // 1. Remove any leading and trailing HTTP whitespace from input.
  input = removeHTTPWhitespace(input, true, true)

  // 2. Let position be a position variable for input, initially pointing at
  //    the start of input.
  const position = { position: 0 }

  // 3. Let type be everything up to the first U+002F (/).
  const type = collectASequenceOfCodePointsFast('/', input, position)

  // 4. If type is the empty string or does not solely contain HTTP token
  //    code points, then return failure.
  if (type.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(type)) {
    return 'failure'
  }

  // 5. If position is past the end of input (no "/" was found), then return
  //    failure. "Past the end" means position >= length.
  if (position.position >= input.length) {
    return 'failure'
  }

  // 6. Advance position by 1. (This skips past U+002F (/).)
  position.position++

  // 7. Let subtype be everything up to the next U+003B (;).
  let subtype = collectASequenceOfCodePointsFast(';', input, position)

  // 8. Remove any trailing HTTP whitespace from subtype.
  subtype = removeHTTPWhitespace(subtype, false, true)

  // 9. If subtype is the empty string or does not solely contain HTTP token
  //    code points, then return failure.
  if (subtype.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(subtype)) {
    return 'failure'
  }

  const typeLowercase = type.toLowerCase()
  const subtypeLowercase = subtype.toLowerCase()

  // 10. Let mimeType be a new MIME type record whose type and subtype are
  //     the ASCII-lowercased type and subtype.
  // https://mimesniff.spec.whatwg.org/#mime-type
  const mimeType = {
    type: typeLowercase,
    subtype: subtypeLowercase,
    /** @type {Map<string, string>} */
    parameters: new Map(),
    // https://mimesniff.spec.whatwg.org/#mime-type-essence
    essence: `${typeLowercase}/${subtypeLowercase}`
  }

  // 11. While position is not past the end of input:
  while (position.position < input.length) {
    // 11.1 Advance position by 1. (This skips past U+003B (;).)
    position.position++

    // 11.2 Skip HTTP whitespace.
    collectASequenceOfCodePoints(
      // https://fetch.spec.whatwg.org/#http-whitespace
      char => HTTP_WHITESPACE_REGEX.test(char),
      input,
      position
    )

    // 11.3 Let parameterName be everything up to the next U+003B (;) or
    //      U+003D (=).
    let parameterName = collectASequenceOfCodePoints(
      (char) => char !== ';' && char !== '=',
      input,
      position
    )

    // 11.4 Set parameterName to parameterName, in ASCII lowercase.
    parameterName = parameterName.toLowerCase()

    // 11.5 If position is not past the end of input, then:
    if (position.position < input.length) {
      // 11.5.1 A ";" here means the parameter has no value: move on.
      if (input[position.position] === ';') {
        continue
      }

      // 11.5.2 Advance position by 1. (This skips past U+003D (=).)
      position.position++
    }

    // 11.6 If position is past the end of input, then break.
    if (position.position >= input.length) {
      break
    }

    // 11.7 Let parameterValue be null.
    let parameterValue = null

    // 11.8 If the code point at position within input is U+0022 ("), then:
    if (input[position.position] === '"') {
      // 11.8.1 Collect an HTTP quoted string with the extract-value flag.
      parameterValue = collectAnHTTPQuotedString(input, position, true)

      // 11.8.2 Discard anything between the closing quote and the next ";".
      collectASequenceOfCodePointsFast(';', input, position)

    // 11.9 Otherwise:
    } else {
      // 11.9.1 Let parameterValue be everything up to the next U+003B (;).
      parameterValue = collectASequenceOfCodePointsFast(';', input, position)

      // 11.9.2 Remove any trailing HTTP whitespace from parameterValue.
      parameterValue = removeHTTPWhitespace(parameterValue, false, true)

      // 11.9.3 If parameterValue is the empty string, then continue.
      if (parameterValue.length === 0) {
        continue
      }
    }

    // 11.10 Store the parameter only if its name is a non-empty token, its
    //       value solely contains quoted-string token code points (an empty
    //       quoted value is allowed) and the name was not seen before.
    if (
      parameterName.length !== 0 &&
      HTTP_TOKEN_CODEPOINTS.test(parameterName) &&
      (parameterValue.length === 0 || HTTP_QUOTED_STRING_TOKENS.test(parameterValue)) &&
      !mimeType.parameters.has(parameterName)
    ) {
      mimeType.parameters.set(parameterName, parameterValue)
    }
  }

  // 12. Return mimeType.
  return mimeType
}

// https://infra.spec.whatwg.org/#forgiving-base64-decode
/**
 * Forgiving-base64 decodes a string: ASCII whitespace is ignored and padding
 * is optional.
 *
 * @param {string} data
 * @returns {'failure' | Uint8Array} the string 'failure' when the input has
 *   an impossible length or contains a character outside the base64
 *   alphabet, otherwise the decoded bytes.
 */
function forgivingBase64 (data) {
  // 1. Remove all ASCII whitespace from data.
  data = data.replace(/[\u0009\u000A\u000C\u000D\u0020]/g, '') // eslint-disable-line

  // 2. If data's length is a multiple of 4, remove one or two trailing
  //    U+003D (=) code points.
  if (data.length % 4 === 0) {
    data = data.replace(/=?=$/, '')
  }

  // 3. If data's length divides by 4 leaving a remainder of 1, then return
  //    failure.
  if (data.length % 4 === 1) {
    return 'failure'
  }

  // 4. If data contains a code point that is not U+002B (+), U+002F (/) or
  //    ASCII alphanumeric, then return failure.
  if (/[^+/0-9A-Za-z]/.test(data)) {
    return 'failure'
  }

  // atob yields a "binary string" with one character (0-255) per byte;
  // copy those character codes into a byte array.
  const binary = atob(data)
  const bytes = new Uint8Array(binary.length)

  for (let index = 0; index < binary.length; index++) {
    bytes[index] = binary.charCodeAt(index)
  }

  return bytes
}

// https://fetch.spec.whatwg.org/#collect-an-http-quoted-string
// tests: https://fetch.spec.whatwg.org/#example-http-quoted-string
/**
 * Collects an HTTP quoted string that starts at `position.position`, which
 * must point at a U+0022 ("). Backslash escapes are resolved while scanning
 * and `position.position` ends up just past the closing quote, or at the end
 * of input when the string is unterminated.
 *
 * @param {string} input
 * @param {{ position: number }} position mutated in place
 * @param {boolean?} extractValue when truthy, return the unescaped contents
 *   without the surrounding quotes; otherwise return the raw slice of input
 *   that was consumed, quotes and backslashes included.
 * @returns {string}
 */
function collectAnHTTPQuotedString (input, position, extractValue) {
  // 1. Let positionStart be position.
  const positionStart = position.position

  // 2. Let value be the empty string.
  let value = ''

  // 3. Assert: the code point at position within input is U+0022 (").
  assert(input[position.position] === '"')

  // 4. Advance position by 1.
  position.position++

  // 5. While true:
  while (true) {
    // 5.1 Append everything up to the next U+0022 (") or U+005C (\) to value.
    value += collectASequenceOfCodePoints(
      (char) => char !== '"' && char !== '\\',
      input,
      position
    )

    // 5.2 If position is past the end of input, then break.
    if (position.position >= input.length) {
      break
    }

    // 5.3 Let quoteOrBackslash be the code point at position within input.
    const quoteOrBackslash = input[position.position]

    // 5.4 Advance position by 1.
    position.position++

    // 5.5 If quoteOrBackslash is U+005C (\), then:
    if (quoteOrBackslash === '\\') {
      // 5.5.1 A trailing backslash has nothing to escape: keep it literally.
      if (position.position >= input.length) {
        value += '\\'
        break
      }

      // 5.5.2 Append the escaped code point to value.
      value += input[position.position]

      // 5.5.3 Advance position by 1.
      position.position++

    // 5.6 Otherwise:
    } else {
      // 5.6.1 Assert: quoteOrBackslash is U+0022 (").
      assert(quoteOrBackslash === '"')

      // 5.6.2 Break.
      break
    }
  }

  // 6. If the extract-value flag is set, then return value.
  if (extractValue) {
    return value
  }

  // 7. Return the code points from positionStart to position, inclusive,
  //    within input.
  return input.slice(positionStart, position.position)
}

/**
 * Serializes a MIME type record as `essence;name=value;...`. A parameter
 * value that is empty or contains non-token characters is wrapped in double
 * quotes, with `"` and `\` backslash-escaped.
 *
 * @see https://mimesniff.spec.whatwg.org/#serialize-a-mime-type
 * @param {{ parameters: Map<string, string>, essence: string }} mimeType
 *   a MIME type record; must not be the string 'failure' (asserted).
 * @returns {string}
 */
function serializeAMimeType (mimeType) {
  assert(mimeType !== 'failure')
  const { parameters, essence } = mimeType

  // 1. Let serialization be the concatenation of mimeType's type,
  //    U+002F (/), and mimeType's subtype.
  let serialization = essence

  // 2. For each name -> value of mimeType's parameters:
  for (const [name, rawValue] of parameters.entries()) {
    let value = rawValue

    // 2.4 If value does not solely contain HTTP token code points or value is
    //     the empty string (the regex requires at least one character), then
    //     precede each occurrence of U+0022 (") or U+005C (\) with U+005C (\)
    //     and wrap the result in U+0022 (").
    if (!HTTP_TOKEN_CODEPOINTS.test(value)) {
      value = value.replace(/(\\|")/g, '\\$1')
      value = '"' + value + '"'
    }

    // 2.1 - 2.3 and 2.5: append ";", name, "=" and the value.
    serialization += ';' + name + '=' + value
  }

  // 3. Return serialization.
  return serialization
}

/**
 * Tells whether a single character is HTTP whitespace (CR, LF, TAB or SPACE).
 *
 * @see https://fetch.spec.whatwg.org/#http-whitespace
 * @param {string} char
 * @returns {boolean}
 */
function isHTTPWhiteSpace (char) {
  return char === '\r' || char === '\n' || char === '\t' || char === ' '
}

/**
 * Strips HTTP whitespace from the start and/or end of a string.
 *
 * @see https://fetch.spec.whatwg.org/#http-whitespace
 * @param {string} str
 * @param {boolean} [leading=true] strip from the start
 * @param {boolean} [trailing=true] strip from the end
 * @returns {string}
 */
function removeHTTPWhitespace (str, leading = true, trailing = true) {
  let lead = 0
  let trail = str.length - 1

  if (leading) {
    while (lead < str.length && isHTTPWhiteSpace(str[lead])) {
      lead++
    }
  }

  if (trailing) {
    // Scan down to `lead` inclusive so that an all-whitespace string is
    // emptied even when only trailing whitespace is being removed.
    while (trail >= lead && isHTTPWhiteSpace(str[trail])) {
      trail--
    }
  }

  return str.slice(lead, trail + 1)
}

/**
 * Tells whether a single character is ASCII whitespace (CR, LF, TAB, FF or
 * SPACE).
 *
 * @see https://infra.spec.whatwg.org/#ascii-whitespace
 * @param {string} char
 * @returns {boolean}
 */
function isASCIIWhitespace (char) {
  return char === '\r' || char === '\n' || char === '\t' || char === '\f' || char === ' '
}

/**
 * Strips ASCII whitespace from the start and/or end of a string.
 *
 * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace
 * @param {string} str
 * @param {boolean} [leading=true] strip from the start
 * @param {boolean} [trailing=true] strip from the end
 * @returns {string}
 */
function removeASCIIWhitespace (str, leading = true, trailing = true) {
  let lead = 0
  let trail = str.length - 1

  if (leading) {
    while (lead < str.length && isASCIIWhitespace(str[lead])) {
      lead++
    }
  }

  if (trailing) {
    // Scan down to `lead` inclusive so that an all-whitespace string is
    // emptied even when only trailing whitespace is being removed.
    while (trail >= lead && isASCIIWhitespace(str[trail])) {
      trail--
    }
  }

  return str.slice(lead, trail + 1)
}

module.exports = {
|
||
dataURLProcessor,
|
||
URLSerializer,
|
||
collectASequenceOfCodePoints,
|
||
collectASequenceOfCodePointsFast,
|
||
stringPercentDecode,
|
||
parseMIMEType,
|
||
collectAnHTTPQuotedString,
|
||
serializeAMimeType
|
||
}
|