231 lines
7.3 KiB
TypeScript
231 lines
7.3 KiB
TypeScript
/**
|
|
* (c) 2017 Rob Wu <rob@robwu.nl> (https://robwu.nl)
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
"use strict";
|
|
/* eslint-disable max-len,no-magic-numbers */
|
|
// License: MPL-2
|
|
|
|
/**
|
|
* This typescript port was done by Nils Maier based on
|
|
* https://github.com/Rob--W/open-in-browser/blob/83248155b633ed41bc9cdb1205042653e644abd2/extension/content-disposition.js
|
|
* Special thanks goes to Rob doing all the heavy lifting and putting
|
|
* it together in a reuseable, open source'd library.
|
|
*/
|
|
|
|
const R_RFC6266 = /(?:^|;)\s*filename\*\s*=\s*([^";\s][^;\s]*|"(?:[^"\\]|\\"?)+"?)/i;
|
|
const R_RFC5987 = /(?:^|;)\s*filename\s*=\s*([^";\s][^;\s]*|"(?:[^"\\]|\\"?)+"?)/i;
|
|
|
|
function unquoteRFC2616(value: string) {
|
|
if (!value.startsWith("\"")) {
|
|
return value;
|
|
}
|
|
|
|
const parts = value.slice(1).split("\\\"");
|
|
// Find the first unescaped " and terminate there.
|
|
for (let i = 0; i < parts.length; ++i) {
|
|
const quotindex = parts[i].indexOf("\"");
|
|
if (quotindex !== -1) {
|
|
parts[i] = parts[i].slice(0, quotindex);
|
|
// Truncate and stop the iteration.
|
|
parts.length = i + 1;
|
|
}
|
|
parts[i] = parts[i].replace(/\\(.)/g, "$1");
|
|
}
|
|
value = parts.join("\"");
|
|
return value;
|
|
}
|
|
|
|
export class CDHeaderParser {
|
|
private needsFixup: boolean;
|
|
|
|
// We need to keep this per instance, because of the global flag.
|
|
// Hence we need to reset it after a use.
|
|
private R_MULTI = /(?:^|;)\s*filename\*((?!0\d)\d+)(\*?)\s*=\s*([^";\s][^;\s]*|"(?:[^"\\]|\\"?)+"?)/gi;
|
|
|
|
/**
|
|
* Parse a content-disposition header, with relaxed spec tolerance
|
|
*
|
|
* @param {string} header Header to parse
|
|
* @returns {string} Parsed header
|
|
*/
|
|
parse(header: string) {
|
|
this.needsFixup = true;
|
|
|
|
// filename*=ext-value ("ext-value" from RFC 5987, referenced by RFC 6266).
|
|
{
|
|
const match = R_RFC6266.exec(header);
|
|
if (match) {
|
|
const [, tmp] = match;
|
|
let filename = unquoteRFC2616(tmp);
|
|
filename = unescape(filename);
|
|
filename = this.decodeRFC5897(filename);
|
|
filename = this.decodeRFC2047(filename);
|
|
return this.maybeFixupEncoding(filename);
|
|
}
|
|
}
|
|
|
|
// Continuations (RFC 2231 section 3, referenced by RFC 5987 section 3.1).
|
|
// filename*n*=part
|
|
// filename*n=part
|
|
{
|
|
const tmp = this.getParamRFC2231(header);
|
|
if (tmp) {
|
|
// RFC 2047, section
|
|
const filename = this.decodeRFC2047(tmp);
|
|
return this.maybeFixupEncoding(filename);
|
|
}
|
|
}
|
|
|
|
// filename=value (RFC 5987, section 4.1).
|
|
{
|
|
const match = R_RFC5987.exec(header);
|
|
if (match) {
|
|
const [, tmp] = match;
|
|
let filename = unquoteRFC2616(tmp);
|
|
filename = this.decodeRFC2047(filename);
|
|
return this.maybeFixupEncoding(filename);
|
|
}
|
|
}
|
|
return "";
|
|
}
|
|
|
|
private maybeDecode(encoding: string, value: string) {
|
|
if (!encoding) {
|
|
return value;
|
|
}
|
|
const bytes = Array.from(value, c => c.charCodeAt(0));
|
|
if (!bytes.every(code => code <= 0xff)) {
|
|
return value;
|
|
}
|
|
try {
|
|
value = new TextDecoder(encoding, {fatal: true}).
|
|
decode(new Uint8Array(bytes));
|
|
this.needsFixup = false;
|
|
}
|
|
catch {
|
|
// TextDecoder constructor threw - unrecognized encoding.
|
|
}
|
|
return value;
|
|
}
|
|
|
|
private maybeFixupEncoding(value: string) {
|
|
if (!this.needsFixup && /[\x80-\xff]/.test(value)) {
|
|
return value;
|
|
}
|
|
|
|
// Maybe multi-byte UTF-8.
|
|
value = this.maybeDecode("utf-8", value);
|
|
if (!this.needsFixup) {
|
|
return value;
|
|
}
|
|
|
|
// Try iso-8859-1 encoding.
|
|
return this.maybeDecode("iso-8859-1", value);
|
|
}
|
|
|
|
private getParamRFC2231(value: string) {
|
|
const matches: string[][] = [];
|
|
|
|
// Iterate over all filename*n= and filename*n*= with n being an integer
|
|
// of at least zero. Any non-zero number must not start with '0'.
|
|
let match;
|
|
this.R_MULTI.lastIndex = 0;
|
|
while ((match = this.R_MULTI.exec(value)) !== null) {
|
|
const [, num, quot, part] = match;
|
|
const n = parseInt(num, 10);
|
|
if (n in matches) {
|
|
// Ignore anything after the invalid second filename*0.
|
|
if (n === 0) {
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
matches[n] = [quot, part];
|
|
}
|
|
|
|
const parts: string[] = [];
|
|
for (let n = 0; n < matches.length; ++n) {
|
|
if (!(n in matches)) {
|
|
// Numbers must be consecutive. Truncate when there is a hole.
|
|
break;
|
|
}
|
|
const [quot, rawPart] = matches[n];
|
|
let part = unquoteRFC2616(rawPart);
|
|
if (quot) {
|
|
part = unescape(part);
|
|
if (n === 0) {
|
|
part = this.decodeRFC5897(part);
|
|
}
|
|
}
|
|
parts.push(part);
|
|
}
|
|
return parts.join("");
|
|
}
|
|
|
|
private decodeRFC2047(value: string) {
|
|
// RFC 2047-decode the result. Firefox tried to drop support for it, but
|
|
// backed out because some servers use it - https://bugzil.la/875615
|
|
// Firefox's condition for decoding is here:
|
|
|
|
// eslint-disable-next-line max-len
|
|
// https://searchfox.org/mozilla-central/rev/4a590a5a15e35d88a3b23dd6ac3c471cf85b04a8/netwerk/mime/nsMIMEHeaderParamImpl.cpp#742-748
|
|
|
|
// We are more strict and only recognize RFC 2047-encoding if the value
|
|
// starts with "=?", since then it is likely that the full value is
|
|
// RFC 2047-encoded.
|
|
|
|
// Firefox also decodes words even where RFC 2047 section 5 states:
|
|
// "An 'encoded-word' MUST NOT appear within a 'quoted-string'."
|
|
|
|
// eslint-disable-next-line no-control-regex
|
|
if (!value.startsWith("=?") || /[\x00-\x19\x80-\xff]/.test(value)) {
|
|
return value;
|
|
}
|
|
// RFC 2047, section 2.4
|
|
// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
|
|
// charset = token (but let's restrict to characters that denote a
|
|
// possibly valid encoding).
|
|
// encoding = q or b
|
|
// encoded-text = any printable ASCII character other than ? or space.
|
|
// ... but Firefox permits ? and space.
|
|
return value.replace(
|
|
/=\?([\w-]*)\?([QqBb])\?((?:[^?]|\?(?!=))*)\?=/g,
|
|
(_, charset, encoding, text) => {
|
|
if (encoding === "q" || encoding === "Q") {
|
|
// RFC 2047 section 4.2.
|
|
text = text.replace(/_/g, " ");
|
|
text = text.replace(/=([0-9a-fA-F]{2})/g,
|
|
(_: string, hex: string) => String.fromCharCode(parseInt(hex, 16)));
|
|
return this.maybeDecode(charset, text);
|
|
}
|
|
|
|
// else encoding is b or B - base64 (RFC 2047 section 4.1)
|
|
try {
|
|
text = atob(text);
|
|
}
|
|
catch {
|
|
// ignored
|
|
}
|
|
return this.maybeDecode(charset, text);
|
|
});
|
|
}
|
|
|
|
private decodeRFC5897(extValue: string) {
|
|
// Decodes "ext-value" from RFC 5987.
|
|
const extEnd = extValue.indexOf("'");
|
|
if (extEnd < 0) {
|
|
// Some servers send "filename*=" without encoding'language' prefix,
|
|
// e.g. in https://github.com/Rob--W/open-in-browser/issues/26
|
|
// Let's accept the value like Firefox (57) (Chrome 62 rejects it).
|
|
return extValue;
|
|
}
|
|
const encoding = extValue.slice(0, extEnd);
|
|
const langvalue = extValue.slice(extEnd + 1);
|
|
// Ignore language (RFC 5987 section 3.2.1, and RFC 6266 section 4.1 ).
|
|
return this.maybeDecode(encoding, langvalue.replace(/^[^']*'/, ""));
|
|
}
|
|
}
|