/**
 * (c) 2017 Rob Wu (https://robwu.nl)
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
"use strict";
/* eslint-disable max-len,no-magic-numbers */
// License: MPL-2

/**
 * This TypeScript port was done by Nils Maier based on
 * https://github.com/Rob--W/open-in-browser/blob/83248155b633ed41bc9cdb1205042653e644abd2/extension/content-disposition.js
 * Special thanks goes to Rob doing all the heavy lifting and putting
 * it together in a reusable, open source'd library.
 */

// Matches the extended parameter `filename*=value`; group 1 is the raw
// (possibly quoted) value.
const R_RFC6266 = /(?:^|;)\s*filename\*\s*=\s*([^";\s][^;\s]*|"(?:[^"\\]|\\"?)+"?)/i;
// Matches the plain parameter `filename=value`; group 1 is the raw
// (possibly quoted) value.
const R_RFC5987 = /(?:^|;)\s*filename\s*=\s*([^";\s][^;\s]*|"(?:[^"\\]|\\"?)+"?)/i;

/**
 * Strip the quotes of a quoted-string and resolve its backslash escapes.
 * Values that do not start with a double quote are returned untouched.
 *
 * @param value Raw parameter value as captured from the header
 * @returns The unquoted value, cut off at the first unescaped double quote
 */
function unquoteRFC2616(value: string): string {
  if (!value.startsWith("\"")) {
    return value;
  }
  // Splitting on \" leaves only unescaped quotes inside the parts.
  const parts = value.slice(1).split("\\\"");
  // Find the first unescaped " and terminate there.
  for (let i = 0; i < parts.length; ++i) {
    const quotindex = parts[i].indexOf("\"");
    if (quotindex !== -1) {
      parts[i] = parts[i].slice(0, quotindex);
      // Truncate and stop the iteration.
      parts.length = i + 1;
    }
    // Resolve the remaining backslash escapes (\x -> x).
    parts[i] = parts[i].replace(/\\(.)/g, "$1");
  }
  // Re-insert the escaped quotes that were consumed by the split.
  return parts.join("\"");
}

/**
 * Lenient parser extracting the file name from a Content-Disposition header.
 *
 * An instance keeps per-parse state (`needsFixup` and the `lastIndex` of a
 * global regular expression); both are reset whenever they are used, so an
 * instance can be reused for any number of sequential `parse()` calls.
 */
export class CDHeaderParser {
  // True while no charset-aware decoding step has succeeded during the
  // current parse(); maybeDecode() clears it on success.
  private needsFixup = true;

  // We need to keep this per instance, because of the global flag.
  // Hence lastIndex is reset before each use.
  // Groups: 1 = section number n, 2 = "*" when the section is
  // percent-encoded, 3 = raw (possibly quoted) value.
  private readonly R_MULTI = /(?:^|;)\s*filename\*((?!0\d)\d+)(\*?)\s*=\s*([^";\s][^;\s]*|"(?:[^"\\]|\\"?)+"?)/gi;

  /**
   * Parse a content-disposition header, with relaxed spec tolerance.
   *
   * The sources are tried in this order, the first hit wins:
   * `filename*=`, the continuations `filename*n=` / `filename*n*=`,
   * and finally plain `filename=`.
   *
   * @param header Header value to parse
   * @returns The decoded file name, or "" when the header carries none
   */
  parse(header: string): string {
    this.needsFixup = true;

    // filename*=ext-value ("ext-value" from RFC 5987, referenced by RFC 6266).
    const extMatch = R_RFC6266.exec(header);
    if (extMatch) {
      let filename = unquoteRFC2616(extMatch[1]);
      // unescape() turns %XX into single characters, i.e. a "byte string"
      // that the charset-aware steps below can decode.
      filename = unescape(filename);
      filename = this.decodeRFC5897(filename);
      filename = this.decodeRFC2047(filename);
      return this.maybeFixupEncoding(filename);
    }

    // Continuations (RFC 2231 section 3, referenced by RFC 5987 section 3.1).
    // filename*n*=part
    // filename*n=part
    const continued = this.getParamRFC2231(header);
    if (continued) {
      // The joined value may additionally be RFC 2047 encoded.
      const filename = this.decodeRFC2047(continued);
      return this.maybeFixupEncoding(filename);
    }

    // filename=value (RFC 5987, section 4.1).
    const plainMatch = R_RFC5987.exec(header);
    if (plainMatch) {
      const filename = this.decodeRFC2047(unquoteRFC2616(plainMatch[1]));
      return this.maybeFixupEncoding(filename);
    }

    return "";
  }

  /**
   * Treat every character of `value` as one byte and decode these bytes
   * using `encoding`. On success `needsFixup` is cleared.
   *
   * @param encoding Encoding label handed to TextDecoder; "" disables decoding
   * @param value Byte string to decode
   * @returns The decoded string, or `value` unchanged when the encoding is
   *   empty, a character does not fit into a byte, or decoding fails
   */
  private maybeDecode(encoding: string, value: string): string {
    if (!encoding) {
      return value;
    }
    const bytes = Array.from(value, c => c.charCodeAt(0));
    if (!bytes.every(code => code <= 0xff)) {
      // Not a byte string, so there is nothing to decode.
      return value;
    }
    try {
      value = new TextDecoder(encoding, {fatal: true}).
        decode(new Uint8Array(bytes));
      this.needsFixup = false;
    }
    catch {
      // Either the TextDecoder constructor threw (unrecognized encoding) or
      // the fatal decoder rejected the bytes; keep the value as is.
    }
    return value;
  }

  /**
   * Last-resort decoding for values containing raw high bytes that no
   * earlier step decoded: try UTF-8 first, then iso-8859-1.
   *
   * @param value Candidate file name
   * @returns The fixed-up file name, or `value` when no fixup is required
   */
  private maybeFixupEncoding(value: string): string {
    // Nothing to do when the value was already decoded, or when it contains
    // no characters in the \x80-\xff range that could be undecoded bytes.
    if (!this.needsFixup || !/[\x80-\xff]/.test(value)) {
      return value;
    }

    // Maybe multi-byte UTF-8.
    value = this.maybeDecode("utf-8", value);
    if (!this.needsFixup) {
      return value;
    }

    // Try iso-8859-1 encoding.
    return this.maybeDecode("iso-8859-1", value);
  }

  /**
   * Collect and join the continuation parameters `filename*n=` and
   * `filename*n*=` of a header.
   *
   * @param value Header value to scan
   * @returns The joined sections starting at section 0, or "" when there
   *   is no section 0
   */
  private getParamRFC2231(value: string): string {
    const matches: [string, string][] = [];

    // Iterate over all filename*n= and filename*n*= with n being an integer
    // of at least zero. Any non-zero number must not start with '0'.
    let match: RegExpExecArray | null;
    this.R_MULTI.lastIndex = 0;
    while ((match = this.R_MULTI.exec(value)) !== null) {
      const [, num, quot, part] = match;
      const n = parseInt(num, 10);
      if (n in matches) {
        // Ignore anything after the invalid second filename*0.
        if (n === 0) {
          break;
        }
        // Other duplicated sections: the first occurrence wins.
        continue;
      }
      matches[n] = [quot, part];
    }

    const parts: string[] = [];
    for (let n = 0; n < matches.length; ++n) {
      if (!(n in matches)) {
        // Numbers must be consecutive. Truncate when there is a hole.
        break;
      }
      const [quot, rawPart] = matches[n];
      let part = unquoteRFC2616(rawPart);
      if (quot) {
        // A trailing "*" marks the section as percent-encoded.
        part = unescape(part);
        if (n === 0) {
          // Only the first section carries the charset'language' prefix.
          part = this.decodeRFC5897(part);
        }
      }
      parts.push(part);
    }
    return parts.join("");
  }

  /**
   * Decode RFC 2047 encoded-words ("=?charset?Q|B?text?=") in `value`.
   *
   * @param value Candidate file name
   * @returns The decoded value, or `value` unchanged when it does not look
   *   RFC 2047 encoded
   */
  private decodeRFC2047(value: string): string {
    // RFC 2047-decode the result. Firefox tried to drop support for it, but
    // backed out because some servers use it - https://bugzil.la/875615
    // Firefox's condition for decoding is here:
    // eslint-disable-next-line max-len
    // https://searchfox.org/mozilla-central/rev/4a590a5a15e35d88a3b23dd6ac3c471cf85b04a8/netwerk/mime/nsMIMEHeaderParamImpl.cpp#742-748
    // We are more strict and only recognize RFC 2047-encoding if the value
    // starts with "=?", since then it is likely that the full value is
    // RFC 2047-encoded.
    // Firefox also decodes words even where RFC 2047 section 5 states:
    // "An 'encoded-word' MUST NOT appear within a 'quoted-string'."
    // NOTE(review): the first range ends at \x19 rather than \x1f; kept
    // as is to preserve the existing behavior.
    // eslint-disable-next-line no-control-regex
    if (!value.startsWith("=?") || /[\x00-\x19\x80-\xff]/.test(value)) {
      return value;
    }

    // RFC 2047, section 2.4
    // encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
    // charset = token (but let's restrict to characters that denote a
    //           possibly valid encoding).
    // encoding = q or b
    // encoded-text = any printable ASCII character other than ? or space.
    //                ... but Firefox permits ? and space.
    return value.replace(
      /=\?([\w-]*)\?([QqBb])\?((?:[^?]|\?(?!=))*)\?=/g,
      (_match: string, charset: string, encoding: string, text: string) => {
        if (encoding === "q" || encoding === "Q") {
          // RFC 2047 section 4.2.
          text = text.replace(/_/g, " ");
          text = text.replace(
            /=([0-9a-fA-F]{2})/g,
            (_: string, hex: string) => String.fromCharCode(parseInt(hex, 16)));
          return this.maybeDecode(charset, text);
        }

        // else encoding is b or B - base64 (RFC 2047 section 4.1)
        try {
          text = atob(text);
        }
        catch {
          // Not valid base64; deliberately fall through with the raw text.
        }
        return this.maybeDecode(charset, text);
      });
  }

  /**
   * Decode an "ext-value" from RFC 5987 (charset'language'value).
   *
   * @param extValue Already percent-decoded ext-value
   * @returns The value decoded with the given charset, or `extValue`
   *   unchanged when it has no charset prefix
   */
  private decodeRFC5897(extValue: string): string {
    // Decodes "ext-value" from RFC 5987.
    const extEnd = extValue.indexOf("'");
    if (extEnd < 0) {
      // Some servers send "filename*=" without encoding'language' prefix,
      // e.g. in https://github.com/Rob--W/open-in-browser/issues/26
      // Let's accept the value like Firefox (57) (Chrome 62 rejects it).
      return extValue;
    }
    const encoding = extValue.slice(0, extEnd);
    const langvalue = extValue.slice(extEnd + 1);
    // Ignore language (RFC 5987 section 3.2.1, and RFC 6266 section 4.1 ).
    return this.maybeDecode(encoding, langvalue.replace(/^[^']*'/, ""));
  }
}