"use strict"; // License: MIT import { getTextLinks } from "../lib/textlinks"; import { runtime } from "../lib/browser"; const REG_CLEAN = /[\s\t\r\n\v]+/g; const baseURL = function() { const base = document.querySelector("base[href]"); let url; if (base) { try { const burl = base.getAttribute("href"); if (burl) { url = new URL(burl); } } catch (ex) { // ignore } } if (!url) { url = new URL(location.href); } url.hash = ""; return url; }(); function makeURL(url: string) { return new URL(url, baseURL); } function sanitize(str: string | null | undefined) { return str && str.replace(REG_CLEAN, " ").trim() || ""; } function *extractDescriptionInternal(parent: Node): Iterable { for (const node of Array.from(parent.childNodes)) { switch (node.nodeType) { case Node.TEXT_NODE: { const val = sanitize(node.textContent); if (val) { yield val; } break; } case Node.ELEMENT_NODE: yield *extractDescriptionInternal(node); break; default: break; } } } function extractDescription(el: HTMLElement) { return Array.from(extractDescriptionInternal(el)).join(" "); } function urlToUsable(e: any, u: string) { try { const usable = decodeURIComponent(u); if (usable !== u) { e.usable = usable; } else { e.usable = true; } } catch (ex) { // ignore } } class Gatherer { textLinks: boolean; selectionOnly: boolean; selection: Selection | null; schemes: Set; transferable: string[]; constructor(options: any) { this.textLinks = options.textLinks; this.selectionOnly = options.selectionOnly; this.selection = options.selectionOnly ? getSelection() : null; this.schemes = new Set(options.schemes); this.transferable = options.transferable; this.collectLink = this.collectLink.bind(this); this.collectImages = this.collectImages.bind(this); this.collectMedia = this.collectMedia.bind(this); Object.freeze(this); } collectLink(a: HTMLAnchorElement) { const item = this.makeItem(a.href, a); if (!item) { return item; } urlToUsable(item, item.url); item.fileName = sanitize(a.getAttribute("download")); item.description = extractDescription(a); return item; } *collectImagesInternal(img: HTMLImageElement) { const src = img.currentSrc || img.src; const item = this.makeItem(src, img); item.fileName = ""; item.description = item.title; yield item; const {srcset} = img; if (!srcset) { return; } const imgs = srcset.split(",").flatMap(e => { const idx = e.lastIndexOf(" "); return (idx > 0 ? e.slice(0, idx) : e).trim(); }); for (const i of imgs) { const item = this.makeItem(i, img); if (item) { yield item; } } } collectImages(img: HTMLImageElement) { return [...this.collectImagesInternal(img)]; } collectMediaInternal(title: string | undefined | null, el: HTMLMediaElement) { try { const src = el.currentSrc || el.getAttribute("src"); if (!src) { return null; } const item = this.makeItem(src, el, title); if (!item) { return null; } item.fileName = ""; item.description = item.title; return item; } catch (ex) { console.error("Failed to get media from", el && el.outerHTML, ex); } return null; } collectMedia(el: HTMLMediaElement) { const item = this.collectMediaInternal(el.getAttribute("title"), el); const rv = item ? [item] : []; const title: string | undefined = item && item.title || el.getAttribute("title"); rv.push(...Array.from(el.querySelectorAll("source")). map(this.collectMediaInternal.bind(this, title))); return rv; } *findTexts() { let doc = document; const {selection} = this; if (this.selectionOnly && selection) { let copy = document.createElement("div"); for (let i = 0; i < selection.rangeCount; ++i) { const r = selection.getRangeAt(i); copy.appendChild(r.cloneContents()); } doc = document.implementation.createDocument( "http://www.w3.org/1999/xhtml", "html", null); copy = doc.adoptNode(copy); doc.documentElement.appendChild(doc.adoptNode(copy)); } const set = doc.evaluate( "//*[not(ancestor-or-self::a) and " + "not(ancestor-or-self::style) and " + "not(ancestor-or-self::script)]/text()", doc, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null ); for (let r = set.iterateNext(); r; r = set.iterateNext()) { const {textContent} = r; if (textContent) { yield textContent; continue; } } } *findTextLinks() { for (const text of this.findTexts()) { yield *getTextLinks(text, true); } } collectTextLinks() { if (!this.textLinks) { return []; } return Array.from(this.findTextLinks()). map(link => this.makeItem(link.href, link)); } makeItem(surl: string, el: HTMLElement, title?: string | null): any { if (!(el as any).fake && this.selectionOnly && (!this.selection || !this.selection.containsNode(el, true))) { return null; } try { const url = makeURL(surl); if (!this.schemes.has(url.protocol)) { return null; } title = sanitize(el.getAttribute("title") || title) || sanitize(el.getAttribute("alt")); return { url: url.href, title, }; } catch (ex) { console.error("failed to make", surl, ex.message); return null; } } makeUniqueItemsInternal(arr: any[], known: Map, result: any[]) { for (const e of arr) { if (!e || !e.url) { continue; } const other = known.get(e.url); if (other) { for (const p of this.transferable) { if (!other[p] && e[p]) { other[p] = e[p]; } } continue; } known.set(e.url, e); result.push(e); } } makeUniqueItems(...arrs: any[]) { const known = new Map(); const result: any[] = []; for (const arr of arrs) { this.makeUniqueItemsInternal(arr, known, result); } return result; } } function gather(msg: any, sender: any, callback: Function) { try { if (!msg || msg.type !== "DTA:gather" || !callback) { return; } const gatherer = new Gatherer(msg); const result = { baseURL: baseURL.href, links: gatherer.makeUniqueItems( Array.from(document.links).map(gatherer.collectLink), gatherer.collectTextLinks()), media: gatherer.makeUniqueItems( Array.from(document.querySelectorAll("img")). flatMap(gatherer.collectImages), Array.from(document.querySelectorAll("video")). flatMap(gatherer.collectMedia), Array.from(document.querySelectorAll("audio")). flatMap(gatherer.collectMedia), ), }; urlToUsable(result, result.baseURL); callback(result); } catch (ex) { console.error(ex.toString(), ex.stack, ex); } } runtime.onMessage.addListener(gather);