/**
 * Extract data from the current tab / multiple urls, show a sample of the
 * result in a confirm dialog and, if the user accepts, save it as CSV.
 * @param {string} itemsSelector items selector for selecting items (data rows)
 * @param {Array} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @param {...any} args url list / url template followed by page numbers, either [from, to, interval] or [...pages]
 * @returns {Promise<void>} settles once the dialog is answered (and saveFile was called, if confirmed)
 */
async function extract(itemsSelector, fieldSelectors, ...args) {
    const result = await getData(itemsSelector, fieldSelectors, ...args);
    const sample = result.toString(50) || "- Empty -";
    const accepted = confirm(
        `Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${sample}`
    );
    if (accepted) {
        saveFile(result, "text/csv");
    }
}

/**
 * Collect data from the current tab / multiple urls without prompting.
 *
 * The first extra argument decides where data comes from:
 *  - nothing: the active tab is extracted as it is;
 *  - an array: it is used as the url list;
 *  - an ExtractResult: its squashed, non-empty values are used as the url list;
 *  - a string: it is a url template whose "${page}" placeholders are filled
 *    from either a pages array or a `from, to, interval` triple.
 * Urls are visited one after another in the active tab.
 *
 * @param {string} itemsSelector items selector for selecting items (data rows)
 * @param {Array} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @param {...any} args url list / url template followed by page numbers, either [from, to, interval] or [...pages]
 * @returns {Promise<ExtractResult>} the rows of all visited pages, in visiting order
 * @throws {Error} when the arguments are rejected by testArgs, the page interval is not positive,
 *                 or no active tab can be found
 */
async function getData(itemsSelector, fieldSelectors, ...args) {
    if (!testArgs(itemsSelector, fieldSelectors, ...args)) {
        throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
    }

    let urls = [];
    if (args.length) {
        const [first, ...rest] = args;
        if (Array.isArray(first)) {
            urls = first;
        } else if (first instanceof ExtractResult) {
            urls = first.squash().filter(v => !!v);
        } else if (first) {
            // split/join fills every "${page}" placeholder and keeps "$" sequences
            // in the page value literal (String.prototype.replace would not).
            const fillTemplate = page => first.split("${page}").join(String(page));
            if (Array.isArray(rest[0])) {
                urls = rest[0].map(page => fillTemplate(page));
            } else if (rest.length >= 3) {
                const [from, to, interval] = rest;
                // A zero, negative or NaN step would never reach `to`.
                if (!(interval > 0)) {
                    throw new Error("Invalid page interval: expected a positive number.");
                }
                for (let page = from; page <= to; page += interval) {
                    urls.push(fillTemplate(page));
                }
            }
        }
    }

    // Prefer the active tab of the current window, fall back to other windows.
    const tab = await getActiveTab(true) || await getActiveTab(false);
    if (!tab) {
        throw new Error("Cannot find active tab.");
    }

    const data = [];
    if (urls.length) {
        // Strictly sequential: every url is loaded into the same tab.
        for (const url of urls) {
            await redirectTab(tab, url);
            const rows = await extractTabData(tab, itemsSelector, fieldSelectors);
            data.push(...rows);
        }
    } else {
        const rows = await extractTabData(tab, itemsSelector, fieldSelectors);
        data.push(...rows);
    }
    return new ExtractResult(data);
}

/**
 * Navigate the target tab to a url and wait until it reports in again.
 * @param {any} tab target tab
 * @param {string} url the url to go to
 * @returns {Promise} a promise of the report in message of the tab
 */
async function redirectTab(tab, url) {
    const currentUrl = await queryUrl(tab);
    let previousUrl = "";
    if (url !== currentUrl) {
        previousUrl = currentUrl;
        // Awaited so a closed tab surfaces as a rejection of this call instead
        // of an unhandled one, and so polling starts after the request was sent.
        await sendMessage(tab, { from: "GotoUrl", url: url });
    }
    // Wait until the tab reports a url different from the one it had before.
    await queryUrl(tab, previousUrl);
    return reportIn(tab);
}

/**
 * Extract data from the target tab.
 * Keeps asking until the tab answers with a non-empty result.
 * @param {any} tab target tab
 * @param {string} itemsSelector items selector for selecting items (data rows)
 * @param {Array} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @returns {Promise} a promise of extracted data
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    const req = {
        from: "Extract",
        itemsSelector: itemsSelector,
        fieldSelectors: fieldSelectors
    };
    const cond = r => r && r.length;
    return sendMessage(tab, req, cond);
}

/**
 * Get report in from the target tab, usually used to detect if the content script is ready.
 * @param {any} tab target tab
 * @returns {Promise} a promise of the report in message
 */
function reportIn(tab) {
    const req = { from: "ReportIn" };
    // sendMessage rewrites req.from before the first response arrives, so this
    // compares the response with the prefixed name ("DataExtracter:ReportIn").
    const cond = r => r === req.from;
    return sendMessage(tab, req, cond);
}

/**
 * Get the url of the target tab.
 * @param {any} tab target tab
 * @param {string} urlExcluded if specified, queryUrl resolves only when the response does not equal urlExcluded
 * @returns {Promise} a promise of the url
 */
function queryUrl(tab, urlExcluded) {
    const req = { from: "QueryUrl" };
    const cond = url => url && (!urlExcluded || urlExcluded !== url);
    return sendMessage(tab, req, cond);
}

/**
 * Repeatedly send a message to the target tab until the response is detected good.
 *
 * Note: `req.from` is prefixed with "DataExtracter:" in place, so the caller's
 * request object is modified. The promise rejects with a plain string when the
 * tab no longer exists.
 *
 * @param {object} tab the tab where to send the message
 * @param {object} req the request data
 * @param {function} cond success condition function, r:any=>boolean; when omitted the first response is accepted
 * @param {number} interval retry interval in milliseconds, 500 when omitted or 0
 * @return {Promise} a promise of the response
 */
function sendMessage(tab, req, cond, interval) {
    req.from = "DataExtracter:" + req.from;
    const retryDelay = interval || 500;
    return new Promise((resolve, reject) => {
        const attempt = async () => {
            console.log("request for", req.from);
            const liveTab = await getTabByID(tab.id);
            if (!liveTab) {
                reject("Task interrupted due to the target tab is closed.");
                return;
            }
            chrome.tabs.sendMessage(tab.id, req, response => {
                // Reading lastError marks it as checked; a failed delivery is
                // treated like any other unsatisfying response and retried.
                void chrome.runtime.lastError;
                let accepted;
                try {
                    accepted = !cond || cond(response);
                } catch (err) {
                    // A throwing condition would otherwise leave the promise pending forever.
                    reject(err);
                    return;
                }
                if (accepted) {
                    resolve(response);
                } else {
                    setTimeout(run, retryDelay);
                }
            });
        };
        // Route unexpected failures of an attempt to the returned promise.
        const run = () => {
            attempt().catch(reject);
        };
        run();
    });
}

/**
 * Find the active tab.
 * @param {boolean} currentWindow whether to look only in the current window
 * @returns {Promise} a promise of the first matching tab, undefined when there is none
 */
async function getActiveTab(currentWindow) {
    return new Promise(resolve => {
        chrome.tabs.query({ active: true, currentWindow: currentWindow }, tabs => {
            // Mark a possible error as checked; `tabs` may be missing in that case.
            void chrome.runtime.lastError;
            resolve(tabs && tabs[0]);
        });
    });
}

/**
 * Look a tab up by its id.
 * @param {number} id the tab id
 * @returns {Promise} a promise of the tab, undefined when it does not exist (any more)
 */
async function getTabByID(id) {
    return new Promise(resolve => {
        chrome.tabs.get(id, tab => {
            // Mark the "no tab with id" error as checked.
            void chrome.runtime.lastError;
            resolve(tab);
        });
    });
}