/**
 * Extract data from current page / multiple urls.
 *
 * Supported call signatures (the leading `tab` is optional; when it is
 * omitted the active tab is looked up):
 *
 * getData(tab, itemsSelector:string, fieldSelectors:string[])
 * getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 * getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 * getData(tab, itemsSelector:string, fieldSelectors:string[], urls:string[])
 * getData(tab, itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 * getData(itemsSelector:string, fieldSelectors:string[])
 * getData(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 * getData(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 * getData(itemsSelector:string, fieldSelectors:string[], urls:string[])
 * getData(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 *
 * When urls are given, the tab is redirected to each url in order and the
 * rows extracted from every page are concatenated. The first failure aborts
 * the remaining urls and rejects the returned promise.
 *
 * @param {...any} args see the signatures above
 * @returns {Promise<ExtractResult>} extracted rows, with `fieldSelectors` prepended as the first row
 * @throws {Error} if the arguments fail `testArgs`, or no tab can be found
 */
async function getData(...args) {
    let tab;
    // A non-string first argument is treated as the target tab.
    if (typeof args[0] !== 'string') tab = args.shift();
    if (!testArgs(...args)) {
        throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
    }
    // Declared locally: the bare assignments used previously created
    // implicit globals (and throw a ReferenceError in strict mode).
    const itemsSelector = args.shift();
    const fieldSelectors = args.shift();
    const urls = parseUrls(...args);

    // Prefer the active tab of the current window, then of any other window.
    if (!tab) tab = await getActiveTab(true) || await getActiveTab(false);
    if (!tab) throw new Error("Cannot find active tab.");

    const data = [];
    if (urls.length) {
        // Pages must be visited one after another because they share one tab.
        for (const url of urls) {
            await redirectTab(tab, url);
            const results = await extractTabData(tab, itemsSelector, fieldSelectors);
            if (results) data.push(...results);
        }
    } else {
        const results = await extractTabData(tab, itemsSelector, fieldSelectors);
        if (results) data.push(...results);
    }
    // The selectors become the first row of the result.
    data.unshift(fieldSelectors);
    return new ExtractResult(data);
}

/**
 * Build the list of urls to visit from the trailing arguments of getData.
 *
 * parseUrls()                                  -> []
 * parseUrls(urls:string[])                     -> urls (returned as is)
 * parseUrls(urls:ExtractResult)                -> squashed values matching URL_REG
 * parseUrls(template:string, pages:any[])      -> template with "${page}" replaced per page
 * parseUrls(template:string, from, to, interval) -> template with "${page}" replaced by
 *                                                 from, from+interval, ... while <= to
 *
 * Only the first "${page}" occurrence in the template is replaced.
 *
 * @param {...any} args url arguments, see above
 * @returns {string[]} urls to visit; empty when the arguments match no form
 * @throws {RangeError} if a from/to range is given with a non-positive interval
 *                      (the range could never be completed)
 */
function parseUrls(...args) {
    if (!args.length) return [];
    const first = args.shift();
    if (Array.isArray(first)) return first;
    if (first instanceof ExtractResult) {
        return first.squash().filter(v => URL_REG.test(v));
    }

    const urlTempl = first;
    if (!urlTempl) return [];

    if (Array.isArray(args[0])) {
        return args[0].map(p => urlTempl.replace("${page}", p));
    }
    if (args.length >= 3) {
        const [from, to, interval] = args;
        // A zero or negative step never reaches `to`; fail loudly instead of
        // looping forever.
        if (from <= to && interval <= 0) {
            throw new RangeError(`Invalid interval: ${interval}`);
        }
        const urls = [];
        for (let i = from; i <= to; i += interval) {
            urls.push(urlTempl.replace("${page}", i));
        }
        return urls;
    }
    return [];
}

/**
 * Navigate the tab to the given url (skipped when the tab already reports
 * that url), then query the url again with `url` as the expected value.
 * @param {any} tab target tab
 * @param {string} url url to navigate to
 * @returns {Promise} a promise of the final queryUrl response
 */
async function redirectTab(tab, url) {
    const current = await queryUrl(tab, undefined, 'Query current url...');
    if (url !== current) {
        const req = {
            action: ACTION_GOTO_URL,
            url: url
        };
        await sendMessage(tab, req, `Goto url: ${url}`);
    }
    return queryUrl(tab, url, 'Check if tab url matches expected...');
}

/**
 * Extract data from the target tab.
 * @param {any} tab target tab
 * @param {string} itemsSelector items selectors for selecting items (data rows)
 * @param {Array} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @returns {Promise} a promise of extracted data
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    const req = {
        action: ACTION_EXTRACT,
        itemsSelector: itemsSelector,
        fieldSelectors: fieldSelectors
    };
    // Accept any response except the "element not found" message.
    // NOTE(review): presumably sendMessage keeps waiting/retrying until the
    // condition holds - confirm against sendMessage.
    const cond = r => !MSG_ELEMENT_NOT_FOUND.isEqual(r);
    return sendMessage(tab, req, 'Extract data from the tab...', cond);
}

/**
 * Get report in from the target tab, usually used to detect if the content script is ready.
 * @param {any} tab target tab
 * @returns {Promise} a promise of the report in message
 */
function reportIn(tab) {
    const req = {
        action: ACTION_REPORT_IN
    };
    // The response is accepted when it echoes the action. Loose equality is
    // kept on purpose: the response type is not visible from here.
    const cond = r => r == req.action;
    return sendMessage(tab, req, 'Check tab availability...', cond);
}

/**
 * Get the url of the target tab.
 * @param {any} tab target tab
 * @param {string} expected if specified, queryUrl resolves only when tab url equals to expected
 * @param {string} log log message passed through to sendMessage
 * @returns {Promise} a promise of the url
 */
function queryUrl(tab, expected, log) {
    const req = {
        action: ACTION_QUERY_URL
    };
    // A non-empty url is required; it must also match `expected` when given.
    // Loose equality is kept on purpose (response type not visible here).
    const cond = url => url && (!expected || expected == url);
    return sendMessage(tab, req, log, cond);
}

/**
 * Ask the target tab to scroll to the bottom of the page.
 * @param {any} tab target tab
 * @returns {Promise} a promise of the sendMessage response
 */
function scrollToBottom(tab) {
    const req = {
        action: ACTION_SCROLL_BOTTOM
    };
    return sendMessage(tab, req, 'Scroll to page bottom...');
}

/**
 * Create a new tab.
 * @param {string} url url to open
 * @param {boolean} active whether the new tab becomes the active tab
 * @returns {Promise} a promise of the created tab
 */
function createTab(url, active) {
    return new Promise(resolve => {
        chrome.tabs.create({ 'url': url, 'active': active }, tab => resolve(tab));
    });
}

/**
 * Get the active tab.
 * @param {boolean} currentWindow passed to chrome.tabs.query as the `currentWindow` filter
 * @returns {Promise} a promise of the first matching tab, or undefined if there is none
 */
function getActiveTab(currentWindow) {
    return new Promise(resolve => {
        chrome.tabs.query({ active: true, currentWindow: currentWindow }, tabs => {
            // Guard against a missing result so the promise always settles.
            resolve(tabs ? tabs[0] : undefined);
        });
    });
}

/**
 * Get a tab by its id.
 * @param {number} id tab id
 * @returns {Promise} a promise of the tab as passed to the chrome.tabs.get callback
 */
function getTabByID(id) {
    return new Promise(resolve => {
        chrome.tabs.get(id, tab => {
            // Reading lastError marks it as checked, so a failed lookup is
            // not reported as an unchecked error.
            chrome.runtime.lastError;
            resolve(tab);
        });
    });
}