Files
data-extracter-extesion/scripts/background/actions.js
2020-01-10 12:07:21 +08:00

168 lines
5.6 KiB
JavaScript

/**
 * Extract data from current page / multiple urls.
 *
 * Supported call forms (the leading `tab` is optional; when it is omitted the
 * active tab is looked up, current window first):
 * getData(tab, itemsSelector:string, fieldSelectors:string[])
 * getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 * getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 * getData(tab, itemsSelector:string, fieldSelectors:string[], urls:string[])
 * getData(tab, itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 * getData(itemsSelector:string, fieldSelectors:string[])
 * getData(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 * getData(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 * getData(itemsSelector:string, fieldSelectors:string[], urls:string[])
 * getData(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 *
 * When urls are given, the tab is redirected to each url in order and data is
 * extracted after every redirect; otherwise data is extracted once from the
 * tab as it is.
 * @param {...any} args see the call forms above
 * @returns {Promise<ExtractResult>} result whose first entry is `fieldSelectors`,
 * followed by the entries extracted from each page
 * @throws {Error} when the arguments are rejected by `testArgs`, or when no
 * tab is given and no active tab can be found
 */
async function getData(...args) {
    let tab;
    // A leading non-string argument is taken to be the target tab.
    if (typeof args[0] !== 'string') tab = args.shift();
    if (!testArgs(...args))
        throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
    // Declared locally: as undeclared assignments these leaked into the global
    // scope, so overlapping calls overwrote each other's selectors.
    const itemsSelector = args.shift();
    const fieldSelectors = args.shift();
    const urls = parseUrls(...args);
    if (!tab) tab = await getActiveTab(true) || await getActiveTab(false);
    if (!tab) throw new Error("Cannot find active tab.");

    // The field selectors go first, ahead of the extracted entries.
    const data = [fieldSelectors];
    const collect = async () => {
        const results = await extractTabData(tab, itemsSelector, fieldSelectors);
        if (results) data.push(...results);
    };

    if (urls.length) {
        // Pages are visited strictly one after another because they share the tab.
        for (const url of urls) {
            await redirectTab(tab, url);
            await collect();
        }
    } else {
        await collect();
    }
    return new ExtractResult(data);
}
/**
 * Build the list of urls to visit from the trailing arguments of getData.
 *
 * Accepted forms:
 * - no arguments: returns an empty list
 * - (urls:Array): the array is returned as is
 * - (urls:ExtractResult): the truthy values of `urls.squash()`
 * - (template:string, pages:Array): one url per page
 * - (template:string, from, to, interval): one url per page number, counting
 *   from `from` up to and including `to` in steps of `interval`
 * Every `${page}` placeholder in the template is replaced by the page value.
 * Any other combination yields an empty list.
 * @param {...any} args
 * @returns {string[]} urls to visit
 * @throws {RangeError} when a non-empty range is given with an interval that
 * is not positive, because such a range could never be completed
 */
function parseUrls(...args) {
    if (!args.length) return [];
    const [source, ...rest] = args;
    if (Array.isArray(source)) return source;
    if (source instanceof ExtractResult) return source.squash().filter(v => !!v);
    if (!source) return [];

    // split/join fills in every placeholder and inserts the page verbatim;
    // String.replace with a string pattern only touches the first occurrence
    // and gives "$" sequences in the replacement a special meaning.
    const fillPage = page => source.split("${page}").join(String(page));

    const [pages] = rest;
    if (Array.isArray(pages)) return pages.map(page => fillPage(page));
    if (rest.length < 3) return [];

    const [from, to, interval] = rest;
    if (from <= to && interval <= 0) {
        // Without this guard the loop below would never terminate.
        throw new RangeError(
            `Invalid interval ${interval}: paging from ${from} to ${to} requires a positive interval.`
        );
    }
    const urls = [];
    for (let page = from; page <= to; page += interval) {
        urls.push(fillPage(page));
    }
    return urls;
}
/**
 * Point the tab at `url` unless it already reports that url.
 *
 * The tab's current url is queried first. If it differs from `url`, a goto
 * request is sent (its reply is not waited for) and the follow-up query is
 * told to exclude the previous url. If it already matches, no request is sent
 * and the follow-up query excludes nothing.
 * @param {any} tab target tab
 * @param {string} url url the tab should show
 * @returns {Promise<string>} result of the follow-up url query
 */
function redirectTab(tab, url) {
    // Url the tab showed before the goto request; stays empty when none is sent.
    let previousUrl = "";

    const gotoIfNeeded = reportedUrl => {
        if (url === reportedUrl) return;
        previousUrl = reportedUrl;
        const request = { action: ACTION_GOTO_URL, url };
        sendMessage(tab, request, `Goto url: ${url}`);
    };
    const confirmUrl = () =>
        queryUrl(tab, previousUrl, 'Check if tab url matches expected...');

    return queryUrl(tab, undefined, 'Query current url...')
        .then(gotoIfNeeded)
        .then(confirmUrl);
}
/**
 * Ask the target tab to extract data.
 * Sends an ACTION_EXTRACT request carrying both selectors; any reply other
 * than `undefined` satisfies the condition handed to sendMessage.
 * @param {any} tab target tab
 * @param {string} itemsSelector selector for the items (data rows)
 * @param {Array<string>} fieldSelectors selectors for the fields (data columns) under each item
 * @returns {Promise<string[]>} whatever sendMessage returns; presumably a promise of the extracted data
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    const hasReply = reply => reply !== undefined;
    const request = { action: ACTION_EXTRACT, itemsSelector, fieldSelectors };
    return sendMessage(tab, request, 'Extract data from the tab...', hasReply);
}
/**
 * Ask the target tab to report in, usually used to detect whether the content
 * script is ready.
 * Sends an ACTION_REPORT_IN request; a reply satisfies the condition handed to
 * sendMessage when it loosely equals the request's action.
 * @param {any} tab target tab
 * @returns {Promise<string>} whatever sendMessage returns; presumably a promise of the report-in reply
 */
function reportIn(tab) {
    const request = { action: ACTION_REPORT_IN };
    // Loose comparison (==): replies that coerce to the action also match.
    const echoesAction = reply => reply == request.action;
    return sendMessage(tab, request, 'Check tab availability...', echoesAction);
}
/**
 * Query the url of the target tab.
 * Sends an ACTION_QUERY_URL request. The condition handed to sendMessage
 * accepts a reply only when it is truthy and, if `urlExcluded` is set,
 * differs from `urlExcluded`.
 * @param {any} tab target tab
 * @param {string} urlExcluded if specified, a reply equal to this url is not accepted
 * @param {string} log message forwarded to sendMessage
 * @returns {Promise<string>} whatever sendMessage returns; presumably a promise of the url
 */
function queryUrl(tab, urlExcluded, log) {
    const isAcceptable = url => {
        // An empty reply is handed back unchanged, so it stays falsy.
        if (!url) return url;
        if (!urlExcluded) return true;
        return urlExcluded != url;
    };
    const request = { action: ACTION_QUERY_URL };
    return sendMessage(tab, request, log, isAcceptable);
}
/**
 * Create a new tab through chrome.tabs.create.
 * @param {string} url url passed to chrome.tabs.create
 * @param {boolean} active `active` flag passed to chrome.tabs.create
 * @returns {Promise<any>} resolves with the value chrome.tabs.create hands to its callback
 */
async function createTab(url, active) {
    return new Promise(resolve => {
        const options = { url, active };
        chrome.tabs.create(options, tab => {
            resolve(tab);
        });
    });
}
/**
 * Find the active tab through chrome.tabs.query.
 * @param {boolean} currentWindow passed to the query as its `currentWindow` filter
 * @returns {Promise<any>} resolves with the first matching tab, or `undefined`
 * when nothing matches or the query hands back no tab list
 */
async function getActiveTab(currentWindow) {
    return new Promise(resolve => {
        chrome.tabs.query({ active: true, currentWindow }, tabs => {
            // Read lastError the same way getTabByID does, so a failed query
            // is not left unchecked.
            void chrome.runtime.lastError;
            // Without a tab list, indexing would throw inside the callback and
            // the promise would never settle; report "no tab" instead.
            resolve(tabs ? tabs[0] : undefined);
        });
    });
}
/**
 * Look up a tab by id through chrome.tabs.get.
 * @param {number} id id passed to chrome.tabs.get
 * @returns {Promise<any>} resolves with the value chrome.tabs.get hands to its
 * callback (nothing is rejected here, so a missing tab resolves as received)
 */
async function getTabByID(id) {
    const lookup = resolve => {
        chrome.tabs.get(id, tab => {
            // lastError is read and discarded; presumably so that a failed
            // lookup is not reported as an unchecked error.
            void chrome.runtime.lastError;
            resolve(tab);
        });
    };
    return new Promise(lookup);
}