// 181 lines, 6.0 KiB, JavaScript
/**
 * Extract data from the current page or from multiple urls.
 *
 * Supported call signatures. The leading `tab` is optional: a first argument
 * that is not a string is taken as the tab, otherwise the active tab is used.
 *
 *   getData(tab, itemsSelector:string, fieldSelectors:string[])
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], urls:string[])
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 *   getData(itemsSelector:string, fieldSelectors:string[])
 *   getData(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 *   getData(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 *   getData(itemsSelector:string, fieldSelectors:string[], urls:string[])
 *   getData(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 *
 * @param {...any} args call arguments, see the signatures above
 * @returns {Promise<ExtractResult>} an ExtractResult built from an array whose
 *   first entry is `fieldSelectors`, followed by the entries extracted from
 *   each visited page, in visiting order
 * @throws {Error} when the arguments are rejected by testArgs, or when no
 *   tab was passed and no active tab can be found
 */
async function getData(...args) {
    // A non-string first argument is the optional target tab.
    let tab;
    if (typeof args[0] !== 'string') tab = args.shift();
    if (!testArgs(...args)) {
        throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
    }

    // Declared locally: assigning these without a declaration leaked globals
    // and throws a ReferenceError in strict mode / ES modules.
    const itemsSelector = args.shift();
    const fieldSelectors = args.shift();
    const urls = parseUrls(...args);

    // Prefer the active tab of the current window, then fall back to the
    // active tab of any other window.
    if (!tab) tab = await getActiveTab(true) || await getActiveTab(false);
    if (!tab) throw new Error("Cannot find active tab.");

    // The field selectors form the first (header) entry of the result.
    const data = [fieldSelectors];
    const collect = results => {
        if (results) data.push(...results);
    };

    if (urls.length) {
        // Pages share one tab, so they must be visited strictly one by one.
        for (const url of urls) {
            await redirectTab(tab, url);
            collect(await extractTabData(tab, itemsSelector, fieldSelectors));
        }
    } else {
        collect(await extractTabData(tab, itemsSelector, fieldSelectors));
    }
    return new ExtractResult(data);
}

/**
 * Build the list of urls to visit from the arguments that follow the
 * selectors in a getData call.
 *
 * Accepted forms:
 *   - (urls:string[])               the array is returned as is
 *   - (urls:ExtractResult)          the values of `squash()` that match URL_REG
 *   - (template:string, pages:any[])
 *                                   one url per page value
 *   - (template:string, from:number, to:number, interval:number)
 *                                   one url per page in from..to (inclusive)
 *
 * Every "${page}" placeholder in the template is replaced by the page value.
 * Any other input yields an empty list.
 *
 * @param {...any} args url-related arguments, see the forms above
 * @returns {string[]} the urls to visit, possibly empty
 * @throws {RangeError} when `from <= to` and `interval <= 0`, because that
 *   range could never be completed
 */
function parseUrls(...args) {
    if (!args.length) return [];

    const [source, ...rest] = args;
    if (Array.isArray(source)) return source;
    if (source instanceof ExtractResult) {
        return source.squash().filter(v => URL_REG.test(v));
    }
    if (!source) return [];

    // split/join fills every placeholder and inserts the value literally,
    // whereas String.prototype.replace only handles the first occurrence and
    // interprets sequences such as "$&" in the replacement.
    const fill = page => source.split("${page}").join(String(page));

    const [first] = rest;
    if (Array.isArray(first)) return first.map(page => fill(page));

    if (rest.length >= 3) {
        const [from, to, interval] = rest;
        if (from <= to && interval <= 0) {
            throw new RangeError(`Invalid page interval ${interval}: it must be greater than 0.`);
        }
        const urls = [];
        for (let page = from; page <= to; page += interval) {
            urls.push(fill(page));
        }
        return urls;
    }
    return [];
}

/**
 * Point the tab at `url`, then query the tab url again with `url` as the
 * expected value.
 *
 * The goto request is only sent when the tab does not already report `url`.
 *
 * @param {any} tab target tab
 * @param {string} url url the tab should end up on
 * @returns {Promise<string>} the promise of the final queryUrl call
 */
async function redirectTab(tab, url) {
    const current = await queryUrl(tab, undefined, 'Query current url...');
    if (current !== url) {
        const req = { action: ACTION_GOTO_URL, url };
        await sendMessage(tab, req, `Goto url: ${url}`);
    }
    return queryUrl(tab, url, 'Check if tab url matches expected...');
}

/**
 * Extract data from the target tab.
 * @param {any} tab target tab
 * @param {string} itemsSelector items selector for selecting items (data rows)
 * @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @returns {Promise<string[]>} a promise of extracted data
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    const req = {
        action: ACTION_EXTRACT,
        itemsSelector,
        fieldSelectors,
    };
    // Condition handed to sendMessage: it fails for a response that equals
    // MSG_ELEMENT_NOT_FOUND and holds for any other response.
    const cond = r => !MSG_ELEMENT_NOT_FOUND.isEqual(r);
    return sendMessage(tab, req, 'Extract data from the tab...', cond);
}

/**
 * Ask the target tab to report in, usually used to detect if the content
 * script is ready.
 * @param {any} tab target tab
 * @returns {Promise<string>} a promise of the report-in message
 */
function reportIn(tab) {
    const req = { action: ACTION_REPORT_IN };
    // The response is accepted only when it echoes the requested action.
    const cond = r => r === req.action;
    return sendMessage(tab, req, 'Check tab availability...', cond);
}

/**
 * Get the url of the target tab.
 * @param {any} tab target tab
 * @param {string} expected if specified, queryUrl resolves only when tab url equals to expected
 * @param {string} log log message passed through to sendMessage
 * @returns {Promise<string>} a promise of the url
 */
function queryUrl(tab, expected, log) {
    const req = { action: ACTION_QUERY_URL };
    // A response counts only when it is a non-empty url and, if an expected
    // url was given, matches it exactly.
    const cond = url => Boolean(url) && (!expected || expected === url);
    return sendMessage(tab, req, log, cond);
}

/**
 * Ask the target tab to scroll to the bottom of the page.
 * @param {any} tab target tab
 * @returns {Promise<any>} the promise returned by sendMessage for the scroll request
 */
function scrollToBottom(tab) {
    const req = { action: ACTION_SCROLL_BOTTOM };
    return sendMessage(tab, req, 'Scroll to page bottom...');
}

/**
 * Create a new tab through chrome.tabs.create.
 * @param {string} url url to open in the new tab
 * @param {boolean} active value passed as the `active` option of the new tab
 * @returns {Promise<any>} a promise of whatever chrome.tabs.create hands to its callback
 */
function createTab(url, active) {
    // Adapts the callback-style chrome API to a promise.
    return new Promise(resolve => {
        chrome.tabs.create({ url, active }, tab => resolve(tab));
    });
}

/**
 * Find the active tab through chrome.tabs.query.
 * @param {boolean} currentWindow value passed as the `currentWindow` filter of the query
 * @returns {Promise<any>} a promise of the first matching tab, or undefined
 *   when the query yields no tab
 */
function getActiveTab(currentWindow) {
    return new Promise(resolve => {
        chrome.tabs.query({ active: true, currentWindow }, tabs => {
            // Guard against a missing result list: indexing it would throw
            // inside the callback and leave this promise pending forever.
            resolve(tabs ? tabs[0] : undefined);
        });
    });
}

/**
 * Look up a tab by id through chrome.tabs.get.
 * @param {number} id id of the tab to look up
 * @returns {Promise<any>} a promise of whatever chrome.tabs.get hands to its
 *   callback; the promise is never rejected by this function
 */
function getTabByID(id) {
    return new Promise(resolve => {
        chrome.tabs.get(id, tab => {
            // NOTE(review): lastError is read and discarded on purpose,
            // presumably so Chrome treats a failed lookup as checked instead
            // of logging an unchecked runtime.lastError; confirm.
            void chrome.runtime.lastError;
            resolve(tab);
        });
    });
}