170 lines
5.8 KiB
JavaScript
170 lines
5.8 KiB
JavaScript
/**
 * Extract data from current tab / multiple urls and pass it on to be saved.
 *
 * The rows come from extractData; fieldSelectors is then inserted as the
 * first row and the whole table is handed to saveFileAsk (defined elsewhere).
 *
 * @param {string} itemsSelector items selectors for selecting items (data rows)
 * @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @param {...any} args url list / url templates, page numbers, either [from, to, interval] or [...pages]
 * @returns {Promise<string>} resolves with "save done." after saveFileAsk has been called;
 *                            rejects with whatever extractData rejects with
 */
async function extract(itemsSelector, fieldSelectors, ...args) {
    const data = await extractData(itemsSelector, fieldSelectors, ...args);
    // The selectors become the first row, presumably serving as the header line of the saved file.
    data.unshift(fieldSelectors);
    saveFileAsk(data);
    // An async function settles its promise through `return`; there is no
    // `resolve` in scope here, so calling it would throw a ReferenceError.
    return "save done.";
}
|
|
|
|
/**
 * Extract data from current tab / multiple urls.
 *
 * Accepted forms of `args`:
 *   - (none)                         extract from the active tab as it is
 *   - [url, url, ...]                visit every url in order
 *   - template, [page, page, ...]    visit template with "${page}" replaced by each page
 *   - template, from, to, interval   visit template for from, from + interval, ... up to and including `to`
 * A template given without usable page arguments yields no urls, so the
 * active tab is extracted as it is.
 *
 * @param {string} itemsSelector items selectors for selecting items (data rows)
 * @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @param {...any} args url list / url templates, page numbers, either [from, to, interval] or [...pages]
 * @returns {Promise<Array>} the rows returned by extractTabData for every visited page, concatenated in visiting order
 * @throws {RangeError} (as a rejection) when `interval` is not a positive number
 * @throws {Error} (as a rejection) when no active tab is found in the current window
 */
async function extractData(itemsSelector, fieldSelectors, ...args) {
    const PAGE_TOKEN = "${page}";
    // split/join substitutes every occurrence of the token and, unlike
    // String.prototype.replace, never interprets "$" sequences in the page value.
    const fillPage = (template, page) => template.split(PAGE_TOKEN).join(String(page));

    let urls = [];
    const [target, ...pageArgs] = args;
    if (Array.isArray(target)) {
        urls = target;
    } else if (target) {
        if (Array.isArray(pageArgs[0])) {
            urls = pageArgs[0].map(page => fillPage(target, page));
        } else if (pageArgs.length >= 3) {
            const [from, to, interval] = pageArgs;
            // A zero or negative step would never reach `to` and loop forever.
            if (!(interval > 0)) {
                throw new RangeError("extractData: interval must be a positive number.");
            }
            for (let page = from; page <= to; page += interval) {
                urls.push(fillPage(target, page));
            }
        }
    }

    // chrome.tabs.query is callback based; wrap only that call in a promise.
    const tab = await new Promise((resolve, reject) => {
        chrome.tabs.query({
            active: true,
            currentWindow: true
        }, tabs => {
            if (tabs && tabs.length) {
                resolve(tabs[0]);
            } else {
                reject(new Error("extractData: no active tab found in the current window."));
            }
        });
    });

    const data = [];
    if (urls.length) {
        // Pages share one tab, so they have to be visited strictly one after another.
        for (const url of urls) {
            await redirectTab(tab, url);
            data.push(...await extractTabData(tab, itemsSelector, fieldSelectors));
        }
    } else {
        data.push(...await extractTabData(tab, itemsSelector, fieldSelectors));
    }
    return data;
}
|
|
|
|
/**
 * Navigate the target tab to `url` (if it is not already there) and wait
 * until the page answers a report-in request again.
 * @param {any} tab target tab
 * @param {string} url url the tab should end up on
 * @returns {Promise} the promise returned by reportIn for the tab
 */
function redirectTab(tab, url) {
    // Url the tab is leaving; stays "" when no navigation is requested, in
    // which case the follow-up queryUrl accepts any url.
    let previousUrl = "";

    const navigateIfNeeded = currentUrl => {
        if (currentUrl === url) {
            return;
        }
        previousUrl = currentUrl;
        chrome.tabs.sendMessage(tab.id, {
            from: "DataExtracter:GotoUrl",
            url
        });
    };
    const waitForUrlChange = () => queryUrl(tab, previousUrl);
    const waitForReportIn = () => reportIn(tab);

    return queryUrl(tab)
        .then(navigateIfNeeded)
        .then(waitForUrlChange)
        .then(waitForReportIn);
}
|
|
|
|
/**
 * Ask the target tab to extract data with the given selectors.
 * The request is sent through sendMessageAndDetect and is considered
 * answered as soon as the response is truthy.
 * @param {any} tab target tab
 * @param {string} itemsSelector items selectors for selecting items (data rows)
 * @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @returns {Promise<string[]>} a promise of extracted data
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    const hasResponse = response => Boolean(response);
    return sendMessageAndDetect(
        tab,
        {
            from: "DataExtracter:Extract",
            itemsSelector,
            fieldSelectors
        },
        hasResponse,
        "extractTabData failed after 10 second."
    );
}
|
|
|
|
/**
 * get report in from the target tab, usually used to detect if the content script is ready.
 * The request is sent through sendMessageAndDetect until the tab echoes the
 * request's `from` value back.
 * @param {any} tab target tab
 * @returns {Promise<string>} a promise of the report in message
 */
function reportIn(tab) {
    const req = {
        from: "DataExtracter:ReportIn"
    };
    const failMsg = "reportIn failed after 10 second.";
    // Strict comparison: with `==` a coercible non-string response such as
    // ["DataExtracter:ReportIn"] would be accepted as a valid report-in.
    const cond = r => r === req.from;
    return sendMessageAndDetect(tab, req, cond, failMsg);
}
|
|
|
|
/**
 * Ask the target tab for its url, retrying through sendMessageAndDetect
 * until an acceptable answer arrives.
 * @param {any} tab target tab
 * @param {string} urlExcluded if specified, queryUrl resolves only when response not equals to urlExcluded
 * @returns {Promise<string>} a promise of the url
 */
function queryUrl(tab, urlExcluded) {
    // Accept any non-empty url, unless it is the one the caller wants to get away from.
    const isAcceptable = url => {
        if (!url) {
            return url;
        }
        return !urlExcluded || url != urlExcluded;
    };
    return sendMessageAndDetect(
        tab,
        { from: "DataExtracter:QueryUrl" },
        isAcceptable,
        "queryUrl failed after 10 second."
    );
}
|
|
|
|
/**
 * Repeatedly sending a message to target tab until the response is detected good.
 *
 * The message is re-sent `detectInterval` ms after every unsatisfying
 * response. Once the promise has settled (success or time out) no further
 * message is sent, even if a response to an earlier message arrives late.
 *
 * @param {object} tab the tab where to send the message
 * @param {object} req the request data.
 * @param {function} cond success condition function, r:any=>boolean
 * @param {string} failMsg message when failed after time out (used as the rejection value)
 * @param {number} failedTimeOut fail time out in ms, 10000 when omitted or falsy
 * @param {number} detectInterval interval for detecting in ms, 500 when omitted or falsy
 * @return {Promise} a promise of the response.
 */
function sendMessageAndDetect(tab, req, cond, failMsg, failedTimeOut, detectInterval) {
    const timeoutMs = failedTimeOut || 10000;
    const intervalMs = detectInterval || 500;
    return new Promise((resolve, reject) => {
        let settled = false;
        let retryTimer;
        const rejectTimer = setTimeout(() => {
            settled = true;
            clearTimeout(retryTimer);
            reject(failMsg);
        }, timeoutMs);

        poll();

        function poll() {
            chrome.tabs.sendMessage(tab.id, req, r => {
                // Reading lastError marks it as checked; a failed delivery
                // (e.g. no receiver yet) is expected here and simply retried.
                if (chrome.runtime && chrome.runtime.lastError) {
                    // intentionally ignored
                }
                // A response that arrives after the time out must not restart
                // the polling, otherwise messages would be sent forever.
                if (settled) {
                    return;
                }
                if (cond(r)) {
                    settled = true;
                    clearTimeout(rejectTimer);
                    resolve(r);
                } else {
                    retryTimer = setTimeout(poll, intervalMs);
                }
            });
        }
    });
}