Files
data-extracter-extesion/scripts/extract.js
2018-05-23 11:28:17 +08:00

131 lines
4.2 KiB
JavaScript

// function extract(itemsSelector, fieldSelectors, url, from, to, interval)
// function extract(itemsSelector, fieldSelectors, url, pages)
/**
 * Extracts data from the active tab of the current window and hands the
 * collected rows to saveFileAsk, with `fieldSelectors` as the first row.
 *
 * When `url` is given together with page arguments, the tab is redirected to
 * each generated URL in turn (one after another) and the rows of all pages are
 * concatenated. When no URL can be generated, only the page currently shown in
 * the tab is extracted.
 *
 * @param {*} itemsSelector forwarded unchanged to extractData
 * @param {*} fieldSelectors forwarded unchanged to extractData; also saved as the first row
 * @param {string} [url] URL template; every "${page}" in it is replaced by the page value
 * @param {...*} args either one array of page values, or `from, to, interval`
 *   (pages from, from + interval, ... while <= to)
 * @returns {Promise<string>} fulfilled with "save done." once saveFileAsk returned.
 *   Rejected with a RangeError when `interval` is not a positive finite number,
 *   with an Error when no active tab is found, or with whatever redirectTab,
 *   extractData or saveFileAsk fail with.
 */
function extract(itemsSelector, fieldSelectors, url, ...args) {
    const PAGE_TOKEN = "${page}";
    // split/join fills in every placeholder and, unlike String.replace with a
    // string replacement, never interprets "$" sequences in the page value.
    const toPageUrl = page => url.split(PAGE_TOKEN).join(String(page));

    let urls = [];
    if (url) {
        if (Array.isArray(args[0])) {
            urls = args[0].map(page => toPageUrl(page));
        } else if (args.length >= 3) {
            const [from, to, interval] = args;
            // A zero, negative or non-numeric step would never get past `to`
            // and the loop below would not terminate.
            if (!Number.isFinite(interval) || interval <= 0) {
                return Promise.reject(
                    new RangeError("extract: interval must be a positive number.")
                );
            }
            for (let page = from; page <= to; page += interval) {
                urls.push(toPageUrl(page));
            }
        }
    }

    const extractFrom = tab => extractData(tab, itemsSelector, fieldSelectors);

    // Adapt the callback based chrome.tabs.query to a Promise for the active tab.
    const activeTab = new Promise((resolve, reject) => {
        chrome.tabs.query({
            active: true,
            currentWindow: true
        }, tabs => {
            const tab = tabs && tabs[0];
            if (!tab) {
                reject(new Error("extract: no active tab found in the current window."));
                return;
            }
            resolve(tab);
        });
    });

    return activeTab
        .then(tab => {
            if (!urls.length) {
                return extractFrom(tab);
            }
            // Visit the pages strictly one after another: each step waits for
            // the previous page before redirecting the tab again.
            return urls.reduce(
                (chain, pageUrl) => chain.then(rows =>
                    redirectTab(tab, pageUrl)
                        .then(() => extractFrom(tab))
                        // concat instead of push(...rows): spreading a very
                        // large array into a call can exceed the engine's
                        // argument limit.
                        .then(pageRows => rows.concat(pageRows))
                ),
                Promise.resolve([])
            );
        })
        .then(rows => {
            // Wrapping in [ ] keeps fieldSelectors as ONE leading row even if
            // it is an array itself.
            saveFileAsk([fieldSelectors].concat(rows));
            return "save done.";
        });
}
/**
 * Brings the tab to `url` and waits until it answers a report-in request.
 *
 * Steps, in order: read the tab's current URL; if it differs from `url`, send
 * a "DataExtracter:GotoUrl" message; poll the URL again until it is no longer
 * the one seen before the redirect; finally wait for reportIn.
 * @param {object} tab tab whose `id` is used for messaging
 * @param {string} url the URL the tab should show
 * @returns {Promise} settles with the outcome of reportIn(tab)
 */
function redirectTab(tab, url) {
    // Stays "" when no redirect is needed, so the second queryUrl accepts any URL.
    let urlBeforeRedirect = "";

    const gotoIfNeeded = currentUrl => {
        if (currentUrl === url) {
            return;
        }
        urlBeforeRedirect = currentUrl;
        chrome.tabs.sendMessage(tab.id, {
            from: "DataExtracter:GotoUrl",
            url: url
        });
    };
    const waitForUrlChange = () => queryUrl(tab, urlBeforeRedirect);
    const waitForReportIn = () => reportIn(tab);

    return queryUrl(tab)
        .then(gotoIfNeeded)
        .then(waitForUrlChange)
        .then(waitForReportIn);
}
/**
 * Sends a "DataExtracter:Extract" request carrying both selectors to the tab
 * through sendMessageAndDetect. Any truthy response counts as success.
 * @param {object} tab the tab to send the request to
 * @param {*} itemsSelector passed along in the request as `itemsSelector`
 * @param {*} fieldSelectors passed along in the request as `fieldSelectors`
 * @returns {Promise} whatever sendMessageAndDetect returns for this request
 */
function extractData(tab, itemsSelector, fieldSelectors) {
    const isTruthy = response => Boolean(response);
    return sendMessageAndDetect(
        tab,
        {
            from: "DataExtracter:Extract",
            itemsSelector,
            fieldSelectors
        },
        isTruthy,
        "extractTabData failed after 10 second."
    );
}
/**
 * Sends a "DataExtracter:ReportIn" request to the tab through
 * sendMessageAndDetect and succeeds once the response equals (loosely, ==)
 * the request's own `from` value.
 * @param {object} tab the tab to send the request to
 * @returns {Promise} whatever sendMessageAndDetect returns for this request
 */
function reportIn(tab) {
    const request = { from: "DataExtracter:ReportIn" };
    const echoesRequest = reply => reply == request.from;
    return sendMessageAndDetect(
        tab,
        request,
        echoesRequest,
        "reportIn failed after 10 second."
    );
}
/**
 * Sends a "DataExtracter:QueryUrl" request to the tab through
 * sendMessageAndDetect. A response is accepted when it is truthy and, if
 * `urlExcluded` is truthy, differs (loosely, !=) from `urlExcluded`.
 * @param {object} tab the tab to send the request to
 * @param {string} [urlExcluded] a URL that must not be accepted as the answer
 * @returns {Promise} whatever sendMessageAndDetect returns for this request
 */
function queryUrl(tab, urlExcluded) {
    const isAcceptable = answer => {
        if (!answer) {
            // Hand back the falsy answer itself, like a short-circuited `&&`.
            return answer;
        }
        if (!urlExcluded) {
            return true;
        }
        return urlExcluded != answer;
    };
    return sendMessageAndDetect(
        tab,
        { from: "DataExtracter:QueryUrl" },
        isAcceptable,
        "queryUrl failed after 10 second."
    );
}
/**
 * Repeatedly sends a message to the target tab until the response satisfies
 * `cond`. The accepted response is the fulfilment value of the returned Promise.
 * Once the Promise is settled (accepted response or time out) no further
 * message is sent, even if a response to an earlier message arrives late.
 * @param {object} tab the tab where to send the message (its `id` is used)
 * @param {object} req the request data.
 * @param {function} cond success condition function, r:any=>boolean
 * @param {string} failMsg rejection value used when the time out is reached
 * @param {number} [failedTimeOut] fail time out in ms; falsy values mean 10000
 * @param {number} [detectInterval] delay in ms between an unaccepted response
 *   and the next attempt; falsy values mean 500
 * @returns {Promise<*>} fulfilled with the first response accepted by `cond`,
 *   rejected with `failMsg` after `failedTimeOut` ms
 */
function sendMessageAndDetect(tab, req, cond, failMsg, failedTimeOut, detectInterval) {
    const timeoutMs = failedTimeOut || 10000;
    const retryDelayMs = detectInterval || 500;
    return new Promise((resolve, reject) => {
        // Guards against a response that arrives after the time out: without
        // it such a late callback would schedule a new attempt and the polling
        // would never stop.
        let settled = false;
        let retryTimer;
        const rejectTimer = setTimeout(() => {
            settled = true;
            clearTimeout(retryTimer);
            reject(failMsg);
        }, timeoutMs);

        function attempt() {
            chrome.tabs.sendMessage(tab.id, req, response => {
                // Reading lastError marks a failed delivery as handled, so
                // Chrome does not log "Unchecked runtime.lastError" for every
                // attempt made while the tab cannot answer yet.
                void (chrome.runtime && chrome.runtime.lastError);
                if (settled) {
                    return;
                }
                if (cond(response)) {
                    settled = true;
                    clearTimeout(rejectTimer);
                    resolve(response);
                    return;
                }
                retryTimer = setTimeout(attempt, retryDelayMs);
            });
        }

        attempt();
    });
}