Files
data-extracter-extesion/scripts/extract.js
2018-09-28 16:20:52 +08:00

191 lines
6.2 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * Gather data through getData, show a preview in a confirm dialog and, if the user
 * accepts, pass the result to saveFile with the "text/csv" type.
 * @param {string} itemsSelector selector matching the items (data rows)
 * @param {Array<string>} fieldSelectors selectors matching the fields (data columns) inside each item
 * @param {...any} args forwarded unchanged to getData: url list / url template, page numbers, either [from, to, interval] or [...pages]
 */
async function extract(itemsSelector, fieldSelectors, ...args) {
    const extracted = await getData(itemsSelector, fieldSelectors, ...args);
    const itemCount = extracted.data.length;
    // An empty preview string is replaced by a placeholder so the dialog is never blank.
    const preview = extracted.toString(50) || "- Empty -";
    const accepted = confirm(
        `Click confirm to download if the sample data looks good (${itemCount} items)\n\n${preview}`
    );
    if (!accepted) return;
    saveFile(extracted, "text/csv");
}
/**
 * Extract data from the active tab, or from a list of urls loaded one after another in that tab.
 *
 * The optional `args` decide which urls are visited:
 * - no args: only the page currently shown in the active tab is extracted;
 * - an array of urls;
 * - an ExractResult: its squash() values are used as urls, falsy ones are dropped;
 * - a url template containing "${page}", followed by either an array of pages
 *   or the three values `from, to, interval` (inclusive range, stepping by interval).
 * If a template is given without usable page arguments no url is produced and the
 * current page of the active tab is extracted instead.
 * @param {string} itemsSelector items selectors for selecting items (data rows)
 * @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @param {...any} args url list / url template, page numbers, either [from, to, interval] or [...pages]
 * @returns {Promise<ExractResult>} the rows collected from every visited page, in visiting order
 * @throws {Error} when testArgs rejects the arguments, when the page interval is not a
 * positive number, or when no active tab can be found
 */
async function getData(itemsSelector, fieldSelectors, ...args) {
    if (!testArgs(itemsSelector, fieldSelectors, ...args))
        throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
    let urls = [];
    if (args.length) {
        const [source, ...pageArgs] = args;
        if (source instanceof Array) {
            urls = source;
        } else if (source instanceof ExractResult) {
            urls = source.squash().filter(v => !!v);
        } else if (source) {
            // Anything else is treated as a url template.
            const toUrl = page => source.replace("${page}", page);
            if (pageArgs[0] instanceof Array) {
                urls = pageArgs[0].map(page => toUrl(page));
            } else if (pageArgs.length >= 3) {
                const [from, to, interval] = pageArgs;
                // A zero or negative step would never reach `to` and loop forever.
                if (!(interval > 0))
                    throw new Error(`Invalid page interval: ${interval}. It must be a positive number.`);
                for (let page = from; page <= to; page += interval) {
                    urls.push(toUrl(page));
                }
            }
        }
    }
    // Prefer the active tab of the current window, fall back to the active tab of other windows.
    const tab = await getActiveTab(true) || await getActiveTab(false);
    if (!tab) throw new Error("Cannot find active tab.");
    const data = [];
    if (urls.length) {
        // Pages share one tab, so they have to be visited strictly one after another.
        // The first failure aborts the remaining urls and rejects the returned promise.
        for (const url of urls) {
            await redirectTab(tab, url);
            data.push(...await extractTabData(tab, itemsSelector, fieldSelectors));
        }
    } else {
        data.push(...await extractTabData(tab, itemsSelector, fieldSelectors));
    }
    return new ExractResult(data);
}
/**
 * Point the tab at `url` (unless it already shows it) and wait until the tab reports in.
 * @param {any} tab target tab
 * @param {string} url the url the tab should show
 * @returns {Promise<string>} resolves with whatever reportIn resolves with
 */
async function redirectTab(tab, url) {
    const currentUrl = await queryUrl(tab);
    // Url the tab has to move away from; stays empty when no navigation is requested.
    let urlToLeave = "";
    if (currentUrl !== url) {
        urlToLeave = currentUrl;
        // The GotoUrl request is sent without waiting for its response.
        sendMessage(tab, { from: "GotoUrl", url });
    }
    await queryUrl(tab, urlToLeave);
    return reportIn(tab);
}
/**
 * Send an "Extract" request to the target tab and keep asking until a non-empty response arrives.
 * @param {any} tab target tab
 * @param {string} itemsSelector items selectors for selecting items (data rows)
 * @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @returns {Promise<string[]>} the promise returned by sendMessage, carrying the extracted data
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    // A response counts only when it exists and has a non-zero length.
    const hasRows = rows => rows && rows.length;
    return sendMessage(tab, { from: "Extract", itemsSelector, fieldSelectors }, hasRows);
}
/**
 * Ask the target tab to report in, usually used to detect if the content script is ready.
 * The request is repeated by sendMessage until the response equals the request's `from` value.
 * @param {any} tab target tab
 * @returns {Promise<string>} a promise of the report in message
 */
function reportIn(tab) {
    const request = { from: "ReportIn" };
    // `request.from` is looked up each time the check runs, so the check follows
    // any change sendMessage makes to that field.
    const isEcho = reply => reply == request.from;
    return sendMessage(tab, request, isEcho);
}
/**
 * Ask the target tab for its url, repeating the request until an acceptable answer arrives.
 * @param {any} tab target tab
 * @param {string} urlExcluded if specified, queryUrl resolves only when the response differs from urlExcluded
 * @returns {Promise<string>} a promise of the url
 */
function queryUrl(tab, urlExcluded) {
    const isAcceptable = url => {
        // An empty or missing url is never accepted.
        if (!url) return url;
        return !urlExcluded || urlExcluded != url;
    };
    return sendMessage(tab, { from: "QueryUrl" }, isAcceptable);
}
/**
 * Repeatedly send a message to the target tab until the response is detected good.
 * Note that `req.from` is rewritten in place with the "DataExtracter:" prefix.
 * @param {object} tab the tab where to send the message (only `tab.id` is used)
 * @param {object} req the request data.
 * @param {function} cond success condition function, r:any=>boolean; when omitted the first response is accepted
 * @param {number} interval delay in milliseconds between two attempts; any falsy value means 500
 * @return {Promise} a promise of the response. It rejects with a string when the target tab
 * no longer exists, and with the thrown error when polling or `cond` fails unexpectedly.
 */
function sendMessage(tab, req, cond, interval) {
    req.from = "DataExtracter:" + req.from;
    interval = interval || 500;
    return new Promise((resolve, reject) => {
        // Any unexpected failure must settle the promise, otherwise the caller waits forever.
        const poll = () => attempt().catch(reject);
        poll();
        async function attempt() {
            console.log("request for", req.from);
            const tabAvailable = await getTabByID(tab.id);
            if (!tabAvailable) {
                reject("Task interupted due to the target tab is closed.");
                return;
            }
            chrome.tabs.sendMessage(tab.id, req, r => {
                // Read lastError the same way getTabByID does: while the page is loading no
                // receiver exists, and Chrome logs an "Unchecked runtime.lastError" otherwise.
                void chrome.runtime.lastError;
                let accepted;
                try {
                    accepted = !cond || cond(r);
                } catch (err) {
                    reject(err);
                    return;
                }
                if (accepted) {
                    resolve(r);
                } else {
                    setTimeout(poll, interval);
                }
            });
        }
    });
}
/**
 * Look up the active tab through chrome.tabs.query.
 * @param {boolean} currentWindow passed through as the `currentWindow` filter of the query
 * @returns {Promise<any>} resolves with the first tab of the query result (undefined when the result is empty)
 */
async function getActiveTab(currentWindow) {
    return new Promise(resolve => {
        const filter = { active: true, currentWindow };
        chrome.tabs.query(filter, tabs => {
            const firstTab = tabs[0];
            resolve(firstTab);
        });
    });
}
/**
 * Fetch a tab by its id through chrome.tabs.get.
 * @param {number} id id of the tab to look up
 * @returns {Promise<any>} resolves with whatever chrome.tabs.get passes to its callback;
 * sendMessage treats a falsy value as "the tab is closed"
 */
async function getTabByID(id) {
    return new Promise(resolve => {
        chrome.tabs.get(id, found => {
            // lastError is read and discarded, presumably so Chrome does not report an
            // unchecked runtime.lastError when the tab does not exist. TODO confirm.
            void chrome.runtime.lastError;
            resolve(found);
        });
    });
}