Files
data-extracter-extesion/scripts/background/extractor.js
2020-01-10 12:07:21 +08:00

143 lines
5.0 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * Runs a chain of extraction tasks against a browser tab and caches the
 * result of every task so that the chain can be resumed or saved later.
 *
 * The helpers used here (testArgs, argsToString, signitures, parseUrls,
 * createTab, getActiveTab, getData, saveFile) are not defined in this class;
 * they are expected to be provided elsewhere in the extension.
 */
class Extractor {
    constructor() {
        // Argument lists of the queued tasks, in execution order.
        this._tasks = [];
        // Tab the tasks run against; assigned by start() / restart().
        this._tab = undefined;
        // True while a chain is executing; guards against overlapping runs.
        this._running = false;
        // Task result cache, keyed by the task's own argument array (identity).
        // Keying by identity keeps two tasks with identical arguments apart.
        this._results = new Map();
    }

    /**
     * Add a task to the Extractor.
     * One Extractor can have multiple tasks, organized in a task chain.
     * A later task uses the previous task's result as its input (target url list),
     * so only the first task can have target url arguments, while later tasks can't.
     * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
     * @returns {Extractor} this instance, so calls can be chained.
     */
    task(...args) {
        if (!testArgs(...args)) {
            console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`);
            // Return without queuing so an invalid task never runs.
            return this;
        }
        // More than 2 arguments means the task specifies its own target pages,
        // so it won't accept the previous task's result as url list.
        // In that case the former tasks are useless and can be dropped.
        if (args.length > 2) this.clear();
        this._tasks.push(args);
        return this;
    }

    /**
     * Clear tasks and cached results.
     */
    clear() {
        this._tasks = [];
        this._results = new Map();
    }

    /**
     * Start the task chain from the first task.
     * Errors raised by the tasks themselves are logged, not thrown; errors
     * raised while preparing the tab reject the returned promise.
     * @returns {Promise<void>} settles when the chain has finished.
     */
    async start() {
        if (this._running) {
            console.log('The Extractor is running. Please wait..');
            return;
        }
        if (!this._tasks.length) {
            console.log('No task to run.');
            return;
        }
        // Claim the flag before the first await so an overlapping call is refused.
        this._running = true;
        try {
            const firstTaskArgs = this._tasks[0];
            if (firstTaskArgs.length > 2) {
                // Task specifies target urls: create a new tab with the first url for it.
                const urls = parseUrls(...firstTaskArgs.slice(2));
                this._tab = await createTab(urls[0], false);
            } else {
                this._tab = await getActiveTab(false);
            }
        } catch (err) {
            // Never leave the extractor locked when the tab cannot be prepared.
            this._running = false;
            throw err;
        }
        return this._runTasks(0, undefined);
    }

    /**
     * Restart from the specified task, reusing the cached result of the
     * task before it instead of running the previous tasks again.
     * @param {number} taskid 1-based id of the task to restart from (defaults to 1).
     * @returns {Promise<void>} settles when the chain has finished.
     */
    async restart(taskid) {
        if (this._running) {
            console.log('The Extractor is running. Please wait..');
            return;
        }
        taskid = this._checkTaskId(taskid, 1);
        if (!taskid) return;
        // Restarting from the first task is a plain start; return it so callers can await the run.
        if (taskid === 1) return this.start();
        const cache = this._results.get(this._tasks[taskid - 2]);
        if (!cache) {
            console.log(`No result cache for task (id ${taskid}). \nMake sure call ".start()" before ".restart()"?`);
            return;
        }
        this._running = true;
        try {
            this._tab = await createTab(parseUrls(cache)[0], false);
        } catch (err) {
            // Never leave the extractor locked when the tab cannot be prepared.
            this._running = false;
            throw err;
        }
        return this._runTasks(taskid - 1, cache);
    }

    /**
     * Save the result of a task after asking the user for confirmation.
     * The cached result is expected to expose a `data` array and a
     * `toString(n)` method, as used below to build the preview text.
     * @param {number} taskid 1-based id of the task to save (defaults to the last task).
     */
    save(taskid) {
        taskid = this._checkTaskId(taskid, this._tasks.length);
        if (!taskid) return;
        const result = this._results.get(this._tasks[taskid - 1]);
        if (!result) {
            console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
            return;
        }
        if (confirm(
            `Click confirm to download if the sample data looks good (${result.data.length} items)\n\n${result.toString(50) || "- Empty -"}`
        )) {
            saveFile(result, "text/csv");
        }
    }

    /**
     * Run the queued tasks one after another, starting at `firstIndex`,
     * feeding each task the result of the one before it and caching every result.
     * Task errors are logged; the running flag is always released.
     * @param {number} firstIndex 0-based index of the first task to run.
     * @param {any} input result handed to the first task, or undefined for none.
     * @returns {Promise<void>}
     */
    async _runTasks(firstIndex, input) {
        // Snapshot the queue so tasks added while running do not alter this run.
        const tasks = this._tasks.slice();
        try {
            let result = input;
            for (let i = firstIndex; i < tasks.length; i++) {
                const args = tasks[i];
                result = result === undefined
                    ? await getData(this._tab, ...args)
                    : await getData(this._tab, ...args, result);
                this._results.set(args, result);
            }
            this._running = false;
            this.save();
        } catch (err) {
            console.log(err);
        } finally {
            this._running = false;
        }
    }

    /**
     * Validate a 1-based task id.
     * @param {number} id the id to check; undefined or null selects `defaultId`.
     * @param {number} defaultId fallback id used when `id` is not given.
     * @returns {number} the validated id, or 0 when there are no tasks or the id is invalid.
     */
    _checkTaskId(id, defaultId) {
        if (!this._tasks.length) {
            console.log("No task found.");
            return 0;
        }
        if (defaultId && (id === undefined || id === null)) id = defaultId;
        const taskId = Number(id);
        // Only whole numbers inside the queue are usable as array positions.
        if (!Number.isInteger(taskId) || taskId < 1 || taskId > this._tasks.length) {
            console.log(`Invalid task id. Rang(1-${this._tasks.length})`);
            return 0;
        }
        return taskId;
    }
}