Files
data-extracter-extesion/scripts/extractor.js
2018-09-28 16:20:52 +08:00

111 lines
3.8 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

class Extractor {
constructor() {
this._tasks = [];
this._results = {};
}
/**
* Add a task to Extractor. \n
* One Extractor could has multiple tasks, which orgnized in a task chian.
* Later task will use previous task result as input (target url list).
* So only the first task can have target url arguments, while later tasks can't.
* @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
*/
task(...args) {
if (!testArgs(...args))
throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
// given >2 arguments means the task specifies target page,
// so it won't accept last task result as url list.
// in this case, former tasks are useless, can be cleared.
if (args.length > 2) this.clear();
this._tasks.push(args);
return this;
}
/**
* Clear tasks and caches.
*/
clear() {
this._tasks = [];
this._results = [];
}
/**
* Start the task chain.
*/
async start() {
if (!this._tasks.length) {
console.log('No task to run.');
return;
}
return this._tasks.reduce((pms, args, i, tasks) => {
return pms.then(
result => {
if (result === undefined) return getData(...args);
this._results[tasks[i - 1]] = result;
return getData(...args, result);
});
}, Promise.resolve(undefined)).then(
result => {
this._results[this._tasks[this._tasks.length - 1]] = result;
this.save();
}
).catch(err => console.log(err));
}
/**
* restart from specified task, but don't restart the previous tasks.
* @param {number} taskid from which restart the tasks
*/
async restart(taskid) {
taskid = this._checkTaskId(taskid, 1);
if (!taskid) return;
if (taskid == 1) {
this.start();
return;
}
let cache = this._results[this._tasks[taskid - 2]];
if (!cache) {
console.log(`No result cache for task (id ${taskid}). \nMake sure call ".start()" before ".restart()"?`);
return;
}
return this._tasks.slice(taskid - 1).reduce((pms, args, i, tasks) => {
return pms.then(
result => {
this._results[tasks[i - 1]] = result;
return getData(...args, result);
});
}, Promise.resolve(cache)).then(
result => {
this._results[this._tasks[this._tasks.length - 1]] = result;
this.save();
}
).catch(err => console.log(err));
}
/**
* Save result of a task
* @param {number} taskid which task id to save.
*/
save(taskid) {
taskid = this._checkTaskId(taskid, this._tasks.length);
if (!taskid) return;
const result = this._results[this._tasks[taskid - 1]];
if (!result) {
console.log(`No task result for id (${taskid}). Forget to call ".start()"?`);
return;
}
if (confirm(
`Click confirm to download if the sample data looks good (${result.data.length} items)\n\n${result.toString(50) || "- Empty -"}`
)) {
saveFile(result, "text/csv");
}
}
_checkTaskId(id, defaultId) {
if (!this._tasks.length) {
console.log("No task found.");
return 0;
}
if (defaultId && id === undefined || this.task === null) id = defaultId;
if (isNaN(id) || id < 1 || id > this._tasks.length) {
console.log(`Invalid task id. Rang(1-${this._tasks.length})`);
return 0;
}
return id
}
}