task chain & management
This commit is contained in:
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"manifest_version": 2,
|
"manifest_version": 2,
|
||||||
"name": "Data Extracter",
|
"name": "Data Extracter",
|
||||||
"version": "0.0.1",
|
"version": "0.1.0",
|
||||||
"author": "jebbs",
|
"author": "jebbs",
|
||||||
"description": "Extract data from web page elements as sheet.",
|
"description": "Extract data from web page elements as sheet.",
|
||||||
"icons": {
|
"icons": {
|
||||||
@ -19,7 +19,8 @@
|
|||||||
"scripts/background.js",
|
"scripts/background.js",
|
||||||
"scripts/result.js",
|
"scripts/result.js",
|
||||||
"scripts/tools.js",
|
"scripts/tools.js",
|
||||||
"scripts/extract.js"
|
"scripts/extract.js",
|
||||||
|
"scripts/extractor.js"
|
||||||
],
|
],
|
||||||
"persistent": false
|
"persistent": false
|
||||||
},
|
},
|
||||||
|
|||||||
111
scripts/extractor.js
Normal file
111
scripts/extractor.js
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
/**
 * Runs a chain of extraction tasks and caches each task's result.
 *
 * Each task is the argument list handed to the external `getData` helper.
 * The first task may carry extra arguments that specify target urls; every
 * later task receives the previous task's result as its trailing argument.
 * Results are cached by task position, so two tasks with identical
 * arguments never overwrite each other's cache entry.
 */
class Exractor {
    constructor() {
        // Argument lists of the queued tasks, in execution order.
        this._tasks = [];
        // this._results[i] holds the result of this._tasks[i] (0-based).
        this._results = [];
    }

    /**
     * Add a task to the Extractor.
     * One Extractor can have multiple tasks, organized in a task chain.
     * A later task uses the previous task's result as input (target url list),
     * so only the first task can have target url arguments, while later tasks can't.
     * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
     * @returns {Exractor} this instance, so calls can be chained.
     * @throws {Error} when `testArgs` rejects the arguments.
     */
    task(...args) {
        if (!testArgs(...args)) {
            throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
        }
        // More than 2 arguments means the task specifies its own target page,
        // so it won't accept the last task's result as url list. In this case
        // the former tasks are useless and can be cleared.
        if (args.length > 2) {
            this.clear();
        }
        this._tasks.push(args);
        return this;
    }

    /**
     * Clear tasks and caches.
     */
    clear() {
        this._tasks = [];
        this._results = [];
    }

    /**
     * Start the task chain from the first task.
     * @returns {Promise<void>} settles once the whole chain has run and
     * `save()` has been invoked for the last task.
     */
    async start() {
        if (!this._tasks.length) {
            console.log('No task to run.');
            return;
        }
        await this._runFrom(0, undefined);
    }

    /**
     * Restart from the specified task, reusing the cached result of the task
     * before it instead of re-running the previous tasks.
     * @param {number} taskid 1-based id of the task to restart from (defaults to 1).
     * @returns {Promise<void>} settles once the remaining chain has run.
     */
    async restart(taskid) {
        taskid = this._checkTaskId(taskid, 1);
        if (!taskid) {
            return;
        }
        if (taskid === 1) {
            // Await so callers observe completion and failures of the full run.
            await this.start();
            return;
        }
        // Task `taskid` is at index taskid - 1; its input is the result of
        // the task right before it.
        const cache = this._results[taskid - 2];
        if (!cache) {
            console.log(`No result cache for task (id ${taskid}). \nMake sure call ".start()" before ".restart()"?`);
            return;
        }
        await this._runFrom(taskid - 1, cache);
    }

    /**
     * Save result of a task.
     * @param {number} taskid which task id to save (defaults to the last task).
     */
    save(taskid) {
        taskid = this._checkTaskId(taskid, this._tasks.length);
        if (!taskid) {
            return;
        }
        const result = this._results[taskid - 1];
        if (!result) {
            console.log(`No task result for id (${taskid}). Forget to call ".start()"?`);
            return;
        }
        const message = `Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${result.toString(50) || "- Empty -"}`;
        if (confirm(message)) {
            saveFile(result, "text/csv");
        }
    }

    /**
     * Run tasks sequentially from `startIndex`, caching every result, then
     * offer the final result for saving.
     * @param {number} startIndex 0-based index of the first task to run.
     * @param {*} input result fed into the first task run; `undefined` means
     * the task is called with its own arguments only.
     * @returns {Promise<void>}
     */
    async _runFrom(startIndex, input) {
        // Snapshot the chain so tasks added or cleared mid-run don't alter it.
        const tasks = [...this._tasks];
        let result = input;
        for (let i = startIndex; i < tasks.length; i++) {
            const args = tasks[i];
            result = result === undefined
                ? await getData(...args)
                : await getData(...args, result);
            this._results[i] = result;
        }
        this.save();
    }

    /**
     * Validate a 1-based task id.
     * @param {number} id task id to check; `undefined`/`null` fall back to `defaultId`.
     * @param {number} [defaultId] id used when `id` is not given.
     * @returns {number} the validated id as a number, or 0 when there are no
     * tasks or the id is not an integer within 1..tasks.length.
     */
    _checkTaskId(id, defaultId) {
        if (!this._tasks.length) {
            console.log("No task found.");
            return 0;
        }
        if (defaultId && (id === undefined || id === null)) {
            id = defaultId;
        }
        // Normalize numeric strings, and reject fractions that would index
        // a non-existent task slot.
        const numericId = Number(id);
        if (!Number.isInteger(numericId) || numericId < 1 || numericId > this._tasks.length) {
            console.log(`Invalid task id. Rang(1-${this._tasks.length})`);
            return 0;
        }
        return numericId;
    }
}
|
||||||
Reference in New Issue
Block a user