diff --git a/manifest.json b/manifest.json
index 9f07b43..1062617 100755
--- a/manifest.json
+++ b/manifest.json
@@ -1,7 +1,7 @@
 {
     "manifest_version": 2,
     "name": "Data Extracter",
-    "version": "0.0.1",
+    "version": "0.1.0",
     "author": "jebbs",
     "description": "Extract data from web page elements as sheet.",
     "icons": {
@@ -19,7 +19,8 @@
             "scripts/background.js",
             "scripts/result.js",
             "scripts/tools.js",
-            "scripts/extract.js"
+            "scripts/extract.js",
+            "scripts/extractor.js"
         ],
         "persistent": false
     },
diff --git a/scripts/extractor.js b/scripts/extractor.js
new file mode 100644
index 0000000..1be0b7b
--- /dev/null
+++ b/scripts/extractor.js
@@ -0,0 +1,118 @@
+class Exractor {
+    constructor() {
+        // Argument lists of the queued tasks, in execution order.
+        this._tasks = [];
+        // Result cache: _results[i] holds the result of _tasks[i].
+        this._results = [];
+    }
+    /**
+     * Add a task to the extractor.
+     * One extractor can have multiple tasks, organized as a task chain.
+     * A later task uses the previous task's result as input (target url list),
+     * so only the first task can have target url arguments, while later tasks can't.
+     * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
+     * @returns {Exractor} this instance, so calls can be chained.
+     * @throws {Error} when testArgs() rejects the arguments.
+     */
+    task(...args) {
+        if (!testArgs(...args))
+            throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
+        // More than 2 arguments means the task specifies its own target pages,
+        // so it won't accept the previous task's result as url list.
+        // In that case the former tasks are useless and can be cleared.
+        if (args.length > 2) this.clear();
+        this._tasks.push(args);
+        return this;
+    }
+    /**
+     * Clear tasks and caches.
+     */
+    clear() {
+        this._tasks = [];
+        this._results = [];
+    }
+    /**
+     * Start the task chain from the first task.
+     * @returns {Promise<void>} settles once every task has run and save() was called.
+     */
+    async start() {
+        if (!this._tasks.length) {
+            console.log('No task to run.');
+            return;
+        }
+        await this._run(0, undefined);
+    }
+    /**
+     * Restart from the specified task, but don't re-run the previous tasks.
+     * @param {number} taskid 1-based id of the task to restart from (defaults to 1).
+     * @returns {Promise<void>} settles once the remaining tasks have run.
+     */
+    async restart(taskid) {
+        const id = this._checkTaskId(taskid, 1);
+        if (!id) return;
+        // Return the promise so the caller can wait for (and catch errors of) the run.
+        if (id === 1) return this.start();
+        const cache = this._results[id - 2];
+        if (!cache) {
+            console.log(`No result cache for task (id ${id}). \nMake sure call ".start()" before ".restart()"?`);
+            return;
+        }
+        await this._run(id - 1, cache);
+    }
+    /**
+     * Save the result of a task.
+     * @param {number} taskid 1-based id of the task to save (defaults to the last task).
+     */
+    save(taskid) {
+        const id = this._checkTaskId(taskid, this._tasks.length);
+        if (!id) return;
+        const result = this._results[id - 1];
+        if (!result) {
+            console.log(`No task result for id (${id}). \nForget to call ".start()"?`);
+            return;
+        }
+        if (confirm(
+            `Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${result.toString(50) || "- Empty -"}`
+        )) {
+            saveFile(result, "text/csv");
+        }
+    }
+    /**
+     * Run the tasks from index `from` to the end of the chain, one after another,
+     * caching each result under its task index, then call save() for the last one.
+     * @param {number} from zero-based index of the first task to run.
+     * @param {any} input result of the preceding task, or undefined when there is none.
+     */
+    async _run(from, input) {
+        // Copy the list so tasks queued while this run is in progress are not picked up.
+        const tasks = [...this._tasks];
+        let result = input;
+        for (let i = from; i < tasks.length; i++) {
+            // No previous result: call getData with the task's own arguments only.
+            result = result === undefined
+                ? await getData(...tasks[i])
+                : await getData(...tasks[i], result);
+            this._results[i] = result;
+        }
+        this.save(tasks.length);
+    }
+    /**
+     * Validate a 1-based task id.
+     * @param {number} id task id to check; undefined/null selects defaultId.
+     * @param {number} defaultId id used when none is given.
+     * @returns {number} the id as an integer, or 0 when it is invalid or there are no tasks.
+     */
+    _checkTaskId(id, defaultId) {
+        if (!this._tasks.length) {
+            console.log("No task found.");
+            return 0;
+        }
+        if (defaultId && (id === undefined || id === null)) id = defaultId;
+        const taskid = Number(id);
+        if (!Number.isInteger(taskid) || taskid < 1 || taskid > this._tasks.length) {
+            console.log(`Invalid task id. Rang(1-${this._tasks.length})`);
+            return 0;
+        }
+        return taskid;
+    }
+}
\ No newline at end of file