/**
 * Runs a chain of extraction tasks against a browser tab and caches the
 * result of every task so that the chain can be resumed or saved later.
 *
 * Relies on helpers defined outside this class: `testArgs`, `argsToString`,
 * `signitures`, `parseUrls`, `createTab`, `getActiveTab`, `getData`,
 * `saveFile` and the global `confirm`.
 */
class Extractor {
  constructor() {
    this._tasks = [];
    this._tab = undefined;
    this._running = false;
    // Results are cached by task position: index i holds the result of task
    // id i + 1. Keying by position (not by the stringified argument list)
    // keeps tasks with identical arguments from overwriting each other.
    this._results = [];
  }

  /**
   * Add a task to the Extractor.
   * One Extractor can have multiple tasks, organized in a task chain.
   * A later task uses the previous task's result as input (target url list),
   * so only the first task can have target url arguments.
   * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
   * @returns {Extractor} this instance, so calls can be chained.
   */
  task(...args) {
    if (!testArgs(...args)) {
      console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`);
      // The invalid task is not queued; `this` is still returned so a
      // chained call does not throw.
      return this;
    }
    // More than 2 arguments means the task specifies its own target pages,
    // so it will not consume a previous result. Earlier tasks (and their
    // cached results) are therefore useless and are dropped.
    if (args.length > 2) this.clear();
    this._tasks.push(args);
    return this;
  }

  /**
   * Clear tasks and cached results.
   */
  clear() {
    this._tasks = [];
    this._results = [];
  }

  /**
   * Start the task chain from the first task.
   * Errors raised while preparing the tab propagate to the caller; errors
   * raised while running tasks are logged.
   * @returns {Promise<void>} settles when the chain has finished.
   */
  async start() {
    if (this._running) {
      console.log('The Extractor is running. Please wait..');
      return;
    }
    if (!this._tasks.length) {
      console.log('No task to run.');
      return;
    }
    // Mark as running before the first await so that a second call made
    // while the tab is still being prepared is rejected by the guard above.
    this._running = true;
    try {
      const firstTaskArgs = this._tasks[0];
      if (firstTaskArgs.length > 2) {
        // The task specifies target urls: open a new tab on the first url.
        const urls = parseUrls(...firstTaskArgs.slice(2));
        this._tab = await createTab(urls[0], false);
      } else {
        this._tab = await getActiveTab(false);
      }
    } catch (err) {
      this._running = false;
      throw err;
    }
    return this._runFrom(0, undefined);
  }

  /**
   * Restart from the specified task without re-running the previous tasks;
   * the cached result of the preceding task is used as input.
   * @param {number} taskid 1-based id of the task to restart from (default 1).
   * @returns {Promise<void>} settles when the chain has finished.
   */
  async restart(taskid) {
    if (this._running) {
      console.log('The Extractor is running. Please wait..');
      return;
    }
    const id = this._checkTaskId(taskid, 1);
    if (!id) return;
    // Return the promise so callers can await the whole run.
    if (id === 1) return this.start();

    const cache = this._results[id - 2];
    if (!cache) {
      console.log(`No result cache for task (id ${id}). \nMake sure call ".start()" before ".restart()"?`);
      return;
    }
    this._running = true;
    try {
      this._tab = await createTab(parseUrls(cache)[0], false);
    } catch (err) {
      // Without this reset a failed tab creation would leave the
      // Extractor permanently flagged as running.
      this._running = false;
      throw err;
    }
    return this._runFrom(id - 1, cache);
  }

  /**
   * Save the result of a task after the user confirms a preview of it.
   * @param {number} taskid 1-based id of the task to save (default: last task).
   */
  save(taskid) {
    const id = this._checkTaskId(taskid, this._tasks.length);
    if (!id) return;
    const result = this._results[id - 1];
    if (!result) {
      console.log(`No result for task #${id}. Forget to call ".start()"?`);
      return;
    }
    const preview = result.toString(50) || "- Empty -";
    const message = `Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${preview}`;
    if (confirm(message)) {
      saveFile(result, "text/csv");
    }
  }

  /**
   * Run the queued tasks sequentially, starting at `startIndex`, feeding
   * each task the result of the one before it, then offer the final result
   * for saving. Errors are logged, and `_running` is always reset.
   * @param {number} startIndex zero-based index of the first task to run.
   * @param {*} input result passed to the first task; `undefined` for none.
   * @returns {Promise<void>}
   */
  async _runFrom(startIndex, input) {
    // Snapshot the queue so the run is not affected by later `.task()` calls.
    const tasks = this._tasks.slice();
    try {
      let result = input;
      for (let i = startIndex; i < tasks.length; i++) {
        const args = tasks[i];
        result = result === undefined
          ? await getData(this._tab, ...args)
          : await getData(this._tab, ...args, result);
        this._results[i] = result;
      }
      this._running = false;
      this.save();
    } catch (err) {
      this._running = false;
      console.log(err);
    }
  }

  /**
   * Validate a 1-based task id, applying `defaultId` when none was given.
   * @param {*} id candidate id; `undefined` or `null` selects `defaultId`.
   * @param {number} defaultId fallback id.
   * @returns {number} the validated id as a number, or 0 when invalid.
   */
  _checkTaskId(id, defaultId) {
    if (!this._tasks.length) {
      console.log("No task found.");
      return 0;
    }
    let candidate = id;
    if (defaultId && (candidate === undefined || candidate === null)) {
      candidate = defaultId;
    }
    const numericId = Number(candidate);
    // Fractional ids would index a non-existent task, so only integers pass.
    if (!Number.isInteger(numericId) || numericId < 1 || numericId > this._tasks.length) {
      console.log(`Invalid task id. Rang(1-${this._tasks.length})`);
      return 0;
    }
    return numericId;
  }
}