/**
 * Runs a chain of extraction tasks and caches the result of each one.
 *
 * Tasks are executed in the order they were added. The result of a task is
 * passed to the next task as an extra trailing argument of `getData`
 * (per the original notes, it serves as the target url list).
 */
class Extractor {
    constructor() {
        // Argument lists of the queued tasks, in execution order.
        this._tasks = [];
        // Result cache keyed by the task's own argument array (identity),
        // so two tasks with identical selectors never share a cache entry.
        this._results = new Map();
    }

    /**
     * Add a task to the Extractor.
     * One Extractor can have multiple tasks, which are organized in a task chain.
     * A later task uses the previous task's result as input (target url list),
     * so only the first task can have target url arguments, while later tasks can't.
     * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
     * @returns {Extractor} this instance, to allow chaining.
     * @throws {Error} when `testArgs` rejects the arguments.
     */
    task(...args) {
        if (!testArgs(...args)) {
            throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
        }
        // More than 2 arguments means the task specifies its own target pages,
        // so it won't accept the previous task's result as url list.
        // In that case the former tasks are useless and can be cleared.
        if (args.length > 2) this.clear();
        this._tasks.push(args);
        return this;
    }

    /**
     * Clear tasks and caches.
     */
    clear() {
        this._tasks = [];
        this._results = new Map();
    }

    /**
     * Start the task chain from the first task.
     * @returns {Promise<void>} settles once every task has run and `save()` was invoked.
     */
    async start() {
        if (!this._tasks.length) {
            console.log('No task to run.');
            return;
        }
        return this._runFrom(0, undefined);
    }

    /**
     * Restart from the specified task without re-running the previous tasks;
     * the cached result of the preceding task is used as input.
     * @param {number} taskid 1-based id of the task to restart from (defaults to 1).
     * @returns {Promise<void>} settles once the remaining tasks have run.
     */
    async restart(taskid) {
        taskid = this._checkTaskId(taskid, 1);
        if (!taskid) return;
        // Return the promise so callers awaiting restart() wait for the whole chain.
        if (taskid === 1) return this.start();
        const cache = this._results.get(this._tasks[taskid - 2]);
        if (!cache) {
            console.log(`No result cache for task (id ${taskid}). \n\nMake sure call ".start()" before ".restart()"?`);
            return;
        }
        return this._runFrom(taskid - 1, cache);
    }

    /**
     * Save the result of a task.
     * Asks for confirmation with a sample of the data before handing the
     * result to `saveFile`.
     * @param {number} taskid 1-based id of the task to save (defaults to the last task).
     */
    save(taskid) {
        taskid = this._checkTaskId(taskid, this._tasks.length);
        if (!taskid) return;
        const result = this._results.get(this._tasks[taskid - 1]);
        if (!result) {
            console.log(`No task result for id (${taskid}). Forget to call ".start()"?`);
            return;
        }
        const sample = result.toString(50) || "- Empty -";
        const message =
            `Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${sample}`;
        if (confirm(message)) {
            saveFile(result, "text/csv");
        }
    }

    /**
     * Run the tasks from `firstIndex` to the end of the chain, sequentially,
     * caching every task's result, then call `save()` for the last one.
     * Failures are logged rather than propagated.
     * @param {number} firstIndex 0-based index of the first task to run.
     * @param {*} input result fed to the first task, or undefined for none.
     * @returns {Promise<void>}
     */
    async _runFrom(firstIndex, input) {
        try {
            let result = input;
            // Snapshot the pending tasks so changes to the queue made while
            // running do not affect this run.
            for (const args of this._tasks.slice(firstIndex)) {
                // Without an upstream result the task only gets its own arguments.
                result = result === undefined
                    ? await getData(...args)
                    : await getData(...args, result);
                this._results.set(args, result);
            }
            this.save();
        } catch (err) {
            console.log(err);
        }
    }

    /**
     * Validate a 1-based task id.
     * @param {number} id the id to check; null/undefined falls back to `defaultId`.
     * @param {number} defaultId id used when `id` is not given.
     * @returns {number} the normalized numeric id, or 0 when there are no tasks
     *                   or the id is not an integer in the range 1..tasks.length.
     */
    _checkTaskId(id, defaultId) {
        if (!this._tasks.length) {
            console.log("No task found.");
            return 0;
        }
        // `== null` matches both null and undefined.
        const taskId = Number(id == null ? defaultId : id);
        if (!Number.isInteger(taskId) || taskId < 1 || taskId > this._tasks.length) {
            console.log(`Invalid task id. Rang(1-${this._tasks.length})`);
            return 0;
        }
        return taskId;
    }
}