111 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			111 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/**
 * Runs a chain of extraction tasks, feeding each task's result into the next
 * one, and caches every task's result so the chain can be resumed or saved.
 *
 * Relies on names defined outside this class: `testArgs`, `signitures`,
 * `getData`, `saveFile`, and the global `confirm`.
 */
class Extractor {
    constructor() {
        // Each entry is the argument list that was passed to `task()`.
        this._tasks = [];
        // Result cache. Keys are task argument arrays used as property names,
        // so they are coerced to strings; tasks with identical arguments
        // therefore share a single cache slot.
        this._results = {};
    }

    /**
     * Add a task to the Extractor.
     * One Extractor can have multiple tasks, which are organized in a task chain.
     * A later task uses the previous task's result as its input (target url list),
     * so only the first task can have target url arguments, while later tasks can't.
     * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
     * @returns {Extractor} this instance, to allow chaining.
     * @throws {Error} when `testArgs` rejects the arguments.
     */
    task(...args) {
        if (!testArgs(...args)) {
            throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
        }
        // Given >2 arguments means the task specifies its target page,
        // so it won't accept the last task's result as url list.
        // In this case former tasks are useless and can be cleared.
        if (args.length > 2) {
            this.clear();
        }
        this._tasks.push(args);
        return this;
    }

    /**
     * Clear tasks and caches.
     */
    clear() {
        this._tasks = [];
        // Same container type as in the constructor.
        this._results = {};
    }

    /**
     * Start the task chain from the first task.
     * @returns {Promise<void>} settles when the whole chain has run and
     *     `save()` has been called for the last task.
     */
    async start() {
        if (!this._tasks.length) {
            console.log('No task to run.');
            return;
        }
        return this._runFrom(0, undefined);
    }

    /**
     * Restart from the specified task, but don't re-run the previous tasks;
     * the cached result of the preceding task is used as input instead.
     * @param {number} taskid 1-based id of the task to restart from (defaults to 1).
     * @returns {Promise<void>} settles when the remaining chain has finished.
     */
    async restart(taskid) {
        const id = this._checkTaskId(taskid, 1);
        if (!id) {
            return;
        }
        if (id === 1) {
            // Return the promise so callers can await the run and see its errors.
            return this.start();
        }
        const cache = this._results[this._tasks[id - 2]];
        if (!cache) {
            console.log(`No result cache for task (id ${id}). \nMake sure call ".start()" before ".restart()"?`);
            return;
        }
        return this._runFrom(id - 1, cache);
    }

    /**
     * Save the result of a task, after asking the user to confirm a sample.
     * @param {number} taskid 1-based id of the task to save (defaults to the last task).
     */
    save(taskid) {
        const id = this._checkTaskId(taskid, this._tasks.length);
        if (!id) {
            return;
        }
        const result = this._results[this._tasks[id - 1]];
        if (!result) {
            console.log(`No task result for id (${id}). Forget to call ".start()"?`);
            return;
        }
        const sample = result.toString(50) || "- Empty -";
        const message =
            `Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${sample}`;
        if (confirm(message)) {
            saveFile(result, "text/csv");
        }
    }

    /**
     * Run the tasks from `startIndex` to the end of the chain sequentially,
     * caching intermediate results, then cache the final result and call `save()`.
     * @param {number} startIndex 0-based index of the first task to run.
     * @param {*} input result fed to the first task; `undefined` means
     *     the task is called with its own arguments only.
     * @returns {Promise<void>}
     */
    async _runFrom(startIndex, input) {
        // Snapshot so tasks added while running don't join this run.
        const tasks = this._tasks.slice();
        let result = input;
        for (let i = startIndex; i < tasks.length; i++) {
            const args = tasks[i];
            if (result === undefined) {
                result = await getData(...args);
                continue;
            }
            // `result` belongs to the previous task; there is none before index 0.
            if (i > 0) {
                this._results[tasks[i - 1]] = result;
            }
            result = await getData(...args, result);
        }
        this._results[tasks[tasks.length - 1]] = result;
        this.save();
    }

    /**
     * Validate a 1-based task id.
     * @param {number} id the id to check; `null`/`undefined` fall back to `defaultId`.
     * @param {number} defaultId id used when `id` is not given.
     * @returns {number} the validated id, or 0 when there are no tasks or the id is invalid.
     */
    _checkTaskId(id, defaultId) {
        if (!this._tasks.length) {
            console.log("No task found.");
            return 0;
        }
        const candidate = id == null && defaultId ? defaultId : id;
        const taskId = Number(candidate);
        // Only whole numbers within 1..length address a task.
        if (!Number.isInteger(taskId) || taskId < 1 || taskId > this._tasks.length) {
            console.log(`Invalid task id. Rang(1-${this._tasks.length})`);
            return 0;
        }
        return taskId;
    }
}