145 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			145 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/**
 * Serialized Extractor state (a JSON string) waiting to be restored.
 *
 * `Extractor#load()` parses this value and then resets it to the empty
 * string; an empty string means "nothing to restore". The value is expected
 * to be assigned from outside this file before `load()` is called
 * (NOTE(review): the "No state found" message mentions the popup window as
 * the uploader - confirm against the popup code). It is kept as a `var` so
 * the binding stays reachable exactly as before for whatever assigns it.
 */
var __EXTRACTOR_STATE__ = "";
 | ||
| 
 | ||
/**
 * Runs an ordered chain of extraction tasks against a browser tab and lets
 * the user export the results.
 *
 * Relies on names defined outside this block: `Task`, `ExtractResult`,
 * `saveFile`, `createTab`, `getActiveTab`, `ping`, `confirm` and the
 * `__EXTRACTOR_STATE__` variable.
 */
class Extractor {
    /**
     * @param {*} options stored as-is and handed to every Task this Extractor creates.
     */
    constructor(options) {
        this._tasks = [];
        this._running = false;
        this._options = options;
    }

    /**
     * Save current state, in case we restore it later.
     * The whole instance is serialized with JSON.stringify and passed to
     * `saveFile` under the name 'state.json'.
     */
    save() {
        saveFile(JSON.stringify(this), 'application/json', 'state.json');
    }

    /**
     * Restore previous state by loading from saved state.
     * Reads the JSON held in `__EXTRACTOR_STATE__`, clears that variable, and
     * rebuilds the options and the task list from it.
     * @returns {Extractor|undefined} this Extractor, or undefined when no state is pending.
     */
    load() {
        if (!__EXTRACTOR_STATE__) {
            console.log('No state found. \nPlease upload a saved state from the popup window first.');
            return;
        }
        const state = JSON.parse(__EXTRACTOR_STATE__);
        __EXTRACTOR_STATE__ = "";
        this._options = state._options;
        // The constructor arguments are placeholders; each Task is then
        // populated from the saved data through its own load().
        this._tasks = state._tasks.map(t => new Task(this._options, 'whaterver', ['whaterver']).load(t));
        return this;
    }

    /**
     * Add a task to the Extractor.
     * One Extractor can have multiple tasks, which are organized in a task chain.
     * If url arguments are not given for later tasks, they use the previous
     * task's result as input (target url list).
     * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
     * @returns {Extractor} this Extractor, for chaining.
     */
    task(...args) {
        this._tasks.push(new Task(this._options, ...args));
        return this;
    }

    /**
     * Clear tasks and task caches.
     * @returns {Extractor} this Extractor, for chaining.
     */
    clear() {
        this._tasks = [];
        return this;
    }

    /**
     * Start the task chain from the first task.
     * @returns {Promise<void>}
     */
    async start() {
        return this._startTasks(0);
    }

    /**
     * Restart from the specified task, without re-running the previous tasks.
     * Tasks from `from` to the end are cleaned and executed again; earlier
     * tasks are left untouched and their existing results feed the chain.
     * @param {number} from where to restart the tasks, begins with 0
     * @returns {Promise<void>}
     */
    async restart(from = 0) {
        const id = this._checkTaskId(from, 0);
        if (id < 0) return;
        // Refuse before cleaning anything: wiping task data while a run is in
        // progress would destroy results the running chain still depends on.
        if (this._running) {
            console.log('The Extractor is running. Please wait..');
            return;
        }
        for (let i = id; i < this._tasks.length; i++) {
            this._tasks[i].clean();
        }
        return this._startTasks(id);
    }

    /**
     * Execute the tasks sequentially, skipping those with an index below `from`.
     * Each task after the first receives the previous task's results wrapped
     * in an ExtractResult. When the chain finishes, `export()` is called for
     * the last task. Errors raised while executing tasks or exporting are
     * logged; errors raised while obtaining the tab propagate to the caller.
     * @param {number} from index of the first task to execute
     * @returns {Promise<void>}
     */
    async _startTasks(from) {
        if (this._running) {
            console.log('The Extractor is running. Please wait..');
            return;
        }
        if (!this._tasks.length) {
            console.log('No task to run.');
            return;
        }

        // Raise the flag before the first await so that a second call made
        // while the tab is still being prepared is rejected by the guard above.
        this._running = true;
        try {
            let tab;
            const firstTask = this._tasks[0];
            if (firstTask.urls.length) {
                // task specifies target urls, create new tab with first url for it
                tab = await createTab(firstTask.urls[0], false);
            } else {
                tab = await getActiveTab(true) || await getActiveTab(false);
                const succ = await ping(tab);
                if (!succ) {
                    console.log('Cannot contact with active tab.');
                    return;
                }
            }

            // Work on a snapshot so task()/clear() calls made during the run
            // cannot change which tasks this run executes.
            const tasks = [...this._tasks];
            try {
                for (const [i, task] of tasks.entries()) {
                    if (i < from) continue;
                    const input = i > 0 ? new ExtractResult(tasks[i - 1].results) : undefined;
                    await task.execute(tab, input);
                }
                this._running = false;
                this.export();
            } catch (err) {
                console.log(err);
            }
        } finally {
            this._running = false;
        }
    }

    /**
     * Export the result of a task to CSV.
     * Shows a confirmation dialog with a preview and, when confirmed, passes
     * the data (with the task's fieldSelectors as the first row) to `saveFile`.
     * The task's stored results are not modified.
     * @param {number} taskid which task id to save, begins with 0; defaults to the last task
     */
    export(taskid) {
        const id = this._checkTaskId(taskid, this._tasks.length - 1);
        if (id < 0) return;

        const task = this._tasks[id];
        const results = task.results;
        if (!results.length) {
            console.log(`No result for task #${id}. Forget to call ".start()"?`);
            return;
        }

        // Build the header + rows on a copy: mutating task.results would add
        // one more header row per export and leak it into the next task's input.
        const exResults = new ExtractResult([task.fieldSelectors, ...results]);

        const msg = `
Please confirm to download (${results.length} items):

${exResults.toString(50) || "- Empty -"}
`.trim();
        if (confirm(msg)) {
            saveFile(exResults, "text/csv");
        }
    }

    /**
     * Validate a task id against the current task list.
     * @param {number|string} id task id to check; numeric strings are accepted
     * @param {number} defaultId value used when `id` is undefined
     * @returns {number} the id as an integer index, or -1 (after logging) when
     *     there are no tasks or the id is not an integer within range.
     */
    _checkTaskId(id, defaultId) {
        if (!this._tasks.length) {
            console.log("No task found.");
            return -1;
        }
        if (id === undefined) id = defaultId;
        // Only non-empty strings are converted; null, booleans, '' and
        // fractional numbers fail the integer test instead of indexing undefined.
        const index = typeof id === 'string' && id.trim() !== '' ? Number(id) : id;
        if (!Number.isInteger(index) || index < 0 || index >= this._tasks.length) {
            console.log(`Invalid task id. Rang(0-${this._tasks.length - 1})`);
            return -1;
        }
        return index;
    }
}