keep state and continue
This commit is contained in:
		| @ -55,7 +55,7 @@ function parseUrls(...args) { | ||||
|     if (arg instanceof Array) { | ||||
|         return arg; | ||||
|     } else if (arg instanceof ExtractResult) { | ||||
|         return arg.squash().filter(v => !!v); | ||||
|         return arg.squash().filter(v => URL_REG.test(v)); | ||||
|     } else { | ||||
|         let urlTempl = arg; | ||||
|         if (urlTempl) { | ||||
|  | ||||
| @ -1,41 +1,44 @@ | ||||
| class Extractor { | ||||
|     constructor() { | ||||
|         this._tasks = []; | ||||
|         this._tab = undefined; | ||||
|         this._running = false; | ||||
|         this._results = {}; | ||||
|     } | ||||
|     /** | ||||
|      * Add a task to Extractor. \n | ||||
|      * One Extractor can have multiple tasks, which are organized in a task chain. | ||||
|      * Later task will use previous task result as input (target url list). | ||||
|      * So only the first task can have target url arguments, while later tasks can't. | ||||
|      * If URL arguments are not given for a later task, it will use the previous task's result as input (target URL list). | ||||
|      * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls. | ||||
|      */ | ||||
|     task(...args) { | ||||
|         if (!testArgs(...args)) { | ||||
|             console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`); | ||||
|             return this; | ||||
|         } | ||||
|         // given >2 arguments means the task specifies target page,  | ||||
|         // so it won't accept last task result as url list. | ||||
|         // in this case, former tasks are useless, can be cleared. | ||||
|         if (args.length > 2) this.clear(); | ||||
|         this._tasks.push(args); | ||||
|         this._tasks.push(new Task(...args)); | ||||
|         return this; | ||||
|     } | ||||
|     /** | ||||
|      * Clear tasks and caches. | ||||
|      * Clear tasks and task caches. | ||||
|      */ | ||||
|     clear() { | ||||
|         this._tasks = []; | ||||
|         this._results = []; | ||||
|         return this; | ||||
|     } | ||||
|     /** | ||||
|      * Start the task chain. | ||||
|      */ | ||||
|     async start() { | ||||
|         return this._startTasks(0); | ||||
|     } | ||||
|     /** | ||||
|      * restart from specified task, but don't restart the previous tasks. | ||||
|      * @param {number} from where to restart the tasks, begins with 0 | ||||
|      */ | ||||
|     async restart(from = 0) { | ||||
|         let id = this._checkTaskId(from, 0); | ||||
|         if (!id) return; | ||||
|         for (let i = id; i < this._tasks.length; i++) { | ||||
|             this._tasks[i].clean(); | ||||
|         } | ||||
|         return this._startTasks(0); | ||||
|     } | ||||
|     async _startTasks(from) { | ||||
|         if (this._running) { | ||||
|             console.log('The Extractor is running. Please wait..'); | ||||
|             return; | ||||
| @ -44,68 +47,28 @@ class Extractor { | ||||
|             console.log('No task to run.'); | ||||
|             return; | ||||
|         } | ||||
|         let firstTaskArgs = this._tasks[0]; | ||||
|         if (firstTaskArgs.length > 2) { | ||||
|  | ||||
|         let tab; | ||||
|         let task = this._tasks[0]; | ||||
|         if (task.urls.length) { | ||||
|             // task specifies target urls, create new tab with first url for it | ||||
|             let urls = parseUrls(...firstTaskArgs.slice(2, firstTaskArgs.length)); | ||||
|             this._tab = await createTab(urls[0], false); | ||||
|             tab = await createTab(task.urls[0], false); | ||||
|         } else { | ||||
|             this._tab = await getActiveTab(false); | ||||
|             tab = await getActiveTab(true) || await getActiveTab(false); | ||||
|         } | ||||
|         this._running = true; | ||||
|         return this._tasks.reduce((pms, args, i, tasks) => { | ||||
|         return this._tasks.reduce((pms, task, i) => { | ||||
|             return pms.then( | ||||
|                 result => { | ||||
|                     if (result === undefined) return getData(this._tab, ...args); | ||||
|                     this._results[tasks[i - 1]] = result; | ||||
|                     return getData(this._tab, ...args, result); | ||||
|                 () => { | ||||
|                     if (i < from) return; | ||||
|                     if (i > 0) { | ||||
|                         let prevTask = this._tasks[i - 1]; | ||||
|                         return task.execute(tab, new ExtractResult(prevTask.results)); | ||||
|                     } | ||||
|                     return task.execute(tab, undefined); | ||||
|                 }); | ||||
|         }, Promise.resolve(undefined)).then( | ||||
|             result => { | ||||
|                 this._results[this._tasks[this._tasks.length - 1]] = result; | ||||
|                 this._running = false; | ||||
|                 console.log("Tasks are all done.") | ||||
|                 this.save(); | ||||
|             } | ||||
|         ).catch(err => { | ||||
|             this._running = false; | ||||
|             console.log(err) | ||||
|         }); | ||||
|     } | ||||
|     /** | ||||
|      * restart from specified task, but don't restart the previous tasks. | ||||
|      * @param {number} taskid from which restart the tasks | ||||
|      */ | ||||
|     async restart(taskid) { | ||||
|         if (this._running) { | ||||
|             console.log('The Extractor is running. Please wait..'); | ||||
|             return; | ||||
|         } | ||||
|         taskid = this._checkTaskId(taskid, 1); | ||||
|         if (!taskid) return; | ||||
|         if (taskid == 1) { | ||||
|             this.start(); | ||||
|             return; | ||||
|         } | ||||
|         let cache = this._results[this._tasks[taskid - 2]]; | ||||
|         if (!cache) { | ||||
|             console.log(`No result cache for task (id ${taskid}). \nMake sure call ".start()" before ".restart()"?`); | ||||
|             return; | ||||
|         } | ||||
|         this._running = true; | ||||
|         this._tab = await createTab(parseUrls(cache)[0], false) | ||||
|         return this._tasks.slice(taskid - 1).reduce((pms, args, i, tasks) => { | ||||
|             return pms.then( | ||||
|                 result => { | ||||
|                     this._results[tasks[i - 1]] = result; | ||||
|                     return getData(this._tab, ...args, result); | ||||
|                 }); | ||||
|         }, Promise.resolve(cache)).then( | ||||
|             result => { | ||||
|                 this._results[this._tasks[this._tasks.length - 1]] = result; | ||||
|                 this._running = false; | ||||
|                 this.save(); | ||||
|             } | ||||
|             () => this.save() | ||||
|         ).catch(err => { | ||||
|             this._running = false; | ||||
|             console.log(err) | ||||
| @ -113,18 +76,15 @@ class Extractor { | ||||
|     } | ||||
|     /** | ||||
|      * Save result of a task | ||||
|      * @param {number} taskid which task id to save. | ||||
|      * @param {number} taskid which task id to save, begins with 0 | ||||
|      */ | ||||
|     save(taskid) { | ||||
|         taskid = this._checkTaskId(taskid, this._tasks.length); | ||||
|         if (!taskid) return; | ||||
|         const result = this._results[this._tasks[taskid - 1]]; | ||||
|         if (!result) { | ||||
|             console.log(`No result for task #${taskid}. Forget to call ".start()"?`); | ||||
|             return; | ||||
|         } | ||||
|         if (result.data.length <= 1) { // 1 for selector headers | ||||
|             console.log(`No result for task #${taskid}. Forget to call ".start()"?`); | ||||
|         let id = this._checkTaskId(taskid, this._tasks.length - 1); | ||||
|         if (!id) return; | ||||
|         let result = new ExtractResult(this._tasks[id].results); | ||||
|  | ||||
|         if (!result.data.length) { | ||||
|             console.log(`No result for task #${id}. Forget to call ".start()"?`); | ||||
|             return; | ||||
|         } | ||||
|         let msg = ` | ||||
| @ -141,9 +101,9 @@ ${result.toString(50) || "- Empty -"} | ||||
|             console.log("No task found."); | ||||
|             return 0; | ||||
|         } | ||||
|         if (defaultId && id === undefined || this.task === null) id = defaultId; | ||||
|         if (isNaN(id) || id < 1 || id > this._tasks.length) { | ||||
|             console.log(`Invalid task id. Rang(1-${this._tasks.length})`); | ||||
|         if (defaultId && id === undefined) id = defaultId; | ||||
|         if (isNaN(id) || id < 0 || id >= this._tasks.length) { | ||||
|             console.log(`Invalid task id. Rang(0-${this._tasks.length - 1})`); | ||||
|             return 0; | ||||
|         } | ||||
|         return id | ||||
|  | ||||
							
								
								
									
										66
									
								
								scripts/background/task.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								scripts/background/task.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,66 @@ | ||||
| class Task { | ||||
|     // _manager = undefined; | ||||
|     // _id = 0; | ||||
|     // _urls = []; | ||||
|     _data = {}; | ||||
|     /** | ||||
|      * Create a task. | ||||
|      * constructor(itemsSelector:string, fieldSelectors:string[]) | ||||
|      * constructor(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number) | ||||
|      * constructor(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[]) | ||||
|      * constructor(itemsSelector:string, fieldSelectors:string[], urls:string[]) | ||||
|      * @param {...any} args | ||||
|      */ | ||||
|     constructor(...args) { | ||||
|         if (!testArgs(...args)) | ||||
|             throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`); | ||||
|         this._itemsSelector = args.shift(); | ||||
|         this._fieldSelectors = args.shift(); | ||||
|         this._urls = parseUrls(...args); | ||||
|     } | ||||
|     get urls() { | ||||
|         return this._urls; | ||||
|     } | ||||
|     get data() { | ||||
|         return this._data; | ||||
|     } | ||||
|     get results() { | ||||
|         return this._urls.reduce((p, c) => { | ||||
|             return p.concat(this._data[c]); | ||||
|         }, []); | ||||
|     } | ||||
|     clean() { | ||||
|         this._data = {}; | ||||
|     } | ||||
|     async execute(tab, upstreamData) { | ||||
|         if (!tab) throw new Error("No tab to execute the task."); | ||||
|         if (!this._urls.length) { | ||||
|             if (upstreamData) { | ||||
|                 this._urls = parseUrls(upstreamData); | ||||
|             } else { | ||||
|                 this._urls = [await queryUrl(tab)]; | ||||
|             } | ||||
|         } | ||||
|         return this._urls.reduce((p, url, i) => p.then( | ||||
|             results => { | ||||
|                 if (i > 0) { | ||||
|                     if (!MSG_URL_SKIPPED.isEqual(results)) { | ||||
|                         let lastURL = this._urls[i - 1]; | ||||
|                         this._data[lastURL] = results; | ||||
|                     }  | ||||
|                 } | ||||
|                 return this._data[url] ? MSG_URL_SKIPPED : redirectTab(tab, url).then( | ||||
|                     () => extractTabData(tab, this._itemsSelector, this._fieldSelectors) | ||||
|                 ); | ||||
|             } | ||||
|         ), Promise.resolve(null)).then( | ||||
|             results => { | ||||
|                 if (!MSG_URL_SKIPPED.isEqual(results)) { | ||||
|                     let lastURL = this._urls[this._urls.length - 1]; | ||||
|                     this._data[lastURL] = results; | ||||
|                     return; | ||||
|                 } | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
| } | ||||
| @ -1,8 +1,11 @@ | ||||
| const EXT_NAME = "DataExtracter"; | ||||
|  | ||||
| const URL_REG = getWebUrl(); | ||||
|  | ||||
| const ACTION_EXTRACT = `${EXT_NAME}:Extract`; | ||||
| const ACTION_GOTO_URL = `${EXT_NAME}:GoToTUL`; | ||||
| const ACTION_REPORT_IN = `${EXT_NAME}:ReportIn`; | ||||
| const ACTION_QUERY_URL = `${EXT_NAME}:QueryURL`; | ||||
|  | ||||
| const MSG_ELEMENT_NOT_FOUND = new ConstMessage(1, "No element found for at least one selector, maybe it's not loaded yet"); | ||||
| const MSG_URL_SKIPPED = new ConstMessage(100, "Skipped current URL"); | ||||
|  | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Reference in New Issue
	
	Block a user