refactoring
This commit is contained in:
		
							
								
								
									
										168
									
								
								scripts/background/actions.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										168
									
								
								scripts/background/actions.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,168 @@ | ||||
/**
 * Extract data from the current page or from a list of urls.
 *
 * The leading `tab` argument is optional: when the first argument is not a
 * string it is taken as the target tab, otherwise the active tab is used.
 *
 * Supported signatures:
 * getData(tab, itemsSelector:string, fieldSelectors:string[])
 * getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 * getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 * getData(tab, itemsSelector:string, fieldSelectors:string[], urls:string[])
 * getData(tab, itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 * getData(itemsSelector:string, fieldSelectors:string[])
 * getData(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 * getData(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 * getData(itemsSelector:string, fieldSelectors:string[], urls:string[])
 * getData(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 *
 * When urls are given, the tab is redirected to each url in turn and the
 * extracted rows are accumulated; otherwise the tab is extracted as it is.
 *
 * @param {...any} args see the signatures above
 * @returns {Promise<ExtractResult>} result whose first row is `fieldSelectors`,
 *     followed by the extracted rows
 * @throws {Error} (as a rejection) when the arguments are invalid or no active
 *     tab can be found
 */
async function getData(...args) {
    let tab;
    // Anything that is not a string in first position is treated as the tab.
    if (typeof args[0] !== 'string') tab = args.shift();
    if (!testArgs(...args))
        throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);

    // Declared locally: the selectors must not leak into the global scope,
    // otherwise overlapping calls would overwrite each other's selectors.
    const [itemsSelector, fieldSelectors, ...urlArgs] = args;
    const urls = parseUrls(...urlArgs);

    if (!tab) tab = await getActiveTab(true) || await getActiveTab(false);
    if (!tab) throw new Error("Cannot find active tab.");

    const data = [];
    // Append row by row; spreading a huge result into push() can overflow the stack.
    const collect = rows => {
        if (!rows) return;
        for (const row of rows) data.push(row);
    };

    if (urls.length) {
        // Pages are visited strictly one after another in the same tab.
        for (const url of urls) {
            await redirectTab(tab, url);
            collect(await extractTabData(tab, itemsSelector, fieldSelectors));
        }
    } else {
        collect(await extractTabData(tab, itemsSelector, fieldSelectors));
    }

    // The selectors double as the header row of the result.
    data.unshift(fieldSelectors);
    return new ExtractResult(data);
}
|  | ||||
/**
 * Turn the "target urls" part of a task signature into a list of urls.
 *
 * Accepted forms:
 *  - ()                                  -> []
 *  - (urls:string[])                     -> the array itself
 *  - (urls:ExtractResult)                -> all non-empty cells of the result
 *  - (url:string, pages:any[])           -> url with "${page}" replaced by each page
 *  - (url:string, from, to, interval)    -> url with "${page}" replaced by
 *                                           from, from+interval, ... up to `to`
 * Anything else yields an empty list.
 *
 * @param {...any} args the url arguments described above
 * @returns {Array} list of urls
 * @throws {RangeError} when from/to are not finite numbers or interval is not
 *     a positive finite number (such a range could never terminate)
 */
function parseUrls(...args) {
    if (!args.length) return [];
    const [target, ...rest] = args;
    if (Array.isArray(target)) return target;
    if (target instanceof ExtractResult) return target.squash().filter(v => !!v);
    // Only a non-empty url template can be expanded.
    if (typeof target !== 'string' || !target) return [];

    const fill = page => target.replace("${page}", page);
    if (Array.isArray(rest[0])) return rest[0].map(page => fill(page));
    if (rest.length < 3) return [];

    // Coerce to numbers: with numeric strings `i += interval` would
    // concatenate instead of add and the loop might never end.
    const [from, to, interval] = rest.slice(0, 3).map(v => Number(v));
    if (!Number.isFinite(from) || !Number.isFinite(to) ||
        !Number.isFinite(interval) || interval <= 0) {
        throw new RangeError(
            `Invalid page range: from=${rest[0]}, to=${rest[1]}, interval=${rest[2]}`
        );
    }
    const urls = [];
    for (let page = from; page <= to; page += interval) {
        urls.push(fill(page));
    }
    return urls;
}
|  | ||||
/**
 * Make the tab show `url`.
 *
 * Queries the tab's current url first; when it differs from `url`, a
 * "goto url" request is sent to the tab. Afterwards the url is queried again,
 * excluding the previous url, so the returned promise settles only once the
 * tab reports a url different from the one it had before.
 *
 * @param {any} tab target tab
 * @param {string} url url to go to
 * @returns {Promise<string>} a promise of the url reported by the tab afterwards
 */
async function redirectTab(tab, url) {
    const current = await queryUrl(tab, undefined, 'Query current url...');
    let previous = "";
    if (url !== current) {
        previous = current;
        const req = {
            action: ACTION_GOTO_URL,
            url: url
        };
        // Deliberately not awaited: the follow-up url query below is what
        // detects the navigation. The rejection is still handled so that a
        // failed request does not surface as an unhandled promise rejection.
        sendMessage(tab, req, `Goto url: ${url}`).catch(
            err => console.log(`Goto url: ${url}`, '(no response)', err)
        );
    }
    return queryUrl(tab, previous, 'Check if tab url matches expected...');
}
|  | ||||
/**
 * Ask the target tab to extract data.
 * Sends an ACTION_EXTRACT request through sendMessage and accepts any
 * response that is not `undefined`.
 * @param {any} tab target tab
 * @param {string} itemsSelector selector for the items (data rows)
 * @param {Array<string>} fieldSelectors selectors for the fields (data columns) under each item
 * @returns {Promise} the promise returned by sendMessage, i.e. a promise of the tab's response
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    const hasResponse = response => response !== undefined;
    return sendMessage(
        tab,
        { action: ACTION_EXTRACT, itemsSelector, fieldSelectors },
        'Extract data from the tab...',
        hasResponse
    );
}
|  | ||||
/**
 * Request a "report in" from the target tab; usually used to detect whether
 * the content script is ready. The response is accepted once it equals the
 * requested action.
 * @param {any} tab target tab
 * @returns {Promise<string>} a promise of the report-in message
 */
function reportIn(tab) {
    const req = { action: ACTION_REPORT_IN };
    const isEcho = reply => reply == req.action;
    return sendMessage(tab, req, 'Check tab availability...', isEcho);
}
|  | ||||
/**
 * Get the url of the target tab.
 * @param {any} tab target tab
 * @param {string} urlExcluded if specified, the response is accepted only when it differs from urlExcluded
 * @param {string} log message handed to sendMessage for console logging
 * @returns {Promise<string>} a promise of the url
 */
function queryUrl(tab, urlExcluded, log) {
    const isAcceptable = url => {
        // an empty response is never accepted
        if (!url) return url;
        // nothing excluded: any url will do
        if (!urlExcluded) return true;
        return urlExcluded != url;
    };
    return sendMessage(tab, { action: ACTION_QUERY_URL }, log, isAcceptable);
}
|  | ||||
/**
 * Open a new tab via chrome.tabs.create.
 * @param {string} url url passed to chrome.tabs.create
 * @param {boolean} active `active` flag passed to chrome.tabs.create
 * @returns {Promise<any>} a promise of the tab handed to the create callback
 */
async function createTab(url, active) {
    const options = { 'url': url, 'active': active };
    return new Promise(resolve => {
        chrome.tabs.create(options, tab => resolve(tab));
    });
}
|  | ||||
/**
 * Find the active tab via chrome.tabs.query.
 * @param {boolean} currentWindow `currentWindow` filter passed to chrome.tabs.query
 * @returns {Promise<any>} a promise of the first matching tab, or `undefined`
 *     when the query yields no tab (including when the callback receives no
 *     tab list at all)
 */
async function getActiveTab(currentWindow) {
    return new Promise(resolve => {
        chrome.tabs.query({
            active: true,
            currentWindow: currentWindow
        }, function (tabs) {
            // Read lastError so a failed query counts as handled, the same
            // way getTabByID does; callers treat `undefined` as "no tab".
            void chrome.runtime.lastError;
            resolve(tabs && tabs[0]);
        });
    });
}
|  | ||||
/**
 * Look a tab up by id via chrome.tabs.get.
 * chrome.runtime.lastError is read (and ignored) inside the callback, so the
 * promise simply resolves with whatever the callback received.
 * @param {number} id id passed to chrome.tabs.get
 * @returns {Promise<any>} a promise of the tab handed to the callback
 */
async function getTabByID(id) {
    return new Promise(resolve => {
        chrome.tabs.get(id, tab => {
            void chrome.runtime.lastError;
            resolve(tab);
        });
    });
}
							
								
								
									
										143
									
								
								scripts/background/extractor.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										143
									
								
								scripts/background/extractor.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,143 @@ | ||||
/**
 * Runs a chain of extraction tasks in one tab and caches each task's result.
 */
class Extractor {
    constructor() {
        this._tasks = [];
        this._tab = undefined;
        this._running = false;
        // Results are keyed by the task's own args array (by identity), so two
        // tasks with identical arguments do not overwrite each other.
        this._results = new Map();
    }
    /**
     * Add a task to the Extractor.
     * One Extractor can have multiple tasks, which are organized in a task chain.
     * A later task uses the previous task's result as input (target url list),
     * so only the first task can have target url arguments.
     * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
     * @returns {Extractor} this, for chaining
     */
    task(...args) {
        if (!testArgs(...args)) {
            console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`);
            // the invalid task is not added, so it can never run
            return this;
        }
        // More than 2 arguments means the task specifies its own target pages,
        // so it won't accept the previous task's result as url list.
        // In that case earlier tasks are useless and are cleared.
        if (args.length > 2) this.clear();
        this._tasks.push(args);
        return this;
    }
    /**
     * Clear tasks and cached results.
     */
    clear() {
        this._tasks = [];
        this._results = new Map();
    }
    /**
     * Start the task chain from the first task.
     * Errors are logged to the console rather than thrown.
     * @returns {Promise<void>} settles when the chain has finished or failed
     */
    async start() {
        if (this._running) {
            console.log('The Extractor is running. Please wait..');
            return;
        }
        if (!this._tasks.length) {
            console.log('No task to run.');
            return;
        }
        // Set before the first await so a second start() cannot slip in.
        this._running = true;
        try {
            const firstTaskArgs = this._tasks[0];
            if (firstTaskArgs.length > 2) {
                // task specifies target urls, create a new tab with the first url for it
                const urls = parseUrls(...firstTaskArgs.slice(2));
                this._tab = await createTab(urls[0], false);
            } else {
                this._tab = await getActiveTab(false);
            }
            await this._runFrom(0, undefined);
        } catch (err) {
            this._running = false;
            console.log(err);
        }
    }
    /**
     * Restart from the specified task without re-running the previous tasks;
     * the cached result of the preceding task is used as input.
     * Errors are logged to the console rather than thrown.
     * @param {number} taskid 1-based id of the task to restart from (default 1)
     * @returns {Promise<void>} settles when the chain has finished or failed
     */
    async restart(taskid) {
        if (this._running) {
            console.log('The Extractor is running. Please wait..');
            return;
        }
        taskid = this._checkTaskId(taskid, 1);
        if (!taskid) return;
        if (taskid === 1) return this.start();
        const cache = this._results.get(this._tasks[taskid - 2]);
        if (!cache) {
            console.log(`No result cache for task (id ${taskid}). \nMake sure call ".start()" before ".restart()"?`);
            return;
        }
        this._running = true;
        try {
            this._tab = await createTab(parseUrls(cache)[0], false);
            await this._runFrom(taskid - 1, cache);
        } catch (err) {
            this._running = false;
            console.log(err);
        }
    }
    /**
     * Save the result of a task: shows a confirm dialog with sample data and
     * calls saveFile when confirmed.
     * @param {number} taskid 1-based id of the task to save (default: the last task)
     */
    save(taskid) {
        taskid = this._checkTaskId(taskid, this._tasks.length);
        if (!taskid) return;
        const result = this._results.get(this._tasks[taskid - 1]);
        if (!result) {
            console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
            return;
        }
        if (confirm(
                `Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${result.toString(50) || "- Empty -"}`
            )) {
            saveFile(result, "text/csv");
        }
    }
    /**
     * Run the tasks from `firstIndex` on, one after another, feeding each
     * task the result of the previous one, then offer the last result for saving.
     * @param {number} firstIndex 0-based index of the first task to run
     * @param {any} input result fed to the first task; `undefined` when the
     *     first task brings its own target
     */
    async _runFrom(firstIndex, input) {
        // Work on a snapshot so task() calls made while running cannot
        // change the chain that is being executed.
        const tasks = this._tasks.slice();
        let result = input;
        for (let i = firstIndex; i < tasks.length; i++) {
            const args = tasks[i];
            result = result === undefined
                ? await getData(this._tab, ...args)
                : await getData(this._tab, ...args, result);
            this._results.set(args, result);
        }
        this._running = false;
        this.save();
    }
    /**
     * Validate a 1-based task id.
     * @param {any} id id to check; `undefined`/`null` fall back to defaultId
     * @param {number} defaultId id used when none is given
     * @returns {number} the id as an integer, or 0 when it is invalid
     */
    _checkTaskId(id, defaultId) {
        if (!this._tasks.length) {
            console.log("No task found.");
            return 0;
        }
        if (id === undefined || id === null) id = defaultId;
        const taskid = Number(id);
        // Only whole numbers can address a task.
        if (!Number.isInteger(taskid) || taskid < 1 || taskid > this._tasks.length) {
            console.log(`Invalid task id. Rang(1-${this._tasks.length})`);
            return 0;
        }
        return taskid;
    }
}
							
								
								
									
										50
									
								
								scripts/background/messaging.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								scripts/background/messaging.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,50 @@ | ||||
|  | ||||
/**
 * Repeatedly send a message to the target tab until the response is accepted.
 *
 * Before every attempt the tab is looked up again; when it no longer exists
 * the promise is rejected. A chrome.runtime.lastError on the response rejects
 * with its message. Any other failure during an attempt (for example `cond`
 * throwing) rejects as well instead of leaving the promise pending.
 *
 * @param {object} tab the tab to send the message to
 * @param {object} req the request data
 * @param {string} log message logged to the console for every attempt (optional)
 * @param {function} cond success condition, r:any=>boolean; when omitted the
 *     first response is accepted
 * @param {number} interval delay between attempts in milliseconds (default 500)
 * @return {Promise} a promise of the accepted response
 */
function sendMessage(tab, req, log, cond, interval) {
    const delay = interval || 500;
    return new Promise((resolve, reject) => {
        const attempt = async () => {
            // console.log("request for", req.action);
            const tabAvailable = await getTabByID(tab.id);
            if (!tabAvailable) {
                reject("Task interrupted due to the target tab is closed.");
                return;
            }

            chrome.tabs.sendMessage(tab.id, req, r => {
                try {
                    if (chrome.runtime.lastError) {
                        reject(chrome.runtime.lastError.message);
                        return;
                    }
                    const accepted = !cond || cond(r);
                    if (log) console.log(log, accepted ? '(OK)' : '(failed)');
                    if (accepted) {
                        resolve(r);
                    } else {
                        setTimeout(run, delay);
                    }
                } catch (err) {
                    // e.g. cond() threw: fail the request instead of hanging
                    reject(err);
                }
            });
        };
        // Route failures of an attempt into the returned promise; otherwise
        // they would become unhandled rejections and the caller would wait forever.
        const run = () => {
            attempt().catch(reject);
        };
        run();
    });
}
|  | ||||
// Incoming runtime messages: messages whose action starts with EXT_NAME get a
// refusal as response; everything else is ignored.
chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
    const action = message && message.action;
    // Guard the type first: a missing message or a non-string action must be
    // ignored, not crash the listener on `.startsWith`.
    if (typeof action !== 'string' || !action.startsWith(EXT_NAME)) {
        return;
    }
    sendResponse("Calling from user pages is not allowed.");
});
							
								
								
									
										34
									
								
								scripts/background/result.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								scripts/background/result.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,34 @@ | ||||
/**
 * Table-like wrapper around extracted data: an array of rows, each row being
 * an array of cells.
 */
class ExtractResult {
    /**
     * @param {Array<Array>} data rows of cells; defaults to an empty table
     */
    constructor(data) {
        this._data = data || [];
    }
    /**
     * @param {number} index row index
     * @returns {Array} the row at `index`
     */
    row(index) {
        return this._data[index];
    }
    /**
     * @param {number} index column index
     * @returns {Array} the cell at `index` of every row
     */
    column(index) {
        return this._data.map(row => row[index]);
    }
    /**
     * Flatten the table by one level.
     * @returns {Array} all cells of all rows in one list
     */
    squash() {
        const cells = [];
        for (const row of this._data) {
            if (Array.isArray(row)) {
                for (const cell of row) cells.push(cell);
            } else {
                cells.push(row);
            }
        }
        return cells;
    }
    /** The underlying rows. */
    get data() {
        return this._data;
    }
    /**
     * Render the table as CSV: every cell is trimmed, quoted, and has its
     * double quotes doubled; every row ends with a newline.
     * @param {number} rowsCount when greater than 0, only the first `rowsCount` rows are rendered
     * @returns {string} the CSV text
     */
    toString(rowsCount) {
        const rows = rowsCount > 0 ? this._data.slice(0, rowsCount) : this._data;
        const quote = cell => {
            // Cells are not guaranteed to be strings: missing values become
            // empty cells, everything else is converted before trimming.
            const text = cell === null || cell === undefined ? '' : String(cell).trim();
            return '"' + text.replace(/"/g, '""') + '"';
        };
        return rows
            .map(cells => Array.from(cells, cell => quote(cell)).join(",") + "\n")
            .join("");
    }
}
							
								
								
									
										68
									
								
								scripts/background/signiture.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								scripts/background/signiture.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,68 @@ | ||||
/**
 * Usage/help text listing the accepted Extractor.task() signatures.
 * It is embedded into the messages reported when task arguments are invalid.
 */
const signitures = `
## Usage
new Extractor().task(...args).task(...args).start();

## Extractor.task() Signitures:
function(itemsSelector:string, fieldSelectors:string[])
function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
function(itemsSelector:string, fieldSelectors:string[], urls:string[])

## Example:
// extract all links text & url under '.item' elements
// use 'selector@attr' to get attribute of the field elements
new Extractor().task(".item", ["a", "a@href"]).start();

## See Detailed Help:
https://git.jebbs.co/jebbs/data-extracter-extesion
`.trim();
|  | ||||
/**
 * Check whether the arguments match one of the supported task signatures:
 *  (itemsSelector:string, fieldSelectors:string[])
 *  (itemsSelector:string, fieldSelectors:string[], urls:string[] | ExtractResult)
 *  (itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 *  (itemsSelector:string, fieldSelectors:string[], url:string, from, to, interval)
 *
 * from/to/interval must be finite numbers (numeric strings are tolerated) and
 * interval must be greater than 0, so that the page range can be enumerated.
 *
 * @param {...any} args the arguments to check
 * @returns {boolean} true when the arguments are valid
 */
function testArgs(...args) {
    const isStringArray = v =>
        Array.isArray(v) && v.every(item => typeof item === "string");
    const isNumeric = v =>
        (typeof v === "number" || (typeof v === "string" && v.trim() !== "")) &&
        Number.isFinite(Number(v));

    if (args.length < 2) return false;
    const [itemsSelector, fieldSelectors, ...target] = args;
    // Every signature starts with a non-empty selector and a list of selectors.
    if (typeof itemsSelector !== "string" || !itemsSelector) return false;
    if (!isStringArray(fieldSelectors)) return false;

    switch (target.length) {
        case 0:
            return true;
        case 1:
            return isStringArray(target[0]) || target[0] instanceof ExtractResult;
        case 2:
            return typeof target[0] === "string" &&
                Array.isArray(target[1]) &&
                target[1].every(page => typeof page === "number");
        case 4:
            return typeof target[0] === "string" &&
                target.slice(1).every(v => isNumeric(v)) &&
                Number(target[3]) > 0;
        default:
            return false;
    }
}
|  | ||||
/**
 * Render call arguments as a comma separated list for log messages.
 * Arrays are shown in brackets; every other value is converted with String(),
 * so `undefined` and `null` are printed instead of throwing.
 * @param {...any} args values to render
 * @returns {string} the rendered list
 */
function argsToString(...args) {
    return args
        .map(v => (Array.isArray(v) ? `[${v.join(', ')}]` : String(v)))
        .join(', ');
}
		Reference in New Issue
	
	Block a user