init
This commit is contained in:
		
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | |||||||
|  | .DS_Store | ||||||
							
								
								
									
										38
									
								
								manifest.json
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										38
									
								
								manifest.json
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,38 @@ | |||||||
|  | { | ||||||
|  |   "manifest_version": 2, | ||||||
|  |   "name": "Data Extracter", | ||||||
|  |   "version": "0.0.1", | ||||||
|  |   "author": "jebbs", | ||||||
|  |   "description": "Extract data as a sheet from web page elements.", | ||||||
|  |   "icons": { | ||||||
|  |     "16": "icon.png", | ||||||
|  |     "48": "icon.png", | ||||||
|  |     "128": "icon.png" | ||||||
|  |   }, | ||||||
|  |   "browser_action": { | ||||||
|  |     "default_icon": "icon.png", | ||||||
|  |     "default_popup": "popup/tip.html", | ||||||
|  |     "default_title": "Data Extracter" | ||||||
|  |   }, | ||||||
|  |   "background": { | ||||||
|  |     "scripts": [ | ||||||
|  |       "scripts/tools.js", | ||||||
|  |       "scripts/extract.js", | ||||||
|  |       "scripts/background.js" | ||||||
|  |     ], | ||||||
|  |     "persistent": false | ||||||
|  |   }, | ||||||
|  |   "content_scripts": [{ | ||||||
|  |     "matches": ["*://*/*"], | ||||||
|  |     "js": [ | ||||||
|  |       "scripts/jquery.min.js", | ||||||
|  |       "scripts/tools.js", | ||||||
|  |       "scripts/content.js" | ||||||
|  |     ], | ||||||
|  |     "run_at": "document_idle" | ||||||
|  |   }], | ||||||
|  |   "permissions": [ | ||||||
|  |     "activeTab", | ||||||
|  |     "storage" | ||||||
|  |   ] | ||||||
|  | } | ||||||
							
								
								
									
										
											BIN
										
									
								
								popup/demo.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								popup/demo.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 35 KiB | 
							
								
								
									
										52
									
								
								popup/index.html
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										52
									
								
								popup/index.html
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,52 @@ | |||||||
|  | <!doctype html> | ||||||
|  | <html> | ||||||
|  | <head> | ||||||
|  | <meta charset="utf-8"> | ||||||
|  | <title>Data Extractor</title> | ||||||
|  | <script charset="UTF-8" type="text/javascript" src="../scripts/jquery.min.js"></script> | ||||||
|  | <script charset="UTF-8" type="text/javascript" src="../styles/bootstrap.min.js"></script> | ||||||
|  | <script charset="UTF-8" type="text/javascript" src="../scripts/tools.js"></script> | ||||||
|  | <script charset="UTF-8" type="text/javascript" src="../scripts/extract.js"></script> | ||||||
|  | <script charset="UTF-8" type="text/javascript" src="popup.js"></script> | ||||||
|  |  | ||||||
|  | <link rel="stylesheet" href="../styles/bootstrap.min.css"> | ||||||
|  | </head> | ||||||
|  |  | ||||||
|  | <body style="margin: 20px 10px;"> | ||||||
|  |     <div class="container-fluid" style="min-width: 420px;"> | ||||||
|  |         <div class="row"> | ||||||
|  |             <div class="col"> | ||||||
|  |                 <h3>Data Extractor</h3> | ||||||
|  |                 <div class="alert alert-info small" role="alert"> | ||||||
|  |                     Selector fields require | ||||||
|  |                     <b>JQuery Selectors</b> to select data items and fields. | ||||||
|  |                 </div> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="row"> | ||||||
|  |             <div class="col"> | ||||||
|  |                 <form> | ||||||
|  |                     <div class="form-group"> | ||||||
|  |                         <label for="formFileName">File Name</label> | ||||||
|  |                         <input type="text" class="form-control" id="formFileName" placeholder="fileName"> | ||||||
|  |                     </div> | ||||||
|  |                     <div class="form-group"> | ||||||
|  |                         <label for="formItemSelector">Items Selector</label> | ||||||
|  |                         <input type="text" class="form-control" id="formItemSelector" placeholder=".item-selector"> | ||||||
|  |                     </div> | ||||||
|  |                     <div class="form-group"> | ||||||
|  |                         <label for="formFieldSelector">Field Selectors (one selector per line)</label> | ||||||
|  |                         <textarea class="form-control" id="formFieldSelector" rows="3" placeholder=".field-selector"></textarea> | ||||||
|  |                     </div> | ||||||
|  |                 </form> | ||||||
|  |                 <input type="button" class="btn btn-primary" value="Extract" id="doExtract"> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="row"> | ||||||
|  |             <div class="col"> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |     </div> | ||||||
|  | </body> | ||||||
|  |  | ||||||
|  | </html> | ||||||
							
								
								
									
										10
									
								
								popup/popup.js
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										10
									
								
								popup/popup.js
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,10 @@ | |||||||
// Wire up the popup once the DOM is ready.
$(() => {
    $("#doExtract").on("click", doExtract);
});

/**
 * Click handler of the "Extract" button.
 *
 * Reads the items selector and the field selectors (one per line) from the
 * popup form and hands them to `extract`. The form content is only ever used
 * as selector text; it is never evaluated as code.
 *
 * NOTE(review): the "File Name" field (#formFileName) is not used, because
 * neither `extract` nor `saveFileAsk` accepts a file name.
 */
function doExtract() {
    const itemSelector = String($("#formItemSelector").val() || "").trim();
    const fieldSelectors = String($("#formFieldSelector").val() || "")
        .split("\n")
        .map(line => line.trim())
        .filter(line => line.length > 0); // blank lines are not selectors

    if (!itemSelector || fieldSelectors.length === 0) {
        console.log("doExtract: an items selector and at least one field selector are required.");
        return;
    }

    const pending = extract(itemSelector, fieldSelectors);
    // `extract` may return a promise; make sure a rejection is not left unhandled.
    if (pending && typeof pending.catch === "function") {
        pending.catch(err => console.log(err));
    }
}
							
								
								
									
										46
									
								
								popup/tip.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								popup/tip.html
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,46 @@ | |||||||
|  | <!doctype html> | ||||||
|  | <html> | ||||||
|  | <head> | ||||||
|  | <meta charset="utf-8"> | ||||||
|  | <title>Data Extractor</title> | ||||||
|  | <script charset="UTF-8" type="text/javascript" src="../scripts/jquery.min.js"></script> | ||||||
|  | <script charset="UTF-8" type="text/javascript" src="../styles/bootstrap.min.js"></script> | ||||||
|  |  | ||||||
|  | <link rel="stylesheet" href="../styles/bootstrap.min.css"> | ||||||
|  | </head> | ||||||
|  |  | ||||||
|  | <body style="margin: 20px 10px;"> | ||||||
|  |     <div class="container-fluid" style="min-width: 420px;"> | ||||||
|  |         <div class="row"> | ||||||
|  |             <div class="col"> | ||||||
|  |                 <h3>Data Extractor</h3> | ||||||
|  |                 <div class="alert alert-info small" role="alert"> | ||||||
|  |                     <h6>Usage:</h6> | ||||||
|  |                     <p> | ||||||
|  |                         <b>Open console</b> and | ||||||
|  |                         <b>switch to Data Extracter</b>, then call the | ||||||
|  |                         <b>extract</b> function. | ||||||
|  |                     </p> | ||||||
|  |                     <p> | ||||||
|  |                         <img src="demo.png" alt="" style="max-width: 489px; width: 100%; border-radius: 5px"> | ||||||
|  |                     </p> | ||||||
|  |                     <p> | ||||||
|  |                         <b>Example</b>:<br> extract("table tr", ["td:nth-child(1)","td:nth-child(2)"]) | ||||||
|  |                     </p> | ||||||
|  |                 </div> | ||||||
|  |                 <div class="alert alert-info small" role="alert"> | ||||||
|  |                     <h6>Interface:</h6> | ||||||
|  |                     <p>function extract(itemsSelector:string, fieldSelectors:string[]) </p> | ||||||
|  |                     <p>function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)</p> | ||||||
|  |                     <p>function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])</p> | ||||||
|  |                 </div> | ||||||
|  |                 <!-- <div class="alert alert-danger small" role="alert"> | ||||||
|  |                     Selector means | ||||||
|  |                     <b>JQuery Selectors</b> which is used to select data items and fields. | ||||||
|  |                 </div> --> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |     </div> | ||||||
|  | </body> | ||||||
|  |  | ||||||
|  | </html> | ||||||
							
								
								
									
										6
									
								
								scripts/background.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								scripts/background.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,6 @@ | |||||||
/**
 * Background entry point: runs a multi-page extraction when a
 * "doExtractRequest" message arrives.
 *
 * `message.args` is spread into `extract`, so it must be an array. Failures
 * are logged to the background console; no response is sent to the sender.
 */
chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
    if (!message || message.from !== "doExtractRequest") return;
    if (!Array.isArray(message.args)) {
        // Spreading a non-iterable would throw inside the listener.
        console.log("doExtractRequest ignored: message.args must be an array.");
        return;
    }
    // Calling extract inside a promise chain routes a synchronous throw
    // through the same .catch as an asynchronous rejection.
    Promise.resolve()
        .then(() => extract(...message.args))
        .catch(err => console.log(err));
});
							
								
								
									
										64
									
								
								scripts/content.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								scripts/content.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,64 @@ | |||||||
/**
 * Content-script message router.
 *
 * - "doExtractRequest":  replies with the rows extracted from this page.
 * - "doExtractGotoUrl":  navigates this page to `request.url` (no reply).
 * - "doExtractReportIn": echoes the message kind back to the sender.
 * Any other message is ignored.
 */
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
    const kind = request.from;

    if (kind == "doExtractRequest") {
        sendResponse(extractData(request.itemsSelector, request.fieldSelectors));
        return;
    }

    if (kind == "doExtractGotoUrl") {
        window.location.replace(request.url);
        return;
    }

    if (kind == "doExtractReportIn") {
        sendResponse(kind);
    }
});
|  |  | ||||||
/**
 * Builds one row per element matched by `itemsSelector`.
 *
 * Each row holds one cell per entry of `fieldSelectors`: the trimmed text of
 * every descendant matching that selector, joined with newlines.
 *
 * @param {string} itemsSelector jQuery selector for the item elements.
 * @param {string[]} fieldSelectors jQuery selectors evaluated inside each item.
 * @returns {string[][]} Extracted rows.
 */
function extractData(itemsSelector, fieldSelectors) {
    const rows = [];
    for (const itemElement of $(itemsSelector).toArray()) {
        const cells = fieldSelectors.map(selector => {
            const texts = $(itemElement)
                .find(selector)
                .toArray()
                .map(match => match.textContent.trim());
            return texts.join('\n');
        });
        rows.push(cells);
    }
    return rows;
}
|  |  | ||||||
/**
 * Console entry point of the content script.
 *
 * With two arguments the current page is extracted directly and offered for
 * download. With more arguments the request is forwarded to the background
 * page as a "doExtractRequest" message.
 *
 * Supported call shapes:
 *   extract(itemsSelector, fieldSelectors)
 *   extract(itemsSelector, fieldSelectors, url, from, to, interval)
 *   extract(itemsSelector, fieldSelectors, url, pages)
 *
 * @param {...*} args See the call shapes above; validated by `testArgs`.
 */
function extract(...args) {
    const usage = `Invalid call args.
function extract(itemsSelector:string, fieldSelectors:string[])
function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])`;

    if (!testArgs(...args)) {
        console.log(usage);
        return;
    }

    if (args.length === 2) {
        const [itemsSelector, fieldSelectors] = args;
        // saveFileAsk treats row 0 as the header: it previews from row 1 and
        // reports length - 1 items. Put the selectors there so the first data
        // row is neither hidden from the preview nor miscounted.
        saveFileAsk([fieldSelectors, ...extractData(itemsSelector, fieldSelectors)]);
        return;
    }

    const message = {
        from: "doExtractRequest",
        args: args
    };
    chrome.runtime.sendMessage(message, reply => {
        if (reply) {
            console.log(reply);
            alert(reply);
        }
    });
}
|  |  | ||||||
/**
 * Validates the arguments of the console `extract` call.
 *
 * Accepted shapes:
 *   (itemsSelector, fieldSelectors)
 *   (itemsSelector, fieldSelectors, url, pages[])
 *   (itemsSelector, fieldSelectors, url, from, to, interval)
 *
 * The selectors are checked for every shape. Paging numbers must be real
 * finite numbers (no coercion from strings or null), and `interval` must be
 * positive because it is used as the step of the page range.
 *
 * @param {...*} args Arguments as passed to `extract`.
 * @returns {boolean} true when the arguments match one of the shapes.
 */
function testArgs(...args) {
    const [itemsSelector, fieldSelectors, url, ...paging] = args;

    if (typeof itemsSelector !== "string" || itemsSelector === "") return false;
    if (!Array.isArray(fieldSelectors)) return false;
    if (args.length === 2) return true;

    if (typeof url !== "string") return false;
    if (Array.isArray(paging[0])) return true;

    const [from, to, interval] = paging;
    // Number.isFinite does not coerce, so "1", null and undefined are rejected.
    return [from, to, interval].every(Number.isFinite) && interval > 0;
}
							
								
								
									
										121
									
								
								scripts/extract.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								scripts/extract.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,121 @@ | |||||||
/**
 * Extracts rows from the active tab, optionally after redirecting it through
 * a series of page URLs, then offers the result for download.
 *
 * Call shapes:
 *   extract(itemsSelector, fieldSelectors)
 *   extract(itemsSelector, fieldSelectors, url, from, to, interval)
 *   extract(itemsSelector, fieldSelectors, url, pages)
 *
 * `url` is a template in which the first "${page}" is replaced by each page
 * value. Pages are visited strictly one after another in the same tab. The
 * field selectors are prepended to the collected rows as a header row.
 *
 * @param {string} itemsSelector Selector of the item elements.
 * @param {string[]} fieldSelectors Selectors evaluated inside each item.
 * @param {string} [url] URL template containing "${page}".
 * @param {...*} args Either `[pages[]]` or `[from, to, interval]`.
 * @returns {Promise<string>} Resolves with "save done."; rejects when the
 *     paging arguments are invalid, no active tab exists, or a page fails.
 */
function extract(itemsSelector, fieldSelectors, url, ...args) {
    let urls;
    try {
        urls = buildPageUrls(url, args);
    } catch (err) {
        // Keep the contract "always returns a promise" even for bad arguments.
        return Promise.reject(err);
    }

    return new Promise((resolve, reject) => {
        chrome.tabs.query({
            active: true,
            currentWindow: true
        }, tabs => {
            const tab = tabs && tabs[0];
            if (!tab) {
                reject(new Error("extract failed: no active tab found."));
                return;
            }

            const requireRows = rows => {
                if (!Array.isArray(rows)) {
                    throw new Error("extract failed: the tab returned no data.");
                }
                return rows;
            };

            // Chain the pages so each redirect starts only after the previous
            // page has been extracted.
            const collected = urls.length
                ? urls.reduce(
                    (chain, pageUrl) => chain.then(
                        rows => redirectTab(tab, pageUrl)
                            .then(readyTab => extractTabData(readyTab, itemsSelector, fieldSelectors))
                            .then(pageRows => rows.concat(requireRows(pageRows)))
                    ),
                    Promise.resolve([])
                )
                : extractTabData(tab, itemsSelector, fieldSelectors).then(requireRows);

            collected
                .then(rows => {
                    saveFileAsk([fieldSelectors].concat(rows));
                    resolve("save done.");
                })
                .catch(reject); // also covers a throw from saveFileAsk
        });
    });
}

/**
 * Expands the URL template into the list of page URLs to visit.
 *
 * @param {string} [url] URL template; a falsy value means "current page only".
 * @param {Array} args Either `[pages[]]` or `[from, to, interval]`.
 * @returns {string[]} Page URLs; empty when no paging was requested.
 * @throws {RangeError} When from/to/interval are not finite numbers or the
 *     interval is not positive (the range could never terminate).
 */
function buildPageUrls(url, args) {
    if (!url) return [];
    const toPageUrl = page => url.replace("${page}", page);

    if (args[0] instanceof Array) {
        return args[0].map(page => toPageUrl(page));
    }
    if (args.length < 3) return [];

    // Coerce so numeric strings step arithmetically instead of concatenating.
    const [from, to, interval] = args.slice(0, 3).map(Number);
    if (![from, to, interval].every(Number.isFinite) || interval <= 0) {
        throw new RangeError("extract failed: from, to and interval must be numbers and interval must be > 0.");
    }

    const urls = [];
    for (let page = from; page <= to; page += interval) {
        urls.push(toPageUrl(page));
    }
    return urls;
}
|  |  | ||||||
/**
 * Asks the content script of `tab` to extract rows from its page.
 *
 * Sends a "doExtractRequest" message and settles with the first of:
 *  - the content script's response (resolve),
 *  - a messaging failure reported through `chrome.runtime.lastError` (reject),
 *  - no answer within 2 seconds (reject).
 * The timeout timer is cleared as soon as an answer arrives.
 *
 * @param {{id: number}} tab Target tab.
 * @param {string} itemsSelector Selector of the item elements.
 * @param {string[]} fieldSelectors Selectors evaluated inside each item.
 * @returns {Promise<*>} Resolves with whatever the content script responded.
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    return new Promise((resolve, reject) => {
        let settled = false;

        const timer = setTimeout(() => {
            if (settled) return;
            settled = true;
            reject(`extractTabData failed after 2 second.`);
        }, 2000);

        chrome.tabs.sendMessage(
            tab.id, {
                from: "doExtractRequest",
                itemsSelector: itemsSelector,
                fieldSelectors: fieldSelectors
            },
            response => {
                // lastError is only meaningful inside this callback; read it
                // first so a failed delivery is not mistaken for an empty result.
                const lastError = chrome.runtime.lastError;
                if (settled) return;
                settled = true;
                clearTimeout(timer);
                if (lastError) {
                    reject(`extractTabData failed: ${lastError.message}`);
                    return;
                }
                resolve(response);
            }
        );
    });
}
|  |  | ||||||
/**
 * Sends the tab to `url` and waits until a content script on it answers.
 *
 * Steps:
 *  1. Send "doExtractGotoUrl" so the content script navigates the page.
 *  2. 500 ms after that message's callback fires, start sending
 *     "doExtractReportIn" pings every 100 ms.
 *  3. Resolve with `tab` on the first ping that is echoed back; reject if
 *     that has not happened within 10 seconds.
 * Both timers are cleared as soon as the promise settles.
 *
 * NOTE(review): a ping could still be answered by the page being left if the
 * navigation has not started within the first 500 ms — confirm whether that
 * matters for slow pages.
 *
 * @param {{id: number}} tab Tab to redirect.
 * @param {string} url Destination URL.
 * @returns {Promise<{id: number}>} Resolves with the same `tab` object.
 */
function redirectTab(tab, url) {
    const FIRST_POLL_DELAY_MS = 500;
    const POLL_INTERVAL_MS = 100;
    const TIMEOUT_MS = 10000;

    return new Promise((resolve, reject) => {
        let settled = false;
        let pollTimer;

        // Single exit point: settles once and stops every pending timer.
        function settle(finish, value) {
            if (settled) return;
            settled = true;
            clearTimeout(pollTimer);
            clearTimeout(watchdog);
            finish(value);
        }

        function poll() {
            if (settled) return;
            const ping = {
                from: "doExtractReportIn"
            };
            chrome.tabs.sendMessage(tab.id, ping, reply => {
                // While the page is loading there is no receiver; reading
                // lastError acknowledges that expected failure.
                void chrome.runtime.lastError;
                if (reply === ping.from) settle(resolve, tab);
            });
            // Keep pinging without waiting for the previous reply.
            if (!settled) pollTimer = setTimeout(poll, POLL_INTERVAL_MS);
        }

        const watchdog = setTimeout(
            () => settle(reject, `redirectTab failed after 10 second.`),
            TIMEOUT_MS
        );

        const request = {
            from: "doExtractGotoUrl",
            url: url
        };
        chrome.tabs.sendMessage(tab.id, request, () => {
            // The content script sends no reply to this request, so the
            // callback may fire with lastError set; acknowledge it.
            void chrome.runtime.lastError;
            if (!settled) pollTimer = setTimeout(poll, FIRST_POLL_DELAY_MS);
        });
    });
}
|  |  | ||||||
/**
 * Opens `url` in a new active tab.
 *
 * @param {string} url Address to open.
 * @returns {Promise<*>} Resolves with the value chrome.tabs.create passes to
 *     its callback.
 */
function createTab(url) {
    const options = {
        url: url,
        active: true
    };
    return new Promise(resolve => {
        chrome.tabs.create(options, createdTab => resolve(createdTab));
    });
}
							
								
								
									
										4
									
								
								scripts/jquery.min.js
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										4
									
								
								scripts/jquery.min.js
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										53
									
								
								scripts/tools.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								scripts/tools.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,53 @@ | |||||||
/**
 * Serialises rows of cells into CSV text.
 *
 * Every cell is trimmed, wrapped in double quotes, and has embedded double
 * quotes doubled. Cells are joined with "," and every row ends with "\n".
 * Non-string cells are converted with String(); null and undefined become
 * empty cells.
 *
 * @param {Array<Array<*>>} data Rows of cells.
 * @returns {string} CSV text; "" for an empty `data`.
 */
function formatCSV(data) {
    const quoteCell = cell => {
        const text = cell == null ? "" : String(cell).trim();
        return '"' + text.replace(/"/g, '""') + '"';
    };
    return data
        .map(row => row.map(cell => quoteCell(cell)).join(",") + "\n")
        .join("");
}
|  |  | ||||||
/**
 * Offers `data` to the user as a file download.
 *
 * Strategy, in order of preference:
 *  1. A temporary <a download> element pointing at an object URL.
 *  2. navigator.msSaveBlob, when the `download` attribute is unsupported.
 *  3. Navigating the page to the object URL.
 *
 * @param {*} data Content handed to the Blob constructor.
 * @param {string} mimeType MIME type of the file, e.g. "text/csv".
 * @param {string} [fileName] Download name; defaults to document.title, then
 *     "result".
 */
function saveFile(data, mimeType, fileName) {
    // Give the browser time to start the download before the URL is released.
    const REVOKE_DELAY_MS = 40000;
    const downloadName = fileName || document.title || "result";

    let blob;
    if (typeof window.Blob == "function") {
        blob = new Blob([data], {
            type: mimeType
        });
    } else {
        // Legacy fallback for engines that only expose a BlobBuilder.
        const LegacyBlobBuilder = window.BlobBuilder || window.MozBlobBuilder || window.WebKitBlobBuilder || window.MSBlobBuilder;
        const builder = new LegacyBlobBuilder();
        builder.append(data);
        blob = builder.getBlob(mimeType);
    }

    const link = document.createElement("a");
    const supportsDownloadAttribute = "download" in link;

    if (!supportsDownloadAttribute && navigator.msSaveBlob) {
        // No object URL is needed on this path, so none is created.
        navigator.msSaveBlob(blob, downloadName);
        return;
    }

    const urlApi = window.URL || window.webkitURL;
    const url = urlApi.createObjectURL(blob);

    if (!supportsDownloadAttribute) {
        // The navigation itself consumes the URL, so it cannot be revoked here.
        location.href = url;
        return;
    }

    link.style.visibility = "hidden";
    link.href = url;
    link.download = downloadName;
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    // Release the blob once the download has had time to start; otherwise
    // every export keeps its data alive for the lifetime of the page.
    setTimeout(() => urlApi.revokeObjectURL(url), REVOKE_DELAY_MS);
}
|  |  | ||||||
/**
 * Shows a sample of the extracted data and saves it as CSV once confirmed.
 *
 * Row 0 of `data` is left out of the sample and of the reported item count;
 * the sample covers rows 1 to 49. The saved file contains every row of `data`.
 *
 * @param {Array<Array<string>>} data Rows to preview and save.
 */
function saveFileAsk(data) {
    const sampleRows = data.slice(1, 50);
    const itemCount = data.length - 1;

    let preview = formatCSV(sampleRows).trim();
    if (!preview) {
        preview = "- Empty -";
    }

    const accepted = confirm(`Click confirm to download if the sample data looks good (${itemCount} items):\n\n${preview}`);
    if (!accepted) {
        return;
    }
    saveFile(formatCSV(data), "text/csv");
}
							
								
								
									
										7
									
								
								styles/bootstrap.min.css
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								styles/bootstrap.min.css
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										7
									
								
								styles/bootstrap.min.js
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								styles/bootstrap.min.js
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Reference in New Issue
	
	Block a user