169 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			169 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/**
 * Extract data from the current tab / multiple urls and hand it over for saving.
 * The field selectors are inserted as the first row of the data before
 * it is passed to saveFileAsk.
 * @param {string} itemsSelector items selectors for selecting items (data rows)
 * @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @param {...any} args url list / url templates, page numbers, either [from, to, interval] or [...pages]
 * @returns {Promise<void>} settles after the data was passed to saveFileAsk
 */
async function extract(itemsSelector, fieldSelectors, ...args) {
    const rows = await getData(itemsSelector, fieldSelectors, ...args);
    // the selectors double as the header row
    rows.unshift(fieldSelectors);
    saveFileAsk(rows);
}
 | |
| 
 | |
/**
 * Collect data from the current tab, or from a list of urls which are loaded
 * one after another in that tab.
 *
 * Accepted forms of `args`:
 *  - none: extract from the tab as it is;
 *  - [urls]: an array of urls to visit;
 *  - template, [pages]: a url template containing "${page}" and an array of page values;
 *  - template, from, to, interval: a url template and an inclusive numeric page range.
 *
 * @param {string} itemsSelector items selectors for selecting items (data rows)
 * @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @param {...any} args url list / url templates, page numbers, either [from, to, interval] or [...pages]
 * @returns {Promise<Array>} a promise of all extracted rows, in visiting order
 * @throws {RangeError} (as rejection) when a page range is given with a non-positive interval
 * @throws {Error} (as rejection) when no target tab is found
 */
async function getData(itemsSelector, fieldSelectors, ...args) {
    const [source, ...rest] = args;
    let urls = [];
    if (Array.isArray(source)) {
        urls = source;
    } else if (source) {
        // split/join fills every "${page}" placeholder and, unlike String.replace,
        // never interprets "$" sequences inside the page value.
        const fill = page => source.split("${page}").join(String(page));
        if (Array.isArray(rest[0])) {
            urls = rest[0].map(fill);
        } else if (rest.length >= 3) {
            const [from, to, interval] = rest;
            // an interval of 0 (or below) would never reach `to` and loop forever
            if (!(interval > 0)) {
                throw new RangeError("getData failed: page interval must be a positive number.");
            }
            for (let page = from; page <= to; page += interval) {
                urls.push(fill(page));
            }
        }
    }

    // NOTE(review): currentWindow is false, so tabs of the current window are
    // excluded — presumably this runs from the extension's own window; confirm.
    const tabs = await new Promise(resolve => {
        chrome.tabs.query({
            active: true,
            currentWindow: false
        }, resolve);
    });
    const tab = tabs && tabs[0];
    if (!tab) {
        throw new Error("getData failed: no active tab found.");
    }

    const data = [];
    if (!urls.length) {
        data.push(...await extractTabData(tab, itemsSelector, fieldSelectors));
        return data;
    }
    // pages share one tab, so they must be visited strictly one at a time
    for (const url of urls) {
        await redirectTab(tab, url);
        data.push(...await extractTabData(tab, itemsSelector, fieldSelectors));
    }
    return data;
}
 | |
| 
 | |
/**
 * Make the target tab show the given url and wait until it reports in.
 * When the tab is not already at the url, a "DataExtracter:GotoUrl" message is
 * sent and the function waits until the tab answers with a url different from
 * the previous one.
 * @param {any} tab target tab
 * @param {string} url the url to go to
 * @returns {Promise<string>} a promise of the report in message
 */
async function redirectTab(tab, url) {
    const currentUrl = await queryUrl(tab);
    // stays empty when no navigation is needed, so the next query accepts any url
    let previousUrl = "";
    if (currentUrl !== url) {
        previousUrl = currentUrl;
        chrome.tabs.sendMessage(tab.id, {
            from: "DataExtracter:GotoUrl",
            url: url
        });
    }
    await queryUrl(tab, previousUrl);
    return reportIn(tab);
}
 | |
| 
 | |
/**
 * Ask the target tab for its data, retrying until a truthy response arrives.
 * @param {any} tab target tab
 * @param {string} itemsSelector items selectors for selecting items (data rows)
 * @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
 * @returns {Promise<string[]>} a promise of extracted data
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    const request = {
        from: "DataExtracter:Extract",
        itemsSelector,
        fieldSelectors
    };
    // any truthy response counts as success
    const hasResponse = response => Boolean(response);
    return sendMessageAndDetect(
        tab,
        request,
        hasResponse,
        "extractTabData failed after 10 second."
    );
}
 | |
| 
 | |
/**
 * Get a report in from the target tab, usually used to detect if the content script is ready.
 * The tab counts as reported in once it answers with the request's own `from` value.
 * @param {any} tab target tab
 * @returns {Promise<string>} a promise of the report in message
 */
function reportIn(tab) {
    const req = {
        from: "DataExtracter:ReportIn"
    };
    const failMsg = "reportIn failed after 10 second.";
    // strict comparison: a loosely-equal value (e.g. a one-element array) is not a report in
    const cond = r => r === req.from;
    return sendMessageAndDetect(tab, req, cond, failMsg);
}
 | |
| 
 | |
/**
 * Get the url of the target tab.
 * @param {any} tab target tab
 * @param {string} urlExcluded if specified, queryUrl resolves only when the response does not equal urlExcluded
 * @returns {Promise<string>} a promise of the url
 */
function queryUrl(tab, urlExcluded) {
    const req = {
        from: "DataExtracter:QueryUrl"
    };
    const failMsg = "queryUrl failed after 10 second.";
    // accept any non-empty url, unless it is the one the caller wants to get away from
    const cond = url => Boolean(url) && (!urlExcluded || url !== urlExcluded);
    return sendMessageAndDetect(tab, req, cond, failMsg);
}
 | |
| 
 | |
/**
 * Repeatedly send a message to the target tab until the response is detected good,
 * or give up once the time out elapses.
 * @param {object} tab the tab where to send the message (only `tab.id` is read)
 * @param {object} req the request data.
 * @param {function} cond success condition function, r:any=>boolean
 * @param {string} failMsg message when failed after time out; used as the rejection value
 * @param {number} failedTimeOut fail time out in milliseconds, 10000 when omitted (or 0)
 * @param {number} detectInterval milliseconds to wait after a rejected response before resending, 500 when omitted (or 0)
 * @return {Promise} a promise of the first response accepted by `cond`; rejects with `failMsg` on time out.
 */
function sendMessageAndDetect(tab, req, cond, failMsg, failedTimeOut, detectInterval) {
    const timeoutMs = failedTimeOut || 10000;
    const intervalMs = detectInterval || 500;
    return new Promise((resolve, reject) => {
        // Set once the promise is resolved or rejected. Clearing the retry timer alone
        // is not enough: a message may still be in flight when the time out fires, and
        // its late callback would otherwise restart the polling forever.
        let settled = false;
        let retryTimer;
        const rejectTimer = setTimeout(() => {
            settled = true;
            clearTimeout(retryTimer);
            reject(failMsg);
        }, timeoutMs);
        attempt();

        function attempt() {
            chrome.tabs.sendMessage(tab.id, req, r => {
                if (settled) {
                    return;
                }
                if (cond(r)) {
                    settled = true;
                    clearTimeout(rejectTimer);
                    resolve(r);
                } else {
                    retryTimer = setTimeout(attempt, intervalMs);
                }
            });
        }
    });
}