168 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			168 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/**
 * Extract data from the current page or from a sequence of urls.
 *
 * Supported call forms (a leading non-string argument is taken as the target
 * tab; when it is omitted the active tab is looked up):
 *   getData(tab, itemsSelector:string, fieldSelectors:string[])
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], urls:string[])
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 *   getData(itemsSelector:string, fieldSelectors:string[])
 *   getData(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 *   getData(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 *   getData(itemsSelector:string, fieldSelectors:string[], urls:string[])
 *   getData(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 *
 * When urls are given, the tab is redirected to each url in order and data is
 * extracted after every redirect; otherwise data is extracted once from the
 * tab as it is. The first failure stops the sequence and rejects the promise.
 *
 * @param {...any} args see the call forms above
 * @returns {Promise<ExtractResult>} an ExtractResult built from the field
 *          selectors (as first row) followed by all extracted rows
 * @throws {Error} (as a rejection) if the arguments fail testArgs or no
 *         active tab can be found
 */
async function getData(...args) {
    let tab;
    // Anything that is not a string in first position is treated as the tab.
    if (typeof args[0] !== 'string') tab = args.shift();
    if (!testArgs(...args))
        throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);

    // Declared locally: these used to be implicit globals, which leaked state
    // between overlapping calls and throws outright in strict mode / modules.
    const itemsSelector = args.shift();
    const fieldSelectors = args.shift();
    const urls = parseUrls(...args);

    // Prefer the active tab of the current window, then fall back to the
    // active tab of any other window.
    if (!tab) tab = await getActiveTab(true) || await getActiveTab(false);
    if (!tab) throw new Error("Cannot find active tab.");

    const data = [];
    const collect = results => {
        if (results) data.push(...results);
    };

    if (urls.length) {
        // Strictly sequential: every page is loaded into the same tab.
        for (const url of urls) {
            await redirectTab(tab, url);
            collect(await extractTabData(tab, itemsSelector, fieldSelectors));
        }
    } else {
        collect(await extractTabData(tab, itemsSelector, fieldSelectors));
    }

    // The field selectors act as the header row of the result.
    data.unshift(fieldSelectors);
    return new ExtractResult(data);
}
 | |
| 
 | |
| function parseUrls(...args) {
 | |
|     if (!args.length) return [];
 | |
|     let arg = args.shift();
 | |
|     if (arg instanceof Array) {
 | |
|         return arg;
 | |
|     } else if (arg instanceof ExtractResult) {
 | |
|         return arg.squash().filter(v => !!v);
 | |
|     } else {
 | |
|         let urlTempl = arg;
 | |
|         if (urlTempl) {
 | |
|             if (args[0] instanceof Array) {
 | |
|                 return args[0].map(p => urlTempl.replace("${page}", p));
 | |
|             } else if (args.length >= 3) {
 | |
|                 let urls = [];
 | |
|                 let from = args.shift();
 | |
|                 let to = args.shift();
 | |
|                 let interval = args.shift();
 | |
|                 for (let i = from; i <= to; i += interval) {
 | |
|                     urls.push(urlTempl.replace("${page}", i));
 | |
|                 }
 | |
|                 return urls;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     return [];
 | |
| }
 | |
| 
 | |
/**
 * Bring the tab to `url`: ask the tab for its current url, send a goto
 * request only if it differs, then query the url again with `url` as the
 * expected value.
 * @param {any} tab target tab
 * @param {string} url url the tab should end up on
 * @returns {Promise<any>} the promise of the final queryUrl call
 */
function redirectTab(tab, url) {
    return queryUrl(tab, undefined, 'Query current url...')
        .then(currentUrl => {
            // Already on the requested url: no navigation request is sent.
            if (currentUrl === url) return undefined;
            const req = {
                action: ACTION_GOTO_URL,
                url: url
            };
            return sendMessage(tab, req, `Goto url: ${url}`);
        })
        .then(() => queryUrl(tab, url, 'Check if tab url matches expected...'));
}
 | |
| 
 | |
/**
 * Ask the target tab to extract data by sending it an ACTION_EXTRACT request
 * that carries both selectors.
 * @param {any} tab target tab
 * @param {string} itemsSelector selector for the items (data rows)
 * @param {Array<string>} fieldSelectors selectors for the fields (data columns) under each item
 * @returns {Promise<string[]>} a promise of the extracted data, as returned by sendMessage
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    // Handed to sendMessage as its condition argument: true for every response
    // that is not MSG_ELEMENT_NOT_FOUND.
    // NOTE(review): presumably sendMessage keeps waiting/retrying until this holds - confirm.
    const isAcceptable = response => !MSG_ELEMENT_NOT_FOUND.isEqual(response);
    const request = { action: ACTION_EXTRACT, itemsSelector, fieldSelectors };
    return sendMessage(tab, request, 'Extract data from the tab...', isAcceptable);
}
 | |
| 
 | |
/**
 * Send an ACTION_REPORT_IN request to the target tab; usually used to detect
 * whether the content script is ready.
 * @param {any} tab target tab
 * @returns {Promise<string>} a promise of the report-in message
 */
function reportIn(tab) {
    const request = { action: ACTION_REPORT_IN };
    // The response is accepted when it loosely equals the action that was sent.
    const echoesAction = response => response == request.action;
    return sendMessage(tab, request, 'Check tab availability...', echoesAction);
}
 | |
| 
 | |
/**
 * Query the url of the target tab by sending it an ACTION_QUERY_URL request.
 * @param {any} tab target tab
 * @param {string} expected if specified, a reported url is accepted only when it equals this value
 * @param {string} log text forwarded to sendMessage as its third argument
 * @returns {Promise<string>} a promise of the url
 */
function queryUrl(tab, expected, log) {
    const request = { action: ACTION_QUERY_URL };
    const accept = url => {
        // An empty/missing url is never accepted; the falsy value itself is returned.
        if (!url) return url;
        // Without an expectation any url will do; otherwise it must match (loosely).
        return !expected || expected == url;
    };
    return sendMessage(tab, request, log, accept);
}
 | |
| 
 | |
/**
 * Promise wrapper around chrome.tabs.create.
 * @param {string} url url to open in the new tab
 * @param {boolean} active passed through as the `active` create property
 * @returns {Promise<any>} a promise of the tab handed to the chrome.tabs.create callback
 */
async function createTab(url, active) {
    const createProperties = { 'url': url, 'active': active };
    return new Promise(resolve => {
        chrome.tabs.create(createProperties, tab => resolve(tab));
    });
}
 | |
| 
 | |
/**
 * Promise wrapper around chrome.tabs.query for active tabs.
 * @param {boolean} currentWindow passed through as the `currentWindow` query filter
 * @returns {Promise<any>} a promise of the first matching tab (undefined when the result list is empty)
 */
async function getActiveTab(currentWindow) {
    const queryInfo = { active: true, currentWindow: currentWindow };
    return new Promise(resolve => {
        chrome.tabs.query(queryInfo, tabs => {
            const firstTab = tabs[0];
            resolve(firstTab);
        });
    });
}
 | |
| 
 | |
/**
 * Promise wrapper around chrome.tabs.get.
 * @param {number} id id of the tab to look up
 * @returns {Promise<any>} a promise of whatever chrome.tabs.get passes to its callback
 */
async function getTabByID(id) {
    return new Promise(resolve => {
        const onTab = tab => {
            // chrome.runtime.lastError is read and discarded before resolving.
            // NOTE(review): presumably to keep Chrome from reporting an unchecked
            // lastError when the lookup fails - confirm.
            chrome.runtime.lastError;
            resolve(tab);
        };
        chrome.tabs.get(id, onTab);
    });
}