189 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			189 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/**
 * Extract data via getData, show a sample in a confirm dialog and, when the
 * user accepts, hand the result to saveFile as "text/csv".
 * @param {string} itemsSelector items selector for selecting items (data rows)
 * @param {Array<string>} fieldSelectors field selectors for selecting fields (data columns) under each item
 * @param {...any} args forwarded unchanged to getData: url list / url template plus page numbers,
 *                      either [from, to, interval] or [...pages]
 */
async function extract(itemsSelector, fieldSelectors, ...args) {
    const extracted = await getData(itemsSelector, fieldSelectors, ...args);
    const itemCount = extracted.data.length;
    // toString(50) presumably renders the first 50 rows as the sample - verify against ExractResult.
    const sample = extracted.toString(50) || "- Empty -";
    const accepted = confirm(
        `Click confirm to download if the sample data looks good (${itemCount} items):\n\n${sample}`
    );
    if (!accepted) {
        return;
    }
    saveFile(extracted, "text/csv");
}
 | ||
| 
 | ||
/**
 * Extract data from the active tab, or from a list of urls that are loaded
 * one after another in the active tab.
 *
 * The optional trailing arguments select the urls to visit:
 *  - `[url, ...]`                        an explicit url list
 *  - an ExractResult                     its squash() output, minus falsy entries, is the url list
 *  - `template, [page, ...]`             the template with "${page}" replaced by each page
 *  - `template, from, to, interval`      the template with "${page}" replaced by from, from+interval, ... <= to
 * When no urls result from the arguments, the active tab is extracted as it is.
 *
 * @param {string} itemsSelector items selector for selecting items (data rows)
 * @param {Array<string>} fieldSelectors field selectors for selecting fields (data columns) under each item
 * @param {...any} args url list / url template and page numbers, see above
 * @returns {Promise<ExractResult>} the rows collected from all visited pages, in visiting order
 * @throws {Error} when testArgs rejects the arguments, when a page range can never terminate,
 *                 or when no active tab can be found
 */
async function getData(itemsSelector, fieldSelectors, ...args) {
    if (!testArgs(itemsSelector, fieldSelectors, ...args))
        throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);

    // Substitute every "${page}" placeholder; String.prototype.replace with a
    // string pattern would only touch the first occurrence.
    const fillPage = (template, page) => template.split("${page}").join(String(page));

    let urls = [];
    if (args.length) {
        const [first, ...rest] = args;
        if (Array.isArray(first)) {
            urls = first;
        } else if (first instanceof ExractResult) {
            urls = first.squash().filter(v => !!v);
        } else if (first) {
            if (Array.isArray(rest[0])) {
                urls = rest[0].map(page => fillPage(first, page));
            } else if (rest.length >= 3) {
                const [from, to, interval] = rest;
                // A non-positive step would never reach `to` and loop forever.
                if (from <= to && interval <= 0) {
                    throw new Error(`Invalid page interval: ${interval}. It must be greater than 0.`);
                }
                for (let page = from; page <= to; page += interval) {
                    urls.push(fillPage(first, page));
                }
            }
        }
    }

    // Prefer the active tab of the current window, fall back to any other window.
    const tab = await getActiveTab(true) || await getActiveTab(false);
    if (!tab) throw new Error("Cannot find active tab.");

    const data = [];
    if (urls.length) {
        // Pages share one tab, so they must be visited strictly one at a time.
        for (const url of urls) {
            await redirectTab(tab, url);
            data.push(...await extractTabData(tab, itemsSelector, fieldSelectors));
        }
    } else {
        data.push(...await extractTabData(tab, itemsSelector, fieldSelectors));
    }
    return new ExractResult(data);
}
 | ||
| 
 | ||
/**
 * Navigate the target tab to the given url and wait until it reports in.
 *
 * Steps: query the tab's current url; when it differs from the requested one,
 * send a "GotoUrl" request; poll the url until it is no longer the previous
 * one; finally wait for reportIn to succeed.
 * @param {any} tab target tab
 * @param {string} url the url to load in the tab
 * @returns {Promise<any>} a promise of the reportIn response
 */
async function redirectTab(tab, url) {
    const currentUrl = await queryUrl(tab);
    // Stays empty when the tab already shows `url`, so the poll below accepts any url.
    let previousUrl = "";
    if (url !== currentUrl) {
        previousUrl = currentUrl;
        // Awaited so that a failure to deliver the request reaches the caller
        // instead of being lost in a floating promise.
        await sendMessage(tab, {
            from: "GotoUrl",
            url: url
        });
    }
    await queryUrl(tab, previousUrl);
    return reportIn(tab);
}
 | ||
| 
 | ||
/**
 * Ask the target tab to extract its data.
 * Sends an "Extract" request through sendMessage and accepts the first
 * response that is truthy and has a truthy `length`.
 * @param {any} tab target tab
 * @param {string} itemsSelector items selector for selecting items (data rows)
 * @param {Array<string>} fieldSelectors field selectors for selecting fields (data columns) under each item
 * @returns {Promise<string[]>} a promise of extracted data
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    const hasRows = response => response && response.length;
    return sendMessage(tab, { from: "Extract", itemsSelector, fieldSelectors }, hasRows);
}
 | ||
| 
 | ||
/**
 * Wait for the target tab to report in, usually used to detect whether the
 * content script is ready.
 * @param {any} tab target tab
 * @returns {Promise<string>} a promise of the report in message
 */
function reportIn(tab) {
    const request = { from: "ReportIn" };
    // `request.from` is read when a response arrives, i.e. after sendMessage
    // has rewritten it with its prefix, so the reply must equal the prefixed name.
    const isEcho = reply => reply == request.from;
    return sendMessage(tab, request, isEcho);
}
 | ||
| 
 | ||
/**
 * Ask the target tab for its url.
 * @param {any} tab target tab
 * @param {string} urlExcluded if specified, queryUrl resolves only when the response differs from urlExcluded
 * @returns {Promise<string>} a promise of the url
 */
function queryUrl(tab, urlExcluded) {
    const accept = url => {
        // An empty / missing response is never accepted.
        if (!url) return url;
        // Without an exclusion any non-empty url is good.
        if (!urlExcluded) return true;
        return urlExcluded != url;
    };
    return sendMessage(tab, { from: "QueryUrl" }, accept);
}
 | ||
| 
 | ||
/**
 * Repeatedly send a message to the target tab until the response is accepted.
 *
 * NOTE: `req.from` is rewritten in place with the "DataExtracter:" prefix;
 * reportIn in this file compares responses against that rewritten value.
 * @param {object} tab the tab where to send the message
 * @param {object} req the request data.
 * @param {function} cond success condition function, r:any=>boolean; when omitted the first response is accepted
 * @param {number} interval milliseconds to wait before retrying, defaults to 500
 * @return {Promise} a promise of the response; rejects when the target tab no longer
 *                   exists or when `cond` throws
 */
function sendMessage(tab, req, cond, interval) {
    req.from = "DataExtracter:" + req.from;
    interval = interval || 500;
    return new Promise((resolve, reject) => {
        // Every failure of an attempt is routed into reject(); a bare `throw`
        // inside the async attempt would otherwise leave the caller waiting forever.
        const poll = () => {
            attempt().catch(reject);
        };

        async function attempt() {
            console.log("request for", req.from);
            const tabAvailable = await getTabByID(tab.id);
            if (!tabAvailable) {
                throw new Error("Task interupted due to the target tab is closed.");
            }

            chrome.tabs.sendMessage(tab.id, req, r => {
                try {
                    if (!cond || cond(r)) {
                        resolve(r);
                    } else {
                        setTimeout(poll, interval);
                    }
                } catch (err) {
                    reject(err);
                }
            });
        }

        poll();
    });
}
 | ||
| 
 | ||
/**
 * Find the active tab.
 * @param {boolean} currentWindow passed through as the `currentWindow` filter of chrome.tabs.query
 * @returns {Promise<any>} a promise of the first matching tab, or undefined when
 *                         there is none or the query failed
 */
function getActiveTab(currentWindow) {
    return new Promise(resolve => {
        chrome.tabs.query({
            active: true,
            currentWindow: currentWindow
        }, tabs => {
            // Reading lastError marks a failed query as handled; callers treat
            // `undefined` as "no tab found".
            void chrome.runtime.lastError;
            // `tabs` may be missing after a failure; indexing it blindly would
            // throw inside the callback and leave this promise pending forever.
            resolve(tabs && tabs[0]);
        });
    });
}
 | ||
| 
 | ||
/**
 * Look up a tab by its id.
 * @param {number} id the tab id handed to chrome.tabs.get
 * @returns {Promise<any>} a promise of the tab, or undefined when the callback receives no tab
 */
function getTabByID(id) {
    return new Promise(resolve => {
        chrome.tabs.get(id, tab => {
            // Reading lastError marks a failed lookup as handled, so the browser
            // does not log it as unchecked; the caller gets `undefined` instead.
            void chrome.runtime.lastError;
            resolve(tab);
        });
    });
}