refactoring
This commit is contained in:
168
scripts/background/actions.js
Normal file
168
scripts/background/actions.js
Normal file
@ -0,0 +1,168 @@
|
||||
/**
 * Extract data from the current page or from a list of urls.
 *
 * Supported call shapes (the leading `tab` is optional):
 *   getData(tab, itemsSelector:string, fieldSelectors:string[])
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], urls:string[])
 *   getData(tab, itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 *   getData(itemsSelector:string, fieldSelectors:string[])
 *   getData(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 *   getData(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 *   getData(itemsSelector:string, fieldSelectors:string[], urls:string[])
 *   getData(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
 *
 * When urls are given, the tab is redirected to each url in order and the
 * rows extracted from every page are concatenated. Without urls, the tab is
 * extracted as it currently is.
 *
 * @param {...any} args see the call shapes above
 * @returns {Promise<ExtractResult>} result whose first row is `fieldSelectors`,
 *          followed by the extracted rows
 * @throws {Error} if the arguments match no call shape, or no tab can be found
 */
async function getData(...args) {
    // A leading non-string argument is taken to be the target tab.
    let tab;
    if (typeof args[0] !== 'string') tab = args.shift();
    if (!testArgs(...args))
        throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);

    // Declared locally: the selectors must not leak into the global scope.
    const [itemsSelector, fieldSelectors, ...urlArgs] = args;
    const urls = parseUrls(...urlArgs);

    // Fall back to the active tab of the current window, then of any window.
    if (!tab) tab = await getActiveTab(true) || await getActiveTab(false);
    if (!tab) throw new Error("Cannot find active tab.");

    // The header row comes first, extracted rows follow.
    const data = [fieldSelectors];
    if (urls.length) {
        // Pages are visited strictly one after another because they share one tab.
        for (const url of urls) {
            await redirectTab(tab, url);
            const results = await extractTabData(tab, itemsSelector, fieldSelectors);
            if (results) data.push(...results);
        }
    } else {
        const results = await extractTabData(tab, itemsSelector, fieldSelectors);
        if (results) data.push(...results);
    }
    return new ExtractResult(data);
}
|
||||
|
||||
/**
 * Turn the "target url" part of a task's arguments into a list of urls.
 *
 * Accepted shapes:
 *   parseUrls()                              -> []
 *   parseUrls(urls:string[])                 -> the same array
 *   parseUrls(result:ExtractResult)          -> all truthy cells of the result
 *   parseUrls(url:string, pages:number[])    -> url with "${page}" filled per page
 *   parseUrls(url:string, from, to, interval) -> url filled for from, from+interval, ... <= to
 *
 * Every occurrence of "${page}" in the template is replaced.
 *
 * @param {...any} args url arguments as described above
 * @returns {string[]} the list of urls, empty if nothing matches
 * @throws {RangeError} if from/to/interval are not finite numbers or interval <= 0
 *         (such a range would otherwise never terminate)
 */
function parseUrls(...args) {
    if (!args.length) return [];
    const [target, ...rest] = args;

    if (target instanceof Array) return target;
    if (target instanceof ExtractResult) return target.squash().filter(v => !!v);
    if (!target) return [];

    // split/join replaces all placeholders and treats the page value literally.
    const fill = page => target.split("${page}").join(String(page));

    if (rest[0] instanceof Array) return rest[0].map(fill);

    if (rest.length >= 3) {
        // Coerce to numbers so that numeric strings do not turn `+=` into concatenation.
        const from = Number(rest[0]);
        const to = Number(rest[1]);
        const interval = Number(rest[2]);
        if (![from, to, interval].every(Number.isFinite) || interval <= 0) {
            throw new RangeError(
                `Invalid page range: from=${rest[0]}, to=${rest[1]}, interval=${rest[2]}`
            );
        }
        const urls = [];
        for (let page = from; page <= to; page += interval) {
            urls.push(fill(page));
        }
        return urls;
    }
    return [];
}
|
||||
|
||||
/**
 * Navigate the tab to `url` (if it is not already there) and wait until the
 * tab reports a url again.
 *
 * @param {any} tab target tab
 * @param {string} url url the tab should go to
 * @returns {Promise<string>} a promise of the url reported by the tab afterwards
 */
function redirectTab(tab, url) {
    // Url the tab was on before navigating; empty means "accept any url".
    let curUrl = "";
    return queryUrl(tab, undefined, 'Query current url...')
        .then(u => {
            if (url !== u) {
                curUrl = u;
                const req = {
                    action: ACTION_GOTO_URL,
                    url: url
                };
                // Deliberately not awaited: completion is detected by polling the
                // url below. A failure of the request itself is logged here so it
                // does not surface as an unhandled promise rejection.
                sendMessage(tab, req, `Goto url: ${url}`)
                    .catch(err => console.log(`Goto url failed: ${url}`, err));
            }
        })
        // Resolves once the tab reports a url different from the one it left.
        .then(() => queryUrl(tab, curUrl, 'Check if tab url matches expected...'));
}
|
||||
|
||||
/**
 * Ask the target tab to extract data and wait for its answer.
 * The request is repeated (by sendMessage) until the tab answers with
 * anything other than `undefined`.
 * @param {any} tab target tab
 * @param {string} itemsSelector selector for the items (data rows)
 * @param {Array<string>} fieldSelectors selectors for the fields (data columns) under each item
 * @returns {Promise<string[]>} a promise of extracted data
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    const hasAnswer = response => response !== undefined;
    const request = {
        action: ACTION_EXTRACT,
        itemsSelector,
        fieldSelectors
    };
    return sendMessage(tab, request, 'Extract data from the tab...', hasAnswer);
}
|
||||
|
||||
/**
 * Ask the target tab to report in; usually used to detect whether the
 * content script is ready. The request is repeated (by sendMessage) until
 * the tab answers with the action name that was sent.
 * @param {any} tab target tab
 * @returns {Promise<string>} a promise of the report-in message
 */
function reportIn(tab) {
    const request = { action: ACTION_REPORT_IN };
    // Loose comparison kept on purpose to match the established behaviour.
    const echoesAction = reply => reply == request.action;
    return sendMessage(tab, request, 'Check tab availability...', echoesAction);
}
|
||||
|
||||
/**
 * Get the url of the target tab.
 * @param {any} tab target tab
 * @param {string} urlExcluded if specified, queryUrl resolves only when the response differs from urlExcluded
 * @param {string} log message logged to console on every attempt
 * @returns {Promise<string>} a promise of the url
 */
function queryUrl(tab, urlExcluded, log) {
    const accept = url => {
        // An empty answer is never accepted.
        if (!url) return url;
        // No exclusion given: any non-empty url will do.
        if (!urlExcluded) return true;
        return urlExcluded != url;
    };
    return sendMessage(tab, { action: ACTION_QUERY_URL }, log, accept);
}
|
||||
|
||||
/**
 * Promise wrapper around chrome.tabs.create.
 * @param {string} url url to open in the new tab
 * @param {boolean} active value for the `active` create property
 * @returns {Promise<any>} a promise of whatever chrome.tabs.create passes to its callback
 */
async function createTab(url, active) {
    const createProperties = {
        'url': url,
        'active': active
    };
    return new Promise(resolve => {
        chrome.tabs.create(createProperties, created => resolve(created));
    });
}
|
||||
|
||||
/**
 * Find the active tab.
 * @param {boolean} currentWindow value for the `currentWindow` query filter
 * @returns {Promise<any>} a promise of the first matching tab, or `undefined`
 *          when no tab matches or the query fails
 */
async function getActiveTab(currentWindow) {
    return new Promise(resolve => {
        chrome.tabs.query({
            active: true,
            currentWindow: currentWindow
        }, tabs => {
            // On failure `tabs` is not an array; indexing it would throw inside
            // the callback and leave this promise pending forever.
            // NOTE(review): reading lastError is also what Chrome expects from a
            // callback to avoid an "Unchecked runtime.lastError" log - confirm.
            if (chrome.runtime.lastError || !tabs) {
                resolve(undefined);
                return;
            }
            resolve(tabs[0]);
        });
    });
}
|
||||
|
||||
/**
 * Promise wrapper around chrome.tabs.get.
 * @param {number} id id of the tab to look up
 * @returns {Promise<any>} a promise of whatever chrome.tabs.get passes to its callback
 */
async function getTabByID(id) {
    return new Promise(resolve => {
        const onTab = found => {
            // lastError is read and discarded; the caller only looks at `found`.
            chrome.runtime.lastError;
            resolve(found);
        };
        chrome.tabs.get(id, onTab);
    });
}
|
||||
143
scripts/background/extractor.js
Normal file
143
scripts/background/extractor.js
Normal file
@ -0,0 +1,143 @@
|
||||
/**
 * Runs a chain of extraction tasks in one tab.
 * The result of every task is cached by task position, so a later task can
 * be re-run (restart) or its result saved (save) without repeating earlier ones.
 */
class Extractor {
    constructor() {
        // Argument lists of the queued tasks, in execution order.
        this._tasks = [];
        this._tab = undefined;
        this._running = false;
        // _results[i] is the result of _tasks[i].
        this._results = [];
    }

    /**
     * Add a task to Extractor.
     * One Extractor can have multiple tasks, which are organized in a task chain.
     * A later task uses the previous task's result as input (target url list).
     * So only the first task can have target url arguments, while later tasks can't.
     * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
     * @returns {Extractor} this, for chaining
     */
    task(...args) {
        if (!testArgs(...args)) {
            console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`);
            // The invalid task is not queued; tasks queued earlier are kept.
            return this;
        }
        // given >2 arguments means the task specifies target page,
        // so it won't accept last task result as url list.
        // in this case, former tasks are useless, can be cleared.
        if (args.length > 2) this.clear();
        this._tasks.push(args);
        return this;
    }

    /**
     * Clear tasks and caches.
     */
    clear() {
        this._tasks = [];
        this._results = [];
    }

    /**
     * Start the task chain from the first task.
     * @returns {Promise<void>} settles when the chain has finished or failed
     */
    async start() {
        if (this._running) {
            console.log('The Extractor is running. Please wait..');
            return;
        }
        if (!this._tasks.length) {
            console.log('No task to run.');
            return;
        }
        const firstTaskArgs = this._tasks[0];
        return this._run(0, undefined, () => {
            if (firstTaskArgs.length > 2) {
                // task specifies target urls, create new tab with first url for it
                const urls = parseUrls(...firstTaskArgs.slice(2));
                return createTab(urls[0], false);
            }
            return getActiveTab(false);
        });
    }

    /**
     * restart from specified task, but don't restart the previous tasks.
     * @param {number} taskid from which restart the tasks (1-based, defaults to 1)
     * @returns {Promise<void>} settles when the chain has finished or failed
     */
    async restart(taskid) {
        if (this._running) {
            console.log('The Extractor is running. Please wait..');
            return;
        }
        taskid = this._checkTaskId(taskid, 1);
        if (!taskid) return;
        if (taskid == 1) return this.start();

        // The previous task's result is the input of the task to restart.
        const cache = this._results[taskid - 2];
        if (!cache) {
            console.log(`No result cache for task (id ${taskid}). \nMake sure call ".start()" before ".restart()"?`);
            return;
        }
        return this._run(taskid - 1, cache, () => createTab(parseUrls(cache)[0], false));
    }

    /**
     * Save result of a task
     * @param {number} taskid which task id to save (1-based, defaults to the last task).
     */
    save(taskid) {
        taskid = this._checkTaskId(taskid, this._tasks.length);
        if (!taskid) return;
        const result = this._results[taskid - 1];
        if (!result) {
            console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
            return;
        }
        if (confirm(
            `Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${result.toString(50) || "- Empty -"}`
        )) {
            saveFile(result, "text/csv");
        }
    }

    /**
     * Run the tasks from `startIndex` to the end, one after another, then offer
     * the last result for saving. Errors are logged, never thrown, and the
     * running flag is always released.
     * @param {number} startIndex 0-based index of the first task to run
     * @param {any} seed input for the first task; undefined means "no input"
     * @param {function} openTab returns (a promise of) the tab to work in
     */
    async _run(startIndex, seed, openTab) {
        this._running = true;
        try {
            try {
                this._tab = await openTab();
                // Snapshot, so tasks added while running do not join this run.
                const tasks = this._tasks.slice();
                let input = seed;
                for (let i = startIndex; i < tasks.length; i++) {
                    const result = input === undefined
                        ? await getData(this._tab, ...tasks[i])
                        : await getData(this._tab, ...tasks[i], input);
                    this._results[i] = result;
                    input = result;
                }
            } finally {
                this._running = false;
            }
            this.save();
        } catch (err) {
            console.log(err);
        }
    }

    /**
     * Validate a 1-based task id.
     * @param {any} id the id to check
     * @param {number} defaultId used when `id` is undefined or null
     * @returns {number} the id as an integer, or 0 if it is invalid
     */
    _checkTaskId(id, defaultId) {
        if (!this._tasks.length) {
            console.log("No task found.");
            return 0;
        }
        if (defaultId && (id === undefined || id === null)) id = defaultId;
        const taskid = Number(id);
        if (!Number.isInteger(taskid) || taskid < 1 || taskid > this._tasks.length) {
            console.log(`Invalid task id. Rang(1-${this._tasks.length})`);
            return 0;
        }
        return taskid;
    }
}
|
||||
50
scripts/background/messaging.js
Normal file
50
scripts/background/messaging.js
Normal file
@ -0,0 +1,50 @@
|
||||
|
||||
/**
 * Repeatedly send a message to the target tab until the response is detected good.
 * @param {object} tab the tab where to send the message
 * @param {object} req the request data.
 * @param {string} log message logged to console on every attempt (optional).
 * @param {function} cond success condition function, r:any=>boolean; without it
 *        the first response is accepted.
 * @param {number} interval milliseconds between attempts, 500 if omitted or 0.
 * @return {Promise} a promise of the response. It rejects with an Error when the
 *         tab is gone, when chrome reports a runtime error, or when an attempt
 *         throws unexpectedly.
 */
function sendMessage(tab, req, log, cond, interval) {
    const delay = interval || 500;
    return new Promise((resolve, reject) => {
        // Handles one response; schedules the next attempt if it is not good yet.
        const onResponse = r => {
            try {
                if (chrome.runtime.lastError) {
                    reject(new Error(chrome.runtime.lastError.message));
                    return;
                }
                const flag = !cond || cond(r);
                if (log) console.log(log, flag ? '(OK)' : '(failed)');
                if (flag) {
                    resolve(r);
                } else {
                    setTimeout(attempt, delay);
                }
            } catch (err) {
                // e.g. `cond` threw: fail the promise instead of hanging.
                reject(err);
            }
        };

        // One round: make sure the tab still exists, then send the request.
        const loop = async () => {
            const tabAvailable = await getTabByID(tab.id);
            if (!tabAvailable) {
                reject(new Error("Task interrupted due to the target tab is closed."));
                return;
            }
            chrome.tabs.sendMessage(tab.id, req, onResponse);
        };

        // Any unexpected failure of a round (e.g. `tab` is undefined) must reject
        // the promise; otherwise the caller would wait forever.
        const attempt = () => {
            loop().catch(reject);
        };

        attempt();
    });
}
|
||||
|
||||
// Refuse extension actions that are sent TO the background page: every request
// whose action starts with EXT_NAME is answered with a rejection text.
// Anything else (including malformed messages) is ignored without a response.
chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
    // Guard the shape first: a null message or a non-string action must not throw.
    if (!message || typeof message.action !== 'string' || !message.action.startsWith(EXT_NAME)) {
        return;
    }
    sendResponse("Calling from user pages is not allowed.");
});
|
||||
34
scripts/background/result.js
Normal file
34
scripts/background/result.js
Normal file
@ -0,0 +1,34 @@
|
||||
/**
 * Holds extracted data as an array of rows, each row being an array of cells.
 */
class ExtractResult {
    /**
     * @param {Array<Array<any>>} data rows of cells; defaults to no rows
     */
    constructor(data) {
        this._data = data || [];
    }

    /**
     * @param {number} index row index
     * @returns {Array<any>} the row at `index`
     */
    row(index) {
        return this._data[index];
    }

    /**
     * @param {number} index column index
     * @returns {Array<any>} the cell at `index` of every row
     */
    column(index) {
        return this._data.map(cells => cells[index]);
    }

    /**
     * @returns {Array<any>} all cells of all rows in one flat array
     */
    squash() {
        return this._data.reduce((p, c) => p.concat(c), []);
    }

    get data() {
        return this._data;
    }

    /**
     * Render the rows as CSV text: every cell is trimmed and double-quoted,
     * embedded quotes are doubled, and every row ends with "\n".
     * Missing cells (null/undefined) become empty fields and non-string cells
     * are converted with String().
     * @param {number} rowsCount if > 0, only the first `rowsCount` rows are rendered
     * @returns {string} the CSV text, "" when there are no rows
     */
    toString(rowsCount) {
        const rows = rowsCount > 0 ? this._data.slice(0, rowsCount) : this._data;
        const quote = cell => {
            const text = cell === undefined || cell === null ? "" : String(cell);
            return '"' + text.trim().replace(/"/g, '""') + '"';
        };
        return rows
            .map(cells => cells.map(quote).join(",") + "\n")
            .join("");
    }
}
|
||||
68
scripts/background/signiture.js
Normal file
68
scripts/background/signiture.js
Normal file
@ -0,0 +1,68 @@
|
||||
const signitures = `
|
||||
## Usage
|
||||
new Extractor().task(...args).task(...args).start();
|
||||
|
||||
## Extractor.task() Signitures:
|
||||
function(itemsSelector:string, fieldSelectors:string[])
|
||||
function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
|
||||
function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
|
||||
function(itemsSelector:string, fieldSelectors:string[], urls:string[])
|
||||
|
||||
## Example:
|
||||
// extract all links text & url under '.item' elements
|
||||
// use 'selector@attr' to get attribute of the field elements
|
||||
new Extractor().task(".item", ["a", "a@href"]).start();
|
||||
|
||||
## See Detailed Help:
|
||||
https://git.jebbs.co/jebbs/data-extracter-extesion
|
||||
`.trim();
|
||||
|
||||
/**
 * Check whether an argument list matches one of the supported task signatures:
 *   (itemsSelector:string, fieldSelectors:string[])
 *   (itemsSelector:string, fieldSelectors:string[], urls:string[] | ExtractResult)
 *   (itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
 *   (itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
 *
 * `itemsSelector` must be a non-empty string. `from`, `to` and `interval` must
 * be finite numbers and `interval` must be positive, because a page range with
 * a non-positive step never ends.
 *
 * @param {...any} args the arguments to check
 * @returns {boolean} true if the arguments are valid
 */
function testArgs(...args) {
    const isArrayOf = (value, type) =>
        value instanceof Array && value.every(v => typeof v === type);
    const isFiniteNumber = v => typeof v === "number" && Number.isFinite(v);

    const [itemsSelector, fieldSelectors, ...target] = args;
    if (typeof itemsSelector !== "string" || itemsSelector === "") return false;
    if (!isArrayOf(fieldSelectors, "string")) return false;

    // `target` is whatever follows the two selectors.
    switch (target.length) {
        case 0:
            return true;
        case 1:
            return isArrayOf(target[0], "string") || target[0] instanceof ExtractResult;
        case 2:
            return typeof target[0] === "string" && isArrayOf(target[1], "number");
        case 4:
            return typeof target[0] === "string" &&
                target.slice(1).every(isFiniteNumber) &&
                target[3] > 0;
        default:
            return false;
    }
}
|
||||
|
||||
/**
 * Render an argument list for log messages: arrays as "[a, b]", everything
 * else via String(), joined with ", ".
 * String() is used instead of .toString() because this is called for invalid
 * arguments, which may well be undefined or null.
 * @param {...any} args the arguments to render
 * @returns {string} the rendered list
 */
function argsToString(...args) {
    return args
        .map(v => (v instanceof Array ? `[${v.join(', ')}]` : String(v)))
        .join(', ');
}
|
||||
Reference in New Issue
Block a user