Compare commits
5 Commits
c504942144
...
f1cf32b83a
| Author | SHA1 | Date | |
|---|---|---|---|
| f1cf32b83a | |||
| 341abebc66 | |||
| 0cf04c3f79 | |||
| 6134289d0a | |||
| 0e62d914c1 |
@ -22,6 +22,7 @@
|
|||||||
"scripts/background/result.js",
|
"scripts/background/result.js",
|
||||||
"scripts/background/signiture.js",
|
"scripts/background/signiture.js",
|
||||||
"scripts/background/actions.js",
|
"scripts/background/actions.js",
|
||||||
|
"scripts/background/task.js",
|
||||||
"scripts/background/extractor.js",
|
"scripts/background/extractor.js",
|
||||||
"scripts/background/helpers.js"
|
"scripts/background/helpers.js"
|
||||||
],
|
],
|
||||||
|
|||||||
73
readme.md
73
readme.md
@ -50,17 +50,14 @@ function (itemsSelector:string, fieldSelectors:string[], urls:string[])
|
|||||||
function (itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
function (itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Advanced Usage
|
## Stop Tasks
|
||||||
|
|
||||||
### Stop Tasks
|
The only way to stop tasks before they finish is by `closing the target tab`.
|
||||||
|
|
||||||
Tasks wait for their target elements' appearance, given some elements were loaded asynchronously.
|
> Tasks wait for their target elements to appear, since some elements may be loaded asynchronously.
|
||||||
|
> If you type wrong selectors, the task waits forever for elements which don't exist.
|
||||||
|
|
||||||
But if you typed wrong selectors, the task waits forever for elements which don't exists.
|
## Extract Attributes.
|
||||||
|
|
||||||
The only way to stop tasks before its finish, is `Closing the host tab`.
|
|
||||||
|
|
||||||
### Extract Attributes.
|
|
||||||
|
|
||||||
e.g.: link text and target (use 'selector@attribute')
|
e.g.: link text and target (use 'selector@attribute')
|
||||||
|
|
||||||
@ -68,20 +65,43 @@ e.g.: link text and target (use 'selector@attribute')
|
|||||||
new Extractor().task('.item', ['a', 'a@href']).start();
|
new Extractor().task('.item', ['a', 'a@href']).start();
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Advanced Usage
|
||||||
|
|
||||||
### Use Task Chain.
|
### Use Task Chain.
|
||||||
|
|
||||||
e.g.: Collect links from `http://sample.com/abc`, then, Extract data of each link
|
e.g.: Collect links from `http://sample.com/abc`, then, Extract data of each link
|
||||||
|
|
||||||
```js
|
```js
|
||||||
new Extractor()
|
e = new Extractor()
|
||||||
.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||||
.task('list-item', ["a.title", "p.content"])
|
.task('list-item', ["a.title", "p.content"])
|
||||||
.start();
|
.start();
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Continue Tasks
|
||||||
|
|
||||||
|
You can always continue the tasks with the following call, even if execution stopped in the middle of a task:
|
||||||
|
|
||||||
|
```js
|
||||||
|
e.start()
|
||||||
|
```
|
||||||
|
|
||||||
|
The `Extractor` keeps the state of the last execution, and starts from where it stopped.
|
||||||
|
|
||||||
|
### Restart Tasks
|
||||||
|
|
||||||
|
What if you don't want to continue from the last state, but want to restart from a certain task?
|
||||||
|
|
||||||
|
```js
|
||||||
|
// restart all tasks
|
||||||
|
e.restart(0)
|
||||||
|
// restart from 2nd task
|
||||||
|
e.restart(1)
|
||||||
|
```
|
||||||
|
|
||||||
### Save Result of Any Task
|
### Save Result of Any Task
|
||||||
|
|
||||||
To a multiple task (chain) Extractor `e`:
|
For a multi-task Extractor `e`:
|
||||||
|
|
||||||
```js
|
```js
|
||||||
e = new Extractor()
|
e = new Extractor()
|
||||||
@ -98,37 +118,12 @@ Incase you want to save it again, use:
|
|||||||
e.save()
|
e.save()
|
||||||
```
|
```
|
||||||
|
|
||||||
You may want to save another task's result, other than the final:
|
To save the result of a task other than the final one:
|
||||||
|
|
||||||
```js
|
```js
|
||||||
// save the result of first task
|
// save the result of first task
|
||||||
// to the example above, that is a list of urls
|
// to the example above, that is a list of urls
|
||||||
|
e.save(0)
|
||||||
|
// save the result of second task
|
||||||
e.save(1)
|
e.save(1)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Restart Tasks
|
|
||||||
|
|
||||||
In cases some later task fails, you don't need to restart all task.
|
|
||||||
|
|
||||||
Here we have 2 tasks:
|
|
||||||
|
|
||||||
```js
|
|
||||||
e = new Extractor()
|
|
||||||
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
|
||||||
.task('list-item', ["a.title", "p.content"])
|
|
||||||
.start();
|
|
||||||
```
|
|
||||||
|
|
||||||
Suppose the second task fails, we can restart and continue from the task 2:
|
|
||||||
|
|
||||||
```js
|
|
||||||
e.restart(2);
|
|
||||||
```
|
|
||||||
|
|
||||||
If you'd like restart all task, use:
|
|
||||||
|
|
||||||
```js
|
|
||||||
e.start();
|
|
||||||
// or
|
|
||||||
e.restart();
|
|
||||||
```
|
|
||||||
@ -55,7 +55,7 @@ function parseUrls(...args) {
|
|||||||
if (arg instanceof Array) {
|
if (arg instanceof Array) {
|
||||||
return arg;
|
return arg;
|
||||||
} else if (arg instanceof ExtractResult) {
|
} else if (arg instanceof ExtractResult) {
|
||||||
return arg.squash().filter(v => !!v);
|
return arg.squash().filter(v => URL_REG.test(v));
|
||||||
} else {
|
} else {
|
||||||
let urlTempl = arg;
|
let urlTempl = arg;
|
||||||
if (urlTempl) {
|
if (urlTempl) {
|
||||||
@ -86,10 +86,10 @@ function redirectTab(tab, url) {
|
|||||||
action: ACTION_GOTO_URL,
|
action: ACTION_GOTO_URL,
|
||||||
url: url
|
url: url
|
||||||
}
|
}
|
||||||
sendMessage(tab, req, `Goto url: ${url}`);
|
return sendMessage(tab, req, `Goto url: ${url}`);
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.then(() => queryUrl(tab, curUrl, 'Check if tab url matches expected...'))
|
.then(() => queryUrl(tab, url, 'Check if tab url matches expected...'))
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -110,32 +110,46 @@ function extractTabData(tab, itemsSelector, fieldSelectors) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get report in from the target tab, usually used to detect if the content script is ready.
|
* ping target tab, usually used to detect if the content script is ready.
|
||||||
* @param {any} tab target tab
|
* @param {any} tab target tab
|
||||||
* @returns {Promise<string>} a promise of the report in message
|
* @returns {Promise<boolean>} a promise of a boolean value indicating whether the ping succeeded
|
||||||
*/
|
*/
|
||||||
function reportIn(tab) {
|
async function ping(tab, count = 1) {
|
||||||
let req = {
|
let req = {
|
||||||
action: ACTION_REPORT_IN
|
action: ACTION_REPORT_IN
|
||||||
}
|
}
|
||||||
let cond = r => r == req.action;
|
let cond = r => r == req.action;
|
||||||
return sendMessage(tab, req, 'Check tab availability...', cond);
|
let pong = await sendMessage(tab, req, 'Check tab availability...', cond, 1000, count).catch(() => { });
|
||||||
|
return pong == ACTION_REPORT_IN;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the url of the target tab
|
* get the url of the target tab
|
||||||
* @param {any} tab target tab
|
* @param {any} tab target tab
|
||||||
* @param {string} urlExcluded if specified, queryUrl resolves only when response not equals to urlExcluded
|
* @param {string} expected if specified, queryUrl resolves only when tab url equals to expected
|
||||||
* @returns {Promise<string>} a promise of the url
|
* @returns {Promise<string>} a promise of the url
|
||||||
*/
|
*/
|
||||||
function queryUrl(tab, urlExcluded, log) {
|
function queryUrl(tab, expected, log) {
|
||||||
let req = {
|
let req = {
|
||||||
action: ACTION_QUERY_URL
|
action: ACTION_QUERY_URL
|
||||||
}
|
}
|
||||||
let cond = url => url && (!urlExcluded || (urlExcluded && urlExcluded != url));
|
let cond = url => url && (!expected || (expected && expected == url));
|
||||||
return sendMessage(tab, req, log, cond);
|
return sendMessage(tab, req, log, cond);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* scroll the target tab to the bottom of the page
|
||||||
|
* @param {any} tab target tab
|
||||||
|
* @returns {Promise} a promise of the response from the target tab
|
||||||
|
*/
|
||||||
|
function scrollToBottom(tab) {
|
||||||
|
let req = {
|
||||||
|
action: ACTION_SCROLL_BOTTOM
|
||||||
|
}
|
||||||
|
return sendMessage(tab, req, 'Scroll to page bottom...');
|
||||||
|
}
|
||||||
|
|
||||||
async function createTab(url, active) {
|
async function createTab(url, active) {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
chrome.tabs.create({
|
chrome.tabs.create({
|
||||||
|
|||||||
@ -1,41 +1,45 @@
|
|||||||
class Extractor {
|
class Extractor {
|
||||||
constructor() {
|
constructor(options) {
|
||||||
this._tasks = [];
|
this._tasks = [];
|
||||||
this._tab = undefined;
|
|
||||||
this._running = false;
|
this._running = false;
|
||||||
this._results = {};
|
this._options = options;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Add a task to Extractor. \n
|
* Add a task to Extractor. \n
|
||||||
* One Extractor can have multiple tasks, which are organized in a task chain.
|
* One Extractor can have multiple tasks, which are organized in a task chain.
|
||||||
* Later task will use previous task result as input (target url list).
|
* If url arguments are not given for a later task, it uses the previous task's result as input (target url list).
|
||||||
* So only the first task can have target url arguments, while later tasks can't.
|
|
||||||
* @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
|
* @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
|
||||||
*/
|
*/
|
||||||
task(...args) {
|
task(...args) {
|
||||||
if (!testArgs(...args)) {
|
this._tasks.push(new Task(this._options, ...args));
|
||||||
console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`);
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
// given >2 arguments means the task specifies target page,
|
|
||||||
// so it won't accept last task result as url list.
|
|
||||||
// in this case, former tasks are useless, can be cleared.
|
|
||||||
if (args.length > 2) this.clear();
|
|
||||||
this._tasks.push(args);
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Clear tasks and caches.
|
* Clear tasks and task caches.
|
||||||
*/
|
*/
|
||||||
clear() {
|
clear() {
|
||||||
this._tasks = [];
|
this._tasks = [];
|
||||||
this._results = [];
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Start the task chain.
|
* Start the task chain.
|
||||||
*/
|
*/
|
||||||
async start() {
|
async start() {
|
||||||
|
return this._startTasks(0);
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* restart from specified task, but don't restart the previous tasks.
|
||||||
|
* @param {number} from where to restart the tasks, begins with 0
|
||||||
|
*/
|
||||||
|
async restart(from = 0) {
|
||||||
|
let id = this._checkTaskId(from, 0);
|
||||||
|
if (id < 0) return;
|
||||||
|
for (let i = id; i < this._tasks.length; i++) {
|
||||||
|
this._tasks[i].clean();
|
||||||
|
}
|
||||||
|
return this._startTasks(0);
|
||||||
|
}
|
||||||
|
async _startTasks(from) {
|
||||||
if (this._running) {
|
if (this._running) {
|
||||||
console.log('The Extractor is running. Please wait..');
|
console.log('The Extractor is running. Please wait..');
|
||||||
return;
|
return;
|
||||||
@ -44,107 +48,75 @@ class Extractor {
|
|||||||
console.log('No task to run.');
|
console.log('No task to run.');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
let firstTaskArgs = this._tasks[0];
|
|
||||||
if (firstTaskArgs.length > 2) {
|
let tab;
|
||||||
|
let task = this._tasks[0];
|
||||||
|
if (task.urls.length) {
|
||||||
// task specifies target urls, create new tab with first url for it
|
// task specifies target urls, create new tab with first url for it
|
||||||
let urls = parseUrls(...firstTaskArgs.slice(2, firstTaskArgs.length));
|
tab = await createTab(task.urls[0], false);
|
||||||
this._tab = await createTab(urls[0], false);
|
|
||||||
} else {
|
} else {
|
||||||
this._tab = await getActiveTab(false);
|
tab = await getActiveTab(true) || await getActiveTab(false);
|
||||||
|
let succ = await ping(tab);
|
||||||
|
if (!succ) {
|
||||||
|
console.log('Cannot contact with active tab.');
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
this._running = true;
|
this._running = true;
|
||||||
return this._tasks.reduce((pms, args, i, tasks) => {
|
return this._tasks.reduce((pms, task, i) => {
|
||||||
return pms.then(
|
return pms.then(
|
||||||
result => {
|
() => {
|
||||||
if (result === undefined) return getData(this._tab, ...args);
|
if (i < from) return;
|
||||||
this._results[tasks[i - 1]] = result;
|
if (i > 0) {
|
||||||
return getData(this._tab, ...args, result);
|
let prevTask = this._tasks[i - 1];
|
||||||
|
return task.execute(tab, new ExtractResult(prevTask.results));
|
||||||
|
}
|
||||||
|
return task.execute(tab, undefined);
|
||||||
});
|
});
|
||||||
}, Promise.resolve(undefined)).then(
|
}, Promise.resolve(undefined)).then(
|
||||||
result => {
|
() => {
|
||||||
this._results[this._tasks[this._tasks.length - 1]] = result;
|
|
||||||
this._running = false;
|
|
||||||
console.log("Tasks are all done.")
|
|
||||||
this.save();
|
|
||||||
}
|
|
||||||
).catch(err => {
|
|
||||||
this._running = false;
|
|
||||||
console.log(err)
|
|
||||||
});
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* restart from specified task, but don't restart the previous tasks.
|
|
||||||
* @param {number} taskid from which restart the tasks
|
|
||||||
*/
|
|
||||||
async restart(taskid) {
|
|
||||||
if (this._running) {
|
|
||||||
console.log('The Extractor is running. Please wait..');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
taskid = this._checkTaskId(taskid, 1);
|
|
||||||
if (!taskid) return;
|
|
||||||
if (taskid == 1) {
|
|
||||||
this.start();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
let cache = this._results[this._tasks[taskid - 2]];
|
|
||||||
if (!cache) {
|
|
||||||
console.log(`No result cache for task (id ${taskid}). \nMake sure call ".start()" before ".restart()"?`);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
this._running = true;
|
|
||||||
this._tab = await createTab(parseUrls(cache)[0], false)
|
|
||||||
return this._tasks.slice(taskid - 1).reduce((pms, args, i, tasks) => {
|
|
||||||
return pms.then(
|
|
||||||
result => {
|
|
||||||
this._results[tasks[i - 1]] = result;
|
|
||||||
return getData(this._tab, ...args, result);
|
|
||||||
});
|
|
||||||
}, Promise.resolve(cache)).then(
|
|
||||||
result => {
|
|
||||||
this._results[this._tasks[this._tasks.length - 1]] = result;
|
|
||||||
this._running = false;
|
this._running = false;
|
||||||
this.save();
|
this.save();
|
||||||
}
|
}
|
||||||
).catch(err => {
|
).catch(err => {
|
||||||
this._running = false;
|
this._running = false;
|
||||||
console.log(err)
|
console.log(err);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Save result of a task
|
* Save result of a task
|
||||||
* @param {number} taskid which task id to save.
|
* @param {number} taskid which task id to save, begins with 0
|
||||||
*/
|
*/
|
||||||
save(taskid) {
|
save(taskid) {
|
||||||
taskid = this._checkTaskId(taskid, this._tasks.length);
|
let id = this._checkTaskId(taskid, this._tasks.length - 1);
|
||||||
if (!taskid) return;
|
if (id < 0) return;
|
||||||
const result = this._results[this._tasks[taskid - 1]];
|
let results = this._tasks[id].results
|
||||||
if (!result) {
|
results.unshift(this._tasks[id].fieldSelectors);
|
||||||
console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
|
|
||||||
return;
|
let exResults = new ExtractResult(results);
|
||||||
}
|
|
||||||
if (result.data.length <= 1) { // 1 for selector headers
|
if (!results.length) {
|
||||||
console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
|
console.log(`No result for task #${id}. Forget to call ".start()"?`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
let msg = `
|
let msg = `
|
||||||
Please confirm to download (${result.data.length - 1} items):
|
Please confirm to download (${results.length - 1} items):
|
||||||
|
|
||||||
${result.toString(50) || "- Empty -"}
|
${exResults.toString(50) || "- Empty -"}
|
||||||
`.trim();
|
`.trim();
|
||||||
if (confirm(msg)) {
|
if (confirm(msg)) {
|
||||||
saveFile(result, "text/csv");
|
saveFile(exResults, "text/csv");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_checkTaskId(id, defaultId) {
|
_checkTaskId(id, defaultId) {
|
||||||
if (!this._tasks.length) {
|
if (!this._tasks.length) {
|
||||||
console.log("No task found.");
|
console.log("No task found.");
|
||||||
return 0;
|
return -1;
|
||||||
}
|
}
|
||||||
if (defaultId && id === undefined || this.task === null) id = defaultId;
|
if (!isNaN(defaultId) && id === undefined) id = defaultId;
|
||||||
if (isNaN(id) || id < 1 || id > this._tasks.length) {
|
if (isNaN(id) || id < 0 || id >= this._tasks.length) {
|
||||||
console.log(`Invalid task id. Rang(1-${this._tasks.length})`);
|
console.log(`Invalid task id. Rang(0-${this._tasks.length - 1})`);
|
||||||
return 0;
|
return -1;
|
||||||
}
|
}
|
||||||
return id
|
return id
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4,12 +4,15 @@
|
|||||||
* @param {object} tab the tab where to send the message
|
* @param {object} tab the tab where to send the message
|
||||||
* @param {object} req the request data.
|
* @param {object} req the request data.
|
||||||
* @param {function} cond success condition function, r:any=>boolean
|
* @param {function} cond success condition function, r:any=>boolean
|
||||||
* @param {number} interval interval for detecting
|
* @param {number} interval retry interval, default: 500ms.
|
||||||
|
* @param {number} limit retry limit, default: 0, no limit.
|
||||||
* @param {string} log messages logged to console.
|
* @param {string} log messages logged to console.
|
||||||
* @return {Promise} a promise of the response.
|
* @return {Promise} a promise of the response.
|
||||||
*/
|
*/
|
||||||
function sendMessage(tab, req, log, cond, interval) {
|
function sendMessage(tab, req, log, cond, interval, limit = 0) {
|
||||||
interval = interval || 500;
|
interval = interval || 500;
|
||||||
|
limit = limit && !isNaN(limit) ? limit : 0;
|
||||||
|
count = 0;
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
|
|
||||||
loop();
|
loop();
|
||||||
@ -22,11 +25,17 @@ function sendMessage(tab, req, log, cond, interval) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
chrome.tabs.sendMessage(tab.id, req, r => {
|
if (limit && count >= limit) {
|
||||||
if (chrome.runtime.lastError) {
|
reject(`sendMessage loop limit ${limit} reached.`);
|
||||||
reject(chrome.runtime.lastError.message);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
count++;
|
||||||
|
chrome.tabs.sendMessage(tab.id, req, r => {
|
||||||
|
// check error but do nothing.
|
||||||
|
// do not interrupt promise chains even if error, or the task always fail when:
|
||||||
|
// a tab is newly created, and the content scripts won't have time to initialize
|
||||||
|
chrome.runtime.lastError;
|
||||||
|
|
||||||
let flag = !cond || cond(r);
|
let flag = !cond || cond(r);
|
||||||
if (log) console.log(log, flag ? '(OK)' : '(failed)');
|
if (log) console.log(log, flag ? '(OK)' : '(failed)');
|
||||||
if (flag) {
|
if (flag) {
|
||||||
|
|||||||
78
scripts/background/task.js
Normal file
78
scripts/background/task.js
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
class Task {
|
||||||
|
_data = {};
|
||||||
|
_data_keys = [];
|
||||||
|
/**
|
||||||
|
* Create a task.
|
||||||
|
* constructor(itemsSelector:string, fieldSelectors:string[])
|
||||||
|
* constructor(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
|
||||||
|
* constructor(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
|
||||||
|
* constructor(itemsSelector:string, fieldSelectors:string[], urls:string[])
|
||||||
|
* @param {...any} args
|
||||||
|
*/
|
||||||
|
constructor(options, ...args) {
|
||||||
|
if (!testArgs(...args))
|
||||||
|
throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
|
||||||
|
this._options = options;
|
||||||
|
this._itemsSelector = args.shift();
|
||||||
|
this._fieldSelectors = args.shift();
|
||||||
|
this._urls = parseUrls(...args);
|
||||||
|
}
|
||||||
|
get urls() {
|
||||||
|
return this._urls;
|
||||||
|
}
|
||||||
|
get data() {
|
||||||
|
return this._data;
|
||||||
|
}
|
||||||
|
get results() {
|
||||||
|
return this._data_keys.reduce((p, c) => {
|
||||||
|
return p.concat(this._data[c]);
|
||||||
|
}, []);
|
||||||
|
}
|
||||||
|
get fieldSelectors() {
|
||||||
|
return this._fieldSelectors;
|
||||||
|
}
|
||||||
|
clean() {
|
||||||
|
this._data = {};
|
||||||
|
}
|
||||||
|
async execute(tab, upstreamData) {
|
||||||
|
if (!tab) return Promise.reject("No tab to execute the task.");
|
||||||
|
let urls = this._urls
|
||||||
|
if (!urls.length) {
|
||||||
|
if (upstreamData) {
|
||||||
|
urls = parseUrls(upstreamData);
|
||||||
|
} else {
|
||||||
|
urls = [await queryUrl(tab)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let saveResult = (results, key) => {
|
||||||
|
this._data[key] = results;
|
||||||
|
this._data_keys.push(key);
|
||||||
|
}
|
||||||
|
return urls.reduce((p, url, i) => p.then(
|
||||||
|
results => {
|
||||||
|
if (i > 0) {
|
||||||
|
if (!MSG_URL_SKIPPED.isEqual(results)) {
|
||||||
|
let lastURL = urls[i - 1];
|
||||||
|
saveResult(results, lastURL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (this._data[url]) return MSG_URL_SKIPPED;
|
||||||
|
let pms = redirectTab(tab, url);
|
||||||
|
if (this._options["scrollToBottom"]) {
|
||||||
|
pms = pms.then(() => scrollToBottom(tab));
|
||||||
|
}
|
||||||
|
return pms.then(
|
||||||
|
() => extractTabData(tab, this._itemsSelector, this._fieldSelectors)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
), Promise.resolve(null)).then(
|
||||||
|
results => {
|
||||||
|
if (!MSG_URL_SKIPPED.isEqual(results)) {
|
||||||
|
let lastURL = urls[urls.length - 1];
|
||||||
|
saveResult(results, lastURL);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,29 +1,40 @@
|
|||||||
chrome.runtime.onMessage.addListener(
|
(function () {
|
||||||
|
chrome.runtime.onMessage.addListener(
|
||||||
function (request, sender, sendResponse) {
|
function (request, sender, sendResponse) {
|
||||||
if (!request.action) return;
|
if (!request.action) return;
|
||||||
// console.log("Recieved request:",request);
|
// console.log("Recieved request:",request);
|
||||||
|
doAction(request, sender).then(r => sendResponse && sendResponse(r));
|
||||||
|
// return true to indicate you wish to send a response asynchronously
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
async function doAction(request, sender) {
|
||||||
switch (request.action) {
|
switch (request.action) {
|
||||||
case ACTION_EXTRACT:
|
case ACTION_EXTRACT:
|
||||||
let data = extract(request.itemsSelector, request.fieldSelectors);
|
let data = extract(request.itemsSelector, request.fieldSelectors);
|
||||||
if (sendResponse) sendResponse(data);
|
return data;
|
||||||
break;
|
|
||||||
case ACTION_GOTO_URL:
|
case ACTION_GOTO_URL:
|
||||||
window.location.replace(request.url);
|
window.location.replace(request.url);
|
||||||
if (sendResponse) sendResponse(request.url);
|
return request.url;
|
||||||
break;
|
|
||||||
case ACTION_REPORT_IN:
|
case ACTION_REPORT_IN:
|
||||||
if (sendResponse) sendResponse(request.action);
|
return request.action;
|
||||||
break;
|
|
||||||
case ACTION_QUERY_URL:
|
case ACTION_QUERY_URL:
|
||||||
if (sendResponse) sendResponse(window.location.href);
|
return window.location.href;
|
||||||
break;
|
case ACTION_SCROLL_BOTTOM:
|
||||||
|
return executeUntil(
|
||||||
|
() => window.scrollTo(0, document.body.clientHeight),
|
||||||
|
() => document.body.clientHeight - window.scrollY - window.innerHeight < 20,
|
||||||
|
"Scroll to page bottom...",
|
||||||
|
1000,
|
||||||
|
10
|
||||||
|
)
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
);
|
|
||||||
|
|
||||||
function extract(itemsSelector, fieldSelectors) {
|
function extract(itemsSelector, fieldSelectors) {
|
||||||
// since some elements may be loaded asynchronously.
|
// since some elements may be loaded asynchronously.
|
||||||
// if one field is never found, we should return undefined,
|
// if one field is never found, we should return undefined,
|
||||||
// so that senders can detect to retry until elements loaded.
|
// so that senders can detect to retry until elements loaded.
|
||||||
@ -50,4 +61,41 @@ function extract(itemsSelector, fieldSelectors) {
|
|||||||
// if it exists a field, which is not found in any row, the sender should retry.
|
// if it exists a field, which is not found in any row, the sender should retry.
|
||||||
let shouldWait = fieldSelectors.reduce((p, c) => p || !fieldFound[c], false);
|
let shouldWait = fieldSelectors.reduce((p, c) => p || !fieldFound[c], false);
|
||||||
return shouldWait ? MSG_ELEMENT_NOT_FOUND : results
|
return shouldWait ? MSG_ELEMENT_NOT_FOUND : results
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Repeatedly execute a function until the detector returns true.
|
||||||
|
* @param {object} fn the function to execute
|
||||||
|
* @param {object} detector the detector.
|
||||||
|
* @param {string} log messages logged to console.
|
||||||
|
* @param {number} interval interval for detecting
|
||||||
|
* @param {number} limit max execute times of a function
|
||||||
|
* @return {Promise} a promise of the response.
|
||||||
|
*/
|
||||||
|
function executeUntil(fn, detector, log, interval, limit) {
|
||||||
|
interval = interval || 500;
|
||||||
|
let count = 0;
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
|
||||||
|
loop();
|
||||||
|
|
||||||
|
async function loop() {
|
||||||
|
fn();
|
||||||
|
limit++;
|
||||||
|
if (limit && count >= limit) {
|
||||||
|
reject(false);
|
||||||
|
}
|
||||||
|
setTimeout(() => {
|
||||||
|
let flag = !detector || detector();
|
||||||
|
if (log) console.log(log, flag ? '(OK)' : '(failed)');
|
||||||
|
if (flag) {
|
||||||
|
resolve(true);
|
||||||
|
} else {
|
||||||
|
loop();
|
||||||
|
}
|
||||||
|
}, interval);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
})();
|
||||||
|
|||||||
@ -1,8 +1,12 @@
|
|||||||
const EXT_NAME = "DataExtracter";
|
const EXT_NAME = "DataExtracter";
|
||||||
|
|
||||||
|
const URL_REG = getWebUrl();
|
||||||
|
|
||||||
const ACTION_EXTRACT = `${EXT_NAME}:Extract`;
|
const ACTION_EXTRACT = `${EXT_NAME}:Extract`;
|
||||||
const ACTION_GOTO_URL = `${EXT_NAME}:GoToTUL`;
|
const ACTION_GOTO_URL = `${EXT_NAME}:GoToTUL`;
|
||||||
const ACTION_REPORT_IN = `${EXT_NAME}:ReportIn`;
|
const ACTION_REPORT_IN = `${EXT_NAME}:ReportIn`;
|
||||||
const ACTION_QUERY_URL = `${EXT_NAME}:QueryURL`;
|
const ACTION_QUERY_URL = `${EXT_NAME}:QueryURL`;
|
||||||
|
const ACTION_SCROLL_BOTTOM = `${EXT_NAME}:ScrollToBottom`;
|
||||||
|
|
||||||
const MSG_ELEMENT_NOT_FOUND = new ConstMessage(1, "No element found for at least one selector, maybe it's not loaded yet");
|
const MSG_ELEMENT_NOT_FOUND = new ConstMessage(1, "No element found for at least one selector, maybe it's not loaded yet");
|
||||||
|
const MSG_URL_SKIPPED = new ConstMessage(100, "Skipped current URL");
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user