Compare commits
6 Commits
f1cf32b83a
...
3d375261df
| Author | SHA1 | Date | |
|---|---|---|---|
| 3d375261df | |||
| 13e233fbe7 | |||
| 21d3dfb247 | |||
| 97c8aac58d | |||
| 09112bb506 | |||
| c7f4fe7cc4 |
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"manifest_version": 2,
|
"manifest_version": 2,
|
||||||
"name": "Data Extracter",
|
"name": "Data Extracter",
|
||||||
"version": "0.1.0",
|
"version": "0.5.0",
|
||||||
"author": "jebbs",
|
"author": "jebbs",
|
||||||
"description": "Extract data from web page elements as sheet.",
|
"description": "Extract data from web page elements as sheet.",
|
||||||
"icons": {
|
"icons": {
|
||||||
@ -18,6 +18,7 @@
|
|||||||
"scripts": [
|
"scripts": [
|
||||||
"scripts/shared/tools.js",
|
"scripts/shared/tools.js",
|
||||||
"scripts/shared/common.js",
|
"scripts/shared/common.js",
|
||||||
|
"scripts/background/logger.js",
|
||||||
"scripts/background/messaging.js",
|
"scripts/background/messaging.js",
|
||||||
"scripts/background/result.js",
|
"scripts/background/result.js",
|
||||||
"scripts/background/signiture.js",
|
"scripts/background/signiture.js",
|
||||||
@ -38,6 +39,7 @@
|
|||||||
"run_at": "document_idle"
|
"run_at": "document_idle"
|
||||||
}],
|
}],
|
||||||
"permissions": [
|
"permissions": [
|
||||||
"activeTab"
|
"activeTab",
|
||||||
|
"notifications"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -3,6 +3,7 @@
|
|||||||
<link>
|
<link>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<title>Data Extractor</title>
|
<title>Data Extractor</title>
|
||||||
|
<script charset="UTF-8" type="text/javascript" src="../scripts/shared/common.js"></script>
|
||||||
<script charset="UTF-8" type="text/javascript" src="tip.js"></script>
|
<script charset="UTF-8" type="text/javascript" src="tip.js"></script>
|
||||||
|
|
||||||
<link rel="stylesheet" href="styles/bootstrap.min.css">
|
<link rel="stylesheet" href="styles/bootstrap.min.css">
|
||||||
@ -32,7 +33,6 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
|
|
||||||
<div class="col">
|
<div class="col">
|
||||||
<h6>Quick Start</h6>
|
<h6>Quick Start</h6>
|
||||||
</div>
|
</div>
|
||||||
@ -42,22 +42,33 @@
|
|||||||
<div class="alert alert-success small">
|
<div class="alert alert-success small">
|
||||||
<p>
|
<p>
|
||||||
<b>Extract current page</b>:
|
<b>Extract current page</b>:
|
||||||
<br>new Extractor().task(".list-item", ["a.title", "p.content"]).start();
|
<br>> $(".list-item", ["a.title", "p.content"]);
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
<b>Extract multiple pages (1-10, interval 1)</b>:
|
<b>Extract multiple pages (1-10, interval 1)</b>:
|
||||||
<br>new Extractor().task(".list-item", ["a.title", "p.content"],
|
<br>> job=new Extractor().task(".list-item", ["a.title", "p.content"],
|
||||||
"http://sample.com/?pn=${page}", 1, 10, 1).start();
|
"http://sample.com/?pn=${page}", 1, 10, 1);
|
||||||
|
<br>> job.start();
|
||||||
|
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
<b>Full document:</b>
|
<b>Full document at:</b>
|
||||||
<br>
|
<br>
|
||||||
<a href="#" id="link-document">https://git.jebbs.co/jebbs/data-extracter-extesion</a>
|
<a href="#" id="link-document">https://git.jebbs.co/jebbs/data-extracter-extesion</a>
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
<h6>Saved State</h6>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
<input type="file" name="state" id="state-input">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
|
|||||||
18
popup/tip.js
18
popup/tip.js
@ -11,4 +11,22 @@ window.onload = function () {
|
|||||||
'url': `https://git.jebbs.co/jebbs/data-extracter-extesion`
|
'url': `https://git.jebbs.co/jebbs/data-extracter-extesion`
|
||||||
});
|
});
|
||||||
})
|
})
|
||||||
|
// When the user picks exactly one file in the "Saved State" input, read it
// as UTF-8 text and forward its content (plus the file name) to the
// extension runtime with the ACTION_UPLOAD_STATE action.
document.querySelector('#state-input')
    .addEventListener('change', function () {
        // Only react when a single file is selected.
        if (this.files.length !== 1) return;

        const file = this.files[0];
        const reader = new FileReader();

        // Attach the handlers before starting the read.
        reader.onload = function (evt) {
            chrome.runtime.sendMessage({
                action: ACTION_UPLOAD_STATE,
                state: evt.target.result,
                name: file.name
            }, r => {
                if (r) console.log('State sent:', r);
            });
        };
        // Report a failed read instead of silently doing nothing.
        reader.onerror = function () {
            console.error('Failed to read state file:', reader.error);
        };

        reader.readAsText(file, "UTF-8");
    });
|
||||||
}
|
}
|
||||||
70
readme.md
70
readme.md
@ -78,8 +78,54 @@ e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
|||||||
.start();
|
.start();
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Extractor Options
|
||||||
|
|
||||||
|
Specify extra options to make a task perform some actions before scraping the data.
|
||||||
|
|
||||||
|
```js
|
||||||
|
var job = new Extractor({ "scrollToBottom": 1 });
|
||||||
|
```
|
||||||
|
|
||||||
|
Available options:
|
||||||
|
|
||||||
|
- `scrollToBottom`: Try to scroll pages to the bottom; some elements are loaded only when the user needs them.
|
||||||
|
|
||||||
|
|
||||||
|
### Export Result of Any Task
|
||||||
|
|
||||||
|
For a multi-task Extractor `e`:
|
||||||
|
|
||||||
|
```js
|
||||||
|
e = new Extractor()
|
||||||
|
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||||
|
.task('list-item', ["a.title", "p.content"])
|
||||||
|
.start();
|
||||||
|
```
|
||||||
|
|
||||||
|
User will be asked to export the final result when it finishes.
|
||||||
|
|
||||||
|
In case you want to export it again, use:
|
||||||
|
|
||||||
|
```js
|
||||||
|
e.export()
|
||||||
|
```
|
||||||
|
|
||||||
|
To export another task result, other than the final one:
|
||||||
|
|
||||||
|
```js
|
||||||
|
// export the result of first task
|
||||||
|
// to the example above, that is a list of urls
|
||||||
|
e.export(0)
|
||||||
|
// export the result of second task
|
||||||
|
e.export(1)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Task Management
|
||||||
|
|
||||||
### Continue Tasks
|
### Continue Tasks
|
||||||
|
|
||||||
|
Sometimes it's hard to finish them in a single execution; that's why we need "Continuing of Tasks".
|
||||||
|
|
||||||
You can always continue tasks (with following), even it stops in the middle of a task:
|
You can always continue tasks (with following), even it stops in the middle of a task:
|
||||||
|
|
||||||
```js
|
```js
|
||||||
@ -99,9 +145,11 @@ e.restart(0)
|
|||||||
e.restart(1)
|
e.restart(1)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Save Result of Any Task
|
### Save & Load State
|
||||||
|
|
||||||
To a multiple task Extractor `e`:
|
It may also be hard to finish tasks even in a single day, so we need a way to save the current state and come back tomorrow.
|
||||||
|
|
||||||
|
Create and run an extractor:
|
||||||
|
|
||||||
```js
|
```js
|
||||||
e = new Extractor()
|
e = new Extractor()
|
||||||
@ -110,20 +158,16 @@ e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
|||||||
.start();
|
.start();
|
||||||
```
|
```
|
||||||
|
|
||||||
User will be asked to save the final result when it finishes.
|
Save the state:
|
||||||
|
|
||||||
Incase you want to save it again, use:
|
|
||||||
|
|
||||||
```js
|
```js
|
||||||
e.save()
|
e.save();
|
||||||
```
|
```
|
||||||
|
|
||||||
To save another task result, other than the final one:
|
Load the state:
|
||||||
|
|
||||||
|
Open the popup window and upload the saved state file. Then, in the background console:
|
||||||
|
|
||||||
```js
|
```js
|
||||||
// save the result of first task
|
e = new Extractor().load();
|
||||||
// to the example above, that is a list of urls
|
```
|
||||||
e.save(0)
|
|
||||||
// save the result of second task
|
|
||||||
e.save(1)
|
|
||||||
```
|
|
||||||
@ -1,54 +1,3 @@
|
|||||||
/**
|
|
||||||
* Extract data from current page / multiple urls.
|
|
||||||
* getData(tab, itemsSelector:string, fieldSelectors:string[])
|
|
||||||
* getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
|
|
||||||
* getData(tab, itemsSelector:string, fieldSelectors:string, url:string, pages:number[])
|
|
||||||
* getData(tab, itemsSelector:string, fieldSelectors:string[], urls:string[])
|
|
||||||
* getData(tab, itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
|
||||||
* getData(itemsSelector:string, fieldSelectors:string[])
|
|
||||||
* getData(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
|
|
||||||
* getData(itemsSelector:string, fieldSelectors:string, url:string, pages:number[])
|
|
||||||
* getData(itemsSelector:string, fieldSelectors:string[], urls:string[])
|
|
||||||
* getData(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
|
||||||
* @param {...any} args
|
|
||||||
*/
|
|
||||||
async function getData(...args) {
|
|
||||||
let tab;
|
|
||||||
if (typeof args[0] !== 'string') tab = args.shift();
|
|
||||||
if (!testArgs(...args))
|
|
||||||
throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
|
|
||||||
itemsSelector = args.shift();
|
|
||||||
fieldSelectors = args.shift();
|
|
||||||
let urls = parseUrls(...args);
|
|
||||||
let data = [];
|
|
||||||
if (!tab) tab = await getActiveTab(true) || await getActiveTab(false);
|
|
||||||
if (!tab) throw new Error("Cannot find active tab.");
|
|
||||||
return new Promise((resolve, reject) => {
|
|
||||||
let pms;
|
|
||||||
if (urls.length) {
|
|
||||||
pms = urls.reduce((p, url) => p.then(
|
|
||||||
results => {
|
|
||||||
if (results) data.push(...results);
|
|
||||||
return redirectTab(tab, url).then(
|
|
||||||
() => extractTabData(tab, itemsSelector, fieldSelectors)
|
|
||||||
);
|
|
||||||
},
|
|
||||||
() => p
|
|
||||||
), Promise.resolve([]));
|
|
||||||
} else {
|
|
||||||
pms = extractTabData(tab, itemsSelector, fieldSelectors);
|
|
||||||
}
|
|
||||||
pms.then(
|
|
||||||
results => {
|
|
||||||
if (results) data.push(...results);
|
|
||||||
data.unshift(fieldSelectors);
|
|
||||||
resolve(new ExtractResult(data));
|
|
||||||
},
|
|
||||||
err => reject(err)
|
|
||||||
);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseUrls(...args) {
|
function parseUrls(...args) {
|
||||||
if (!args.length) return [];
|
if (!args.length) return [];
|
||||||
let arg = args.shift();
|
let arg = args.shift();
|
||||||
@ -77,19 +26,26 @@ function parseUrls(...args) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function redirectTab(tab, url) {
|
function redirectTab(tab, url) {
|
||||||
let curUrl = "";
|
return queryUrl(tab).then(u => {
|
||||||
return queryUrl(tab, undefined, 'Query current url...')
|
if (url !== u) {
|
||||||
.then(u => {
|
let req = {
|
||||||
if (url !== u) {
|
action: ACTION_GOTO_URL,
|
||||||
curUrl = u;
|
url: url
|
||||||
let req = {
|
|
||||||
action: ACTION_GOTO_URL,
|
|
||||||
url: url
|
|
||||||
}
|
|
||||||
return sendMessage(tab, req, `Goto url: ${url}`);
|
|
||||||
}
|
}
|
||||||
})
|
let checker = async (url, err, tryCount) => {
|
||||||
.then(() => queryUrl(tab, url, 'Check if tab url matches expected...'))
|
let newURL = await queryUrl(tab).catch(() => { });
|
||||||
|
if (newURL == url) return url;
|
||||||
|
if (
|
||||||
|
tryCount % 5 == 0 &&
|
||||||
|
!confirm('Cannot navigate to target url. \nPress OK to continue, Cancel to stop.')
|
||||||
|
) {
|
||||||
|
return MSG_USER_ABORT;
|
||||||
|
}
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
return sendMessage(tab, req, `Goto url: ${url}`, checker);
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -105,8 +61,19 @@ function extractTabData(tab, itemsSelector, fieldSelectors) {
|
|||||||
itemsSelector: itemsSelector,
|
itemsSelector: itemsSelector,
|
||||||
fieldSelectors: fieldSelectors
|
fieldSelectors: fieldSelectors
|
||||||
}
|
}
|
||||||
let cond = r => !MSG_ELEMENT_NOT_FOUND.isEqual(r);
|
let checker = (result, err, tryCount) => {
|
||||||
return sendMessage(tab, req, 'Extract data from the tab...', cond);
|
if (MSG_ELEMENT_NOT_FOUND.isEqual(result)) {
|
||||||
|
if (tryCount % 20 == 0) {
|
||||||
|
if (confirm('No data found in current page. \n\nContinue to next page?')) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
return sendMessage(tab, req, 'Extract data from the tab...', checker);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -118,23 +85,21 @@ async function ping(tab, count = 1) {
|
|||||||
let req = {
|
let req = {
|
||||||
action: ACTION_REPORT_IN
|
action: ACTION_REPORT_IN
|
||||||
}
|
}
|
||||||
let cond = r => r == req.action;
|
let checker = r => r == req.action ? req.action : undefined;
|
||||||
let pong = await sendMessage(tab, req, 'Check tab availability...', cond, 1000, count).catch(() => { });
|
let pong = await sendMessage(tab, req, 'Check tab availability...', checker, 1000, count).catch(() => { });
|
||||||
return pong == ACTION_REPORT_IN;
|
return pong == ACTION_REPORT_IN;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the url of the target tab
|
* get the url of the target tab
|
||||||
* @param {any} tab target tab
|
* @param {any} tab target tab
|
||||||
* @param {string} expected if specified, queryUrl resolves only when tab url equals to expected
|
|
||||||
* @returns {Promise<string>} a promise of the url
|
* @returns {Promise<string>} a promise of the url
|
||||||
*/
|
*/
|
||||||
function queryUrl(tab, expected, log) {
|
function queryUrl(tab) {
|
||||||
let req = {
|
let req = {
|
||||||
action: ACTION_QUERY_URL
|
action: ACTION_QUERY_URL
|
||||||
}
|
}
|
||||||
let cond = url => url && (!expected || (expected && expected == url));
|
return sendMessage(tab, req);
|
||||||
return sendMessage(tab, req, log, cond);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -1,9 +1,31 @@
|
|||||||
|
var __EXTRACTOR_STATE__ = "";
|
||||||
|
|
||||||
class Extractor {
|
class Extractor {
|
||||||
constructor(options) {
|
constructor(options) {
|
||||||
this._tasks = [];
|
this._tasks = [];
|
||||||
this._running = false;
|
this._running = false;
|
||||||
this._options = options;
|
this._options = options;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Save current state, in case we restore it later.
|
||||||
|
*/
|
||||||
|
save() {
|
||||||
|
saveFile(JSON.stringify(this), 'application/json', 'state.json');
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Restore previous state by loading from saved state.
|
||||||
|
*/
|
||||||
|
load() {
|
||||||
|
if (!__EXTRACTOR_STATE__) {
|
||||||
|
logger.info('No state found. Please upload a saved state from the popup window first.');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let state = JSON.parse(__EXTRACTOR_STATE__);
|
||||||
|
__EXTRACTOR_STATE__ = "";
|
||||||
|
this._options = state._options;
|
||||||
|
this._tasks = state._tasks.map(t => new Task(this._options, 'whaterver', ['whaterver']).load(t));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Add a task to Extractor. \n
|
* Add a task to Extractor. \n
|
||||||
* One Extractor could has multiple tasks, which orgnized in a task chian.
|
* One Extractor could has multiple tasks, which orgnized in a task chian.
|
||||||
@ -41,11 +63,11 @@ class Extractor {
|
|||||||
}
|
}
|
||||||
async _startTasks(from) {
|
async _startTasks(from) {
|
||||||
if (this._running) {
|
if (this._running) {
|
||||||
console.log('The Extractor is running. Please wait..');
|
logger.info('The Extractor is running. Please wait..');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!this._tasks.length) {
|
if (!this._tasks.length) {
|
||||||
console.log('No task to run.');
|
logger.info('No task to run.');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -58,7 +80,7 @@ class Extractor {
|
|||||||
tab = await getActiveTab(true) || await getActiveTab(false);
|
tab = await getActiveTab(true) || await getActiveTab(false);
|
||||||
let succ = await ping(tab);
|
let succ = await ping(tab);
|
||||||
if (!succ) {
|
if (!succ) {
|
||||||
console.log('Cannot contact with active tab.');
|
logger.error('Cannot contact with active tab.');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -76,29 +98,27 @@ class Extractor {
|
|||||||
}, Promise.resolve(undefined)).then(
|
}, Promise.resolve(undefined)).then(
|
||||||
() => {
|
() => {
|
||||||
this._running = false;
|
this._running = false;
|
||||||
this.save();
|
this.export();
|
||||||
}
|
}
|
||||||
).catch(err => {
|
).catch(err => {
|
||||||
this._running = false;
|
this._running = false;
|
||||||
console.log(err);
|
logger.error(err);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Save result of a task
|
* export result of a task to CSV
|
||||||
* @param {number} taskid which task id to save, begins with 0
|
* @param {number} taskid which task id to save, begins with 0
|
||||||
*/
|
*/
|
||||||
save(taskid) {
|
export(taskid) {
|
||||||
let id = this._checkTaskId(taskid, this._tasks.length - 1);
|
let id = this._checkTaskId(taskid, this._tasks.length - 1);
|
||||||
if (id < 0) return;
|
if (id < 0) return;
|
||||||
let results = this._tasks[id].results
|
let results = this._tasks[id].results
|
||||||
results.unshift(this._tasks[id].fieldSelectors);
|
|
||||||
|
|
||||||
let exResults = new ExtractResult(results);
|
|
||||||
|
|
||||||
if (!results.length) {
|
if (!results.length) {
|
||||||
console.log(`No result for task #${id}. Forget to call ".start()"?`);
|
logger.info(`No result for task #${id}. Forget to call ".start()"?`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
results.unshift(this._tasks[id].fieldSelectors);
|
||||||
|
let exResults = new ExtractResult(results);
|
||||||
let msg = `
|
let msg = `
|
||||||
Please confirm to download (${results.length - 1} items):
|
Please confirm to download (${results.length - 1} items):
|
||||||
|
|
||||||
@ -110,12 +130,12 @@ ${exResults.toString(50) || "- Empty -"}
|
|||||||
}
|
}
|
||||||
_checkTaskId(id, defaultId) {
|
_checkTaskId(id, defaultId) {
|
||||||
if (!this._tasks.length) {
|
if (!this._tasks.length) {
|
||||||
console.log("No task found.");
|
logger.info("No task found.");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (!isNaN(defaultId) && id === undefined) id = defaultId;
|
if (!isNaN(defaultId) && id === undefined) id = defaultId;
|
||||||
if (isNaN(id) || id < 0 || id >= this._tasks.length) {
|
if (isNaN(id) || id < 0 || id >= this._tasks.length) {
|
||||||
console.log(`Invalid task id. Rang(0-${this._tasks.length - 1})`);
|
logger.info(`Invalid task id. Rang(0-${this._tasks.length - 1})`);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
return id
|
return id
|
||||||
|
|||||||
81
scripts/background/logger.js
Normal file
81
scripts/background/logger.js
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
/**
 * Log levels understood by Logger, ordered by severity.
 * `properties` maps each numeric level to its name, value and the prefix
 * printed in front of log lines.
 */
const LOGGER_LEVEL = {
    DEBUG: 1,
    INFO: 2,
    WARNING: 3,
    ERROR: 4,
    DISABLED: 100,
    properties: {
        1: { name: "debug", value: 1, prefix: "DEBUG" },
        2: { name: "info", value: 2, prefix: "INFO" },
        3: { name: "warning", value: 3, prefix: "WARN" },
        // value must match the ERROR level (4), not the warning level
        4: { name: "error", value: 4, prefix: "ERROR" }
    }
};
|
||||||
|
|
||||||
|
/**
 * Console logger with an optional desktop notification channel.
 *
 * Messages below `logLevel` are dropped. Messages at or above `notifyLevel`
 * are additionally shown in a chrome notification, which is created once
 * and then updated with the latest message until it is closed.
 */
class Logger {
    _notificationId = undefined;
    _log_level = LOGGER_LEVEL.INFO;
    _notify_level = LOGGER_LEVEL.ERROR;

    /**
     * @param {number} logLevel minimum level written to the console, default: INFO
     * @param {number} notifyLevel minimum level shown as notification, default: ERROR
     */
    constructor(logLevel, notifyLevel) {
        if (logLevel) this._log_level = logLevel;
        if (notifyLevel) this._notify_level = notifyLevel;
        chrome.notifications.onClosed.addListener((id) => {
            // Forget the closed notification so the next message creates a
            // new one; the notify level itself must stay untouched.
            if (id === this._notificationId) this._notificationId = undefined;
        });
    }

    get logLevel() {
        return this._log_level;
    }

    set logLevel(val) {
        this._log_level = val;
    }

    get notifyLevel() {
        return this._notify_level;
    }

    set notifyLevel(val) {
        this._notify_level = val;
    }

    /**
     * Write a message with a time stamp and level prefix, and notify when
     * the level reaches the notify level.
     * @param {number} level one of the LOGGER_LEVEL values
     * @param {function} loggerFn console function used for output
     * @param {...any} msgs message parts
     */
    log(level, loggerFn, ...msgs) {
        if (level < this._log_level) return;
        const time = new Date().toLocaleString();
        // Fall back to the raw level when it has no registered prefix.
        const prefix = LOGGER_LEVEL.properties[level]?.prefix ?? String(level);
        loggerFn(`${time} [${prefix}]`, ...msgs);
        if (level < this._notify_level) return;
        this.notify(...msgs);
    }

    debug(...msgs) {
        this.log(LOGGER_LEVEL.DEBUG, console.debug, ...msgs);
    }

    info(...msgs) {
        this.log(LOGGER_LEVEL.INFO, console.info, ...msgs);
    }

    warn(...msgs) {
        this.log(LOGGER_LEVEL.WARNING, console.warn, ...msgs);
    }

    error(...msgs) {
        this.log(LOGGER_LEVEL.ERROR, console.error, ...msgs);
    }

    /**
     * Show the message parts, joined by spaces, in the extension's
     * notification. Creates the notification on first use and updates it
     * afterwards.
     * @param {...any} msgs message parts
     */
    notify(...msgs) {
        const msg = msgs.join(' ');
        if (!this._notificationId) {
            chrome.notifications.create(
                null,
                {
                    "type": "basic",
                    "iconUrl": chrome.extension.getURL('icon.png'),
                    "title": "Data Extractor",
                    "message": msg,
                    "priority": 0,
                    "requireInteraction": true,
                },
                notificationId => {
                    this._notificationId = notificationId;
                }
            );
            return;
        }
        chrome.notifications.update(
            this._notificationId,
            { "message": msg }
        );
    }
}
|
||||||
|
|
||||||
|
const logger = new Logger(LOGGER_LEVEL.DEBUG, LOGGER_LEVEL.DISABLED);
|
||||||
@ -1,24 +1,27 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Repeatedly sending a message to target tab until the response is detected good.
|
* Sending a message to target tab repeatedly until the response is not undefined.
|
||||||
* @param {object} tab the table where to send the message
|
* @param {object} tab the table where to send the message
|
||||||
* @param {object} req the request data.
|
* @param {object} req the request data.
|
||||||
* @param {function} cond success condition function, r:any=>boolean
|
* @param {function} dataChecker (result:any, err:error, tryCount:number) => any.
|
||||||
|
* Check and decide what value finally returns.
|
||||||
|
* Return undefined to make 'sendMessage' retry.
|
||||||
|
* Return MSG_USER_ABORT to cancel this promise.
|
||||||
* @param {number} interval retry interval, default: 500ms.
|
* @param {number} interval retry interval, default: 500ms.
|
||||||
* @param {number} limit retry limit, default: 0, no limit.
|
* @param {number} limit retry limit, default: 0, no limit.
|
||||||
* @param {string} log messages logged to console.
|
* @param {string} log messages logged to console.
|
||||||
* @return {Promise} a promise of the response.
|
* @return {Promise} a promise of the response.
|
||||||
*/
|
*/
|
||||||
function sendMessage(tab, req, log, cond, interval, limit = 0) {
|
function sendMessage(tab, req, log, dataChecker, interval, limit = 0) {
|
||||||
interval = interval || 500;
|
interval = interval || 500;
|
||||||
limit = limit && !isNaN(limit) ? limit : 0;
|
limit = limit && !isNaN(limit) ? limit : 0;
|
||||||
count = 0;
|
let count = 0;
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
|
|
||||||
loop();
|
loop();
|
||||||
|
|
||||||
async function loop() {
|
async function loop() {
|
||||||
// console.log("request for", req.action);
|
logger.debug("Request for", req.action);
|
||||||
let tabAvailable = await getTabByID(tab.id);
|
let tabAvailable = await getTabByID(tab.id);
|
||||||
if (!tabAvailable) {
|
if (!tabAvailable) {
|
||||||
reject("Task interrupted due to the target tab is closed.");
|
reject("Task interrupted due to the target tab is closed.");
|
||||||
@ -30,16 +33,22 @@ function sendMessage(tab, req, log, cond, interval, limit = 0) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
count++;
|
count++;
|
||||||
chrome.tabs.sendMessage(tab.id, req, r => {
|
chrome.tabs.sendMessage(tab.id, req, async r => {
|
||||||
// check error but do nothing.
|
// check error but do nothing.
|
||||||
// do not interrupt promise chains even if error, or the task always fail when:
|
// do not interrupt promise chains even if error, or the task always fail when:
|
||||||
// a tab is newly created, and the content scripts won't have time to initialize
|
// a tab is newly created, and the content scripts won't have time to initialize
|
||||||
chrome.runtime.lastError;
|
let err = chrome.runtime.lastError;
|
||||||
|
let result = r;
|
||||||
let flag = !cond || cond(r);
|
if (dataChecker) {
|
||||||
if (log) console.log(log, flag ? '(OK)' : '(failed)');
|
result = await dataChecker(r, err, count);
|
||||||
|
if (MSG_USER_ABORT.isEqual(result)) {
|
||||||
|
reject(MSG_USER_ABORT.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let flag = result !== undefined && result !== null;
|
||||||
|
if (log) logger.info(log, flag ? '(OK)' : '(failed)');
|
||||||
if (flag) {
|
if (flag) {
|
||||||
resolve(r);
|
resolve(result);
|
||||||
} else {
|
} else {
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
loop();
|
loop();
|
||||||
@ -50,10 +59,18 @@ function sendMessage(tab, req, log, cond, interval, limit = 0) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
|
chrome.runtime.onMessage.addListener(function (request, sender, sendResponse) {
|
||||||
if (!message.action || !message.action.startsWith(EXT_NAME)) {
|
if (!request.action || !request.action.startsWith(EXT_NAME)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
sendResponse("Calling from user pages is not allowed.");
|
switch (request.action) {
|
||||||
return;
|
case ACTION_UPLOAD_STATE:
|
||||||
|
sendResponse('recieved!');
|
||||||
|
__EXTRACTOR_STATE__ = request.state;
|
||||||
|
logger.info(`State (${request.name}) recieved. To load it: some_var = new Extractor().load()`);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
sendResponse("Request not supported.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|||||||
@ -21,6 +21,9 @@ class ExtractResult {
|
|||||||
let data = rowsCount > 0 ? this._data.slice(0, rowsCount) : this._data;
|
let data = rowsCount > 0 ? this._data.slice(0, rowsCount) : this._data;
|
||||||
return data.slice().reduce(
|
return data.slice().reduce(
|
||||||
(csv, lineCells) => {
|
(csv, lineCells) => {
|
||||||
|
if (!lineCells || !lineCells.length) {
|
||||||
|
return csv + "\n";
|
||||||
|
}
|
||||||
let line = lineCells.reduce(
|
let line = lineCells.reduce(
|
||||||
(lineText, cell, idx) => {
|
(lineText, cell, idx) => {
|
||||||
cell = '"' + cell.trim().replace(/"/g, '""') + '"';
|
cell = '"' + cell.trim().replace(/"/g, '""') + '"';
|
||||||
|
|||||||
@ -17,6 +17,15 @@ class Task {
|
|||||||
this._fieldSelectors = args.shift();
|
this._fieldSelectors = args.shift();
|
||||||
this._urls = parseUrls(...args);
|
this._urls = parseUrls(...args);
|
||||||
}
|
}
|
||||||
|
load(state) {
|
||||||
|
this._itemsSelector = state._itemsSelector;
|
||||||
|
this._data = state._data;
|
||||||
|
this._data_keys = state._data_keys;
|
||||||
|
this._itemsSelector = state._itemsSelector;
|
||||||
|
this._fieldSelectors = state._fieldSelectors;
|
||||||
|
this._urls = state._urls;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
get urls() {
|
get urls() {
|
||||||
return this._urls;
|
return this._urls;
|
||||||
}
|
}
|
||||||
@ -33,6 +42,7 @@ class Task {
|
|||||||
}
|
}
|
||||||
clean() {
|
clean() {
|
||||||
this._data = {};
|
this._data = {};
|
||||||
|
this._data_keys = [];
|
||||||
}
|
}
|
||||||
async execute(tab, upstreamData) {
|
async execute(tab, upstreamData) {
|
||||||
if (!tab) return Promise.reject("No tab to execute the task.");
|
if (!tab) return Promise.reject("No tab to execute the task.");
|
||||||
|
|||||||
@ -1,7 +1,12 @@
|
|||||||
(function () {
|
(function () {
|
||||||
|
let asleep = false;
|
||||||
chrome.runtime.onMessage.addListener(
|
chrome.runtime.onMessage.addListener(
|
||||||
function (request, sender, sendResponse) {
|
function (request, sender, sendResponse) {
|
||||||
if (!request.action) return;
|
if (!request.action) return;
|
||||||
|
if (asleep && ACTION_WAKEUP != request.action) {
|
||||||
|
sendResponse && sendResponse(undefined);
|
||||||
|
return;
|
||||||
|
}
|
||||||
// console.log("Recieved request:",request);
|
// console.log("Recieved request:",request);
|
||||||
doAction(request, sender).then(r => sendResponse && sendResponse(r));
|
doAction(request, sender).then(r => sendResponse && sendResponse(r));
|
||||||
// return true to indicate you wish to send a response asynchronously
|
// return true to indicate you wish to send a response asynchronously
|
||||||
@ -16,6 +21,8 @@
|
|||||||
return data;
|
return data;
|
||||||
case ACTION_GOTO_URL:
|
case ACTION_GOTO_URL:
|
||||||
window.location.replace(request.url);
|
window.location.replace(request.url);
|
||||||
|
// should not recieve any request until the page & script reload
|
||||||
|
asleep = true;
|
||||||
return request.url;
|
return request.url;
|
||||||
case ACTION_REPORT_IN:
|
case ACTION_REPORT_IN:
|
||||||
return request.action;
|
return request.action;
|
||||||
@ -29,6 +36,12 @@
|
|||||||
1000,
|
1000,
|
||||||
10
|
10
|
||||||
)
|
)
|
||||||
|
case ACTION_SLEEP:
|
||||||
|
asleep = true;
|
||||||
|
return "Content script is sleeping.";
|
||||||
|
case ACTION_WAKEUP:
|
||||||
|
asleep = false;
|
||||||
|
return "Content script is available.";
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,12 +1,10 @@
|
|||||||
const EXT_NAME = "DataExtracter";
|
const EXT_NAME = "DataExtracter";
|
||||||
|
|
||||||
const URL_REG = getWebUrl();
|
|
||||||
|
|
||||||
const ACTION_EXTRACT = `${EXT_NAME}:Extract`;
|
const ACTION_EXTRACT = `${EXT_NAME}:Extract`;
|
||||||
const ACTION_GOTO_URL = `${EXT_NAME}:GoToTUL`;
|
const ACTION_GOTO_URL = `${EXT_NAME}:GoToTUL`;
|
||||||
const ACTION_REPORT_IN = `${EXT_NAME}:ReportIn`;
|
const ACTION_REPORT_IN = `${EXT_NAME}:ReportIn`;
|
||||||
const ACTION_QUERY_URL = `${EXT_NAME}:QueryURL`;
|
const ACTION_QUERY_URL = `${EXT_NAME}:QueryURL`;
|
||||||
const ACTION_SCROLL_BOTTOM = `${EXT_NAME}:ScrollToBottom`;
|
const ACTION_SCROLL_BOTTOM = `${EXT_NAME}:ScrollToBottom`;
|
||||||
|
const ACTION_UPLOAD_STATE = `${EXT_NAME}:UploadStateFile`;
|
||||||
const MSG_ELEMENT_NOT_FOUND = new ConstMessage(1, "No element found for at least one selector, maybe it's not loaded yet");
|
const ACTION_SLEEP = `${EXT_NAME}:Sleep`;
|
||||||
const MSG_URL_SKIPPED = new ConstMessage(100, "Skipped current URL");
|
const ACTION_WAKEUP = `${EXT_NAME}:WakeUp`;
|
||||||
|
|||||||
@ -9,6 +9,11 @@ class ConstMessage {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const URL_REG = getWebUrl();

// Messages exchanged between scripts; each is built from a numeric code and a text.
const MSG_ELEMENT_NOT_FOUND = new ConstMessage(1, "No element found for at least one selector, maybe it's not loaded yet");
const MSG_URL_SKIPPED = new ConstMessage(100, "Skipped current URL");
// NOTE(review): this used code 100 as well, the same as MSG_URL_SKIPPED.
// ConstMessage.isEqual is not visible here; if it compares codes, the two
// messages would be indistinguishable, so a distinct code is used.
const MSG_USER_ABORT = new ConstMessage(101, "Tasks stopped by user.");
|
||||||
|
|
||||||
function saveFile(data, mimeType, fileName) {
|
function saveFile(data, mimeType, fileName) {
|
||||||
fileName = fileName || document.title || "result";
|
fileName = fileName || document.title || "result";
|
||||||
var blob;
|
var blob;
|
||||||
|
|||||||
Reference in New Issue
Block a user