Compare commits
6 Commits
f1cf32b83a
...
3d375261df
| Author | SHA1 | Date | |
|---|---|---|---|
| 3d375261df | |||
| 13e233fbe7 | |||
| 21d3dfb247 | |||
| 97c8aac58d | |||
| 09112bb506 | |||
| c7f4fe7cc4 |
@ -1,7 +1,7 @@
|
||||
{
|
||||
"manifest_version": 2,
|
||||
"name": "Data Extracter",
|
||||
"version": "0.1.0",
|
||||
"version": "0.5.0",
|
||||
"author": "jebbs",
|
||||
"description": "Extract data from web page elements as sheet.",
|
||||
"icons": {
|
||||
@ -18,6 +18,7 @@
|
||||
"scripts": [
|
||||
"scripts/shared/tools.js",
|
||||
"scripts/shared/common.js",
|
||||
"scripts/background/logger.js",
|
||||
"scripts/background/messaging.js",
|
||||
"scripts/background/result.js",
|
||||
"scripts/background/signiture.js",
|
||||
@ -38,6 +39,7 @@
|
||||
"run_at": "document_idle"
|
||||
}],
|
||||
"permissions": [
|
||||
"activeTab"
|
||||
"activeTab",
|
||||
"notifications"
|
||||
]
|
||||
}
|
||||
@ -3,6 +3,7 @@
|
||||
<link>
|
||||
<meta charset="utf-8">
|
||||
<title>Data Extractor</title>
|
||||
<script charset="UTF-8" type="text/javascript" src="../scripts/shared/common.js"></script>
|
||||
<script charset="UTF-8" type="text/javascript" src="tip.js"></script>
|
||||
|
||||
<link rel="stylesheet" href="styles/bootstrap.min.css">
|
||||
@ -32,7 +33,6 @@
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
|
||||
<div class="col">
|
||||
<h6>Quick Start</h6>
|
||||
</div>
|
||||
@ -42,22 +42,33 @@
|
||||
<div class="alert alert-success small">
|
||||
<p>
|
||||
<b>Extract current page</b>:
|
||||
<br>new Extractor().task(".list-item", ["a.title", "p.content"]).start();
|
||||
<br>> $(".list-item", ["a.title", "p.content"]);
|
||||
</p>
|
||||
<p>
|
||||
<b>Extract multiple pages (1-10, interval 1)</b>:
|
||||
<br>new Extractor().task(".list-item", ["a.title", "p.content"],
|
||||
"http://sample.com/?pn=${page}", 1, 10, 1).start();
|
||||
<br>> job=new Extractor().task(".list-item", ["a.title", "p.content"],
|
||||
"http://sample.com/?pn=${page}", 1, 10, 1);
|
||||
<br>> job.start();
|
||||
|
||||
</p>
|
||||
<p>
|
||||
<b>Full document:</b>
|
||||
<b>Full document at:</b>
|
||||
<br>
|
||||
<a href="#" id="link-document">https://git.jebbs.co/jebbs/data-extracter-extesion</a>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<h6>Saved State</h6>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<input type="file" name="state" id="state-input">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
||||
|
||||
18
popup/tip.js
18
popup/tip.js
@ -11,4 +11,22 @@ window.onload = function () {
|
||||
'url': `https://git.jebbs.co/jebbs/data-extracter-extesion`
|
||||
});
|
||||
})
|
||||
document.querySelector('#state-input')
|
||||
.addEventListener('change', function (...args) {
|
||||
if (this.files.length == 1) {
|
||||
var reader = new FileReader();
|
||||
let fileName = this.files[0].name;
|
||||
reader.readAsText(this.files[0], "UTF-8");
|
||||
reader.onload = function (evt) {
|
||||
var fileString = evt.target.result;
|
||||
chrome.runtime.sendMessage({
|
||||
action: ACTION_UPLOAD_STATE,
|
||||
state: fileString,
|
||||
name: fileName
|
||||
}, r => {
|
||||
if (r) console.log('State sent:', r);
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
70
readme.md
70
readme.md
@ -78,8 +78,54 @@ e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||
.start();
|
||||
```
|
||||
|
||||
### Extractor Options
|
||||
|
||||
Specify extra options to make a task perform some actions before scraping the data.
|
||||
|
||||
```js
|
||||
var job = new Extractor({ "scrollToBottom": 1 });
|
||||
```
|
||||
|
||||
Available options:
|
||||
|
||||
- `scrollToBottom`: Try to scroll pages to the bottom; some elements are loaded only when the user needs them.
|
||||
|
||||
|
||||
### Export Result of Any Task
|
||||
|
||||
For a multi-task Extractor `e`:
|
||||
|
||||
```js
|
||||
e = new Extractor()
|
||||
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||
.task('list-item', ["a.title", "p.content"])
|
||||
.start();
|
||||
```
|
||||
|
||||
User will be asked to export the final result when it finishes.
|
||||
|
||||
In case you want to export it again, use:
|
||||
|
||||
```js
|
||||
e.export()
|
||||
```
|
||||
|
||||
To export another task result, other than the final one:
|
||||
|
||||
```js
|
||||
// export the result of first task
|
||||
// to the example above, that is a list of urls
|
||||
e.export(0)
|
||||
// export the result of second task
|
||||
e.export(1)
|
||||
```
|
||||
|
||||
## Task Management
|
||||
|
||||
### Continue Tasks
|
||||
|
||||
Sometimes it's hard to finish all tasks in a single execution; that's why we need "Continuing of Tasks".
|
||||
|
||||
You can always continue tasks (as follows), even if execution stopped in the middle of a task:
|
||||
|
||||
```js
|
||||
@ -99,9 +145,11 @@ e.restart(0)
|
||||
e.restart(1)
|
||||
```
|
||||
|
||||
### Save Result of Any Task
|
||||
### Save & Load State
|
||||
|
||||
To a multiple task Extractor `e`:
|
||||
It may also be hard to finish tasks even in a single day, so we need a way to save the current state and come back tomorrow.
|
||||
|
||||
Create and run an extractor:
|
||||
|
||||
```js
|
||||
e = new Extractor()
|
||||
@ -110,20 +158,16 @@ e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||
.start();
|
||||
```
|
||||
|
||||
User will be asked to save the final result when it finishes.
|
||||
|
||||
Incase you want to save it again, use:
|
||||
Save the state:
|
||||
|
||||
```js
|
||||
e.save()
|
||||
e.save();
|
||||
```
|
||||
|
||||
To save another task result, other than the final one:
|
||||
Load the state:
|
||||
|
||||
Open the popup window and upload the saved state file. Then, in the background console:
|
||||
|
||||
```js
|
||||
// save the result of first task
|
||||
// to the example above, that is a list of urls
|
||||
e.save(0)
|
||||
// save the result of second task
|
||||
e.save(1)
|
||||
```
|
||||
e = new Extractor().load();
|
||||
```
|
||||
@ -1,54 +1,3 @@
|
||||
/**
|
||||
* Extract data from current page / multiple urls.
|
||||
* getData(tab, itemsSelector:string, fieldSelectors:string[])
|
||||
* getData(tab, itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
|
||||
* getData(tab, itemsSelector:string, fieldSelectors:string, url:string, pages:number[])
|
||||
* getData(tab, itemsSelector:string, fieldSelectors:string[], urls:string[])
|
||||
* getData(tab, itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
||||
* getData(itemsSelector:string, fieldSelectors:string[])
|
||||
* getData(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
|
||||
* getData(itemsSelector:string, fieldSelectors:string, url:string, pages:number[])
|
||||
* getData(itemsSelector:string, fieldSelectors:string[], urls:string[])
|
||||
* getData(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
||||
* @param {...any} args
|
||||
*/
|
||||
async function getData(...args) {
|
||||
let tab;
|
||||
if (typeof args[0] !== 'string') tab = args.shift();
|
||||
if (!testArgs(...args))
|
||||
throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
|
||||
itemsSelector = args.shift();
|
||||
fieldSelectors = args.shift();
|
||||
let urls = parseUrls(...args);
|
||||
let data = [];
|
||||
if (!tab) tab = await getActiveTab(true) || await getActiveTab(false);
|
||||
if (!tab) throw new Error("Cannot find active tab.");
|
||||
return new Promise((resolve, reject) => {
|
||||
let pms;
|
||||
if (urls.length) {
|
||||
pms = urls.reduce((p, url) => p.then(
|
||||
results => {
|
||||
if (results) data.push(...results);
|
||||
return redirectTab(tab, url).then(
|
||||
() => extractTabData(tab, itemsSelector, fieldSelectors)
|
||||
);
|
||||
},
|
||||
() => p
|
||||
), Promise.resolve([]));
|
||||
} else {
|
||||
pms = extractTabData(tab, itemsSelector, fieldSelectors);
|
||||
}
|
||||
pms.then(
|
||||
results => {
|
||||
if (results) data.push(...results);
|
||||
data.unshift(fieldSelectors);
|
||||
resolve(new ExtractResult(data));
|
||||
},
|
||||
err => reject(err)
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
function parseUrls(...args) {
|
||||
if (!args.length) return [];
|
||||
let arg = args.shift();
|
||||
@ -77,19 +26,26 @@ function parseUrls(...args) {
|
||||
}
|
||||
|
||||
function redirectTab(tab, url) {
|
||||
let curUrl = "";
|
||||
return queryUrl(tab, undefined, 'Query current url...')
|
||||
.then(u => {
|
||||
if (url !== u) {
|
||||
curUrl = u;
|
||||
let req = {
|
||||
action: ACTION_GOTO_URL,
|
||||
url: url
|
||||
}
|
||||
return sendMessage(tab, req, `Goto url: ${url}`);
|
||||
return queryUrl(tab).then(u => {
|
||||
if (url !== u) {
|
||||
let req = {
|
||||
action: ACTION_GOTO_URL,
|
||||
url: url
|
||||
}
|
||||
})
|
||||
.then(() => queryUrl(tab, url, 'Check if tab url matches expected...'))
|
||||
let checker = async (url, err, tryCount) => {
|
||||
let newURL = await queryUrl(tab).catch(() => { });
|
||||
if (newURL == url) return url;
|
||||
if (
|
||||
tryCount % 5 == 0 &&
|
||||
!confirm('Cannot navigate to target url. \nPress OK to continue, Cancel to stop.')
|
||||
) {
|
||||
return MSG_USER_ABORT;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
return sendMessage(tab, req, `Goto url: ${url}`, checker);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
@ -105,8 +61,19 @@ function extractTabData(tab, itemsSelector, fieldSelectors) {
|
||||
itemsSelector: itemsSelector,
|
||||
fieldSelectors: fieldSelectors
|
||||
}
|
||||
let cond = r => !MSG_ELEMENT_NOT_FOUND.isEqual(r);
|
||||
return sendMessage(tab, req, 'Extract data from the tab...', cond);
|
||||
let checker = (result, err, tryCount) => {
|
||||
if (MSG_ELEMENT_NOT_FOUND.isEqual(result)) {
|
||||
if (tryCount % 20 == 0) {
|
||||
if (confirm('No data found in current page. \n\nContinue to next page?')) {
|
||||
return [];
|
||||
}
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
};
|
||||
return sendMessage(tab, req, 'Extract data from the tab...', checker);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -118,23 +85,21 @@ async function ping(tab, count = 1) {
|
||||
let req = {
|
||||
action: ACTION_REPORT_IN
|
||||
}
|
||||
let cond = r => r == req.action;
|
||||
let pong = await sendMessage(tab, req, 'Check tab availability...', cond, 1000, count).catch(() => { });
|
||||
let checker = r => r == req.action ? req.action : undefined;
|
||||
let pong = await sendMessage(tab, req, 'Check tab availability...', checker, 1000, count).catch(() => { });
|
||||
return pong == ACTION_REPORT_IN;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the url of the target tab
|
||||
* @param {any} tab target tab
|
||||
* @param {string} expected if specified, queryUrl resolves only when tab url equals to expected
|
||||
* @returns {Promise<string>} a promise of the url
|
||||
*/
|
||||
function queryUrl(tab, expected, log) {
|
||||
function queryUrl(tab) {
|
||||
let req = {
|
||||
action: ACTION_QUERY_URL
|
||||
}
|
||||
let cond = url => url && (!expected || (expected && expected == url));
|
||||
return sendMessage(tab, req, log, cond);
|
||||
return sendMessage(tab, req);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -1,9 +1,31 @@
|
||||
var __EXTRACTOR_STATE__ = "";
|
||||
|
||||
class Extractor {
|
||||
constructor(options) {
|
||||
this._tasks = [];
|
||||
this._running = false;
|
||||
this._options = options;
|
||||
}
|
||||
/**
|
||||
* Save current state, in case we restore it later.
|
||||
*/
|
||||
save() {
|
||||
saveFile(JSON.stringify(this), 'application/json', 'state.json');
|
||||
}
|
||||
/**
|
||||
* Restore previous state by loading from saved state.
|
||||
*/
|
||||
load() {
|
||||
if (!__EXTRACTOR_STATE__) {
|
||||
logger.info('No state found. Please upload a saved state from the popup window first.');
|
||||
return;
|
||||
}
|
||||
let state = JSON.parse(__EXTRACTOR_STATE__);
|
||||
__EXTRACTOR_STATE__ = "";
|
||||
this._options = state._options;
|
||||
this._tasks = state._tasks.map(t => new Task(this._options, 'whaterver', ['whaterver']).load(t));
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* Add a task to Extractor. \n
|
||||
* One Extractor can have multiple tasks, which are organized in a task chain.
|
||||
@ -41,11 +63,11 @@ class Extractor {
|
||||
}
|
||||
async _startTasks(from) {
|
||||
if (this._running) {
|
||||
console.log('The Extractor is running. Please wait..');
|
||||
logger.info('The Extractor is running. Please wait..');
|
||||
return;
|
||||
}
|
||||
if (!this._tasks.length) {
|
||||
console.log('No task to run.');
|
||||
logger.info('No task to run.');
|
||||
return;
|
||||
}
|
||||
|
||||
@ -58,7 +80,7 @@ class Extractor {
|
||||
tab = await getActiveTab(true) || await getActiveTab(false);
|
||||
let succ = await ping(tab);
|
||||
if (!succ) {
|
||||
console.log('Cannot contact with active tab.');
|
||||
logger.error('Cannot contact with active tab.');
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -76,29 +98,27 @@ class Extractor {
|
||||
}, Promise.resolve(undefined)).then(
|
||||
() => {
|
||||
this._running = false;
|
||||
this.save();
|
||||
this.export();
|
||||
}
|
||||
).catch(err => {
|
||||
this._running = false;
|
||||
console.log(err);
|
||||
logger.error(err);
|
||||
});
|
||||
}
|
||||
/**
|
||||
* Save result of a task
|
||||
* export result of a task to CSV
|
||||
* @param {number} taskid which task id to save, begins with 0
|
||||
*/
|
||||
save(taskid) {
|
||||
export(taskid) {
|
||||
let id = this._checkTaskId(taskid, this._tasks.length - 1);
|
||||
if (id < 0) return;
|
||||
let results = this._tasks[id].results
|
||||
results.unshift(this._tasks[id].fieldSelectors);
|
||||
|
||||
let exResults = new ExtractResult(results);
|
||||
|
||||
if (!results.length) {
|
||||
console.log(`No result for task #${id}. Forget to call ".start()"?`);
|
||||
logger.info(`No result for task #${id}. Forget to call ".start()"?`);
|
||||
return;
|
||||
}
|
||||
results.unshift(this._tasks[id].fieldSelectors);
|
||||
let exResults = new ExtractResult(results);
|
||||
let msg = `
|
||||
Please confirm to download (${results.length - 1} items):
|
||||
|
||||
@ -110,12 +130,12 @@ ${exResults.toString(50) || "- Empty -"}
|
||||
}
|
||||
_checkTaskId(id, defaultId) {
|
||||
if (!this._tasks.length) {
|
||||
console.log("No task found.");
|
||||
logger.info("No task found.");
|
||||
return -1;
|
||||
}
|
||||
if (!isNaN(defaultId) && id === undefined) id = defaultId;
|
||||
if (isNaN(id) || id < 0 || id >= this._tasks.length) {
|
||||
console.log(`Invalid task id. Rang(0-${this._tasks.length - 1})`);
|
||||
logger.info(`Invalid task id. Rang(0-${this._tasks.length - 1})`);
|
||||
return -1;
|
||||
}
|
||||
return id
|
||||
|
||||
81
scripts/background/logger.js
Normal file
81
scripts/background/logger.js
Normal file
@ -0,0 +1,81 @@
|
||||
/**
 * Severity levels understood by Logger.
 * Larger numbers are more severe; DISABLED is higher than every real level,
 * so using it as a threshold suppresses all output for that channel.
 * `properties` maps each level value to its display metadata.
 */
const LOGGER_LEVEL = {
    DEBUG: 1,
    INFO: 2,
    WARNING: 3,
    ERROR: 4,
    DISABLED: 100,
    properties: {
        1: { name: "debug", value: 1, prefix: "DEBUG" },
        2: { name: "info", value: 2, prefix: "INFO" },
        3: { name: "warning", value: 3, prefix: "WARN" },
        // value must match LOGGER_LEVEL.ERROR (was mistakenly 3).
        4: { name: "error", value: 4, prefix: "ERROR" }
    }
};

/**
 * Console logger that can additionally surface messages through a single,
 * re-used desktop notification (chrome.notifications).
 */
class Logger {
    // Id of the notification currently shown, or undefined when none is open.
    _notificationId = undefined;
    // Messages below this level are dropped entirely.
    _log_level = LOGGER_LEVEL.INFO;
    // Messages at or above this level are also shown as a notification.
    _notify_level = LOGGER_LEVEL.ERROR;

    /**
     * @param {number} [logLevel] minimum level written to the console, default: INFO.
     * @param {number} [notifyLevel] minimum level shown as notification, default: ERROR.
     */
    constructor(logLevel, notifyLevel) {
        if (logLevel) this._log_level = logLevel;
        if (notifyLevel) this._notify_level = notifyLevel;
        // When our notification is closed, forget its id so the next notify()
        // creates a fresh one instead of updating a notification that is gone.
        // (Previously this cleared _notify_level, which made every subsequent
        // log call raise a notification.)
        chrome.notifications.onClosed.addListener((id) => {
            if (id === this._notificationId) this._notificationId = undefined;
        });
    }

    get logLevel() {
        return this._log_level;
    }

    set logLevel(val) {
        this._log_level = val;
    }

    get notifyLevel() {
        return this._notify_level;
    }

    set notifyLevel(val) {
        this._notify_level = val;
    }

    /**
     * Write a message with the given level.
     * @param {number} level one of the LOGGER_LEVEL values.
     * @param {function} loggerFn console-like function that receives the output.
     * @param {...any} msgs message parts, passed through to loggerFn.
     */
    log(level, loggerFn, ...msgs) {
        if (level < this._log_level) return;
        const time = new Date().toLocaleString();
        // Fall back to the raw level so an unknown level cannot throw here.
        const prefix = LOGGER_LEVEL.properties[level]?.prefix ?? String(level);
        loggerFn(`${time} [${prefix}]`, ...msgs);
        if (level < this._notify_level) return;
        this.notify(...msgs);
    }

    debug(...msgs) {
        this.log(LOGGER_LEVEL.DEBUG, console.debug, ...msgs);
    }

    info(...msgs) {
        this.log(LOGGER_LEVEL.INFO, console.info, ...msgs);
    }

    warn(...msgs) {
        // Use the matching console channel so devtools severity filters work.
        this.log(LOGGER_LEVEL.WARNING, console.warn, ...msgs);
    }

    error(...msgs) {
        this.log(LOGGER_LEVEL.ERROR, console.error, ...msgs);
    }

    /**
     * Show the message parts (joined by a space) in a desktop notification.
     * Creates the notification on first use and updates it afterwards.
     * @param {...any} msgs message parts.
     */
    notify(...msgs) {
        const msg = msgs.join(' ');
        if (!this._notificationId) {
            chrome.notifications.create(
                null,
                {
                    "type": "basic",
                    "iconUrl": chrome.extension.getURL('icon.png'),
                    "title": "Data Extractor",
                    "message": msg,
                    "priority": 0,
                    "requireInteraction": true,
                },
                notificationId => {
                    this._notificationId = notificationId;
                }
            );
            return;
        }
        chrome.notifications.update(
            this._notificationId,
            { "message": msg }
        );
    }
}
|
||||
|
||||
const logger = new Logger(LOGGER_LEVEL.DEBUG, LOGGER_LEVEL.DISABLED);
|
||||
@ -1,24 +1,27 @@
|
||||
|
||||
/**
|
||||
* Repeatedly sending a message to target tab until the response is detected good.
|
||||
* Sending a message to target tab repeatedly until the response is not undefined.
|
||||
* @param {object} tab the tab to send the message to
|
||||
* @param {object} req the request data.
|
||||
* @param {function} cond success condition function, r:any=>boolean
|
||||
* @param {function} dataChecker (result:any, err:error, tryCount:number) => any.
|
||||
* Check and decide what value finally returns.
|
||||
* Return undefined to make 'sendMessage' retry.
|
||||
* Return MSG_USER_ABORT to cancel this promise.
|
||||
* @param {number} interval retry interval, default: 500ms.
|
||||
* @param {number} limit retry limit, default: 0, no limit.
|
||||
* @param {string} log messages logged to console.
|
||||
* @return {Promise} a promise of the response.
|
||||
*/
|
||||
function sendMessage(tab, req, log, cond, interval, limit = 0) {
|
||||
function sendMessage(tab, req, log, dataChecker, interval, limit = 0) {
|
||||
interval = interval || 500;
|
||||
limit = limit && !isNaN(limit) ? limit : 0;
|
||||
count = 0;
|
||||
let count = 0;
|
||||
return new Promise((resolve, reject) => {
|
||||
|
||||
loop();
|
||||
|
||||
async function loop() {
|
||||
// console.log("request for", req.action);
|
||||
logger.debug("Request for", req.action);
|
||||
let tabAvailable = await getTabByID(tab.id);
|
||||
if (!tabAvailable) {
|
||||
reject("Task interrupted due to the target tab is closed.");
|
||||
@ -30,16 +33,22 @@ function sendMessage(tab, req, log, cond, interval, limit = 0) {
|
||||
return;
|
||||
}
|
||||
count++;
|
||||
chrome.tabs.sendMessage(tab.id, req, r => {
|
||||
chrome.tabs.sendMessage(tab.id, req, async r => {
|
||||
// check error but do nothing.
|
||||
// do not interrupt promise chains even if error, or the task always fail when:
|
||||
// a tab is newly created, and the content scripts won't have time to initialize
|
||||
chrome.runtime.lastError;
|
||||
|
||||
let flag = !cond || cond(r);
|
||||
if (log) console.log(log, flag ? '(OK)' : '(failed)');
|
||||
let err = chrome.runtime.lastError;
|
||||
let result = r;
|
||||
if (dataChecker) {
|
||||
result = await dataChecker(r, err, count);
|
||||
if (MSG_USER_ABORT.isEqual(result)) {
|
||||
reject(MSG_USER_ABORT.message);
|
||||
}
|
||||
}
|
||||
let flag = result !== undefined && result !== null;
|
||||
if (log) logger.info(log, flag ? '(OK)' : '(failed)');
|
||||
if (flag) {
|
||||
resolve(r);
|
||||
resolve(result);
|
||||
} else {
|
||||
setTimeout(() => {
|
||||
loop();
|
||||
@ -50,10 +59,18 @@ function sendMessage(tab, req, log, cond, interval, limit = 0) {
|
||||
});
|
||||
}
|
||||
|
||||
chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
|
||||
if (!message.action || !message.action.startsWith(EXT_NAME)) {
|
||||
chrome.runtime.onMessage.addListener(function (request, sender, sendResponse) {
|
||||
if (!request.action || !request.action.startsWith(EXT_NAME)) {
|
||||
return;
|
||||
}
|
||||
sendResponse("Calling from user pages is not allowed.");
|
||||
return;
|
||||
switch (request.action) {
|
||||
case ACTION_UPLOAD_STATE:
|
||||
sendResponse('recieved!');
|
||||
__EXTRACTOR_STATE__ = request.state;
|
||||
logger.info(`State (${request.name}) recieved. To load it: some_var = new Extractor().load()`);
|
||||
break;
|
||||
default:
|
||||
sendResponse("Request not supported.");
|
||||
break;
|
||||
}
|
||||
});
|
||||
|
||||
@ -21,6 +21,9 @@ class ExtractResult {
|
||||
let data = rowsCount > 0 ? this._data.slice(0, rowsCount) : this._data;
|
||||
return data.slice().reduce(
|
||||
(csv, lineCells) => {
|
||||
if (!lineCells || !lineCells.length) {
|
||||
return csv + "\n";
|
||||
}
|
||||
let line = lineCells.reduce(
|
||||
(lineText, cell, idx) => {
|
||||
cell = '"' + cell.trim().replace(/"/g, '""') + '"';
|
||||
|
||||
@ -17,6 +17,15 @@ class Task {
|
||||
this._fieldSelectors = args.shift();
|
||||
this._urls = parseUrls(...args);
|
||||
}
|
||||
load(state) {
|
||||
this._itemsSelector = state._itemsSelector;
|
||||
this._data = state._data;
|
||||
this._data_keys = state._data_keys;
|
||||
this._itemsSelector = state._itemsSelector;
|
||||
this._fieldSelectors = state._fieldSelectors;
|
||||
this._urls = state._urls;
|
||||
return this;
|
||||
}
|
||||
get urls() {
|
||||
return this._urls;
|
||||
}
|
||||
@ -33,6 +42,7 @@ class Task {
|
||||
}
|
||||
clean() {
|
||||
this._data = {};
|
||||
this._data_keys = [];
|
||||
}
|
||||
async execute(tab, upstreamData) {
|
||||
if (!tab) return Promise.reject("No tab to execute the task.");
|
||||
|
||||
@ -1,7 +1,12 @@
|
||||
(function () {
|
||||
let asleep = false;
|
||||
chrome.runtime.onMessage.addListener(
|
||||
function (request, sender, sendResponse) {
|
||||
if (!request.action) return;
|
||||
if (asleep && ACTION_WAKEUP != request.action) {
|
||||
sendResponse && sendResponse(undefined);
|
||||
return;
|
||||
}
|
||||
// console.log("Recieved request:",request);
|
||||
doAction(request, sender).then(r => sendResponse && sendResponse(r));
|
||||
// return true to indicate you wish to send a response asynchronously
|
||||
@ -16,6 +21,8 @@
|
||||
return data;
|
||||
case ACTION_GOTO_URL:
|
||||
window.location.replace(request.url);
|
||||
// should not recieve any request until the page & script reload
|
||||
asleep = true;
|
||||
return request.url;
|
||||
case ACTION_REPORT_IN:
|
||||
return request.action;
|
||||
@ -29,6 +36,12 @@
|
||||
1000,
|
||||
10
|
||||
)
|
||||
case ACTION_SLEEP:
|
||||
asleep = true;
|
||||
return "Content script is sleeping.";
|
||||
case ACTION_WAKEUP:
|
||||
asleep = false;
|
||||
return "Content script is available.";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1,12 +1,10 @@
|
||||
const EXT_NAME = "DataExtracter";
|
||||
|
||||
const URL_REG = getWebUrl();
|
||||
|
||||
const ACTION_EXTRACT = `${EXT_NAME}:Extract`;
|
||||
const ACTION_GOTO_URL = `${EXT_NAME}:GoToTUL`;
|
||||
const ACTION_REPORT_IN = `${EXT_NAME}:ReportIn`;
|
||||
const ACTION_QUERY_URL = `${EXT_NAME}:QueryURL`;
|
||||
const ACTION_SCROLL_BOTTOM = `${EXT_NAME}:ScrollToBottom`;
|
||||
|
||||
const MSG_ELEMENT_NOT_FOUND = new ConstMessage(1, "No element found for at least one selector, maybe it's not loaded yet");
|
||||
const MSG_URL_SKIPPED = new ConstMessage(100, "Skipped current URL");
|
||||
const ACTION_UPLOAD_STATE = `${EXT_NAME}:UploadStateFile`;
|
||||
const ACTION_SLEEP = `${EXT_NAME}:Sleep`;
|
||||
const ACTION_WAKEUP = `${EXT_NAME}:WakeUp`;
|
||||
|
||||
@ -9,6 +9,11 @@ class ConstMessage {
|
||||
}
|
||||
}
|
||||
|
||||
const URL_REG = getWebUrl();
|
||||
const MSG_ELEMENT_NOT_FOUND = new ConstMessage(1, "No element found for at least one selector, maybe it's not loaded yet");
|
||||
const MSG_URL_SKIPPED = new ConstMessage(100, "Skipped current URL");
|
||||
const MSG_USER_ABORT = new ConstMessage(100, "Tasks stopped by user.");
|
||||
|
||||
function saveFile(data, mimeType, fileName) {
|
||||
fileName = fileName || document.title || "result";
|
||||
var blob;
|
||||
|
||||
Reference in New Issue
Block a user