Compare commits
3 Commits
51da68fee5
...
c504942144
| Author | SHA1 | Date | |
|---|---|---|---|
| c504942144 | |||
| 4656e4ff64 | |||
| 26c6c1159e |
@ -16,24 +16,27 @@
|
|||||||
},
|
},
|
||||||
"background": {
|
"background": {
|
||||||
"scripts": [
|
"scripts": [
|
||||||
"scripts/background.js",
|
"scripts/shared/tools.js",
|
||||||
"scripts/result.js",
|
"scripts/shared/common.js",
|
||||||
"scripts/tools.js",
|
"scripts/background/messaging.js",
|
||||||
"scripts/extract.js",
|
"scripts/background/result.js",
|
||||||
"scripts/extractor.js"
|
"scripts/background/signiture.js",
|
||||||
|
"scripts/background/actions.js",
|
||||||
|
"scripts/background/extractor.js",
|
||||||
|
"scripts/background/helpers.js"
|
||||||
],
|
],
|
||||||
"persistent": false
|
"persistent": false
|
||||||
},
|
},
|
||||||
"content_scripts": [{
|
"content_scripts": [{
|
||||||
"matches": ["*://*/*"],
|
"matches": ["*://*/*"],
|
||||||
"js": [
|
"js": [
|
||||||
"scripts/jquery.min.js",
|
"scripts/shared/tools.js",
|
||||||
"scripts/content.js"
|
"scripts/shared/common.js",
|
||||||
|
"scripts/content/content.js"
|
||||||
],
|
],
|
||||||
"run_at": "document_idle"
|
"run_at": "document_idle"
|
||||||
}],
|
}],
|
||||||
"permissions": [
|
"permissions": [
|
||||||
"activeTab",
|
"activeTab"
|
||||||
"storage"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -3,11 +3,9 @@
|
|||||||
<link>
|
<link>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<title>Data Extractor</title>
|
<title>Data Extractor</title>
|
||||||
<script charset="UTF-8" type="text/javascript" src="../scripts/jquery.min.js"></script>
|
<script charset="UTF-8" type="text/javascript" src="tip.js"></script>
|
||||||
<script charset="UTF-8" type="text/javascript" src="../styles/bootstrap.min.js"></script>
|
|
||||||
<script charset="UTF-8" type="text/javascript" src="./tip.js"></script>
|
|
||||||
|
|
||||||
<link rel="stylesheet" href="../styles/bootstrap.min.css">
|
<link rel="stylesheet" href="styles/bootstrap.min.css">
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body style="margin: 20px 10px;">
|
<body style="margin: 20px 10px;">
|
||||||
@ -26,7 +24,8 @@
|
|||||||
and type your scripts in the console.
|
and type your scripts in the console.
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
<img src="../images/console.png" alt="" style="max-width: 489px; width: 100%; border-radius: 5px">
|
<img src="../images/console.png" alt=""
|
||||||
|
style="max-width: 489px; width: 100%; border-radius: 5px">
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
10
popup/tip.js
10
popup/tip.js
@ -1,14 +1,14 @@
|
|||||||
$().ready(
|
window.onload = function () {
|
||||||
() => {
|
document.querySelector('#link-extension-detail')
|
||||||
$("#link-extension-detail").on('click', () => {
|
.addEventListener('click', () => {
|
||||||
chrome.tabs.create({
|
chrome.tabs.create({
|
||||||
'url': `chrome://extensions/?id=${chrome.runtime.id}`
|
'url': `chrome://extensions/?id=${chrome.runtime.id}`
|
||||||
});
|
});
|
||||||
})
|
})
|
||||||
$("#link-document").on('click', () => {
|
document.querySelector('#link-document')
|
||||||
|
.addEventListener('click', () => {
|
||||||
chrome.tabs.create({
|
chrome.tabs.create({
|
||||||
'url': `https://git.jebbs.co/jebbs/data-extracter-extesion`
|
'url': `https://git.jebbs.co/jebbs/data-extracter-extesion`
|
||||||
});
|
});
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
);
|
|
||||||
40
readme.md
40
readme.md
@ -5,7 +5,7 @@ DataExtracter helps you quickly extract data from any web pages.
|
|||||||
|
|
||||||
All you need to do is:
|
All you need to do is:
|
||||||
|
|
||||||
- Find out the selectors (JQuery selectors) for target data
|
- Find out the selectors for target data
|
||||||
- Type scripts in the console of `extension backgroud page`, as introduced bellow.
|
- Type scripts in the console of `extension backgroud page`, as introduced bellow.
|
||||||
|
|
||||||

|

|
||||||
@ -14,40 +14,40 @@ All you need to do is:
|
|||||||
|
|
||||||
Extract current page
|
Extract current page
|
||||||
```js
|
```js
|
||||||
new Extractor().task(".list-item", ["a.title", "p.content"]).start();
|
$('.item', ['a', 'a@href']);
|
||||||
```
|
```
|
||||||
|
|
||||||
Extract multiple pages (1-10, interval 1)
|
Extract multiple pages (1-10, interval 1)
|
||||||
|
|
||||||
```js
|
```js
|
||||||
new Extractor().task(".list-item", ["a.title", "p.content"],"http://sample.com/?pn=${page}", 1, 10, 1).start();
|
$('.item', ['a', 'a@href'],"http://sample.com/?pn=${page}", 1, 10, 1);
|
||||||
```
|
```
|
||||||
|
|
||||||
Extract multiple urls (list)
|
Extract multiple urls (list)
|
||||||
|
|
||||||
```js
|
```js
|
||||||
new Extractor().task(".list-item", ["a.title", "p.content"],["http://sample.com/abc","http://sample.com/xyz"]).start();
|
$('.item', ['a', 'a@href'],["http://sample.com/abc","http://sample.com/xyz"]);
|
||||||
```
|
```
|
||||||
|
|
||||||
Extract specified pages (1,3,5)
|
Extract specified pages (1,3,5)
|
||||||
|
|
||||||
```js
|
```js
|
||||||
new Extractor().task(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=${page}", [1, 3, 5]).start();
|
$('.item', ['a', 'a@href'], "http://sample.com/?pn=${page}", [1, 3, 5]);
|
||||||
```
|
```
|
||||||
|
|
||||||
## Extractor.task() Signitures
|
## Task Call Signitures
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
// a task extracting data from current page
|
// extract data from current page
|
||||||
task(itemsSelector:string, fieldSelectors:string[])
|
function (itemsSelector:string, fieldSelectors:string[])
|
||||||
// a task extracting data from a range of pages
|
// extract data from a range of pages
|
||||||
task(itemsSelector:string, fieldSelectors:string[], urlTemplate:string, from:number, to:number, interval:number)
|
function (itemsSelector:string, fieldSelectors:string[], urlTemplate:string, from:number, to:number, interval:number)
|
||||||
// a task extracting data from a list of pages
|
// extract data from a list of pages
|
||||||
task(itemsSelector:string, fieldSelectors:string, urlTemplate:string, pages:number[])
|
function (itemsSelector:string, fieldSelectors:string, urlTemplate:string, pages:number[])
|
||||||
// a task extracting data from a list of pages
|
// extract data from a list of pages
|
||||||
task(itemsSelector:string, fieldSelectors:string[], urls:string[])
|
function (itemsSelector:string, fieldSelectors:string[], urls:string[])
|
||||||
// a task extracting data of urls which extracted from last task result
|
// extract data of urls which extracted from last task result
|
||||||
task(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
function (itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Advanced Usage
|
## Advanced Usage
|
||||||
@ -65,7 +65,7 @@ The only way to stop tasks before its finish, is `Closing the host tab`.
|
|||||||
e.g.: link text and target (use 'selector@attribute')
|
e.g.: link text and target (use 'selector@attribute')
|
||||||
|
|
||||||
```js
|
```js
|
||||||
new Extractor().task('.list-item', ['a.title', 'a.title@href']).start();
|
new Extractor().task('.item', ['a', 'a@href']).start();
|
||||||
```
|
```
|
||||||
|
|
||||||
### Use Task Chain.
|
### Use Task Chain.
|
||||||
@ -74,7 +74,7 @@ e.g.: Collect links from `http://sample.com/abc`, then, Extract data of each lin
|
|||||||
|
|
||||||
```js
|
```js
|
||||||
new Extractor()
|
new Extractor()
|
||||||
.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"])
|
.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||||
.task('list-item', ["a.title", "p.content"])
|
.task('list-item', ["a.title", "p.content"])
|
||||||
.start();
|
.start();
|
||||||
```
|
```
|
||||||
@ -85,7 +85,7 @@ To a multiple task (chain) Extractor `e`:
|
|||||||
|
|
||||||
```js
|
```js
|
||||||
e = new Extractor()
|
e = new Extractor()
|
||||||
e.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"])
|
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||||
.task('list-item', ["a.title", "p.content"])
|
.task('list-item', ["a.title", "p.content"])
|
||||||
.start();
|
.start();
|
||||||
```
|
```
|
||||||
@ -114,7 +114,7 @@ Here we have 2 tasks:
|
|||||||
|
|
||||||
```js
|
```js
|
||||||
e = new Extractor()
|
e = new Extractor()
|
||||||
e.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"])
|
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||||
.task('list-item', ["a.title", "p.content"])
|
.task('list-item', ["a.title", "p.content"])
|
||||||
.start();
|
.start();
|
||||||
```
|
```
|
||||||
|
|||||||
@ -1,14 +0,0 @@
|
|||||||
chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
|
|
||||||
if (message.from === "DataExtracter:Extract") {
|
|
||||||
if (!testArgs(...message.args)) {
|
|
||||||
sendResponse(signitures);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
extract(...message.args).catch(
|
|
||||||
err => {
|
|
||||||
console.log(err);
|
|
||||||
alert(err);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
@ -1,18 +1,3 @@
|
|||||||
/**
|
|
||||||
* Extract data from current tab / multiple urls.
|
|
||||||
* @param {string} itemsSelector items selectors for selecting items (data rows)
|
|
||||||
* @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
|
|
||||||
* @param {...any} args url list / url templates, page numers, either [from, to, interval] or [...pages]
|
|
||||||
*/
|
|
||||||
async function extract(itemsSelector, fieldSelectors, ...args) {
|
|
||||||
let result = await getData(itemsSelector, fieldSelectors, ...args);
|
|
||||||
if (confirm(
|
|
||||||
`Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${result.toString(50) || "- Empty -"}`
|
|
||||||
)) {
|
|
||||||
saveFile(result, "text/csv");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract data from current page / multiple urls.
|
* Extract data from current page / multiple urls.
|
||||||
* getData(tab, itemsSelector:string, fieldSelectors:string[])
|
* getData(tab, itemsSelector:string, fieldSelectors:string[])
|
||||||
@ -43,7 +28,7 @@ async function getData(...args) {
|
|||||||
if (urls.length) {
|
if (urls.length) {
|
||||||
pms = urls.reduce((p, url) => p.then(
|
pms = urls.reduce((p, url) => p.then(
|
||||||
results => {
|
results => {
|
||||||
data.push(...results);
|
if (results) data.push(...results);
|
||||||
return redirectTab(tab, url).then(
|
return redirectTab(tab, url).then(
|
||||||
() => extractTabData(tab, itemsSelector, fieldSelectors)
|
() => extractTabData(tab, itemsSelector, fieldSelectors)
|
||||||
);
|
);
|
||||||
@ -55,7 +40,8 @@ async function getData(...args) {
|
|||||||
}
|
}
|
||||||
pms.then(
|
pms.then(
|
||||||
results => {
|
results => {
|
||||||
data.push(...results);
|
if (results) data.push(...results);
|
||||||
|
data.unshift(fieldSelectors);
|
||||||
resolve(new ExtractResult(data));
|
resolve(new ExtractResult(data));
|
||||||
},
|
},
|
||||||
err => reject(err)
|
err => reject(err)
|
||||||
@ -97,7 +83,7 @@ function redirectTab(tab, url) {
|
|||||||
if (url !== u) {
|
if (url !== u) {
|
||||||
curUrl = u;
|
curUrl = u;
|
||||||
let req = {
|
let req = {
|
||||||
from: "GotoUrl",
|
action: ACTION_GOTO_URL,
|
||||||
url: url
|
url: url
|
||||||
}
|
}
|
||||||
sendMessage(tab, req, `Goto url: ${url}`);
|
sendMessage(tab, req, `Goto url: ${url}`);
|
||||||
@ -115,11 +101,11 @@ function redirectTab(tab, url) {
|
|||||||
*/
|
*/
|
||||||
function extractTabData(tab, itemsSelector, fieldSelectors) {
|
function extractTabData(tab, itemsSelector, fieldSelectors) {
|
||||||
let req = {
|
let req = {
|
||||||
from: "Extract",
|
action: ACTION_EXTRACT,
|
||||||
itemsSelector: itemsSelector,
|
itemsSelector: itemsSelector,
|
||||||
fieldSelectors: fieldSelectors
|
fieldSelectors: fieldSelectors
|
||||||
}
|
}
|
||||||
let cond = r => r && r.length;
|
let cond = r => !MSG_ELEMENT_NOT_FOUND.isEqual(r);
|
||||||
return sendMessage(tab, req, 'Extract data from the tab...', cond);
|
return sendMessage(tab, req, 'Extract data from the tab...', cond);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -130,9 +116,9 @@ function extractTabData(tab, itemsSelector, fieldSelectors) {
|
|||||||
*/
|
*/
|
||||||
function reportIn(tab) {
|
function reportIn(tab) {
|
||||||
let req = {
|
let req = {
|
||||||
from: "ReportIn"
|
action: ACTION_REPORT_IN
|
||||||
}
|
}
|
||||||
let cond = r => r == req.from;
|
let cond = r => r == req.action;
|
||||||
return sendMessage(tab, req, 'Check tab availability...', cond);
|
return sendMessage(tab, req, 'Check tab availability...', cond);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -144,51 +130,12 @@ function reportIn(tab) {
|
|||||||
*/
|
*/
|
||||||
function queryUrl(tab, urlExcluded, log) {
|
function queryUrl(tab, urlExcluded, log) {
|
||||||
let req = {
|
let req = {
|
||||||
from: "QueryUrl"
|
action: ACTION_QUERY_URL
|
||||||
}
|
}
|
||||||
let cond = url => url && (!urlExcluded || (urlExcluded && urlExcluded != url));
|
let cond = url => url && (!urlExcluded || (urlExcluded && urlExcluded != url));
|
||||||
return sendMessage(tab, req, log, cond);
|
return sendMessage(tab, req, log, cond);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Repeatedly sending a message to target tab until the response is detected good.
|
|
||||||
* @param {object} tab the table where to send the message
|
|
||||||
* @param {object} req the request data.
|
|
||||||
* @param {function} cond success condition function, r:any=>boolean
|
|
||||||
* @param {number} interval interval for detecting
|
|
||||||
* @param {string} log messages logged to console.
|
|
||||||
* @return {Promise} a promise of the response.
|
|
||||||
*/
|
|
||||||
function sendMessage(tab, req, log, cond, interval) {
|
|
||||||
req.from = "DataExtracter:" + req.from;
|
|
||||||
interval = interval || 500;
|
|
||||||
return new Promise((resolve, reject) => {
|
|
||||||
|
|
||||||
loop();
|
|
||||||
|
|
||||||
async function loop() {
|
|
||||||
// console.log("request for", req.from);
|
|
||||||
let tabAvailable = await getTabByID(tab.id);
|
|
||||||
if (!tabAvailable) {
|
|
||||||
reject("Task interrupted due to the target tab is closed.");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
chrome.tabs.sendMessage(tab.id, req, r => {
|
|
||||||
let flag = !cond || cond(r);
|
|
||||||
if (log) console.log(log, flag ? '(OK)' : '(failed)');
|
|
||||||
if (flag) {
|
|
||||||
resolve(r);
|
|
||||||
} else {
|
|
||||||
setTimeout(() => {
|
|
||||||
loop();
|
|
||||||
}, interval);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
async function createTab(url, active) {
|
async function createTab(url, active) {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
chrome.tabs.create({
|
chrome.tabs.create({
|
||||||
@ -14,9 +14,8 @@ class Extractor {
|
|||||||
*/
|
*/
|
||||||
task(...args) {
|
task(...args) {
|
||||||
if (!testArgs(...args)) {
|
if (!testArgs(...args)) {
|
||||||
console.log(`Invalid call arguments.\n\n${argsToString(...args)}\n${signitures}\n`);
|
console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`);
|
||||||
// break call chain to avoid unexpected task running
|
return this;
|
||||||
return undefined;
|
|
||||||
}
|
}
|
||||||
// given >2 arguments means the task specifies target page,
|
// given >2 arguments means the task specifies target page,
|
||||||
// so it won't accept last task result as url list.
|
// so it won't accept last task result as url list.
|
||||||
@ -31,6 +30,7 @@ class Extractor {
|
|||||||
clear() {
|
clear() {
|
||||||
this._tasks = [];
|
this._tasks = [];
|
||||||
this._results = [];
|
this._results = [];
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Start the task chain.
|
* Start the task chain.
|
||||||
@ -64,6 +64,7 @@ class Extractor {
|
|||||||
result => {
|
result => {
|
||||||
this._results[this._tasks[this._tasks.length - 1]] = result;
|
this._results[this._tasks[this._tasks.length - 1]] = result;
|
||||||
this._running = false;
|
this._running = false;
|
||||||
|
console.log("Tasks are all done.")
|
||||||
this.save();
|
this.save();
|
||||||
}
|
}
|
||||||
).catch(err => {
|
).catch(err => {
|
||||||
@ -119,12 +120,19 @@ class Extractor {
|
|||||||
if (!taskid) return;
|
if (!taskid) return;
|
||||||
const result = this._results[this._tasks[taskid - 1]];
|
const result = this._results[this._tasks[taskid - 1]];
|
||||||
if (!result) {
|
if (!result) {
|
||||||
console.log(`No task result for id (${taskid}). Forget to call ".start()"?`);
|
console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (confirm(
|
if (result.data.length <= 1) { // 1 for selector headers
|
||||||
`Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${result.toString(50) || "- Empty -"}`
|
console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
|
||||||
)) {
|
return;
|
||||||
|
}
|
||||||
|
let msg = `
|
||||||
|
Please confirm to download (${result.data.length - 1} items):
|
||||||
|
|
||||||
|
${result.toString(50) || "- Empty -"}
|
||||||
|
`.trim();
|
||||||
|
if (confirm(msg)) {
|
||||||
saveFile(result, "text/csv");
|
saveFile(result, "text/csv");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
3
scripts/background/helpers.js
Normal file
3
scripts/background/helpers.js
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
function $(...args) {
|
||||||
|
return new Extractor().task(...args).start();
|
||||||
|
}
|
||||||
50
scripts/background/messaging.js
Normal file
50
scripts/background/messaging.js
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
|
||||||
|
/**
|
||||||
|
* Repeatedly sending a message to target tab until the response is detected good.
|
||||||
|
* @param {object} tab the table where to send the message
|
||||||
|
* @param {object} req the request data.
|
||||||
|
* @param {function} cond success condition function, r:any=>boolean
|
||||||
|
* @param {number} interval interval for detecting
|
||||||
|
* @param {string} log messages logged to console.
|
||||||
|
* @return {Promise} a promise of the response.
|
||||||
|
*/
|
||||||
|
function sendMessage(tab, req, log, cond, interval) {
|
||||||
|
interval = interval || 500;
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
|
||||||
|
loop();
|
||||||
|
|
||||||
|
async function loop() {
|
||||||
|
// console.log("request for", req.action);
|
||||||
|
let tabAvailable = await getTabByID(tab.id);
|
||||||
|
if (!tabAvailable) {
|
||||||
|
reject("Task interrupted due to the target tab is closed.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
chrome.tabs.sendMessage(tab.id, req, r => {
|
||||||
|
if (chrome.runtime.lastError) {
|
||||||
|
reject(chrome.runtime.lastError.message);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let flag = !cond || cond(r);
|
||||||
|
if (log) console.log(log, flag ? '(OK)' : '(failed)');
|
||||||
|
if (flag) {
|
||||||
|
resolve(r);
|
||||||
|
} else {
|
||||||
|
setTimeout(() => {
|
||||||
|
loop();
|
||||||
|
}, interval);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
|
||||||
|
if (!message.action || !message.action.startsWith(EXT_NAME)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
sendResponse("Calling from user pages is not allowed.");
|
||||||
|
return;
|
||||||
|
});
|
||||||
@ -1,53 +1,26 @@
|
|||||||
const signitures = `
|
const signitures = `
|
||||||
## Usage
|
## Usage
|
||||||
new Extractor().task(...args).task(...args).start();
|
// single task
|
||||||
|
$(...args);
|
||||||
|
// managed task chains
|
||||||
|
e = new Extractor();
|
||||||
|
e.task(...args).task(...args).start();
|
||||||
|
|
||||||
## Extractor.task() Signitures:
|
## Task Call Signitures:
|
||||||
----------------------------
|
function(itemsSelector:string, fieldSelectors:string[]);
|
||||||
|
function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number);
|
||||||
|
function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[]);
|
||||||
|
function(itemsSelector:string, fieldSelectors:string[], urls:string[]);
|
||||||
|
|
||||||
task(itemsSelector:string, fieldSelectors:string[])
|
## Example:
|
||||||
task(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
|
// extract all links text & url under '.item' elements
|
||||||
task(itemsSelector:string, fieldSelectors:string, url:string, pages:number[])
|
// use 'selector@attr' to get attribute of the field elements
|
||||||
task(itemsSelector:string, fieldSelectors:string[], urls:string[])
|
$(".item", ["a", "a@href"]);
|
||||||
task(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
|
||||||
|
|
||||||
## See Detailed Help:
|
## See Detailed Help:
|
||||||
https://git.jebbs.co/jebbs/data-extracter-extesion
|
https://git.jebbs.co/jebbs/data-extracter-extesion
|
||||||
`.trim();
|
`.trim();
|
||||||
|
|
||||||
|
|
||||||
function saveFile(data, mimeType, fileName) {
|
|
||||||
fileName = fileName || document.title || "result";
|
|
||||||
var blob;
|
|
||||||
if (typeof window.Blob == "function") {
|
|
||||||
blob = new Blob([data], {
|
|
||||||
type: mimeType
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
var BlobBuiler = window.BlobBuilder || window.MozBlobBuilder || window.WebKitBlobBuilder || window.MSBlobBuilder;
|
|
||||||
var builer = new BlobBuiler();
|
|
||||||
builer.append(data);
|
|
||||||
blob = builer.getBlob(mimeType)
|
|
||||||
}
|
|
||||||
var URL = window.URL || window.webkitURL;
|
|
||||||
var url = URL.createObjectURL(blob);
|
|
||||||
var link = document.createElement("a");
|
|
||||||
if ('download' in link) {
|
|
||||||
link.style.visibility = "hidden";
|
|
||||||
link.href = url;
|
|
||||||
link.download = fileName;
|
|
||||||
document.body.appendChild(link);
|
|
||||||
var j = document.createEvent("MouseEvents");
|
|
||||||
j.initEvent("click", true, true);
|
|
||||||
link.dispatchEvent(j);
|
|
||||||
document.body.removeChild(link)
|
|
||||||
} else if (navigator.msSaveBlob) {
|
|
||||||
navigator.msSaveBlob(blob, fileName)
|
|
||||||
} else {
|
|
||||||
location.href = url
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function testArgs(...args) {
|
function testArgs(...args) {
|
||||||
switch (args.length) {
|
switch (args.length) {
|
||||||
case 0, 1:
|
case 0, 1:
|
||||||
@ -1,58 +0,0 @@
|
|||||||
function extract(...args) {
|
|
||||||
let message = {
|
|
||||||
from: "DataExtracter:Extract",
|
|
||||||
args: args
|
|
||||||
}
|
|
||||||
chrome.runtime.sendMessage(message, r => {
|
|
||||||
if (r) console.log(r);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
chrome.runtime.onMessage.addListener(
|
|
||||||
function (request, sender, sendResponse) {
|
|
||||||
if (!request.from) return;
|
|
||||||
let [ext, act] = request.from.split(":");
|
|
||||||
if (ext.toLowerCase() !== 'dataextracter') return;
|
|
||||||
// console.log(request);
|
|
||||||
switch (act.toLowerCase()) {
|
|
||||||
case "extract":
|
|
||||||
let data = extractTabData(request.itemsSelector, request.fieldSelectors);
|
|
||||||
if (sendResponse) sendResponse(data);
|
|
||||||
break;
|
|
||||||
case "gotourl":
|
|
||||||
window.location.replace(request.url);
|
|
||||||
if (sendResponse) sendResponse(request.url);
|
|
||||||
break;
|
|
||||||
case "reportin":
|
|
||||||
if (sendResponse) sendResponse(request.from);
|
|
||||||
break;
|
|
||||||
case "queryurl":
|
|
||||||
if (sendResponse) sendResponse(window.location.href);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
function extractTabData(itemsSelector, fieldSelectors) {
|
|
||||||
let fieldNotFound = false;
|
|
||||||
let results = $(itemsSelector).toArray().map(
|
|
||||||
item => {
|
|
||||||
return fieldSelectors.map(
|
|
||||||
selector => {
|
|
||||||
let [cls, attr] = selector.split('@').slice(0, 2);
|
|
||||||
// TODO: close tab to cancel task tip
|
|
||||||
if (fieldNotFound) return;
|
|
||||||
let fieldVals = $(item).find(cls).toArray();
|
|
||||||
if (!fieldVals.length) {
|
|
||||||
fieldNotFound = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
return fieldVals.map(find => attr ? find[attr] : find.textContent.trim()).join('\n')
|
|
||||||
}
|
|
||||||
)
|
|
||||||
}
|
|
||||||
);
|
|
||||||
return fieldNotFound ? [] : results
|
|
||||||
}
|
|
||||||
53
scripts/content/content.js
Normal file
53
scripts/content/content.js
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
chrome.runtime.onMessage.addListener(
|
||||||
|
function (request, sender, sendResponse) {
|
||||||
|
if (!request.action) return;
|
||||||
|
// console.log("Recieved request:",request);
|
||||||
|
switch (request.action) {
|
||||||
|
case ACTION_EXTRACT:
|
||||||
|
let data = extract(request.itemsSelector, request.fieldSelectors);
|
||||||
|
if (sendResponse) sendResponse(data);
|
||||||
|
break;
|
||||||
|
case ACTION_GOTO_URL:
|
||||||
|
window.location.replace(request.url);
|
||||||
|
if (sendResponse) sendResponse(request.url);
|
||||||
|
break;
|
||||||
|
case ACTION_REPORT_IN:
|
||||||
|
if (sendResponse) sendResponse(request.action);
|
||||||
|
break;
|
||||||
|
case ACTION_QUERY_URL:
|
||||||
|
if (sendResponse) sendResponse(window.location.href);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
function extract(itemsSelector, fieldSelectors) {
|
||||||
|
// since some elements may be loaded asynchronously.
|
||||||
|
// if one field is never found, we should return undefined,
|
||||||
|
// so that senders can detect to retry until elements loaded.
|
||||||
|
// If user writes wrong selectors, the task retries infinitely.
|
||||||
|
let fieldFound = {};
|
||||||
|
let items = Array.from(document.querySelectorAll(itemsSelector));
|
||||||
|
// items may not loaded yet, tell the sender to retry.
|
||||||
|
if (!items.length) return MSG_ELEMENT_NOT_FOUND;
|
||||||
|
let results = items.map(
|
||||||
|
item => {
|
||||||
|
return fieldSelectors.map(
|
||||||
|
selector => {
|
||||||
|
let [cls, attr] = selector.split('@').slice(0, 2);
|
||||||
|
let fieldVals = Array.from(item.querySelectorAll(cls));
|
||||||
|
if (!fieldVals.length) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
fieldFound[selector] = true;
|
||||||
|
return fieldVals.map(find => attr ? find[attr] : find.textContent.trim()).join('\n')
|
||||||
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
);
|
||||||
|
// if it exists a field, which is not found in any row, the sender should retry.
|
||||||
|
let shouldWait = fieldSelectors.reduce((p, c) => p || !fieldFound[c], false);
|
||||||
|
return shouldWait ? MSG_ELEMENT_NOT_FOUND : results
|
||||||
|
}
|
||||||
4
scripts/jquery.min.js
vendored
4
scripts/jquery.min.js
vendored
File diff suppressed because one or more lines are too long
8
scripts/shared/common.js
Normal file
8
scripts/shared/common.js
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
const EXT_NAME = "DataExtracter";
|
||||||
|
|
||||||
|
const ACTION_EXTRACT = `${EXT_NAME}:Extract`;
|
||||||
|
const ACTION_GOTO_URL = `${EXT_NAME}:GoToTUL`;
|
||||||
|
const ACTION_REPORT_IN = `${EXT_NAME}:ReportIn`;
|
||||||
|
const ACTION_QUERY_URL = `${EXT_NAME}:QueryURL`;
|
||||||
|
|
||||||
|
const MSG_ELEMENT_NOT_FOUND = new ConstMessage(1, "No element found for at least one selector, maybe it's not loaded yet");
|
||||||
42
scripts/shared/tools.js
Normal file
42
scripts/shared/tools.js
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
class ConstMessage {
|
||||||
|
constructor(id, message) {
|
||||||
|
this.id = id;
|
||||||
|
this.message = message;
|
||||||
|
}
|
||||||
|
isEqual(err) {
|
||||||
|
if (!err || !err.id) return false;
|
||||||
|
return this.id == err.id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function saveFile(data, mimeType, fileName) {
|
||||||
|
fileName = fileName || document.title || "result";
|
||||||
|
var blob;
|
||||||
|
if (typeof window.Blob == "function") {
|
||||||
|
blob = new Blob([data], {
|
||||||
|
type: mimeType
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
var BlobBuiler = window.BlobBuilder || window.MozBlobBuilder || window.WebKitBlobBuilder || window.MSBlobBuilder;
|
||||||
|
var builer = new BlobBuiler();
|
||||||
|
builer.append(data);
|
||||||
|
blob = builer.getBlob(mimeType)
|
||||||
|
}
|
||||||
|
var URL = window.URL || window.webkitURL;
|
||||||
|
var url = URL.createObjectURL(blob);
|
||||||
|
var link = document.createElement("a");
|
||||||
|
if ('download' in link) {
|
||||||
|
link.style.visibility = "hidden";
|
||||||
|
link.href = url;
|
||||||
|
link.download = fileName;
|
||||||
|
document.body.appendChild(link);
|
||||||
|
var j = document.createEvent("MouseEvents");
|
||||||
|
j.initEvent("click", true, true);
|
||||||
|
link.dispatchEvent(j);
|
||||||
|
document.body.removeChild(link)
|
||||||
|
} else if (navigator.msSaveBlob) {
|
||||||
|
navigator.msSaveBlob(blob, fileName)
|
||||||
|
} else {
|
||||||
|
location.href = url
|
||||||
|
}
|
||||||
|
}
|
||||||
7
styles/bootstrap.min.js
vendored
7
styles/bootstrap.min.js
vendored
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user