helper function $
This commit is contained in:
@ -21,7 +21,8 @@
|
||||
"scripts/background/result.js",
|
||||
"scripts/background/signiture.js",
|
||||
"scripts/background/actions.js",
|
||||
"scripts/background/extractor.js"
|
||||
"scripts/background/extractor.js",
|
||||
"scripts/background/helpers.js"
|
||||
],
|
||||
"persistent": false
|
||||
},
|
||||
|
||||
40
readme.md
40
readme.md
@ -5,7 +5,7 @@ DataExtracter helps you quickly extract data from any web pages.
|
||||
|
||||
All you need to do is:
|
||||
|
||||
- Find out the selectors (JQuery selectors) for target data
|
||||
- Find out the selectors for target data
|
||||
- Type scripts in the console of the `extension background page`, as introduced below.
|
||||
|
||||

|
||||
@ -14,40 +14,40 @@ All you need to do is:
|
||||
|
||||
Extract current page
|
||||
```js
|
||||
new Extractor().task(".list-item", ["a.title", "p.content"]).start();
|
||||
$('.item', ['a', 'a@href']);
|
||||
```
|
||||
|
||||
Extract multiple pages (1-10, interval 1)
|
||||
|
||||
```js
|
||||
new Extractor().task(".list-item", ["a.title", "p.content"],"http://sample.com/?pn=${page}", 1, 10, 1).start();
|
||||
$('.item', ['a', 'a@href'],"http://sample.com/?pn=${page}", 1, 10, 1);
|
||||
```
|
||||
|
||||
Extract multiple urls (list)
|
||||
|
||||
```js
|
||||
new Extractor().task(".list-item", ["a.title", "p.content"],["http://sample.com/abc","http://sample.com/xyz"]).start();
|
||||
$('.item', ['a', 'a@href'],["http://sample.com/abc","http://sample.com/xyz"]);
|
||||
```
|
||||
|
||||
Extract specified pages (1,3,5)
|
||||
|
||||
```js
|
||||
new Extractor().task(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=${page}", [1, 3, 5]).start();
|
||||
$('.item', ['a', 'a@href'], "http://sample.com/?pn=${page}", [1, 3, 5]);
|
||||
```
|
||||
|
||||
## Extractor.task() Signitures
|
||||
## Task Call Signatures
|
||||
|
||||
```ts
|
||||
// a task extracting data from current page
|
||||
task(itemsSelector:string, fieldSelectors:string[])
|
||||
// a task extracting data from a range of pages
|
||||
task(itemsSelector:string, fieldSelectors:string[], urlTemplate:string, from:number, to:number, interval:number)
|
||||
// a task extracting data from a list of pages
|
||||
task(itemsSelector:string, fieldSelectors:string, urlTemplate:string, pages:number[])
|
||||
// a task extracting data from a list of pages
|
||||
task(itemsSelector:string, fieldSelectors:string[], urls:string[])
|
||||
// a task extracting data of urls which extracted from last task result
|
||||
task(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
||||
// extract data from current page
|
||||
function (itemsSelector:string, fieldSelectors:string[])
|
||||
// extract data from a range of pages
|
||||
function (itemsSelector:string, fieldSelectors:string[], urlTemplate:string, from:number, to:number, interval:number)
|
||||
// extract data from a list of pages
|
||||
function (itemsSelector:string, fieldSelectors:string[], urlTemplate:string, pages:number[])
|
||||
// extract data from a list of pages
|
||||
function (itemsSelector:string, fieldSelectors:string[], urls:string[])
|
||||
// extract data from urls which were extracted by the last task's result
|
||||
function (itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
||||
```
|
||||
|
||||
## Advanced Usage
|
||||
@ -65,7 +65,7 @@ The only way to stop tasks before its finish, is `Closing the host tab`.
|
||||
e.g.: link text and target (use 'selector@attribute')
|
||||
|
||||
```js
|
||||
new Extractor().task('.list-item', ['a.title', 'a.title@href']).start();
|
||||
new Extractor().task('.item', ['a', 'a@href']).start();
|
||||
```
|
||||
|
||||
### Use Task Chain.
|
||||
@ -74,7 +74,7 @@ e.g.: Collect links from `http://sample.com/abc`, then, Extract data of each lin
|
||||
|
||||
```js
|
||||
new Extractor()
|
||||
.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"])
|
||||
.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||
.task('list-item', ["a.title", "p.content"])
|
||||
.start();
|
||||
```
|
||||
@ -85,7 +85,7 @@ To a multiple task (chain) Extractor `e`:
|
||||
|
||||
```js
|
||||
e = new Extractor()
|
||||
e.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"])
|
||||
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||
.task('list-item', ["a.title", "p.content"])
|
||||
.start();
|
||||
```
|
||||
@ -114,7 +114,7 @@ Here we have 2 tasks:
|
||||
|
||||
```js
|
||||
e = new Extractor()
|
||||
e.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"])
|
||||
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||
.task('list-item', ["a.title", "p.content"])
|
||||
.start();
|
||||
```
|
||||
|
||||
@ -15,7 +15,6 @@ class Extractor {
|
||||
task(...args) {
|
||||
if (!testArgs(...args)) {
|
||||
console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`);
|
||||
// break call chain to avoid unexpected task running
|
||||
return this;
|
||||
}
|
||||
// given >2 arguments means the task specifies target page,
|
||||
@ -31,6 +30,7 @@ class Extractor {
|
||||
clear() {
|
||||
this._tasks = [];
|
||||
this._results = [];
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* Start the task chain.
|
||||
@ -64,6 +64,7 @@ class Extractor {
|
||||
result => {
|
||||
this._results[this._tasks[this._tasks.length - 1]] = result;
|
||||
this._running = false;
|
||||
console.log("Tasks are all done.")
|
||||
this.save();
|
||||
}
|
||||
).catch(err => {
|
||||
@ -122,9 +123,16 @@ class Extractor {
|
||||
console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
|
||||
return;
|
||||
}
|
||||
if (confirm(
|
||||
`Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${result.toString(50) || "- Empty -"}`
|
||||
)) {
|
||||
if (result.data.length <= 1) { // 1 for selector headers
|
||||
console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
|
||||
return;
|
||||
}
|
||||
let msg = `
|
||||
Please confirm to download (${result.data.length - 1} items):
|
||||
|
||||
${result.toString(50) || "- Empty -"}
|
||||
`.trim();
|
||||
if (confirm(msg)) {
|
||||
saveFile(result, "text/csv");
|
||||
}
|
||||
}
|
||||
|
||||
3
scripts/background/helpers.js
Normal file
3
scripts/background/helpers.js
Normal file
@ -0,0 +1,3 @@
|
||||
/**
 * Shorthand for running a single extraction task.
 *
 * Creates a new Extractor, forwards every argument unchanged to its
 * task() method, and immediately calls start() on the value task() returns.
 *
 * @param {...*} args - Arguments passed straight through to Extractor.task().
 * @returns The value returned by start(); its type is not visible from this
 *     file — NOTE(review): confirm against Extractor.start().
 */
function $(...args) {
|
||||
return new Extractor().task(...args).start();
|
||||
}
|
||||
@ -1,17 +1,21 @@
|
||||
const signitures = `
|
||||
## Usage
|
||||
new Extractor().task(...args).task(...args).start();
|
||||
// single task
|
||||
$(...args);
|
||||
// managed task chains
|
||||
e = new Extractor();
|
||||
e.task(...args).task(...args).start();
|
||||
|
||||
## Extractor.task() Signitures:
|
||||
function(itemsSelector:string, fieldSelectors:string[])
|
||||
function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
|
||||
function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
|
||||
function(itemsSelector:string, fieldSelectors:string[], urls:string[])
|
||||
## Task Call Signitures:
|
||||
function(itemsSelector:string, fieldSelectors:string[]);
|
||||
function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number);
|
||||
function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[]);
|
||||
function(itemsSelector:string, fieldSelectors:string[], urls:string[]);
|
||||
|
||||
## Example:
|
||||
// extract all links text & url under '.item' elements
|
||||
// use 'selector@attr' to get attribute of the field elements
|
||||
new Extractor().task(".item", ["a", "a@href"]).start();
|
||||
$(".item", ["a", "a@href"]);
|
||||
|
||||
## See Detailed Help:
|
||||
https://git.jebbs.co/jebbs/data-extracter-extesion
|
||||
|
||||
Reference in New Issue
Block a user