helper function $

This commit is contained in:
2020-01-10 13:22:37 +08:00
parent 26c6c1159e
commit 4656e4ff64
5 changed files with 48 additions and 32 deletions

View File

@ -21,7 +21,8 @@
"scripts/background/result.js",
"scripts/background/signiture.js",
"scripts/background/actions.js",
"scripts/background/extractor.js"
"scripts/background/extractor.js",
"scripts/background/helpers.js"
],
"persistent": false
},

View File

@ -5,7 +5,7 @@ DataExtracter helps you quickly extract data from any web pages.
All you need to do is:
- Find out the selectors (JQuery selectors) for target data
- Find out the selectors for target data
- Type scripts in the console of the `extension background page`, as introduced below.
![](images/console.png)
@ -14,40 +14,40 @@ All you need to do is:
Extract current page
```js
new Extractor().task(".list-item", ["a.title", "p.content"]).start();
$('.item', ['a', 'a@href']);
```
Extract multiple pages (1-10, interval 1)
```js
new Extractor().task(".list-item", ["a.title", "p.content"],"http://sample.com/?pn=${page}", 1, 10, 1).start();
$('.item', ['a', 'a@href'],"http://sample.com/?pn=${page}", 1, 10, 1);
```
Extract multiple urls (list)
```js
new Extractor().task(".list-item", ["a.title", "p.content"],["http://sample.com/abc","http://sample.com/xyz"]).start();
$('.item', ['a', 'a@href'],["http://sample.com/abc","http://sample.com/xyz"]);
```
Extract specified pages (1,3,5)
```js
new Extractor().task(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=${page}", [1, 3, 5]).start();
$('.item', ['a', 'a@href'], "http://sample.com/?pn=${page}", [1, 3, 5]);
```
## Extractor.task() Signitures
## Task Call Signatures
```ts
// a task extracting data from current page
task(itemsSelector:string, fieldSelectors:string[])
// a task extracting data from a range of pages
task(itemsSelector:string, fieldSelectors:string[], urlTemplate:string, from:number, to:number, interval:number)
// a task extracting data from a list of pages
task(itemsSelector:string, fieldSelectors:string, urlTemplate:string, pages:number[])
// a task extracting data from a list of pages
task(itemsSelector:string, fieldSelectors:string[], urls:string[])
// a task extracting data of urls which extracted from last task result
task(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
// extract data from current page
function (itemsSelector:string, fieldSelectors:string[])
// extract data from a range of pages
function (itemsSelector:string, fieldSelectors:string[], urlTemplate:string, from:number, to:number, interval:number)
// extract data from specified pages of a url template
function (itemsSelector:string, fieldSelectors:string[], urlTemplate:string, pages:number[])
// extract data from a list of pages
function (itemsSelector:string, fieldSelectors:string[], urls:string[])
// extract data from urls which were extracted by the previous task
function (itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
```
## Advanced Usage
@ -65,7 +65,7 @@ The only way to stop tasks before they finish is `Closing the host tab`.
e.g.: link text and target (use 'selector@attribute')
```js
new Extractor().task('.list-item', ['a.title', 'a.title@href']).start();
new Extractor().task('.item', ['a', 'a@href']).start();
```
### Use Task Chain.
@ -74,7 +74,7 @@ e.g.: Collect links from `http://sample.com/abc`, then, Extract data of each lin
```js
new Extractor()
.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"])
.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
.task('list-item', ["a.title", "p.content"])
.start();
```
@ -85,7 +85,7 @@ To a multiple task (chain) Extractor `e`:
```js
e = new Extractor()
e.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"])
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
.task('list-item', ["a.title", "p.content"])
.start();
```
@ -114,7 +114,7 @@ Here we have 2 tasks:
```js
e = new Extractor()
e.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"])
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
.task('list-item', ["a.title", "p.content"])
.start();
```

View File

@ -15,7 +15,6 @@ class Extractor {
task(...args) {
if (!testArgs(...args)) {
console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`);
// break call chain to avoid unexpected task running
return this;
}
// given >2 arguments means the task specifies target page,
@ -31,6 +30,7 @@ class Extractor {
clear() {
this._tasks = [];
this._results = [];
return this;
}
/**
* Start the task chain.
@ -64,6 +64,7 @@ class Extractor {
result => {
this._results[this._tasks[this._tasks.length - 1]] = result;
this._running = false;
console.log("Tasks are all done.")
this.save();
}
).catch(err => {
@ -122,9 +123,16 @@ class Extractor {
console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
return;
}
if (confirm(
`Click confirm to download if the sample data looks good (${result.data.length} items)\n\n${result.toString(50) || "- Empty -"}`
)) {
if (result.data.length <= 1) { // 1 for selector headers
console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
return;
}
let msg = `
Please confirm to download (${result.data.length - 1} items)
${result.toString(50) || "- Empty -"}
`.trim();
if (confirm(msg)) {
saveFile(result, "text/csv");
}
}

View File

@ -0,0 +1,3 @@
/**
 * Console shorthand for running a single extraction task.
 *
 * Builds a fresh Extractor, registers one task with the given arguments,
 * and starts it immediately.
 *
 * @param {...*} args - Forwarded unchanged to `Extractor.task()`.
 * @returns {*} Whatever the final `start()` call returns.
 */
function $(...args) {
    const extractor = new Extractor();
    const chain = extractor.task(...args);
    return chain.start();
}

View File

@ -1,17 +1,21 @@
const signitures = `
## Usage
new Extractor().task(...args).task(...args).start();
// single task
$(...args);
// managed task chains
e = new Extractor();
e.task(...args).task(...args).start();
## Extractor.task() Signitures:
function(itemsSelector:string, fieldSelectors:string[])
function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
function(itemsSelector:string, fieldSelectors:string[], urls:string[])
## Task Call Signitures:
function(itemsSelector:string, fieldSelectors:string[]);
function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number);
function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[]);
function(itemsSelector:string, fieldSelectors:string[], urls:string[]);
## Example:
// extract all links text & url under '.item' elements
// use 'selector@attr' to get attribute of the field elements
new Extractor().task(".item", ["a", "a@href"]).start();
$(".item", ["a", "a@href"]);
## See Detailed Help:
https://git.jebbs.co/jebbs/data-extracter-extesion