helper function $

This commit is contained in:
2020-01-10 13:22:37 +08:00
parent 26c6c1159e
commit 4656e4ff64
5 changed files with 48 additions and 32 deletions

View File

@ -21,7 +21,8 @@
"scripts/background/result.js", "scripts/background/result.js",
"scripts/background/signiture.js", "scripts/background/signiture.js",
"scripts/background/actions.js", "scripts/background/actions.js",
"scripts/background/extractor.js" "scripts/background/extractor.js",
"scripts/background/helpers.js"
], ],
"persistent": false "persistent": false
}, },

View File

@ -5,7 +5,7 @@ DataExtracter helps you quickly extract data from any web pages.
All you need to do is: All you need to do is:
- Find out the selectors (JQuery selectors) for target data - Find out the selectors for target data
- Type scripts in the console of `extension background page`, as introduced below. - Type scripts in the console of `extension background page`, as introduced below.
![](images/console.png) ![](images/console.png)
@ -14,40 +14,40 @@ All you need to do is:
Extract current page Extract current page
```js ```js
new Extractor().task(".list-item", ["a.title", "p.content"]).start(); $('.item', ['a', 'a@href']);
``` ```
Extract multiple pages (1-10, interval 1) Extract multiple pages (1-10, interval 1)
```js ```js
new Extractor().task(".list-item", ["a.title", "p.content"],"http://sample.com/?pn=${page}", 1, 10, 1).start(); $('.item', ['a', 'a@href'],"http://sample.com/?pn=${page}", 1, 10, 1);
``` ```
Extract multiple urls (list) Extract multiple urls (list)
```js ```js
new Extractor().task(".list-item", ["a.title", "p.content"],["http://sample.com/abc","http://sample.com/xyz"]).start(); $('.item', ['a', 'a@href'],["http://sample.com/abc","http://sample.com/xyz"]);
``` ```
Extract specified pages (1,3,5) Extract specified pages (1,3,5)
```js ```js
new Extractor().task(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=${page}", [1, 3, 5]).start(); $('.item', ['a', 'a@href'], "http://sample.com/?pn=${page}", [1, 3, 5]);
``` ```
## Extractor.task() Signatures ## Task Call Signatures
```ts ```ts
// a task extracting data from current page // extract data from current page
task(itemsSelector:string, fieldSelectors:string[]) function (itemsSelector:string, fieldSelectors:string[])
// a task extracting data from a range of pages // extract data from a range of pages
task(itemsSelector:string, fieldSelectors:string[], urlTemplate:string, from:number, to:number, interval:number) function (itemsSelector:string, fieldSelectors:string[], urlTemplate:string, from:number, to:number, interval:number)
// a task extracting data from a list of pages // extract data from a list of pages
task(itemsSelector:string, fieldSelectors:string[], urlTemplate:string, pages:number[]) function (itemsSelector:string, fieldSelectors:string[], urlTemplate:string, pages:number[])
// a task extracting data from a list of urls // extract data from a list of urls
task(itemsSelector:string, fieldSelectors:string[], urls:string[]) function (itemsSelector:string, fieldSelectors:string[], urls:string[])
// a task extracting data of urls which extracted from last task result // extract data of urls which extracted from last task result
task(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult) function (itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
``` ```
## Advanced Usage ## Advanced Usage
@ -65,7 +65,7 @@ The only way to stop tasks before its finish, is `Closing the host tab`.
e.g.: link text and target (use 'selector@attribute') e.g.: link text and target (use 'selector@attribute')
```js ```js
new Extractor().task('.list-item', ['a.title', 'a.title@href']).start(); new Extractor().task('.item', ['a', 'a@href']).start();
``` ```
### Use Task Chain. ### Use Task Chain.
@ -74,7 +74,7 @@ e.g.: Collect links from `http://sample.com/abc`, then, Extract data of each lin
```js ```js
new Extractor() new Extractor()
.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"]) .task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
.task('list-item', ["a.title", "p.content"]) .task('list-item', ["a.title", "p.content"])
.start(); .start();
``` ```
@ -85,7 +85,7 @@ To a multiple task (chain) Extractor `e`:
```js ```js
e = new Extractor() e = new Extractor()
e.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"]) e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
.task('list-item', ["a.title", "p.content"]) .task('list-item', ["a.title", "p.content"])
.start(); .start();
``` ```
@ -114,7 +114,7 @@ Here we have 2 tasks:
```js ```js
e = new Extractor() e = new Extractor()
e.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"]) e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
.task('list-item', ["a.title", "p.content"]) .task('list-item', ["a.title", "p.content"])
.start(); .start();
``` ```

View File

@ -15,7 +15,6 @@ class Extractor {
task(...args) { task(...args) {
if (!testArgs(...args)) { if (!testArgs(...args)) {
console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`); console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`);
// break call chain to avoid unexpected task running
return this; return this;
} }
// given >2 arguments means the task specifies target page, // given >2 arguments means the task specifies target page,
@ -31,6 +30,7 @@ class Extractor {
clear() { clear() {
this._tasks = []; this._tasks = [];
this._results = []; this._results = [];
return this;
} }
/** /**
* Start the task chain. * Start the task chain.
@ -64,6 +64,7 @@ class Extractor {
result => { result => {
this._results[this._tasks[this._tasks.length - 1]] = result; this._results[this._tasks[this._tasks.length - 1]] = result;
this._running = false; this._running = false;
console.log("Tasks are all done.")
this.save(); this.save();
} }
).catch(err => { ).catch(err => {
@ -122,9 +123,16 @@ class Extractor {
console.log(`No result for task #${taskid}. Forget to call ".start()"?`); console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
return; return;
} }
if (confirm( if (result.data.length <= 1) { // 1 for selector headers
`Click confirm to download if the sample data looks good (${result.data.length} items)\n\n${result.toString(50) || "- Empty -"}` console.log(`No result for task #${taskid}. Forget to call ".start()"?`);
)) { return;
}
let msg = `
Please confirm to download (${result.data.length - 1} items)
${result.toString(50) || "- Empty -"}
`.trim();
if (confirm(msg)) {
saveFile(result, "text/csv"); saveFile(result, "text/csv");
} }
} }

View File

@ -0,0 +1,3 @@
/**
 * Console shortcut for running a single extraction task.
 *
 * Builds a fresh Extractor, registers one task with the given arguments,
 * and starts it immediately.
 *
 * @param {...*} args - Forwarded unchanged to `Extractor.task()`.
 * @returns {*} Whatever `Extractor.start()` returns.
 */
function $(...args) {
    const extractor = new Extractor();
    const withTask = extractor.task(...args);
    return withTask.start();
}

View File

@ -1,17 +1,21 @@
const signitures = ` const signitures = `
## Usage ## Usage
new Extractor().task(...args).task(...args).start(); // single task
$(...args);
// managed task chains
e = new Extractor();
e.task(...args).task(...args).start();
## Extractor.task() Signitures: ## Task Call Signitures:
function(itemsSelector:string, fieldSelectors:string[]) function(itemsSelector:string, fieldSelectors:string[]);
function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number) function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number);
function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[]) function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[]);
function(itemsSelector:string, fieldSelectors:string[], urls:string[]) function(itemsSelector:string, fieldSelectors:string[], urls:string[]);
## Example: ## Example:
// extract all links text & url under '.item' elements // extract all links text & url under '.item' elements
// use 'selector@attr' to get attribute of the field elements // use 'selector@attr' to get attribute of the field elements
new Extractor().task(".item", ["a", "a@href"]).start(); $(".item", ["a", "a@href"]);
## See Detailed Help: ## See Detailed Help:
https://git.jebbs.co/jebbs/data-extracter-extesion https://git.jebbs.co/jebbs/data-extracter-extesion