extract attributes & improve help
This commit is contained in:
@ -35,27 +35,25 @@
|
|||||||
<div class="row">
|
<div class="row">
|
||||||
|
|
||||||
<div class="col">
|
<div class="col">
|
||||||
<h6>Examples</h6>
|
<h6>Quick Start</h6>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col">
|
<div class="col">
|
||||||
<div class="alert alert-success small">
|
<div class="alert alert-success small">
|
||||||
|
<p>
|
||||||
|
<b>View Help</b>:
|
||||||
|
<br>extract()
|
||||||
|
</p>
|
||||||
<p>
|
<p>
|
||||||
<b>Extract current page</b>:
|
<b>Extract current page</b>:
|
||||||
<br>extract("table tr", ["td:nth-child(0)","td:nth-child(1)"])
|
<br>extract(".list-item", ["a.title", "p.content"])
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
<b>Extract pages of sample.com (1-10, interval 1)</b>:
|
<b>Extract multiple pages (1-10, interval 1)</b>:
|
||||||
<br>extract("table tr", ["td:nth-child(0)","td:nth-child(1)"],"http://sample.com/?pn=${page}",1,10,1)
|
<br>extract(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=${page}", 1, 10, 1)
|
||||||
|
|
||||||
</p>
|
</p>
|
||||||
<p>
|
|
||||||
<b>Extract specified pages (1,3,5)</b>:
|
|
||||||
<br>extract("table tr", ["td:nth-child(0)","td:nth-child(1)"],"http://sample.com/?pn=${page}",[1,3,5])
|
|
||||||
|
|
||||||
</p>
|
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@ -25,17 +25,44 @@
|
|||||||
function extractData(itemsSelector, fieldSelectors) {
|
function extractData(itemsSelector, fieldSelectors) {
|
||||||
return $(itemsSelector).toArray().map(
|
return $(itemsSelector).toArray().map(
|
||||||
item => fieldSelectors.map(
|
item => fieldSelectors.map(
|
||||||
cls => $(item).find(cls).toArray().map(find => find.textContent.trim()).join('\n')
|
selector => {
|
||||||
|
let [cls, attr] = selector.split('@').slice(0, 2);
|
||||||
|
return $(item).find(cls).toArray().map(find => attr ? find[attr] : find.textContent.trim()).join('\n')
|
||||||
|
}
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function extract(...args) {
|
function extract(...args) {
|
||||||
let sig = `Invalid call args.
|
let sig = `
|
||||||
|
# DataExtracter Help
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
## Signatures:
|
||||||
|
----------------------------
|
||||||
|
|
||||||
function extract(itemsSelector:string, fieldSelectors:string[])
|
function extract(itemsSelector:string, fieldSelectors:string[])
|
||||||
function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
|
function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
|
||||||
function extract(itemsSelector:string, fieldSelectors:string, url:string, pages:number[])`;
|
function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
|
||||||
|
|
||||||
|
## Examples:
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
### Extract current page
|
||||||
|
extract(".list-item", ["a.title", "p.content"])
|
||||||
|
|
||||||
|
### Extract multiple pages (1-10, interval 1)
|
||||||
|
extract(".list-item", ["a.title", "p.content"],"http://sample.com/?pn=\${page}", 1, 10, 1)
|
||||||
|
|
||||||
|
### Extract specified pages (1,3,5)
|
||||||
|
extract(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=\${page}", [1, 3, 5])
|
||||||
|
|
||||||
|
## Advanced Examples:
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
### Extract link text and target (use 'selector@attribute')
|
||||||
|
extract('.list-item', ['a.title', 'a.title@href'])
|
||||||
|
`.trim();
|
||||||
if (!testArgs(...args)) {
|
if (!testArgs(...args)) {
|
||||||
console.log(sig);
|
console.log(sig);
|
||||||
return;
|
return;
|
||||||
|
|||||||
Reference in New Issue
Block a user