extract attributes & improve help

This commit is contained in:
2018-09-21 12:10:15 +08:00
parent 79e64f4735
commit 667bb49e0d
2 changed files with 38 additions and 13 deletions

View File

@ -35,27 +35,25 @@
<div class="row">
<div class="col">
<h6>Examples</h6>
<h6>Quick Start</h6>
</div>
</div>
<div class="row">
<div class="col">
<div class="alert alert-success small">
<p>
<b>View Help</b>:
<br>extract()
</p>
<p>
<b>Extract current page</b>:
<br>extract("table tr", ["td:nth-child(1)","td:nth-child(2)"])
<br>extract(".list-item", ["a.title", "p.content"])
</p>
<p>
<b>Extract pages of sample.com (1-10, interval 1)</b>:
<br>extract("table tr", ["td:nth-child(1)","td:nth-child(2)"],"http://sample.com/?pn=${page}",1,10,1)
<b>Extract multiple pages (1-10, interval 1)</b>:
<br>extract(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=${page}", 1, 10, 1)
</p>
<p>
<b>Extract specified pages (1,3,5)</b>:
<br>extract("table tr", ["td:nth-child(1)","td:nth-child(2)"],"http://sample.com/?pn=${page}",[1,3,5])
</p>
</div>
</div>
</div>

View File

@ -25,17 +25,44 @@
/**
 * Extracts rows of field values from the current document.
 *
 * Every element matched by `itemsSelector` becomes one row. Within a row,
 * each entry of `fieldSelectors` produces one string: the values of all
 * matching descendants joined with a newline.
 *
 * A field selector may carry an attribute suffix, 'selector@attribute'
 * (e.g. 'a.title@href'). Without a suffix the trimmed text content is used.
 * With a suffix the element's property of that name is read first, and the
 * markup attribute is used as a fallback when no such property exists
 * (e.g. 'data-id', 'class').
 *
 * Relies on a global `$` (presumably jQuery — confirm against the page that
 * loads this script).
 *
 * @param {string} itemsSelector - selector for the repeated item elements.
 * @param {string[]} fieldSelectors - per-field selectors, optionally with '@attribute'.
 * @returns {string[][]} one array of field strings per matched item.
 */
function extractData(itemsSelector, fieldSelectors) {
    return $(itemsSelector).toArray().map(
        item => fieldSelectors.map(
            selector => {
                // Only the first '@' separates selector from attribute;
                // anything after a second '@' is ignored.
                const [cls, rawAttr] = selector.split('@').slice(0, 2);
                // Tolerate spaces around the attribute name ('a @ href').
                const attr = rawAttr ? rawAttr.trim() : '';
                return $(item).find(cls).toArray().map(found => {
                    if (!attr) {
                        return found.textContent.trim();
                    }
                    const propertyValue = found[attr];
                    if (propertyValue !== undefined) {
                        return propertyValue;
                    }
                    // No reflected property: read the raw attribute instead.
                    // A null result is rendered as '' by join().
                    return typeof found.getAttribute === 'function'
                        ? found.getAttribute(attr)
                        : undefined;
                }).join('\n');
            }
        )
    );
}
function extract(...args) {
let sig = `Invalid call args.
let sig = `
# DataExtracter Help
----------------------------
## Signitures:
----------------------------
function extract(itemsSelector:string, fieldSelectors:string[])
function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
function extract(itemsSelector:string, fieldSelectors:string, url:string, pages:number[])`;
function extract(itemsSelector:string, fieldSelectors:string, url:string, pages:number[])
## Examples:
----------------------------
### Extract current page
extract(".list-item", ["a.title", "p.content"])
### Extract multiple pages (1-10, interval 1)
extract(".list-item", ["a.title", "p.content"],"http://sample.com/?pn=\${page}", 1, 10, 1)
### Extract specified pages (1,3,5)
extract(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=\${page}", [1, 3, 5])
## Advanced Examples:
----------------------------
### Extract link text and target (use 'selector@attribute')
extract('.list-item', ['a.title', 'a.title@href'])
`.trim();
if (!testArgs(...args)) {
console.log(sig);
return;