extract attributes & improve help
This commit is contained in:
		| @ -35,27 +35,25 @@ | |||||||
|         <div class="row"> |         <div class="row"> | ||||||
|  |  | ||||||
|             <div class="col"> |             <div class="col"> | ||||||
|                 <h6>Examples</h6> |                 <h6>Quick Start</h6> | ||||||
|             </div> |             </div> | ||||||
|         </div> |         </div> | ||||||
|         <div class="row"> |         <div class="row"> | ||||||
|             <div class="col"> |             <div class="col"> | ||||||
|                 <div class="alert alert-success small"> |                 <div class="alert alert-success small"> | ||||||
|  |                     <p> | ||||||
|  |                         <b>View Help</b>: | ||||||
|  |                         <br>extract() | ||||||
|  |                     </p> | ||||||
|                     <p> |                     <p> | ||||||
|                         <b>Extract current page</b>: |                         <b>Extract current page</b>: | ||||||
|                         <br>extract("table tr", ["td:nth-child(0)","td:nth-child(1)"]) |                         <br>extract(".list-item", ["a.title", "p.content"]) | ||||||
|                     </p> |                     </p> | ||||||
|                     <p> |                     <p> | ||||||
|                         <b>Extract pages of sample.com (1-10, interval 1)</b>: |                         <b>Extract multiple pages (1-10, interval 1)</b>: | ||||||
|                         <br>extract("table tr", ["td:nth-child(0)","td:nth-child(1)"],"http://sample.com/?pn=${page}",1,10,1) |                         <br>extract(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=${page}", 1, 10, 1) | ||||||
|  |  | ||||||
|                     </p> |                     </p> | ||||||
|                     <p> |  | ||||||
|                         <b>Extract specified pages (1,3,5)</b>: |  | ||||||
|                         <br>extract("table tr", ["td:nth-child(0)","td:nth-child(1)"],"http://sample.com/?pn=${page}",[1,3,5]) |  | ||||||
|  |  | ||||||
|                     </p> |  | ||||||
|  |  | ||||||
|                 </div> |                 </div> | ||||||
|             </div> |             </div> | ||||||
|         </div> |         </div> | ||||||
|  | |||||||
| @ -25,17 +25,44 @@ | |||||||
| function extractData(itemsSelector, fieldSelectors) { | function extractData(itemsSelector, fieldSelectors) { | ||||||
|     return $(itemsSelector).toArray().map( |     return $(itemsSelector).toArray().map( | ||||||
|         item => fieldSelectors.map( |         item => fieldSelectors.map( | ||||||
|             cls => $(item).find(cls).toArray().map(find => find.textContent.trim()).join('\n') |             selector => { | ||||||
|  |                 let [cls, attr] = selector.split('@').slice(0, 2); | ||||||
|  |                 return $(item).find(cls).toArray().map(find => attr ? find[attr] : find.textContent.trim()).join('\n') | ||||||
|  |             } | ||||||
|         ) |         ) | ||||||
|     ); |     ); | ||||||
| } | } | ||||||
|  |  | ||||||
| function extract(...args) { | function extract(...args) { | ||||||
|     let sig = `Invalid call args. |     let sig = ` | ||||||
|  | # DataExtracter Help | ||||||
|  | ---------------------------- | ||||||
|  |  | ||||||
|  | ## Signatures: | ||||||
|  | ---------------------------- | ||||||
|  |  | ||||||
| function extract(itemsSelector:string, fieldSelectors:string[]) | function extract(itemsSelector:string, fieldSelectors:string[]) | ||||||
| function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number) | function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number) | ||||||
| function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])`; | function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[]) | ||||||
|  |  | ||||||
|  | ## Examples: | ||||||
|  | ---------------------------- | ||||||
|  |  | ||||||
|  | ### Extract current page | ||||||
|  | extract(".list-item", ["a.title", "p.content"]) | ||||||
|  |  | ||||||
|  | ### Extract multiple pages (1-10, interval 1) | ||||||
|  | extract(".list-item", ["a.title", "p.content"],"http://sample.com/?pn=\${page}", 1, 10, 1) | ||||||
|  |  | ||||||
|  | ### Extract specified pages (1,3,5) | ||||||
|  | extract(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=\${page}", [1, 3, 5]) | ||||||
|  |  | ||||||
|  | ## Advanced Examples: | ||||||
|  | ---------------------------- | ||||||
|  |  | ||||||
|  | ### Extract link text and target (use 'selector@attribute') | ||||||
|  | extract('.list-item', ['a.title', 'a.title@href']) | ||||||
|  | `.trim(); | ||||||
|     if (!testArgs(...args)) { |     if (!testArgs(...args)) { | ||||||
|         console.log(sig); |         console.log(sig); | ||||||
|         return; |         return; | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user