From 4656e4ff646ae6c24345933d4615208f3a7e90cd Mon Sep 17 00:00:00 2001 From: jebbs Date: Fri, 10 Jan 2020 13:22:37 +0800 Subject: [PATCH] helper function $ --- manifest.json | 3 ++- readme.md | 40 ++++++++++++++++----------------- scripts/background/extractor.js | 16 +++++++++---- scripts/background/helpers.js | 3 +++ scripts/background/signiture.js | 18 +++++++++------ 5 files changed, 48 insertions(+), 32 deletions(-) create mode 100644 scripts/background/helpers.js diff --git a/manifest.json b/manifest.json index 06ab895..9536651 100755 --- a/manifest.json +++ b/manifest.json @@ -21,7 +21,8 @@ "scripts/background/result.js", "scripts/background/signiture.js", "scripts/background/actions.js", - "scripts/background/extractor.js" + "scripts/background/extractor.js", + "scripts/background/helpers.js" ], "persistent": false }, diff --git a/readme.md b/readme.md index 528ca33..83aceee 100644 --- a/readme.md +++ b/readme.md @@ -5,7 +5,7 @@ DataExtracter helps you quickly extract data from any web pages. All you need to do is: -- Find out the selectors (JQuery selectors) for target data +- Find out the selectors for target data - Type scripts in the console of `extension backgroud page`, as introduced bellow. 
![](images/console.png) @@ -14,40 +14,40 @@ All you need to do is: Extract current page ```js -new Extractor().task(".list-item", ["a.title", "p.content"]).start(); +$('.item', ['a', 'a@href']); ``` Extract multiple pages (1-10, interval 1) ```js -new Extractor().task(".list-item", ["a.title", "p.content"],"http://sample.com/?pn=${page}", 1, 10, 1).start(); +$('.item', ['a', 'a@href'],"http://sample.com/?pn=${page}", 1, 10, 1); ``` Extract multiple urls (list) ```js -new Extractor().task(".list-item", ["a.title", "p.content"],["http://sample.com/abc","http://sample.com/xyz"]).start(); +$('.item', ['a', 'a@href'],["http://sample.com/abc","http://sample.com/xyz"]); ``` Extract specified pages (1,3,5) ```js -new Extractor().task(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=${page}", [1, 3, 5]).start(); +$('.item', ['a', 'a@href'], "http://sample.com/?pn=${page}", [1, 3, 5]); ``` -## Extractor.task() Signitures +## Task Call Signatures ```ts -// a task extracting data from current page -task(itemsSelector:string, fieldSelectors:string[]) -// a task extracting data from a range of pages -task(itemsSelector:string, fieldSelectors:string[], urlTemplate:string, from:number, to:number, interval:number) -// a task extracting data from a list of pages -task(itemsSelector:string, fieldSelectors:string, urlTemplate:string, pages:number[]) -// a task extracting data from a list of pages -task(itemsSelector:string, fieldSelectors:string[], urls:string[]) -// a task extracting data of urls which extracted from last task result -task(itemsSelector:string, fieldSelectors:string[], urls:ExtractResult) +// extract data from current page +function (itemsSelector:string, fieldSelectors:string[]) +// extract data from a range of pages +function (itemsSelector:string, fieldSelectors:string[], urlTemplate:string, from:number, to:number, interval:number) +// extract data from specified pages +function (itemsSelector:string, fieldSelectors:string[], urlTemplate:string, 
pages:number[]) +// extract data from a list of urls +function (itemsSelector:string, fieldSelectors:string[], urls:string[]) +// extract data from the urls extracted by the last task result +function (itemsSelector:string, fieldSelectors:string[], urls:ExtractResult) ``` ## Advanced Usage @@ -65,7 +65,7 @@ The only way to stop tasks before its finish, is `Closing the host tab`. e.g.: link text and target (use 'selector@attribute') ```js -new Extractor().task('.list-item', ['a.title', 'a.title@href']).start(); +new Extractor().task('.item', ['a', 'a@href']).start(); ``` ### Use Task Chain. @@ -74,7 +74,7 @@ e.g.: Collect links from `http://sample.com/abc`, then, Extract data of each lin ```js new Extractor() - .task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"]) + .task('.search-list-item', ['a@href'], ["http://sample.com/abc"]) .task('list-item', ["a.title", "p.content"]) .start(); ``` @@ -85,7 +85,7 @@ To a multiple task (chain) Extractor `e`: ```js e = new Extractor() -e.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"]) +e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"]) .task('list-item', ["a.title", "p.content"]) .start(); ``` @@ -114,7 +114,7 @@ Here we have 2 tasks: ```js e = new Extractor() -e.task('.search-list-item', ['.item a@href'], ["http://sample.com/abc"]) +e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"]) .task('list-item', ["a.title", "p.content"]) .start(); ``` diff --git a/scripts/background/extractor.js b/scripts/background/extractor.js index 9e6547e..24ae4eb 100644 --- a/scripts/background/extractor.js +++ b/scripts/background/extractor.js @@ -15,7 +15,6 @@ class Extractor { task(...args) { if (!testArgs(...args)) { console.log(`Invalid task arguments: ${argsToString(...args)}\n\n${signitures}\n`); - // break call chain to avoid unexpected task running return this; } // given >2 arguments means the task specifies target page, @@ -31,6 +30,7 @@ class Extractor { clear() { 
this._tasks = []; this._results = []; + return this; } /** * Start the task chain. @@ -64,6 +64,7 @@ class Extractor { result => { this._results[this._tasks[this._tasks.length - 1]] = result; this._running = false; + console.log("Tasks are all done.") this.save(); } ).catch(err => { @@ -122,9 +123,16 @@ class Extractor { console.log(`No result for task #${taskid}. Forget to call ".start()"?`); return; } - if (confirm( - `Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${result.toString(50) || "- Empty -"}` - )) { + if (result.data.length <= 1) { // 1 for selector headers + console.log(`No result for task #${taskid}. Forget to call ".start()"?`); + return; + } + let msg = ` +Please confirm to download (${result.data.length - 1} items): + +${result.toString(50) || "- Empty -"} +`.trim(); + if (confirm(msg)) { saveFile(result, "text/csv"); } } diff --git a/scripts/background/helpers.js b/scripts/background/helpers.js new file mode 100644 index 0000000..ea8aea1 --- /dev/null +++ b/scripts/background/helpers.js @@ -0,0 +1,3 @@ +function $(...args) { + return new Extractor().task(...args).start(); +} \ No newline at end of file diff --git a/scripts/background/signiture.js b/scripts/background/signiture.js index f6cc0e3..51570e2 100644 --- a/scripts/background/signiture.js +++ b/scripts/background/signiture.js @@ -1,17 +1,21 @@ const signitures = ` ## Usage -new Extractor().task(...args).task(...args).start(); +// single task +$(...args); +// managed task chains +e = new Extractor(); +e.task(...args).task(...args).start(); -## Extractor.task() Signitures: -function(itemsSelector:string, fieldSelectors:string[]) -function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number) -function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[]) -function(itemsSelector:string, fieldSelectors:string[], urls:string[]) +## Task Call Signitures: +function(itemsSelector:string, 
fieldSelectors:string[]); +function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number); +function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[]); +function(itemsSelector:string, fieldSelectors:string[], urls:string[]); ## Example: // extract all links text & url under '.item' elements // use 'selector@attr' to get attribute of the field elements -new Extractor().task(".item", ["a", "a@href"]).start(); +$(".item", ["a", "a@href"]); ## See Detailed Help: https://git.jebbs.co/jebbs/data-extracter-extesion