From 09112bb5062dba2dbf36ee18d58c3efad4b82508 Mon Sep 17 00:00:00 2001 From: jebbs Date: Sun, 12 Jan 2020 16:54:24 +0800 Subject: [PATCH] update documents --- manifest.json | 2 +- readme.md | 70 +++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 58 insertions(+), 14 deletions(-) diff --git a/manifest.json b/manifest.json index 510af17..15a4574 100755 --- a/manifest.json +++ b/manifest.json @@ -1,7 +1,7 @@ { "manifest_version": 2, "name": "Data Extracter", - "version": "0.1.0", + "version": "0.5.0", "author": "jebbs", "description": "Extract data from web page elements as sheet.", "icons": { diff --git a/readme.md b/readme.md index ce3f782..58162f8 100644 --- a/readme.md +++ b/readme.md @@ -78,8 +78,54 @@ e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"]) .start(); ``` +### Extractor Options + +Specify extra options to make a task perform some actions before scraping the data. + +```js +var job = new Extractor({ "scrollToBottom": 1 }); +``` + +Available options: + +- `scrollToBottom`: Try to scroll pages to the bottom; some elements are loaded only when the user needs them. + + +### Export Result of Any Task + +For a multi-task Extractor `e`: + +```js +e = new Extractor() +e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"]) + .task('list-item', ["a.title", "p.content"]) + .start(); +``` + +The user will be asked to export the final result when it finishes. + +In case you want to export it again, use: + +```js +e.export() +``` + +To export another task result, other than the final one: + +```js +// export the result of first task +// to the example above, that is a list of urls +e.export(0) +// export the result of second task +e.export(1) +``` + +## Task Management + ### Continue Tasks +Sometimes it's hard to finish all tasks in a single execution; that's why we need "Continuing of Tasks".
+ You can always continue tasks (with following), even it stops in the middle of a task: ```js @@ -99,9 +145,11 @@ e.restart(0) e.restart(1) ``` -### Save Result of Any Task +### Save & Load State -To a multiple task Extractor `e`: +It may also be hard to finish tasks even in a single day, so we need a way to save the current state and come back tomorrow. + +Create and run an extractor: ```js e = new Extractor() @@ -110,20 +158,16 @@ e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"]) .start(); ``` -User will be asked to save the final result when it finishes. - -Incase you want to save it again, use: +Save the state: ```js -e.save() +e.save(); ``` -To save another task result, other than the final one: +Load the state: + +Open the popup window and upload the saved state file. Then, in the background console: ```js -// save the result of first task -// to the example above, that is a list of urls -e.save(0) -// save the result of second task -e.save(1) -``` +e = new Extractor().load(); +``` \ No newline at end of file