Compare commits
15 Commits
f06a6f4e78
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| e87e7010ec | |||
| 108ebb835f | |||
| e0b0a5e986 | |||
| 9cd25e3c1d | |||
| 7827d385bd | |||
| ade0670415 | |||
| 63aec616b1 | |||
| 378883b626 | |||
| c78f593c70 | |||
| d82010686d | |||
| 7644a1363f | |||
| 3338f78d91 | |||
| da7ae057f4 | |||
| 2224db1ad1 | |||
| 790c95ffc3 |
80
readme.md
80
readme.md
@ -8,15 +8,21 @@ All you need to do is:
|
|||||||
- Find out the selectors for target data
|
- Find out the selectors for target data
|
||||||
- Type scripts in the console of `extension background page`, as introduced below.
|
- Type scripts in the console of `extension background page`, as introduced below.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
Extract current page
|
Extract current page
|
||||||
|
|
||||||
```js
|
```js
|
||||||
$('.item', ['a', 'a@href']);
|
$('.item', ['a', 'a@href']);
|
||||||
|
new Extractor().task('.item', ['a', 'a@href']).start();
|
||||||
|
// fieldSelectors can be empty strings if items have no child to select
|
||||||
|
new Extractor().task('.item a', ['', '@href']).start();
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> `$(...args)` is the short form of `new Extractor().task(...args).start();`, which is introduced later.
|
||||||
|
|
||||||
Extract multiple pages (1-10, interval 1)
|
Extract multiple pages (1-10, interval 1)
|
||||||
|
|
||||||
```js
|
```js
|
||||||
@ -52,12 +58,20 @@ function (itemsSelector:string, fieldSelectors:string[], urls:ExtractResult)
|
|||||||
|
|
||||||
## Stop Tasks
|
## Stop Tasks
|
||||||
|
|
||||||
The only way to stop tasks before its finish, is `Closing the target tab`.
|
Close the target tab in which the current tasks are running.
|
||||||
|
|
||||||
> Tasks wait for their target elements' appearance, given some elements were loaded asynchronously.
|
Or use `job.stop()`:
|
||||||
> If you typed wrong selectors, the task waits forever for elements which don't exists.
|
|
||||||
|
|
||||||
## Extract Attributes.
|
```js
|
||||||
|
job = new Extractor().task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||||
|
.task('list-item', ["a.title", "p.content"])
|
||||||
|
.start();
|
||||||
|
job.stop();
|
||||||
|
```
|
||||||
|
|
||||||
|
> Next time you call `job.start();`, the job will continue from where it stopped.
|
||||||
|
|
||||||
|
## Extract Attributes
|
||||||
|
|
||||||
e.g.: link text and target (use 'selector@attribute')
|
e.g.: link text and target (use 'selector@attribute')
|
||||||
|
|
||||||
@ -65,6 +79,14 @@ e.g.: link text and target (use 'selector@attribute')
|
|||||||
new Extractor().task('.item', ['a', 'a@href']).start();
|
new Extractor().task('.item', ['a', 'a@href']).start();
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Click Selected Elements
|
||||||
|
|
||||||
|
The following clicks selected links and extracts link `text` and `href`
|
||||||
|
|
||||||
|
```js
|
||||||
|
new Extractor().task('.item', ['!a', 'a@href']).start();
|
||||||
|
```
|
||||||
|
|
||||||
## Advanced Usage
|
## Advanced Usage
|
||||||
|
|
||||||
### Use Task Chain.
|
### Use Task Chain.
|
||||||
@ -126,17 +148,17 @@ e.export(1)
|
|||||||
|
|
||||||
Sometimes it's hard to finish them in a single execution; that's why we need "Continuing of Tasks".
|
Sometimes it's hard to finish them in a single execution; that's why we need "Continuing of Tasks".
|
||||||
|
|
||||||
You can always continue tasks (with following), even it stops in the middle of a task:
|
You can always continue tasks by starting them again, no matter in what phase they stopped.
|
||||||
|
|
||||||
```js
|
```js
|
||||||
e.start()
|
e.start()
|
||||||
```
|
```
|
||||||
|
|
||||||
The `Extractor` kept the state of last execution, and starts from where it stopped.
|
The `Extractor` keeps the execution state, and starts from where it stopped.
|
||||||
|
|
||||||
### Restart Tasks
|
### Restart Tasks
|
||||||
|
|
||||||
What should I do, if I don't like to continue from last state, but restart from certain task?
|
What if I don't want to continue from the last state, but restart certain tasks instead?
|
||||||
|
|
||||||
```js
|
```js
|
||||||
// restart all tasks
|
// restart all tasks
|
||||||
@ -166,10 +188,50 @@ e.save();
|
|||||||
|
|
||||||
Load the state:
|
Load the state:
|
||||||
|
|
||||||
Open the popup window, upload the saved state file. Then, and in the backgoud console:
|
Open the popup window and upload the saved state file. Then, in the background console:
|
||||||
|
|
||||||
```js
|
```js
|
||||||
e = new Extractor().load();
|
e = new Extractor().load();
|
||||||
|
e.start();
|
||||||
|
```
|
||||||
|
|
||||||
|
> The uploaded state will be cleaned in 30 seconds, if you don't load it.
|
||||||
|
|
||||||
|
## Watch Mode
|
||||||
|
|
||||||
|
Watch mode tries to extract data from every page you visit **in the current window**.
|
||||||
|
|
||||||
|
```js
|
||||||
|
e = new Extractor();
|
||||||
|
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||||
|
.task('list-item', ["a.title", "p.content"]);
|
||||||
|
e.watch(1); // start watching for first task
|
||||||
|
```
|
||||||
|
|
||||||
|
To stop watching, you can either `close current window`, or:
|
||||||
|
|
||||||
|
```js
|
||||||
|
e.stop();
|
||||||
|
```
|
||||||
|
|
||||||
|
## Results Operation
|
||||||
|
|
||||||
|
To get the results of a task:
|
||||||
|
|
||||||
|
```js
|
||||||
|
let results = job.results(0);
|
||||||
|
```
|
||||||
|
|
||||||
|
Visit URLs (if any) in the results one by one:
|
||||||
|
|
||||||
|
```js
|
||||||
|
results.visit();
|
||||||
|
```
|
||||||
|
|
||||||
|
Walk through all results one by one:
|
||||||
|
|
||||||
|
```js
|
||||||
|
results.walk((row,col,value)=>{console.log(value)});
|
||||||
```
|
```
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
import { ACTION_GOTO_URL, ACTION_EXTRACT, ACTION_PING as ACTION_PING, ACTION_QUERY_URL, ACTION_SCROLL_BOTTOM } from "../common";
|
import { Actions, Request } from "../common";
|
||||||
import { sendMessage } from "./messaging";
|
import { sendMessage, ResponseChecker } from "./messaging";
|
||||||
|
import { logger } from "../common/logger";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* redirect tab to url.
|
* redirect tab to url.
|
||||||
@ -7,25 +8,27 @@ import { sendMessage } from "./messaging";
|
|||||||
* @param {string} url target URL
|
* @param {string} url target URL
|
||||||
* @returns {Promise<string[]>} a promise of target URL
|
* @returns {Promise<string[]>} a promise of target URL
|
||||||
*/
|
*/
|
||||||
export function redirectTab(tab: chrome.tabs.Tab, url: string) {
|
export function redirectTab(tab: chrome.tabs.Tab, url: string, check?: boolean) {
|
||||||
return queryUrl(tab).then(u => {
|
return queryUrl(tab).then(u => {
|
||||||
if (url !== u) {
|
if (url !== u) {
|
||||||
let req = {
|
let req: Request = {
|
||||||
action: ACTION_GOTO_URL,
|
action: Actions.GOTO_URL,
|
||||||
url: url
|
url: url
|
||||||
}
|
}
|
||||||
let checker = async (u, err, tryCount): Promise<string> => {
|
let checker: ResponseChecker<string> = !check ? undefined : async (r, err, tryCount): Promise<string> => {
|
||||||
let queryErr: any;
|
let queryErr: any;
|
||||||
let newURL = await queryUrl(tab).catch(e => queryErr = e);
|
let newURL = await queryUrl(tab).catch(e => queryErr = e);
|
||||||
if (queryErr) {
|
if (queryErr) {
|
||||||
return Promise.reject(queryErr);
|
throw queryErr;
|
||||||
}
|
}
|
||||||
if (newURL == url) return url;
|
if (newURL == url) return url;
|
||||||
if (
|
if (
|
||||||
tryCount % 5 == 0 &&
|
confirm(`Cannot navigate to target url.
|
||||||
!confirm('Cannot navigate to target url. \nPress OK to continue, Cancel to stop.')
|
expected: ${url}\n
|
||||||
|
actual: ${newURL}\n
|
||||||
|
Press OK to continue, Cancel to retry. Close the tab to stop`)
|
||||||
) {
|
) {
|
||||||
return Promise.reject("Tasks stopped by user.");
|
return newURL;
|
||||||
}
|
}
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
@ -41,15 +44,24 @@ export function redirectTab(tab: chrome.tabs.Tab, url: string) {
|
|||||||
* @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
|
* @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
|
||||||
* @returns {Promise<string[]>} a promise of extracted data
|
* @returns {Promise<string[]>} a promise of extracted data
|
||||||
*/
|
*/
|
||||||
export function extractTabData(tab, itemsSelector, fieldSelectors) {
|
export function extractTabData(tab: chrome.tabs.Tab, itemsSelector: string, fieldSelectors: string[], expectedURL?: string, askOnfail?: boolean) {
|
||||||
let req = {
|
let req: Request = {
|
||||||
action: ACTION_EXTRACT,
|
action: Actions.EXTRACT,
|
||||||
itemsSelector: itemsSelector,
|
itemsSelector: itemsSelector,
|
||||||
fieldSelectors: fieldSelectors
|
fieldSelectors: fieldSelectors,
|
||||||
|
url: expectedURL,
|
||||||
}
|
}
|
||||||
let checker = (result, err, tryCount) => {
|
let checker: ResponseChecker<string[][]> = (response, err, tryCount) => {
|
||||||
|
if (response.error) throw response.error;
|
||||||
|
let result = response.result;
|
||||||
if (!result || !result.length) {
|
if (!result || !result.length) {
|
||||||
if (tryCount % 20 == 0 && confirm('No data found in current page. \n\nContinue to next page?')) {
|
if (
|
||||||
|
tryCount % 20 == 0 && (
|
||||||
|
!askOnfail ||
|
||||||
|
confirm('No data found in current page. \n\nContinue to next page?')
|
||||||
|
)
|
||||||
|
) {
|
||||||
|
logger.warn(`Failed after ${tryCount} tries: ${tab.url}`)
|
||||||
return [];
|
return [];
|
||||||
} else {
|
} else {
|
||||||
return undefined;
|
return undefined;
|
||||||
@ -67,10 +79,12 @@ export function extractTabData(tab, itemsSelector, fieldSelectors) {
|
|||||||
*/
|
*/
|
||||||
export async function ping(tab, count = 1) {
|
export async function ping(tab, count = 1) {
|
||||||
let req = {
|
let req = {
|
||||||
action: ACTION_PING
|
action: Actions.PING
|
||||||
}
|
}
|
||||||
let checker = (r: string, e, c) => r == "pong" ? r : undefined;
|
let checker: ResponseChecker<string> = (r, e, c) =>
|
||||||
let pong = await sendMessage<string>(tab, req, 'Check tab availability...', checker, 1000, count).catch(() => { });
|
r.result == "pong" ? r.result : undefined;
|
||||||
|
|
||||||
|
let pong = await sendMessage<string>(tab, req, 'Check tab availability...', checker, 1000, 1000, count).catch(() => { });
|
||||||
return pong == "pong";
|
return pong == "pong";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -81,7 +95,7 @@ export async function ping(tab, count = 1) {
|
|||||||
*/
|
*/
|
||||||
export function queryUrl(tab: chrome.tabs.Tab) {
|
export function queryUrl(tab: chrome.tabs.Tab) {
|
||||||
let req = {
|
let req = {
|
||||||
action: ACTION_QUERY_URL
|
action: Actions.QUERY_URL
|
||||||
}
|
}
|
||||||
return sendMessage<string>(tab, req);
|
return sendMessage<string>(tab, req);
|
||||||
}
|
}
|
||||||
@ -94,23 +108,80 @@ export function queryUrl(tab: chrome.tabs.Tab) {
|
|||||||
*/
|
*/
|
||||||
export function scrollToBottom(tab: chrome.tabs.Tab) {
|
export function scrollToBottom(tab: chrome.tabs.Tab) {
|
||||||
let req = {
|
let req = {
|
||||||
action: ACTION_SCROLL_BOTTOM
|
action: Actions.SCROLL_BOTTOM
|
||||||
}
|
}
|
||||||
return sendMessage(tab, req, 'Scroll to page bottom...');
|
return sendMessage(tab, req, 'Scroll to page bottom...');
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function createTab(url: string, active: boolean) {
|
export async function createTab(url: string, active: boolean): Promise<chrome.tabs.Tab> {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
chrome.tabs.create({
|
findIncognitoWindow().then(
|
||||||
'url': url,
|
incognitoWindow => {
|
||||||
'active': active
|
chrome.tabs.create({
|
||||||
}, function (tab) {
|
'url': url,
|
||||||
resolve(tab);
|
'active': active,
|
||||||
|
// createTab to incognito window first
|
||||||
|
'windowId': incognitoWindow ? incognitoWindow.id : undefined
|
||||||
|
}, function (tab) {
|
||||||
|
resolve(tab);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function findIncognitoWindow(): Promise<chrome.windows.Window> {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
chrome.windows.getAll(
|
||||||
|
{
|
||||||
|
windowTypes: ['normal'],
|
||||||
|
},
|
||||||
|
(windows: chrome.windows.Window[]) => {
|
||||||
|
for (let window of windows) {
|
||||||
|
if (window.incognito) {
|
||||||
|
resolve(window);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resolve(undefined);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function getCurrentWindow(): Promise<chrome.windows.Window> {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
chrome.windows.getCurrent(
|
||||||
|
(windows: chrome.windows.Window) => {
|
||||||
|
return resolve(windows);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function getWindowByID(id: number) {
|
||||||
|
return new Promise<chrome.windows.Window>((resolve, reject) => {
|
||||||
|
chrome.windows.get(id, function (window) {
|
||||||
|
chrome.runtime.lastError;
|
||||||
|
resolve(window);
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getActiveTab(currentWindow: boolean) {
|
export async function CreateIncognitoWindow() {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
chrome.windows.create(
|
||||||
|
<chrome.windows.CreateData>{
|
||||||
|
incognito: true,
|
||||||
|
},
|
||||||
|
(window: chrome.windows.Window) => {
|
||||||
|
resolve(window);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function getActiveTab(currentWindow: boolean): Promise<chrome.tabs.Tab> {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
chrome.tabs.query({
|
chrome.tabs.query({
|
||||||
active: true,
|
active: true,
|
||||||
@ -121,7 +192,7 @@ export async function getActiveTab(currentWindow: boolean) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getTabByID(id: number) {
|
export async function getTabByID(id: number): Promise<chrome.tabs.Tab> {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
chrome.tabs.get(id, function (tab) {
|
chrome.tabs.get(id, function (tab) {
|
||||||
chrome.runtime.lastError;
|
chrome.runtime.lastError;
|
||||||
|
|||||||
@ -1,8 +1,15 @@
|
|||||||
import { logger } from "./common";
|
import { logger } from "../common/logger";
|
||||||
|
import { Actions } from "../common";
|
||||||
|
import { messageSubscribers } from "./messaging";
|
||||||
|
|
||||||
export class Caches {
|
export class Caches {
|
||||||
private _state: string = "";
|
private _state: string = "";
|
||||||
constructor() { }
|
constructor() {
|
||||||
|
messageSubscribers.addListener(Actions.UPLOAD_STATE, (request, sender, sendResponse) => {
|
||||||
|
sendResponse('recieved!');
|
||||||
|
this.setState(request.fileName, request.state)
|
||||||
|
});
|
||||||
|
}
|
||||||
get state(): string {
|
get state(): string {
|
||||||
let s = this._state;
|
let s = this._state;
|
||||||
this._state = "";
|
this._state = "";
|
||||||
@ -11,5 +18,14 @@ export class Caches {
|
|||||||
setState(name: string, content: string) {
|
setState(name: string, content: string) {
|
||||||
this._state = content;
|
this._state = content;
|
||||||
logger.info(`State (${name}) recieved. To load it: some_var = new Extractor().load()`);
|
logger.info(`State (${name}) recieved. To load it: some_var = new Extractor().load()`);
|
||||||
|
// clear cache in 30 seconds
|
||||||
|
setTimeout(() => {
|
||||||
|
if (this._state) {
|
||||||
|
logger.info(`Uploaded state is cleaned after 30 second.`);
|
||||||
|
this._state = "";
|
||||||
|
}
|
||||||
|
}, 30000);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export const caches = new Caches();
|
||||||
|
|||||||
@ -1,6 +0,0 @@
|
|||||||
import { Logger, LOGGER_LEVEL } from "./logger";
|
|
||||||
import { Caches } from "./caches";
|
|
||||||
|
|
||||||
export const caches = new Caches();
|
|
||||||
export const logger = new Logger(LOGGER_LEVEL.DEBUG, LOGGER_LEVEL.DISABLED);
|
|
||||||
export const URL_REG = /^\s*(https?):\/\//im;
|
|
||||||
@ -1,7 +1,8 @@
|
|||||||
import { Task } from "./task";
|
import { Task } from "./task";
|
||||||
import { saveFile } from "./tools";
|
import { parseUrls, saveFile } from "./tools";
|
||||||
import { createTab, getActiveTab, ping } from "./actions";
|
import { createTab, getActiveTab, ping, redirectTab } from "./actions";
|
||||||
import { logger, caches } from "./common";
|
import { logger } from "../common/logger";
|
||||||
|
import { caches } from "./caches";
|
||||||
import { ExtractResult } from "./result";
|
import { ExtractResult } from "./result";
|
||||||
|
|
||||||
export class Extractor {
|
export class Extractor {
|
||||||
@ -11,6 +12,14 @@ export class Extractor {
|
|||||||
constructor(options?) {
|
constructor(options?) {
|
||||||
if (options) this._options = options;
|
if (options) this._options = options;
|
||||||
}
|
}
|
||||||
|
static async ping(count: number = 1) {
|
||||||
|
let tab = await getActiveTab(true) || await getActiveTab(false);
|
||||||
|
let succ = await ping(tab, count);
|
||||||
|
if (!succ) {
|
||||||
|
logger.error('Cannot contact with active tab.');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Save current state, in case we restore it later.
|
* Save current state, in case we restore it later.
|
||||||
*/
|
*/
|
||||||
@ -41,6 +50,17 @@ export class Extractor {
|
|||||||
this._tasks.push(new Task(this._options, ...args));
|
this._tasks.push(new Task(this._options, ...args));
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Get the results of a task.
|
||||||
|
* The given id is validated first; nothing is returned if it is rejected.
|
||||||
|
* @param {number} id index of the task whose results are wanted.
|
||||||
|
* @returns {ExtractResult} the results of the task, or undefined if the id is invalid.
|
||||||
|
*/
|
||||||
|
results(id?: number): ExtractResult {
|
||||||
|
id = this._checkTaskId(id);
|
||||||
|
if (id < 0) return;
|
||||||
|
return this._tasks[id].results;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Clear tasks and task caches.
|
* Clear tasks and task caches.
|
||||||
*/
|
*/
|
||||||
@ -51,14 +71,30 @@ export class Extractor {
|
|||||||
/**
|
/**
|
||||||
* Start the task chain.
|
* Start the task chain.
|
||||||
*/
|
*/
|
||||||
async start() {
|
start() {
|
||||||
return this._startTasks(0);
|
return this._startTasks(0);
|
||||||
}
|
}
|
||||||
|
stop(id?: number) {
|
||||||
|
if (id !== undefined) {
|
||||||
|
id = this._checkTaskId(id);
|
||||||
|
if (id < 0) return;
|
||||||
|
this._tasks[id].stop();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (let i = 0; i < this._tasks.length; i++) {
|
||||||
|
this._tasks[i].stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
watch(id: number) {
|
||||||
|
id = this._checkTaskId(id);
|
||||||
|
if (id < 0) return;
|
||||||
|
this._tasks[id].watch();
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* restart from specified task, but don't restart the previous tasks.
|
* restart from specified task, but don't restart the previous tasks.
|
||||||
* @param {number} from where to restart the tasks, begins with 0
|
* @param {number} from where to restart the tasks, begins with 0
|
||||||
*/
|
*/
|
||||||
async restart(from: number = 0) {
|
restart(from: number = 0) {
|
||||||
let id = this._checkTaskId(from, 0);
|
let id = this._checkTaskId(from, 0);
|
||||||
if (id < 0) return;
|
if (id < 0) return;
|
||||||
for (let i = id; i < this._tasks.length; i++) {
|
for (let i = id; i < this._tasks.length; i++) {
|
||||||
@ -96,7 +132,7 @@ export class Extractor {
|
|||||||
if (i < from) return;
|
if (i < from) return;
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
let prevTask = this._tasks[i - 1];
|
let prevTask = this._tasks[i - 1];
|
||||||
return task.execute(tab, new ExtractResult(prevTask.results));
|
return task.execute(tab, prevTask.results);
|
||||||
}
|
}
|
||||||
return task.execute(tab);
|
return task.execute(tab);
|
||||||
});
|
});
|
||||||
@ -118,22 +154,22 @@ export class Extractor {
|
|||||||
let id = this._checkTaskId(taskid, this._tasks.length - 1);
|
let id = this._checkTaskId(taskid, this._tasks.length - 1);
|
||||||
if (id < 0) return;
|
if (id < 0) return;
|
||||||
let results = this._tasks[id].results
|
let results = this._tasks[id].results
|
||||||
if (!results.length) {
|
let count = results.data.length
|
||||||
|
if (!count) {
|
||||||
logger.info(`No result for task #${id}. Forget to call ".start()"?`);
|
logger.info(`No result for task #${id}. Forget to call ".start()"?`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
results.unshift(this._tasks[id].fieldSelectors);
|
results.header = this._tasks[id].fieldSelectors;
|
||||||
let exResults = new ExtractResult(results);
|
|
||||||
let msg = `
|
let msg = `
|
||||||
Please confirm to download (${results.length - 1} items):
|
Please confirm to download (${count} items):
|
||||||
|
|
||||||
${exResults.toString(50) || "- Empty -"}
|
${results.toString(50) || "- Empty -"}
|
||||||
`.trim();
|
`.trim();
|
||||||
if (confirm(msg)) {
|
if (confirm(msg)) {
|
||||||
saveFile(exResults.toString(), "text/csv");
|
saveFile(results.toString(), "text/csv");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_checkTaskId(id: number, defaultId: number) {
|
private _checkTaskId(id: number, defaultId?: number) {
|
||||||
if (!this._tasks.length) {
|
if (!this._tasks.length) {
|
||||||
logger.info("No task found.");
|
logger.info("No task found.");
|
||||||
return -1;
|
return -1;
|
||||||
|
|||||||
@ -1,7 +1,11 @@
|
|||||||
import { EXT_NAME, ACTION_UPLOAD_STATE } from "../common";
|
import { Request, Actions, Response } from "../common";
|
||||||
import { getTabByID } from "./actions";
|
import { getTabByID } from "./actions";
|
||||||
import { caches, logger } from "./common";
|
import { logger } from "../common/logger";
|
||||||
|
|
||||||
|
|
||||||
|
export type ResponseCheckerSync<T> = (r: Response<T>, err: chrome.runtime.LastError, count: number) => T;
|
||||||
|
export type ResponseCheckerAsync<T> = (r: Response<T>, err: chrome.runtime.LastError, count: number) => Promise<T>;
|
||||||
|
export type ResponseChecker<T> = ResponseCheckerSync<T> | ResponseCheckerAsync<T>;
|
||||||
/**
|
/**
|
||||||
* Sending a message to target tab repeatedly until the response is not undefined.
|
* Sending a message to target tab repeatedly until the response is not undefined.
|
||||||
* @param {object} tab the tab where to send the message
|
* @param {object} tab the tab where to send the message
|
||||||
@ -17,12 +21,14 @@ import { caches, logger } from "./common";
|
|||||||
*/
|
*/
|
||||||
export function sendMessage<T>(
|
export function sendMessage<T>(
|
||||||
tab: chrome.tabs.Tab,
|
tab: chrome.tabs.Tab,
|
||||||
req,
|
req: Request,
|
||||||
log?: string,
|
log?: string,
|
||||||
dataChecker?: (r: T, err: chrome.runtime.LastError, count: number) => T | Promise<T>,
|
dataChecker?: ResponseChecker<T>,
|
||||||
|
timeout?: number,
|
||||||
interval?: number,
|
interval?: number,
|
||||||
limit?: number
|
limit?: number
|
||||||
) {
|
) {
|
||||||
|
timeout = timeout || 10;
|
||||||
interval = interval || 500;
|
interval = interval || 500;
|
||||||
limit = isNaN(limit) ? 0 : limit;
|
limit = isNaN(limit) ? 0 : limit;
|
||||||
let count = 0;
|
let count = 0;
|
||||||
@ -31,7 +37,7 @@ export function sendMessage<T>(
|
|||||||
loop();
|
loop();
|
||||||
|
|
||||||
async function loop() {
|
async function loop() {
|
||||||
logger.debug("Request for", req.action);
|
logger.debug("Request for", Actions[req.action]);
|
||||||
let tabAvailable = await getTabByID(tab.id);
|
let tabAvailable = await getTabByID(tab.id);
|
||||||
if (!tabAvailable) {
|
if (!tabAvailable) {
|
||||||
reject("Task interrupted due to the target tab is closed.");
|
reject("Task interrupted due to the target tab is closed.");
|
||||||
@ -43,33 +49,23 @@ export function sendMessage<T>(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
count++;
|
count++;
|
||||||
chrome.tabs.sendMessage(tab.id, req, async (r: T) => {
|
let timeout = setTimeout(() => { reject(`${Actions[req.action]} requset timeout after ${timeout}s`) }, 10000);
|
||||||
|
chrome.tabs.sendMessage(tab.id, req, async (r: Response<T>) => {
|
||||||
|
clearTimeout(timeout);
|
||||||
// check error but do nothing until dataChecker.
|
// check error but do nothing until dataChecker.
|
||||||
let err = chrome.runtime.lastError;
|
let err = chrome.runtime.lastError;
|
||||||
let result: T = r;
|
let [result, error] = await checkResponse(dataChecker, r, err, count);
|
||||||
|
if (error) {
|
||||||
if (dataChecker) {
|
reject(error);
|
||||||
let pms = dataChecker(r, err, count);
|
return;
|
||||||
// don't catch if it's not a Promise
|
|
||||||
if (pms instanceof Promise) {
|
|
||||||
let checkerError: any;
|
|
||||||
pms = pms.catch(e => checkerError = e);
|
|
||||||
result = await pms;
|
|
||||||
if (checkerError) {
|
|
||||||
reject(checkerError);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
result = pms;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
let flag = result !== undefined;
|
||||||
let flag = result !== undefined && result !== null;
|
|
||||||
if (log) logger.info(log, flag ? '(OK)' : '(failed)');
|
if (log) logger.info(log, flag ? '(OK)' : '(failed)');
|
||||||
if (flag) {
|
if (flag) {
|
||||||
resolve(result);
|
resolve(result);
|
||||||
} else {
|
} else {
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
|
logger.debug('Invalid response', r, 'retry...');
|
||||||
loop();
|
loop();
|
||||||
}, interval);
|
}, interval);
|
||||||
}
|
}
|
||||||
@ -78,17 +74,77 @@ export function sendMessage<T>(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
chrome.runtime.onMessage.addListener(function (request, sender, sendResponse) {
|
async function checkResponse<T>(
|
||||||
if (!request.action || !request.action.startsWith(EXT_NAME)) {
|
dataChecker: ResponseChecker<T>,
|
||||||
|
response: Response<T>,
|
||||||
|
error: chrome.runtime.LastError,
|
||||||
|
tryCount: number
|
||||||
|
): Promise<[T, string]> {
|
||||||
|
// response could be undefined if the content script is interrupted.
|
||||||
|
// don't check, tell sendMessage to retry.
|
||||||
|
if (!response) return [undefined, undefined];
|
||||||
|
if (!dataChecker) {
|
||||||
|
return [response.result, response.error];
|
||||||
|
}
|
||||||
|
let result: T;
|
||||||
|
let pms: T | Promise<T>;
|
||||||
|
try {
|
||||||
|
pms = dataChecker(response, error, tryCount);
|
||||||
|
} catch (err) {
|
||||||
|
return [undefined, err];
|
||||||
|
}
|
||||||
|
// don't catch if it's not a Promise
|
||||||
|
if (pms instanceof Promise) {
|
||||||
|
let checkerError: any;
|
||||||
|
pms = pms.catch(e => checkerError = e);
|
||||||
|
result = await pms;
|
||||||
|
if (checkerError) {
|
||||||
|
return [undefined, checkerError];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
result = pms;
|
||||||
|
}
|
||||||
|
return [result, undefined];
|
||||||
|
}
|
||||||
|
|
||||||
|
export type ActionSubscriberSync = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => void;
|
||||||
|
export type ActionSubscriberAsync = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => Promise<void>;
|
||||||
|
export type ActionSubscriber = ActionSubscriberSync | ActionSubscriberAsync;
|
||||||
|
|
||||||
|
class MessageSubscribers {
|
||||||
|
private listeners: { [key: number]: ActionSubscriber[] } = {};
|
||||||
|
addListener(action: Actions, subscriber: ActionSubscriber) {
|
||||||
|
this.listeners[action] || (this.listeners[action] = []);
|
||||||
|
this.listeners[action].push(subscriber);
|
||||||
|
}
|
||||||
|
removeListener(action: Actions, subscriber: ActionSubscriber) {
|
||||||
|
this.listeners[action] || (this.listeners[action] = []);
|
||||||
|
for (let i = 0; i < this.listeners[action].length; i++) {
|
||||||
|
if (this.listeners[action][i] == subscriber) {
|
||||||
|
this.listeners[action].splice(i, 1);
|
||||||
|
i--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
logger.debug(`${this.listeners[action].length} subscriber(s) remained for action ${Actions[action]}`);
|
||||||
|
}
|
||||||
|
getListeners(action: Actions): ActionSubscriber[] {
|
||||||
|
return this.listeners[action]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
export const messageSubscribers = new MessageSubscribers();
|
||||||
|
|
||||||
|
chrome.runtime.onMessage.addListener(function (request: Request, sender, sendResponse) {
|
||||||
|
let subscribers = messageSubscribers.getListeners(request.action);
|
||||||
|
if (!subscribers || !subscribers.length) {
|
||||||
|
sendResponse("Request not supported.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
switch (request.action) {
|
let promises: Promise<any>[] = [];
|
||||||
case ACTION_UPLOAD_STATE:
|
for (let subscriber of subscribers) {
|
||||||
sendResponse('recieved!');
|
let p = subscriber(request, sender, sendResponse);
|
||||||
caches.setState(request.name, request.state)
|
if (p instanceof Promise) promises.push(p);
|
||||||
break;
|
|
||||||
default:
|
|
||||||
sendResponse("Request not supported.");
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
if (promises.length)
|
||||||
|
return Promise.all(promises);
|
||||||
|
return;
|
||||||
});
|
});
|
||||||
|
|||||||
@ -1,6 +1,11 @@
|
|||||||
|
import { logger } from "../common/logger";
|
||||||
|
import { getActiveTab, ping, redirectTab } from "./actions";
|
||||||
|
import { parseUrls } from "./tools";
|
||||||
|
|
||||||
export class ExtractResult {
|
export class ExtractResult {
|
||||||
|
private _header: string[];
|
||||||
private _data: string[][] = [];
|
private _data: string[][] = [];
|
||||||
constructor(data) {
|
constructor(data: string[][]) {
|
||||||
this._data = data || [];
|
this._data = data || [];
|
||||||
}
|
}
|
||||||
row(index: number): string[] {
|
row(index: number): string[] {
|
||||||
@ -14,11 +19,17 @@ export class ExtractResult {
|
|||||||
squash(): string[] {
|
squash(): string[] {
|
||||||
return this._data.reduce((p, c) => p.concat(c), []);
|
return this._data.reduce((p, c) => p.concat(c), []);
|
||||||
}
|
}
|
||||||
|
set header(h: string[]) {
|
||||||
|
this._header = h
|
||||||
|
}
|
||||||
get data(): string[][] {
|
get data(): string[][] {
|
||||||
return this._data;
|
return this._data;
|
||||||
}
|
}
|
||||||
toString(rowsCount: number = 0): string {
|
toString(rowsCount: number = 0): string {
|
||||||
let data = rowsCount > 0 ? this._data.slice(0, rowsCount) : this._data;
|
let data = rowsCount > 0 ? this._data.slice(0, rowsCount) : this._data;
|
||||||
|
if (this._header && this._header.length) {
|
||||||
|
data.unshift(this._header);
|
||||||
|
}
|
||||||
return data.slice().reduce(
|
return data.slice().reduce(
|
||||||
(csv, lineCells) => {
|
(csv, lineCells) => {
|
||||||
if (!lineCells || !lineCells.length) {
|
if (!lineCells || !lineCells.length) {
|
||||||
@ -26,6 +37,7 @@ export class ExtractResult {
|
|||||||
}
|
}
|
||||||
let line = lineCells.reduce(
|
let line = lineCells.reduce(
|
||||||
(lineText, cell, idx) => {
|
(lineText, cell, idx) => {
|
||||||
|
cell = cell || "";
|
||||||
cell = '"' + cell.trim().replace(/"/g, '""') + '"';
|
cell = '"' + cell.trim().replace(/"/g, '""') + '"';
|
||||||
return lineText + cell + (idx == lineCells.length - 1 ? "" : ",")
|
return lineText + cell + (idx == lineCells.length - 1 ? "" : ",")
|
||||||
}, "");
|
}, "");
|
||||||
@ -34,4 +46,40 @@ export class ExtractResult {
|
|||||||
""
|
""
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
async walk(fn: (row: number, col: number, value: string) => void) {
|
||||||
|
let pms = Promise.resolve(null);
|
||||||
|
for (let i = 0; i < this._data.length; i++) {
|
||||||
|
let cells = this._data[i];
|
||||||
|
for (let j = 0; j < cells.length; j++) {
|
||||||
|
let row = i;
|
||||||
|
let col = j;
|
||||||
|
let value = cells[j];
|
||||||
|
pms = pms.then(
|
||||||
|
() => fn(row, col, value)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pms.catch(err => {
|
||||||
|
logger.error(err);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
async visit() {
|
||||||
|
let urls = parseUrls(this);
|
||||||
|
let tab = await getActiveTab(true) || await getActiveTab(false);
|
||||||
|
let succ = await ping(tab);
|
||||||
|
if (!succ) {
|
||||||
|
logger.error('Cannot contact with active tab.');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
return urls.reduce(
|
||||||
|
(pms, url: string, i: number) => {
|
||||||
|
return pms.then(
|
||||||
|
async () => {
|
||||||
|
return redirectTab(tab, url, false);
|
||||||
|
});
|
||||||
|
}, Promise.resolve<void>(undefined)
|
||||||
|
).catch(err => {
|
||||||
|
logger.error(err);
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@ -20,7 +20,7 @@ function(itemsSelector:string, fieldSelectors:string[], urls:string[]);
|
|||||||
$(".item", ["a", "a@href"]);
|
$(".item", ["a", "a@href"]);
|
||||||
|
|
||||||
## See Detailed Help:
|
## See Detailed Help:
|
||||||
https://git.jebbs.co/jebbs/data-extracter-extesion
|
https://git.qjebbs.com/jebbs/data-extracter-extesion
|
||||||
`.trim();
|
`.trim();
|
||||||
|
|
||||||
export function testArgs(...args: any) {
|
export function testArgs(...args: any) {
|
||||||
|
|||||||
@ -1,7 +1,10 @@
|
|||||||
import { parseUrls } from "./tools";
|
import { parseUrls } from "./tools";
|
||||||
import { queryUrl, redirectTab, scrollToBottom, extractTabData } from "./actions";
|
import { queryUrl, redirectTab, scrollToBottom, extractTabData, findIncognitoWindow, getCurrentWindow, getWindowByID } from "./actions";
|
||||||
import { testArgs, signitures } from "./signiture";
|
import { testArgs, signitures } from "./signiture";
|
||||||
import { ExtractResult } from "./result";
|
import { ExtractResult } from "./result";
|
||||||
|
import { messageSubscribers, ActionSubscriber } from "./messaging";
|
||||||
|
import { Actions } from "../common";
|
||||||
|
import { logger } from "../common/logger";
|
||||||
|
|
||||||
export class Task {
|
export class Task {
|
||||||
private _data: { [key: string]: string[][] } = {};
|
private _data: { [key: string]: string[][] } = {};
|
||||||
@ -10,6 +13,8 @@ export class Task {
|
|||||||
private _itemsSelector: string;
|
private _itemsSelector: string;
|
||||||
private _fieldSelectors: string[];
|
private _fieldSelectors: string[];
|
||||||
private _urls: string[] = [];
|
private _urls: string[] = [];
|
||||||
|
private _running = false;
|
||||||
|
private _listeners: ActionSubscriber[] = [];
|
||||||
|
|
||||||
constructor(options: any, ...arg: any);
|
constructor(options: any, ...arg: any);
|
||||||
constructor(options: any, itemsSelector: string, fieldSelectors: string[]);
|
constructor(options: any, itemsSelector: string, fieldSelectors: string[]);
|
||||||
@ -36,56 +41,138 @@ export class Task {
|
|||||||
get urls(): string[] {
|
get urls(): string[] {
|
||||||
return this._urls;
|
return this._urls;
|
||||||
}
|
}
|
||||||
get results(): string[][] {
|
get results(): ExtractResult {
|
||||||
return this._data_keys.reduce((p, c) => {
|
let rs: string[][] = this._data_keys.reduce((p, c) => {
|
||||||
return p.concat(this._data[c]);
|
return p.concat(this._data[c]);
|
||||||
}, []);
|
}, []);
|
||||||
|
return new ExtractResult(rs);
|
||||||
}
|
}
|
||||||
get fieldSelectors(): string[] {
|
get fieldSelectors(): string[] {
|
||||||
return this._fieldSelectors;
|
return this._fieldSelectors;
|
||||||
}
|
}
|
||||||
clean(): Task {
|
clean(): Task {
|
||||||
|
this.stop();
|
||||||
this._data = {};
|
this._data = {};
|
||||||
this._data_keys = [];
|
this._data_keys = [];
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
stop() {
|
||||||
|
this._running = false;
|
||||||
|
let listener: ActionSubscriber;
|
||||||
|
while (listener = this._listeners.pop()) {
|
||||||
|
messageSubscribers.removeListener(Actions.REPORT_NEW_PAGE, listener);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
async watch() {
|
||||||
|
if (this._running) {
|
||||||
|
logger.info("The task is running. Please wait...");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this._running = true;
|
||||||
|
let window = await findIncognitoWindow() || await getCurrentWindow();
|
||||||
|
if (!window) {
|
||||||
|
logger.info("No window to watch...");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let watchTaskID = 0;
|
||||||
|
let listener: ActionSubscriber = async (request, sender, sendResponse) => {
|
||||||
|
let findWindow = await getWindowByID(window.id);
|
||||||
|
if (!findWindow) {
|
||||||
|
// stop watch on window close.
|
||||||
|
messageSubscribers.removeListener(Actions.REPORT_NEW_PAGE, listener);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// only watch current window.
|
||||||
|
if (sender.tab.windowId != window.id) return;
|
||||||
|
let taskID = watchTaskID++;
|
||||||
|
logger.info(`Watcher #${taskID} starts.`);
|
||||||
|
let pm = this.makeOptionalTasks(sender.tab);
|
||||||
|
return pm.then(
|
||||||
|
() => extractTabData(sender.tab, this._itemsSelector, this._fieldSelectors, sender.tab.url, true)
|
||||||
|
).then(
|
||||||
|
results => {
|
||||||
|
if (results && results.length) {
|
||||||
|
this.saveResult(results, sender.tab.url);
|
||||||
|
}
|
||||||
|
logger.info(`Watcher #${taskID} ends.`);
|
||||||
|
}
|
||||||
|
).catch(
|
||||||
|
e => logger.error(`Watcher #${taskID} ends with:`, e)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
this._listeners.push(listener);
|
||||||
|
messageSubscribers.addListener(Actions.REPORT_NEW_PAGE, listener);
|
||||||
|
}
|
||||||
async execute(tab: chrome.tabs.Tab, upstreamData?: ExtractResult): Promise<void> {
|
async execute(tab: chrome.tabs.Tab, upstreamData?: ExtractResult): Promise<void> {
|
||||||
if (!tab) return Promise.reject("No tab to execute the task.");
|
if (!tab) throw "No tab to execute the task.";
|
||||||
|
if (this._running) throw "The task is running. Please wait...";
|
||||||
|
this._running = true;
|
||||||
let urls = this._urls
|
let urls = this._urls
|
||||||
if (!urls.length) {
|
if (!urls.length) {
|
||||||
if (upstreamData) {
|
if (upstreamData) {
|
||||||
urls = parseUrls(upstreamData);
|
urls = parseUrls(upstreamData);
|
||||||
} else {
|
} else {
|
||||||
urls = [await queryUrl(tab)];
|
let tabURL: string;
|
||||||
|
await queryUrl(tab)
|
||||||
|
.then(u => {
|
||||||
|
tabURL = u;
|
||||||
|
})
|
||||||
|
.catch(() => {
|
||||||
|
e => {
|
||||||
|
this._running = false;
|
||||||
|
return Promise.reject(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
urls = [tabURL];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let saveResult = (results, key) => {
|
|
||||||
this._data[key] = results;
|
|
||||||
this._data_keys.push(key);
|
|
||||||
}
|
|
||||||
return urls.reduce((p, url, i) => p.then(
|
return urls.reduce((p, url, i) => p.then(
|
||||||
results => {
|
results => {
|
||||||
if (i > 0 && results instanceof Array) {
|
if (i > 0 && results instanceof Array) {
|
||||||
let lastURL = urls[i - 1];
|
let lastURL = urls[i - 1];
|
||||||
saveResult(results, lastURL);
|
this.saveResult(results, lastURL);
|
||||||
}
|
}
|
||||||
if (this._data[url]) return;
|
if (this._data[url]) return;
|
||||||
let pms: Promise<any> = redirectTab(tab, url);
|
|
||||||
if (this._options["scrollToBottom"]) {
|
let pms: Promise<any> = this.runningCheck(() => redirectTab(tab, url));
|
||||||
pms = pms.then(() => scrollToBottom(tab));
|
return pms
|
||||||
}
|
.then(() => this.makeOptionalTasks(tab))
|
||||||
return pms.then(
|
.then(
|
||||||
() => extractTabData(tab, this._itemsSelector, this._fieldSelectors)
|
() => this.runningCheck(() => extractTabData(tab, this._itemsSelector, this._fieldSelectors))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
), Promise.resolve<string[][]>(null)).then(
|
), Promise.resolve<string[][]>(null)).then(
|
||||||
results => {
|
results => {
|
||||||
if (results && results.length) {
|
if (results && results.length) {
|
||||||
let lastURL = urls[urls.length - 1];
|
let lastURL = urls[urls.length - 1];
|
||||||
saveResult(results, lastURL);
|
this.saveResult(results, lastURL);
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
this._running = false;
|
||||||
|
}
|
||||||
|
).catch(
|
||||||
|
e => {
|
||||||
|
this._running = false;
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
private makeOptionalTasks(tab: chrome.tabs.Tab): Promise<any> {
|
||||||
|
let pm: Promise<any>;
|
||||||
|
if (this._options["scrollToBottom"]) {
|
||||||
|
pm = this.runningCheck(() => scrollToBottom(tab));
|
||||||
|
}
|
||||||
|
return pm;
|
||||||
|
}
|
||||||
|
private runningCheck(fn: () => Promise<any>): Promise<any> {
|
||||||
|
if (!this._running) throw "The task is stopped by user.";
|
||||||
|
return fn();
|
||||||
|
}
|
||||||
|
private saveResult(results, key) {
|
||||||
|
if (this._data[key] === undefined) {
|
||||||
|
// do not add keys again
|
||||||
|
this._data_keys.push(key);
|
||||||
|
}
|
||||||
|
this._data[key] = results;
|
||||||
|
logger.info(`${results.length} items found.`)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@ -1,7 +1,8 @@
|
|||||||
import { URL_REG } from "./common";
|
|
||||||
import { ExtractResult } from "./result";
|
import { ExtractResult } from "./result";
|
||||||
|
|
||||||
export function parseUrls(...args) {
|
const URL_REG = /^\s*(https?):\/\//im;
|
||||||
|
|
||||||
|
export function parseUrls(...args): string[] {
|
||||||
if (!args.length) return [];
|
if (!args.length) return [];
|
||||||
let arg = args.shift();
|
let arg = args.shift();
|
||||||
if (arg instanceof Array) {
|
if (arg instanceof Array) {
|
||||||
|
|||||||
@ -1,11 +0,0 @@
|
|||||||
|
|
||||||
export const EXT_NAME = "DataExtracter";
|
|
||||||
|
|
||||||
export const ACTION_EXTRACT = `${EXT_NAME}:Extract`;
|
|
||||||
export const ACTION_GOTO_URL = `${EXT_NAME}:GoToTUL`;
|
|
||||||
export const ACTION_PING = `${EXT_NAME}:ReportIn`;
|
|
||||||
export const ACTION_QUERY_URL = `${EXT_NAME}:QueryURL`;
|
|
||||||
export const ACTION_SCROLL_BOTTOM = `${EXT_NAME}:ScrollToBottom`;
|
|
||||||
export const ACTION_UPLOAD_STATE = `${EXT_NAME}:UploadStateFile`;
|
|
||||||
export const ACTION_SLEEP = `${EXT_NAME}:Sleep`;
|
|
||||||
export const ACTION_WAKEUP = `${EXT_NAME}:WakeUp`;
|
|
||||||
28
src/common/index.ts
Normal file
28
src/common/index.ts
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
export enum Actions {
|
||||||
|
// from background to content script
|
||||||
|
EXTRACT = 1,
|
||||||
|
GOTO_URL,
|
||||||
|
PING,
|
||||||
|
QUERY_URL,
|
||||||
|
SCROLL_BOTTOM,
|
||||||
|
SLEEP,
|
||||||
|
WAKEUP,
|
||||||
|
// from popup to background script
|
||||||
|
UPLOAD_STATE,
|
||||||
|
// from content to background script
|
||||||
|
REPORT_NEW_PAGE,
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface Request {
|
||||||
|
action: Actions
|
||||||
|
itemsSelector?: string
|
||||||
|
fieldSelectors?: string[]
|
||||||
|
url?: string
|
||||||
|
fileName?: string
|
||||||
|
state?: string
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface Response<T> {
|
||||||
|
result: T;
|
||||||
|
error: string;
|
||||||
|
}
|
||||||
@ -13,7 +13,7 @@ export class Logger {
|
|||||||
constructor(logLevel, notifyLevel) {
|
constructor(logLevel, notifyLevel) {
|
||||||
if (logLevel) this._log_level = logLevel;
|
if (logLevel) this._log_level = logLevel;
|
||||||
if (notifyLevel) this._notify_level = notifyLevel;
|
if (notifyLevel) this._notify_level = notifyLevel;
|
||||||
chrome.notifications.onClosed.addListener((id, byUser) => { this._notify_level = undefined });
|
if (chrome.notifications) chrome.notifications.onClosed.addListener((id, byUser) => { this._notify_level = undefined });
|
||||||
}
|
}
|
||||||
get logLevel() {
|
get logLevel() {
|
||||||
return this._log_level;
|
return this._log_level;
|
||||||
@ -71,3 +71,5 @@ export class Logger {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export const logger = new Logger(LOGGER_LEVEL.DEBUG, LOGGER_LEVEL.DISABLED);
|
||||||
@ -1,4 +1,9 @@
|
|||||||
export function extract(itemsSelector: string, fieldSelectors: string[]): string[][] {
|
import { logger } from "../common/logger";
|
||||||
|
|
||||||
|
export function extract(itemsSelector: string, fieldSelectors: string[], expectedURL: string): string[][] {
|
||||||
|
if (expectedURL && location.href != expectedURL) {
|
||||||
|
throw 'Target tab URL changed, aborting...';
|
||||||
|
}
|
||||||
// since some elements may be loaded asynchronously.
|
// since some elements may be loaded asynchronously.
|
||||||
// if one field is never found, we should return undefined,
|
// if one field is never found, we should return undefined,
|
||||||
// so that senders can detect to retry until elements loaded.
|
// so that senders can detect to retry until elements loaded.
|
||||||
@ -11,19 +16,42 @@ export function extract(itemsSelector: string, fieldSelectors: string[]): string
|
|||||||
item => {
|
item => {
|
||||||
return fieldSelectors.map(
|
return fieldSelectors.map(
|
||||||
selector => {
|
selector => {
|
||||||
|
let doClick = false;
|
||||||
|
if (selector.startsWith("!")) {
|
||||||
|
doClick = true;
|
||||||
|
selector = selector.substring(1);
|
||||||
|
}
|
||||||
let [cls, attr] = selector.split('@').slice(0, 2);
|
let [cls, attr] = selector.split('@').slice(0, 2);
|
||||||
let fieldVals = Array.from(item.querySelectorAll(cls));
|
let fieldElements: Element[];
|
||||||
if (!fieldVals.length) {
|
cls = cls.trim()
|
||||||
|
if (cls != "") {
|
||||||
|
fieldElements = Array.from(item.querySelectorAll(cls));
|
||||||
|
} else {
|
||||||
|
fieldElements = [item];
|
||||||
|
}
|
||||||
|
if (!fieldElements.length) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fieldFound[selector] = true;
|
fieldFound[selector] = true;
|
||||||
return fieldVals.map(find => attr ? find[attr] : find.textContent.trim()).join('\n')
|
return fieldElements.map(find => {
|
||||||
|
if (doClick) {
|
||||||
|
let e = document.createEvent("MouseEvents");
|
||||||
|
e.initEvent("click", true, true);
|
||||||
|
find.dispatchEvent(e);
|
||||||
|
}
|
||||||
|
return attr ? find[attr] : find.textContent.trim();
|
||||||
|
}).join('\n')
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
// TODO: configurable wait logic
|
||||||
// if it exists a field, which is not found in any row, the sender should retry.
|
// if it exists a field, which is not found in any row, the sender should retry.
|
||||||
let shouldWait = fieldSelectors.reduce((p, c) => p || !fieldFound[c], false);
|
let notFoundFields = fieldSelectors.filter(f => !fieldFound[f]);
|
||||||
|
let shouldWait = notFoundFields.length > 0;
|
||||||
|
if (shouldWait) {
|
||||||
|
logger.debug('should wait for:', fieldSelectors.filter(f => !fieldFound[f]).join(','));
|
||||||
|
}
|
||||||
return shouldWait ? [] : results;
|
return shouldWait ? [] : results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,11 +1,11 @@
|
|||||||
import { ACTION_WAKEUP, ACTION_EXTRACT, ACTION_GOTO_URL, ACTION_PING, ACTION_QUERY_URL, ACTION_SCROLL_BOTTOM, ACTION_SLEEP } from '../common';
|
import { Request, Actions, Response } from '../common';
|
||||||
import { scrollToBottom, extract } from './actions';
|
import { scrollToBottom, extract } from './actions';
|
||||||
|
|
||||||
let asleep = false;
|
let asleep = false;
|
||||||
chrome.runtime.onMessage.addListener(
|
chrome.runtime.onMessage.addListener(
|
||||||
function (request, sender: chrome.runtime.MessageSender, sendResponse: (r: any) => void) {
|
function (request, sender: chrome.runtime.MessageSender, sendResponse: (r: any) => void) {
|
||||||
if (!request.action) return;
|
if (!request.action) return;
|
||||||
if (asleep && ACTION_WAKEUP != request.action) {
|
if (asleep && Actions.WAKEUP != request.action) {
|
||||||
sendResponse && sendResponse(undefined);
|
sendResponse && sendResponse(undefined);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -16,30 +16,60 @@ chrome.runtime.onMessage.addListener(
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
async function doAction(request: any, sender: chrome.runtime.MessageSender) {
|
chrome.runtime.sendMessage(<Request>{
|
||||||
switch (request.action) {
|
action: Actions.REPORT_NEW_PAGE,
|
||||||
case ACTION_EXTRACT:
|
});
|
||||||
let data = extract(request.itemsSelector, request.fieldSelectors);
|
|
||||||
return data;
|
async function doAction(request: Request, sender: chrome.runtime.MessageSender): Promise<Response<any>> {
|
||||||
case ACTION_GOTO_URL:
|
let result: any;
|
||||||
window.location.replace(request.url);
|
let error: string;
|
||||||
// should not recieve any request until the page & script reload
|
try {
|
||||||
asleep = true;
|
switch (request.action) {
|
||||||
return request.url;
|
case Actions.EXTRACT:
|
||||||
case ACTION_PING:
|
result = extract(request.itemsSelector, request.fieldSelectors, request.url);
|
||||||
return "pong";
|
break;
|
||||||
case ACTION_QUERY_URL:
|
case Actions.GOTO_URL:
|
||||||
return window.location.href;
|
window.location.replace(request.url);
|
||||||
case ACTION_SCROLL_BOTTOM:
|
// should not recieve any request until the page & script reload
|
||||||
return scrollToBottom();
|
asleep = true;
|
||||||
case ACTION_SLEEP:
|
result = request.url;
|
||||||
asleep = true;
|
break;
|
||||||
return "Content script is sleeping.";
|
case Actions.PING:
|
||||||
case ACTION_WAKEUP:
|
result = "pong";
|
||||||
asleep = false;
|
break;
|
||||||
return "Content script is available.";
|
case Actions.QUERY_URL:
|
||||||
default:
|
result = window.location.href;
|
||||||
break;
|
break;
|
||||||
|
case Actions.SCROLL_BOTTOM:
|
||||||
|
result = scrollToBottom();
|
||||||
|
break;
|
||||||
|
case Actions.SLEEP:
|
||||||
|
asleep = true;
|
||||||
|
result = "Content script is sleeping.";
|
||||||
|
break;
|
||||||
|
case Actions.WAKEUP:
|
||||||
|
asleep = false;
|
||||||
|
result = "Content script is available.";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
error = 'Unsupported action.'
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
if (err instanceof Error) {
|
||||||
|
error = err.message;
|
||||||
|
} else {
|
||||||
|
error = err;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return newResponse(result, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
function newResponse<T>(result: T, err?: string): Response<T> {
|
||||||
|
let r: Response<T> = {
|
||||||
|
result: result,
|
||||||
|
error: err,
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
@ -1,4 +1,4 @@
|
|||||||
import { ACTION_UPLOAD_STATE } from '../common';
|
import { Request, Actions } from '../common';
|
||||||
|
|
||||||
window.onload = function () {
|
window.onload = function () {
|
||||||
document.querySelector('#link-extension-detail')
|
document.querySelector('#link-extension-detail')
|
||||||
@ -10,7 +10,7 @@ window.onload = function () {
|
|||||||
document.querySelector('#link-document')
|
document.querySelector('#link-document')
|
||||||
.addEventListener('click', () => {
|
.addEventListener('click', () => {
|
||||||
chrome.tabs.create({
|
chrome.tabs.create({
|
||||||
'url': `https://git.jebbs.co/jebbs/data-extracter-extesion`
|
'url': `https://git.qjebbs.com/jebbs/data-extracter-extesion`
|
||||||
});
|
});
|
||||||
})
|
})
|
||||||
document.querySelector('#state-input')
|
document.querySelector('#state-input')
|
||||||
@ -21,10 +21,10 @@ window.onload = function () {
|
|||||||
reader.readAsText(this.files[0], "UTF-8");
|
reader.readAsText(this.files[0], "UTF-8");
|
||||||
reader.onload = function (evt) {
|
reader.onload = function (evt) {
|
||||||
var fileString = evt.target.result;
|
var fileString = evt.target.result;
|
||||||
chrome.runtime.sendMessage({
|
chrome.runtime.sendMessage(<Request>{
|
||||||
action: ACTION_UPLOAD_STATE,
|
action: Actions.UPLOAD_STATE,
|
||||||
state: fileString,
|
state: fileString,
|
||||||
name: fileName
|
fileName: fileName
|
||||||
}, r => {
|
}, r => {
|
||||||
if (r) console.log('State sent:', r);
|
if (r) console.log('State sent:', r);
|
||||||
});
|
});
|
||||||
|
|||||||
@ -52,7 +52,7 @@
|
|||||||
<p>
|
<p>
|
||||||
<b>Full document at:</b>
|
<b>Full document at:</b>
|
||||||
<br>
|
<br>
|
||||||
<a href="#" id="link-document">https://git.jebbs.co/jebbs/data-extracter-extesion</a>
|
<a href="#" id="link-document">https://git.qjebbs.com/jebbs/data-extracter-extesion</a>
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@ -27,6 +27,7 @@
|
|||||||
],
|
],
|
||||||
"run_at": "document_idle"
|
"run_at": "document_idle"
|
||||||
}],
|
}],
|
||||||
|
"incognito": "spanning",
|
||||||
"permissions": [
|
"permissions": [
|
||||||
"activeTab",
|
"activeTab",
|
||||||
"notifications"
|
"notifications"
|
||||||
|
|||||||
Reference in New Issue
Block a user