Compare commits
4 Commits
378883b626
...
9cd25e3c1d
| Author | SHA1 | Date | |
|---|---|---|---|
| 9cd25e3c1d | |||
| 7827d385bd | |||
| ade0670415 | |||
| 63aec616b1 |
17
readme.md
17
readme.md
@ -185,6 +185,23 @@ e.start();
|
||||
|
||||
> The uploaded state will be cleared after 30 seconds if you don't load it.
|
||||
|
||||
## Watch Mode
|
||||
|
||||
Watch mode tries to extract data from every page you visit **in the current window**.
|
||||
|
||||
```js
|
||||
e = new Extractor();
|
||||
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
|
||||
.task('list-item', ["a.title", "p.content"]);
|
||||
e.watch(1); // start watching for first task
|
||||
```
|
||||
|
||||
To stop watching, you can either close the current window, or call:
|
||||
|
||||
```js
|
||||
e.stop();
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
Clone this project and execute:
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { Actions, Request } from "../common";
|
||||
import { sendMessage, ResponseChecker } from "./messaging";
|
||||
import { logger } from "./logger";
|
||||
import { logger } from "../common/logger";
|
||||
|
||||
/**
|
||||
* redirect tab to url.
|
||||
@ -19,14 +19,14 @@ export function redirectTab(tab: chrome.tabs.Tab, url: string) {
|
||||
let queryErr: any;
|
||||
let newURL = await queryUrl(tab).catch(e => queryErr = e);
|
||||
if (queryErr) {
|
||||
return Promise.reject(queryErr);
|
||||
throw queryErr;
|
||||
}
|
||||
if (newURL == url) return url;
|
||||
if (
|
||||
tryCount % 1 == 0 &&
|
||||
!confirm('Cannot navigate to target url. \nPress OK to continue, Cancel to stop.')
|
||||
) {
|
||||
return Promise.reject("Tasks stopped by user.");
|
||||
throw "Tasks stopped by user.";
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { logger } from "./logger";
|
||||
import { logger } from "../common/logger";
|
||||
import { Actions } from "../common";
|
||||
import { messageSubscribers } from "./messaging";
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@ import { Task } from "./task";
|
||||
import { saveFile } from "./tools";
|
||||
import { createTab, getActiveTab, ping } from "./actions";
|
||||
import { ExtractResult } from "./result";
|
||||
import { logger } from "./logger";
|
||||
import { logger } from "../common/logger";
|
||||
import { caches } from "./caches";
|
||||
|
||||
export class Extractor {
|
||||
@ -12,6 +12,14 @@ export class Extractor {
|
||||
/**
 * Creates an extractor.
 *
 * @param options Optional settings object. It is stored in `_options` only
 *   when a truthy value is passed; otherwise `_options` is left untouched.
 */
constructor(options?) {
|
||||
if (options) this._options = options;
|
||||
}
|
||||
/**
 * Pings the active tab and logs an error when the ping does not succeed.
 *
 * The tab is looked up with `getActiveTab(true)`; if that yields a falsy
 * value, `getActiveTab(false)` is tried instead.
 * NOTE(review): the meaning of the boolean argument of `getActiveTab` is not
 * visible here — confirm against its definition.
 *
 * @param count Forwarded as the second argument of `ping` (defaults to 1).
 * @returns A promise that resolves with no value, whether or not the ping
 *   succeeded; failure is only reported through `logger.error`.
 */
static async ping(count: number = 1) {
|
||||
let tab = await getActiveTab(true) || await getActiveTab(false);
|
||||
let succ = await ping(tab, count);
|
||||
if (!succ) {
|
||||
logger.error('Cannot contact with active tab.');
|
||||
return;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Save current state, in case we restore it later.
|
||||
*/
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { Request, Actions, Response } from "../common";
|
||||
import { getTabByID } from "./actions";
|
||||
import { logger } from "./logger";
|
||||
import { logger } from "../common/logger";
|
||||
|
||||
|
||||
/**
 * Synchronous response checker: receives the response `r`, the
 * `chrome.runtime.LastError` value `err` and a numeric `count`, and returns
 * a value of type `T`.
 * NOTE(review): `count` looks like the attempt counter passed by
 * `sendMessage` — confirm against the caller.
 */
export type ResponseCheckerSync<T> = (r: Response<T>, err: chrome.runtime.LastError, count: number) => T;
|
||||
@ -50,40 +50,18 @@ export function sendMessage<T>(
|
||||
chrome.tabs.sendMessage(tab.id, req, async (r: Response<T>) => {
|
||||
// check error but do nothing until dataChecker.
|
||||
let err = chrome.runtime.lastError;
|
||||
let result: T;
|
||||
// r could be undefined if the content script is interrupted.
|
||||
if (r) {
|
||||
result = r.result;
|
||||
|
||||
if (dataChecker) {
|
||||
let pms: T | Promise<T>;
|
||||
try {
|
||||
pms = dataChecker(r, err, count);
|
||||
} catch (error) {
|
||||
reject(error);
|
||||
return;
|
||||
}
|
||||
// don't catch if it's not a Promise
|
||||
if (pms instanceof Promise) {
|
||||
let checkerError: any;
|
||||
pms = pms.catch(e => checkerError = e);
|
||||
result = await pms;
|
||||
if (checkerError) {
|
||||
reject(checkerError);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
result = pms;
|
||||
}
|
||||
}
|
||||
let [result, error] = await checkResponse(dataChecker, r, err, count);
|
||||
if (error) {
|
||||
reject(error);
|
||||
return;
|
||||
}
|
||||
|
||||
let flag = result !== undefined && result !== null;
|
||||
let flag = result !== undefined;
|
||||
if (log) logger.info(log, flag ? '(OK)' : '(failed)');
|
||||
if (flag) {
|
||||
resolve(result);
|
||||
} else {
|
||||
setTimeout(() => {
|
||||
logger.debug('Invalid response', r, 'retry...');
|
||||
loop();
|
||||
}, interval);
|
||||
}
|
||||
@ -92,6 +70,39 @@ export function sendMessage<T>(
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Validates a content-script response and returns a `[result, error]` pair.
 *
 * Outcomes:
 * - `response` is falsy: `[undefined, undefined]` (no result, no error).
 * - no `dataChecker` given: `[response.result, response.error]`.
 * - `dataChecker` throws synchronously: `[undefined, <thrown value>]`.
 * - `dataChecker` returns a Promise that rejects: `[undefined, <reason>]`.
 * - otherwise: `[<checker return / resolved value>, undefined]`.
 *
 * This function never rejects on checker failure; the failure is reported
 * through the second tuple element instead.
 *
 * NOTE(review): a rejection is detected with `if (checkerError)`, so a
 * rejection whose reason is falsy (e.g. `undefined`, `""`, `0`) is not
 * reported as an error; the reason itself ends up as `result`.
 *
 * @param dataChecker Optional checker called as
 *   `dataChecker(response, error, tryCount)`; may return a value or a Promise.
 * @param response The response to check; may be undefined.
 * @param error The `chrome.runtime.LastError` value, forwarded to the checker.
 * @param tryCount Forwarded to the checker as its third argument.
 * @returns A promise of the `[result, error]` tuple described above.
 */
async function checkResponse<T>(
|
||||
dataChecker: ResponseChecker<T>,
|
||||
response: Response<T>,
|
||||
error: chrome.runtime.LastError,
|
||||
tryCount: number
|
||||
): Promise<[T, string]> {
|
||||
// response could be undefined if the content script is interrupted.
|
||||
// don't check, tell sendMessage to retry.
|
||||
if (!response) return [undefined, undefined];
|
||||
if (!dataChecker) {
|
||||
return [response.result, response.error];
|
||||
}
|
||||
let result: T;
|
||||
let pms: T | Promise<T>;
|
||||
try {
|
||||
pms = dataChecker(response, error, tryCount);
|
||||
} catch (err) {
|
||||
return [undefined, err];
|
||||
}
|
||||
// don't catch if it's not a Promise
|
||||
if (pms instanceof Promise) {
|
||||
let checkerError: any;
|
||||
pms = pms.catch(e => checkerError = e);
|
||||
result = await pms;
|
||||
if (checkerError) {
|
||||
return [undefined, checkerError];
|
||||
}
|
||||
} else {
|
||||
result = pms;
|
||||
}
|
||||
return [result, undefined];
|
||||
}
|
||||
|
||||
/**
 * Message subscriber signatures. Each subscriber receives the `Request`, the
 * `chrome.runtime.MessageSender` and a `sendResponse` callback.
 * - `ActionSubscriberSync` returns `void`.
 * - `ActionSubscriberAsync` returns `Promise<void>`.
 * - `ActionSubscriber` accepts either form.
 */
export type ActionSubscriberSync = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => void;
|
||||
export type ActionSubscriberAsync = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => Promise<void>;
|
||||
export type ActionSubscriber = ActionSubscriberSync | ActionSubscriberAsync;
|
||||
|
||||
@ -26,6 +26,7 @@ export class ExtractResult {
|
||||
}
|
||||
let line = lineCells.reduce(
|
||||
(lineText, cell, idx) => {
|
||||
cell = cell || "";
|
||||
cell = '"' + cell.trim().replace(/"/g, '""') + '"';
|
||||
return lineText + cell + (idx == lineCells.length - 1 ? "" : ",")
|
||||
}, "");
|
||||
|
||||
@ -20,7 +20,7 @@ function(itemsSelector:string, fieldSelectors:string[], urls:string[]);
|
||||
$(".item", ["a", "a@href"]);
|
||||
|
||||
## See Detailed Help:
|
||||
https://git.jebbs.co/jebbs/data-extracter-extesion
|
||||
https://git.qjebbs.com/jebbs/data-extracter-extesion
|
||||
`.trim();
|
||||
|
||||
export function testArgs(...args: any) {
|
||||
|
||||
@ -4,7 +4,7 @@ import { testArgs, signitures } from "./signiture";
|
||||
import { ExtractResult } from "./result";
|
||||
import { messageSubscribers, ActionSubscriber } from "./messaging";
|
||||
import { Actions } from "../common";
|
||||
import { logger } from "./logger";
|
||||
import { logger } from "../common/logger";
|
||||
|
||||
export class Task {
|
||||
private _data: { [key: string]: string[][] } = {};
|
||||
@ -87,7 +87,7 @@ export class Task {
|
||||
logger.info(`Watcher #${taskID} starts.`);
|
||||
let pm = this.makeOptionalTasks(sender.tab);
|
||||
return pm.then(
|
||||
() => extractTabData(sender.tab, this._itemsSelector, this._fieldSelectors, sender.tab.url, false)
|
||||
() => extractTabData(sender.tab, this._itemsSelector, this._fieldSelectors, sender.tab.url, true)
|
||||
).then(
|
||||
results => {
|
||||
if (results && results.length) {
|
||||
@ -103,8 +103,8 @@ export class Task {
|
||||
messageSubscribers.addListener(Actions.REPORT_NEW_PAGE, listener);
|
||||
}
|
||||
async execute(tab: chrome.tabs.Tab, upstreamData?: ExtractResult): Promise<void> {
|
||||
if (!tab) return Promise.reject("No tab to execute the task.");
|
||||
if (this._running) return Promise.reject("The task is running. Please wait...");
|
||||
if (!tab) throw "No tab to execute the task.";
|
||||
if (this._running) throw "The task is running. Please wait...";
|
||||
this._running = true;
|
||||
let urls = this._urls
|
||||
if (!urls.length) {
|
||||
@ -140,7 +140,7 @@ export class Task {
|
||||
).catch(
|
||||
e => {
|
||||
this._running = false;
|
||||
return Promise.reject(e);
|
||||
throw e;
|
||||
}
|
||||
);
|
||||
}
|
||||
@ -152,7 +152,7 @@ export class Task {
|
||||
return pm;
|
||||
}
|
||||
private runningCheck(fn: () => Promise<any>): Promise<any> {
|
||||
if (!this._running) return Promise.reject("The task is stopped by user.");
|
||||
if (!this._running) throw "The task is stopped by user.";
|
||||
return fn();
|
||||
}
|
||||
private saveResult(results, key) {
|
||||
|
||||
@ -13,7 +13,7 @@ export class Logger {
|
||||
constructor(logLevel, notifyLevel) {
|
||||
if (logLevel) this._log_level = logLevel;
|
||||
if (notifyLevel) this._notify_level = notifyLevel;
|
||||
chrome.notifications.onClosed.addListener((id, byUser) => { this._notify_level = undefined });
|
||||
if (chrome.notifications) chrome.notifications.onClosed.addListener((id, byUser) => { this._notify_level = undefined });
|
||||
}
|
||||
get logLevel() {
|
||||
return this._log_level;
|
||||
@ -10,7 +10,7 @@ window.onload = function () {
|
||||
document.querySelector('#link-document')
|
||||
.addEventListener('click', () => {
|
||||
chrome.tabs.create({
|
||||
'url': `https://git.jebbs.co/jebbs/data-extracter-extesion`
|
||||
'url': `https://git.qjebbs.com/jebbs/data-extracter-extesion`
|
||||
});
|
||||
})
|
||||
document.querySelector('#state-input')
|
||||
|
||||
@ -52,7 +52,7 @@
|
||||
<p>
|
||||
<b>Full document at:</b>
|
||||
<br>
|
||||
<a href="#" id="link-document">https://git.jebbs.co/jebbs/data-extracter-extesion</a>
|
||||
<a href="#" id="link-document">https://git.qjebbs.com/jebbs/data-extracter-extesion</a>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user