Compare commits

...

4 Commits

Author SHA1 Message Date
9cd25e3c1d update url 2021-04-19 15:58:04 +08:00
7827d385bd refactor 2020-06-16 14:45:36 +08:00
ade0670415 update readme 2020-01-17 11:01:13 +08:00
63aec616b1 code optimize 2020-01-17 09:38:40 +08:00
12 changed files with 81 additions and 44 deletions

View File

@ -185,6 +185,23 @@ e.start();
> The uploaded state will be cleared after 30 seconds if you don't load it.
## Watch Mode
Watch mode tries to extract data from every page you visit **in the current window**.
```js
e = new Extractor();
e.task('.search-list-item', ['a@href'], ["http://sample.com/abc"])
.task('list-item', ["a.title", "p.content"]);
e.watch(1); // start watching for first task
```
To stop watching, you can either `close current window`, or:
```js
e.stop();
```
## Development
Clone this project and execute:

View File

@ -1,6 +1,6 @@
import { Actions, Request } from "../common";
import { sendMessage, ResponseChecker } from "./messaging";
import { logger } from "./logger";
import { logger } from "../common/logger";
/**
* redirect tab to url.
@ -19,14 +19,14 @@ export function redirectTab(tab: chrome.tabs.Tab, url: string) {
let queryErr: any;
let newURL = await queryUrl(tab).catch(e => queryErr = e);
if (queryErr) {
return Promise.reject(queryErr);
throw queryErr;
}
if (newURL == url) return url;
if (
tryCount % 1 == 0 &&
!confirm('Cannot navigate to target url. \nPress OK to continue, Cancel to stop.')
) {
return Promise.reject("Tasks stopped by user.");
throw "Tasks stopped by user.";
}
return undefined;
}

View File

@ -1,4 +1,4 @@
import { logger } from "./logger";
import { logger } from "../common/logger";
import { Actions } from "../common";
import { messageSubscribers } from "./messaging";

View File

@ -2,7 +2,7 @@ import { Task } from "./task";
import { saveFile } from "./tools";
import { createTab, getActiveTab, ping } from "./actions";
import { ExtractResult } from "./result";
import { logger } from "./logger";
import { logger } from "../common/logger";
import { caches } from "./caches";
export class Extractor {
@ -12,6 +12,14 @@ export class Extractor {
/**
 * Creates an extractor.
 *
 * @param options optional settings; stored on `this._options` only when a
 *     truthy value is supplied, otherwise the existing value is left as is.
 */
constructor(options?) {
    if (!options) return;
    this._options = options;
}
/**
 * Pings the active tab and logs an error when it cannot be contacted.
 *
 * The tab is looked up with `getActiveTab(true)` first, falling back to
 * `getActiveTab(false)` when the first lookup yields nothing.
 *
 * @param count forwarded unchanged to `ping` (presumably the number of
 *     attempts — confirm against `./actions`).
 * @returns a promise that resolves to `undefined` in every case; failure is
 *     only reported through the logger.
 */
static async ping(count: number = 1) {
    const activeTab = await getActiveTab(true) || await getActiveTab(false);
    const reachable = await ping(activeTab, count);
    if (reachable) return;
    logger.error('Cannot contact with active tab.');
}
/**
* Save current state, in case we restore it later.
*/

View File

@ -1,6 +1,6 @@
import { Request, Actions, Response } from "../common";
import { getTabByID } from "./actions";
import { logger } from "./logger";
import { logger } from "../common/logger";
export type ResponseCheckerSync<T> = (r: Response<T>, err: chrome.runtime.LastError, count: number) => T;
@ -50,40 +50,18 @@ export function sendMessage<T>(
chrome.tabs.sendMessage(tab.id, req, async (r: Response<T>) => {
// check error but do nothing until dataChecker.
let err = chrome.runtime.lastError;
let result: T;
// r could be undefined if the content script is interrupted.
if (r) {
result = r.result;
if (dataChecker) {
let pms: T | Promise<T>;
try {
pms = dataChecker(r, err, count);
} catch (error) {
reject(error);
return;
}
// don't catch if it's not a Promise
if (pms instanceof Promise) {
let checkerError: any;
pms = pms.catch(e => checkerError = e);
result = await pms;
if (checkerError) {
reject(checkerError);
return;
}
} else {
result = pms;
}
}
let [result, error] = await checkResponse(dataChecker, r, err, count);
if (error) {
reject(error);
return;
}
let flag = result !== undefined && result !== null;
let flag = result !== undefined;
if (log) logger.info(log, flag ? '(OK)' : '(failed)');
if (flag) {
resolve(result);
} else {
setTimeout(() => {
logger.debug('Invalid response', r, 'retry...');
loop();
}, interval);
}
@ -92,6 +70,39 @@ export function sendMessage<T>(
});
}
/**
 * Turns a raw tab response into a `[result, error]` pair, optionally running
 * it through a caller-supplied checker. This function never rejects: every
 * failure is reported through the second tuple element.
 *
 * @param dataChecker optional checker, synchronous or asynchronous. When
 *     omitted, the response's own `result` and `error` are returned as is.
 * @param response the response received from the tab; may be undefined if
 *     the content script is interrupted.
 * @param error the `chrome.runtime.lastError` captured by the caller; it is
 *     only forwarded to the checker.
 * @param tryCount attempt counter, forwarded to the checker.
 * @returns `[result, undefined]` on success, `[undefined, error]` when the
 *     checker throws or rejects, and `[undefined, undefined]` when there is
 *     no response, which tells the caller to retry.
 */
async function checkResponse<T>(
dataChecker: ResponseChecker<T>,
response: Response<T>,
error: chrome.runtime.LastError,
tryCount: number
): Promise<[T, string]> {
    // response could be undefined if the content script is interrupted.
    // don't check, tell sendMessage to retry.
    if (!response) return [undefined, undefined];
    if (!dataChecker) {
        return [response.result, response.error];
    }
    try {
        // `await` accepts plain values, native promises and any other
        // thenable alike, so there is no need for an `instanceof Promise`
        // test (which misses thenables and promises from another realm).
        const result: T = await dataChecker(response, error, tryCount);
        return [result, undefined];
    } catch (err) {
        // Covers both a synchronous throw and an asynchronous rejection,
        // including rejections whose reason is falsy.
        return [undefined, err];
    }
}
export type ActionSubscriberSync = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => void;
export type ActionSubscriberAsync = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => Promise<void>;
export type ActionSubscriber = ActionSubscriberSync | ActionSubscriberAsync;

View File

@ -26,6 +26,7 @@ export class ExtractResult {
}
let line = lineCells.reduce(
(lineText, cell, idx) => {
cell = cell || "";
cell = '"' + cell.trim().replace(/"/g, '""') + '"';
return lineText + cell + (idx == lineCells.length - 1 ? "" : ",")
}, "");

View File

@ -20,7 +20,7 @@ function(itemsSelector:string, fieldSelectors:string[], urls:string[]);
$(".item", ["a", "a@href"]);
## See Detailed Help:
https://git.jebbs.co/jebbs/data-extracter-extesion
https://git.qjebbs.com/jebbs/data-extracter-extesion
`.trim();
export function testArgs(...args: any) {

View File

@ -4,7 +4,7 @@ import { testArgs, signitures } from "./signiture";
import { ExtractResult } from "./result";
import { messageSubscribers, ActionSubscriber } from "./messaging";
import { Actions } from "../common";
import { logger } from "./logger";
import { logger } from "../common/logger";
export class Task {
private _data: { [key: string]: string[][] } = {};
@ -87,7 +87,7 @@ export class Task {
logger.info(`Watcher #${taskID} starts.`);
let pm = this.makeOptionalTasks(sender.tab);
return pm.then(
() => extractTabData(sender.tab, this._itemsSelector, this._fieldSelectors, sender.tab.url, false)
() => extractTabData(sender.tab, this._itemsSelector, this._fieldSelectors, sender.tab.url, true)
).then(
results => {
if (results && results.length) {
@ -103,8 +103,8 @@ export class Task {
messageSubscribers.addListener(Actions.REPORT_NEW_PAGE, listener);
}
async execute(tab: chrome.tabs.Tab, upstreamData?: ExtractResult): Promise<void> {
if (!tab) return Promise.reject("No tab to execute the task.");
if (this._running) return Promise.reject("The task is running. Please wait...");
if (!tab) throw "No tab to execute the task.";
if (this._running) throw "The task is running. Please wait...";
this._running = true;
let urls = this._urls
if (!urls.length) {
@ -140,7 +140,7 @@ export class Task {
).catch(
e => {
this._running = false;
return Promise.reject(e);
throw e;
}
);
}
@ -152,7 +152,7 @@ export class Task {
return pm;
}
private runningCheck(fn: () => Promise<any>): Promise<any> {
if (!this._running) return Promise.reject("The task is stopped by user.");
if (!this._running) throw "The task is stopped by user.";
return fn();
}
private saveResult(results, key) {

View File

@ -13,7 +13,7 @@ export class Logger {
constructor(logLevel, notifyLevel) {
if (logLevel) this._log_level = logLevel;
if (notifyLevel) this._notify_level = notifyLevel;
chrome.notifications.onClosed.addListener((id, byUser) => { this._notify_level = undefined });
if (chrome.notifications) chrome.notifications.onClosed.addListener((id, byUser) => { this._notify_level = undefined });
}
get logLevel() {
return this._log_level;

View File

@ -10,7 +10,7 @@ window.onload = function () {
document.querySelector('#link-document')
.addEventListener('click', () => {
chrome.tabs.create({
'url': `https://git.jebbs.co/jebbs/data-extracter-extesion`
'url': `https://git.qjebbs.com/jebbs/data-extracter-extesion`
});
})
document.querySelector('#state-input')

View File

@ -52,7 +52,7 @@
<p>
<b>Full document at:</b>
<br>
<a href="#" id="link-document">https://git.jebbs.co/jebbs/data-extracter-extesion</a>
<a href="#" id="link-document">https://git.qjebbs.com/jebbs/data-extracter-extesion</a>
</p>
</div>
</div>