Compare commits

...

2 Commits

Author SHA1 Message Date
378883b626 check url change before extract data 2020-01-16 15:11:49 +08:00
c78f593c70 code optimize 2020-01-16 09:59:19 +08:00
11 changed files with 124 additions and 64 deletions

View File

@ -1,6 +1,6 @@
import { Actions, Request } from "../common";
import { sendMessage } from "./messaging";
import { logger } from "./common";
import { sendMessage, ResponseChecker } from "./messaging";
import { logger } from "./logger";
/**
* redirect tab to url.
@ -15,7 +15,7 @@ export function redirectTab(tab: chrome.tabs.Tab, url: string) {
action: Actions.GOTO_URL,
url: url
}
let checker = async (u, err, tryCount): Promise<string> => {
let checker: ResponseChecker<string> = async (r, err, tryCount): Promise<string> => {
let queryErr: any;
let newURL = await queryUrl(tab).catch(e => queryErr = e);
if (queryErr) {
@ -42,13 +42,16 @@ export function redirectTab(tab: chrome.tabs.Tab, url: string) {
* @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
* @returns {Promise<string[]>} a promise of extracted data
*/
export function extractTabData(tab: chrome.tabs.Tab, itemsSelector: string, fieldSelectors: string[], askOnfail?: boolean) {
let req = {
export function extractTabData(tab: chrome.tabs.Tab, itemsSelector: string, fieldSelectors: string[], expectedURL?: string, askOnfail?: boolean) {
let req: Request = {
action: Actions.EXTRACT,
itemsSelector: itemsSelector,
fieldSelectors: fieldSelectors
fieldSelectors: fieldSelectors,
url: expectedURL,
}
let checker = (result, err, tryCount) => {
let checker: ResponseChecker<string[][]> = (response, err, tryCount) => {
if (response.error) throw response.error;
let result = response.result;
if (!result || !result.length) {
if (
tryCount % 20 == 0 && (
@ -76,7 +79,9 @@ export async function ping(tab, count = 1) {
let req = {
action: Actions.PING
}
let checker = (r: string, e, c) => r == "pong" ? r : undefined;
let checker: ResponseChecker<string> = (r, e, c) =>
r.result == "pong" ? r.result : undefined;
let pong = await sendMessage<string>(tab, req, 'Check tab availability...', checker, 1000, count).catch(() => { });
return pong == "pong";
}

View File

@ -1,4 +1,4 @@
import { logger } from "./common";
import { logger } from "./logger";
import { Actions } from "../common";
import { messageSubscribers } from "./messaging";
@ -26,4 +26,6 @@ export class Caches {
}
}, 30000);
}
}
}
/** Shared `Caches` instance exported for use by other modules. */
export const caches = new Caches();

View File

@ -1,6 +0,0 @@
import { Logger, LOGGER_LEVEL } from "./logger";
import { Caches } from "./caches";
export const caches = new Caches();
export const logger = new Logger(LOGGER_LEVEL.DEBUG, LOGGER_LEVEL.DISABLED);
export const URL_REG = /^\s*(https?):\/\//im;

View File

@ -1,8 +1,9 @@
import { Task } from "./task";
import { saveFile } from "./tools";
import { createTab, getActiveTab, ping } from "./actions";
import { logger, caches } from "./common";
import { ExtractResult } from "./result";
import { logger } from "./logger";
import { caches } from "./caches";
export class Extractor {
private _tasks: Task[] = [];

View File

@ -71,3 +71,5 @@ export class Logger {
);
}
}
/**
 * Shared `Logger` instance exported for use by other modules.
 * NOTE(review): the meaning of the two level arguments is defined by the
 * `Logger` constructor, which is not visible here — confirm before changing them.
 */
export const logger = new Logger(LOGGER_LEVEL.DEBUG, LOGGER_LEVEL.DISABLED);

View File

@ -1,7 +1,11 @@
import { Request, Actions } from "../common";
import { Request, Actions, Response } from "../common";
import { getTabByID } from "./actions";
import { logger } from "./common";
import { logger } from "./logger";
/**
 * Synchronous response checker.
 * Receives the response envelope, the value of `chrome.runtime.lastError`
 * captured for this attempt, and the current attempt count; returns the value
 * that `sendMessage` should treat as the result. An exception thrown by the
 * checker makes `sendMessage` reject with that exception.
 * NOTE(review): `sendMessage` is documented as retrying until the response is
 * not undefined, so returning undefined presumably asks for another attempt —
 * confirm against the retry loop.
 */
export type ResponseCheckerSync<T> = (r: Response<T>, err: chrome.runtime.LastError, count: number) => T;
/**
 * Asynchronous response checker: same arguments as the synchronous form, but
 * the result is delivered through a promise. A rejection makes `sendMessage`
 * reject with the same reason.
 */
export type ResponseCheckerAsync<T> = (r: Response<T>, err: chrome.runtime.LastError, count: number) => Promise<T>;
/** Either checker form; this is the type of `sendMessage`'s `dataChecker` parameter. */
export type ResponseChecker<T> = ResponseCheckerSync<T> | ResponseCheckerAsync<T>;
/**
* Sending a message to target tab repeatedly until the response is not undefined.
* @param {object} tab the tab where to send the message
@ -19,7 +23,7 @@ export function sendMessage<T>(
tab: chrome.tabs.Tab,
req,
log?: string,
dataChecker?: (r: T, err: chrome.runtime.LastError, count: number) => T | Promise<T>,
dataChecker?: ResponseChecker<T>,
interval?: number,
limit?: number
) {
@ -43,24 +47,34 @@ export function sendMessage<T>(
return;
}
count++;
chrome.tabs.sendMessage(tab.id, req, async (r: T) => {
chrome.tabs.sendMessage(tab.id, req, async (r: Response<T>) => {
// check error but do nothing until dataChecker.
let err = chrome.runtime.lastError;
let result: T = r;
let result: T;
// r could be undefined if the content script is interrupted.
if (r) {
result = r.result;
if (dataChecker) {
let pms = dataChecker(r, err, count);
// don't catch if it's not a Promise
if (pms instanceof Promise) {
let checkerError: any;
pms = pms.catch(e => checkerError = e);
result = await pms;
if (checkerError) {
reject(checkerError);
if (dataChecker) {
let pms: T | Promise<T>;
try {
pms = dataChecker(r, err, count);
} catch (error) {
reject(error);
return;
}
} else {
result = pms;
// don't catch if it's not a Promise
if (pms instanceof Promise) {
let checkerError: any;
pms = pms.catch(e => checkerError = e);
result = await pms;
if (checkerError) {
reject(checkerError);
return;
}
} else {
result = pms;
}
}
}
@ -78,7 +92,10 @@ export function sendMessage<T>(
});
}
export type ActionSubscriber = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => void | Promise<void>;
/**
 * Synchronous handler for an incoming `Request`.
 * Receives the request, the sender of the message, and a `sendResponse`
 * callback that may be called with an optional reply.
 */
export type ActionSubscriberSync = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => void;
/** Asynchronous handler: same arguments as the synchronous form, but returns a promise. */
export type ActionSubscriberAsync = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => Promise<void>;
/** Either handler form; this is the subscriber type accepted by `MessageSubscribers.addListener`. */
export type ActionSubscriber = ActionSubscriberSync | ActionSubscriberAsync;
class MessageSubscribers {
private listeners: { [key: number]: ActionSubscriber[] } = {};
addListener(action: Actions, subscriber: ActionSubscriber) {

View File

@ -4,7 +4,7 @@ import { testArgs, signitures } from "./signiture";
import { ExtractResult } from "./result";
import { messageSubscribers, ActionSubscriber } from "./messaging";
import { Actions } from "../common";
import { logger } from "./common";
import { logger } from "./logger";
export class Task {
private _data: { [key: string]: string[][] } = {};
@ -73,6 +73,7 @@ export class Task {
logger.info("No window to watch...");
return;
}
let watchTaskID = 0;
let listener: ActionSubscriber = async (request, sender, sendResponse) => {
let findWindow = await getWindowByID(window.id);
if (!findWindow) {
@ -82,17 +83,20 @@ export class Task {
}
// only watch current window.
if (sender.tab.windowId != window.id) return;
let taskID = watchTaskID++;
logger.info(`Watcher #${taskID} starts.`);
let pm = this.makeOptionalTasks(sender.tab);
return pm.then(
() => extractTabData(sender.tab, this._itemsSelector, this._fieldSelectors, false)
() => extractTabData(sender.tab, this._itemsSelector, this._fieldSelectors, sender.tab.url, false)
).then(
results => {
if (results && results.length) {
this.saveResult(results, sender.tab.url);
}
logger.info(`Watcher #${taskID} ends.`);
}
).catch(
e => logger.error(e)
e => logger.error(`Watcher #${taskID} ends with:`, e)
)
}
this._listeners.push(listener);

View File

@ -1,6 +1,7 @@
import { URL_REG } from "./common";
import { ExtractResult } from "./result";
// Matches an "http://" or "https://" scheme prefix, optionally preceded by
// whitespace. Flags: `i` makes the match case-insensitive, `m` lets `^` match
// at the start of every line. Capture group 1 holds the scheme.
const URL_REG = /^\s*(https?):\/\//im;
export function parseUrls(...args) {
if (!args.length) return [];
let arg = args.shift();

View File

@ -20,4 +20,9 @@ export interface Request {
url?: string
fileName?: string
state?: string
}
/**
 * Reply envelope that pairs the value produced by an action with an error
 * message.
 * NOTE(review): producers visible in this change set leave `error` undefined
 * on success although the field is declared as a required string — confirm
 * whether it should be optional.
 */
export interface Response<T> {
// Value produced by the action.
result: T;
// Message describing the failure; consumers treat a truthy value as an error.
error: string;
}

View File

@ -1,4 +1,7 @@
export function extract(itemsSelector: string, fieldSelectors: string[]): string[][] {
export function extract(itemsSelector: string, fieldSelectors: string[], expectedURL: string): string[][] {
if (expectedURL && location.href != expectedURL) {
throw 'Target tab URL changed, aborting...';
}
// Some elements may be loaded asynchronously.
// If any field is never found, we should return undefined,
// so that senders can detect this and retry until the elements are loaded.

View File

@ -1,4 +1,4 @@
import { Request, Actions } from '../common';
import { Request, Actions, Response } from '../common';
import { scrollToBottom, extract } from './actions';
let asleep = false;
@ -20,30 +20,56 @@ chrome.runtime.sendMessage(<Request>{
action: Actions.REPORT_NEW_PAGE,
});
async function doAction(request: Request, sender: chrome.runtime.MessageSender) {
switch (request.action) {
case Actions.EXTRACT:
let data = extract(request.itemsSelector, request.fieldSelectors);
return data;
case Actions.GOTO_URL:
window.location.replace(request.url);
// should not receive any request until the page & script reload
asleep = true;
return request.url;
case Actions.PING:
return "pong";
case Actions.QUERY_URL:
return window.location.href;
case Actions.SCROLL_BOTTOM:
return scrollToBottom();
case Actions.SLEEP:
asleep = true;
return "Content script is sleeping.";
case Actions.WAKEUP:
asleep = false;
return "Content script is available.";
default:
break;
/**
 * Runs the action named by `request.action` and wraps the outcome in a
 * `Response`.
 *
 * Exceptions raised while handling the action are not propagated: they are
 * caught and reported through the response's `error` field (the `message` of
 * an `Error`, or the thrown value itself otherwise). An action that matches
 * none of the known cases is reported as 'Unsupported action.'.
 *
 * @param request message describing the action and its arguments
 * @param sender origin of the message (not used by this function)
 * @returns a promise of the response built by `newResponse`
 */
async function doAction(request: Request, sender: chrome.runtime.MessageSender): Promise<Response<any>> {
    let outcome: any;
    let failure: string;
    try {
        const action = request.action;
        if (action === Actions.EXTRACT) {
            outcome = extract(request.itemsSelector, request.fieldSelectors, request.url);
        } else if (action === Actions.GOTO_URL) {
            window.location.replace(request.url);
            // should not receive any request until the page & script reload
            asleep = true;
            outcome = request.url;
        } else if (action === Actions.PING) {
            outcome = "pong";
        } else if (action === Actions.QUERY_URL) {
            outcome = window.location.href;
        } else if (action === Actions.SCROLL_BOTTOM) {
            outcome = scrollToBottom();
        } else if (action === Actions.SLEEP) {
            asleep = true;
            outcome = "Content script is sleeping.";
        } else if (action === Actions.WAKEUP) {
            asleep = false;
            outcome = "Content script is available.";
        } else {
            // Unknown action: report it instead of silently returning nothing.
            failure = 'Unsupported action.';
        }
    } catch (err) {
        // Thrown Error objects are reduced to their message; anything else
        // (e.g. a thrown string) is passed through unchanged.
        failure = err instanceof Error ? err.message : err;
    }
    return newResponse(outcome, failure);
}
/**
 * Packs a result and an optional error message into a `Response` envelope.
 *
 * @param result value to place in the `result` field
 * @param err error message for the `error` field; undefined when omitted
 * @returns a new object holding both fields
 */
function newResponse<T>(result: T, err?: string): Response<T> {
    return { result, error: err };
}