check url change before extract data

This commit is contained in:
2020-01-16 15:11:49 +08:00
parent c78f593c70
commit 378883b626
6 changed files with 111 additions and 57 deletions

View File

@ -1,5 +1,5 @@
import { Actions, Request } from "../common"; import { Actions, Request } from "../common";
import { sendMessage } from "./messaging"; import { sendMessage, ResponseChecker } from "./messaging";
import { logger } from "./logger"; import { logger } from "./logger";
/** /**
@ -15,7 +15,7 @@ export function redirectTab(tab: chrome.tabs.Tab, url: string) {
action: Actions.GOTO_URL, action: Actions.GOTO_URL,
url: url url: url
} }
let checker = async (u, err, tryCount): Promise<string> => { let checker: ResponseChecker<string> = async (r, err, tryCount): Promise<string> => {
let queryErr: any; let queryErr: any;
let newURL = await queryUrl(tab).catch(e => queryErr = e); let newURL = await queryUrl(tab).catch(e => queryErr = e);
if (queryErr) { if (queryErr) {
@ -42,13 +42,16 @@ export function redirectTab(tab: chrome.tabs.Tab, url: string) {
* @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item * @param {Array<string>} fieldSelectors fields selectors for selecting fields (data columns) under each item
* @returns {Promise<string[]>} a promise of extracted data * @returns {Promise<string[]>} a promise of extracted data
*/ */
export function extractTabData(tab: chrome.tabs.Tab, itemsSelector: string, fieldSelectors: string[], askOnfail?: boolean) { export function extractTabData(tab: chrome.tabs.Tab, itemsSelector: string, fieldSelectors: string[], expectedURL?: string, askOnfail?: boolean) {
let req = { let req: Request = {
action: Actions.EXTRACT, action: Actions.EXTRACT,
itemsSelector: itemsSelector, itemsSelector: itemsSelector,
fieldSelectors: fieldSelectors fieldSelectors: fieldSelectors,
url: expectedURL,
} }
let checker = (result, err, tryCount) => { let checker: ResponseChecker<string[][]> = (response, err, tryCount) => {
if (response.error) throw response.error;
let result = response.result;
if (!result || !result.length) { if (!result || !result.length) {
if ( if (
tryCount % 20 == 0 && ( tryCount % 20 == 0 && (
@ -76,7 +79,9 @@ export async function ping(tab, count = 1) {
let req = { let req = {
action: Actions.PING action: Actions.PING
} }
let checker = (r: string, e, c) => r == "pong" ? r : undefined; let checker: ResponseChecker<string> = (r, e, c) =>
r.result == "pong" ? r.result : undefined;
let pong = await sendMessage<string>(tab, req, 'Check tab availability...', checker, 1000, count).catch(() => { }); let pong = await sendMessage<string>(tab, req, 'Check tab availability...', checker, 1000, count).catch(() => { });
return pong == "pong"; return pong == "pong";
} }

View File

@ -1,7 +1,11 @@
import { Request, Actions } from "../common"; import { Request, Actions, Response } from "../common";
import { getTabByID } from "./actions"; import { getTabByID } from "./actions";
import { logger } from "./logger"; import { logger } from "./logger";
/**
 * Synchronous checker for a tab response.
 * Receives the Response envelope returned by the tab, the
 * chrome.runtime.lastError value, and a counter (callers name it
 * `tryCount`; presumably the number of attempts so far - confirm in
 * sendMessage). Returns the accepted value of type T.
 */
export type ResponseCheckerSync<T> = (r: Response<T>, err: chrome.runtime.LastError, count: number) => T;
/**
 * Asynchronous variant of the checker: same arguments, but the accepted
 * value is delivered through a promise.
 */
export type ResponseCheckerAsync<T> = (r: Response<T>, err: chrome.runtime.LastError, count: number) => Promise<T>;
/**
 * Either checker flavour; this is the type of sendMessage's optional
 * `dataChecker` parameter. sendMessage treats an undefined or null
 * result as a failure, so a checker can yield undefined to reject a response.
 */
export type ResponseChecker<T> = ResponseCheckerSync<T> | ResponseCheckerAsync<T>;
/** /**
* Sending a message to target tab repeatedly until the response is not undefined. * Sending a message to target tab repeatedly until the response is not undefined.
* @param {object} tab the tab to send the message to * @param {object} tab the tab to send the message to
@ -19,7 +23,7 @@ export function sendMessage<T>(
tab: chrome.tabs.Tab, tab: chrome.tabs.Tab,
req, req,
log?: string, log?: string,
dataChecker?: (r: T, err: chrome.runtime.LastError, count: number) => T | Promise<T>, dataChecker?: ResponseChecker<T>,
interval?: number, interval?: number,
limit?: number limit?: number
) { ) {
@ -43,10 +47,13 @@ export function sendMessage<T>(
return; return;
} }
count++; count++;
chrome.tabs.sendMessage(tab.id, req, async (r: T) => { chrome.tabs.sendMessage(tab.id, req, async (r: Response<T>) => {
// check error but do nothing until dataChecker. // check error but do nothing until dataChecker.
let err = chrome.runtime.lastError; let err = chrome.runtime.lastError;
let result: T = r; let result: T;
// r could be undefined if the content script is interrupted.
if (r) {
result = r.result;
if (dataChecker) { if (dataChecker) {
let pms: T | Promise<T>; let pms: T | Promise<T>;
@ -69,6 +76,7 @@ export function sendMessage<T>(
result = pms; result = pms;
} }
} }
}
let flag = result !== undefined && result !== null; let flag = result !== undefined && result !== null;
if (log) logger.info(log, flag ? '(OK)' : '(failed)'); if (log) logger.info(log, flag ? '(OK)' : '(failed)');
@ -84,7 +92,10 @@ export function sendMessage<T>(
}); });
} }
export type ActionSubscriber = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => void | Promise<void>; export type ActionSubscriberSync = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => void;
export type ActionSubscriberAsync = (request: Request, sender: chrome.runtime.MessageSender, sendResponse: (response?: any) => void) => Promise<void>;
export type ActionSubscriber = ActionSubscriberSync | ActionSubscriberAsync;
class MessageSubscribers { class MessageSubscribers {
private listeners: { [key: number]: ActionSubscriber[] } = {}; private listeners: { [key: number]: ActionSubscriber[] } = {};
addListener(action: Actions, subscriber: ActionSubscriber) { addListener(action: Actions, subscriber: ActionSubscriber) {

View File

@ -73,6 +73,7 @@ export class Task {
logger.info("No window to watch..."); logger.info("No window to watch...");
return; return;
} }
let watchTaskID = 0;
let listener: ActionSubscriber = async (request, sender, sendResponse) => { let listener: ActionSubscriber = async (request, sender, sendResponse) => {
let findWindow = await getWindowByID(window.id); let findWindow = await getWindowByID(window.id);
if (!findWindow) { if (!findWindow) {
@ -82,17 +83,20 @@ export class Task {
} }
// only watch current window. // only watch current window.
if (sender.tab.windowId != window.id) return; if (sender.tab.windowId != window.id) return;
let taskID = watchTaskID++;
logger.info(`Watcher #${taskID} starts.`);
let pm = this.makeOptionalTasks(sender.tab); let pm = this.makeOptionalTasks(sender.tab);
return pm.then( return pm.then(
() => extractTabData(sender.tab, this._itemsSelector, this._fieldSelectors, false) () => extractTabData(sender.tab, this._itemsSelector, this._fieldSelectors, sender.tab.url, false)
).then( ).then(
results => { results => {
if (results && results.length) { if (results && results.length) {
this.saveResult(results, sender.tab.url); this.saveResult(results, sender.tab.url);
} }
logger.info(`Watcher #${taskID} ends.`);
} }
).catch( ).catch(
e => logger.error(e) e => logger.error(`Watcher #${taskID} ends with:`, e)
) )
} }
this._listeners.push(listener); this._listeners.push(listener);

View File

@ -21,3 +21,8 @@ export interface Request {
fileName?: string fileName?: string
state?: string state?: string
} }
/**
 * Envelope for the reply a content script gives to a Request.
 * Built by the content script's doAction and unwrapped by sendMessage
 * and the response checkers.
 */
export interface Response<T> {
// Value produced by the requested action; sendMessage reads this as the response data.
result: T;
// Error message describing why the action failed. Checkers test it for
// truthiness before using `result`.
// NOTE(review): doAction leaves this undefined when nothing went wrong,
// although the field is declared as a required string - confirm whether
// it should be optional.
error: string;
}

View File

@ -1,4 +1,7 @@
export function extract(itemsSelector: string, fieldSelectors: string[]): string[][] { export function extract(itemsSelector: string, fieldSelectors: string[], expectedURL: string): string[][] {
if (expectedURL && location.href != expectedURL) {
throw 'Target tab URL changed, aborting...';
}
// since some elements may be loaded asynchronously. // since some elements may be loaded asynchronously.
// if one field is never found, we should return undefined, // if one field is never found, we should return undefined,
// so that senders can detect to retry until elements loaded. // so that senders can detect to retry until elements loaded.

View File

@ -1,4 +1,4 @@
import { Request, Actions } from '../common'; import { Request, Actions, Response } from '../common';
import { scrollToBottom, extract } from './actions'; import { scrollToBottom, extract } from './actions';
let asleep = false; let asleep = false;
@ -20,30 +20,56 @@ chrome.runtime.sendMessage(<Request>{
action: Actions.REPORT_NEW_PAGE, action: Actions.REPORT_NEW_PAGE,
}); });
async function doAction(request: Request, sender: chrome.runtime.MessageSender) { async function doAction(request: Request, sender: chrome.runtime.MessageSender): Promise<Response<any>> {
let result: any;
let error: string;
try {
switch (request.action) { switch (request.action) {
case Actions.EXTRACT: case Actions.EXTRACT:
let data = extract(request.itemsSelector, request.fieldSelectors); result = extract(request.itemsSelector, request.fieldSelectors, request.url);
return data; break;
case Actions.GOTO_URL: case Actions.GOTO_URL:
window.location.replace(request.url); window.location.replace(request.url);
// should not recieve any request until the page & script reload // should not recieve any request until the page & script reload
asleep = true; asleep = true;
return request.url; result = request.url;
break;
case Actions.PING: case Actions.PING:
return "pong"; result = "pong";
break;
case Actions.QUERY_URL: case Actions.QUERY_URL:
return window.location.href; result = window.location.href;
break;
case Actions.SCROLL_BOTTOM: case Actions.SCROLL_BOTTOM:
return scrollToBottom(); result = scrollToBottom();
break;
case Actions.SLEEP: case Actions.SLEEP:
asleep = true; asleep = true;
return "Content script is sleeping."; result = "Content script is sleeping.";
break;
case Actions.WAKEUP: case Actions.WAKEUP:
asleep = false; asleep = false;
return "Content script is available."; result = "Content script is available.";
break;
default: default:
error = 'Unsupported action.'
break; break;
} }
} catch (err) {
if (err instanceof Error) {
error = err.message;
} else {
error = err;
}
}
return newResponse(result, error);
} }
/**
 * Packs an action outcome into a Response envelope.
 *
 * @param result value to place in the envelope's `result` field
 * @param err optional error message; when omitted, `error` is undefined
 * @returns a new object whose `result` and `error` come from the arguments
 */
function newResponse<T>(result: T, err?: string): Response<T> {
    return { result, error: err };
}