migrate to typescript, with fixes
This commit is contained in:
131
src/background/actions.ts
Normal file
131
src/background/actions.ts
Normal file
@ -0,0 +1,131 @@
|
||||
import { ACTION_GOTO_URL, ACTION_EXTRACT, ACTION_PING as ACTION_PING, ACTION_QUERY_URL, ACTION_SCROLL_BOTTOM } from "../common";
|
||||
import { sendMessage } from "./messaging";
|
||||
|
||||
/**
 * Navigate a tab to the given URL, unless it is already there.
 *
 * The tab's current URL is queried first. When it differs from `url`, a
 * goto request is sent through `sendMessage`, which keeps re-sending it
 * until a follow-up URL query reports that the tab has arrived at `url`.
 *
 * @param tab target tab
 * @param url target URL
 * @returns a promise of the target URL. It rejects when a URL query fails,
 *          or with "Tasks stopped by user." when the user cancels the
 *          confirmation shown on every 5th unsuccessful attempt.
 */
export function redirectTab(tab: chrome.tabs.Tab, url: string): Promise<string> {
    return queryUrl(tab).then(current => {
        // Already on the target page: nothing to send, but still resolve
        // with the URL so the result matches the documented contract.
        if (current === url) return url;

        const req = {
            action: ACTION_GOTO_URL,
            url: url
        };
        // The response to the goto request itself is ignored; success is
        // decided by asking the tab for its URL again.
        const checker = async (_response: string, _err: unknown, tryCount: number): Promise<string> => {
            // A failed query rejects this checker, which makes sendMessage reject.
            const newURL = await queryUrl(tab);
            if (newURL === url) return url;
            if (
                tryCount % 5 === 0 &&
                !confirm('Cannot navigate to target url. \nPress OK to continue, Cancel to stop.')
            ) {
                return Promise.reject("Tasks stopped by user.");
            }
            // undefined tells sendMessage to send the request again.
            return undefined;
        };
        return sendMessage<string>(tab, req, `Goto url: ${url}`, checker);
    });
}
|
||||
|
||||
/**
 * Ask the content script of a tab to extract data from its current page.
 *
 * The request is repeated (by `sendMessage`) while the page yields no rows.
 * On every 20th empty attempt the user is asked whether to move on; if they
 * agree, the promise resolves with an empty list.
 *
 * @param tab target tab
 * @param itemsSelector selector matching the items (data rows)
 * @param fieldSelectors selectors matching the fields (data columns) under each item
 * @returns a promise of the extracted rows
 */
export function extractTabData(tab, itemsSelector, fieldSelectors) {
    const request = {
        action: ACTION_EXTRACT,
        itemsSelector,
        fieldSelectors
    };
    const verify = (rows, err, attempt) => {
        const hasRows = rows && rows.length;
        if (hasRows) return rows;
        const skipPage = attempt % 20 == 0 &&
            confirm('No data found in current page. \n\nContinue to next page?');
        // An empty list ends the retry loop; undefined asks for another attempt.
        return skipPage ? [] : undefined;
    };
    return sendMessage<string[][]>(tab, request, 'Extract data from the tab...', verify);
}
|
||||
|
||||
/**
 * Ping a tab, usually to find out whether its content script is ready.
 *
 * @param tab target tab
 * @param count maximum number of ping attempts, one second apart (default 1)
 * @returns a promise of `true` when the tab answered "pong", `false` when
 *          it did not or when sending failed
 */
export async function ping(tab, count = 1) {
    const request = { action: ACTION_PING };
    // Anything other than "pong" counts as no answer and triggers a retry.
    const expectPong = (reply: string, e, c) => (reply == "pong" ? reply : undefined);
    return sendMessage<string>(tab, request, 'Check tab availability...', expectPong, 1000, count).then(
        reply => reply == "pong",
        () => false
    );
}
|
||||
|
||||
/**
 * Ask the content script of a tab for the URL it is currently showing.
 *
 * @param tab target tab
 * @returns a promise of the URL
 */
export function queryUrl(tab: chrome.tabs.Tab) {
    return sendMessage<string>(tab, { action: ACTION_QUERY_URL });
}
|
||||
|
||||
/**
 * Ask the content script of a tab to scroll its page to the bottom.
 *
 * @param tab target tab
 * @returns a promise of the content script's response
 */
export function scrollToBottom(tab: chrome.tabs.Tab) {
    return sendMessage(tab, { action: ACTION_SCROLL_BOTTOM }, 'Scroll to page bottom...');
}
|
||||
|
||||
/**
 * Open a new tab.
 *
 * @param url URL to load in the new tab
 * @param active whether the new tab becomes the active tab of its window
 * @returns a promise of the tab handed to the `chrome.tabs.create` callback
 */
export function createTab(url: string, active: boolean): Promise<chrome.tabs.Tab> {
    return new Promise<chrome.tabs.Tab>(resolve => {
        chrome.tabs.create({ url, active }, tab => resolve(tab));
    });
}
|
||||
|
||||
/**
 * Look up the active tab.
 *
 * @param currentWindow passed to `chrome.tabs.query` as its `currentWindow` filter
 * @returns a promise of the first matching tab, or `undefined` when the
 *          query matched no tab
 */
export function getActiveTab(currentWindow: boolean): Promise<chrome.tabs.Tab | undefined> {
    return new Promise<chrome.tabs.Tab | undefined>(resolve => {
        chrome.tabs.query({ active: true, currentWindow }, tabs => resolve(tabs[0]));
    });
}
|
||||
|
||||
/**
 * Look up a tab by its id.
 *
 * @param id id of the tab
 * @returns a promise of whatever `chrome.tabs.get` hands to its callback;
 *          callers in this project treat a falsy value as "tab is gone"
 */
export function getTabByID(id: number): Promise<chrome.tabs.Tab | undefined> {
    return new Promise<chrome.tabs.Tab | undefined>(resolve => {
        chrome.tabs.get(id, tab => {
            // Touch lastError on purpose: a failed lookup is an expected
            // outcome here, and reading the property marks it as checked.
            void chrome.runtime.lastError;
            resolve(tab);
        });
    });
}
|
||||
15
src/background/caches.ts
Normal file
15
src/background/caches.ts
Normal file
@ -0,0 +1,15 @@
|
||||
import { logger } from "./common";
|
||||
|
||||
/**
 * Holds one piece of uploaded state until it is consumed.
 */
export class Caches {
    private _state: string = "";

    constructor() { }

    /**
     * The cached state content. Reading it empties the cache, so the next
     * read yields "" until `setState` is called again.
     */
    get state(): string {
        const content = this._state;
        this._state = "";
        return content;
    }

    /**
     * Store state content and tell the user how to load it.
     * @param name display name of the state, used in the log message only
     * @param content the state content to keep
     */
    setState(name: string, content: string) {
        this._state = content;
        logger.info(`State (${name}) recieved. To load it: some_var = new Extractor().load()`);
    }
}
|
||||
6
src/background/common.ts
Normal file
6
src/background/common.ts
Normal file
@ -0,0 +1,6 @@
|
||||
import { Logger, LOGGER_LEVEL } from "./logger";
|
||||
import { Caches } from "./caches";
|
||||
|
||||
/** Shared cache instance for the background scripts. */
export const caches = new Caches();

/** Shared logger: log level DEBUG, notification level DISABLED. */
export const logger = new Logger(LOGGER_LEVEL.DEBUG, LOGGER_LEVEL.DISABLED);

/**
 * Matches text that has an http:// or https:// scheme at the start of a
 * line, ignoring case and leading whitespace.
 */
export const URL_REG = /^\s*(https?):\/\//im;
|
||||
148
src/background/extractor.ts
Normal file
148
src/background/extractor.ts
Normal file
@ -0,0 +1,148 @@
|
||||
import { Task } from "./task";
|
||||
import { saveFile } from "./tools";
|
||||
import { createTab, getActiveTab, ping } from "./actions";
|
||||
import { logger, caches } from "./common";
|
||||
import { ExtractResult } from "./result";
|
||||
|
||||
/**
 * Runs a chain of extraction tasks against a browser tab and exports the
 * collected rows as CSV.
 */
export class Extractor {
    private _tasks: Task[] = [];
    private _running = false;
    private _options: any = {};

    /**
     * @param options options handed to every task created by this extractor
     */
    constructor(options?) {
        if (options) this._options = options;
    }

    /**
     * Download the current state as 'state.json' so it can be restored later.
     */
    save() {
        const snapshot = JSON.stringify(this);
        saveFile(snapshot, 'application/json', 'state.json');
    }

    /**
     * Restore options and tasks from the state held in the shared cache.
     * @returns this extractor, or undefined when no state is cached
     */
    load() {
        const content = caches.state;
        if (content) {
            const state = JSON.parse(content);
            this._options = state._options;
            // The selector arguments are placeholders that only satisfy the
            // Task constructor; load() overwrites them with the saved values.
            this._tasks = state._tasks.map(
                saved => new Task(this._options, 'whaterver', ['whaterver']).load(saved)
            );
            return this;
        }
        logger.info('No state found. Please upload a saved state from the popup window first.');
        return;
    }

    /**
     * Append a task to the chain.
     * A later task that has no url arguments uses the results of the task
     * before it as its list of target urls.
     * @param args itemsSelector, fieldSelectors, and optional url arguments
     */
    task(...args: any) {
        const next = new Task(this._options, ...args);
        this._tasks.push(next);
        return this;
    }

    /**
     * Remove all tasks, together with the results they hold.
     */
    clear() {
        this._tasks = [];
        return this;
    }

    /**
     * Run the task chain from its first task.
     */
    async start() {
        return this._startTasks(0);
    }

    /**
     * Discard the results of the given task and all tasks after it, then run
     * the chain again. Earlier tasks keep their results.
     * @param from index of the first task whose results are discarded, begins with 0
     */
    async restart(from: number = 0) {
        const first = this._checkTaskId(from, 0);
        if (first < 0) return;
        this._tasks.slice(first).forEach(task => task.clean());
        return this._startTasks(0);
    }

    /**
     * Pick the tab to work in, run the tasks one after another and export
     * the final result. Errors are logged rather than thrown.
     * @param from tasks with a lower index are skipped
     */
    async _startTasks(from: number) {
        if (this._running) {
            logger.info('The Extractor is running. Please wait..');
            return;
        }
        if (!this._tasks.length) {
            logger.info('No task to run.');
            return;
        }

        const firstTask = this._tasks[0];
        let tab;
        if (firstTask.urls.length) {
            // The first task names its own urls: open a new tab on the first one.
            tab = await createTab(firstTask.urls[0], false);
        } else {
            // Otherwise work in the active tab, which must answer a ping.
            tab = await getActiveTab(true) || await getActiveTab(false);
            const reachable = await ping(tab);
            if (!reachable) {
                logger.error('Cannot contact with active tab.');
                return;
            }
        }

        this._running = true;
        const chain = this._tasks;
        const total = chain.length;
        try {
            for (let i = 0; i < total; i++) {
                if (i < from) continue;
                const current = chain[i];
                if (i > 0) {
                    // Feed the previous task's rows to this one as upstream data.
                    const previous = this._tasks[i - 1];
                    await current.execute(tab, new ExtractResult(previous.results));
                } else {
                    await current.execute(tab);
                }
            }
            this._running = false;
            this.export();
        } catch (err) {
            this._running = false;
            logger.error(err);
        }
    }

    /**
     * Offer the result of a task as a CSV download, after a confirmation
     * that previews the first rows.
     * @param taskid index of the task to export, begins with 0; defaults to the last task
     */
    export(taskid?: number) {
        const id = this._checkTaskId(taskid, this._tasks.length - 1);
        if (id < 0) return;
        const task = this._tasks[id];
        const rows = task.results;
        if (!rows.length) {
            logger.info(`No result for task #${id}. Forget to call ".start()"?`);
            return;
        }
        // The field selectors serve as the header row.
        rows.unshift(task.fieldSelectors);
        const exResults = new ExtractResult(rows);
        const preview = exResults.toString(50) || "- Empty -";
        const msg = [
            `Please confirm to download (${rows.length - 1} items):`,
            "",
            preview
        ].join("\n").trim();
        if (confirm(msg)) {
            saveFile(exResults.toString(), "text/csv");
        }
    }

    /**
     * Validate a task index.
     * @param id index to check; when undefined, defaultId is used instead
     * @param defaultId fallback index
     * @returns the valid index, or -1 when there is no task or the index is out of range
     */
    _checkTaskId(id: number, defaultId: number) {
        const count = this._tasks.length;
        if (!count) {
            logger.info("No task found.");
            return -1;
        }
        if (id === undefined && !isNaN(defaultId)) id = defaultId;
        const invalid = isNaN(id) || id < 0 || id >= count;
        if (invalid) {
            logger.info(`Invalid task id. Rang(0-${count - 1})`);
            return -1;
        }
        return id;
    }
}
|
||||
14
src/background/index.ts
Normal file
14
src/background/index.ts
Normal file
@ -0,0 +1,14 @@
|
||||
import { Extractor } from "./extractor";
|
||||
|
||||
declare global {
    interface Window {
        $: (...args: any) => void;
        Extractor: any;
    }
}

// Shortcut on the window: run a single task with a fresh Extractor.
window.$ = (...args) => new Extractor().task(...args).start();

// Expose the class itself for building managed task chains.
window.Extractor = Extractor;
|
||||
73
src/background/logger.ts
Normal file
73
src/background/logger.ts
Normal file
@ -0,0 +1,73 @@
|
||||
/**
 * Severity levels in ascending order. DISABLED is the highest value.
 */
export enum LOGGER_LEVEL {
    DEBUG = 1,
    INFO = 2,
    WARN = 3,
    ERROR = 4,
    DISABLED = 5,
}
|
||||
|
||||
/**
 * Console logger that can also surface messages as a desktop notification.
 *
 * A message is written to the console when its level is at least the log
 * level, and additionally shown as a notification when its level is at
 * least the notify level.
 */
export class Logger {
    private _notificationId: string | undefined = undefined;
    private _log_level = LOGGER_LEVEL.INFO;
    private _notify_level = LOGGER_LEVEL.ERROR;

    /**
     * @param logLevel minimum level written to the console (default INFO)
     * @param notifyLevel minimum level shown as a notification (default ERROR)
     */
    constructor(logLevel?: LOGGER_LEVEL, notifyLevel?: LOGGER_LEVEL) {
        if (logLevel) this._log_level = logLevel;
        if (notifyLevel) this._notify_level = notifyLevel;
        // Forget the id of a closed notification, so the next notify()
        // creates a fresh one instead of updating one that no longer exists.
        // The notify level must stay untouched here.
        chrome.notifications.onClosed.addListener(id => {
            if (id === this._notificationId) this._notificationId = undefined;
        });
    }

    get logLevel() {
        return this._log_level;
    }

    set logLevel(val: LOGGER_LEVEL) {
        this._log_level = val;
    }

    get notifyLevel() {
        return this._notify_level;
    }

    set notifyLevel(val: LOGGER_LEVEL) {
        this._notify_level = val;
    }

    /**
     * Write a message through the given console function and notify if the
     * level calls for it.
     * @param level severity of the message
     * @param loggerFn console function that receives the prefix and the messages
     * @param msgs message parts
     */
    log(level: LOGGER_LEVEL, loggerFn: (...args: any[]) => void, ...msgs) {
        if (level < this._log_level) return;
        const time = new Date().toLocaleString();
        loggerFn(`${time} [${LOGGER_LEVEL[level]}]`, ...msgs);
        if (level < this._notify_level) return;
        this.notify(...msgs);
    }

    debug(...msgs) {
        this.log(LOGGER_LEVEL.DEBUG, console.debug, ...msgs);
    }

    info(...msgs) {
        this.log(LOGGER_LEVEL.INFO, console.info, ...msgs);
    }

    warn(...msgs) {
        this.log(LOGGER_LEVEL.WARN, console.warn, ...msgs);
    }

    error(...msgs) {
        this.log(LOGGER_LEVEL.ERROR, console.error, ...msgs);
    }

    /**
     * Show the messages, joined with spaces, as a notification. The first
     * call creates the notification; later calls update its text for as
     * long as it stays open.
     * @param msgs message parts
     */
    notify(...msgs) {
        const msg = msgs.join(' ');
        if (!this._notificationId) {
            chrome.notifications.create(
                null,
                {
                    "type": "basic",
                    "iconUrl": chrome.extension.getURL('icon.png'),
                    "title": "Data Extractor",
                    "message": msg,
                    "priority": 0,
                    "requireInteraction": true,
                },
                notificationId => {
                    this._notificationId = notificationId;
                }
            );
            return;
        }
        chrome.notifications.update(
            this._notificationId,
            { "message": msg }
        );
    }
}
|
||||
94
src/background/messaging.ts
Normal file
94
src/background/messaging.ts
Normal file
@ -0,0 +1,94 @@
|
||||
import { EXT_NAME, ACTION_UPLOAD_STATE } from "../common";
|
||||
import { getTabByID } from "./actions";
|
||||
import { caches, logger } from "./common";
|
||||
|
||||
/**
 * Send a request to a tab repeatedly until a usable response is obtained.
 *
 * Before every attempt the tab is looked up again; the promise rejects once
 * the tab is gone or the attempt limit is reached. A response counts as
 * usable when it (or the value the checker derives from it) is neither
 * undefined nor null.
 *
 * @param tab the tab to send the request to
 * @param req the request data
 * @param log message written to the log after each attempt, with its outcome
 * @param dataChecker (result, err, tryCount) => value. Decides what the
 *        promise finally resolves with. Return undefined to make
 *        'sendMessage' retry; throw or return a rejected promise to make it
 *        reject with that reason.
 * @param interval retry interval in ms, default: 500
 * @param limit maximum number of attempts, default: 0, no limit
 * @return a promise of the response
 */
export function sendMessage<T>(
    tab: chrome.tabs.Tab,
    req,
    log?: string,
    dataChecker?: (r: T, err: chrome.runtime.LastError, count: number) => T | Promise<T>,
    interval?: number,
    limit?: number
) {
    interval = interval || 500;
    limit = isNaN(limit) ? 0 : limit;
    let count = 0;
    return new Promise<T>((resolve, reject) => {
        // Any unexpected failure inside an attempt rejects the promise
        // instead of leaving it pending forever.
        const attempt = () => { loop().catch(reject); };

        attempt();

        async function loop() {
            logger.debug("Request for", req.action);
            const tabAvailable = await getTabByID(tab.id);
            if (!tabAvailable) {
                reject("Task interrupted due to the target tab is closed.");
                return;
            }

            if (limit && count >= limit) {
                reject(`sendMessage loop limit ${limit} reached.`);
                return;
            }
            count++;
            chrome.tabs.sendMessage(tab.id, req, async (r: T) => {
                // Read the error here, but leave the decision to dataChecker.
                const err = chrome.runtime.lastError;
                let result: T = r;

                if (dataChecker) {
                    try {
                        // await accepts plain values and promises alike, and
                        // the try block covers synchronous throws as well.
                        result = await dataChecker(r, err, count);
                    } catch (checkerError) {
                        reject(checkerError);
                        return;
                    }
                }

                const usable = result !== undefined && result !== null;
                if (log) logger.info(log, usable ? '(OK)' : '(failed)');
                if (usable) {
                    resolve(result);
                } else {
                    setTimeout(attempt, interval);
                }
            });
        }
    });
}
|
||||
|
||||
// Handle requests addressed to the background scripts. Only actions that
// start with the extension name are answered; everything else is ignored.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
    const action = request.action;
    const addressedToUs = action && action.startsWith(EXT_NAME);
    if (!addressedToUs) {
        return;
    }
    if (action === ACTION_UPLOAD_STATE) {
        // Acknowledge first, then keep the uploaded state in the shared cache.
        sendResponse('recieved!');
        caches.setState(request.name, request.state);
    } else {
        sendResponse("Request not supported.");
    }
});
|
||||
37
src/background/result.ts
Normal file
37
src/background/result.ts
Normal file
@ -0,0 +1,37 @@
|
||||
/**
 * A table of extracted values (rows of cells) with CSV rendering.
 */
export class ExtractResult {
    private _data: string[][] = [];

    /**
     * @param data rows of cells; a missing value is treated as an empty table
     */
    constructor(data) {
        this._data = data || [];
    }

    /**
     * @param index row index
     * @returns the row at the index, or undefined when there is none
     */
    row(index: number): string[] {
        return this._data[index];
    }

    /**
     * @param index column index
     * @returns the cell at that index of every row; undefined for rows that
     *          are missing or too short
     */
    column(index: number): string[] {
        return this._data.map(cells => (cells ? cells[index] : undefined));
    }

    /**
     * @returns all cells of all rows in one flat list, row by row
     */
    squash(): string[] {
        return this._data.reduce((flat, cells) => flat.concat(cells), []);
    }

    get data(): string[][] {
        return this._data;
    }

    /**
     * Render the table as CSV: every cell trimmed and double-quoted, quotes
     * inside a cell doubled, one line per row, each line ending with "\n".
     * Missing cells are rendered as empty cells and missing rows as empty lines.
     * @param rowsCount render only the first rowsCount rows; 0 (default) renders all
     */
    toString(rowsCount: number = 0): string {
        const rows = rowsCount > 0 ? this._data.slice(0, rowsCount) : this._data;
        return rows
            .map(cells => (cells || []).map(ExtractResult.quote).join(",") + "\n")
            .join("");
    }

    /** Quote one cell for CSV; null and undefined become an empty cell. */
    private static quote(cell: unknown): string {
        // A field that was not found for one item arrives here as
        // undefined, or as null after message passing.
        const text = cell === undefined || cell === null ? "" : String(cell);
        return '"' + text.trim().replace(/"/g, '""') + '"';
    }
}
|
||||
71
src/background/signiture.ts
Normal file
71
src/background/signiture.ts
Normal file
@ -0,0 +1,71 @@
|
||||
import { ExtractResult } from "./result";
|
||||
|
||||
/**
 * Usage help for the console API: how to start single tasks and task
 * chains, the accepted task call signatures, and an example. It is embedded
 * in the error thrown when a task is created with invalid arguments.
 */
export const signitures = `
|
||||
## Usage
|
||||
// single task
|
||||
$(...args);
|
||||
// managed task chains
|
||||
e = new Extractor();
|
||||
e.task(...args).task(...args).start();
|
||||
|
||||
## Task Call Signitures:
|
||||
function(itemsSelector:string, fieldSelectors:string[]);
|
||||
function(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number);
|
||||
function(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[]);
|
||||
function(itemsSelector:string, fieldSelectors:string[], urls:string[]);
|
||||
|
||||
## Example:
|
||||
// extract all links text & url under '.item' elements
|
||||
// use 'selector@attr' to get attribute of the field elements
|
||||
$(".item", ["a", "a@href"]);
|
||||
|
||||
## See Detailed Help:
|
||||
https://git.jebbs.co/jebbs/data-extracter-extesion
|
||||
`.trim();
|
||||
|
||||
/**
 * Check whether the arguments match one of the supported task call
 * signatures:
 *
 *   (itemsSelector, fieldSelectors)
 *   (itemsSelector, fieldSelectors, urls: string[] | ExtractResult)
 *   (itemsSelector, fieldSelectors, url, pages: number[])
 *   (itemsSelector, fieldSelectors, url, from, to, interval)
 *
 * itemsSelector must be a non-empty string and fieldSelectors an array of
 * strings. For the range signature, from, to and interval must be finite
 * numbers and interval must be positive, because a zero or negative step
 * would never reach the end of the range.
 *
 * @param args the task call arguments, without the options
 * @returns true when the arguments match a signature, false otherwise
 */
export function testArgs(...args: any): boolean {
    const isArrayOf = (value: unknown, type: string): boolean =>
        Array.isArray(value) && value.every(item => typeof item === type);
    const isFiniteNumber = (value: unknown): boolean =>
        typeof value === "number" && Number.isFinite(value);

    if (args.length < 2) return false;
    const [itemsSelector, fieldSelectors, ...rest] = args;
    const selectorsValid =
        typeof itemsSelector === "string" &&
        itemsSelector.length > 0 &&
        isArrayOf(fieldSelectors, "string");
    if (!selectorsValid) return false;

    switch (rest.length) {
        case 0:
            return true;
        case 1:
            // An explicit url list, or the result of another task.
            return isArrayOf(rest[0], "string") || rest[0] instanceof ExtractResult;
        case 2:
            // A url template plus the page numbers to fill in.
            return typeof rest[0] === "string" && isArrayOf(rest[1], "number");
        case 4:
            // A url template plus a page range: from, to, interval.
            return typeof rest[0] === "string" &&
                isFiniteNumber(rest[1]) &&
                isFiniteNumber(rest[2]) &&
                isFiniteNumber(rest[3]) &&
                rest[3] > 0;
        default:
            return false;
    }
}
|
||||
91
src/background/task.ts
Normal file
91
src/background/task.ts
Normal file
@ -0,0 +1,91 @@
|
||||
import { parseUrls } from "./tools";
|
||||
import { queryUrl, redirectTab, scrollToBottom, extractTabData } from "./actions";
|
||||
import { testArgs, signitures } from "./signiture";
|
||||
import { ExtractResult } from "./result";
|
||||
|
||||
/**
 * One extraction step: a pair of selectors, the urls to visit, and the rows
 * collected per url.
 */
export class Task {
    private _data: { [key: string]: string[][] } = {};
    private _data_keys: string[] = [];
    private _options: any;
    private _itemsSelector: string;
    private _fieldSelectors: string[];
    private _urls: string[] = [];

    constructor(options: any, ...arg: any);
    constructor(options: any, itemsSelector: string, fieldSelectors: string[]);
    constructor(options: any, itemsSelector: string, fieldSelectors: string[], url: string, from: number, to: number, interval: number);
    constructor(options: any, itemsSelector: string, fieldSelectors: string[], url: string, pages: number[]);
    constructor(options: any, itemsSelector: string, fieldSelectors: string[], urls: string[]);
    /**
     * @param options options of the owning extractor
     * @param args itemsSelector, fieldSelectors, and optional url arguments
     * @throws Error when the arguments match none of the call signatures
     */
    constructor(options, ...args) {
        const valid = testArgs(...args);
        if (!valid) {
            throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
        }
        const [itemsSelector, fieldSelectors, ...urlArgs] = args;
        this._options = options;
        this._itemsSelector = itemsSelector;
        this._fieldSelectors = fieldSelectors;
        this._urls = parseUrls(...urlArgs);
    }

    /**
     * Take over selectors, urls and collected data from a saved task state.
     * @param state a task as found in a parsed state file
     * @returns this task
     */
    load(state: any): Task {
        this._data = state._data;
        this._data_keys = state._data_keys;
        this._itemsSelector = state._itemsSelector;
        this._fieldSelectors = state._fieldSelectors;
        this._urls = state._urls;
        return this;
    }

    get urls(): string[] {
        return this._urls;
    }

    /** All collected rows, in the order their urls were saved. A new array on every read. */
    get results(): string[][] {
        let rows = [];
        for (const key of this._data_keys) {
            rows = rows.concat(this._data[key]);
        }
        return rows;
    }

    get fieldSelectors(): string[] {
        return this._fieldSelectors;
    }

    /**
     * Drop all collected data.
     * @returns this task
     */
    clean(): Task {
        this._data = {};
        this._data_keys = [];
        return this;
    }

    /**
     * Visit the target urls one after another in the given tab and collect
     * the rows extracted from each. Urls that already have data are skipped.
     *
     * Targets are the task's own urls; without any, the urls found in the
     * upstream data; without upstream data, the tab's current url.
     *
     * @param tab the tab to work in
     * @param upstreamData result of the previous task in the chain
     */
    async execute(tab: chrome.tabs.Tab, upstreamData?: ExtractResult): Promise<void> {
        if (!tab) return Promise.reject("No tab to execute the task.");

        let urls = this._urls;
        if (!urls.length) {
            urls = upstreamData ? parseUrls(upstreamData) : [await queryUrl(tab)];
        }

        const lastIndex = urls.length - 1;
        for (let i = 0; i < urls.length; i++) {
            const url = urls[i];
            if (this._data[url]) continue;

            const scrollFirst = this._options["scrollToBottom"];
            await redirectTab(tab, url);
            if (scrollFirst) {
                await scrollToBottom(tab);
            }
            const rows = await extractTabData(tab, this._itemsSelector, this._fieldSelectors);

            // Any array is kept for a url that is followed by another one;
            // the rows of the final url are kept only when non-empty.
            const keep = i < lastIndex ? rows instanceof Array : Boolean(rows && rows.length);
            if (keep) {
                this._data[url] = rows;
                this._data_keys.push(url);
            }
        }
    }
}
|
||||
61
src/background/tools.ts
Normal file
61
src/background/tools.ts
Normal file
@ -0,0 +1,61 @@
|
||||
import { URL_REG } from "./common";
|
||||
import { ExtractResult } from "./result";
|
||||
|
||||
/**
 * Turn the url arguments of a task into a list of urls.
 *
 * Supported argument shapes:
 *   ()                              -> []
 *   (urls: string[])                -> the same array
 *   (result: ExtractResult)         -> every cell of the result that looks like an http(s) url
 *   (template, pages: any[])        -> the template with each page filled in
 *   (template, from, to, interval)  -> the template filled in for from, from+interval, ... up to to
 *
 * Every occurrence of the placeholder "${page}" in the template is replaced.
 *
 * @param args the url arguments
 * @returns the list of urls; empty when the arguments match no shape
 * @throws RangeError when a page range is given with an interval that is
 *         not greater than 0, since such a range could never be completed
 */
export function parseUrls(...args) {
    if (!args.length) return [];
    const [first, ...rest] = args;

    if (Array.isArray(first)) return first;

    if (typeof first === "string") {
        if (!first) return [];
        const template: string = first;
        // split/join replaces every placeholder and inserts the page
        // literally, without the special "$" patterns of String.replace.
        const fill = (page: unknown): string => template.split("${page}").join(String(page));

        if (Array.isArray(rest[0])) {
            return rest[0].map(page => fill(page));
        }
        if (rest.length >= 3) {
            const [from, to, interval] = rest;
            if (!(interval > 0)) {
                throw new RangeError(`Invalid page interval: ${interval}. It must be greater than 0.`);
            }
            const urls = [];
            for (let page = from; page <= to; page += interval) {
                urls.push(fill(page));
            }
            return urls;
        }
        return [];
    }

    if (first instanceof ExtractResult) {
        return first.squash().filter(value => URL_REG.test(value));
    }
    return [];
}
|
||||
|
||||
/**
 * Offer text data to the user as a file download.
 *
 * @param data the file content
 * @param mimeType MIME type of the content
 * @param fileName name suggested for the download; defaults to the document
 *        title, or "result" when the title is empty
 */
export function saveFile(data: string, mimeType: string, fileName?: string) {
    fileName = fileName || document.title || "result";
    const blob = new Blob([data], { type: mimeType });
    const url = window.URL.createObjectURL(blob);
    const link = document.createElement("a");

    if (!('download' in link)) {
        // No download attribute available: navigate to the data instead.
        // The object URL must stay alive for that navigation.
        location.href = url;
        return;
    }

    link.style.visibility = "hidden";
    link.href = url;
    link.download = fileName;
    document.body.appendChild(link);
    try {
        link.click();
    } finally {
        document.body.removeChild(link);
        // Release the blob, but only after the download had time to start.
        setTimeout(() => window.URL.revokeObjectURL(url), 10000);
    }
}
|
||||
11
src/common.ts
Normal file
11
src/common.ts
Normal file
@ -0,0 +1,11 @@
|
||||
|
||||
/** Name of the extension; prefix of every message action. */
export const EXT_NAME = "DataExtracter";

/** Build a message action identifier of the form "<EXT_NAME>:<name>". */
const actionOf = (name: string): string => `${EXT_NAME}:${name}`;

export const ACTION_EXTRACT = actionOf("Extract");
export const ACTION_GOTO_URL = actionOf("GoToTUL");
export const ACTION_PING = actionOf("ReportIn");
export const ACTION_QUERY_URL = actionOf("QueryURL");
export const ACTION_SCROLL_BOTTOM = actionOf("ScrollToBottom");
export const ACTION_UPLOAD_STATE = actionOf("UploadStateFile");
export const ACTION_SLEEP = actionOf("Sleep");
export const ACTION_WAKEUP = actionOf("WakeUp");
|
||||
73
src/content/actions.ts
Normal file
73
src/content/actions.ts
Normal file
@ -0,0 +1,73 @@
|
||||
/**
 * Extract a table of values from the current document.
 *
 * Every element matching itemsSelector yields one row; every field selector
 * yields one cell, taken from the elements it matches inside the item. A
 * selector of the form 'selector@attr' reads that property of the matched
 * elements instead of their trimmed text. Several matches within one item
 * are joined with line breaks; a field without a match leaves its cell
 * undefined.
 *
 * Elements may be loaded asynchronously, so an empty list is returned when
 * no item is found, or when some field is found in no item at all. Senders
 * can detect that and retry. With wrong selectors such a retry never
 * succeeds.
 *
 * @param itemsSelector selector matching the items (data rows)
 * @param fieldSelectors selectors matching the fields (data columns) under each item
 * @returns the extracted rows, or an empty list when the sender should retry
 */
export function extract(itemsSelector: string, fieldSelectors: string[]): string[][] {
    const items: Element[] = Array.from(document.querySelectorAll(itemsSelector));
    // Items may not be loaded yet.
    if (!items.length) return [];

    // Records every field selector that matched in at least one item.
    const seen: { [key: string]: boolean } = {};
    const rows: string[][] = [];
    for (const item of items) {
        const row: string[] = [];
        for (const selector of fieldSelectors) {
            const [cls, attr] = selector.split('@').slice(0, 2);
            const matches = Array.from(item.querySelectorAll(cls));
            if (!matches.length) {
                row.push(undefined);
                continue;
            }
            seen[selector] = true;
            const values = matches.map(el => attr ? el[attr] : el.textContent.trim());
            row.push(values.join('\n'));
        }
        rows.push(row);
    }

    // A field that is found in no row at all suggests the page is not ready.
    const someFieldMissing = fieldSelectors.some(selector => !seen[selector]);
    return someFieldMissing ? [] : rows;
}
|
||||
|
||||
/**
 * Scroll the page down repeatedly until less than 20px remain below the
 * viewport. One attempt per second, with a limit of 10 handed to executeUntil.
 * @returns the promise returned by executeUntil
 */
export function scrollToBottom() {
    const scrollDown = () => window.scrollTo(0, document.body.clientHeight);
    const bottomReached = () => {
        const remaining = document.body.clientHeight - window.scrollY - window.innerHeight;
        return remaining < 20;
    };
    return executeUntil(scrollDown, bottomReached, "Scroll to page bottom...", 1000, 10);
}
|
||||
|
||||
/**
 * Repeatedly execute a function until the detector returns true.
 *
 * After each execution the detector is consulted once the interval has
 * passed. When it still reports false and the function has already been
 * executed `limit` times, the promise rejects.
 *
 * @param fn the function to execute
 * @param detector tells whether the goal is reached; without a detector the
 *        first execution counts as success
 * @param log message logged to the console after each check, with its outcome
 * @param interval time in ms between an execution and its check, default: 500
 * @param limit maximum number of executions; 0 means no limit
 * @return a promise that resolves with true on success, and rejects with
 *         false when the limit is reached or with the error thrown by fn or
 *         detector
 */
function executeUntil(fn: () => void, detector: () => boolean, log: string, interval: number, limit: number) {
    interval = interval || 500;
    let count = 0;
    return new Promise<boolean>((resolve, reject) => {
        const attempt = () => {
            try {
                fn();
            } catch (err) {
                reject(err);
                return;
            }
            count++;
            setTimeout(check, interval);
        };

        const check = () => {
            let done: boolean;
            try {
                done = !detector || detector();
            } catch (err) {
                reject(err);
                return;
            }
            if (log) console.log(log, done ? '(OK)' : '(failed)');
            if (done) {
                resolve(true);
            } else if (limit && count >= limit) {
                // Give up: fn has been executed `limit` times without success.
                reject(false);
            } else {
                attempt();
            }
        };

        attempt();
    });
}
|
||||
45
src/content/index.ts
Normal file
45
src/content/index.ts
Normal file
@ -0,0 +1,45 @@
|
||||
import { ACTION_WAKEUP, ACTION_EXTRACT, ACTION_GOTO_URL, ACTION_PING, ACTION_QUERY_URL, ACTION_SCROLL_BOTTOM, ACTION_SLEEP } from '../common';
|
||||
import { scrollToBottom, extract } from './actions';
|
||||
|
||||
// While asleep, the content script answers nothing but a wake-up request.
let asleep = false;

// Dispatch incoming requests to doAction and send its result back.
chrome.runtime.onMessage.addListener(
    function (request, sender: chrome.runtime.MessageSender, sendResponse: (r: any) => void) {
        if (!request.action) return;
        if (asleep && ACTION_WAKEUP != request.action) {
            sendResponse && sendResponse(undefined);
            return;
        }
        // console.log("Recieved request:",request);
        doAction(request, sender).then(
            r => sendResponse && sendResponse(r),
            err => {
                // A failed action must still be answered, otherwise the
                // sender is left waiting on an open message channel.
                console.error("Action failed:", request.action, err);
                sendResponse && sendResponse(undefined);
            }
        );
        // return true to indicate you wish to send a response asynchronously
        return true;
    }
);
|
||||
|
||||
/**
 * Carry out the action named by a request.
 *
 * @param request the request; `request.action` selects the action, further
 *        properties carry its arguments
 * @param sender sender of the request (not used by any action)
 * @returns the response of the action, or undefined for an unknown action
 */
async function doAction(request: any, sender: chrome.runtime.MessageSender) {
    const handlers = new Map<string, () => unknown>([
        [ACTION_EXTRACT, () => extract(request.itemsSelector, request.fieldSelectors)],
        [ACTION_GOTO_URL, () => {
            window.location.replace(request.url);
            // should not recieve any request until the page & script reload
            asleep = true;
            return request.url;
        }],
        [ACTION_PING, () => "pong"],
        [ACTION_QUERY_URL, () => window.location.href],
        [ACTION_SCROLL_BOTTOM, () => scrollToBottom()],
        [ACTION_SLEEP, () => {
            asleep = true;
            return "Content script is sleeping.";
        }],
        [ACTION_WAKEUP, () => {
            asleep = false;
            return "Content script is available.";
        }],
    ]);
    const handler = handlers.get(request.action);
    if (!handler) return;
    return handler();
}
|
||||
|
||||
34
src/popup/index.ts
Normal file
34
src/popup/index.ts
Normal file
@ -0,0 +1,34 @@
|
||||
import { ACTION_UPLOAD_STATE } from '../common';
|
||||
|
||||
// Wire up the popup: two links that open in new tabs, and the file input
// that uploads a saved state to the background scripts.
window.onload = function () {
    const detailLink = document.querySelector('#link-extension-detail');
    detailLink.addEventListener('click', () => {
        chrome.tabs.create({
            'url': `chrome://extensions/?id=${chrome.runtime.id}`
        });
    });

    const documentLink = document.querySelector('#link-document');
    documentLink.addEventListener('click', () => {
        chrome.tabs.create({
            'url': `https://git.jebbs.co/jebbs/data-extracter-extesion`
        });
    });

    const stateInput = document.querySelector('#state-input');
    stateInput.addEventListener('change', function (...args) {
        // Only a single selected file is uploaded.
        if (this.files.length != 1) return;
        const file = this.files[0];
        const fileName = file.name;
        const reader = new FileReader();
        reader.onload = function (evt) {
            const fileString = evt.target.result;
            const message = {
                action: ACTION_UPLOAD_STATE,
                state: fileString,
                name: fileName
            };
            chrome.runtime.sendMessage(message, r => {
                if (r) console.log('State sent:', r);
            });
        };
        reader.readAsText(file, "UTF-8");
    });
}
|
||||
Reference in New Issue
Block a user