import { Task } from "./task";
import { saveFile } from "./tools";
import { createTab, getActiveTab, ping } from "./actions";
import { ExtractResult } from "./result";
import { logger } from "./logger";
import { caches } from "./caches";

/**
 * Orchestrates a chain of extraction tasks.
 *
 * Tasks are executed sequentially against a single browser tab; every task
 * after the first receives the previous task's results (wrapped in an
 * `ExtractResult`) as its input.
 */
export class Extractor {
    private _tasks: Task[] = [];
    private _running = false;
    private _options: any = {};

    /**
     * @param options Optional settings object; stored as-is and forwarded to
     *                every `Task` created by this extractor.
     */
    constructor(options?) {
        if (options) this._options = options;
    }

    /**
     * Save current state, in case we restore it later.
     * Serializes this instance with `JSON.stringify` and hands it to
     * `saveFile` as `state.json`.
     */
    save() {
        saveFile(JSON.stringify(this), 'application/json', 'state.json');
    }

    /**
     * Restore a previous state from `caches.state`.
     * @returns `this` on success; `undefined` (after logging a hint) when no
     *          state is available.
     */
    load() {
        let content = caches.state;
        if (!content) {
            logger.info('No state found. Please upload a saved state from the popup window first.');
            return;
        }
        let state = JSON.parse(content);
        this._options = state._options;
        // The constructor arguments are placeholders; `Task.load` is expected
        // to populate the task from the saved data (Task is defined elsewhere).
        this._tasks = state._tasks.map(
            t => new Task(this._options, 'whaterver', ['whaterver']).load(t)
        );
        return this;
    }

    /**
     * Add a task to the Extractor.
     * One Extractor can have multiple tasks, which are organized in a task chain.
     * If url arguments are not given for later tasks, they will use the previous
     * task's result as input (target url list).
     * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
     */
    task(...args: any) {
        this._tasks.push(new Task(this._options, ...args));
        return this;
    }

    /**
     * Clear tasks and task caches.
     */
    clear() {
        this._tasks = [];
        return this;
    }

    /**
     * Start the task chain.
     */
    start() {
        return this._startTasks(0);
    }

    /**
     * Stop a single task, or every task when no id is given.
     * @param id index of the task to stop, begins with 0
     */
    stop(id?: number) {
        if (id !== undefined) {
            id = this._checkTaskId(id);
            if (id < 0) return;
            this._tasks[id].stop();
            return;
        }
        for (let i = 0; i < this._tasks.length; i++) {
            this._tasks[i].stop();
        }
    }

    /**
     * Call `watch()` on the specified task.
     * @param id index of the task, begins with 0
     */
    watch(id: number) {
        id = this._checkTaskId(id);
        if (id < 0) return;
        this._tasks[id].watch();
    }

    /**
     * Restart from the specified task, but don't restart the previous tasks.
     * @param {number} from where to restart the tasks, begins with 0
     */
    restart(from: number = 0) {
        let id = this._checkTaskId(from, 0);
        if (id < 0) return;
        for (let i = id; i < this._tasks.length; i++) {
            this._tasks[i].clean();
        }
        // NOTE(review): the chain is started from index 0, not `id`; this
        // presumably relies on un-cleaned tasks reusing their existing results
        // inside `Task.execute` — confirm against Task.
        return this._startTasks(0);
    }

    /**
     * Run the task chain sequentially on one tab.
     * @param from tasks with an index lower than this are skipped
     * @returns a promise that settles once the chain has finished (results are
     *          then exported) or failed (the error is logged, not rethrown)
     */
    async _startTasks(from: number) {
        if (this._running) {
            logger.info('The Extractor is running. Please wait..');
            return;
        }
        if (!this._tasks.length) {
            logger.info('No task to run.');
            return;
        }
        let tab;
        let task = this._tasks[0];
        if (task.urls.length) {
            // task specifies target urls, create new tab with first url for it
            tab = await createTab(task.urls[0], false);
        } else {
            tab = await getActiveTab(true) || await getActiveTab(false);
            let succ = await ping(tab);
            if (!succ) {
                logger.error('Cannot contact with active tab.');
                return;
            }
        }
        this._running = true;
        // Build a promise chain so that each task starts only after the
        // previous one has resolved.
        return this._tasks.reduce((pms, task: Task, i: number) => {
            return pms.then(
                () => {
                    if (i < from) return;
                    if (i > 0) {
                        let prevTask = this._tasks[i - 1];
                        return task.execute(tab, new ExtractResult(prevTask.results));
                    }
                    return task.execute(tab);
                });
        }, Promise.resolve(undefined)).then(
            () => {
                this._running = false;
                this.export();
            }
        ).catch(err => {
            this._running = false;
            logger.error(err);
        });
    }

    /**
     * Export result of a task to CSV.
     * Shows a confirmation dialog with a preview before downloading.
     * @param {number} taskid which task id to save, begins with 0;
     *                 defaults to the last task
     */
    export(taskid?: number) {
        let id = this._checkTaskId(taskid, this._tasks.length - 1);
        if (id < 0) return;
        let results = this._tasks[id].results;
        if (!results.length) {
            logger.info(`No result for task #${id}. \nForget to call ".start()"?`);
            return;
        }
        // Prepend the header row on a copy: mutating `results` in place would
        // leave the header inside the task's own data, so a second export (or
        // a following task consuming these results) would see it as a record.
        let rows = [this._tasks[id].fieldSelectors, ...results];
        let exResults = new ExtractResult(rows);
        let msg = ` Please confirm to download (${results.length} items): ${exResults.toString(50) || "- Empty -"} `.trim();
        if (confirm(msg)) {
            saveFile(exResults.toString(), "text/csv");
        }
    }

    /**
     * Validate a task index.
     * @param id index to validate
     * @param defaultId index used when `id` is `undefined`
     * @returns the validated index, or -1 (after logging) when there are no
     *          tasks or the index is out of range
     */
    _checkTaskId(id: number, defaultId?: number) {
        if (!this._tasks.length) {
            logger.info("No task found.");
            return -1;
        }
        if (!isNaN(defaultId) && id === undefined) id = defaultId;
        if (isNaN(id) || id < 0 || id >= this._tasks.length) {
            logger.info(`Invalid task id. Rang(0-${this._tasks.length - 1})`);
            return -1;
        }
        return id;
    }
}