// Serialized Extractor state (JSON text) consumed once by Extractor#load().
// NOTE(review): presumably assigned from outside this file (load()'s log message
// mentions uploading a state from the popup window), so it is kept as a `var`
// global rather than a block-scoped binding -- confirm before changing.
var __EXTRACTOR_STATE__ = "";

/**
 * Runs a chain of extraction tasks and exports the result of a task as CSV.
 *
 * Every public method that configures the chain returns `this` so calls can be chained.
 * The field names `_tasks`, `_options` and `_running` are part of the serialized
 * state written by save() and read by load(); do not rename them.
 */
class Extractor {
    /**
     * @param {*} options options handed unchanged to every Task created by this Extractor
     */
    constructor(options) {
        this._tasks = [];
        this._running = false;
        this._options = options;
    }

    /**
     * Save current state, in case we restore it later.
     * The whole Extractor is serialized with JSON.stringify and passed to saveFile
     * as 'state.json'.
     */
    save() {
        saveFile(JSON.stringify(this), 'application/json', 'state.json');
    }

    /**
     * Restore previous state by loading from saved state.
     * Reads the JSON text held in __EXTRACTOR_STATE__ and clears it after parsing.
     * @returns {Extractor|undefined} this, or undefined when no state is pending
     */
    load() {
        if (!__EXTRACTOR_STATE__) {
            logger.info('No state found. Please upload a saved state from the popup window first.');
            return;
        }
        const state = JSON.parse(__EXTRACTOR_STATE__);
        __EXTRACTOR_STATE__ = "";
        this._options = state._options;
        // The selector arguments are placeholders; Task#load(t) is handed the saved task data.
        this._tasks = state._tasks.map(
            t => new Task(this._options, 'whaterver', ['whaterver']).load(t)
        );
        return this;
    }

    /**
     * Add a task to the Extractor.
     * One Extractor can have multiple tasks, which are organized in a task chain.
     * If url arguments are not given for later tasks, they will use the previous
     * task's result as input (target url list).
     * @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
     * @returns {Extractor} this
     */
    task(...args) {
        this._tasks.push(new Task(this._options, ...args));
        return this;
    }

    /**
     * Clear tasks and task caches.
     * @returns {Extractor} this
     */
    clear() {
        this._tasks = [];
        return this;
    }

    /**
     * Start the task chain from the first task.
     */
    async start() {
        return this._startTasks(0);
    }

    /**
     * Restart from the specified task, but don't restart the previous tasks.
     * Tasks from `from` onwards are cleaned and executed again; earlier tasks are
     * skipped and their existing results feed the first restarted task.
     * @param {number} from where to restart the tasks, begins with 0
     */
    async restart(from = 0) {
        const id = this._checkTaskId(from, 0);
        if (id < 0) return;
        for (let i = id; i < this._tasks.length; i++) {
            this._tasks[i].clean();
        }
        // Pass the validated id so the tasks before it are not executed again.
        return this._startTasks(id);
    }

    /**
     * Execute tasks[from..] one after another, then export the last task's result.
     * Errors raised while executing tasks or exporting are logged, not rethrown.
     * @param {number} from index of the first task to execute
     */
    async _startTasks(from) {
        if (this._running) {
            logger.info('The Extractor is running. Please wait..');
            return;
        }
        if (!this._tasks.length) {
            logger.info('No task to run.');
            return;
        }
        // Claim the flag before the first await so overlapping calls cannot both get past the guard.
        this._running = true;
        try {
            let tab;
            const firstTask = this._tasks[0];
            if (firstTask.urls.length) {
                // task specifies target urls, create new tab with first url for it
                tab = await createTab(firstTask.urls[0], false);
            } else {
                tab = await getActiveTab(true) || await getActiveTab(false);
                const succ = await ping(tab);
                if (!succ) {
                    logger.error('Cannot contact with active tab.');
                    return;
                }
            }
            const first = from > 0 ? from : 0;
            try {
                for (let i = first; i < this._tasks.length; i++) {
                    // Every task but the first receives the previous task's results as its input.
                    const input = i > 0
                        ? new ExtractResult(this._tasks[i - 1].results)
                        : undefined;
                    await this._tasks[i].execute(tab, input);
                }
                this.export();
            } catch (err) {
                logger.error(err);
            }
        } finally {
            this._running = false;
        }
    }

    /**
     * Export result of a task to CSV.
     * Asks for confirmation (showing a preview) before the file is saved.
     * @param {number} taskid which task id to save, begins with 0; defaults to the last task
     */
    export(taskid) {
        const id = this._checkTaskId(taskid, this._tasks.length - 1);
        if (id < 0) return;
        const task = this._tasks[id];
        const results = task.results;
        if (!results || !results.length) {
            logger.info(`No result for task #${id}. Forget to call ".start()"?`);
            return;
        }
        // Build the header + data rows on a copy so the task's own results are left untouched.
        const exResults = new ExtractResult([task.fieldSelectors, ...results]);
        const msg = `Please confirm to download (${results.length} items): ${exResults.toString(50) || "- Empty -"}`.trim();
        if (confirm(msg)) {
            saveFile(exResults, "text/csv");
        }
    }

    /**
     * Validate a task id, falling back to defaultId when id is undefined.
     * @param {number} id task id to check
     * @param {number} defaultId id used when `id` is undefined
     * @returns {number} the usable task id, or -1 (after logging) when there is none
     */
    _checkTaskId(id, defaultId) {
        if (!this._tasks.length) {
            logger.info("No task found.");
            return -1;
        }
        if (!isNaN(defaultId) && id === undefined) id = defaultId;
        if (isNaN(id) || id < 0 || id >= this._tasks.length) {
            logger.info(`Invalid task id. Rang(0-${this._tasks.length - 1})`);
            return -1;
        }
        return id;
    }
}