scrollToBottom option
This commit is contained in:
@ -136,6 +136,19 @@ function queryUrl(tab, expected, log) {
|
||||
return sendMessage(tab, req, log, cond);
|
||||
}
|
||||
|
||||
/**
 * Ask the target tab to scroll its page to the bottom.
 *
 * Sends a request whose action is ACTION_SCROLL_BOTTOM to the tab through
 * sendMessage, logging 'Scroll to page bottom...'.
 * @param {any} tab target tab
 * @returns {Promise<any>} whatever sendMessage returns for this request
 * (NOTE(review): presumably a promise of the content script's response — confirm against sendMessage)
 */
function scrollToBottom(tab) {
    const req = {
        action: ACTION_SCROLL_BOTTOM
    };
    return sendMessage(tab, req, 'Scroll to page bottom...');
}
|
||||
|
||||
async function createTab(url, active) {
|
||||
return new Promise((resolve, reject) => {
|
||||
chrome.tabs.create({
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
class Extractor {
|
||||
constructor() {
|
||||
constructor(options) {
|
||||
this._tasks = [];
|
||||
this._running = false;
|
||||
this._options = options;
|
||||
}
|
||||
/**
|
||||
* Add a task to Extractor. \n
|
||||
@ -10,7 +11,7 @@ class Extractor {
|
||||
* @param {...any} args itemsSelector, fieldSelectors, and more args to specify target urls.
|
||||
*/
|
||||
task(...args) {
|
||||
this._tasks.push(new Task(...args));
|
||||
this._tasks.push(new Task(this._options, ...args));
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
@ -32,7 +33,7 @@ class Extractor {
|
||||
*/
|
||||
async restart(from = 0) {
|
||||
let id = this._checkTaskId(from, 0);
|
||||
if (!id) return;
|
||||
if (id < 0) return;
|
||||
for (let i = id; i < this._tasks.length; i++) {
|
||||
this._tasks[i].clean();
|
||||
}
|
||||
@ -68,10 +69,13 @@ class Extractor {
|
||||
return task.execute(tab, undefined);
|
||||
});
|
||||
}, Promise.resolve(undefined)).then(
|
||||
() => this.save()
|
||||
() => {
|
||||
this._running = false;
|
||||
this.save();
|
||||
}
|
||||
).catch(err => {
|
||||
this._running = false;
|
||||
console.log(err)
|
||||
console.log(err);
|
||||
});
|
||||
}
|
||||
/**
|
||||
@ -80,31 +84,34 @@ class Extractor {
|
||||
*/
|
||||
save(taskid) {
|
||||
let id = this._checkTaskId(taskid, this._tasks.length - 1);
|
||||
if (!id) return;
|
||||
let result = new ExtractResult(this._tasks[id].results);
|
||||
if (id < 0) return;
|
||||
let results = this._tasks[id].results
|
||||
results.unshift(this._tasks[id].fieldSelectors);
|
||||
|
||||
if (!result.data.length) {
|
||||
let exResults = new ExtractResult(results);
|
||||
|
||||
if (!results.length) {
|
||||
console.log(`No result for task #${id}. Forget to call ".start()"?`);
|
||||
return;
|
||||
}
|
||||
let msg = `
|
||||
Please confirm to download (${result.data.length - 1} items):
|
||||
Please confirm to download (${results.length - 1} items):
|
||||
|
||||
${result.toString(50) || "- Empty -"}
|
||||
${exResults.toString(50) || "- Empty -"}
|
||||
`.trim();
|
||||
if (confirm(msg)) {
|
||||
saveFile(result, "text/csv");
|
||||
saveFile(exResults, "text/csv");
|
||||
}
|
||||
}
|
||||
_checkTaskId(id, defaultId) {
|
||||
if (!this._tasks.length) {
|
||||
console.log("No task found.");
|
||||
return 0;
|
||||
return -1;
|
||||
}
|
||||
if (defaultId && id === undefined) id = defaultId;
|
||||
if (!isNaN(defaultId) && id === undefined) id = defaultId;
|
||||
if (isNaN(id) || id < 0 || id >= this._tasks.length) {
|
||||
console.log(`Invalid task id. Rang(0-${this._tasks.length - 1})`);
|
||||
return 0;
|
||||
return -1;
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
@ -1,8 +1,6 @@
|
||||
class Task {
|
||||
// _manager = undefined;
|
||||
// _id = 0;
|
||||
// _urls = [];
|
||||
_data = {};
|
||||
_data_keys = [];
|
||||
/**
|
||||
* Create a task.
|
||||
* constructor(itemsSelector:string, fieldSelectors:string[])
|
||||
@ -11,9 +9,10 @@ class Task {
|
||||
* constructor(itemsSelector:string, fieldSelectors:string[], urls:string[])
|
||||
* @param {...any} args
|
||||
*/
|
||||
constructor(...args) {
|
||||
constructor(options, ...args) {
|
||||
if (!testArgs(...args))
|
||||
throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
|
||||
this._options = options;
|
||||
this._itemsSelector = args.shift();
|
||||
this._fieldSelectors = args.shift();
|
||||
this._urls = parseUrls(...args);
|
||||
@ -25,39 +24,52 @@ class Task {
|
||||
return this._data;
|
||||
}
|
||||
get results() {
|
||||
return this._urls.reduce((p, c) => {
|
||||
return this._data_keys.reduce((p, c) => {
|
||||
return p.concat(this._data[c]);
|
||||
}, []);
|
||||
}
|
||||
get fieldSelectors() {
|
||||
return this._fieldSelectors;
|
||||
}
|
||||
clean() {
|
||||
this._data = {};
|
||||
}
|
||||
async execute(tab, upstreamData) {
|
||||
if (!tab) throw new Error("No tab to execute the task.");
|
||||
if (!this._urls.length) {
|
||||
if (!tab) return Promise.reject("No tab to execute the task.");
|
||||
let urls = this._urls
|
||||
if (!urls.length) {
|
||||
if (upstreamData) {
|
||||
this._urls = parseUrls(upstreamData);
|
||||
urls = parseUrls(upstreamData);
|
||||
} else {
|
||||
this._urls = [await queryUrl(tab)];
|
||||
urls = [await queryUrl(tab)];
|
||||
}
|
||||
}
|
||||
return this._urls.reduce((p, url, i) => p.then(
|
||||
let saveResult = (results, key) => {
|
||||
this._data[key] = results;
|
||||
this._data_keys.push(key);
|
||||
}
|
||||
return urls.reduce((p, url, i) => p.then(
|
||||
results => {
|
||||
if (i > 0) {
|
||||
if (!MSG_URL_SKIPPED.isEqual(results)) {
|
||||
let lastURL = this._urls[i - 1];
|
||||
this._data[lastURL] = results;
|
||||
}
|
||||
let lastURL = urls[i - 1];
|
||||
saveResult(results, lastURL);
|
||||
}
|
||||
}
|
||||
return this._data[url] ? MSG_URL_SKIPPED : redirectTab(tab, url).then(
|
||||
if (this._data[url]) return MSG_URL_SKIPPED;
|
||||
let pms = redirectTab(tab, url);
|
||||
if (this._options["scrollToBottom"]) {
|
||||
pms = pms.then(() => scrollToBottom(tab));
|
||||
}
|
||||
return pms.then(
|
||||
() => extractTabData(tab, this._itemsSelector, this._fieldSelectors)
|
||||
);
|
||||
}
|
||||
), Promise.resolve(null)).then(
|
||||
results => {
|
||||
if (!MSG_URL_SKIPPED.isEqual(results)) {
|
||||
let lastURL = this._urls[this._urls.length - 1];
|
||||
this._data[lastURL] = results;
|
||||
let lastURL = urls[urls.length - 1];
|
||||
saveResult(results, lastURL);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,53 +1,101 @@
|
||||
chrome.runtime.onMessage.addListener(
|
||||
function (request, sender, sendResponse) {
|
||||
if (!request.action) return;
|
||||
// console.log("Recieved request:",request);
|
||||
(function () {
|
||||
chrome.runtime.onMessage.addListener(
|
||||
function (request, sender, sendResponse) {
|
||||
if (!request.action) return;
|
||||
// console.log("Recieved request:",request);
|
||||
doAction(request, sender).then(r => sendResponse && sendResponse(r));
|
||||
// return true to indicate you wish to send a response asynchronously
|
||||
return true;
|
||||
}
|
||||
);
|
||||
|
||||
async function doAction(request, sender) {
|
||||
switch (request.action) {
|
||||
case ACTION_EXTRACT:
|
||||
let data = extract(request.itemsSelector, request.fieldSelectors);
|
||||
if (sendResponse) sendResponse(data);
|
||||
break;
|
||||
return data;
|
||||
case ACTION_GOTO_URL:
|
||||
window.location.replace(request.url);
|
||||
if (sendResponse) sendResponse(request.url);
|
||||
break;
|
||||
return request.url;
|
||||
case ACTION_REPORT_IN:
|
||||
if (sendResponse) sendResponse(request.action);
|
||||
break;
|
||||
return request.action;
|
||||
case ACTION_QUERY_URL:
|
||||
if (sendResponse) sendResponse(window.location.href);
|
||||
break;
|
||||
return window.location.href;
|
||||
case ACTION_SCROLL_BOTTOM:
|
||||
return executeUntil(
|
||||
() => window.scrollTo(0, document.body.clientHeight),
|
||||
() => document.body.clientHeight - window.scrollY - window.innerHeight < 20,
|
||||
"Scroll to page bottom...",
|
||||
1000,
|
||||
10
|
||||
)
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
function extract(itemsSelector, fieldSelectors) {
|
||||
// since some elements may be loaded asynchronously.
|
||||
// if one field is never found, we should return undefined,
|
||||
// so that senders can detect to retry until elements loaded.
|
||||
// If user writes wrong selectors, the task retries infinitely.
|
||||
let fieldFound = {};
|
||||
let items = Array.from(document.querySelectorAll(itemsSelector));
|
||||
// items may not loaded yet, tell the sender to retry.
|
||||
if (!items.length) return MSG_ELEMENT_NOT_FOUND;
|
||||
let results = items.map(
|
||||
item => {
|
||||
return fieldSelectors.map(
|
||||
selector => {
|
||||
let [cls, attr] = selector.split('@').slice(0, 2);
|
||||
let fieldVals = Array.from(item.querySelectorAll(cls));
|
||||
if (!fieldVals.length) {
|
||||
return;
|
||||
function extract(itemsSelector, fieldSelectors) {
|
||||
// since some elements may be loaded asynchronously.
|
||||
// if one field is never found, we should return undefined,
|
||||
// so that senders can detect to retry until elements loaded.
|
||||
// If user writes wrong selectors, the task retries infinitely.
|
||||
let fieldFound = {};
|
||||
let items = Array.from(document.querySelectorAll(itemsSelector));
|
||||
// items may not loaded yet, tell the sender to retry.
|
||||
if (!items.length) return MSG_ELEMENT_NOT_FOUND;
|
||||
let results = items.map(
|
||||
item => {
|
||||
return fieldSelectors.map(
|
||||
selector => {
|
||||
let [cls, attr] = selector.split('@').slice(0, 2);
|
||||
let fieldVals = Array.from(item.querySelectorAll(cls));
|
||||
if (!fieldVals.length) {
|
||||
return;
|
||||
}
|
||||
fieldFound[selector] = true;
|
||||
return fieldVals.map(find => attr ? find[attr] : find.textContent.trim()).join('\n')
|
||||
}
|
||||
fieldFound[selector] = true;
|
||||
return fieldVals.map(find => attr ? find[attr] : find.textContent.trim()).join('\n')
|
||||
)
|
||||
}
|
||||
);
|
||||
// if it exists a field, which is not found in any row, the sender should retry.
|
||||
let shouldWait = fieldSelectors.reduce((p, c) => p || !fieldFound[c], false);
|
||||
return shouldWait ? MSG_ELEMENT_NOT_FOUND : results
|
||||
}
|
||||
|
||||
/**
 * Repeatedly execute a function until the detector returns true.
 *
 * Each round calls `fn`, waits `interval` milliseconds, then asks `detector`
 * whether the goal has been reached. If `limit` is given and `fn` has already
 * run `limit` times without the detector succeeding, the promise is rejected.
 * @param {object} fn the function to execute
 * @param {object} detector the detector; when omitted, the first round counts as success.
 * @param {string} log messages logged to console.
 * @param {number} interval interval for detecting, in milliseconds (defaults to 500 when falsy)
 * @param {number} limit max execute times of a function; falsy means no limit
 * @return {Promise} a promise that resolves with true once the detector succeeds,
 * rejects with false when the limit is exhausted, or rejects with the thrown
 * error if `fn` or `detector` throws. Callers should handle the rejection.
 */
function executeUntil(fn, detector, log, interval, limit) {
    const delay = interval || 500;
    // Number of times fn has been executed so far.
    let count = 0;
    return new Promise((resolve, reject) => {
        const check = () => {
            let flag;
            try {
                flag = !detector || detector();
            } catch (err) {
                // Runs inside a timer callback: without this the promise would never settle.
                reject(err);
                return;
            }
            if (log) console.log(log, flag ? '(OK)' : '(failed)');
            if (flag) {
                resolve(true);
                return;
            }
            // Give up once fn has run `limit` times; return so no further timer is scheduled.
            if (limit && count >= limit) {
                reject(false);
                return;
            }
            attempt();
        };

        const attempt = () => {
            try {
                fn();
            } catch (err) {
                reject(err);
                return;
            }
            // Count executions (the limit itself must stay untouched).
            count++;
            setTimeout(check, delay);
        };

        attempt();
    });
}
|
||||
|
||||
})();
|
||||
|
||||
@ -6,6 +6,7 @@ const ACTION_EXTRACT = `${EXT_NAME}:Extract`;
|
||||
const ACTION_GOTO_URL = `${EXT_NAME}:GoToTUL`;
|
||||
const ACTION_REPORT_IN = `${EXT_NAME}:ReportIn`;
|
||||
const ACTION_QUERY_URL = `${EXT_NAME}:QueryURL`;
|
||||
const ACTION_SCROLL_BOTTOM = `${EXT_NAME}:ScrollToBottom`;
|
||||
|
||||
const MSG_ELEMENT_NOT_FOUND = new ConstMessage(1, "No element found for at least one selector, maybe it's not loaded yet");
|
||||
const MSG_URL_SKIPPED = new ConstMessage(100, "Skipped current URL");
|
||||
|
||||
Reference in New Issue
Block a user