fix & optimize

This commit is contained in:
2018-09-27 14:42:08 +08:00
parent 1148ae79d6
commit e9270e22b2
5 changed files with 112 additions and 102 deletions

View File

@ -1,9 +1,14 @@
// Message entry point for "DataExtracter:Extract" requests.
// Invalid argument lists are answered with the help text (`signitures`);
// valid ones are forwarded to `extract`, whose rejection is logged and alerted.
chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
    // Ignore every message that is not an extract request.
    if (message.from !== "DataExtracter:Extract") {
        return;
    }
    const callArgs = message.args;
    const argsAreValid = testArgs(...callArgs);
    if (!argsAreValid) {
        // Reply with the usage text so the sender can show it.
        sendResponse(signitures);
        return;
    }
    const onFailure = err => {
        console.log(err);
        alert(err);
    };
    extract(...callArgs).catch(onFailure);
});

View File

@ -1,4 +1,14 @@
chrome.runtime.onMessage.addListener(
/**
 * Forwards an extract call to the extension's message listener.
 *
 * The arguments are sent untouched in a "DataExtracter:Extract" message.
 * Any truthy response passed to the callback is written to the console.
 *
 * @param {...any} args arguments of the extract call, forwarded as-is
 */
function extract(...args) {
    const request = { from: "DataExtracter:Extract", args: args };
    const onResponse = function (response) {
        if (!response) {
            return;
        }
        console.log(response);
    };
    chrome.runtime.sendMessage(request, onResponse);
}
chrome.runtime.onMessage.addListener(
function (request, sender, sendResponse) {
if (!request.from) return;
let [ext, act] = request.from.split(":");
@ -25,44 +35,6 @@
}
);
/**
 * Help text printed when `extract` is called with an unsupported argument list.
 * The listed signatures must match what `testArgs` accepts: `fieldSelectors`
 * is always an array of strings, including in the `pages:number[]` form.
 */
const sig = `
# DataExtracter Help
----------------------------
## Signitures:
----------------------------
function extract(itemsSelector:string, fieldSelectors:string[])
function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
function extract(itemsSelector:string, fieldSelectors:string[], urls:string[])
## Examples:
----------------------------
### Extract current page
extract(".list-item", ["a.title", "p.content"])
### Extract multiple pages (1-10, interval 1)
extract(".list-item", ["a.title", "p.content"],"http://sample.com/?pn=\${page}", 1, 10, 1)
### Extract multiple urls (list)
extract(".list-item", ["a.title", "p.content"],["http://sample.com/abc","http://sample.com/xyz"])
### Extract specified pages (1,3,5)
extract(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=\${page}", [1, 3, 5])
## Advanced Examples:
----------------------------
### Extract link text and target (use 'selector@attribute')
extract('.list-item', ['a.title', 'a.title@href'])
### Collect links from page(s) & Extract data of each link (only available in console of extension background page)
extract('body',["a.title", "p.content"], await getData('.list-item', ['.item a@href'],["http://sample.com/abc"]))
`.trim();
function extractTabData(itemsSelector, fieldSelectors) {
return $(itemsSelector).toArray().map(
item => fieldSelectors.map(
@ -72,62 +44,4 @@ function extractTabData(itemsSelector, fieldSelectors) {
}
)
);
}
/**
 * Validates an extract call and, when valid, forwards it to the extension's
 * message listener.
 *
 * Invalid argument lists only print the help text (`sig`) to the console.
 * For valid ones a "DataExtracter:Extract" message is sent; any truthy
 * response is both logged and shown in an alert.
 *
 * @param {...any} args arguments of the extract call, forwarded as-is
 */
function extract(...args) {
    const argsAreValid = testArgs(...args);
    if (argsAreValid) {
        const request = { from: "DataExtracter:Extract", args: args };
        chrome.runtime.sendMessage(request, function (response) {
            if (!response) {
                return;
            }
            console.log(response);
            alert(response);
        });
    } else {
        console.log(sig);
    }
}
/**
 * Checks an `extract(...)` argument list against the supported signatures:
 *
 *   (itemsSelector, fieldSelectors)
 *   (itemsSelector, fieldSelectors, urls:string[])
 *   (itemsSelector, fieldSelectors, url, pages:number[])
 *   (itemsSelector, fieldSelectors, url, from, to, interval)
 *
 * `itemsSelector` must be a non-empty string and `fieldSelectors` an array of
 * strings in every form.
 *
 * @param {...any} args the argument list to validate
 * @returns {boolean} true when the arguments match one of the signatures
 */
function testArgs(...args) {
    const isStringArray = value =>
        Array.isArray(value) && value.every(item => typeof item === "string");
    // Number.isFinite does not coerce, so null, "" and numeric strings are rejected.
    const isNumberArray = value =>
        Array.isArray(value) && value.every(item => Number.isFinite(item));

    const [itemsSelector, fieldSelectors, ...rest] = args;
    // Every signature starts with the same two arguments; this also rejects
    // calls with fewer than two arguments.
    if (typeof itemsSelector !== "string" || itemsSelector === "" || !isStringArray(fieldSelectors)) {
        return false;
    }

    switch (args.length) {
        case 2:
            return true;
        case 3:
            return isStringArray(rest[0]);
        case 4:
            return typeof rest[0] === "string" && isNumberArray(rest[1]);
        case 6:
            // url + from + to + interval makes six arguments in total; the
            // numeric checks cover args[3]..args[5].
            return typeof rest[0] === "string" && isNumberArray(rest.slice(1));
        default:
            return false;
    }
}

View File

@ -1,4 +1,4 @@
class CSV {
class ExractResult {
constructor(data) {
this._data = data || [];
@ -7,8 +7,8 @@ class CSV {
return this._data[index];
}
column(index) {
return new [...Array(this._data.length).keys].map(
i => this._data[i, index]
return [...new Array(this._data.length).keys()].map(
i => this._data[i][index]
);
}
get data() {

View File

@ -20,11 +20,15 @@ async function extract(itemsSelector, fieldSelectors, ...args) {
* @param {...any} args url list / url templates, page numers, either [from, to, interval] or [...pages]
*/
async function getData(itemsSelector, fieldSelectors, ...args) {
if (!testArgs(itemsSelector, fieldSelectors, ...args))
throw new Error(`Invalid call arguments.\n\n${signitures}\n\n`);
let urls = [];
if (args.length) {
let arg = args.shift();
if (arg instanceof Array) {
urls = arg;
} else if (arg instanceof ExractResult) {
urls = arg.column(0);
} else {
let urlTempl = arg;
if (urlTempl) {
@ -62,7 +66,7 @@ async function getData(itemsSelector, fieldSelectors, ...args) {
pms.then(
results => {
data.push(...results);
resolve(new CSV(data));
resolve(new ExractResult(data));
},
err => reject(err)
);

View File

@ -1,3 +1,44 @@
/**
 * Help text shown when `extract` / `getData` is called with an unsupported
 * argument list. The listed signatures must match what `testArgs` accepts:
 * `fieldSelectors` is always an array of strings, including in the
 * `pages:number[]` form.
 */
const signitures = `
# DataExtracter Help
----------------------------
## Signitures:
----------------------------
function extract(itemsSelector:string, fieldSelectors:string[])
function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])
function extract(itemsSelector:string, fieldSelectors:string[], urls:string[])
function extract(itemsSelector:string, fieldSelectors:string[], urls:ExractResult)
## Examples:
----------------------------
### Extract current page
extract(".list-item", ["a.title", "p.content"])
### Extract multiple pages (1-10, interval 1)
extract(".list-item", ["a.title", "p.content"],"http://sample.com/?pn=\${page}", 1, 10, 1)
### Extract multiple urls (list)
extract(".list-item", ["a.title", "p.content"],["http://sample.com/abc","http://sample.com/xyz"])
### Extract specified pages (1,3,5)
extract(".list-item", ["a.title", "p.content"], "http://sample.com/?pn=\${page}", [1, 3, 5])
## Advanced Examples:
----------------------------
### Extract link text and target (use 'selector@attribute')
extract('.list-item', ['a.title', 'a.title@href'])
### Collect links from page(s) & Extract data of each link
>> (Available only in console of extension background page)
extract('body',["a.title", "p.content"], await getData('.list-item', ['.item a@href'],["http://sample.com/abc"]))
`.trim();
function saveFile(data, mimeType, fileName) {
fileName = fileName || document.title || "result";
var blob;
@ -28,4 +69,50 @@ function saveFile(data, mimeType, fileName) {
} else {
location.href = url
}
}
/**
 * Checks an `extract(...)` / `getData(...)` argument list against the
 * supported signatures:
 *
 *   (itemsSelector, fieldSelectors)
 *   (itemsSelector, fieldSelectors, urls:string[] | ExractResult)
 *   (itemsSelector, fieldSelectors, url, pages:number[])
 *   (itemsSelector, fieldSelectors, url, from, to, interval)
 *
 * `itemsSelector` must be a non-empty string and `fieldSelectors` an array of
 * strings in every form.
 *
 * @param {...any} args the argument list to validate
 * @returns {boolean} true when the arguments match one of the signatures
 */
function testArgs(...args) {
    const isStringArray = value =>
        Array.isArray(value) && value.every(item => typeof item === "string");
    // Number.isFinite does not coerce, so null, "" and numeric strings are rejected.
    const isNumberArray = value =>
        Array.isArray(value) && value.every(item => Number.isFinite(item));
    // ExractResult is declared in another script; the typeof guard keeps this
    // validator from throwing a ReferenceError where that script is not loaded.
    const isExtractResult = value =>
        typeof ExractResult === "function" && value instanceof ExractResult;

    const [itemsSelector, fieldSelectors, ...rest] = args;
    // Every signature starts with the same two arguments; this also rejects
    // calls with fewer than two arguments.
    if (typeof itemsSelector !== "string" || itemsSelector === "" || !isStringArray(fieldSelectors)) {
        return false;
    }

    switch (args.length) {
        case 2:
            return true;
        case 3:
            return isStringArray(rest[0]) || isExtractResult(rest[0]);
        case 4:
            return typeof rest[0] === "string" && isNumberArray(rest[1]);
        case 6:
            // url + from + to + interval: args[3]..args[5] must be numbers.
            return typeof rest[0] === "string" && isNumberArray(rest.slice(1));
        default:
            return false;
    }
}