This commit is contained in:
2018-05-23 09:51:18 +08:00
commit c81b2d1989
14 changed files with 409 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.DS_Store

BIN
icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.0 KiB

38
manifest.json Executable file
View File

@ -0,0 +1,38 @@
{
"manifest_version": 2,
"name": "Data Extracter",
"version": "0.0.1",
"author": "jebbs",
"description": "Extract data as a sheet from web page elements.",
"icons": {
"16": "icon.png",
"48": "icon.png",
"128": "icon.png"
},
"browser_action": {
"default_icon": "icon.png",
"default_popup": "popup/tip.html",
"default_title": "Data Extracter"
},
"background": {
"scripts": [
"scripts/tools.js",
"scripts/extract.js",
"scripts/background.js"
],
"persistent": false
},
"content_scripts": [{
"matches": ["*://*/*"],
"js": [
"scripts/jquery.min.js",
"scripts/tools.js",
"scripts/content.js"
],
"run_at": "document_idle"
}],
"permissions": [
"activeTab",
"storage"
]
}

BIN
popup/demo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

52
popup/index.html Executable file
View File

@ -0,0 +1,52 @@
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>Data Extractor</title>
<script charset="UTF-8" type="text/javascript" src="../scripts/jquery.min.js"></script>
<script charset="UTF-8" type="text/javascript" src="../styles/bootstrap.min.js"></script>
<script charset="UTF-8" type="text/javascript" src="../scripts/tools.js"></script>
<script charset="UTF-8" type="text/javascript" src="../scripts/extract.js"></script>
<script charset="UTF-8" type="text/javascript" src="popup.js"></script>
<link rel="stylesheet" href="../styles/bootstrap.min.css">
</head>
<body style="margin: 20px 10px;">
<div class="container-fluid" style="min-width: 420px;">
<div class="row">
<div class="col">
<h3>Data Extractor</h3>
<div class="alert alert-info small" role="alert">
Selector fields require
<b>JQuery Selectors</b> to select data items and fields.
</div>
</div>
</div>
<div class="row">
<div class="col">
<form>
<div class="form-group">
<label for="formFileName">File Name</label>
<input type="text" class="form-control" id="formFileName" placeholder="fileName">
</div>
<div class="form-group">
<label for="formItemSelector">Items Selector</label>
<input type="text" class="form-control" id="formItemSelector" placeholder=".item-selector">
</div>
<div class="form-group">
<label for="formFieldSelector">Field Selectors (one selector per line)</label>
<textarea class="form-control" id="formFieldSelector" rows="3" placeholder=".field-selector"></textarea>
</div>
</form>
<input type="button" class="btn btn-primary" value="Extract" id="doExtract">
</div>
</div>
<div class="row">
<div class="col">
</div>
</div>
</div>
</body>
</html>

10
popup/popup.js Executable file
View File

@ -0,0 +1,10 @@
// Once the popup document is ready, run doExtract whenever the Extract button is clicked.
$().ready(function () {
    $("#doExtract").on('click', doExtract);
});
/**
 * Click handler of the popup's Extract button.
 *
 * Reads the item selector and the field selectors (one per line) from the
 * form and hands them to extract(). The previous implementation passed the
 * raw textarea content to eval(), which the default extension content
 * security policy rejects and which never used the parsed selectors.
 *
 * The "File Name" field is not consumed here because extract() takes no
 * file name parameter.
 */
function doExtract() {
    const itemSelector = $("#formItemSelector")[0].value.trim();
    const fieldSelectors = $("#formFieldSelector")[0].value
        .split('\n')
        .map(s => s.trim())
        // Blank lines in the textarea are not selectors.
        .filter(s => s.length > 0);
    if (!itemSelector || fieldSelectors.length === 0) {
        console.log("doExtract: an items selector and at least one field selector are required.");
        return;
    }
    extract(itemSelector, fieldSelectors).catch(err => console.log(err));
}

46
popup/tip.html Normal file
View File

@ -0,0 +1,46 @@
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>Data Extractor</title>
<script charset="UTF-8" type="text/javascript" src="../scripts/jquery.min.js"></script>
<script charset="UTF-8" type="text/javascript" src="../styles/bootstrap.min.js"></script>
<link rel="stylesheet" href="../styles/bootstrap.min.css">
</head>
<body style="margin: 20px 10px;">
<div class="container-fluid" style="min-width: 420px;">
<div class="row">
<div class="col">
<h3>Data Extractor</h3>
<div class="alert alert-info small" role="alert">
<h6>Usage:</h6>
<p>
<b>Open console</b> and
<b>switch to Data Extracter</b>, then call the
<b>extract</b> function.
</p>
<p>
<img src="demo.png" alt="" style="max-width: 489px; width: 100%; border-radius: 5px">
</p>
<p>
<b>Example</b>:<br> extract("table tr", ["td:nth-child(1)","td:nth-child(2)"])
</p>
</div>
<div class="alert alert-info small" role="alert">
<h6>Interface:</h6>
<p>function extract(itemsSelector:string, fieldSelectors:string[]) </p>
<p>function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)</p>
<p>function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])</p>
</div>
<!-- <div class="alert alert-danger small" role="alert">
Selector means
<b>JQuery Selectors</b> which is used to select data items and fields.
</div> -->
</div>
</div>
</div>
</body>
</html>

6
scripts/background.js Normal file
View File

@ -0,0 +1,6 @@
// Runs a paged extraction on behalf of a page that called extract() with a URL.
// The listener only reacts to messages whose `from` is "doExtractRequest" and
// forwards `message.args` to extract().
chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
    if (!message || message.from !== "doExtractRequest") return;
    if (!Array.isArray(message.args)) {
        // Spreading a non-array would throw before extract() is even called.
        console.log("doExtractRequest ignored: args is not an array.");
        return;
    }
    extract(...message.args).then(
        // Success needs no notice; an empty response just closes the channel.
        () => sendResponse(),
        err => {
            console.log(err);
            // Report the failure to the sender, which otherwise never hears about it.
            sendResponse(`Extract failed: ${(err && err.message) || err}`);
        }
    );
    // Keep the message channel open until the promise above settles.
    return true;
});

64
scripts/content.js Normal file
View File

@ -0,0 +1,64 @@
// Content-script side of the extraction protocol. Dispatches on `request.from`:
//   "doExtractRequest"  - run extractData() on this page and reply with the rows
//   "doExtractGotoUrl"  - navigate this page to request.url (no reply is sent)
//   "doExtractReportIn" - reply with the message name so the sender knows the page answers
// Any other message is ignored.
chrome.runtime.onMessage.addListener(
    function (request, sender, sendResponse) {
        switch (request.from) {
            case "doExtractRequest":
                sendResponse(extractData(request.itemsSelector, request.fieldSelectors));
                break;
            case "doExtractGotoUrl":
                window.location.replace(request.url);
                break;
            case "doExtractReportIn":
                sendResponse(request.from);
                break;
        }
    }
);
/**
 * Builds one row per element matched by `itemsSelector`.
 *
 * Each row holds one cell per entry of `fieldSelectors`: the trimmed text of
 * every descendant matching that selector, joined with newlines (an empty
 * string when nothing matches).
 *
 * @param {string} itemsSelector jQuery selector for the data items.
 * @param {string[]} fieldSelectors jQuery selectors evaluated inside each item.
 * @returns {string[][]} rows of cell texts.
 */
function extractData(itemsSelector, fieldSelectors) {
    const rows = [];
    for (const element of $(itemsSelector).toArray()) {
        const readField = selector => {
            const texts = $(element)
                .find(selector)
                .toArray()
                .map(node => node.textContent.trim());
            return texts.join('\n');
        };
        rows.push(fieldSelectors.map(readField));
    }
    return rows;
}
/**
 * Entry point meant to be called from the page console.
 *
 * Supported call forms:
 *   extract(itemsSelector, fieldSelectors)
 *   extract(itemsSelector, fieldSelectors, url, from, to, interval)
 *   extract(itemsSelector, fieldSelectors, url, pages)
 *
 * With two arguments the current page is extracted right here and offered for
 * download. With more arguments the call is forwarded to the background page
 * as a "doExtractRequest" message. Invalid arguments only log the accepted
 * signatures.
 */
function extract(...args) {
    const sig = [
        "Invalid call args.",
        "function extract(itemsSelector:string, fieldSelectors:string[])",
        "function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)",
        "function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])",
    ].join("\n");
    if (!testArgs(...args)) {
        console.log(sig);
        return;
    }
    if (args.length === 2) {
        const [itemsSelector, fieldSelectors] = args;
        // saveFileAsk() treats row 0 as the header (its preview starts at row 1
        // and it reports length - 1 items), so the selectors go first.
        saveFileAsk([fieldSelectors, ...extractData(itemsSelector, fieldSelectors)]);
        return;
    }
    const message = {
        from: "doExtractRequest",
        args: args
    };
    chrome.runtime.sendMessage(message, r => {
        // Only a non-empty response is shown to the user.
        if (r) {
            console.log(r);
            alert(r);
        }
    });
}
/**
 * Validates the arguments of extract().
 *
 * Accepted shapes:
 *   (itemsSelector, fieldSelectors)
 *   (itemsSelector, fieldSelectors, url, pages[])
 *   (itemsSelector, fieldSelectors, url, from, to, interval)
 *
 * itemsSelector must be a non-empty string and fieldSelectors an array in
 * every form. from/to/interval must be finite numbers and interval must be
 * positive, because the paging loop that consumes them advances by
 * `interval` and would never finish otherwise.
 *
 * @returns {boolean} true when the arguments match one of the shapes.
 */
function testArgs(...args) {
    const [itemsSelector, fieldSelectors, url, ...paging] = args;
    const selectorsOk = typeof itemsSelector === "string" &&
        itemsSelector !== "" &&
        Array.isArray(fieldSelectors);
    if (!selectorsOk) return false;
    if (args.length === 2) return true;
    if (typeof url !== "string") return false;
    if (Array.isArray(paging[0])) return true;
    const [from, to, interval] = paging;
    const allNumbers = [from, to, interval].every(
        value => typeof value === "number" && Number.isFinite(value)
    );
    return allNumbers && interval > 0;
}

121
scripts/extract.js Normal file
View File

@ -0,0 +1,121 @@
/**
 * Extracts rows from the active tab, optionally across several pages, and
 * offers the result for download.
 *
 * Call forms:
 *   extract(itemsSelector, fieldSelectors)
 *   extract(itemsSelector, fieldSelectors, url, pages)
 *   extract(itemsSelector, fieldSelectors, url, from, to, interval)
 *
 * Every "${page}" in `url` is replaced by the page value. With a `pages`
 * array one URL is built per entry; with from/to/interval one URL is built
 * for from, from + interval, ... up to and including `to`. Without URLs the
 * active tab is extracted as it is.
 *
 * @param {string} itemsSelector selector for the data items.
 * @param {string[]} fieldSelectors selectors for the cells; also used as header row.
 * @param {string} [url] URL template containing "${page}".
 * @returns {Promise<string>} resolves with "save done." after saveFileAsk() ran;
 *   rejects when there is no active tab, the interval is not positive, or a
 *   navigation / extraction step fails.
 */
function extract(itemsSelector, fieldSelectors, url, ...args) {
    const urls = [];
    if (url) {
        const urlForPage = page => url.split("${page}").join(page);
        if (Array.isArray(args[0])) {
            for (const page of args[0]) urls.push(urlForPage(page));
        } else if (args.length >= 3) {
            const [from, to, interval] = args;
            // A zero or negative step would never reach `to`.
            if (!(interval > 0)) {
                return Promise.reject(
                    new Error(`extract: interval must be a positive number, got ${interval}`)
                );
            }
            for (let page = from; page <= to; page += interval) {
                urls.push(urlForPage(page));
            }
        }
    }
    return new Promise((resolve, reject) => {
        chrome.tabs.query({
            active: true,
            currentWindow: true
        }, function (tabs) {
            const tab = tabs && tabs[0];
            if (!tab) {
                reject(new Error("extract: no active tab found."));
                return;
            }
            const data = [];
            let pending;
            if (urls.length) {
                // Visit the pages strictly one after another: each step first
                // stores the rows of the previous page, then navigates and extracts.
                pending = urls.reduce(
                    (chain, pageUrl) => chain.then(results => {
                        data.push(...results);
                        return redirectTab(tab, pageUrl).then(
                            loadedTab => extractTabData(loadedTab, itemsSelector, fieldSelectors)
                        );
                    }),
                    Promise.resolve([])
                );
            } else {
                pending = extractTabData(tab, itemsSelector, fieldSelectors);
            }
            pending
                .then(results => {
                    data.push(...results);
                    data.unshift(fieldSelectors);
                    saveFileAsk(data);
                    resolve("save done.");
                })
                // Also catches errors thrown while collecting or saving the rows.
                .catch(reject);
        });
    });
}
/**
 * Asks the tab to run an extraction and resolves with the rows it replies.
 *
 * Sends a "doExtractRequest" message carrying both selectors to `tab.id`.
 *
 * @param {{id: number}} tab tab to query.
 * @param {string} itemsSelector selector for the data items.
 * @param {string[]} fieldSelectors selectors for the cells of each item.
 * @param {number} [timeoutMs=2000] how long to wait for the reply.
 * @returns {Promise<*>} resolves with the reply; rejects with a message string
 *   when the messaging call reports an error or no reply arrives in time.
 */
function extractTabData(tab, itemsSelector, fieldSelectors, timeoutMs = 2000) {
    const tabId = tab.id;
    return new Promise((resolve, reject) => {
        const timer = setTimeout(() => {
            reject(`extractTabData failed after ${timeoutMs / 1000} second(s).`);
        }, timeoutMs);
        chrome.tabs.sendMessage(
            tabId, {
                from: "doExtractRequest",
                itemsSelector: itemsSelector,
                fieldSelectors: fieldSelectors
            },
            response => {
                // The reply arrived, so the deadline timer is no longer needed.
                clearTimeout(timer);
                const lastError = chrome.runtime.lastError;
                if (lastError) {
                    reject(`extractTabData failed: ${lastError.message}`);
                    return;
                }
                resolve(response);
            }
        );
    });
}
// Sends the tab to `url` and waits until the page in that tab answers again.
//
// A "doExtractGotoUrl" message carrying `url` is sent to `tab.id`; afterwards
// the tab is polled with "doExtractReportIn" messages. The returned promise
// resolves with the same `tab` object once a poll is answered with the
// message name, and rejects with a message string after 10 seconds without
// such an answer.
function redirectTab(tab, url) {
// Set once a poll has been answered; stops further polling.
let done = false;
// Handle of the pending 100 ms re-poll timer.
let timeOut;
return new Promise((resolve, reject) => {
let req = {
from: "doExtractGotoUrl",
url: url
}
// Polling starts 500 ms after the callback of the goto message runs.
chrome.tabs.sendMessage(tab.id, req, r => {
// if (r != req.from) return;
// detectLoop();
setTimeout(() => {
detectLoop();
}, 500);
});
// Overall deadline: reject and cancel the pending re-poll if nothing answered.
// NOTE(review): this timer is not cancelled on success; it then only finds
// `done` set and does nothing.
setTimeout(() => {
if (!done) {
reject(`redirectTab failed after 10 second.`);
clearTimeout(timeOut);
}
}, 10000);
// Sends one report-in poll and, while no answer has been recorded, schedules
// the next poll 100 ms later.
function detectLoop() {
let req = {
from: "doExtractReportIn"
}
chrome.tabs.sendMessage(tab.id, req, r => {
// Only a reply that echoes the message name counts as an answer.
if (r != req.from) return;
resolve(tab)
done = true;
});
// Checked right after the poll was sent, so unless the reply callback has
// already run, another poll gets scheduled.
if (!done) {
clearTimeout(timeOut);
timeOut = setTimeout(() => {
detectLoop();
}, 100);
}
}
});
}
/**
 * Opens `url` in a new active tab.
 *
 * @param {string} url address to open.
 * @returns {Promise<*>} resolves with the tab value passed to the
 *   chrome.tabs.create callback.
 */
function createTab(url) {
    const options = { active: true, url: url };
    return new Promise(resolve => {
        chrome.tabs.create(options, created => resolve(created));
    });
}

4
scripts/jquery.min.js vendored Executable file

File diff suppressed because one or more lines are too long

53
scripts/tools.js Normal file
View File

@ -0,0 +1,53 @@
/**
 * Serialises rows of string cells as CSV text.
 *
 * Every cell is trimmed, wrapped in double quotes, and has its embedded double
 * quotes doubled. Cells are separated by commas and every row ends with "\n".
 *
 * @param {string[][]} data rows of cells.
 * @returns {string} the CSV text ("" for no rows).
 */
function formatCSV(data) {
    const quote = cell => `"${cell.trim().replace(/"/g, '""')}"`;
    const lines = data.map(cells => cells.map(cell => quote(cell)).join(",") + "\n");
    return lines.join("");
}
/**
 * Offers `data` to the user as a file download.
 *
 * The data is wrapped in a Blob (or built through a legacy BlobBuilder when
 * window.Blob is not a function) and exposed via an object URL. If anchors
 * support the `download` attribute, a hidden link is clicked programmatically;
 * otherwise navigator.msSaveBlob is used when present, and as a last resort
 * the page navigates to the object URL.
 *
 * @param {*} data content of the file.
 * @param {string} mimeType MIME type of the blob.
 * @param {string} [fileName] download name; falls back to document.title, then "result".
 */
function saveFile(data, mimeType, fileName) {
    const targetName = fileName || document.title || "result";

    let blob;
    if (typeof window.Blob === "function") {
        blob = new Blob([data], { type: mimeType });
    } else {
        const LegacyBuilder = window.BlobBuilder || window.MozBlobBuilder || window.WebKitBlobBuilder || window.MSBlobBuilder;
        const builder = new LegacyBuilder();
        builder.append(data);
        blob = builder.getBlob(mimeType);
    }

    const urlApi = window.URL || window.webkitURL;
    const objectUrl = urlApi.createObjectURL(blob);
    const anchor = document.createElement("a");

    if (!('download' in anchor)) {
        if (navigator.msSaveBlob) {
            navigator.msSaveBlob(blob, targetName);
        } else {
            location.href = objectUrl;
        }
        return;
    }

    // Simulate a click on a temporary, invisible download link.
    anchor.style.visibility = "hidden";
    anchor.href = objectUrl;
    anchor.download = targetName;
    document.body.appendChild(anchor);
    const clickEvent = document.createEvent("MouseEvents");
    clickEvent.initEvent("click", true, true);
    anchor.dispatchEvent(clickEvent);
    document.body.removeChild(anchor);
}
/**
 * Shows a preview of the extracted rows and saves them as CSV on confirmation.
 *
 * Row 0 of `data` is left out of the preview and of the item count; the
 * preview covers rows 1 to 49. When the user confirms, all rows (including
 * row 0) are formatted and passed to saveFile() with MIME type "text/csv".
 *
 * @param {string[][]} data rows to save.
 */
function saveFileAsk(data) {
    const preview = formatCSV(data.slice(1, 50)).trim() || "- Empty -";
    const question = `Click confirm to download if the sample data looks good (${data.length-1} items)\n\n${preview}`;
    if (!confirm(question)) {
        return;
    }
    saveFile(formatCSV(data), "text/csv");
}

7
styles/bootstrap.min.css vendored Normal file

File diff suppressed because one or more lines are too long

7
styles/bootstrap.min.js vendored Normal file

File diff suppressed because one or more lines are too long