init
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
.DS_Store
|
||||||
38
manifest.json
Executable file
38
manifest.json
Executable file
@ -0,0 +1,38 @@
|
|||||||
|
{
|
||||||
|
"manifest_version": 2,
|
||||||
|
"name": "Data Extracter",
|
||||||
|
"version": "0.0.1",
|
||||||
|
"author": "jebbs",
|
||||||
|
"description": "Extract data as sheet from web page elements。",
|
||||||
|
"icons": {
|
||||||
|
"16": "icon.png",
|
||||||
|
"48": "icon.png",
|
||||||
|
"128": "icon.png"
|
||||||
|
},
|
||||||
|
"browser_action": {
|
||||||
|
"default_icon": "icon.png",
|
||||||
|
"default_popup": "popup/tip.html",
|
||||||
|
"default_title": "Data Extracter"
|
||||||
|
},
|
||||||
|
"background": {
|
||||||
|
"scripts": [
|
||||||
|
"scripts/tools.js",
|
||||||
|
"scripts/extract.js",
|
||||||
|
"scripts/background.js"
|
||||||
|
],
|
||||||
|
"persistent": false
|
||||||
|
},
|
||||||
|
"content_scripts": [{
|
||||||
|
"matches": ["*://*/*"],
|
||||||
|
"js": [
|
||||||
|
"scripts/jquery.min.js",
|
||||||
|
"scripts/tools.js",
|
||||||
|
"scripts/content.js"
|
||||||
|
],
|
||||||
|
"run_at": "document_idle"
|
||||||
|
}],
|
||||||
|
"permissions": [
|
||||||
|
"activeTab",
|
||||||
|
"storage"
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
popup/demo.png
Normal file
BIN
popup/demo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 35 KiB |
52
popup/index.html
Executable file
52
popup/index.html
Executable file
@ -0,0 +1,52 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Data Extractor</title>
|
||||||
|
<script charset="UTF-8" type="text/javascript" src="../scripts/jquery.min.js"></script>
|
||||||
|
<script charset="UTF-8" type="text/javascript" src="../styles/bootstrap.min.js"></script>
|
||||||
|
<script charset="UTF-8" type="text/javascript" src="../scripts/tools.js"></script>
|
||||||
|
<script charset="UTF-8" type="text/javascript" src="../scripts/extract.js"></script>
|
||||||
|
<script charset="UTF-8" type="text/javascript" src="popup.js"></script>
|
||||||
|
|
||||||
|
<link rel="stylesheet" href="../styles/bootstrap.min.css">
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body style="margin: 20px 10px;">
|
||||||
|
<div class="container-fluid" style="min-width: 420px;">
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
<h3>Data Extractor</h3>
|
||||||
|
<div class="alert alert-info small" role="alert">
|
||||||
|
Selector fields require
|
||||||
|
<b>JQuery Selectors</b> to select data items and fields.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
<form>
|
||||||
|
<div class="form-group">
|
||||||
|
<label for="formFileName">File Name</label>
|
||||||
|
<input type="text" class="form-control" id="formFileName" placeholder="fileName">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label for="formItemSelector">Items Selector</label>
|
||||||
|
<input type="text" class="form-control" id="formItemSelector" placeholder=".item-selector">
|
||||||
|
</div>
|
||||||
|
<div class="form-group">
|
||||||
|
<label for="formFieldSelector">Field Selectors (one selector per line)</label>
|
||||||
|
<textarea class="form-control" id="formFieldSelector" rows="3" placeholder=".field-selector"></textarea>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
<input type="button" class="btn btn-primary" value="Extract" id="doExtract">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
||||||
10
popup/popup.js
Executable file
10
popup/popup.js
Executable file
@ -0,0 +1,10 @@
|
|||||||
|
// Wire up the "Extract" button once the popup DOM is ready.
// `$(handler)` is the supported ready syntax; `$().ready(handler)` is deprecated.
$(() => {
    $("#doExtract").on('click', doExtract);
});
|
||||||
|
|
||||||
|
/**
 * Click handler for the popup's "Extract" button.
 *
 * Reads the items selector and the field selectors (one per line, blank
 * lines ignored) from the form and starts an extraction through the
 * `extract()` function loaded from ../scripts/extract.js.
 *
 * The form content is treated purely as selector text; it is never
 * evaluated as JavaScript.
 */
function doExtract() {
    const itemSelector = $("#formItemSelector")[0].value.trim();
    const fieldSelectors = $("#formFieldSelector")[0].value
        .split('\n')
        .map(s => s.trim())
        .filter(s => s.length > 0);

    if (!itemSelector || fieldSelectors.length === 0) {
        alert("Please enter an items selector and at least one field selector.");
        return;
    }

    // NOTE(review): the "File Name" input (#formFileName) is not consumed yet;
    // the save helpers called by extract() take no file name argument.
    extract(itemSelector, fieldSelectors).catch(err => console.error(err));
}
|
||||||
46
popup/tip.html
Normal file
46
popup/tip.html
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Data Extractor</title>
|
||||||
|
<script charset="UTF-8" type="text/javascript" src="../scripts/jquery.min.js"></script>
|
||||||
|
<script charset="UTF-8" type="text/javascript" src="../styles/bootstrap.min.js"></script>
|
||||||
|
|
||||||
|
<link rel="stylesheet" href="../styles/bootstrap.min.css">
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body style="margin: 20px 10px;">
|
||||||
|
<div class="container-fluid" style="min-width: 420px;">
|
||||||
|
<div class="row">
|
||||||
|
<div class="col">
|
||||||
|
<h3>Data Extractor</h3>
|
||||||
|
<div class="alert alert-info small" role="alert">
|
||||||
|
<h6>Usage:</h6>
|
||||||
|
<p>
|
||||||
|
<b>Open console</b> and
|
||||||
|
<b>switch to Data Extracter</b>, then call the
|
||||||
|
<b>extract</b> function.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
<img src="demo.png" alt="" style="max-width: 489px; width: 100%; border-radius: 5px">
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
<b>Example</b>:<br> extract("table tr", ["td:nth-child(1)","td:nth-child(2)"])
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<div class="alert alert-info small" role="alert">
|
||||||
|
<h6>Interface:</h6>
|
||||||
|
<p>function extract(itemsSelector:string, fieldSelectors:string[]) </p>
|
||||||
|
<p>function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)</p>
|
||||||
|
<p>function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])</p>
|
||||||
|
</div>
|
||||||
|
<!-- <div class="alert alert-danger small" role="alert">
|
||||||
|
Selector means
|
||||||
|
<b>JQuery Selectors</b> which is used to select data items and fields.
|
||||||
|
</div> -->
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
||||||
6
scripts/background.js
Normal file
6
scripts/background.js
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
// Handles "doExtractRequest" messages: runs extract() with the forwarded
// arguments and reports failures back to the sender.
//
// The listener returns `true` so the message channel stays open until the
// asynchronous extraction settles. On success an empty response is sent
// (the sender only reacts to a truthy response); on failure the error text
// is sent so the sender can show it.
chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
    if (!message || message.from !== "doExtractRequest") return;

    if (!Array.isArray(message.args)) {
        sendResponse("Invalid extract request: args must be an array.");
        return;
    }

    // Start from a resolved promise so a synchronous throw inside extract()
    // is reported through the same error path as an asynchronous rejection.
    Promise.resolve()
        .then(() => extract(...message.args))
        .then(
            () => sendResponse(),
            err => {
                console.error(err);
                sendResponse(String(err));
            }
        );
    return true;
});
|
||||||
64
scripts/content.js
Normal file
64
scripts/content.js
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
// Message dispatcher for the content script. `request.from` selects the action:
//   "doExtractRequest"  - run extractData() on this page and reply with the rows
//   "doExtractGotoUrl"  - navigate this tab to request.url (no reply is sent)
//   "doExtractReportIn" - reply with the request name to signal the script is alive
chrome.runtime.onMessage.addListener(
    function (request, sender, sendResponse) {
        if (!request) return;

        switch (request.from) {
            case "doExtractRequest":
                sendResponse(extractData(request.itemsSelector, request.fieldSelectors));
                break;
            case "doExtractGotoUrl":
                window.location.replace(request.url);
                break;
            case "doExtractReportIn":
                sendResponse(request.from);
                break;
        }
    }
);
|
||||||
|
|
||||||
|
/**
 * Collects a table of text values from the current page.
 *
 * @param {string} itemsSelector jQuery selector matching one element per row.
 * @param {string[]} fieldSelectors jQuery selectors, resolved inside each row
 *     element, one per column.
 * @returns {string[][]} One array per matched item; each cell holds the trimmed
 *     text of every element matched by the field selector, joined with "\n".
 */
function extractData(itemsSelector, fieldSelectors) {
    const rows = [];
    for (const rowElement of $(itemsSelector).toArray()) {
        const row = fieldSelectors.map(selector => {
            const texts = [];
            for (const node of $(rowElement).find(selector).toArray()) {
                texts.push(node.textContent.trim());
            }
            return texts.join('\n');
        });
        rows.push(row);
    }
    return rows;
}
|
||||||
|
|
||||||
|
/**
 * Console entry point of the content script.
 *
 * Overloads:
 *   extract(itemsSelector, fieldSelectors)
 *       Extracts from the current page and offers the result for download.
 *   extract(itemsSelector, fieldSelectors, url, from, to, interval)
 *   extract(itemsSelector, fieldSelectors, url, pages)
 *       Forwards the call to the background script via a "doExtractRequest"
 *       message; any truthy response is logged and shown in an alert.
 *
 * Arguments are validated with testArgs(); on invalid input the supported
 * signatures are logged and nothing else happens.
 */
function extract(...args) {
    const sig = `Invalid call args.
function extract(itemsSelector:string, fieldSelectors:string[])
function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])`;

    if (!testArgs(...args)) {
        console.log(sig);
        return;
    }

    if (args.length === 2) {
        const [itemsSelector, fieldSelectors] = args;
        // saveFileAsk() treats the first row as the header (it skips it in the
        // preview and in the item count), so prepend the selectors as the
        // header row, matching what the background extraction does.
        saveFileAsk([fieldSelectors].concat(extractData(itemsSelector, fieldSelectors)));
        return;
    }

    const message = {
        from: "doExtractRequest",
        args: args
    };
    chrome.runtime.sendMessage(message, r => {
        // Read lastError so a closed port is not reported as "unchecked".
        void chrome.runtime.lastError;
        if (r) {
            console.log(r);
            alert(r);
        }
    });
}
|
||||||
|
|
||||||
|
/**
 * Validates the argument list of extract().
 *
 * Accepted shapes:
 *   (itemsSelector, fieldSelectors)
 *   (itemsSelector, fieldSelectors, url, pages[])
 *   (itemsSelector, fieldSelectors, url, from, to, interval)
 *
 * `itemsSelector` must be a non-empty string and `fieldSelectors` an array in
 * every shape. `from`, `to` and `interval` must be finite numbers and
 * `interval` must be positive, because the page range is generated by
 * stepping from `from` to `to` by `interval`; a zero or negative step would
 * never finish.
 *
 * @param {...*} args The raw arguments passed to extract().
 * @returns {boolean} true when the arguments match one of the shapes above.
 */
function testArgs(...args) {
    if (args.length < 2) return false;

    const [itemsSelector, fieldSelectors, url, ...paging] = args;
    if (typeof itemsSelector !== "string" || itemsSelector === "") return false;
    if (!Array.isArray(fieldSelectors)) return false;
    if (args.length === 2) return true;

    if (typeof url !== "string") return false;
    if (Array.isArray(paging[0])) return true;

    const [from, to, interval] = paging;
    return Number.isFinite(from) &&
        Number.isFinite(to) &&
        Number.isFinite(interval) &&
        interval > 0;
}
|
||||||
121
scripts/extract.js
Normal file
121
scripts/extract.js
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
/**
 * Expands a URL template into the list of page URLs to visit.
 *
 * Every "${page}" placeholder in `url` is replaced by the page value.
 *
 * @param {string} url URL template; falsy means "no paging".
 * @param {Array} args Either [pages[]] or [from, to, interval, ...].
 * @returns {string[]} The URLs to visit; empty when no paging was requested.
 * @throws {Error} When a numeric range is given with non-finite bounds or a
 *     non-positive interval (the range would otherwise never terminate).
 */
function buildPageUrls(url, args) {
    if (!url) return [];

    const fill = page => url.split("${page}").join(page);
    if (Array.isArray(args[0])) return args[0].map(fill);
    if (args.length < 3) return [];

    const [from, to, interval] = args;
    if (!Number.isFinite(from) || !Number.isFinite(to) || !Number.isFinite(interval) || interval <= 0) {
        throw new Error("extract: from, to and interval must be finite numbers with interval > 0.");
    }
    const urls = [];
    for (let page = from; page <= to; page += interval) {
        urls.push(fill(page));
    }
    return urls;
}

/**
 * Extracts rows from the active tab of the current window and offers them
 * for download through saveFileAsk().
 *
 * Overloads:
 *   extract(itemsSelector, fieldSelectors)
 *   extract(itemsSelector, fieldSelectors, url, from, to, interval)
 *   extract(itemsSelector, fieldSelectors, url, pages)
 *
 * When page URLs are produced, the tab is redirected to each URL in order
 * and the rows of every page are appended. `fieldSelectors` is emitted as
 * the first (header) row.
 *
 * @param {string} itemsSelector Selector for the row elements.
 * @param {string[]} fieldSelectors Selectors for the cells of each row.
 * @param {string} [url] URL template containing "${page}".
 * @param {...*} args Paging arguments, see buildPageUrls().
 * @returns {Promise<string>} Resolves with "save done." after saveFileAsk()
 *     returns; rejects when the arguments are invalid, no active tab exists,
 *     a redirect or extraction fails, or the tab returns no data.
 */
function extract(itemsSelector, fieldSelectors, url, ...args) {
    return new Promise((resolve, reject) => {
        // A throw inside the executor rejects the promise, so invalid paging
        // arguments reach the caller's .catch() instead of escaping.
        const urls = buildPageUrls(url, args);

        chrome.tabs.query({
            active: true,
            currentWindow: true
        }, function (tabs) {
            const tab = tabs && tabs[0];
            if (!tab) {
                reject(new Error("extract: no active tab found."));
                return;
            }

            const rows = [];
            const collect = results => {
                if (!Array.isArray(results)) {
                    throw new Error("extract: the tab returned no data (is the content script loaded on this page?)");
                }
                for (const row of results) rows.push(row);
            };

            let pending;
            if (urls.length) {
                // Visit the pages strictly one after another.
                pending = urls.reduce(
                    (chain, pageUrl) => chain
                        .then(() => redirectTab(tab, pageUrl))
                        .then(readyTab => extractTabData(readyTab, itemsSelector, fieldSelectors))
                        .then(collect),
                    Promise.resolve()
                );
            } else {
                pending = extractTabData(tab, itemsSelector, fieldSelectors).then(collect);
            }

            pending.then(() => {
                saveFileAsk([fieldSelectors].concat(rows));
                resolve("save done.");
            }).catch(reject);
        });
    });
}
|
||||||
|
|
||||||
|
/**
 * Asks the content script in `tab` to extract rows from its page.
 *
 * Sends a "doExtractRequest" message carrying both selectors and waits for
 * the reply.
 *
 * @param {{id: number}} tab Target tab.
 * @param {string} itemsSelector Selector for the row elements.
 * @param {string[]} fieldSelectors Selectors for the cells of each row.
 * @returns {Promise<*>} Resolves with the content script's response;
 *     rejects after 2 seconds without a reply, or when the message could not
 *     be delivered (chrome.runtime.lastError is set).
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    return new Promise((resolve, reject) => {
        const timer = setTimeout(
            () => reject(`extractTabData failed after 2 second.`),
            2000
        );

        chrome.tabs.sendMessage(
            tab.id,
            {
                from: "doExtractRequest",
                itemsSelector: itemsSelector,
                fieldSelectors: fieldSelectors
            },
            response => {
                // The reply arrived, so the timeout is no longer needed.
                clearTimeout(timer);
                const lastError = chrome.runtime && chrome.runtime.lastError;
                if (lastError) {
                    reject(new Error(`extractTabData failed: ${lastError.message}`));
                    return;
                }
                resolve(response);
            }
        );
    });
}
|
||||||
|
|
||||||
|
/**
 * Navigates `tab` to `url` and waits until its content script answers again.
 *
 * A "doExtractGotoUrl" message asks the content script to navigate. After
 * `settleDelay` ms the tab is polled every `pollInterval` ms with
 * "doExtractReportIn" messages until a reply equal to that request name
 * arrives.
 *
 * @param {{id: number}} tab Tab to redirect.
 * @param {string} url Destination URL.
 * @param {Object} [options] Timing overrides (defaults match the original
 *     hard-coded values).
 * @param {number} [options.settleDelay=500] Delay before the first poll, ms.
 * @param {number} [options.pollInterval=100] Delay between polls, ms.
 * @param {number} [options.timeout=10000] Overall time limit, ms.
 * @returns {Promise<{id: number}>} Resolves with `tab` once the content
 *     script reports in; rejects with a message string when the time limit
 *     is reached first.
 */
function redirectTab(tab, url, { settleDelay = 500, pollInterval = 100, timeout = 10000 } = {}) {
    return new Promise((resolve, reject) => {
        let settled = false;
        let pollTimer;

        const deadline = setTimeout(() => {
            if (settled) return;
            settled = true;
            clearTimeout(pollTimer);
            reject(`redirectTab failed after ${timeout / 1000} second.`);
        }, timeout);

        function poll() {
            if (settled) return;
            const req = {
                from: "doExtractReportIn"
            };
            chrome.tabs.sendMessage(tab.id, req, r => {
                // Delivery errors are expected while the page is still
                // loading; read lastError so Chrome does not log them.
                void (chrome.runtime && chrome.runtime.lastError);
                if (settled || r !== req.from) return;
                settled = true;
                clearTimeout(deadline);
                clearTimeout(pollTimer);
                resolve(tab);
            });
            // Keep polling on a fixed cadence, independent of whether the
            // previous poll has been answered yet.
            if (!settled) pollTimer = setTimeout(poll, pollInterval);
        }

        const gotoReq = {
            from: "doExtractGotoUrl",
            url: url
        };
        chrome.tabs.sendMessage(tab.id, gotoReq, () => {
            void (chrome.runtime && chrome.runtime.lastError);
            if (!settled) pollTimer = setTimeout(poll, settleDelay);
        });
    });
}
|
||||||
|
|
||||||
|
/**
 * Opens `url` in a new active tab.
 *
 * @param {string} url Address to open.
 * @returns {Promise<Object>} Resolves with the tab passed to the
 *     chrome.tabs.create callback; rejects when chrome.runtime.lastError is
 *     set, i.e. the tab could not be created.
 */
function createTab(url) {
    return new Promise((resolve, reject) => {
        chrome.tabs.create({
            active: true,
            url: url
        }, function (tab) {
            const lastError = chrome.runtime && chrome.runtime.lastError;
            if (lastError) {
                reject(new Error(`createTab failed: ${lastError.message}`));
                return;
            }
            resolve(tab);
        });
    });
}
|
||||||
4
scripts/jquery.min.js
vendored
Executable file
4
scripts/jquery.min.js
vendored
Executable file
File diff suppressed because one or more lines are too long
53
scripts/tools.js
Normal file
53
scripts/tools.js
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
/**
 * Serializes rows of cells as CSV text.
 *
 * Every cell is trimmed, wrapped in double quotes, and embedded double
 * quotes are doubled. Cells of a row are joined with "," and every row is
 * terminated by "\n". Non-string cells are converted with String();
 * null/undefined become an empty cell.
 *
 * @param {Array<Array<*>>} data Rows of cells.
 * @returns {string} CSV text; "" for an empty row list.
 */
function formatCSV(data) {
    const quote = cell => {
        const text = cell == null ? "" : String(cell);
        return '"' + text.trim().replace(/"/g, '""') + '"';
    };
    return data
        .map(lineCells => lineCells.map(cell => quote(cell)).join(",") + "\n")
        .join("");
}
|
||||||
|
|
||||||
|
/**
 * Offers `data` to the user as a file download.
 *
 * A Blob of `mimeType` is created and exposed through an object URL. The
 * download is triggered by clicking a temporary hidden <a download> link;
 * when the `download` attribute is unsupported it falls back to
 * navigator.msSaveBlob and finally to navigating to the object URL.
 *
 * @param {string} data File content.
 * @param {string} mimeType MIME type of the content, e.g. "text/csv".
 * @param {string} [fileName] Download name; defaults to document.title, then
 *     "result".
 */
function saveFile(data, mimeType, fileName) {
    const name = fileName || document.title || "result";
    // The legacy BlobBuilder fallback was dropped: this file already relies
    // on ES2015 syntax, and every engine that parses it has the Blob constructor.
    const blob = new Blob([data], {
        type: mimeType
    });
    const urlApi = window.URL || window.webkitURL;
    const url = urlApi.createObjectURL(blob);
    const link = document.createElement("a");

    if ('download' in link) {
        link.style.visibility = "hidden";
        link.href = url;
        link.download = name;
        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);
        // Release the blob once the download has had time to start; revoking
        // in the same tick can cancel the download in some browsers.
        setTimeout(() => urlApi.revokeObjectURL(url), 10000);
    } else if (navigator.msSaveBlob) {
        navigator.msSaveBlob(blob, name);
        urlApi.revokeObjectURL(url);
    } else {
        // The page navigates to the object URL, so it must stay valid.
        location.href = url;
    }
}
|
||||||
|
|
||||||
|
/**
 * Shows a sample of the extracted data and downloads it as CSV on confirm.
 *
 * The first row of `data` is treated as the header: it is left out of the
 * preview (rows 1..49 are shown) and of the item count, but it is included
 * in the downloaded file.
 *
 * @param {Array<Array<*>>} data Header row followed by the data rows. Empty
 *     or missing data is handled as "no items".
 */
function saveFileAsk(data) {
    const rows = Array.isArray(data) ? data : [];
    // Never report a negative count when even the header row is missing.
    const itemCount = Math.max(rows.length - 1, 0);
    const sample = formatCSV(rows.slice(1, 50)).trim() || "- Empty -";

    if (confirm(`Click confirm to download if the sample data looks good (${itemCount} items):\n\n${sample}`)) {
        saveFile(formatCSV(rows), "text/csv");
    }
}
|
||||||
7
styles/bootstrap.min.css
vendored
Normal file
7
styles/bootstrap.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
7
styles/bootstrap.min.js
vendored
Normal file
7
styles/bootstrap.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user