init
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
.DS_Store
|
||||
38
manifest.json
Executable file
38
manifest.json
Executable file
@ -0,0 +1,38 @@
|
||||
{
|
||||
"manifest_version": 2,
|
||||
"name": "Data Extracter",
|
||||
"version": "0.0.1",
|
||||
"author": "jebbs",
|
||||
"description": "Extract data as sheet from web page elements。",
|
||||
"icons": {
|
||||
"16": "icon.png",
|
||||
"48": "icon.png",
|
||||
"128": "icon.png"
|
||||
},
|
||||
"browser_action": {
|
||||
"default_icon": "icon.png",
|
||||
"default_popup": "popup/tip.html",
|
||||
"default_title": "Data Extracter"
|
||||
},
|
||||
"background": {
|
||||
"scripts": [
|
||||
"scripts/tools.js",
|
||||
"scripts/extract.js",
|
||||
"scripts/background.js"
|
||||
],
|
||||
"persistent": false
|
||||
},
|
||||
"content_scripts": [{
|
||||
"matches": ["*://*/*"],
|
||||
"js": [
|
||||
"scripts/jquery.min.js",
|
||||
"scripts/tools.js",
|
||||
"scripts/content.js"
|
||||
],
|
||||
"run_at": "document_idle"
|
||||
}],
|
||||
"permissions": [
|
||||
"activeTab",
|
||||
"storage"
|
||||
]
|
||||
}
|
||||
BIN
popup/demo.png
Normal file
BIN
popup/demo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 35 KiB |
52
popup/index.html
Executable file
52
popup/index.html
Executable file
@ -0,0 +1,52 @@
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Data Extractor</title>
|
||||
<script charset="UTF-8" type="text/javascript" src="../scripts/jquery.min.js"></script>
|
||||
<script charset="UTF-8" type="text/javascript" src="../styles/bootstrap.min.js"></script>
|
||||
<script charset="UTF-8" type="text/javascript" src="../scripts/tools.js"></script>
|
||||
<script charset="UTF-8" type="text/javascript" src="../scripts/extract.js"></script>
|
||||
<script charset="UTF-8" type="text/javascript" src="popup.js"></script>
|
||||
|
||||
<link rel="stylesheet" href="../styles/bootstrap.min.css">
|
||||
</head>
|
||||
|
||||
<body style="margin: 20px 10px;">
|
||||
<div class="container-fluid" style="min-width: 420px;">
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<h3>Data Extractor</h3>
|
||||
<div class="alert alert-info small" role="alert">
|
||||
Selector fields require
|
||||
<b>JQuery Selectors</b> to select data items and fields.
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<form>
|
||||
<div class="form-group">
|
||||
<label for="formFileName">File Name</label>
|
||||
<input type="text" class="form-control" id="formFileName" placeholder="fileName">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="formItemSelector">Items Selector</label>
|
||||
<input type="text" class="form-control" id="formItemSelector" placeholder=".item-selector">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="formFieldSelector">Field Selectors (one selector per line)</label>
|
||||
<textarea class="form-control" id="formFieldSelector" rows="3" placeholder=".field-selector"></textarea>
|
||||
</div>
|
||||
</form>
|
||||
<input type="button" class="btn btn-primary" value="Extract" id="doExtract">
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
10
popup/popup.js
Executable file
10
popup/popup.js
Executable file
@ -0,0 +1,10 @@
|
||||
// Once the popup document is ready, route clicks on the "Extract" button
// (#doExtract) to the doExtract handler.
$().ready(function () {
    return $("#doExtract").on('click', doExtract);
});
|
||||
|
||||
/**
 * Click handler for the popup's "Extract" button.
 *
 * Reads the items selector and the field selectors (one per line, blank
 * lines ignored) from the form and hands them to `extract`.
 *
 * NOTE(review): the previous implementation passed the raw textarea content
 * to `eval`. That executes arbitrary text as code, and extension pages
 * reject `eval` under the default content security policy, so the button
 * could never work. The parsed selectors are now used instead.
 *
 * The "File Name" field (#formFileName) is not read here: `extract` ends in
 * `saveFileAsk(data)`, which offers no way to pass a file name.
 */
function doExtract() {
    const itemSelector = $("#formItemSelector")[0].value.trim();
    const fieldSelectors = $("#formFieldSelector")[0].value
        .split('\n')
        .map(s => s.trim())
        .filter(s => s.length > 0);

    if (!itemSelector || fieldSelectors.length === 0) {
        console.warn("An items selector and at least one field selector are required.");
        return;
    }

    // `extract` is expected to return a promise; wrap it so a plain return
    // value cannot break the rejection handler.
    Promise.resolve(extract(itemSelector, fieldSelectors)).catch(
        err => console.error(err)
    );
}
|
||||
46
popup/tip.html
Normal file
46
popup/tip.html
Normal file
@ -0,0 +1,46 @@
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Data Extractor</title>
|
||||
<script charset="UTF-8" type="text/javascript" src="../scripts/jquery.min.js"></script>
|
||||
<script charset="UTF-8" type="text/javascript" src="../styles/bootstrap.min.js"></script>
|
||||
|
||||
<link rel="stylesheet" href="../styles/bootstrap.min.css">
|
||||
</head>
|
||||
|
||||
<body style="margin: 20px 10px;">
|
||||
<div class="container-fluid" style="min-width: 420px;">
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<h3>Data Extractor</h3>
|
||||
<div class="alert alert-info small" role="alert">
|
||||
<h6>Usage:</h6>
|
||||
<p>
|
||||
<b>Open console</b> and
|
||||
<b>switch to Data Extracter</b>, then call the
|
||||
<b>extract</b> function.
|
||||
</p>
|
||||
<p>
|
||||
<img src="demo.png" alt="" style="max-width: 489px; width: 100%; border-radius: 5px">
|
||||
</p>
|
||||
<p>
|
||||
<b>Example</b>:<br> extract("table tr", ["td:nth-child(1)","td:nth-child(2)"])
|
||||
</p>
|
||||
</div>
|
||||
<div class="alert alert-info small" role="alert">
|
||||
<h6>Interface:</h6>
|
||||
<p>function extract(itemsSelector:string, fieldSelectors:string[]) </p>
|
||||
<p>function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)</p>
|
||||
<p>function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])</p>
|
||||
</div>
|
||||
<!-- <div class="alert alert-danger small" role="alert">
|
||||
Selector means
|
||||
<b>JQuery Selectors</b> which is used to select data items and fields.
|
||||
</div> -->
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
6
scripts/background.js
Normal file
6
scripts/background.js
Normal file
@ -0,0 +1,6 @@
|
||||
// Background listener: a message tagged "doExtractRequest" carries the
// argument list for `extract` in `message.args`. The extraction is started
// with those arguments and any rejection is written to the console.
// `sendResponse` is never called from here.
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
    if (message.from !== "doExtractRequest") {
        return;
    }
    const logFailure = err => console.log(err);
    extract(...message.args).catch(logFailure);
});
|
||||
64
scripts/content.js
Normal file
64
scripts/content.js
Normal file
@ -0,0 +1,64 @@
|
||||
// Content-script message dispatcher. `request.from` selects the action:
//   "doExtractRequest"  - run extractData with the supplied selectors and
//                         reply with the extracted rows;
//   "doExtractGotoUrl"  - navigate this page to request.url (no reply);
//   "doExtractReportIn" - reply with the tag itself.
// Any other message is ignored.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
    const kind = request.from;

    if (kind == "doExtractRequest") {
        sendResponse(extractData(request.itemsSelector, request.fieldSelectors));
        return;
    }

    if (kind == "doExtractGotoUrl") {
        window.location.replace(request.url);
        return;
    }

    if (kind == "doExtractReportIn") {
        sendResponse(kind);
    }
});
|
||||
|
||||
/**
 * Builds a table of text values from the current document.
 *
 * @param {string} itemsSelector - jQuery selector matching one element per row.
 * @param {string[]} fieldSelectors - jQuery selectors evaluated inside each
 *     row element; one output cell per selector.
 * @returns {string[][]} One array per matched item. Each cell is the trimmed
 *     textContent of every match for that field, joined with "\n" (an empty
 *     string when nothing matches).
 */
function extractData(itemsSelector, fieldSelectors) {
    const readField = (item, selector) => {
        const texts = [];
        for (const node of $(item).find(selector).toArray()) {
            texts.push(node.textContent.trim());
        }
        return texts.join('\n');
    };

    const rows = [];
    for (const item of $(itemsSelector).toArray()) {
        rows.push(fieldSelectors.map(selector => readField(item, selector)));
    }
    return rows;
}
|
||||
|
||||
/**
 * Console entry point of the content script.
 *
 *   extract(itemsSelector, fieldSelectors)
 *   extract(itemsSelector, fieldSelectors, url, from, to, interval)
 *   extract(itemsSelector, fieldSelectors, url, pages)
 *
 * The arguments are validated with `testArgs`; on failure the supported
 * signatures are logged and nothing else happens.
 *
 * With two arguments the current page is extracted directly and the result
 * is offered for download. `saveFileAsk` treats the first row as a header
 * (its preview starts at row 1 and it reports `length - 1` items), so the
 * field selectors are prepended as that header row. Without it the first
 * data row would be hidden from the preview and the item count would be
 * off by one.
 *
 * With more arguments the call is forwarded unchanged to the background
 * page as a "doExtractRequest" message; a truthy reply is logged and shown
 * in an alert.
 *
 * @param {...*} args - See the signatures above.
 * @returns {undefined}
 */
function extract(...args) {
    const sig = `Invalid call args.
function extract(itemsSelector:string, fieldSelectors:string[])
function extract(itemsSelector:string, fieldSelectors:string[], url:string, from:number, to:number, interval:number)
function extract(itemsSelector:string, fieldSelectors:string[], url:string, pages:number[])`;

    if (!testArgs(...args)) {
        console.log(sig);
        return;
    }

    if (args.length === 2) {
        const [itemsSelector, fieldSelectors] = args;
        const rows = extractData(itemsSelector, fieldSelectors);
        saveFileAsk([fieldSelectors, ...rows]);
        return;
    }

    const message = {
        from: "doExtractRequest",
        args: args
    };
    chrome.runtime.sendMessage(message, r => {
        if (r) {
            console.log(r);
            alert(r);
        }
    });
}
|
||||
|
||||
/**
 * Validates an argument list for `extract`.
 *
 * Accepted shapes:
 *   (itemsSelector, fieldSelectors)
 *   (itemsSelector, fieldSelectors, url, pages[])
 *   (itemsSelector, fieldSelectors, url, from, to, interval)
 *
 * `itemsSelector` must be a non-empty string and `fieldSelectors` an array
 * in every shape (previously they were only checked in the two-argument
 * shape). In the range shape `from`, `to` and `interval` must be finite
 * numbers and `interval` must be positive: numeric strings or a zero or
 * negative step would make a `for (i = from; i <= to; i += interval)` loop
 * never terminate.
 *
 * @param {...*} args - Candidate arguments.
 * @returns {boolean} true when the arguments match one of the shapes.
 */
function testArgs(...args) {
    if (args.length < 2) return false;

    const [itemsSelector, fieldSelectors, url, ...paging] = args;

    const selectorsOk =
        typeof itemsSelector === "string" &&
        itemsSelector !== "" &&
        Array.isArray(fieldSelectors);
    if (!selectorsOk) return false;

    if (args.length === 2) return true;

    if (typeof url !== "string") return false;

    // Explicit page list.
    if (Array.isArray(paging[0])) return true;

    // from / to / interval range.
    const [from, to, interval] = paging;
    const isNumber = v => typeof v === "number" && Number.isFinite(v);
    return isNumber(from) && isNumber(to) && isNumber(interval) && interval > 0;
}
|
||||
121
scripts/extract.js
Normal file
121
scripts/extract.js
Normal file
@ -0,0 +1,121 @@
|
||||
/**
 * Extracts rows from the active tab (optionally across several pages) and
 * offers the result for download through `saveFileAsk`.
 *
 *   extract(itemsSelector, fieldSelectors)
 *   extract(itemsSelector, fieldSelectors, url, pages)
 *   extract(itemsSelector, fieldSelectors, url, from, to, interval)
 *
 * When page URLs are produced, the active tab is redirected to each one in
 * order (`redirectTab`) and extracted (`extractTabData`); otherwise only the
 * page currently shown is extracted. The field selectors are placed in front
 * of the collected rows as a header row.
 *
 * Changes from the earlier version:
 *  - a page that yields no array (e.g. an `undefined` response) is skipped
 *    instead of crashing the spread;
 *  - an error thrown while saving rejects the returned promise instead of
 *    leaving it pending forever;
 *  - a missing active tab rejects instead of throwing inside the callback;
 *  - a non-positive `interval` rejects instead of looping forever;
 *  - every "${page}" occurrence in `url` is substituted, not just the first.
 *
 * @param {string} itemsSelector - Selector for row elements.
 * @param {string[]} fieldSelectors - Selectors for the cells of each row.
 * @param {string} [url] - URL template containing the literal "${page}".
 * @param {...*} args - Either `[pages]` or `from, to, interval`.
 * @returns {Promise<string>} Resolves with "save done." once `saveFileAsk`
 *     has returned; rejects with the first failure.
 */
function extract(itemsSelector, fieldSelectors, url, ...args) {
    return new Promise((resolve, reject) => {
        // A throw here (invalid interval) becomes a rejection.
        const urls = buildPageUrls(url, args);

        chrome.tabs.query({
            active: true,
            currentWindow: true
        }, function (tabs) {
            const tab = tabs && tabs[0];
            if (!tab) {
                reject(new Error("extract failed: no active tab in the current window."));
                return;
            }

            const data = [];
            const collect = rows => {
                if (Array.isArray(rows)) data.push(...rows);
            };

            let pending;
            if (urls.length) {
                // Visit the pages strictly one after another; each step first
                // stores the rows of the previous page.
                pending = urls.reduce(
                    (chain, pageUrl) => chain.then(rows => {
                        collect(rows);
                        return redirectTab(tab, pageUrl).then(
                            target => extractTabData(target, itemsSelector, fieldSelectors)
                        );
                    }),
                    Promise.resolve([])
                );
            } else {
                pending = extractTabData(tab, itemsSelector, fieldSelectors);
            }

            pending
                .then(rows => {
                    collect(rows);
                    data.unshift(fieldSelectors);
                    saveFileAsk(data);
                    return "save done.";
                })
                .then(resolve, reject);
        });
    });
}

// Expands the URL template into the list of page URLs to visit; returns an
// empty list when no url or no paging arguments were supplied.
function buildPageUrls(url, pagingArgs) {
    if (!url) return [];

    const forPage = page => url.split("${page}").join(String(page));

    if (Array.isArray(pagingArgs[0])) {
        return pagingArgs[0].map(forPage);
    }
    if (pagingArgs.length < 3) return [];

    const [from, to, interval] = pagingArgs;
    if (!(interval > 0)) {
        throw new Error("extract failed: interval must be a positive number.");
    }
    const urls = [];
    for (let page = from; page <= to; page += interval) {
        urls.push(forPage(page));
    }
    return urls;
}
|
||||
|
||||
/**
 * Asks the content script in `tab` to extract rows with the given selectors.
 *
 * Sends a "doExtractRequest" message to the tab and resolves with whatever
 * the content script replies. The promise is rejected (with a string, like
 * the other rejections in this file) when:
 *  - no reply arrives within 2 seconds, or
 *  - Chrome reports a delivery problem through `chrome.runtime.lastError`
 *    (previously this case resolved with `undefined`).
 *
 * The timeout timer is cancelled as soon as the callback runs, so no stray
 * timer outlives a successful request.
 *
 * @param {{id: number}} tab - Target tab; only `tab.id` is used.
 * @param {string} itemsSelector - Selector for row elements.
 * @param {string[]} fieldSelectors - Selectors for the cells of each row.
 * @returns {Promise<*>} The content script's response.
 */
function extractTabData(tab, itemsSelector, fieldSelectors) {
    return new Promise((resolve, reject) => {
        const request = {
            from: "doExtractRequest",
            itemsSelector: itemsSelector,
            fieldSelectors: fieldSelectors
        };

        const timer = setTimeout(() => {
            reject(`extractTabData failed after 2 second.`);
        }, 2000);

        chrome.tabs.sendMessage(tab.id, request, response => {
            clearTimeout(timer);
            const lastError = chrome.runtime.lastError;
            if (lastError) {
                reject(`extractTabData failed: ${lastError.message}`);
                return;
            }
            resolve(response);
        });
    });
}
|
||||
|
||||
/**
 * Navigates `tab` to `url` and waits until its content script answers again.
 *
 * Steps:
 *  1. Send "doExtractGotoUrl" so the content script replaces the location.
 *  2. Once that message's callback has run, wait 500 ms, then send
 *     "doExtractReportIn" every 100 ms.
 *  3. Resolve with `tab` as soon as a reply equal to "doExtractReportIn"
 *     arrives; reject with a string if that has not happened after 10 s.
 *
 * NOTE(review): a page that has not started navigating after the 500 ms
 * delay would also answer the report-in message; this heuristic is kept
 * unchanged.
 *
 * Changes from the earlier version: both timers are cancelled once the
 * promise settles (the 10 s timer used to stay armed after success and one
 * extra poll message was sent), and `chrome.runtime.lastError` is read in
 * the callbacks because replies are expected to fail while the page loads.
 *
 * @param {{id: number}} tab - Tab to redirect; only `tab.id` is used.
 * @param {string} url - Destination URL.
 * @returns {Promise<Object>} Resolves with the same `tab` object.
 */
function redirectTab(tab, url) {
    return new Promise((resolve, reject) => {
        let settled = false;
        let pollTimer;

        const failTimer = setTimeout(() => {
            if (settled) return;
            settled = true;
            clearTimeout(pollTimer);
            reject(`redirectTab failed after 10 second.`);
        }, 10000);

        function succeed() {
            if (settled) return;
            settled = true;
            clearTimeout(pollTimer);
            clearTimeout(failTimer);
            resolve(tab);
        }

        function detectLoop() {
            if (settled) return;
            const req = {
                from: "doExtractReportIn"
            };
            chrome.tabs.sendMessage(tab.id, req, r => {
                // Reading lastError marks an expected delivery failure as handled.
                void chrome.runtime.lastError;
                if (r === req.from) succeed();
            });
            // Keep polling on a fixed cadence until a reply settles the promise.
            if (!settled) {
                clearTimeout(pollTimer);
                pollTimer = setTimeout(detectLoop, 100);
            }
        }

        const gotoRequest = {
            from: "doExtractGotoUrl",
            url: url
        };
        chrome.tabs.sendMessage(tab.id, gotoRequest, () => {
            void chrome.runtime.lastError;
            if (settled) return;
            // Give the navigation a moment to start before polling.
            pollTimer = setTimeout(detectLoop, 500);
        });
    });
}
|
||||
|
||||
/**
 * Opens `url` in a new, active tab.
 *
 * @param {string} url - Address to open.
 * @returns {Promise<Object>} Resolves with the tab object passed to the
 *     `chrome.tabs.create` callback. Rejects with a string when Chrome
 *     reports a failure through `chrome.runtime.lastError` (previously the
 *     promise resolved with `undefined` in that case).
 */
function createTab(url) {
    return new Promise((resolve, reject) => {
        const options = {
            active: true,
            url: url
        };
        chrome.tabs.create(options, tab => {
            const lastError = chrome.runtime.lastError;
            if (lastError) {
                reject(`createTab failed: ${lastError.message}`);
                return;
            }
            resolve(tab);
        });
    });
}
|
||||
4
scripts/jquery.min.js
vendored
Executable file
4
scripts/jquery.min.js
vendored
Executable file
File diff suppressed because one or more lines are too long
53
scripts/tools.js
Normal file
53
scripts/tools.js
Normal file
@ -0,0 +1,53 @@
|
||||
/**
 * Serialises a table into CSV text.
 *
 * Every cell is trimmed, wrapped in double quotes and has embedded double
 * quotes doubled. Cells are joined with "," and every row, including the
 * last, is terminated with "\n".
 *
 * Non-string cells no longer throw: `null`/`undefined` become an empty cell
 * and any other value is converted with `String()`.
 *
 * @param {Array<Array<*>>} data - Rows of cells.
 * @returns {string} CSV text; "" for an empty table.
 */
function formatCSV(data) {
    const quoteCell = cell => {
        const text = cell == null ? "" : String(cell);
        return '"' + text.trim().replace(/"/g, '""') + '"';
    };

    return data
        .map(lineCells => lineCells.map(quoteCell).join(",") + "\n")
        .join("");
}
|
||||
|
||||
/**
 * Triggers a browser download of `data`.
 *
 * A Blob is built from the data (falling back to the legacy BlobBuilder
 * variants when `window.Blob` is not a constructor). Delivery then depends
 * on what the environment offers:
 *  1. anchors with a `download` attribute: a hidden link to an object URL
 *     is added to the document, clicked and removed again;
 *  2. `navigator.msSaveBlob`: the blob is handed over directly;
 *  3. otherwise the page navigates to the object URL.
 *
 * Changes from the earlier version: the object URL is only created on the
 * paths that use it and, on path 1, is revoked after a delay so the blob
 * can be released (it used to stay alive for the lifetime of the page).
 * The deprecated createEvent/initEvent click is replaced by `link.click()`.
 *
 * @param {string} data - File content.
 * @param {string} mimeType - MIME type for the blob, e.g. "text/csv".
 * @param {string} [fileName] - Download name; defaults to `document.title`,
 *     then to "result".
 * @returns {undefined}
 */
function saveFile(data, mimeType, fileName) {
    // Delay before the object URL is revoked, giving the browser time to
    // start reading the blob for the download.
    const revokeDelayMs = 40000;
    const downloadName = fileName || document.title || "result";

    let blob;
    if (typeof window.Blob == "function") {
        blob = new Blob([data], {
            type: mimeType
        });
    } else {
        const LegacyBlobBuilder = window.BlobBuilder || window.MozBlobBuilder || window.WebKitBlobBuilder || window.MSBlobBuilder;
        const builder = new LegacyBlobBuilder();
        builder.append(data);
        blob = builder.getBlob(mimeType);
    }

    const urlApi = window.URL || window.webkitURL;
    const link = document.createElement("a");

    if ('download' in link) {
        const url = urlApi.createObjectURL(blob);
        link.style.visibility = "hidden";
        link.href = url;
        link.download = downloadName;
        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);
        setTimeout(() => urlApi.revokeObjectURL(url), revokeDelayMs);
    } else if (navigator.msSaveBlob) {
        navigator.msSaveBlob(blob, downloadName);
    } else {
        // The page navigates to the URL, so it must stay valid.
        location.href = urlApi.createObjectURL(blob);
    }
}
|
||||
|
||||
/**
 * Shows a sample of the extracted table and downloads it as CSV if the user
 * confirms.
 *
 * The first row of `data` is treated as a header: the preview covers rows
 * 1 to 49 and the item count excludes the header row. The downloaded file
 * contains every row, header included. For an empty table the count is
 * reported as 0 (it used to read "-1 items").
 *
 * @param {string[][]} data - Header row followed by the data rows.
 * @returns {undefined}
 */
function saveFileAsk(data) {
    const itemCount = Math.max(data.length - 1, 0);
    const sample = formatCSV(data.slice(1, 50)).trim() || "- Empty -";
    const question = `Click confirm to download if the sample data looks good (${itemCount} items):\n\n${sample}`;

    if (confirm(question)) {
        saveFile(formatCSV(data), "text/csv");
    }
}
|
||||
7
styles/bootstrap.min.css
vendored
Normal file
7
styles/bootstrap.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
7
styles/bootstrap.min.js
vendored
Normal file
7
styles/bootstrap.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user