// Source code carried by the patch (post-patch state), grouped by target file.
//
// Patch changes that are not JavaScript definitions:
//   - manifest.json: the background scripts are now listed as background.js,
//     csv.js, tools.js, extract.js; tools.js is no longer a content script.
//   - scripts/content.js: the two-argument branch of extract() that extracted
//     and saved the data locally was removed.
//   - scripts/tools.js: formatCSV() and saveFileAsk() were removed; saveFile()
//     is kept.

// ---------------------------------------------------------------------------
// scripts/csv.js
// ---------------------------------------------------------------------------

/**
 * In-memory table (array of rows, each row an array of cells) that can be
 * rendered as CSV text.
 */
class CSV {
    /**
     * @param {Array<Array<*>>} [data] rows of cells; an empty table when omitted
     */
    constructor(data) {
        this._data = data || [];
    }

    /**
     * @param {number} index zero-based row index
     * @returns {Array<*>|undefined} the row, or undefined when out of range
     */
    row(index) {
        return this._data[index];
    }

    /**
     * @param {number} index zero-based column index
     * @returns {Array<*>} the cell at `index` of every row, in row order;
     * rows shorter than `index` contribute undefined
     */
    column(index) {
        return this._data.map(cells => cells[index]);
    }

    /**
     * @returns {Array<Array<*>>} the underlying rows (not a copy)
     */
    get data() {
        return this._data;
    }

    /**
     * Render the table as CSV text; every row is terminated by "\n".
     * @param {number} [rowsCount] render only the first `rowsCount` rows;
     * all rows when omitted or not positive
     * @returns {string} CSV text, "" for an empty table
     */
    toString(rowsCount) {
        const rows = rowsCount > 0 ? this._data.slice(0, rowsCount) : this._data;
        return rows
            .map(cells => cells.map(cell => CSV._quote(cell)).join(",") + "\n")
            .join("");
    }

    /**
     * Quote one cell: trim it, double embedded quotes, wrap in quotes.
     * null / undefined become an empty cell and other non-string values are
     * stringified, so a stray non-string cell cannot break the export.
     * @param {*} cell
     * @returns {string}
     */
    static _quote(cell) {
        const text = cell == null ? "" : String(cell);
        return '"' + text.trim().replace(/"/g, '""') + '"';
    }
}

// ---------------------------------------------------------------------------
// scripts/extract.js
// ---------------------------------------------------------------------------

/**
 * Extract data, show a sample of it and download it as CSV once confirmed.
 * @param {string} itemsSelector items selector
 * @param {Array<string>} fieldSelectors fields selectors
 * @param {...any} args url list / url templates, page numbers, either [from, to, interval] or [...pages]
 */
async function extract(itemsSelector, fieldSelectors, ...args) {
    const result = await getData(itemsSelector, fieldSelectors, ...args);
    // Only the first 50 rows are shown in the confirmation dialog.
    const sample = result.toString(50) || "- Empty -";
    if (confirm(
            `Click confirm to download if the sample data looks good (${result.data.length} items):\n\n${sample}`
        )) {
        // Hand saveFile the CSV text itself (as the pre-patch code did)
        // instead of relying on implicit stringification of the CSV object.
        saveFile(result.toString(), "text/csv");
    }
}

/**
 * Turn the variadic url arguments of getData() into a list of urls.
 * Accepted shapes: [urls], [template, pages] or [template, from, to, interval];
 * anything else yields an empty list.
 * @param {Array<*>} args
 * @returns {Array<string>}
 * @throws {Error} when a page range is given with a non-positive interval
 */
function resolveTargetUrls(args) {
    const [first, ...rest] = args;
    if (Array.isArray(first)) return first;
    if (!first) return [];
    const urlTempl = first;
    if (Array.isArray(rest[0])) {
        return rest[0].map(p => urlTempl.replace("${page}", p));
    }
    const urls = [];
    if (rest.length >= 3) {
        const [from, to, interval] = rest;
        // A zero or negative step would never reach `to` and loop forever.
        if (!(interval > 0)) {
            throw new Error("Page interval must be a positive number.");
        }
        for (let i = from; i <= to; i += interval) {
            urls.push(urlTempl.replace("${page}", i));
        }
    }
    return urls;
}

/**
 * Extract data from the active tab, visiting the given urls one after another
 * when any are supplied, and collect all rows into a CSV.
 * The first failing redirect / extraction rejects the returned promise and
 * the remaining urls are not visited.
 * @param {string} itemsSelector items selector
 * @param {Array<string>} fieldSelectors fields selectors
 * @param {...any} args url list / url templates, page numbers, either [from, to, interval] or [...pages]
 * @returns {Promise<CSV>}
 * @throws {Error} when no active tab can be found
 */
async function getData(itemsSelector, fieldSelectors, ...args) {
    const urls = resolveTargetUrls(args);
    // Prefer the active tab of the current window, fall back to other windows.
    const tab = await getActiveTab(true) || await getActiveTab(false);
    if (!tab) throw new Error("Cannot find active tab.");
    const data = [];
    if (!urls.length) {
        data.push(...await extractTabData(tab, itemsSelector, fieldSelectors));
        return new CSV(data);
    }
    // Pages are loaded in the same tab, so they must be processed in sequence.
    for (const url of urls) {
        await redirectTab(tab, url);
        data.push(...await extractTabData(tab, itemsSelector, fieldSelectors));
    }
    return new CSV(data);
}

/**
 * Query the active tab.
 * @param {boolean} currentWindow value for the `currentWindow` query filter
 * @returns {Promise<object|undefined>} the first matching tab, undefined when
 * there is none; rejects when the query reports chrome.runtime.lastError
 */
function getActiveTab(currentWindow) {
    return new Promise((resolve, reject) => {
        chrome.tabs.query({
            active: true,
            currentWindow: currentWindow
        }, function (tabs) {
            // Without this check a failed query would throw on `tabs[0]`
            // inside the callback and leave the promise pending forever.
            if (chrome.runtime.lastError) {
                reject(new Error(chrome.runtime.lastError.message));
                return;
            }
            resolve(tabs && tabs[0]);
        });
    });
}