improve csv parsing code

This commit is contained in:
Max Ogden
2011-08-31 19:56:56 -04:00
parent 48dbdf58f5
commit 935e0d15a0
2 changed files with 66 additions and 23 deletions

View File

@@ -1,19 +1,56 @@
importScripts('lib/underscore.js'); importScripts('lib/underscore.js');
onmessage = function(message) { onmessage = function(message) {
var rows = message.data.data.split('\n');
/**
 * Parse raw CSV text into an array of documents keyed by the header row.
 *
 * The first row supplies the property names; every following row becomes
 * one object. Fields may be wrapped in double quotes, in which case they
 * can contain commas and line breaks, and a doubled quote ("") stands for
 * a literal quote character. Rows may be terminated by \n, \r\n or \r.
 *
 * @param {string} rawCSV - Complete CSV text, header row included.
 * @returns {Object[]} One object per data row. Cells beyond the header
 *   width are stored under 'field' + columnIndex. Empty input yields [].
 */
function parseCSV(rawCSV) {
  if (!rawCSV) return [];

  // Group 1: the delimiter preceding the field (comma, line break, or
  //          start of input).
  // Group 2: contents of a quoted field (quotes still doubled).
  // Group 3: contents of an unquoted field.
  var pattern = /(,|\r?\n|\r|^)(?:"([^"]*(?:""[^"]*)*)"|([^",\r\n]*))/g;

  var rows = [[]];
  var matches;
  while ((matches = pattern.exec(rawCSV)) !== null) {
    var delimiter = matches[1];

    // When the input itself starts with a delimiter, that delimiter is
    // consumed by the very first match, so the empty cell sitting in front
    // of it has to be recorded explicitly.
    if (matches.index === 0 && delimiter.length) rows[0].push("");

    // Any delimiter other than a comma is a line break: start a new row.
    if (delimiter.length && delimiter !== ",") rows.push([]);

    // Compare against undefined rather than truthiness so that a quoted
    // empty field ("") yields "" instead of undefined.
    var value = matches[2] !== undefined
      ? matches[2].replace(/""/g, "\"")
      : matches[3];
    rows[rows.length - 1].push(value);

    // exec() does not advance lastIndex after a zero-length match, which
    // would repeat the same match forever; step past it manually.
    if (matches[0].length === 0) pattern.lastIndex++;
  }

  // A trailing line break leaves one row holding a single empty cell.
  var lastRow = rows[rows.length - 1];
  if (lastRow.length === 1 && lastRow[0] === "") rows.pop();
  if (rows.length === 0) return [];

  var headers = rows[0];
  var docs = [];
  rows.slice(1).forEach(function(row) {
    var doc = {};
    row.forEach(function(cell, idx) {
      // Rows wider than the header would otherwise pile every extra cell
      // onto the single key "undefined".
      var key = idx < headers.length ? headers[idx] : 'field' + idx;
      doc[key] = cell;
    });
    docs.push(doc);
  });
  return docs;
}
var docs = parseCSV(message.data.data);
var req = new XMLHttpRequest(); var req = new XMLHttpRequest();
req.onreadystatechange = function() {
if (req.readyState == 4) postMessage(JSON.stringify({done: true})) req.onprogress = req.upload.onprogress = function(e) {
if(e.lengthComputable) postMessage(JSON.stringify({ percent: (e.loaded / e.total) * 100 }));
}; };
req.onreadystatechange = function() { if (req.readyState == 4) postMessage(JSON.stringify( {done: true} )) };
req.open('POST', message.data.url); req.open('POST', message.data.url);
req.setRequestHeader('Content-Type', 'application/json'); req.setRequestHeader('Content-Type', 'application/json');
req.send(JSON.stringify({docs: docs})); req.send(JSON.stringify({docs: docs}));

View File

@@ -120,18 +120,24 @@ var costco = function() {
}; };
var worker = new Worker('script/costco-csv-worker.js'); var worker = new Worker('script/costco-csv-worker.js');
// Handle status messages posted by the CSV upload worker. Each message is
// a JSON string that carries either {done: true} once the request has
// finished or {percent: <number>} while the upload is in progress; any
// other payload is shown to the user verbatim.
worker.onmessage = function(message) {
  message = JSON.parse(message.data);
  if (message.done) {
    util.hide('dialog');
    util.notify("Data uploaded successfully!");
    recline.initializeTable(app.offset);
  } else if (typeof message.percent === 'number') {
    // Test the type rather than truthiness so that a 0% progress event is
    // still treated as progress instead of falling through to the raw dump.
    if (message.percent >= 100) {
      // All bytes have been sent; the server has not responded yet.
      util.notify("Waiting for CouchDB...", {persist: true, loader: true});
    } else {
      // Floor (not round) so the label never reads 100% before completion.
      util.notify("Uploading... " + Math.floor(message.percent) + "%");
    }
  } else {
    util.notify(JSON.stringify(message));
  }
};
worker.postMessage(payload);
}; };
} else { } else {
util.notify('File not selected. Please try again'); util.notify('File not selected. Please try again');