4 https://github.com/mholt/PapaParse
6 (function(root, factory)
8 if (typeof define === 'function' && define.amd)
10 // AMD. Register as an anonymous module.
13 else if (typeof module === 'object' && module.exports)
15 // Node. Does not work with strict CommonJS, but
16 // only CommonJS-like environments that support module.exports,
18 module.exports = factory();
22 // Browser globals (root is window)
23 root.Papa = factory();
29 var global = (function () {
30 // alternative method, similar to `Function('return this')()`
31 // but without using `eval` (which is disabled when
32 // using Content Security Policy).
34 if (typeof self !== 'undefined') { return self; }
35 if (typeof window !== 'undefined') { return window; }
36 if (typeof global !== 'undefined') { return global; }
38 // When running tests none of the above have been defined
43 var IS_WORKER = !global.document && !!global.postMessage,
44 IS_PAPA_WORKER = IS_WORKER && /(\?|&)papaworker(=|&|$)/.test(global.location.search),
45 LOADED_SYNC = false, AUTO_SCRIPT_PATH;
46 var workers = {}, workerIdCounter = 0;
50 Papa.parse = CsvToJson;
51 Papa.unparse = JsonToCsv;
53 Papa.RECORD_SEP = String.fromCharCode(30);
54 Papa.UNIT_SEP = String.fromCharCode(31);
55 Papa.BYTE_ORDER_MARK = '\ufeff';
56 Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
57 Papa.WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker;
58 Papa.SCRIPT_PATH = null; // Must be set by your code if you use workers and this lib is loaded asynchronously
60 // Configurable chunk sizes for local and remote files, respectively
61 Papa.LocalChunkSize = 1024 * 1024 * 10; // 10 MB
62 Papa.RemoteChunkSize = 1024 * 1024 * 5; // 5 MB
63 Papa.DefaultDelimiter = ','; // Used if not specified and detection fails
65 // Exposed for testing and development only
67 Papa.ParserHandle = ParserHandle;
68 Papa.NetworkStreamer = NetworkStreamer;
69 Papa.FileStreamer = FileStreamer;
70 Papa.StringStreamer = StringStreamer;
74 var $ = global.jQuery;
75 $.fn.parse = function(options)
77 var config = options.config || {};
80 this.each(function(idx)
82 var supported = $(this).prop('tagName').toUpperCase() === 'INPUT'
83 && $(this).attr('type').toLowerCase() === 'file'
86 if (!supported || !this.files || this.files.length === 0)
87 return true; // continue to next input element
89 for (var i = 0; i < this.files.length; i++)
94 instanceConfig: $.extend({}, config)
99 parseNextFile(); // begin parsing
100 return this; // maintains chainability
103 function parseNextFile()
105 if (queue.length === 0)
107 if (isFunction(options.complete))
114 if (isFunction(options.before))
116 var returned = options.before(f.file, f.inputElem);
118 if (typeof returned === 'object')
120 if (returned.action === 'abort')
122 error('AbortError', f.file, f.inputElem, returned.reason);
123 return; // Aborts all queued files immediately
125 else if (returned.action === 'skip')
127 fileComplete(); // parse the next file in the queue, if any
130 else if (typeof returned.config === 'object')
131 f.instanceConfig = $.extend(f.instanceConfig, returned.config);
133 else if (returned === 'skip')
135 fileComplete(); // parse the next file in the queue, if any
140 // Wrap up the user's complete callback, if any, so that ours also gets executed
141 var userCompleteFunc = f.instanceConfig.complete;
142 f.instanceConfig.complete = function(results)
144 if (isFunction(userCompleteFunc))
145 userCompleteFunc(results, f.file, f.inputElem);
149 Papa.parse(f.file, f.instanceConfig);
/**
 * Reports a problem with a queued file to the user's `options.error`
 * callback, if one was supplied. Does nothing otherwise.
 *
 * @param {string} name   Error name; passed to the callback as `{name: name}`
 * @param {*} file        The file the error relates to
 * @param {*} elem        The input element the file came from
 * @param {*} reason      Optional explanation forwarded verbatim
 */
function error(name, file, elem, reason)
{
	// No handler registered: nothing to notify
	if (!isFunction(options.error))
		return;

	var descriptor = {name: name};
	options.error(descriptor, file, elem, reason);
}
158 function fileComplete()
169 global.onmessage = workerThreadReceivedMessage;
171 else if (Papa.WORKERS_SUPPORTED)
173 AUTO_SCRIPT_PATH = getScriptPath();
175 // Check if the script was loaded synchronously
178 // Body doesn't exist yet, must be synchronous
183 document.addEventListener('DOMContentLoaded', function () {
192 function CsvToJson(_input, _config)
194 _config = _config || {};
195 _config.dynamicTyping = _config.dynamicTyping || false;
197 if (_config.worker && Papa.WORKERS_SUPPORTED)
201 w.userStep = _config.step;
202 w.userChunk = _config.chunk;
203 w.userComplete = _config.complete;
204 w.userError = _config.error;
206 _config.step = isFunction(_config.step);
207 _config.chunk = isFunction(_config.chunk);
208 _config.complete = isFunction(_config.complete);
209 _config.error = isFunction(_config.error);
210 delete _config.worker; // prevent infinite loop
222 if (typeof _input === 'string')
224 if (_config.download)
225 streamer = new NetworkStreamer(_config);
227 streamer = new StringStreamer(_config);
229 else if ((global.File && _input instanceof File) || _input instanceof Object) // ...Safari. (see issue #106)
230 streamer = new FileStreamer(_config);
232 return streamer.stream(_input);
240 function JsonToCsv(_input, _config)
245 // Default configuration
247 /** whether to surround every datum with quotes */
250 /** whether to write headers */
251 var _writeHeader = true;
253 /** delimiting character */
254 var _delimiter = ',';
256 /** newline character(s) */
257 var _newline = '\r\n';
259 /** quote character */
260 var _quoteChar = '"';
264 var quoteCharRegex = new RegExp(_quoteChar, 'g');
266 if (typeof _input === 'string')
267 _input = JSON.parse(_input);
269 if (_input instanceof Array)
271 if (!_input.length || _input[0] instanceof Array)
272 return serialize(null, _input);
273 else if (typeof _input[0] === 'object')
274 return serialize(objectKeys(_input[0]), _input);
276 else if (typeof _input === 'object')
278 if (typeof _input.data === 'string')
279 _input.data = JSON.parse(_input.data);
281 if (_input.data instanceof Array)
284 _input.fields = _input.meta && _input.meta.fields;
287 _input.fields = _input.data[0] instanceof Array
289 : objectKeys(_input.data[0]);
291 if (!(_input.data[0] instanceof Array) && typeof _input.data[0] !== 'object')
292 _input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
295 return serialize(_input.fields || [], _input.data || []);
298 // Default (any valid paths should return before this)
299 throw 'exception: Unable to serialize unrecognized input';
/**
 * Copies recognized, well-typed options from the enclosing `_config`
 * into the serializer's local settings (`_delimiter`, `_quotes`,
 * `_newline`, `_quoteChar`, `_writeHeader`). Options that are missing
 * or of the wrong type leave the corresponding default untouched.
 */
function unpackConfig()
{
	// typeof null is 'object', so null must be rejected explicitly;
	// otherwise the property reads below would throw a TypeError
	if (typeof _config !== 'object' || _config === null)
		return;

	// Only a single character that is not a reserved CSV character is accepted
	if (typeof _config.delimiter === 'string'
		&& _config.delimiter.length === 1
		&& Papa.BAD_DELIMITERS.indexOf(_config.delimiter) === -1)
	{
		_delimiter = _config.delimiter;
	}

	// Either one flag for every column or an array indexed by column
	if (typeof _config.quotes === 'boolean'
		|| _config.quotes instanceof Array)
		_quotes = _config.quotes;

	if (typeof _config.newline === 'string')
		_newline = _config.newline;

	if (typeof _config.quoteChar === 'string')
		_quoteChar = _config.quoteChar;

	if (typeof _config.header === 'boolean')
		_writeHeader = _config.header;
}
329 /** Turns an object's keys into an array */
330 function objectKeys(obj)
332 if (typeof obj !== 'object')
340 /** The double for loop that iterates the data and writes out a CSV string including header row */
341 function serialize(fields, data)
345 if (typeof fields === 'string')
346 fields = JSON.parse(fields);
347 if (typeof data === 'string')
348 data = JSON.parse(data);
350 var hasHeader = fields instanceof Array && fields.length > 0;
351 var dataKeyedByField = !(data[0] instanceof Array);
353 // If there is a header row, write it first
354 if (hasHeader && _writeHeader)
356 for (var i = 0; i < fields.length; i++)
360 csv += safe(fields[i], i);
366 // Then write out the data
367 for (var row = 0; row < data.length; row++)
369 var maxCol = hasHeader ? fields.length : data[row].length;
371 for (var col = 0; col < maxCol; col++)
375 var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
376 csv += safe(data[row][colIdx], col);
379 if (row < data.length - 1)
386 /** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
387 function safe(str, col)
389 if (typeof str === 'undefined' || str === null)
392 str = str.toString().replace(quoteCharRegex, _quoteChar+_quoteChar);
394 var needsQuotes = (typeof _quotes === 'boolean' && _quotes)
395 || (_quotes instanceof Array && _quotes[col])
396 || hasAny(str, Papa.BAD_DELIMITERS)
397 || str.indexOf(_delimiter) > -1
398 || str.charAt(0) === ' '
399 || str.charAt(str.length - 1) === ' ';
401 return needsQuotes ? _quoteChar + str + _quoteChar : str;
/**
 * Tells whether `str` contains at least one of the given substrings.
 *
 * @param {string} str        The text to search
 * @param {Array} substrings  Candidate substrings, checked in order
 * @returns {boolean} true as soon as one candidate is found, else false
 */
function hasAny(str, substrings)
{
	var idx = 0;
	while (idx < substrings.length)
	{
		if (str.indexOf(substrings[idx]) !== -1)
			return true;
		idx++;
	}
	return false;
}
413 /** ChunkStreamer is the base prototype for various streamer implementations. */
414 function ChunkStreamer(config)
417 this._paused = false;
418 this._finished = false;
421 this._partialLine = '';
424 this._nextChunk = null;
425 this.isFirstChunk = true;
426 this._completeResults = {
431 replaceConfig.call(this, config);
433 this.parseChunk = function(chunk)
435 // First chunk pre-processing
436 if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk))
438 var modifiedChunk = this._config.beforeFirstChunk(chunk);
439 if (modifiedChunk !== undefined)
440 chunk = modifiedChunk;
442 this.isFirstChunk = false;
444 // Rejoin the line we likely just split in two by chunking the file
445 var aggregate = this._partialLine + chunk;
446 this._partialLine = '';
448 var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
450 if (this._handle.paused() || this._handle.aborted())
453 var lastIndex = results.meta.cursor;
457 this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
458 this._baseIndex = lastIndex;
461 if (results && results.data)
462 this._rowCount += results.data.length;
464 var finishedIncludingPreview = this._finished || (this._config.preview && this._rowCount >= this._config.preview);
470 workerId: Papa.WORKER_ID,
471 finished: finishedIncludingPreview
474 else if (isFunction(this._config.chunk))
476 this._config.chunk(results, this._handle);
480 this._completeResults = undefined;
483 if (!this._config.step && !this._config.chunk) {
484 this._completeResults.data = this._completeResults.data.concat(results.data);
485 this._completeResults.errors = this._completeResults.errors.concat(results.errors);
486 this._completeResults.meta = results.meta;
489 if (finishedIncludingPreview && isFunction(this._config.complete) && (!results || !results.meta.aborted))
490 this._config.complete(this._completeResults, this._input);
492 if (!finishedIncludingPreview && (!results || !results.meta.paused))
498 this._sendError = function(error)
500 if (isFunction(this._config.error))
501 this._config.error(error);
502 else if (IS_PAPA_WORKER && this._config.error)
505 workerId: Papa.WORKER_ID,
/**
 * Installs a configuration on the streamer it is invoked on (call it
 * with the streamer as `this`). The caller's object is copied rather
 * than stored, a fresh ParserHandle is built from that copy and linked
 * back to the streamer, and the copy is kept as `this._config`.
 *
 * @param {Object} config  User-supplied configuration
 */
function replaceConfig(config)
{
	// Deep-copy the config so we can edit it
	var configCopy = copy(config);
	// parseInt VERY important so we don't concatenate strings! An explicit
	// radix keeps the value decimal regardless of how the string is prefixed.
	configCopy.chunkSize = parseInt(configCopy.chunkSize, 10);
	if (!config.step && !config.chunk)
		configCopy.chunkSize = null;  // disable Range header if not streaming; bad values break IIS - see issue #196
	this._handle = new ParserHandle(configCopy);
	this._handle.streamer = this;
	this._config = configCopy;	// persist the copy to the caller
}
526 function NetworkStreamer(config)
528 config = config || {};
529 if (!config.chunkSize)
530 config.chunkSize = Papa.RemoteChunkSize;
531 ChunkStreamer.call(this, config);
537 this._nextChunk = function()
545 this._nextChunk = function()
551 this.stream = function(url)
554 this._nextChunk(); // Starts streaming
557 this._readChunk = function()
565 xhr = new XMLHttpRequest();
567 if (this._config.withCredentials)
569 xhr.withCredentials = this._config.withCredentials;
574 xhr.onload = bindFunction(this._chunkLoaded, this);
575 xhr.onerror = bindFunction(this._chunkError, this);
578 xhr.open('GET', this._input, !IS_WORKER);
580 if (this._config.chunkSize)
582 var end = this._start + this._config.chunkSize - 1; // minus one because byte range is inclusive
583 xhr.setRequestHeader('Range', 'bytes='+this._start+'-'+end);
584 xhr.setRequestHeader('If-None-Match', 'webkit-no-cache'); // https://bugs.webkit.org/show_bug.cgi?id=82672
591 this._chunkError(err.message);
594 if (IS_WORKER && xhr.status === 0)
597 this._start += this._config.chunkSize;
600 this._chunkLoaded = function()
602 if (xhr.readyState != 4)
605 if (xhr.status < 200 || xhr.status >= 400)
611 this._finished = !this._config.chunkSize || this._start > getFileSize(xhr);
612 this.parseChunk(xhr.responseText);
615 this._chunkError = function(errorMessage)
617 var errorText = xhr.statusText || errorMessage;
618 this._sendError(errorText);
/**
 * Reads the total resource size from the `Content-Range` response
 * header, i.e. the number after the last '/'.
 *
 * @param {Object} xhr  Request object exposing getResponseHeader()
 * @returns {number} The size parsed as a decimal integer, or -1 when the
 *     header is absent; the caller compares its read offset against this
 *     value, so -1 makes it treat the download as finished.
 */
function getFileSize(xhr)
{
	var contentRange = xhr.getResponseHeader('Content-Range');
	if (contentRange === null) { // no content range, then finish!
		return -1;
	}
	// Explicit radix: a byte count is always decimal
	return parseInt(contentRange.substring(contentRange.lastIndexOf('/') + 1), 10);
}
630 NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype);
631 NetworkStreamer.prototype.constructor = NetworkStreamer;
634 function FileStreamer(config)
636 config = config || {};
637 if (!config.chunkSize)
638 config.chunkSize = Papa.LocalChunkSize;
639 ChunkStreamer.call(this, config);
643 // FileReader is better than FileReaderSync (even in worker) - see http://stackoverflow.com/q/24708649/1048862
644 // But Firefox is a pill, too - see issue #76: https://github.com/mholt/PapaParse/issues/76
645 var usingAsyncReader = typeof FileReader !== 'undefined'; // Safari doesn't consider it a function - see issue #105
647 this.stream = function(file)
650 slice = file.slice || file.webkitSlice || file.mozSlice;
652 if (usingAsyncReader)
654 reader = new FileReader(); // Preferred method of reading files, even in workers
655 reader.onload = bindFunction(this._chunkLoaded, this);
656 reader.onerror = bindFunction(this._chunkError, this);
659 reader = new FileReaderSync(); // Hack for running in a web worker in Firefox
661 this._nextChunk(); // Starts streaming
664 this._nextChunk = function()
666 if (!this._finished && (!this._config.preview || this._rowCount < this._config.preview))
670 this._readChunk = function()
672 var input = this._input;
673 if (this._config.chunkSize)
675 var end = Math.min(this._start + this._config.chunkSize, this._input.size);
676 input = slice.call(input, this._start, end);
678 var txt = reader.readAsText(input, this._config.encoding);
679 if (!usingAsyncReader)
680 this._chunkLoaded({ target: { result: txt } }); // mimic the async signature
683 this._chunkLoaded = function(event)
685 // Very important to increment start each time before handling results
686 this._start += this._config.chunkSize;
687 this._finished = !this._config.chunkSize || this._start >= this._input.size;
688 this.parseChunk(event.target.result);
691 this._chunkError = function()
693 this._sendError(reader.error);
697 FileStreamer.prototype = Object.create(ChunkStreamer.prototype);
698 FileStreamer.prototype.constructor = FileStreamer;
701 function StringStreamer(config)
703 config = config || {};
704 ChunkStreamer.call(this, config);
708 this.stream = function(s)
712 return this._nextChunk();
714 this._nextChunk = function()
716 if (this._finished) return;
717 var size = this._config.chunkSize;
718 var chunk = size ? remaining.substr(0, size) : remaining;
719 remaining = size ? remaining.substr(size) : '';
720 this._finished = !remaining;
721 return this.parseChunk(chunk);
// Inherit from ChunkStreamer, like the other streamers do. Creating the
// prototype from StringStreamer.prototype itself would only insert a
// redundant empty link and never put ChunkStreamer in the chain.
StringStreamer.prototype = Object.create(ChunkStreamer.prototype);
StringStreamer.prototype.constructor = StringStreamer;
729 // Use one ParserHandle per entire CSV file or string
730 function ParserHandle(_config)
732 // One goal is to minimize the use of regular expressions...
733 var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
736 var _stepCounter = 0; // Number of times step was called (number of rows parsed)
737 var _input; // The input being parsed
738 var _parser; // The core parser being used
739 var _paused = false; // Whether we are paused or not
740 var _aborted = false; // Whether the parser has aborted or not
741 var _delimiterError; // Temporary state between delimiter detection and processing results
742 var _fields = []; // Fields are from the header row of the input, if there is one
743 var _results = { // The last results returned from the parser
749 if (isFunction(_config.step))
751 var userStep = _config.step;
752 _config.step = function(results)
756 if (needsHeaderRow())
758 else // only call user's step function after header row
762 // It's possible that this line was empty and there's no row here after all
763 if (_results.data.length === 0)
766 _stepCounter += results.data.length;
767 if (_config.preview && _stepCounter > _config.preview)
770 userStep(_results, self);
776 * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
777 * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
778 * when an input comes in multiple chunks, like from a file.
780 this.parse = function(input, baseIndex, ignoreLastRow)
782 if (!_config.newline)
783 _config.newline = guessLineEndings(input);
785 _delimiterError = false;
786 if (!_config.delimiter)
788 var delimGuess = guessDelimiter(input, _config.newline);
789 if (delimGuess.successful)
790 _config.delimiter = delimGuess.bestDelimiter;
793 _delimiterError = true; // add error after parsing (otherwise it would be overwritten)
794 _config.delimiter = Papa.DefaultDelimiter;
796 _results.meta.delimiter = _config.delimiter;
798 else if(typeof _config.delimiter === 'function')
800 _config.delimiter = _config.delimiter(input);
801 _results.meta.delimiter = _config.delimiter;
804 var parserConfig = copy(_config);
805 if (_config.preview && _config.header)
806 parserConfig.preview++; // to compensate for header row
809 _parser = new Parser(parserConfig);
810 _results = _parser.parse(_input, baseIndex, ignoreLastRow);
812 return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } });
815 this.paused = function()
820 this.pause = function()
824 _input = _input.substr(_parser.getCharIndex());
827 this.resume = function()
830 self.streamer.parseChunk(_input);
833 this.aborted = function ()
838 this.abort = function()
842 _results.meta.aborted = true;
843 if (isFunction(_config.complete))
844 _config.complete(_results);
848 function processResults()
850 if (_results && _delimiterError)
852 addError('Delimiter', 'UndetectableDelimiter', 'Unable to auto-detect delimiting character; defaulted to \''+Papa.DefaultDelimiter+'\'');
853 _delimiterError = false;
856 if (_config.skipEmptyLines)
858 for (var i = 0; i < _results.data.length; i++)
859 if (_results.data[i].length === 1 && _results.data[i][0] === '')
860 _results.data.splice(i--, 1);
863 if (needsHeaderRow())
866 return applyHeaderAndDynamicTyping();
/**
 * Whether a header row is still outstanding: the `header` option is
 * set and no field names have been collected yet. When `header` is
 * falsy, that falsy value itself is returned.
 */
function needsHeaderRow()
{
	var headerOption = _config.header;
	return headerOption ? _fields.length === 0 : headerOption;
}
874 function fillHeaderFields()
878 for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
879 for (var j = 0; j < _results.data[i].length; j++)
880 _fields.push(_results.data[i][j]);
881 _results.data.splice(0, 1);
/**
 * Converts a raw string value to a boolean or number when dynamic
 * typing is switched on, either for this particular field or globally.
 *
 * @param {string|number} field  Field name or column index of the value
 * @param {*} value              The raw parsed value
 * @returns {*} true/false for the literals 'true'/'TRUE'/'false'/'FALSE',
 *     otherwise whatever tryParseFloat yields; the value is returned
 *     unchanged when dynamic typing does not apply.
 */
function parseDynamic(field, value)
{
	var typingEnabled = (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
	if (!typingEnabled)
		return value;

	switch (value)
	{
		case 'true':
		case 'TRUE':
			return true;
		case 'false':
		case 'FALSE':
			return false;
		default:
			return tryParseFloat(value);
	}
}
898 function applyHeaderAndDynamicTyping()
900 if (!_results || (!_config.header && !_config.dynamicTyping))
903 for (var i = 0; i < _results.data.length; i++)
905 var row = _config.header ? {} : [];
907 for (var j = 0; j < _results.data[i].length; j++)
910 var value = _results.data[i][j];
913 field = j >= _fields.length ? '__parsed_extra' : _fields[j];
915 value = parseDynamic(field, value);
917 if (field === '__parsed_extra')
919 row[field] = row[field] || [];
920 row[field].push(value);
926 _results.data[i] = row;
930 if (j > _fields.length)
931 addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, i);
932 else if (j < _fields.length)
933 addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, i);
937 if (_config.header && _results.meta)
938 _results.meta.fields = _fields;
942 function guessDelimiter(input, newline)
944 var delimChoices = [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
945 var bestDelim, bestDelta, fieldCountPrevRow;
947 for (var i = 0; i < delimChoices.length; i++)
949 var delim = delimChoices[i];
950 var delta = 0, avgFieldCount = 0;
951 fieldCountPrevRow = undefined;
953 var preview = new Parser({
959 for (var j = 0; j < preview.data.length; j++)
961 var fieldCount = preview.data[j].length;
962 avgFieldCount += fieldCount;
964 if (typeof fieldCountPrevRow === 'undefined')
966 fieldCountPrevRow = fieldCount;
969 else if (fieldCount > 1)
971 delta += Math.abs(fieldCount - fieldCountPrevRow);
972 fieldCountPrevRow = fieldCount;
976 if (preview.data.length > 0)
977 avgFieldCount /= preview.data.length;
979 if ((typeof bestDelta === 'undefined' || delta < bestDelta)
980 && avgFieldCount > 1.99)
987 _config.delimiter = bestDelim;
990 successful: !!bestDelim,
991 bestDelimiter: bestDelim
/**
 * Guesses which line terminator the input uses by inspecting at most
 * its first megabyte.
 *
 * @param {string} input  The text to inspect
 * @returns {string} '\n', '\r' or '\r\n'
 */
function guessLineEndings(input)
{
	var sample = input.substr(0, 1024*1024);	// max length 1 MB

	var crPieces = sample.split('\r');
	var lfPieces = sample.split('\n');

	// No carriage return anywhere: only '\n' is possible
	if (crPieces.length === 1)
		return '\n';

	// A line feed shows up before the first carriage return
	if (lfPieces.length > 1 && lfPieces[0].length < crPieces[0].length)
		return '\n';

	// Count the pieces that begin with '\n', i.e. the '\r\n' pairs
	var pairedCount = crPieces.filter(function(piece)
	{
		return piece.charAt(0) === '\n';
	}).length;

	if (pairedCount >= crPieces.length / 2)
		return '\r\n';
	return '\r';
}
/**
 * Converts `val` to a number when it matches the FLOAT pattern;
 * otherwise hands it back untouched.
 *
 * @param {*} val  Candidate value, normally a string
 * @returns {*} The parsed number, or `val` itself when it is not numeric
 */
function tryParseFloat(val)
{
	if (FLOAT.test(val))
		return parseFloat(val);
	return val;
}
1024 function addError(type, code, msg, row)
1026 _results.errors.push({
1039 /** The core parser implements speedy and correct CSV parsing */
1040 function Parser(config)
1042 // Unpack the config object
1043 config = config || {};
1044 var delim = config.delimiter;
1045 var newline = config.newline;
1046 var comments = config.comments;
1047 var step = config.step;
1048 var preview = config.preview;
1049 var fastMode = config.fastMode;
1050 var quoteChar = config.quoteChar || '"';
1052 // Delimiter must be valid
1053 if (typeof delim !== 'string'
1054 || Papa.BAD_DELIMITERS.indexOf(delim) > -1)
1057 // Comment character must be valid
1058 if (comments === delim)
1059 throw 'Comment character same as delimiter';
1060 else if (comments === true)
1062 else if (typeof comments !== 'string'
1063 || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
1066 // Newline must be valid: \r, \n, or \r\n
1067 if (newline != '\n' && newline != '\r' && newline != '\r\n')
1070 // We're gonna need these at the Parser scope
1072 var aborted = false;
1074 this.parse = function(input, baseIndex, ignoreLastRow)
1076 // For some reason, in Chrome, this speeds things up (!?)
1077 if (typeof input !== 'string')
1078 throw 'Input must be a string';
1080 // We don't need to compute some of these every time parse() is called,
1081 // but having them in a more local scope seems to perform better
1082 var inputLen = input.length,
1083 delimLen = delim.length,
1084 newlineLen = newline.length,
1085 commentsLen = comments.length;
1086 var stepIsFunction = typeof step === 'function';
1088 // Establish starting state
1090 var data = [], errors = [], row = [], lastCursor = 0;
1093 return returnable();
1095 if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
1097 var rows = input.split(newline);
1098 for (var i = 0; i < rows.length; i++)
1101 cursor += row.length;
1102 if (i !== rows.length - 1)
1103 cursor += newline.length;
1104 else if (ignoreLastRow)
1105 return returnable();
1106 if (comments && row.substr(0, commentsLen) === comments)
1111 pushRow(row.split(delim));
1114 return returnable();
1117 pushRow(row.split(delim));
1118 if (preview && i >= preview)
1120 data = data.slice(0, preview);
1121 return returnable(true);
1124 return returnable();
1127 var nextDelim = input.indexOf(delim, cursor);
1128 var nextNewline = input.indexOf(newline, cursor);
1129 var quoteCharRegex = new RegExp(quoteChar+quoteChar, 'g');
1134 // Field has opening quote
1135 if (input[cursor] === quoteChar)
1137 // Start our search for the closing quote where the cursor is
1138 var quoteSearch = cursor;
1140 // Skip the opening quote
1145 // Find closing quote
1146 var quoteSearch = input.indexOf(quoteChar, quoteSearch+1);
1148 if (quoteSearch === -1)
1150 if (!ignoreLastRow) {
1151 // No closing quote... what a pity
1154 code: 'MissingQuotes',
1155 message: 'Quoted field unterminated',
1156 row: data.length, // row has yet to be inserted
1163 if (quoteSearch === inputLen-1)
1165 // Closing quote at EOF
1166 var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
1167 return finish(value);
1170 // If this quote is escaped, it's part of the data; skip it
1171 if (input[quoteSearch+1] === quoteChar)
1177 if (input[quoteSearch+1] === delim)
1179 // Closing quote followed by delimiter
1180 row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
1181 cursor = quoteSearch + 1 + delimLen;
1182 nextDelim = input.indexOf(delim, cursor);
1183 nextNewline = input.indexOf(newline, cursor);
1187 if (input.substr(quoteSearch+1, newlineLen) === newline)
1189 // Closing quote followed by newline
1190 row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
1191 saveRow(quoteSearch + 1 + newlineLen);
1192 nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
1198 return returnable();
1201 if (preview && data.length >= preview)
1202 return returnable(true);
1211 // Comment found at start of new line
1212 if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments)
1214 if (nextNewline === -1) // Comment ends at EOF
1215 return returnable();
1216 cursor = nextNewline + newlineLen;
1217 nextNewline = input.indexOf(newline, cursor);
1218 nextDelim = input.indexOf(delim, cursor);
1222 // Next delimiter comes before next newline, so we've reached end of field
1223 if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1))
1225 row.push(input.substring(cursor, nextDelim));
1226 cursor = nextDelim + delimLen;
1227 nextDelim = input.indexOf(delim, cursor);
1232 if (nextNewline !== -1)
1234 row.push(input.substring(cursor, nextNewline));
1235 saveRow(nextNewline + newlineLen);
1241 return returnable();
1244 if (preview && data.length >= preview)
1245 return returnable(true);
1257 function pushRow(row)
1260 lastCursor = cursor;
1264 * Appends the remaining input from cursor to the end into
1265 * row, saves the row, calls step, and returns the results.
1267 function finish(value)
1270 return returnable();
1271 if (typeof value === 'undefined')
1272 value = input.substr(cursor);
1274 cursor = inputLen; // important in case parsing is paused
1278 return returnable();
1282 * Appends the current row to the results. It sets the cursor
1283 * to newCursor and finds the nextNewline. The caller should
1284 * take care to execute user's step function and check for
1285 * preview and end parsing if necessary.
1287 function saveRow(newCursor)
1292 nextNewline = input.indexOf(newline, cursor);
1295 /** Returns an object with the results, errors, and meta. */
1296 function returnable(stopped)
1305 truncated: !!stopped,
1306 cursor: lastCursor + (baseIndex || 0)
1311 /** Executes the user's step function and resets data & errors. */
1315 data = [], errors = [];
1319 /** Sets the abort flag */
1320 this.abort = function()
1325 /** Gets the cursor position */
1326 this.getCharIndex = function()
// If you need to load Papa Parse asynchronously and you also need worker threads, hard-code
// the script path here. See: https://github.com/mholt/PapaParse/issues/87#issuecomment-57885358
/**
 * Returns the `src` of the last <script> element currently in the
 * document, or an empty string when the document has no script elements.
 */
function getScriptPath()
{
	var scriptTags = document.getElementsByTagName('script');
	if (!scriptTags.length)
		return '';
	return scriptTags[scriptTags.length - 1].src;
}
1341 function newWorker()
1343 if (!Papa.WORKERS_SUPPORTED)
1345 if (!LOADED_SYNC && Papa.SCRIPT_PATH === null)
1347 'Script path cannot be determined automatically when Papa Parse is loaded asynchronously. ' +
1348 'You need to set Papa.SCRIPT_PATH manually.'
1350 var workerUrl = Papa.SCRIPT_PATH || AUTO_SCRIPT_PATH;
1351 // Append 'papaworker' to the search string to tell papaparse that this is our worker.
1352 workerUrl += (workerUrl.indexOf('?') !== -1 ? '&' : '?') + 'papaworker';
1353 var w = new global.Worker(workerUrl);
1354 w.onmessage = mainThreadReceivedMessage;
1355 w.id = workerIdCounter++;
1360 /** Callback when main thread receives a message */
1361 function mainThreadReceivedMessage(e)
1364 var worker = workers[msg.workerId];
1365 var aborted = false;
1368 worker.userError(msg.error, msg.file);
1369 else if (msg.results && msg.results.data)
1371 var abort = function() {
1373 completeWorker(msg.workerId, { data: [], errors: [], meta: { aborted: true } });
1378 pause: notImplemented,
1379 resume: notImplemented
1382 if (isFunction(worker.userStep))
1384 for (var i = 0; i < msg.results.data.length; i++)
1387 data: [msg.results.data[i]],
1388 errors: msg.results.errors,
1389 meta: msg.results.meta
1394 delete msg.results; // free memory ASAP
1396 else if (isFunction(worker.userChunk))
1398 worker.userChunk(msg.results, handle, msg.file);
1403 if (msg.finished && !aborted)
1404 completeWorker(msg.workerId, msg.results);
1407 function completeWorker(workerId, results) {
1408 var worker = workers[workerId];
1409 if (isFunction(worker.userComplete))
1410 worker.userComplete(results);
1412 delete workers[workerId];
1415 function notImplemented() {
1416 throw 'Not implemented.';
1419 /** Callback when worker thread receives a message */
1420 function workerThreadReceivedMessage(e)
1424 if (typeof Papa.WORKER_ID === 'undefined' && msg)
1425 Papa.WORKER_ID = msg.workerId;
1427 if (typeof msg.input === 'string')
1429 global.postMessage({
1430 workerId: Papa.WORKER_ID,
1431 results: Papa.parse(msg.input, msg.config),
1435 else if ((global.File && msg.input instanceof File) || msg.input instanceof Object) // thank you, Safari (see issue #106)
1437 var results = Papa.parse(msg.input, msg.config);
1439 global.postMessage({
1440 workerId: Papa.WORKER_ID,
/**
 * Makes a deep copy of an array or object (mostly).
 * Non-objects (primitives and functions) and `null` are returned as-is;
 * arrays are copied into arrays and every other object into a plain
 * object, recursing into each enumerable property.
 *
 * @param {*} obj  The value to copy
 * @returns {*} The copy, or `obj` itself when it is not a non-null object
 */
function copy(obj)
{
	// typeof null is 'object'; without the explicit check a null value
	// would come back as a (truthy) empty object
	if (typeof obj !== 'object' || obj === null)
		return obj;
	var cpy = obj instanceof Array ? [] : {};
	for (var key in obj)
		cpy[key] = copy(obj[key]);
	return cpy;
}
/**
 * Wraps `f` so that it is always invoked with `self` as its `this`
 * value, forwarding every argument. The wrapper discards the return
 * value of `f`.
 *
 * @param {Function} f  The function to wrap
 * @param {*} self      The `this` value to invoke it with
 * @returns {Function} The wrapper
 */
function bindFunction(f, self)
{
	var wrapper = function()
	{
		f.apply(self, arguments);
	};
	return wrapper;
}
/**
 * Tells whether the given value is callable.
 *
 * @param {*} func  Any value
 * @returns {boolean} true when `typeof func` is 'function'
 */
function isFunction(func)
{
	var kind = typeof func;
	return kind === 'function';
}