// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package csv import ( "io" "reflect" "strings" "testing" "unicode/utf8" ) func TestRead(t *testing.T) { tests := []struct { Name string Input string Output [][]string Error error // These fields are copied into the Reader Comma rune Comment rune UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 FieldsPerRecord int LazyQuotes bool TrimLeadingSpace bool ReuseRecord bool }{{ Name: "Simple", Input: "a,b,c\n", Output: [][]string{{"a", "b", "c"}}, }, { Name: "CRLF", Input: "a,b\r\nc,d\r\n", Output: [][]string{{"a", "b"}, {"c", "d"}}, }, { Name: "BareCR", Input: "a,b\rc,d\r\n", Output: [][]string{{"a", "b\rc", "d"}}, }, { Name: "RFC4180test", Input: `#field1,field2,field3 "aaa","bb b","ccc" "a,a","b""bb","ccc" zzz,yyy,xxx `, Output: [][]string{ {"#field1", "field2", "field3"}, {"aaa", "bb\nb", "ccc"}, {"a,a", `b"bb`, "ccc"}, {"zzz", "yyy", "xxx"}, }, UseFieldsPerRecord: true, FieldsPerRecord: 0, }, { Name: "NoEOLTest", Input: "a,b,c", Output: [][]string{{"a", "b", "c"}}, }, { Name: "Semicolon", Input: "a;b;c\n", Output: [][]string{{"a", "b", "c"}}, Comma: ';', }, { Name: "MultiLine", Input: `"two line","one line","three line field"`, Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, }, { Name: "BlankLine", Input: "a,b,c\n\nd,e,f\n\n", Output: [][]string{ {"a", "b", "c"}, {"d", "e", "f"}, }, }, { Name: "BlankLineFieldCount", Input: "a,b,c\n\nd,e,f\n\n", Output: [][]string{ {"a", "b", "c"}, {"d", "e", "f"}, }, UseFieldsPerRecord: true, FieldsPerRecord: 0, }, { Name: "TrimSpace", Input: " a, b, c\n", Output: [][]string{{"a", "b", "c"}}, TrimLeadingSpace: true, }, { Name: "LeadingSpace", Input: " a, b, c\n", Output: [][]string{{" a", " b", " c"}}, }, { Name: "Comment", Input: "#1,2,3\na,b,c\n#comment", Output: [][]string{{"a", "b", "c"}}, Comment: '#', }, { Name: "NoComment", Input: "#1,2,3\na,b,c", Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, }, { Name: "LazyQuotes", Input: `a "word","1"2",a","b`, Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, LazyQuotes: true, }, { Name: "BareQuotes", Input: `a "word","1"2",a"`, Output: [][]string{{`a "word"`, `1"2`, `a"`}}, LazyQuotes: true, }, { Name: "BareDoubleQuotes", Input: `a""b,c`, Output: [][]string{{`a""b`, `c`}}, LazyQuotes: true, }, { Name: "BadDoubleQuotes", Input: `a""b,c`, Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote}, }, { Name: "TrimQuote", Input: ` "a"," b",c`, Output: [][]string{{"a", " b", "c"}}, TrimLeadingSpace: true, }, { Name: "BadBareQuote", Input: `a "word","b"`, Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}, }, { Name: "BadTrailingQuote", Input: `"a word",b"`, Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote}, }, { Name: "ExtraneousQuote", Input: `"a "word","b"`, Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote}, }, { Name: "BadFieldCount", Input: "a,b,c\nd,e", Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount}, UseFieldsPerRecord: true, FieldsPerRecord: 0, }, { Name: "BadFieldCount1", Input: `a,b,c`, Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount}, UseFieldsPerRecord: true, FieldsPerRecord: 2, }, { Name: "FieldCount", Input: "a,b,c\nd,e", Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, }, { Name: "TrailingCommaEOF", Input: "a,b,c,", Output: [][]string{{"a", "b", "c", ""}}, }, { Name: "TrailingCommaEOL", Input: "a,b,c,\n", Output: [][]string{{"a", "b", "c", ""}}, }, { Name: "TrailingCommaSpaceEOF", Input: "a,b,c, ", Output: [][]string{{"a", "b", "c", ""}}, TrimLeadingSpace: true, }, { Name: "TrailingCommaSpaceEOL", Input: "a,b,c, \n", Output: [][]string{{"a", "b", "c", ""}}, TrimLeadingSpace: true, }, { Name: "TrailingCommaLine3", Input: "a,b,c\nd,e,f\ng,hi,", Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, TrimLeadingSpace: true, }, { Name: "NotTrailingComma3", Input: "a,b,c, \n", Output: [][]string{{"a", "b", "c", " "}}, }, { Name: "CommaFieldTest", Input: `x,y,z,w x,y,z, x,y,, x,,, ,,, "x","y","z","w" "x","y","z","" "x","y","","" "x","","","" "","","","" `, Output: [][]string{ {"x", "y", "z", "w"}, {"x", "y", "z", ""}, {"x", "y", "", ""}, {"x", "", "", ""}, {"", "", "", ""}, {"x", "y", "z", "w"}, {"x", "y", "z", ""}, {"x", "y", "", ""}, {"x", "", "", ""}, {"", "", "", ""}, }, }, { Name: "TrailingCommaIneffective1", Input: "a,b,\nc,d,e", Output: [][]string{ {"a", "b", ""}, {"c", "d", "e"}, }, TrimLeadingSpace: true, }, { Name: "ReadAllReuseRecord", Input: "a,b\nc,d", Output: [][]string{ {"a", "b"}, {"c", "d"}, }, ReuseRecord: true, }, { Name: "StartLine1", // Issue 19019 Input: "a,\"b\nc\"d,e", Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote}, }, { Name: "StartLine2", Input: "a,b\n\"d\n\n,e", Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote}, }, { Name: "CRLFInQuotedField", // Issue 21201 Input: "A,\"Hello\r\nHi\",B\r\n", Output: [][]string{ {"A", "Hello\nHi", "B"}, }, }, { Name: "BinaryBlobField", // Issue 19410 Input: "x09\x41\xb4\x1c,aktau", Output: [][]string{{"x09A\xb4\x1c", "aktau"}}, }, { Name: "TrailingCR", Input: "field1,field2\r", Output: [][]string{{"field1", "field2"}}, }, { Name: "QuotedTrailingCR", Input: "\"field\"\r", Output: [][]string{{"field"}}, }, { Name: "QuotedTrailingCRCR", Input: "\"field\"\r\r", Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote}, }, { Name: "FieldCR", Input: "field\rfield\r", Output: [][]string{{"field\rfield"}}, }, { Name: "FieldCRCR", Input: "field\r\rfield\r\r", Output: [][]string{{"field\r\rfield\r"}}, }, { Name: "FieldCRCRLF", Input: "field\r\r\nfield\r\r\n", Output: [][]string{{"field\r"}, {"field\r"}}, }, { Name: "FieldCRCRLFCR", Input: "field\r\r\n\rfield\r\r\n\r", Output: [][]string{{"field\r"}, {"\rfield\r"}}, }, { Name: "FieldCRCRLFCRCR", Input: "field\r\r\n\r\rfield\r\r\n\r\r", Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}}, }, { Name: "MultiFieldCRCRLFCRCR", Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,", Output: [][]string{ {"field1", "field2\r"}, {"\r\rfield1", "field2\r"}, {"\r\r", ""}, }, }, { Name: "NonASCIICommaAndComment", Input: "a£b,c£ \td,e\n€ comment\n", Output: [][]string{{"a", "b,c", "d,e"}}, TrimLeadingSpace: true, Comma: '£', Comment: '€', }, { Name: "NonASCIICommaAndCommentWithQuotes", Input: "a€\" b,\"€ c\nλ comment\n", Output: [][]string{{"a", " b,", " c"}}, Comma: '€', Comment: 'λ', }, { // λ and θ start with the same byte. // This tests that the parser doesn't confuse such characters. Name: "NonASCIICommaConfusion", Input: "\"abθcd\"λefθgh", Output: [][]string{{"abθcd", "efθgh"}}, Comma: 'λ', Comment: '€', }, { Name: "NonASCIICommentConfusion", Input: "λ\nλ\nθ\nλ\n", Output: [][]string{{"λ"}, {"λ"}, {"λ"}}, Comment: 'θ', }, { Name: "QuotedFieldMultipleLF", Input: "\"\n\n\n\n\"", Output: [][]string{{"\n\n\n\n"}}, }, { Name: "MultipleCRLF", Input: "\r\n\r\n\r\n\r\n", }, { // The implementation may read each line in several chunks if it doesn't fit entirely // in the read buffer, so we should test the code to handle that condition. Name: "HugeLines", Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000), Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}}, Comment: '#', }, { Name: "QuoteWithTrailingCRLF", Input: "\"foo\"bar\"\r\n", Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote}, }, { Name: "LazyQuoteWithTrailingCRLF", Input: "\"foo\"bar\"\r\n", Output: [][]string{{`foo"bar`}}, LazyQuotes: true, }, { Name: "DoubleQuoteWithTrailingCRLF", Input: "\"foo\"\"bar\"\r\n", Output: [][]string{{`foo"bar`}}, }, { Name: "EvenQuotes", Input: `""""""""`, Output: [][]string{{`"""`}}, }, { Name: "OddQuotes", Input: `"""""""`, Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}, }, { Name: "LazyOddQuotes", Input: `"""""""`, Output: [][]string{{`"""`}}, LazyQuotes: true, }, { Name: "BadComma1", Comma: '\n', Error: errInvalidDelim, }, { Name: "BadComma2", Comma: '\r', Error: errInvalidDelim, }, { Name: "BadComma3", Comma: utf8.RuneError, Error: errInvalidDelim, }, { Name: "BadComment1", Comment: '\n', Error: errInvalidDelim, }, { Name: "BadComment2", Comment: '\r', Error: errInvalidDelim, }, { Name: "BadComment3", Comment: utf8.RuneError, Error: errInvalidDelim, }, { Name: "BadCommaComment", Comma: 'X', Comment: 'X', Error: errInvalidDelim, }} for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { r := NewReader(strings.NewReader(tt.Input)) if tt.Comma != 0 { r.Comma = tt.Comma } r.Comment = tt.Comment if tt.UseFieldsPerRecord { r.FieldsPerRecord = tt.FieldsPerRecord } else { r.FieldsPerRecord = -1 } r.LazyQuotes = tt.LazyQuotes r.TrimLeadingSpace = tt.TrimLeadingSpace r.ReuseRecord = tt.ReuseRecord out, err := r.ReadAll() if !reflect.DeepEqual(err, tt.Error) { t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error) } else if !reflect.DeepEqual(out, tt.Output) { t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output) } }) } } // nTimes is an io.Reader which yields the string s n times. type nTimes struct { s string n int off int } func (r *nTimes) Read(p []byte) (n int, err error) { for { if r.n <= 0 || r.s == "" { return n, io.EOF } n0 := copy(p, r.s[r.off:]) p = p[n0:] n += n0 r.off += n0 if r.off == len(r.s) { r.off = 0 r.n-- } if len(p) == 0 { return } } } // benchmarkRead measures reading the provided CSV rows data. // initReader, if non-nil, modifies the Reader before it's used. func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) { b.ReportAllocs() r := NewReader(&nTimes{s: rows, n: b.N}) if initReader != nil { initReader(r) } for { _, err := r.Read() if err == io.EOF { break } if err != nil { b.Fatal(err) } } } const benchmarkCSVData = `x,y,z,w x,y,z, x,y,, x,,, ,,, "x","y","z","w" "x","y","z","" "x","y","","" "x","","","" "","","","" ` func BenchmarkRead(b *testing.B) { benchmarkRead(b, nil, benchmarkCSVData) } func BenchmarkReadWithFieldsPerRecord(b *testing.B) { benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData) } func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) { benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData) } func BenchmarkReadLargeFields(b *testing.B) { benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv `, 3)) } func BenchmarkReadReuseRecord(b *testing.B) { benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData) } func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) { benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData) } func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) { benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData) } func BenchmarkReadReuseRecordLargeFields(b *testing.B) { benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv `, 3)) }