deno.land / std@0.224.0 / csv / csv_parse_stream_test.ts
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.import { CsvParseStream } from "./csv_parse_stream.ts";import type { CsvParseStreamOptions } from "./csv_parse_stream.ts";import { ERR_QUOTE, ParseError } from "./_io.ts";import { assert, assertEquals, assertRejects, assertStringIncludes,} from "../assert/mod.ts";import type { AssertTrue, IsExact } from "../testing/types.ts";import { fromFileUrl, join } from "../path/mod.ts";import { delay } from "../async/delay.ts";
// Shared fixtures for the tests in this file.
// - `testdataDir`: the "testdata" directory located next to this test module,
//   resolved from `import.meta.url`; used for read permissions and fixture paths.
// - `encoder`: a TextEncoder used to turn string chunks into bytes before they
//   are fed through a TextDecoderStream.
const testdataDir = join(fromFileUrl(import.meta.url), "../testdata");const encoder = new TextEncoder();
// Pipes the `simple.csv` fixture, opened with `Deno.open`, through a text
// decoder and a default-configured CsvParseStream, then checks every parsed row.
Deno.test({
  name: "CsvParseStream should work with Deno.FsFile's readable",
  permissions: {
    read: [testdataDir],
  },
  async fn() {
    const csvPath = join(testdataDir, "simple.csv");
    const fsFile = await Deno.open(csvPath);

    // bytes -> text -> CSV rows
    const textStream = fsFile.readable.pipeThrough(new TextDecoderStream());
    const rowStream = textStream.pipeThrough(new CsvParseStream());

    const expectedRows = [
      ["id", "name"],
      ["1", "foobar"],
      ["2", "barbaz"],
    ];
    assertEquals(await Array.fromAsync(rowStream), expectedRows);
  },
});
// Feeds a CSV whose last line opens a quoted field that is never closed and
// checks that the valid rows are still delivered before the read rejects with
// a ParseError carrying the expected position information.
Deno.test({
  name: "CsvParseStream throws at invalid csv line",
  async fn() {
    const chunks = [
      "id,name\n",
      "\n",
      "1,foo\n",
      '2,"baz\n',
    ].map((text) => encoder.encode(text));

    const rowStream = ReadableStream.from(chunks)
      .pipeThrough(new TextDecoderStream())
      .pipeThrough(new CsvParseStream());
    const reader = rowStream.getReader();

    // The header and the first data row are read successfully; the assertions
    // show the blank line in between produces no record.
    const headerResult = await reader.read();
    assertEquals(headerResult, { done: false, value: ["id", "name"] });
    const firstRowResult = await reader.read();
    assertEquals(firstRowResult, { done: false, value: ["1", "foo"] });

    // The third read hits the unterminated quote.
    const error = await assertRejects(() => reader.read());
    assert(error instanceof ParseError);
    assertEquals(error.startLine, 4);
    assertEquals(error.line, 5);
    assertEquals(error.column, 0);
    assertStringIncludes(error.message, ERR_QUOTE);
  },
});
// Table-driven test: each case feeds `input` through a CsvParseStream built
// from the case's options and either compares the collected records with
// `output`, or (when `output` is absent) expects the stream to reject with an
// Error whose message contains `errorMessage`.
Deno.test({
  name: "CsvParseStream handles various inputs",
  permissions: "none",
  fn: async (t) => {
    // These test cases were originally ported from Go:
    // https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
    // Copyright 2011 The Go Authors. All rights reserved. BSD license.
    // https://github.com/golang/go/blob/master/LICENSE
    const testCases = [
      {
        name: "CRLF",
        input: "a,b\r\nc,d\r\n",
        output: [["a", "b"], ["c", "d"]],
      },
      {
        name: "BareCR",
        input: "a,b\rc,d\r\n",
        output: [["a", "b\rc", "d"]],
      },
      {
        name: "NoEOLTest",
        input: "a,b,c",
        output: [["a", "b", "c"]],
      },
      {
        name: "Semicolon",
        input: "a;b;c\n",
        output: [["a", "b", "c"]],
        separator: ";",
      },
      {
        name: "MultiLine",
        // Quoted fields span several lines; the line breaks are written as
        // explicit escapes so they cannot be lost by reformatting.
        input: '"two\nline","one line","three\nline\nfield"',
        output: [["two\nline", "one line", "three\nline\nfield"]],
      },
      {
        name: "BlankLine",
        input: "a,b,c\n\nd,e,f\n\n",
        output: [
          ["a", "b", "c"],
          ["d", "e", "f"],
        ],
      },
      {
        name: "LeadingSpace",
        input: " a, b, c\n",
        output: [[" a", " b", " c"]],
      },
      {
        name: "Comment",
        input: "#1,2,3\na,b,c\n#comment",
        output: [["a", "b", "c"]],
        comment: "#",
      },
      {
        name: "NoComment",
        input: "#1,2,3\na,b,c",
        output: [
          ["#1", "2", "3"],
          ["a", "b", "c"],
        ],
      },
      {
        name: "FieldCount",
        input: "a,b,c\nd,e",
        output: [
          ["a", "b", "c"],
          ["d", "e"],
        ],
      },
      {
        name: "TrailingCommaEOF",
        input: "a,b,c,",
        output: [["a", "b", "c", ""]],
      },
      {
        name: "TrailingCommaEOL",
        input: "a,b,c,\n",
        output: [["a", "b", "c", ""]],
      },
      {
        name: "NotTrailingComma3",
        input: "a,b,c, \n",
        output: [["a", "b", "c", " "]],
      },
      {
        name: "CommaFieldTest",
        // One record per line; each line is listed separately and joined with
        // "\n" so the record boundaries are explicit.
        input: [
          "x,y,z,w",
          "x,y,z,",
          "x,y,,",
          "x,,,",
          ",,,",
          '"x","y","z","w"',
          '"x","y","z",""',
          '"x","y","",""',
          '"x","","",""',
          '"","","",""',
        ].join("\n") + "\n",
        output: [
          ["x", "y", "z", "w"],
          ["x", "y", "z", ""],
          ["x", "y", "", ""],
          ["x", "", "", ""],
          ["", "", "", ""],
          ["x", "y", "z", "w"],
          ["x", "y", "z", ""],
          ["x", "y", "", ""],
          ["x", "", "", ""],
          ["", "", "", ""],
        ],
      },
      {
        name: "CRLFInQuotedField", // Issue 21201
        input: 'A,"Hello\r\nHi",B\r\n',
        output: [["A", "Hello\nHi", "B"]],
      },
      {
        name: "BinaryBlobField", // Issue 19410
        input: "x09\x41\xb4\x1c,aktau",
        output: [["x09A\xb4\x1c", "aktau"]],
      },
      {
        name: "TrailingCR",
        input: "field1,field2\r",
        output: [["field1", "field2"]],
      },
      {
        name: "QuotedTrailingCR",
        input: '"field"\r',
        output: [["field"]],
      },
      {
        name: "FieldCR",
        input: "field\rfield\r",
        output: [["field\rfield"]],
      },
      {
        name: "FieldCRCR",
        input: "field\r\rfield\r\r",
        output: [["field\r\rfield\r"]],
      },
      {
        name: "FieldCRCRLF",
        input: "field\r\r\nfield\r\r\n",
        output: [["field\r"], ["field\r"]],
      },
      {
        name: "FieldCRCRLFCR",
        input: "field\r\r\n\rfield\r\r\n\r",
        output: [["field\r"], ["\rfield\r"]],
      },
      {
        name: "MultiFieldCRCRLFCRCR",
        input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
        output: [
          ["field1", "field2\r"],
          ["\r\rfield1", "field2\r"],
          ["\r\r", ""],
        ],
      },
      {
        name: "NonASCIICommaAndCommentWithQuotes",
        input: 'a€" b,"€ c\nλ comment\n',
        output: [["a", " b,", " c"]],
        separator: "€",
        comment: "λ",
      },
      {
        // λ and θ start with the same byte.
        // This tests that the parser doesn't confuse such characters.
        name: "NonASCIICommaConfusion",
        input: '"abθcd"λefθgh',
        output: [["abθcd", "efθgh"]],
        separator: "λ",
        comment: "€",
      },
      {
        name: "NonASCIICommentConfusion",
        input: "λ\nλ\nθ\nλ\n",
        output: [["λ"], ["λ"], ["λ"]],
        comment: "θ",
      },
      {
        name: "QuotedFieldMultipleLF",
        input: '"\n\n\n\n"',
        output: [["\n\n\n\n"]],
      },
      {
        name: "MultipleCRLF",
        input: "\r\n\r\n\r\n\r\n",
        output: [],
      },
      {
        name: "DoubleQuoteWithTrailingCRLF",
        input: '"foo""bar"\r\n',
        output: [[`foo"bar`]],
      },
      {
        name: "EvenQuotes",
        input: `""""""""`,
        output: [[`"""`]],
      },
      {
        name: "simple",
        input: "a,b,c",
        output: [["a", "b", "c"]],
        skipFirstRow: false,
      },
      {
        name: "multiline",
        input: "a,b,c\ne,f,g\n",
        output: [
          ["a", "b", "c"],
          ["e", "f", "g"],
        ],
        skipFirstRow: false,
      },
      {
        name: "header mapping boolean",
        input: "a,b,c\ne,f,g\n",
        output: [{ a: "e", b: "f", c: "g" }],
        skipFirstRow: true,
      },
      {
        name: "header mapping array",
        input: "a,b,c\ne,f,g\n",
        output: [
          { this: "a", is: "b", sparta: "c" },
          { this: "e", is: "f", sparta: "g" },
        ],
        columns: ["this", "is", "sparta"],
      },
      {
        name: "provides both opts.skipFirstRow and opts.columns",
        input: "a,b,1\nc,d,2\ne,f,3",
        output: [
          { foo: "c", bar: "d", baz: "2" },
          { foo: "e", bar: "f", baz: "3" },
        ],
        skipFirstRow: true,
        columns: ["foo", "bar", "baz"],
      },
      {
        name: "mismatching number of headers and fields",
        input: "a,b,c\nd,e",
        skipFirstRow: true,
        columns: ["foo", "bar", "baz"],
        errorMessage:
          "Error number of fields line: 1\nNumber of fields found: 3\nExpected number of fields: 2",
      },
    ];

    for (const testCase of testCases) {
      await t.step(testCase.name, async () => {
        // Only copy over the options a case actually sets, so every other
        // option keeps the stream's default.
        const options: CsvParseStreamOptions = {};
        if (testCase.separator) {
          options.separator = testCase.separator;
        }
        if (testCase.comment) {
          options.comment = testCase.comment;
        }
        if (testCase.skipFirstRow) {
          options.skipFirstRow = testCase.skipFirstRow;
        }
        if (testCase.columns) {
          options.columns = testCase.columns;
        }

        const readable = ReadableStream.from(testCase.input)
          .pipeThrough(new CsvParseStream(options));

        if (testCase.output) {
          const actual = await Array.fromAsync(readable);
          assertEquals(actual, testCase.output);
        } else {
          // Pass the error class explicitly so that `errorMessage` is treated
          // as the text the thrown error must contain. With only two
          // arguments it would be used as the assertion's failure description
          // and the thrown message would never be checked.
          await assertRejects(
            async () => {
              for await (const _ of readable);
            },
            Error,
            testCase.errorMessage,
          );
        }
      });
    }
  },
});
// Starts iterating the rows of the `large.csv` fixture and abandons the
// iteration after the first record. Per the test name, the point is that
// stopping midway must not leave the opened file leaked.
Deno.test({
  name:
    "CsvParseStream.cancel() does not leak file when called in the middle of iteration",
  permissions: { read: [testdataDir] },
  async fn() {
    const largeCsv = await Deno.open(join(testdataDir, "large.csv"));
    const decoded = largeCsv.readable.pipeThrough(new TextDecoderStream());
    const rowStream = decoded.pipeThrough(new CsvParseStream());

    // Leave the loop as soon as the first record arrives.
    for await (const _row of rowStream) {
      break;
    }

    // FIXME(kt3k): Remove this delay.
    await delay(100);
  },
});
// Compile-time checks only: each block constructs a CsvParseStream with a
// particular option shape and asserts the exact type of its `readable` side.
Deno.test({
  name: "CsvParseStream is correctly typed",
  fn() {
    // Shorthands for the two stream types asserted most often below.
    type ArrayRows = ReadableStream<string[]>;
    type LooseRecordRows = ReadableStream<Record<string, string | undefined>>;

    // If no option is passed, defaults to ReadableStream<string[]>.
    {
      const stream = new CsvParseStream().readable;
      type _ = AssertTrue<IsExact<typeof stream, ArrayRows>>;
    }
    {
      const stream = new CsvParseStream(undefined).readable;
      type _ = AssertTrue<IsExact<typeof stream, ArrayRows>>;
    }
    {
      // `skipFirstRow` may be `true` or `false`.
      // `columns` may be `undefined` or `string[]`.
      // If you don't know exactly what the value of the option is,
      // the return type is ReadableStream<string[] | Record<string, string | undefined>>
      const looseOptions: CsvParseStreamOptions = {};
      const stream = new CsvParseStream(looseOptions).readable;
      type _ = AssertTrue<
        IsExact<
          typeof stream,
          ReadableStream<string[] | Record<string, string | undefined>>
        >
      >;
    }
    {
      const stream = new CsvParseStream({}).readable;
      type _ = AssertTrue<IsExact<typeof stream, ArrayRows>>;
    }

    // skipFirstRow option
    {
      const stream = new CsvParseStream({ skipFirstRow: undefined }).readable;
      type _ = AssertTrue<IsExact<typeof stream, ArrayRows>>;
    }
    {
      const stream = new CsvParseStream({ skipFirstRow: false }).readable;
      type _ = AssertTrue<IsExact<typeof stream, ArrayRows>>;
    }
    {
      const stream = new CsvParseStream({ skipFirstRow: true }).readable;
      type _ = AssertTrue<IsExact<typeof stream, LooseRecordRows>>;
    }

    // columns option
    {
      const stream = new CsvParseStream({ columns: undefined }).readable;
      type _ = AssertTrue<IsExact<typeof stream, ArrayRows>>;
    }
    {
      const stream = new CsvParseStream({ columns: ["aaa", "bbb"] }).readable;
      type _ = AssertTrue<
        IsExact<typeof stream, ReadableStream<Record<"aaa" | "bbb", string>>>
      >;
    }
    {
      // Column names widened to `string[]` fall back to the loose record type.
      const stream = new CsvParseStream({ columns: ["aaa"] as string[] })
        .readable;
      type _ = AssertTrue<IsExact<typeof stream, LooseRecordRows>>;
    }

    // skipFirstRow option + columns option
    {
      const stream = new CsvParseStream({
        skipFirstRow: false,
        columns: undefined,
      }).readable;
      type _ = AssertTrue<IsExact<typeof stream, ArrayRows>>;
    }
    {
      const stream = new CsvParseStream({
        skipFirstRow: true,
        columns: undefined,
      }).readable;
      type _ = AssertTrue<IsExact<typeof stream, LooseRecordRows>>;
    }
    {
      const stream = new CsvParseStream({
        skipFirstRow: false,
        columns: ["aaa"],
      }).readable;
      type _ = AssertTrue<
        IsExact<typeof stream, ReadableStream<Record<"aaa", string>>>
      >;
    }
    {
      const stream = new CsvParseStream({
        skipFirstRow: true,
        columns: ["aaa"],
      }).readable;
      type _ = AssertTrue<
        IsExact<typeof stream, ReadableStream<Record<"aaa", string>>>
      >;
    }
  },
});
Version Info