diff --git a/.gitignore b/.gitignore index 1313f538..1a4e73cc 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ node_modules test/fixtures/* test/samples/* *.xlsx + +testsss diff --git a/src/danfojs-base/io/browser/io.csv.ts b/src/danfojs-base/io/browser/io.csv.ts index 883de10d..3def7ad6 100644 --- a/src/danfojs-base/io/browser/io.csv.ts +++ b/src/danfojs-base/io/browser/io.csv.ts @@ -48,14 +48,16 @@ import Papa from 'papaparse' */ const $readCSV = async (file: any, options?: CsvInputOptionsBrowser): Promise => { const frameConfig = options?.frameConfig || {} + const hasStringType = frameConfig.dtypes?.includes("string") return new Promise((resolve, reject) => { let hasError = false; Papa.parse(file, { header: true, - dynamicTyping: true, + dynamicTyping: !hasStringType, skipEmptyLines: 'greedy', + delimiter: ",", ...options, error: (error) => { hasError = true; @@ -108,12 +110,13 @@ const $streamCSV = async (file: string, callback: (df: DataFrame) => void, optio return new Promise((resolve, reject) => { let count = 0 let hasError = false; - + const hasStringType = frameConfig.dtypes?.includes("string") Papa.parse(file, { - ...options, - dynamicTyping: true, header: true, download: true, + dynamicTyping: !hasStringType, + delimiter: ",", + ...options, step: results => { if (hasError) return; try { diff --git a/src/danfojs-base/io/node/io.csv.ts b/src/danfojs-base/io/node/io.csv.ts index 7126ab49..6cf11e86 100644 --- a/src/danfojs-base/io/node/io.csv.ts +++ b/src/danfojs-base/io/node/io.csv.ts @@ -50,14 +50,16 @@ import fs from 'fs' */ const $readCSV = async (filePath: string, options?: CsvInputOptionsNode): Promise => { const frameConfig = options?.frameConfig || {} + const hasStringType = frameConfig.dtypes?.includes("string") if (filePath.startsWith("http") || filePath.startsWith("https")) { return new Promise((resolve, reject) => { let hasError = false; const optionsWithDefaults = { header: true, - dynamicTyping: true, + dynamicTyping: !hasStringType, skipEmptyLines: 'greedy', + delimiter: ",", ...options, } @@ -116,7 +118,8 @@ const $readCSV = async (filePath: string, options?: CsvInputOptionsNode): Promis Papa.parse(fileStream, { header: true, - dynamicTyping: true, + dynamicTyping: !hasStringType, + delimiter: ",", ...options, error: (error) => { hasError = true; diff --git a/src/danfojs-browser/tests/io/csv.reader.test.js b/src/danfojs-browser/tests/io/csv.reader.test.js index ca8855de..9fcc2a2f 100644 --- a/src/danfojs-browser/tests/io/csv.reader.test.js +++ b/src/danfojs-browser/tests/io/csv.reader.test.js @@ -97,6 +97,41 @@ describe("readCSV", function () { assert.ok(error instanceof Error); } }); + + it("Preserves leading zeros when dtype is string", async function () { + // Create a CSV file with leading zeros + const csvContent = "codes\n012345\n001234"; + const file = new File([ csvContent ], "leading_zeros.csv", { type: "text/csv" }); + + const df = await dfd.readCSV(file, { + frameConfig: { + dtypes: [ "string" ] + } + }); + + assert.deepEqual(df.values, [ [ "012345" ], [ "001234" ] ]); + assert.deepEqual(df.dtypes, [ "string" ]); + + // Verify the values are actually strings + const jsonData = dfd.toJSON(df); + assert.deepEqual(jsonData, [ { codes: "012345" }, { codes: "001234" } ]); + }); + + it("Converts to numbers when dtype is not string", async function () { + // Create a CSV file with leading zeros + const csvContent = "codes\n012345\n001234"; + const file = new File([ csvContent ], "leading_zeros.csv", { type: "text/csv" }); + + const df = await dfd.readCSV(file); // default behavior without string dtype + + // Values should be converted to numbers + assert.deepEqual(df.values, [ [ 12345 ], [ 1234 ] ]); + assert.deepEqual(df.dtypes, [ "int32" ]); + + // Verify JSON output + const jsonData = dfd.toJSON(df); + assert.deepEqual(jsonData, [ { codes: 12345 }, { codes: 1234 } ]); + }); }); // describe("streamCSV", function () { diff --git a/src/danfojs-node/test/io/csv.reader.test.ts b/src/danfojs-node/test/io/csv.reader.test.ts index f1ff730f..4cfef8dd 100644 --- a/src/danfojs-node/test/io/csv.reader.test.ts +++ b/src/danfojs-node/test/io/csv.reader.test.ts @@ -2,7 +2,7 @@ import path from "path"; import chai, { assert, expect } from "chai"; import { describe, it } from "mocha"; import chaiAsPromised from "chai-as-promised"; -import { DataFrame, readCSV, Series, streamCSV, toCSV } from "../../dist/danfojs-node/src"; +import { DataFrame, readCSV, Series, streamCSV, toCSV, toJSON } from "../../dist/danfojs-node/src"; import fs from 'fs'; import process from 'process'; @@ -112,6 +112,59 @@ describe("readCSV", function () { const filePath = path.join(testSamplesDir, "invalid.csv"); await expect(readCSV(filePath)).to.be.rejectedWith("ENOENT: no such file or directory"); }); + + it("Preserves leading zeros when dtype is string", async function () { + const filePath = path.join(testSamplesDir, "leading_zeros.csv"); + // Create test CSV file + fs.writeFileSync(filePath, "codes\n012345\n001234"); + + try { + const df = await readCSV(filePath, { + frameConfig: { + dtypes: ["string"] + } + }); + + assert.deepEqual(df.values, [["012345"], ["001234"]]); + assert.deepEqual(df.dtypes, ["string"]); + + // Verify the values are actually strings + const jsonData = toJSON(df); + assert.deepEqual(jsonData, [{ codes: "012345" }, { codes: "001234" }]); + + // Clean up + fs.unlinkSync(filePath); + } catch (error) { + // Clean up even if test fails + fs.unlinkSync(filePath); + throw error; + } + }); + + it("Converts to numbers when dtype is not string", async function () { + const filePath = path.join(testSamplesDir, "leading_zeros.csv"); + // Create test CSV file + fs.writeFileSync(filePath, "codes\n012345\n001234"); + + try { + const df = await readCSV(filePath); // default behavior without string dtype + + // Values should be converted to numbers + assert.deepEqual(df.values, [[12345], [1234]]); + assert.deepEqual(df.dtypes, ["int32"]); + + // Verify JSON output + const jsonData = toJSON(df); + assert.deepEqual(jsonData, [{ codes: 12345 }, { codes: 1234 }]); + + // Clean up + fs.unlinkSync(filePath); + } catch (error) { + // Clean up even if test fails + fs.unlinkSync(filePath); + throw error; + } + }); }); describe("streamCSV", function () {