Skip to content

Commit 4f8f880

Browse files
committed
updated benchmarks and added back fast mode
1 parent 7f43b2f commit 4f8f880

File tree

5 files changed

+171
-17
lines changed

5 files changed

+171
-17
lines changed

src/ci/performance-benchmark.ts

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,26 +48,35 @@ export class PerformanceBenchmark {
4848
/**
 * Build a large synthetic CSV dataset for stress testing.
 *
 * The first line is a header of `column_<n>` names. Every following line
 * cycles through four value kinds by column index so that dynamic typing
 * sees a mix of inputs: a string, a random number below 1000, a random
 * "true"/"false", and the integer product of row and column index.
 *
 * @param rows - Number of data rows to generate (the header line is extra).
 * @param columns - Number of columns in every row.
 * @param withQuotes - When true, string cells are wrapped in double quotes;
 *   when false the content contains no quote characters at all.
 * @returns The generated dataset with its name, CSV text, expected row
 *   count and a human-readable description.
 */
generateLargeTestData(
  rows: number,
  columns: number,
  withQuotes: boolean = false,
): TestData {
  const headerLine = Array.from(
    { length: columns },
    (_, col) => `column_${col}`,
  ).join(",");
  const lines = [headerLine];

  // Produces the cell for a given row/column; the column index picks the kind.
  const makeCell = (rowIdx: number, colIdx: number): string => {
    switch (colIdx % 4) {
      case 0: {
        const text = `string_value_${rowIdx}_${colIdx}`;
        return withQuotes ? `"${text}"` : text;
      }
      case 1:
        return String(Math.random() * 1000);
      case 2:
        return Math.random() > 0.5 ? "true" : "false";
      default:
        return String(rowIdx * colIdx);
    }
  };

  for (let rowIdx = 0; rowIdx < rows; rowIdx++) {
    const cells = Array.from({ length: columns }, (_, colIdx) =>
      makeCell(rowIdx, colIdx),
    );
    lines.push(cells.join(","));
  }

  const nameSuffix = withQuotes ? "_with_quotes" : "";
  const modeLabel = withQuotes ? " (with quotes)" : " (fast mode)";

  return {
    name: `large_${rows}x${columns}${nameSuffix}`,
    csvContent: lines.join("\n"),
    expectedRows: rows,
    description: `Large dataset with ${rows} rows and ${columns} columns${modeLabel}`,
  };
}
7382

@@ -117,7 +126,7 @@ export class PerformanceBenchmark {
117126
return new Promise((resolve, reject) => {
118127
try {
119128
const result = parser.parse(testData.csvContent, {
120-
header: true,
129+
header: false,
121130
dynamicTyping: true,
122131
complete: (results: any) => {
123132
const endTime = Date.now();
@@ -178,7 +187,7 @@ export class PerformanceBenchmark {
178187
return new Promise((resolve, reject) => {
179188
try {
180189
const result = parser.parse(testData.csvContent, {
181-
header: true,
190+
header: false,
182191
dynamicTyping: true,
183192
complete: (results: any) => {
184193
const endTime = Date.now();
@@ -284,9 +293,10 @@ export class PerformanceBenchmark {
284293
let totalPerformanceRatio = 0;
285294
let totalMemoryRatio = 0;
286295

287-
// Add standard test datasets
288-
this.addTestData(this.generateLargeTestData(10000, 10));
289-
this.addTestData(this.generateLargeTestData(50000, 5));
296+
// Add standard test datasets - test both fast mode and quote handling
297+
this.addTestData(this.generateLargeTestData(10000, 10, false)); // Fast mode
298+
this.addTestData(this.generateLargeTestData(50000, 5, false)); // Fast mode
299+
this.addTestData(this.generateLargeTestData(10000, 10, true)); // With quotes
290300
this.addTestData(this.generateProblematicTestData());
291301

292302
for (const testData of this.testData) {

src/constants/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ export const DEFAULT_CONFIG = {
7979
downloadRequestBody: "",
8080
skipEmptyLines: false,
8181
chunk: false,
82-
fastMode: false,
82+
fastMode: undefined,
8383
beforeFirstChunk: false,
8484
withCredentials: false,
8585
transform: false,

src/core/lexer.ts

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ export interface LexerConfig {
4545
quoteChar: string;
4646
escapeChar: string;
4747
comments: string | false;
48-
fastMode: boolean;
48+
fastMode: boolean | undefined;
4949
}
5050

5151
/**
@@ -74,7 +74,7 @@ export class Lexer {
7474
private quoteChar: string;
7575
private escapeChar: string;
7676
private comments: string | false;
77-
private fastMode: boolean;
77+
private fastMode: boolean | undefined;
7878

7979
constructor(config: LexerConfig) {
8080
this.config = config;
@@ -329,10 +329,8 @@ export class Lexer {
329329
* Legacy reference: line 1482
330330
*/
331331
private canUseFastMode(): boolean {
  // An explicit boolean setting always wins, so the input does not need to
  // be scanned at all in that case.
  if (typeof this.fastMode === "boolean") {
    return this.fastMode;
  }

  // No explicit setting: fast mode is only safe when the input contains no
  // quote character. The scan is done lazily because it is linear in the
  // input size.
  return this.input.indexOf(this.quoteChar) === -1;
}
337335

338336
/**

src/core/parser.ts

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import type {
66
} from "../types/index.js";
77
import { isFunction, stripBom } from "../utils/index.js";
88
import { createLexerConfig, Lexer, type Token, TokenType } from "./lexer.js";
9+
import { parseDynamic } from "../heuristics/dynamic-typing.js";
910

1011
/**
1112
* Parser state for row assembly and processing
@@ -55,6 +56,141 @@ export class Parser implements PapaParseParser {
5556
this.state = this.createInitialState();
5657
}
5758

59+
/**
 * Fast mode parsing - bypasses the lexer for performance.
 *
 * Splits `input` on the newline sequence and every row on the delimiter,
 * with no quote handling at all. Intended to follow the legacy fast path
 * (legacy reference: lines 1482-1513).
 *
 * @param input - Raw text to split into rows and fields.
 * @param baseIndex - Offset added to the reported `meta.cursor`.
 * @param ignoreLastRow - When true, the final row of `input` is not emitted.
 * @returns The parsed rows (arrays, or objects when `config.header` is set)
 *   together with an empty error list and the parse metadata.
 */
private parseFastMode(
  input: string,
  baseIndex: number,
  ignoreLastRow: boolean,
): PapaParseResult {
  const newline =
    (this.config.newline as string) || this.guessLineEndings(input) || "\r\n";
  const delimiter = (this.config.delimiter as string) || ",";
  const comments =
    typeof this.config.comments === "string" ? this.config.comments : false;
  const commentsLen = comments ? comments.length : 0;
  const hasHeader = Boolean(this.config.header);

  const rows = input.split(newline);
  const data: any[][] = [];
  // Number of characters of `input` consumed so far (rows plus separators).
  let consumed = 0;
  let truncated = false;

  for (let i = 0; i < rows.length; i++) {
    const row = rows[i];
    const isLastRow = i === rows.length - 1;

    if (isLastRow && ignoreLastRow) {
      break;
    }

    consumed += row.length + (isLastRow ? 0 : newline.length);

    // Skip comment lines
    if (comments && row.substring(0, commentsLen) === comments) {
      continue;
    }

    let fields: any[] = row.split(delimiter);

    // The first emitted row becomes the object keys when `header` is set, so
    // it must stay as raw strings: value transforms and dynamic typing would
    // otherwise rewrite the keys (e.g. "1.0" -> 1).
    // NOTE(review): `config.transformHeader` is not applied on this path -
    // confirm whether a caller handles it.
    const isHeaderRow = hasHeader && data.length === 0;

    if (!isHeaderRow && (this.config.dynamicTyping || this.config.transform)) {
      fields = fields.map((field, index) => {
        let value: any = field;

        // Apply transform if provided
        if (
          this.config.transform &&
          typeof this.config.transform === "function"
        ) {
          value = this.config.transform(value, index);
        }

        // Apply dynamic typing
        // NOTE(review): the column index is passed as the field name even
        // when headers are present - verify against parseDynamic's contract.
        if (this.config.dynamicTyping) {
          value = parseDynamic(
            value,
            String(index),
            this.config.dynamicTyping as any,
          );
        }

        return value;
      });
    }

    data.push(fields);

    // Handle preview limit; the result is truncated only if rows remain.
    if (this.config.preview && data.length >= this.config.preview) {
      truncated = !isLastRow;
      break;
    }
  }

  return this.buildFastModeResult(data, baseIndex, {
    linebreak: newline,
    consumed,
    truncated,
  });
}

/**
 * Build result for fast mode parsing.
 *
 * When `config.header` is set and at least one row exists, the first row is
 * used as the key list and every later row is converted to an object.
 * Positions missing from a short row are filled with `""`; values that are
 * present are kept as-is, including `0`, `false` and `null`.
 *
 * @param data - Rows produced by the fast-mode splitter.
 * @param baseIndex - Offset added to the reported `meta.cursor`.
 * @param details - Optional facts about the parse that produced `data`:
 *   `linebreak` is the newline sequence actually used, `consumed` is the
 *   number of input characters processed, `truncated` is whether a preview
 *   limit stopped parsing early. When omitted, the configured newline
 *   (or "\r\n"), `data.length` and `false` are used respectively.
 * @returns The result object with `data`, an empty `errors` list and `meta`.
 */
private buildFastModeResult(
  data: any[][],
  baseIndex: number,
  details: { linebreak?: string; consumed?: number; truncated?: boolean } = {},
): PapaParseResult {
  const advanced =
    details.consumed !== undefined ? details.consumed : data.length;
  const meta = {
    delimiter: (this.config.delimiter as string) || ",",
    linebreak: details.linebreak || (this.config.newline as string) || "\r\n",
    aborted: false,
    truncated: details.truncated === true,
    cursor: baseIndex + advanced,
  };

  if (!this.config.header || data.length === 0) {
    return { data, errors: [], meta };
  }

  const headers = data[0];
  const objectData = data.slice(1).map((row) => {
    const obj: any = {};
    headers.forEach((header, index) => {
      const value = row[index];
      // Only a genuinely missing field falls back to "".
      obj[header] = value === undefined ? "" : value;
    });
    return obj;
  });

  return { data: objectData, errors: [], meta };
}
179+
180+
/**
181+
* Guess line endings from input
182+
*/
183+
private guessLineEndings(input: string): string | null {
184+
const crCount = (input.match(/\r/g) || []).length;
185+
const lfCount = (input.match(/\n/g) || []).length;
186+
const crlfCount = (input.match(/\r\n/g) || []).length;
187+
188+
if (crlfCount > 0) return "\r\n";
189+
if (lfCount > crCount) return "\n";
190+
if (crCount > 0) return "\r";
191+
return null;
192+
}
193+
58194
/**
59195
* Parse input string and return results
60196
* Legacy reference: lines 1461-1806
@@ -72,6 +208,16 @@ export class Parser implements PapaParseParser {
72208
return this.buildResult(baseIndex);
73209
}
74210

211+
// Fast mode bypass - mirror legacy behavior exactly (lines 1482-1513)
212+
const canUseFastMode =
213+
this.config.fastMode === true ||
214+
(this.config.fastMode !== false &&
215+
input.indexOf(this.config.quoteChar || '"') === -1);
216+
217+
if (canUseFastMode && !this.config.step && !this.config.chunk) {
218+
return this.parseFastMode(input, baseIndex, ignoreLastRow);
219+
}
220+
75221
// Tokenize input
76222
const { tokens, errors, terminatedByComment } = this.lexer.tokenize();
77223
this.state.errors.push(...errors);

src/types/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ export interface StrictParseConfig<
284284
downloadRequestBody: string;
285285
skipEmptyLines: boolean | "greedy";
286286
chunk: false | ((results: PapaParseResult, parser: PapaParseParser) => void);
287-
fastMode: boolean;
287+
fastMode: boolean | undefined;
288288
beforeFirstChunk: false | ((chunk: string) => string);
289289
withCredentials: boolean;
290290
transformHeader: false | ((header: string, index: number) => string);

0 commit comments

Comments
 (0)