Migrating to Node.js Web Streams? Benchmark First!

The Node.js Web Streams API has been stable since Node.js 21, and I'd been waiting for the Node.js 22 LTS release to migrate to it. The fact that it's the new standard was compelling enough.
Before migrating my existing Stream API code, I wanted to benchmark the new API first. I grabbed a dataset file from the U.S. Government’s Open Data website. I was particularly interested in the border crossing entry data to understand the fuss caused by the Trump administration. After looking at the data, I came to the conclusion that Trump is just a paranoid old guy.
Jokes aside, let's get back to Node.js and streams!
The file was in CSV format, and I wanted to transform it into JSONL format without any external libraries, so that third-party code wouldn't skew the comparison. Here’s the Node.js Stream API version:
const fs = require("node:fs");
const { Transform } = require("node:stream");
const { pipeline } = require("node:stream/promises");

class OneLineAtATime extends Transform {
  constructor() {
    super();
    this.buffer = Buffer.alloc(0);
  }

  _transform(chunk, _encoding, callback) {
    this.buffer = Buffer.concat([this.buffer, chunk]);
    let index;
    while ((index = this.buffer.indexOf("\n")) !== -1) {
      const line = this.buffer.subarray(0, index);
      this.push(line);
      this.buffer = this.buffer.subarray(index + 1);
    }
    callback();
  }

  _flush(callback) {
    if (this.buffer.length > 0) {
      this.push(this.buffer);
    }
    callback();
  }
}

class ToJson extends Transform {
  constructor() {
    super();
    this.keys = null;
  }

  _transform(chunk, _encoding, callback) {
    const chunkAsString = chunk.toString();
    if (!this.keys) {
      this.keys = chunkAsString.split(",").map((key) => key.trim());
      callback();
      return;
    }
    const values = chunkAsString.split(",");
    const obj = {};
    for (let i = 0; i < this.keys.length; i++) {
      obj[this.keys[i]] = values[i]?.trim() || "";
    }
    this.push(Buffer.from(JSON.stringify(obj) + "\n"));
    callback();
  }
}

pipeline(
  fs.createReadStream("./data/input/Border_Crossing_Entry_Data.csv"),
  new OneLineAtATime(),
  new ToJson(),
  fs.createWriteStream("./data/output/Border_Crossing_Entry_Data_streams.jsonl")
)
  .then(() => {
    console.log("Pipeline succeeded.");
  })
  .catch((err) => {
    console.error("Pipeline failed.", err);
  });
When I run it with:
time node streams.js
I get the output:
Pipeline succeeded.
real 0m1,477s
user 0m1,337s
sys 0m0,315s
That’s fair for a ~400k-line file.
Curious to see how the Web Stream API would perform, I implemented the same logic using it:
const fs = require("node:fs");
const { Readable, Writable } = require("node:stream");
const { TransformStream } = require("node:stream/web");

class OneLineAtATime {
  constructor() {
    this.buffer = Buffer.alloc(0);
  }

  transform(chunk, controller) {
    this.buffer = Buffer.concat([this.buffer, chunk]);
    let index;
    while ((index = this.buffer.indexOf("\n")) !== -1) {
      const line = this.buffer.subarray(0, index);
      controller.enqueue(line);
      this.buffer = this.buffer.subarray(index + 1);
    }
  }

  flush(controller) {
    if (this.buffer.length > 0) {
      controller.enqueue(this.buffer);
    }
  }
}

class ToJson {
  constructor() {
    this.keys = null;
  }

  transform(chunk, controller) {
    const chunkAsString = chunk.toString();
    if (!this.keys) {
      this.keys = chunkAsString.split(",").map((key) => key.trim());
      return;
    }
    const values = chunkAsString.split(",");
    const obj = {};
    for (let i = 0; i < this.keys.length; i++) {
      obj[this.keys[i]] = values[i]?.trim() || "";
    }
    controller.enqueue(Buffer.from(JSON.stringify(obj) + "\n"));
  }
}

// Convert Node.js readable stream to Web ReadableStream
const readableStream = Readable.toWeb(fs.createReadStream("./data/input/Border_Crossing_Entry_Data.csv"));

// Convert Node.js writable stream to Web WritableStream
const writableStream = Writable.toWeb(fs.createWriteStream("./data/output/Border_Crossing_Entry_Data_webstreams.jsonl"));

// Create Web Transform Streams
const lineSplitter = new TransformStream(new OneLineAtATime());
const jsonConverter = new TransformStream(new ToJson());

// Connect the streams
readableStream
  .pipeThrough(lineSplitter)
  .pipeThrough(jsonConverter)
  .pipeTo(writableStream)
  .then(() => console.log("Pipeline succeeded."))
  .catch((err) => console.error("Pipeline failed.", err));
Running it with:
time node webstreams.js
I get:
Pipeline succeeded.
real 0m35,551s
user 0m35,880s
sys 0m0,894s
That’s a huge difference!
To understand this performance gap, I decided to dig deeper by profiling both processes.
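The summaries come from V8's built-in sampling profiler; if you want to reproduce them, the invocation looks roughly like this (the generated isolate-*.log file name changes on every run, so point --prof-process at the right log):
node --prof streams.js
node --prof-process isolate-*.log > streams-profile.txt
node --prof webstreams.js
node --prof-process isolate-*.log > webstreams-profile.txt
Here are the profiling results: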
Node.js Stream API Profiling Summary:
[Summary]:
ticks total nonlib name
406 28.3% 30.2% JavaScript
295 20.6% 21.9% C++
51 3.6% 3.8% GC
90 6.3% Shared libraries
644 44.9% Unaccounted
Web Stream API Profiling Summary:
[Summary]:
ticks total nonlib name
2036 6.1% 7.3% JavaScript
468 1.4% 1.7% C++
433 1.3% 1.6% GC
5447 16.4% Shared libraries
25225 76.0% Unaccounted
From these summaries, we can see that the traditional Stream API relies far more on C++ execution (20.6% of ticks vs. 1.4%), which helps explain its speed. Meanwhile, the Web Stream API run not only records many times more ticks in total, but reports 76% of them as "Unaccounted", which is likely overhead from its additional layers of abstraction.
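Before pinning that on any single piece, one way to narrow it down would be a micro-benchmark that pushes the same synthetic chunks through identity pipelines with no CSV work at all: the classic Stream API, the Web Stream API fed through toWeb(), and the Web Stream API with a natively constructed ReadableStream. The sketch below is illustrative only; it was not part of the measurements above:
const { Transform, Readable, Writable } = require("node:stream");
const { pipeline } = require("node:stream/promises");
const { ReadableStream, WritableStream, TransformStream } = require("node:stream/web");

const N = 400_000;
const CHUNK = Buffer.from("a,small,csv-like,line\n");

function* generateChunks() {
  for (let i = 0; i < N; i++) yield CHUNK;
}

function identityTransform() {
  return new TransformStream({ transform(chunk, controller) { controller.enqueue(chunk); } });
}

// Identity pipeline built on the classic Stream API.
async function benchNodeStreams() {
  await pipeline(
    Readable.from(generateChunks()),
    new Transform({ transform(chunk, _enc, cb) { cb(null, chunk); } }),
    new Writable({ write(_chunk, _enc, cb) { cb(); } })
  );
}

// Identity pipeline: Node.js readable converted with toWeb().
async function benchWebViaToWeb() {
  await Readable.toWeb(Readable.from(generateChunks()))
    .pipeThrough(identityTransform())
    .pipeTo(new WritableStream({ write() {} }));
}

// Identity pipeline: ReadableStream constructed natively, no conversion step.
async function benchWebNative() {
  let i = 0;
  await new ReadableStream({
    pull(controller) {
      if (i++ < N) controller.enqueue(CHUNK);
      else controller.close();
    },
  })
    .pipeThrough(identityTransform())
    .pipeTo(new WritableStream({ write() {} }));
}

async function time(label, fn) {
  const start = process.hrtime.bigint();
  await fn();
  console.log(label, Number(process.hrtime.bigint() - start) / 1e6, "ms");
}

(async () => {
  await time("node streams:   ", benchNodeStreams);
  await time("web via toWeb():", benchWebViaToWeb);
  await time("web native:     ", benchWebNative);
})();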
The main culprit might be toWeb(), as it adds an extra conversion step. I may explore the Web Stream API in a scenario that avoids this conversion, like fetching data from an HTTP API and writing it directly to a database.
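As a rough sketch of what that could look like (illustrative only: the URL is a placeholder, the database write is left as a stub, and it reuses the OneLineAtATime and ToJson classes plus the TransformStream import from the Web Stream version above; fetch() hands you a Web ReadableStream directly, so no toWeb() conversion is involved):
const { WritableStream } = require("node:stream/web");

fetch("https://example.com/api/border-crossings.csv") // placeholder URL
  .then((response) =>
    response.body
      .pipeThrough(new TransformStream(new OneLineAtATime()))
      .pipeThrough(new TransformStream(new ToJson()))
      .pipeTo(
        new WritableStream({
          async write(jsonLine) {
            // Hand each JSONL record to a database client here.
          },
        })
      )
  )
  .then(() => console.log("Pipeline succeeded."))
  .catch((err) => console.error("Pipeline failed.", err));
Whether the overhead disappears in that kind of setup is exactly what I'd want to measure next.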
For now, I’ll stick with the Stream API due to its significant performance advantage.