>_>_
Processing 13 million rows from a CSV file in the Browser (Without freezing the screen)

Processing 13 million rows from a CSV file in the Browser (Without freezing the screen)

Have you ever imagined processing huge files—like a 13-million-row CSV—directly in the browser without freezing the screen?

Traditionally, browsers are seen only as UI renderers, while heavy processing tasks are pushed to the backend. But thanks to modern browser APIs like Streams and Web Workers, we can now offload these tasks from the main thread, keeping the UI smooth while processing files asynchronously.

🧪 Application Goal

We're going to build a pure JavaScript browser app that:

  • Loads multiple massive CSV files
  • Processes them concurrently in separate threads
  • Converts each CSV row into a JavaScript object
  • Simulates sending those objects to a server (mocked requests)

🛡️ Key Concepts Used

  • Streams: To read the file chunk by chunk (instead of loading it fully into memory)
  • Web Workers: To handle each file in its own thread (no UI blocking)

📆 Requirements

  • Google Chrome v111.0.5563.65 (or higher)
  • Serve the app via localhost (e.g., with VS Code Live Server) — browsers refuse to start a Web Worker from a page opened directly as a file:// URL
  • A CSV file with ~13 million rows (you can generate one for testing)

🌐 Thread Worker (threadWorker.js)

The thread worker is responsible for:

  • Reading the CSV file
  • Converting lines into JS objects
  • Simulating asynchronous server requests
// Module-level state shared by the transformers, the sink and the message handlers below.

// Stream obtained from the file sent by the main thread; assigned in the "message" handler.
let readableStream = null;
// Index received from the main thread; included in every message posted back.
let fileIndex = null;
// Running sum of the chunk lengths seen by ProgressTransform.
let bytesLoaded = 0;
// Number of simulated requests whose timer has fired (incremented in postRequest).
let linesSent = 0;
// One promise per row, pushed by MyWritable.write().
const objectsToSend = [];
// Set to true by ProgressTransform.flush().
let fileCompletelyLoaded = false;
// Dispatched by MyWritable.close() to trigger the 'ready' listener.
const readyEvent = new Event('ready');

/**
 * TransformStream transformer that turns raw CSV bytes into JSON strings,
 * one per data row.
 *
 * The first non-empty line is treated as the header and supplies the object
 * keys. Fields are split on plain commas (quoted fields are not supported).
 * Chunks may end anywhere, so the unterminated fragment at the end of each
 * chunk is buffered in `tailChunk` and prepended to the next chunk; `flush`
 * emits a final row that has no trailing newline.
 *
 * Blank lines and rows whose field count differs from the header are skipped.
 */
const ObjectTranform = {
  headerLine: true,
  keys: [],
  tailChunk: '',

  /** Creates the decoder and resets the parsing state for a fresh stream. */
  start() {
    this.decoder = new TextDecoder('utf-8');
    this.headerLine = true;
    this.keys = [];
    this.tailChunk = '';
  },

  /**
   * Handles one complete line: stores the header, or enqueues a data row.
   * @param {string} rawLine - Line without its "\n" terminator.
   * @param {{ enqueue: (value: string) => void }} controller - Stream controller.
   */
  parseLine(rawLine, controller) {
    // Tolerate Windows line endings: "\r\n" leaves a trailing "\r" after the split.
    const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
    if (line === '') {
      return;
    }

    const values = line.split(',');
    if (this.headerLine) {
      this.keys = values;
      this.headerLine = false;
      return;
    }

    // A complete line with the wrong number of fields cannot be mapped to the header.
    if (values.length !== this.keys.length) {
      return;
    }

    const row = {};
    this.keys.forEach((key, index) => {
      row[key] = values[index];
    });
    controller.enqueue(JSON.stringify(row));
  },

  /**
   * Decodes a chunk and processes every line that is fully contained in the
   * data received so far.
   * @param {Uint8Array} chunk - Raw bytes read from the file.
   * @param {{ enqueue: (value: string) => void }} controller - Stream controller.
   */
  transform(chunk, controller) {
    // stream: true keeps a multi-byte character split across chunks intact.
    const text = this.tailChunk + this.decoder.decode(chunk, { stream: true });
    const lines = text.split('\n');
    // The last element has no terminator yet; it may continue in the next chunk.
    this.tailChunk = lines.pop();
    for (const line of lines) {
      this.parseLine(line, controller);
    }
  },

  /**
   * Processes whatever is left once the source is exhausted.
   * @param {{ enqueue: (value: string) => void }} controller - Stream controller.
   */
  flush(controller) {
    const rest = this.tailChunk + this.decoder.decode();
    this.tailChunk = '';
    this.parseLine(rest, controller);
  },
};

/**
 * Pass-through transformer that counts the bytes flowing through it and
 * reports the running totals to the main thread after every chunk.
 */
const ProgressTransform = {
  /** Forwards the chunk unchanged, then posts the updated counters. */
  transform(chunk, controller) {
    bytesLoaded = bytesLoaded + chunk.length;
    controller.enqueue(chunk);

    const report = {
      progressLoaded: bytesLoaded,
      progressSent: linesSent,
      index: fileIndex,
      totalToSend: 0,
    };
    postMessage(report);
  },

  /** Marks the file as fully read once the source has no more chunks. */
  flush() {
    fileCompletelyLoaded = true;
  },
};

/**
 * Sink for the pipeline: starts one simulated request per row and, when the
 * stream closes, reports the total and fires the 'ready' event.
 */
const MyWritable = {
  /** Parses the JSON row and queues the promise of its simulated request. */
  write(chunk) {
    const row = JSON.parse(chunk);
    const pendingRequest = postRequest(row);
    objectsToSend.push(pendingRequest);
  },

  /** Announces the final row count, but only if the file was read to the end. */
  close() {
    if (!fileCompletelyLoaded) {
      return;
    }

    const summary = {
      totalToSend: objectsToSend.length,
      index: fileIndex,
      progressLoaded: bytesLoaded,
      progressSent: linesSent,
    };
    postMessage(summary);
    dispatchEvent(readyEvent);
  },

  /** Logs the reason the stream was aborted. */
  abort(err) {
    console.error("Sink error:", err);
  },
};

/**
 * Simulates sending one row to a server: after 3 seconds it bumps the sent
 * counter, reports progress to the main thread, and resolves with the row.
 *
 * @param {object} data - The row to "send".
 * @returns {Promise<object>} Resolves with `data` once the timer fires.
 */
const postRequest = async (data) => {
  const scheduleCompletion = (resolve) => {
    const onTimeout = () => {
      linesSent += 1;
      postMessage({
        totalToSend: objectsToSend.length,
        progressSent: linesSent,
        progressLoaded: bytesLoaded,
        index: fileIndex,
      });
      resolve(data);
    };
    setTimeout(onTimeout, 3000);
  };

  return new Promise(scheduleCompletion);
};

/** Waits until every queued simulated request has settled. */
async function awaitQueuedRequests() {
  await Promise.all(objectsToSend);
}

// Fired by the sink's close() after the whole file has been piped through.
addEventListener('ready', awaitQueuedRequests);

/**
 * Entry point of the worker: receives `{ index, file }` from the main thread
 * and pipes the file through the progress and CSV transformers into the sink.
 */
addEventListener("message", (event) => {
  fileIndex = event.data?.index;

  const file = event.data?.file;
  // Without a File/Blob there is nothing to stream; bail out instead of
  // throwing a TypeError on `undefined.pipeThrough`.
  if (typeof file?.stream !== 'function') {
    console.error("Worker message ignored: payload has no readable file", event.data);
    return;
  }

  readableStream = file.stream();
  readableStream
    .pipeThrough(new TransformStream(ProgressTransform))
    .pipeThrough(new TransformStream(ObjectTranform))
    .pipeTo(new WritableStream(MyWritable))
    // pipeTo() returns a promise; handle its rejection so a failed pipeline
    // does not surface as an unhandled rejection.
    .catch((err) => {
      console.error("Stream pipeline failed:", err);
    });
});

🔄 UI Logic (main.js)

// File picker whose 'change' event starts the processing.
const input = document.getElementById('files');
// Container whose contents are replaced with the progress table.
const progress = document.getElementById('progress');

/**
 * Formats a byte count as a human-readable string using binary (1024-based) units.
 *
 * @param {number|string} bytes - Byte count; zero, NaN and non-numeric input yield '0 Bytes'.
 * @param {number} [decimals=2] - Maximum number of fraction digits (negative values count as 0).
 * @returns {string} For example '1.5 KiB'.
 */
const formatBytes = (bytes, decimals = 2) => {
  const value = Number(bytes);
  if (!value) return '0 Bytes';

  const k = 1024;
  const dm = decimals < 0 ? 0 : decimals;
  const sizes = ['Bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'];

  // Use the magnitude so negative values do not produce NaN, and clamp the
  // index so values below 1 or beyond the largest unit still map to a label.
  const exponent = Math.floor(Math.log(Math.abs(value)) / Math.log(k));
  const i = Math.min(Math.max(exponent, 0), sizes.length - 1);

  return `${parseFloat((value / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`;
};

const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };

/** Escapes the characters that are significant in HTML so text can be embedded safely. */
const escapeHtml = (text) => String(text).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

/** Returns done/total as a whole percentage, or 0 while the total is still unknown. */
const toPercent = (done, total) => (total > 0 ? Math.ceil((done / total) * 100) : 0);

/** Builds the progress table markup from the latest info reported by each worker. */
const renderProgressTable = (workersInfo) => `
          <table align="center" border cellspacing="1">
            <thead>
              <tr>
                <th>File</th>
                <th>File Size</th>
                <th>Loaded</th>
                <th></th>
                <th>Total Rows</th>
                <th>Rows Sent</th>
                <th></th>
              </tr>
            </thead>
            <tbody>
              ${workersInfo.map((info) => `
                <tr>
                  <td>${escapeHtml(info.fileName)}</td>
                  <td>${formatBytes(info.fileSize)}</td>
                  <td>${formatBytes(info.progressLoaded)}</td>
                  <td><progress value="${toPercent(info.progressLoaded, info.fileSize)}" max="100"></progress></td>
                  <td>${info.totalToSend}</td>
                  <td>${info.progressSent}</td>
                  <td><progress value="${toPercent(info.progressSent, info.totalToSend)}" max="100"></progress></td>
                </tr>`).join('')}
            </tbody>
          </table>`;

/**
 * Starts one worker per selected file and re-renders the progress table every
 * time a worker reports back.
 */
input.addEventListener('change', (e) => {
  const workersInfo = [];

  // Array.from visits only the File entries; `for...in` on a FileList would
  // also visit its `length` and `item` members.
  Array.from(e.target.files).forEach((file, i) => {
    const worker = new Worker("threadWorker.js");

    worker.addEventListener("message", (event) => {
      if (event.data) {
        workersInfo[i] = {
          progressSent: event.data.progressSent,
          progressLoaded: event.data.progressLoaded,
          index: event.data.index,
          totalToSend: event.data.totalToSend,
          fileSize: file.size,
          fileName: file.name,
        };
      }

      // File names are user-controlled, so the renderer escapes them before
      // they reach innerHTML.
      progress.innerHTML = renderProgressTable(workersInfo);
    });

    worker.postMessage({ index: i, name: file.name, file });
  });
});

🔐 HTML (index.html)

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset='utf-8'>
  <meta http-equiv='X-UA-Compatible' content='IE=edge'>
  <title>Multithreading browser</title>
  <meta name='viewport' content='width=device-width, initial-scale=1'>
  <!-- threadWorker.js is not loaded as a page script: main.js starts it with
       new Worker("threadWorker.js"), and its message handler and postMessage
       calls are written for the worker scope, not for the window. -->
  <!-- defer (not async): main.js looks up #files and #progress as soon as it
       runs, so it must execute after the document has been parsed. -->
  <script src='main.js' defer></script>
</head>
<body>
  <input type="file" multiple id="files" accept=".csv" /><br><br>
  <div id="progress"></div>
</body>
</html>

📅 Takeaways

  • Modern browsers support powerful multithreading and stream APIs
  • You can offload heavy computation like CSV parsing and transform it into a non-blocking background task
  • This improves performance and user experience without relying solely on backend power

Let the browser do the heavy lifting when possible — and keep your users happy with a fluid UI!