forked from sheetjs/docs.sheetjs.com
worker_threads
This commit is contained in:
parent
292e61d647
commit
ae9e15716b
@ -75,7 +75,7 @@ This breaks web frameworks that use the filesystem in body parsing.
|
||||
Deno provides the basic elements to implement a server. It does not provide a
|
||||
body parser out of the box.
|
||||
|
||||
### Drash
|
||||
#### Drash
|
||||
|
||||
In testing, [Drash](https://drash.land/drash/) had an in-memory body parser
|
||||
which could handle file uploads on hosted services like Deno Deploy.
|
||||
@ -223,7 +223,15 @@ The page should show the contents of the file as an HTML table.
|
||||
|
||||
## NodeJS
|
||||
|
||||
### Express
|
||||
When processing small files, the work is best handled in the server response
|
||||
handler function. This approach is used in the "Framework Demos" section.
|
||||
|
||||
When processing large files, the direct approach will freeze the server. NodeJS
|
||||
provides "Worker Threads" for this exact use case.
|
||||
|
||||
### Framework Demos
|
||||
|
||||
#### Express
|
||||
|
||||
The `express-formidable` middleware is powered by the `formidable` parser. It
|
||||
adds a `files` property to the request.
|
||||
@ -304,7 +312,7 @@ It should prompt to download `SheetJSExpress.xlsx`
|
||||
|
||||
</details>
|
||||
|
||||
### NestJS
|
||||
#### NestJS
|
||||
|
||||
[The NestJS docs](https://docs.nestjs.com/techniques/file-upload) have detailed
|
||||
instructions for file upload support. In the controller, the `path` property
|
||||
@ -413,7 +421,7 @@ It should prompt to download `SheetJSNest.xlsx`
|
||||
|
||||
</details>
|
||||
|
||||
### Fastify
|
||||
#### Fastify
|
||||
|
||||
:::note
|
||||
|
||||
@ -552,3 +560,136 @@ The response should show the data in CSV rows.
|
||||
It should prompt to download `SheetJSFastify.xlsx`
|
||||
|
||||
</details>
|
||||
|
||||
### Worker Threads
|
||||
|
||||
NodeJS "Worker Threads" were introduced in v14 and eventually marked as stable
|
||||
in v16. Coupled with `AsyncResource`, a simple thread pool enables processing
|
||||
without blocking the server! The official NodeJS docs include a sample worker
|
||||
pool implementation.
|
||||
|
||||
This example uses ExpressJS to create a general XLSX conversion service, but
|
||||
the same approach applies to any NodeJS server side framework.
|
||||
|
||||
When reading large files, it is strongly recommended to run the body parser in
|
||||
the main server process. Body parsers like `formidable` will write uploaded
|
||||
files to the filesystem, and the file path should be passed to the worker (and
|
||||
the worker would be responsible for reading and cleaning up the files).
|
||||
|
||||
:::note
|
||||
|
||||
The `child_process` module can also spawn [command-line tools](/docs/demos/cli).
|
||||
That approach is not explored in this demo.
|
||||
|
||||
:::
|
||||
|
||||
<details><summary><b>Complete Example</b> (click to show)</summary>
|
||||
|
||||
:::note
|
||||
|
||||
This demo was verified on 2022 December 14
|
||||
|
||||
:::
|
||||
|
||||
0) Create a simple ECMAScript-Module-enabled `package.json`:
|
||||
|
||||
```json title="package.json"
|
||||
{ "type": "module" }
|
||||
```
|
||||
|
||||
1) Install the dependencies:
|
||||
|
||||
```bash
|
||||
npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz express@4.18.2 formidable@2.1.1
|
||||
```
|
||||
|
||||
2) Create a worker script `worker.js` that listens for messages. When a message
|
||||
is received, it will read the file from the filesystem, generate and pass back a
|
||||
new XLSX file, and delete the original file:
|
||||
|
||||
```js title="worker.js"
|
||||
/* load the worker_threads module */
|
||||
import { parentPort } from 'node:worker_threads';
|
||||
|
||||
/* load the SheetJS module and hook to FS */
|
||||
import { set_fs, readFile, write } from 'xlsx';
|
||||
import * as fs from 'fs';
|
||||
set_fs(fs);
|
||||
|
||||
/* the server will send a message with the `path` field */
|
||||
parentPort.on('message', (task) => {
|
||||
/* highlight-start */
|
||||
// read file
|
||||
const wb = readFile(task.path, { dense: true });
|
||||
// send back XLSX
|
||||
parentPort.postMessage(write(wb, { type: "buffer", bookType: "xlsx" }));
|
||||
/* highlight-end */
|
||||
// remove file
|
||||
fs.unlink(task.path, ()=>{});
|
||||
});
|
||||
```
|
||||
|
||||
3) Download [`worker_pool.js`](pathname:///server/worker_pool.js):
|
||||
|
||||
```bash
|
||||
curl -LO https://docs.sheetjs.com/server/worker_pool.js
|
||||
```
|
||||
|
||||
(this is a slightly modified version of the example in the NodeJS docs)
|
||||
|
||||
4) Save the following server code to `main.mjs`:
|
||||
|
||||
```js title="main.mjs"
|
||||
/* load dependencies */
|
||||
import os from 'node:os';
|
||||
import process from 'node:process'
|
||||
import express from 'express';
|
||||
import formidable from 'formidable';
|
||||
|
||||
/* load worker pool */
|
||||
import WorkerPool from './worker_pool.js';
|
||||
|
||||
const pool = new WorkerPool(os.cpus().length);
|
||||
process.on("beforeExit", () => { pool.close(); })
|
||||
|
||||
/* create server */
|
||||
const app = express();
|
||||
app.post('/', (req, res, next) => {
|
||||
// parse body
|
||||
const form = formidable({});
|
||||
form.parse(req, (err, fields, files) => {
|
||||
// look for "upload" field
|
||||
if(err) return next(err);
|
||||
if(!files["upload"]) return next(new Error("missing `upload` file"));
|
||||
|
||||
// send a message to the worker with the path to the uploaded file
|
||||
// highlight-next-line
|
||||
pool.runTask({ path: files["upload"].filepath }, (err, result) => {
|
||||
if(err) return next(err);
|
||||
// send the file back as an attachment
|
||||
res.attachment("SheetJSPool.xlsx");
|
||||
res.status(200).end(result);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// start server
|
||||
app.listen(7262, () => { console.log(`Example app listening on port 7262`); });
|
||||
```
|
||||
|
||||
5) Run the server:
|
||||
|
||||
```bash
|
||||
node main.mjs
|
||||
```
|
||||
|
||||
Test with the [`pres.numbers` sample file](https://sheetjs.com/pres.numbers):
|
||||
|
||||
```bash
|
||||
curl -LO https://sheetjs.com/pres.numbers
|
||||
curl -X POST -F upload=@pres.numbers http://localhost:7262/ -J -O
|
||||
```
|
||||
|
||||
This will generate `SheetJSPool.xlsx`.
|
||||
|
||||
</details>
|
88
docz/static/server/worker_pool.js
Normal file
88
docz/static/server/worker_pool.js
Normal file
@ -0,0 +1,88 @@
|
||||
// This example from https://nodejs.org/dist/latest/docs/api/worker_threads.html
|
||||
// Documentation code redistributed under the MIT license.
|
||||
// Copyright Node.js contributors
|
||||
|
||||
import { AsyncResource } from 'node:async_hooks';
|
||||
import { EventEmitter } from 'node:events';
|
||||
import { Worker } from 'node:worker_threads';
|
||||
|
||||
const kTaskInfo = Symbol('kTaskInfo');
|
||||
const kWorkerFreedEvent = Symbol('kWorkerFreedEvent');
|
||||
|
||||
class WorkerPoolTaskInfo extends AsyncResource {
|
||||
constructor(callback) {
|
||||
super('WorkerPoolTaskInfo');
|
||||
this.callback = callback;
|
||||
}
|
||||
|
||||
done(err, result) {
|
||||
this.runInAsyncScope(this.callback, null, err, result);
|
||||
this.emitDestroy(); // `TaskInfo`s are used only once.
|
||||
}
|
||||
}
|
||||
|
||||
export default class WorkerPool extends EventEmitter {
|
||||
constructor(numThreads) {
|
||||
super();
|
||||
this.numThreads = numThreads;
|
||||
this.workers = [];
|
||||
this.freeWorkers = [];
|
||||
this.tasks = [];
|
||||
|
||||
for (let i = 0; i < numThreads; i++)
|
||||
this.addNewWorker();
|
||||
|
||||
// Any time the kWorkerFreedEvent is emitted, dispatch
|
||||
// the next task pending in the queue, if any.
|
||||
this.on(kWorkerFreedEvent, () => {
|
||||
if (this.tasks.length > 0) {
|
||||
const { task, callback } = this.tasks.shift();
|
||||
this.runTask(task, callback);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
addNewWorker() {
|
||||
const worker = new Worker(new URL('worker.js', import.meta.url));
|
||||
worker.on('message', (result) => {
|
||||
// In case of success: Call the callback that was passed to `runTask`,
|
||||
// remove the `TaskInfo` associated with the Worker, and mark it as free
|
||||
// again.
|
||||
worker[kTaskInfo].done(null, result);
|
||||
worker[kTaskInfo] = null;
|
||||
this.freeWorkers.push(worker);
|
||||
this.emit(kWorkerFreedEvent);
|
||||
});
|
||||
worker.on('error', (err) => {
|
||||
// In case of an uncaught exception: Call the callback that was passed to
|
||||
// `runTask` with the error.
|
||||
if (worker[kTaskInfo])
|
||||
worker[kTaskInfo].done(err, null);
|
||||
else
|
||||
this.emit('error', err);
|
||||
// Remove the worker from the list and start a new Worker to replace the
|
||||
// current one.
|
||||
this.workers.splice(this.workers.indexOf(worker), 1);
|
||||
this.addNewWorker();
|
||||
});
|
||||
this.workers.push(worker);
|
||||
this.freeWorkers.push(worker);
|
||||
this.emit(kWorkerFreedEvent);
|
||||
}
|
||||
|
||||
runTask(task, callback) {
|
||||
if (this.freeWorkers.length === 0) {
|
||||
// No free threads, wait until a worker thread becomes free.
|
||||
this.tasks.push({ task, callback });
|
||||
return;
|
||||
}
|
||||
|
||||
const worker = this.freeWorkers.pop();
|
||||
worker[kTaskInfo] = new WorkerPoolTaskInfo(callback);
|
||||
worker.postMessage(task);
|
||||
}
|
||||
|
||||
close() {
|
||||
for (const worker of this.workers) worker.terminate();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user