Node.js HTTP Client Memory Leak Issue

Recently, a developer in the community submitted an issue regarding a memory leak in the HTTP module, which affects the Node.js HTTP client. However, this is a rather special scenario that generally does not occur unless the server maliciously attacks the client. A pull request (PR) has recently been submitted to fix this issue, and this article briefly introduces the problem and the solution.

Example

First, let’s look at the code to reproduce the issue.

const http = require('http');
const gcTrackerMap = new WeakMap();
const gcTrackerTag = 'NODE_TEST_COMMON_GC_TRACKER';

/**
 * Calls `gcListener.ongc()` once `obj` has been garbage-collected.
 *
 * A tracker AsyncResource is stored as the WeakMap value keyed by `obj`, so
 * the tracker stays reachable only for as long as `obj` does. An async hook
 * watches for the tracker's `destroy` event and uses it as the signal that
 * `obj` is gone.
 *
 * @param {object} obj - Object whose collection should be observed.
 * @param {{ ongc: Function }} gcListener - Listener whose `ongc` method is
 *   invoked once, after which the hook is disabled.
 */
function onGC(obj, gcListener) {
    const async_hooks = require('async_hooks');
    const onGcAsyncHook = async_hooks.createHook({
        init(id, type) {
            // Only latch onto the tracker resource created below; ignore any
            // unrelated async resource that is initialised while the hook is on.
            if (this.trackedId === undefined && type === gcTrackerTag) {
                this.trackedId = id;
            }
        },
        destroy(id) {
            if (id === this.trackedId) {
                this.gcListener.ongc();
                // One-shot: stop observing once the tracked resource is gone.
                onGcAsyncHook.disable();
            }
        },
    }).enable();
    // The hook callbacks read the listener from the hook object via `this`.
    onGcAsyncHook.gcListener = gcListener;

    gcTrackerMap.set(obj, new async_hooks.AsyncResource(gcTrackerTag));
    // Drop this function's own reference to the tracked object.
    obj = null;
}

/**
 * Starts an HTTP server on an ephemeral port that answers every request with
 * a JSON body and then writes a second, raw "400 Bad Request" response
 * directly onto the same socket.
 *
 * @returns {Promise<http.Server>} Resolves with the server once it is listening.
 */
function createServer() {
    const handleRequest = (req, res) => {
        // First (regular) response.
        res.setHeader('Content-Type', 'application/json');
        const payload = JSON.stringify({ hello: 'world' });
        res.end(payload);
        // Second response, written behind the HTTP layer's back.
        req.socket.write('HTTP/1.1 400 Bad Request\r\n\r\n');
    };
    const server = http.createServer(handleRequest);

    return new Promise((resolve) => {
        const onListening = () => resolve(server);
        server.listen(0, onListening);
    });
}

/**
 * Reproduction driver: starts the double-response server, issues one GET
 * request, prints the body of the first response, and shuts everything down
 * once the request object has been garbage-collected.
 *
 * Must be run with `node --expose-gc`, because it calls `global.gc` on a timer.
 */
async function main() {
    // Fail fast with a clear message instead of an opaque setInterval TypeError.
    if (typeof global.gc !== 'function') {
        throw new Error('global.gc is unavailable; run this script with `node --expose-gc`');
    }

    const server = await createServer();
    const req = http.get({
        port: server.address().port,
    }, (res) => {
        const chunks = [];
        res.on('data', (c) => chunks.push(c));
        res.on('end', () => {
            console.log(Buffer.concat(chunks).toString('utf8'));
        });
    });
    // Force a collection every 300 ms so the request is reclaimed as soon as
    // nothing references it any more.
    const timer = setInterval(global.gc, 300);
    // If the request leaks, ongc never fires, the timer keeps running and the
    // process does not exit.
    onGC(req, {
        ongc: () => {
            clearInterval(timer);
            server.close();
        },
    });
}

main();

The logic in the above code is quite simple (run it with node --expose-gc so that global.gc is available). First, an HTTP request is initiated and a response is obtained. The special part is that the server returns two responses for a single request, which prevents the request object from being released and leads to a memory leak.

HTTP Response Parsing Process

Next, let’s analyze the reason for this issue. Understanding this problem requires some knowledge of the Node.js HTTP protocol parsing process. In simple terms, when Node.js receives data, it calls parser.execute(data) to parse the HTTP protocol.

// Executed when data is received on the socket.
// `this` is the socket; the request (`_httpMessage`) and the parser are read from it.
function socketOnData(d) {
    const socket = this;
    const req = this._httpMessage;
    const parser = this.parser;
    // Parse HTTP response (the return value is not used in this simplified excerpt)
    const ret = parser.execute(d);
    // After the response is parsed, perform some cleanup operations to free related object memory.
    // The check looks at whichever message parser.incoming references at this moment;
    // if that message is not marked complete, none of the cleanup below runs.
    if (parser.incoming?.complete) {
        socket.removeListener('data', socketOnData);
        socket.removeListener('end', socketOnEnd);
        socket.removeListener('drain', ondrain);
        freeParser(parser, req, socket);
    }
}

// Releases a parser and clears the references to it held by the request and the socket.
// Each argument is checked for truthiness, so any of them may be omitted.
function freeParser(parser, req, socket) {
    if (parser) {
        // Drop the parser's references to the socket, messages and callbacks first
        cleanParser(parser);
        parser.remove();
        // `parsers` is defined outside this excerpt (presumably a reuse pool — TODO confirm).
        // When parsers.free() returns false the parser is closed via setImmediate,
        // otherwise parser.free() is called.
        if (parsers.free(parser) === false) {
            // function closeParserInstance(parser) { parser.close(); }
            setImmediate(closeParserInstance, parser);
        } else {
            parser.free();
        }
    }
    // Break the request -> parser link
    if (req) {
        req.parser = null;
    }
    // Break the socket -> parser link
    if (socket) {
        socket.parser = null;
    }
}

// Resets every reference the parser holds (socket, incoming/outgoing messages
// and callbacks) to null, so the parser no longer keeps those objects reachable.
function cleanParser(parser) {
    parser.socket = null;
    parser.incoming = null;
    parser.outgoing = null;
    // Callback slots stored on the parser
    parser[kOnMessageBegin] = null;
    parser[kOnExecute] = null;
    parser[kOnTimeout] = null;
    parser.onIncoming = null;
}

During the parsing process, multiple hook functions are executed.

// Hook identifiers taken from HTTPParser; `| 0` coerces each value to a 32-bit integer.
// When parsing headers
const kOnHeaders = HTTPParser.kOnHeaders | 0;
// When headers parsing is complete
const kOnHeadersComplete = HTTPParser.kOnHeadersComplete | 0;
// When parsing HTTP body
const kOnBody = HTTPParser.kOnBody | 0;
// When a complete HTTP message is parsed
const kOnMessageComplete = HTTPParser.kOnMessageComplete | 0;

Next, let’s look at the logic of these hook functions when Node.js processes HTTP responses.

Parsing the headers.

// Header hook: accumulates a batch of parsed header entries and the URL
// fragment on the parser (`this`).
function parserOnHeaders(headers, url) {
    const limit = this.maxHeaderPairs;
    // A non-positive limit means "no limit"; otherwise stop collecting once
    // the number of stored entries has reached the limit.
    const hasRoom = limit <= 0 || this._headers.length < limit;
    if (hasRoom) {
        this._headers.push(...headers);
    }
    // The URL fragment is always appended, whether or not headers were kept.
    this._url += url;
}

After parsing the headers.

// Headers-complete hook; `this` is the parser.
// Simplified excerpt: of all the parameters, only shouldKeepAlive is used here.
// Returns whatever parser.onIncoming returns.
function parserOnHeadersComplete(versionMajor, versionMinor, headers, method,
                                 url, statusCode, statusMessage, upgrade,
                                 shouldKeepAlive) {
    const parser = this;
    const { socket } = parser;
    // A fresh IncomingMessage is created on every call and replaces whatever
    // parser.incoming referenced before.
    const incoming = parser.incoming = new IncomingMessage(socket);
    return parser.onIncoming(incoming, shouldKeepAlive);
}

Next, the onIncoming function is called.

// Client-side onIncoming callback; `this` is the parser.
// `res` is the message that was just created for the response being parsed.
function parserOnIncomingClient(res, shouldKeepAlive) {
    const socket = this.socket;
    const req = socket._httpMessage;

    // req.res being set means this request has already been given a response
    if (req.res) {
        // Received multiple responses
        socket.destroy();
        return 0;
    }
    // Trigger response event
    if (req.aborted || !req.emit('response', res)) {
        // ...
    }
    return 0;  // No special treatment.
}

Parsing the HTTP response body.

// Body hook; `this` is the parser and `b` is a chunk of the message body.
function parserOnBody(b) {
    const stream = this.incoming;

    // If the stream has already been removed, then drop it.
    if (stream === null)
        return;

    // Push the body into the response object
    if (!stream._dumped) {
        const ret = stream.push(b);
        // A falsy return from push() stops reading from the socket
        // (presumably backpressure — confirm against readStop)
        if (!ret)
            readStop(this.socket);
    }
}

After parsing the HTTP response.

// Message-complete hook; `this` is the parser.
// Acts on whichever message parser.incoming references at the time of the call.
function parserOnMessageComplete() {
    const parser = this;
    const stream = parser.incoming;
    // stream is the IncomingMessage object
    if (stream !== null) {
        // Mark the response object as parsed
        stream.complete = true;
        // Mark the stream as ended
        stream.push(null);
    }
}

Analyzing the Problem

After understanding the general flow, let’s see why a memory leak occurs. When parsing the response through parser.execute(data), because the server returns two responses, the first time the HTTP response header is parsed, the following code is executed.

const incoming = parser.incoming = new IncomingMessage(socket);
return parser.onIncoming(incoming, shouldKeepAlive);

The onIncoming function will trigger the response event, which is the normal flow. Immediately after, when the second response’s header is parsed, the problem arises. The same code is executed, and note that parser.incoming points to a new IncomingMessage object. Now let’s look at the logic of onIncoming.

// Already received a response, ignore and destroy socket
if (req.res) {
    socket.destroy();
    return 0;
}

Node.js checks here whether a response has already been received for this request; if so, it destroys the socket and returns immediately. The execution of parser.execute(data) then ends, and control returns to the following code.

// Parse HTTP response
const ret = parser.execute(d);
// Check if the response is completely parsed
if (parser.incoming?.complete) {
    // Perform some cleanup operations to free related object memory
    freeParser(parser, req, socket);
}

Because parser.incoming at this point points to the second response, its complete field is false, which leads to the cleanup operation not being executed, causing a memory leak.

Fix Proposal

There are two fix proposals. One is to return -1 when parsing the second response to indicate a parsing error.

if (req.res) {
    socket.destroy();
    return -1;
}

However, this approach has a problem: since the response event has already been triggered after parsing the first response, triggering an error event here would be quite strange and difficult for the user to handle. The second solution is to ignore the second response. Ultimately, the second solution was chosen, with the following modification.

if (req.res) {
    socket.destroy();
    if (socket.parser) {
        // Now, parser.incoming is pointed to the new IncomingMessage,
        // we need to rewrite it to the first one and skip all the pending IncomingMessage
        socket.parser.incoming = req.res;
        socket.parser.incoming[kSkipPendingData] = true;
    }
    return 0;
}

First, parser.incoming is pointed back at the first response, and a flag is set on it so that all subsequent data is discarded. Then, during the rest of the parsing process, the body and message-complete hooks ignore the data they receive; otherwise, the data from the second response would interfere with the first response.

// Body hook with the fix applied; `this` is the parser and `b` is a body chunk.
function parserOnBody(b) {
    const stream = this.incoming;

    // Drop the chunk when there is no current message, or when the message
    // has been flagged with kSkipPendingData (set once a second response
    // was detected).
    if (stream === null || stream[kSkipPendingData])
        return;

    // Push the body into the response object
    if (!stream._dumped) {
        const ret = stream.push(b);
        // A falsy return from push() stops reading from the socket
        if (!ret)
            readStop(this.socket);
    }
}

// Message-complete hook with the fix applied; `this` is the parser.
function parserOnMessageComplete() {
    const parser = this;
    const stream = parser.incoming;

    // Skip messages flagged with kSkipPendingData, so the extra response
    // neither marks the message complete again nor pushes another
    // end-of-stream marker.
    if (stream !== null && !stream[kSkipPendingData]) {
        // Mark the response object as parsed
        stream.complete = true;
        // Mark the stream as ended
        stream.push(null);
    }
}

1. Issue: https://github.com/nodejs/node/issues/60025

2. PR: https://github.com/nodejs/node/pull/60062

Leave a Comment