mirror of
https://github.com/nodejs/node.git
synced 2025-12-28 07:50:41 +00:00
src: improve buffer.transcode performance
PR-URL: https://github.com/nodejs/node/pull/54153 Reviewed-By: Daniel Lemire <daniel@lemire.me> Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: Minwoo Jung <nodecorelab@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
This commit is contained in:
parent
ef4bdbfb76
commit
7c76fa0aee
35
benchmark/buffers/buffer-transcode.js
Normal file
35
benchmark/buffers/buffer-transcode.js
Normal file
@ -0,0 +1,35 @@
|
||||
'use strict';
|
||||
const common = require('../common.js');
|
||||
const assert = require('node:assert');
|
||||
const buffer = require('node:buffer');
|
||||
|
||||
const hasIntl = !!process.config.variables.v8_enable_i18n_support;
|
||||
const encodings = ['latin1', 'ascii', 'ucs2', 'utf8'];
|
||||
|
||||
if (!hasIntl) {
|
||||
console.log('Skipping: `transcode` is only available on platforms that support i18n`');
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
const bench = common.createBenchmark(main, {
|
||||
fromEncoding: encodings,
|
||||
toEncoding: encodings,
|
||||
length: [1, 10, 1000],
|
||||
n: [1e5],
|
||||
}, {
|
||||
combinationFilter(p) {
|
||||
return !(p.fromEncoding === 'ucs2' && p.toEncoding === 'utf8');
|
||||
},
|
||||
});
|
||||
|
||||
function main({ n, fromEncoding, toEncoding, length }) {
|
||||
const input = Buffer.from('a'.repeat(length));
|
||||
let out = 0;
|
||||
bench.start();
|
||||
for (let i = 0; i < n; i++) {
|
||||
const dest = buffer.transcode(input, fromEncoding, toEncoding);
|
||||
out += dest.buffer.byteLength;
|
||||
}
|
||||
bench.end(n);
|
||||
assert.ok(out >= 0);
|
||||
}
|
||||
107
src/node_i18n.cc
107
src/node_i18n.cc
@ -42,6 +42,7 @@
|
||||
|
||||
#include "node_i18n.h"
|
||||
#include "node_external_reference.h"
|
||||
#include "simdutf.h"
|
||||
|
||||
#if defined(NODE_HAVE_I18N_SUPPORT)
|
||||
|
||||
@ -147,7 +148,6 @@ MaybeLocal<Object> Transcode(Environment* env,
|
||||
const char* source,
|
||||
const size_t source_length,
|
||||
UErrorCode* status) {
|
||||
*status = U_ZERO_ERROR;
|
||||
MaybeLocal<Object> ret;
|
||||
MaybeStackBuffer<char> result;
|
||||
Converter to(toEncoding);
|
||||
@ -170,22 +170,21 @@ MaybeLocal<Object> Transcode(Environment* env,
|
||||
return ret;
|
||||
}
|
||||
|
||||
MaybeLocal<Object> TranscodeToUcs2(Environment* env,
|
||||
const char* fromEncoding,
|
||||
const char* toEncoding,
|
||||
const char* source,
|
||||
const size_t source_length,
|
||||
UErrorCode* status) {
|
||||
*status = U_ZERO_ERROR;
|
||||
MaybeLocal<Object> ret;
|
||||
MaybeLocal<Object> TranscodeLatin1ToUcs2(Environment* env,
|
||||
const char* fromEncoding,
|
||||
const char* toEncoding,
|
||||
const char* source,
|
||||
const size_t source_length,
|
||||
UErrorCode* status) {
|
||||
MaybeStackBuffer<UChar> destbuf(source_length);
|
||||
Converter from(fromEncoding);
|
||||
const size_t length_in_chars = source_length * sizeof(UChar);
|
||||
ucnv_toUChars(from.conv(), *destbuf, length_in_chars,
|
||||
source, source_length, status);
|
||||
if (U_SUCCESS(*status))
|
||||
ret = ToBufferEndian(env, &destbuf);
|
||||
return ret;
|
||||
auto actual_length =
|
||||
simdutf::convert_latin1_to_utf16le(source, source_length, destbuf.out());
|
||||
if (actual_length == 0) {
|
||||
*status = U_INVALID_CHAR_FOUND;
|
||||
return {};
|
||||
}
|
||||
|
||||
return Buffer::New(env, &destbuf);
|
||||
}
|
||||
|
||||
MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
|
||||
@ -194,13 +193,11 @@ MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
|
||||
const char* source,
|
||||
const size_t source_length,
|
||||
UErrorCode* status) {
|
||||
*status = U_ZERO_ERROR;
|
||||
MaybeStackBuffer<UChar> sourcebuf;
|
||||
MaybeLocal<Object> ret;
|
||||
Converter to(toEncoding);
|
||||
|
||||
size_t sublen = ucnv_getMinCharSize(to.conv());
|
||||
std::string sub(sublen, '?');
|
||||
std::string sub(to.min_char_size(), '?');
|
||||
to.set_subst_chars(sub.c_str());
|
||||
|
||||
const size_t length_in_chars = source_length / sizeof(UChar);
|
||||
@ -221,26 +218,18 @@ MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
|
||||
const char* source,
|
||||
const size_t source_length,
|
||||
UErrorCode* status) {
|
||||
*status = U_ZERO_ERROR;
|
||||
MaybeStackBuffer<UChar> destbuf;
|
||||
int32_t result_length;
|
||||
u_strFromUTF8(*destbuf, destbuf.capacity(), &result_length,
|
||||
source, source_length, status);
|
||||
MaybeLocal<Object> ret;
|
||||
if (U_SUCCESS(*status)) {
|
||||
destbuf.SetLength(result_length);
|
||||
ret = ToBufferEndian(env, &destbuf);
|
||||
} else if (*status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
*status = U_ZERO_ERROR;
|
||||
destbuf.AllocateSufficientStorage(result_length);
|
||||
u_strFromUTF8(*destbuf, result_length, &result_length,
|
||||
source, source_length, status);
|
||||
if (U_SUCCESS(*status)) {
|
||||
destbuf.SetLength(result_length);
|
||||
ret = ToBufferEndian(env, &destbuf);
|
||||
}
|
||||
size_t expected_utf16_length =
|
||||
simdutf::utf16_length_from_utf8(source, source_length);
|
||||
MaybeStackBuffer<UChar> destbuf(expected_utf16_length);
|
||||
auto actual_length =
|
||||
simdutf::convert_utf8_to_utf16le(source, source_length, destbuf.out());
|
||||
|
||||
if (actual_length == 0) {
|
||||
*status = U_INVALID_CHAR_FOUND;
|
||||
return {};
|
||||
}
|
||||
return ret;
|
||||
|
||||
return Buffer::New(env, &destbuf);
|
||||
}
|
||||
|
||||
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
|
||||
@ -249,32 +238,25 @@ MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
|
||||
const char* source,
|
||||
const size_t source_length,
|
||||
UErrorCode* status) {
|
||||
*status = U_ZERO_ERROR;
|
||||
MaybeLocal<Object> ret;
|
||||
const size_t length_in_chars = source_length / sizeof(UChar);
|
||||
int32_t result_length;
|
||||
MaybeStackBuffer<UChar> sourcebuf;
|
||||
MaybeStackBuffer<char> destbuf;
|
||||
CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars);
|
||||
u_strToUTF8(*destbuf, destbuf.capacity(), &result_length,
|
||||
*sourcebuf, length_in_chars, status);
|
||||
if (U_SUCCESS(*status)) {
|
||||
destbuf.SetLength(result_length);
|
||||
ret = ToBufferEndian(env, &destbuf);
|
||||
} else if (*status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
*status = U_ZERO_ERROR;
|
||||
destbuf.AllocateSufficientStorage(result_length);
|
||||
u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf,
|
||||
length_in_chars, status);
|
||||
if (U_SUCCESS(*status)) {
|
||||
destbuf.SetLength(result_length);
|
||||
ret = ToBufferEndian(env, &destbuf);
|
||||
}
|
||||
size_t expected_utf8_length = simdutf::utf8_length_from_utf16le(
|
||||
reinterpret_cast<const char16_t*>(source), length_in_chars);
|
||||
|
||||
MaybeStackBuffer<char> destbuf(expected_utf8_length);
|
||||
auto actual_length = simdutf::convert_utf16le_to_utf8(
|
||||
reinterpret_cast<const char16_t*>(source),
|
||||
length_in_chars,
|
||||
destbuf.out());
|
||||
|
||||
if (actual_length == 0) {
|
||||
*status = U_INVALID_CHAR_FOUND;
|
||||
return {};
|
||||
}
|
||||
return ret;
|
||||
|
||||
return Buffer::New(env, &destbuf);
|
||||
}
|
||||
|
||||
const char* EncodingName(const enum encoding encoding) {
|
||||
constexpr const char* EncodingName(const enum encoding encoding) {
|
||||
switch (encoding) {
|
||||
case ASCII: return "us-ascii";
|
||||
case LATIN1: return "iso8859-1";
|
||||
@ -284,7 +266,7 @@ const char* EncodingName(const enum encoding encoding) {
|
||||
}
|
||||
}
|
||||
|
||||
bool SupportedEncoding(const enum encoding encoding) {
|
||||
constexpr bool SupportedEncoding(const enum encoding encoding) {
|
||||
switch (encoding) {
|
||||
case ASCII:
|
||||
case LATIN1:
|
||||
@ -309,8 +291,7 @@ void Transcode(const FunctionCallbackInfo<Value>&args) {
|
||||
switch (fromEncoding) {
|
||||
case ASCII:
|
||||
case LATIN1:
|
||||
if (toEncoding == UCS2)
|
||||
tfn = &TranscodeToUcs2;
|
||||
if (toEncoding == UCS2) tfn = &TranscodeLatin1ToUcs2;
|
||||
break;
|
||||
case UTF8:
|
||||
if (toEncoding == UCS2)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user