mirror of
https://github.com/nodejs/node.git
synced 2025-12-28 07:50:41 +00:00
buffer: use simdutf for atob implementation
Co-authored-by: Daniel Lemire <daniel@lemire.me>
PR-URL: https://github.com/nodejs/node/pull/52381
Refs: https://github.com/nodejs/node/pull/51670
Reviewed-By: Daniel Lemire <daniel@lemire.me>
Reviewed-By: Vinícius Lourenço Claro Cardoso <contact@viniciusl.com.br>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Robert Nagy <ronagy@icloud.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
Reviewed-By: Filip Skokan <panva.ip@gmail.com>
This commit is contained in:
parent
cf629366b9
commit
6f504b71ac
20
benchmark/buffers/buffer-atob.js
Normal file
20
benchmark/buffers/buffer-atob.js
Normal file
@ -0,0 +1,20 @@
|
||||
'use strict';
|
||||
const common = require('../common.js');
|
||||
const assert = require('node:assert');
|
||||
|
||||
// Benchmark matrix: `size` is the number of 'A' characters that get
// base64-encoded to build the input, `n` is the number of atob() calls
// performed per run.
const benchmarkOptions = {
  size: [16, 32, 64, 128],
  n: [1e6],
};

const bench = common.createBenchmark(main, benchmarkOptions);
|
||||
|
||||
/**
 * Benchmark body: decodes one fixed base64 string with `atob()` `n` times.
 *
 * @param {object} options
 * @param {number} options.n Number of `atob()` calls inside the timed region.
 * @param {number} options.size Number of 'A' characters that are
 *   base64-encoded (with `btoa()`) to produce the input string.
 */
function main({ n, size }) {
  // The input is built once, outside of the timed region.
  const encoded = btoa('A'.repeat(size));
  let decodedLengthSum = 0;

  bench.start();
  let iteration = 0;
  while (iteration < n) {
    const decoded = atob(encoded);
    decodedLengthSum += decoded.length;
    iteration++;
  }
  bench.end(n);

  // Sanity check on the accumulated result; presumably this also keeps the
  // decoded values observably used.
  assert(decodedLengthSum > 0);
}
|
||||
@ -23,10 +23,8 @@
|
||||
|
||||
const {
|
||||
Array,
|
||||
ArrayFrom,
|
||||
ArrayIsArray,
|
||||
ArrayPrototypeForEach,
|
||||
ArrayPrototypeIndexOf,
|
||||
MathFloor,
|
||||
MathMin,
|
||||
MathTrunc,
|
||||
@ -70,6 +68,7 @@ const {
|
||||
swap64: _swap64,
|
||||
kMaxLength,
|
||||
kStringMaxLength,
|
||||
atob: _atob,
|
||||
} = internalBinding('buffer');
|
||||
const {
|
||||
constants: {
|
||||
@ -1259,85 +1258,26 @@ function btoa(input) {
|
||||
return buf.toString('base64');
|
||||
}
|
||||
|
||||
// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode
|
||||
const kForgivingBase64AllowedChars = [
|
||||
// ASCII whitespace
|
||||
// Refs: https://infra.spec.whatwg.org/#ascii-whitespace
|
||||
0x09, 0x0A, 0x0C, 0x0D, 0x20,
|
||||
|
||||
// Uppercase letters
|
||||
...ArrayFrom({ length: 26 }, (_, i) => StringPrototypeCharCodeAt('A') + i),
|
||||
|
||||
// Lowercase letters
|
||||
...ArrayFrom({ length: 26 }, (_, i) => StringPrototypeCharCodeAt('a') + i),
|
||||
|
||||
// Decimal digits
|
||||
...ArrayFrom({ length: 10 }, (_, i) => StringPrototypeCharCodeAt('0') + i),
|
||||
|
||||
0x2B, // +
|
||||
0x2F, // /
|
||||
0x3D, // =
|
||||
];
|
||||
const kEqualSignIndex = ArrayPrototypeIndexOf(kForgivingBase64AllowedChars,
|
||||
0x3D);
|
||||
|
||||
function atob(input) {
|
||||
// The implementation here has not been performance optimized in any way and
|
||||
// should not be.
|
||||
// Refs: https://github.com/nodejs/node/pull/38433#issuecomment-828426932
|
||||
if (arguments.length === 0) {
|
||||
throw new ERR_MISSING_ARGS('input');
|
||||
}
|
||||
|
||||
input = `${input}`;
|
||||
let nonAsciiWhitespaceCharCount = 0;
|
||||
let equalCharCount = 0;
|
||||
const result = _atob(`${input}`);
|
||||
|
||||
for (let n = 0; n < input.length; n++) {
|
||||
const index = ArrayPrototypeIndexOf(
|
||||
kForgivingBase64AllowedChars,
|
||||
StringPrototypeCharCodeAt(input, n));
|
||||
|
||||
if (index > 4) {
|
||||
// The first 5 elements of `kForgivingBase64AllowedChars` are
|
||||
// ASCII whitespace char codes.
|
||||
nonAsciiWhitespaceCharCount++;
|
||||
|
||||
if (index === kEqualSignIndex) {
|
||||
equalCharCount++;
|
||||
} else if (equalCharCount) {
|
||||
// The `=` char is only allowed at the end.
|
||||
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
|
||||
}
|
||||
|
||||
if (equalCharCount > 2) {
|
||||
// Only one more `=` is permitted after the first equal sign.
|
||||
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
|
||||
}
|
||||
} else if (index === -1) {
|
||||
switch (result) {
|
||||
case -2: // Invalid character
|
||||
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
|
||||
}
|
||||
case -1: // Single character remained
|
||||
throw lazyDOMException(
|
||||
'The string to be decoded is not correctly encoded.',
|
||||
'InvalidCharacterError');
|
||||
case -3: // Possible overflow
|
||||
// TODO(@anonrig): Throw correct error in here.
|
||||
throw lazyDOMException('The input causes overflow.', 'InvalidCharacterError');
|
||||
default:
|
||||
return result;
|
||||
}
|
||||
|
||||
let reminder = nonAsciiWhitespaceCharCount % 4;
|
||||
|
||||
// See #2, #3, #4 - https://infra.spec.whatwg.org/#forgiving-base64
|
||||
if (!reminder) {
|
||||
// Remove all trailing `=` characters and get the new reminder.
|
||||
reminder = (nonAsciiWhitespaceCharCount - equalCharCount) % 4;
|
||||
} else if (equalCharCount) {
|
||||
// `=` should not in the input if there's a reminder.
|
||||
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
|
||||
}
|
||||
|
||||
// See #3 - https://infra.spec.whatwg.org/#forgiving-base64
|
||||
if (reminder === 1) {
|
||||
throw lazyDOMException(
|
||||
'The string to be decoded is not correctly encoded.',
|
||||
'InvalidCharacterError');
|
||||
}
|
||||
|
||||
return Buffer.from(input, 'base64').toString('latin1');
|
||||
}
|
||||
|
||||
function isUtf8(input) {
|
||||
|
||||
@ -67,6 +67,7 @@ using v8::Just;
|
||||
using v8::Local;
|
||||
using v8::Maybe;
|
||||
using v8::MaybeLocal;
|
||||
using v8::NewStringType;
|
||||
using v8::Nothing;
|
||||
using v8::Number;
|
||||
using v8::Object;
|
||||
@ -1210,6 +1211,61 @@ void DetachArrayBuffer(const FunctionCallbackInfo<Value>& args) {
|
||||
}
|
||||
}
|
||||
|
||||
// In case of success, the decoded string is returned.
|
||||
// In case of error, a negative value is returned:
|
||||
// * -1 indicates a single character remained,
|
||||
// * -2 indicates an invalid character,
|
||||
// * -3 indicates a possible overflow (i.e., more than 2 GB output).
|
||||
static void Atob(const FunctionCallbackInfo<Value>& args) {
|
||||
CHECK_EQ(args.Length(), 1);
|
||||
Environment* env = Environment::GetCurrent(args);
|
||||
THROW_AND_RETURN_IF_NOT_STRING(env, args[0], "argument");
|
||||
|
||||
Local<String> input = args[0].As<String>();
|
||||
MaybeStackBuffer<char> buffer;
|
||||
simdutf::result result;
|
||||
|
||||
if (input->IsExternalOneByte()) { // 8-bit case
|
||||
auto ext = input->GetExternalOneByteStringResource();
|
||||
size_t expected_length =
|
||||
simdutf::maximal_binary_length_from_base64(ext->data(), ext->length());
|
||||
buffer.AllocateSufficientStorage(expected_length + 1);
|
||||
buffer.SetLengthAndZeroTerminate(expected_length);
|
||||
result = simdutf::base64_to_binary(
|
||||
ext->data(), ext->length(), buffer.out(), simdutf::base64_default);
|
||||
} else { // 16-bit case
|
||||
String::Value value(env->isolate(), input);
|
||||
auto data = reinterpret_cast<const char16_t*>(*value);
|
||||
size_t expected_length =
|
||||
simdutf::maximal_binary_length_from_base64(data, value.length());
|
||||
buffer.AllocateSufficientStorage(expected_length + 1);
|
||||
buffer.SetLengthAndZeroTerminate(expected_length);
|
||||
result = simdutf::base64_to_binary(
|
||||
data, value.length(), buffer.out(), simdutf::base64_default);
|
||||
}
|
||||
|
||||
if (result.error == simdutf::error_code::SUCCESS) {
|
||||
auto value =
|
||||
String::NewFromOneByte(env->isolate(),
|
||||
reinterpret_cast<const uint8_t*>(buffer.out()),
|
||||
NewStringType::kNormal,
|
||||
result.count)
|
||||
.ToLocalChecked();
|
||||
return args.GetReturnValue().Set(value);
|
||||
}
|
||||
|
||||
// Default value is: "possible overflow"
|
||||
int32_t error_code = -3;
|
||||
|
||||
if (result.error == simdutf::error_code::INVALID_BASE64_CHARACTER) {
|
||||
error_code = -2;
|
||||
} else if (result.error == simdutf::error_code::BASE64_INPUT_REMAINDER) {
|
||||
error_code = -1;
|
||||
}
|
||||
|
||||
args.GetReturnValue().Set(error_code);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
std::pair<void*, size_t> DecomposeBufferToParts(Local<Value> buffer) {
|
||||
@ -1272,6 +1328,8 @@ void Initialize(Local<Object> target,
|
||||
Environment* env = Environment::GetCurrent(context);
|
||||
Isolate* isolate = env->isolate();
|
||||
|
||||
SetMethodNoSideEffect(context, target, "atob", Atob);
|
||||
|
||||
SetMethod(context, target, "setBufferPrototype", SetBufferPrototype);
|
||||
SetMethodNoSideEffect(context, target, "createFromString", CreateFromString);
|
||||
|
||||
@ -1373,6 +1431,8 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
|
||||
|
||||
registry->Register(DetachArrayBuffer);
|
||||
registry->Register(CopyArrayBuffer);
|
||||
|
||||
registry->Register(Atob);
|
||||
}
|
||||
|
||||
} // namespace Buffer
|
||||
|
||||
Loading…
Reference in New Issue
Block a user