deps: update simdutf to 5.2.3

PR-URL: https://github.com/nodejs/node/pull/52381
Refs: https://github.com/nodejs/node/pull/51670
Reviewed-By: Daniel Lemire <daniel@lemire.me>
Reviewed-By: Vinícius Lourenço Claro Cardoso <contact@viniciusl.com.br>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Robert Nagy <ronagy@icloud.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
Reviewed-By: Filip Skokan <panva.ip@gmail.com>
This commit is contained in:
Yagiz Nizipli 2024-04-04 17:53:41 -04:00 committed by Node.js GitHub Bot
parent 08609b5222
commit cf629366b9
2 changed files with 2204 additions and 959 deletions

3000
deps/simdutf/simdutf.cpp vendored

File diff suppressed because it is too large Load Diff

163
deps/simdutf/simdutf.h vendored
View File

@ -1,4 +1,4 @@
/* auto-generated on 2024-03-18 10:58:28 -0400. Do not edit! */
/* auto-generated on 2024-04-05 16:29:02 -0400. Do not edit! */
/* begin file include/simdutf.h */
#ifndef SIMDUTF_H
#define SIMDUTF_H
@ -566,6 +566,7 @@ enum error_code {
// there must be no surrogate at all (Latin1)
INVALID_BASE64_CHARACTER, // Found a character that cannot be part of a valid base64 string.
BASE64_INPUT_REMAINDER, // The base64 input terminates with a single character, excluding padding (=).
OUTPUT_BUFFER_TOO_SMALL, // The provided buffer is too small.
OTHER // Not related to validation/transcoding.
};
@ -573,9 +574,9 @@ struct result {
error_code error;
size_t count; // In case of error, indicates the position of the error. In case of success, indicates the number of code units validated/written.
simdutf_really_inline result();
simdutf_really_inline result() : error{error_code::SUCCESS}, count{0} {}
simdutf_really_inline result(error_code, size_t);
simdutf_really_inline result(error_code _err, size_t _pos) : error{_err}, count{_pos} {}
};
}
@ -593,7 +594,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
#define SIMDUTF_SIMDUTF_VERSION_H
/** The version of simdutf being used (major.minor.revision) */
#define SIMDUTF_VERSION "5.0.0"
#define SIMDUTF_VERSION "5.2.3"
namespace simdutf {
enum {
@ -604,11 +605,11 @@ enum {
/**
* The minor version (major.MINOR.revision) of simdutf being used.
*/
SIMDUTF_VERSION_MINOR = 0,
SIMDUTF_VERSION_MINOR = 2,
/**
* The revision (major.minor.REVISION) of simdutf being used.
*/
SIMDUTF_VERSION_REVISION = 0
SIMDUTF_VERSION_REVISION = 3
};
} // namespace simdutf
@ -2285,6 +2286,12 @@ simdutf_warn_unused size_t trim_partial_utf16le(const char16_t* input, size_t le
*/
simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t length);
// base64_options is the type used to specify the base64 encoding options.
// It is passed as the optional `options` argument of the base64 functions
// declared in this header and defaults to base64_default there.
using base64_options = uint64_t;
// Values that may be passed as a base64_options argument.
enum : base64_options {
base64_default = 0, /* standard base64 format */
base64_url = 1 /* base64url format */
};
/**
* Provide the maximal binary length in bytes given the base64 input.
@ -2293,10 +2300,21 @@ simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t leng
*
* @param input the base64 input to process
* @param length the length of the base64 input in bytes
* @return number of base64 bytes
* @return maximum number of binary bytes
*/
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char * input, size_t length) noexcept;
/**
* Provide the maximal binary length in bytes given the base64 input.
* In general, if the input contains ASCII spaces, the result will be less than
* the maximum length.
*
* @param input the base64 input to process, in ASCII stored as 16-bit units
* @param length the length of the base64 input in 16-bit units
* @return maximal number of binary bytes
*/
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t * input, size_t length) noexcept;
/**
* Convert a base64 input to a binary output.
*
@ -2307,19 +2325,24 @@ simdutf_warn_unused size_t maximal_binary_length_from_base64(const char * input,
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
*
* This function will fail in case of invalid input. There are two possible reasons for
* failure: the input is contains a number of base64 characters that when divided by 4, leaves
* a singler remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
* failure: the input contains a number of base64 characters that when divided by 4, leaves
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
*
* When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the input
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
* r.count contains the number of bytes decoded.
*
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
* If you fail to provide that much space, the function may cause a buffer overflow.
*
* @param input the base64 string to process
* @param length the length of the string in bytes
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in bytes) if any, or the number of bytes written if successful.
*/
simdutf_warn_unused result base64_to_binary(const char * input, size_t length, char* output) noexcept;
simdutf_warn_unused result base64_to_binary(const char * input, size_t length, char* output, base64_options options = base64_default) noexcept;
/**
* Provide the base64 length in bytes given the length of a binary input.
@ -2327,7 +2350,7 @@ simdutf_warn_unused result base64_to_binary(const char * input, size_t length, c
* @param length the length of the input in bytes
* @return number of base64 bytes
*/
simdutf_warn_unused size_t base64_length_from_binary(size_t length) noexcept;
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options = base64_default) noexcept;
/**
* Convert a binary input to a base64 ouput. The output is always padded with equal signs so that it is
@ -2338,9 +2361,74 @@ simdutf_warn_unused size_t base64_length_from_binary(size_t length) noexcept;
* @param input the binary to process
* @param length the length of the input in bytes
* @param output the pointer to buffer that can hold the conversion result (should be at least base64_length_from_binary(length) bytes long)
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
* @return number of written bytes, will be equal to base64_length_from_binary(length)
*/
size_t binary_to_base64(const char * input, size_t length, char* output) noexcept;
size_t binary_to_base64(const char * input, size_t length, char* output, base64_options options = base64_default) noexcept;
/**
* Convert a base64 input to a binary output.
*
* This function follows the WHATWG forgiving-base64 format, which means that it will
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
* equal signs at the end) or an unpadded input (without any equal signs at the end).
*
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
*
* This function will fail in case of invalid input. There are two possible reasons for
* failure: the input contains a number of base64 characters that when divided by 4, leaves
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
*
* When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the input
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
* r.count contains the number of bytes decoded.
*
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
* If you fail to provide that much space, the function may cause a buffer overflow.
*
* @param input the base64 string to process, in ASCII stored as 16-bit units
* @param length the length of the string in 16-bit units
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and position of the INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number of bytes written if successful.
*/
simdutf_warn_unused result base64_to_binary(const char16_t * input, size_t length, char* output, base64_options options = base64_default) noexcept;
/**
* Convert a base64 input to a binary output.
*
* This function follows the WHATWG forgiving-base64 format, which means that it will
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
* equal signs at the end) or an unpadded input (without any equal signs at the end).
*
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
*
* This function will fail in case of invalid input. There are three possible reasons for
* failure: the input contains a number of base64 characters that when divided by 4, leaves
* a single remainder character (BASE64_INPUT_REMAINDER), the input contains a character
* that is not a valid base64 character (INVALID_BASE64_CHARACTER), or the output buffer
* is too small (OUTPUT_BUFFER_TOO_SMALL).
*
* When the error is OUTPUT_BUFFER_TOO_SMALL, we return both the number of bytes written
* and the number of units processed, see description of the parameters and returned value.
*
* When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the input
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
* r.count contains the number of bytes decoded.
*
* The INVALID_BASE64_CHARACTER cases are considered fatal and you are expected to discard
* the output.
*
* @param input the base64 string to process, in ASCII stored as 8-bit or 16-bit units
* @param length the length of the string in 8-bit or 16-bit units.
* @param output the pointer to buffer that can hold the conversion result.
* @param outlen the number of bytes that can be written in the output buffer. Upon return, it is modified to reflect how many bytes were written.
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and position of the INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number of units processed if successful.
*/
simdutf_warn_unused result base64_to_binary_safe(const char * input, size_t length, char* output, size_t& outlen, base64_options options = base64_default) noexcept;
simdutf_warn_unused result base64_to_binary_safe(const char16_t * input, size_t length, char* output, size_t& outlen, base64_options options = base64_default) noexcept;
/**
* An implementation of simdutf for a particular CPU architecture.
@ -3409,10 +3497,21 @@ public:
*
* @param input the base64 input to process
* @param length the length of the base64 input in bytes
* @return number of base64 bytes
* @return maximal number of binary bytes
*/
simdutf_warn_unused virtual size_t maximal_binary_length_from_base64(const char * input, size_t length) const noexcept = 0;
/**
* Provide the maximal binary length in bytes given the base64 input.
* In general, if the input contains ASCII spaces, the result will be less than
* the maximum length.
*
* @param input the base64 input to process, in ASCII stored as 16-bit units
* @param length the length of the base64 input in 16-bit units
* @return maximal number of binary bytes
*/
simdutf_warn_unused virtual size_t maximal_binary_length_from_base64(const char16_t * input, size_t length) const noexcept = 0;
/**
* Convert a base64 input to a binary output.
*
@ -3423,8 +3522,8 @@ public:
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
*
* This function will fail in case of invalid input. There are two possible reasons for
* failure: the input is contains a number of base64 characters that when divided by 4, leaves
* a singler remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
* failure: the input contains a number of base64 characters that when divided by 4, leaves
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
*
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
@ -3433,17 +3532,44 @@ public:
* @param input the base64 string to process
* @param length the length of the string in bytes
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and either position of the error (in the input in bytes) if any, or the number of bytes written if successful.
*/
simdutf_warn_unused virtual result base64_to_binary(const char * input, size_t length, char* output) const noexcept = 0;
simdutf_warn_unused virtual result base64_to_binary(const char * input, size_t length, char* output, base64_options options = base64_default) const noexcept = 0;
/**
* Convert a base64 input to a binary output.
*
* This function follows the WHATWG forgiving-base64 format, which means that it will
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
* equal signs at the end) or an unpadded input (without any equal signs at the end).
*
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
*
* This function will fail in case of invalid input. There are two possible reasons for
* failure: the input contains a number of base64 characters that when divided by 4, leaves
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
*
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
* If you fail to provide that much space, the function may cause a buffer overflow.
*
* @param input the base64 string to process, in ASCII stored as 16-bit units
* @param length the length of the string in 16-bit units
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
* @return a result pair struct (of type simdutf::result containing the two fields error and count) with an error code and position of the INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number of bytes written if successful.
*/
simdutf_warn_unused virtual result base64_to_binary(const char16_t * input, size_t length, char* output, base64_options options = base64_default) const noexcept = 0;
/**
* Provide the base64 length in bytes given the length of a binary input.
*
* @param length the length of the input in bytes
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
* @return number of base64 bytes
*/
simdutf_warn_unused virtual size_t base64_length_from_binary(size_t length) const noexcept = 0;
simdutf_warn_unused virtual size_t base64_length_from_binary(size_t length, base64_options options = base64_default) const noexcept = 0;
/**
* Convert a binary input to a base64 ouput. The output is always padded with equal signs so that it is
@ -3454,9 +3580,10 @@ public:
* @param input the binary to process
* @param length the length of the input in bytes
* @param output the pointer to buffer that can hold the conversion result (should be at least base64_length_from_binary(length) bytes long)
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
* @return number of written bytes, will be equal to base64_length_from_binary(length)
*/
virtual size_t binary_to_base64(const char * input, size_t length, char* output) const noexcept = 0;
virtual size_t binary_to_base64(const char * input, size_t length, char* output, base64_options options = base64_default) const noexcept = 0;
protected: