mirror of
https://github.com/nodejs/node.git
synced 2025-12-28 16:07:39 +00:00
src: remove icu based ToASCII and ToUnicode
Some checks are pending
Coverage Linux (without intl) / coverage-linux-without-intl (push) Waiting to run
Coverage Linux / coverage-linux (push) Waiting to run
Coverage Windows / coverage-windows (push) Waiting to run
Test and upload documentation to artifacts / build-docs (push) Waiting to run
Linters / lint-addon-docs (push) Waiting to run
Linters / lint-cpp (push) Waiting to run
Linters / format-cpp (push) Waiting to run
Linters / lint-js-and-md (push) Waiting to run
Linters / lint-py (push) Waiting to run
Linters / lint-yaml (push) Waiting to run
Linters / lint-sh (push) Waiting to run
Linters / lint-codeowners (push) Waiting to run
Linters / lint-pr-url (push) Waiting to run
Linters / lint-readme (push) Waiting to run
Notify on Push / Notify on Force Push on `main` (push) Waiting to run
Notify on Push / Notify on Push on `main` that lacks metadata (push) Waiting to run
Scorecard supply-chain security / Scorecard analysis (push) Waiting to run
Some checks are pending
Coverage Linux (without intl) / coverage-linux-without-intl (push) Waiting to run
Coverage Linux / coverage-linux (push) Waiting to run
Coverage Windows / coverage-windows (push) Waiting to run
Test and upload documentation to artifacts / build-docs (push) Waiting to run
Linters / lint-addon-docs (push) Waiting to run
Linters / lint-cpp (push) Waiting to run
Linters / format-cpp (push) Waiting to run
Linters / lint-js-and-md (push) Waiting to run
Linters / lint-py (push) Waiting to run
Linters / lint-yaml (push) Waiting to run
Linters / lint-sh (push) Waiting to run
Linters / lint-codeowners (push) Waiting to run
Linters / lint-pr-url (push) Waiting to run
Linters / lint-readme (push) Waiting to run
Notify on Push / Notify on Force Push on `main` (push) Waiting to run
Notify on Push / Notify on Push on `main` that lacks metadata (push) Waiting to run
Scorecard supply-chain security / Scorecard analysis (push) Waiting to run
PR-URL: https://github.com/nodejs/node/pull/55156 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Matthew Aitken <maitken033380023@gmail.com> Reviewed-By: Daniel Lemire <daniel@lemire.me> Reviewed-By: Richard Lau <rlau@redhat.com>
This commit is contained in:
parent
7c0cc12f0b
commit
9f5000e0f2
172
src/node_i18n.cc
172
src/node_i18n.cc
@ -60,19 +60,17 @@
|
||||
#include <unicode/uchar.h>
|
||||
#include <unicode/uclean.h>
|
||||
#include <unicode/ucnv.h>
|
||||
#include <unicode/udata.h>
|
||||
#include <unicode/uidna.h>
|
||||
#include <unicode/ulocdata.h>
|
||||
#include <unicode/urename.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utf16.h>
|
||||
#include <unicode/utf8.h>
|
||||
#include <unicode/utypes.h>
|
||||
#include <unicode/uvernum.h>
|
||||
#include <unicode/uversion.h>
|
||||
#include "nbytes.h"
|
||||
|
||||
#ifdef NODE_HAVE_SMALL_ICU
|
||||
#include <unicode/udata.h>
|
||||
|
||||
/* if this is defined, we have a 'secondary' entry point.
|
||||
compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */
|
||||
#define SMALL_ICUDATA_ENTRY_POINT \
|
||||
@ -96,7 +94,6 @@ using v8::Int32;
|
||||
using v8::Isolate;
|
||||
using v8::Local;
|
||||
using v8::MaybeLocal;
|
||||
using v8::NewStringType;
|
||||
using v8::Object;
|
||||
using v8::ObjectTemplate;
|
||||
using v8::String;
|
||||
@ -583,167 +580,6 @@ void SetDefaultTimeZone(const char* tzid) {
|
||||
CHECK(U_SUCCESS(status));
|
||||
}
|
||||
|
||||
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
|
||||
const char* input,
|
||||
size_t length) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
|
||||
UIDNA* uidna = uidna_openUTS46(options, &status);
|
||||
if (U_FAILURE(status))
|
||||
return -1;
|
||||
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
|
||||
|
||||
int32_t len = uidna_nameToUnicodeUTF8(uidna,
|
||||
input, length,
|
||||
**buf, buf->capacity(),
|
||||
&info,
|
||||
&status);
|
||||
|
||||
// Do not check info.errors like we do with ToASCII since ToUnicode always
|
||||
// returns a string, despite any possible errors that may have occurred.
|
||||
|
||||
if (status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
status = U_ZERO_ERROR;
|
||||
buf->AllocateSufficientStorage(len);
|
||||
len = uidna_nameToUnicodeUTF8(uidna,
|
||||
input, length,
|
||||
**buf, buf->capacity(),
|
||||
&info,
|
||||
&status);
|
||||
}
|
||||
|
||||
// info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
|
||||
// string, regardless of whether an error occurred.
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
len = -1;
|
||||
buf->SetLength(0);
|
||||
} else {
|
||||
buf->SetLength(len);
|
||||
}
|
||||
|
||||
uidna_close(uidna);
|
||||
return len;
|
||||
}
|
||||
|
||||
int32_t ToASCII(MaybeStackBuffer<char>* buf,
|
||||
const char* input,
|
||||
size_t length,
|
||||
idna_mode mode) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
uint32_t options = // CheckHyphens = false; handled later
|
||||
UIDNA_CHECK_BIDI | // CheckBidi = true
|
||||
UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true
|
||||
UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing
|
||||
if (mode == idna_mode::kStrict) {
|
||||
options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict
|
||||
// VerifyDnsLength = beStrict;
|
||||
// handled later
|
||||
}
|
||||
|
||||
UIDNA* uidna = uidna_openUTS46(options, &status);
|
||||
if (U_FAILURE(status))
|
||||
return -1;
|
||||
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
|
||||
|
||||
int32_t len = uidna_nameToASCII_UTF8(uidna,
|
||||
input, length,
|
||||
**buf, buf->capacity(),
|
||||
&info,
|
||||
&status);
|
||||
|
||||
if (status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
status = U_ZERO_ERROR;
|
||||
buf->AllocateSufficientStorage(len);
|
||||
len = uidna_nameToASCII_UTF8(uidna,
|
||||
input, length,
|
||||
**buf, buf->capacity(),
|
||||
&info,
|
||||
&status);
|
||||
}
|
||||
|
||||
// In UTS #46 which specifies ToASCII, certain error conditions are
|
||||
// configurable through options, and the WHATWG URL Standard promptly elects
|
||||
// to disable some of them to accommodate for real-world use cases.
|
||||
// Unfortunately, ICU4C's IDNA module does not support disabling some of
|
||||
// these options through `options` above, and thus continues throwing
|
||||
// unnecessary errors. To counter this situation, we just filter out the
|
||||
// errors that may have happened afterwards, before deciding whether to
|
||||
// return an error from this function.
|
||||
|
||||
// CheckHyphens = false
|
||||
// (Specified in the current UTS #46 draft rev. 18.)
|
||||
// Refs:
|
||||
// - https://github.com/whatwg/url/issues/53
|
||||
// - https://github.com/whatwg/url/pull/309
|
||||
// - http://www.unicode.org/review/pri317/
|
||||
// - http://www.unicode.org/reports/tr46/tr46-18.html
|
||||
// - https://www.icann.org/news/announcement-2000-01-07-en
|
||||
info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
|
||||
info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
|
||||
info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
|
||||
|
||||
if (mode != idna_mode::kStrict) {
|
||||
// VerifyDnsLength = beStrict
|
||||
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
|
||||
info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
|
||||
info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
|
||||
}
|
||||
|
||||
if (U_FAILURE(status) || (mode != idna_mode::kLenient && info.errors != 0)) {
|
||||
len = -1;
|
||||
buf->SetLength(0);
|
||||
} else {
|
||||
buf->SetLength(len);
|
||||
}
|
||||
|
||||
uidna_close(uidna);
|
||||
return len;
|
||||
}
|
||||
|
||||
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
|
||||
Environment* env = Environment::GetCurrent(args);
|
||||
CHECK_GE(args.Length(), 1);
|
||||
CHECK(args[0]->IsString());
|
||||
Utf8Value val(env->isolate(), args[0]);
|
||||
|
||||
MaybeStackBuffer<char> buf;
|
||||
int32_t len = ToUnicode(&buf, *val, val.length());
|
||||
|
||||
if (len < 0) {
|
||||
return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode");
|
||||
}
|
||||
|
||||
args.GetReturnValue().Set(
|
||||
String::NewFromUtf8(env->isolate(),
|
||||
*buf,
|
||||
NewStringType::kNormal,
|
||||
len).ToLocalChecked());
|
||||
}
|
||||
|
||||
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
|
||||
Environment* env = Environment::GetCurrent(args);
|
||||
CHECK_GE(args.Length(), 1);
|
||||
CHECK(args[0]->IsString());
|
||||
Utf8Value val(env->isolate(), args[0]);
|
||||
// optional arg
|
||||
bool lenient = args[1]->BooleanValue(env->isolate());
|
||||
idna_mode mode = lenient ? idna_mode::kLenient : idna_mode::kDefault;
|
||||
|
||||
MaybeStackBuffer<char> buf;
|
||||
int32_t len = ToASCII(&buf, *val, val.length(), mode);
|
||||
|
||||
if (len < 0) {
|
||||
return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII");
|
||||
}
|
||||
|
||||
args.GetReturnValue().Set(
|
||||
String::NewFromUtf8(env->isolate(),
|
||||
*buf,
|
||||
NewStringType::kNormal,
|
||||
len).ToLocalChecked());
|
||||
}
|
||||
|
||||
// This is similar to wcwidth except that it takes the current unicode
|
||||
// character properties database into consideration, allowing it to
|
||||
// correctly calculate the column widths of things like emoji's and
|
||||
@ -850,8 +686,6 @@ static void CreatePerIsolateProperties(IsolateData* isolate_data,
|
||||
Local<ObjectTemplate> target) {
|
||||
Isolate* isolate = isolate_data->isolate();
|
||||
|
||||
SetMethod(isolate, target, "toUnicode", ToUnicode);
|
||||
SetMethod(isolate, target, "toASCII", ToASCII);
|
||||
SetMethod(isolate, target, "getStringWidth", GetStringWidth);
|
||||
|
||||
// One-shot converters
|
||||
@ -880,8 +714,6 @@ void CreatePerContextProperties(Local<Object> target,
|
||||
void* priv) {}
|
||||
|
||||
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
|
||||
registry->Register(ToUnicode);
|
||||
registry->Register(ToASCII);
|
||||
registry->Register(GetStringWidth);
|
||||
registry->Register(ICUErrorName);
|
||||
registry->Register(Transcode);
|
||||
|
||||
@ -53,19 +53,6 @@ enum class idna_mode {
|
||||
kStrict
|
||||
};
|
||||
|
||||
// Implements the WHATWG URL Standard "domain to ASCII" algorithm.
|
||||
// https://url.spec.whatwg.org/#concept-domain-to-ascii
|
||||
int32_t ToASCII(MaybeStackBuffer<char>* buf,
|
||||
const char* input,
|
||||
size_t length,
|
||||
idna_mode mode = idna_mode::kDefault);
|
||||
|
||||
// Implements the WHATWG URL Standard "domain to Unicode" algorithm.
|
||||
// https://url.spec.whatwg.org/#concept-domain-to-unicode
|
||||
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
|
||||
const char* input,
|
||||
size_t length);
|
||||
|
||||
struct ConverterDeleter {
|
||||
void operator()(UConverter* pointer) const { ucnv_close(pointer); }
|
||||
};
|
||||
|
||||
149
test/fixtures/icu-punycode-toascii.json
vendored
149
test/fixtures/icu-punycode-toascii.json
vendored
@ -1,149 +0,0 @@
|
||||
[
|
||||
"This resource is focused on highlighting issues with UTS #46 ToASCII",
|
||||
{
|
||||
"comment": "Label with hyphens in 3rd and 4th position",
|
||||
"input": "aa--",
|
||||
"output": "aa--"
|
||||
},
|
||||
{
|
||||
"input": "a†--",
|
||||
"output": "xn--a---kp0a"
|
||||
},
|
||||
{
|
||||
"input": "ab--c",
|
||||
"output": "ab--c"
|
||||
},
|
||||
{
|
||||
"comment": "Label with leading hyphen",
|
||||
"input": "-x",
|
||||
"output": "-x"
|
||||
},
|
||||
{
|
||||
"input": "-†",
|
||||
"output": "xn----xhn"
|
||||
},
|
||||
{
|
||||
"input": "-x.xn--nxa",
|
||||
"output": "-x.xn--nxa"
|
||||
},
|
||||
{
|
||||
"input": "-x.β",
|
||||
"output": "-x.xn--nxa"
|
||||
},
|
||||
{
|
||||
"comment": "Label with trailing hyphen",
|
||||
"input": "x-.xn--nxa",
|
||||
"output": "x-.xn--nxa"
|
||||
},
|
||||
{
|
||||
"input": "x-.β",
|
||||
"output": "x-.xn--nxa"
|
||||
},
|
||||
{
|
||||
"comment": "Empty labels",
|
||||
"input": "x..xn--nxa",
|
||||
"output": "x..xn--nxa"
|
||||
},
|
||||
{
|
||||
"input": "x..β",
|
||||
"output": "x..xn--nxa"
|
||||
},
|
||||
{
|
||||
"comment": "Invalid Punycode",
|
||||
"input": "xn--a",
|
||||
"output": null
|
||||
},
|
||||
{
|
||||
"input": "xn--a.xn--nxa",
|
||||
"output": null
|
||||
},
|
||||
{
|
||||
"input": "xn--a.β",
|
||||
"output": null
|
||||
},
|
||||
{
|
||||
"comment": "Valid Punycode",
|
||||
"input": "xn--nxa.xn--nxa",
|
||||
"output": "xn--nxa.xn--nxa"
|
||||
},
|
||||
{
|
||||
"comment": "Mixed",
|
||||
"input": "xn--nxa.β",
|
||||
"output": "xn--nxa.xn--nxa"
|
||||
},
|
||||
{
|
||||
"input": "ab--c.xn--nxa",
|
||||
"output": "ab--c.xn--nxa"
|
||||
},
|
||||
{
|
||||
"input": "ab--c.β",
|
||||
"output": "ab--c.xn--nxa"
|
||||
},
|
||||
{
|
||||
"comment": "CheckJoiners is true",
|
||||
"input": "\u200D.example",
|
||||
"output": null
|
||||
},
|
||||
{
|
||||
"input": "xn--1ug.example",
|
||||
"output": null
|
||||
},
|
||||
{
|
||||
"comment": "CheckBidi is true",
|
||||
"input": "يa",
|
||||
"output": null
|
||||
},
|
||||
{
|
||||
"input": "xn--a-yoc",
|
||||
"output": null
|
||||
},
|
||||
{
|
||||
"comment": "processing_option is Nontransitional_Processing",
|
||||
"input": "ශ්රී",
|
||||
"output": "xn--10cl1a0b660p"
|
||||
},
|
||||
{
|
||||
"input": "نامهای",
|
||||
"output": "xn--mgba3gch31f060k"
|
||||
},
|
||||
{
|
||||
"comment": "U+FFFD",
|
||||
"input": "\uFFFD.com",
|
||||
"output": null
|
||||
},
|
||||
{
|
||||
"comment": "U+FFFD character encoded in Punycode",
|
||||
"input": "xn--zn7c.com",
|
||||
"output": null
|
||||
},
|
||||
{
|
||||
"comment": "Label longer than 63 code points",
|
||||
"input": "x01234567890123456789012345678901234567890123456789012345678901x",
|
||||
"output": "x01234567890123456789012345678901234567890123456789012345678901x"
|
||||
},
|
||||
{
|
||||
"input": "x01234567890123456789012345678901234567890123456789012345678901†",
|
||||
"output": "xn--x01234567890123456789012345678901234567890123456789012345678901-6963b"
|
||||
},
|
||||
{
|
||||
"input": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa",
|
||||
"output": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa"
|
||||
},
|
||||
{
|
||||
"input": "x01234567890123456789012345678901234567890123456789012345678901x.β",
|
||||
"output": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa"
|
||||
},
|
||||
{
|
||||
"comment": "Domain excluding TLD longer than 253 code points",
|
||||
"input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x",
|
||||
"output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x"
|
||||
},
|
||||
{
|
||||
"input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa",
|
||||
"output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa"
|
||||
},
|
||||
{
|
||||
"input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.β",
|
||||
"output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa"
|
||||
}
|
||||
]
|
||||
@ -1,57 +0,0 @@
|
||||
'use strict';
|
||||
// Flags: --expose-internals
|
||||
const common = require('../common');
|
||||
|
||||
if (!common.hasIntl)
|
||||
common.skip('missing Intl');
|
||||
|
||||
const { internalBinding } = require('internal/test/binding');
|
||||
const icu = internalBinding('icu');
|
||||
const assert = require('assert');
|
||||
|
||||
// Test hasConverter method
|
||||
assert(icu.hasConverter('utf-8'),
|
||||
'hasConverter should report converter exists for utf-8');
|
||||
assert(!icu.hasConverter('x'),
|
||||
'hasConverter should report converter does not exist for x');
|
||||
|
||||
const tests = require('../fixtures/url-idna.js');
|
||||
const fixtures = require('../fixtures/icu-punycode-toascii.json');
|
||||
|
||||
{
|
||||
for (const [i, { ascii, unicode }] of tests.entries()) {
|
||||
assert.strictEqual(ascii, icu.toASCII(unicode), `toASCII(${i + 1})`);
|
||||
assert.strictEqual(unicode, icu.toUnicode(ascii), `toUnicode(${i + 1})`);
|
||||
assert.strictEqual(ascii, icu.toASCII(icu.toUnicode(ascii)),
|
||||
`toASCII(toUnicode(${i + 1}))`);
|
||||
assert.strictEqual(unicode, icu.toUnicode(icu.toASCII(unicode)),
|
||||
`toUnicode(toASCII(${i + 1}))`);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
for (const [i, test] of fixtures.entries()) {
|
||||
if (typeof test === 'string')
|
||||
continue; // skip comments
|
||||
const { comment, input, output } = test;
|
||||
let caseComment = `case ${i + 1}`;
|
||||
if (comment)
|
||||
caseComment += ` (${comment})`;
|
||||
if (output === null) {
|
||||
assert.throws(
|
||||
() => icu.toASCII(input),
|
||||
{
|
||||
code: 'ERR_INVALID_ARG_VALUE',
|
||||
name: 'TypeError',
|
||||
message: 'Cannot convert name to ASCII'
|
||||
}
|
||||
);
|
||||
icu.toASCII(input, true); // Should not throw.
|
||||
} else {
|
||||
assert.strictEqual(icu.toASCII(input), output, `ToASCII ${caseComment}`);
|
||||
assert.strictEqual(icu.toASCII(input, true), output,
|
||||
`ToASCII ${caseComment} in lenient mode`);
|
||||
}
|
||||
icu.toUnicode(input); // Should not throw.
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user