util: improve unicode support

The array grouping function relies on the width of the characters. It was not calculated correctly so far, since the string length was used instead. This improves the unicode output by calculating the mono-spaced font width (other fonts might differ). PR-URL: https://github.com/nodejs/node/pull/31319 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Steven R Loomis <srloomis@us.ibm.com> Reviewed-By: Rich Trott <rtrott@gmail.com> Reviewed-By: Minwoo Jung <nodecorelab@gmail.com>
2025-12-28 16:07:39 +00:00 · 2020-01-11 19:48:40 +01:00 · 2020-01-11 19:48:40 +01:00 · 8fb5fe28a4
commit 8fb5fe28a4
parent 2606e1ed25
11 changed files with 211 additions and 192 deletions
--- a/lib/internal/cli_table.js
+++ b/lib/internal/cli_table.js
@ -6,7 +6,7 @@ const {
  ObjectPrototypeHasOwnProperty,
 } = primordials;

-const { getStringWidth } = require('internal/readline/utils');
+const { getStringWidth } = require('internal/util/inspect');

 // The use of Unicode characters below is the only non-comment use of non-ASCII
 // Unicode characters in Node.js built-in modules. If they are ever removed or
--- a/lib/internal/readline/utils.js
+++ b/lib/internal/readline/utils.js
@ -1,25 +1,13 @@
 'use strict';

 const {
-  RegExp,
  Symbol,
 } = primordials;

-// Regex used for ansi escape code splitting
-// Adopted from https://github.com/chalk/ansi-regex/blob/master/index.js
-// License: MIT, authors: @sindresorhus, Qix-, arjunmehta and LitoMore
-// Matches all ansi escape code sequences in a string
-const ansiPattern = '[\\u001B\\u009B][[\\]()#;?]*' +
-  '(?:(?:(?:[a-zA-Z\\d]*(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?\\u0007)' +
-  '|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-ntqry=><~]))';
-const ansi = new RegExp(ansiPattern, 'g');
-
 const kUTF16SurrogateThreshold = 0x10000; // 2 ** 16
 const kEscape = '\x1b';
 const kSubstringSearch = Symbol('kSubstringSearch');

-let getStringWidth;
-
 function CSI(strings, ...args) {
  let ret = `${kEscape}[`;
  for (let n = 0; n < strings.length; n++) {
@ -59,109 +47,6 @@ function charLengthAt(str, i) {
  return str.codePointAt(i) >= kUTF16SurrogateThreshold ? 2 : 1;
 }

-if (internalBinding('config').hasIntl) {
-  const icu = internalBinding('icu');
-  // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
-  // Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true;
-  // TODO(BridgeAR): Expose the options to the user. That is probably the
-  // best thing possible at the moment, since it's difficult to know what
-  // the receiving end supports.
-  getStringWidth = function getStringWidth(str) {
-    let width = 0;
-    str = stripVTControlCharacters(str);
-    for (let i = 0; i < str.length; i++) {
-      // Try to avoid calling into C++ by first handling the ASCII portion of
-      // the string. If it is fully ASCII, we skip the C++ part.
-      const code = str.charCodeAt(i);
-      if (code >= 127) {
-        width += icu.getStringWidth(str.slice(i));
-        break;
-      }
-      width += code >= 32 ? 1 : 0;
-    }
-    return width;
-  };
-} else {
-  /**
-   * Returns the number of columns required to display the given string.
-   */
-  getStringWidth = function getStringWidth(str) {
-    let width = 0;
-
-    str = stripVTControlCharacters(str);
-
-    for (const char of str) {
-      const code = char.codePointAt(0);
-      if (isFullWidthCodePoint(code)) {
-        width += 2;
-      } else if (!isZeroWidthCodePoint(code)) {
-        width++;
-      }
-    }
-
-    return width;
-  };
-
-  /**
-   * Returns true if the character represented by a given
-   * Unicode code point is full-width. Otherwise returns false.
-   */
-  const isFullWidthCodePoint = (code) => {
-    // Code points are partially derived from:
-    // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
-    return code >= 0x1100 && (
-      code <= 0x115f ||  // Hangul Jamo
-      code === 0x2329 || // LEFT-POINTING ANGLE BRACKET
-      code === 0x232a || // RIGHT-POINTING ANGLE BRACKET
-      // CJK Radicals Supplement .. Enclosed CJK Letters and Months
-      (code >= 0x2e80 && code <= 0x3247 && code !== 0x303f) ||
-      // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A
-      (code >= 0x3250 && code <= 0x4dbf) ||
-      // CJK Unified Ideographs .. Yi Radicals
-      (code >= 0x4e00 && code <= 0xa4c6) ||
-      // Hangul Jamo Extended-A
-      (code >= 0xa960 && code <= 0xa97c) ||
-      // Hangul Syllables
-      (code >= 0xac00 && code <= 0xd7a3) ||
-      // CJK Compatibility Ideographs
-      (code >= 0xf900 && code <= 0xfaff) ||
-      // Vertical Forms
-      (code >= 0xfe10 && code <= 0xfe19) ||
-      // CJK Compatibility Forms .. Small Form Variants
-      (code >= 0xfe30 && code <= 0xfe6b) ||
-      // Halfwidth and Fullwidth Forms
-      (code >= 0xff01 && code <= 0xff60) ||
-      (code >= 0xffe0 && code <= 0xffe6) ||
-      // Kana Supplement
-      (code >= 0x1b000 && code <= 0x1b001) ||
-      // Enclosed Ideographic Supplement
-      (code >= 0x1f200 && code <= 0x1f251) ||
-      // Miscellaneous Symbols and Pictographs .. Emoticons
-      (code >= 0x1f300 && code <= 0x1f64f) ||
-      // CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
-      (code >= 0x20000 && code <= 0x3fffd)
-    );
-  };
-
-  const isZeroWidthCodePoint = (code) => {
-    return code <= 0x1F || // C0 control codes
-      (code > 0x7F && code <= 0x9F) || // C1 control codes
-      (code >= 0x0300 && code <= 0x036F) || // Combining Diacritical Marks
-      (code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
-      (code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
-      (code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
-      (code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
-  };
-}
-
-/**
- * Tries to remove all VT control characters. Use to estimate displayed
- * string width. May be buggy due to not running a real state machine
- */
-function stripVTControlCharacters(str) {
-  return str.replace(ansi, '');
-}
-
 /*
  Some patterns seen in terminal key escape codes, derived from combos seen
  at http://www.midnight-commander.org/browser/lib/tty/key.c
@ -477,8 +362,6 @@ module.exports = {
  charLengthLeft,
  commonPrefix,
  emitKeys,
-  getStringWidth,
  kSubstringSearch,
-  stripVTControlCharacters,
  CSI
 };
--- a/lib/internal/repl/utils.js
+++ b/lib/internal/repl/utils.js
@ -32,11 +32,13 @@ const {

 const {
  commonPrefix,
-  getStringWidth,
  kSubstringSearch,
 } = require('internal/readline/utils');

-const { inspect } = require('util');
+const {
+  getStringWidth,
+  inspect,
+} = require('internal/util/inspect');

 const debug = require('internal/util/debuglog').debuglog('repl');

--- a/lib/internal/util/inspect.js
+++ b/lib/internal/util/inspect.js
@ -192,6 +192,17 @@ const meta = [
  '\\x98', '\\x99', '\\x9A', '\\x9B', '\\x9C', '\\x9D', '\\x9E', '\\x9F', // x9F
 ];

+// Regex used for ansi escape code splitting
+// Adopted from https://github.com/chalk/ansi-regex/blob/master/index.js
+// License: MIT, authors: @sindresorhus, Qix-, arjunmehta and LitoMore
+// Matches all ansi escape code sequences in a string
+const ansiPattern = '[\\u001B\\u009B][[\\]()#;?]*' +
+  '(?:(?:(?:[a-zA-Z\\d]*(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?\\u0007)' +
+  '|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-ntqry=><~]))';
+const ansi = new RegExp(ansiPattern, 'g');
+
+let getStringWidth;
+
 function getUserOptions(ctx) {
  return {
    stylize: ctx.stylize,
@ -1154,7 +1165,7 @@ function groupArrayElements(ctx, output, value) {
  // entries length of all output entries. We have to remove colors first,
  // otherwise the length would not be calculated properly.
  for (; i < outputLength; i++) {
-    const len = ctx.colors ? removeColors(output[i]).length : output[i].length;
+    const len = getStringWidth(output[i], ctx.colors);
    dataLen[i] = len;
    totalLength += len + separatorSpace;
    if (maxLength < len)
@ -1197,8 +1208,6 @@ function groupArrayElements(ctx, output, value) {
    if (columns <= 1) {
      return output;
    }
-    // TODO(BridgeAR): Add unicode support. Use the readline getStringWidth
-    // function.
    const tmp = [];
    const maxLineLength = [];
    for (let i = 0; i < columns; i++) {
@ -1565,11 +1574,8 @@ function formatProperty(ctx, value, recurseTimes, key, type, desc) {
    const diff = (ctx.compact !== true || type !== kObjectType) ? 2 : 3;
    ctx.indentationLvl += diff;
    str = formatValue(ctx, desc.value, recurseTimes);
-    if (diff === 3) {
-      const len = ctx.colors ? removeColors(str).length : str.length;
-      if (ctx.breakLength < len) {
-        extra = `\n${' '.repeat(ctx.indentationLvl)}`;
-      }
+    if (diff === 3 && ctx.breakLength < getStringWidth(str, ctx.colors)) {
+      extra = `\n${' '.repeat(ctx.indentationLvl)}`;
    }
    ctx.indentationLvl -= diff;
  } else if (desc.get !== undefined) {
@ -1889,9 +1895,116 @@ function formatWithOptionsInternal(inspectOptions, ...args) {
  return str;
 }

+if (internalBinding('config').hasIntl) {
+  const icu = internalBinding('icu');
+  // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
+  // Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true;
+  // TODO(BridgeAR): Expose the options to the user. That is probably the
+  // best thing possible at the moment, since it's difficult to know what
+  // the receiving end supports.
+  getStringWidth = function getStringWidth(str, removeControlChars = true) {
+    let width = 0;
+    if (removeControlChars)
+      str = stripVTControlCharacters(str);
+    for (let i = 0; i < str.length; i++) {
+      // Try to avoid calling into C++ by first handling the ASCII portion of
+      // the string. If it is fully ASCII, we skip the C++ part.
+      const code = str.charCodeAt(i);
+      if (code >= 127) {
+        width += icu.getStringWidth(str.slice(i));
+        break;
+      }
+      width += code >= 32 ? 1 : 0;
+    }
+    return width;
+  };
+} else {
+  /**
+   * Returns the number of columns required to display the given string.
+   */
+  getStringWidth = function getStringWidth(str, removeControlChars = true) {
+    let width = 0;
+
+    if (removeControlChars)
+      str = stripVTControlCharacters(str);
+
+    for (const char of str) {
+      const code = char.codePointAt(0);
+      if (isFullWidthCodePoint(code)) {
+        width += 2;
+      } else if (!isZeroWidthCodePoint(code)) {
+        width++;
+      }
+    }
+
+    return width;
+  };
+
+  /**
+   * Returns true if the character represented by a given
+   * Unicode code point is full-width. Otherwise returns false.
+   */
+  const isFullWidthCodePoint = (code) => {
+    // Code points are partially derived from:
+    // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
+    return code >= 0x1100 && (
+      code <= 0x115f ||  // Hangul Jamo
+      code === 0x2329 || // LEFT-POINTING ANGLE BRACKET
+      code === 0x232a || // RIGHT-POINTING ANGLE BRACKET
+      // CJK Radicals Supplement .. Enclosed CJK Letters and Months
+      (code >= 0x2e80 && code <= 0x3247 && code !== 0x303f) ||
+      // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A
+      (code >= 0x3250 && code <= 0x4dbf) ||
+      // CJK Unified Ideographs .. Yi Radicals
+      (code >= 0x4e00 && code <= 0xa4c6) ||
+      // Hangul Jamo Extended-A
+      (code >= 0xa960 && code <= 0xa97c) ||
+      // Hangul Syllables
+      (code >= 0xac00 && code <= 0xd7a3) ||
+      // CJK Compatibility Ideographs
+      (code >= 0xf900 && code <= 0xfaff) ||
+      // Vertical Forms
+      (code >= 0xfe10 && code <= 0xfe19) ||
+      // CJK Compatibility Forms .. Small Form Variants
+      (code >= 0xfe30 && code <= 0xfe6b) ||
+      // Halfwidth and Fullwidth Forms
+      (code >= 0xff01 && code <= 0xff60) ||
+      (code >= 0xffe0 && code <= 0xffe6) ||
+      // Kana Supplement
+      (code >= 0x1b000 && code <= 0x1b001) ||
+      // Enclosed Ideographic Supplement
+      (code >= 0x1f200 && code <= 0x1f251) ||
+      // Miscellaneous Symbols and Pictographs 0x1f300 - 0x1f5ff
+      // Emoticons 0x1f600 - 0x1f64f
+      (code >= 0x1f300 && code <= 0x1f64f) ||
+      // CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
+      (code >= 0x20000 && code <= 0x3fffd)
+    );
+  };
+
+  const isZeroWidthCodePoint = (code) => {
+    return code <= 0x1F || // C0 control codes
+      (code > 0x7F && code <= 0x9F) || // C1 control codes
+      (code >= 0x300 && code <= 0x36F) || // Combining Diacritical Marks
+      (code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
+      (code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
+      (code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
+      (code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
+  };
+}
+
+/**
+ * Remove all VT control characters. Use to estimate displayed string width.
+ */
+function stripVTControlCharacters(str) {
+  return str.replace(ansi, '');
+}
+
 module.exports = {
  inspect,
  format,
  formatWithOptions,
-  inspectDefaultOptions
+  getStringWidth,
+  inspectDefaultOptions,
+  stripVTControlCharacters
 };
--- a/lib/readline.js
+++ b/lib/readline.js
@ -46,7 +46,11 @@ const {
  ERR_INVALID_OPT_VALUE
 } = require('internal/errors').codes;
 const { validateString } = require('internal/validators');
-const { inspect } = require('internal/util/inspect');
+const {
+  inspect,
+  getStringWidth,
+  stripVTControlCharacters,
+} = require('internal/util/inspect');
 const EventEmitter = require('events');
 const {
  charLengthAt,
@ -54,9 +58,7 @@ const {
  commonPrefix,
  CSI,
  emitKeys,
-  getStringWidth,
  kSubstringSearch,
-  stripVTControlCharacters
 } = require('internal/readline/utils');

 const { clearTimeout, setTimeout } = require('timers');
--- a/test/parallel/test-icu-stringwidth.js
+++ b/test/parallel/test-icu-stringwidth.js
@ -6,46 +6,46 @@ if (!common.hasIntl)
  common.skip('missing Intl');

 const assert = require('assert');
-const readline = require('internal/readline/utils');
+const { getStringWidth } = require('internal/util/inspect');

 // Test column width

 // Ll (Lowercase Letter): LATIN SMALL LETTER A
-assert.strictEqual(readline.getStringWidth('a'), 1);
-assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0061)), 1);
+assert.strictEqual(getStringWidth('a'), 1);
+assert.strictEqual(getStringWidth(String.fromCharCode(0x0061)), 1);
 // Lo (Other Letter)
-assert.strictEqual(readline.getStringWidth('丁'), 2);
-assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x4E01)), 2);
+assert.strictEqual(getStringWidth('丁'), 2);
+assert.strictEqual(getStringWidth(String.fromCharCode(0x4E01)), 2);
 // Surrogate pairs
-assert.strictEqual(readline.getStringWidth('\ud83d\udc78\ud83c\udfff'), 4);
-assert.strictEqual(readline.getStringWidth('👅'), 2);
+assert.strictEqual(getStringWidth('\ud83d\udc78\ud83c\udfff'), 4);
+assert.strictEqual(getStringWidth('👅'), 2);
 // Cs (Surrogate): High Surrogate
-assert.strictEqual(readline.getStringWidth('\ud83d'), 1);
+assert.strictEqual(getStringWidth('\ud83d'), 1);
 // Cs (Surrogate): Low Surrogate
-assert.strictEqual(readline.getStringWidth('\udc78'), 1);
+assert.strictEqual(getStringWidth('\udc78'), 1);
 // Cc (Control): NULL
-assert.strictEqual(readline.getStringWidth('\u0000'), 0);
+assert.strictEqual(getStringWidth('\u0000'), 0);
 // Cc (Control): BELL
-assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0007)), 0);
+assert.strictEqual(getStringWidth(String.fromCharCode(0x0007)), 0);
 // Cc (Control): LINE FEED
-assert.strictEqual(readline.getStringWidth('\n'), 0);
+assert.strictEqual(getStringWidth('\n'), 0);
 // Cf (Format): SOFT HYPHEN
-assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x00AD)), 1);
+assert.strictEqual(getStringWidth(String.fromCharCode(0x00AD)), 1);
 // Cf (Format): LEFT-TO-RIGHT MARK
 // Cf (Format): RIGHT-TO-LEFT MARK
-assert.strictEqual(readline.getStringWidth('\u200Ef\u200F'), 1);
+assert.strictEqual(getStringWidth('\u200Ef\u200F'), 1);
 // Cn (Unassigned): Not a character
-assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x10FFEF)), 1);
+assert.strictEqual(getStringWidth(String.fromCharCode(0x10FFEF)), 1);
 // Cn (Unassigned): Not a character (but in a CJK range)
-assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x3FFEF)), 1);
+assert.strictEqual(getStringWidth(String.fromCharCode(0x3FFEF)), 1);
 // Mn (Nonspacing Mark): COMBINING ACUTE ACCENT
-assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0301)), 0);
+assert.strictEqual(getStringWidth(String.fromCharCode(0x0301)), 0);
 // Mc (Spacing Mark): BALINESE ADEG ADEG
 // Chosen as its Canonical_Combining_Class is not 0, but is not a 0-width
 // character.
-assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x1B44)), 1);
+assert.strictEqual(getStringWidth(String.fromCharCode(0x1B44)), 1);
 // Me (Enclosing Mark): COMBINING ENCLOSING CIRCLE
-assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x20DD)), 0);
+assert.strictEqual(getStringWidth(String.fromCharCode(0x20DD)), 0);

 // The following is an emoji sequence with ZWJ (zero-width-joiner). In some
 // implementations, it is represented as a single glyph, in other
@ -53,37 +53,37 @@ assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x20DD)), 0);
 // component will be counted individually, since not a lot of systems support
 // these fully.
 // See https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences
-assert.strictEqual(readline.getStringWidth('👩‍👩‍👧‍👧'), 8);
+assert.strictEqual(getStringWidth('👩‍👩‍👧‍👧'), 8);
 // TODO(BridgeAR): This should have a width of two and six. The heart contains
 // the \uFE0F variation selector that indicates that it should be displayed as
 // emoji instead of as text. Emojis are all full width characters when not being
 // rendered as text.
 // https://en.wikipedia.org/wiki/Variation_Selectors_(Unicode_block)
-assert.strictEqual(readline.getStringWidth('❤️'), 1);
-assert.strictEqual(readline.getStringWidth('👩‍❤️‍👩'), 5);
+assert.strictEqual(getStringWidth('❤️'), 1);
+assert.strictEqual(getStringWidth('👩‍❤️‍👩'), 5);
 // The length of one is correct. It is an emoji treated as text.
-assert.strictEqual(readline.getStringWidth('❤'), 1);
+assert.strictEqual(getStringWidth('❤'), 1);

 // By default, unicode characters whose width is considered ambiguous will
 // be considered half-width. For these characters, getStringWidth will return
 // 1. In some contexts, however, it is more appropriate to consider them full
 // width. By default, the algorithm will assume half width.
-assert.strictEqual(readline.getStringWidth('\u01d4'), 1);
+assert.strictEqual(getStringWidth('\u01d4'), 1);

 // Control chars and combining chars are zero
-assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1);
+assert.strictEqual(getStringWidth('\u200E\n\u220A\u20D2'), 1);

 // Test that the fast path for ASCII characters yields results consistent
 // with the 'slow' path.
 for (let i = 0; i < 256; i++) {
  const char = String.fromCharCode(i);
  assert.strictEqual(
-    readline.getStringWidth(char + '🎉'),
-    readline.getStringWidth(char) + 2);
+    getStringWidth(char + '🎉'),
+    getStringWidth(char) + 2);

  if (i < 32 || (i >= 127 && i < 160)) {  // Control character
-    assert.strictEqual(readline.getStringWidth(char), 0);
+    assert.strictEqual(getStringWidth(char), 0);
  } else {  // Regular ASCII character
-    assert.strictEqual(readline.getStringWidth(char), 1);
+    assert.strictEqual(getStringWidth(char), 1);
  }
 }
--- a/test/parallel/test-readline-interface.js
+++ b/test/parallel/test-readline-interface.js
@ -25,7 +25,10 @@ const common = require('../common');

 const assert = require('assert');
 const readline = require('readline');
-const internalReadline = require('internal/readline/utils');
+const {
+  getStringWidth,
+  stripVTControlCharacters
+} = require('internal/util/inspect');
 const EventEmitter = require('events').EventEmitter;
 const { Writable, Readable } = require('stream');

@ -1140,48 +1143,44 @@ function isWarned(emitter) {
  }

  // Wide characters should be treated as two columns.
-  assert.strictEqual(internalReadline.getStringWidth('a'), 1);
-  assert.strictEqual(internalReadline.getStringWidth('あ'), 2);
-  assert.strictEqual(internalReadline.getStringWidth('谢'), 2);
-  assert.strictEqual(internalReadline.getStringWidth('고'), 2);
-  assert.strictEqual(
-    internalReadline.getStringWidth(String.fromCodePoint(0x1f251)), 2);
-  assert.strictEqual(internalReadline.getStringWidth('abcde'), 5);
-  assert.strictEqual(internalReadline.getStringWidth('古池や'), 6);
-  assert.strictEqual(internalReadline.getStringWidth('ノード.js'), 9);
-  assert.strictEqual(internalReadline.getStringWidth('你好'), 4);
-  assert.strictEqual(internalReadline.getStringWidth('안녕하세요'), 10);
-  assert.strictEqual(internalReadline.getStringWidth('A\ud83c\ude00BC'), 5);
-  assert.strictEqual(internalReadline.getStringWidth('👨‍👩‍👦‍👦'), 8);
-  assert.strictEqual(internalReadline.getStringWidth('🐕𐐷あ💻😀'), 9);
+  assert.strictEqual(getStringWidth('a'), 1);
+  assert.strictEqual(getStringWidth('あ'), 2);
+  assert.strictEqual(getStringWidth('谢'), 2);
+  assert.strictEqual(getStringWidth('고'), 2);
+  assert.strictEqual(getStringWidth(String.fromCodePoint(0x1f251)), 2);
+  assert.strictEqual(getStringWidth('abcde'), 5);
+  assert.strictEqual(getStringWidth('古池や'), 6);
+  assert.strictEqual(getStringWidth('ノード.js'), 9);
+  assert.strictEqual(getStringWidth('你好'), 4);
+  assert.strictEqual(getStringWidth('안녕하세요'), 10);
+  assert.strictEqual(getStringWidth('A\ud83c\ude00BC'), 5);
+  assert.strictEqual(getStringWidth('👨‍👩‍👦‍👦'), 8);
+  assert.strictEqual(getStringWidth('🐕𐐷あ💻😀'), 9);
  // TODO(BridgeAR): This should have a width of 4.
-  assert.strictEqual(internalReadline.getStringWidth('⓬⓪'), 2);
-  assert.strictEqual(internalReadline.getStringWidth('\u0301\u200D\u200E'), 0);
+  assert.strictEqual(getStringWidth('⓬⓪'), 2);
+  assert.strictEqual(getStringWidth('\u0301\u200D\u200E'), 0);

  // Check if vt control chars are stripped
  assert.strictEqual(
-    internalReadline.stripVTControlCharacters('\u001b[31m> \u001b[39m'),
+    stripVTControlCharacters('\u001b[31m> \u001b[39m'),
    '> '
  );
  assert.strictEqual(
-    internalReadline.stripVTControlCharacters('\u001b[31m> \u001b[39m> '),
+    stripVTControlCharacters('\u001b[31m> \u001b[39m> '),
    '> > '
  );
  assert.strictEqual(
-    internalReadline.stripVTControlCharacters('\u001b[31m\u001b[39m'),
+    stripVTControlCharacters('\u001b[31m\u001b[39m'),
    ''
  );
  assert.strictEqual(
-    internalReadline.stripVTControlCharacters('> '),
+    stripVTControlCharacters('> '),
    '> '
  );
-  assert.strictEqual(internalReadline
-    .getStringWidth('\u001b[31m> \u001b[39m'), 2);
-  assert.strictEqual(internalReadline
-    .getStringWidth('\u001b[31m> \u001b[39m> '), 4);
-  assert.strictEqual(internalReadline
-    .getStringWidth('\u001b[31m\u001b[39m'), 0);
-  assert.strictEqual(internalReadline.getStringWidth('> '), 2);
+  assert.strictEqual(getStringWidth('\u001b[31m> \u001b[39m'), 2);
+  assert.strictEqual(getStringWidth('\u001b[31m> \u001b[39m> '), 4);
+  assert.strictEqual(getStringWidth('\u001b[31m\u001b[39m'), 0);
+  assert.strictEqual(getStringWidth('> '), 2);

  {
    const fi = new FakeInput();
--- a/test/parallel/test-readline-tab-complete.js
+++ b/test/parallel/test-readline-tab-complete.js
@ -6,7 +6,7 @@ const common = require('../common');
 const readline = require('readline');
 const assert = require('assert');
 const EventEmitter = require('events').EventEmitter;
-const { getStringWidth } = require('internal/readline/utils');
+const { getStringWidth } = require('internal/util/inspect');

 // This test verifies that the tab completion supports unicode and the writes
 // are limited to the minimum.
--- a/test/parallel/test-repl-history-navigation.js
+++ b/test/parallel/test-repl-history-navigation.js
@ -157,7 +157,7 @@ const tests = [
    env: { NODE_REPL_HISTORY: defaultHistoryPath },
    skip: !process.features.inspector,
    test: [
-      // あ is a fill width character with a length of one.
+      // あ is a full width character with a length of one.
      // 🐕 is a full width character with a length of two.
      // 𐐷 is a half width character with the length of two.
      // '\u0301', '0x200D', '\u200E' are zero width characters.
--- a/test/parallel/test-repl-top-level-await.js
+++ b/test/parallel/test-repl-top-level-await.js
@ -3,7 +3,7 @@
 const common = require('../common');
 const ArrayStream = require('../common/arraystream');
 const assert = require('assert');
-const { stripVTControlCharacters } = require('internal/readline/utils');
+const { stripVTControlCharacters } = require('internal/util/inspect');
 const repl = require('repl');

 common.skipIfInspectorDisabled();
--- a/test/parallel/test-util-inspect.js
+++ b/test/parallel/test-util-inspect.js
@ -2417,6 +2417,26 @@ assert.strictEqual(

  assert.strictEqual(out, expected);

+  // Unicode support. あ has a length of one and a width of two.
+  obj = [
+    '123', '123', '123', '123', 'あああ',
+    '123', '123', '123', '123', 'あああ'
+  ];
+
+  out = util.inspect(obj, { compact: 3 });
+
+  expected = [
+    '[',
+    "  '123',    '123',",
+    "  '123',    '123',",
+    "  'あああ', '123',",
+    "  '123',    '123',",
+    "  '123',    'あああ'",
+    ']',
+  ].join('\n');
+
+  assert.strictEqual(out, expected);
+
  // Verify that array grouping and line consolidation does not happen together.
  obj = {
    a: {