Summary
String::translateEscapes will be extended to accept Unicode escapes.
Problem
Currently, String::translateEscapes does not accept Unicode escapes and throws an IllegalArgumentException when it encounters one.
Solution
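Accept Unicode escapes of the form \uXXXX, where each X is a hexadecimal digit. As in JLS 3.3, the backslash may be followed by one or more 'u' characters before the four hexadecimal digits.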
Specification
diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 2245792999a..84f98a5efc2 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -4155,9 +4155,9 @@ private static int outdent(List<String> lines) {
/**
* Returns a string whose value is this string, with escape sequences
- * translated as if in a string literal.
+ * and Unicode escapes translated as if in a string literal.
* <p>
- * Escape sequences are translated as follows;
+ * Escapes are translated as follows;
* <table class="striped">
* <caption style="display:none">Translation</caption>
* <thead>
@@ -4223,21 +4223,31 @@ private static int outdent(List<String> lines) {
* <td>continuation</td>
* <td>discard</td>
* </tr>
+ * <tr>
+ * <th scope="row">{@code \u005CuXXXX}</th>
+ * <td>Unicode escape</td>
+ * <td>single UTF-16 code unit equivalent {@code U+XXXX}<p>multiple 'u' are supported per JLS 3.3</td>
+ * </tr>
* </tbody>
* </table>
*
- * @implNote
- * This method does <em>not</em> translate Unicode escapes such as "{@code \u005cu2022}".
- * Unicode escapes are translated by the Java compiler when reading input characters and
- * are not part of the string literal specification.
- *
* @throws IllegalArgumentException when an escape sequence is malformed.
*
- * @return String with escape sequences translated.
+ * @return String with escape sequences and Unicode escapes translated.
*
* @jls 3.10.7 Escape Sequences
+ * @jls 3.3 Unicode Escapes
*
* @since 15
+ *
+ * @implNote Unicode escapes are translated by the compiler before string
+ * literals are translated. As a convenience for use with constructed
+ * strings, this method translates Unicode escapes. For example, this
+ * method could be used when ASCII encoded text files need to maintain Unicode
+ * content. The translation is done in a single pass and is non-recursive. That is,
+ * escape sequences and Unicode escapes are translated as encountered in one pass and
+ * <strong>not</strong> done as a Unicode escapes pass followed by an escape sequences
+ * pass.
*/
public String translateEscapes() {
if (isEmpty()) {
@@ -4270,6 +4280,21 @@ public String translateEscapes() {
case 't':
ch = '\t';
break;
+ case 'u':
+ while (from < length && chars[from] == 'u') {
+ from++;
+ }
+ if (from <= length - 4) {
+ from += 4;
+ try {
+ ch = (char) Integer.parseInt(this, from - 4, from, 16);
+ } catch (NumberFormatException ex) {
+ throw new IllegalArgumentException("Invalid Unicode sequence: " + substring(from - 4, from));
+ }
+ } else {
+ throw new IllegalArgumentException("Invalid Unicode sequence: " + substring(from));
+ }
+ break;
case '\'':
case '\"':
case '\\':
- csr of
-
JDK-8263261 Extend String::translateEscapes to support unicode escapes
-
- Closed
-