You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
125 lines
4.4 KiB
125 lines
4.4 KiB
/* |
|
* Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved. |
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
* |
|
* This code is free software; you can redistribute it and/or modify it |
|
* under the terms of the GNU General Public License version 2 only, as |
|
* published by the Free Software Foundation. |
|
* |
|
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
* version 2 for more details (a copy is included in the LICENSE file that |
|
* accompanied this code). |
|
* |
|
* You should have received a copy of the GNU General Public License version |
|
* 2 along with this work; if not, write to the Free Software Foundation, |
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
* |
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
* or visit www.oracle.com if you need additional information or have any |
|
* questions. |
|
*/ |
|
|
|
/* |
|
* @test |
|
* @bug 4396708 |
|
* @summary Test URL encoder and decoder on a string that contains |
|
* surrogate pairs. |
|
* |
|
*/ |
|
|
|
import java.io.*; |
|
import java.net.*; |
|
|
|
/* |
|
* Surrogate pairs are two character Unicode sequences where the first |
|
* character lies in the range [d800, dbff] and the second character lies |
|
* in the range [dc00, dfff]. They are used as an escaping mechanism to add |
|
* 1M more characters to Unicode. |
|
*/ |
|
public class SurrogatePairs { |
|
|
|
static String[] testStrings = {"\uD800\uDC00", |
|
"\uD800\uDFFF", |
|
"\uDBFF\uDC00", |
|
"\uDBFF\uDFFF", |
|
"1\uDBFF\uDC00", |
|
"@\uDBFF\uDC00", |
|
"\uDBFF\uDC001", |
|
"\uDBFF\uDC00@", |
|
"\u0101\uDBFF\uDC00", |
|
"\uDBFF\uDC00\u0101" |
|
}; |
|
|
|
static String[] correctEncodings = {"%F0%90%80%80", |
|
"%F0%90%8F%BF", |
|
"%F4%8F%B0%80", |
|
"%F4%8F%BF%BF", |
|
"1%F4%8F%B0%80", |
|
"%40%F4%8F%B0%80", |
|
"%F4%8F%B0%801", |
|
"%F4%8F%B0%80%40", |
|
"%C4%81%F4%8F%B0%80", |
|
"%F4%8F%B0%80%C4%81" |
|
}; |
|
|
|
public static void main(String[] args) throws Exception { |
|
|
|
for (int i=0; i < testStrings.length; i++) { |
|
test(testStrings[i], correctEncodings[i]); |
|
} |
|
} |
|
|
|
private static void test(String str, String correctEncoding) |
|
throws Exception { |
|
|
|
System.out.println("Unicode bytes of test string are: " |
|
+ getHexBytes(str)); |
|
|
|
String encoded = URLEncoder.encode(str, "UTF-8"); |
|
|
|
System.out.println("URLEncoding is: " + encoded); |
|
|
|
if (encoded.equals(correctEncoding)) |
|
System.out.println("The encoding is correct!"); |
|
else { |
|
throw new Exception("The encoding is incorrect!" + |
|
" It should be " + correctEncoding); |
|
} |
|
|
|
String decoded = URLDecoder.decode(encoded, "UTF-8"); |
|
|
|
System.out.println("Unicode bytes for URLDecoding are: " |
|
+ getHexBytes(decoded)); |
|
|
|
if (str.equals(decoded)) |
|
System.out.println("The decoding is correct"); |
|
else { |
|
throw new Exception("The decoded is not equal to the original"); |
|
} |
|
System.out.println("---"); |
|
} |
|
|
|
private static String getHexBytes(String s) throws Exception { |
|
StringBuffer sb = new StringBuffer(); |
|
for (int i = 0; i < s.length(); i++) { |
|
|
|
int a = s.charAt(i); |
|
int b1 = (a >>8) & 0xff; |
|
int b2 = (byte)a; |
|
int b11 = (b1>>4) & 0x0f; |
|
int b12 = b1 & 0x0f; |
|
int b21 = (b2 >>4) & 0x0f; |
|
int b22 = b2 & 0x0f; |
|
|
|
sb.append(Integer.toHexString(b11)); |
|
sb.append(Integer.toHexString(b12)); |
|
sb.append(Integer.toHexString(b21)); |
|
sb.append(Integer.toHexString(b22)); |
|
sb.append(' '); |
|
} |
|
return sb.toString(); |
|
} |
|
|
|
}
|
|
|