perf: String#getBytes(Charset) vs getBytes(String)
This commit is contained in:
parent
7139d1eff7
commit
e84893f676
7 changed files with 117 additions and 28 deletions
|
@ -37,6 +37,8 @@ import java.io.OutputStream;
|
|||
import java.io.Serializable;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
|
@ -76,8 +78,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
|
|||
static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b
|
||||
static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k
|
||||
|
||||
// Defined by java.nio.charset.Charset
|
||||
protected static final String UTF_8 = "UTF-8";
|
||||
protected static final Charset UTF_8 = Charset.forName("UTF-8");
|
||||
|
||||
/**
|
||||
* Empty {@code ByteString}.
|
||||
|
@ -269,11 +270,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
|
|||
* @return new {@code ByteString}
|
||||
*/
|
||||
public static ByteString copyFromUtf8(String text) {
|
||||
try {
|
||||
return new LiteralByteString(text.getBytes(UTF_8));
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
throw new RuntimeException("UTF-8 not supported?", e);
|
||||
}
|
||||
return new LiteralByteString(text.getBytes(UTF_8));
|
||||
}
|
||||
|
||||
// =================================================================
|
||||
|
@ -612,8 +609,36 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
|
|||
* @return new string
|
||||
* @throws UnsupportedEncodingException if charset isn't recognized
|
||||
*/
|
||||
public abstract String toString(String charsetName)
|
||||
throws UnsupportedEncodingException;
|
||||
public String toString(String charsetName)
|
||||
throws UnsupportedEncodingException {
|
||||
try {
|
||||
return toString(Charset.forName(charsetName));
|
||||
} catch (UnsupportedCharsetException e) {
|
||||
UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName);
|
||||
exception.initCause(e);
|
||||
throw exception;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new {@code String} by decoding the bytes using the
|
||||
* specified charset. Returns the same empty {@code String} instance whenever this {@code ByteString} is empty.
|
||||
*
|
||||
* @param charset decode using this charset
|
||||
* @return new string
|
||||
*/
|
||||
public String toString(Charset charset) {
|
||||
return size() == 0 ? "" : toStringInternal(charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new {@code String} by decoding the bytes using the
|
||||
* specified charset.
|
||||
*
|
||||
* @param charset decode using this charset
|
||||
* @return new string
|
||||
*/
|
||||
protected abstract String toStringInternal(Charset charset);
|
||||
|
||||
// =================================================================
|
||||
// UTF-8 decoding
|
||||
|
@ -624,11 +649,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
|
|||
* @return new string using UTF-8 encoding
|
||||
*/
|
||||
public String toStringUtf8() {
|
||||
try {
|
||||
return toString(UTF_8);
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
throw new RuntimeException("UTF-8 not supported?", e);
|
||||
}
|
||||
return toString(UTF_8);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -36,6 +36,7 @@ import java.io.InputStream;
|
|||
import java.io.OutputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
|
@ -152,13 +153,8 @@ class LiteralByteString extends ByteString {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String toString(String charsetName)
|
||||
throws UnsupportedEncodingException {
|
||||
// Optimize for empty strings, but ensure we don't silently ignore invalid
|
||||
// encodings.
|
||||
return size() == 0 && UTF_8.equals(charsetName)
|
||||
? ""
|
||||
: new String(bytes, getOffsetIntoBytes(), size(), charsetName);
|
||||
protected String toStringInternal(Charset charset) {
|
||||
return new String(bytes, getOffsetIntoBytes(), size(), charset);
|
||||
}
|
||||
|
||||
// =================================================================
|
||||
|
|
|
@ -38,6 +38,7 @@ import java.io.OutputStream;
|
|||
import java.io.UnsupportedEncodingException;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
|
@ -418,13 +419,8 @@ class RopeByteString extends ByteString {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String toString(String charsetName)
|
||||
throws UnsupportedEncodingException {
|
||||
// Optimize for empty strings, but ensure we don't silently ignore invalid
|
||||
// encodings.
|
||||
return size() == 0 && UTF_8.equals(charsetName)
|
||||
? ""
|
||||
: new String(toByteArray(), charsetName);
|
||||
protected String toStringInternal(Charset charset) {
|
||||
return new String(toByteArray(), charset);
|
||||
}
|
||||
|
||||
// =================================================================
|
||||
|
|
|
@ -72,6 +72,19 @@ public class BoundedByteStringTest extends LiteralByteStringTest {
|
|||
testString.substring(2, testString.length() - 6), roundTripString);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testCharsetToString() throws UnsupportedEncodingException {
|
||||
String testString = "I love unicode \u1234\u5678 characters";
|
||||
LiteralByteString unicode = new LiteralByteString(testString.getBytes(ByteString.UTF_8));
|
||||
ByteString chopped = unicode.substring(2, unicode.size() - 6);
|
||||
assertEquals(classUnderTest + ".substring() must have the expected type",
|
||||
classUnderTest, getActualClassName(chopped));
|
||||
|
||||
String roundTripString = chopped.toString(ByteString.UTF_8);
|
||||
assertEquals(classUnderTest + " unicode bytes must match",
|
||||
testString.substring(2, testString.length() - 6), roundTripString);
|
||||
}
|
||||
|
||||
public void testJavaSerialization() throws Exception {
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
ObjectOutputStream oos = new ObjectOutputStream(out);
|
||||
|
|
|
@ -298,6 +298,13 @@ public class LiteralByteStringTest extends TestCase {
|
|||
assertEquals(classUnderTest + " unicode must match", testString, roundTripString);
|
||||
}
|
||||
|
||||
public void testCharsetToString() throws UnsupportedEncodingException {
|
||||
String testString = "I love unicode \u1234\u5678 characters";
|
||||
LiteralByteString unicode = new LiteralByteString(testString.getBytes(ByteString.UTF_8));
|
||||
String roundTripString = unicode.toString(ByteString.UTF_8);
|
||||
assertEquals(classUnderTest + " unicode must match", testString, roundTripString);
|
||||
}
|
||||
|
||||
public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException{
|
||||
assertSame(classUnderTest + " must be the same string references",
|
||||
ByteString.EMPTY.toString(UTF_8), new LiteralByteString(new byte[]{}).toString(UTF_8));
|
||||
|
|
|
@ -94,4 +94,34 @@ public class RopeByteStringSubstringTest extends LiteralByteStringTest {
|
|||
assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
|
||||
flatString.hashCode(), unicode.hashCode());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testCharsetToString() throws UnsupportedEncodingException {
|
||||
String sourceString = "I love unicode \u1234\u5678 characters";
|
||||
ByteString sourceByteString = ByteString.copyFromUtf8(sourceString);
|
||||
int copies = 250;
|
||||
|
||||
// By building the RopeByteString by concatenating, this is actually a fairly strenuous test.
|
||||
StringBuilder builder = new StringBuilder(copies * sourceString.length());
|
||||
ByteString unicode = ByteString.EMPTY;
|
||||
for (int i = 0; i < copies; ++i) {
|
||||
builder.append(sourceString);
|
||||
unicode = RopeByteString.concatenate(unicode, sourceByteString);
|
||||
}
|
||||
String testString = builder.toString();
|
||||
|
||||
// Do the substring part
|
||||
testString = testString.substring(2, testString.length() - 6);
|
||||
unicode = unicode.substring(2, unicode.size() - 6);
|
||||
|
||||
assertEquals(classUnderTest + " from string must have the expected type",
|
||||
classUnderTest, getActualClassName(unicode));
|
||||
String roundTripString = unicode.toString(ByteString.UTF_8);
|
||||
assertEquals(classUnderTest + " unicode bytes must match",
|
||||
testString, roundTripString);
|
||||
ByteString flatString = ByteString.copyFromUtf8(testString);
|
||||
assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode);
|
||||
assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
|
||||
flatString.hashCode(), unicode.hashCode());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -118,6 +118,32 @@ public class RopeByteStringTest extends LiteralByteStringTest {
|
|||
flatString.hashCode(), unicode.hashCode());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testCharsetToString() throws UnsupportedEncodingException {
|
||||
String sourceString = "I love unicode \u1234\u5678 characters";
|
||||
ByteString sourceByteString = ByteString.copyFromUtf8(sourceString);
|
||||
int copies = 250;
|
||||
|
||||
// By building the RopeByteString by concatenating, this is actually a fairly strenuous test.
|
||||
StringBuilder builder = new StringBuilder(copies * sourceString.length());
|
||||
ByteString unicode = ByteString.EMPTY;
|
||||
for (int i = 0; i < copies; ++i) {
|
||||
builder.append(sourceString);
|
||||
unicode = RopeByteString.concatenate(unicode, sourceByteString);
|
||||
}
|
||||
String testString = builder.toString();
|
||||
|
||||
assertEquals(classUnderTest + " from string must have the expected type",
|
||||
classUnderTest, getActualClassName(unicode));
|
||||
String roundTripString = unicode.toString(ByteString.UTF_8);
|
||||
assertEquals(classUnderTest + " unicode bytes must match",
|
||||
testString, roundTripString);
|
||||
ByteString flatString = ByteString.copyFromUtf8(testString);
|
||||
assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode);
|
||||
assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
|
||||
flatString.hashCode(), unicode.hashCode());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException {
|
||||
RopeByteString ropeByteString =
|
||||
|
|
Loading…
Add table
Reference in a new issue