package org.jsoup.internal;
import org.jsoup.helper.Validate;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Stack;
A minimal String utility class. Designed for internal jsoup use only.
/**
* A minimal String utility class. Designed for internal jsoup use only.
*/
public final class StringUtil {
// memoised padding up to 21
static final String[] padding = {"", " ", " ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", " ", " "};
Join a collection of strings by a separator
Params: - strings – collection of string objects
- sep – string to place between strings
Returns: joined string
/**
* Join a collection of strings by a separator
* @param strings collection of string objects
* @param sep string to place between strings
* @return joined string
*/
public static String join(Collection strings, String sep) {
return join(strings.iterator(), sep);
}
Join a collection of strings by a separator
Params: - strings – iterator of string objects
- sep – string to place between strings
Returns: joined string
/**
* Join a collection of strings by a separator
* @param strings iterator of string objects
* @param sep string to place between strings
* @return joined string
*/
public static String join(Iterator strings, String sep) {
if (!strings.hasNext())
return "";
String start = strings.next().toString();
if (!strings.hasNext()) // only one, avoid builder
return start;
StringBuilder sb = StringUtil.borrowBuilder().append(start);
while (strings.hasNext()) {
sb.append(sep);
sb.append(strings.next());
}
return StringUtil.releaseBuilder(sb);
}
Join an array of strings by a separator
Params: - strings – collection of string objects
- sep – string to place between strings
Returns: joined string
/**
* Join an array of strings by a separator
* @param strings collection of string objects
* @param sep string to place between strings
* @return joined string
*/
public static String join(String[] strings, String sep) {
return join(Arrays.asList(strings), sep);
}
Returns space padding
Params: - width – amount of padding desired
Returns: string of spaces * width
/**
* Returns space padding
* @param width amount of padding desired
* @return string of spaces * width
*/
public static String padding(int width) {
if (width < 0)
throw new IllegalArgumentException("width must be > 0");
if (width < padding.length)
return padding[width];
char[] out = new char[width];
for (int i = 0; i < width; i++)
out[i] = ' ';
return String.valueOf(out);
}
Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc)
Params: - string – string to test
Returns: if string is blank
/**
* Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc)
* @param string string to test
* @return if string is blank
*/
public static boolean isBlank(String string) {
if (string == null || string.length() == 0)
return true;
int l = string.length();
for (int i = 0; i < l; i++) {
if (!StringUtil.isWhitespace(string.codePointAt(i)))
return false;
}
return true;
}
Tests if a string is numeric, i.e. contains only digit characters
Params: - string – string to test
Returns: true if only digit chars, false if empty or null or contains non-digit chars
/**
* Tests if a string is numeric, i.e. contains only digit characters
* @param string string to test
* @return true if only digit chars, false if empty or null or contains non-digit chars
*/
public static boolean isNumeric(String string) {
if (string == null || string.length() == 0)
return false;
int l = string.length();
for (int i = 0; i < l; i++) {
if (!Character.isDigit(string.codePointAt(i)))
return false;
}
return true;
}
Tests if a code point is "whitespace" as defined in the HTML spec. Used for output HTML.
Params: - c – code point to test
See Also: Returns: true if code point is whitespace, false otherwise
/**
* Tests if a code point is "whitespace" as defined in the HTML spec. Used for output HTML.
* @param c code point to test
* @return true if code point is whitespace, false otherwise
* @see #isActuallyWhitespace(int)
*/
public static boolean isWhitespace(int c){
return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
}
Tests if a code point is "whitespace" as defined by what it looks like. Used for Element.text etc.
Params: - c – code point to test
Returns: true if code point is whitespace, false otherwise
/**
* Tests if a code point is "whitespace" as defined by what it looks like. Used for Element.text etc.
* @param c code point to test
* @return true if code point is whitespace, false otherwise
*/
public static boolean isActuallyWhitespace(int c){
return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == 160;
// 160 is (non-breaking space). Not in the spec but expected.
}
public static boolean isInvisibleChar(int c) {
return Character.getType(c) == 16 && (c == 8203 || c == 8204 || c == 8205 || c == 173);
// zero width sp, zw non join, zw join, soft hyphen
}
Normalise the whitespace within this string; multiple spaces collapse to a single, and all whitespace characters
(e.g. newline, tab) convert to a simple space
Params: - string – content to normalise
Returns: normalised string
/**
* Normalise the whitespace within this string; multiple spaces collapse to a single, and all whitespace characters
* (e.g. newline, tab) convert to a simple space
* @param string content to normalise
* @return normalised string
*/
public static String normaliseWhitespace(String string) {
StringBuilder sb = StringUtil.borrowBuilder();
appendNormalisedWhitespace(sb, string, false);
return StringUtil.releaseBuilder(sb);
}
After normalizing the whitespace within a string, appends it to a string builder.
Params: - accum – builder to append to
- string – string to normalize whitespace within
- stripLeading – set to true if you wish to remove any leading whitespace
/**
* After normalizing the whitespace within a string, appends it to a string builder.
* @param accum builder to append to
* @param string string to normalize whitespace within
* @param stripLeading set to true if you wish to remove any leading whitespace
*/
public static void appendNormalisedWhitespace(StringBuilder accum, String string, boolean stripLeading) {
boolean lastWasWhite = false;
boolean reachedNonWhite = false;
int len = string.length();
int c;
for (int i = 0; i < len; i+= Character.charCount(c)) {
c = string.codePointAt(i);
if (isActuallyWhitespace(c)) {
if ((stripLeading && !reachedNonWhite) || lastWasWhite)
continue;
accum.append(' ');
lastWasWhite = true;
}
else if (!isInvisibleChar(c)) {
accum.appendCodePoint(c);
lastWasWhite = false;
reachedNonWhite = true;
}
}
}
public static boolean in(final String needle, final String... haystack) {
final int len = haystack.length;
for (int i = 0; i < len; i++) {
if (haystack[i].equals(needle))
return true;
}
return false;
}
public static boolean inSorted(String needle, String[] haystack) {
return Arrays.binarySearch(haystack, needle) >= 0;
}
Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
Params: - base – the existing absolute base URL
- relUrl – the relative URL to resolve. (If it's already absolute, it will be returned)
Throws: - MalformedURLException – if an error occurred generating the URL
Returns: the resolved absolute URL
/**
* Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
* @param base the existing absolute base URL
* @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned)
* @return the resolved absolute URL
* @throws MalformedURLException if an error occurred generating the URL
*/
public static URL resolve(URL base, String relUrl) throws MalformedURLException {
// workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired
if (relUrl.startsWith("?"))
relUrl = base.getPath() + relUrl;
// workaround: //example.com + ./foo = //example.com/./foo, not //example.com/foo
if (relUrl.indexOf('.') == 0 && base.getFile().indexOf('/') != 0) {
base = new URL(base.getProtocol(), base.getHost(), base.getPort(), "/" + base.getFile());
}
return new URL(base, relUrl);
}
Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
Params: - baseUrl – the existing absolute base URL
- relUrl – the relative URL to resolve. (If it's already absolute, it will be returned)
Returns: an absolute URL if one was able to be generated, or the empty string if not
/**
* Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
* @param baseUrl the existing absolute base URL
* @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned)
* @return an absolute URL if one was able to be generated, or the empty string if not
*/
public static String resolve(final String baseUrl, final String relUrl) {
URL base;
try {
try {
base = new URL(baseUrl);
} catch (MalformedURLException e) {
// the base is unsuitable, but the attribute/rel may be abs on its own, so try that
URL abs = new URL(relUrl);
return abs.toExternalForm();
}
return resolve(base, relUrl).toExternalForm();
} catch (MalformedURLException e) {
return "";
}
}
private static final Stack<StringBuilder> builders = new Stack<>();
Maintains cached StringBuilders in a flyweight pattern, to minimize new StringBuilder GCs. The StringBuilder is
prevented from growing too large.
Care must be taken to release the builder once its work has been completed, with {@see #releaseBuilder}
Returns: an empty StringBuilder @
/**
* Maintains cached StringBuilders in a flyweight pattern, to minimize new StringBuilder GCs. The StringBuilder is
* prevented from growing too large.
* <p>
* Care must be taken to release the builder once its work has been completed, with {@see #releaseBuilder}
* @return an empty StringBuilder
* @
*/
public static StringBuilder borrowBuilder() {
synchronized (builders) {
return builders.empty() ?
new StringBuilder(MaxCachedBuilderSize) :
builders.pop();
}
}
Release a borrowed builder. Care must be taken not to use the builder after it has been returned, as its
contents may be changed by this method, or by a concurrent thread.
Params: - sb – the StringBuilder to release.
Returns: the string value of the released String Builder (as an incentive to release it!).
/**
* Release a borrowed builder. Care must be taken not to use the builder after it has been returned, as its
* contents may be changed by this method, or by a concurrent thread.
* @param sb the StringBuilder to release.
* @return the string value of the released String Builder (as an incentive to release it!).
*/
public static String releaseBuilder(StringBuilder sb) {
Validate.notNull(sb);
String string = sb.toString();
if (sb.length() > MaxCachedBuilderSize)
sb = new StringBuilder(MaxCachedBuilderSize); // make sure it hasn't grown too big
else
sb.delete(0, sb.length()); // make sure it's emptied on release
synchronized (builders) {
builders.push(sb);
while (builders.size() > MaxIdleBuilders) {
builders.pop();
}
}
return string;
}
private static final int MaxCachedBuilderSize = 8 * 1024;
private static final int MaxIdleBuilders = 8;
}