package org.jsoup.internal;

import org.jsoup.helper.Validate;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Stack;

A minimal String utility class. Designed for internal jsoup use only.
/** * A minimal String utility class. Designed for internal jsoup use only. */
public final class StringUtil { // memoised padding up to 21 static final String[] padding = {"", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " "};
Join a collection of strings by a separator
Params:
  • strings – collection of string objects
  • sep – string to place between strings
Returns:joined string
/** * Join a collection of strings by a separator * @param strings collection of string objects * @param sep string to place between strings * @return joined string */
public static String join(Collection strings, String sep) { return join(strings.iterator(), sep); }
Join a collection of strings by a separator
Params:
  • strings – iterator of string objects
  • sep – string to place between strings
Returns:joined string
/** * Join a collection of strings by a separator * @param strings iterator of string objects * @param sep string to place between strings * @return joined string */
public static String join(Iterator strings, String sep) { if (!strings.hasNext()) return ""; String start = strings.next().toString(); if (!strings.hasNext()) // only one, avoid builder return start; StringBuilder sb = StringUtil.borrowBuilder().append(start); while (strings.hasNext()) { sb.append(sep); sb.append(strings.next()); } return StringUtil.releaseBuilder(sb); }
Join an array of strings by a separator
Params:
  • strings – collection of string objects
  • sep – string to place between strings
Returns:joined string
/** * Join an array of strings by a separator * @param strings collection of string objects * @param sep string to place between strings * @return joined string */
public static String join(String[] strings, String sep) { return join(Arrays.asList(strings), sep); }
Returns space padding
Params:
  • width – amount of padding desired
Returns:string of spaces * width
/** * Returns space padding * @param width amount of padding desired * @return string of spaces * width */
public static String padding(int width) { if (width < 0) throw new IllegalArgumentException("width must be > 0"); if (width < padding.length) return padding[width]; char[] out = new char[width]; for (int i = 0; i < width; i++) out[i] = ' '; return String.valueOf(out); }
Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc)
Params:
  • string – string to test
Returns:if string is blank
/** * Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc) * @param string string to test * @return if string is blank */
public static boolean isBlank(String string) { if (string == null || string.length() == 0) return true; int l = string.length(); for (int i = 0; i < l; i++) { if (!StringUtil.isWhitespace(string.codePointAt(i))) return false; } return true; }
Tests if a string is numeric, i.e. contains only digit characters
Params:
  • string – string to test
Returns:true if only digit chars, false if empty or null or contains non-digit chars
/** * Tests if a string is numeric, i.e. contains only digit characters * @param string string to test * @return true if only digit chars, false if empty or null or contains non-digit chars */
public static boolean isNumeric(String string) { if (string == null || string.length() == 0) return false; int l = string.length(); for (int i = 0; i < l; i++) { if (!Character.isDigit(string.codePointAt(i))) return false; } return true; }
Tests if a code point is "whitespace" as defined in the HTML spec. Used for output HTML.
Params:
  • c – code point to test
See Also:
Returns:true if code point is whitespace, false otherwise
/** * Tests if a code point is "whitespace" as defined in the HTML spec. Used for output HTML. * @param c code point to test * @return true if code point is whitespace, false otherwise * @see #isActuallyWhitespace(int) */
public static boolean isWhitespace(int c){ return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r'; }
Tests if a code point is "whitespace" as defined by what it looks like. Used for Element.text etc.
Params:
  • c – code point to test
Returns:true if code point is whitespace, false otherwise
/** * Tests if a code point is "whitespace" as defined by what it looks like. Used for Element.text etc. * @param c code point to test * @return true if code point is whitespace, false otherwise */
public static boolean isActuallyWhitespace(int c){ return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == 160; // 160 is &nbsp; (non-breaking space). Not in the spec but expected. } public static boolean isInvisibleChar(int c) { return Character.getType(c) == 16 && (c == 8203 || c == 8204 || c == 8205 || c == 173); // zero width sp, zw non join, zw join, soft hyphen }
Normalise the whitespace within this string; multiple spaces collapse to a single, and all whitespace characters (e.g. newline, tab) convert to a simple space
Params:
  • string – content to normalise
Returns:normalised string
/** * Normalise the whitespace within this string; multiple spaces collapse to a single, and all whitespace characters * (e.g. newline, tab) convert to a simple space * @param string content to normalise * @return normalised string */
public static String normaliseWhitespace(String string) { StringBuilder sb = StringUtil.borrowBuilder(); appendNormalisedWhitespace(sb, string, false); return StringUtil.releaseBuilder(sb); }
After normalizing the whitespace within a string, appends it to a string builder.
Params:
  • accum – builder to append to
  • string – string to normalize whitespace within
  • stripLeading – set to true if you wish to remove any leading whitespace
/** * After normalizing the whitespace within a string, appends it to a string builder. * @param accum builder to append to * @param string string to normalize whitespace within * @param stripLeading set to true if you wish to remove any leading whitespace */
public static void appendNormalisedWhitespace(StringBuilder accum, String string, boolean stripLeading) { boolean lastWasWhite = false; boolean reachedNonWhite = false; int len = string.length(); int c; for (int i = 0; i < len; i+= Character.charCount(c)) { c = string.codePointAt(i); if (isActuallyWhitespace(c)) { if ((stripLeading && !reachedNonWhite) || lastWasWhite) continue; accum.append(' '); lastWasWhite = true; } else if (!isInvisibleChar(c)) { accum.appendCodePoint(c); lastWasWhite = false; reachedNonWhite = true; } } } public static boolean in(final String needle, final String... haystack) { final int len = haystack.length; for (int i = 0; i < len; i++) { if (haystack[i].equals(needle)) return true; } return false; } public static boolean inSorted(String needle, String[] haystack) { return Arrays.binarySearch(haystack, needle) >= 0; }
Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
Params:
  • base – the existing absolute base URL
  • relUrl – the relative URL to resolve. (If it's already absolute, it will be returned)
Throws:
Returns:the resolved absolute URL
/** * Create a new absolute URL, from a provided existing absolute URL and a relative URL component. * @param base the existing absolute base URL * @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned) * @return the resolved absolute URL * @throws MalformedURLException if an error occurred generating the URL */
public static URL resolve(URL base, String relUrl) throws MalformedURLException { // workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired if (relUrl.startsWith("?")) relUrl = base.getPath() + relUrl; // workaround: //example.com + ./foo = //example.com/./foo, not //example.com/foo if (relUrl.indexOf('.') == 0 && base.getFile().indexOf('/') != 0) { base = new URL(base.getProtocol(), base.getHost(), base.getPort(), "/" + base.getFile()); } return new URL(base, relUrl); }
Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
Params:
  • baseUrl – the existing absolute base URL
  • relUrl – the relative URL to resolve. (If it's already absolute, it will be returned)
Returns:an absolute URL if one was able to be generated, or the empty string if not
/** * Create a new absolute URL, from a provided existing absolute URL and a relative URL component. * @param baseUrl the existing absolute base URL * @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned) * @return an absolute URL if one was able to be generated, or the empty string if not */
public static String resolve(final String baseUrl, final String relUrl) { URL base; try { try { base = new URL(baseUrl); } catch (MalformedURLException e) { // the base is unsuitable, but the attribute/rel may be abs on its own, so try that URL abs = new URL(relUrl); return abs.toExternalForm(); } return resolve(base, relUrl).toExternalForm(); } catch (MalformedURLException e) { return ""; } } private static final Stack<StringBuilder> builders = new Stack<>();
Maintains cached StringBuilders in a flyweight pattern, to minimize new StringBuilder GCs. The StringBuilder is prevented from growing too large.

Care must be taken to release the builder once its work has been completed, with {@see #releaseBuilder}

Returns:an empty StringBuilder
@
/** * Maintains cached StringBuilders in a flyweight pattern, to minimize new StringBuilder GCs. The StringBuilder is * prevented from growing too large. * <p> * Care must be taken to release the builder once its work has been completed, with {@see #releaseBuilder} * @return an empty StringBuilder * @ */
public static StringBuilder borrowBuilder() { synchronized (builders) { return builders.empty() ? new StringBuilder(MaxCachedBuilderSize) : builders.pop(); } }
Release a borrowed builder. Care must be taken not to use the builder after it has been returned, as its contents may be changed by this method, or by a concurrent thread.
Params:
  • sb – the StringBuilder to release.
Returns:the string value of the released String Builder (as an incentive to release it!).
/** * Release a borrowed builder. Care must be taken not to use the builder after it has been returned, as its * contents may be changed by this method, or by a concurrent thread. * @param sb the StringBuilder to release. * @return the string value of the released String Builder (as an incentive to release it!). */
public static String releaseBuilder(StringBuilder sb) { Validate.notNull(sb); String string = sb.toString(); if (sb.length() > MaxCachedBuilderSize) sb = new StringBuilder(MaxCachedBuilderSize); // make sure it hasn't grown too big else sb.delete(0, sb.length()); // make sure it's emptied on release synchronized (builders) { builders.push(sb); while (builders.size() > MaxIdleBuilders) { builders.pop(); } } return string; } private static final int MaxCachedBuilderSize = 8 * 1024; private static final int MaxIdleBuilders = 8; }