package org.jsoup.parser;
import org.jsoup.helper.Validate;
import org.jsoup.internal.Normalizer;
import java.util.HashMap;
import java.util.Map;
HTML Tag capabilities.
Author: Jonathan Hedley, jonathan@hedley.net
/**
* HTML Tag capabilities.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class Tag {
private static final Map<String, Tag> tags = new HashMap<>(); // map of known tags
private String tagName;
private String normalName; // always the lower case version of this tag, regardless of case preservation mode
private boolean isBlock = true; // block or inline
private boolean formatAsBlock = true; // should be formatted as a block
private boolean canContainInline = true; // only pcdata if not
private boolean empty = false; // can hold nothing; e.g. img
private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
private boolean preserveWhitespace = false; // for pre, textarea, script etc
private boolean formList = false; // a control that appears in forms: input, textarea, output etc
private boolean formSubmit = false; // a control that can be submitted in a form: input etc
private Tag(String tagName) {
this.tagName = tagName;
normalName = Normalizer.lowerCase(tagName);
}
Get this tag's name.
Returns: the tag's name
/**
* Get this tag's name.
*
* @return the tag's name
*/
public String getName() {
return tagName;
}
Get this tag's normalized (lowercased) name.
Returns: the tag's normal name.
/**
* Get this tag's normalized (lowercased) name.
* @return the tag's normal name.
*/
public String normalName() {
return normalName;
}
Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
Params: - tagName – Name of tag, e.g. "p". Case insensitive.
- settings – used to control tag name sensitivity
Returns: The tag, either defined or new generic.
/**
* Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
* <p>
* Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
* </p>
*
* @param tagName Name of tag, e.g. "p". Case insensitive.
* @param settings used to control tag name sensitivity
* @return The tag, either defined or new generic.
*/
public static Tag valueOf(String tagName, ParseSettings settings) {
Validate.notNull(tagName);
Tag tag = tags.get(tagName);
if (tag == null) {
tagName = settings.normalizeTag(tagName);
Validate.notEmpty(tagName);
tag = tags.get(tagName);
if (tag == null) {
// not defined: create default; go anywhere, do anything! (incl be inside a <p>)
tag = new Tag(tagName);
tag.isBlock = false;
}
}
return tag;
}
Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
Params: - tagName – Name of tag, e.g. "p". Case sensitive.
Returns: The tag, either defined or new generic.
/**
* Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
* <p>
* Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
* </p>
*
* @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
* @return The tag, either defined or new generic.
*/
public static Tag valueOf(String tagName) {
return valueOf(tagName, ParseSettings.preserveCase);
}
Gets if this is a block tag.
Returns: if block tag
/**
* Gets if this is a block tag.
*
* @return if block tag
*/
public boolean isBlock() {
return isBlock;
}
Gets if this tag should be formatted as a block (or as inline)
Returns: if should be formatted as block or inline
/**
* Gets if this tag should be formatted as a block (or as inline)
*
* @return if should be formatted as block or inline
*/
public boolean formatAsBlock() {
return formatAsBlock;
}
Gets if this tag can contain block tags.
Returns: if tag can contain block tags Deprecated: No longer used, and no different result than {isBlock()
}
/**
* Gets if this tag can contain block tags.
*
* @return if tag can contain block tags
* @deprecated No longer used, and no different result than {{@link #isBlock()}}
*/
public boolean canContainBlock() {
return isBlock;
}
Gets if this tag is an inline tag.
Returns: if this tag is an inline tag.
/**
* Gets if this tag is an inline tag.
*
* @return if this tag is an inline tag.
*/
public boolean isInline() {
return !isBlock;
}
Gets if this tag is a data only tag.
Returns: if this tag is a data only tag
/**
* Gets if this tag is a data only tag.
*
* @return if this tag is a data only tag
*/
public boolean isData() {
return !canContainInline && !isEmpty();
}
Get if this is an empty tag
Returns: if this is an empty tag
/**
* Get if this is an empty tag
*
* @return if this is an empty tag
*/
public boolean isEmpty() {
return empty;
}
Get if this tag is self closing.
Returns: if this tag should be output as self closing.
/**
* Get if this tag is self closing.
*
* @return if this tag should be output as self closing.
*/
public boolean isSelfClosing() {
return empty || selfClosing;
}
Get if this is a pre-defined tag, or was auto created on parsing.
Returns: if a known tag
/**
* Get if this is a pre-defined tag, or was auto created on parsing.
*
* @return if a known tag
*/
public boolean isKnownTag() {
return tags.containsKey(tagName);
}
Check if this tagname is a known tag.
Params: - tagName – name of tag
Returns: if known HTML tag
/**
* Check if this tagname is a known tag.
*
* @param tagName name of tag
* @return if known HTML tag
*/
public static boolean isKnownTag(String tagName) {
return tags.containsKey(tagName);
}
Get if this tag should preserve whitespace within child text nodes.
Returns: if preserve whitespace
/**
* Get if this tag should preserve whitespace within child text nodes.
*
* @return if preserve whitespace
*/
public boolean preserveWhitespace() {
return preserveWhitespace;
}
Get if this tag represents a control associated with a form. E.g. input, textarea, output
Returns: if associated with a form
/**
* Get if this tag represents a control associated with a form. E.g. input, textarea, output
* @return if associated with a form
*/
public boolean isFormListed() {
return formList;
}
Get if this tag represents an element that should be submitted with a form. E.g. input, option
Returns: if submittable with a form
/**
* Get if this tag represents an element that should be submitted with a form. E.g. input, option
* @return if submittable with a form
*/
public boolean isFormSubmittable() {
return formSubmit;
}
Tag setSelfClosing() {
selfClosing = true;
return this;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Tag)) return false;
Tag tag = (Tag) o;
if (!tagName.equals(tag.tagName)) return false;
if (canContainInline != tag.canContainInline) return false;
if (empty != tag.empty) return false;
if (formatAsBlock != tag.formatAsBlock) return false;
if (isBlock != tag.isBlock) return false;
if (preserveWhitespace != tag.preserveWhitespace) return false;
if (selfClosing != tag.selfClosing) return false;
if (formList != tag.formList) return false;
return formSubmit == tag.formSubmit;
}
@Override
public int hashCode() {
int result = tagName.hashCode();
result = 31 * result + (isBlock ? 1 : 0);
result = 31 * result + (formatAsBlock ? 1 : 0);
result = 31 * result + (canContainInline ? 1 : 0);
result = 31 * result + (empty ? 1 : 0);
result = 31 * result + (selfClosing ? 1 : 0);
result = 31 * result + (preserveWhitespace ? 1 : 0);
result = 31 * result + (formList ? 1 : 0);
result = 31 * result + (formSubmit ? 1 : 0);
return result;
}
@Override
public String toString() {
return tagName;
}
// internal static initialisers:
// prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
private static final String[] blockTags = {
"html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame",
"noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6",
"ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
"del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
"td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main",
"svg", "math", "center"
};
private static final String[] inlineTags = {
"object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
"var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q",
"sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup",
"option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
"summary", "command", "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track",
"data", "bdi", "s"
};
private static final String[] emptyTags = {
"meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",
"device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track"
};
private static final String[] formatAsInlineTags = {
"title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style",
"ins", "del", "s"
};
private static final String[] preserveWhitespaceTags = {
"pre", "plaintext", "title", "textarea"
// script is not here as it is a data node, which always preserve whitespace
};
// todo: I think we just need submit tags, and can scrub listed
private static final String[] formListedTags = {
"button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
};
private static final String[] formSubmitTags = {
"input", "keygen", "object", "select", "textarea"
};
static {
// creates
for (String tagName : blockTags) {
Tag tag = new Tag(tagName);
register(tag);
}
for (String tagName : inlineTags) {
Tag tag = new Tag(tagName);
tag.isBlock = false;
tag.formatAsBlock = false;
register(tag);
}
// mods:
for (String tagName : emptyTags) {
Tag tag = tags.get(tagName);
Validate.notNull(tag);
tag.canContainInline = false;
tag.empty = true;
}
for (String tagName : formatAsInlineTags) {
Tag tag = tags.get(tagName);
Validate.notNull(tag);
tag.formatAsBlock = false;
}
for (String tagName : preserveWhitespaceTags) {
Tag tag = tags.get(tagName);
Validate.notNull(tag);
tag.preserveWhitespace = true;
}
for (String tagName : formListedTags) {
Tag tag = tags.get(tagName);
Validate.notNull(tag);
tag.formList = true;
}
for (String tagName : formSubmitTags) {
Tag tag = tags.get(tagName);
Validate.notNull(tag);
tag.formSubmit = true;
}
}
private static void register(Tag tag) {
tags.put(tag.tagName, tag);
}
}