Java编程转义HTML标签格式内容
编程教程
>
Java
(3421)
2024-11-26 14:39:04
Java编程转义HTML标签格式内容
本文给出使用HTML实体转义String中字符的Java示例。转义之后,Java String被转换为等效的HTML内容,浏览器可以将其作为文本原样显示,而不会把它当作标签解析。
1)StringEscapeUtils.escapeHtml4()[Apache Commons Text]
- 此方法将原始字符串作为参数,然后使用HTML实体转义字符。
- 它支持所有已知的HTML 4.0实体。
- 注意:常用的撇号(Apostrophe)转义字符(&apos;)不是合法的HTML 4.0实体,因此不受支持。
要使用
StringEscapeUtils
,请导入
commons-text
依赖项
$title(pom.xml)
<!-- Apache Commons Text: supplies org.apache.commons.text.StringEscapeUtils, which the example class imports. -->
<!-- NOTE(review): the version is pinned to 1.4 here; check whether a newer release should be used. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.4</version>
</dependency>
现在使用
StringEscapeUtils.escapeHtml4()
方法
$title(HTMLEscapeExample.java)
import org.apache.commons.text.StringEscapeUtils;
/**
 * Demonstrates HTML escaping with {@code StringEscapeUtils.escapeHtml4}.
 */
public class HTMLEscapeExample
{
    /**
     * Escapes a sample string containing markup and writes the escaped form to standard output.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args)
    {
        final String rawMarkup = "<java>public static void main(String[] args) { ... }</java>";

        // The printed text is the escaped form of rawMarkup, intended to be shown by a browser as text.
        System.out.println(StringEscapeUtils.escapeHtml4(rawMarkup));
    }
}
$title(输出:)
&lt;java&gt;public static void main(String[] args) { ... }&lt;/java&gt;
2)自定义StringUtils.encodeHtml()方法
如果您有某些要求需要修改库方法提供的逻辑,则可以编写自己的方法。大多数情况下应避免这种方法,但在需求出现时可能会很方便。
$title(HTMLEscapeExample.java)
/**
 * Demonstrates HTML escaping with the hand-written {@code StringUtils.encodeHtml} helper.
 */
public class HTMLEscapeExample
{
    /**
     * Encodes a sample string containing markup and prints the encoded text.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args)
    {
        final String sample = "<java>public static void main(String[] args) { ... }</java>";
        final String encoded = StringUtils.encodeHtml(sample);

        // The encoded text is what a browser is expected to render as literal characters.
        System.out.println(encoded);
    }
}
$title(输出:)
&lt;java&gt;public static void main(String[] args) { ... }&lt;/java&gt;
StringUtils.java类
$title(StringUtils.java)
import java.util.HashMap;
public class StringUtils
{
private static final HashMap<Character, String> htmlEncodeChars = new HashMap<>();
static
{
// Special characters for HTML
htmlEncodeChars.put('\u0026', "&");
htmlEncodeChars.put('\u003C', "<");
htmlEncodeChars.put('\u003E', ">");
htmlEncodeChars.put('\u0022', """);
htmlEncodeChars.put('\u0152', "Œ");
htmlEncodeChars.put('\u0153', "œ");
htmlEncodeChars.put('\u0160', "Š");
htmlEncodeChars.put('\u0161', "š");
htmlEncodeChars.put('\u0178', "Ÿ");
htmlEncodeChars.put('\u02C6', "ˆ");
htmlEncodeChars.put('\u02DC', "˜");
htmlEncodeChars.put('\u2002', " ");
htmlEncodeChars.put('\u2003', " ");
htmlEncodeChars.put('\u2009', " ");
htmlEncodeChars.put('\u200C', "‌");
htmlEncodeChars.put('\u200D', "‍");
htmlEncodeChars.put('\u200E', "‎");
htmlEncodeChars.put('\u200F', "‏");
htmlEncodeChars.put('\u2013', "–");
htmlEncodeChars.put('\u2014', "—");
htmlEncodeChars.put('\u2018', "‘");
htmlEncodeChars.put('\u2019', "’");
htmlEncodeChars.put('\u201A', "‚");
htmlEncodeChars.put('\u201C', "“");
htmlEncodeChars.put('\u201D', "”");
htmlEncodeChars.put('\u201E', "„");
htmlEncodeChars.put('\u2020', "†");
htmlEncodeChars.put('\u2021', "‡");
htmlEncodeChars.put('\u2030', "‰");
htmlEncodeChars.put('\u2039', "‹");
htmlEncodeChars.put('\u203A', "›");
htmlEncodeChars.put('\u20AC', "€");
// Character entity references for ISO 8859-1 characters
htmlEncodeChars.put('\u00A0', " ");
htmlEncodeChars.put('\u00A1', "¡");
htmlEncodeChars.put('\u00A2', "¢");
htmlEncodeChars.put('\u00A3', "£");
htmlEncodeChars.put('\u00A4', "¤");
htmlEncodeChars.put('\u00A5', "¥");
htmlEncodeChars.put('\u00A6', "¦");
htmlEncodeChars.put('\u00A7', "§");
htmlEncodeChars.put('\u00A8', "¨");
htmlEncodeChars.put('\u00A9', "©");
htmlEncodeChars.put('\u00AA', "ª");
htmlEncodeChars.put('\u00AB', "«");
htmlEncodeChars.put('\u00AC', "¬");
htmlEncodeChars.put('\u00AD', "­");
htmlEncodeChars.put('\u00AE', "®");
htmlEncodeChars.put('\u00AF', "¯");
htmlEncodeChars.put('\u00B0', "°");
htmlEncodeChars.put('\u00B1', "±");
htmlEncodeChars.put('\u00B2', "²");
htmlEncodeChars.put('\u00B3', "³");
htmlEncodeChars.put('\u00B4', "´");
htmlEncodeChars.put('\u00B5', "µ");
htmlEncodeChars.put('\u00B6', "¶");
htmlEncodeChars.put('\u00B7', "·");
htmlEncodeChars.put('\u00B8', "¸");
htmlEncodeChars.put('\u00B9', "¹");
htmlEncodeChars.put('\u00BA', "º");
htmlEncodeChars.put('\u00BB', "»");
htmlEncodeChars.put('\u00BC', "¼");
htmlEncodeChars.put('\u00BD', "½");
htmlEncodeChars.put('\u00BE', "¾");
htmlEncodeChars.put('\u00BF', "¿");
htmlEncodeChars.put('\u00C0', "À");
htmlEncodeChars.put('\u00C1', "Á");
htmlEncodeChars.put('\u00C2', "Â");
htmlEncodeChars.put('\u00C3', "Ã");
htmlEncodeChars.put('\u00C4', "Ä");
htmlEncodeChars.put('\u00C5', "Å");
htmlEncodeChars.put('\u00C6', "Æ");
htmlEncodeChars.put('\u00C7', "Ç");
htmlEncodeChars.put('\u00C8', "È");
htmlEncodeChars.put('\u00C9', "É");
htmlEncodeChars.put('\u00CA', "Ê");
htmlEncodeChars.put('\u00CB', "Ë");
htmlEncodeChars.put('\u00CC', "Ì");
htmlEncodeChars.put('\u00CD', "Í");
htmlEncodeChars.put('\u00CE', "Î");
htmlEncodeChars.put('\u00CF', "Ï");
htmlEncodeChars.put('\u00D0', "Ð");
htmlEncodeChars.put('\u00D1', "Ñ");
htmlEncodeChars.put('\u00D2', "Ò");
htmlEncodeChars.put('\u00D3', "Ó");
htmlEncodeChars.put('\u00D4', "Ô");
htmlEncodeChars.put('\u00D5', "Õ");
htmlEncodeChars.put('\u00D6', "Ö");
htmlEncodeChars.put('\u00D7', "×");
htmlEncodeChars.put('\u00D8', "Ø");
htmlEncodeChars.put('\u00D9', "Ù");
htmlEncodeChars.put('\u00DA', "Ú");
htmlEncodeChars.put('\u00DB', "Û");
htmlEncodeChars.put('\u00DC', "Ü");
htmlEncodeChars.put('\u00DD', "Ý");
htmlEncodeChars.put('\u00DE', "Þ");
htmlEncodeChars.put('\u00DF', "ß");
htmlEncodeChars.put('\u00E0', "à");
htmlEncodeChars.put('\u00E1', "á");
htmlEncodeChars.put('\u00E2', "â");
htmlEncodeChars.put('\u00E3', "ã");
htmlEncodeChars.put('\u00E4', "ä");
htmlEncodeChars.put('\u00E5', "å");
htmlEncodeChars.put('\u00E6', "æ");
htmlEncodeChars.put('\u00E7', "ç");
htmlEncodeChars.put('\u00E8', "è");
htmlEncodeChars.put('\u00E9', "é");
htmlEncodeChars.put('\u00EA', "ê");
htmlEncodeChars.put('\u00EB', "ë");
htmlEncodeChars.put('\u00EC', "ì");
htmlEncodeChars.put('\u00ED', "í");
htmlEncodeChars.put('\u00EE', "î");
htmlEncodeChars.put('\u00EF', "ï");
htmlEncodeChars.put('\u00F0', "ð");
htmlEncodeChars.put('\u00F1', "ñ");
htmlEncodeChars.put('\u00F2', "ò");
htmlEncodeChars.put('\u00F3', "ó");
htmlEncodeChars.put('\u00F4', "ô");
htmlEncodeChars.put('\u00F5', "õ");
htmlEncodeChars.put('\u00F6', "ö");
htmlEncodeChars.put('\u00F7', "÷");
htmlEncodeChars.put('\u00F8', "ø");
htmlEncodeChars.put('\u00F9', "ù");
htmlEncodeChars.put('\u00FA', "ú");
htmlEncodeChars.put('\u00FB', "û");
htmlEncodeChars.put('\u00FC', "ü");
htmlEncodeChars.put('\u00FD', "ý");
htmlEncodeChars.put('\u00FE', "þ");
htmlEncodeChars.put('\u00FF', "ÿ");
// Mathematical, Greek and Symbolic characters for HTML
htmlEncodeChars.put('\u0192', "ƒ");
htmlEncodeChars.put('\u0391', "Α");
htmlEncodeChars.put('\u0392', "Β");
htmlEncodeChars.put('\u0393', "Γ");
htmlEncodeChars.put('\u0394', "Δ");
htmlEncodeChars.put('\u0395', "Ε");
htmlEncodeChars.put('\u0396', "Ζ");
htmlEncodeChars.put('\u0397', "Η");
htmlEncodeChars.put('\u0398', "Θ");
htmlEncodeChars.put('\u0399', "Ι");
htmlEncodeChars.put('\u039A', "Κ");
htmlEncodeChars.put('\u039B', "Λ");
htmlEncodeChars.put('\u039C', "Μ");
htmlEncodeChars.put('\u039D', "Ν");
htmlEncodeChars.put('\u039E', "Ξ");
htmlEncodeChars.put('\u039F', "Ο");
htmlEncodeChars.put('\u03A0', "Π");
htmlEncodeChars.put('\u03A1', "Ρ");
htmlEncodeChars.put('\u03A3', "Σ");
htmlEncodeChars.put('\u03A4', "Τ");
htmlEncodeChars.put('\u03A5', "Υ");
htmlEncodeChars.put('\u03A6', "Φ");
htmlEncodeChars.put('\u03A7', "Χ");
htmlEncodeChars.put('\u03A8', "Ψ");
htmlEncodeChars.put('\u03A9', "Ω");
htmlEncodeChars.put('\u03B1', "α");
htmlEncodeChars.put('\u03B2', "β");
htmlEncodeChars.put('\u03B3', "γ");
htmlEncodeChars.put('\u03B4', "δ");
htmlEncodeChars.put('\u03B5', "ε");
htmlEncodeChars.put('\u03B6', "ζ");
htmlEncodeChars.put('\u03B7', "η");
htmlEncodeChars.put('\u03B8', "θ");
htmlEncodeChars.put('\u03B9', "ι");
htmlEncodeChars.put('\u03BA', "κ");
htmlEncodeChars.put('\u03BB', "λ");
htmlEncodeChars.put('\u03BC', "μ");
htmlEncodeChars.put('\u03BD', "ν");
htmlEncodeChars.put('\u03BE', "ξ");
htmlEncodeChars.put('\u03BF', "ο");
htmlEncodeChars.put('\u03C0', "π");
htmlEncodeChars.put('\u03C1', "ρ");
htmlEncodeChars.put('\u03C2', "ς");
htmlEncodeChars.put('\u03C3', "σ");
htmlEncodeChars.put('\u03C4', "τ");
htmlEncodeChars.put('\u03C5', "υ");
htmlEncodeChars.put('\u03C6', "φ");
htmlEncodeChars.put('\u03C7', "χ");
htmlEncodeChars.put('\u03C8', "ψ");
htmlEncodeChars.put('\u03C9', "ω");
htmlEncodeChars.put('\u03D1', "ϑ");
htmlEncodeChars.put('\u03D2', "ϒ");
htmlEncodeChars.put('\u03D6', "ϖ");
htmlEncodeChars.put('\u2022', "•");
htmlEncodeChars.put('\u2026', "…");
htmlEncodeChars.put('\u2032', "′");
htmlEncodeChars.put('\u2033', "″");
htmlEncodeChars.put('\u203E', "‾");
htmlEncodeChars.put('\u2044', "⁄");
htmlEncodeChars.put('\u2118', "℘");
htmlEncodeChars.put('\u2111', "ℑ");
htmlEncodeChars.put('\u211C', "ℜ");
htmlEncodeChars.put('\u2122', "™");
htmlEncodeChars.put('\u2135', "ℵ");
htmlEncodeChars.put('\u2190', "←");
htmlEncodeChars.put('\u2191', "↑");
htmlEncodeChars.put('\u2192', "→");
htmlEncodeChars.put('\u2193', "↓");
htmlEncodeChars.put('\u2194', "↔");
htmlEncodeChars.put('\u21B5', "↵");
htmlEncodeChars.put('\u21D0', "⇐");
htmlEncodeChars.put('\u21D1', "⇑");
htmlEncodeChars.put('\u21D2', "⇒");
htmlEncodeChars.put('\u21D3', "⇓");
htmlEncodeChars.put('\u21D4', "⇔");
htmlEncodeChars.put('\u2200', "∀");
htmlEncodeChars.put('\u2202', "∂");
htmlEncodeChars.put('\u2203', "∃");
htmlEncodeChars.put('\u2205', "∅");
htmlEncodeChars.put('\u2207', "∇");
htmlEncodeChars.put('\u2208', "∈");
htmlEncodeChars.put('\u2209', "∉");
htmlEncodeChars.put('\u220B', "∋");
htmlEncodeChars.put('\u220F', "∏");
htmlEncodeChars.put('\u2211', "∑");
htmlEncodeChars.put('\u2212', "−");
htmlEncodeChars.put('\u2217', "∗");
htmlEncodeChars.put('\u221A', "√");
htmlEncodeChars.put('\u221D', "∝");
htmlEncodeChars.put('\u221E', "∞");
htmlEncodeChars.put('\u2220', "∠");
htmlEncodeChars.put('\u2227', "∧");
htmlEncodeChars.put('\u2228', "∨");
htmlEncodeChars.put('\u2229', "∩");
htmlEncodeChars.put('\u222A', "∪");
htmlEncodeChars.put('\u222B', "∫");
htmlEncodeChars.put('\u2234', "∴");
htmlEncodeChars.put('\u223C', "∼");
htmlEncodeChars.put('\u2245', "≅");
htmlEncodeChars.put('\u2248', "≈");
htmlEncodeChars.put('\u2260', "≠");
htmlEncodeChars.put('\u2261', "≡");
htmlEncodeChars.put('\u2264', "≤");
htmlEncodeChars.put('\u2265', "≥");
htmlEncodeChars.put('\u2282', "⊂");
htmlEncodeChars.put('\u2283', "⊃");
htmlEncodeChars.put('\u2284', "⊄");
htmlEncodeChars.put('\u2286', "⊆");
htmlEncodeChars.put('\u2287', "⊇");
htmlEncodeChars.put('\u2295', "⊕");
htmlEncodeChars.put('\u2297', "⊗");
htmlEncodeChars.put('\u22A5', "⊥");
htmlEncodeChars.put('\u22C5', "⋅");
htmlEncodeChars.put('\u2308', "⌈");
htmlEncodeChars.put('\u2309', "⌉");
htmlEncodeChars.put('\u230A', "⌊");
htmlEncodeChars.put('\u230B', "⌋");
htmlEncodeChars.put('\u2329', "⟨");
htmlEncodeChars.put('\u232A', "⟩");
htmlEncodeChars.put('\u25CA', "◊");
htmlEncodeChars.put('\u2660', "♠");
htmlEncodeChars.put('\u2663', "♣");
htmlEncodeChars.put('\u2665', "♥");
htmlEncodeChars.put('\u2666', "♦");
}
private StringUtils()
{
}
public static String encodeHtml(String source)
{
return encode(source, htmlEncodeChars);
}
private static String encode(String source, HashMap<Character, String> encodingTable)
{
if (null == source)
{
return null;
}
if (null == encodingTable)
{
return source;
}
StringBuffer encoded_string = null;
char[] string_to_encode_array = source.toCharArray();
int last_match = -1;
int difference = 0;
for (int i = 0; i < string_to_encode_array.length; i++)
{
char char_to_encode = string_to_encode_array[i];
if (encodingTable.containsKey(char_to_encode))
{
if (null == encoded_string)
{
encoded_string = new StringBuffer(source.length());
}
difference = i - (last_match + 1);
if (difference > 0)
{
encoded_string.append(string_to_encode_array, last_match + 1, difference);
}
encoded_string.append(encodingTable.get(char_to_encode));
last_match = i;
}
}
if (null == encoded_string)
{
return source;
}
else
{
difference = string_to_encode_array.length - (last_match + 1);
if (difference > 0)
{
encoded_string.append(string_to_encode_array, last_match + 1, difference);
}
return encoded_string.toString();
}
}
}
http://blog.xqlee.com/article/489.html