在 JavaScript 中将特殊字符转换为 HTML

如何在 JavaScript 中将特殊字符转换为 HTML?

例如:

  • &(符号)变成 &amp;
  • 当没有设置 ENT_NOQUOTES 时,"(双引号)变成 &quot;
  • 只有在设置 ENT_QUOTES 时,'(单引号)才变成 &#039;
  • <(小于)变成 &lt;
  • >(大于)变成 &gt;
451618 次浏览

你需要一个函数来完成

return mystring.replace(/&/g, "&amp;").replace(/>/g, "&gt;").replace(/</g, "&lt;").replace(/"/g, "&quot;");

但是要考虑到你对单引号和双引号的不同处理方式。

创建一个使用字符串 replace的函数

/**
 * Escapes the five HTML-significant characters in a string.
 *
 * @param {string} str - Text to escape.
 * @returns {string} `str` with `&`, `>`, `<`, `"` and `'` replaced by
 *   `&amp;`, `&gt;`, `&lt;`, `&quot;` and `&#039;`.
 */
function convert(str) {
  var entityFor = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    '"': "&quot;",
    "'": "&#039;"
  };

  // Each input character is visited exactly once, so the "&" that starts
  // every entity is never escaped a second time.
  return str.replace(/[&><"']/g, function (ch) {
    return entityFor[ch];
  });
}
<!-- Demo page: escapes the HTML-special characters of a sample string and alerts it before and after. -->
<html>
<body>
<script type="text/javascript">
// Sample input containing all five characters that are escaped below.
var str = "&\"'<>";
alert('B4 Change: \n' + str);


// "&" is replaced first so the ampersands introduced by the later
// replacements are not escaped a second time.
str = str.replace(/\&/g, '&amp;');
str = str.replace(/</g,  '&lt;');
str = str.replace(/>/g,  '&gt;');
str = str.replace(/\"/g, '&quot;');
str = str.replace(/\'/g, '&#039;');


alert('After change: \n' + str);
</script>
</body>
</html>

使用这个测试: http://www.w3schools.com/js/tryit.asp?filename=tryjs_text

在我看来,最好的方法是使用浏览器内置的 HTML 转义功能来处理许多情况。为此,只需在 DOM 树中创建一个元素,并将元素的 innerText设置为字符串。然后检索元素的 innerHTML。浏览器将返回一个 HTML 编码的字符串。

/**
 * HTML-encodes a string by storing it as the text of a detached element and
 * reading back the browser's serialization of that element.
 *
 * Quotes are not encoded by this technique; escape them separately when the
 * result is placed inside an attribute value.
 *
 * @param {string} s - Raw text to encode.
 * @returns {string} The element's innerHTML after `s` was stored as its text.
 */
function HtmlEncode(s)
{
  var el = document.createElement("div");
  // Assign textContent only: the innerText setter converts line breaks into
  // <br> elements, which would put raw markup into the "encoded" result.
  el.textContent = s;
  return el.innerHTML;
}

试运行:

alert(HtmlEncode('&;\'><"'));

产出:

&amp;;'&gt;&lt;"

Prototype JS 库也使用这种方法来转义 HTML,尽管它的实现与我给出的简单示例不同。

注意: 您仍然需要自己转义引号(双引号和单引号)。您可以使用这里其他人概述的任何方法。

这个通用函数将每个非字母字符编码为对应的 HTML 数字字符引用(NCR):

/**
 * Encodes every character that is not an ASCII letter as a decimal numeric
 * character reference (NCR), e.g. "<" becomes "&#60;".
 *
 * @param {string} str - Text to encode.
 * @returns {string} Encoded text; only A-Z and a-z pass through unchanged.
 */
function HTMLEncode(str) {
  // Array.from walks the string by code point, so a surrogate pair produces
  // one reference for the real character instead of two invalid references
  // for its UTF-16 halves.
  return Array.from(str, function (ch) {
    var iC = ch.codePointAt(0);
    var isAsciiLetter = (iC >= 65 && iC <= 90) || (iC >= 97 && iC <= 122);
    return isAsciiLetter ? ch : '&#' + iC + ';';
  }).join('');
}

[ 编辑2022]更现代的方法:

/**
 * Converts every character of a string to a decimal numeric character reference.
 *
 * @param {string} str - Text to convert.
 * @param {boolean} [showInHtml=false] - When true, the leading "&" of each
 *   reference is itself written as "&amp;" so the reference is displayed
 *   literally when the result is inserted as HTML.
 * @returns {string} The concatenated references.
 */
const toHtmlEntities = (str, showInHtml = false) =>
  // The spread splits by code point, so the code must be read with
  // codePointAt; charCodeAt(0) would return only the high surrogate of an
  // astral character.
  [...str].map((v) => `${showInHtml ? `&amp;#` : `&#`}${v.codePointAt(0)};`).join(``);
// Sample text mixing ASCII and non-ASCII characters.
const str = `&Hellõ Wórld`;


// Append a list to the page: the first item inserts the references with their
// "&" escaped, the second inserts them as-is, and the third is filled in below.
document.body.insertAdjacentHTML(`beforeend`, `<ul>
<li>Show the entities (<code>toHtmlEntities(str, true)</code>): <b>${
toHtmlEntities(str, true)}</b></li>
<li>Let the browser decide (<code>toHtmlEntities(str)</code>): <b>${
toHtmlEntities(str)}</b></li>
<li id="textOnly"></li></ul>`);
// Assigned through textContent, so the reference string is inserted as plain text.
document.querySelector(`#textOnly`).textContent = `As textContent: ${
toHtmlEntities(str)}`;
/* Base font and page margin for the demo output. */
body {
font: 14px / 18px "normal verdana", arial;
margin: 1rem;
}


/* Light grey background for inline <code> samples. */
code {
background-color: #eee;
}

/**
 * Replaces the characters <, >, &, " and ' with decimal numeric character
 * references built from their character codes.
 *
 * @param {string} text - Text to escape.
 * @returns {string} The escaped text.
 */
function escape(text) {
  var specialChars = /[<>\&\"\']/g;

  function toNumericReference(c) {
    return '&#' + c.charCodeAt(0) + ';';
  }

  return text.replace(specialChars, toNumericReference);
}


alert(escape("<>&'\""));
/**
 * Escapes <, >, &, ", ' and # as HTML entities.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The escaped text.
 */
function ConvChar(str) {
  // The table must be keyed by the literal characters. With entity-encoded
  // keys and an entity-encoded character class, the regex matched the letters
  // of "&lt;"/"&amp;" and replaced them with "undefined".
  var c = {'<':'&lt;', '>':'&gt;', '&':'&amp;',
           '"':'&quot;', "'":'&#039;', '#':'&#035;' };

  return str.replace(/[<&>'"#]/g, function(s) { return c[s]; });
}


alert(ConvChar('&lt;-"-&-"->-&lt;-\'-#-\'->'));

结果:

&lt;-&quot;-&amp;-&quot;-&gt;-&lt;-&#039;-&#035;-&#039;-&gt;

在测试区的标签上:

<-"-&-"->-<-'-#-'->

如果你能用长码改几个字符..。

是的,但是如果您需要将生成的字符串插入到某个位置而不需要将其转换回来,那么您需要:

str.replace(/'/g,"&amp;amp;#39;"); // and so on
/**
 * Replaces special characters in the `value` of each given field with HTML
 * entities, updating the fields in place.
 *
 * `chars` and `codes` are parallel tables: chars[k] is replaced by codes[k].
 * Some characters are listed twice; the earlier entry takes effect because it
 * removes every occurrence before the later entry is reached.
 *
 * @param {...{value: string}} fields - Objects with a string `value` property
 *   (e.g. form inputs).
 * @returns {void}
 */
function char_convert(...fields) {
  // "\u00AD" is the soft hyphen, written as an escape because it is invisible.
  var chars = ["©","Û","®","ž","Ü","Ÿ","Ý","$","Þ","%","¡","ß","¢","à","£","á","À","¤","â","Á","¥","ã","Â","¦","ä","Ã","§","å","Ä","¨","æ","Å","©","ç","Æ","ª","è","Ç","«","é","È","¬","ê","É","\u00AD","ë","Ê","®","ì","Ë","¯","í","Ì","°","î","Í","±","ï","Î","²","ð","Ï","³","ñ","Ð","´","ò","Ñ","µ","ó","Õ","¶","ô","Ö","·","õ","Ø","¸","ö","Ù","¹","÷","Ú","º","ø","Û","»","ù","Ü","@","¼","ú","Ý","½","û","Þ","€","¾","ü","ß","¿","ý","à","‚","À","þ","á","ƒ","Á","ÿ","å","„","Â","æ","…","Ã","ç","†","Ä","è","‡","Å","é","ˆ","Æ","ê","‰","Ç","ë","Š","È","ì","‹","É","í","Œ","Ê","î","Ë","ï","Ž","Ì","ð","Í","ñ","Î","ò","‘","Ï","ó","’","Ð","ô","“","Ñ","õ","”","Ò","ö","•","Ó","ø","–","Ô","ù","—","Õ","ú","˜","Ö","û","™","×","ý","š","Ø","þ","›","Ù","ÿ","œ","Ú"];
  var codes = ["&copy;","&#219;","&reg;","&#158;","&#220;","&#159;","&#221;","&#36;","&#222;","&#37;","&#161;","&#223;","&#162;","&#224;","&#163;","&#225;","&Agrave;","&#164;","&#226;","&Aacute;","&#165;","&#227;","&Acirc;","&#166;","&#228;","&Atilde;","&#167;","&#229;","&Auml;","&#168;","&#230;","&Aring;","&#169;","&#231;","&AElig;","&#170;","&#232;","&Ccedil;","&#171;","&#233;","&Egrave;","&#172;","&#234;","&Eacute;","&#173;","&#235;","&Ecirc;","&#174;","&#236;","&Euml;","&#175;","&#237;","&Igrave;","&#176;","&#238;","&Iacute;","&#177;","&#239;","&Icirc;","&#178;","&#240;","&Iuml;","&#179;","&#241;","&ETH;","&#180;","&#242;","&Ntilde;","&#181;","&#243;","&Otilde;","&#182;","&#244;","&Ouml;","&#183;","&#245;","&Oslash;","&#184;","&#246;","&Ugrave;","&#185;","&#247;","&Uacute;","&#186;","&#248;","&Ucirc;","&#187;","&#249;","&Uuml;","&#64;","&#188;","&#250;","&Yacute;","&#189;","&#251;","&THORN;","&#128;","&#190;","&#252;","&szlig;","&#191;","&#253;","&agrave;","&#130;","&#192;","&#254;","&aacute;","&#131;","&#193;","&#255;","&aring;","&#132;","&#194;","&aelig;","&#133;","&#195;","&ccedil;","&#134;","&#196;","&egrave;","&#135;","&#197;","&eacute;","&#136;","&#198;","&ecirc;","&#137;","&#199;","&euml;","&#138;","&#200;","&igrave;","&#139;","&#201;","&iacute;","&#140;","&#202;","&icirc;","&#203;","&iuml;","&#142;","&#204;","&eth;","&#205;","&ntilde;","&#206;","&ograve;","&#145;","&#207;","&oacute;","&#146;","&#208;","&ocirc;","&#147;","&#209;","&otilde;","&#148;","&#210;","&ouml;","&#149;","&#211;","&oslash;","&#150;","&#212;","&ugrave;","&#151;","&#213;","&uacute;","&#152;","&#214;","&ucirc;","&#153;","&#215;","&yacute;","&#154;","&#216;","&thorn;","&#155;","&#217;","&yuml;","&#156;","&#218;"];

  for (var x = 0; x < chars.length; x++) {
    for (var i = 0; i < fields.length; i++) {
      // split/join replaces every occurrence literally; replace() with a
      // string pattern would change only the first one.
      fields[i].value = fields[i].value.split(chars[x]).join(codes[x]);
    }
  }
}


char_convert(this);

用途:

// Parallel tables: swapCodes[k] is the character code of a "smart"
// punctuation mark or symbol, swapStrings[k] the text that replaces it.
var swapCodes   = new Array(8211, 8212, 8216, 8217, 8220, 8221, 8226, 8230, 8482, 169, 61558, 8226, 61607);
var swapStrings = new Array("--", "--", "'",  "'",  '"',  '"',  "*",  "...", "&trade;", "&copy;", "&bull;", "&bull;", "&bull;");


var TextCheck = {
  /**
   * Registers handlers on the field so its text is cleaned on several events.
   *
   * @param {string} div - jQuery selector of the field.
   */
  doCWBind: function (div) {
    $(div).bind({
      // NOTE(review): "bind" is not a standard DOM event name; possibly
      // "blur" was intended. Confirm before changing.
      bind: function () {
        TextCheck.cleanWord(div);
      },
      focus: function () {
        TextCheck.cleanWord(div);
      },
      paste: function () {
        TextCheck.cleanWord(div);
      }
    });
  },

  /**
   * Replaces every character listed in swapCodes with the matching entry of
   * swapStrings in the field's current value.
   *
   * @param {string} div - jQuery selector of the field.
   */
  cleanWord: function (div) {
    var output = $(div).val();
    for (var i = 0; i < swapCodes.length; i++) {
      // A \u escape needs exactly four hex digits. Without padding, code 169
      // produced the pattern "\ua9", which does not match the character.
      var hex = swapCodes[i].toString(16).padStart(4, "0");
      var swapper = new RegExp("\\u" + hex, "g");
      output = output.replace(swapper, swapStrings[i]);
    }
    $(div).val(output);
  }
};

我们现在用的另一个有效的方法。上面的代码调用了一个脚本,并返回了转换后的代码。它只适用于小的文字区域(意味着不是一篇完整的文章、博客等)


对于上述情况,它适用于大多数字符。

// Parallel tables: swapCodes[k] is a character code, swapStrings[k] the text
// that replaces that character.
var swapCodes = [8211, 8212, 8216, 8217, 8220, 8221, 8226, 8230, 8482, 61558, 8226, 61607].concat(
  // Every code from 161 through 255, in ascending order.
  Array.from({ length: 95 }, function (_, offset) { return 161 + offset; }),
  [338, 339, 352, 353, 376, 402]
);
var swapStrings = [
  "--", "--", "'", "'", '"', '"', "*", "...", "&trade;", "&bull;", "&bull;", "&bull;",
  // 161-191
  "&iexcl;", "&cent;", "&pound;", "&curren;", "&yen;", "&brvbar;", "&sect;", "&uml;",
  "&copy;", "&ordf;", "&laquo;", "&not;", "&shy;", "&reg;", "&macr;", "&deg;",
  "&plusmn;", "&sup2;", "&sup3;", "&acute;", "&micro;", "&para;", "&middot;", "&cedil;",
  "&sup1;", "&ordm;", "&raquo;", "&frac14;", "&frac12;", "&frac34;", "&iquest;",
  // 192-223
  "&Agrave;", "&Aacute;", "&Acirc;", "&Atilde;", "&Auml;", "&Aring;", "&AElig;", "&Ccedil;",
  "&Egrave;", "&Eacute;", "&Ecirc;", "&Euml;", "&Igrave;", "&Iacute;", "&Icirc;", "&Iuml;",
  "&ETH;", "&Ntilde;", "&Ograve;", "&Oacute;", "&Ocirc;", "&Otilde;", "&Ouml;", "&times;",
  "&Oslash;", "&Ugrave;", "&Uacute;", "&Ucirc;", "&Uuml;", "&Yacute;", "&THORN;", "&szlig;",
  // 224-255
  "&agrave;", "&aacute;", "&acirc;", "&atilde;", "&auml;", "&aring;", "&aelig;", "&ccedil;",
  "&egrave;", "&eacute;", "&ecirc;", "&euml;", "&igrave;", "&iacute;", "&icirc;", "&iuml;",
  "&eth;", "&ntilde;", "&ograve;", "&oacute;", "&ocirc;", "&otilde;", "&ouml;", "&divide;",
  "&oslash;", "&ugrave;", "&uacute;", "&ucirc;", "&uuml;", "&yacute;", "&thorn;", "&yuml;",
  // 338, 339, 352, 353, 376, 402
  "&#338;", "&#339;", "&#352;", "&#353;", "&#376;", "&#402;"
];

我创建了一个包含很多功能的 JavaScript 文件,其中包括以上内容: http://www.neotropicsolutions.com/jschars.zip

所有需要的文件都包含在内。我添加了 jQuery 1.4.4。仅仅是因为我在其他版本中看到了问题,但还没有尝试过。

Requires: jQuery & jQuery Impromptu from: http://trentrichardson.com/Impromptu/index.php


1. Word Count
2. Character Conversion
3. Checks to ensure this is not passed: "notsomeverylongstringmissingspaces"
4. Checks to make sure ALL IS NOT ALL UPPERCASE.
5. Strip HTML


// Word Counter
// Load the TextCheck helpers, then wire them to the #adtxt textarea.
$.getScript('js/characters.js', function onCharactersLoaded() {
  // Refresh the word counter (limit 30) on every relevant input event.
  function updateWordCount(event) {
    TextCheck.wordCount(30, "#adtxt", "#adtxt_count", event);
  }

  // When the field loses focus: run the length check, then bind the
  // character conversion.
  function checkAndConvert(event) {
    TextCheck.check_length('#adtxt');
    TextCheck.doCWBind('#adtxt');
  }

  $('#adtxt').bind("keyup click blur focus change paste", updateWordCount);
  $('#adtxt').blur(checkAndConvert);

  // Initial count; no event object is available, so false is passed instead.
  TextCheck.wordCount(30, "#adtxt", "#adtxt_count", false);
});


//HTML
<textarea name="adtxt" id="adtxt" rows="10" cols="70" class="wordCount"></textarea>


<div id="adtxt_count" class="clear"></div>


// Just Character Conversions:
TextCheck.doCWBind('#myfield');


// Run through form fields in a form for case checking.
// Alerts user when field is blur'd.
// Display labels and field names, index-aligned, passed together to the checks.
var labels = ["Brief Description", "Website URL", "Contact Name", "Website", "Email", "Linkback URL"];
var checking = ["descr", "title", "fname", "website", "email", "linkback"];
TextCheck.check_it(checking, labels);


// Extra security to check again, make sure form is not submitted
var pass = TextCheck.validate(checking, labels);
if (pass) {
  // Do form actions
}


//Strip HTML
<textarea name="adtxt" id="adtxt" rows="10" cols="70" onblur="TextCheck.stripHTML(this);"></textarea>

使用 JavaScript 的 escape 函数,它允许您对字符串进行编码。

例如:

escape("yourString");

在 PRE 标签中——以及大多数其他 HTML 标签中——使用了输出重定向字符(< 和 >)的批处理文件纯文本会破坏 HTML,但是这是我的小提示: 任何放进 TEXTAREA 元素的内容都不会破坏 HTML,主要是因为我们处在一个由操作系统实例化并处理的控件中,因此它的内容不会被 HTML 引擎解析。

例如,我想使用 JavaScript 突出显示批处理文件的语法。我只是简单地将代码粘贴到文本区域中,而不用担心 HTML 保留字符,然后让脚本处理文本区域的 innerHTML属性,该属性计算结果为文本,HTML 保留字符被相应的 ISO 8859-1实体替换。

当您检索元素的 innerHTML(和 outerHTML)属性时,浏览器将自动转义特殊字符。使用文本区域(谁知道呢,也许是文本类型的输入)可以避免执行转换(手动或通过代码)。

我使用这个技巧来测试我的语法高亮显示器,当我完成编写和测试时,我只是从视图中隐藏文本区域。

来自 Mozilla..。

请注意,charCodeAt 总是返回小于 65,536 的值。这是因为较高的代码点由一对(值较低的)“代理”伪字符表示,这两个伪字符组合起来才构成真正的字符。因此,要检查或复制值为 65,536 及以上的单个字符的完整内容,不仅需要检索 charCodeAt(i),还需要检索 charCodeAt(i + 1)(就像检查/复制一个包含两个字母的字符串一样)。

最佳解决方案

/**
* (c) 2012 Steven Levithan <http://slevithan.com/>
* MIT license
*/
// Define String.prototype.codePointAt only when the runtime does not already provide it.
if (!String.prototype.codePointAt) {
/**
 * Returns the code point that starts at index `pos`, combining a surrogate
 * pair into a single value.
 *
 * @param {number} pos - Index into the string; values for which isNaN() is
 *   true are treated as 0.
 * @returns {number} The combined code point when `pos` starts a surrogate
 *   pair, otherwise the result of charCodeAt(pos).
 */
String.prototype.codePointAt = function (pos) {
pos = isNaN(pos) ? 0 : pos;
var str = String(this),
code = str.charCodeAt(pos),
next = str.charCodeAt(pos + 1);
// If a surrogate pair
if (0xD800 <= code && code <= 0xDBFF && 0xDC00 <= next && next <= 0xDFFF) {
// Combine the offsets of the high and low halves and add 0x10000.
return ((code - 0xD800) * 0x400) + (next - 0xDC00) + 0x10000;
}
return code;
};
}


/**
 * Encodes every non-ASCII character (code point above 127) of a string as a
 * decimal numeric character reference; ASCII characters are left unchanged.
 *
 * @param {string} string - Text to encode.
 * @return {string} The encoded text.
 */
function html_encode(string) {
  var ret_val = '';
  // Array.from splits by code point. An index-based loop would encode a
  // surrogate pair and then visit its low half again, emitting a second,
  // invalid reference.
  Array.from(string).forEach(function (ch) {
    var code = ch.codePointAt(0);
    ret_val += code > 127 ? '&#' + code + ';' : ch;
  });
  return ret_val;
}

用法例子:

html_encode("✈");

变通办法:

// "<div>" creates a new detached element. The selector "div" would instead
// match every <div> already in the page and overwrite its text.
var temp = $("<div>").text("<");
var afterEscape = temp.html(); // afterEscape == "&lt;"

正如 dragon 所提到的,最干净的方法是使用 jQuery:

/**
 * HTML-encodes text by storing it as the text of a detached <div> and reading
 * back that element's HTML.
 *
 * @param {string} s - Raw text.
 * @returns {string} The HTML of the element after `s` was set as its text.
 */
function htmlEncode(s) {
  var scratch = $('<div>');
  scratch.text(s);
  return scratch.html();
}


/**
 * Decodes HTML character references in a string.
 *
 * The input is parsed as the content of a detached <textarea>, where
 * character references are decoded but tags stay plain text. Parsing
 * untrusted input inside a <div> would create real elements (for example an
 * <img> with an onerror handler) and would drop literal tags from the result.
 *
 * @param {string} s - Text containing character references.
 * @returns {string} The decoded text.
 */
function htmlDecode(s) {
  var scratch = document.createElement('textarea');
  scratch.innerHTML = s;
  return scratch.value;
}

这并不能直接回答您的问题,但是如果您使用 innerHTML在元素中编写文本,并且遇到了编码问题,那么只需使用 textContent,即:

// Sample text containing quotes and angle brackets.
var s = "Foo 'bar' baz <qux>";


var element = document.getElementById('foo');
// Assigning through textContent inserts the string as text rather than markup.
element.textContent = s;


// <div id="foo">Foo 'bar' baz <qux></div>
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>html</title>


<script>
// This page never loads jQuery, so $(function () { ... }) would throw a
// ReferenceError. The native DOMContentLoaded event waits for #test to exist.
document.addEventListener('DOMContentLoaded', function () {
  document.getElementById('test').innerHTML = "&amp;";
});
</script>
</head>


<body>
<div id="test"></div>
</body>
</html>

您只需使用上述代码将特殊字符转换为 HTML 即可。

这里有一个很好的库,我发现它在这方面非常有用。

https://github.com/mathiasbynens/he

据其作者称:

它支持 HTML 标准中所有标准化的命名字符引用,像浏览器一样处理有歧义的 & 符号和其他边缘情况,拥有完善的测试套件,而且——与许多其他 JavaScript 解决方案不同——he 能很好地处理星体平面(astral)Unicode 符号。

下面是我使用的一些不需要 JQuery的方法:

您可以对字符串中的每个字符进行编码:

/**
 * Encodes every character of a string as a decimal numeric character reference.
 *
 * @param {string} e - Text to encode.
 * @returns {string} The encoded text.
 */
function encode(e) {
  // The "u" flag makes the regex match whole code points, and codePointAt
  // reads the full value. Without them an astral character became two
  // invalid surrogate references.
  return e.replace(/[^]/gu, function (ch) {
    return "&#" + ch.codePointAt(0) + ";";
  });
}

或者只针对需要安全编码的主要字符(&、换行符、<、>、" 和 ')进行编码,比如:

/**
 * Encodes &, newline, <, >, ' and " as decimal numeric character references
 * and leaves every other character unchanged.
 *
 * @param {string} r - Text to encode.
 * @returns {string} The encoded text.
 */
function encode(r) {
  var toReference = function (ch) {
    return "&#" + ch.charCodeAt(0) + ";";
  };
  return r.replace(/[\x26\x0A\<>'"]/g, toReference);
}


test.value = encode('How to encode\nonly html tags &<>\'" nice & fast!');


/*************
* \x26 is &ampersand (it has to be first),
* \x0A is newline,
*************/
<textarea id=test rows="9" cols="55">www.WHAK.com</textarea>

如果您需要支持所有标准化的命名字符引用、Unicode 以及有歧义的 & 符号,那么 he 库是我所知道的唯一 100% 可靠的解决方案!


示例使用

he.encode('foo © bar ≠ baz 𝌆 qux');
// Output: 'foo &#xA9; bar &#x2260; baz &#x1D306; qux'


he.decode('foo &copy; bar &ne; baz &#x1D306; qux');
// Output: 'foo © bar ≠ baz 𝌆 qux'

对于那些想在字符串中使用像 &#xxx;这样的整数字符代码的人,使用以下函数:

/**
 * Replaces decimal numeric character references (e.g. "&#8217;") with the
 * characters they denote.
 *
 * @param {string} str - Text containing references of the form &#NNN;.
 * @returns {string} The decoded text. References above U+10FFFF are left
 *   unchanged.
 */
function decodeHtmlCharCodes(str) {
  return str.replace(/(&#(\d+);)/g, function(match, capture, charCode) {
    var codePoint = Number(charCode);
    // fromCodePoint handles values above 0xFFFF, which fromCharCode silently
    // truncates. It throws above 0x10FFFF, so those references are kept.
    return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : match;
  });
}


// Will output "The show that gained int’l reputation’!"
console.log(decodeHtmlCharCodes('The show that gained int&#8217;l reputation&#8217;!'));

ES6

/**
 * Replaces decimal numeric character references (e.g. "&#8217;") with the
 * characters they denote.
 *
 * @param {string} str - Text containing references of the form &#NNN;.
 * @returns {string} The decoded text. References above U+10FFFF are left
 *   unchanged.
 */
const decodeHtmlCharCodes = str =>
  str.replace(/(&#(\d+);)/g, (match, capture, charCode) => {
    const codePoint = Number(charCode);
    // fromCodePoint handles values above 0xFFFF, which fromCharCode silently
    // truncates. It throws above 0x10FFFF, so those references are kept.
    return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : match;
  });


// Will output "The show that gained int’l reputation’!"
console.log(decodeHtmlCharCodes('The show that gained int&#8217;l reputation&#8217;!'));

我们可以使用 JavaScript 的 DOMParser进行特殊字符的转换。

const parser = new DOMParser();
// The input must be parsed as "text/html": only HTML documents have a body,
// and these semicolon-less references are not well-formed XML. The stray
// opening parenthesis that made this statement a syntax error is removed.
const convertedValue = parser.parseFromString("&#039 &amp &#039 &lt &gt", "text/html").body.textContent;

下面是用 JavaScript 编码 XML 转义字符的函数:

Encoder.htmlEncode(unsafeText);

如果您使用的是 Lodash,您可以这样做(从文档中复制粘贴) :

_.escape('fred, barney, & pebbles');
// => 'fred, barney, &amp; pebbles'

更多信息: https://lodash.com/docs/#escape

我自己也为此挣扎了一段时间,但我决定使用这个负匹配正则表达式来匹配所有特殊字符,并将它们转换为相关的字符代码:

// Encode every character other than ASCII letters and digits as a decimal
// numeric character reference. The "u" flag and codePointAt keep an astral
// character as one reference instead of two invalid surrogate references.
var encoded = value.replace(/[^A-Za-z0-9]/gu, function(i) {
  return '&#' + i.codePointAt(0) + ';';
});