83 lines
2.0 KiB
JavaScript
Raw Normal View History

2020-10-17 18:42:50 +02:00
var inverseXML = getInverseObj(require("../maps/xml.json")),
xmlReplacer = getInverseReplacer(inverseXML);
exports.XML = getInverse(inverseXML, xmlReplacer);
var inverseHTML = getInverseObj(require("../maps/entities.json")),
htmlReplacer = getInverseReplacer(inverseHTML);
exports.HTML = getInverse(inverseHTML, htmlReplacer);
function getInverseObj(obj) {
return Object.keys(obj)
.sort()
.reduce(function(inverse, name) {
inverse[obj[name]] = "&" + name + ";";
return inverse;
}, {});
}
function getInverseReplacer(inverse) {
var single = [],
multiple = [];
Object.keys(inverse).forEach(function(k) {
if (k.length === 1) {
single.push("\\" + k);
} else {
multiple.push(k);
}
});
//TODO add ranges
multiple.unshift("[" + single.join("") + "]");
return new RegExp(multiple.join("|"), "g");
}
var re_nonASCII = /[^\0-\x7F]/g,
re_astralSymbols = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
function singleCharReplacer(c) {
return (
"&#x" +
c
.charCodeAt(0)
.toString(16)
.toUpperCase() +
";"
);
}
function astralReplacer(c) {
// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
var high = c.charCodeAt(0);
var low = c.charCodeAt(1);
var codePoint = (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
return "&#x" + codePoint.toString(16).toUpperCase() + ";";
}
function getInverse(inverse, re) {
function func(name) {
return inverse[name];
}
return function(data) {
return data
.replace(re, func)
.replace(re_astralSymbols, astralReplacer)
.replace(re_nonASCII, singleCharReplacer);
};
}
var re_xmlChars = getInverseReplacer(inverseXML);
function escapeXML(data) {
return data
.replace(re_xmlChars, singleCharReplacer)
.replace(re_astralSymbols, astralReplacer)
.replace(re_nonASCII, singleCharReplacer);
}
exports.escape = escapeXML;