Initial Commit
This commit is contained in:
268
nodered/rootfs/data/node_modules/html-to-text/lib/formatter.js
generated
vendored
Normal file
268
nodered/rootfs/data/node_modules/html-to-text/lib/formatter.js
generated
vendored
Normal file
@@ -0,0 +1,268 @@
|
||||
var max = require('lodash/max');
|
||||
var compact = require('lodash/compact');
|
||||
var times = require('lodash/times');
|
||||
|
||||
var trimStart = require('lodash/trimStart');
|
||||
var padEnd = require('lodash/padEnd');
|
||||
|
||||
var he = require('he');
|
||||
|
||||
var helper = require('./helper');
|
||||
|
||||
/**
 * Formats a text node.
 *
 * The node's data is decoded with `he.decode`. Inside a `<pre>`
 * (`options.isInPre`) the decoded text is returned untouched; otherwise it
 * is word-wrapped, with leading whitespace stripped first when the node is
 * flagged with `trimLeadingSpace`.
 *
 * @param {Object} elem - Text node; reads `data` and `trimLeadingSpace`.
 * @param {Object} options - Conversion options; reads `decodeOptions` and
 *   `isInPre`, and is passed on to `helper.wordwrap`.
 * @returns {string} The formatted text.
 */
function formatText(elem, options) {
  var decoded = he.decode(elem.data || "", options.decodeOptions);

  // Preformatted content keeps its original whitespace and line breaks.
  if (options.isInPre) {
    return decoded;
  }

  if (elem.trimLeadingSpace) {
    decoded = trimStart(decoded);
  }
  return helper.wordwrap(decoded, options);
}
|
||||
|
||||
/**
 * Formats an `<img>` element as `alt [src]`.
 *
 * Either part is omitted when the matching attribute is missing or empty.
 *
 * @param {Object} elem - Image element; reads `attribs.alt` and `attribs.src`.
 * @param {Object} options - Conversion options; reads `ignoreImage` and
 *   `decodeOptions`.
 * @returns {string} The textual representation, or '' when images are ignored.
 */
function formatImage(elem, options) {
  if (options.ignoreImage) {
    return '';
  }

  var attribs = elem.attribs || {};
  var pieces = [];

  if (attribs.alt) {
    pieces.push(he.decode(attribs.alt, options.decodeOptions));
  }
  if (attribs.src) {
    pieces.push('[' + attribs.src + ']');
  }

  // A single space separates the alt text from the bracketed source.
  return pieces.join(' ');
}
|
||||
|
||||
/**
 * Formats a `<br>` element as a newline followed by its rendered children.
 *
 * @param {Object} elem - The line break element.
 * @param {Function} fn - Renderer called as `fn(children, options)`.
 * @param {Object} options - Conversion options, passed through to `fn`.
 * @returns {string} The newline plus the rendered children.
 */
function formatLineBreak(elem, fn, options) {
  var rendered = fn(elem.children, options);
  return '\n' + rendered;
}
|
||||
|
||||
/**
 * Formats a paragraph: its rendered children followed by one newline when
 * `options.singleNewLineParagraphs` is set, otherwise two.
 *
 * @param {Object} elem - The paragraph element.
 * @param {Function} fn - Renderer called as `fn(children, options)`.
 * @param {Object} options - Conversion options.
 * @returns {string} The paragraph text including its trailing newline(s).
 */
function formatParagraph(elem, fn, options) {
  var body = fn(elem.children, options);
  var terminator = options.singleNewLineParagraphs ? '\n' : '\n\n';
  return body + terminator;
}
|
||||
|
||||
/**
 * Formats a heading: its rendered children, upper-cased when
 * `options.uppercaseHeadings` is set, followed by a single newline.
 *
 * @param {Object} elem - The heading element.
 * @param {Function} fn - Renderer called as `fn(children, options)`.
 * @param {Object} options - Conversion options.
 * @returns {string} The heading text with a trailing newline.
 */
function formatHeading(elem, fn, options) {
  var rendered = fn(elem.children, options);
  var title = options.uppercaseHeadings ? rendered.toUpperCase() : rendered;
  return title + '\n';
}
|
||||
|
||||
/**
 * Formats an `<a>` element.
 *
 * With both anchor text and an href the result is "anchor text [href]"
 * (or "anchor text href" when `options.noLinkBrackets` is set). With only
 * one of the two, that part is returned on its own.
 *
 * Options read here: `ignoreHref`, `noAnchorUrl` (skips hrefs starting with
 * '#'), `linkHrefBaseUrl` (prefixed to hrefs starting with '/'),
 * `hideLinkHrefIfSameAsText`, `noLinkBrackets` and `lineCharCount`.
 *
 * @param {Object} elem - Anchor element; reads `children`, `attribs.href`
 *   and `trimLeadingSpace`.
 * @param {Function} fn - Renderer called as `fn(children, options)`.
 * @param {Object} options - Conversion options.
 * @returns {string} The formatted anchor, passed through `formatText`.
 */
function formatAnchor(elem, fn, options) {
  // Remember the current column: it is restored before the final
  // formatText call below, after the children have been rendered.
  var savedLineCharCount = options.lineCharCount;

  // Always get the anchor text.
  var anchorText = fn(elem.children || [], options) || '';
  var output = elem.trimLeadingSpace ? trimStart(anchorText) : anchorText;

  // Get the href, if present and not ignored.
  var href = '';
  if (!options.ignoreHref && elem.attribs && elem.attribs.href) {
    href = elem.attribs.href.replace(/^mailto:/, '');
  }

  var hrefIsShown = href && (!options.noAnchorUrl || href[0] !== '#');
  if (hrefIsShown) {
    if (options.linkHrefBaseUrl && href.indexOf('/') === 0) {
      href = options.linkHrefBaseUrl + href;
    }

    var duplicatesText = options.hideLinkHrefIfSameAsText &&
      href === helper.replaceAll(output, '\n', '');
    if (!duplicatesText) {
      output += options.noLinkBrackets ? ' ' + href : ' [' + href + ']';
    }
  }

  options.lineCharCount = savedLineCharCount;

  return formatText({ data: output || href, trimLeadingSpace: elem.trimLeadingSpace }, options);
}
|
||||
|
||||
/**
 * Formats an `<hr>` element as a row of dashes surrounded by newlines.
 *
 * The row is `options.wordwrap` characters wide. When word wrapping is
 * disabled or the value is not a usable positive number (false, null, 0,
 * negative, non-finite), a fixed width of 40 is used so the rule neither
 * disappears nor makes `String.prototype.repeat` throw.
 *
 * @param {Object} elem - The horizontal rule element (unused).
 * @param {Function} fn - Child renderer (unused).
 * @param {Object} options - Conversion options; reads `wordwrap`.
 * @returns {string} The rule, preceded by one newline and followed by two.
 */
function formatHorizontalLine(elem, fn, options) {
  var fallbackWidth = 40;
  var width = options.wordwrap > 0 && isFinite(options.wordwrap)
    ? options.wordwrap
    : fallbackWidth;
  return '\n' + '-'.repeat(width) + '\n\n';
}
|
||||
|
||||
/**
 * Formats a single list item.
 *
 * The children are rendered with a copy of `options` whose `wordwrap` is
 * reduced by the prefix length (when wrapping is enabled). Continuation
 * lines are indented by the prefix width so they line up under the first.
 *
 * @param {string} prefix - Bullet or number put in front of the item.
 * @param {Object} elem - The list item element.
 * @param {Function} fn - Renderer called as `fn(children, options)`.
 * @param {Object} options - Conversion options; not modified.
 * @returns {string} The prefixed item text followed by a newline.
 */
function formatListItem(prefix, elem, fn, options) {
  var indentWidth = prefix.length;

  // Work on a copy so the narrower wrap width stays local to this item.
  var itemOptions = Object.assign({}, options);
  if (itemOptions.wordwrap) {
    itemOptions.wordwrap = itemOptions.wordwrap - indentWidth;
  }

  // Every line break inside the item is followed by prefix-width spacing.
  var continuation = '\n' + ' '.repeat(indentWidth);
  var lines = fn(elem.children, itemOptions).split('\n');

  return prefix + lines.join(continuation) + '\n';
}
|
||||
|
||||
// Matches strings that are empty or consist only of whitespace.
var whiteSpaceRegex = /^\s*$/;

/**
 * Formats a `<ul>` element.
 *
 * Whitespace-only text children are dropped; every remaining child is
 * rendered as a list item prefixed with `options.unorderedListItemPrefix`.
 *
 * @param {Object} elem - The list element.
 * @param {Function} fn - Renderer called as `fn(children, options)`.
 * @param {Object} options - Conversion options.
 * @returns {string} The formatted items followed by a newline.
 */
function formatUnorderedList(elem, fn, options) {
  var bullet = options.unorderedListItemPrefix;

  var renderedItems = (elem.children || [])
    .filter(function(child) {
      return child.type !== 'text' || !whiteSpaceRegex.test(child.data);
    })
    .map(function(item) {
      return formatListItem(bullet, item, fn, options);
    });

  return renderedItems.join('') + '\n';
}
|
||||
|
||||
/**
 * Formats an `<ol>` element.
 *
 * Whitespace-only text children are dropped; every remaining child is
 * rendered as a list item prefixed with its label. Supported `type`
 * attribute values are 'a', 'A' and '1'; anything else falls back to
 * decimal numbering. The `start` attribute is parsed as a base-10 integer
 * and defaults to 1 when missing or not a number.
 *
 * @param {Object} elem - The list element; reads `children`,
 *   `attribs.type` and `attribs.start`.
 * @param {Function} fn - Renderer called as `fn(children, options)`.
 * @param {Object} options - Conversion options.
 * @returns {string} The formatted items followed by a newline.
 */
function formatOrderedList(elem, fn, options) {
  // Tolerate elements without an attribute map, as the image and anchor
  // formatters do.
  var attribs = elem.attribs || {};
  var result = '';
  var nonWhiteSpaceChildren = (elem.children || []).filter(function(child) {
    return child.type !== 'text' || !whiteSpaceRegex.test(child.data);
  });

  // Return different functions for different OL types
  var typeFunction = (function() {
    // Determine type
    var olType = attribs.type || '1';
    // TODO Implement the other valid types
    // Fallback to type '1' function for other valid types
    switch(olType) {
      case 'a': return function(start, i) { return String.fromCharCode(i + start + 97);};
      case 'A': return function(start, i) { return String.fromCharCode(i + start + 65);};
      case '1':
      default: return function(start, i) { return i + 1 + start;};
    }
  }());

  // Make sure there are list items present
  if (nonWhiteSpaceChildren.length) {
    // Calculate the zero-based offset from the ol start attribute; an
    // unparsable value behaves like a missing one.
    var parsedStart = parseInt(attribs.start, 10);
    var start = (isNaN(parsedStart) ? 1 : parsedStart) - 1;
    // Calculate the maximum length to i.
    var maxLength = (nonWhiteSpaceChildren.length + start).toString().length;
    nonWhiteSpaceChildren.forEach(function(item, i) {
      // Use different function depending on type
      var index = typeFunction(start, i);
      // Calculate the needed spacing for nice indentation. Clamp at zero:
      // with a negative start a label can be longer than maxLength, and
      // String.prototype.repeat throws on a negative count.
      var spacing = Math.max(0, maxLength - index.toString().length);
      var prefix = ' ' + index + '. ' + ' '.repeat(spacing);
      result += formatListItem(prefix, item, fn, options);
    });
  }
  return result + '\n';
}
|
||||
|
||||
/**
 * Renders a table, given as an array of rows of cell strings, as text.
 *
 * Each column is as wide as its longest cell. Cells are trimmed,
 * right-padded to the column width and followed by a separator; every row
 * ends with a newline and the table is followed by one more newline.
 *
 * @param {Array<Array<string>>} table - Rows of cell strings.
 * @returns {string} The rendered table.
 */
function tableToString(table) {
  // Cell lengths per row, inverted from rows to columns, then reduced to
  // the maximum length of each column.
  var cellLengths = table.map(function(row) {
    return row.map(function(cell) {
      return cell.length;
    });
  });
  var columnWidths = helper.arrayZip(cellLengths).map(function(lengths) {
    return max(lengths);
  });

  // Build the table row by row.
  var renderedRows = table.map(function(row) {
    var cells = row.map(function(cell, columnIndex) {
      return padEnd(cell.trim(), columnWidths[columnIndex], ' ') + ' ';
    });
    return cells.join('') + '\n';
  });

  return renderedRows.join('') + '\n';
}
|
||||
|
||||
/**
 * Formats a `<table>` element as aligned text columns.
 *
 * Rows are collected from `<tr>` elements found directly under the table
 * or inside `<thead>`, `<tbody>`, `<tfoot>` and `<center>` wrappers.
 * `<th>` cells are rendered with `formatHeading`, `<td>` cells with `fn`.
 * A cell whose text spans several lines produces several output rows.
 * A `colspan` attribute on either kind of cell adds empty filler cells so
 * later columns stay aligned.
 *
 * @param {Object} elem - The table element.
 * @param {Function} fn - Renderer called as `fn(children, options)`.
 * @param {Object} options - Conversion options.
 * @returns {string} The table rendered by `tableToString`.
 */
function formatTable(elem, fn, options) {
  var table = [];
  elem.children.forEach(tryParseRows);
  return tableToString(table);

  // Adds one rendered cell (as its non-empty lines) to `cells`, followed
  // by empty filler cells for the columns covered by its colspan.
  function pushCell(cells, cellElem, text) {
    cells.push(compact(text.split('\n')));
    if (cellElem.attribs && cellElem.attribs.colspan) {
      // A non-numeric colspan yields NaN, which falls back to no filler.
      var fillerCount = cellElem.attribs.colspan - 1 || 0;
      times(fillerCount, function() {
        cells.push(['']);
      });
    }
  }

  // Collects rows from a table child, descending into section wrappers.
  function tryParseRows(elem) {
    if (elem.type !== 'tag') {
      return;
    }
    switch (elem.name.toLowerCase()) {
      case "thead":
      case "tbody":
      case "tfoot":
      case "center":
        elem.children.forEach(tryParseRows);
        return;

      case 'tr':
        var cells = [];
        elem.children.forEach(function(cell) {
          if (cell.type !== 'tag') {
            return;
          }
          switch (cell.name.toLowerCase()) {
            case 'th':
              pushCell(cells, cell, formatHeading(cell, fn, options));
              break;

            case 'td':
              pushCell(cells, cell, fn(cell.children, options));
              break;
          }
        });
        // Each cell is a list of lines; inverting turns the n-th line of
        // every cell into the n-th output row. Missing lines become ''.
        helper.arrayZip(cells).forEach(function(row) {
          table.push(row.map(function(col) {
            return col || '';
          }));
        });
        break;
    }
  }
}
|
||||
|
||||
/**
 * Formats a `<blockquote>` element: "> ", the rendered children, and a
 * trailing newline.
 *
 * @param {Object} elem - The blockquote element.
 * @param {Function} fn - Renderer called as `fn(children, options)`.
 * @param {Object} options - Conversion options, passed through to `fn`.
 * @returns {string} The quoted text.
 */
function formatBlockquote(elem, fn, options) {
  var quoted = fn(elem.children, options);
  return '> ' + quoted + '\n';
}
|
||||
|
||||
// Public formatter API: one entry per kind of node the converter formats.
Object.assign(exports, {
  text: formatText,
  image: formatImage,
  lineBreak: formatLineBreak,
  paragraph: formatParagraph,
  anchor: formatAnchor,
  heading: formatHeading,
  table: formatTable,
  orderedList: formatOrderedList,
  unorderedList: formatUnorderedList,
  listItem: formatListItem,
  horizontalLine: formatHorizontalLine,
  blockquote: formatBlockquote
});
|
||||
139
nodered/rootfs/data/node_modules/html-to-text/lib/helper.js
generated
vendored
Normal file
139
nodered/rootfs/data/node_modules/html-to-text/lib/helper.js
generated
vendored
Normal file
@@ -0,0 +1,139 @@
|
||||
var zip = require('lodash/zip');
|
||||
var trimEnd = require('lodash/trimEnd');
|
||||
|
||||
/**
 * Splits a long word so that it fits within the word wrap limit.
 *
 * The configured wrap characters are tried in order: the word is broken
 * after the last occurrence of the current character within the first
 * `options.wordwrap` characters. Once no wrap character applies, the word
 * is either cut exactly at the limit (`forceWrapOnLimit`) or kept at full
 * length, followed by a newline unless `options.preserveNewlines` is set.
 *
 * @param {string} word - The word to split.
 * @param {Object} options - Conversion options; reads `wordwrap`,
 *   `preserveNewlines`, `longWordSplit.wrapCharacters` and
 *   `longWordSplit.forceWrapOnLimit`.
 * @returns {string} The pieces of the word joined with newlines.
 */
function splitLongWord(word, options) {
  var wrapCharacters = options.longWordSplit.wrapCharacters || [];
  var forceWrapOnLimit = options.longWordSplit.forceWrapOnLimit || false;
  var max = options.wordwrap;

  var fuseWord = [];
  var idx = 0;
  while (word.length > max) {
    var firstLine = word.substr(0, max);
    var remainingChars = word.substr(max);

    // Only search while a wrap character is left to try. Looking up an
    // index past the end gives undefined, which lastIndexOf would coerce
    // to the literal text "undefined" and then split on it.
    var splitIndex = idx < wrapCharacters.length
      ? firstLine.lastIndexOf(wrapCharacters[idx])
      : -1;

    if (splitIndex > -1) {
      // We've found a character to split on, store before the split then
      // check if we need to split again.
      word = firstLine.substr(splitIndex + 1) + remainingChars;
      fuseWord.push(firstLine.substr(0, splitIndex + 1));
      continue;
    }

    idx++;
    if (idx < wrapCharacters.length) {
      // Retry the same word with the next wrap character.
      continue;
    }

    // Cannot split on character, so either split at 'max' or preserve
    // length. Forcing a split needs a positive limit: with a limit of 0
    // no characters would ever be consumed and the loop would not end.
    if (forceWrapOnLimit && max > 0) {
      fuseWord.push(firstLine);
      word = remainingChars;
      if (word.length > max) {
        continue;
      }
    } else if (!options.preserveNewlines) {
      word += '\n';
    }
    break;
  }
  fuseWord.push(word);

  return fuseWord.join('\n');
}
|
||||
|
||||
exports.wordwrap = function wordwrap(text, options) {
|
||||
var max = options.wordwrap;
|
||||
var preserveNewlines = options.preserveNewlines;
|
||||
var length = options.lineCharCount;
|
||||
|
||||
// Preserve leading space
|
||||
var result = text.startsWith(' ') ? ' ' : '';
|
||||
length += result.length;
|
||||
var buffer = [];
|
||||
// Split the text into words, decide to preserve new lines or not.
|
||||
var words = preserveNewlines
|
||||
? text.trim().replace(/\n/g, '\n ').split(/\ +/)
|
||||
: text.trim().split(/\s+/);
|
||||
|
||||
// Determine where to end line word by word.
|
||||
words.forEach(function(word) {
|
||||
// Add buffer to result if we can't fit any more words in the buffer.
|
||||
if ((max || max === 0) && length > 0 && ((length + word.length > max) || (length + word.indexOf('\n') > max))) {
|
||||
// Concat buffer and add it to the result
|
||||
result += buffer.join(' ') + '\n';
|
||||
// Reset buffer and length
|
||||
buffer.length = length = 0;
|
||||
}
|
||||
|
||||
// Check if the current word is long enough to be wrapped
|
||||
if ((max || max === 0) && (options.longWordSplit) && (word.length > max)) {
|
||||
word = splitLongWord(word, options);
|
||||
}
|
||||
|
||||
buffer.push(word);
|
||||
|
||||
// If the word contains a newline then restart the count and add the buffer to the result
|
||||
if (word.indexOf('\n') !== -1) {
|
||||
result += buffer.join(' ');
|
||||
|
||||
// Reset the buffer, let the length include any characters after the last newline
|
||||
buffer.length = 0;
|
||||
length = word.length - (word.lastIndexOf('\n') + 1);
|
||||
// If there are characters after the newline, add a space and increase the length by 1
|
||||
if (length) {
|
||||
result += ' ';
|
||||
length++;
|
||||
}
|
||||
} else {
|
||||
// Add word length + one whitespace
|
||||
length += word.length + 1;
|
||||
}
|
||||
});
|
||||
// Add the rest to the result.
|
||||
result += buffer.join(' ');
|
||||
|
||||
// Preserve trailing space
|
||||
if (!text.endsWith(' ')) {
|
||||
result = trimEnd(result);
|
||||
} else if (!result.endsWith(' ')) {
|
||||
result = result + ' ';
|
||||
}
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
exports.arrayZip = function arrayZip(array) {
|
||||
return zip.apply(null, array);
|
||||
};
|
||||
|
||||
exports.splitCssSearchTag = function splitCssSearchTag(tagString) {
|
||||
function getParams(re, string) {
|
||||
var captures = [], found;
|
||||
while ((found = re.exec(string)) !== null) {
|
||||
captures.push(found[1]);
|
||||
}
|
||||
return captures;
|
||||
}
|
||||
|
||||
var splitTag = {};
|
||||
var elementRe = /(^\w*)/g;
|
||||
splitTag.element = elementRe.exec(tagString)[1];
|
||||
splitTag.classes = getParams( /\.([\d\w-]*)/g, tagString);
|
||||
splitTag.ids = getParams( /#([\d\w-]*)/g, tagString);
|
||||
|
||||
return splitTag;
|
||||
};
|
||||
|
||||
exports.replaceAll = function replaceAll(str, find, replace) {
|
||||
var reg = new RegExp(find, 'g');
|
||||
|
||||
return str.replace(reg, replace);
|
||||
};
|
||||
188
nodered/rootfs/data/node_modules/html-to-text/lib/html-to-text.js
generated
vendored
Normal file
188
nodered/rootfs/data/node_modules/html-to-text/lib/html-to-text.js
generated
vendored
Normal file
@@ -0,0 +1,188 @@
|
||||
var includes = require('lodash/includes');
|
||||
var trimEnd = require('lodash/trimEnd');
|
||||
var htmlparser = require('htmlparser2');
|
||||
|
||||
var helper = require('./helper');
|
||||
var defaultFormat = require('./formatter');
|
||||
|
||||
// Node types whose children are not walked.
var SKIP_TYPES = ['style', 'script'];
|
||||
|
||||
/**
 * Converts an HTML string to plain text.
 *
 * The given options are shallow-merged over the defaults below. The HTML
 * is parsed with htmlparser2, and the sub-tree selected by each entry of
 * `baseElement` is rendered in order and concatenated.
 *
 * @param {string} html - The HTML to convert.
 * @param {Object} [options] - Overrides for the default options.
 * @returns {string} The plain text with trailing whitespace removed.
 */
function htmlToText(html, options) {
  var settings = Object.assign({
    wordwrap: 80,
    tables: [],
    preserveNewlines: false,
    uppercaseHeadings: true,
    singleNewLineParagraphs: false,
    hideLinkHrefIfSameAsText: false,
    linkHrefBaseUrl: null,
    noLinkBrackets: false,
    noAnchorUrl: true,
    baseElement: 'body',
    returnDomByDefault: true,
    format: {},
    decodeOptions: {
      isAttributeValue: false,
      strict: false
    },
    longWordSplit: {
      wrapCharacters: [],
      forceWrapOnLimit: false
    },
    unorderedListItemPrefix: ' * '
  }, options || {});

  // The callback is intentionally empty: the parsed DOM is read from
  // `handler.dom` below.
  var handler = new htmlparser.DefaultHandler(function (error, dom) {

  }, {
    verbose: true
  });
  var parser = new htmlparser.Parser(handler);
  parser.parseComplete(html);

  settings.lineCharCount = 0;

  var baseElements = Array.isArray(settings.baseElement)
    ? settings.baseElement
    : [settings.baseElement];

  var text = baseElements.reduce(function (accumulated, baseElement) {
    return accumulated + walk(filterBody(handler.dom, settings, baseElement), settings);
  }, '');

  return trimEnd(text);
}
|
||||
|
||||
/**
 * Finds the first element (depth-first, in document order) that matches
 * the `baseElement` search tag: same tag name, carrying every class and
 * every id named in the search tag.
 *
 * @param {Array<Object>} dom - The parsed DOM nodes to search.
 * @param {Object} options - Conversion options; reads `returnDomByDefault`.
 * @param {string} baseElement - Search tag such as `div.content#main`.
 * @returns {?Array<Object>} A one-element array holding the match. Without
 *   a match: the whole `dom` when `returnDomByDefault` is set, else null.
 */
function filterBody(dom, options, baseElement) {
  var selector = helper.splitCssSearchTag(baseElement);

  // Space-separated values of an attribute, or [] when it is absent.
  function attributeTokens(elem, key) {
    return elem.attribs && elem.attribs[key] ? elem.attribs[key].split(" ") : [];
  }

  function matchesSelector(elem) {
    if (elem.name !== selector.element) {
      return false;
    }
    var documentClasses = attributeTokens(elem, 'class');
    var documentIds = attributeTokens(elem, 'id');
    return selector.classes.every(function (val) { return documentClasses.indexOf(val) >= 0; }) &&
      selector.ids.every(function (val) { return documentIds.indexOf(val) >= 0; });
  }

  // Returns the first matching node in `nodes` or their descendants.
  function findFirst(nodes) {
    var found = null;
    nodes.some(function (node) {
      if (matchesSelector(node)) {
        found = node;
      } else if (node.children) {
        found = findFirst(node.children);
      }
      return found !== null;
    });
    return found;
  }

  var match = findFirst(dom);
  var result = match ? [match] : null;
  return options.returnDomByDefault ? result || dom : result;
}
|
||||
|
||||
/**
 * Decides whether a `<table>` should be formatted as a table.
 *
 * `tables` is either `true` (every table qualifies) or a list of selectors
 * of the form `.className` or `#id`. A table qualifies when one of its
 * classes or its id is listed. The `class` attribute is matched token by
 * token, so `class="a b"` matches `.a`; the whole attribute value is still
 * compared as well, as before.
 *
 * @param {?Object} attr - The table element's attributes.
 * @param {boolean|string[]} tables - `true` or the list of selectors.
 * @returns {boolean|*} Whether the table qualifies; when `attr` is falsy,
 *   `attr` itself is returned.
 */
function containsTable(attr, tables) {
  if (tables === true) return true;

  // Selectors starting with `prefix`, with that prefix removed.
  function selectorsWithPrefix(prefix) {
    return tables
      .filter(function(key) {
        return key.startsWith(prefix);
      })
      .map(function(key) {
        return key.substr(1);
      });
  }

  var classes = selectorsWithPrefix('.');
  var ids = selectorsWithPrefix('#');

  if (!attr) {
    return attr;
  }

  var className = attr['class'];
  // An element may carry several whitespace-separated classes.
  var classTokens = typeof className === 'string'
    ? className.split(/\s+/).filter(Boolean)
    : [];
  var classMatches = classes.indexOf(className) !== -1 ||
    classTokens.some(function(token) {
      return classes.indexOf(token) !== -1;
    });

  return classMatches || ids.indexOf(attr['id']) !== -1;
}
|
||||
|
||||
/**
 * Renders a list of DOM nodes to text.
 *
 * Tags with a dedicated formatter are rendered through `options.format`
 * merged over the default formatters; any other tag, and any node type not
 * listed in SKIP_TYPES, is rendered by walking its children. After each
 * node `options.lineCharCount` is set to the length of the last output
 * line.
 *
 * @param {?Array<Object>} dom - The nodes to render.
 * @param {Object} options - Conversion options; `lineCharCount` is updated.
 * @param {string} [result] - Text rendered so far; defaults to '' when the
 *   argument is omitted.
 * @returns {string} `result` with the rendered nodes appended.
 */
function walk(dom, options, result) {
  if (arguments.length < 3) {
    result = '';
  }
  var trailingWhitespace = /\s$/;
  var format = Object.assign({}, defaultFormat, options.format);

  if (!dom) {
    return result;
  }

  // Tags rendered as `format[name](elem, walk, options)`.
  var blockFormatters = {
    p: 'paragraph',
    h1: 'heading',
    h2: 'heading',
    h3: 'heading',
    h4: 'heading',
    h5: 'heading',
    h6: 'heading',
    br: 'lineBreak',
    hr: 'horizontalLine',
    ul: 'unorderedList',
    ol: 'orderedList',
    blockquote: 'blockquote'
  };

  // Renders the children of `elem`, continuing from `text`.
  function descend(elem, text) {
    return walk(elem.children || [], options, text);
  }

  // Renders one tag and returns `text` with its output appended.
  function renderTag(elem, text) {
    var tagName = elem.name.toLowerCase();

    if (Object.prototype.hasOwnProperty.call(blockFormatters, tagName)) {
      return text + format[blockFormatters[tagName]](elem, walk, options);
    }
    if (tagName === 'img') {
      return text + format.image(elem, options);
    }
    if (tagName === 'a') {
      // Inline element needs its leading space to be trimmed if the text
      // so far ends with whitespace.
      elem.trimLeadingSpace = trailingWhitespace.test(text);
      return text + format.anchor(elem, walk, options);
    }
    if (tagName === 'pre') {
      var preOptions = Object.assign({}, options, { isInPre: true });
      return text + format.paragraph(elem, walk, preOptions);
    }
    if (tagName === 'table' && containsTable(elem.attribs, options.tables)) {
      return text + format.table(elem, walk, options);
    }
    // Unselected tables and all other tags: just render the children.
    return descend(elem, text);
  }

  dom.forEach(function(elem) {
    if (elem.type === 'tag') {
      result = renderTag(elem, result);
    } else if (elem.type === 'text') {
      if (elem.data !== '\r\n') {
        // Text needs its leading space to be trimmed if `result`
        // currently ends with whitespace.
        elem.trimLeadingSpace = trailingWhitespace.test(result);
        result += format.text(elem, options);
      }
    } else if (!includes(SKIP_TYPES, elem.type)) {
      result = descend(elem, result);
    }

    // Number of characters on the current (last) output line.
    options.lineCharCount = result.length - (result.lastIndexOf('\n') + 1);
  });

  return result;
}
|
||||
|
||||
exports.fromString = function(str, options) {
|
||||
return htmlToText(str, options || {});
|
||||
};
|
||||
Reference in New Issue
Block a user