637 lines
17 KiB
JavaScript
Raw Normal View History

2020-10-17 18:42:50 +02:00
'use strict';
////////////////////////////////////////////////////////////////////////////////
// Helpers
// Merge objects
//
function assign(obj /*from1, from2, from3, ...*/) {
var sources = Array.prototype.slice.call(arguments, 1);
sources.forEach(function (source) {
if (!source) { return; }
Object.keys(source).forEach(function (key) {
obj[key] = source[key];
});
});
return obj;
}
function _class(obj) { return Object.prototype.toString.call(obj); }
function isString(obj) { return _class(obj) === '[object String]'; }
function isObject(obj) { return _class(obj) === '[object Object]'; }
function isRegExp(obj) { return _class(obj) === '[object RegExp]'; }
function isFunction(obj) { return _class(obj) === '[object Function]'; }
function escapeRE(str) { return str.replace(/[.?*+^$[\]\\(){}|-]/g, '\\$&'); }
////////////////////////////////////////////////////////////////////////////////
var defaultOptions = {
fuzzyLink: true,
fuzzyEmail: true,
fuzzyIP: false
};
function isOptionsObj(obj) {
return Object.keys(obj || {}).reduce(function (acc, k) {
return acc || defaultOptions.hasOwnProperty(k);
}, false);
}
var defaultSchemas = {
'http:': {
validate: function (text, pos, self) {
var tail = text.slice(pos);
if (!self.re.http) {
// compile lazily, because "host"-containing variables can change on tlds update.
self.re.http = new RegExp(
'^\\/\\/' + self.re.src_auth + self.re.src_host_port_strict + self.re.src_path, 'i'
);
}
if (self.re.http.test(tail)) {
return tail.match(self.re.http)[0].length;
}
return 0;
}
},
'https:': 'http:',
'ftp:': 'http:',
'//': {
validate: function (text, pos, self) {
var tail = text.slice(pos);
if (!self.re.no_http) {
// compile lazily, because "host"-containing variables can change on tlds update.
self.re.no_http = new RegExp(
'^' +
self.re.src_auth +
// Don't allow single-level domains, because of false positives like '//test'
// with code comments
'(?:localhost|(?:(?:' + self.re.src_domain + ')\\.)+' + self.re.src_domain_root + ')' +
self.re.src_port +
self.re.src_host_terminator +
self.re.src_path,
'i'
);
}
if (self.re.no_http.test(tail)) {
// should not be `://` & `///`, that protects from errors in protocol name
if (pos >= 3 && text[pos - 3] === ':') { return 0; }
if (pos >= 3 && text[pos - 3] === '/') { return 0; }
return tail.match(self.re.no_http)[0].length;
}
return 0;
}
},
'mailto:': {
validate: function (text, pos, self) {
var tail = text.slice(pos);
if (!self.re.mailto) {
self.re.mailto = new RegExp(
'^' + self.re.src_email_name + '@' + self.re.src_host_strict, 'i'
);
}
if (self.re.mailto.test(tail)) {
return tail.match(self.re.mailto)[0].length;
}
return 0;
}
}
};
/*eslint-disable max-len*/
// RE pattern for 2-character tlds (autogenerated by ./support/tlds_2char_gen.js)
var tlds_2ch_src_re = 'a[cdefgilmnoqrstuwxz]|b[abdefghijmnorstvwyz]|c[acdfghiklmnoruvwxyz]|d[ejkmoz]|e[cegrstu]|f[ijkmor]|g[abdefghilmnpqrstuwy]|h[kmnrtu]|i[delmnoqrst]|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]|m[acdeghklmnopqrstuvwxyz]|n[acefgilopruz]|om|p[aefghklmnrstwy]|qa|r[eosuw]|s[abcdeghijklmnortuvxyz]|t[cdfghjklmnortvwz]|u[agksyz]|v[aceginu]|w[fs]|y[et]|z[amw]';
// DON'T try to make PRs with changes. Extend TLDs with LinkifyIt.tlds() instead
var tlds_default = 'biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф'.split('|');
/*eslint-enable max-len*/
////////////////////////////////////////////////////////////////////////////////
function resetScanCache(self) {
self.__index__ = -1;
self.__text_cache__ = '';
}
function createValidator(re) {
return function (text, pos) {
var tail = text.slice(pos);
if (re.test(tail)) {
return tail.match(re)[0].length;
}
return 0;
};
}
function createNormalizer() {
return function (match, self) {
self.normalize(match);
};
}
// Schemas compiler. Build regexps.
//
function compile(self) {
// Load & clone RE patterns.
var re = self.re = require('./lib/re')(self.__opts__);
// Define dynamic patterns
var tlds = self.__tlds__.slice();
self.onCompile();
if (!self.__tlds_replaced__) {
tlds.push(tlds_2ch_src_re);
}
tlds.push(re.src_xn);
re.src_tlds = tlds.join('|');
function untpl(tpl) { return tpl.replace('%TLDS%', re.src_tlds); }
re.email_fuzzy = RegExp(untpl(re.tpl_email_fuzzy), 'i');
re.link_fuzzy = RegExp(untpl(re.tpl_link_fuzzy), 'i');
re.link_no_ip_fuzzy = RegExp(untpl(re.tpl_link_no_ip_fuzzy), 'i');
re.host_fuzzy_test = RegExp(untpl(re.tpl_host_fuzzy_test), 'i');
//
// Compile each schema
//
var aliases = [];
self.__compiled__ = {}; // Reset compiled data
function schemaError(name, val) {
throw new Error('(LinkifyIt) Invalid schema "' + name + '": ' + val);
}
Object.keys(self.__schemas__).forEach(function (name) {
var val = self.__schemas__[name];
// skip disabled methods
if (val === null) { return; }
var compiled = { validate: null, link: null };
self.__compiled__[name] = compiled;
if (isObject(val)) {
if (isRegExp(val.validate)) {
compiled.validate = createValidator(val.validate);
} else if (isFunction(val.validate)) {
compiled.validate = val.validate;
} else {
schemaError(name, val);
}
if (isFunction(val.normalize)) {
compiled.normalize = val.normalize;
} else if (!val.normalize) {
compiled.normalize = createNormalizer();
} else {
schemaError(name, val);
}
return;
}
if (isString(val)) {
aliases.push(name);
return;
}
schemaError(name, val);
});
//
// Compile postponed aliases
//
aliases.forEach(function (alias) {
if (!self.__compiled__[self.__schemas__[alias]]) {
// Silently fail on missed schemas to avoid errons on disable.
// schemaError(alias, self.__schemas__[alias]);
return;
}
self.__compiled__[alias].validate =
self.__compiled__[self.__schemas__[alias]].validate;
self.__compiled__[alias].normalize =
self.__compiled__[self.__schemas__[alias]].normalize;
});
//
// Fake record for guessed links
//
self.__compiled__[''] = { validate: null, normalize: createNormalizer() };
//
// Build schema condition
//
var slist = Object.keys(self.__compiled__)
.filter(function (name) {
// Filter disabled & fake schemas
return name.length > 0 && self.__compiled__[name];
})
.map(escapeRE)
.join('|');
// (?!_) cause 1.5x slowdown
self.re.schema_test = RegExp('(^|(?!_)(?:[><\uff5c]|' + re.src_ZPCc + '))(' + slist + ')', 'i');
self.re.schema_search = RegExp('(^|(?!_)(?:[><\uff5c]|' + re.src_ZPCc + '))(' + slist + ')', 'ig');
self.re.pretest = RegExp(
'(' + self.re.schema_test.source + ')|(' + self.re.host_fuzzy_test.source + ')|@',
'i'
);
//
// Cleanup
//
resetScanCache(self);
}
/**
* class Match
*
* Match result. Single element of array, returned by [[LinkifyIt#match]]
**/
function Match(self, shift) {
var start = self.__index__,
end = self.__last_index__,
text = self.__text_cache__.slice(start, end);
/**
* Match#schema -> String
*
* Prefix (protocol) for matched string.
**/
this.schema = self.__schema__.toLowerCase();
/**
* Match#index -> Number
*
* First position of matched string.
**/
this.index = start + shift;
/**
* Match#lastIndex -> Number
*
* Next position after matched string.
**/
this.lastIndex = end + shift;
/**
* Match#raw -> String
*
* Matched string.
**/
this.raw = text;
/**
* Match#text -> String
*
* Notmalized text of matched string.
**/
this.text = text;
/**
* Match#url -> String
*
* Normalized url of matched string.
**/
this.url = text;
}
function createMatch(self, shift) {
var match = new Match(self, shift);
self.__compiled__[match.schema].normalize(match, self);
return match;
}
/**
* class LinkifyIt
**/
/**
* new LinkifyIt(schemas, options)
* - schemas (Object): Optional. Additional schemas to validate (prefix/validator)
* - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
*
* Creates new linkifier instance with optional additional schemas.
* Can be called without `new` keyword for convenience.
*
* By default understands:
*
* - `http(s)://...` , `ftp://...`, `mailto:...` & `//...` links
* - "fuzzy" links and emails (example.com, foo@bar.com).
*
* `schemas` is an object, where each key/value describes protocol/rule:
*
* - __key__ - link prefix (usually, protocol name with `:` at the end, `skype:`
* for example). `linkify-it` makes shure that prefix is not preceeded with
* alphanumeric char and symbols. Only whitespaces and punctuation allowed.
* - __value__ - rule to check tail after link prefix
* - _String_ - just alias to existing rule
* - _Object_
* - _validate_ - validator function (should return matched length on success),
* or `RegExp`.
* - _normalize_ - optional function to normalize text & url of matched result
* (for example, for @twitter mentions).
*
* `options`:
*
* - __fuzzyLink__ - recognige URL-s without `http(s):` prefix. Default `true`.
* - __fuzzyIP__ - allow IPs in fuzzy links above. Can conflict with some texts
* like version numbers. Default `false`.
* - __fuzzyEmail__ - recognize emails without `mailto:` prefix.
*
**/
function LinkifyIt(schemas, options) {
if (!(this instanceof LinkifyIt)) {
return new LinkifyIt(schemas, options);
}
if (!options) {
if (isOptionsObj(schemas)) {
options = schemas;
schemas = {};
}
}
this.__opts__ = assign({}, defaultOptions, options);
// Cache last tested result. Used to skip repeating steps on next `match` call.
this.__index__ = -1;
this.__last_index__ = -1; // Next scan position
this.__schema__ = '';
this.__text_cache__ = '';
this.__schemas__ = assign({}, defaultSchemas, schemas);
this.__compiled__ = {};
this.__tlds__ = tlds_default;
this.__tlds_replaced__ = false;
this.re = {};
compile(this);
}
/** chainable
* LinkifyIt#add(schema, definition)
* - schema (String): rule name (fixed pattern prefix)
* - definition (String|RegExp|Object): schema definition
*
* Add new rule definition. See constructor description for details.
**/
LinkifyIt.prototype.add = function add(schema, definition) {
this.__schemas__[schema] = definition;
compile(this);
return this;
};
/** chainable
* LinkifyIt#set(options)
* - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
*
* Set recognition options for links without schema.
**/
LinkifyIt.prototype.set = function set(options) {
this.__opts__ = assign(this.__opts__, options);
return this;
};
/**
* LinkifyIt#test(text) -> Boolean
*
* Searches linkifiable pattern and returns `true` on success or `false` on fail.
**/
LinkifyIt.prototype.test = function test(text) {
// Reset scan cache
this.__text_cache__ = text;
this.__index__ = -1;
if (!text.length) { return false; }
var m, ml, me, len, shift, next, re, tld_pos, at_pos;
// try to scan for link with schema - that's the most simple rule
if (this.re.schema_test.test(text)) {
re = this.re.schema_search;
re.lastIndex = 0;
while ((m = re.exec(text)) !== null) {
len = this.testSchemaAt(text, m[2], re.lastIndex);
if (len) {
this.__schema__ = m[2];
this.__index__ = m.index + m[1].length;
this.__last_index__ = m.index + m[0].length + len;
break;
}
}
}
if (this.__opts__.fuzzyLink && this.__compiled__['http:']) {
// guess schemaless links
tld_pos = text.search(this.re.host_fuzzy_test);
if (tld_pos >= 0) {
// if tld is located after found link - no need to check fuzzy pattern
if (this.__index__ < 0 || tld_pos < this.__index__) {
if ((ml = text.match(this.__opts__.fuzzyIP ? this.re.link_fuzzy : this.re.link_no_ip_fuzzy)) !== null) {
shift = ml.index + ml[1].length;
if (this.__index__ < 0 || shift < this.__index__) {
this.__schema__ = '';
this.__index__ = shift;
this.__last_index__ = ml.index + ml[0].length;
}
}
}
}
}
if (this.__opts__.fuzzyEmail && this.__compiled__['mailto:']) {
// guess schemaless emails
at_pos = text.indexOf('@');
if (at_pos >= 0) {
// We can't skip this check, because this cases are possible:
// 192.168.1.1@gmail.com, my.in@example.com
if ((me = text.match(this.re.email_fuzzy)) !== null) {
shift = me.index + me[1].length;
next = me.index + me[0].length;
if (this.__index__ < 0 || shift < this.__index__ ||
(shift === this.__index__ && next > this.__last_index__)) {
this.__schema__ = 'mailto:';
this.__index__ = shift;
this.__last_index__ = next;
}
}
}
}
return this.__index__ >= 0;
};
/**
* LinkifyIt#pretest(text) -> Boolean
*
* Very quick check, that can give false positives. Returns true if link MAY BE
* can exists. Can be used for speed optimization, when you need to check that
* link NOT exists.
**/
LinkifyIt.prototype.pretest = function pretest(text) {
return this.re.pretest.test(text);
};
/**
* LinkifyIt#testSchemaAt(text, name, position) -> Number
* - text (String): text to scan
* - name (String): rule (schema) name
* - position (Number): text offset to check from
*
* Similar to [[LinkifyIt#test]] but checks only specific protocol tail exactly
* at given position. Returns length of found pattern (0 on fail).
**/
LinkifyIt.prototype.testSchemaAt = function testSchemaAt(text, schema, pos) {
// If not supported schema check requested - terminate
if (!this.__compiled__[schema.toLowerCase()]) {
return 0;
}
return this.__compiled__[schema.toLowerCase()].validate(text, pos, this);
};
/**
* LinkifyIt#match(text) -> Array|null
*
* Returns array of found link descriptions or `null` on fail. We strongly
* recommend to use [[LinkifyIt#test]] first, for best speed.
*
* ##### Result match description
*
* - __schema__ - link schema, can be empty for fuzzy links, or `//` for
* protocol-neutral links.
* - __index__ - offset of matched text
* - __lastIndex__ - index of next char after mathch end
* - __raw__ - matched text
* - __text__ - normalized text
* - __url__ - link, generated from matched text
**/
LinkifyIt.prototype.match = function match(text) {
var shift = 0, result = [];
// Try to take previous element from cache, if .test() called before
if (this.__index__ >= 0 && this.__text_cache__ === text) {
result.push(createMatch(this, shift));
shift = this.__last_index__;
}
// Cut head if cache was used
var tail = shift ? text.slice(shift) : text;
// Scan string until end reached
while (this.test(tail)) {
result.push(createMatch(this, shift));
tail = tail.slice(this.__last_index__);
shift += this.__last_index__;
}
if (result.length) {
return result;
}
return null;
};
/** chainable
* LinkifyIt#tlds(list [, keepOld]) -> this
* - list (Array): list of tlds
* - keepOld (Boolean): merge with current list if `true` (`false` by default)
*
* Load (or merge) new tlds list. Those are user for fuzzy links (without prefix)
* to avoid false positives. By default this algorythm used:
*
* - hostname with any 2-letter root zones are ok.
* - biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф
* are ok.
* - encoded (`xn--...`) root zones are ok.
*
* If list is replaced, then exact match for 2-chars root zones will be checked.
**/
LinkifyIt.prototype.tlds = function tlds(list, keepOld) {
list = Array.isArray(list) ? list : [ list ];
if (!keepOld) {
this.__tlds__ = list.slice();
this.__tlds_replaced__ = true;
compile(this);
return this;
}
this.__tlds__ = this.__tlds__.concat(list)
.sort()
.filter(function (el, idx, arr) {
return el !== arr[idx - 1];
})
.reverse();
compile(this);
return this;
};
/**
* LinkifyIt#normalize(match)
*
* Default normalizer (if schema does not define it's own).
**/
LinkifyIt.prototype.normalize = function normalize(match) {
// Do minimal possible changes by default. Need to collect feedback prior
// to move forward https://github.com/markdown-it/linkify-it/issues/1
if (!match.schema) { match.url = 'http://' + match.url; }
if (match.schema === 'mailto:' && !/^mailto:/i.test(match.url)) {
match.url = 'mailto:' + match.url;
}
};
/**
* LinkifyIt#onCompile()
*
* Override to modify basic RegExp-s.
**/
LinkifyIt.prototype.onCompile = function onCompile() {
};
module.exports = LinkifyIt;