编辑:我实际写了一些代码。此算法允许在单词内部的字符之间出现HTML标记,并允许在单词之间出现空白字符和HTML标记。
// Source fragment of one HTML tag. `[^>]` keeps a single "tag" from
// stretching over several tags and swallowing the text between them.
const TAG_PATTERN = '<[^>]+>';

/**
 * Escapes every regular-expression metacharacter in `text` so that it is
 * matched literally when embedded in a pattern.
 *
 * @param {string} text - Raw text to embed in a regular expression.
 * @returns {string} `text` with all regex syntax characters backslash-escaped.
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Builds the source of a regular expression that finds `needle` in HTML,
 * tolerating tags between the characters of a word and any mixture of
 * whitespace and tags between words.
 *
 * @param {string} needle - Plain text to look for; words are separated by whitespace.
 * @returns {string} Pattern source, or `''` when the needle contains no words.
 */
function buildTagTolerantPattern(needle) {
  // Drop the empty "words" produced by leading/trailing whitespace.
  const words = needle.split(/\s+/).filter((word) => word.length > 0);

  const wordPatterns = words.map((word) =>
    // Spread iterates by code point, so a tag is never allowed in the middle
    // of a surrogate pair. Each character is escaped to match literally.
    [...word].map((char) => escapeRegExp(char)).join(`(?:${TAG_PATTERN})*`),
  );

  // Between words, require at least one whitespace character or tag.
  return wordPatterns.join(`(?:\\s|${TAG_PATTERN})+`);
}

/**
 * Searches `haystack` (HTML) for the plain-text `needle`, ignoring markup.
 *
 * @param {string} haystack - HTML source to search in.
 * @param {string} needle - Plain text to search for.
 * @returns {RegExpMatchArray | null} The match (with `index` set to the offset
 *   in `haystack`), or `null` when nothing matches or the needle has no words.
 */
function findInHtml(haystack, needle) {
  const pattern = buildTagTolerantPattern(needle);
  if (pattern === '') {
    // An empty pattern would trivially "match" at offset 0.
    return null;
  }
  return haystack.match(new RegExp(pattern));
}

const haystack = '<html>This, <b>that</b>, and\nthe<i>other</i>.</html>';
const needle = 'This, that, and the other.';

// Find the match, if any
const matches = findInHtml(haystack, needle);
console.log(matches);

// Report results
if (matches) {
  const [match] = matches;
  const offset = matches.index;
  console.log('Found match!');
  console.log(`Offset: ${offset}`);
  console.log(`Length: ${match.length}`);
  console.log(match);
}