DOMDocument
的
加载HTML()
方法可以很好地处理无效的HTML片段,因此您可以这样使用DOMXPath:
<?php
$html = 'text1
<br>
text2
<br/>
text3
<br/>
text4
<br>
text5';
echo "<pre>" . htmlentities($html) . "</pre><br>\n";
$dom = new DOMDocument();
// loadHtml() needs mb_convert_encoding() to work well with UTF-8 encoding
$dom->loadHtml(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
$xpath = new DOMXPath($dom);
echo "Text nodes preceding br:";
foreach($xpath->query('//text()[(following::br)]') as $node)
{
var_dump($node->wholeText);
}
echo "Text nodes following br:";
foreach($xpath->query('//text()[(preceding::br)]') as $node)
{
var_dump($node->wholeText);
}
echo "Text nodes following OR preceding br:";
foreach($xpath->query('//text()[(following::br) or (preceding::br)]') as $node)
{
var_dump($node->wholeText);
}