<?php
error_reporting(-1);
// http://stackoverflow.com/questions/4592148/parsing-html-with-xpath-and-php
//
// Demo script: copy every <p> element that is neither inside a <table> nor
// located after an <h1> from a source document into a fresh document, then
// print the result.

// Sample input. A nowdoc is used so the markup is taken literally (no variable
// interpolation can ever occur inside the fixture).
$xml = <<<'XML'
<?xml version="1.0" encoding="UTF-8"?>
<body>
<p>Not in a table and before first h1</p>
<table>
<p>In a table</p>
</table>
<p>This one is good too</p>
<h1>First Heading</h1>
<p>Not in a table but to late in document</p>
<table>
<p>In a table</p>
</table>
<h1>Second Heading</h1>
<p>Not in a table but to late in document</p>
<table>
<p>In a table</p>
</table>
</body>
XML;

// src document
$dom = new DOMDocument;
if ($dom->loadXML($xml) === false) {
    // Without this check a parse failure would silently continue with an empty
    // source document and the script would print nothing but an XML declaration.
    throw new RuntimeException('Unable to parse the source XML document.');
}

// dest document
$new = new DOMDocument;
$new->formatOutput = true;

// xpath setup:
//   not(preceding::h1)   -> no <h1> occurs earlier in document order
//   not(ancestor::table) -> the <p> is not nested inside a <table>
// (A positional [1] on preceding::h1 would not change the result of not().)
$xp = new DOMXPath($dom);
$expr = '//p[not(preceding::h1) and not(ancestor::table)]';

// importing nodes into dest document
// importNode() makes a deep copy owned by $new; the source document is left
// untouched. Each match is appended directly to the document node, so the
// output is a sequence of top-level <p> elements rather than a single-root
// XML document.
foreach ($xp->query($expr) as $node) {
    $new->appendChild($new->importNode($node, true));
}

// output dest document
echo $new->saveXML();