// Lists every hyperlink in an HTML file: for each <a> element, prints the
// link text followed by its href, separated by <br /> tags.

// Read the HTML document to scan. file_get_contents() returns false on
// failure, so stop here instead of handing a non-string to the parser.
$html = file_get_contents('path/to/file.html');
if ($html === false) {
    throw new RuntimeException('Unable to read path/to/file.html');
}

// Create a new DOM document.
$dom = new DOMDocument();

// Real-world HTML is rarely well-formed. Let libxml collect its parse
// warnings internally rather than silencing everything with the @ operator,
// which would also hide unrelated errors.
$previousLibxmlSetting = libxml_use_internal_errors(true);

// loadHTML() rejects an empty source, so only parse when there is content;
// an empty file simply yields no links.
if ($html !== '') {
    $dom->loadHTML($html);
}

// Discard the collected warnings and restore the caller's libxml setting.
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

// Get all 'a' elements. Other tag names ('img', 'li', 'table', etc.) can be
// passed instead to extract other tags.
$links = $dom->getElementsByTagName('a');

foreach ($links as $link) {
    // The values come from the parsed document and are written into HTML
    // output, so escape them to keep markup in the source file from being
    // injected into the page.
    echo htmlspecialchars((string) $link->nodeValue, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); // the link text
    echo htmlspecialchars($link->getAttribute('href'), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'), '<br />'; // the link target
}