// Example: print the src of every <img> and the href of every <a> on a page.
// file_get_html() is not a PHP built-in; the HTML parser library that defines
// it must already be loaded.

// Create DOM from URL or file
$html = file_get_html('http://www.example.com/');

// Find all images
foreach ($html->find('img') as $element) {
    echo $element->src . '<br>';
}

// Find all links
foreach ($html->find('a') as $element) {
    echo $element->href . '<br>';
}
如何修改 HTML 元素:
// Example: change an attribute and the inner text of elements parsed from a string.
// str_get_html() is not a PHP built-in; the HTML parser library that defines
// it must already be loaded.

// Create DOM from string
$html = str_get_html('<div id="hello">Hello</div><div id="world">World</div>');

// Set the class attribute on the div at index 1
$html->find('div', 1)->class = 'bar';

// Replace the inner text of the div whose id is "hello"
$html->find('div[id=hello]', 0)->innertext = 'foo';

// Print the modified document
echo $html;
从 HTML 中提取内容:
// Dump contents (without tags) from HTML
echo file_get_html('http://www.google.com/')->plaintext;
抓取 Slashdot:
// Example: collect the title, intro and details text of every article block on a page.

// Create DOM from URL
$html = file_get_html('http://slashdot.org/');

// Start with an empty list so print_r() below receives a defined value
// even when no article block matches.
$articles = array();

// Find all article blocks
foreach ($html->find('div.article') as $article) {
    $item['title'] = $article->find('div.title', 0)->plaintext;
    $item['intro'] = $article->find('div.intro', 0)->plaintext;
    $item['details'] = $article->find('div.details', 0)->plaintext;
    $articles[] = $item;
}

print_r($articles);
FluentDOM为PHP中的DOMDocument提供了一个类似jQuery的流畅XML接口。选择器使用XPath或CSS编写(CSS通过CSS到XPath转换器处理)。当前版本扩展了DOM,实现了标准接口,并添加了DOM Living Standard中的功能。FluentDOM可以加载JSON、CSV、JsonML、RabbitFish等格式,并可以通过Composer安装。
namespace PowerTools;

// Example: build DOM_Query objects from several kinds of input, then select
// and modify elements with chained, jQuery-style calls.

// Get file content
$htmlcode = file_get_contents('https://github.com');

// Define your DOMCrawler based on file string
$H = new DOM_Query($htmlcode);

// Define your DOMCrawler based on an existing DOM_Query instance
$H = new DOM_Query($H->select('body'));

// Passing a string (CSS selector)
$s = $H->select('div.foo');

// Passing an element object (DOM Element)
// NOTE: $documentBody is not defined anywhere in this excerpt; it stands for
// a DOM element obtained elsewhere.
$s = $H->select($documentBody);

// Passing a DOM Query object
$s = $H->select($H->select('p + p'));

// Select the body tag
$body = $H->select('body');

// Combine different classes as one selector to get all site blocks
$siteblocks = $body->select('.site-header, .masthead, .site-body, .site-footer');

// Nest your methods just like you would with jQuery
$siteblocks->select('button')->add('span')->addClass('icon icon-printer');

// Use a lambda function to set the text of all site blocks
$siteblocks->text(function ($i, $val) {
    return $i . " - " . $val->attr('class');
});

// Append the following HTML to all site blocks
$siteblocks->append('<div class="site-center"></div>');

// Use a descendant selector to select the site's footer
$sitefooter = $body->select('.site-footer > .site-center');

// Set some attributes for the site's footer
$sitefooter->attr(array('id' => 'aweeesome', 'data-val' => 'see'));

// Use a lambda function to set the attributes of all site blocks
$siteblocks->attr('data-val', function ($i, $val) {
    return $i . " - " . $val->attr('class') . " - photo by Kelly Clark";
});

// Select the parent of the site's footer
$sitefooterparent = $sitefooter->parent();

// Remove the class of all i-tags within the site's footer's parent
$sitefooterparent->select('i')->removeAttr('class');

// Wrap the site's footer within two new selectors
$sitefooter->wrap('<section><div class="footer-wrapper"></div></section>');
[...]
// Example: load a small HTML5 document and print the inner HTML of its <h1> element.
$dom = new IvoPetkov\HTML5DOMDocument();

$markup = '<!DOCTYPE html><html><body><h1>Hello</h1><div class="content">This is some text</div></body></html>';
$dom->loadHTML($markup);

// Look up the heading with a CSS selector, then output its contents
$heading = $dom->querySelector('h1');
echo $heading->innerHTML;
<pre><?php
// Example: parse a remote page, then print a meta tag's content attribute
// and the elements matched by a second selector.
include "ScarletsQuery.php";

// Load the HTML content and parse it
$html = file_get_contents('https://www.lipsum.com');
$dom = Scarlets\Library\MarkupLanguage::parseText($html);

// Select meta tag on the HTML header
$description = $dom->selector('head meta[name="description"]')[0];

// Get 'content' attribute value from meta tag
print_r($description->attr('content'));

// Select the elements matching '#Content p'
$description = $dom->selector('#Content p');

// Get element array
print_r($description->view);