等效于 textNodes 的 getElementsByTagName()

有没有什么方法可以得到一个文档中所有 textNode对象的集合?

getElementsByTagName()很适合元素,但是 textNode不是元素。

更新: 我意识到这可以通过遍历 DOM 来实现——正如下面许多人建议的那样。我知道如何编写一个 DOM-walker 函数来查看文档中的每个节点。我希望有一些浏览器本地化的方式来做到这一点。毕竟有点奇怪,我可以得到所有的 <input>s 与一个单一的内置调用,但不是所有的 textNode

19103 次浏览

Update:

I have outlined some basic performance tests for each of these 6 methods over 1000 runs. getElementsByTagName is the fastest but it does a half-assed job, as it does not select all elements, but only one particular type of tag ( i think p) and blindly assumes that its firstChild is a text element. It might be little flawed but its there for demonstration purpose and comparing its performance to TreeWalker. Run the tests yourselves on jsfiddle to see the results.

  1. Using a TreeWalker
  2. Custom Iterative Traversal
  3. Custom Recursive Traversal
  4. Xpath query
  5. querySelectorAll
  6. getElementsByTagName

Let's assume for a moment that there is a method that allows you to get all Text nodes natively. You would still have to traverse each resulting text node and call node.nodeValue to get the actual text as you would do with any DOM Node. So the issue of performance is not with iterating through text nodes, but iterating through all nodes that are not text and checking their type. I would argue (based on the results) that TreeWalker performs just as fast as getElementsByTagName, if not faster (even with getElementsByTagName playing handicapped).

Ran each test 1000 times.


Method                  Total ms        Average ms
--------------------------------------------------
document.TreeWalker          301            0.301
Iterative Traverser          769            0.769
Recursive Traverser         7352            7.352
XPath query                 1849            1.849
querySelectorAll            1725            1.725
getElementsByTagName         212            0.212

Source for each method:

TreeWalker

function nativeTreeWalker() {
var walker = document.createTreeWalker(
document.body,
NodeFilter.SHOW_TEXT,
null,
false
);


var node;
var textNodes = [];


while(node = walker.nextNode()) {
textNodes.push(node.nodeValue);
}
}

Recursive Tree Traversal

function customRecursiveTreeWalker() {
var result = [];


(function findTextNodes(current) {
for(var i = 0; i < current.childNodes.length; i++) {
var child = current.childNodes[i];
if(child.nodeType == 3) {
result.push(child.nodeValue);
}
else {
findTextNodes(child);
}
}
})(document.body);
}

Iterative Tree Traversal

function customIterativeTreeWalker() {
var result = [];
var root = document.body;


var node = root.childNodes[0];
while(node != null) {
if(node.nodeType == 3) { /* Fixed a bug here. Thanks @theazureshadow */
result.push(node.nodeValue);
}


if(node.hasChildNodes()) {
node = node.firstChild;
}
else {
while(node.nextSibling == null && node != root) {
node = node.parentNode;
}
node = node.nextSibling;
}
}
}

querySelectorAll

function nativeSelector() {
var elements = document.querySelectorAll("body, body *"); /* Fixed a bug here. Thanks @theazureshadow */
var results = [];
var child;
for(var i = 0; i < elements.length; i++) {
child = elements[i].childNodes[0];
if(elements[i].hasChildNodes() && child.nodeType == 3) {
results.push(child.nodeValue);
}
}
}

getElementsByTagName (handicap)

function getElementsByTagName() {
var elements = document.getElementsByTagName("p");
var results = [];
for(var i = 0; i < elements.length; i++) {
results.push(elements[i].childNodes[0].nodeValue);
}
}

XPath

function xpathSelector() {
var xpathResult = document.evaluate(
"//*/text()",
document,
null,
XPathResult.ORDERED_NODE_ITERATOR_TYPE,
null
);


var results = [], res;
while(res = xpathResult.iterateNext()) {
results.push(res.nodeValue);  /* Fixed a bug here. Thanks @theazureshadow */
}
}

Also, you might find this discussion helpful - http://bytes.com/topic/javascript/answers/153239-how-do-i-get-elements-text-node

 document.deepText= function(hoo, fun){
var A= [], tem;
if(hoo){
hoo= hoo.firstChild;
while(hoo!= null){
if(hoo.nodeType== 3){
if(typeof fun== 'function'){
tem= fun(hoo);
if(tem!= undefined) A[A.length]= tem;
}
else A[A.length]= hoo;
}
else A= A.concat(document.deepText(hoo, fun));
hoo= hoo.nextSibling;
}
}
return A;
}

/* You can return an array of all the descendant text nodes of some parent element, or you can pass it some function and do something (find or replace or whatever) to the text in place.

This example returns the text of the non-whitespace textnodes in the body:

var A= document.deepText(document.body, function(t){
var tem= t.data;
return /\S/.test(tem)? tem: undefined;
});
alert(A.join('\n'))

*/

Handy for search and replace, highlighting and so on

I know you specifically asked for a collection, but if you just meant that informally and didn't care if they were all joined together into one big string, you can use:

var allTextAsString = document.documentElement.textContent || document.documentElement.innerText;

...with the first item being the DOM3 standard approach. Note however that innerText appears to exclude script or style tag contents in implementations that support it (at least IE and Chrome) while textContent includes them (in Firefox and Chrome).

var el1 = document.childNodes[0]
function get(node,ob)
{
ob = ob || {};


if(node.childElementCount)
{


ob[node.nodeName] = {}
ob[node.nodeName]["text"] = [];
for(var x = 0; x < node.childNodes.length;x++)
{
if(node.childNodes[x].nodeType == 3)
{
var txt = node.childNodes[x].nodeValue;




ob[node.nodeName]["text"].push(txt)
continue
}
get(node.childNodes[x],ob[node.nodeName])
};
}
else
{
ob[node.nodeName]   = (node.childNodes[0] == undefined ? null :node.childNodes[0].nodeValue )
}
return ob
}






var o = get(el1)
console.log(o)

Here's a modern Iterator version of the fastest TreeWalker method:

function getTextNodesIterator(el) { // Returns an iterable TreeWalker
const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
walker[Symbol.iterator] = () => ({
next() {
const value = walker.nextNode();
return {value, done: !value};
}
});
return walker;
}

Usage:

for (const textNode of getTextNodesIterator(document.body)) {
console.log(textNode)
}

Safer version

Using the iterator directly might get stuck if you move the nodes around while looping. This is safer, it returns an array:

function getTextNodes(el) { // Returns an array of Text nodes
const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
const nodes = [];
while (walker.nextNode()) {
nodes.push(walker.currentNode);
}
return nodes;
}

Here's an alternative that's a bit more idiomatic and (hopefully) easier to understand.

function getText(node) {
// recurse into each child node
if (node.hasChildNodes()) {
node.childNodes.forEach(getText);
}
// get content of each non-empty text node
else if (node.nodeType === Node.TEXT_NODE) {
const text = node.textContent.trim();
if (text) {
console.log(text); // do something
}
}
}

after createTreeWalker is deprecated you can use

  /**
* Get all text nodes under an element
* @param {!Element} el
* @return {Array<!Node>}
*/
function getTextNodes(el) {
const iterator = document.createNodeIterator(el, NodeFilter.SHOW_TEXT);
const textNodes = [];
let currentTextNode;
while ((currentTextNode = iterator.nextNode())) {
textNodes.push(currentTextNode);
}
return textNodes;
}