如何在Java中使用XPath读取XML

小开

最佳答案

你需要这样的东西:

DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(<uri_as_string>);
XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xpath = xPathfactory.newXPath();
XPathExpression expr = xpath.compile(<xpath_expression>);

然后调用expr.evaluate()，传入在该代码中定义的文档和您期望的返回类型，并将结果转换为结果的对象类型。

如果您需要关于特定XPath表达式的帮助，您可能应该将其作为单独的问题提出(除非这是您首先要问的问题—我理解您的问题是如何在Java中使用API)。

编辑:(回复评论):这个XPath表达式将为您提供PowerBuilder下第一个URL元素的文本:

/howto/topic[@name='PowerBuilder']/url/text()

这将得到第二个:

/howto/topic[@name='PowerBuilder']/url[2]/text()

你可以通过下面的代码得到:

expr.evaluate(doc, XPathConstants.STRING);

如果你不知道给定节点中有多少url，那么你应该这样做:

XPathExpression expr = xpath.compile("/howto/topic[@name='PowerBuilder']/url");
NodeList nl = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);

然后循环遍历NodeList。

小开

入门示例:

xml文件:

<inventory>
<book year="2000">
<title>Snow Crash</title>
<author>Neal Stephenson</author>
<publisher>Spectra</publisher>
<isbn>0553380958</isbn>
<price>14.95</price>
</book>


<book year="2005">
<title>Burning Tower</title>
<author>Larry Niven</author>
<author>Jerry Pournelle</author>
<publisher>Pocket</publisher>
<isbn>0743416910</isbn>
<price>5.99</price>
</book>


<book year="1995">
<title>Zodiac</title>
<author>Neal Stephenson</author>
<publisher>Spectra</publisher>
<isbn>0553573862</isbn>
<price>7.50</price>
</book>


<!-- more books... -->


</inventory>

Java代码:

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;


import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;




try {


DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
Document doc = docBuilder.parse (new File("c:\\tmp\\my.xml"));


// normalize text representation
doc.getDocumentElement().normalize();
System.out.println ("Root element of the doc is " + doc.getDocumentElement().getNodeName());


NodeList listOfBooks = doc.getElementsByTagName("book");
int totalBooks = listOfBooks.getLength();
System.out.println("Total no of books : " + totalBooks);


for(int i=0; i<listOfBooks.getLength() ; i++) {


Node firstBookNode = listOfBooks.item(i);
if(firstBookNode.getNodeType() == Node.ELEMENT_NODE) {


Element firstElement = (Element)firstBookNode;
System.out.println("Year :"+firstElement.getAttribute("year"));


//-------
NodeList firstNameList = firstElement.getElementsByTagName("title");
Element firstNameElement = (Element)firstNameList.item(0);


NodeList textFNList = firstNameElement.getChildNodes();
System.out.println("title : " + ((Node)textFNList.item(0)).getNodeValue().trim());
}
}//end of for loop with s var
} catch (SAXParseException err) {
System.out.println ("** Parsing error" + ", line " + err.getLineNumber () + ", uri " + err.getSystemId ());
System.out.println(" " + err.getMessage ());
} catch (SAXException e) {
Exception x = e.getException ();
((x == null) ? e : x).printStackTrace ();
} catch (Throwable t) {
t.printStackTrace ();
}

小开

你可以试试这个。

XML文档

保存为employees.xml。

<?xml version="1.0" encoding="UTF-8"?>
<Employees>
<Employee id="1">
<age>29</age>
<name>Pankaj</name>
<gender>Male</gender>
<role>Java Developer</role>
</Employee>
<Employee id="2">
<age>35</age>
<name>Lisa</name>
<gender>Female</gender>
<role>CEO</role>
</Employee>
<Employee id="3">
<age>40</age>
<name>Tom</name>
<gender>Male</gender>
<role>Manager</role>
</Employee>
<Employee id="4">
<age>25</age>
<name>Meghan</name>
<gender>Female</gender>
<role>Manager</role>
</Employee>
</Employees>

解析器类

该类有以下方法

列表项
一个方法，它将返回作为输入ID的雇员名。
一个方法，该方法将返回年龄大于输入年龄的雇员名称列表。
返回女性员工姓名列表的方法。

源代码

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;


import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;


import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;




public class Parser {


public static void main(String[] args) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder;
Document doc = null;
try {
builder = factory.newDocumentBuilder();
doc = builder.parse("employees.xml");


// Create XPathFactory object
XPathFactory xpathFactory = XPathFactory.newInstance();


// Create XPath object
XPath xpath = xpathFactory.newXPath();


String name = getEmployeeNameById(doc, xpath, 4);
System.out.println("Employee Name with ID 4: " + name);


List<String> names = getEmployeeNameWithAge(doc, xpath, 30);
System.out.println("Employees with 'age>30' are:" + Arrays.toString(names.toArray()));


List<String> femaleEmps = getFemaleEmployeesName(doc, xpath);
System.out.println("Female Employees names are:" +
Arrays.toString(femaleEmps.toArray()));


} catch (ParserConfigurationException | SAXException | IOException e) {
e.printStackTrace();
}


}




private static List<String> getFemaleEmployeesName(Document doc, XPath xpath) {
List<String> list = new ArrayList<>();
try {
//create XPathExpression object
XPathExpression expr =
xpath.compile("/Employees/Employee[gender='Female']/name/text()");
//evaluate expression result on XML document
NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++)
list.add(nodes.item(i).getNodeValue());
} catch (XPathExpressionException e) {
e.printStackTrace();
}
return list;
}




private static List<String> getEmployeeNameWithAge(Document doc, XPath xpath, int age) {
List<String> list = new ArrayList<>();
try {
XPathExpression expr =
xpath.compile("/Employees/Employee[age>" + age + "]/name/text()");
NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++)
list.add(nodes.item(i).getNodeValue());
} catch (XPathExpressionException e) {
e.printStackTrace();
}
return list;
}




private static String getEmployeeNameById(Document doc, XPath xpath, int id) {
String name = null;
try {
XPathExpression expr =
xpath.compile("/Employees/Employee[@id='" + id + "']/name/text()");
name = (String) expr.evaluate(doc, XPathConstants.STRING);
} catch (XPathExpressionException e) {
e.printStackTrace();
}


return name;
}


}

小开

下面是一个用vtd-xml处理xpath的例子…对于繁重的XML处理，它是首屈一指的。这是最近关于这个主题的一篇论文

import com.ximpleware.*;


public class changeAttrVal {
public  static  void main(String s[]) throws VTDException,java.io.UnsupportedEncodingException,java.io.IOException{
VTDGen vg = new VTDGen();
if (!vg.parseFile("input.xml", false))
return;
VTDNav vn = vg.getNav();
AutoPilot ap = new AutoPilot(vn);
XMLModifier xm = new XMLModifier(vn);
ap.selectXPath("/*/place[@id=\"p14\" and   @initialMarking=\"2\"]/@initialMarking");
int i=0;
while((i=ap.evalXPath())!=-1){
xm.updateToken(i+1, "499");// change initial marking from 2 to 499
}
xm.output("new.xml");
}


}

小开

扩展@bluish和@Yishai的精彩回答，下面是如何使nodelist和节点属性支持迭代器，即for(Node n: nodelist)接口。

像这样使用它:

NodeList nl = ...
for(Node n : XmlUtil.asList(nl))
{...}

而且

Node n = ...
for(Node attr : XmlUtil.asList(n.getAttributes())
{...}

代码:

/**
* Converts NodeList to an iterable construct.
* From: https://stackoverflow.com/a/19591302/779521
*/
public final class XmlUtil {
private XmlUtil() {}


public static List<Node> asList(NodeList n) {
return n.getLength() == 0 ? Collections.<Node>emptyList() : new NodeListWrapper(n);
}


static final class NodeListWrapper extends AbstractList<Node> implements RandomAccess {
private final NodeList list;


NodeListWrapper(NodeList l) {
this.list = l;
}


public Node get(int index) {
return this.list.item(index);
}


public int size() {
return this.list.getLength();
}
}


public static List<Node> asList(NamedNodeMap n) {
return n.getLength() == 0 ? Collections.<Node>emptyList() : new NodeMapWrapper(n);
}


static final class NodeMapWrapper extends AbstractList<Node> implements RandomAccess {
private final NamedNodeMap list;


NodeMapWrapper(NamedNodeMap l) {
this.list = l;
}


public Node get(int index) {
return this.list.item(index);
}


public int size() {
return this.list.getLength();
}
}
}

小开

使用XPathFactory， SAXParserFactory和StAX (JSR-173)读取XML文件。

使用XPath获取节点及其子数据。

public static void main(String[] args) {
String xml = "<soapenv:Body xmlns:soapenv='http://schemas.xmlsoap.org/soap/envelope/'>"
+ "<Yash:Data xmlns:Yash='http://Yash.stackoverflow.com/Services/Yash'>"
+ "<Yash:Tags>Java</Yash:Tags><Yash:Tags>Javascript</Yash:Tags><Yash:Tags>Selenium</Yash:Tags>"
+ "<Yash:Top>javascript</Yash:Top><Yash:User>Yash-777</Yash:User>"
+ "</Yash:Data></soapenv:Body>";
String jsonNameSpaces = "{'soapenv':'http://schemas.xmlsoap.org/soap/envelope/',"
+ "'Yash':'http://Yash.stackoverflow.com/Services/Yash'}";
String xpathExpression = "//Yash:Data";


Document doc1 = getDocument(false, "fileName", xml);
getNodesFromXpath(doc1, xpathExpression, jsonNameSpaces);
System.out.println("\n===== ***** =====");
Document doc2 = getDocument(true, "./books.xml", xml);
getNodesFromXpath(doc2, "//person", "{}");
}
static Document getDocument( boolean isFileName, String fileName, String xml ) {
Document doc = null;
try {


DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setValidating(false);
factory.setNamespaceAware(true);
factory.setIgnoringComments(true);
factory.setIgnoringElementContentWhitespace(true);


DocumentBuilder builder = factory.newDocumentBuilder();
if( isFileName ) {
File file = new File( fileName );
FileInputStream stream = new FileInputStream( file );
doc = builder.parse( stream );
} else {
doc = builder.parse( string2Source( xml ) );
}
} catch (SAXException | IOException e) {
e.printStackTrace();
} catch (ParserConfigurationException e) {
e.printStackTrace();
}
return doc;
}


/**
* ELEMENT_NODE[1],ATTRIBUTE_NODE[2],TEXT_NODE[3],CDATA_SECTION_NODE[4],
* ENTITY_REFERENCE_NODE[5],ENTITY_NODE[6],PROCESSING_INSTRUCTION_NODE[7],
* COMMENT_NODE[8],DOCUMENT_NODE[9],DOCUMENT_TYPE_NODE[10],DOCUMENT_FRAGMENT_NODE[11],NOTATION_NODE[12]
*/
public static void getNodesFromXpath( Document doc, String xpathExpression, String jsonNameSpaces ) {
try {
XPathFactory xpf = XPathFactory.newInstance();
XPath xpath = xpf.newXPath();


JSONObject namespaces = getJSONObjectNameSpaces(jsonNameSpaces);
if ( namespaces.size() > 0 ) {
NamespaceContextImpl nsContext = new NamespaceContextImpl();


Iterator<?> key = namespaces.keySet().iterator();
while (key.hasNext()) { // Apache WebServices Common Utilities
String pPrefix = key.next().toString();
String pURI = namespaces.get(pPrefix).toString();
nsContext.startPrefixMapping(pPrefix, pURI);
}
xpath.setNamespaceContext(nsContext );
}


XPathExpression compile = xpath.compile(xpathExpression);
NodeList nodeList = (NodeList) compile.evaluate(doc, XPathConstants.NODESET);
displayNodeList(nodeList);
} catch (XPathExpressionException e) {
e.printStackTrace();
}
}


static void displayNodeList( NodeList nodeList ) {
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
String NodeName = node.getNodeName();


NodeList childNodes = node.getChildNodes();
if ( childNodes.getLength() > 1 ) {
for (int j = 0; j < childNodes.getLength(); j++) {


Node child = childNodes.item(j);
short nodeType = child.getNodeType();
if ( nodeType == 1 ) {
System.out.format( "\n\t Node Name:[%s], Text[%s] ", child.getNodeName(), child.getTextContent() );
}
}
} else {
System.out.format( "\n Node Name:[%s], Text[%s] ", NodeName, node.getTextContent() );
}


}
}
static InputSource string2Source( String str ) {
InputSource inputSource = new InputSource( new StringReader( str ) );
return inputSource;
}
static JSONObject getJSONObjectNameSpaces( String jsonNameSpaces ) {
if(jsonNameSpaces.indexOf("'") > -1)    jsonNameSpaces = jsonNameSpaces.replace("'", "\"");
JSONParser parser = new JSONParser();
JSONObject namespaces = null;
try {
namespaces = (JSONObject) parser.parse(jsonNameSpaces);
} catch (ParseException e) {
e.printStackTrace();
}
return namespaces;
}

XML文档

<?xml version="1.0" encoding="UTF-8"?>
<book>
<person>
<first>Yash</first>
<last>M</last>
<age>22</age>
</person>
<person>
<first>Bill</first>
<last>Gates</last>
<age>46</age>
</person>
<person>
<first>Steve</first>
<last>Jobs</last>
<age>40</age>
</person>
</book>

对于给定的xpatheexpression输出:

String xpathExpression = "//person/first";
/*OutPut:
Node Name:[first], Text[Yash]
Node Name:[first], Text[Bill]
Node Name:[first], Text[Steve] */


String xpathExpression = "//person";
/*OutPut:
Node Name:[first], Text[Yash]
Node Name:[last], Text[M]
Node Name:[age], Text[22]
Node Name:[first], Text[Bill]
Node Name:[last], Text[Gates]
Node Name:[age], Text[46]
Node Name:[first], Text[Steve]
Node Name:[last], Text[Jobs]
Node Name:[age], Text[40] */


String xpathExpression = "//Yash:Data";
/*OutPut:
Node Name:[Yash:Tags], Text[Java]
Node Name:[Yash:Tags], Text[Javascript]
Node Name:[Yash:Tags], Text[Selenium]
Node Name:[Yash:Top], Text[javascript]
Node Name:[Yash:User], Text[Yash-777] */

请看这个链接用于我们自己的NamespaceContext的实现

小开

这将向您展示如何

读入一个XML文件到DOM
用XPath过滤出一组Nodes
对每个提取的Nodes执行特定操作。

我们将用下面的语句调用代码

processFilteredXml(xmlIn, xpathExpr,(node) -> {/*Do something...*/;});

在本例中，我们希望从book.xml中打印一些creatorNames，使用"//book/creators/creator/creatorName"作为xpath，在每个匹配XPath的节点上执行printNode操作。

完整代码

@Test
public void printXml() {
try (InputStream in = readFile("book.xml")) {
processFilteredXml(in, "//book/creators/creator/creatorName", (node) -> {
printNode(node, System.out);
});
} catch (Exception e) {
throw new RuntimeException(e);
}
}


private InputStream readFile(String yourSampleFile) {
return Thread.currentThread().getContextClassLoader().getResourceAsStream(yourSampleFile);
}


private void processFilteredXml(InputStream in, String xpath, Consumer<Node> process) {
Document doc = readXml(in);
NodeList list = filterNodesByXPath(doc, xpath);
for (int i = 0; i < list.getLength(); i++) {
Node node = list.item(i);
process.accept(node);
}
}


public Document readXml(InputStream xmlin) {
try {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
return db.parse(xmlin);
} catch (Exception e) {
throw new RuntimeException(e);
}
}


private NodeList filterNodesByXPath(Document doc, String xpathExpr) {
try {
XPathFactory xPathFactory = XPathFactory.newInstance();
XPath xpath = xPathFactory.newXPath();
XPathExpression expr = xpath.compile(xpathExpr);
Object eval = expr.evaluate(doc, XPathConstants.NODESET);
return (NodeList) eval;
} catch (Exception e) {
throw new RuntimeException(e);
}
}


private void printNode(Node node, PrintStream out) {
try {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
StreamResult result = new StreamResult(new StringWriter());
DOMSource source = new DOMSource(node);
transformer.transform(source, result);
String xmlString = result.getWriter().toString();
out.println(xmlString);
} catch (Exception e) {
throw new RuntimeException(e);
}
}

打印

<creatorName>Fosmire, Michael</creatorName>


<creatorName>Wertz, Ruth</creatorName>


<creatorName>Purzer, Senay</creatorName>

对于book.xml

<book>
<creators>
<creator>
<creatorName>Fosmire, Michael</creatorName>
<givenName>Michael</givenName>
<familyName>Fosmire</familyName>
</creator>
<creator>
<creatorName>Wertz, Ruth</creatorName>
<givenName>Ruth</givenName>
<familyName>Wertz</familyName>
</creator>
<creator>
<creatorName>Purzer, Senay</creatorName>
<givenName>Senay</givenName>
<familyName>Purzer</familyName>
</creator>
</creators>
<titles>
<title>Critical Engineering Literacy Test (CELT)</title>
</titles>
</book>

小开

如果你有一个像下面这样的xml

<e:Envelope
xmlns:d = "http://www.w3.org/2001/XMLSchema"
xmlns:e = "http://schemas.xmlsoap.org/soap/envelope/"
xmlns:wn0 = "http://systinet.com/xsd/SchemaTypes/"
xmlns:i = "http://www.w3.org/2001/XMLSchema-instance">
<e:Header>
<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>
</e:Header>
<e:Body>
<n0:ForAnsiHeaderOperResponse xmlns:n0 = "http://systinet.com/wsdl/com/magicsoftware/ibolt/localhost/ForAnsiHeader/ForAnsiHeaderImpl#ForAnsiHeaderOper?KExqYXZhL2xhbmcvU3RyaW5nOylMamF2YS9sYW5nL1N0cmluZzs=">
<response i:type = "d:string">12--abc--pqr</response>
</n0:ForAnsiHeaderOperResponse>
</e:Body>
</e:Envelope>

并希望提取下面的XML

<e:Header>
<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>
</e:Header>

下面的代码有助于实现同样的目标

public static void main(String[] args) {


File fXmlFile = new File("C://Users//abhijitb//Desktop//Test.xml");
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
Document document;
Node result = null;
try {
document = dbf.newDocumentBuilder().parse(fXmlFile);
XPath xPath = XPathFactory.newInstance().newXPath();
String xpathStr = "//Envelope//Header";
result = (Node) xPath.evaluate(xpathStr, document, XPathConstants.NODE);
System.out.println(nodeToString(result));
} catch (SAXException | IOException | ParserConfigurationException | XPathExpressionException
| TransformerException e) {
e.printStackTrace();
}
}


private static String nodeToString(Node node) throws TransformerException {
StringWriter buf = new StringWriter();
Transformer xform = TransformerFactory.newInstance().newTransformer();
xform.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
xform.transform(new DOMSource(node), new StreamResult(buf));
return (buf.toString());
}

现在，如果您只需要如下所示的xml

<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>

你需要改变

String xpathStr = "//Envelope//Header";到String xpathStr = "//Envelope//Header/*";