Sunday, March 6, 2011

Parsing xml with DOM

The steps are

  • Get a document builder using document builder factory and parse the xml file to create a DOM object
  • Get a list of employee elements from the DOM
  • For each employee element get the id,name,age and type. Create an employee value object and add it to the list.
  • At the end iterate through the list and print the employees to verify we parsed it right.

Let us take this xml file:

<?xml version="1.0" encoding="UTF-8"?>
<Personnel>
<Employee type="permanent">
<Name>Seagull</Name>
<Id>3674</Id>
<Age>34</Age>
</Employee>
<Employee type="contract">
<Name>Robin</Name>
<Id>3675</Id>
<Age>25</Age>
</Employee>
<Employee type="permanent">
<Name>Crow</Name>
<Id>3676</Id>
<Age>28</Age>
</Employee>
</Personnel>


Full code listing - DomParserExample

a) Getting a document builder

private void parseXmlFile(){
//get the factory
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

try {

//Using factory get an instance of document builder
DocumentBuilder db = dbf.newDocumentBuilder();

//parse using builder to get DOM representation of the XML file
dom = db.parse("employees.xml");


}catch(ParserConfigurationException pce) {
pce.printStackTrace();
}catch(SAXException se) {
se.printStackTrace();
}catch(IOException ioe) {
ioe.printStackTrace();
}
}



b) Get a list of employee elements
Get the rootElement from the DOM object.From the root element get all employee elements. Iterate through each employee element to load the data.

 

private void parseDocument(){
//get the root element
Element docEle = dom.getDocumentElement();

//get a nodelist of elements
NodeList nl = docEle.getElementsByTagName("Employee");
if(nl != null && nl.getLength() > 0) {
for(int i = 0 ; i < nl.getLength();i++) {

//get the employee element
Element el = (Element)nl.item(i);

//get the Employee object
Employee e = getEmployee(el);

//add it to list
myEmpls.add(e);
}
}
}


c) Reading in data from each employee.


/**
* I take an employee element and read the values in, create
* an Employee object and return it
*/
private Employee getEmployee(Element empEl) {

//for each <employee> element get text or int values of
//name ,id, age and name
String name = getTextValue(empEl,"Name");
int id = getIntValue(empEl,"Id");
int age = getIntValue(empEl,"Age");

String type = empEl.getAttribute("type");

//Create a new Employee with the value read from the xml nodes
Employee e = new Employee(name,id,age,type);

return e;
}


/**
* I take a xml element and the tag name, look for the tag and get
* the text content
* i.e for <employee><name>John</name></employee> xml snippet if
* the Element points to employee node and tagName is 'name' I will return John
*/
private String getTextValue(Element ele, String tagName) {
String textVal = null;
NodeList nl = ele.getElementsByTagName(tagName);
if(nl != null && nl.getLength() > 0) {
Element el = (Element)nl.item(0);
textVal = el.getFirstChild().getNodeValue();
}

return textVal;
}


/**
* Calls getTextValue and returns a int value
*/
private int getIntValue(Element ele, String tagName) {
//in production application you would catch the exception
return Integer.parseInt(getTextValue(ele,tagName));
}



d) Iterating and printing.


private void printData(){

System.out.println("No of Employees '" + myEmpls.size() + "'.");

Iterator it = myEmpls.iterator();
while(it.hasNext()) {
System.out.println(it.next().toString());
}
}


See Xml parsing using sax.


Listing full code :


import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

public class DomParserExample {

//No generics
List myEmpls;
Document dom;


public DomParserExample(){
//create a list to hold the employee objects
myEmpls = new ArrayList();
}

public void runExample() {

//parse the xml file and get the dom object
parseXmlFile();

//get each employee element and create a Employee object
parseDocument();

//Iterate through the list and print the data
printData();

}


private void parseXmlFile(){
//get the factory
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

try {

//Using factory get an instance of document builder
DocumentBuilder db = dbf.newDocumentBuilder();

//parse using builder to get DOM representation of the XML file
dom = db.parse("employees.xml");


}catch(ParserConfigurationException pce) {
pce.printStackTrace();
}catch(SAXException se) {
se.printStackTrace();
}catch(IOException ioe) {
ioe.printStackTrace();
}
}


private void parseDocument(){
//get the root elememt
Element docEle = dom.getDocumentElement();

//get a nodelist of <employee> elements
NodeList nl = docEle.getElementsByTagName("Employee");
if(nl != null && nl.getLength() > 0) {
for(int i = 0 ; i < nl.getLength();i++) {

//get the employee element
Element el = (Element)nl.item(i);

//get the Employee object
Employee e = getEmployee(el);

//add it to list
myEmpls.add(e);
}
}
}


/**
* I take an employee element and read the values in, create
* an Employee object and return it
* @param empEl
* @return
*/
private Employee getEmployee(Element empEl) {

//for each <employee> element get text or int values of
//name ,id, age and name
String name = getTextValue(empEl,"Name");
int id = getIntValue(empEl,"Id");
int age = getIntValue(empEl,"Age");

String type = empEl.getAttribute("type");

//Create a new Employee with the value read from the xml nodes
Employee e = new Employee(name,id,age,type);

return e;
}


/**
* I take a xml element and the tag name, look for the tag and get
* the text content
* i.e for <employee><name>John</name></employee> xml snippet if
* the Element points to employee node and tagName is name I will return John
* @param ele
* @param tagName
* @return
*/
private String getTextValue(Element ele, String tagName) {
String textVal = null;
NodeList nl = ele.getElementsByTagName(tagName);
if(nl != null && nl.getLength() > 0) {
Element el = (Element)nl.item(0);
textVal = el.getFirstChild().getNodeValue();
}

return textVal;
}


/**
* Calls getTextValue and returns a int value
* @param ele
* @param tagName
* @return
*/
private int getIntValue(Element ele, String tagName) {
//in production application you would catch the exception
return Integer.parseInt(getTextValue(ele,tagName));
}

/**
* Iterate through the list and print the
* content to console
*/
private void printData(){

System.out.println("No of Employees '" + myEmpls.size() + "'.");

Iterator it = myEmpls.iterator();
while(it.hasNext()) {
System.out.println(it.next().toString());
}
}


public static void main(String[] args){
//create an instance
DomParserExample dpe = new DomParserExample();

//call run example
dpe.runExample();
}

}


Running DOMParserExample(JDK 1.5+)


  1. Download DomParserExample.java, Employee.java, employees.xml to c:\xercesTest
  2. Go to command prompt and type
    cd c:\xercesTest
  3. To compile, type
    javac -classpath . DomParserExample.java
  4. To run, type
    java -classpath . DomParserExample

No comments:

Post a Comment

Chitika