Wednesday, November 6

XML parsing (SAX and DOM) for Java

Introduction: After DOM, SAX is the most commonly used XML parser in Java. Unlike DOM, SAX does not load the whole XML document into memory; it reads the document sequentially and reports its content as it goes.
The JDK provides a SAX parser (SAXParser) and a DOM builder (DocumentBuilder) in the package javax.xml.parsers. To parse XML with SAX, we first create a SAXParserFactory and then obtain a SAXParser from the factory. Next we call the parse() method to parse the document. The parse() method takes two parameters: the XML file and a callback handler. SAX is an event-driven API: it defines a set of callback handler methods that are invoked when events occur during parsing. The JDK provides a DefaultHandler class with empty implementations of these callbacks; we extend it and override the methods we need for this program.
The program uses the three callback methods listed below.
startElement(String uri, String localName, String qName, Attributes attr): This callback is invoked when the parser encounters an element's start tag, e.g. <book>.
endElement(String uri, String localName, String qName): This callback is invoked when the parser encounters an element's end tag, e.g. </book>.
characters(char[] chars, int start, int length): This callback is invoked with the character data inside an element.

I’ll use SAX Parser and DOM to parse the following XML file.


<?xml version="1.0" ?>
<catalog>
  <book id="B100">
    <author>Simpson,Adam</author>
    <title>XML Developer's Guide</title>
    <genre>Computer</genre>
    <price>55.95</price>
    <publish_date>2000-10-01</publish_date>
    <description>An in-depth look at creating applications with XML.</description>
  </book>
  <book id="B200">
    <author>Ray,Smith</author>
    <title>C++ for beginner</title>
    <genre>Computer</genre>
    <price>35.95</price>
    <publish_date>2000-12-16</publish_date>
    <description>Start learning C++. It contains simple example with full explanation</description>
  </book>
  <book id="B300">
    <author>Ryan, Kathy</author>
    <title>Learn Hadoop</title>
    <genre>Computer</genre>
    <price>45.95</price>
    <publish_date>2000-11-17</publish_date>
    <description>Learn Hadoop. Learn how to handle big data.</description>
  </book>

</catalog>


SAXParser Code:
import java.io.File;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;


public class saxparser {
           
     File file=new File("c:\\project\\book.xml");
            //constructor
           
            public saxparser() {
                       
                        try {
                     
                                    //create an instance of SAXParser Factory
                                    SAXParserFactory factory=SAXParserFactory.newInstance();
                   
                                     // Get SAXParser from the factory
                                    SAXParser saxparser=factory.newSAXParser();
                                      //Call parse() to parse the doc.
                                    saxparser.parse(file, new evntHandler());                                 
                        } catch(Exception e)
                        {
                                    e.printStackTrace();
                                   
                        }
            }
           
public static void main(String[] args) {
                        new saxparser();
            }
           
           
}

import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import java.io.File;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;


//handler class (top-level, not an inner class) that receives the SAX callback events

/**
 * SAX handler that prints a numbered heading for every {@code book} element
 * and the text of its {@code author}, {@code title}, {@code genre},
 * {@code price} and {@code publish_date} elements to standard output.
 *
 * <p>A SAX parser may deliver the text of one element through several
 * {@code characters} calls, so the text is buffered and printed once, when
 * the element ends (or when a nested element starts).
 */
public class evntHandler extends DefaultHandler {

    /** Qualified name of the element whose text is being read; empty when outside one. */
    private String currentElem = "";

    /** Number that the next {@code book} element will be printed with. */
    private int cnt = 1;

    /** Text collected so far for {@link #currentElem}. */
    private final StringBuilder text = new StringBuilder();

    /**
     * Called for every start tag, e.g. {@code <book>}.
     *
     * @param qName the qualified element name; used to pick the output label
     */
    @Override
    public void startElement(String uri, String localname, String qName, Attributes attr) throws SAXException {
        // Text seen before a nested start tag belongs to the enclosing element; emit it first
        flush();
        currentElem = qName;
        if (currentElem.equals("book")) {
            System.out.println("book:" + cnt);
            cnt++;
        }
    }

    /** Called for every end tag, e.g. {@code </book>}; prints the buffered text, if any. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        flush();
        // Whitespace between elements must not be attributed to the element just closed
        currentElem = "";
    }

    /**
     * Called with (a chunk of) the character data inside an element.
     *
     * @param chars  buffer holding the characters
     * @param start  index of the first character of this chunk
     * @param length number of characters in this chunk
     */
    @Override
    public void characters(char[] chars, int start, int length) throws SAXException {
        if (labelFor(currentElem) != null) {
            // Only buffer here; the same text node may arrive in several chunks
            text.append(chars, start, length);
        }
    }

    /** Prints the buffered text with the label of the current element, then clears the buffer. */
    private void flush() {
        if (text.length() > 0) {
            String label = labelFor(currentElem);
            if (label != null) {
                System.out.println(label + text);
            }
            text.setLength(0);
        }
    }

    /** Returns the output prefix for a printed element, or {@code null} if the element is not printed. */
    private static String labelFor(String element) {
        if (element == null) {
            return null;
        } else if (element.equalsIgnoreCase("title")) {
            return "\tTitle:\t";
        } else if (element.equalsIgnoreCase("author")) {
            return "\tAuthor:\t";
        } else if (element.equalsIgnoreCase("genre")) {
            return "\tGenre:\t";
        } else if (element.equalsIgnoreCase("price")) {
            return "\tPrice:\t $";
        } else if (element.equalsIgnoreCase("publish_date")) {
            return "\tpublish_date:\t ";
        }
        return null;
    }
}

Output:
book:1
            Author: Simpson,Adam
            Title:    XML Developer's Guide
            Genre:  Computer
            Price:   $55.95
            publish_date:    2000-10-01
book:2
            Author: Ray,Smith
            Title:    C++ for beginner
            Genre:  Computer
            Price:   $35.95
            publish_date:    2000-12-16
book:3
            Author: Ryan, Kathy
            Title:    Learn Hadoop
            Genre:  Computer
            Price:   $45.95
            publish_date:    2000-11-17

DOM:
DOM is a platform- and language-independent API for processing XML documents. The DOM parser loads the whole XML document and builds an in-memory object model of it in the form of a tree of nodes. The DOM API defines the mechanisms for querying and traversing the tree, and for adding, modifying and deleting its elements and nodes.
import java.io.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

/**
 * Command-line demo that loads a book catalog into a DOM tree and prints the
 * title, genre, price and publish date of every book to standard output.
 */
public class xmlparser {

    /** Catalog that is parsed when no path is given on the command line. */
    private static final String DEFAULT_XML_PATH = "c:\\project\\book.xml";

    /**
     * Parses the catalog and prints its books.
     *
     * @param args optional; {@code args[0]} is the path of the XML file to
     *             parse. Without arguments the default catalog path is used.
     * @throws Exception if the file cannot be read or is not well-formed XML
     */
    public static void main(String[] args) throws Exception {
        File file = new File(args != null && args.length > 0 ? args[0] : DEFAULT_XML_PATH);
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = factory.newDocumentBuilder();
        Document doc = docBuilder.parse(file);

        // "*" matches every element in the document, in document order
        NodeList list = doc.getElementsByTagName("*");

        int bookCount = 0;
        for (int i = 0; i < list.getLength(); i++) {
            Element element = (Element) list.item(i);
            String nodeName = element.getNodeName();
            if (nodeName.equals("book")) {
                bookCount++;
                System.out.println("BOOK: " + bookCount);
            } else if (nodeName.equals("title")) {
                printField("\tTitle:\t", element);
            } else if (nodeName.equals("genre")) {
                printField("\tGenre:\t", element);
            } else if (nodeName.equals("price")) {
                printField("\tPrice:\t$", element);
            } else if (nodeName.equals("publish_date")) {
                printField("\tPublish_date:\t", element);
            }
        }
    }

    /**
     * Prints the label followed by the element's text.
     *
     * <p>getTextContent() is used instead of reading the first child node:
     * it yields an empty string for an empty element (where there is no
     * first child) and joins the text when it is split over several nodes.
     */
    private static void printField(String label, Element element) {
        System.out.println(label + element.getTextContent());
    }
}
BOOK: 1
            Title:    XML Developer's Guide
            Genre:  Computer
            Price:   $55.95
            Publish_date:    2000-10-01
BOOK: 2
            Title:    C++ for beginner
            Genre:  Computer
            Price:   $35.95
            Publish_date:    2000-12-16
BOOK: 3
            Title:    Learn Hadoop
            Genre:  Computer
            Price:   $45.95
            Publish_date:    2000-11-17

No comments:

Post a Comment