Home > Articles > Programming > Java

  • Print
  • + Share This
This chapter is from the book

This chapter is from the book

23.4 SAX Example 1: Printing the Outline of an XML Document

Listing 23.7 shows a content handler that responds to three parts of an XML document: start tags, end tags, and tag bodies. The handler simply prints out the start tag, the end tag, and the first word of the tag body, with two spaces of indentation for each nesting level. To accomplish this task, the content handler overrides the following three methods:

  • startElement

    This method prints a message indicating that it found the start tag for the element name. Any attributes associated with the element are listed in parentheses. The method also puts spaces in front of the printout, as specified by the indentation variable (initially 0). Finally, it adds 2 to this variable.

  • endElement

    This method subtracts 2 from the indentation variable and then prints a message indicating that it found the end tag for the element.

  • characters

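    This method prints the first word of the tag body at the current indentation level, followed by "..." if the body contains more than one word. Bodies consisting only of whitespace produce no output. The indentation level is left unchanged.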

Listing 23.8 shows a program that lets the user specify a SAX-compliant parser and an XML file, then invokes the parser with the outline-printing content handler just described (and shown in Listing 23.7). Figure 23–4 shows the initial result, and Listing 23.6 shows the top part of the output when orders.xml (Listing 23.9) is selected.

Figure 23–4 Interactively selecting the orders.xml file.

Listing 23.6 Partial output of SAXPrinter applied to orders.xml

Start tag: orders
  Start tag: order
    Start tag: count
      37
    End tag: count
    Start tag: price
      49.99
    End tag: price
    Start tag: book
      Start tag: isbn
        0130897930
      End tag: isbn
      Start tag: title
        Core...
      End tag: title
      Start tag: authors
        Start tag: author
          Marty...
        End tag: author
        Start tag: author
          Larry...
        End tag: author
      End tag: authors
    End tag: book
  End tag: order
  Start tag: order
    Start tag: count
      1
    End tag: count
    Start tag: price
      9.95
    End tag: price
    Start tag: yacht
      Start tag: manufacturer
        Luxury...
      End tag: manufacturer
      Start tag: model
        M-1
      End tag: model
      Start tag: standardFeatures (oars=plastic, lifeVests=none)
        false
      End tag: standardFeatures
    End tag: yacht
  End tag: order
  ... (Rest of results omitted)
End tag: orders

Listing 23.7 PrintHandler.java

import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.util.StringTokenizer;

/** A SAX handler that prints out the start tags, end tags,
 *  and first word of tag body. Indents two spaces
 *  for each nesting level. All output goes to System.out.
 *
 *  The handler keeps the current nesting depth in a field,
 *  so one instance should be fed by one parse at a time.
 */

public class PrintHandler extends DefaultHandler {
  /** Number of spaces printed in front of the next line. */
  private int indentation = 0;

  /** When you see a start tag, print it out and then
   *  increase indentation by two spaces. If the
   *  element has attributes, place them in parens
   *  after the element name: &lt;someTag&gt; prints as
   *  "someTag", but &lt;someTag att1="Val1" att2="Val2"&gt;
   *  prints as "someTag (att1=Val1, att2=Val2)".
   *
   *  @param namespaceUri  unused
   *  @param localName     unused
   *  @param qualifiedName the element name that is printed
   *  @param attributes    the element's attributes; their
   *                       qualified names and values are printed
   */

  @Override
  public void startElement(String namespaceUri,
                           String localName,
                           String qualifiedName,
                           Attributes attributes)
      throws SAXException {
    StringBuilder line = new StringBuilder("Start tag: ");
    line.append(qualifiedName);
    int numAttributes = attributes.getLength();
    if (numAttributes > 0) {
      line.append(" (");
      for (int i = 0; i < numAttributes; i++) {
        if (i > 0) {
          line.append(", ");
        }
        line.append(attributes.getQName(i))
            .append("=")
            .append(attributes.getValue(i));
      }
      line.append(")");
    }
    printIndented(line.toString());
    // Everything up to the matching end tag is nested one level deeper.
    indentation = indentation + 2;
  }

  /** When you see the end tag, decrease the indentation
   *  level by 2 and then print the tag, so that it lines
   *  up with its start tag.
   *
   *  @param namespaceUri  unused
   *  @param localName     unused
   *  @param qualifiedName the element name that is printed
   */

  @Override
  public void endElement(String namespaceUri,
                         String localName,
                         String qualifiedName)
      throws SAXException {
    indentation = indentation - 2;
    printIndented("End tag: " + qualifiedName);
  }

  /** Print out the first word of each chunk of tag body,
   *  followed by "..." if the chunk holds more words.
   *  Chunks that contain only whitespace print nothing.
   *
   *  NOTE(review): the SAX documentation allows a parser to
   *  deliver one tag body in several calls; in that case this
   *  prints one line per call. Confirm against the parser in use.
   *
   *  @param chars      buffer holding the character data
   *  @param startIndex index of the first character to read
   *  @param length     number of characters to read. This is
   *                    a count, not an end index.
   */

  @Override
  public void characters(char[] chars,
                         int startIndex,
                         int length) {
    String data = new String(chars, startIndex, length);
    // Whitespace makes up the default StringTokenizer delimiters
    StringTokenizer tok = new StringTokenizer(data);
    if (!tok.hasMoreTokens()) {
      return;
    }
    String firstWord = tok.nextToken();
    if (tok.hasMoreTokens()) {
      printIndented(firstWord + "...");
    } else {
      printIndented(firstWord);
    }
  }

  /** Prints the text on its own line, preceded by the
   *  current number of indentation spaces.
   */

  private void printIndented(String text) {
    StringBuilder line = new StringBuilder();
    for (int i = 0; i < indentation; i++) {
      line.append(' ');
    }
    line.append(text);
    System.out.println(line.toString());
  }
}

Listing 23.8 SAXPrinter.java

import java.io.File;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;

/** A program that uses SAX to print out the start tags,
 *  end tags, and first word of tag body of an XML file.
 *
 *  Usage: java SAXPrinter [xmlFile]. The parser factory can
 *  be chosen with -Djavax.xml.parsers.SAXParserFactory=...;
 *  when that property is unset, the Apache Xerces factory
 *  is selected.
 */

public class SAXPrinter {
  /** Picks the parser factory and the input file, prints
   *  the outline of that file, and then exits the JVM.
   *
   *  @param args optional; args[0] is the file to parse. With
   *              no arguments the name is obtained from
   *              ExtensionFileFilter.getFileName, and "test.xml"
   *              is used if that returns null.
   */
  public static void main(String[] args) {
    String jaxpPropertyName =
      "javax.xml.parsers.SAXParserFactory";
    // Pass the parser factory in on the command line with
    // -D to override the use of the Apache parser.
    if (System.getProperty(jaxpPropertyName) == null) {
      String apacheXercesPropertyValue =
        "org.apache.xerces.jaxp.SAXParserFactoryImpl";
      System.setProperty(jaxpPropertyName,
                         apacheXercesPropertyValue);
    }
    String filename;
    if (args.length > 0) {
      filename = args[0];
    } else {
      String[] extensions = { "xml", "tld" };
      WindowUtilities.setNativeLookAndFeel();
      // NOTE(review): presumably opens an interactive file
      // chooser rooted at the current directory -- confirm
      // against ExtensionFileFilter.
      filename = ExtensionFileFilter.getFileName(".",
                                                 "XML Files",
                                                 extensions);
      if (filename == null) {
        filename = "test.xml";
      }
    }
    printOutline(filename);
    // NOTE(review): explicit exit is presumably needed because
    // the GUI helpers above may leave non-daemon threads
    // running -- confirm.
    System.exit(0);
  }

  /** Parses the given document with a PrintHandler, which
   *  prints the outline to System.out. Any failure, including
   *  a parser factory that cannot be instantiated, is reported
   *  on System.err with a stack trace; nothing is thrown to
   *  the caller.
   *
   *  @param filename name of an existing local file, or, if no
   *                  such file exists, a URI that is handed
   *                  to the parser unchanged
   */
  public static void printOutline(String filename) {
    DefaultHandler handler = new PrintHandler();
    try {
      // Created inside the try block so that a missing or
      // misconfigured factory class is reported like any
      // other failure.
      SAXParserFactory factory = SAXParserFactory.newInstance();
      SAXParser parser = factory.newSAXParser();
      File file = new File(filename);
      if (file.isFile()) {
        // parse(String, ...) interprets its argument as a URI,
        // which misreads file names containing characters such
        // as '#' or '%'. The File overload does not.
        parser.parse(file, handler);
      } else {
        parser.parse(filename, handler);
      }
    } catch(FactoryConfigurationError e) {
      // An Error, so the Exception handler below would miss it.
      reportFailure(filename, e);
    } catch(Exception e) {
      reportFailure(filename, e);
    }
  }

  /** Prints the standard error message plus a stack trace. */
  private static void reportFailure(String filename,
                                    Throwable problem) {
    String errorMessage =
      "Error parsing " + filename + ": " + problem;
    System.err.println(errorMessage);
    problem.printStackTrace();
  }
}

Listing 23.9 orders.xml

<?xml version="1.0" ?>
<orders>
  <order>
    <count>37</count>
    <price>49.99</price>
    <book>
      <isbn>0130897930</isbn>
      <title>Core Web Programming Second Edition</title>
      <authors>
        <author>Marty Hall</author>
        <author>Larry Brown</author>
      </authors>
    </book>
  </order>
  <order>
    <count>1</count>
    <price>9.95</price>
    <yacht>
      <manufacturer>Luxury Yachts, Inc.</manufacturer>
      <model>M-1</model>
      <standardFeatures oars="plastic"
                        lifeVests="none">
        false
      </standardFeatures>
    </yacht>
  </order>
  <order>
    <count>3</count>
    <price>22.22</price>
    <book>
      <isbn>B000059Z4H</isbn>
      <title>Harry Potter and the Order of the Phoenix</title>
      <authors>
        <author>J.K. Rowling</author>
      </authors>
    </book>
  </order>
  <order>
    <count>2</count>
    <price>10452689.01</price>
    <yacht>
      <manufacturer>We B Boats, Inc.</manufacturer>
      <model>236-A</model>
      <standardFeatures bowlingAlley="double"
                        tennisCourt="grass">
        true
      </standardFeatures>
    </yacht>
  </order>
  <order>
    <count>13</count>
    <price>49.99</price>
    <book>
      <isbn>0130897930</isbn>
      <title>Core Web Programming Second Edition</title>
      <authors>
        <author>Marty Hall</author>
        <author>Larry Brown</author>
      </authors>
    </book>
  </order>
</orders>
  • + Share This
  • 🔖 Save To Your Account