001 package com.mockrunner.util.web;
002
003 import java.io.StringReader;
004 import java.util.List;
005
006 import org.apache.xerces.parsers.DOMParser;
007 import org.cyberneko.html.HTMLConfiguration;
008 import org.jdom.Element;
009 import org.jdom.input.DOMBuilder;
010 import org.jdom.output.XMLOutputter;
011 import org.xml.sax.InputSource;
012
013 import com.mockrunner.base.NestedApplicationException;
014
015 /**
016 * Util class for HTML and XML parsing.
017 */
018 public class XmlUtil
019 {
020 /**
021 * Convinience method for HTML fragments. Returns the body
022 * as JDOM <code>Element</code>.
023 *
024 * If an HTML documents looks like this:
025 * <pre>
026 * <html>
027 * <head>
028 * </head>
029 * <body>
030 * <h1>
031 * </h1>
032 * </body>
033 * </html>
034 * </pre>
035 *
036 * the method returns the h1 tag as <code>Element</code>.
037 * @param document the <code>org.jdom.Document</code>
038 * @return the body <code>Element</code>
039 */
040 public static Element getBodyFragmentFromJDOMDocument(org.jdom.Document document)
041 {
042 Element element = document.getRootElement().getChild("BODY");
043 if(null == element)
044 {
045 element = document.getRootElement().getChild("body");
046 }
047 if(null != element)
048 {
049 List childs = element.getChildren();
050 if(null != childs && childs.size() > 0) return (Element)childs.get(0);
051 }
052 return null;
053 }
054
055 /**
056 * @deprecated use {@link #getBodyFragmentFromJDOMDocument}
057 */
058 public static Element getBodyFragmentJDOMDocument(org.jdom.Document document)
059 {
060 return getBodyFragmentFromJDOMDocument(document);
061 }
062
063 /**
064 * Returns the documents XML content as a string.
065 * @param document the <code>org.jdom.Document</code>
066 * @return the output as string
067 */
068 public static String createStringFromJDOMDocument(org.jdom.Document document)
069 {
070 try
071 {
072 return new XMLOutputter().outputString(document);
073 }
074 catch(Exception exc)
075 {
076 throw new NestedApplicationException(exc);
077 }
078 }
079
080 /**
081 * Creates a JDOM <code>Document</code> from a specified
082 * W3C <code>Document</code>.
083 * @param document the <code>org.w3c.dom.Document</code>
084 * @return the <code>org.jdom.Document</code>
085 */
086 public static org.jdom.Document createJDOMDocument(org.w3c.dom.Document document)
087 {
088 return new DOMBuilder().build(document);
089 }
090
091 /**
092 * Returns a parser suitable for parsing HTML documents.
093 * The NekoHTML parser is used with some settings to
094 * preserve case of tag names and disable namespace processing.
095 * This method is used by {@link #parseHTML}.
096 * @return instance of <code>org.apache.xerces.parsers.DOMParser</code>
097 * with Neko configuration
098 */
099 public static DOMParser getHTMLParser()
100 {
101 try
102 {
103 HTMLConfiguration config = new HTMLConfiguration();
104 config.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
105 config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
106 DOMParser parser = new DOMParser(config);
107 return parser;
108 }
109 catch(Exception exc)
110 {
111 throw new NestedApplicationException(exc);
112 }
113 }
114
115 /**
116 * Parses the specified HTML with the NekoHTML parser.
117 * If you want to use another HTML parser or configure
118 * the NekoHTML parser with special features, you can use
119 * the <code>parse</code> method.
120 * @param source the HTML as String
121 * @return the parsed document as org.w3c.dom.Document
122 */
123 public static org.w3c.dom.Document parseHTML(String source)
124 {
125 try
126 {
127 return parse(getHTMLParser(), source);
128 }
129 catch(Exception exc)
130 {
131 throw new NestedApplicationException(exc);
132 }
133 }
134
135 /**
136 * Parses the specified XML with the specified parser.
137 * The main purpose of this method is to use the NekoHTML
138 * parser with custom features and properties. If you can live
139 * with the settings provided by Mockrunner, you can use
140 * {@link #parseHTML}.
141 * @param parser the parser (must extend
142 * <code>org.apache.xerces.parsers.DOMParser</code>),
143 * e.g. the one returned by {@link #getHTMLParser}
144 * @param source the XML as String
145 * @return the parsed document as org.w3c.dom.Document
146 */
147 public static org.w3c.dom.Document parse(DOMParser parser, String source)
148 {
149 try
150 {
151 parser.parse(new InputSource(new StringReader(source)));
152 return parser.getDocument();
153 }
154 catch(Exception exc)
155 {
156 throw new NestedApplicationException(exc);
157 }
158 }
159 }