This project has retired. For details please refer to its Attic page.
XMLReader xref
View Javadoc
1   package org.apache.archiva.xml;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *  http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.apache.commons.lang3.StringUtils;
23  import org.w3c.dom.Document;
24  import org.w3c.dom.Element;
25  import org.w3c.dom.Node;
26  import org.w3c.dom.NodeList;
27  import org.xml.sax.ErrorHandler;
28  import org.xml.sax.InputSource;
29  import org.xml.sax.SAXException;
30  import org.xml.sax.SAXParseException;
31  
32  import javax.xml.XMLConstants;
33  import javax.xml.namespace.NamespaceContext;
34  import javax.xml.parsers.DocumentBuilder;
35  import javax.xml.parsers.DocumentBuilderFactory;
36  import javax.xml.parsers.ParserConfigurationException;
37  import javax.xml.xpath.*;
38  import java.io.*;
39  import java.net.MalformedURLException;
40  import java.net.URL;
41  import java.nio.file.Files;
42  import java.nio.file.Path;
43  import java.util.*;
44  import java.util.stream.Collectors;
45  
46  /**
47   * XMLReader - a set of common xml utility methods for reading content out of an xml file.
48   */
49  public class XMLReader
50  {
51      private URL xmlUrl;
52  
53      private String documentType;
54  
55      private Document document;
56  
57      private Map<String, String> namespaceMap = new HashMap<>();
58      private Map<String, String> reverseNamespaceMap = new HashMap<>();
59  
60      private class NamespaceCtx implements NamespaceContext {
61  
62          @Override
63          public String getNamespaceURI(String prefix) {
64              return namespaceMap.get(prefix);
65          }
66  
67          @Override
68          public String getPrefix(String namespaceURI) {
69              return reverseNamespaceMap.get(namespaceURI);
70          }
71  
72          @Override
73          public Iterator getPrefixes(String namespaceURI) {
74              return namespaceMap.keySet().iterator();
75          }
76      }
77  
78      public XMLReader( String type, Path file )
79          throws XMLException
80      {
81          if ( !Files.exists(file) )
82          {
83              throw new XMLException( "file does not exist: " + file.toAbsolutePath() );
84          }
85  
86          if ( !Files.isRegularFile(file) )
87          {
88              throw new XMLException( "path is not a file: " + file.toAbsolutePath() );
89          }
90  
91          if ( !Files.isReadable(file) )
92          {
93              throw new XMLException( "Cannot read xml file due to permissions: " + file.toAbsolutePath() );
94          }
95  
96          try
97          {
98              init( type, file.toUri().toURL() );
99          }
100         catch ( MalformedURLException e )
101         {
102             throw new XMLException( "Unable to translate file " + file + " to URL: " + e.getMessage(), e );
103         }
104     }
105 
106     public XMLReader( String type, URL url )
107         throws XMLException
108     {
109         init( type, url );
110     }
111 
112     private void init( String type, URL url )
113         throws XMLException
114     {
115         this.documentType = type;
116         this.xmlUrl = url;
117 
118         // SAXReader reader = new SAXReader();
119 
120 
121 
122         try (InputStream in = url.openStream(); Reader reader = new LatinEntityResolutionReader(new BufferedReader(new InputStreamReader(in, "UTF-8"))))
123         {
124 
125             DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
126             dbf.setNamespaceAware(true);
127             dbf.setExpandEntityReferences(false);
128             dbf.setValidating(false);
129             // dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD,"false");
130             dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING,true);
131             // dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
132 
133             DocumentBuilder db = dbf.newDocumentBuilder();
134             // To suppress error output at System.err
135             db.setErrorHandler(new ErrorHandler() {
136                 @Override
137                 public void warning(SAXParseException exception) throws SAXException {
138 
139                 }
140 
141                 @Override
142                 public void error(SAXParseException exception) throws SAXException {
143                     throw exception;
144                 }
145 
146                 @Override
147                 public void fatalError(SAXParseException exception) throws SAXException {
148                     throw exception;
149                 }
150             });
151             this.document = db.parse(new InputSource(reader));
152 
153         }
154         catch ( IOException e )
155         {
156             throw new XMLException( "Unable to open stream to " + url + ": " + e.getMessage(), e );
157         } catch (ParserConfigurationException e) {
158             throw new XMLException("Unable to start parser "+e.getMessage());
159         } catch (SAXException e) {
160             throw new XMLException("Unable to parse file "+e.getMessage());
161         }
162 
163         Element root = this.document.getDocumentElement();
164         if ( root == null )
165         {
166             throw new XMLException( "Invalid " + documentType + " xml: root element is null." );
167         }
168 
169         if ( !StringUtils.equals( root.getLocalName(), documentType ) )
170         {
171             throw new XMLException(
172                 "Invalid " + documentType + " xml: Unexpected root element <" + root.getLocalName() + ">, expected <"
173                     + documentType + ">" + root.getNodeName() );
174         }
175     }
176 
177     public String getDefaultNamespaceURI()
178     {
179         String namespace = this.document.getNamespaceURI();
180         return namespace;
181     }
182 
183     public void addNamespaceMapping( String elementName, String uri )
184     {
185         this.namespaceMap.put( elementName, uri );
186     }
187 
188     public Element getElement( String xpathExpr )
189         throws XMLException
190     {
191         XPathExpression xpath = null;
192         try {
193             xpath = createXPath( xpathExpr );
194             Object evaluated = xpath.evaluate( document, XPathConstants.NODE);
195 
196             if ( evaluated == null )
197             {
198                 return null;
199             }
200 
201             if ( evaluated instanceof Element )
202             {
203                 return (Element) evaluated;
204             }
205             else
206             {
207                 // Unknown evaluated type.
208                 throw new XMLException( ".getElement( Expr: " + xpathExpr + " ) resulted in non-Element type -> ("
209                         + evaluated.getClass().getName() + ") " + evaluated );
210             }
211         } catch (XPathExpressionException e) {
212             throw new XMLException("Could not parse xpath expression");
213         }
214     }
215 
216     private XPathExpression createXPath(String xpathExpr ) throws XPathExpressionException {
217         XPath xpath = XPathFactory.newInstance().newXPath();
218         if ( !this.namespaceMap.isEmpty() )
219         {
220             xpath.setNamespaceContext(new NamespaceCtx());
221         }
222         return xpath.compile(xpathExpr);
223     }
224 
225     public boolean hasElement( String xpathExpr )
226         throws XMLException
227     {
228         XPathExpression xpath = null;
229         try {
230             xpath = createXPath( xpathExpr );
231             Object evaluated = xpath.evaluate( document, XPathConstants.NODE );
232             if ( evaluated == null )
233             {
234                 return false;
235             }
236             return true;
237         } catch (XPathExpressionException e) {
238             throw new XMLException("Could not create xpath expression");
239         }
240     }
241 
242     /**
243      * Remove namespaces from entire document.
244      */
245     public void removeNamespaces()
246     {
247         removeNamespaces( this.document.getDocumentElement() );
248     }
249 
250     /**
251      * Remove namespaces from element recursively.
252      */
253     @SuppressWarnings("unchecked")
254     public void removeNamespaces( Node elem )
255     {
256         if (elem.getNodeType() == Node.ELEMENT_NODE || elem.getNodeType() == Node.ATTRIBUTE_NODE) {
257             document.renameNode(elem, null, elem.getLocalName());
258 
259             Node n;
260 
261             NodeList nodeList = elem.getChildNodes();
262 
263 
264             for (int i = 0; i < nodeList.getLength(); i++) {
265                 n = nodeList.item(i);
266                 removeNamespaces(n);
267             }
268         }
269     }
270 
271     public String getElementText( Node context, String xpathExpr )
272         throws XMLException
273     {
274         XPathExpression xpath = null;
275         try {
276             xpath = createXPath( xpathExpr );
277             Object evaluated = xpath.evaluate( context, XPathConstants.NODE );
278 
279             if ( evaluated == null )
280             {
281                 return null;
282             }
283 
284             if ( evaluated instanceof Element )
285             {
286                 Element evalElem = (Element) evaluated;
287                 return XmlUtil.getText(evalElem);
288             }
289             else
290             {
291                 // Unknown evaluated type.
292                 throw new XMLException( ".getElementText( Node, Expr: " + xpathExpr + " ) resulted in non-Element type -> ("
293                         + evaluated.getClass().getName() + ") " + evaluated );
294             }
295         } catch (XPathExpressionException e) {
296             throw new XMLException("Could not parse xpath expression");
297         }
298     }
299 
300     public String getElementText( String xpathExpr )
301         throws XMLException
302     {
303         return getElementText(document, xpathExpr);
304     }
305 
306     @SuppressWarnings("unchecked")
307     public List<Node> getElementList( String xpathExpr )
308         throws XMLException
309     {
310         XPathExpression xpath = null;
311         try {
312             xpath = createXPath( xpathExpr );
313             Object evaluated = xpath.evaluate( document, XPathConstants.NODESET);
314 
315             if ( evaluated == null )
316             {
317                 return Collections.emptyList();
318             }
319 
320             NodeList nl = (NodeList) evaluated;
321             List<Node> nodeList = new ArrayList<>();
322             for (int i = 0 ; i<nl.getLength(); i++) {
323                 nodeList.add(nl.item(i));
324             }
325             return nodeList;
326 
327         } catch (XPathExpressionException e) {
328             throw new XMLException("Could not parse xpath expression");
329         }
330     }
331 
332     public List<String> getElementListText( String xpathExpr )
333         throws XMLException
334     {
335         List<Node> elemList = getElementList( xpathExpr );
336         if ( elemList == null )
337         {
338             return null;
339         }
340 
341         return elemList.stream().filter(n -> n instanceof Element).map(n -> XmlUtil.getText(n)).collect(Collectors.toList());
342     }
343 
344 }