001package org.apache.archiva.xml; 002 003/* 004 * Licensed to the Apache Software Foundation (ASF) under one 005 * or more contributor license agreements. See the NOTICE file 006 * distributed with this work for additional information 007 * regarding copyright ownership. The ASF licenses this file 008 * to you under the Apache License, Version 2.0 (the 009 * "License"); you may not use this file except in compliance 010 * with the License. You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, 015 * software distributed under the License is distributed on an 016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 * KIND, either express or implied. See the License for the 018 * specific language governing permissions and limitations 019 * under the License. 020 */ 021 022import org.apache.commons.lang3.StringUtils; 023import org.w3c.dom.Document; 024import org.w3c.dom.Element; 025import org.w3c.dom.Node; 026import org.w3c.dom.NodeList; 027import org.xml.sax.ErrorHandler; 028import org.xml.sax.InputSource; 029import org.xml.sax.SAXException; 030import org.xml.sax.SAXParseException; 031 032import javax.xml.XMLConstants; 033import javax.xml.namespace.NamespaceContext; 034import javax.xml.parsers.DocumentBuilder; 035import javax.xml.parsers.DocumentBuilderFactory; 036import javax.xml.parsers.ParserConfigurationException; 037import javax.xml.xpath.*; 038import java.io.*; 039import java.net.MalformedURLException; 040import java.net.URL; 041import java.nio.file.Files; 042import java.nio.file.Path; 043import java.util.*; 044import java.util.stream.Collectors; 045 046/** 047 * XMLReader - a set of common xml utility methods for reading content out of an xml file. 048 */ 049public class XMLReader 050{ 051 private URL xmlUrl; 052 053 private String documentType; 054 055 private Document document; 056 057 private Map<String, String> namespaceMap = new HashMap<>(); 058 private Map<String, String> reverseNamespaceMap = new HashMap<>(); 059 060 private class NamespaceCtx implements NamespaceContext { 061 062 @Override 063 public String getNamespaceURI(String prefix) { 064 return namespaceMap.get(prefix); 065 } 066 067 @Override 068 public String getPrefix(String namespaceURI) { 069 return reverseNamespaceMap.get(namespaceURI); 070 } 071 072 @Override 073 public Iterator getPrefixes(String namespaceURI) { 074 return namespaceMap.keySet().iterator(); 075 } 076 } 077 078 public XMLReader( String type, Path file ) 079 throws XMLException 080 { 081 if ( !Files.exists(file) ) 082 { 083 throw new XMLException( "file does not exist: " + file.toAbsolutePath() ); 084 } 085 086 if ( !Files.isRegularFile(file) ) 087 { 088 throw new XMLException( "path is not a file: " + file.toAbsolutePath() ); 089 } 090 091 if ( !Files.isReadable(file) ) 092 { 093 throw new XMLException( "Cannot read xml file due to permissions: " + file.toAbsolutePath() ); 094 } 095 096 try 097 { 098 init( type, file.toUri().toURL() ); 099 } 100 catch ( MalformedURLException e ) 101 { 102 throw new XMLException( "Unable to translate file " + file + " to URL: " + e.getMessage(), e ); 103 } 104 } 105 106 public XMLReader( String type, URL url ) 107 throws XMLException 108 { 109 init( type, url ); 110 } 111 112 private void init( String type, URL url ) 113 throws XMLException 114 { 115 this.documentType = type; 116 this.xmlUrl = url; 117 118 // SAXReader reader = new SAXReader(); 119 120 121 122 try (InputStream in = url.openStream(); Reader reader = new LatinEntityResolutionReader(new BufferedReader(new InputStreamReader(in, "UTF-8")))) 123 { 124 125 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 126 dbf.setNamespaceAware(true); 127 dbf.setExpandEntityReferences(false); 128 dbf.setValidating(false); 129 // dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD,"false"); 130 dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING,true); 131 // dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 132 133 DocumentBuilder db = dbf.newDocumentBuilder(); 134 // To suppress error output at System.err 135 db.setErrorHandler(new ErrorHandler() { 136 @Override 137 public void warning(SAXParseException exception) throws SAXException { 138 139 } 140 141 @Override 142 public void error(SAXParseException exception) throws SAXException { 143 throw exception; 144 } 145 146 @Override 147 public void fatalError(SAXParseException exception) throws SAXException { 148 throw exception; 149 } 150 }); 151 this.document = db.parse(new InputSource(reader)); 152 153 } 154 catch ( IOException e ) 155 { 156 throw new XMLException( "Unable to open stream to " + url + ": " + e.getMessage(), e ); 157 } catch (ParserConfigurationException e) { 158 throw new XMLException("Unable to start parser "+e.getMessage()); 159 } catch (SAXException e) { 160 throw new XMLException("Unable to parse file "+e.getMessage()); 161 } 162 163 Element root = this.document.getDocumentElement(); 164 if ( root == null ) 165 { 166 throw new XMLException( "Invalid " + documentType + " xml: root element is null." ); 167 } 168 169 if ( !StringUtils.equals( root.getLocalName(), documentType ) ) 170 { 171 throw new XMLException( 172 "Invalid " + documentType + " xml: Unexpected root element <" + root.getLocalName() + ">, expected <" 173 + documentType + ">" + root.getNodeName() ); 174 } 175 } 176 177 public String getDefaultNamespaceURI() 178 { 179 String namespace = this.document.getNamespaceURI(); 180 return namespace; 181 } 182 183 public void addNamespaceMapping( String elementName, String uri ) 184 { 185 this.namespaceMap.put( elementName, uri ); 186 } 187 188 public Element getElement( String xpathExpr ) 189 throws XMLException 190 { 191 XPathExpression xpath = null; 192 try { 193 xpath = createXPath( xpathExpr ); 194 Object evaluated = xpath.evaluate( document, XPathConstants.NODE); 195 196 if ( evaluated == null ) 197 { 198 return null; 199 } 200 201 if ( evaluated instanceof Element ) 202 { 203 return (Element) evaluated; 204 } 205 else 206 { 207 // Unknown evaluated type. 208 throw new XMLException( ".getElement( Expr: " + xpathExpr + " ) resulted in non-Element type -> (" 209 + evaluated.getClass().getName() + ") " + evaluated ); 210 } 211 } catch (XPathExpressionException e) { 212 throw new XMLException("Could not parse xpath expression"); 213 } 214 } 215 216 private XPathExpression createXPath(String xpathExpr ) throws XPathExpressionException { 217 XPath xpath = XPathFactory.newInstance().newXPath(); 218 if ( !this.namespaceMap.isEmpty() ) 219 { 220 xpath.setNamespaceContext(new NamespaceCtx()); 221 } 222 return xpath.compile(xpathExpr); 223 } 224 225 public boolean hasElement( String xpathExpr ) 226 throws XMLException 227 { 228 XPathExpression xpath = null; 229 try { 230 xpath = createXPath( xpathExpr ); 231 Object evaluated = xpath.evaluate( document, XPathConstants.NODE ); 232 if ( evaluated == null ) 233 { 234 return false; 235 } 236 return true; 237 } catch (XPathExpressionException e) { 238 throw new XMLException("Could not create xpath expression"); 239 } 240 } 241 242 /** 243 * Remove namespaces from entire document. 244 */ 245 public void removeNamespaces() 246 { 247 removeNamespaces( this.document.getDocumentElement() ); 248 } 249 250 /** 251 * Remove namespaces from element recursively. 252 */ 253 @SuppressWarnings("unchecked") 254 public void removeNamespaces( Node elem ) 255 { 256 if (elem.getNodeType() == Node.ELEMENT_NODE || elem.getNodeType() == Node.ATTRIBUTE_NODE) { 257 document.renameNode(elem, null, elem.getLocalName()); 258 259 Node n; 260 261 NodeList nodeList = elem.getChildNodes(); 262 263 264 for (int i = 0; i < nodeList.getLength(); i++) { 265 n = nodeList.item(i); 266 removeNamespaces(n); 267 } 268 } 269 } 270 271 public String getElementText( Node context, String xpathExpr ) 272 throws XMLException 273 { 274 XPathExpression xpath = null; 275 try { 276 xpath = createXPath( xpathExpr ); 277 Object evaluated = xpath.evaluate( context, XPathConstants.NODE ); 278 279 if ( evaluated == null ) 280 { 281 return null; 282 } 283 284 if ( evaluated instanceof Element ) 285 { 286 Element evalElem = (Element) evaluated; 287 return XmlUtil.getText(evalElem); 288 } 289 else 290 { 291 // Unknown evaluated type. 292 throw new XMLException( ".getElementText( Node, Expr: " + xpathExpr + " ) resulted in non-Element type -> (" 293 + evaluated.getClass().getName() + ") " + evaluated ); 294 } 295 } catch (XPathExpressionException e) { 296 throw new XMLException("Could not parse xpath expression"); 297 } 298 } 299 300 public String getElementText( String xpathExpr ) 301 throws XMLException 302 { 303 return getElementText(document, xpathExpr); 304 } 305 306 @SuppressWarnings("unchecked") 307 public List<Node> getElementList( String xpathExpr ) 308 throws XMLException 309 { 310 XPathExpression xpath = null; 311 try { 312 xpath = createXPath( xpathExpr ); 313 Object evaluated = xpath.evaluate( document, XPathConstants.NODESET); 314 315 if ( evaluated == null ) 316 { 317 return Collections.emptyList(); 318 } 319 320 NodeList nl = (NodeList) evaluated; 321 List<Node> nodeList = new ArrayList<>(); 322 for (int i = 0 ; i<nl.getLength(); i++) { 323 nodeList.add(nl.item(i)); 324 } 325 return nodeList; 326 327 } catch (XPathExpressionException e) { 328 throw new XMLException("Could not parse xpath expression"); 329 } 330 } 331 332 public List<String> getElementListText( String xpathExpr ) 333 throws XMLException 334 { 335 List<Node> elemList = getElementList( xpathExpr ); 336 if ( elemList == null ) 337 { 338 return null; 339 } 340 341 return elemList.stream().filter(n -> n instanceof Element).map(n -> XmlUtil.getText(n)).collect(Collectors.toList()); 342 } 343 344}