package org.apache.archiva.metadata.repository.stats;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import org.apache.archiva.metadata.model.ArtifactMetadata;
import org.apache.archiva.metadata.model.maven2.MavenArtifactFacet;
import org.apache.archiva.metadata.repository.MetadataRepository;
import org.apache.archiva.metadata.repository.MetadataRepositoryException;
import org.apache.archiva.metadata.repository.MetadataResolutionException;
import org.apache.commons.lang.time.StopWatch;
import org.apache.jackrabbit.commons.JcrUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.query.Query;
import javax.jcr.query.QueryManager;
import javax.jcr.query.QueryResult;
import javax.jcr.query.Row;

/**
 * Default {@link RepositoryStatisticsManager} implementation.
 * <p>
 * Scan statistics are persisted as {@code RepositoryStatistics} metadata facets, keyed by a
 * UTC scan-timestamp name. Totals are gathered either directly from the underlying JCR
 * {@link Session} (when the metadata repository can expose one) or, failing that, by walking
 * the repository through the standard metadata APIs.
 */
@Service("repositoryStatisticsManager#default")
public class DefaultRepositoryStatisticsManager
    implements RepositoryStatisticsManager
{
    private static final Logger log = LoggerFactory.getLogger( DefaultRepositoryStatisticsManager.class );

    // Facet names are scan timestamps rendered in UTC; parsing must use the same zone.
    private static final TimeZone UTC_TIME_ZONE = TimeZone.getTimeZone( "UTC" );

    /**
     * Checks whether any statistics facet has ever been stored for the given repository.
     *
     * @param metadataRepository the metadata store to query
     * @param repositoryId       the repository identifier
     * @return {@code true} if at least one statistics facet exists
     * @throws MetadataRepositoryException if the store cannot be queried
     */
    @Override
    public boolean hasStatistics( MetadataRepository metadataRepository, String repositoryId )
        throws MetadataRepositoryException
    {
        return metadataRepository.hasMetadataFacet( repositoryId, RepositoryStatistics.FACET_ID );
    }

    /**
     * Returns the most recent statistics for the repository, or {@code null} when none exist.
     * Facet names are sortable timestamps, so the lexicographically greatest name is the latest scan.
     *
     * @param metadataRepository the metadata store to query
     * @param repositoryId       the repository identifier
     * @return the latest {@link RepositoryStatistics}, or {@code null}
     * @throws MetadataRepositoryException if the store cannot be queried
     */
    @Override
    public RepositoryStatistics getLastStatistics( MetadataRepository metadataRepository, String repositoryId )
        throws MetadataRepositoryException
    {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        // TODO: consider a more efficient implementation that directly gets the last one from the content repository
        List<String> scans = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
        if ( scans == null )
        {
            return null;
        }
        Collections.sort( scans );
        if ( !scans.isEmpty() )
        {
            String name = scans.get( scans.size() - 1 );
            RepositoryStatistics repositoryStatistics =
                RepositoryStatistics.class.cast( metadataRepository.getMetadataFacet( repositoryId,
                                                                                      RepositoryStatistics.FACET_ID,
                                                                                      name ) );
            stopWatch.stop();
            log.debug( "time to find last RepositoryStatistics: {} ms", stopWatch.getTime() );
            return repositoryStatistics;
        }
        else
        {
            return null;
        }
    }

    /**
     * Recursively accumulates group/project/version/artifact totals for the namespace {@code ns}
     * and all namespaces below it into {@code stats}.
     *
     * @param metadataRepository the metadata store to walk
     * @param stats              accumulator, mutated in place
     * @param repositoryId       the repository identifier
     * @param ns                 the dotted namespace to walk (e.g. {@code org.apache})
     * @throws MetadataResolutionException if any level of the walk fails
     */
    private void walkRepository( MetadataRepository metadataRepository, RepositoryStatistics stats, String repositoryId,
                                 String ns )
        throws MetadataResolutionException
    {
        // Recurse into child namespaces first; getNamespaces returns the child segment only,
        // so the dotted path is rebuilt here.
        for ( String namespace : metadataRepository.getNamespaces( repositoryId, ns ) )
        {
            walkRepository( metadataRepository, stats, repositoryId, ns + "." + namespace );
        }

        Collection<String> projects = metadataRepository.getProjects( repositoryId, ns );
        if ( !projects.isEmpty() )
        {
            // A namespace only counts as a "group" when it directly contains projects.
            stats.setTotalGroupCount( stats.getTotalGroupCount() + 1 );
            stats.setTotalProjectCount( stats.getTotalProjectCount() + projects.size() );

            for ( String project : projects )
            {
                for ( String version : metadataRepository.getProjectVersions( repositoryId, ns, project ) )
                {
                    for ( ArtifactMetadata artifact : metadataRepository.getArtifacts( repositoryId, ns, project,
                                                                                       version ) )
                    {
                        stats.setTotalArtifactCount( stats.getTotalArtifactCount() + 1 );
                        stats.setTotalArtifactFileSize( stats.getTotalArtifactFileSize() + artifact.getSize() );

                        // Per-type counts are only available when the Maven facet was stored.
                        MavenArtifactFacet facet =
                            (MavenArtifactFacet) artifact.getFacet( MavenArtifactFacet.FACET_ID );
                        if ( facet != null )
                        {
                            String type = facet.getType();
                            stats.setTotalCountForType( type, stats.getTotalCountForType( type ) + 1 );
                        }
                    }
                }
            }
        }
    }

    /**
     * Records a new statistics facet for a completed scan, populating repository-wide totals
     * either from the JCR session (fast path) or by walking the metadata APIs (fallback).
     *
     * @param metadataRepository the metadata store to read totals from and persist the facet to
     * @param repositoryId       the repository identifier
     * @param startTime          scan start time (also determines the facet name)
     * @param endTime            scan end time
     * @param totalFiles         number of files seen by the scan
     * @param newFiles           number of new files seen by the scan
     * @throws MetadataRepositoryException if gathering or persisting the statistics fails
     */
    @Override
    public void addStatisticsAfterScan( MetadataRepository metadataRepository, String repositoryId, Date startTime,
                                        Date endTime, long totalFiles, long newFiles )
        throws MetadataRepositoryException
    {
        RepositoryStatistics repositoryStatistics = new RepositoryStatistics();
        repositoryStatistics.setRepositoryId( repositoryId );
        repositoryStatistics.setScanStartTime( startTime );
        repositoryStatistics.setScanEndTime( endTime );
        repositoryStatistics.setTotalFileCount( totalFiles );
        repositoryStatistics.setNewFileCount( newFiles );

        // TODO
        // In the future, instead of being tied to a scan we might want to record information on the fly based on
        // events that are occurring. Even without these totals we could query much of the information on demand based
        // on information from the metadata content repository. In the mean time, we lock information in at scan time.
        // Note that if new types are later discoverable due to a code change or new plugin, historical stats will not
        // be updated and the repository will need to be rescanned.

        long startGather = System.currentTimeMillis();

        // FIXME what about other implementations ?

        if ( metadataRepository.canObtainAccess( Session.class ) )
        {
            // TODO: this is currently very raw and susceptible to changes in content structure. Should we instead
            // depend directly on the plugin and interrogate the JCR repository's knowledge of the structure?
            populateStatisticsFromJcr( (Session) metadataRepository.obtainAccess( Session.class ), repositoryId,
                                       repositoryStatistics );
        }
        else
        {
            // TODO:
            // if the file repository is used more permanently, we may seek a more efficient mechanism - e.g. we could
            // build an index, or store the aggregate information and update it on the fly. We can perhaps even walk
            // but retrieve less information to speed it up. In the mean time, we walk the repository using the
            // standard APIs
            populateStatisticsFromRepositoryWalk( metadataRepository, repositoryId, repositoryStatistics );
        }

        log.info( "Gathering statistics executed in {} ms", ( System.currentTimeMillis() - startGather ) );

        metadataRepository.addMetadataFacet( repositoryId, repositoryStatistics );
    }

    /**
     * Populates artifact/project/group totals by querying the JCR workspace directly.
     *
     * @param session              an open JCR session for the metadata repository
     * @param repositoryId         the repository identifier (used to scope the query path)
     * @param repositoryStatistics accumulator, mutated in place
     * @throws MetadataRepositoryException wrapping any {@link RepositoryException}
     */
    private void populateStatisticsFromJcr( Session session, String repositoryId,
                                            RepositoryStatistics repositoryStatistics )
        throws MetadataRepositoryException
    {
        // TODO: these may be best as running totals, maintained by observations on the properties in JCR

        try
        {
            QueryManager queryManager = session.getWorkspace().getQueryManager();

            // TODO: JCR-SQL2 query will not complete on a large repo in Jackrabbit 2.2.0 - see JCR-2835
            //    Using the JCR-SQL2 variants gives
            //      "org.apache.lucene.search.BooleanQuery$TooManyClauses: maxClauseCount is set to 1024"
//            String whereClause = "WHERE ISDESCENDANTNODE([/repositories/" + repositoryId + "/content])";
//            Query query = queryManager.createQuery( "SELECT size FROM [archiva:artifact] " + whereClause,
//                                                    Query.JCR_SQL2 );
            // NOTE(review): repositoryId is concatenated straight into the query/path string. It is
            // presumably an internally-validated identifier, but confirm callers cannot pass
            // attacker-controlled values (query/path injection) - TODO confirm.
            String whereClause = "WHERE jcr:path LIKE '/repositories/" + repositoryId + "/content/%'";
            Query query = queryManager.createQuery( "SELECT size FROM archiva:artifact " + whereClause, Query.SQL );

            QueryResult queryResult = query.execute();

            Map<String, Integer> totalByType = new HashMap<>();
            long totalSize = 0, totalArtifacts = 0;
            for ( Row row : JcrUtils.getRows( queryResult ) )
            {
                Node n = row.getNode();
                totalSize += row.getValue( "size" ).getLong();

                // Artifacts without a Maven facet are grouped under a catch-all type.
                String type;
                if ( n.hasNode( MavenArtifactFacet.FACET_ID ) )
                {
                    Node facetNode = n.getNode( MavenArtifactFacet.FACET_ID );
                    type = facetNode.getProperty( "type" ).getString();
                }
                else
                {
                    type = "Other";
                }
                Integer prev = totalByType.get( type );
                totalByType.put( type, prev != null ? prev + 1 : 1 );

                totalArtifacts++;
            }

            repositoryStatistics.setTotalArtifactCount( totalArtifacts );
            repositoryStatistics.setTotalArtifactFileSize( totalSize );
            for ( Map.Entry<String, Integer> entry : totalByType.entrySet() )
            {
                repositoryStatistics.setTotalCountForType( entry.getKey(), entry.getValue() );
            }

            // The query ordering is a trick to ensure that the size is correct, otherwise due to lazy init it will be -1
//            query = queryManager.createQuery( "SELECT * FROM [archiva:project] " + whereClause, Query.JCR_SQL2 );
            query = queryManager.createQuery( "SELECT * FROM archiva:project " + whereClause + " ORDER BY jcr:score",
                                              Query.SQL );
            repositoryStatistics.setTotalProjectCount( query.execute().getRows().getSize() );

//            query = queryManager.createQuery(
//                "SELECT * FROM [archiva:namespace] " + whereClause + " AND namespace IS NOT NULL", Query.JCR_SQL2 );
            query = queryManager.createQuery(
                "SELECT * FROM archiva:namespace " + whereClause + " AND namespace IS NOT NULL ORDER BY jcr:score",
                Query.SQL );
            repositoryStatistics.setTotalGroupCount( query.execute().getRows().getSize() );
        }
        catch ( RepositoryException e )
        {
            throw new MetadataRepositoryException( e.getMessage(), e );
        }
    }

    /**
     * Fallback totals gathering: walks every root namespace via the metadata APIs.
     *
     * @param metadataRepository the metadata store to walk
     * @param repositoryId       the repository identifier
     * @param repositoryStatistics accumulator, mutated in place
     * @throws MetadataRepositoryException wrapping any {@link MetadataResolutionException}
     */
    private void populateStatisticsFromRepositoryWalk( MetadataRepository metadataRepository, String repositoryId,
                                                       RepositoryStatistics repositoryStatistics )
        throws MetadataRepositoryException
    {
        try
        {
            for ( String ns : metadataRepository.getRootNamespaces( repositoryId ) )
            {
                walkRepository( metadataRepository, repositoryStatistics, repositoryId, ns );
            }
        }
        catch ( MetadataResolutionException e )
        {
            throw new MetadataRepositoryException( e.getMessage(), e );
        }
    }

    /**
     * Removes all stored statistics facets for the repository.
     *
     * @param metadataRepository the metadata store to modify
     * @param repositoryId       the repository identifier
     * @throws MetadataRepositoryException if removal fails
     */
    @Override
    public void deleteStatistics( MetadataRepository metadataRepository, String repositoryId )
        throws MetadataRepositoryException
    {
        metadataRepository.removeMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
    }

    /**
     * Returns all statistics whose scan timestamp falls within the (inclusive) range, newest first.
     * Facet names that do not parse as scan timestamps are logged and skipped.
     *
     * @param metadataRepository the metadata store to query
     * @param repositoryId       the repository identifier
     * @param startTime          lower bound, or {@code null} for no lower bound
     * @param endTime            upper bound, or {@code null} for no upper bound
     * @return matching statistics in reverse chronological order; never {@code null}
     * @throws MetadataRepositoryException if the store cannot be queried
     */
    @Override
    public List<RepositoryStatistics> getStatisticsInRange( MetadataRepository metadataRepository, String repositoryId,
                                                            Date startTime, Date endTime )
        throws MetadataRepositoryException
    {
        List<RepositoryStatistics> results = new ArrayList<>();
        List<String> list = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
        // Guard against a null facet list, consistent with getLastStatistics() above.
        if ( list == null )
        {
            return results;
        }
        Collections.sort( list, Collections.reverseOrder() );
        for ( String name : list )
        {
            try
            {
                Date date = createNameFormat().parse( name );
                if ( ( startTime == null || !date.before( startTime ) ) && ( endTime == null || !date.after(
                    endTime ) ) )
                {
                    RepositoryStatistics stats =
                        (RepositoryStatistics) metadataRepository.getMetadataFacet( repositoryId,
                                                                                    RepositoryStatistics.FACET_ID,
                                                                                    name );
                    results.add( stats );
                }
            }
            catch ( ParseException e )
            {
                // Pass the exception so the stack trace is retained, then continue with the next facet.
                log.error( "Invalid scan result found in the metadata repository: " + e.getMessage(), e );
                // continue and ignore this one
            }
        }
        return results;
    }

    /**
     * Creates a fresh UTC formatter for scan-timestamp facet names.
     * A new instance is returned per call because {@link SimpleDateFormat} is not thread-safe.
     *
     * @return a UTC {@link SimpleDateFormat} for {@link RepositoryStatistics#SCAN_TIMESTAMP_FORMAT}
     */
    private static SimpleDateFormat createNameFormat()
    {
        SimpleDateFormat fmt = new SimpleDateFormat( RepositoryStatistics.SCAN_TIMESTAMP_FORMAT );
        fmt.setTimeZone( UTC_TIME_ZONE );
        return fmt;
    }
}