This project has retired. For details please refer to its Attic page.
DefaultRepositoryStatisticsManager xref
View Javadoc
1   package org.apache.archiva.metadata.repository.stats;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.apache.archiva.metadata.model.ArtifactMetadata;
23  import org.apache.archiva.metadata.model.maven2.MavenArtifactFacet;
24  import org.apache.archiva.metadata.repository.MetadataRepository;
25  import org.apache.archiva.metadata.repository.MetadataRepositoryException;
26  import org.apache.archiva.metadata.repository.MetadataResolutionException;
27  import org.apache.commons.lang.time.StopWatch;
28  import org.apache.jackrabbit.commons.JcrUtils;
29  import org.slf4j.Logger;
30  import org.slf4j.LoggerFactory;
31  import org.springframework.stereotype.Service;
32  
33  import java.text.ParseException;
34  import java.text.SimpleDateFormat;
35  import java.util.ArrayList;
36  import java.util.Collection;
37  import java.util.Collections;
38  import java.util.Date;
39  import java.util.HashMap;
40  import java.util.List;
41  import java.util.Map;
42  import java.util.TimeZone;
43  import javax.jcr.Node;
44  import javax.jcr.RepositoryException;
45  import javax.jcr.Session;
46  import javax.jcr.query.Query;
47  import javax.jcr.query.QueryManager;
48  import javax.jcr.query.QueryResult;
49  import javax.jcr.query.Row;
50  
51  /**
52   *
53   */
54  @Service("repositoryStatisticsManager#default")
55  public class DefaultRepositoryStatisticsManager
56      implements RepositoryStatisticsManager
57  {
58      private static final Logger log = LoggerFactory.getLogger( DefaultRepositoryStatisticsManager.class );
59  
60      private static final TimeZone UTC_TIME_ZONE = TimeZone.getTimeZone( "UTC" );
61  
62      @Override
63      public boolean hasStatistics( MetadataRepository metadataRepository, String repositoryId )
64          throws MetadataRepositoryException
65      {
66          return metadataRepository.hasMetadataFacet( repositoryId, RepositoryStatistics.FACET_ID );
67      }
68  
69      @Override
70      public RepositoryStatistics getLastStatistics( MetadataRepository metadataRepository, String repositoryId )
71          throws MetadataRepositoryException
72      {
73          StopWatch stopWatch = new StopWatch();
74          stopWatch.start();
75          // TODO: consider a more efficient implementation that directly gets the last one from the content repository
76          List<String> scans = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
77          if ( scans == null )
78          {
79              return null;
80          }
81          Collections.sort( scans );
82          if ( !scans.isEmpty() )
83          {
84              String name = scans.get( scans.size() - 1 );
85              RepositoryStatistics repositoryStatistics =
86                  RepositoryStatistics.class.cast( metadataRepository.getMetadataFacet( repositoryId, RepositoryStatistics.FACET_ID,
87                                                                              name ));
88              stopWatch.stop();
89              log.debug( "time to find last RepositoryStatistics: {} ms", stopWatch.getTime() );
90              return repositoryStatistics;
91          }
92          else
93          {
94              return null;
95          }
96      }
97  
98      private void walkRepository( MetadataRepository metadataRepository, RepositoryStatistics stats, String repositoryId,
99                                   String ns )
100         throws MetadataResolutionException
101     {
102         for ( String namespace : metadataRepository.getNamespaces( repositoryId, ns ) )
103         {
104             walkRepository( metadataRepository, stats, repositoryId, ns + "." + namespace );
105         }
106 
107         Collection<String> projects = metadataRepository.getProjects( repositoryId, ns );
108         if ( !projects.isEmpty() )
109         {
110             stats.setTotalGroupCount( stats.getTotalGroupCount() + 1 );
111             stats.setTotalProjectCount( stats.getTotalProjectCount() + projects.size() );
112 
113             for ( String project : projects )
114             {
115                 for ( String version : metadataRepository.getProjectVersions( repositoryId, ns, project ) )
116                 {
117                     for ( ArtifactMetadata artifact : metadataRepository.getArtifacts( repositoryId, ns, project,
118                                                                                        version ) )
119                     {
120                         stats.setTotalArtifactCount( stats.getTotalArtifactCount() + 1 );
121                         stats.setTotalArtifactFileSize( stats.getTotalArtifactFileSize() + artifact.getSize() );
122 
123                         MavenArtifactFacet facet =
124                             (MavenArtifactFacet) artifact.getFacet( MavenArtifactFacet.FACET_ID );
125                         if ( facet != null )
126                         {
127                             String type = facet.getType();
128                             stats.setTotalCountForType( type, stats.getTotalCountForType( type ) + 1 );
129                         }
130                     }
131                 }
132             }
133         }
134     }
135 
136     @Override
137     public void addStatisticsAfterScan( MetadataRepository metadataRepository, String repositoryId, Date startTime,
138                                         Date endTime, long totalFiles, long newFiles )
139         throws MetadataRepositoryException
140     {
141         RepositoryStatistics repositoryStatistics = new RepositoryStatistics();
142         repositoryStatistics.setRepositoryId( repositoryId );
143         repositoryStatistics.setScanStartTime( startTime );
144         repositoryStatistics.setScanEndTime( endTime );
145         repositoryStatistics.setTotalFileCount( totalFiles );
146         repositoryStatistics.setNewFileCount( newFiles );
147 
148         // TODO
149         // In the future, instead of being tied to a scan we might want to record information in the fly based on
150         // events that are occurring. Even without these totals we could query much of the information on demand based
151         // on information from the metadata content repository. In the mean time, we lock information in at scan time.
152         // Note that if new types are later discoverable due to a code change or new plugin, historical stats will not
153         // be updated and the repository will need to be rescanned.
154 
155         long startGather = System.currentTimeMillis();
156 
157         // FIXME what about other implementations ?
158 
159         if ( metadataRepository.canObtainAccess( Session.class ) )
160         {
161             // TODO: this is currently very raw and susceptible to changes in content structure. Should we instead
162             //   depend directly on the plugin and interrogate the JCR repository's knowledge of the structure?
163             populateStatisticsFromJcr( (Session) metadataRepository.obtainAccess( Session.class ), repositoryId,
164                                        repositoryStatistics );
165         }
166         else
167         {
168             // TODO:
169             //   if the file repository is used more permanently, we may seek a more efficient mechanism - e.g. we could
170             //   build an index, or store the aggregate information and update it on the fly. We can perhaps even walk
171             //   but retrieve less information to speed it up. In the mean time, we walk the repository using the
172             //   standard APIs
173             populateStatisticsFromRepositoryWalk( metadataRepository, repositoryId, repositoryStatistics );
174         }
175 
176         log.info( "Gathering statistics executed in {} ms",  ( System.currentTimeMillis() - startGather ) );
177 
178         metadataRepository.addMetadataFacet( repositoryId, repositoryStatistics );
179     }
180 
181     private void populateStatisticsFromJcr( Session session, String repositoryId,
182                                             RepositoryStatistics repositoryStatistics )
183         throws MetadataRepositoryException
184     {
185         // TODO: these may be best as running totals, maintained by observations on the properties in JCR
186 
187         try
188         {
189             QueryManager queryManager = session.getWorkspace().getQueryManager();
190 
191             // TODO: JCR-SQL2 query will not complete on a large repo in Jackrabbit 2.2.0 - see JCR-2835
192             //    Using the JCR-SQL2 variants gives
193             //      "org.apache.lucene.search.BooleanQuery$TooManyClauses: maxClauseCount is set to 1024"
194 //            String whereClause = "WHERE ISDESCENDANTNODE([/repositories/" + repositoryId + "/content])";
195 //            Query query = queryManager.createQuery( "SELECT size FROM [archiva:artifact] " + whereClause,
196 //                                                    Query.JCR_SQL2 );
197             String whereClause = "WHERE jcr:path LIKE '/repositories/" + repositoryId + "/content/%'";
198             Query query = queryManager.createQuery( "SELECT size FROM archiva:artifact " + whereClause, Query.SQL );
199 
200             QueryResult queryResult = query.execute();
201 
202             Map<String, Integer> totalByType = new HashMap<>();
203             long totalSize = 0, totalArtifacts = 0;
204             for ( Row row : JcrUtils.getRows( queryResult ) )
205             {
206                 Node n = row.getNode();
207                 totalSize += row.getValue( "size" ).getLong();
208 
209                 String type;
210                 if ( n.hasNode( MavenArtifactFacet.FACET_ID ) )
211                 {
212                     Node facetNode = n.getNode( MavenArtifactFacet.FACET_ID );
213                     type = facetNode.getProperty( "type" ).getString();
214                 }
215                 else
216                 {
217                     type = "Other";
218                 }
219                 Integer prev = totalByType.get( type );
220                 totalByType.put( type, prev != null ? prev + 1 : 1 );
221 
222                 totalArtifacts++;
223             }
224 
225             repositoryStatistics.setTotalArtifactCount( totalArtifacts );
226             repositoryStatistics.setTotalArtifactFileSize( totalSize );
227             for ( Map.Entry<String, Integer> entry : totalByType.entrySet() )
228             {
229                 repositoryStatistics.setTotalCountForType( entry.getKey(), entry.getValue() );
230             }
231 
232             // The query ordering is a trick to ensure that the size is correct, otherwise due to lazy init it will be -1
233 //            query = queryManager.createQuery( "SELECT * FROM [archiva:project] " + whereClause, Query.JCR_SQL2 );
234             query = queryManager.createQuery( "SELECT * FROM archiva:project " + whereClause + " ORDER BY jcr:score",
235                                               Query.SQL );
236             repositoryStatistics.setTotalProjectCount( query.execute().getRows().getSize() );
237 
238 //            query = queryManager.createQuery(
239 //                "SELECT * FROM [archiva:namespace] " + whereClause + " AND namespace IS NOT NULL", Query.JCR_SQL2 );
240             query = queryManager.createQuery(
241                 "SELECT * FROM archiva:namespace " + whereClause + " AND namespace IS NOT NULL ORDER BY jcr:score",
242                 Query.SQL );
243             repositoryStatistics.setTotalGroupCount( query.execute().getRows().getSize() );
244         }
245         catch ( RepositoryException e )
246         {
247             throw new MetadataRepositoryException( e.getMessage(), e );
248         }
249     }
250 
251     private void populateStatisticsFromRepositoryWalk( MetadataRepository metadataRepository, String repositoryId,
252                                                        RepositoryStatistics repositoryStatistics )
253         throws MetadataRepositoryException
254     {
255         try
256         {
257             for ( String ns : metadataRepository.getRootNamespaces( repositoryId ) )
258             {
259                 walkRepository( metadataRepository, repositoryStatistics, repositoryId, ns );
260             }
261         }
262         catch ( MetadataResolutionException e )
263         {
264             throw new MetadataRepositoryException( e.getMessage(), e );
265         }
266     }
267 
268     @Override
269     public void deleteStatistics( MetadataRepository metadataRepository, String repositoryId )
270         throws MetadataRepositoryException
271     {
272         metadataRepository.removeMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
273     }
274 
275     @Override
276     public List<RepositoryStatistics> getStatisticsInRange( MetadataRepository metadataRepository, String repositoryId,
277                                                             Date startTime, Date endTime )
278         throws MetadataRepositoryException
279     {
280         List<RepositoryStatistics> results = new ArrayList<>();
281         List<String> list = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
282         Collections.sort( list, Collections.reverseOrder() );
283         for ( String name : list )
284         {
285             try
286             {
287                 Date date = createNameFormat().parse( name );
288                 if ( ( startTime == null || !date.before( startTime ) ) && ( endTime == null || !date.after(
289                     endTime ) ) )
290                 {
291                     RepositoryStatistics stats =
292                         (RepositoryStatistics) metadataRepository.getMetadataFacet( repositoryId,
293                                                                                     RepositoryStatistics.FACET_ID,
294                                                                                     name );
295                     results.add( stats );
296                 }
297             }
298             catch ( ParseException e )
299             {
300                 log.error( "Invalid scan result found in the metadata repository: " + e.getMessage() );
301                 // continue and ignore this one
302             }
303         }
304         return results;
305     }
306 
307     private static SimpleDateFormat createNameFormat()
308     {
309         SimpleDateFormat fmt = new SimpleDateFormat( RepositoryStatistics.SCAN_TIMESTAMP_FORMAT );
310         fmt.setTimeZone( UTC_TIME_ZONE );
311         return fmt;
312     }
313 }