This project has retired. For details please refer to its Attic page.
Source code
001package org.apache.archiva.metadata.repository.stats;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *   http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import org.apache.archiva.metadata.model.ArtifactMetadata;
023import org.apache.archiva.metadata.model.maven2.MavenArtifactFacet;
024import org.apache.archiva.metadata.repository.MetadataRepository;
025import org.apache.archiva.metadata.repository.MetadataRepositoryException;
026import org.apache.archiva.metadata.repository.MetadataResolutionException;
027import org.apache.commons.lang.time.StopWatch;
028import org.apache.jackrabbit.commons.JcrUtils;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031import org.springframework.stereotype.Service;
032
033import java.text.ParseException;
034import java.text.SimpleDateFormat;
035import java.util.ArrayList;
036import java.util.Collection;
037import java.util.Collections;
038import java.util.Date;
039import java.util.HashMap;
040import java.util.List;
041import java.util.Map;
042import java.util.TimeZone;
043import javax.jcr.Node;
044import javax.jcr.RepositoryException;
045import javax.jcr.Session;
046import javax.jcr.query.Query;
047import javax.jcr.query.QueryManager;
048import javax.jcr.query.QueryResult;
049import javax.jcr.query.Row;
050
051/**
052 *
053 */
054@Service("repositoryStatisticsManager#default")
055public class DefaultRepositoryStatisticsManager
056    implements RepositoryStatisticsManager
057{
058    private static final Logger log = LoggerFactory.getLogger( DefaultRepositoryStatisticsManager.class );
059
060    private static final TimeZone UTC_TIME_ZONE = TimeZone.getTimeZone( "UTC" );
061
062    @Override
063    public boolean hasStatistics( MetadataRepository metadataRepository, String repositoryId )
064        throws MetadataRepositoryException
065    {
066        return metadataRepository.hasMetadataFacet( repositoryId, RepositoryStatistics.FACET_ID );
067    }
068
069    @Override
070    public RepositoryStatistics getLastStatistics( MetadataRepository metadataRepository, String repositoryId )
071        throws MetadataRepositoryException
072    {
073        StopWatch stopWatch = new StopWatch();
074        stopWatch.start();
075        // TODO: consider a more efficient implementation that directly gets the last one from the content repository
076        List<String> scans = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
077        if ( scans == null )
078        {
079            return null;
080        }
081        Collections.sort( scans );
082        if ( !scans.isEmpty() )
083        {
084            String name = scans.get( scans.size() - 1 );
085            RepositoryStatistics repositoryStatistics =
086                RepositoryStatistics.class.cast( metadataRepository.getMetadataFacet( repositoryId, RepositoryStatistics.FACET_ID,
087                                                                            name ));
088            stopWatch.stop();
089            log.debug( "time to find last RepositoryStatistics: {} ms", stopWatch.getTime() );
090            return repositoryStatistics;
091        }
092        else
093        {
094            return null;
095        }
096    }
097
098    private void walkRepository( MetadataRepository metadataRepository, RepositoryStatistics stats, String repositoryId,
099                                 String ns )
100        throws MetadataResolutionException
101    {
102        for ( String namespace : metadataRepository.getNamespaces( repositoryId, ns ) )
103        {
104            walkRepository( metadataRepository, stats, repositoryId, ns + "." + namespace );
105        }
106
107        Collection<String> projects = metadataRepository.getProjects( repositoryId, ns );
108        if ( !projects.isEmpty() )
109        {
110            stats.setTotalGroupCount( stats.getTotalGroupCount() + 1 );
111            stats.setTotalProjectCount( stats.getTotalProjectCount() + projects.size() );
112
113            for ( String project : projects )
114            {
115                for ( String version : metadataRepository.getProjectVersions( repositoryId, ns, project ) )
116                {
117                    for ( ArtifactMetadata artifact : metadataRepository.getArtifacts( repositoryId, ns, project,
118                                                                                       version ) )
119                    {
120                        stats.setTotalArtifactCount( stats.getTotalArtifactCount() + 1 );
121                        stats.setTotalArtifactFileSize( stats.getTotalArtifactFileSize() + artifact.getSize() );
122
123                        MavenArtifactFacet facet =
124                            (MavenArtifactFacet) artifact.getFacet( MavenArtifactFacet.FACET_ID );
125                        if ( facet != null )
126                        {
127                            String type = facet.getType();
128                            stats.setTotalCountForType( type, stats.getTotalCountForType( type ) + 1 );
129                        }
130                    }
131                }
132            }
133        }
134    }
135
136    @Override
137    public void addStatisticsAfterScan( MetadataRepository metadataRepository, String repositoryId, Date startTime,
138                                        Date endTime, long totalFiles, long newFiles )
139        throws MetadataRepositoryException
140    {
141        RepositoryStatistics repositoryStatistics = new RepositoryStatistics();
142        repositoryStatistics.setRepositoryId( repositoryId );
143        repositoryStatistics.setScanStartTime( startTime );
144        repositoryStatistics.setScanEndTime( endTime );
145        repositoryStatistics.setTotalFileCount( totalFiles );
146        repositoryStatistics.setNewFileCount( newFiles );
147
148        // TODO
149        // In the future, instead of being tied to a scan we might want to record information in the fly based on
150        // events that are occurring. Even without these totals we could query much of the information on demand based
151        // on information from the metadata content repository. In the mean time, we lock information in at scan time.
152        // Note that if new types are later discoverable due to a code change or new plugin, historical stats will not
153        // be updated and the repository will need to be rescanned.
154
155        long startGather = System.currentTimeMillis();
156
157        // FIXME what about other implementations ?
158
159        if ( metadataRepository.canObtainAccess( Session.class ) )
160        {
161            // TODO: this is currently very raw and susceptible to changes in content structure. Should we instead
162            //   depend directly on the plugin and interrogate the JCR repository's knowledge of the structure?
163            populateStatisticsFromJcr( (Session) metadataRepository.obtainAccess( Session.class ), repositoryId,
164                                       repositoryStatistics );
165        }
166        else
167        {
168            // TODO:
169            //   if the file repository is used more permanently, we may seek a more efficient mechanism - e.g. we could
170            //   build an index, or store the aggregate information and update it on the fly. We can perhaps even walk
171            //   but retrieve less information to speed it up. In the mean time, we walk the repository using the
172            //   standard APIs
173            populateStatisticsFromRepositoryWalk( metadataRepository, repositoryId, repositoryStatistics );
174        }
175
176        log.info( "Gathering statistics executed in {} ms",  ( System.currentTimeMillis() - startGather ) );
177
178        metadataRepository.addMetadataFacet( repositoryId, repositoryStatistics );
179    }
180
181    private void populateStatisticsFromJcr( Session session, String repositoryId,
182                                            RepositoryStatistics repositoryStatistics )
183        throws MetadataRepositoryException
184    {
185        // TODO: these may be best as running totals, maintained by observations on the properties in JCR
186
187        try
188        {
189            QueryManager queryManager = session.getWorkspace().getQueryManager();
190
191            // TODO: JCR-SQL2 query will not complete on a large repo in Jackrabbit 2.2.0 - see JCR-2835
192            //    Using the JCR-SQL2 variants gives
193            //      "org.apache.lucene.search.BooleanQuery$TooManyClauses: maxClauseCount is set to 1024"
194//            String whereClause = "WHERE ISDESCENDANTNODE([/repositories/" + repositoryId + "/content])";
195//            Query query = queryManager.createQuery( "SELECT size FROM [archiva:artifact] " + whereClause,
196//                                                    Query.JCR_SQL2 );
197            String whereClause = "WHERE jcr:path LIKE '/repositories/" + repositoryId + "/content/%'";
198            Query query = queryManager.createQuery( "SELECT size FROM archiva:artifact " + whereClause, Query.SQL );
199
200            QueryResult queryResult = query.execute();
201
202            Map<String, Integer> totalByType = new HashMap<>();
203            long totalSize = 0, totalArtifacts = 0;
204            for ( Row row : JcrUtils.getRows( queryResult ) )
205            {
206                Node n = row.getNode();
207                totalSize += row.getValue( "size" ).getLong();
208
209                String type;
210                if ( n.hasNode( MavenArtifactFacet.FACET_ID ) )
211                {
212                    Node facetNode = n.getNode( MavenArtifactFacet.FACET_ID );
213                    type = facetNode.getProperty( "type" ).getString();
214                }
215                else
216                {
217                    type = "Other";
218                }
219                Integer prev = totalByType.get( type );
220                totalByType.put( type, prev != null ? prev + 1 : 1 );
221
222                totalArtifacts++;
223            }
224
225            repositoryStatistics.setTotalArtifactCount( totalArtifacts );
226            repositoryStatistics.setTotalArtifactFileSize( totalSize );
227            for ( Map.Entry<String, Integer> entry : totalByType.entrySet() )
228            {
229                repositoryStatistics.setTotalCountForType( entry.getKey(), entry.getValue() );
230            }
231
232            // The query ordering is a trick to ensure that the size is correct, otherwise due to lazy init it will be -1
233//            query = queryManager.createQuery( "SELECT * FROM [archiva:project] " + whereClause, Query.JCR_SQL2 );
234            query = queryManager.createQuery( "SELECT * FROM archiva:project " + whereClause + " ORDER BY jcr:score",
235                                              Query.SQL );
236            repositoryStatistics.setTotalProjectCount( query.execute().getRows().getSize() );
237
238//            query = queryManager.createQuery(
239//                "SELECT * FROM [archiva:namespace] " + whereClause + " AND namespace IS NOT NULL", Query.JCR_SQL2 );
240            query = queryManager.createQuery(
241                "SELECT * FROM archiva:namespace " + whereClause + " AND namespace IS NOT NULL ORDER BY jcr:score",
242                Query.SQL );
243            repositoryStatistics.setTotalGroupCount( query.execute().getRows().getSize() );
244        }
245        catch ( RepositoryException e )
246        {
247            throw new MetadataRepositoryException( e.getMessage(), e );
248        }
249    }
250
251    private void populateStatisticsFromRepositoryWalk( MetadataRepository metadataRepository, String repositoryId,
252                                                       RepositoryStatistics repositoryStatistics )
253        throws MetadataRepositoryException
254    {
255        try
256        {
257            for ( String ns : metadataRepository.getRootNamespaces( repositoryId ) )
258            {
259                walkRepository( metadataRepository, repositoryStatistics, repositoryId, ns );
260            }
261        }
262        catch ( MetadataResolutionException e )
263        {
264            throw new MetadataRepositoryException( e.getMessage(), e );
265        }
266    }
267
268    @Override
269    public void deleteStatistics( MetadataRepository metadataRepository, String repositoryId )
270        throws MetadataRepositoryException
271    {
272        metadataRepository.removeMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
273    }
274
275    @Override
276    public List<RepositoryStatistics> getStatisticsInRange( MetadataRepository metadataRepository, String repositoryId,
277                                                            Date startTime, Date endTime )
278        throws MetadataRepositoryException
279    {
280        List<RepositoryStatistics> results = new ArrayList<>();
281        List<String> list = metadataRepository.getMetadataFacets( repositoryId, RepositoryStatistics.FACET_ID );
282        Collections.sort( list, Collections.reverseOrder() );
283        for ( String name : list )
284        {
285            try
286            {
287                Date date = createNameFormat().parse( name );
288                if ( ( startTime == null || !date.before( startTime ) ) && ( endTime == null || !date.after(
289                    endTime ) ) )
290                {
291                    RepositoryStatistics stats =
292                        (RepositoryStatistics) metadataRepository.getMetadataFacet( repositoryId,
293                                                                                    RepositoryStatistics.FACET_ID,
294                                                                                    name );
295                    results.add( stats );
296                }
297            }
298            catch ( ParseException e )
299            {
300                log.error( "Invalid scan result found in the metadata repository: " + e.getMessage() );
301                // continue and ignore this one
302            }
303        }
304        return results;
305    }
306
307    private static SimpleDateFormat createNameFormat()
308    {
309        SimpleDateFormat fmt = new SimpleDateFormat( RepositoryStatistics.SCAN_TIMESTAMP_FORMAT );
310        fmt.setTimeZone( UTC_TIME_ZONE );
311        return fmt;
312    }
313}