Source code
package org.apache.archiva.reports.consumers;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import org.apache.archiva.admin.model.beans.ManagedRepository;
import org.apache.archiva.checksum.ChecksumAlgorithm;
import org.apache.archiva.checksum.ChecksummedFile;
import org.apache.archiva.configuration.ArchivaConfiguration;
import org.apache.archiva.configuration.ConfigurationNames;
import org.apache.archiva.configuration.FileTypes;
import org.apache.archiva.consumers.AbstractMonitoredConsumer;
import org.apache.archiva.consumers.ConsumerException;
import org.apache.archiva.consumers.KnownRepositoryContentConsumer;
import org.apache.archiva.metadata.model.ArtifactMetadata;
import org.apache.archiva.metadata.repository.MetadataRepository;
import org.apache.archiva.metadata.repository.MetadataRepositoryException;
import org.apache.archiva.metadata.repository.RepositorySession;
import org.apache.archiva.metadata.repository.RepositorySessionFactory;
import org.apache.archiva.metadata.repository.storage.RepositoryPathTranslator;
import org.apache.archiva.redback.components.registry.Registry;
import org.apache.archiva.redback.components.registry.RegistryListener;
import org.apache.archiva.metadata.model.facets.RepositoryProblemFacet;
import org.apache.commons.collections.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Service;

import javax.annotation.PostConstruct;
import javax.inject.Inject;
import javax.inject.Named;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;

/**
 * Searches the artifact repository for artifacts that share a known SHA-1 checksum and reports them as potential duplicates.
 * <p>
 * TODO: no need for this to be a scanner - we can just query the database / content repository to get a full list
 */
@Service ( "knownRepositoryContentConsumer#duplicate-artifacts" )
@Scope ( "prototype" )
public class DuplicateArtifactsConsumer
    extends AbstractMonitoredConsumer
    implements KnownRepositoryContentConsumer, RegistryListener
{
    private Logger log = LoggerFactory.getLogger( DuplicateArtifactsConsumer.class );

    private String id = "duplicate-artifacts";

    private String description = "Check for Duplicate Artifacts via SHA1 Checksums";

    @Inject
    private ArchivaConfiguration configuration;

    @Inject
    private FileTypes filetypes;

    /**
     * FIXME: this could be multiple implementations and needs to be configured.
     */
    @Inject
    private RepositorySessionFactory repositorySessionFactory;

    private List<String> includes = new ArrayList<>();

    private File repositoryDir;

    private String repoId;

    /**
     * FIXME: needs to be selected based on the repository in question
     */
    @Inject
    @Named ( value = "repositoryPathTranslator#maven2" )
    private RepositoryPathTranslator pathTranslator;

    private RepositorySession repositorySession;

    @Override
    public String getId()
    {
        return id;
    }

    @Override
    public String getDescription()
    {
        return description;
    }

    @Override
    public List<String> getIncludes()
    {
        return includes;
    }

    @Override
    public List<String> getExcludes()
    {
        return Collections.emptyList();
    }

    @Override
    public void beginScan( ManagedRepository repo, Date whenGathered )
        throws ConsumerException
    {
        repoId = repo.getId();
        this.repositoryDir = new File( repo.getLocation() );
        repositorySession = repositorySessionFactory.createSession();
    }

    @Override
    public void beginScan( ManagedRepository repo, Date whenGathered, boolean executeOnEntireRepo )
        throws ConsumerException
    {
        beginScan( repo, whenGathered );
    }

    @Override
    public void processFile( String path )
        throws ConsumerException
    {
        File artifactFile = new File( this.repositoryDir, path );

        // TODO: would be quicker to somehow make sure it ran after the update database consumer, or as a part of that
        //  perhaps could use an artifact context that is retained for all consumers? First in can set the SHA-1
        //  alternatively this could come straight from the storage resolver, which could populate the artifact metadata
        //  in the later parse call with the desired checksum and use that
        String checksumSha1;
        ChecksummedFile checksummedFile = new ChecksummedFile( artifactFile );
        try
        {
            checksumSha1 = checksummedFile.calculateChecksum( ChecksumAlgorithm.SHA1 );
        }
        catch ( IOException e )
        {
            throw new ConsumerException( e.getMessage(), e );
        }

        MetadataRepository metadataRepository = repositorySession.getRepository();

        Collection<ArtifactMetadata> results;
        try
        {
            results = metadataRepository.getArtifactsByChecksum( repoId, checksumSha1 );
        }
        catch ( MetadataRepositoryException e )
        {
            repositorySession.close();
            throw new ConsumerException( e.getMessage(), e );
        }

        if ( CollectionUtils.isNotEmpty( results ) )
        {
            ArtifactMetadata originalArtifact;
            try
            {
                originalArtifact = pathTranslator.getArtifactForPath( repoId, path );
            }
            catch ( Exception e )
            {
                log.warn( "Not reporting problem for invalid artifact in checksum check: {}", e.getMessage() );
                return;
            }

            for ( ArtifactMetadata dupArtifact : results )
            {
                String id = path.substring( path.lastIndexOf( '/' ) + 1 );
                if ( dupArtifact.getId().equals( id ) && dupArtifact.getNamespace().equals(
                    originalArtifact.getNamespace() ) && dupArtifact.getProject().equals(
                    originalArtifact.getProject() ) && dupArtifact.getVersion().equals(
                    originalArtifact.getVersion() ) )
                {
                    // Skip reference to itself.

                    log.debug( "Not counting duplicate for artifact {} for path {}", dupArtifact, path );

                    continue;
                }

                RepositoryProblemFacet problem = new RepositoryProblemFacet();
                problem.setRepositoryId( repoId );
                problem.setNamespace( originalArtifact.getNamespace() );
                problem.setProject( originalArtifact.getProject() );
                problem.setVersion( originalArtifact.getVersion() );
                problem.setId( id );
                // FIXME: need to get the right storage resolver for the repository the dupe artifact is in, it might be
                //       a different type
                // FIXME: we need the project version here, not the artifact version
                problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath(
                    dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(),
                    dupArtifact.getId() ) );
                problem.setProblem( "duplicate-artifact" );

                try
                {
                    metadataRepository.addMetadataFacet( repoId, problem );
                }
                catch ( MetadataRepositoryException e )
                {
                    throw new ConsumerException( e.getMessage(), e );
                }
            }
        }
    }

    @Override
    public void processFile( String path, boolean executeOnEntireRepo )
        throws ConsumerException
    {
        processFile( path );
    }

    @Override
    public void completeScan()
    {
        repositorySession.close();
    }

    @Override
    public void completeScan( boolean executeOnEntireRepo )
    {
        completeScan();
    }

    @Override
    public void afterConfigurationChange( Registry registry, String propertyName, Object propertyValue )
    {
        if ( ConfigurationNames.isRepositoryScanning( propertyName ) )
        {
            initIncludes();
        }
    }

    @Override
    public void beforeConfigurationChange( Registry registry, String propertyName, Object propertyValue )
    {
        /* do nothing */
    }

    private void initIncludes()
    {
        includes.clear();

        includes.addAll( filetypes.getFileTypePatterns( FileTypes.ARTIFACTS ) );
    }

    @PostConstruct
    public void initialize()
    {
        initIncludes();
        configuration.addChangeListener( this );
    }
}
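
For context, Archiva's repository scanner drives this consumer through the KnownRepositoryContentConsumer lifecycle: beginScan(), then one processFile() call per path matching getIncludes(), then completeScan(). The sketch below only illustrates that call sequence and is not Archiva code: the repository id, location, and artifact path are placeholders, the consumer is assumed to be fetched from the Spring context so its @Inject dependencies are wired, and ManagedRepository is assumed to be a plain bean with a default constructor and setters.

import org.apache.archiva.admin.model.beans.ManagedRepository;
import org.apache.archiva.consumers.ConsumerException;
import org.apache.archiva.consumers.KnownRepositoryContentConsumer;
import org.springframework.context.ApplicationContext;

import java.util.Date;

// Hypothetical driver sketch (not part of Archiva); all literals below are placeholders.
public class DuplicateArtifactsScanSketch
{
    public void scanOneArtifact( ApplicationContext applicationContext )
        throws ConsumerException
    {
        // assumption: ManagedRepository is a plain bean with a default constructor and setters
        ManagedRepository repo = new ManagedRepository();
        repo.setId( "internal" );
        repo.setLocation( "/path/to/repositories/internal" );

        // the consumer comes from the Spring context so its @Inject dependencies are already wired
        KnownRepositoryContentConsumer consumer =
            applicationContext.getBean( "knownRepositoryContentConsumer#duplicate-artifacts",
                                        KnownRepositoryContentConsumer.class );

        consumer.beginScan( repo, new Date() );     // opens a RepositorySession
        try
        {
            // one call per artifact path matching getIncludes(); a match on an existing
            // SHA-1 checksum is recorded as a "duplicate-artifact" RepositoryProblemFacet
            consumer.processFile( "org/example/example-artifact/1.0/example-artifact-1.0.jar" );
        }
        finally
        {
            consumer.completeScan();                // closes the RepositorySession
        }
    }
}

Calling completeScan() in a finally block ensures the RepositorySession opened in beginScan() is released even if a processFile() call throws a ConsumerException.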