This project has been retired. For details, please refer to its Attic page.
Source code
001package org.apache.archiva.reports.consumers;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *  http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import org.apache.archiva.checksum.ChecksumAlgorithm;
023import org.apache.archiva.checksum.ChecksummedFile;
024import org.apache.archiva.configuration.ArchivaConfiguration;
025import org.apache.archiva.configuration.ConfigurationNames;
026import org.apache.archiva.configuration.FileTypes;
027import org.apache.archiva.consumers.AbstractMonitoredConsumer;
028import org.apache.archiva.consumers.ConsumerException;
029import org.apache.archiva.consumers.KnownRepositoryContentConsumer;
030import org.apache.archiva.metadata.model.ArtifactMetadata;
031import org.apache.archiva.metadata.model.facets.RepositoryProblemFacet;
032import org.apache.archiva.metadata.repository.MetadataRepository;
033import org.apache.archiva.metadata.repository.MetadataRepositoryException;
034import org.apache.archiva.metadata.repository.RepositorySession;
035import org.apache.archiva.metadata.repository.RepositorySessionFactory;
036import org.apache.archiva.metadata.repository.storage.RepositoryPathTranslator;
037import org.apache.archiva.components.registry.Registry;
038import org.apache.archiva.components.registry.RegistryListener;
039import org.apache.archiva.repository.ManagedRepository;
040import org.apache.commons.collections4.CollectionUtils;
041import org.slf4j.Logger;
042import org.slf4j.LoggerFactory;
043import org.springframework.context.annotation.Scope;
044import org.springframework.stereotype.Service;
045
046import javax.annotation.PostConstruct;
047import javax.inject.Inject;
048import javax.inject.Named;
049import java.io.IOException;
050import java.nio.file.Path;
051import java.nio.file.Paths;
052import java.util.ArrayList;
053import java.util.Collection;
054import java.util.Collections;
055import java.util.Date;
056import java.util.List;
057
058/**
059 * Search the artifact repository of known SHA1 Checksums for potential duplicate artifacts.
060 * <p>
061 * TODO: no need for this to be a scanner - we can just query the database / content repository to get a full list
062 */
063@Service ( "knownRepositoryContentConsumer#duplicate-artifacts" )
064@Scope ( "prototype" )
065public class DuplicateArtifactsConsumer
066    extends AbstractMonitoredConsumer
067    implements KnownRepositoryContentConsumer, RegistryListener
068{
069    private Logger log = LoggerFactory.getLogger( DuplicateArtifactsConsumer.class );
070
071    private String id = "duplicate-artifacts";
072
073    private String description = "Check for Duplicate Artifacts via SHA1 Checksums";
074
075    @Inject
076    private ArchivaConfiguration configuration;
077
078    @Inject
079    private FileTypes filetypes;
080
081    /**
082     * FIXME: this could be multiple implementations and needs to be configured.
083     */
084    @Inject
085    private RepositorySessionFactory repositorySessionFactory;
086
087    private List<String> includes = new ArrayList<>();
088
089    private Path repositoryDir;
090
091    private String repoId;
092
093    /**
094     * FIXME: needs to be selected based on the repository in question
095     */
096    @Inject
097    @Named ( value = "repositoryPathTranslator#maven2" )
098    private RepositoryPathTranslator pathTranslator;
099
100
101    private RepositorySession repositorySession;
102
103    @Override
104    public String getId()
105    {
106        return id;
107    }
108
109    @Override
110    public String getDescription()
111    {
112        return description;
113    }
114
115    @Override
116    public List<String> getIncludes()
117    {
118        return includes;
119    }
120
121    @Override
122    public List<String> getExcludes()
123    {
124        return Collections.emptyList();
125    }
126
127    @Override
128    public void beginScan( ManagedRepository repo, Date whenGathered )
129        throws ConsumerException
130    {
131        repoId = repo.getId();
132        this.repositoryDir = Paths.get( repo.getLocation() );
133        try
134        {
135            repositorySession = repositorySessionFactory.createSession();
136        }
137        catch ( MetadataRepositoryException e )
138        {
139            e.printStackTrace( );
140        }
141    }
142
143    @Override
144    public void beginScan( ManagedRepository repo, Date whenGathered, boolean executeOnEntireRepo )
145        throws ConsumerException
146    {
147        beginScan( repo, whenGathered );
148    }
149
150    @Override
151    public void processFile( String path )
152        throws ConsumerException
153    {
154        Path artifactFile = this.repositoryDir.resolve( path );
155
156        // TODO: would be quicker to somehow make sure it ran after the update database consumer, or as a part of that
157        //  perhaps could use an artifact context that is retained for all consumers? First in can set the SHA-1
158        //  alternatively this could come straight from the storage resolver, which could populate the artifact metadata
159        //  in the later parse call with the desired checksum and use that
160        String checksumSha1;
161        ChecksummedFile checksummedFile = new ChecksummedFile( artifactFile);
162        try
163        {
164            checksumSha1 = checksummedFile.calculateChecksum( ChecksumAlgorithm.SHA1 );
165        }
166        catch ( IOException e )
167        {
168            throw new ConsumerException( e.getMessage(), e );
169        }
170
171        MetadataRepository metadataRepository = repositorySession.getRepository();
172
173        Collection<ArtifactMetadata> results;
174        try
175        {
176            results = metadataRepository.getArtifactsByChecksum(repositorySession , repoId, checksumSha1 );
177        }
178        catch ( MetadataRepositoryException e )
179        {
180            repositorySession.close();
181            throw new ConsumerException( e.getMessage(), e );
182        }
183
184        if ( CollectionUtils.isNotEmpty( results ) )
185        {
186            ArtifactMetadata originalArtifact;
187            try
188            {
189                originalArtifact = pathTranslator.getArtifactForPath( repoId, path );
190            }
191            catch ( Exception e )
192            {
193                log.warn( "Not reporting problem for invalid artifact in checksum check: {}", e.getMessage() );
194                return;
195            }
196
197            for ( ArtifactMetadata dupArtifact : results )
198            {
199                String id = path.substring( path.lastIndexOf( '/' ) + 1 );
200                if ( dupArtifact.getId().equals( id ) && dupArtifact.getNamespace().equals(
201                    originalArtifact.getNamespace() ) && dupArtifact.getProject().equals(
202                    originalArtifact.getProject() ) && dupArtifact.getVersion().equals(
203                    originalArtifact.getVersion() ) )
204                {
205                    // Skip reference to itself.
206
207                    log.debug( "Not counting duplicate for artifact {} for path {}", dupArtifact, path );
208
209                    continue;
210                }
211
212                RepositoryProblemFacet problem = new RepositoryProblemFacet();
213                problem.setRepositoryId( repoId );
214                problem.setNamespace( originalArtifact.getNamespace() );
215                problem.setProject( originalArtifact.getProject() );
216                problem.setVersion( originalArtifact.getVersion() );
217                problem.setId( id );
218                // FIXME: need to get the right storage resolver for the repository the dupe artifact is in, it might be
219                //       a different type
220                // FIXME: we need the project version here, not the artifact version
221                problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath(
222                    dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(),
223                    dupArtifact.getId() ) );
224                problem.setProblem( "duplicate-artifact" );
225
226                try
227                {
228                    metadataRepository.addMetadataFacet(repositorySession , repoId, problem );
229                }
230                catch ( MetadataRepositoryException e )
231                {
232                    throw new ConsumerException( e.getMessage(), e );
233                }
234            }
235        }
236    }
237
238    @Override
239    public void processFile( String path, boolean executeOnEntireRepo )
240        throws ConsumerException
241    {
242        processFile( path );
243    }
244
245    @Override
246    public void completeScan()
247    {
248        repositorySession.close();
249    }
250
251    @Override
252    public void completeScan( boolean executeOnEntireRepo )
253    {
254        completeScan();
255    }
256
257    @Override
258    public void afterConfigurationChange( Registry registry, String propertyName, Object propertyValue )
259    {
260        if ( ConfigurationNames.isRepositoryScanning( propertyName ) )
261        {
262            initIncludes();
263        }
264    }
265
266    @Override
267    public void beforeConfigurationChange( Registry registry, String propertyName, Object propertyValue )
268    {
269        /* do nothing */
270    }
271
272    private void initIncludes()
273    {
274        includes.clear();
275
276        includes.addAll( filetypes.getFileTypePatterns( FileTypes.ARTIFACTS ) );
277    }
278
279    @PostConstruct
280    public void initialize()
281    {
282        initIncludes();
283        configuration.addChangeListener( this );
284    }
285
286    public RepositorySessionFactory getRepositorySessionFactory( )
287    {
288        return repositorySessionFactory;
289    }
290
291    public void setRepositorySessionFactory( RepositorySessionFactory repositorySessionFactory )
292    {
293        this.repositorySessionFactory = repositorySessionFactory;
294    }
295}