This project has retired. For details please refer to its Attic page.
DuplicateArtifactsConsumer xref
View Javadoc
1   package org.apache.archiva.reports.consumers;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *  http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.apache.archiva.checksum.ChecksumAlgorithm;
23  import org.apache.archiva.checksum.ChecksummedFile;
24  import org.apache.archiva.configuration.ArchivaConfiguration;
25  import org.apache.archiva.configuration.ConfigurationNames;
26  import org.apache.archiva.configuration.FileTypes;
27  import org.apache.archiva.consumers.AbstractMonitoredConsumer;
28  import org.apache.archiva.consumers.ConsumerException;
29  import org.apache.archiva.consumers.KnownRepositoryContentConsumer;
30  import org.apache.archiva.metadata.model.ArtifactMetadata;
31  import org.apache.archiva.metadata.model.facets.RepositoryProblemFacet;
32  import org.apache.archiva.metadata.repository.MetadataRepository;
33  import org.apache.archiva.metadata.repository.MetadataRepositoryException;
34  import org.apache.archiva.metadata.repository.RepositorySession;
35  import org.apache.archiva.metadata.repository.RepositorySessionFactory;
36  import org.apache.archiva.metadata.repository.storage.RepositoryPathTranslator;
37  import org.apache.archiva.components.registry.Registry;
38  import org.apache.archiva.components.registry.RegistryListener;
39  import org.apache.archiva.repository.ManagedRepository;
40  import org.apache.commons.collections4.CollectionUtils;
41  import org.slf4j.Logger;
42  import org.slf4j.LoggerFactory;
43  import org.springframework.context.annotation.Scope;
44  import org.springframework.stereotype.Service;
45  
46  import javax.annotation.PostConstruct;
47  import javax.inject.Inject;
48  import javax.inject.Named;
49  import java.io.IOException;
50  import java.nio.file.Path;
51  import java.nio.file.Paths;
52  import java.util.ArrayList;
53  import java.util.Collection;
54  import java.util.Collections;
55  import java.util.Date;
56  import java.util.List;
57  
58  /**
59   * Search the artifact repository of known SHA1 Checksums for potential duplicate artifacts.
60   * <p>
61   * TODO: no need for this to be a scanner - we can just query the database / content repository to get a full list
62   */
63  @Service ( "knownRepositoryContentConsumer#duplicate-artifacts" )
64  @Scope ( "prototype" )
65  public class DuplicateArtifactsConsumer
66      extends AbstractMonitoredConsumer
67      implements KnownRepositoryContentConsumer, RegistryListener
68  {
69      private Logger log = LoggerFactory.getLogger( DuplicateArtifactsConsumer.class );
70  
71      private String id = "duplicate-artifacts";
72  
73      private String description = "Check for Duplicate Artifacts via SHA1 Checksums";
74  
75      @Inject
76      private ArchivaConfiguration configuration;
77  
78      @Inject
79      private FileTypes filetypes;
80  
81      /**
82       * FIXME: this could be multiple implementations and needs to be configured.
83       */
84      @Inject
85      private RepositorySessionFactory repositorySessionFactory;
86  
87      private List<String> includes = new ArrayList<>();
88  
89      private Path repositoryDir;
90  
91      private String repoId;
92  
93      /**
94       * FIXME: needs to be selected based on the repository in question
95       */
96      @Inject
97      @Named ( value = "repositoryPathTranslator#maven2" )
98      private RepositoryPathTranslator pathTranslator;
99  
100 
101     private RepositorySession repositorySession;
102 
103     @Override
104     public String getId()
105     {
106         return id;
107     }
108 
109     @Override
110     public String getDescription()
111     {
112         return description;
113     }
114 
115     @Override
116     public List<String> getIncludes()
117     {
118         return includes;
119     }
120 
121     @Override
122     public List<String> getExcludes()
123     {
124         return Collections.emptyList();
125     }
126 
127     @Override
128     public void beginScan( ManagedRepository repo, Date whenGathered )
129         throws ConsumerException
130     {
131         repoId = repo.getId();
132         this.repositoryDir = Paths.get( repo.getLocation() );
133         try
134         {
135             repositorySession = repositorySessionFactory.createSession();
136         }
137         catch ( MetadataRepositoryException e )
138         {
139             e.printStackTrace( );
140         }
141     }
142 
143     @Override
144     public void beginScan( ManagedRepository repo, Date whenGathered, boolean executeOnEntireRepo )
145         throws ConsumerException
146     {
147         beginScan( repo, whenGathered );
148     }
149 
150     @Override
151     public void processFile( String path )
152         throws ConsumerException
153     {
154         Path artifactFile = this.repositoryDir.resolve( path );
155 
156         // TODO: would be quicker to somehow make sure it ran after the update database consumer, or as a part of that
157         //  perhaps could use an artifact context that is retained for all consumers? First in can set the SHA-1
158         //  alternatively this could come straight from the storage resolver, which could populate the artifact metadata
159         //  in the later parse call with the desired checksum and use that
160         String checksumSha1;
161         ChecksummedFile.html#ChecksummedFile">ChecksummedFile checksummedFile = new ChecksummedFile( artifactFile);
162         try
163         {
164             checksumSha1 = checksummedFile.calculateChecksum( ChecksumAlgorithm.SHA1 );
165         }
166         catch ( IOException e )
167         {
168             throw new ConsumerException( e.getMessage(), e );
169         }
170 
171         MetadataRepository metadataRepository = repositorySession.getRepository();
172 
173         Collection<ArtifactMetadata> results;
174         try
175         {
176             results = metadataRepository.getArtifactsByChecksum(repositorySession , repoId, checksumSha1 );
177         }
178         catch ( MetadataRepositoryException e )
179         {
180             repositorySession.close();
181             throw new ConsumerException( e.getMessage(), e );
182         }
183 
184         if ( CollectionUtils.isNotEmpty( results ) )
185         {
186             ArtifactMetadata originalArtifact;
187             try
188             {
189                 originalArtifact = pathTranslator.getArtifactForPath( repoId, path );
190             }
191             catch ( Exception e )
192             {
193                 log.warn( "Not reporting problem for invalid artifact in checksum check: {}", e.getMessage() );
194                 return;
195             }
196 
197             for ( ArtifactMetadata dupArtifact : results )
198             {
199                 String id = path.substring( path.lastIndexOf( '/' ) + 1 );
200                 if ( dupArtifact.getId().equals( id ) && dupArtifact.getNamespace().equals(
201                     originalArtifact.getNamespace() ) && dupArtifact.getProject().equals(
202                     originalArtifact.getProject() ) && dupArtifact.getVersion().equals(
203                     originalArtifact.getVersion() ) )
204                 {
205                     // Skip reference to itself.
206 
207                     log.debug( "Not counting duplicate for artifact {} for path {}", dupArtifact, path );
208 
209                     continue;
210                 }
211 
212                 RepositoryProblemFacetepositoryProblemFacet.html#RepositoryProblemFacet">RepositoryProblemFacet problem = new RepositoryProblemFacet();
213                 problem.setRepositoryId( repoId );
214                 problem.setNamespace( originalArtifact.getNamespace() );
215                 problem.setProject( originalArtifact.getProject() );
216                 problem.setVersion( originalArtifact.getVersion() );
217                 problem.setId( id );
218                 // FIXME: need to get the right storage resolver for the repository the dupe artifact is in, it might be
219                 //       a different type
220                 // FIXME: we need the project version here, not the artifact version
221                 problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath(
222                     dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(),
223                     dupArtifact.getId() ) );
224                 problem.setProblem( "duplicate-artifact" );
225 
226                 try
227                 {
228                     metadataRepository.addMetadataFacet(repositorySession , repoId, problem );
229                 }
230                 catch ( MetadataRepositoryException e )
231                 {
232                     throw new ConsumerException( e.getMessage(), e );
233                 }
234             }
235         }
236     }
237 
238     @Override
239     public void processFile( String path, boolean executeOnEntireRepo )
240         throws ConsumerException
241     {
242         processFile( path );
243     }
244 
245     @Override
246     public void completeScan()
247     {
248         repositorySession.close();
249     }
250 
251     @Override
252     public void completeScan( boolean executeOnEntireRepo )
253     {
254         completeScan();
255     }
256 
257     @Override
258     public void afterConfigurationChange( Registry registry, String propertyName, Object propertyValue )
259     {
260         if ( ConfigurationNames.isRepositoryScanning( propertyName ) )
261         {
262             initIncludes();
263         }
264     }
265 
266     @Override
267     public void beforeConfigurationChange( Registry registry, String propertyName, Object propertyValue )
268     {
269         /* do nothing */
270     }
271 
272     private void initIncludes()
273     {
274         includes.clear();
275 
276         includes.addAll( filetypes.getFileTypePatterns( FileTypes.ARTIFACTS ) );
277     }
278 
279     @PostConstruct
280     public void initialize()
281     {
282         initIncludes();
283         configuration.addChangeListener( this );
284     }
285 
286     public RepositorySessionFactory getRepositorySessionFactory( )
287     {
288         return repositorySessionFactory;
289     }
290 
291     public void setRepositorySessionFactory( RepositorySessionFactory repositorySessionFactory )
292     {
293         this.repositorySessionFactory = repositorySessionFactory;
294     }
295 }