package org.apache.archiva.reports.consumers;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import org.apache.archiva.admin.model.beans.ManagedRepository;
import org.apache.archiva.checksum.ChecksumAlgorithm;
import org.apache.archiva.checksum.ChecksummedFile;
import org.apache.archiva.configuration.ArchivaConfiguration;
import org.apache.archiva.configuration.ConfigurationNames;
import org.apache.archiva.configuration.FileTypes;
import org.apache.archiva.consumers.AbstractMonitoredConsumer;
import org.apache.archiva.consumers.ConsumerException;
import org.apache.archiva.consumers.KnownRepositoryContentConsumer;
import org.apache.archiva.metadata.model.ArtifactMetadata;
import org.apache.archiva.metadata.model.facets.RepositoryProblemFacet;
import org.apache.archiva.metadata.repository.MetadataRepository;
import org.apache.archiva.metadata.repository.MetadataRepositoryException;
import org.apache.archiva.metadata.repository.RepositorySession;
import org.apache.archiva.metadata.repository.RepositorySessionFactory;
import org.apache.archiva.metadata.repository.storage.RepositoryPathTranslator;
import org.apache.archiva.redback.components.registry.Registry;
import org.apache.archiva.redback.components.registry.RegistryListener;
import org.apache.commons.collections.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Service;

import javax.annotation.PostConstruct;
import javax.inject.Inject;
import javax.inject.Named;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;

/**
 * Searches the known SHA1 checksums in the artifact repository for potential duplicate artifacts.
 * <p>
 * TODO: no need for this to be a scanner - we can just query the database / content repository to get a full list
 */
@Service ( "knownRepositoryContentConsumer#duplicate-artifacts" )
@Scope ( "prototype" )
public class DuplicateArtifactsConsumer
    extends AbstractMonitoredConsumer
    implements KnownRepositoryContentConsumer, RegistryListener
{
    private Logger log = LoggerFactory.getLogger( DuplicateArtifactsConsumer.class );

    private String id = "duplicate-artifacts";

    private String description = "Check for Duplicate Artifacts via SHA1 Checksums";

    @Inject
    private ArchivaConfiguration configuration;

    @Inject
    private FileTypes filetypes;

    /**
     * FIXME: this could be multiple implementations and needs to be configured.
     */
    @Inject
    private RepositorySessionFactory repositorySessionFactory;

    private List<String> includes = new ArrayList<>();

    private File repositoryDir;

    private String repoId;

    /**
     * FIXME: needs to be selected based on the repository in question
     */
    @Inject
    @Named ( value = "repositoryPathTranslator#maven2" )
    private RepositoryPathTranslator pathTranslator;

    private RepositorySession repositorySession;

    @Override
    public String getId()
    {
        return id;
    }

    @Override
    public String getDescription()
    {
        return description;
    }

    @Override
    public List<String> getIncludes()
    {
        return includes;
    }

    @Override
    public List<String> getExcludes()
    {
        return Collections.emptyList();
    }

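    // A fresh metadata repository session is opened for each scan and released again in
    // completeScan(); repoId and repositoryDir are cached here for use by processFile().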
    @Override
    public void beginScan( ManagedRepository repo, Date whenGathered )
        throws ConsumerException
    {
        repoId = repo.getId();
        this.repositoryDir = new File( repo.getLocation() );
        repositorySession = repositorySessionFactory.createSession();
    }

    @Override
    public void beginScan( ManagedRepository repo, Date whenGathered, boolean executeOnEntireRepo )
        throws ConsumerException
    {
        beginScan( repo, whenGathered );
    }

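    // Duplicate detection per scanned file: compute the SHA-1 of the artifact on disk,
    // ask the metadata repository for all artifacts in this repository that share the
    // checksum, skip the scanned artifact itself, and record a RepositoryProblemFacet
    // for every remaining match.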
    @Override
    public void processFile( String path )
        throws ConsumerException
    {
        File artifactFile = new File( this.repositoryDir, path );

        // TODO: would be quicker to somehow make sure it ran after the update database consumer, or as a part of that
        //  perhaps could use an artifact context that is retained for all consumers? First in can set the SHA-1
        //  alternatively this could come straight from the storage resolver, which could populate the artifact metadata
        //  in the later parse call with the desired checksum and use that
        String checksumSha1;
        ChecksummedFile checksummedFile = new ChecksummedFile( artifactFile );
        try
        {
            checksumSha1 = checksummedFile.calculateChecksum( ChecksumAlgorithm.SHA1 );
        }
        catch ( IOException e )
        {
            throw new ConsumerException( e.getMessage(), e );
        }

        MetadataRepository metadataRepository = repositorySession.getRepository();

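        // The checksum lookup returns every artifact in this repository with the same
        // SHA-1, typically including the artifact currently being scanned, hence the
        // self-reference check in the loop below.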
        Collection<ArtifactMetadata> results;
        try
        {
            results = metadataRepository.getArtifactsByChecksum( repoId, checksumSha1 );
        }
        catch ( MetadataRepositoryException e )
        {
            repositorySession.close();
            throw new ConsumerException( e.getMessage(), e );
        }

        if ( CollectionUtils.isNotEmpty( results ) )
        {
            ArtifactMetadata originalArtifact;
            try
            {
                originalArtifact = pathTranslator.getArtifactForPath( repoId, path );
            }
            catch ( Exception e )
            {
                log.warn( "Not reporting problem for invalid artifact in checksum check: {}", e.getMessage() );
                return;
            }

            for ( ArtifactMetadata dupArtifact : results )
            {
                String id = path.substring( path.lastIndexOf( '/' ) + 1 );
                if ( dupArtifact.getId().equals( id ) && dupArtifact.getNamespace().equals(
                    originalArtifact.getNamespace() ) && dupArtifact.getProject().equals(
                    originalArtifact.getProject() ) && dupArtifact.getVersion().equals(
                    originalArtifact.getVersion() ) )
                {
                    // Skip reference to itself.

                    log.debug( "Not counting duplicate for artifact {} for path {}", dupArtifact, path );

                    continue;
                }

                RepositoryProblemFacet problem = new RepositoryProblemFacet();
                problem.setRepositoryId( repoId );
                problem.setNamespace( originalArtifact.getNamespace() );
                problem.setProject( originalArtifact.getProject() );
                problem.setVersion( originalArtifact.getVersion() );
                problem.setId( id );
                // FIXME: need to get the right storage resolver for the repository the dupe artifact is in, it might be
                //       a different type
                // FIXME: we need the project version here, not the artifact version
                problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath(
                    dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(),
                    dupArtifact.getId() ) );
                problem.setProblem( "duplicate-artifact" );

                try
                {
                    metadataRepository.addMetadataFacet( repoId, problem );
                }
                catch ( MetadataRepositoryException e )
                {
                    throw new ConsumerException( e.getMessage(), e );
                }
            }
        }
    }

    @Override
    public void processFile( String path, boolean executeOnEntireRepo )
        throws ConsumerException
    {
        processFile( path );
    }

    @Override
    public void completeScan()
    {
        repositorySession.close();
    }

    @Override
    public void completeScan( boolean executeOnEntireRepo )
    {
        completeScan();
    }

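    // Registry callback: when the repository scanning configuration changes, reload the
    // include patterns so newly configured artifact file types are picked up.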
    @Override
    public void afterConfigurationChange( Registry registry, String propertyName, Object propertyValue )
    {
        if ( ConfigurationNames.isRepositoryScanning( propertyName ) )
        {
            initIncludes();
        }
    }

    @Override
    public void beforeConfigurationChange( Registry registry, String propertyName, Object propertyValue )
    {
        /* do nothing */
    }

    private void initIncludes()
    {
        includes.clear();

        includes.addAll( filetypes.getFileTypePatterns( FileTypes.ARTIFACTS ) );
    }

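    // Once the bean is constructed and its dependencies injected, load the initial
    // include patterns and register as a listener for configuration changes.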
    @PostConstruct
    public void initialize()
    {
        initIncludes();
        configuration.addChangeListener( this );
    }
}
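
/*
 * Lifecycle sketch (illustrative only, not part of this class): the Archiva repository
 * scanner drives this consumer. The wiring below is an assumption made for illustration;
 * the scanner resolves the consumer from the Spring context by its id.
 *
 *   KnownRepositoryContentConsumer consumer = ...; // "knownRepositoryContentConsumer#duplicate-artifacts"
 *   consumer.beginScan( managedRepository, new Date() );
 *   for ( String path : pathsMatchingIncludes )    // paths filtered by getIncludes()
 *   {
 *       consumer.processFile( path );
 *   }
 *   consumer.completeScan();
 */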