package org.apache.archiva.reports.consumers;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import org.apache.archiva.checksum.ChecksumAlgorithm;
import org.apache.archiva.checksum.ChecksummedFile;
import org.apache.archiva.configuration.ArchivaConfiguration;
import org.apache.archiva.configuration.ConfigurationNames;
import org.apache.archiva.configuration.FileTypes;
import org.apache.archiva.consumers.AbstractMonitoredConsumer;
import org.apache.archiva.consumers.ConsumerException;
import org.apache.archiva.consumers.KnownRepositoryContentConsumer;
import org.apache.archiva.metadata.model.ArtifactMetadata;
import org.apache.archiva.metadata.model.facets.RepositoryProblemFacet;
import org.apache.archiva.metadata.repository.MetadataRepository;
import org.apache.archiva.metadata.repository.MetadataRepositoryException;
import org.apache.archiva.metadata.repository.RepositorySession;
import org.apache.archiva.metadata.repository.RepositorySessionFactory;
import org.apache.archiva.metadata.repository.storage.RepositoryPathTranslator;
import org.apache.archiva.components.registry.Registry;
import org.apache.archiva.components.registry.RegistryListener;
import org.apache.archiva.repository.ManagedRepository;
import org.apache.commons.collections4.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Service;

import javax.annotation.PostConstruct;
import javax.inject.Inject;
import javax.inject.Named;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;

/**
 * Search the artifact repository of known SHA1 Checksums for potential duplicate artifacts.
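 * Duplicates are reported as repository problem facets in the metadata repository.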
 * <p>
 * TODO: no need for this to be a scanner - we can just query the database / content repository to get a full list
 */
@Service ( "knownRepositoryContentConsumer#duplicate-artifacts" )
@Scope ( "prototype" )
public class DuplicateArtifactsConsumer
    extends AbstractMonitoredConsumer
    implements KnownRepositoryContentConsumer, RegistryListener
{
    private Logger log = LoggerFactory.getLogger( DuplicateArtifactsConsumer.class );

    private String id = "duplicate-artifacts";

    private String description = "Check for Duplicate Artifacts via SHA1 Checksums";

    @Inject
    private ArchivaConfiguration configuration;

    @Inject
    private FileTypes filetypes;

    /**
     * FIXME: this could be multiple implementations and needs to be configured.
     */
    @Inject
    private RepositorySessionFactory repositorySessionFactory;

    private List<String> includes = new ArrayList<>();

    private Path repositoryDir;

    private String repoId;

    /**
     * FIXME: needs to be selected based on the repository in question
     */
    @Inject
    @Named ( value = "repositoryPathTranslator#maven2" )
    private RepositoryPathTranslator pathTranslator;

    private RepositorySession repositorySession;

    @Override
    public String getId()
    {
        return id;
    }

    @Override
    public String getDescription()
    {
        return description;
    }

    @Override
    public List<String> getIncludes()
    {
        return includes;
    }

    @Override
    public List<String> getExcludes()
    {
        return Collections.emptyList();
    }

    @Override
    public void beginScan( ManagedRepository repo, Date whenGathered )
        throws ConsumerException
    {
        repoId = repo.getId();
        this.repositoryDir = Paths.get( repo.getLocation() );
        try
        {
            repositorySession = repositorySessionFactory.createSession();
        }
        catch ( MetadataRepositoryException e )
        {
            // Fail the scan instead of swallowing the error: a null session would only surface later as an NPE in processFile()
            throw new ConsumerException( e.getMessage(), e );
        }
    }

    @Override
    public void beginScan( ManagedRepository repo, Date whenGathered, boolean executeOnEntireRepo )
        throws ConsumerException
    {
        beginScan( repo, whenGathered );
    }
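    /**
     * Calculates the SHA-1 checksum of the given artifact file, queries the metadata repository for
     * other artifacts sharing that checksum, and records a {@code duplicate-artifact} problem facet
     * for every match other than the artifact itself.
     */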
    @Override
    public void processFile( String path )
        throws ConsumerException
    {
        Path artifactFile = this.repositoryDir.resolve( path );

        // TODO: would be quicker to somehow make sure it ran after the update database consumer, or as a part of that
        // perhaps could use an artifact context that is retained for all consumers? First in can set the SHA-1
        // alternatively this could come straight from the storage resolver, which could populate the artifact metadata
        // in the later parse call with the desired checksum and use that
        String checksumSha1;
        ChecksummedFile checksummedFile = new ChecksummedFile( artifactFile );
        try
        {
            checksumSha1 = checksummedFile.calculateChecksum( ChecksumAlgorithm.SHA1 );
        }
        catch ( IOException e )
        {
            throw new ConsumerException( e.getMessage(), e );
        }

        MetadataRepository metadataRepository = repositorySession.getRepository();

        Collection<ArtifactMetadata> results;
        try
        {
            results = metadataRepository.getArtifactsByChecksum( repositorySession, repoId, checksumSha1 );
        }
        catch ( MetadataRepositoryException e )
        {
            repositorySession.close();
            throw new ConsumerException( e.getMessage(), e );
        }

        if ( CollectionUtils.isNotEmpty( results ) )
        {
            ArtifactMetadata originalArtifact;
            try
            {
                originalArtifact = pathTranslator.getArtifactForPath( repoId, path );
            }
            catch ( Exception e )
            {
                log.warn( "Not reporting problem for invalid artifact in checksum check: {}", e.getMessage() );
                return;
            }

            for ( ArtifactMetadata dupArtifact : results )
            {
                String id = path.substring( path.lastIndexOf( '/' ) + 1 );
                if ( dupArtifact.getId().equals( id ) && dupArtifact.getNamespace().equals(
                    originalArtifact.getNamespace() ) && dupArtifact.getProject().equals(
                    originalArtifact.getProject() ) && dupArtifact.getVersion().equals(
                    originalArtifact.getVersion() ) )
                {
                    // Skip reference to itself.
                    log.debug( "Not counting duplicate for artifact {} for path {}", dupArtifact, path );
                    continue;
                }

                RepositoryProblemFacet problem = new RepositoryProblemFacet();
                problem.setRepositoryId( repoId );
                problem.setNamespace( originalArtifact.getNamespace() );
                problem.setProject( originalArtifact.getProject() );
                problem.setVersion( originalArtifact.getVersion() );
                problem.setId( id );
                // FIXME: need to get the right storage resolver for the repository the dupe artifact is in, it might be
                // a different type
                // FIXME: we need the project version here, not the artifact version
                problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath(
                    dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(),
                    dupArtifact.getId() ) );
                problem.setProblem( "duplicate-artifact" );

                try
                {
                    metadataRepository.addMetadataFacet( repositorySession, repoId, problem );
                }
                catch ( MetadataRepositoryException e )
                {
                    throw new ConsumerException( e.getMessage(), e );
                }
            }
        }
    }

    @Override
    public void processFile( String path, boolean executeOnEntireRepo )
        throws ConsumerException
    {
        processFile( path );
    }

    @Override
    public void completeScan()
    {
        repositorySession.close();
    }

    @Override
    public void completeScan( boolean executeOnEntireRepo )
    {
        completeScan();
    }

    @Override
    public void afterConfigurationChange( Registry registry, String propertyName, Object propertyValue )
    {
        if ( ConfigurationNames.isRepositoryScanning( propertyName ) )
        {
            initIncludes();
        }
    }

    @Override
    public void beforeConfigurationChange( Registry registry, String propertyName, Object propertyValue )
    {
        /* do nothing */
    }
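    /**
     * Reloads the include patterns from the configured artifact file types; invoked at startup and
     * whenever the repository scanning configuration changes.
     */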
    private void initIncludes()
    {
        includes.clear();

        includes.addAll( filetypes.getFileTypePatterns( FileTypes.ARTIFACTS ) );
    }

    @PostConstruct
    public void initialize()
    {
        initIncludes();
        configuration.addChangeListener( this );
    }

    public RepositorySessionFactory getRepositorySessionFactory()
    {
        return repositorySessionFactory;
    }

    public void setRepositorySessionFactory( RepositorySessionFactory repositorySessionFactory )
    {
        this.repositorySessionFactory = repositorySessionFactory;
    }
}