package org.apache.archiva.reports.consumers;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import org.apache.archiva.admin.model.beans.ManagedRepository;
import org.apache.archiva.checksum.ChecksumAlgorithm;
import org.apache.archiva.checksum.ChecksummedFile;
import org.apache.archiva.configuration.ArchivaConfiguration;
import org.apache.archiva.configuration.ConfigurationNames;
import org.apache.archiva.configuration.FileTypes;
import org.apache.archiva.consumers.AbstractMonitoredConsumer;
import org.apache.archiva.consumers.ConsumerException;
import org.apache.archiva.consumers.KnownRepositoryContentConsumer;
import org.apache.archiva.metadata.model.ArtifactMetadata;
import org.apache.archiva.metadata.repository.MetadataRepository;
import org.apache.archiva.metadata.repository.MetadataRepositoryException;
import org.apache.archiva.metadata.repository.RepositorySession;
import org.apache.archiva.metadata.repository.RepositorySessionFactory;
import org.apache.archiva.metadata.repository.storage.RepositoryPathTranslator;
import org.apache.archiva.redback.components.registry.Registry;
import org.apache.archiva.redback.components.registry.RegistryListener;
import org.apache.archiva.metadata.model.facets.RepositoryProblemFacet;
import org.apache.commons.collections.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Service;

import javax.annotation.PostConstruct;
import javax.inject.Inject;
import javax.inject.Named;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;

/**
 * Search the artifact repository of known SHA1 Checksums for potential duplicate artifacts.
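 * <p>
 * For each scanned file the consumer calculates its SHA-1 checksum, queries the metadata repository for other
 * artifacts with the same checksum, and records a {@link RepositoryProblemFacet} for every match that is not the
 * scanned artifact itself.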
 * <p>
 * TODO: no need for this to be a scanner - we can just query the database / content repository to get a full list
 */
@Service ( "knownRepositoryContentConsumer#duplicate-artifacts" )
@Scope ( "prototype" )
public class DuplicateArtifactsConsumer
    extends AbstractMonitoredConsumer
    implements KnownRepositoryContentConsumer, RegistryListener
{
    private Logger log = LoggerFactory.getLogger( DuplicateArtifactsConsumer.class );

    private String id = "duplicate-artifacts";

    private String description = "Check for Duplicate Artifacts via SHA1 Checksums";

    @Inject
    private ArchivaConfiguration configuration;

    @Inject
    private FileTypes filetypes;

    /**
     * FIXME: this could be multiple implementations and needs to be configured.
     */
    @Inject
    private RepositorySessionFactory repositorySessionFactory;

    private List<String> includes = new ArrayList<>();

    private File repositoryDir;

    private String repoId;

    /**
     * FIXME: needs to be selected based on the repository in question
     */
    @Inject
    @Named ( value = "repositoryPathTranslator#maven2" )
    private RepositoryPathTranslator pathTranslator;

    private RepositorySession repositorySession;

    @Override
    public String getId()
    {
        return id;
    }

    @Override
    public String getDescription()
    {
        return description;
    }

    @Override
    public List<String> getIncludes()
    {
        return includes;
    }

    @Override
    public List<String> getExcludes()
    {
        return Collections.emptyList();
    }

    @Override
    public void beginScan( ManagedRepository repo, Date whenGathered )
        throws ConsumerException
    {
        repoId = repo.getId();
        this.repositoryDir = new File( repo.getLocation() );
        repositorySession = repositorySessionFactory.createSession();
    }

    @Override
    public void beginScan( ManagedRepository repo, Date whenGathered, boolean executeOnEntireRepo )
        throws ConsumerException
    {
        beginScan( repo, whenGathered );
    }

    @Override
    public void processFile( String path )
        throws ConsumerException
    {
        File artifactFile = new File( this.repositoryDir, path );

        // TODO: would be quicker to somehow make sure it ran after the update database consumer, or as a part of that
        // perhaps could use an artifact context that is retained for all consumers? First in can set the SHA-1
        // alternatively this could come straight from the storage resolver, which could populate the artifact metadata
        // in the later parse call with the desired checksum and use that
        String checksumSha1;
        ChecksummedFile checksummedFile = new ChecksummedFile( artifactFile );
        try
        {
            checksumSha1 = checksummedFile.calculateChecksum( ChecksumAlgorithm.SHA1 );
        }
        catch ( IOException e )
        {
            throw new ConsumerException( e.getMessage(), e );
        }

        MetadataRepository metadataRepository = repositorySession.getRepository();

        Collection<ArtifactMetadata> results;
        try
        {
            results = metadataRepository.getArtifactsByChecksum( repoId, checksumSha1 );
        }
        catch ( MetadataRepositoryException e )
        {
            repositorySession.close();
            throw new ConsumerException( e.getMessage(), e );
        }

        if ( CollectionUtils.isNotEmpty( results ) )
        {
            ArtifactMetadata originalArtifact;
            try
            {
                originalArtifact = pathTranslator.getArtifactForPath( repoId, path );
            }
            catch ( Exception e )
            {
                log.warn( "Not reporting problem for invalid artifact in checksum check: {}", e.getMessage() );
                return;
            }

            for ( ArtifactMetadata dupArtifact : results )
            {
                String id = path.substring( path.lastIndexOf( '/' ) + 1 );
                if ( dupArtifact.getId().equals( id ) && dupArtifact.getNamespace().equals(
                    originalArtifact.getNamespace() ) && dupArtifact.getProject().equals(
                    originalArtifact.getProject() ) && dupArtifact.getVersion().equals(
                    originalArtifact.getVersion() ) )
                {
                    // Skip reference to itself.

                    log.debug( "Not counting duplicate for artifact {} for path {}", dupArtifact, path );

                    continue;
                }

                RepositoryProblemFacet problem = new RepositoryProblemFacet();
                problem.setRepositoryId( repoId );
                problem.setNamespace( originalArtifact.getNamespace() );
                problem.setProject( originalArtifact.getProject() );
                problem.setVersion( originalArtifact.getVersion() );
                problem.setId( id );
                // FIXME: need to get the right storage resolver for the repository the dupe artifact is in, it might be
                // a different type
                // FIXME: we need the project version here, not the artifact version
                problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath(
                    dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(),
                    dupArtifact.getId() ) );
                problem.setProblem( "duplicate-artifact" );

                try
                {
                    metadataRepository.addMetadataFacet( repoId, problem );
                }
                catch ( MetadataRepositoryException e )
                {
                    throw new ConsumerException( e.getMessage(), e );
                }
            }
        }
    }

    @Override
    public void processFile( String path, boolean executeOnEntireRepo )
        throws ConsumerException
    {
        processFile( path );
    }

    @Override
    public void completeScan()
    {
        repositorySession.close();
    }

    @Override
    public void completeScan( boolean executeOnEntireRepo )
    {
        completeScan();
    }

    @Override
    public void afterConfigurationChange( Registry registry, String propertyName, Object propertyValue )
    {
        if ( ConfigurationNames.isRepositoryScanning( propertyName ) )
        {
            initIncludes();
        }
    }

    @Override
    public void beforeConfigurationChange( Registry registry, String propertyName, Object propertyValue )
    {
        /* do nothing */
    }

    private void initIncludes()
    {
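        // Refresh the include patterns from the configured artifact file types.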
        includes.clear();

        includes.addAll( filetypes.getFileTypePatterns( FileTypes.ARTIFACTS ) );
    }

    @PostConstruct
    public void initialize()
    {
        initIncludes();
        configuration.addChangeListener( this );
    }
}