QGIS API Documentation 3.41.0-Master (45a0abf3bec)
Loading...
Searching...
No Matches
qgsalgorithmdeleteduplicategeometries.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmdeleteduplicategeometries.cpp
3 -----------------------------------------
4 begin : December 2019
5 copyright : (C) 2019 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19#include "qgsvectorlayer.h"
20#include "qgsgeometryengine.h"
21#include "qgsspatialindex.h"
22
24
25QString QgsDeleteDuplicateGeometriesAlgorithm::name() const
26{
27 return QStringLiteral( "deleteduplicategeometries" );
28}
29
30QString QgsDeleteDuplicateGeometriesAlgorithm::displayName() const
31{
32 return QObject::tr( "Delete duplicate geometries" );
33}
34
35QStringList QgsDeleteDuplicateGeometriesAlgorithm::tags() const
36{
37 return QObject::tr( "drop,remove,same,points,coincident,overlapping,filter" ).split( ',' );
38}
39
40QString QgsDeleteDuplicateGeometriesAlgorithm::group() const
41{
42 return QObject::tr( "Vector general" );
43}
44
45QString QgsDeleteDuplicateGeometriesAlgorithm::groupId() const
46{
47 return QStringLiteral( "vectorgeneral" );
48}
49
50void QgsDeleteDuplicateGeometriesAlgorithm::initAlgorithm( const QVariantMap & )
51{
52 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ) ) );
53 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Cleaned" ) ) );
54 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
55 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
56}
57
58QString QgsDeleteDuplicateGeometriesAlgorithm::shortHelpString() const
59{
60 return QObject::tr( "This algorithm finds duplicated geometries and removes them.\n\nAttributes are not checked, "
61 "so in case two features have identical geometries but different attributes, only one of "
62 "them will be added to the result layer." );
63}
64
65QString QgsDeleteDuplicateGeometriesAlgorithm::shortDescription() const
66{
67 return QObject::tr( "Finds duplicated geometries in a layer and removes them." );
68}
69
70QgsDeleteDuplicateGeometriesAlgorithm *QgsDeleteDuplicateGeometriesAlgorithm::createInstance() const
71{
72 return new QgsDeleteDuplicateGeometriesAlgorithm();
73}
74
75bool QgsDeleteDuplicateGeometriesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
76{
77 mSource.reset( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
78 if ( !mSource )
79 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
80
81 return true;
82}
83
84QVariantMap QgsDeleteDuplicateGeometriesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
85{
86 QString destId;
87 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, destId, mSource->fields(),
88 mSource->wkbType(), mSource->sourceCrs() ) );
89 if ( !sink )
90 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
91
93
94 double step = mSource->featureCount() > 0 ? 100.0 / mSource->featureCount() : 0;
95 QHash< QgsFeatureId, QgsGeometry > geometries;
96 QSet< QgsFeatureId > nullGeometryFeatures;
97 long current = 0;
98 const QgsSpatialIndex index( it, [&]( const QgsFeature & f ) ->bool
99 {
100 if ( feedback->isCanceled() )
101 return false;
102
103 if ( !f.hasGeometry() )
104 {
105 nullGeometryFeatures.insert( f.id() );
106 }
107 else
108 {
109 geometries.insert( f.id(), f.geometry() );
110 }
111
112 // overall this loop takes about 10% of time
113 current++;
114 feedback->setProgress( 0.10 * current * step );
115 return true;
116 } );
117
118 QgsFeature f;
119
120 // start by assuming everything is unique, and chop away at this list
121 QHash< QgsFeatureId, QgsGeometry > uniqueFeatures = geometries;
122 current = 0;
123 long removed = 0;
124
125 for ( auto it = geometries.constBegin(); it != geometries.constEnd(); ++it )
126 {
127 const QgsFeatureId featureId = it.key();
128 const QgsGeometry geometry = it.value();
129
130 if ( feedback->isCanceled() )
131 break;
132
133 if ( !uniqueFeatures.contains( featureId ) )
134 {
135 // feature was already marked as a duplicate
136 }
137 else
138 {
139 const QList<QgsFeatureId> candidates = index.intersects( geometry.boundingBox() );
140
141 for ( const QgsFeatureId candidateId : candidates )
142 {
143 if ( candidateId == featureId )
144 continue;
145
146 if ( !uniqueFeatures.contains( candidateId ) )
147 {
148 // candidate already marked as a duplicate (not sure if this is possible,
149 // since it would mean the current feature would also have to be a duplicate!
150 // but let's be safe!)
151 continue;
152 }
153 else if ( geometry.isGeosEqual( geometries.value( candidateId ) ) )
154 {
155 // candidate is a duplicate of feature
156 uniqueFeatures.remove( candidateId );
157 removed++;
158 }
159 }
160 }
161
162 current++;
163 feedback->setProgress( 0.80 * current * step + 10 ); // takes about 80% of time
164 }
165
166 // now, fetch all the feature attributes for the unique features only
167 // be super-smart and don't re-fetch geometries
168 QSet< QgsFeatureId > outputFeatureIds = qgis::listToSet( uniqueFeatures.keys() );
169 outputFeatureIds.unite( nullGeometryFeatures );
170 step = outputFeatureIds.empty() ? 1 : 100.0 / outputFeatureIds.size();
171
173 it = mSource->getFeatures( request, Qgis::ProcessingFeatureSourceFlag::SkipGeometryValidityChecks );
174 current = 0;
175 while ( it.nextFeature( f ) )
176 {
177 if ( feedback->isCanceled() )
178 break;
179
180 // use already fetched geometry
181 if ( !nullGeometryFeatures.contains( f.id() ) )
182 {
183 f.setGeometry( uniqueFeatures.value( f.id() ) );
184 }
185 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
186 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
187
188 current++;
189 feedback->setProgress( 0.10 * current * step + 90 ); // takes about 10% of time
190 }
191
192 feedback->pushInfo( QObject::tr( "%n duplicate feature(s) removed", nullptr, removed ) );
193
194 sink->finalize();
195
196 QVariantMap outputs;
197 outputs.insert( QStringLiteral( "OUTPUT" ), destId );
198 outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), static_cast< long long >( removed ) );
199 outputs.insert( QStringLiteral( "RETAINED_COUNT" ), outputFeatureIds.size() );
200 return outputs;
201}
202
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetches the next feature and stores it in f; returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setFilterFids(const QgsFeatureIds &fids)
Sets the feature IDs that should be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:58
QgsFeatureId id
Definition qgsfeature.h:66
QgsGeometry geometry
Definition qgsfeature.h:69
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:53
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:61
A geometry is the spatial representation of a feature.
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A spatial index for QgsFeature objects.
qint64 QgsFeatureId
64-bit feature IDs; negative numbers are used for uncommitted/newly added features.
QList< int > QgsAttributeList
Definition qgsfield.h:27