Gentoo Archives: gentoo-commits

From: "Johannes Huber (johu)" <johu@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] gentoo-x86 commit in kde-base/nepomuk/files: nepomuk-4.8.1-performance.patch
Date: Wed, 04 Apr 2012 11:02:13
Message-Id: 20120404110159.BFC0420032@flycatcher.gentoo.org
1 johu 12/04/04 11:01:59
2
3 Added: nepomuk-4.8.1-performance.patch
4 Log:
5 Revision bump adds upstream patch to improve performance on resource identification spotted by scarabeus.
6
7 (Portage version: 2.2.0_alpha98/cvs/Linux i686)
8
9 Revision Changes Path
10 1.1 kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch
11
12 file : http://sources.gentoo.org/viewvc.cgi/gentoo-x86/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch?rev=1.1&view=markup
13 plain: http://sources.gentoo.org/viewvc.cgi/gentoo-x86/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch?rev=1.1&content-type=text/plain
14
15 Index: nepomuk-4.8.1-performance.patch
16 ===================================================================
17 commit 754275eda610dce1160286a76339353097d8764c
18 Author: Sebastian Trueg <trueg@×××.org>
19 Date: Fri Mar 9 17:17:48 2012 +0100
20
21 Backport from nepomuk-core: improved performance on res identification.
22
23 BUG: 289932
24 FIXED-IN: 4.8.2
25
26 diff --git a/nepomuk/services/backupsync/lib/resourceidentifier.cpp b/nepomuk/services/backupsync/lib/resourceidentifier.cpp
27 index c1a9919..894372c 100644
28 --- a/nepomuk/services/backupsync/lib/resourceidentifier.cpp
29 +++ b/nepomuk/services/backupsync/lib/resourceidentifier.cpp
30 @@ -31,6 +31,7 @@
31 #include <Soprano/Statement>
32 #include <Soprano/Graph>
33 #include <Soprano/Node>
34 +#include <Soprano/BindingSet>
35 #include <Soprano/StatementIterator>
36 #include <Soprano/QueryResultIterator>
37 #include <Soprano/Model>
38 @@ -176,19 +177,18 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
39 return false;
40 }
41
42 - QString query;
43 -
44 QStringList identifyingProperties;
45 QHash<KUrl, Soprano::Node> identifyingPropertiesHash;
46
47 QHash< KUrl, Soprano::Node >::const_iterator it = res.constBegin();
48 QHash< KUrl, Soprano::Node >::const_iterator constEnd = res.constEnd();
49 + QList<Soprano::Node> requiredTypes;
50 for( ; it != constEnd; it++ ) {
51 const QUrl & prop = it.key();
52
53 // Special handling for rdf:type
54 if( prop == RDF::type() ) {
55 - query += QString::fromLatin1(" ?r a %1 . ").arg( it.value().toN3() );
56 + requiredTypes << it.value().uri();
57 continue;
58 }
59
60 @@ -219,6 +219,10 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
61 return false;
62 }
63
64 +
65 + // construct the identification query
66 + QString query = QLatin1String("select distinct ?r where { ");
67 +
68 //
69 // Optimization:
70 // If there is only one identifying property using all that optional and filter stuff
71 @@ -235,7 +239,7 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
72 QString::number( numIdentifyingProperties++ ) );
73 }
74
75 - // Make sure atleast one of the identification properties has been matched
76 + // Make sure at least one of the identification properties has been matched
77 // by adding filter( bound(?o1) || bound(?o2) ... )
78 query += QString::fromLatin1("filter( ");
79 for( int i=0; i<numIdentifyingProperties-1; i++ ) {
80 @@ -247,43 +251,68 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
81 query += QString::fromLatin1("?r %1 %2 . ").arg(Soprano::Node::resourceToN3(identifyingPropertiesHash.constBegin().key()),
82 identifyingPropertiesHash.constBegin().value().toN3());
83 }
84 - query += QLatin1String("}");
85
86 - // Construct the entire query
87 - QString queryBegin = QString::fromLatin1("select distinct ?r count(?p) as ?cnt "
88 - "where { ?r ?p ?o. filter( ?p in (%1) ).")
89 - .arg( identifyingProperties.join(",") );
90 -
91 - query = queryBegin + query + QString::fromLatin1(" order by desc(?cnt)");
92 + //
93 + // For performance reasons we add a limit even though this could mean that we
94 + // miss a resource to identify since we check the types below.
95 + //
96 + query += QLatin1String("} LIMIT 100");
97
98 - kDebug() << query;
99
100 //
101 - // Only store the results which have the maximum score
102 + // Fetch a score for each result.
103 + // We do this in a separate query for performance reasons.
104 //
105 - QSet<KUrl> results;
106 - int score = -1;
107 + QMultiHash<int, KUrl> resultsScoreHash;
108 + int maxScore = -1;
109 Soprano::QueryResultIterator qit = d->m_model->executeQuery( query, Soprano::Query::QueryLanguageSparql );
110 while( qit.next() ) {
111 - //kDebug() << "RESULT: " << qit["r"] << " " << qit["cnt"];
112 + const Soprano::Node r(qit["r"]);
113 +
114 + //
115 + // Check the type requirements. Experiments have shown this to mean a substantial
116 + // performance boost as compared to doing it in the main query.
117 + //
118 + if(!requiredTypes.isEmpty() ) {
119 + query = QLatin1String("ask where { ");
120 + foreach(const Soprano::Node& type, requiredTypes) {
121 + query += QString::fromLatin1("%1 a %2 . ").arg(r.toN3(), type.toN3());
122 + }
123 + query += QLatin1String("}");
124 + if(!d->m_model->executeQuery(query, Soprano::Query::QueryLanguageSparql).boolValue()) {
125 + continue;
126 + }
127 + }
128 +
129 +
130 + const int score = d->m_model->executeQuery(QString::fromLatin1("select count(?p) as ?cnt where { "
131 + "%1 ?p ?o. filter( ?p in (%2) ) . }")
132 + .arg( r.toN3(),
133 + identifyingProperties.join(",") ),
134 + Soprano::Query::QueryLanguageSparql)
135 + .allBindings().first()["cnt"].literal().toInt();
136
137 - int count = qit["cnt"].literal().toInt();
138 - if( score == -1 ) {
139 - score = count;
140 + if( maxScore < score ) {
141 + maxScore = score;
142 }
143 - else if( count < score )
144 - break;
145
146 - results << qit["r"].uri();
147 + resultsScoreHash.insert(score, r.uri());
148 }
149
150 + //
151 + // Only get the results which have the maximum score
152 + //
153 + QSet<KUrl> results = QSet<KUrl>::fromList(resultsScoreHash.values(maxScore));
154 +
155 +
156 //kDebug() << "Got " << results.size() << " results";
157 if( results.empty() )
158 return false;
159
160 KUrl newUri;
161 - if( results.size() == 1 )
162 + if( results.size() == 1 ) {
163 newUri = *results.begin();
164 + }
165 else {
166 kDebug() << "DUPLICATE RESULTS!";
167 newUri = duplicateMatch( res.uri(), results );