[gentoo-commits] gentoo-x86 commit in kde-base/nepomuk/files: nepomuk-4.8.1-performance.patch - gentoo-commits

From:	"Johannes Huber (johu)" <johu@g.o>
To:	gentoo-commits@l.g.o
Subject:	[gentoo-commits] gentoo-x86 commit in kde-base/nepomuk/files: nepomuk-4.8.1-performance.patch
Date:	Wed, 04 Apr 2012 11:02:13
Message-Id:	`20120404110159.BFC0420032@flycatcher.gentoo.org`

1

johu        12/04/04 11:01:59

2

3

  Added:                nepomuk-4.8.1-performance.patch

4

  Log:

5

  Revision bump adds upstream patch to improve performance on resource identification spotted by scarabeus.

6

7

  (Portage version: 2.2.0_alpha98/cvs/Linux i686)

8

9

Revision  Changes    Path

10

1.1                  kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch

11

12

file : http://sources.gentoo.org/viewvc.cgi/gentoo-x86/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch?rev=1.1&view=markup

13

plain: http://sources.gentoo.org/viewvc.cgi/gentoo-x86/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch?rev=1.1&content-type=text/plain

14

15

Index: nepomuk-4.8.1-performance.patch

16

===================================================================

17

commit 754275eda610dce1160286a76339353097d8764c

18

Author: Sebastian Trueg <trueg@×××.org>

19

Date:   Fri Mar 9 17:17:48 2012 +0100

20

21

    Backport from nepomuk-core: improved performance on res identification.

22

23

    BUG: 289932

24

    FIXED-IN: 4.8.2

25

26

diff --git a/nepomuk/services/backupsync/lib/resourceidentifier.cpp b/nepomuk/services/backupsync/lib/resourceidentifier.cpp

27

index c1a9919..894372c 100644

28

--- a/nepomuk/services/backupsync/lib/resourceidentifier.cpp

29

+++ b/nepomuk/services/backupsync/lib/resourceidentifier.cpp

30

@@ -31,6 +31,7 @@

31

 #include <Soprano/Statement>

32

 #include <Soprano/Graph>

33

 #include <Soprano/Node>

34

+#include <Soprano/BindingSet>

35

 #include <Soprano/StatementIterator>

36

 #include <Soprano/QueryResultIterator>

37

 #include <Soprano/Model>

38

@@ -176,19 +177,18 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)

39

         return false;

40

}

41

42

-    QString query;

43

-

44

     QStringList identifyingProperties;

45

     QHash<KUrl, Soprano::Node> identifyingPropertiesHash;

46

47

     QHash< KUrl, Soprano::Node >::const_iterator it = res.constBegin();

48

     QHash< KUrl, Soprano::Node >::const_iterator constEnd = res.constEnd();

49

+    QList<Soprano::Node> requiredTypes;

50

     for( ; it != constEnd; it++ ) {

51

         const QUrl & prop = it.key();

52

53

         // Special handling for rdf:type

54

         if( prop == RDF::type() ) {

55

-            query += QString::fromLatin1(" ?r a %1 . ").arg( it.value().toN3() );

56

+            requiredTypes << it.value().uri();

57

             continue;

58

}

59

60

@@ -219,6 +219,10 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)

61

         return false;

62

}

63

64

+

65

+    // construct the identification query

66

+    QString query = QLatin1String("select distinct ?r where { ");

67

+

68

//

69

     // Optimization:

70

     // If there is only one identifying property using all that optional and filter stuff

71

@@ -235,7 +239,7 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)

72

                            QString::number( numIdentifyingProperties++ ) );

73

}

74

75

-        // Make sure atleast one of the identification properties has been matched

76

+        // Make sure at least one of the identification properties has been matched

77

         // by adding filter( bound(?o1) || bound(?o2) ... )

78

         query += QString::fromLatin1("filter( ");

79

         for( int i=0; i<numIdentifyingProperties-1; i++ ) {

80

@@ -247,43 +251,68 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)

81

         query += QString::fromLatin1("?r %1 %2 . ").arg(Soprano::Node::resourceToN3(identifyingPropertiesHash.constBegin().key()),

82

                                                          identifyingPropertiesHash.constBegin().value().toN3());

83

}

84

-    query += QLatin1String("}");

85

86

-    // Construct the entire query

87

-    QString queryBegin = QString::fromLatin1("select distinct ?r count(?p) as ?cnt "

88

-    "where { ?r ?p ?o. filter( ?p in (%1) ).")

89

-    .arg( identifyingProperties.join(",") );

90

-

91

-    query = queryBegin + query + QString::fromLatin1(" order by desc(?cnt)");

92

+    //

93

+    // For performance reasons we add a limit even though this could mean that we

94

+    // miss a resource to identify since we check the types below.

95

+    //

96

+    query += QLatin1String("} LIMIT 100");

97

98

-    kDebug() << query;

99

100

//

101

-    // Only store the results which have the maximum score

102

+    // Fetch a score for each result.

103

+    // We do this in a separate query for performance reasons.

104

//

105

-    QSet<KUrl> results;

106

-    int score = -1;

107

+    QMultiHash<int, KUrl> resultsScoreHash;

108

+    int maxScore = -1;

109

     Soprano::QueryResultIterator qit = d->m_model->executeQuery( query, Soprano::Query::QueryLanguageSparql );

110

     while( qit.next() ) {

111

-        //kDebug() << "RESULT: " << qit["r"] << " " << qit["cnt"];

112

+        const Soprano::Node r(qit["r"]);

113

+

114

+        //

115

+        // Check the type requirements. Experiments have shown this to mean a substantial

116

+        // performance boost as compared to doing it in the main query.

117

+        //

118

+        if(!requiredTypes.isEmpty() ) {

119

+            query = QLatin1String("ask where { ");

120

+            foreach(const Soprano::Node& type, requiredTypes) {

121

+                query += QString::fromLatin1("%1 a %2 . ").arg(r.toN3(), type.toN3());

122

+            }

123

+            query += QLatin1String("}");

124

+            if(!d->m_model->executeQuery(query, Soprano::Query::QueryLanguageSparql).boolValue()) {

125

+                continue;

126

+            }

127

+        }

128

+

129

+

130

+        const int score = d->m_model->executeQuery(QString::fromLatin1("select count(?p) as ?cnt where { "

131

+                                                                       "%1 ?p ?o. filter( ?p in (%2) ) . }")

132

+                                                   .arg( r.toN3(),

133

+                                                         identifyingProperties.join(",") ),

134

+                                                   Soprano::Query::QueryLanguageSparql)

135

+                          .allBindings().first()["cnt"].literal().toInt();

136

137

-        int count = qit["cnt"].literal().toInt();

138

-        if( score == -1 ) {

139

-            score = count;

140

+        if( maxScore < score ) {

141

+            maxScore = score;

142

}

143

-        else if( count < score )

144

-            break;

145

146

-        results << qit["r"].uri();

147

+        resultsScoreHash.insert(score, r.uri());

148

}

149

150

+    //

151

+    // Only get the results which have the maximum score

152

+    //

153

+    QSet<KUrl> results = QSet<KUrl>::fromList(resultsScoreHash.values(maxScore));

154

+

155

+

156

     //kDebug() << "Got " << results.size() << " results";

157

     if( results.empty() )

158

         return false;

159

160

     KUrl newUri;

161

-    if( results.size() == 1 )

162

+    if( results.size() == 1 ) {

163

         newUri = *results.begin();

164

+    }

165

     else {

166

         kDebug() << "DUPLICATE RESULTS!";

167

         newUri = duplicateMatch( res.uri(), results );

1	johu 12/04/04 11:01:59
2
3	Added: nepomuk-4.8.1-performance.patch
4	Log:
5	Revision bump adds upstream patch to improve performance on resource identification spotted by scarabeus.
6
7	(Portage version: 2.2.0_alpha98/cvs/Linux i686)
8
9	Revision Changes Path
10	1.1 kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch
11
12	file : http://sources.gentoo.org/viewvc.cgi/gentoo-x86/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch?rev=1.1&view=markup
13	plain: http://sources.gentoo.org/viewvc.cgi/gentoo-x86/kde-base/nepomuk/files/nepomuk-4.8.1-performance.patch?rev=1.1&content-type=text/plain
14
15	Index: nepomuk-4.8.1-performance.patch
16	===================================================================
17	commit 754275eda610dce1160286a76339353097d8764c
18	Author: Sebastian Trueg <trueg@×××.org>
19	Date: Fri Mar 9 17:17:48 2012 +0100
20
21	Backport from nepomuk-core: improved performance on res identification.
22
23	BUG: 289932
24	FIXED-IN: 4.8.2
25
26	diff --git a/nepomuk/services/backupsync/lib/resourceidentifier.cpp b/nepomuk/services/backupsync/lib/resourceidentifier.cpp
27	index c1a9919..894372c 100644
28	--- a/nepomuk/services/backupsync/lib/resourceidentifier.cpp
29	+++ b/nepomuk/services/backupsync/lib/resourceidentifier.cpp
30	@@ -31,6 +31,7 @@
31	#include <Soprano/Statement>
32	#include <Soprano/Graph>
33	#include <Soprano/Node>
34	+#include <Soprano/BindingSet>
35	#include <Soprano/StatementIterator>
36	#include <Soprano/QueryResultIterator>
37	#include <Soprano/Model>
38	@@ -176,19 +177,18 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
39	return false;
40	}
41
42	- QString query;
43	-
44	QStringList identifyingProperties;
45	QHash<KUrl, Soprano::Node> identifyingPropertiesHash;
46
47	QHash< KUrl, Soprano::Node >::const_iterator it = res.constBegin();
48	QHash< KUrl, Soprano::Node >::const_iterator constEnd = res.constEnd();
49	+ QList<Soprano::Node> requiredTypes;
50	for( ; it != constEnd; it++ ) {
51	const QUrl & prop = it.key();
52
53	// Special handling for rdf:type
54	if( prop == RDF::type() ) {
55	- query += QString::fromLatin1(" ?r a %1 . ").arg( it.value().toN3() );
56	+ requiredTypes << it.value().uri();
57	continue;
58	}
59
60	@@ -219,6 +219,10 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
61	return false;
62	}
63
64	+
65	+ // construct the identification query
66	+ QString query = QLatin1String("select distinct ?r where { ");
67	+
68	//
69	// Optimization:
70	// If there is only one identifying property using all that optional and filter stuff
71	@@ -235,7 +239,7 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
72	QString::number( numIdentifyingProperties++ ) );
73	}
74
75	- // Make sure atleast one of the identification properties has been matched
76	+ // Make sure at least one of the identification properties has been matched
77	// by adding filter( bound(?o1) || bound(?o2) ... )
78	query += QString::fromLatin1("filter( ");
79	for( int i=0; i<numIdentifyingProperties-1; i++ ) {
80	@@ -247,43 +251,68 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
81	query += QString::fromLatin1("?r %1 %2 . ").arg(Soprano::Node::resourceToN3(identifyingPropertiesHash.constBegin().key()),
82	identifyingPropertiesHash.constBegin().value().toN3());
83	}
84	- query += QLatin1String("}");
85
86	- // Construct the entire query
87	- QString queryBegin = QString::fromLatin1("select distinct ?r count(?p) as ?cnt "
88	- "where { ?r ?p ?o. filter( ?p in (%1) ).")
89	- .arg( identifyingProperties.join(",") );
90	-
91	- query = queryBegin + query + QString::fromLatin1(" order by desc(?cnt)");
92	+ //
93	+ // For performance reasons we add a limit even though this could mean that we
94	+ // miss a resource to identify since we check the types below.
95	+ //
96	+ query += QLatin1String("} LIMIT 100");
97
98	- kDebug() << query;
99
100	//
101	- // Only store the results which have the maximum score
102	+ // Fetch a score for each result.
103	+ // We do this in a separate query for performance reasons.
104	//
105	- QSet<KUrl> results;
106	- int score = -1;
107	+ QMultiHash<int, KUrl> resultsScoreHash;
108	+ int maxScore = -1;
109	Soprano::QueryResultIterator qit = d->m_model->executeQuery( query, Soprano::Query::QueryLanguageSparql );
110	while( qit.next() ) {
111	- //kDebug() << "RESULT: " << qit["r"] << " " << qit["cnt"];
112	+ const Soprano::Node r(qit["r"]);
113	+
114	+ //
115	+ // Check the type requirements. Experiments have shown this to mean a substantial
116	+ // performance boost as compared to doing it in the main query.
117	+ //
118	+ if(!requiredTypes.isEmpty() ) {
119	+ query = QLatin1String("ask where { ");
120	+ foreach(const Soprano::Node& type, requiredTypes) {
121	+ query += QString::fromLatin1("%1 a %2 . ").arg(r.toN3(), type.toN3());
122	+ }
123	+ query += QLatin1String("}");
124	+ if(!d->m_model->executeQuery(query, Soprano::Query::QueryLanguageSparql).boolValue()) {
125	+ continue;
126	+ }
127	+ }
128	+
129	+
130	+ const int score = d->m_model->executeQuery(QString::fromLatin1("select count(?p) as ?cnt where { "
131	+ "%1 ?p ?o. filter( ?p in (%2) ) . }")
132	+ .arg( r.toN3(),
133	+ identifyingProperties.join(",") ),
134	+ Soprano::Query::QueryLanguageSparql)
135	+ .allBindings().first()["cnt"].literal().toInt();
136
137	- int count = qit["cnt"].literal().toInt();
138	- if( score == -1 ) {
139	- score = count;
140	+ if( maxScore < score ) {
141	+ maxScore = score;
142	}
143	- else if( count < score )
144	- break;
145
146	- results << qit["r"].uri();
147	+ resultsScoreHash.insert(score, r.uri());
148	}
149
150	+ //
151	+ // Only get the results which have the maximum score
152	+ //
153	+ QSet<KUrl> results = QSet<KUrl>::fromList(resultsScoreHash.values(maxScore));
154	+
155	+
156	//kDebug() << "Got " << results.size() << " results";
157	if( results.empty() )
158	return false;
159
160	KUrl newUri;
161	- if( results.size() == 1 )
162	+ if( results.size() == 1 ) {
163	newUri = *results.begin();
164	+ }
165	else {
166	kDebug() << "DUPLICATE RESULTS!";
167	newUri = duplicateMatch( res.uri(), results );

Gentoo Archives: gentoo-commits