1 |
eras 11/12/04 06:59:13 |
2 |
|
3 |
Added: SnowballFilter.h SnowballAnalyzer.h |
4 |
Log: |
5 |
Add v2.1_rc1 release - bug #392709 |
6 |
|
7 |
(Portage version: 2.1.10.39/cvs/Linux x86_64) |
8 |
|
9 |
Revision Changes Path |
10 |
1.1 net-mail/dovecot/files/SnowballFilter.h |
11 |
|
12 |
file : http://sources.gentoo.org/viewvc.cgi/gentoo-x86/net-mail/dovecot/files/SnowballFilter.h?rev=1.1&view=markup |
13 |
plain: http://sources.gentoo.org/viewvc.cgi/gentoo-x86/net-mail/dovecot/files/SnowballFilter.h?rev=1.1&content-type=text/plain |
14 |
|
15 |
Index: SnowballFilter.h |
16 |
=================================================================== |
17 |
/*------------------------------------------------------------------------------ |
18 |
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team |
19 |
* |
20 |
* Distributable under the terms of either the Apache License (Version 2.0) or |
21 |
* the GNU Lesser General Public License, as specified in the COPYING file. |
22 |
------------------------------------------------------------------------------*/ |
23 |
#ifndef _lucene_analysis_snowball_filter_ |
24 |
#define _lucene_analysis_snowball_filter_ |
25 |
|
26 |
#include "CLucene/analysis/AnalysisHeader.h" |
27 |
#include "libstemmer.h" |
28 |
|
29 |
CL_NS_DEF2(analysis,snowball) |
30 |
|
31 |
/** A filter that stems words using a Snowball-generated stemmer. |
32 |
* |
33 |
* Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a |
34 |
* stemmer is the part of the class name before "Stemmer", e.g., the stemmer in |
35 |
* {@link EnglishStemmer} is named "English". |
36 |
* |
37 |
* Note: todo: This is not thread safe... |
38 |
*/ |
39 |
class CLUCENE_CONTRIBS_EXPORT SnowballFilter: public TokenFilter { |
40 |
struct sb_stemmer * stemmer; |
41 |
public: |
42 |
|
43 |
/** Construct the named stemming filter. |
44 |
* |
45 |
* @param in the input tokens to stem |
46 |
* @param name the name of a stemmer |
47 |
*/ |
48 |
SnowballFilter(TokenStream* in, const char* language, bool deleteTS); |
49 |
|
50 |
~SnowballFilter(); |
51 |
|
52 |
/** Returns the next input Token, after being stemmed */ |
53 |
Token* next(Token* token); |
54 |
}; |
55 |
|
56 |
CL_NS_END2 |
57 |
#endif |
58 |
|
59 |
|
60 |
|
61 |
1.1 net-mail/dovecot/files/SnowballAnalyzer.h |
62 |
|
63 |
file : http://sources.gentoo.org/viewvc.cgi/gentoo-x86/net-mail/dovecot/files/SnowballAnalyzer.h?rev=1.1&view=markup |
64 |
plain: http://sources.gentoo.org/viewvc.cgi/gentoo-x86/net-mail/dovecot/files/SnowballAnalyzer.h?rev=1.1&content-type=text/plain |
65 |
|
66 |
Index: SnowballAnalyzer.h |
67 |
=================================================================== |
68 |
/*------------------------------------------------------------------------------ |
69 |
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team |
70 |
* |
71 |
* Distributable under the terms of either the Apache License (Version 2.0) or |
72 |
* the GNU Lesser General Public License, as specified in the COPYING file. |
73 |
------------------------------------------------------------------------------*/ |
74 |
#ifndef _lucene_analysis_snowball_analyser_ |
75 |
#define _lucene_analysis_snowball_analyser_ |
76 |
|
77 |
#include "CLucene/analysis/AnalysisHeader.h" |
78 |
|
79 |
CL_CLASS_DEF(util,BufferedReader) |
80 |
CL_NS_DEF2(analysis,snowball) |
81 |
|
82 |
/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link |
83 |
* LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}. |
84 |
* |
85 |
* Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a |
86 |
* stemmer is the part of the class name before "Stemmer", e.g., the stemmer in |
87 |
* {@link EnglishStemmer} is named "English". |
88 |
*/ |
89 |
class CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer: public Analyzer { |
90 |
char* language; |
91 |
CLTCSetList* stopSet; |
92 |
TokenStream *prevstream; |
93 |
|
94 |
public: |
95 |
/** Builds the named analyzer with no stop words. */ |
96 |
SnowballAnalyzer(const char* language="english"); |
97 |
|
98 |
/** Builds the named analyzer with the given stop words. |
99 |
*/ |
100 |
SnowballAnalyzer(const char* language, const TCHAR** stopWords); |
101 |
|
102 |
~SnowballAnalyzer(); |
103 |
|
104 |
/** Constructs a {@link StandardTokenizer} filtered by a {@link |
105 |
StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */ |
106 |
TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); |
107 |
TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader); |
108 |
TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); |
109 |
}; |
110 |
|
111 |
CL_NS_END2 |
112 |
#endif |