// Analyzer definition named "tags". According to the accompanying text it is the
// index-time analyzer, intended to index phrases made up of multiple words.
// Filters are listed in the order they are declared below.
@AnalyzerDef(
name = "tags",
tokenizer =
// Pattern-based tokenizer configured with "," as its pattern.
@TokenizerDef(factory = PatternTokenizerFactory.class,
params = {
@Parameter(name = "pattern", value=",")
}
),
filters = {
@TokenFilterDef(factory = StandardFilterFactory.class),
@TokenFilterDef(factory = LowerCaseFilterFactory.class),
// Stop-word filter; word list is read from data/ignorewords.txt, case-insensitively.
@TokenFilterDef(
factory = StopFilterFactory.class,
params = {
@Parameter(name = "words", value = "data/ignorewords.txt"),
@Parameter(name = "ignoreCase", value = "true")
}
),
// Synonym filter; synonyms come from data/synonyms.txt, with expand = false.
@TokenFilterDef(
factory = SynonymFilterFactory.class,
params = {
@Parameter(name = "ignoreCase", value="true"),
@Parameter(name = "expand", value="false"),
@Parameter(name = "synonyms", value="data/synonyms.txt")
}
),
// Snowball stemmer configured for English.
@TokenFilterDef(
factory = SnowballPorterFilterFactory.class,
params = {
@Parameter(name = "language", value="English")
}
),
// Shingle filter: shingle sizes 2 to 3, with unigrams also output.
@TokenFilterDef(
factory = ShingleFilterFactory.class,
params = {
@Parameter(name = "minShingleSize", value="2"),
@Parameter(name = "maxShingleSize", value="3"),
@Parameter(name = "outputUnigrams", value="true"),
@Parameter(name = "outputUnigramsIfNoShingles", value="false")
}
),
// Position filter with positionIncrement = 100.
@TokenFilterDef(
factory = PositionFilterFactory.class,
params = {
@Parameter(name = "positionIncrement", value = "100")
}
),
// Phonetic filter using the RefinedSoundex encoder, with inject = true.
@TokenFilterDef(
factory = PhoneticFilterFactory.class,
params = {
@Parameter(name = "encoder", value="RefinedSoundex"),
@Parameter(name = "inject", value="true")
}
)
}
),
// Analyzer definition named "querytime". According to the accompanying text it is
// meant to be applied to the search query rather than at index time.
// NOTE(review): unlike the "tags" analyzer, this chain uses StandardTokenizerFactory
// and declares no SynonymFilterFactory -- confirm that the difference is intended.
@AnalyzerDef(
name = "querytime",
tokenizer =
@TokenizerDef(factory = StandardTokenizerFactory.class),
filters = {
@TokenFilterDef(factory = StandardFilterFactory.class),
@TokenFilterDef(factory = LowerCaseFilterFactory.class),
// Stop-word filter; word list is read from data/ignorewords.txt, case-insensitively.
@TokenFilterDef(factory = StopFilterFactory.class,
params = {
@Parameter(name = "words", value = "data/ignorewords.txt"),
@Parameter(name = "ignoreCase", value = "true")
}
),
// Snowball stemmer configured for English.
@TokenFilterDef(
factory = SnowballPorterFilterFactory.class,
params = {
@Parameter(name = "language", value="English")
}
),
// Shingle filter: shingle sizes 2 to 3, with unigrams also output.
@TokenFilterDef(
factory = ShingleFilterFactory.class,
params = {
@Parameter(name = "minShingleSize", value="2"),
@Parameter(name = "maxShingleSize", value="3"),
@Parameter(name = "outputUnigrams", value="true"),
@Parameter(name = "outputUnigramsIfNoShingles", value="false")
}
),
// Position filter with positionIncrement = 100.
@TokenFilterDef(
factory = PositionFilterFactory.class,
params = {
@Parameter(name = "positionIncrement", value = "100")
}
),
// Phonetic filter using the RefinedSoundex encoder, with inject = true.
@TokenFilterDef(
factory = PhoneticFilterFactory.class,
params = {
@Parameter(name = "encoder", value="RefinedSoundex"),
@Parameter(name = "inject", value="true")
}
)
}
)
})
Above is my analyzer definition, where I am using PatternTokenizerFactory at index time to index phrases made up of multiple words.
There is also a second analyzer, using StandardTokenizerFactory with ShingleFilterFactory, which is meant to be used at query time; however, I am not getting the combinations of tokens (shingles) from my search query.
What I was expecting: when the search query is "My Search Query", it should produce "my search" and "search query", but instead I am getting "my", "search" and "query".
Below is my function
// Body of the search method (its signature is not shown here): parses searchQuery
// against the "tags" field using the "querytime" analyzer and returns the matching
// CityArea results. `session` and `searchQuery` are defined outside this snippet.
FullTextSession fullTextSession = Search.getFullTextSession(session);
Transaction tx = fullTextSession.beginTransaction();
// Build the native Lucene query; it stays null if parsing fails below.
org.apache.lucene.search.Query luceneQuery = null;
String[] fields = new String[] {"tags"};
// The parser is given the analyzer registered under the name "querytime".
MultiFieldQueryParser parser = new MultiFieldQueryParser(
Version.LUCENE_31, fields, fullTextSession.getSearchFactory().getAnalyzer("querytime"));
try {
luceneQuery = parser.parse(searchQuery);
} catch (ParseException e) {
// NOTE(review): the parse failure is only printed; execution continues with
// luceneQuery == null, which is then passed to createFullTextQuery below.
e.printStackTrace();
}
// Wrap the Lucene query in an org.hibernate.Query restricted to CityArea.
org.hibernate.Query hibQuery = fullTextSession.createFullTextQuery(luceneQuery, CityArea.class);
// Execute the search (raw List -- the element type is not declared here).
List result = hibQuery.list();
// NOTE(review): commit/close are not in a finally block, so they are skipped if
// anything above throws.
tx.commit();
session.close();
return result;
The problem is this: my indexed tokens are, for example, "Word A" and "Word B", and I am searching for "where is word a" (expecting it to display the record for Word A), but I get no result. I want it to work this way because I don't want to display any result unless the search query contains a phrase that is indexed.
Since nobody answered, I dug into the problem and found the answer myself. It may help others, so I am writing it here. The solution is pretty simple: just wrap the searchQuery in quotation marks before parsing it, e.g. parser.parse("\"" + searchQuery + "\"").