When configuring Solr in non-english languages, in this case in Portuguese, one usually wants:
- words split by white spaces;
- word case insensitive match;
- stop words removal;
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <tokenizer class="solr.StandardTokenizerFactory"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> <!-- in this example, we will only use synonyms at query time <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> --> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.SnowballPorterFilterFactory" language="Portuguese" /> <filter class="solr.ASCIIFoldingFilterFactory"/> </analyzer> <analyzer type="query"> <tokenizer class="solr.StandardTokenizerFactory"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.SnowballPorterFilterFactory" language="Portuguese" /> <filter class="solr.ASCIIFoldingFilterFactory"/> </analyzer> </fieldType>