Lucene provides a rich query syntax allowing users to easily create simple queries and scale up to complex ones. A number of search back-ends are built on top of Lucene nowadays such as ElasticSearch and Solr. In this article we will explore using Lucene’s query syntax against relational databases.

We used Groovy here, but it would be straightforward to convert this to Java. For the purposes of keeping things simple, we will work with two main classes as shown below.

import java.text.SimpleDateFormat;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version;

/**
 * Defines base logic for building native queries from a Lucene based query.
 */
abstract class QueryBuilder
{
    Map<String, String> fields = [:];

    /**
     * Builds a native query String from a Lucene based query.
     * 
     * @param pDefaultField default field name
     * @param pQuery Lucene query string
     * @return String
     */
    public String build(String pDefaultField, String pQuery)
    {
        QueryParser parser = new QueryParser(Version.LUCENE_30, pDefaultField, new WhitespaceAnalyzer());
        parser.dateResolution = DateTools.Resolution.DAY;

        return build(parser.parse(pQuery));
    }

    /**
     * Builds a native query String from a MultiTermQuery.
     * 
     * @param pQuery MultiTermQuery
     * @return String
     */
    protected abstract String buildQuery(MultiTermQuery pQuery);

    /**
     * Builds a native query String from a TermQuery.
     * 
     * @param pQuery TermQuery
     * @return String
     */
    protected abstract String buildQuery(TermQuery pQuery);    

    /**
     * Builds a native query String from a TermRangeQuery.
     * 
     * @param pQuery
     * @return
     */
    protected abstract String buildQuery(TermRangeQuery pQuery);

    /**
     * Builds a query String from a BooleanQuery.
     * 
     * @param pQuery BooleanQuery
     * @return String
     */    
    protected String build(BooleanQuery pQuery)
    {
        int x = 0;
        String query = "";

        pQuery.clauses().each(
            {
                String segment = build(it.query);

                if (segment)
                {              
                    if (x++ != 0)
                    {
                        query += it.isRequired() ? " AND " : " OR ";
                    }

                    // Add paren's to inner boolean queries
                    if (it.query instanceof BooleanQuery)
                    {
                        segment = "($segment)";
                    }
                    query += segment;
                }
            });

        return query;
    }    

    /**
     * Builds a query string from a FuzzyQuery.
     * 
     * @param pQuery MultiTermQuery
     * @return String
     */
    protected String build(FuzzyQuery pQuery)
    {
        Term term = getTerm(pQuery.term);
        if (term)
        {
            return buildQuery(new FuzzyQuery(term, pQuery.minSimilarity, pQuery.prefixLength));
        }

        return null;
    }

    /**
     * Builds a query String from a PhraseQuery.
     * 
     * @param pQuery PhraseQuery
     * @return String
     */    
    protected String build(PhraseQuery pQuery)
    {
        int x = 0;
        String field;
        String text = "";
        Query query;

        // Join all tokens into a single token
        pQuery.terms.each(
            { 
                if (x == 0)
                {
                    field = it.field;
                }
                if (x++ > 0)
                {
                    text += " ";
                }

                text += it.text;
            });

        if (text.find(/\*|\?/))
        {
            query = new WildcardQuery(new Term(field, text));
        }
        else
        {
            query = new TermQuery(new Term(field, text));
        }

        return build(query);
    }    

    /**
     * Builds a query string from a PrefixQuery.
     * 
     * @param pQuery WildcardQuery
     * @return String
     */    
    protected String build(PrefixQuery pQuery)
    {
        String field = getField(pQuery.prefix.field);
        if (field)
        {
            return buildQuery(new WildcardQuery(new Term(field, "${pQuery.prefix.text}*")));
        }

        return null;
    }

    /**
     * Builds a query string from a TermQuery.
     * 
     * @param pQuery TermQuery
     * @return String
     */
    protected String build(TermQuery pQuery)
    {
        Term term = getTerm(pQuery.term);
        if (term)
        {
            return buildQuery(new TermQuery(term));
        }

        return null;
    }    

    /**
     * Builds a query string from a TermRangeQuery.
     * 
     * @param pQuery TermRangeQuery
     * @return String
     */
    protected String build(TermRangeQuery pQuery)
    {
        String field = getField(pQuery.field);
        if (field)
        {
            return buildQuery(new TermRangeQuery(field, pQuery.lowerTerm, pQuery.upperTerm, 
                pQuery.includesLower(), pQuery.includesUpper(), pQuery.collator));
        }

        return null;
    }    

    /**
     * Builds a query string from a WildcardQuery.
     * 
     * @param pQuery WildcardQuery
     * @return String
     */
    protected String build(WildcardQuery pQuery)
    {
        Term term = getTerm(pQuery.term);
        if (term)
        {
            return buildQuery(new WildcardQuery(term));
        }

        return null;
    }    

    /**
     * Builds a new Term item, looking up the true field name from the field mappings.
     * 
     * @param pTerm Term
     * @return Term
     */
    protected Term getTerm(Term pTerm)
    {
        String field = getField(pTerm.field);            
        if (field)
        {
            return new Term(field, pTerm.text);
        }

        return null;
    }

    /**
     * Looks up true field name.
     * 
     * @param pName name
     * @return String
     */
    protected String getField(String pName)
    {
        return fields[pName] ?: pName;
    }

    /**
     * Attempts to parse a Date from a query value.
     * 
     * @param pText text to parse
     * @return Date
     */
    protected Date parseDate(String pText)
    {
        def formats = ['yyyyMMddHHmm', 'yyyyMMdd', 'dd-MMM-yyyy', 'dd-MMM-yyyy HHmm'];

        for (String format : formats)
        {
            try
            {
                return new SimpleDateFormat(format).parse(pText);
            }
            catch (Exception ex) 
            {
                // Ignore
            }
        }

        return null;
    }

    /**
     * Format a date using the standard date format.
     * 
     * @param pDate date to format
     * @return String
     */
    protected String formatDate(Date pDate)
    {
        SimpleDateFormat format = new SimpleDateFormat("yyyyMMddHHmmss");
        return format.format(pDate);
    }
}

 

import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;

/**
 * Builds a SQL query from a Lucene query.
 */
class SqlQueryBuilder extends QueryBuilder
{
    /**
     * {@inheritDoc}
     */
    protected String buildQuery(MultiTermQuery pQuery)
    {
        return "${pQuery.term.field} LIKE '${pQuery.term.text.replace('*', '%')}'";
    }

    /**
     * {@inheritDoc}
     */
    protected String buildQuery(TermQuery pQuery)
    {
        return "${pQuery.term.field} = '${pQuery.term.text}'";
    }

    /**
     * {@inheritDoc}
     */
    protected String buildQuery(TermRangeQuery pQuery)
    {
        String lowerTerm = formatTerm(pQuery.lowerTerm);
        String upperTerm = formatTerm(pQuery.upperTerm);

        String query = "(${pQuery.field} > ${pQuery.includesLower() ? '=' : ''} ${lowerTerm} AND ";
        query += "${pQuery.field} < ${pQuery.includesUpper() ? '=' : ''} ${upperTerm})";

        return query;
    }

    /**
     * Formats a term as either a text or date query.
     * 
     * @param pTerm term
     * @return String
     */
    protected String formatTerm(String pTerm)
    {
        String term = "'$pTerm'";

        Date date = parseDate(pTerm);
        if (date)
        {
            term = '${formatDate(date)}';
        }

        return term;
    }
}

In the code above, QueryBuilder is the base class where Lucene query segments are processed. For each segment type, an abstract method is defined. QueryBuilder also allows defining field name mapping. This allows your Lucene field names to be different from your SQL column names. SqlQueryBuilder extends this class, providing the actual conversion to SQL.

An example of running this code is shown with the following script

// Create QueryBuilder and field translations
QueryBuilder builder = new SqlQueryBuilder(fields: ["id":"post_id", "author":"post_author", "title":"post_title"])

// post_id = 1344
println builder.build("id", "1344")

// post_author LIKE 'john smi%' AND (post_id='3443444' OR post_title = 'Top News Story')
println builder.build("id", "author: \"john smi*\" AND (id: 3443444 title: \"Top News Story\")")

 

To build and run this code:

$GROOVY_HOME/bin/groovyc -cp lucene-core-3.3.0.jar QueryBuilder.groovy SqlQueryBuilder.groovy TestQuery.groovy

$GROOVY_HOME/bin/groovy -cp lucene-core-3.3.0.jar:. TestQuery

In the samples above the output from SqlQueryBuilder would create the WHEREclause. This would be joined with a standard SQL SELECT statement to form the complete query to execute.