root/head/ambra/webapp/src/main/java/org/topazproject/ambra/search2/service/SolrSearchService.java @ 8218

Revision 8218, 11.4 KB (checked in by dragisak, 7 months ago)

Search2:

  • Remove unused field
  • Reduce code duplication
  • Add fixme comment to use filters
  • Property svn:eol-style set to native
  • Property svn:keywords set to Id HeadURL Revision
Line 
1/*
2 * $HeadURL$
3 * $Id$
4 *
5 * Copyright (c) 2006-2010 by Topaz, Inc.
6 * http://topazproject.org
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 *     http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20package org.topazproject.ambra.search2.service;
21
22import org.apache.commons.configuration.Configuration;
23import org.apache.commons.lang.StringUtils;
24import org.apache.solr.client.solrj.SolrQuery;
25import org.apache.solr.client.solrj.SolrServerException;
26import org.apache.solr.client.solrj.response.QueryResponse;
27import org.apache.solr.common.SolrDocument;
28import org.apache.solr.common.SolrDocumentList;
29import org.slf4j.Logger;
30import org.slf4j.LoggerFactory;
31import org.topazproject.ambra.ApplicationException;
32import org.topazproject.ambra.search2.SearchHit;
33import org.topazproject.ambra.search2.SearchParameters;
34import org.topazproject.ambra.search2.SearchResultSinglePage;
35
36import java.util.ArrayList;
37import java.util.Collection;
38import java.util.Date;
39import java.util.List;
40import java.util.Map;
41
42/**
43 * Service to provide search capabilities for the application.
44 *
45 * @author Scott Sterling
46 * @author Dragisa Krsmanovic
47 */
48public class SolrSearchService implements SearchService {
49  private static final Logger log = LoggerFactory.getLogger(SolrSearchService.class);
50
51  private SolrServerFactory serverFactory;
52  private int queryTimeout;
53
54  private static final String HIGHLIGHT_FIELD = "body"; //   TODO: set this value from a config file.
55
56  public void setConfiguration(Configuration configuration) {
57    queryTimeout = configuration.getInt("ambra.services.search.timeout", 60000); // default to 1 min
58  }
59
60  public void setServerFactory(SolrServerFactory serverFactory) {
61    this.serverFactory = serverFactory;
62  }
63
64  private SearchResultSinglePage search(SolrQuery query) throws ApplicationException {
65
66    if (serverFactory.getServer() == null) {
67      throw new ApplicationException("Search server is not configured");
68    }
69
70    log.debug("The submitted SolrQuery is: " + query);
71
72    QueryResponse queryResponse;
73    try {
74      queryResponse = serverFactory.getServer().query(query);
75    } catch (SolrServerException e) {
76      log.error("Unable to execute a query on the Solr Server.", e);
77      throw new ApplicationException("Unable to execute a query on the Solr Server.", e);
78    }
79
80    return readQueryResults(queryResponse, query);
81  }
82
83  public SearchResultSinglePage simpleSearch(String query, int startPage, int pageSize) throws ApplicationException {
84    log.debug("Simple Search performed on the String: " + query
85        + " startPage: " + startPage
86        + " pageSize: " + pageSize);
87
88    return search(createQuery(query, startPage, pageSize));
89  }
90
91  private SolrQuery createQuery(String queryString, int startPage, int pageSize) {
92    SolrQuery query = new SolrQuery(queryString);
93    query.setTimeAllowed(queryTimeout);
94    query.setIncludeScore(true);
95    query.setHighlight(true);
96    query.setHighlightFragsize(50);
97    query.setHighlightSnippets(3);
98    query.setHighlightSimplePre("<span class=\"highlight\">");
99    query.setHighlightSimplePost("</span>");
100    query.set("hl.fl", HIGHLIGHT_FIELD);
101    query.set("hl.usePhraseHighlighter", true);
102    query.set("hl.highlightMultiTerm", true);
103    query.set("hl.mergeContiguous", true);
104    query.setStart(startPage * pageSize);
105    query.setRows(pageSize);
106    // request only fields that we need to display
107    query.setFields("doi", "score", "title", "publication_date", "eissn", "journal", "article_type", "author");
108    return query;
109  }
110
111  public SearchResultSinglePage advancedSearch(SearchParameters searchParameters) throws ApplicationException {
112    log.debug("Advanced Search performed on the SearchParameters: " + searchParameters);
113
114    SolrQuery query = createQuery(null, searchParameters.getStartPage(), searchParameters.getPageSize());
115
116    StringBuilder q = new StringBuilder(); // ALWAYS starts with " AND", which is stripped off.
117
118    // Form field description: "Author Name:"
119    if (searchParameters.getCreator().length > 0 && StringUtils.isNotBlank(searchParameters.getCreator()[0])) {
120      q.append(" ( ");
121      for (int i = 0; i < searchParameters.getCreator().length; i++) {
122        String creatorName = searchParameters.getCreator()[i];
123        if (StringUtils.isNotBlank(creatorName)) {
124          q.append(" author:").append(creatorName);
125        }
126        if (i < searchParameters.getCreator().length - 1
127            && StringUtils.isNotBlank(searchParameters.getCreator()[i + 1])) {
128          if ("all".equals(searchParameters.getAuthorNameOp())) {
129            q.append(" AND ");
130          } else {
131            q.append(" OR ");
132          }
133        }
134      }
135      q.append(" ) ");
136    }
137
138    // Form field description: "for at least one of the words:"
139    if (searchParameters.getTextSearchAtLeastOne().trim().length() > 0) {
140      q.append(" AND ").append(
141          addFields(" OR ", searchParameters.getTextSearchOption(), searchParameters.getTextSearchAtLeastOne().trim())
142      );
143    }
144
145    // Form field description: "for all the words:"
146    if (searchParameters.getTextSearchAll().trim().length() > 0) {
147      q.append(" AND ").append(
148          addFields(" AND ", searchParameters.getTextSearchOption(), searchParameters.getTextSearchAll().trim())
149      );
150    }
151
152    // Form field description: "for the exact phrase:"
153    if (searchParameters.getTextSearchExactPhrase().trim().length() > 0) {
154      q.append(" AND (\"").append(searchParameters.getTextSearchExactPhrase().trim()).append("\")");
155    }
156
157    // Form field description: "without the words:"
158
159
160    // If there is no query at this point, then there is nothing to search on, so throw exception.
161    if (q.length() < 1) {
162      throw new ApplicationException("Please enter one or more search terms. ");
163    }
164
165    // Form field description: "Journals"
166    // FIXME: For performance, this should be done with filters not by concatenating "AND" clause.
167    addFilter(q, searchParameters.getJournalOpt(), searchParameters.getLimitToJournal(), " eissn:");
168
169    // Form field description: "Subject Categories"
170    // FIXME: For performance, this should be done with filters not by concatenating "AND" clause.
171    addFilter(q, searchParameters.getSubjectCatOpt(), searchParameters.getLimitToCategory(), " subject:");
172
173    if (q.indexOf(" AND ") == 0)
174      q.replace(0, 4, ""); // Remove the preceding " AND ".
175
176    return search(query.setQuery(q.toString()));
177  }
178
179  private void addFilter(StringBuilder q, String option, String[] limits, String field) {
180    if ("some".equals(option) && limits.length > 0) { // Option "all" does not modify the query.
181      q.append(" AND (");
182      for (String limit : limits) {
183        q.append(field).append(limit).append(" OR");
184      }
185      q.replace(q.length() - 3, q.length(), " ) "); // Remove last "OR". Add closing parenthesis.
186    }
187  }
188
189  private StringBuilder addFields(String operation, String textSearchOption, String searchString) {
190    StringBuilder sb = new StringBuilder();
191    for(String token : searchString.split(" ")) {
192
193      if(sb.length() == 0) {
194        sb.append(" ( ");
195      } else {
196        sb.append(operation);
197      }
198
199      if ("abstract".equals(textSearchOption)) {
200        sb.append("abstract:").append(token);
201      } else if ("refs".equals(textSearchOption)) {
202        sb.append("citation:").append(token);
203      } else if ("title".equals(textSearchOption)) {
204        sb.append("title:").append(token);
205      } else {
206        sb.append(token);
207      }
208    }
209
210    if (sb.length() > 0) {
211      sb.append(") ");
212    }
213
214    return sb;
215  }
216
217  private SearchResultSinglePage readQueryResults(QueryResponse queryResponse, SolrQuery query) {
218    SolrDocumentList documentList = queryResponse.getResults();
219
220    log.info("  ***  query.getQuery():{ " + query.getQuery() + " }"
221        + ", found:" + documentList.getNumFound()
222        + ", start:" + documentList.getStart()
223        + ", max_score:" + documentList.getMaxScore()
224        + ", QTime:" + queryResponse.getQTime() + "ms");
225
226    Map<String, Map<String, List<String>>> highlightings = queryResponse.getHighlighting();
227
228    List<SearchHit> searchResults = new ArrayList<SearchHit>();
229    for (SolrDocument document : documentList) {
230
231      String doi = getFieldValue(document, "doi", String.class, query.toString());
232      String message = doi == null ? query.toString() : doi;
233      Float score = getFieldValue(document, "score", Float.class, message);
234      String title = getFieldValue(document, "title", String.class, message);
235      Date publicationDate = getFieldValue(document, "publication_date", Date.class, message);
236      String eissn = getFieldValue(document, "eissn", String.class, message);
237      String journal = getFieldValue(document, "journal", String.class, message);
238      String articleType = getFieldValue(document, "article_type", String.class, message);
239
240      List<String> authorList = getFieldMultiValue(document, message, String.class, "author");
241
242      String highlights = null;
243      if (query.getHighlight()) {
244        highlights = getHighlights(highlightings.get(doi));
245      }
246
247
248      SearchHit hit = new SearchHit(
249          score, doi, title, highlights, authorList, publicationDate, eissn, journal, articleType);
250
251      log.debug(hit.toString());
252
253      searchResults.add(hit);
254    }
255
256    // here we assume that number of hits is always going to be withing range of int
257    return new SearchResultSinglePage((int) documentList.getNumFound(), -1, searchResults);
258  }
259
260  private <T> T getFieldValue(SolrDocument document, String fieldName, Class<T> type, String message) {
261    Object value = document.getFieldValue(fieldName);
262    if (value != null) {
263      if (type.isInstance(value)) {
264        return type.cast(value);
265      } else {
266        log.error("Field " + fieldName + " is not of type " + type.getName() + " for " + message);
267      }
268    } else {
269      log.warn("No \'" + fieldName + "\' field for " + message);
270    }
271
272    return null;
273  }
274
275  private <T> List<T> getFieldMultiValue(SolrDocument document, String message, Class<T> type, String fieldName) {
276    List<T> authorList = new ArrayList<T>();
277    Object authors = document.getFieldValue(fieldName);
278    if (authors != null) {
279      if (authors instanceof Collection) {
280        authorList.addAll((Collection<T>) authors);
281      } else {
282        T value = getFieldValue(document, fieldName, type, message);
283        if (value != null) {
284          authorList.add(value);
285        }
286      }
287    } else {
288      log.warn("No \'" + fieldName + "\' field for " + message);
289    }
290    return authorList;
291  }
292
293
294  private String getHighlights(Map<String, List<String>> articleHighlights) {
295    String hitHighlights = null;
296    if (articleHighlights != null) {
297      List<String> snippets = articleHighlights.get(HIGHLIGHT_FIELD);
298      if (snippets != null && snippets.size() > 0) {
299        StringBuilder sb = new StringBuilder();
300        for (String snippet : snippets) {
301          if (sb.length() > 0) {
302            sb.append(" ... ");
303          }
304          sb.append(snippet);
305        }
306
307        hitHighlights = sb.toString();
308      }
309    }
310    return hitHighlights;
311  }
312}
Note: See TracBrowser for help on using the browser.