| 1 | |
|---|
| 2 | |
|---|
| 3 | |
|---|
| 4 | |
|---|
| 5 | |
|---|
| 6 | |
|---|
| 7 | |
|---|
| 8 | |
|---|
| 9 | |
|---|
| 10 | |
|---|
| 11 | |
|---|
| 12 | |
|---|
| 13 | |
|---|
| 14 | |
|---|
| 15 | |
|---|
| 16 | |
|---|
| 17 | |
|---|
| 18 | |
|---|
| 19 | |
|---|
| 20 | package org.topazproject.ambra.search2.service; |
|---|
| 21 | |
|---|
| 22 | import org.apache.commons.configuration.Configuration; |
|---|
| 23 | import org.apache.commons.lang.StringUtils; |
|---|
| 24 | import org.apache.solr.client.solrj.SolrQuery; |
|---|
| 25 | import org.apache.solr.client.solrj.SolrServerException; |
|---|
| 26 | import org.apache.solr.client.solrj.response.QueryResponse; |
|---|
| 27 | import org.apache.solr.common.SolrDocument; |
|---|
| 28 | import org.apache.solr.common.SolrDocumentList; |
|---|
| 29 | import org.slf4j.Logger; |
|---|
| 30 | import org.slf4j.LoggerFactory; |
|---|
| 31 | import org.topazproject.ambra.ApplicationException; |
|---|
| 32 | import org.topazproject.ambra.search2.SearchHit; |
|---|
| 33 | import org.topazproject.ambra.search2.SearchParameters; |
|---|
| 34 | import org.topazproject.ambra.search2.SearchResultSinglePage; |
|---|
| 35 | |
|---|
| 36 | import java.util.ArrayList; |
|---|
| 37 | import java.util.Collection; |
|---|
| 38 | import java.util.Date; |
|---|
| 39 | import java.util.List; |
|---|
| 40 | import java.util.Map; |
|---|
| 41 | |
|---|
| 42 | |
|---|
| 43 | |
|---|
| 44 | |
|---|
| 45 | |
|---|
| 46 | |
|---|
| 47 | |
|---|
| 48 | public class SolrSearchService implements SearchService { |
|---|
| 49 | private static final Logger log = LoggerFactory.getLogger(SolrSearchService.class); |
|---|
| 50 | |
|---|
| 51 | private SolrServerFactory serverFactory; |
|---|
| 52 | private int queryTimeout; |
|---|
| 53 | |
|---|
| 54 | private static final String HIGHLIGHT_FIELD = "body"; |
|---|
| 55 | |
|---|
| 56 | public void setConfiguration(Configuration configuration) { |
|---|
| 57 | queryTimeout = configuration.getInt("ambra.services.search.timeout", 60000); |
|---|
| 58 | } |
|---|
| 59 | |
|---|
| 60 | public void setServerFactory(SolrServerFactory serverFactory) { |
|---|
| 61 | this.serverFactory = serverFactory; |
|---|
| 62 | } |
|---|
| 63 | |
|---|
| 64 | private SearchResultSinglePage search(SolrQuery query) throws ApplicationException { |
|---|
| 65 | |
|---|
| 66 | if (serverFactory.getServer() == null) { |
|---|
| 67 | throw new ApplicationException("Search server is not configured"); |
|---|
| 68 | } |
|---|
| 69 | |
|---|
| 70 | log.debug("The submitted SolrQuery is: " + query); |
|---|
| 71 | |
|---|
| 72 | QueryResponse queryResponse; |
|---|
| 73 | try { |
|---|
| 74 | queryResponse = serverFactory.getServer().query(query); |
|---|
| 75 | } catch (SolrServerException e) { |
|---|
| 76 | log.error("Unable to execute a query on the Solr Server.", e); |
|---|
| 77 | throw new ApplicationException("Unable to execute a query on the Solr Server.", e); |
|---|
| 78 | } |
|---|
| 79 | |
|---|
| 80 | return readQueryResults(queryResponse, query); |
|---|
| 81 | } |
|---|
| 82 | |
|---|
| 83 | public SearchResultSinglePage simpleSearch(String query, int startPage, int pageSize) throws ApplicationException { |
|---|
| 84 | log.debug("Simple Search performed on the String: " + query |
|---|
| 85 | + " startPage: " + startPage |
|---|
| 86 | + " pageSize: " + pageSize); |
|---|
| 87 | |
|---|
| 88 | return search(createQuery(query, startPage, pageSize)); |
|---|
| 89 | } |
|---|
| 90 | |
|---|
| 91 | private SolrQuery createQuery(String queryString, int startPage, int pageSize) { |
|---|
| 92 | SolrQuery query = new SolrQuery(queryString); |
|---|
| 93 | query.setTimeAllowed(queryTimeout); |
|---|
| 94 | query.setIncludeScore(true); |
|---|
| 95 | query.setHighlight(true); |
|---|
| 96 | query.setHighlightFragsize(50); |
|---|
| 97 | query.setHighlightSnippets(3); |
|---|
| 98 | query.setHighlightSimplePre("<span class=\"highlight\">"); |
|---|
| 99 | query.setHighlightSimplePost("</span>"); |
|---|
| 100 | query.set("hl.fl", HIGHLIGHT_FIELD); |
|---|
| 101 | query.set("hl.usePhraseHighlighter", true); |
|---|
| 102 | query.set("hl.highlightMultiTerm", true); |
|---|
| 103 | query.set("hl.mergeContiguous", true); |
|---|
| 104 | query.setStart(startPage * pageSize); |
|---|
| 105 | query.setRows(pageSize); |
|---|
| 106 | |
|---|
| 107 | query.setFields("doi", "score", "title", "publication_date", "eissn", "journal", "article_type", "author"); |
|---|
| 108 | return query; |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | public SearchResultSinglePage advancedSearch(SearchParameters searchParameters) throws ApplicationException { |
|---|
| 112 | log.debug("Advanced Search performed on the SearchParameters: " + searchParameters); |
|---|
| 113 | |
|---|
| 114 | SolrQuery query = createQuery(null, searchParameters.getStartPage(), searchParameters.getPageSize()); |
|---|
| 115 | |
|---|
| 116 | StringBuilder q = new StringBuilder(); |
|---|
| 117 | |
|---|
| 118 | |
|---|
| 119 | if (searchParameters.getCreator().length > 0 && StringUtils.isNotBlank(searchParameters.getCreator()[0])) { |
|---|
| 120 | q.append(" ( "); |
|---|
| 121 | for (int i = 0; i < searchParameters.getCreator().length; i++) { |
|---|
| 122 | String creatorName = searchParameters.getCreator()[i]; |
|---|
| 123 | if (StringUtils.isNotBlank(creatorName)) { |
|---|
| 124 | q.append(" author:").append(creatorName); |
|---|
| 125 | } |
|---|
| 126 | if (i < searchParameters.getCreator().length - 1 |
|---|
| 127 | && StringUtils.isNotBlank(searchParameters.getCreator()[i + 1])) { |
|---|
| 128 | if ("all".equals(searchParameters.getAuthorNameOp())) { |
|---|
| 129 | q.append(" AND "); |
|---|
| 130 | } else { |
|---|
| 131 | q.append(" OR "); |
|---|
| 132 | } |
|---|
| 133 | } |
|---|
| 134 | } |
|---|
| 135 | q.append(" ) "); |
|---|
| 136 | } |
|---|
| 137 | |
|---|
| 138 | |
|---|
| 139 | if (searchParameters.getTextSearchAtLeastOne().trim().length() > 0) { |
|---|
| 140 | q.append(" AND ").append( |
|---|
| 141 | addFields(" OR ", searchParameters.getTextSearchOption(), searchParameters.getTextSearchAtLeastOne().trim()) |
|---|
| 142 | ); |
|---|
| 143 | } |
|---|
| 144 | |
|---|
| 145 | |
|---|
| 146 | if (searchParameters.getTextSearchAll().trim().length() > 0) { |
|---|
| 147 | q.append(" AND ").append( |
|---|
| 148 | addFields(" AND ", searchParameters.getTextSearchOption(), searchParameters.getTextSearchAll().trim()) |
|---|
| 149 | ); |
|---|
| 150 | } |
|---|
| 151 | |
|---|
| 152 | |
|---|
| 153 | if (searchParameters.getTextSearchExactPhrase().trim().length() > 0) { |
|---|
| 154 | q.append(" AND (\"").append(searchParameters.getTextSearchExactPhrase().trim()).append("\")"); |
|---|
| 155 | } |
|---|
| 156 | |
|---|
| 157 | |
|---|
| 158 | |
|---|
| 159 | |
|---|
| 160 | |
|---|
| 161 | if (q.length() < 1) { |
|---|
| 162 | throw new ApplicationException("Please enter one or more search terms. "); |
|---|
| 163 | } |
|---|
| 164 | |
|---|
| 165 | |
|---|
| 166 | |
|---|
| 167 | addFilter(q, searchParameters.getJournalOpt(), searchParameters.getLimitToJournal(), " eissn:"); |
|---|
| 168 | |
|---|
| 169 | |
|---|
| 170 | |
|---|
| 171 | addFilter(q, searchParameters.getSubjectCatOpt(), searchParameters.getLimitToCategory(), " subject:"); |
|---|
| 172 | |
|---|
| 173 | if (q.indexOf(" AND ") == 0) |
|---|
| 174 | q.replace(0, 4, ""); |
|---|
| 175 | |
|---|
| 176 | return search(query.setQuery(q.toString())); |
|---|
| 177 | } |
|---|
| 178 | |
|---|
| 179 | private void addFilter(StringBuilder q, String option, String[] limits, String field) { |
|---|
| 180 | if ("some".equals(option) && limits.length > 0) { |
|---|
| 181 | q.append(" AND ("); |
|---|
| 182 | for (String limit : limits) { |
|---|
| 183 | q.append(field).append(limit).append(" OR"); |
|---|
| 184 | } |
|---|
| 185 | q.replace(q.length() - 3, q.length(), " ) "); |
|---|
| 186 | } |
|---|
| 187 | } |
|---|
| 188 | |
|---|
| 189 | private StringBuilder addFields(String operation, String textSearchOption, String searchString) { |
|---|
| 190 | StringBuilder sb = new StringBuilder(); |
|---|
| 191 | for(String token : searchString.split(" ")) { |
|---|
| 192 | |
|---|
| 193 | if(sb.length() == 0) { |
|---|
| 194 | sb.append(" ( "); |
|---|
| 195 | } else { |
|---|
| 196 | sb.append(operation); |
|---|
| 197 | } |
|---|
| 198 | |
|---|
| 199 | if ("abstract".equals(textSearchOption)) { |
|---|
| 200 | sb.append("abstract:").append(token); |
|---|
| 201 | } else if ("refs".equals(textSearchOption)) { |
|---|
| 202 | sb.append("citation:").append(token); |
|---|
| 203 | } else if ("title".equals(textSearchOption)) { |
|---|
| 204 | sb.append("title:").append(token); |
|---|
| 205 | } else { |
|---|
| 206 | sb.append(token); |
|---|
| 207 | } |
|---|
| 208 | } |
|---|
| 209 | |
|---|
| 210 | if (sb.length() > 0) { |
|---|
| 211 | sb.append(") "); |
|---|
| 212 | } |
|---|
| 213 | |
|---|
| 214 | return sb; |
|---|
| 215 | } |
|---|
| 216 | |
|---|
| 217 | private SearchResultSinglePage readQueryResults(QueryResponse queryResponse, SolrQuery query) { |
|---|
| 218 | SolrDocumentList documentList = queryResponse.getResults(); |
|---|
| 219 | |
|---|
| 220 | log.info(" *** query.getQuery():{ " + query.getQuery() + " }" |
|---|
| 221 | + ", found:" + documentList.getNumFound() |
|---|
| 222 | + ", start:" + documentList.getStart() |
|---|
| 223 | + ", max_score:" + documentList.getMaxScore() |
|---|
| 224 | + ", QTime:" + queryResponse.getQTime() + "ms"); |
|---|
| 225 | |
|---|
| 226 | Map<String, Map<String, List<String>>> highlightings = queryResponse.getHighlighting(); |
|---|
| 227 | |
|---|
| 228 | List<SearchHit> searchResults = new ArrayList<SearchHit>(); |
|---|
| 229 | for (SolrDocument document : documentList) { |
|---|
| 230 | |
|---|
| 231 | String doi = getFieldValue(document, "doi", String.class, query.toString()); |
|---|
| 232 | String message = doi == null ? query.toString() : doi; |
|---|
| 233 | Float score = getFieldValue(document, "score", Float.class, message); |
|---|
| 234 | String title = getFieldValue(document, "title", String.class, message); |
|---|
| 235 | Date publicationDate = getFieldValue(document, "publication_date", Date.class, message); |
|---|
| 236 | String eissn = getFieldValue(document, "eissn", String.class, message); |
|---|
| 237 | String journal = getFieldValue(document, "journal", String.class, message); |
|---|
| 238 | String articleType = getFieldValue(document, "article_type", String.class, message); |
|---|
| 239 | |
|---|
| 240 | List<String> authorList = getFieldMultiValue(document, message, String.class, "author"); |
|---|
| 241 | |
|---|
| 242 | String highlights = null; |
|---|
| 243 | if (query.getHighlight()) { |
|---|
| 244 | highlights = getHighlights(highlightings.get(doi)); |
|---|
| 245 | } |
|---|
| 246 | |
|---|
| 247 | |
|---|
| 248 | SearchHit hit = new SearchHit( |
|---|
| 249 | score, doi, title, highlights, authorList, publicationDate, eissn, journal, articleType); |
|---|
| 250 | |
|---|
| 251 | log.debug(hit.toString()); |
|---|
| 252 | |
|---|
| 253 | searchResults.add(hit); |
|---|
| 254 | } |
|---|
| 255 | |
|---|
| 256 | |
|---|
| 257 | return new SearchResultSinglePage((int) documentList.getNumFound(), -1, searchResults); |
|---|
| 258 | } |
|---|
| 259 | |
|---|
| 260 | private <T> T getFieldValue(SolrDocument document, String fieldName, Class<T> type, String message) { |
|---|
| 261 | Object value = document.getFieldValue(fieldName); |
|---|
| 262 | if (value != null) { |
|---|
| 263 | if (type.isInstance(value)) { |
|---|
| 264 | return type.cast(value); |
|---|
| 265 | } else { |
|---|
| 266 | log.error("Field " + fieldName + " is not of type " + type.getName() + " for " + message); |
|---|
| 267 | } |
|---|
| 268 | } else { |
|---|
| 269 | log.warn("No \'" + fieldName + "\' field for " + message); |
|---|
| 270 | } |
|---|
| 271 | |
|---|
| 272 | return null; |
|---|
| 273 | } |
|---|
| 274 | |
|---|
| 275 | private <T> List<T> getFieldMultiValue(SolrDocument document, String message, Class<T> type, String fieldName) { |
|---|
| 276 | List<T> authorList = new ArrayList<T>(); |
|---|
| 277 | Object authors = document.getFieldValue(fieldName); |
|---|
| 278 | if (authors != null) { |
|---|
| 279 | if (authors instanceof Collection) { |
|---|
| 280 | authorList.addAll((Collection<T>) authors); |
|---|
| 281 | } else { |
|---|
| 282 | T value = getFieldValue(document, fieldName, type, message); |
|---|
| 283 | if (value != null) { |
|---|
| 284 | authorList.add(value); |
|---|
| 285 | } |
|---|
| 286 | } |
|---|
| 287 | } else { |
|---|
| 288 | log.warn("No \'" + fieldName + "\' field for " + message); |
|---|
| 289 | } |
|---|
| 290 | return authorList; |
|---|
| 291 | } |
|---|
| 292 | |
|---|
| 293 | |
|---|
| 294 | private String getHighlights(Map<String, List<String>> articleHighlights) { |
|---|
| 295 | String hitHighlights = null; |
|---|
| 296 | if (articleHighlights != null) { |
|---|
| 297 | List<String> snippets = articleHighlights.get(HIGHLIGHT_FIELD); |
|---|
| 298 | if (snippets != null && snippets.size() > 0) { |
|---|
| 299 | StringBuilder sb = new StringBuilder(); |
|---|
| 300 | for (String snippet : snippets) { |
|---|
| 301 | if (sb.length() > 0) { |
|---|
| 302 | sb.append(" ... "); |
|---|
| 303 | } |
|---|
| 304 | sb.append(snippet); |
|---|
| 305 | } |
|---|
| 306 | |
|---|
| 307 | hitHighlights = sb.toString(); |
|---|
| 308 | } |
|---|
| 309 | } |
|---|
| 310 | return hitHighlights; |
|---|
| 311 | } |
|---|
| 312 | } |
|---|