Changeset 8267

Show
Ignore:
Timestamp:
03/05/10 13:19:55 (5 months ago)
Author:
ssterling
Message:

For Solr Search, always show the user the values they actually input. Changed the way that input is being filtered for special characters and words. Removed broken field weighting.

Addresses Job Order 16472

Location:
head/ambra/webapp/src/main/java/org/topazproject/ambra/search2
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • head/ambra/webapp/src/main/java/org/topazproject/ambra/search2/SearchParameters.java

    r8264 r8267  
    313313  public void setPageSize(int pageSize) { 
    314314    this.pageSize = pageSize; 
     315  } 
     316 
     317  /** 
     318   * Creates a deep copy of this SearchParameters object. 
     319   * 
     320   * @return a deep copy of this SearchParameters object. 
     321   */ 
     322  public SearchParameters copy() { 
     323    SearchParameters sp = new SearchParameters(); 
     324    sp.setQuery(this.getQuery()); 
     325    sp.setTextSearchAll(this.getTextSearchAll()); 
     326    sp.setTextSearchAtLeastOne(this.getTextSearchAtLeastOne()); 
     327    sp.setTextSearchExactPhrase(this.getTextSearchExactPhrase()); 
     328    sp.setTextSearchWithout(this.getTextSearchWithout()); 
     329    sp.setTextSearchOption(this.getTextSearchOption()); 
     330    sp.setCreator(this.getCreator().clone()); 
     331    sp.setAuthorNameOp(this.getAuthorNameOp()); 
     332    sp.setSubjectCatOpt(this.getSubjectCatOpt()); 
     333    sp.setDateTypeSelect(this.getDateTypeSelect()); 
     334    if (this.getStartDate() == null) 
     335      sp.setStartDate(null); 
     336    else 
     337      sp.setStartDate((Date)this.getStartDate().clone()); 
     338    if (this.getEndDate() == null) 
     339      sp.setEndDate(null); 
     340    else 
     341      sp.setEndDate((Date)this.getEndDate().clone()); 
     342    sp.setLimitToCategory(this.getLimitToCategory().clone()); 
     343    sp.setJournalOpt(this.getJournalOpt()); 
     344    sp.setLimitToJournal(this.getLimitToJournal().clone()); 
     345    sp.setStartPage(this.getStartPage()); 
     346    sp.setPageSize(this.getPageSize()); 
     347    return sp; 
    315348  } 
    316349 
  • head/ambra/webapp/src/main/java/org/topazproject/ambra/search2/service/SolrSearchService.java

    r8265 r8267  
    6969   */ 
    7070  public SearchResultSinglePage simpleSearch(String queryString, String eissn, int startPage, int pageSize) throws ApplicationException { 
    71     queryString = queryString.replaceAll("AND", "").replaceAll("OR", ""); 
     71    queryString = cleanString(queryString); 
    7272    log.debug("Simple Search performed on the String: " + queryString 
    7373        + " startPage: " + startPage 
     
    8383 
    8484  public SearchResultSinglePage advancedSearch(SearchParameters searchParameters) throws ApplicationException { 
    85     searchParameters = cleanStrings(searchParameters); 
    86     log.debug("Advanced Search performed on the SearchParameters: " + searchParameters); 
    87  
    88     SolrQuery query = createQuery(null, searchParameters.getStartPage(), searchParameters.getPageSize()); 
     85    SearchParameters sp = cleanStrings(searchParameters); 
     86    log.debug("Advanced Search performed on the SearchParameters: " + sp); 
     87 
     88    SolrQuery query = createQuery(null, sp.getStartPage(), sp.getPageSize()); 
    8989 
    9090    // Set highlighting fields.  Defaults to field list in "highlightFieldsDefault" variable. 
    91     if ("abstract".equals(searchParameters.getTextSearchExactPhrase())) { 
     91    if ("abstract".equals(sp.getTextSearchExactPhrase())) { 
    9292      query.set("hl.fl", "abstract"); 
    93     } else if ("refs".equals(searchParameters.getTextSearchOption())) { 
     93    } else if ("refs".equals(sp.getTextSearchOption())) { 
    9494      query.set("hl.fl", "citation"); 
    95     } else if ("title".equals(searchParameters.getTextSearchOption())) { 
     95    } else if ("title".equals(sp.getTextSearchOption())) { 
    9696      query.set("hl.fl", "title"); 
    9797    } 
     
    100100 
    101101    // Form field description: "Author Name:" 
    102     if (searchParameters.getCreator().length > 0 && StringUtils.isNotBlank(searchParameters.getCreator()[0])) { 
     102    if (sp.getCreator().length > 0 && StringUtils.isNotBlank(sp.getCreator()[0])) { 
    103103      q.append(" ( "); 
    104       for (int i = 0; i < searchParameters.getCreator().length; i++) { 
    105         String creatorName = searchParameters.getCreator()[i]; 
     104      for (int i = 0; i < sp.getCreator().length; i++) { 
     105        String creatorName = sp.getCreator()[i]; 
    106106        if (StringUtils.isNotBlank(creatorName)) { 
    107107          q.append(" author:\"").append(creatorName).append("\""); 
    108108        } 
    109         if (i < searchParameters.getCreator().length - 1 
    110             && StringUtils.isNotBlank(searchParameters.getCreator()[i + 1])) { 
    111           if ("all".equals(searchParameters.getAuthorNameOp())) { 
     109        if (i < sp.getCreator().length - 1 
     110            && StringUtils.isNotBlank(sp.getCreator()[i + 1])) { 
     111          if ("all".equals(sp.getAuthorNameOp())) { 
    112112            q.append(" AND "); 
    113113          } else { 
     
    120120 
    121121    // Form field description: "for at least one of the words:" 
    122     if (searchParameters.getTextSearchAtLeastOne().trim().length() > 0) { 
     122    if (sp.getTextSearchAtLeastOne().trim().length() > 0) { 
    123123      q.append(" AND ").append( 
    124           addFields(" OR ", searchParameters.getTextSearchOption(), searchParameters.getTextSearchAtLeastOne().trim()) 
     124          addFields(" OR ", sp.getTextSearchOption(), sp.getTextSearchAtLeastOne().trim()) 
    125125      ); 
    126126    } 
    127127 
    128128    // Form field description: "for all the words:" 
    129     if (searchParameters.getTextSearchAll().trim().length() > 0) { 
     129    if (sp.getTextSearchAll().trim().length() > 0) { 
    130130      q.append(" AND ").append( 
    131           addFields(" AND ", searchParameters.getTextSearchOption(), searchParameters.getTextSearchAll().trim()) 
     131          addFields(" AND ", sp.getTextSearchOption(), sp.getTextSearchAll().trim()) 
    132132      ); 
    133133    } 
    134134 
    135135    // Form field description: "for the exact phrase:" 
    136     if (searchParameters.getTextSearchExactPhrase().trim().length() > 0) { 
     136    if (sp.getTextSearchExactPhrase().trim().length() > 0) { 
    137137      q.append(" AND "); 
    138       if ("abstract".equals(searchParameters.getTextSearchOption())) { 
    139         q.append("abstract:\"").append(searchParameters.getTextSearchExactPhrase().trim()).append("\""); 
    140       } else if ("refs".equals(searchParameters.getTextSearchOption())) { 
    141         q.append("citation:\"").append(searchParameters.getTextSearchExactPhrase().trim()).append("\""); 
    142       } else if ("title".equals(searchParameters.getTextSearchOption())) { 
    143         q.append("title:\"").append(searchParameters.getTextSearchExactPhrase().trim()).append("\""); 
    144       } else { 
    145         q.append("\"").append(searchParameters.getTextSearchExactPhrase().trim()).append("\""); 
     138      if ("abstract".equals(sp.getTextSearchOption())) { 
     139        q.append("abstract:\"").append(sp.getTextSearchExactPhrase().trim()).append("\""); 
     140      } else if ("refs".equals(sp.getTextSearchOption())) { 
     141        q.append("citation:\"").append(sp.getTextSearchExactPhrase().trim()).append("\""); 
     142      } else if ("title".equals(sp.getTextSearchOption())) { 
     143        q.append("title:\"").append(sp.getTextSearchExactPhrase().trim()).append("\""); 
     144      } else { 
     145        q.append("\"").append(sp.getTextSearchExactPhrase().trim()).append("\""); 
    146146      } 
    147147      q.append(" "); 
     
    149149 
    150150    // Form field description: "without the words:" 
    151     if (searchParameters.getTextSearchWithout().trim().length() > 0) { 
     151    if (sp.getTextSearchWithout().trim().length() > 0) { 
    152152      q.append(" AND ").append( 
    153           addFieldsWithoutTheWords(" AND ", searchParameters.getTextSearchOption(), 
    154               searchParameters.getTextSearchWithout().trim()) 
     153          addFieldsWithoutTheWords(" AND ", sp.getTextSearchOption(), 
     154              sp.getTextSearchWithout().trim()) 
    155155      ); 
    156156    } 
    157157 
    158158    // Form field description: "Dates".  Query Filter. 
    159     if (searchParameters.getDateTypeSelect().trim().length() > 0) { 
    160       query.addFilterQuery(createFilterDateRange(searchParameters.getStartDate(), searchParameters.getEndDate())); 
     159    if (sp.getDateTypeSelect().trim().length() > 0) { 
     160      query.addFilterQuery(createFilterDateRange(sp.getStartDate(), sp.getEndDate())); 
    161161    } 
    162162 
    163163    // Form field description: "Journals".  Query Filter. 
    164     if ("some".equals(searchParameters.getJournalOpt()) && searchParameters.getLimitToJournal().length > 0) { 
    165       query.addFilterQuery(createFilterLimitToJournals(searchParameters.getLimitToJournal())); 
     164    if ("some".equals(sp.getJournalOpt()) && sp.getLimitToJournal().length > 0) { 
     165      query.addFilterQuery(createFilterLimitToJournals(sp.getLimitToJournal())); 
    166166    } 
    167167 
    168168    // Form field description: "Subject Categories".  Query Filter. 
    169     if ("some".equals(searchParameters.getSubjectCatOpt()) && searchParameters.getLimitToCategory().length > 0) { 
    170       query.addFilterQuery(createFilterLimitToCategories(searchParameters.getLimitToCategory())); 
     169    if ("some".equals(sp.getSubjectCatOpt()) && sp.getLimitToCategory().length > 0) { 
     170      query.addFilterQuery(createFilterLimitToCategories(sp.getLimitToCategory())); 
    171171    } 
    172172 
     
    238238    SolrQuery query = new SolrQuery(queryString); 
    239239    query.setTimeAllowed(queryTimeout); 
    240     query.set("fl", "title^2.0", "abstract^1.5", "body^1.0"); // Score weighting. 
    241240    query.setIncludeScore(true); // The relevance (of each results element) to the search terms. 
    242241    query.setHighlight(true); 
     
    485484   * Remove dangerous and unwanted values from the Strings in selected fields in the SearchParameters parameter. 
    486485   * 
    487    * @param sp A SearchParameters object the needs to have some of its fields "cleaned" 
     486   * @param searchParameters A SearchParameters object that needs to have some of its fields "cleaned" 
    488487   * @return The SearchParameters parameter with some of its fields "cleaned" 
    489488   */ 
    490   private SearchParameters cleanStrings(SearchParameters sp) { 
    491     sp.setQuery(sp.getQuery().replaceAll(" AND", "").replaceAll(" OR", "").replaceAll("AND ", "").replaceAll("OR ", "").toLowerCase()); 
    492     if (sp.getCreator().length > 0) { 
    493       String [] tempCreator = new String[sp.getCreator().length]; 
     489  private SearchParameters cleanStrings(SearchParameters searchParameters) { 
     490    SearchParameters sp = searchParameters.copy(); 
     491    sp.setQuery(cleanString(searchParameters.getQuery())); 
     492    if (searchParameters.getCreator().length > 0) { 
     493      String [] tempCreator = new String[searchParameters.getCreator().length]; 
    494494      int counter = 0; 
    495       for (String author : sp.getCreator()) { 
    496         tempCreator[counter++] = author.replaceAll(" AND", "").replaceAll(" OR", "").replaceAll("AND ", "").replaceAll("OR ", "").toLowerCase(); 
     495      for (String author : searchParameters.getCreator()) { 
     496        tempCreator[counter++] = cleanString(author); 
    497497      } 
    498498      sp.setCreator(tempCreator); 
    499499    } 
    500     sp.setTextSearchAll(sp.getTextSearchAll().replaceAll(" AND", "").replaceAll(" OR", "").replaceAll("AND ", "").replaceAll("OR ", "").toLowerCase()); 
    501     sp.setTextSearchAtLeastOne(sp.getTextSearchAtLeastOne().replaceAll(" AND", "").replaceAll(" OR", "").replaceAll("AND ", "").replaceAll("OR ", "").toLowerCase()); 
    502     sp.setTextSearchExactPhrase(sp.getTextSearchExactPhrase().replaceAll(" AND", "").replaceAll(" OR", "").replaceAll("AND ", "").replaceAll("OR ", "").toLowerCase()); 
    503     sp.setTextSearchWithout(sp.getTextSearchWithout().replaceAll(" AND", "").replaceAll(" OR", "").replaceAll("AND ", "").replaceAll("OR ", "").toLowerCase()); 
     500    sp.setTextSearchAll(cleanString(searchParameters.getTextSearchAll())); 
     501    sp.setTextSearchAtLeastOne(cleanString(searchParameters.getTextSearchAtLeastOne())); 
     502    sp.setTextSearchExactPhrase(cleanString(searchParameters.getTextSearchExactPhrase())); 
     503    sp.setTextSearchWithout(cleanString(searchParameters.getTextSearchWithout())); 
    504504    return sp; 
    505505  } 
     506 
     507  /** 
     508   * TODO: add [ and ] but only if they are PAIRED!  Used for ranges. 
     509   * 
     510   * TODO: Check that ^ is followed by a NUMBER (integer or float).  Used for "boosting" terms in sort 
     511   * 
     512   * TODO: allow AND and OR and NOT, but that requires that our algorithm be able to construct complex boolean queries 
     513   * 
     514   * @param toBeCleaned 
     515   * @return 
     516   */ 
     517  private String cleanString(String toBeCleaned) { 
     518    return toBeCleaned 
     519        .replaceAll("[^\\w\\s\\?\\*]", "") 
     520        .replaceAll("_", " ") 
     521        .replaceAll("-", " ") 
     522        .replaceAll(" AND", "").replaceAll("AND ", "") 
     523        .replaceAll(" OR", "").replaceAll("OR ", "") 
     524        .replaceAll(" NOT", "").replaceAll("NOT ", ""); 
     525  } 
    506526}