# an extension to ActiveRecord to make it simple to add full-text searching to models.
# - uses the Ferret port of the Lucene search engine.
# a helper class extending Ferret's Index to handle "complex" searches
# (both free text and a set of "tag filters")
module Buffalo
module Search
# we return an instance of this class...
# plain container for everything uber_search hands back to its caller.
class SearchResult
  # results      - the objects to display from the search
  # search       - the ferret search object itself
  # subtags      - the tags which would further refine this search
  # query_string - the actual query string used for the search
  attr_accessor :results, :search, :subtags, :query_string
end
# we modify the search behavior to join the free text and tag filters
class SearchIndex < Ferret::Index::Index
include Singleton
include Buffalo::Tags
# create the search index, if necessary
# open the on-disk index kept under the application's root directory,
# asking ferret to create it when it does not exist yet.
def initialize
  index_path = "#{RAILS_ROOT}/searchindex"
  super(:path => index_path, :create_if_missing => true)
end
# the actual filtered search...
# run a filtered search: every tag in +tags+ becomes a required clause on the
# field named after the tag's aspect, and the free-text +query+ (when given)
# is appended as one more required clause.
#
# query            - free-text query string, or nil for a tags-only search
# tags             - tag objects (responding to aspect and name), or nil
# first_doc        - offset of the first hit requested from ferret
# num_display_docs - how many of the hits are loaded as content objects
# num_inspect_docs - how many hits are requested and scanned for subtags
#
# returns a SearchResult whose results are [content, score] pairs, search is
# the raw ferret result, subtags maps each aspect label to the Tag objects
# that would narrow the search further, and query_string is the query used.
def uber_search(query, tags, first_doc = 0, num_display_docs = 50, num_inspect_docs = 500)
  # nil means "no tag filters". normalize it once so that both the query
  # construction and the include? check in the subtag pass can rely on an array.
  tags ||= []
  rs = SearchResult.new
  # compose the filter query
  rs.query_string = ""
  # construct search string with tags. tags can have aspects (classes of tags), and
  # we use the field property of ferret's indexing to distinguish the aspects.
  tags.each do |tag|
    aspect_label = ASPECTS[tag.aspect].to_s
    # if the tag is multi-word, we need to quote it
    if tag.name.split.length > 1
      rs.query_string << "+#{aspect_label}: \"#{tag.name}\" "
    else
      rs.query_string << "+#{aspect_label}: #{tag.name} "
    end
  end
  # and add the free text portion of the query
  rs.query_string << "+(#{query})" unless query.nil?
  # do the actual search.
  rs.search = search(rs.query_string, :first_doc => first_doc, :num_docs => num_inspect_docs)
  # our results will be the actual objects, but we'll only load the first num_display_docs.
  rs.results = []
  # per-aspect tallies of the tag phrases seen on the inspected hits
  # (aspect label => { tag text => number of hits carrying it }).
  acc_subtags = Buffalo::Tags::ASPECTS.values.inject({}) { |hsh, k| hsh[k] = {}; hsh }
  count = 0
  rs.search.score_docs.each do |ds|
    # look the index document up once per hit; it is needed both for the id
    # and for every aspect field below.
    document = doc(ds.doc)
    # we actually load the objects only for the first part of the results.
    if count < num_display_docs
      # the "id" field on the ferret record has the object type and id.
      type, id = document[:id].split('.')
      content = Buffalo::Content::find_content(type.to_i, id)
      rs.results << [content, ds.score]
    end
    # regardless of whether we load the object, collect the tag phrases
    # recorded on the ferret record for each aspect field.
    acc_subtags.each do |k, v|
      ents = document.fields(k.to_s)
      ents.each do |ent|
        v[ent.data] ||= 0
        v[ent.data] += 1
      end unless ents.nil?
    end
    count += 1
  end
  # now build a hash of arrays with the actual tag objects, limited to
  # those tags which will really help to filter the results.
  rs.subtags = acc_subtags.keys.inject({}) do |hsh, k|
    hsh[k] = []
    acc_subtags[k].each do |n, c|
      # a tag present on every inspected hit would not change the result
      # set when added as a filter, so it is not offered.
      if c < count
        ntag = Tag.new(n, REV_ASPECTS[k])
        # tags already used as filters are not offered again
        hsh[k] << ntag unless tags.include?(ntag)
      end
    end
    hsh
  end
  rs
end
end
end
end
# extension to ActiveRecord to make a model object automatically indexed
module ActiveRecord
module Acts #:nodoc:
module Searchable #:nodoc:
# hook invoked when this module is mixed into +base+: perform the normal
# include, then add ClassMethods to +base+ as class-level methods so that
# acts_as_searchable can be called from a model definition.
def self.append_features(base)
  super(base)
  base.extend ClassMethods
end
module ClassMethods
# called by model to add the auto-indexing behavior
# called by a model to add the auto-indexing behavior.
#
# options - accepted for call compatibility; not read by this method.
#
# raises RuntimeError when the model's (demodulized) class name is not a key
# of Buffalo::Content::REV_CLASSES.
def acts_as_searchable(options = {})
  # the model must be a registered content class, so that we know how to load it later
  raise "Model #{self.name} is not defined in Buffalo::Content::CLASSES" unless
    Buffalo::Content::REV_CLASSES.has_key?(self.name.to_s.demodulize)
  # set class-wide attribute to track the content class (type of model)
  write_inheritable_attribute(:internal_content_class,
    Buffalo::Content::REV_CLASSES[self.to_s.demodulize]) unless
    self.respond_to?(:internal_content_class)
  # now, finally add the functions and hooks to make indexing work
  class_eval do
    # include methods that do the indexing, and need to be present
    # on the instance of the model object.
    include ActiveRecord::Acts::Searchable::InstanceMethods
    # provide access from the instance to the content class type
    class_inheritable_reader :internal_content_class
    # the before_destroy hook below assigns searchindex_internal and
    # process_searchable_destroy reads it back, so the accessor has to exist
    # on the model; define it unless the model already provides one.
    attr_accessor :searchindex_internal unless method_defined?(:searchindex_internal)
    # hooks to update the index when the object is modified
    after_create("process_searchindex_internal")
    after_update("process_searchindex_internal")
    # we might not have a valid id after destruction, so store it before.
    before_destroy("self.searchindex_internal = self.id; true")
    after_destroy("process_searchable_destroy")
  end
end
end
module InstanceMethods
include Ferret::Document
# force an index update now
# public entry point for re-indexing this object on demand; it simply
# delegates to the private process_searchindex_internal and returns
# whatever that call returns.
def update_searchindex
  process_searchindex_internal()
end
private
# update the ferret index with this object's content
# (re)build this object's document and store it in the shared search index.
# always returns true.
def process_searchindex_internal
  content_class = self.class.internal_content_class
  # the key records both what kind of object this is and which one it is.
  doc_key = "#{content_class}.#{self.id}"
  entry = Document.new
  entry << Field.new("id", doc_key, Field::Store::YES, Field::Index::UNTOKENIZED)
  # the lower-cased *name* of the model type is indexed too, so searches can be limited by type
  type_name = Buffalo::Content::CLASSES[content_class].downcase
  entry << Field.new("type", type_name, Field::Store::NO, Field::Index::UNTOKENIZED)
  # models that define tags get one field per tag:
  # the field name is the tag's aspect label, the value is the tag text.
  if self.class.method_defined?(:tags)
    self.tags.each do |tag|
      aspect_field = Buffalo::Tags::ASPECTS[tag.aspect].to_s
      entry << Field.new(aspect_field, tag.name, Field::Store::YES, Field::Index::TOKENIZED)
    end
  end
  # hand the document to the model so it can add its own model-specific fields.
  set_searchindex_fields(entry)
  # replace whatever copy the index currently holds under this key.
  search_index = Buffalo::Search::SearchIndex.instance
  search_index.delete(doc_key)
  search_index << entry
  search_index.flush
  true
end
# remove an entry from the index
# drop this object's document from the search index. always returns true.
def process_searchable_destroy
  # the before_destroy hook stores the record id in searchindex_internal;
  # when nothing was stored there is nothing to remove.
  stored_id = searchindex_internal
  return true if stored_id.nil?
  # the index key is the content class type joined to the stored model id
  doc_key = "#{self.class.internal_content_class}.#{stored_id}"
  search_index = Buffalo::Search::SearchIndex.instance
  search_index.delete(doc_key)
  search_index.flush
  true
end
end
end
end
end
# incorporate our module into the ActiveRecord class
# mix the Searchable module into ActiveRecord::Base so that every model
# class can call acts_as_searchable.
ActiveRecord::Base.send(:include, ActiveRecord::Acts::Searchable)