2011-08-20 17:08:00 +00:00
""" Search backends used by django-watson. """
2011-08-29 10:51:40 +00:00
import re
2011-08-24 10:22:51 +00:00
2011-08-20 17:08:00 +00:00
from django . conf import settings
from django . core . exceptions import ImproperlyConfigured
2011-08-21 16:58:41 +00:00
from django . contrib . contenttypes . models import ContentType
2011-08-21 14:52:24 +00:00
from django . db import models , connection
2011-08-21 16:58:41 +00:00
from django . db . models import Q
2011-08-20 17:27:57 +00:00
2011-08-21 17:14:43 +00:00
from watson . models import SearchEntry , has_int_pk
2011-08-20 17:08:00 +00:00
2011-08-24 12:36:26 +00:00
def regex_from_word(word):
    """
    Generates a regex from the given search word.

    The word is escaped with ``re.escape`` and wrapped in a pattern that only
    matches it as a whole word: surrounded by whitespace and/or the start or
    end of the text.
    """
    # Backslashes are escaped explicitly: a bare "\s" in a non-raw literal is
    # an invalid escape sequence and triggers a warning on modern Python.
    return u"(\\s{word}\\s)|(^{word}\\s)|(\\s{word}$)|(^{word}$)".format(
        word=re.escape(word),
    )
2011-08-20 17:08:00 +00:00
class SearchBackend(object):

    """
    Base class for all search backends.

    This default implementation needs no installation step and performs its
    matching with case-insensitive regex lookups through the ORM.
    """

    def is_installed(self):
        """Checks whether django-watson is installed."""
        # The regex backend needs no database-level setup.
        return True

    def do_install(self):
        """Executes the SQL needed to install django-watson."""
        pass

    def do_uninstall(self):
        """Executes the SQL needed to uninstall django-watson."""
        # Nothing was installed, so there is nothing to remove. Defined here so
        # that every backend exposes the same install/uninstall interface.
        pass

    # Whether this backend can order results by relevance.
    supports_ranking = False

    def do_search(self, engine_slug, queryset, search_text):
        """
        Filters the given queryset according to the search logic for this backend.

        Every whitespace-separated word of ``search_text`` must match at least
        one of the ``title``, ``description`` or ``content`` fields.
        ``engine_slug`` is not used by this implementation.
        """
        word_query = Q()
        for word in search_text.split():
            regex = regex_from_word(word)
            word_query &= (
                Q(title__iregex=regex)
                | Q(description__iregex=regex)
                | Q(content__iregex=regex)
            )
        return queryset.filter(word_query)

    def do_search_ranking(self, engine_slug, queryset, search_text):
        """
        Ranks the given queryset according to the relevance of the given search text.

        This backend cannot rank, so every row gets the constant rank ``1``.
        """
        return queryset.extra(
            select={
                "watson_rank": "1",
            },
        )

    def do_filter(self, engine_slug, queryset, search_text):
        """
        Filters the given queryset according to the search logic for this backend.

        Rows are matched through their ``searchentry_set`` relation: the
        related entry must belong to ``engine_slug`` and every
        whitespace-separated word of ``search_text`` must match its title,
        description or content.
        """
        word_query = Q(searchentry_set__engine_slug=engine_slug)
        for word in search_text.split():
            regex = regex_from_word(word)
            word_query &= (
                Q(searchentry_set__title__iregex=regex)
                | Q(searchentry_set__description__iregex=regex)
                | Q(searchentry_set__content__iregex=regex)
            )
        return queryset.filter(word_query)

    def do_filter_ranking(self, engine_slug, queryset, search_text):
        """
        Ranks the given queryset according to the relevance of the given search text.

        This backend cannot rank, so every row gets the constant rank ``1``.
        """
        return queryset.extra(
            select={
                "watson_rank": "1",
            },
        )

    def save_search_entry(self, search_entry, obj, adapter):
        """
        Saves the given search entry in the database.

        ``obj`` and ``adapter`` are accepted but not used by this
        implementation.
        """
        search_entry.save()
2011-08-20 17:50:35 +00:00
2011-08-20 17:08:00 +00:00
class PostgresSearchBackend(SearchBackend):

    """A search backend that uses native PostgreSQL full text indices."""

    def is_installed(self):
        """
        Checks whether django-watson is installed.

        Installation is detected by the presence of the ``search_tsv`` column
        on the ``watson_searchentry`` table.
        """
        cursor = connection.cursor()
        cursor.execute("""
            SELECT attname FROM pg_attribute
            WHERE attrelid = (SELECT oid FROM pg_class WHERE relname = 'watson_searchentry') AND attname = 'search_tsv';
        """)
        return bool(cursor.fetchall())

    def do_install(self):
        """
        Executes the PostgreSQL specific SQL code to install django-watson.

        Ensures plpgsql is available, adds the ``search_tsv`` column with a GIN
        index, and creates a trigger that recomputes the column from the
        title, description and content on every insert or update.
        """
        connection.cursor().execute("""
            -- Ensure that plpgsql is installed.
            CREATE OR REPLACE FUNCTION make_plpgsql() RETURNS VOID LANGUAGE SQL AS
            $$
                CREATE LANGUAGE plpgsql;
            $$;
            SELECT
                CASE
                WHEN EXISTS(
                    SELECT 1
                    FROM pg_catalog.pg_language
                    WHERE lanname='plpgsql'
                )
                THEN NULL
                ELSE make_plpgsql() END;
            DROP FUNCTION make_plpgsql();

            -- Create the search index.
            ALTER TABLE watson_searchentry ADD COLUMN search_tsv tsvector NOT NULL;
            CREATE INDEX watson_searchentry_search_tsv ON watson_searchentry USING gin(search_tsv);

            -- Create the trigger function.
            CREATE FUNCTION watson_searchentry_trigger_handler() RETURNS trigger AS $$
            begin
                new.search_tsv :=
                    setweight(to_tsvector('pg_catalog.english', coalesce(new.title, '')), 'A') ||
                    setweight(to_tsvector('pg_catalog.english', coalesce(new.description, '')), 'C') ||
                    setweight(to_tsvector('pg_catalog.english', coalesce(new.content, '')), 'D');
                return new;
            end
            $$ LANGUAGE plpgsql;
            CREATE TRIGGER watson_searchentry_trigger BEFORE INSERT OR UPDATE
            ON watson_searchentry FOR EACH ROW EXECUTE PROCEDURE watson_searchentry_trigger_handler();
        """)

    def do_uninstall(self):
        """Executes the PostgreSQL specific SQL code to uninstall django-watson."""
        connection.cursor().execute("""
            ALTER TABLE watson_searchentry DROP COLUMN search_tsv;
            DROP TRIGGER watson_searchentry_trigger ON watson_searchentry;
            DROP FUNCTION watson_searchentry_trigger_handler();
        """)

    # Results can be ordered by relevance using ts_rank_cd.
    supports_ranking = True

    def do_search(self, engine_slug, queryset, search_text):
        """
        Performs the full text search.

        Restricts the queryset to rows whose ``search_tsv`` matches the plain
        text query. ``engine_slug`` is not used by this implementation.
        """
        return queryset.extra(
            where=("search_tsv @@ plainto_tsquery(%s)",),
            params=(search_text,),
        )

    def do_search_ranking(self, engine_slug, queryset, search_text):
        """
        Performs full text ranking.

        Adds a ``watson_rank`` column and orders by it, best match first.
        """
        return queryset.extra(
            select={
                "watson_rank": "ts_rank_cd(search_tsv, plainto_tsquery(%s))",
            },
            select_params=(search_text,),
            order_by=("-watson_rank",),
        )

    def do_filter(self, engine_slug, queryset, search_text):
        """
        Performs the full text filter.

        Joins the queryset's model table against ``watson_searchentry`` and
        keeps the rows whose search entry belongs to ``engine_slug``, has the
        model's content type, and matches ``search_text``.
        """
        model = queryset.model
        content_type = ContentType.objects.get_for_model(model)
        # Integer primary keys are matched against the integer reference column.
        if has_int_pk(model):
            ref_name = "object_id_int"
        else:
            ref_name = "object_id"
        return queryset.extra(
            tables=("watson_searchentry",),
            where=(
                "watson_searchentry.engine_slug = %s",
                "watson_searchentry.search_tsv @@ plainto_tsquery(%s)",
                "watson_searchentry.{ref_name} = {table_name}.{pk_name}".format(
                    ref_name=ref_name,
                    table_name=connection.ops.quote_name(model._meta.db_table),
                    # Use the database column, not the field name: the two
                    # differ when the pk sets db_column or is a relation.
                    pk_name=connection.ops.quote_name(model._meta.pk.column),
                ),
                "watson_searchentry.content_type_id = %s",
            ),
            # Order matches the %s placeholders in the where clauses above.
            params=(engine_slug, search_text, content_type.id),
        )

    def do_filter_ranking(self, engine_slug, queryset, search_text):
        """
        Performs the full text ranking.

        Adds a ``watson_rank`` column computed from the joined
        ``watson_searchentry`` row and orders by it, best match first.
        """
        return queryset.extra(
            select={
                "watson_rank": "ts_rank_cd(watson_searchentry.search_tsv, plainto_tsquery(%s))",
            },
            select_params=(search_text,),
            order_by=("-watson_rank",),
        )
2011-08-20 17:08:00 +00:00
2011-08-29 12:30:12 +00:00
def escape_mysql_boolean_query(search_text):
    """
    Converts the given search text into a MySQL boolean mode query.

    Each whitespace-separated word becomes a required, double-quoted term
    (``+"word"``). Double quotes inside a word are removed, and words that are
    empty once the quotes are removed are dropped.
    """
    # A double quote inside a word would end the quoted term early, so strip it.
    words = (word.replace(u'"', u'') for word in search_text.split())
    return u" ".join(
        u'+"{word}"'.format(word=word)
        for word in words
        if word  # Skip words that consisted solely of quotes.
    )
2011-08-20 17:08:00 +00:00
2011-08-29 12:14:22 +00:00
class MySQLSearchBackend(SearchBackend):

    """A search backend that uses MySQL FULLTEXT indices in boolean mode."""

    def do_install(self):
        """
        Executes the MySQL specific SQL code to install django-watson.

        Drops the foreign keys on ``watson_searchentry``, converts the table
        to MyISAM with a case-insensitive collation, and creates the full text
        indexes used for searching and ranking.
        """
        cursor = connection.cursor()
        # Drop all foreign keys on the watson_searchentry table.
        cursor.execute("SELECT CONSTRAINT_NAME FROM information_schema.TABLE_CONSTRAINTS WHERE CONSTRAINT_SCHEMA = DATABASE() AND TABLE_NAME = 'watson_searchentry' AND CONSTRAINT_TYPE = 'FOREIGN KEY'")
        for constraint_name, in cursor.fetchall():
            cursor.execute("ALTER TABLE watson_searchentry DROP FOREIGN KEY {constraint_name}".format(
                # Quote the identifier so unusual constraint names stay valid SQL.
                constraint_name=connection.ops.quote_name(constraint_name),
            ))
        # Change the storage engine to MyISAM.
        cursor.execute("ALTER TABLE watson_searchentry ENGINE = MyISAM")
        # Change the collation to a case-insensitive one.
        cursor.execute("ALTER TABLE watson_searchentry CONVERT TO CHARACTER SET utf8 COLLATE utf8_general_ci")
        # Add the full text indexes: one combined index for matching, and one
        # per column for the weighted ranking.
        cursor.execute("CREATE FULLTEXT INDEX watson_searchentry_fulltext ON watson_searchentry (title, description, content)")
        cursor.execute("CREATE FULLTEXT INDEX watson_searchentry_title ON watson_searchentry (title)")
        cursor.execute("CREATE FULLTEXT INDEX watson_searchentry_description ON watson_searchentry (description)")
        cursor.execute("CREATE FULLTEXT INDEX watson_searchentry_content ON watson_searchentry (content)")

    # Results can be ordered by relevance using the MATCH scores.
    supports_ranking = True

    def do_search(self, engine_slug, queryset, search_text):
        """
        Performs the full text search.

        Restricts the queryset to rows matching the escaped boolean mode
        query. ``engine_slug`` is not used by this implementation.
        """
        return queryset.extra(
            where=("MATCH (title, description, content) AGAINST (%s IN BOOLEAN MODE)",),
            params=(escape_mysql_boolean_query(search_text),),
        )

    def do_search_ranking(self, engine_slug, queryset, search_text):
        """
        Performs full text ranking.

        Adds a ``watson_rank`` column weighting title, description and content
        matches 3:2:1, and orders by it, best match first.
        """
        search_text = escape_mysql_boolean_query(search_text)
        return queryset.extra(
            select={
                "watson_rank": """
                    ((MATCH (title) AGAINST (%s IN BOOLEAN MODE)) * 3) +
                    ((MATCH (description) AGAINST (%s IN BOOLEAN MODE)) * 2) +
                    ((MATCH (content) AGAINST (%s IN BOOLEAN MODE)) * 1)
                """,
            },
            # One parameter per MATCH expression above.
            select_params=(search_text, search_text, search_text,),
            order_by=("-watson_rank",),
        )

    def do_filter(self, engine_slug, queryset, search_text):
        """
        Performs the full text filter.

        Joins the queryset's model table against ``watson_searchentry`` and
        keeps the rows whose search entry belongs to ``engine_slug``, has the
        model's content type, and matches ``search_text``.
        """
        model = queryset.model
        content_type = ContentType.objects.get_for_model(model)
        # Integer primary keys are matched against the integer reference column.
        if has_int_pk(model):
            ref_name = "object_id_int"
        else:
            ref_name = "object_id"
        return queryset.extra(
            tables=("watson_searchentry",),
            where=(
                "watson_searchentry.engine_slug = %s",
                "MATCH (watson_searchentry.title, watson_searchentry.description, watson_searchentry.content) AGAINST (%s IN BOOLEAN MODE)",
                "watson_searchentry.{ref_name} = {table_name}.{pk_name}".format(
                    ref_name=ref_name,
                    table_name=connection.ops.quote_name(model._meta.db_table),
                    # Use the database column, not the field name: the two
                    # differ when the pk sets db_column or is a relation.
                    pk_name=connection.ops.quote_name(model._meta.pk.column),
                ),
                "watson_searchentry.content_type_id = %s",
            ),
            # Order matches the %s placeholders in the where clauses above.
            params=(engine_slug, escape_mysql_boolean_query(search_text), content_type.id),
        )

    def do_filter_ranking(self, engine_slug, queryset, search_text):
        """
        Performs the full text ranking.

        Adds a ``watson_rank`` column computed from the joined
        ``watson_searchentry`` row, weighting title, description and content
        matches 3:2:1, and orders by it, best match first.
        """
        search_text = escape_mysql_boolean_query(search_text)
        return queryset.extra(
            select={
                "watson_rank": """
                    ((MATCH (watson_searchentry.title) AGAINST (%s IN BOOLEAN MODE)) * 3) +
                    ((MATCH (watson_searchentry.description) AGAINST (%s IN BOOLEAN MODE)) * 2) +
                    ((MATCH (watson_searchentry.content) AGAINST (%s IN BOOLEAN MODE)) * 1)
                """,
            },
            # One parameter per MATCH expression above.
            select_params=(search_text, search_text, search_text,),
            order_by=("-watson_rank",),
        )
2011-08-20 17:08:00 +00:00
class AdaptiveSearchBackend(SearchBackend):

    """
    A search backend that picks the concrete backend to use from the
    DATABASES["default"] settings.
    """

    def __new__(cls):
        """
        Returns an instance of the backend matching the default database engine.

        PostgreSQL engines get a PostgresSearchBackend, MySQL engines get a
        MySQLSearchBackend, and anything else falls back to SearchBackend.
        """
        engine_name = settings.DATABASES["default"]["ENGINE"]
        if engine_name.endswith(("postgresql_psycopg2", "postgresql")):
            return PostgresSearchBackend()
        elif engine_name.endswith("mysql"):
            return MySQLSearchBackend()
        else:
            return SearchBackend()