Setting up the sphinx.conf file for indexing Tryton product data
I've successfully configured Sphinx to index Tryton data from a PostgreSQL database, using source and index inheritance to support multiple languages.
Here is how I achieved this:
** Again, I am assuming that you are running Ubuntu 10.04 and that you followed the instructions for compiling Sphinx from source in my previous blog post.
Here is the sample file, which should be located at:
/etc/sphinx/etc/sphinx.conf
#
# Sphinx configuration file sample
#
# Please refer to doc/sphinx.html for details.
#

#############################################################################
## data source definition
#############################################################################
source product_product
{
    # data source type. mandatory, no default value
    # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
    type = pgsql

    #####################################################################
    ## SQL settings (for 'mysql' and 'pgsql' types)
    #####################################################################

    # some straightforward parameters for SQL source types
    sql_host = PUT_THE_IP_ADDRESS_OR_FQDN_OF_YOUR_DATABASE_SERVER
    sql_user = PUT_YOUR_DATABASE_USERNAME
    sql_pass = PUT_THE_PASSWORD_FOR_THE_DATABASE_USER
    sql_db   = PUT_THE_DATABASE_NAME
    sql_port = 5432 # default is 3306

    # main document fetch query
    # mandatory, integer document ID field MUST be the first selected column
    sql_query = \
        SELECT \
            product.id AS id, \
            template.name AS name, \
            product.description AS description \
        FROM product_product AS product \
        JOIN product_template AS template ON template.id = product.template \
        WHERE \
            product.id >= $start AND product.id <= $end
    # range query setup, query that must return min and max ID values
    # optional, default is empty
    #
    # sql_query will need to reference $start and $end boundaries
    # if using ranged query:
    #
    # sql_query = \
    #	SELECT doc.id, doc.id AS group, doc.title, doc.data \
    #	FROM documents doc \
    #	WHERE id>=$start AND id<=$end
    #
    sql_query_range = SELECT MIN(id), MAX(id) FROM product_product

    # range query step
    # optional, default is 1024
    #
    sql_range_step = 1000
    # combined field plus attribute declaration (from a single column)
    # stores column as an attribute, but also indexes it as a full-text field
    #
    sql_field_string = name
    sql_field_string = description

    # ranged query throttling, in milliseconds
    # optional, default is 0 which means no delay
    # enforces given delay before each query step
    sql_ranged_throttle = 0
    # document info query, ONLY for CLI search (ie. testing and debugging)
    # optional, default is empty
    # must contain $id macro and must fetch the document by that id
    sql_query_info = SELECT * FROM product_product WHERE id=$id
}
# inherited source
#
# all the parameters are copied from the parent source,
# and may then be overridden in this source definition
source product_product_pt : product_product
{
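    # NOTE: get_template_translation and get_product_translation are custom
    # PostgreSQL functions (not built-ins); their definitions are coming in
    # the follow-up post, with a rough sketch at the end of this one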
    sql_query = \
        SELECT \
            product.id AS id, \
            get_template_translation(template.id, 'name', template.name, 'pt_BR') AS name, \
            get_product_translation(product.id, 'description', product.description, 'pt_BR') AS description \
        FROM product_product AS product \
        JOIN product_template AS template ON template.id = product.template \
        WHERE \
            product.id >= $start AND product.id <= $end
}
#############################################################################
## index definition
#############################################################################

# local index example
#
# this is an index which is stored locally in the filesystem
#
# all indexing-time options (such as morphology and charsets)
# are configured per local index
index product_product
{
    # document source(s) to index
    # multi-value, mandatory
    # document IDs must be globally unique across all sources
    source = product_product

    # index files path and file name, without extension
    # mandatory, path must be writable, extensions will be auto-appended
    path = /var/data/product_product

    # document attribute values (docinfo) storage mode
    # optional, default is 'extern'
    # known values are 'none', 'extern' and 'inline'
    docinfo = extern

    # memory locking for cached data (.spa and .spi), to prevent swapping
    # optional, default is 0 (do not mlock)
    # requires searchd to be run from root
    mlock = 0

    # a list of morphology preprocessors to apply
    # optional, default is empty
    #
    # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
    # 'soundex', and 'metaphone'; additional preprocessors available from
    # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
    # (see libstemmer_c/libstemmer/modules.txt)
    #
    # morphology = stem_en, stem_ru, soundex
    # morphology = libstemmer_german
    # morphology = libstemmer_sv
    morphology = none

    # minimum indexed word length
    # default is 1 (index everything)
    min_word_len = 1

    # charset encoding type
    # optional, default is 'sbcs'
    # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
    charset_type = utf-8

    # whether to strip HTML tags from incoming documents
    # known values are 0 (do not strip) and 1 (do strip)
    # optional, default is 0
    html_strip = 1
}
# inherited index
#
# all the parameters are copied from the parent index,
# and may then be overridden in this index definition
index product_product_en : product_product
{
    path = /var/data/product_product_en
    morphology = stem_en
}

index product_product_pt : product_product
{
    source = product_product_pt
    path = /var/data/product_product_pt
    morphology = libstemmer_pt
}
#############################################################################
## indexer settings
#############################################################################

indexer
{
    # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
    # optional, default is 32M, max is 2047M, recommended is 256M to 1024M
    mem_limit = 2047M
}
#############################################################################
## searchd settings
#############################################################################

searchd
{
    # [hostname:]port[:protocol], or /unix/socket/path to listen on
    # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
    #
    # multi-value, multiple listen points are allowed
    # optional, defaults are 9312:sphinx and 9306:mysql41, as below
    #
    # listen = 127.0.0.1
    # listen = 192.168.0.1:9312
    # listen = 9312
    # listen = /var/run/searchd.sock
    listen = PUT_PUBLIC_OR_PRIVATE_IP_ADDRESS_OF_THE_SEARCHD_SERVER:9312
    listen = PUT_PUBLIC_OR_PRIVATE_IP_ADDRESS_OF_THE_SEARCHD_SERVER:9306:mysql41

    # log file, searchd run info is logged here
    # optional, default is 'searchd.log'
    log = /var/log/sphinx/searchd.log

    # query log file, all search queries are logged here
    # optional, default is empty (do not log queries)
    query_log = /var/log/sphinx/query.log

    # client read timeout, seconds
    # optional, default is 5
    read_timeout = 5

    # request timeout, seconds
    # optional, default is 5 minutes
    client_timeout = 300

    # maximum amount of children to fork (concurrent searches to run)
    # optional, default is 0 (unlimited)
    max_children = 30

    # PID file, searchd process ID file name
    # mandatory
    pid_file = /var/run/searchd.pid

    # max amount of matches the daemon ever keeps in RAM, per-index
    # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
    # default is 1000 (just like Google)
    max_matches = 1000

    # seamless rotate, prevents rotate stalls if precaching huge datasets
    # optional, default is 1
    seamless_rotate = 1

    # whether to forcibly preopen all indexes on startup
    # optional, default is 1 (preopen everything)
    preopen_indexes = 1
    # whether to unlink .old index copies on successful rotation
    # optional, default is 1 (do unlink)
    unlink_old = 1
    # MVA updates pool size
    # shared between all instances of searchd, disables attr flushes!
    # optional, default size is 1M
    mva_updates_pool = 1M

    # max allowed network packet size
    # limits both query packets from clients, and responses from agents
    # optional, default size is 8M
    max_packet_size = 8M

    # max allowed per-query filter count
    # optional, default is 256
    max_filters = 256

    # max allowed per-filter values count
    # optional, default is 4096
    max_filter_values = 4096

    # max allowed per-batch query count (aka multi-query count)
    # optional, default is 32
    max_batch_queries = 32

    # multi-processing mode (MPM)
    # known values are none, fork, prefork, and threads
    # optional, default is fork
    #
    workers = threads # for RT to work
}
# --eof--
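Once the indexes are built and searchd is running, you can sanity-check them through the mysql41 listener defined above (port 9306) with any stock MySQL client. These queries are just an illustration (the search terms are made up), not part of the config:

    -- connect with: mysql -h YOUR_SEARCHD_HOST -P 9306
    -- full-text search against the Portuguese index
    SELECT * FROM product_product_pt WHERE MATCH('caneta') LIMIT 10;

    -- and against the English one
    SELECT * FROM product_product_en WHERE MATCH('pen') LIMIT 10;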
I will post an update later on how to start the indexer, and on the changes we need to make on the PostgreSQL side to get the translated product indexing working.
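Until that post is up, here is a minimal sketch of what get_template_translation might look like, assuming translations live in Tryton's standard ir_translation table. The function name and signature simply match the calls in the config above; the real implementation may differ:

    -- hypothetical sketch, not the final implementation:
    -- look up a 'model' translation in Tryton's ir_translation table,
    -- falling back to the original (untranslated) value
    CREATE OR REPLACE FUNCTION get_template_translation(
        integer,  -- $1: product_template id
        text,     -- $2: field name, e.g. 'name'
        text,     -- $3: fallback value (the untranslated column)
        text      -- $4: language code, e.g. 'pt_BR'
    ) RETURNS text AS $$
        SELECT COALESCE(
            (SELECT value
               FROM ir_translation
              WHERE name = 'product.template,' || $2
                AND res_id = $1
                AND lang = $4
                AND type = 'model'
                AND value IS NOT NULL
                AND value <> ''
              LIMIT 1),
            $3);
    $$ LANGUAGE sql STABLE;

get_product_translation would follow the same pattern, just built on 'product.product,' || $2 instead.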
I've been very busy these past couple of weeks with finals and problem sets at college.