Setting up a sphinx.conf file for indexing Tryton product data

I've successfully configured Sphinx to index Tryton data from a PostgreSQL database, using source and index inheritance to support multiple languages.

Here is how I achieved this:

** Again, I am assuming that you are running Ubuntu 10.04 and that you followed the instructions for compiling Sphinx from source in my previous blog post.

Here is the sample file, which should be located at:

/etc/sphinx/etc/sphinx.conf


#
# Sphinx configuration file sample
#
# Please refer to doc/sphinx.html for details.
#
#############################################################################
## data source definition
#############################################################################
source product_product
{
# data source type. mandatory, no default value
# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
type = pgsql
#####################################################################
## SQL settings (for 'mysql' and 'pgsql' types)
#####################################################################
# some straightforward parameters for SQL source types
sql_host = PUT_THE_IP_ADDRESS_OR_FQDN_OF_YOUR_DATABASE_SERVER
sql_user = PUT_YOUR_DATABASE_USERNAME
sql_pass = PUT_THE_PASSWORD_FOR_THE_DATABASE_USER
sql_db = PUT_THE_DATABASE_NAME
sql_port = 5432 # PostgreSQL default; Sphinx assumes 3306 if omitted
# main document fetch query
# mandatory, integer document ID field MUST be the first selected column
sql_query = \
SELECT \
product.id AS id, \
template.name AS name, \
product.description AS description \
FROM product_product AS product \
JOIN product_template AS template ON template.id = product.template \
WHERE \
product.id >= $start AND product.id <= $end
# range query setup, query that must return min and max ID values
# optional, default is empty
#
# sql_query will need to reference $start and $end boundaries
# if using ranged query:
#
# sql_query = \
# SELECT doc.id, doc.id AS group, doc.title, doc.data \
# FROM documents doc \
# WHERE id>=$start AND id<=$end
#
sql_query_range = SELECT MIN(id),MAX(id) FROM product_product
# range query step
# optional, default is 1024
#
sql_range_step = 1000
# combined field plus attribute declaration (from a single column)
# stores column as an attribute, but also indexes it as a full-text field
#
sql_field_string = name
sql_field_string = description
# ranged query throttling, in milliseconds
# optional, default is 0 which means no delay
# enforces given delay before each query step
sql_ranged_throttle = 0
# document info query, ONLY for CLI search (ie. testing and debugging)
# optional, default is empty
# must contain $id macro and must fetch the document by that id
sql_query_info = SELECT * FROM product_product WHERE id=$id
}
# inherited source
#
# all the parameters are copied from the parent source,
# and may then be overridden in this source definition
source product_product_pt : product_product
{
sql_query = \
SELECT \
product.id AS id, \
get_template_translation(template.id, 'name', template.name, 'pt_BR') AS name, \
get_product_translation(product.id, 'description', product.description, 'pt_BR') AS description \
FROM product_product AS product \
JOIN product_template AS template ON template.id = product.template \
WHERE \
product.id >= $start AND product.id <= $end
}
#############################################################################
## index definition
#############################################################################
# local index example
#
# this is an index which is stored locally in the filesystem
#
# all indexing-time options (such as morphology and charsets)
# are configured per local index
index product_product
{
# document source(s) to index
# multi-value, mandatory
# document IDs must be globally unique across all sources
source = product_product
# index files path and file name, without extension
# mandatory, path must be writable, extensions will be auto-appended
path = /var/data/product_product
# document attribute values (docinfo) storage mode
# optional, default is 'extern'
# known values are 'none', 'extern' and 'inline'
docinfo = extern
# memory locking for cached data (.spa and .spi), to prevent swapping
# optional, default is 0 (do not mlock)
# requires searchd to be run from root
mlock = 0
# a list of morphology preprocessors to apply
# optional, default is empty
#
# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
# 'soundex', and 'metaphone'; additional preprocessors available from
# libstemmer are 'libstemmer_XXX', where XXX is algorithm code
# (see libstemmer_c/libstemmer/modules.txt)
#
# morphology = stem_en, stem_ru, soundex
# morphology = libstemmer_german
# morphology = libstemmer_sv
morphology = none
# minimum indexed word length
# default is 1 (index everything)
min_word_len = 1
# charset encoding type
# optional, default is 'sbcs'
# known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
charset_type = utf-8
# whether to strip HTML tags from incoming documents
# known values are 0 (do not strip) and 1 (do strip)
# optional, default is 0
html_strip = 1
}
# inherited index
#
# all the parameters are copied from the parent index,
# and may then be overridden in this index definition
index product_product_en : product_product
{
path = /var/data/product_product_en
morphology = stem_en
}
index product_product_pt : product_product
{
source = product_product_pt
path = /var/data/product_product_pt
morphology = libstemmer_pt
}
#############################################################################
## indexer settings
#############################################################################
indexer
{
# memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
# optional, default is 32M, max is 2047M, recommended is 256M to 1024M
mem_limit = 2047M
}
#############################################################################
## searchd settings
#############################################################################
searchd
{
# [hostname:]port[:protocol], or /unix/socket/path to listen on
# known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
#
# multi-value, multiple listen points are allowed
# optional, defaults are 9312:sphinx and 9306:mysql41, as below
#
# listen = 127.0.0.1
# listen = 192.168.0.1:9312
# listen = 9312
# listen = /var/run/searchd.sock
listen = PUT_PUBLIC_OR_PRIVATE_IP_ADDRESS_OF_THE_SEARCHD_SERVER:9312
listen = PUT_PUBLIC_OR_PRIVATE_IP_ADDRESS_OF_THE_SEARCHD_SERVER:9306:mysql41
# log file, searchd run info is logged here
# optional, default is 'searchd.log'
log = /var/log/sphinx/searchd.log
# query log file, all search queries are logged here
# optional, default is empty (do not log queries)
query_log = /var/log/sphinx/query.log
# client read timeout, seconds
# optional, default is 5
read_timeout = 5
# request timeout, seconds
# optional, default is 5 minutes
client_timeout = 300
# maximum amount of children to fork (concurrent searches to run)
# optional, default is 0 (unlimited)
max_children = 30
# PID file, searchd process ID file name
# mandatory
pid_file = /var/run/searchd.pid
# max amount of matches the daemon ever keeps in RAM, per-index
# WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
# default is 1000 (just like Google)
max_matches = 1000
# seamless rotate, prevents rotate stalls if precaching huge datasets
# optional, default is 1
seamless_rotate = 1
# whether to forcibly preopen all indexes on startup
# optional, default is 1 (preopen everything)
preopen_indexes = 1
# whether to unlink .old index copies on successful rotation.
# optional, default is 1 (do unlink)
unlink_old = 1
# MVA updates pool size
# shared between all instances of searchd, disables attr flushes!
# optional, default size is 1M
mva_updates_pool = 1M
# max allowed network packet size
# limits both query packets from clients, and responses from agents
# optional, default size is 8M
max_packet_size = 8M
# max allowed per-query filter count
# optional, default is 256
max_filters = 256
# max allowed per-filter values count
# optional, default is 4096
max_filter_values = 4096
# max allowed per-batch query count (aka multi-query count)
# optional, default is 32
max_batch_queries = 32
# multi-processing mode (MPM)
# known values are none, fork, prefork, and threads
# optional, default is fork
#
workers = threads # for RT to work
}
# --eof--
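
Once the indexes have been built, a quick way to check that searchd is serving them is to connect to the mysql41 listener defined above with any MySQL client (for example: mysql -h YOUR_SEARCHD_HOST -P 9306) and run a SphinxQL query. The index name comes from the config above; the search term is just an example:

SELECT id, name
FROM product_product_pt
WHERE MATCH('parafuso')
LIMIT 10;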



I will post an update later on how to start the indexer and on the changes we need to make on the PostgreSQL side so that indexing of the translated products works.
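
In the meantime, to give an idea of what the PostgreSQL side involves: the pt_BR source above calls get_template_translation and get_product_translation, and those functions have to exist in the Tryton database before the indexer can run. This is only a rough sketch (it assumes Tryton's ir_translation table with its name, res_id, lang and value columns; the real functions will be in the follow-up post):

CREATE OR REPLACE FUNCTION get_template_translation(
    res_id integer, field_name text, original text, language text)
RETURNS text AS $$
    -- Look up a translated value in Tryton's ir_translation table,
    -- falling back to the original (untranslated) value when missing.
    SELECT COALESCE(
        (SELECT t.value
           FROM ir_translation t
          WHERE t.name = 'product.template,' || $2
            AND t.res_id = $1
            AND t.lang = $4
            AND t.value IS NOT NULL
            AND t.value <> ''
          LIMIT 1),
        $3);
$$ LANGUAGE sql STABLE;

get_product_translation would follow the same pattern, just with 'product.product,' as the model prefix.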


I've been very busy these past couple of weeks with my finals and problem sets at college.
