-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from TheJacksonLaboratory/G3-204-get-search-wor…
…king G3-204: Get Legacy Search Working
- Loading branch information
Showing
7 changed files
with
428 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
deploy/ | ||
.github/ | ||
docs/ | ||
skaffold.yaml | ||
.gitignore | ||
.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[tool.poetry] | ||
name = "geneweaver-legacy" | ||
version = "1.1.1" | ||
version = "1.2.0" | ||
description = "" | ||
authors = ["Alexander Berger <[email protected]>"] | ||
readme = "README.md" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,291 @@ | ||
|
||
source geneset_src : base | ||
{ | ||
# potentially delete old entry, first | ||
sql_query_pre = SET search_path to production,extsrc,odestatic; | ||
sql_query_pre = DELETE FROM sphinxcounters WHERE index_name='geneset_tmp'; | ||
sql_query_pre = INSERT INTO sphinxcounters VALUES ('geneset_tmp', NOW()); | ||
|
||
sql_query_range = \ | ||
SELECT min(gs.gs_id), max(gs.gs_id) \ | ||
FROM geneset gs \ | ||
WHERE gs.gs_status<>'deleted' \ | ||
AND gs.gs_updated < ( \ | ||
SELECT last_update FROM sphinxcounters WHERE index_name='geneset_tmp'); | ||
sql_range_step = 1000 | ||
|
||
sql_query = \ | ||
SELECT gs.gs_id, gs.gs_id, gs.gs_id as gs_id_attr, 'GS'||gs.gs_id AS gsid_prefixed, \ | ||
gs.gs_name as name, gs.gs_description as description, \ | ||
gs.gs_abbreviation as label, gs.usr_id, gs.gs_count, \ | ||
p.pub_pubmed AS pubmed_id, p.pub_authors, p.pub_title, p.pub_abstract, p.pub_journal, \ | ||
sp.sp_name as species, sp.sp_taxid as taxid, \ | ||
COALESCE(gs.cur_id, 0) AS cur_id, COALESCE(gs.sp_id, 0) AS sp_id, \ | ||
COALESCE(gs.gs_attribution, 0) as attribution, \ | ||
CASE \ | ||
WHEN gs.gs_status='provisional' THEN 1 \ | ||
WHEN gs.gs_status LIKE 'deprecated%' THEN 2 \ | ||
ELSE 0 \ | ||
END AS gs_status, \ | ||
CASE \ | ||
WHEN gs.sp_id=1 THEN 'mouse' \ | ||
WHEN gs.sp_id=2 THEN 'human' \ | ||
WHEN gs.sp_id=3 THEN 'rat' \ | ||
WHEN gs.sp_id=4 THEN 'zebrafish' \ | ||
WHEN gs.sp_id=5 THEN 'fly' \ | ||
WHEN gs.sp_id=6 THEN 'monkey' \ | ||
WHEN gs.sp_id=8 THEN 'c. elegans' \ | ||
WHEN gs.sp_id=9 THEN 'yeast' \ | ||
END AS common_name \ | ||
FROM geneset gs \ | ||
LEFT OUTER JOIN publication p USING(pub_id) \ | ||
LEFT OUTER JOIN species sp USING(sp_id) \ | ||
WHERE gs.gs_status<>'deleted' \ | ||
AND gs.gs_id>=$start AND gs.gs_id<=$end \ | ||
AND gs.gs_updated < ( \ | ||
SELECT last_update FROM sphinxcounters WHERE index_name='geneset_tmp'); | ||
|
||
# gene ref ids, gene names, and type | ||
# ode_ref_id||' '||COALESCE(gi_name, '')||' '||COALESCE(gi_type, '') \ | ||
|
||
# just gene ref ids | ||
sql_joined_field = genes from query; \ | ||
SELECT gs.gs_id, ode_ref_id \ | ||
FROM geneset gs \ | ||
JOIN geneset_value USING(gs_id) \ | ||
JOIN gene USING(ode_gene_id) \ | ||
JOIN gene_info USING(ode_gene_id) \ | ||
WHERE gs.gs_status<>'deleted' AND gsv_in_threshold AND ode_pref \ | ||
AND gs.gs_updated < ( \ | ||
SELECT last_update FROM sphinxcounters WHERE index_name='geneset_tmp') \ | ||
ORDER BY gs.gs_id ASC; | ||
|
||
# add in ontology term associations | ||
sql_joined_field = ontologies from query; \ | ||
SELECT gso.gs_id, o.ont_ref_id||' '||o.ont_name||' '||o.ont_description \ | ||
FROM ontology o, geneset_ontology gso, geneset gs \ | ||
WHERE gso.gso_ref_type!='Blacklist' \ | ||
AND gso.ont_id=o.ont_id \ | ||
AND gso.gs_id=gs.gs_id AND gs.gs_status<>'deleted' \ | ||
AND gs.gs_updated < ( \ | ||
SELECT last_update FROM sphinxcounters WHERE index_name='geneset_tmp') \ | ||
ORDER BY gs_id ASC; | ||
|
||
# both indexed and attributed | ||
sql_field_string = common_name | ||
sql_field_string = species | ||
sql_field_string = taxid | ||
sql_field_string = pubmed_id | ||
|
||
sql_attr_uint = sp_id | ||
sql_attr_uint = usr_id | ||
sql_attr_uint = cur_id | ||
sql_attr_uint = attribution | ||
sql_attr_uint = gs_status | ||
sql_attr_uint = gs_count | ||
sql_attr_uint = gs_id_attr | ||
|
||
sql_attr_multi = uint grp_id from query; \ | ||
SELECT gs_id, regexp_split_to_table(gs_groups, ',')::integer AS grp_id \ | ||
FROM production.geneset \ | ||
WHERE gs_status<>'deleted' AND (gs_groups <> '') IS NOT FALSE AND gs_updated < ( \ | ||
SELECT last_update FROM production.sphinxcounters WHERE index_name='geneset_tmp') \ | ||
ORDER BY gs_id ASC; | ||
|
||
sql_query_post_index = SET search_path to production,extsrc,odestatic; | ||
sql_query_post_index = DELETE FROM sphinxcounters WHERE index_name='geneset'; | ||
sql_query_post_index = UPDATE sphinxcounters \ | ||
SET index_name='geneset' WHERE index_name='geneset_tmp'; | ||
sql_query_post_index = DELETE FROM genesetklist WHERE ts < ( \ | ||
SELECT last_update FROM sphinxcounters WHERE index_name='geneset'); | ||
} | ||
|
||
source geneset_delta_src : base | ||
{ | ||
sql_query_pre = SET search_path to production,extsrc,odestatic; | ||
|
||
sql_query = \ | ||
SELECT gs.gs_id, gs.gs_id, gs.gs_id as gs_id_attr, 'GS'||gs.gs_id AS gsid_prefixed, \ | ||
gs.gs_name as name, gs.gs_description as description, \ | ||
gs.gs_abbreviation as label, gs.usr_id, gs.gs_count, \ | ||
p.pub_pubmed AS pubmed_id, p.pub_authors, p.pub_title, p.pub_abstract, p.pub_journal, \ | ||
sp.sp_name as species, sp.sp_taxid as taxid, \ | ||
COALESCE(gs.cur_id, 0) AS cur_id, COALESCE(gs.sp_id, 0) AS sp_id, \ | ||
COALESCE(gs.gs_attribution, 0) as attribution, \ | ||
CASE \ | ||
WHEN gs.gs_status='provisional' THEN 1 \ | ||
WHEN gs.gs_status LIKE 'deprecated%' THEN 2 \ | ||
ELSE 0 \ | ||
END AS gs_status, \ | ||
CASE \ | ||
WHEN gs.sp_id=1 THEN 'mouse' \ | ||
WHEN gs.sp_id=2 THEN 'human' \ | ||
WHEN gs.sp_id=3 THEN 'rat' \ | ||
WHEN gs.sp_id=4 THEN 'zebrafish' \ | ||
WHEN gs.sp_id=5 THEN 'fly' \ | ||
WHEN gs.sp_id=6 THEN 'monkey' \ | ||
WHEN gs.sp_id=8 THEN 'c. elegans' \ | ||
WHEN gs.sp_id=9 THEN 'yeast' \ | ||
END AS common_name \ | ||
FROM geneset gs \ | ||
LEFT OUTER JOIN publication p USING(pub_id) \ | ||
LEFT OUTER JOIN species sp USING(sp_id) \ | ||
WHERE gs.gs_status<>'deleted' \ | ||
AND gs.gs_updated >= ( \ | ||
SELECT last_update FROM sphinxcounters WHERE index_name='geneset'); | ||
|
||
# gene ref ids, gene names, and type | ||
# ode_ref_id||' '||COALESCE(gi_name, '')||' '||COALESCE(gi_type, '') \ | ||
|
||
# just gene ref ids | ||
sql_joined_field = genes from query; \ | ||
SELECT gs.gs_id, ode_ref_id \ | ||
FROM geneset gs \ | ||
JOIN geneset_value USING(gs_id) \ | ||
JOIN gene USING(ode_gene_id) \ | ||
JOIN gene_info USING(ode_gene_id) \ | ||
WHERE gs.gs_status<>'deleted' AND gsv_in_threshold AND ode_pref \ | ||
AND gs.gs_updated >= ( \ | ||
SELECT last_update FROM sphinxcounters WHERE index_name='geneset') \ | ||
ORDER BY gs.gs_id ASC; | ||
|
||
# add in ontology term associations | ||
sql_joined_field = ontologies from query; \ | ||
SELECT gso.gs_id, o.ont_ref_id||' '||o.ont_name||' '||o.ont_description \ | ||
FROM ontology o, geneset_ontology gso, geneset gs \ | ||
WHERE gso.gso_ref_type!='Blacklist' \ | ||
AND gso.ont_id=o.ont_id \ | ||
AND gso.gs_id=gs.gs_id AND gs.gs_status<>'deleted' \ | ||
AND gs.gs_updated >= ( \ | ||
SELECT last_update FROM sphinxcounters WHERE index_name='geneset') \ | ||
ORDER BY gs_id ASC; | ||
|
||
# both indexed and attributed | ||
sql_field_string = common_name | ||
sql_field_string = species | ||
sql_field_string = taxid | ||
sql_field_string = pubmed_id | ||
|
||
sql_attr_uint = sp_id | ||
sql_attr_uint = usr_id | ||
sql_attr_uint = cur_id | ||
sql_attr_uint = attribution | ||
sql_attr_uint = gs_status | ||
sql_attr_uint = gs_count | ||
sql_attr_uint = gs_id_attr | ||
|
||
sql_attr_multi = uint grp_id from query; \ | ||
SELECT gs_id, regexp_split_to_table(gs_groups, ',')::integer AS grp_id \ | ||
FROM production.geneset \ | ||
WHERE gs_status<>'deleted' AND gs_updated >= ( \ | ||
SELECT last_update FROM production.sphinxcounters WHERE index_name='geneset') \ | ||
ORDER BY gs_id ASC; | ||
|
||
sql_query_killlist = \ | ||
SELECT gs_id FROM geneset WHERE gs_updated >= ( \ | ||
SELECT last_update FROM sphinxcounters WHERE index_name='geneset') \ | ||
UNION SELECT gs_id from genesetklist; | ||
} | ||
|
||
############################################################################# | ||
## index definitions | ||
############################################################################# | ||
|
||
index geneset | ||
{ | ||
source = geneset_src | ||
path = /app/sphinx/geneset_idx | ||
|
||
morphology = stem_en | ||
|
||
# wordforms file, in "mapfrom > mapto" plain text format | ||
# optional, default is empty | ||
# | ||
# wordforms = ../var/data/wordforms.txt | ||
|
||
# minimum indexed word length; default is 1 (index everything) | ||
min_word_len = 1 | ||
|
||
html_strip = 1 | ||
index_exact_words = 1 | ||
|
||
stopwords = /app/sphinx/stopwords.txt | ||
|
||
min_prefix_len = 1 | ||
enable_star = 1 # allow for partial results | ||
} | ||
|
||
index geneset_delta | ||
{ | ||
source = geneset_delta_src | ||
path = /app/sphinx/geneset_delta_idx | ||
|
||
morphology = stem_en | ||
|
||
# wordforms file, in "mapfrom > mapto" plain text format | ||
# optional, default is empty | ||
# | ||
# wordforms = ../var/data/wordforms.txt | ||
|
||
# minimum indexed word length; default is 1 (index everything) | ||
min_word_len = 1 | ||
|
||
html_strip = 1 | ||
index_exact_words = 1 | ||
|
||
stopwords = /app/sphinx/stopwords.txt | ||
|
||
min_prefix_len = 1 | ||
enable_star = 1 # allow for partial results | ||
} | ||
|
||
############################################################################# | ||
## indexer settings | ||
############################################################################# | ||
|
||
indexer | ||
{ | ||
mem_limit = 2048M | ||
} | ||
|
||
############################################################################# | ||
## searchd settings | ||
############################################################################# | ||
|
||
searchd | ||
{ | ||
# hostname, port, or hostname:port, or /unix/socket/path to listen on | ||
# multi-value, multiple listen points are allowed | ||
# optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312) | ||
# | ||
# listen = 127.0.0.1 | ||
# listen = 192.168.0.1:9312 | ||
# listen = 9312 | ||
# listen = /var/run/searchd.sock | ||
|
||
|
||
# log file, searchd run info is logged here | ||
# optional, default is 'searchd.log' | ||
log = /app/sphinx/sphinx-searchd.log | ||
|
||
# query log file, all search queries are logged here | ||
# optional, default is empty (do not log queries) | ||
query_log = /app/sphinx/sphinx-query.log | ||
|
||
# maximum amount of children to fork (concurrent searches to run) | ||
# optional, default is 0 (unlimited) | ||
max_children = 30 | ||
|
||
# PID file, searchd process ID file name | ||
# mandatory | ||
pid_file = /app/sphinx/sphinx-searchd.pid | ||
|
||
# max amount of matches the daemon ever keeps in RAM, per-index | ||
# WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL | ||
# default is 1000 (just like Google) | ||
max_matches = 1000 | ||
max_packet_size = 64M | ||
|
||
# avoid deprecation warning | ||
#compat_sphinxql_magics = 0 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
echo "source base" | ||
echo "{" | ||
echo " type = pgsql" | ||
echo " sql_host = $DB_HOST" | ||
echo " sql_user = $DB_USERNAME" | ||
echo " sql_pass = $DB_PASSWORD" | ||
echo " sql_db = $DB_NAME" | ||
echo "}" | ||
cat /app/sphinx/sphinx.conf | ||
} > /app/sphinx/sphinx.conf.new | ||
|
||
mv /app/sphinx/sphinx.conf.new /app/sphinx/sphinx.conf | ||
|
||
indexer --all --config /app/sphinx/sphinx.conf | ||
|
||
searchd --nodetach --config /app/sphinx/sphinx.conf |
Oops, something went wrong.