-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8aefb17
commit 9caec31
Showing
3 changed files
with
70 additions
and
0 deletions.
There are no files selected for viewing
30 changes: 30 additions & 0 deletions
30
scripts/product_reviews/train_topic_models/make_html_viz_for_best_snapshots.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# make_html_viz_for_best_snapshots.sh
#
# Runs make_html_collection_from_csv.py once per word-ranking mode, pointing
# it at the best-snapshots CSV under $XHOST_LOCAL_PATH and asking it to write
# one output directory per mode under /tmp/product_reviews_html/.
#
# Required environment:
#   PC_REPO_DIR  directory that contains pc_toolbox/
#   HOME         used only to build the dataset_path echoed below
#
# Meant to be executed (e.g. `bash <this script>`), not sourced: it calls
# `exit` when the working directory cannot be entered.

# Fail early with a clear message instead of resolving to /pc_toolbox/...
: "${PC_REPO_DIR:?PC_REPO_DIR must be set to the directory containing pc_toolbox/}"

dataset_subpath="multi_domain_product_reviews_dataset/clean_data_v20180403/"
dataset_path="$HOME/git/$dataset_subpath/"
printf '%s\n' "dataset_path:" "$dataset_path"

# Exported for child processes; this script itself only reads XHOST_LOCAL_PATH.
export XHOST_SSH_ADDR=mchughes@browncs
export XHOST_REMOTE_PATH="/nbu/liv/mhughes/slda_results/$dataset_subpath/"
export XHOST_LOCAL_PATH="/results/$dataset_subpath/"

# The python script is invoked by bare filename, so it must be run from its
# own directory. Abort if we cannot get there rather than running elsewhere.
pushd "$PC_REPO_DIR/pc_toolbox/utils_vizhtml/" || exit 1

# Single quotes are deliberate: the $NAME placeholders must reach the python
# script unexpanded (presumably it substitutes the metric values itself).
# NOTE(review): '<nbsp;>' is not a valid HTML entity; '&nbsp;' may have been
# intended. Left unchanged because it alters the rendered output - confirm.
template_html='<pre>TRAIN AUC=$TRAIN_Y_ROC_AUC<br />VALID AUC=$VALID_Y_ROC_AUC<br /> TEST AUC=$TEST_Y_ROC_AUC<nbsp;></pre>'

for rank_words_by in 'proba_word_given_topic' 'proba_topic_given_word'; do
  python make_html_collection_from_csv.py \
    --snapshot_csv_path "$XHOST_LOCAL_PATH/best_runs_20180301_pcslda_tensorflow/best_snapshots_PC_sLDA.csv" \
    --html_output_path "/tmp/product_reviews_html/rank_words_by=$rank_words_by/" \
    --field_order LEGEND_NAME,LABEL_NAME,N_STATES,WEIGHT_Y \
    --ncols 4 \
    --n_chars_per_word 20 \
    --n_words_per_topic 15 \
    --rank_words_by "$rank_words_by" \
    --show_longer_words_via_tooltip 1 \
    --metrics_template_html "$template_html"
done

popd
11 changes: 11 additions & 0 deletions
11
scripts/product_reviews/train_topic_models/rsync_snapshot_perf_csv.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# rsync_snapshot_perf_csv.sh
#
# Sets the XHOST_* variables for the product-reviews dataset and then hands
# off to scripts/rsync_tools/rsync_snapshot_perf_metrics.sh, which reads them
# from the environment (presumably to copy results from the remote host to
# the local path - confirm against that script).
#
# Required environment:
#   PC_REPO_DIR  directory that contains scripts/rsync_tools/
#   HOME         used only to build the dataset_path echoed below

# Without this guard an unset PC_REPO_DIR would silently resolve to
# /scripts/rsync_tools/... and fail with a confusing "No such file" error.
: "${PC_REPO_DIR:?PC_REPO_DIR must be set to the directory containing scripts/}"

dataset_subpath="multi_domain_product_reviews_dataset/clean_data_v20180403/"
dataset_path="$HOME/git/$dataset_subpath/"
printf '%s\n' "dataset_path:" "$dataset_path"

# Exported so the child bash process below inherits them.
export XHOST_SSH_ADDR=mchughes@browncs
export XHOST_REMOTE_PATH="/nbu/liv/mhughes/slda_results/$dataset_subpath/"
export XHOST_LOCAL_PATH="/results/$dataset_subpath/"

bash "$PC_REPO_DIR/scripts/rsync_tools/rsync_snapshot_perf_metrics.sh"
29 changes: 29 additions & 0 deletions
29
scripts/product_reviews/train_topic_models/select_best_snapshots.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# select_best_snapshots.sh
#
# Calls pc_toolbox/utils_snapshots/select_best_runs_and_snapshots.py on every
# results directory matching 20180301*tensorflow* under $XHOST_LOCAL_PATH,
# selecting by argmax of Y_ROC_AUC, with output directed to
# $XHOST_LOCAL_PATH/best_runs_20180301_pcslda_tensorflow/.
#
# Required environment:
#   PC_REPO_DIR  directory that contains pc_toolbox/
#   HOME         the dataset is expected under $HOME/git/
#
# Meant to be executed (e.g. `bash <this script>`), not sourced: it calls
# `exit` when the label-name file cannot be read.

: "${PC_REPO_DIR:?PC_REPO_DIR must be set to the directory containing pc_toolbox/}"

dataset_subpath="multi_domain_product_reviews_dataset/clean_data_v20180403/"
dataset_path="$HOME/git/$dataset_subpath/"
printf '%s\n' "dataset_path:" "$dataset_path"

export XHOST_SSH_ADDR=mchughes@browncs
export XHOST_REMOTE_PATH="/nbu/liv/mhughes/slda_results/$dataset_subpath/"
export XHOST_LOCAL_PATH="/results/$dataset_subpath/"

# Read the label column name(s). A missing or empty file must stop the run:
# otherwise --target_y_name would receive no value and swallow the next flag.
y_colnames_file="$dataset_path/Y_colnames.txt"
y_colnames=$(cat -- "$y_colnames_file") || {
  printf 'error: cannot read %s\n' "$y_colnames_file" >&2
  exit 1
}
if [[ -z "$y_colnames" ]]; then
  printf 'error: %s is empty\n' "$y_colnames_file" >&2
  exit 1
fi
echo "target_y_name: $y_colnames"

# Kept quoted at the call site so the glob reaches the python script
# unexpanded (presumably it expands the pattern itself - confirm).
results_path_pattern_01="$XHOST_LOCAL_PATH/20180301*tensorflow*"
output_path="$XHOST_LOCAL_PATH/best_runs_20180301_pcslda_tensorflow/"

# $y_colnames is intentionally left unquoted below: if the file lists several
# names, each is passed as its own argument. NOTE(review): confirm whether
# the python CLI expects that or a single string.
# shellcheck disable=SC2086
python "$PC_REPO_DIR/pc_toolbox/utils_snapshots/select_best_runs_and_snapshots.py" \
  --output_path "$output_path" \
  --legend_name PC_sLDA \
  --results_path_patterns "$results_path_pattern_01" \
  --txt_src_path "$dataset_path" \
  --target_y_name $y_colnames \
  --all_y_names $y_colnames \
  --selection_score_colname Y_ROC_AUC \
  --selection_score_ranking_func argmax \
  --col_names_to_use_at_selection N_STATES,WEIGHT_Y \
  --col_names_to_keep_per_split \
    Y_ROC_AUC,Y_ERROR_RATE,LOGPDF_X_PERTOK,LOGPDF_Y_PERDOC \
  --col_names_to_keep \
    ALPHA,TAU,LAMBDA_W,N_BATCHES