Skip to content

Commit

Permalink
ENH updated product_reviews scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelchughes committed Aug 7, 2018
1 parent 8aefb17 commit 9caec31
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Run make_html_collection_from_csv.py on best_snapshots_PC_sLDA.csv for the
# multi-domain product reviews dataset, once per word-ranking method.
#
# Required environment:
#   PC_REPO_DIR - checkout root; must contain pc_toolbox/utils_vizhtml/
# Each run is given the output location
#   /tmp/product_reviews_html/rank_words_by=<method>/
# via --html_output_path.
dataset_subpath="multi_domain_product_reviews_dataset/clean_data_v20180403/"
dataset_path="$HOME/git/$dataset_subpath/"
echo "dataset_path:"
echo "$dataset_path"

export XHOST_SSH_ADDR=mchughes@browncs
export XHOST_REMOTE_PATH="/nbu/liv/mhughes/slda_results/$dataset_subpath/"
export XHOST_LOCAL_PATH="/results/$dataset_subpath/"

# Single-quoted on purpose: the $TRAIN_/$VALID_/$TEST_ tokens must reach the
# python script literally (presumably it substitutes them -- confirm there).
template_html='<pre>TRAIN AUC=$TRAIN_Y_ROC_AUC<br />VALID AUC=$VALID_Y_ROC_AUC<br /> TEST AUC=$TEST_Y_ROC_AUC<nbsp;></pre>'

snapshot_csv_path="$XHOST_LOCAL_PATH/best_runs_20180301_pcslda_tensorflow/best_snapshots_PC_sLDA.csv"

# The python script is invoked by relative name, so only proceed when the
# directory change succeeded; otherwise the loop would run in the caller's
# working directory and popd would fail on an empty stack.
if pushd "$PC_REPO_DIR/pc_toolbox/utils_vizhtml/"; then
  for rank_words_by in 'proba_word_given_topic' 'proba_topic_given_word'; do
    python make_html_collection_from_csv.py \
      --snapshot_csv_path "$snapshot_csv_path" \
      --html_output_path /tmp/product_reviews_html/rank_words_by="$rank_words_by"/ \
      --field_order LEGEND_NAME,LABEL_NAME,N_STATES,WEIGHT_Y \
      --ncols 4 \
      --n_chars_per_word 20 \
      --n_words_per_topic 15 \
      --rank_words_by "$rank_words_by" \
      --show_longer_words_via_tooltip 1 \
      --metrics_template_html "$template_html"
  done
  popd
else
  echo "ERROR: cannot enter '$PC_REPO_DIR/pc_toolbox/utils_vizhtml/' (is PC_REPO_DIR set?)" >&2
  # Leave a non-zero status without calling exit, so sourcing stays safe.
  false
fi
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Export the host/path settings for the multi-domain product reviews results
# and run the repo's rsync_snapshot_perf_metrics.sh helper.
#
# Required environment:
#   PC_REPO_DIR - checkout root; must contain scripts/rsync_tools/
# The XHOST_* variables are exported for the helper (presumably it reads
# them to pick the remote source and local destination -- confirm there).
dataset_subpath="multi_domain_product_reviews_dataset/clean_data_v20180403/"
dataset_path="$HOME/git/$dataset_subpath/"
echo "dataset_path:"
echo "$dataset_path"

export XHOST_SSH_ADDR=mchughes@browncs
export XHOST_REMOTE_PATH="/nbu/liv/mhughes/slda_results/$dataset_subpath/"
export XHOST_LOCAL_PATH="/results/$dataset_subpath/"

# Quoted so a repo path containing spaces stays a single argument.
bash "$PC_REPO_DIR/scripts/rsync_tools/rsync_snapshot_perf_metrics.sh"

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run select_best_runs_and_snapshots.py over the 20180301 tensorflow result
# directories of the multi-domain product reviews dataset, with
# best_runs_20180301_pcslda_tensorflow/ passed as the output path.
#
# Required environment:
#   PC_REPO_DIR - checkout root; must contain pc_toolbox/utils_snapshots/
# Required file:
#   $HOME/git/<dataset_subpath>/Y_colnames.txt - target column name(s)
dataset_subpath="multi_domain_product_reviews_dataset/clean_data_v20180403/"
dataset_path="$HOME/git/$dataset_subpath/"
echo "dataset_path:"
echo "$dataset_path"

export XHOST_SSH_ADDR=mchughes@browncs
export XHOST_REMOTE_PATH="/nbu/liv/mhughes/slda_results/$dataset_subpath/"
export XHOST_LOCAL_PATH="/results/$dataset_subpath/"

y_colnames=$(cat "$dataset_path/Y_colnames.txt")
echo "target_y_name: $y_colnames"

# The glob is kept literal (always expanded inside double quotes) so the
# python script receives the pattern itself rather than a shell-expanded list.
results_path_pattern_01="$XHOST_LOCAL_PATH/20180301*tensorflow*"
output_path="$XHOST_LOCAL_PATH/best_runs_20180301_pcslda_tensorflow/"

if [ -n "$y_colnames" ]; then
  # $y_colnames stays unquoted to keep the original word-splitting, in case
  # Y_colnames.txt lists several names -- TODO confirm single vs. multiple.
  # shellcheck disable=SC2086
  python "$PC_REPO_DIR/pc_toolbox/utils_snapshots/select_best_runs_and_snapshots.py" \
    --output_path "$output_path" \
    --legend_name PC_sLDA \
    --results_path_patterns "$results_path_pattern_01" \
    --txt_src_path "$dataset_path" \
    --target_y_name $y_colnames \
    --all_y_names $y_colnames \
    --selection_score_colname Y_ROC_AUC \
    --selection_score_ranking_func argmax \
    --col_names_to_use_at_selection N_STATES,WEIGHT_Y \
    --col_names_to_keep_per_split \
      Y_ROC_AUC,Y_ERROR_RATE,LOGPDF_X_PERTOK,LOGPDF_Y_PERDOC \
    --col_names_to_keep \
      ALPHA,TAU,LAMBDA_W,N_BATCHES
else
  # Without column names the python call would get --target_y_name and
  # --all_y_names with no values; stop here with a clear message instead.
  echo "ERROR: no column names read from '$dataset_path/Y_colnames.txt'" >&2
  # Leave a non-zero status without calling exit, so sourcing stays safe.
  false
fi

0 comments on commit 9caec31

Please sign in to comment.