From 617a5619493c085136ff12639b82c25ea79f7348 Mon Sep 17 00:00:00 2001
From: John Huddleston
Date: Tue, 2 Jul 2024 09:34:33 -0700
Subject: [PATCH] Add test for t-SNE with alternate PCA encoding

---
 .../pathogen-embed-t-sne-pca-encoding-by-simplex.t | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 tests/pathogen-embed-t-sne-pca-encoding-by-simplex.t

diff --git a/tests/pathogen-embed-t-sne-pca-encoding-by-simplex.t b/tests/pathogen-embed-t-sne-pca-encoding-by-simplex.t
new file mode 100644
index 0000000..f973215
--- /dev/null
+++ b/tests/pathogen-embed-t-sne-pca-encoding-by-simplex.t
@@ -0,0 +1,14 @@
+Run pathogen-embed with t-SNE on an H3N2 HA alignment.
+Use the simplex encoding of the alignment for the PCA initialization.
+
+  $ pathogen-embed \
+  >   --alignment "$TESTDIR/data/h3n2_ha_alignment.fasta" \
+  >   --output-dataframe embed_t-sne.csv \
+  >   t-sne \
+  >   --perplexity 25 \
+  >   --learning-rate 100 \
+  >   --pca-encoding simplex
+
+There should be one record in the embedding (excluding the CSV header) per input sequence in the alignment.
+
+  $ [ "$(sed 1d embed_t-sne.csv | wc -l)" -eq "$(grep -c "^>" "$TESTDIR/data/h3n2_ha_alignment.fasta")" ]