-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add unit tests for the outputaccumulator post processing. #422
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,11 +5,12 @@ | |
import numpy as np | ||
import pandas as pd | ||
from alphabase.spectral_library.base import SpecLibBase | ||
from conftest import mock_fragment_df, mock_precursor_df | ||
from alphabase.spectral_library.flat import SpecLibFlat | ||
from conftest import mock_fragment_df, mock_precursor_df,mock_fragment_correlation_df | ||
|
||
from alphadia import outputtransform | ||
from alphadia.workflow.base import QUANT_FOLDER_NAME | ||
|
||
from alphadia.outputaccumulator import ms2_quality_control | ||
|
||
def prepare_input_data(): | ||
""" | ||
|
@@ -247,3 +248,64 @@ def test_default_column_assignment(): | |
assert built_lib.precursor_df[f"{col}"].equals( | ||
built_lib.precursor_df[f"{col}_library"] | ||
), f"{col} != {col}_library" | ||
|
||
def test_non_nan_fragments():
    """
    Test that the accumulated fragments data frame has no nan values.

    Builds the transfer library into the temporary folder returned by
    ``prepare_input_data``, reloads it from the written HDF file and checks
    the fragment intensity frame for missing values.
    """
    # Given:
    # Only the config, the output folder and the raw folders are needed here;
    # the remaining values returned by prepare_input_data are ignored.
    config, temp_folder, raw_folders, _, _ = prepare_input_data()

    # The temporary folder is removed in `finally` so that a failing
    # assertion (or any earlier exception) does not leave it behind.
    try:
        keep_top = 2
        config["transfer_library"]["top_k_samples"] = keep_top

        # When:
        output = outputtransform.SearchPlanOutput(config, temp_folder)
        _ = output.build_transfer_library(raw_folders, save=True)
        built_lib = SpecLibBase()
        built_lib.load_hdf(
            os.path.join(temp_folder, f"{output.TRANSFER_OUTPUT}.hdf"),
            load_mod_seq=True,
        )

        # Then: The fragment dataframe should have no nan values
        assert (
            not built_lib.fragment_intensity_df.isnull().values.any()
        ), "There are nan values in the fragment dataframe"
    finally:
        shutil.rmtree(temp_folder)
mo-sameh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def test_use_for_ms2():
    """
    Test that the ms2 quality control is correctly applied by checking the use_for_ms2 column in the precursor_df.

    The expected flag for each precursor is recomputed independently: the
    median fragment correlation over fragments with non-zero intensity must be
    strictly above ``precursor_correlation_cutoff``.
    """
    # Given:
    # Fixed seed so that a failing run can be reproduced.
    # NOTE(review): assumes the conftest mock helpers draw from NumPy's global
    # RNG -- confirm; if they use their own generator this has no effect.
    np.random.seed(42)

    n_precursor = 100
    n_fragments = 10

    psm_flat_df = mock_precursor_df(n_precursor=n_precursor, with_decoy=True)
    fragment_flat_df = mock_fragment_df(
        n_precursor=n_precursor, n_fragments=n_fragments
    )
    psm_flat_df = psm_flat_df.sort_values(by="precursor_idx")
    fragment_flat_df = fragment_flat_df.sort_values(by="precursor_idx")

    # Each precursor owns a consecutive run of n_fragments rows in the flat
    # fragment frame.
    flat_frag_start = np.arange(0, len(psm_flat_df) * n_fragments, n_fragments)
    psm_flat_df["flat_frag_start_idx"] = flat_frag_start
    # NOTE(review): the stop index is start + 9; if the stop index is meant to
    # be exclusive this leaves out the last fragment of each precursor --
    # confirm against SpecLibFlat before changing.
    psm_flat_df["flat_frag_stop_idx"] = flat_frag_start + 9
    psm_flat_df["nAA"] = psm_flat_df.sequence.str.len().astype(np.int32)

    fragment_flat_df["loss_type"] = 0
    flat_spec_lib = SpecLibFlat()
    flat_spec_lib._precursor_df = psm_flat_df
    flat_spec_lib._fragment_df = fragment_flat_df
    spec_lib = flat_spec_lib.to_SpecLibBase()

    fragment_correlation_base_df = mock_fragment_correlation_df(
        spec_lib.fragment_intensity_df
    )
    spec_lib._fragment_correlation_df = fragment_correlation_base_df
    precursor_correlation_cutoff = 0.5
    fragment_correlation_ratio = 0.75

    # Copies taken before the call so the expectation is computed from the
    # library state as it was prior to quality control.
    base_precursor_df = spec_lib.precursor_df.copy()
    base_fragment_df = spec_lib.fragment_intensity_df.copy()

    # When:
    ms2_quality_control(
        spec_lib, precursor_correlation_cutoff, fragment_correlation_ratio
    )

    # Then: The use_for_ms2 column should be correctly assigned for precursors
    # with median fragment correlation above precursor_correlation_cutoff
    target_use_for_ms2 = []
    for frag_start, frag_stop in zip(
        base_precursor_df["frag_start_idx"], base_precursor_df["frag_stop_idx"]
    ):
        frag_corr = fragment_correlation_base_df.iloc[frag_start:frag_stop].values
        frag_intensities = base_fragment_df.iloc[frag_start:frag_stop].values
        # median corr of non zero intensities
        frag_corr = frag_corr[frag_intensities > 0]
        median_frag_corr = np.median(frag_corr) if len(frag_corr) > 0 else 0
        target_use_for_ms2.append(median_frag_corr > precursor_correlation_cutoff)

    np.testing.assert_array_equal(
        spec_lib.precursor_df["use_for_ms2"].values, target_use_for_ms2
    )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we use a fixed seed here to make the tests reproducible?
If not, it would be good to print out the generated data; otherwise debugging tests will be a nightmare.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently, the mocked precursor_df, fragment_df and fragment correlation are all completely random.
@GeorgWa do you think we can fix the seed for all of them?