Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,6 @@ nosetests.xml

# docs
docs/_build/*
docs/api/generated/*
docs/api/generated/*

tools/*
12 changes: 12 additions & 0 deletions pyprophet/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,10 +456,13 @@ class IPFIOConfig(BaseIOConfig):
ipf_ms2_scoring (bool): Use MS2 precursor data for IPF.
ipf_h0 (bool): Include possibility that peak groups are not covered by the peptidoform space (null hypothesis H0).
ipf_grouped_fdr (bool): [Experimental] Compute grouped FDR instead of pooled FDR to support heterogeneous peptidoform counts per peak group.
ipf_grouped_fdr_strategy (Literal["num_peptidoforms"]): Grouping strategy used when grouped FDR is enabled.
ipf_max_precursor_pep (float): Maximum PEP to consider scored precursors in IPF.
ipf_max_peakgroup_pep (float): Maximum PEP to consider scored peak groups in IPF.
ipf_max_precursor_peakgroup_pep (float): Maximum BHM layer 1 integrated precursor-peakgroup PEP to consider in IPF.
ipf_max_transition_pep (float): Maximum PEP to consider scored transitions in IPF.
ipf_min_supporting_transitions (int): Minimum number of supporting identifying transitions required to keep an inferred peptidoform result. Applied as a post-IPF filter; 0 disables the filter.
ipf_min_peakgroup_intensity (float): Minimum FEATURE_MS2 area intensity required to keep an inferred peptidoform result. Applied as a post-IPF filter; 0 disables the filter.
propagate_signal_across_runs (bool): Propagate signal across runs (requires alignment step).
ipf_max_alignment_pep (float): Maximum PEP to consider for good alignments.
across_run_confidence_threshold (float): Maximum PEP threshold for propagating signal across runs for aligned features.
Expand All @@ -471,10 +474,13 @@ class IPFIOConfig(BaseIOConfig):
ipf_ms2_scoring: bool = True
ipf_h0: bool = True
ipf_grouped_fdr: bool = False
ipf_grouped_fdr_strategy: Literal["num_peptidoforms"] = "num_peptidoforms"
ipf_max_precursor_pep: float = 0.7
ipf_max_peakgroup_pep: float = 0.7
ipf_max_precursor_peakgroup_pep: float = 0.4
ipf_max_transition_pep: float = 0.6
ipf_min_supporting_transitions: int = 0
ipf_min_peakgroup_intensity: float = 0.0
propagate_signal_across_runs: bool = False
ipf_max_alignment_pep: float = 0.7
across_run_confidence_threshold: float = 0.5
Expand All @@ -493,10 +499,13 @@ def from_cli_args(
ipf_ms2_scoring,
ipf_h0,
ipf_grouped_fdr,
ipf_grouped_fdr_strategy,
ipf_max_precursor_pep,
ipf_max_peakgroup_pep,
ipf_max_precursor_peakgroup_pep,
ipf_max_transition_pep,
ipf_min_supporting_transitions,
ipf_min_peakgroup_intensity,
propagate_signal_across_runs,
ipf_max_alignment_pep,
across_run_confidence_threshold,
Expand All @@ -516,10 +525,13 @@ def from_cli_args(
ipf_ms2_scoring=ipf_ms2_scoring,
ipf_h0=ipf_h0,
ipf_grouped_fdr=ipf_grouped_fdr,
ipf_grouped_fdr_strategy=ipf_grouped_fdr_strategy,
ipf_max_precursor_pep=ipf_max_precursor_pep,
ipf_max_peakgroup_pep=ipf_max_peakgroup_pep,
ipf_max_precursor_peakgroup_pep=ipf_max_precursor_peakgroup_pep,
ipf_max_transition_pep=ipf_max_transition_pep,
ipf_min_supporting_transitions=ipf_min_supporting_transitions,
ipf_min_peakgroup_intensity=ipf_min_peakgroup_intensity,
propagate_signal_across_runs=propagate_signal_across_runs,
ipf_max_alignment_pep=ipf_max_alignment_pep,
across_run_confidence_threshold=across_run_confidence_threshold,
Expand Down
27 changes: 27 additions & 0 deletions pyprophet/cli/ipf.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@
show_default=True,
help="[Experimental] Compute grouped FDR instead of pooled FDR to better support data where peak groups are evaluated to originate from very heterogeneous numbers of peptidoforms.",
)
@click.option(
"--ipf_grouped_fdr_strategy",
default="num_peptidoforms",
show_default=True,
type=click.Choice(["num_peptidoforms"]),
help="Grouping strategy used when --ipf_grouped_fdr is enabled.",
)
Comment on lines +51 to +57
@click.option(
"--ipf_max_precursor_pep",
default=0.7,
Expand Down Expand Up @@ -76,6 +83,20 @@
type=float,
help="Maximum PEP to consider scored transitions in IPF.",
)
@click.option(
"--ipf_min_supporting_transitions",
default=0,
show_default=True,
type=int,
help="Minimum number of supporting identifying transitions required to keep an inferred peptidoform result. Applied as a post-IPF filter; 0 disables the filter.",
)
@click.option(
"--ipf_min_peakgroup_intensity",
default=0.0,
show_default=True,
type=float,
help="Minimum FEATURE_MS2 area intensity required to keep an inferred peptidoform result. Applied as a post-IPF filter; 0 disables the filter.",
)
@click.option(
"--propagate_signal_across_runs/--no-propagate_signal_across_runs",
default=False,
Expand Down Expand Up @@ -120,10 +141,13 @@ def ipf(
ipf_ms2_scoring,
ipf_h0,
ipf_grouped_fdr,
ipf_grouped_fdr_strategy,
ipf_max_precursor_pep,
ipf_max_peakgroup_pep,
ipf_max_precursor_peakgroup_pep,
ipf_max_transition_pep,
ipf_min_supporting_transitions,
ipf_min_peakgroup_intensity,
propagate_signal_across_runs,
ipf_max_alignment_pep,
across_run_confidence_threshold,
Expand Down Expand Up @@ -155,10 +179,13 @@ def ipf(
ipf_ms2_scoring,
ipf_h0,
ipf_grouped_fdr,
ipf_grouped_fdr_strategy,
ipf_max_precursor_pep,
ipf_max_peakgroup_pep,
ipf_max_precursor_peakgroup_pep,
ipf_max_transition_pep,
ipf_min_supporting_transitions,
ipf_min_peakgroup_intensity,
propagate_signal_across_runs,
ipf_max_alignment_pep,
across_run_confidence_threshold,
Expand Down
111 changes: 94 additions & 17 deletions pyprophet/glyco/glycoform.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,105 @@
from .pepmass import GlycoPeptideMassCalculator


def get_feature_mapping_across_runs(infile, ipf_max_alignment_pep=1):
def get_feature_mapping_across_runs(
infile, max_alignment_pep=0.5, min_mapping_confidence=None
):
click.echo("Info: Reading Across Run Feature Alignment Mapping ... ", nl=False)
start = time.time()

con = sqlite3.connect(infile)
with sqlite3.connect(infile) as con:
use_alignment_candidates = (
min_mapping_confidence is not None
and check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT_CANDIDATE")
)

data = pd.read_sql_query(
f"""SELECT
DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID,
ALIGNED_FEATURE_ID AS FEATURE_ID
FROM (SELECT DISTINCT * FROM FEATURE_MS2_ALIGNMENT) AS FEATURE_MS2_ALIGNMENT
INNER JOIN
(SELECT DISTINCT *, MIN(QVALUE) FROM SCORE_ALIGNMENT GROUP BY FEATURE_ID) AS SCORE_ALIGNMENT
ON SCORE_ALIGNMENT.FEATURE_ID = FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID
WHERE LABEL = 1
AND SCORE_ALIGNMENT.PEP < {ipf_max_alignment_pep}
ORDER BY ALIGNMENT_GROUP_ID""",
con,
)
if use_alignment_candidates:
query = """
SELECT
DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID,
ALIGNMENT_ID,
FEATURE_ID,
PRECURSOR_ID,
FEATURE_TYPE
FROM (
SELECT DISTINCT
ALIGNMENT_ID,
PRECURSOR_ID,
REFERENCE_FEATURE_ID AS FEATURE_ID,
'REFERENCE' AS FEATURE_TYPE
FROM FEATURE_MS2_ALIGNMENT_CANDIDATE
WHERE SELECTED = 1
AND MAPPING_CONFIDENCE >= ?
AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID
AND ALIGNED_FEATURE_ID != -1

UNION

SELECT DISTINCT
ALIGNMENT_ID,
PRECURSOR_ID,
ALIGNED_FEATURE_ID AS FEATURE_ID,
'QUERY' AS FEATURE_TYPE
FROM FEATURE_MS2_ALIGNMENT_CANDIDATE
WHERE SELECTED = 1
AND MAPPING_CONFIDENCE >= ?
AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID
AND ALIGNED_FEATURE_ID != -1
) AS feature_list
ORDER BY
ALIGNMENT_GROUP_ID,
CASE FEATURE_TYPE
WHEN 'REFERENCE' THEN 0
WHEN 'QUERY' THEN 1
END
"""
data = pd.read_sql_query(
query,
con,
params=[min_mapping_confidence, min_mapping_confidence],
)
else:
if not check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT") or not check_sqlite_table(
con, "SCORE_ALIGNMENT"
):
raise click.ClickException(
"Perform feature alignment using ARYCAL, and apply scoring to alignment-level data before running glycoform inference."
)

query = f"""
SELECT
DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID,
FEATURE_ID
FROM (
SELECT DISTINCT
ALIGNMENT_ID,
PRECURSOR_ID,
REFERENCE_FEATURE_ID AS FEATURE_ID
FROM FEATURE_MS2_ALIGNMENT
WHERE LABEL = 1
AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID

UNION

SELECT DISTINCT
ALIGNMENT_ID,
PRECURSOR_ID,
ALIGNED_FEATURE_ID AS FEATURE_ID
FROM FEATURE_MS2_ALIGNMENT
WHERE LABEL = 1
AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID
) AS feature_list
INNER JOIN (
SELECT DISTINCT FEATURE_ID
FROM SCORE_ALIGNMENT
WHERE PEP < {max_alignment_pep}
) AS good_alignments
ON good_alignments.FEATURE_ID = feature_list.FEATURE_ID
ORDER BY ALIGNMENT_GROUP_ID
"""
data = pd.read_sql_query(query, con)

data.columns = [col.lower() for col in data.columns]
con.close()

end = time.time()
click.echo(f"{end-start:.4f} seconds")
Expand Down Expand Up @@ -604,7 +681,7 @@ def infer_glycoforms(
## prepare for propagating signal across runs for aligned features
if propagate_signal_across_runs:
across_run_feature_map = get_feature_mapping_across_runs(
infile, max_alignment_pep
infile, max_alignment_pep=max_alignment_pep
)
transition_table = pd.merge(
transition_table, across_run_feature_map, on="feature_id", how="left"
Expand Down
Loading
Loading