PyProphet · singjc · Jun 18, 2026 · Jun 17, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/.gitignore b/.gitignore
@@ -40,4 +40,6 @@ nosetests.xml
 
 # docs
 docs/_build/*
-docs/api/generated/*
+docs/api/generated/*
+
+tools/*
diff --git a/pyprophet/_config.py b/pyprophet/_config.py
@@ -456,10 +456,13 @@ class IPFIOConfig(BaseIOConfig):
         ipf_ms2_scoring (bool): Use MS2 precursor data for IPF.
         ipf_h0 (bool): Include possibility that peak groups are not covered by the peptidoform space (null hypothesis H0).
         ipf_grouped_fdr (bool): [Experimental] Compute grouped FDR instead of pooled FDR to support heterogeneous peptidoform counts per peak group.
+        ipf_grouped_fdr_strategy (Literal["num_peptidoforms"]): Grouping strategy used when grouped FDR is enabled.
         ipf_max_precursor_pep (float): Maximum PEP to consider scored precursors in IPF.
         ipf_max_peakgroup_pep (float): Maximum PEP to consider scored peak groups in IPF.
         ipf_max_precursor_peakgroup_pep (float): Maximum BHM layer 1 integrated precursor-peakgroup PEP to consider in IPF.
         ipf_max_transition_pep (float): Maximum PEP to consider scored transitions in IPF.
+        ipf_min_supporting_transitions (int): Minimum number of supporting transitions required to keep an inferred peptidoform result.
+        ipf_min_peakgroup_intensity (float): Minimum MS2 peakgroup area intensity required to keep an inferred peptidoform result.
         propagate_signal_across_runs (bool): Propagate signal across runs (requires alignment step).
         ipf_max_alignment_pep (float): Maximum PEP to consider for good alignments.
         across_run_confidence_threshold (float): Maximum PEP threshold for propagating signal across runs for aligned features.
@@ -471,10 +474,13 @@ class IPFIOConfig(BaseIOConfig):
     ipf_ms2_scoring: bool = True
     ipf_h0: bool = True
     ipf_grouped_fdr: bool = False
+    ipf_grouped_fdr_strategy: Literal["num_peptidoforms"] = "num_peptidoforms"
     ipf_max_precursor_pep: float = 0.7
     ipf_max_peakgroup_pep: float = 0.7
     ipf_max_precursor_peakgroup_pep: float = 0.4
     ipf_max_transition_pep: float = 0.6
+    ipf_min_supporting_transitions: int = 0
+    ipf_min_peakgroup_intensity: float = 0.0
     propagate_signal_across_runs: bool = False
     ipf_max_alignment_pep: float = 0.7
     across_run_confidence_threshold: float = 0.5
@@ -493,10 +499,13 @@ def from_cli_args(
         ipf_ms2_scoring,
         ipf_h0,
         ipf_grouped_fdr,
+        ipf_grouped_fdr_strategy,
         ipf_max_precursor_pep,
         ipf_max_peakgroup_pep,
         ipf_max_precursor_peakgroup_pep,
         ipf_max_transition_pep,
+        ipf_min_supporting_transitions,
+        ipf_min_peakgroup_intensity,
         propagate_signal_across_runs,
         ipf_max_alignment_pep,
         across_run_confidence_threshold,
@@ -516,10 +525,13 @@ def from_cli_args(
             ipf_ms2_scoring=ipf_ms2_scoring,
             ipf_h0=ipf_h0,
             ipf_grouped_fdr=ipf_grouped_fdr,
+            ipf_grouped_fdr_strategy=ipf_grouped_fdr_strategy,
             ipf_max_precursor_pep=ipf_max_precursor_pep,
             ipf_max_peakgroup_pep=ipf_max_peakgroup_pep,
             ipf_max_precursor_peakgroup_pep=ipf_max_precursor_peakgroup_pep,
             ipf_max_transition_pep=ipf_max_transition_pep,
+            ipf_min_supporting_transitions=ipf_min_supporting_transitions,
+            ipf_min_peakgroup_intensity=ipf_min_peakgroup_intensity,
             propagate_signal_across_runs=propagate_signal_across_runs,
             ipf_max_alignment_pep=ipf_max_alignment_pep,
             across_run_confidence_threshold=across_run_confidence_threshold,

diff --git a/pyprophet/cli/ipf.py b/pyprophet/cli/ipf.py
@@ -48,6 +48,13 @@
     show_default=True,
     help="[Experimental] Compute grouped FDR instead of pooled FDR to better support data where peak groups are evaluated to originate from very heterogeneous numbers of peptidoforms.",
 )
+@click.option(
+    "--ipf_grouped_fdr_strategy",
+    default="num_peptidoforms",
+    show_default=True,
+    type=click.Choice(["num_peptidoforms"]),
+    help="Grouping strategy used when --ipf_grouped_fdr is enabled.",
+)
 @click.option(
     "--ipf_max_precursor_pep",
     default=0.7,
@@ -76,6 +83,20 @@
     type=float,
     help="Maximum PEP to consider scored transitions in IPF.",
 )
+@click.option(
+    "--ipf_min_supporting_transitions",
+    default=0,
+    show_default=True,
+    type=int,
+    help="Minimum number of supporting identifying transitions required to keep an inferred peptidoform result. Applied as a post-IPF filter; 0 disables the filter.",
+)
+@click.option(
+    "--ipf_min_peakgroup_intensity",
+    default=0.0,
+    show_default=True,
+    type=float,
+    help="Minimum FEATURE_MS2 area intensity required to keep an inferred peptidoform result. Applied as a post-IPF filter; 0 disables the filter.",
+)
 @click.option(
     "--propagate_signal_across_runs/--no-propagate_signal_across_runs",
     default=False,
@@ -120,10 +141,13 @@ def ipf(
     ipf_ms2_scoring,
     ipf_h0,
     ipf_grouped_fdr,
+    ipf_grouped_fdr_strategy,
     ipf_max_precursor_pep,
     ipf_max_peakgroup_pep,
     ipf_max_precursor_peakgroup_pep,
     ipf_max_transition_pep,
+    ipf_min_supporting_transitions,
+    ipf_min_peakgroup_intensity,
     propagate_signal_across_runs,
     ipf_max_alignment_pep,
     across_run_confidence_threshold,
@@ -155,10 +179,13 @@ def ipf(
         ipf_ms2_scoring,
         ipf_h0,
         ipf_grouped_fdr,
+        ipf_grouped_fdr_strategy,
         ipf_max_precursor_pep,
         ipf_max_peakgroup_pep,
         ipf_max_precursor_peakgroup_pep,
         ipf_max_transition_pep,
+        ipf_min_supporting_transitions,
+        ipf_min_peakgroup_intensity,
         propagate_signal_across_runs,
         ipf_max_alignment_pep,
         across_run_confidence_threshold,

diff --git a/pyprophet/glyco/glycoform.py b/pyprophet/glyco/glycoform.py
@@ -15,28 +15,105 @@
 from .pepmass import GlycoPeptideMassCalculator
 
 
-def get_feature_mapping_across_runs(infile, ipf_max_alignment_pep=1):
+def get_feature_mapping_across_runs(
+    infile, max_alignment_pep=0.5, min_mapping_confidence=None
+):
     click.echo("Info: Reading Across Run Feature Alignment Mapping ... ", nl=False)
     start = time.time()
 
-    con = sqlite3.connect(infile)
+    from contextlib import closing  # sqlite3's own "with" only commits/rolls back; it never closes
+    with closing(sqlite3.connect(infile)) as con:
+        use_alignment_candidates = min_mapping_confidence is not None and (
+            check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT_CANDIDATE")
+        )
 
-    data = pd.read_sql_query(
-        f"""SELECT  
-                DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID,
-                ALIGNED_FEATURE_ID AS FEATURE_ID 
-                FROM (SELECT DISTINCT * FROM FEATURE_MS2_ALIGNMENT) AS FEATURE_MS2_ALIGNMENT
-                INNER JOIN 
-                (SELECT DISTINCT *, MIN(QVALUE) FROM SCORE_ALIGNMENT GROUP BY FEATURE_ID) AS SCORE_ALIGNMENT 
-                ON SCORE_ALIGNMENT.FEATURE_ID = FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID
-                WHERE LABEL = 1
-                AND SCORE_ALIGNMENT.PEP < {ipf_max_alignment_pep}
-                ORDER BY ALIGNMENT_GROUP_ID""",
-        con,
-    )
+        if use_alignment_candidates:
+            query = """
+                SELECT
+                    DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID,
+                    ALIGNMENT_ID,
+                    FEATURE_ID,
+                    PRECURSOR_ID,
+                    FEATURE_TYPE
+                FROM (
+                    SELECT DISTINCT
+                        ALIGNMENT_ID,
+                        PRECURSOR_ID,
+                        REFERENCE_FEATURE_ID AS FEATURE_ID,
+                        'REFERENCE' AS FEATURE_TYPE
+                    FROM FEATURE_MS2_ALIGNMENT_CANDIDATE
+                    WHERE SELECTED = 1
+                    AND MAPPING_CONFIDENCE >= ?
+                    AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID
+                    AND ALIGNED_FEATURE_ID != -1
+
+                    UNION
+
+                    SELECT DISTINCT
+                        ALIGNMENT_ID,
+                        PRECURSOR_ID,
+                        ALIGNED_FEATURE_ID AS FEATURE_ID,
+                        'QUERY' AS FEATURE_TYPE
+                    FROM FEATURE_MS2_ALIGNMENT_CANDIDATE
+                    WHERE SELECTED = 1
+                    AND MAPPING_CONFIDENCE >= ?
+                    AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID
+                    AND ALIGNED_FEATURE_ID != -1
+                ) AS feature_list
+                ORDER BY
+                    ALIGNMENT_GROUP_ID,
+                    CASE FEATURE_TYPE
+                        WHEN 'REFERENCE' THEN 0
+                        WHEN 'QUERY' THEN 1
+                    END
+            """
+            data = pd.read_sql_query(
+                query,
+                con,
+                params=[min_mapping_confidence, min_mapping_confidence],
+            )
+        else:
+            if not check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT") or not check_sqlite_table(
+                con, "SCORE_ALIGNMENT"
+            ):
+                raise click.ClickException(
+                    "Perform feature alignment using ARYCAL, and apply scoring to alignment-level data before running glycoform inference."
+                )
+
+            # The PEP cutoff is bound as a parameter (as in the candidate branch), and FEATURE_ID
+            # is qualified because both joined subqueries expose a column with that name.
+            query = """
+                SELECT
+                    DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID,
+                    feature_list.FEATURE_ID AS FEATURE_ID
+                FROM (
+                    SELECT DISTINCT
+                        ALIGNMENT_ID,
+                        PRECURSOR_ID,
+                        REFERENCE_FEATURE_ID AS FEATURE_ID
+                    FROM FEATURE_MS2_ALIGNMENT
+                    WHERE LABEL = 1
+                    AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID
+                    UNION
+                    SELECT DISTINCT
+                        ALIGNMENT_ID,
+                        PRECURSOR_ID,
+                        ALIGNED_FEATURE_ID AS FEATURE_ID
+                    FROM FEATURE_MS2_ALIGNMENT
+                    WHERE LABEL = 1
+                    AND REFERENCE_FEATURE_ID != ALIGNED_FEATURE_ID
+                ) AS feature_list
+                INNER JOIN (
+                    SELECT DISTINCT FEATURE_ID
+                    FROM SCORE_ALIGNMENT
+                    WHERE PEP < ?
+                ) AS good_alignments
+                ON good_alignments.FEATURE_ID = feature_list.FEATURE_ID
+                ORDER BY ALIGNMENT_GROUP_ID
+            """
+            data = pd.read_sql_query(query, con, params=[max_alignment_pep])
 
     data.columns = [col.lower() for col in data.columns]
-    con.close()
 
     end = time.time()
     click.echo(f"{end-start:.4f} seconds")
@@ -604,7 +681,7 @@ def infer_glycoforms(
     ## prepare for propagating signal across runs for aligned features
     if propagate_signal_across_runs:
         across_run_feature_map = get_feature_mapping_across_runs(
-            infile, max_alignment_pep
+            infile, max_alignment_pep=max_alignment_pep
         )
         transition_table = pd.merge(
             transition_table, across_run_feature_map, on="feature_id", how="left"