Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
st.Page(Path("content", "results_pca.py"), title="PCA", icon="📊"),
st.Page(Path("content", "results_heatmap.py"), title="Heatmap", icon="🔥"),
st.Page(Path("content", "results_library.py"), title="Spectral Library", icon="📚"),
st.Page(Path("content", "results_proteomicslfq.py"), title="Proteomics LFQ", icon="🧪"),
st.Page(Path("content", "results_pathway_analysis.py"), title="Pathway Analysis", icon="📉"),
],
}

Expand Down
135 changes: 90 additions & 45 deletions content/results_abundance.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path
from src.common.common import page_setup
from src.common.results_helpers import get_workflow_dir, get_abundance_data
from src.workflow.ParameterManager import ParameterManager

params = page_setup()
st.title("Abundance Quantification")
Expand All @@ -21,6 +22,10 @@

workflow_dir = get_workflow_dir(st.session_state["workspace"])
quant_dir = workflow_dir / "results" / "quant_results"
parameter_manager = ParameterManager(workflow_dir, "TOPP Workflow")

workflow_params = parameter_manager.get_parameters_from_json()
analysis_mode = workflow_params.get("analysis-mode", "LFQ")

if not quant_dir.exists():
st.info("No quantification results available yet. Please run the workflow first.")
Expand All @@ -35,6 +40,60 @@

csv_file = csv_files[0]

def render_protein_table(pivot_df, group_map, is_lfq=True):
"""Common function to render the protein-level abundance table"""
st.markdown("### Protein-Level Abundance Table")
st.info(
"This protein-level table is generated by grouping all PSMs that map to the "
"same protein and aggregating their intensities across samples.\n\n"
"Additionally, log2 fold change and p-values are calculated between sample groups."
)

# Display group comparison info
groups = sorted(set(group_map.values()))
if len(groups) >= 2:
group1, group2 = sorted(groups)[:2]
st.info(f"Statistical comparison: **{group2} vs {group1}**")

if is_lfq:
# Handle LFQ mode columns (Raw Intensity)
id_col = "ProteinName"
exclude_cols = [id_col, "log2FC", "p-value", "PeptideSequence"]
sample_cols = [c for c in pivot_df.columns if c not in exclude_cols]

pivot_df["Intensity"] = pivot_df[sample_cols].apply(list, axis=1)
display_cols = [id_col, "log2FC", "p-value", "Intensity"] + sample_cols + ["PeptideSequence"]
help_text = "Raw sample intensities"
y_min = None
else:
# Handle non-LFQ mode columns (Log2-transformed Intensity)
id_col = "protein"
exclude_cols = [id_col, "log2FC", "p-value", "p-adj", "n_proteins", "n_peptides", "protein_score"]
sample_cols = [c for c in pivot_df.columns if c not in exclude_cols and "ratio" not in c.lower()]

pivot_df["Intensity"] = pivot_df[sample_cols].apply(
lambda row: [np.log2(v + 1) for v in row], axis=1
)
Comment on lines +74 to +76

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | ⚡ Quick win

np is undefined - missing numpy import.

The non-LFQ branch (is_lfq=False) calls np.log2(), but numpy is not imported in this file, so rendering the protein table in any non-LFQ analysis mode (e.g. TMT) raises a NameError at runtime.

Proposed fix - add import at top of file
+import numpy as np
 from src.workflow.ParameterManager import ParameterManager
🧰 Tools
🪛 GitHub Actions: Pylint / 0_build.txt

[error] 75-75: pylint E0602: Undefined variable 'np' (undefined-variable)

🪛 GitHub Actions: Pylint / build

[error] 75-75: pylint E0602: Undefined variable 'np' (undefined-variable)

🪛 Ruff (0.15.17)

[error] 75-75: Undefined name np

(F821)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@content/results_abundance.py` around lines 74 - 76, The code in the lambda
function within the apply method on pivot_df uses np.log2() but the numpy module
is not imported at the top of the file, causing a NameError when this code
executes. Add import numpy as np to the import statements at the top of the
results_abundance.py file to resolve the undefined np reference.

Source: Linters/SAST tools

display_cols = [id_col, "log2FC", "p-value", "Intensity"] + sample_cols
help_text = "Sample intensities (log2 scale)"
y_min = 0

# Filter to available columns, then sort and display
available_cols = [c for c in display_cols if c in pivot_df.columns]

st.dataframe(
pivot_df[available_cols].sort_values("p-value"),
column_config={
"Intensity": st.column_config.BarChartColumn(
"Intensity",
help=help_text,
width="small",
y_min=y_min,
),
},
use_container_width=True,
)

protein_tab, psm_tab = st.tabs(["Protein Table", "PSM-level Quantification Table"])

try:
Expand All @@ -44,58 +103,44 @@
st.info("No data found in this file.")
st.stop()

with protein_tab:
st.markdown("### Protein-Level Abundance Table")
result = get_abundance_data(st.session_state["workspace"])

st.info(
"This protein-level table is generated by grouping all PSMs that map to the "
"same protein and aggregating their intensities across samples.\n\n"
"Additionally, log2 fold change and p-values are calculated between sample groups."
)
if analysis_mode == "LFQ":
protein_tab, psm_tab = st.tabs(["Protein Table", "PSM-level Quantification Table"])

result = get_abundance_data(st.session_state["workspace"])
if result is None:
st.warning("Could not compute abundance data. Please ensure sample groups are defined in the Configure page.")
st.page_link("content/workflow_configure.py", label="Go to Configure", icon="⚙️")
st.stop()
with protein_tab:
if result is None:
st.warning("Could not compute abundance data. Please ensure sample groups are defined in the Configure page.")
# st.page_link("content/workflow_configure.py", label="Go to Configure", icon="⚙️")
st.stop()

pivot_df, expr_df, group_map = result
render_protein_table(pivot_df, group_map, is_lfq=True)

pivot_df, expr_df, group_map = result
with psm_tab:
st.markdown("### PSM-level Quantification Table")
st.info(
"This table shows the PSM-level quantification data, including protein IDs, "
"peptide sequences, charge states, and intensities across samples. "
"Each row represents one peptide-spectrum match detected from the MS/MS analysis."
)
st.dataframe(df, use_container_width=True)

# Display group comparison info
groups = sorted(set(group_map.values()))
if len(groups) >= 2:
group1, group2 = sorted(groups)[:2]
st.info(f"Statistical comparison: **{group2} vs {group1}**")
else:
pre_processing_tab, protein_tab = st.tabs(["Pre-processing", "Protein Table"])

# Get sample columns (between stats and PeptideSequence)
sample_cols = [c for c in pivot_df.columns if c not in ["ProteinName", "log2FC", "p-value", "PeptideSequence"]]

pivot_df["Intensity"] = pivot_df[sample_cols].apply(list, axis=1)
if result is None:
st.info("💡 Please complete the configuration in the 'Configure' page to see results.")
st.stop()

pivot_df, expr_df, group_map = result

# Reorder columns: place Intensity after p-value
display_cols = ["ProteinName", "log2FC", "p-value", "Intensity"] + sample_cols + ["PeptideSequence"]
display_df = pivot_df[display_cols]

st.dataframe(
display_df.sort_values("p-value"),
column_config={
"Intensity": st.column_config.BarChartColumn(
"Intensity",
help="Raw sample intensities",
width="small",
),
},
use_container_width=True,
)
with pre_processing_tab:
st.write("### Final Results (Group row removed, Stats added)")
st.dataframe(pivot_df.head(10))

with psm_tab:
st.markdown("### PSM-level Quantification Table")
st.info(
"This table shows the PSM-level quantification data, including protein IDs, "
"peptide sequences, charge states, and intensities across samples. "
"Each row represents one peptide-spectrum match detected from the MS/MS analysis."
)
st.dataframe(df, use_container_width=True)
with protein_tab:
render_protein_table(pivot_df, group_map, is_lfq=False)

except Exception as e:
st.error(f"Failed to load {csv_file.name}: {e}")
Expand Down
130 changes: 93 additions & 37 deletions content/results_heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from scipy.cluster.hierarchy import linkage, leaves_list
from scipy.spatial.distance import pdist
from src.common.common import page_setup
from src.common.results_helpers import get_abundance_data
from src.common.results_helpers import get_abundance_data, get_workflow_dir
from src.workflow.ParameterManager import ParameterManager

params = page_setup()
st.title("Heatmap")
Expand All @@ -29,48 +30,103 @@

pivot_df, expr_df, group_map = result

top_n = st.slider("Number of proteins", 20, 200, 50, key="heatmap_top_n")
workflow_dir = get_workflow_dir(st.session_state["workspace"])
parameter_manager = ParameterManager(workflow_dir, "TOPP Workflow")

var_series = expr_df.var(axis=1)
top_proteins = var_series.sort_values(ascending=False).head(top_n).index
heatmap_df = expr_df.loc[top_proteins]
heatmap_z = heatmap_df.sub(heatmap_df.mean(axis=1), axis=0).div(heatmap_df.std(axis=1), axis=0)
heatmap_z = heatmap_z.replace([np.inf, -np.inf], np.nan).dropna()
workflow_params = parameter_manager.get_parameters_from_json()
analysis_mode = workflow_params.get("analysis-mode", "LFQ")

if not heatmap_z.empty:
row_linkage = linkage(pdist(heatmap_z.values), method="average")
row_order = leaves_list(row_linkage)
st.write("Workflow Analysis Mode:", analysis_mode)

col_linkage = linkage(pdist(heatmap_z.T.values), method="average")
col_order = leaves_list(col_linkage)
if analysis_mode == "LFQ":
top_n = st.slider("Number of proteins", 20, 200, 50, key="heatmap_top_n")

heatmap_clustered = heatmap_z.iloc[row_order, col_order]
var_series = expr_df.var(axis=1)
top_proteins = var_series.sort_values(ascending=False).head(top_n).index
heatmap_df = expr_df.loc[top_proteins]
heatmap_z = heatmap_df.sub(heatmap_df.mean(axis=1), axis=0).div(heatmap_df.std(axis=1), axis=0)
heatmap_z = heatmap_z.replace([np.inf, -np.inf], np.nan).dropna()

fig_heatmap = px.imshow(
heatmap_clustered,
labels=dict(x="Sample", y="Protein", color="Z-score"),
aspect="auto",
color_continuous_scale=[[0.0, "#3b6fb6"], [0.5, "white"], [1.0, "#b40426"]],
zmin=-3, zmax=3
)
if not heatmap_z.empty:
row_linkage = linkage(pdist(heatmap_z.values), method="average")
row_order = leaves_list(row_linkage)

fig_heatmap.update_layout(
height=700,
xaxis={'side': 'bottom'},
yaxis={'side': 'left'}
)
col_linkage = linkage(pdist(heatmap_z.T.values), method="average")
col_order = leaves_list(col_linkage)

fig_heatmap.update_xaxes(tickfont=dict(size=10))
fig_heatmap.update_yaxes(tickfont=dict(size=8))
heatmap_clustered = heatmap_z.iloc[row_order, col_order]

st.plotly_chart(fig_heatmap, use_container_width=True)
fig_heatmap = px.imshow(
heatmap_clustered,
labels=dict(x="Sample", y="Protein", color="Z-score"),
aspect="auto",
color_continuous_scale=[[0.0, "#3b6fb6"], [0.5, "white"], [1.0, "#b40426"]],
zmin=-3, zmax=3
)

fig_heatmap.update_layout(
height=700,
xaxis={'side': 'bottom'},
yaxis={'side': 'left'}
)

fig_heatmap.update_xaxes(tickfont=dict(size=10))
fig_heatmap.update_yaxes(tickfont=dict(size=8))

st.plotly_chart(fig_heatmap, use_container_width=True)
else:
st.warning("Insufficient data to generate the heatmap.")

st.markdown("---")
st.markdown("**Other visualizations:**")
col1, col2 = st.columns(2)
with col1:
st.page_link("content/results_volcano.py", label="Volcano Plot", icon="🌋")
with col2:
st.page_link("content/results_pca.py", label="PCA", icon="📊")
else:
st.warning("Insufficient data to generate the heatmap.")

st.markdown("---")
st.markdown("**Other visualizations:**")
col1, col2 = st.columns(2)
with col1:
st.page_link("content/results_volcano.py", label="Volcano Plot", icon="🌋")
with col2:
st.page_link("content/results_pca.py", label="PCA", icon="📊")
top_n = st.slider("Number of proteins", 20, 200, 50, key="heatmap_top_n")

var_series = expr_df.var(axis=1)
top_proteins = var_series.sort_values(ascending=False).head(top_n).index
heatmap_df = expr_df.loc[top_proteins]
heatmap_z = heatmap_df.sub(heatmap_df.mean(axis=1), axis=0).div(heatmap_df.std(axis=1), axis=0)
heatmap_z = heatmap_z.replace([np.inf, -np.inf], np.nan).dropna()

if not heatmap_z.empty:
row_linkage = linkage(pdist(heatmap_z.values), method="average")
row_order = leaves_list(row_linkage)

col_linkage = linkage(pdist(heatmap_z.T.values), method="average")
col_order = leaves_list(col_linkage)

heatmap_clustered = heatmap_z.iloc[row_order, col_order]

fig_heatmap = px.imshow(
heatmap_clustered,
labels=dict(x="Sample", y="Protein", color="Z-score"),
aspect="auto",
color_continuous_scale=[[0.0, "#3b6fb6"], [0.5, "white"], [1.0, "#b40426"]],
zmin=-3, zmax=3
)

fig_heatmap.update_layout(
height=700,
xaxis={'side': 'bottom'},
yaxis={'side': 'left'}
)

fig_heatmap.update_xaxes(tickfont=dict(size=10))
fig_heatmap.update_yaxes(tickfont=dict(size=8))

st.plotly_chart(fig_heatmap, width="stretch")

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Invalid width parameter for st.plotly_chart.

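Same issue as on the other pages: width="stretch" is rejected by st.plotly_chart on Streamlit releases that predate the `width` argument (newer releases accept it and deprecate use_container_width), so confirm the Streamlit version this project pins before choosing between the two.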

Proposed fix
-        st.plotly_chart(fig_heatmap, width="stretch")
+        st.plotly_chart(fig_heatmap, use_container_width=True)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
st.plotly_chart(fig_heatmap, width="stretch")
st.plotly_chart(fig_heatmap, use_container_width=True)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@content/results_heatmap.py` at line 122, the st.plotly_chart call passes
width="stretch", which is not accepted on Streamlit releases that predate the
`width` argument. Replace width="stretch" with use_container_width=True, as in
the proposed fix above and in the LFQ branch's st.plotly_chart call.

else:
st.warning("Insufficient data to generate the heatmap.")

st.markdown("---")
st.markdown("**Other visualizations:**")
col1, col2 = st.columns(2)
with col1:
st.page_link("content/results_volcano.py", label="Volcano Plot", icon="🌋")
with col2:
st.page_link("content/results_pca.py", label="PCA", icon="📊")
Comment on lines +41 to +132

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major | ⚡ Quick win

Eliminate near-complete code duplication.

The LFQ and non-LFQ branches (lines 41-86 vs 87-132) are nearly identical - the only difference is the chart rendering call. This should be a single code path.

Proposed refactor
-if analysis_mode == "LFQ":
-    top_n = st.slider("Number of proteins", 20, 200, 50, key="heatmap_top_n")
-    # ... 40+ lines of identical code ...
-    st.plotly_chart(fig_heatmap, use_container_width=True)
-    # ... footer links ...
-else:
-    top_n = st.slider("Number of proteins", 20, 200, 50, key="heatmap_top_n")
-    # ... 40+ lines of identical code ...
-    st.plotly_chart(fig_heatmap, width="stretch")
-    # ... footer links ...
+top_n = st.slider("Number of proteins", 20, 200, 50, key="heatmap_top_n")
+
+var_series = expr_df.var(axis=1)
+top_proteins = var_series.sort_values(ascending=False).head(top_n).index
+heatmap_df = expr_df.loc[top_proteins]
+heatmap_z = heatmap_df.sub(heatmap_df.mean(axis=1), axis=0).div(heatmap_df.std(axis=1), axis=0)
+heatmap_z = heatmap_z.replace([np.inf, -np.inf], np.nan).dropna()
+
+if not heatmap_z.empty:
+    # ... clustering and figure creation ...
+    st.plotly_chart(fig_heatmap, use_container_width=True)
+else:
+    st.warning("Insufficient data to generate the heatmap.")
+
+st.markdown("---")
+st.markdown("**Other visualizations:**")
+# ... links ...
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@content/results_heatmap.py` around lines 41 - 132, the if analysis_mode ==
"LFQ" and else branches are almost entirely duplicated. Both branches contain
identical logic for: creating the slider with key "heatmap_top_n", computing the
variance with var(), selecting the top proteins, normalizing to z-scores,
clustering hierarchically with linkage() and leaves_list(), and building the
figure with px.imshow(). Move this common code out of the conditional so that it
runs once before the if-else block. Keep only the differing st.plotly_chart()
argument (use_container_width=True versus width="stretch") inside the
conditional, or collapse it into a single call if both arguments achieve the
same result.

Loading
Loading