VariantEffect · bencap · Jun 4, 2026 · Jun 4, 2026 · Jun 7, 2026 · Jun 7, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -80,7 +80,7 @@ RUN curl -sSL https://install.python-poetry.org | python3 -
 COPY poetry.lock pyproject.toml ./
 
 # installs runtime dependencies to $VIRTUAL_ENV
-RUN poetry install --no-root --extras server
+RUN poetry install --no-root --extras server --no-directory
 COPY alembic /code/alembic
 COPY alembic.ini /code/alembic.ini
 COPY src /code/src

diff --git a/Makefile b/Makefile
@@ -0,0 +1,27 @@
+# Developer task runner. Running `make` with no arguments prints the target list.
+
+# Directory the test, lint and format targets run their tools (pytest, ruff) from.
+VENV := .venv/bin
+
+.DEFAULT_GOAL := help
+
+# Lists every target whose rule line carries a trailing `## description` comment.
+.PHONY: help
+help: ## Show this help
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "  %-12s %s\n", $$1, $$2}'
+
+.PHONY: dev
+dev: ## Install deps including local editable variant-annotation
+	poetry install --extras server
+
+.PHONY: test
+test: ## Run the test suite
+	$(VENV)/pytest tests/
+
+.PHONY: lint
+lint: ## Check code with ruff
+	$(VENV)/ruff check src/ tests/
+
+.PHONY: format
+format: ## Format code with ruff
+	$(VENV)/ruff format src/ tests/
diff --git a/alembic/versions/a1b2c3d4e5f6_add_vrs_allele_closure_tables.py b/alembic/versions/a1b2c3d4e5f6_add_vrs_allele_closure_tables.py
@@ -0,0 +1,162 @@
+"""Add mapping_records, alleles, and mapping_record_alleles tables
+
+Revision ID: a1b2c3d4e5f6
+Revises: 398067c53257
+Create Date: 2026-05-29
+
+New parallel tables for the Better Reverse Translation epic (#746).
+The existing mapped_variants table is left untouched (frozen serving).
+"""
+
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "a1b2c3d4e5f6"
+down_revision = "398067c53257"
+branch_labels = None
+depends_on = None
+
+# SQL value lists interpolated into the mapping_records CHECK constraints below.
+VALID_ASSAY_LEVELS = "('genomic', 'cdna', 'protein')"
+VALID_ALIGNMENT_LEVELS = "('protein', 'cdna', 'genomic')"
+
+
+def upgrade() -> None:
+    """Create the alleles, mapping_records and mapping_record_alleles tables plus their indexes."""
+    op.create_table(
+        "alleles",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("vrs_digest", sa.String(), nullable=False),
+        sa.Column("level", sa.String(length=16), nullable=False),
+        sa.Column("transcript", sa.String(), nullable=False),
+        sa.Column("hgvs_g", sa.String(), nullable=True),
+        sa.Column("hgvs_c", sa.String(), nullable=True),
+        sa.Column("hgvs_p", sa.String(), nullable=True),
+        sa.Column("clingen_allele_id", sa.String(), nullable=True),
+        sa.Column("post_mapped", postgresql.JSONB(), nullable=True),
+        sa.Column("created_at", sa.Date(), nullable=False, server_default=sa.text("CURRENT_DATE")),
+        sa.Column(
+            "updated_at",
+            sa.Date(),
+            nullable=False,
+            server_default=sa.text("CURRENT_DATE"),
+            onupdate=sa.text("CURRENT_DATE"),
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("vrs_digest", name="uq_alleles_vrs_digest"),
+    )
+    # NOTE(review): uq_alleles_vrs_digest above is already backed by a unique index in PostgreSQL, so
+    # ix_alleles_vrs_digest looks redundant — confirm against the Allele model before dropping it.
+    op.create_index("ix_alleles_vrs_digest", "alleles", ["vrs_digest"])
+    op.create_index("ix_alleles_level", "alleles", ["level"])
+    op.create_index("ix_alleles_clingen_allele_id", "alleles", ["clingen_allele_id"])
+
+    op.create_table(
+        "mapping_records",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("variant_id", sa.Integer(), nullable=False),
+        sa.Column("vrs_digest", sa.String(), nullable=True),
+        sa.Column("pre_mapped", postgresql.JSONB(), nullable=True),
+        sa.Column("assay_level", sa.String(length=16), nullable=False),
+        sa.Column("hgvs_assay_level", sa.String(), nullable=True),
+        sa.Column("mapping_api_version", sa.String(), nullable=False),
+        sa.Column("mapped_date", sa.Date(), nullable=False),
+        sa.Column("vrs_version", sa.String(), nullable=True),
+        sa.Column("current", sa.Boolean(), nullable=False),
+        sa.Column("alignment_level", sa.String(length=16), nullable=True),
+        sa.Column("at_mismatched_locus", sa.Boolean(), nullable=True),
+        sa.Column("near_gap", sa.Boolean(), nullable=True),
+        sa.Column("target_gene_mapping_id", sa.Integer(), nullable=True),
+        sa.Column("created_at", sa.Date(), nullable=False, server_default=sa.text("CURRENT_DATE")),
+        sa.Column(
+            "updated_at",
+            sa.Date(),
+            nullable=False,
+            server_default=sa.text("CURRENT_DATE"),
+            onupdate=sa.text("CURRENT_DATE"),
+        ),
+        sa.ForeignKeyConstraint(
+            ["variant_id"],
+            ["variants.id"],
+            name="fk_mapping_records_variant_id",
+        ),
+        sa.ForeignKeyConstraint(
+            ["target_gene_mapping_id"],
+            ["target_gene_mappings.id"],
+            name="fk_mapping_records_target_gene_mapping_id",
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.CheckConstraint(
+            f"assay_level IN {VALID_ASSAY_LEVELS}",
+            name="ck_mapping_records_assay_level_valid",
+        ),
+        # alignment_level is nullable; a CHECK passes when its expression is NULL, so unset values
+        # remain allowed while any non-NULL value must be one of VALID_ALIGNMENT_LEVELS.
+        sa.CheckConstraint(
+            f"alignment_level IN {VALID_ALIGNMENT_LEVELS}",
+            name="ck_mapping_records_alignment_level_valid",
+        ),
+    )
+    op.create_index("ix_mapping_records_variant_id", "mapping_records", ["variant_id"])
+    op.create_index("ix_mapping_records_vrs_digest", "mapping_records", ["vrs_digest"])
+    op.create_index(
+        "ix_mapping_records_target_gene_mapping_id",
+        "mapping_records",
+        ["target_gene_mapping_id"],
+    )
+
+    op.create_table(
+        "mapping_record_alleles",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("mapping_record_id", sa.Integer(), nullable=False),
+        sa.Column("allele_id", sa.Integer(), nullable=False),
+        sa.Column(
+            "is_authoritative",
+            sa.Boolean(),
+            nullable=False,
+            server_default=sa.text("false"),
+        ),
+        sa.ForeignKeyConstraint(
+            ["mapping_record_id"],
+            ["mapping_records.id"],
+            name="fk_mapping_record_alleles_mapping_record_id",
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["allele_id"],
+            ["alleles.id"],
+            name="fk_mapping_record_alleles_allele_id",
+            ondelete="RESTRICT",
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        "ix_mapping_record_alleles_mapping_record_id",
+        "mapping_record_alleles",
+        ["mapping_record_id"],
+    )
+    op.create_index(
+        "ix_mapping_record_alleles_allele_id",
+        "mapping_record_alleles",
+        ["allele_id"],
+    )
+
+
+def downgrade() -> None:
+    """Drop indexes and tables in reverse order of creation (link table first, alleles last)."""
+    op.drop_index("ix_mapping_record_alleles_allele_id", table_name="mapping_record_alleles")
+    op.drop_index("ix_mapping_record_alleles_mapping_record_id", table_name="mapping_record_alleles")
+    op.drop_table("mapping_record_alleles")
+
+    op.drop_index("ix_mapping_records_target_gene_mapping_id", table_name="mapping_records")
+    op.drop_index("ix_mapping_records_vrs_digest", table_name="mapping_records")
+    op.drop_index("ix_mapping_records_variant_id", table_name="mapping_records")
+    op.drop_table("mapping_records")
+
+    op.drop_index("ix_alleles_clingen_allele_id", table_name="alleles")
+    op.drop_index("ix_alleles_level", table_name="alleles")
+    op.drop_index("ix_alleles_vrs_digest", table_name="alleles")
+    op.drop_table("alleles")
diff --git a/alembic/versions/b8e1f0a2c4d7_drop_alleles_transcript_column.py b/alembic/versions/b8e1f0a2c4d7_drop_alleles_transcript_column.py
@@ -0,0 +1,36 @@
+"""drop alleles.transcript column
+
+Revision ID: b8e1f0a2c4d7
+Revises: f4d2a9c1b7e3
+Create Date: 2026-06-05
+
+The `transcript` column duplicated data already present in the HGVS columns — it was
+always extract_accession(hgvs_g/hgvs_c/hgvs_p). It is now a derived hybrid_property on
+the Allele model (split_part(coalesce(hgvs_g, hgvs_c, hgvs_p), ':', 1)), so the stored
+column is removed to keep a single source of truth and avoid drift.
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "b8e1f0a2c4d7"
+down_revision = "f4d2a9c1b7e3"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Drop the stored alleles.transcript column."""
+    op.drop_column("alleles", "transcript")
+
+
+def downgrade() -> None:
+    """Re-add alleles.transcript, backfill it from the HGVS columns, then restore NOT NULL."""
+    # Re-add the column and backfill it from the HGVS columns (the same derivation the
+    # hybrid_property uses) so the restored NOT NULL column is consistent.
+    op.add_column("alleles", sa.Column("transcript", sa.String(), nullable=True))
+    # The outer coalesce(..., '') covers rows whose three nullable HGVS columns are all NULL:
+    # split_part would yield NULL there and the SET NOT NULL below would abort the downgrade.
+    op.execute("UPDATE alleles SET transcript = coalesce(split_part(coalesce(hgvs_g, hgvs_c, hgvs_p), ':', 1), '')")
+    op.alter_column("alleles", "transcript", nullable=False)
diff --git a/alembic/versions/c3d5e7f9a1b2_temporal_mapping_record_alleles.py b/alembic/versions/c3d5e7f9a1b2_temporal_mapping_record_alleles.py
@@ -0,0 +1,53 @@
+"""add valid-time versioning to mapping_record_alleles
+
+Revision ID: c3d5e7f9a1b2
+Revises: b8e1f0a2c4d7
+Create Date: 2026-06-05
+
+Make the link table valid-time versioned (TemporalLink): a link is live while valid_to is
+NULL, and superseding it closes valid_to instead of deleting, so reverse translation can be
+re-run independently while prior derivations remain queryable point-in-time. The partial
+unique index enforces a single live link per (mapping_record, allele).
+
+Assumes no pre-existing duplicate live links — true for these parallel tables, which are
+new-only writes and not yet serving. If this ever runs against data with duplicates, the
+unique index creation will fail and the duplicates must be retired first.
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "c3d5e7f9a1b2"
+down_revision = "b8e1f0a2c4d7"
+branch_labels = None
+depends_on = None
+
+_LINK_TABLE = "mapping_record_alleles"
+_LIVE_INDEX = "uq_mapping_record_alleles_live"
+
+
+def upgrade() -> None:
+    """Add valid_from / valid_to to the link table and index the live (valid_to IS NULL) links uniquely."""
+    validity_columns = (
+        sa.Column("valid_from", sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now()),
+        sa.Column("valid_to", sa.DateTime(timezone=True), nullable=True),
+    )
+    for column in validity_columns:
+        op.add_column(_LINK_TABLE, column)
+
+    # Partial unique index: only rows with valid_to IS NULL take part in the uniqueness check.
+    op.create_index(
+        _LIVE_INDEX,
+        _LINK_TABLE,
+        ["mapping_record_id", "allele_id"],
+        unique=True,
+        postgresql_where=sa.text("valid_to IS NULL"),
+    )
+
+
+def downgrade() -> None:
+    """Drop the live-link index, then the two validity columns."""
+    op.drop_index(_LIVE_INDEX, table_name=_LINK_TABLE)
+    for column_name in ("valid_to", "valid_from"):
+        op.drop_column(_LINK_TABLE, column_name)
diff --git a/alembic/versions/d4e6f8a0b2c3_temporal_mapping_records.py b/alembic/versions/d4e6f8a0b2c3_temporal_mapping_records.py
@@ -0,0 +1,86 @@
+"""move mapping_records onto valid-time versioning
+
+Revision ID: d4e6f8a0b2c3
+Revises: c3d5e7f9a1b2
+Create Date: 2026-06-05
+
+Replace the stored `current` flag and the `created_at`/`updated_at` audit dates with valid-time
+columns (ValidTime mixin): a mapping record is live while valid_to is NULL, and a re-map retires
+the prior version (closing valid_to) instead of flipping a boolean. `current` becomes derived
+(valid_to IS NULL). `mapped_date` (the date the mapping was performed) is domain data and stays.
+
+The partial unique index promotes to the database the "one live mapping record per variant"
+invariant the mapping job previously enforced only in app code.
+
+Backfills from the columns being dropped, so existing rows keep their validity. Assumes no
+duplicate live records per variant (true for these pre-cutover parallel tables; otherwise the
+unique index creation fails and the duplicates must be retired first).
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "d4e6f8a0b2c3"
+down_revision = "c3d5e7f9a1b2"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add valid_from / valid_to, backfill them from the legacy columns, then drop those columns."""
+    op.add_column(
+        "mapping_records",
+        sa.Column("valid_from", sa.DateTime(timezone=True), nullable=True, server_default=sa.func.now()),
+    )
+    op.add_column(
+        "mapping_records",
+        sa.Column("valid_to", sa.DateTime(timezone=True), nullable=True),
+    )
+
+    # Backfill validity from the columns being replaced: a record's life began at created_at, and
+    # a non-current record was retired at updated_at (the only in-place update it ever took).
+    op.execute("UPDATE mapping_records SET valid_from = created_at::timestamptz")
+    op.execute("UPDATE mapping_records SET valid_to = updated_at::timestamptz WHERE current = false")
+
+    op.alter_column("mapping_records", "valid_from", nullable=False)
+
+    op.drop_column("mapping_records", "current")
+    op.drop_column("mapping_records", "created_at")
+    op.drop_column("mapping_records", "updated_at")
+
+    op.create_index(
+        "uq_mapping_records_current",
+        "mapping_records",
+        ["variant_id"],
+        unique=True,
+        postgresql_where=sa.text("valid_to IS NULL"),
+    )
+
+
+def downgrade() -> None:
+    """Restore current / created_at / updated_at from the valid-time columns, then drop those columns."""
+    op.drop_index("uq_mapping_records_current", table_name="mapping_records")
+
+    op.add_column("mapping_records", sa.Column("current", sa.Boolean(), nullable=True))
+    # Re-add the audit dates with the CURRENT_DATE server default from the table's original
+    # definition (revision a1b2c3d4e5f6), so inserts that omit them keep working once NOT NULL.
+    op.add_column(
+        "mapping_records",
+        sa.Column("created_at", sa.Date(), nullable=True, server_default=sa.text("CURRENT_DATE")),
+    )
+    op.add_column(
+        "mapping_records",
+        sa.Column("updated_at", sa.Date(), nullable=True, server_default=sa.text("CURRENT_DATE")),
+    )
+
+    op.execute("UPDATE mapping_records SET current = (valid_to IS NULL)")
+    op.execute("UPDATE mapping_records SET created_at = valid_from::date")
+    op.execute("UPDATE mapping_records SET updated_at = coalesce(valid_to, valid_from)::date")
+
+    op.alter_column("mapping_records", "current", nullable=False)
+    op.alter_column("mapping_records", "created_at", nullable=False)
+    op.alter_column("mapping_records", "updated_at", nullable=False)
+
+    op.drop_column("mapping_records", "valid_to")
+    op.drop_column("mapping_records", "valid_from")
diff --git a/alembic/versions/f4d2a9c1b7e3_add_cross_level_translation_annotation_type.py b/alembic/versions/f4d2a9c1b7e3_add_cross_level_translation_annotation_type.py
@@ -0,0 +1,62 @@
+"""add cross_level_translation annotation type
+
+Revision ID: f4d2a9c1b7e3
+Revises: a1b2c3d4e5f6
+Create Date: 2026-06-02
+
+Extends ck_variant_annotation_type_valid to allow the 'cross_level_translation'
+annotation type. The VRS mapping worker writes one such row per variant to record
+whether cross-level translation (filling the levels the assay did not map)
+succeeded, was skipped (multivariant / no transcript), or failed.
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "f4d2a9c1b7e3"
+down_revision = "a1b2c3d4e5f6"
+branch_labels = None
+depends_on = None
+
+_CHECK_NAME = "ck_variant_annotation_type_valid"
+_TABLE_NAME = "variant_annotation_status"
+
+# Annotation types accepted before this revision, in the order the constraint lists them.
+_BASE_TYPES = (
+    "vrs_mapping",
+    "clingen_allele_id",
+    "mapped_hgvs",
+    "variant_translation",
+    "gnomad_allele_frequency",
+    "clinvar_control",
+    "vep_functional_consequence",
+    "ldh_submission",
+)
+
+
+def _quoted_csv(names) -> str:
+    """Render names as a comma-separated list of single-quoted SQL string literals."""
+    return ", ".join(f"'{name}'" for name in names)
+
+
+_TYPES_OLD = _quoted_csv(_BASE_TYPES)
+# The new type is listed directly after 'vrs_mapping'.
+_TYPES_NEW = _quoted_csv((_BASE_TYPES[0], "cross_level_translation", *_BASE_TYPES[1:]))
+
+
+def _swap_type_check(allowed: str) -> None:
+    """Drop the annotation-type CHECK constraint and recreate it with the given value list."""
+    op.drop_constraint(_CHECK_NAME, _TABLE_NAME, type_="check")
+    op.create_check_constraint(_CHECK_NAME, _TABLE_NAME, f"annotation_type IN ({allowed})")
+
+
+def upgrade() -> None:
+    """Recreate the constraint so that 'cross_level_translation' is accepted."""
+    _swap_type_check(_TYPES_NEW)
+
+
+def downgrade() -> None:
+    """Delete 'cross_level_translation' rows, then restore the previous constraint."""
+    # Rows of the new type must go first: the narrower constraint would reject them on creation.
+    op.execute("DELETE FROM variant_annotation_status WHERE annotation_type = 'cross_level_translation'")
+    _swap_type_check(_TYPES_OLD)