Source code for src.seqvar.auto_pm4_bp3

"""Implementation of PM4 and BP3 rules for sequence variants."""

import os
from typing import Optional, Tuple

import tabix
from loguru import logger

from src.core.config import settings
from src.defs.auto_acmg import (
    PM4BP3,
    AutoACMGCriteria,
    AutoACMGPrediction,
    AutoACMGSeqVarData,
    AutoACMGStrength,
)
from src.defs.exceptions import AlgorithmError, AutoAcmgBaseException
from src.defs.genome_builds import GenomeRelease
from src.defs.seqvar import SeqVar
from src.utils import AutoACMGHelper


class AutoPM4BP3(AutoACMGHelper):
    """Predict the ACMG criteria PM4 and BP3 for sequence variants.

    PM4 covers protein length changes caused by stop-loss variants or by
    in-frame deletions/insertions outside of repeat regions. BP3 covers
    in-frame deletions/insertions inside repeat regions.
    """

    def __init__(self):
        super().__init__()
        #: Prediction result.
        self.prediction_pm4bp3: Optional[PM4BP3] = None
        #: Comment to store the prediction explanation.
        self.comment_pm4bp3: str = ""

    def _in_repeat_region(self, seqvar: SeqVar) -> bool:
        """
        Check if the variant is in a repeat region using the RepeatMasker track.

        The bundled, tabix-indexed ``rmsk.bed.gz`` matching the variant's genome
        release is queried for the single base at ``seqvar.pos``.

        Args:
            seqvar: Sequence variant.

        Returns:
            bool: True if at least one RepeatMasker record overlaps the position,
            False otherwise.

        Raises:
            AlgorithmError: If tabix fails to query the RepeatMasker track.
        """
        # Every release other than GRCh37 falls back to the GRCh38 track.
        release_dir = "grch37" if seqvar.genome_release == GenomeRelease.GRCh37 else "grch38"
        path = os.path.join(settings.PATH_TO_ROOT, "lib", "rmsk", release_dir, "rmsk.bed.gz")
        try:
            tb = tabix.open(path)
            # NOTE(review): for mitochondrial variants (chrom "MT") this queries
            # "chrMT"; UCSC-style tracks usually name that contig "chrM" -- confirm
            # against the bundled rmsk file.
            records = tb.query(f"chr{seqvar.chrom}", seqvar.pos - 1, seqvar.pos)
            # A single overlapping record is enough; stop at the first one.
            return any(True for _ in records)
        except tabix.TabixError as e:
            raise AlgorithmError("Failed to check if the variant is in a repeat region.") from e

    def _is_stop_loss(self, var_data: AutoACMGSeqVarData) -> bool:
        """
        Check if the variant's consequence is a stop-loss.

        The Sequence Ontology names this consequence ``stop_lost``; the legacy
        spelling ``stop_loss`` is still accepted so that every input recognised
        before keeps being recognised.

        Args:
            var_data: The variant information.

        Returns:
            bool: True if the CADD consequence or any Mehari consequence contains
            a stop-loss term, False otherwise.
        """
        stop_loss_terms = ("stop_lost", "stop_loss")
        cadd_consequence = var_data.consequence.cadd
        if any(term in cadd_consequence for term in stop_loss_terms):
            return True
        return any(
            term in cons for cons in var_data.consequence.mehari for term in stop_loss_terms
        )

    def is_inframe_delins(self, var_data: AutoACMGSeqVarData) -> bool:
        """
        Check if the variant's consequence is an in-frame deletion/insertion.

        Args:
            var_data: The variant information.

        Returns:
            bool: True if the CADD consequence or any Mehari consequence contains
            ``inframe_deletion`` or ``inframe_insertion``, False otherwise.
        """
        inframe_terms = ("inframe_deletion", "inframe_insertion")
        cadd_consequence = var_data.consequence.cadd
        if any(term in cadd_consequence for term in inframe_terms):
            return True
        return any(term in cons for cons in var_data.consequence.mehari for term in inframe_terms)

    def _bp3_not_applicable(self, seqvar: SeqVar, var_data: AutoACMGSeqVarData) -> bool:
        """
        Check if BP3 is not applicable for the variant.

        This implementation only excludes mitochondrial variants; ``var_data`` is
        accepted but not inspected here.

        Args:
            seqvar: Sequence variant.
            var_data: The variant information.

        Returns:
            bool: True if BP3 is not applicable, False otherwise.
        """
        return seqvar.chrom == "MT"

    def verify_pm4bp3(
        self, seqvar: SeqVar, var_data: AutoACMGSeqVarData
    ) -> Tuple[Optional[PM4BP3], str]:
        """Predicts PM4 and BP3 criteria for the provided sequence variant.

        Implementation of the rule:
        - If the variant is a stop-loss variant, PM4 is True and BP3 is False.
        - If the variant is an in-frame deletion/insertion:
            - If the variant is not in a repeat region, PM4 is True and BP3 is False.
            - If the variant is in a repeat region, PM4 is False and BP3 is True.
        - Otherwise, PM4 and BP3 are False.

        Note:
            Rules:
            PM4: Protein length changes due to in-frame deletions/insertions in a
            non-repeat region or stop-loss variants.
            BP3: In-frame deletions/insertions in a repetitive region without a
            known function.

        Args:
            seqvar: Sequence variant.
            var_data: The variant information

        Returns:
            Tuple[Optional[PM4BP3], str]: Prediction result and comment. The
            prediction is None when an ``AutoAcmgBaseException`` was raised while
            evaluating the rule; the comment then carries the error text.
        """
        self.prediction_pm4bp3 = PM4BP3()
        self.comment_pm4bp3 = ""
        try:
            if self._is_stop_loss(var_data):
                # Stop-loss variants are considered as PM4
                self.comment_pm4bp3 = "Variant consequence is stop-loss. PM4 is met."
                pm4_met, bp3_met = True, False
            elif self.is_inframe_delins(var_data):
                # In-frame deletions/insertions: the repeat context decides
                self.comment_pm4bp3 = "Variant consequence is in-frame deletion/insertion. "
                if self._in_repeat_region(seqvar):
                    self.comment_pm4bp3 += (
                        "Variant is in a repeat region or not in a conserved domain. BP3 is met."
                    )
                    pm4_met, bp3_met = False, True
                else:
                    self.comment_pm4bp3 += (
                        "Variant is not in a repeat region or a conserved domain. PM4 is met."
                    )
                    pm4_met, bp3_met = True, False
            else:
                self.comment_pm4bp3 = (
                    "Variant consequence is not indel or stop-loss. PM4 and BP3 are not met."
                )
                pm4_met, bp3_met = False, False
            self.prediction_pm4bp3.PM4 = pm4_met
            self.prediction_pm4bp3.BP3 = bp3_met
        except AutoAcmgBaseException as e:
            # Report the failure through the comment instead of propagating it.
            self.comment_pm4bp3 = f"An error occured while predicting PM4 and BP3 criteria: {e}"
            self.prediction_pm4bp3 = None
        return self.prediction_pm4bp3, self.comment_pm4bp3

    def predict_pm4bp3(
        self, seqvar: SeqVar, var_data: AutoACMGSeqVarData
    ) -> Tuple[AutoACMGCriteria, AutoACMGCriteria]:
        """Predict PM4 and BP3 criteria.

        Runs ``verify_pm4bp3`` and wraps its outcome into one ``AutoACMGCriteria``
        per criterion. When no prediction is available, both criteria are reported
        as failed with the strengths ``PathogenicModerate`` (PM4) and
        ``BenignSupporting`` (BP3). When ``_bp3_not_applicable`` is true, BP3 is
        reported as not applicable regardless of the prediction.

        Args:
            seqvar: Sequence variant.
            var_data: The variant information.

        Returns:
            Tuple[AutoACMGCriteria, AutoACMGCriteria]: The PM4 and BP3 criteria,
            in this order.
        """

        def as_prediction(flag) -> AutoACMGPrediction:
            # True -> Applicable, False -> NotApplicable, anything else -> Failed.
            if flag:
                return AutoACMGPrediction.Applicable
            if flag is False:
                return AutoACMGPrediction.NotApplicable
            return AutoACMGPrediction.Failed

        logger.info("Predict PM4 and BP3")
        pred, comment = self.verify_pm4bp3(seqvar, var_data)
        if pred:
            pm4_pred = as_prediction(pred.PM4)
            bp3_pred = as_prediction(pred.BP3)
            pm4_strength = pred.PM4_strength
            bp3_strength = pred.BP3_strength
        else:
            pm4_pred = AutoACMGPrediction.Failed
            bp3_pred = AutoACMGPrediction.Failed
            pm4_strength = AutoACMGStrength.PathogenicModerate
            bp3_strength = AutoACMGStrength.BenignSupporting

        bp3_summary = comment
        # BP3 is not applicable for some VCEPs
        if self._bp3_not_applicable(seqvar, var_data):
            bp3_pred = AutoACMGPrediction.NotApplicable
            bp3_summary = "BP3 is not applicable for the gene."

        return (
            AutoACMGCriteria(
                name="PM4",
                prediction=pm4_pred,
                strength=pm4_strength,
                summary=comment,
            ),
            AutoACMGCriteria(
                name="BP3",
                prediction=bp3_pred,
                strength=bp3_strength,
                summary=bp3_summary,
            ),
        )