# Source code for src.seqvar.auto_pp3_bp4

"""Implementation of the PP3 and BP4 criteria."""

from typing import Optional, Tuple

from loguru import logger

from src.defs.auto_acmg import (
    PP3BP4,
    AutoACMGCriteria,
    AutoACMGPrediction,
    AutoACMGSeqVarData,
    AutoACMGStrength,
)
from src.defs.exceptions import AutoAcmgBaseException, MissingDataError
from src.defs.seqvar import SeqVar
from src.utils import AutoACMGHelper



[docs]
class AutoPP3BP4(AutoACMGHelper):
    """Class for PP3 and BP4 prediction."""

    def __init__(self):
        """Run the base-class initialiser and start with an empty PP3/BP4 result."""
        super().__init__()
        #: Comment to store the prediction explanation.
        self.comment_pp3bp4: str = ""
        #: Prediction result.
        self.prediction_pp3bp4: Optional[PP3BP4] = None


[docs]
    def _is_splice_variant(self, var_data: AutoACMGSeqVarData) -> bool:
        """
        Check if the variant's consequence is a splice related.

        Args:
            var_data: The variant information.

        Returns:
            bool: True if the variant is a splice variant, False otherwise.
        """
        if "splice" in var_data.consequence.cadd:
            return True
        if any("splice" in cons for cons in var_data.consequence.mehari):
            return True
        return False



[docs]
    def _is_inframe_indel(self, var_data: AutoACMGSeqVarData) -> bool:
        """
        Check if the variant's consequence is an inframe indel.

        Args:
            var_data: The variant information.

        Returns:
            bool: True if the variant is an inframe indel, False otherwise.
        """
        if "inframe" in var_data.consequence.cadd:
            return True
        if any("inframe" in cons for cons in var_data.consequence.mehari):
            return True
        return False



[docs]
    def _is_missense_variant(self, var_data: AutoACMGSeqVarData) -> bool:
        """
        Check if the variant's consequence is a missense variant.

        Args:
            var_data: The variant information.

        Returns:
            bool: True if the variant is a missense variant, False otherwise.
        """
        if "missense" in var_data.consequence.cadd:
            return True
        if "missense_variant" in var_data.consequence.mehari:
            return True
        return False



[docs]
    def _is_synonymous_variant(self, var_data: AutoACMGSeqVarData) -> bool:
        """
        Check if the variant's consequence is a synonymous variant.

        Args:
            var_data: The variant information.

        Returns:
            bool: True if the variant is a synonymous variant, False otherwise.
        """
        if "synonymous" in var_data.consequence.cadd:
            return True
        if "synonymous_variant" in var_data.consequence.mehari:
            return True
        return False



[docs]
    def _is_intron_variant(self, var_data: AutoACMGSeqVarData) -> bool:
        """
        Check if the variant's consequence is an intron variant.

        Args:
            var_data: The variant information.

        Returns:
            bool: True if the variant is an intron variant, False otherwise.
        """
        if "intron" in var_data.consequence.cadd:
            return True
        if any("intron" in cons for cons in var_data.consequence.mehari):
            return True
        return False



[docs]
    def _is_utr_variant(self, var_data: AutoACMGSeqVarData) -> bool:
        """
        Check if the variant's consequence is an UTR variant.

        Args:
            var_data: The variant information.

        Returns:
            bool: True if the variant is an UTR variant, False otherwise.
        """
        if "UTR" in var_data.consequence.cadd or "utr" in var_data.consequence.cadd:
            return True
        if any("utr" in cons for cons in var_data.consequence.mehari) or any(
            "UTR" in cons for cons in var_data.consequence.mehari
        ):
            return True
        return False



[docs]
    def _is_pathogenic_score(
        self, var_data: AutoACMGSeqVarData, *score_threshold_pairs: Tuple[str, float]
    ) -> bool:
        """
        Check if any of the specified scores meet their corresponding threshold.

        Args:
            var_data: Variant data containing scores and thresholds.
            score_threshold_pairs: Pairs of score attributes and their corresponding pathogenic
            thresholds.

        Returns:
            bool: True if any of the specified scores meet their corresponding threshold, False
            otherwise.
        """
        for score_attr, threshold in score_threshold_pairs:
            score_value = getattr(var_data.scores.dbnsfp, score_attr, None)
            if score_value is not None and score_value >= threshold:
                return True
        return False



[docs]
    def _is_benign_score(
        self, var_data: AutoACMGSeqVarData, *score_threshold_pairs: Tuple[str, float]
    ) -> bool:
        """
        Check if any of the specified scores meet their corresponding threshold.

        Args:
            var_data: Variant data containing scores and thresholds.
            score_threshold_pairs: Pairs of score attributes and their corresponding benign
            thresholds.

        Returns:
            bool: True if any of the specified scores meet their corresponding threshold, False
            otherwise.
        """
        for score_attr, threshold in score_threshold_pairs:
            score_value = getattr(var_data.scores.dbnsfp, score_attr, None)
            if score_value is not None and score_value <= threshold:
                return True
        return False



[docs]
    def _affect_spliceAI(self, var_data: AutoACMGSeqVarData) -> bool:
        """
        Predict splice site alterations using SpliceAI.

        If any of SpliceAI scores are greater than specific thresholds, the variant is considered to
        affect splicing.

        Args:
            var_data: The data containing variant scores and thresholds.

        Returns:
            bool: True if the variant is a splice site alteration, False otherwise.
        """
        score_checks = {
            "spliceAI_acceptor_gain": var_data.thresholds.spliceAI_acceptor_gain,
            "spliceAI_acceptor_loss": var_data.thresholds.spliceAI_acceptor_loss,
            "spliceAI_donor_gain": var_data.thresholds.spliceAI_donor_gain,
            "spliceAI_donor_loss": var_data.thresholds.spliceAI_donor_loss,
        }
        return any(
            (getattr(var_data.scores.cadd, score_name) or 0) > threshold
            for score_name, threshold in score_checks.items()
        )



[docs]
    def _is_pathogenic_splicing(self, var_data: AutoACMGSeqVarData) -> bool:
        """
        Check if the variant is pathogenic based on splicing scores.

        Checks if the Ada and RF scores are greater than the thresholds.

        Args:
            var_data: The variant information.

        Returns:
            bool: True if the variant is pathogenic, False otherwise.

        Raises:
            MissingDataError: If the Ada and RF scores are missing.
        """
        ada = var_data.scores.dbscsnv.ada or var_data.scores.cadd.ada
        rf = var_data.scores.dbscsnv.rf or var_data.scores.cadd.rf
        if not ada and not rf:
            raise MissingDataError("Missing Ada and RF scores.")
        if ada:
            if ada > var_data.thresholds.ada:
                return True
        if rf:
            if rf > var_data.thresholds.rf:
                return True
        return False



[docs]
    def _is_benign_splicing(self, var_data: AutoACMGSeqVarData) -> bool:
        """
        Check if the variant is benign based on splicing scores.

        Checks if the Ada and RF scores are less than the thresholds.

        Args:
            var_data: The variant information.

        Returns:
            bool: True if the variant is benign, False otherwise.

        Raises:
            MissingDataError: If the Ada and RF scores are missing.
        """
        ada = var_data.scores.dbscsnv.ada or var_data.scores.cadd.ada
        rf = var_data.scores.dbscsnv.rf or var_data.scores.cadd.rf
        if not ada and not rf:
            raise MissingDataError("Missing Ada and RF scores.")
        if ada:
            if ada < var_data.thresholds.ada:
                return True
        if rf:
            if rf < var_data.thresholds.rf:
                return True
        return False



[docs]
    def verify_pp3bp4(
        self, seqvar: SeqVar, var_data: AutoACMGSeqVarData
    ) -> Tuple[Optional[PP3BP4], str]:
        """
        Evaluate the PP3 and BP4 criteria for a variant.

        The behaviour depends on ``var_data.thresholds.pp3bp4_strategy``:

        * ``"default"``: PP3 is set when the MetaRNN or BayesDel_noAF score reaches its pathogenic
          threshold, and BP4 when one of them is at or below its benign threshold.
        * anything else: the strategy value names the dbNSFP score to use, and its thresholds are
          read from ``<strategy>_pathogenic`` / ``<strategy>_benign``. If that score does not
          trigger PP3 (respectively BP4), the Ada/RF splicing check is consulted as well.

        The result and an explanatory comment are also stored on the instance
        (``prediction_pp3bp4`` / ``comment_pp3bp4``).

        Note:
            The non-default assessment strategy is used for some VCEPs.

        Args:
            seqvar: Sequence variant (not read by this implementation).
            var_data: The variant information.

        Returns:
            Tuple[Optional[PP3BP4], str]: The prediction result and the comment. When an
            ``AutoAcmgBaseException`` is raised during evaluation the prediction is ``None`` and
            the comment carries the error message.
        """
        self.prediction_pp3bp4 = PP3BP4()
        self.comment_pp3bp4 = ""
        try:
            strategy = var_data.thresholds.pp3bp4_strategy
            if strategy != "default":
                # Score-based verdicts for the single score named by the strategy.
                self.prediction_pp3bp4.PP3 = self._is_pathogenic_score(
                    var_data,
                    (strategy, getattr(var_data.thresholds, f"{strategy}_pathogenic")),
                )
                self.prediction_pp3bp4.BP4 = self._is_benign_score(
                    var_data,
                    (strategy, getattr(var_data.thresholds, f"{strategy}_benign")),
                )

                # The splicing checks only run when the score alone did not set the flag.
                if not self.prediction_pp3bp4.PP3:
                    self.prediction_pp3bp4.PP3 = self._is_pathogenic_splicing(var_data)
                if not self.prediction_pp3bp4.BP4:
                    self.prediction_pp3bp4.BP4 = self._is_benign_splicing(var_data)

                self.comment_pp3bp4 += "".join(
                    (
                        f"Ada score: {var_data.scores.dbscsnv.ada}, ",
                        f"Ada threshold: {var_data.thresholds.ada}. ",
                        f"RF score: {var_data.scores.dbscsnv.rf}, ",
                        f"RF threshold: {var_data.thresholds.rf}. ",
                    )
                )
            else:
                # Default strategy: MetaRNN and BayesDel_noAF, either one is sufficient.
                self.prediction_pp3bp4.PP3 = self._is_pathogenic_score(
                    var_data,
                    ("metaRNN", var_data.thresholds.metaRNN_pathogenic),
                    ("bayesDel_noAF", var_data.thresholds.bayesDel_noAF_pathogenic),
                )
                self.prediction_pp3bp4.BP4 = self._is_benign_score(
                    var_data,
                    ("metaRNN", var_data.thresholds.metaRNN_benign),
                    ("bayesDel_noAF", var_data.thresholds.bayesDel_noAF_benign),
                )
                self.comment_pp3bp4 += "".join(
                    (
                        f"MetaRNN score: {var_data.scores.dbnsfp.metaRNN}, ",
                        f"MetaRNN threshold: {var_data.thresholds.metaRNN_pathogenic}. ",
                        f"BayesDel_noAF score: {var_data.scores.dbnsfp.bayesDel_noAF}, ",
                        f"BayesDel_noAF threshold: {var_data.thresholds.bayesDel_noAF_pathogenic}. ",
                    )
                )
        except AutoAcmgBaseException as err:
            # A failed evaluation is reported through the comment, not re-raised.
            self.prediction_pp3bp4 = None
            self.comment_pp3bp4 = f"An error occurred during prediction. Error: {err}"
        return self.prediction_pp3bp4, self.comment_pp3bp4



[docs]
    def predict_pp3bp4(
        self, seqvar: SeqVar, var_data: AutoACMGSeqVarData
    ) -> Tuple[AutoACMGCriteria, AutoACMGCriteria]:
        """
        Predict PP3 and BP4 criteria.

        Runs ``verify_pp3bp4`` and wraps the outcome in two ``AutoACMGCriteria`` objects. Both
        share the same summary comment.

        Args:
            seqvar: Sequence variant.
            var_data: The variant information.

        Returns:
            Tuple[AutoACMGCriteria, AutoACMGCriteria]: The PP3 criterion followed by the BP4
            criterion. A truthy flag maps to ``Applicable``, an explicit ``False`` to
            ``NotApplicable``, and anything else to ``Failed``. Without a prediction both are
            ``Failed`` with supporting strength.
        """

        def to_prediction(flag):
            # Tri-state mapping: truthy / exactly False / anything else (e.g. None).
            if flag:
                return AutoACMGPrediction.Applicable
            if flag is False:
                return AutoACMGPrediction.NotApplicable
            return AutoACMGPrediction.Failed

        logger.info("Predict PP3 and BP4")
        pred, comment = self.verify_pp3bp4(seqvar, var_data)
        if not pred:
            # Verification produced no result: report both criteria as failed.
            pp3_pred = AutoACMGPrediction.Failed
            bp4_pred = AutoACMGPrediction.Failed
            pp3_strength = AutoACMGStrength.PathogenicSupporting
            bp4_strength = AutoACMGStrength.BenignSupporting
        else:
            pp3_pred = to_prediction(pred.PP3)
            bp4_pred = to_prediction(pred.BP4)
            pp3_strength = pred.PP3_strength
            bp4_strength = pred.BP4_strength

        pp3_criterion = AutoACMGCriteria(
            name="PP3",
            prediction=pp3_pred,
            strength=pp3_strength,
            summary=comment,
        )
        bp4_criterion = AutoACMGCriteria(
            name="BP4",
            prediction=bp4_pred,
            strength=bp4_strength,
            summary=comment,
        )
        return pp3_criterion, bp4_criterion