Source code for src.seqvar.auto_pp2_bp1

"""Implementation of PP2 and BP1 criteria."""

from typing import Optional, Tuple

from loguru import logger

from src.defs.auto_acmg import (
    PP2BP1,
    AutoACMGCriteria,
    AutoACMGPrediction,
    AutoACMGSeqVarData,
    AutoACMGStrength,
)
from src.defs.exceptions import AlgorithmError, AutoAcmgBaseException, InvalidAPIResposeError
from src.defs.seqvar import SeqVar
from src.utils import AutoACMGHelper


class AutoPP2BP1(AutoACMGHelper):
    """Class for PP2 and BP1 prediction.

    ``verify_pp2bp1`` computes the raw prediction and an explanatory comment;
    ``predict_pp2bp1`` wraps that result into two ``AutoACMGCriteria`` objects.
    """

    def __init__(self):
        super().__init__()
        #: Prediction result.
        self.prediction_pp2bp1: Optional[PP2BP1] = None
        #: Comment to store the prediction explanation.
        self.comment_pp2bp1: str = ""

    @staticmethod
    def _is_classified_snv(variant, description: str) -> bool:
        """Check whether the first ClinVar record is an SNV with the given classification.

        Only ``variant.records[0]`` is inspected; further records are ignored.

        Args:
            variant: One entry of the ``clinvar`` list of a range response.
            description: Required germline classification description,
                e.g. ``"Pathogenic"`` or ``"Benign"`` (exact match).

        Returns:
            bool: True if the first record has a germline classification whose
            description equals ``description`` and its variation type is
            ``"VARIATION_TYPE_SNV"``; False otherwise (including when any of the
            intermediate fields is missing or empty).
        """
        if not variant.records:
            return False
        record = variant.records[0]
        classifications = record.classifications
        if not classifications or not classifications.germlineClassification:
            return False
        return bool(
            classifications.germlineClassification.description == description
            and record.variationType == "VARIATION_TYPE_SNV"
        )

    def _get_missense_vars(
        self, seqvar: SeqVar, start_pos: int, end_pos: int
    ) -> Tuple[int, int, int]:
        """Count pathogenic and benign ClinVar SNVs in the specified range.

        The variants in the range are fetched through ``self.annonars_client`` and
        the first ClinVar record of each variant is inspected.

        Note:
            The filter is on the ClinVar variation type (SNV), not on the molecular
            consequence, and the returned total is the sum of the pathogenic and
            benign counts only. Variants with any other classification are not
            counted.

        Args:
            seqvar: The sequence variant being analyzed.
            start_pos: The start position of the range.
            end_pos: The end position of the range.

        Returns:
            Tuple[int, int, int]: The number of pathogenic variants, the number of
            benign variants, and the sum of both.

        Raises:
            AlgorithmError: If end position is less than the start position.
            InvalidAPIResposeError: If the response is empty or carries no ClinVar data.
        """
        if end_pos < start_pos:
            raise AlgorithmError("End position is less than the start position.")

        response = self.annonars_client.get_variant_from_range(seqvar, start_pos, end_pos)
        if not response or not response.clinvar:
            raise InvalidAPIResposeError("Failed to get variant from range. No ClinVar data.")

        pathogenic_count = sum(
            1 for variant in response.clinvar if self._is_classified_snv(variant, "Pathogenic")
        )
        benign_count = sum(
            1 for variant in response.clinvar if self._is_classified_snv(variant, "Benign")
        )
        return pathogenic_count, benign_count, pathogenic_count + benign_count

    def _is_missense(self, var_data: AutoACMGSeqVarData) -> bool:
        """Check if the variant is a missense variant.

        Args:
            var_data: The variant information.

        Returns:
            bool: True if the substring ``"missense"`` occurs in the CADD consequence
            or in any of the Mehari consequences, False otherwise. Missing (``None``)
            consequence values are treated as empty.
        """
        consequence = var_data.consequence
        # `or` guards keep a missing consequence from raising TypeError on `in`.
        if "missense" in (consequence.cadd or ""):
            return True
        return any("missense" in cons for cons in (consequence.mehari or []))

    def _assess_by_z_score(self, prediction: PP2BP1, var_data: AutoACMGSeqVarData) -> str:
        """Set PP2/BP1 on ``prediction`` from the missense Z-score and return the comment.

        PP2 is set when the Z-score exceeds ``thresholds.pp2bp1_pathogenic``;
        otherwise BP1 is set when it is below ``thresholds.pp2bp1_benign``. If
        neither holds, ``prediction`` is left untouched.
        """
        z_score = var_data.scores.misZ
        thresholds = var_data.thresholds
        comment = f"PP2&BP1 prediction based on missense Z-score: {z_score}. "
        if z_score > thresholds.pp2bp1_pathogenic:
            comment += f"Z-score is greater than {thresholds.pp2bp1_pathogenic}. PP2 is met. "
            prediction.PP2 = True
        elif z_score < thresholds.pp2bp1_benign:
            comment += f"Z-score is less than {thresholds.pp2bp1_benign}. BP1 is met."
            prediction.BP1 = True
        return comment

    def _assess_by_variant_counts(
        self, prediction: PP2BP1, seqvar: SeqVar, var_data: AutoACMGSeqVarData
    ) -> str:
        """Set PP2/BP1 on ``prediction`` from ClinVar counts and return the comment.

        The range spans ``var_data.cds_start`` to ``var_data.cds_end`` (ordered so
        that start <= end). Exceptions raised by ``_get_missense_vars`` propagate
        to the caller.
        """
        thresholds = var_data.thresholds
        start_pos = min(var_data.cds_start, var_data.cds_end)
        end_pos = max(var_data.cds_start, var_data.cds_end)
        comment = "No missense Z-score found. Counting missense variants. \n"

        pathogenic_count, benign_count, total_count = self._get_missense_vars(
            seqvar, start_pos, end_pos
        )
        if total_count == 0:
            # No classified variants: the ratios are undefined. Without evidence
            # neither criterion can be met; this also avoids a ZeroDivisionError.
            prediction.PP2 = False
            prediction.BP1 = False
            return comment + (
                "No pathogenic or benign missense variants found "
                f"in the range {start_pos}-{end_pos}. "
                "PP2 and BP1 criteria are not met."
            )

        pathogenic_ratio = pathogenic_count / total_count
        benign_ratio = benign_count / total_count
        comment += (
            f"Found pathogenic missense variants: {pathogenic_count}, "
            f"benign missense variants: {benign_count}, "
            f"total missense variants: {total_count} "
            f"in the range {start_pos}-{end_pos}. "
            f"Pathogenic ratio: {pathogenic_ratio}, Benign ratio: {benign_ratio}. "
        )

        if pathogenic_ratio > thresholds.pp2bp1_pathogenic:
            comment += (
                f"Pathogenic ratio is greater than {thresholds.pp2bp1_pathogenic}. "
                "PP2 is met. "
            )
            prediction.PP2 = True
        else:
            comment += (
                f"Pathogenic ratio is less than {thresholds.pp2bp1_pathogenic}. "
                "PP2 is not met. "
            )

        if benign_ratio > thresholds.pp2bp1_benign:
            comment += f"Benign ratio is greater than {thresholds.pp2bp1_benign}. BP1 is met."
            prediction.BP1 = True
        else:
            comment += f"Benign ratio is less than {thresholds.pp2bp1_benign}. BP1 is not met."
        return comment

    def verify_pp2bp1(
        self, seqvar: SeqVar, var_data: AutoACMGSeqVarData
    ) -> Tuple[Optional[PP2BP1], str]:
        """Predict PP2 and BP1 criteria.

        Decision order:

        1. Variants on chromosome ``"MT"``: both criteria are not met.
        2. Non-missense variants: both criteria are not met.
        3. A missense Z-score is available (``scores.misZ`` is not ``None``; a score
           of ``0.0`` counts as available): the score is compared against the
           thresholds.
        4. Otherwise pathogenic and benign ClinVar variants in the CDS range are
           counted and their ratios are compared against the thresholds. If no
           such variants are found, both criteria are not met.

        The result is also stored in ``self.prediction_pp2bp1`` and
        ``self.comment_pp2bp1``.

        Args:
            seqvar: The sequence variant being analyzed.
            var_data: The variant information.

        Returns:
            Tuple[Optional[PP2BP1], str]: The prediction result and the explanation.
            The prediction is ``None`` if an ``AutoAcmgBaseException`` occurred; the
            explanation then contains the error.
        """
        prediction = PP2BP1()
        self.prediction_pp2bp1 = prediction
        self.comment_pp2bp1 = ""

        if seqvar.chrom == "MT":
            self.comment_pp2bp1 = (
                "Variant is in mitochondrial DNA. PP2 and BP1 criteria are not met."
            )
            prediction.PP2, prediction.BP1 = False, False
            return self.prediction_pp2bp1, self.comment_pp2bp1

        try:
            if not self._is_missense(var_data):
                self.comment_pp2bp1 = (
                    "Variant is not a missense variant. PP2 and BP1 criteria are not met."
                )
                prediction.PP2 = False
                prediction.BP1 = False
            elif var_data.scores.misZ is not None:
                # Explicit None check: a Z-score of exactly 0.0 is a valid score.
                self.comment_pp2bp1 = self._assess_by_z_score(prediction, var_data)
            else:
                self.comment_pp2bp1 = self._assess_by_variant_counts(
                    prediction, seqvar, var_data
                )
        except AutoAcmgBaseException as e:
            self.comment_pp2bp1 = f"Error occurred during PP2 and BP1 prediction. Error: {e}"
            self.prediction_pp2bp1 = None

        return self.prediction_pp2bp1, self.comment_pp2bp1

    @staticmethod
    def _to_prediction(met: Optional[bool]) -> AutoACMGPrediction:
        """Map a tri-state criterion flag onto an ``AutoACMGPrediction``.

        Truthy -> ``Applicable``; exactly ``False`` -> ``NotApplicable``;
        anything else (e.g. ``None``) -> ``Failed``.
        """
        if met:
            return AutoACMGPrediction.Applicable
        if met is False:
            return AutoACMGPrediction.NotApplicable
        return AutoACMGPrediction.Failed

    def predict_pp2bp1(
        self, seqvar: SeqVar, var_data: AutoACMGSeqVarData
    ) -> Tuple[AutoACMGCriteria, AutoACMGCriteria]:
        """Predict PP2 and BP1 criteria.

        Args:
            seqvar: The sequence variant being analyzed.
            var_data: The variant information.

        Returns:
            Tuple[AutoACMGCriteria, AutoACMGCriteria]: The PP2 and the BP1 criteria,
            in that order. Both share the same summary text. If verification
            failed, both predictions are ``Failed`` and the strengths fall back to
            ``PathogenicSupporting`` (PP2) and ``BenignSupporting`` (BP1).
        """
        logger.info("Predict PP2 and BP1")
        pred, comment = self.verify_pp2bp1(seqvar, var_data)

        if pred is not None:
            pp2_pred = self._to_prediction(pred.PP2)
            bp1_pred = self._to_prediction(pred.BP1)
            pp2_strength = pred.PP2_strength
            bp1_strength = pred.BP1_strength
        else:
            pp2_pred = AutoACMGPrediction.Failed
            bp1_pred = AutoACMGPrediction.Failed
            pp2_strength = AutoACMGStrength.PathogenicSupporting
            bp1_strength = AutoACMGStrength.BenignSupporting

        return (
            AutoACMGCriteria(
                name="PP2",
                prediction=pp2_pred,
                strength=pp2_strength,
                summary=comment,
            ),
            AutoACMGCriteria(
                name="BP1",
                prediction=bp1_pred,
                strength=bp1_strength,
                summary=comment,
            ),
        )