diff --git a/README.md b/README.md index 718e15fa521c905e83ba97109f4e89d0c740de1e..3c68a64b809293dc76d99c01e2c34fdfdd5f054d 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ # 1) download kubectl, chmod +x kubectl # 2) download kubeconfig.yml, # 3) run the command: - ./kubectl --kubeconfig=kubeconfig.yml cp -n slaninakova-ns <path_to_data> <pod_with_pvc_mounted>:/app/data - # ./kubectl --kubeconfig=kubeconfig.yml cp -n slaninakova-ns proteins/proteins-disordered-regions/analyses/search_results_P10911.json slaninakova-ns/protein-search-dashboard-7c46dd6c75-m25nc:/app/data/structures/. + ./kubectl --kubeconfig=kubeconfig.yml cp -n fi-lmi-ns <path_to_data> <pod_with_pvc_mounted>:/app/data + # ./kubectl --kubeconfig=kubeconfig.yml cp -n fi-lmi-ns proteins/proteins-disordered-regions/analyses/search_results_P10911.json fi-lmi-ns/protein-search-dashboard-7c46dd6c75-m25nc:/app/data/structures/. ``` ## Useful commands: @@ -35,13 +35,13 @@ mkdir -p protein_db metadata 2. Copy to the PVC: ```bash # Get pod name -POD=$(kubectl --kubeconfig=kubeconfig.yml get pods -n slaninakova-ns -l app=protein-search-dashboard -o jsonpath='{.items[0].metadata.name}') +POD=$(kubectl --kubeconfig=kubeconfig.yml get pods -n fi-lmi-ns -l app=protein-search-dashboard -o jsonpath='{.items[0].metadata.name}') # Copy protein database -kubectl --kubeconfig=kubeconfig.yml cp -n slaninakova-ns ./protein_db/. $POD:/app/data/protein_db/ +kubectl --kubeconfig=kubeconfig.yml cp -n fi-lmi-ns ./protein_db/. $POD:/app/data/protein_db/ # Copy metadata -kubectl --kubeconfig=kubeconfig.yml cp -n slaninakova-ns ./metadata/. $POD:/app/data/metadata/ +kubectl --kubeconfig=kubeconfig.yml cp -n fi-lmi-ns ./metadata/. $POD:/app/data/metadata/ ``` 3. 
Verify the data: diff --git a/app/config/settings.py b/app/config/settings.py index 8333e43b7f0898c013ccfdee9afdc96c09ef33a3..678d1fe5bcbb5b0956c939d1eb51440c187ee7bf 100644 --- a/app/config/settings.py +++ b/app/config/settings.py @@ -28,5 +28,5 @@ STRUCTURES_DIR = DATA_DIR / "structures" PDB_DATABASE_DIR = DATA_DIR / "protein_db" HIGH_PLDDT_PDB_DATABASE_DIR = DATA_DIR / "plddt_cutoff_70" METADATA_DIR = DATA_DIR / "metadata" -METADATA_2_DIR = DATA_DIR / "metadata_2" +METADATA_2_DIR = DATA_DIR / "metadata_pae_threshold=8_m=30_discarded" DISCARDED_DIR = METADATA_2_DIR / "discarded" \ No newline at end of file diff --git a/app/main.py b/app/main.py index 1a9ee37288c1e7688bb0cbe73384946756471984..0c08a58d5294e8bffb69ca0d3763fd2e374ca90b 100644 --- a/app/main.py +++ b/app/main.py @@ -1,1001 +1,57 @@ -from fastapi import FastAPI, UploadFile, Request -from fastapi.responses import HTMLResponse -import py3Dmol -import requests -import IPython.display as display +from fastapi import FastAPI +import logging import os from pathlib import Path -import pandas as pd -import json -import logging + from app.services.protein_service import ProteinService from app.services.storage_service import StorageService -from app.services.overlap_service import visualize_protein_overlap from app.services.visualization_service import VisualizationService -from app.templates.protein_view import get_protein_page_html from app.utils.logging import logger -import numpy as np -from fastapi import HTTPException -from app.routes import ground_truth # , protein, api # Comment out until created from app.config.settings import * -from typing import Optional -import re -from app.utils.api import get_protein_name -import httpx + +# Import our routers +from app.routes import ground_truth +from app.routes import protein +from app.routes import visualization +from app.routes import api +from app.routes import home app = FastAPI() + +# Initialize services storage_service = StorageService() protein_service = 
ProteinService() visualization_service = VisualizationService(app) -# Base URL for AlphaFold DB -ALPHAFOLD_BASE_URL = "https://alphafold.ebi.ac.uk" -PROTEIN_ID = "P10911" # Default protein ID for example/testing -ALPHAFOLD_URL = f"{ALPHAFOLD_BASE_URL}/files/AF-{PROTEIN_ID}-F1-model_v4.pdb" -AFDB_ENTRY_URL = f"{ALPHAFOLD_BASE_URL}/entry/{PROTEIN_ID}" - -# Ensure all directories exist -for directory in [CACHE_DIR, STRUCTURES_DIR, PDB_DATABASE_DIR, METADATA_DIR, METADATA_2_DIR, DISCARDED_DIR]: - os.makedirs(directory, exist_ok=True) - +# Provide service instances to route modules +visualization.visualization_service = visualization_service +protein.visualization_service = visualization_service # Set up logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s' ) -logger = logging.getLogger(__name__) + +# Ensure all directories exist +for directory in [CACHE_DIR, STRUCTURES_DIR, PDB_DATABASE_DIR, METADATA_DIR, METADATA_2_DIR, DISCARDED_DIR]: + os.makedirs(directory, exist_ok=True) # Ensure storage is set up storage_service.ensure_directories() # Determine the environment ENVIRONMENT = os.getenv("ENVIRONMENT", "local") # Default to 'local' if not set +logger.info(f"Running in {ENVIRONMENT} environment") -# Add the directory path for proteins based on the environment -if ENVIRONMENT == "production": - protein_directory = '/app/data/structures' # Production path - ground_truth_directory = '/app/data/ground_truth' -else: - protein_directory = 'test/query_target_search' # Local path - ground_truth_directory = 'test/ground_truth' -logger.info(f"Protein directory: {protein_directory}") - -# Define color mapping for parts based on protein_service.py colors -PART_COLORS = { - 'A': '#4169E1', # Royal Blue - 'B': '#800080', # Purple - 'C': '#FFA500', # Orange - 'D': '#00CED1', # Dark Turquoise - 'E': '#FF1493', # Deep Pink - 'F': '#DAA520', # Goldenrod - 'G': '#8B4513', # Saddle Brown - 'H': '#4B0082', # Indigo - 'I': '#20B2AA', # Light 
Sea Blue - 'J': '#FF6347', # Tomato - 'K': '#9370DB', # Medium Purple - 'L': '#B8860B', # Dark Goldenrod - 'M': '#1E90FF', # Dodger Blue - 'N': '#DDA0DD', # Plum - 'O': '#FFD700', # Gold -} - -def colorize_parts(parts_str): - """ - Colorize protein parts based on the predefined color mapping. - - Args: - parts_str: A comma-separated string of parts (e.g., "A, B, C") - - Returns: - HTML-formatted string with colored parts - """ - if not parts_str or not isinstance(parts_str, str): - return parts_str - - # Split the parts string and colorize each part - parts = [p.strip() for p in parts_str.split(',')] - colored_parts = [] - - for part in parts: - if not part: - continue - - # Handle multi-character parts (like "A1", "B-C", etc.) - if '-' in part: - # For ranges like "A-B", color each letter - subparts = part.split('-') - colored_subparts = [] - - for i, subpart in enumerate(subparts): - if not subpart: - colored_subparts.append('') - continue - - if subpart[0] in PART_COLORS: - colored_subparts.append(f'<span style="color:{PART_COLORS[subpart[0]]}; font-weight:bold;">{subpart[0]}</span>{subpart[1:]}') - else: - colored_subparts.append(subpart) - - colored_parts.append('-'.join(colored_subparts)) - else: - # For single parts like "A", "B1", etc. 
- if part[0] in PART_COLORS: - colored_parts.append(f'<span style="color:{PART_COLORS[part[0]]}; font-weight:bold;">{part[0]}</span>{part[1:]}') - else: - colored_parts.append(part) - - return ', '.join(colored_parts) - -def get_protein_structure(protein_id: str) -> str: - """Get protein structure from local database or download if not found""" - try: - # Try local database first - local_pdb_path = PDB_DATABASE_DIR / f"AF-{protein_id}-F1-model_v4.pdb" - if local_pdb_path.exists(): - logger.info(f"Loading structure from local database: {local_pdb_path}") - data = local_pdb_path.read_text() - if not data.strip(): - raise ValueError("Empty PDB file") - return data - else: - logger.info(f"Structure not found in local database at {local_pdb_path}") - - # Fallback to AlphaFold download - logger.info(f"Downloading structure from AlphaFold: {protein_id}") - structure_url = f"https://alphafold.ebi.ac.uk/files/AF-{protein_id}-F1-model_v4.pdb" - response = requests.get(structure_url) - response.raise_for_status() - data = response.text - if not data.strip(): - raise ValueError("Empty response from AlphaFold") - - # Cache the downloaded data - local_pdb_path.write_text(data) - logger.info(f"Cached structure data to {local_pdb_path}") - return data - except Exception as e: - logger.error(f"Error getting protein structure for {protein_id}: {str(e)}") - raise - -def get_protein_metadata(protein_id: str) -> dict: - """Get protein metadata from local JSON file""" - json_path = METADATA_DIR / f"AF-{protein_id}-F1-model_v4.json" - if not json_path.exists(): - raise FileNotFoundError(f"No metadata found for protein {protein_id}, looked in {json_path}") - - with open(json_path) as f: - return json.load(f) - -def list_proteins(directory): - """List all protein files in the specified directory.""" - try: - return [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))] - except Exception as e: - print(f"Error reading directory {directory}: {e}") - return [] - 
-@app.get("/") -async def root(): - # Create a py3Dmol viewer with optimized settings - view = py3Dmol.view(width=800, height=600) - view.setViewStyle({'style':'outline'}) # Optimizes initial rendering - - # Get structure from cache or download - structure_id = f"AF-{PROTEIN_ID}-F1-model_v4.pdb" - cache_path = CACHE_DIR / structure_id - - if not cache_path.exists(): - pdb_data = requests.get(ALPHAFOLD_URL).text - cache_path.write_text(pdb_data) - else: - pdb_data = cache_path.read_text() - - # Set up the viewer with optimized settings - view.addModel(pdb_data, "pdb") - view.setStyle({'model': -1}, { - 'cartoon': { - 'color': 'spectrum', - 'thickness': 0.2, # Reduced thickness for better performance - 'detail': 'low' # Lower detail for better performance - } - }) - view.zoomTo() - - # Add the directory path for proteins - proteins = list_proteins(ground_truth_directory) # Get the list of proteins - category = "full_pdbs" - ground_truth_files = list_proteins(f'{ground_truth_directory}/{category}') - high_plddt_files = list_proteins(f'{ground_truth_directory}/high_plddt') - metaregion_files = list_proteins(f'{ground_truth_directory}/metaregions') - - # Create table data - ground_truth_table = [] - for i, file in enumerate(ground_truth_files): - if file.startswith("ground_truth_AF-") and file.endswith(".tsv"): - protein_id = file.split("-")[1] - - # Get PDB file size - pdb_path = PDB_DATABASE_DIR / f"AF-{protein_id}-F1-model_v4.pdb" - try: - pdb_size = os.path.getsize(pdb_path) - pdb_size_mb = pdb_size / (1024 * 1024) # Store as float - except (FileNotFoundError, OSError): - pdb_size_mb = 0 # Use 0 for sorting purposes - - # Get number of regions from metadata - metadata_path = METADATA_DIR / f"AF-{protein_id}-F1-model_v4.json" - try: - with open(metadata_path) as f: - metadata = json.load(f) - n_regions = len(metadata) # Count the number of entries in the metadata - except (FileNotFoundError, json.JSONDecodeError, KeyError) as e: - logger.error(f"Error reading 
metadata for {protein_id}: {e}") - n_regions = "N/A" - - ground_truth_table.append({ - "#": i+1, - "protein_id": protein_id, - "chain_gt": f'<a href="/ground_truth/{protein_id}">View</a>', - "pdb_filesize": pdb_size_mb, # Store as float - "n_regions": n_regions - }) - - # Create the table HTML - table_html = """ - <table id="ground-truth-table" class="table table-striped"> - <thead> - <tr> - <th>#</th> - <th>Protein ID</th> - <th>Protein name</th> - <th>PDB File Size</th> - <th>Number of Regions</th> - </tr> - </thead> - <tbody> - """ - - for row in ground_truth_table: - protein_name = get_protein_name(row['protein_id']) - # Format the file size for display - file_size_display = f"{row['pdb_filesize']:.2f} MB" if row['pdb_filesize'] > 0 else "N/A" - - table_html += f""" - <tr> - <td>{row['#']}</td> - <td><a href="/protein/{row['protein_id']}">{row['protein_id']}</a></td> - <td>{protein_name}</td> - - <td data-order="{row['pdb_filesize']}">{file_size_display}</td> - <td>{row['n_regions']}</td> - </tr> - """ - - table_html += """ - </tbody> - </table> - """ - - return HTMLResponse(f""" - <html> - <head> - <title>Protein Search Demo</title> - <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"> - <link href="https://cdn.datatables.net/v/bs5/dt-1.13.4/datatables.min.css" rel="stylesheet"> - <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script> - <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js"></script> - <script src="https://cdn.datatables.net/v/bs5/dt-1.13.4/datatables.min.js"></script> - </head> - <body> - <div class="container mt-5"> - <h1>AlphaFind 2.0 Search Demo</h1> - <p>The main goal of the AlphaFind 2.0 version is to work with the pLDDT and PAE metrics through identifying stable regions in the protein. 
</p> - - - <h2>Available proteins</h2> - <div class="table-responsive"> - {table_html} - </div> - </div> - - <script> - $(document).ready(function() {{ - $('#ground-truth-table').DataTable({{ - pageLength: 25, - order: [[0, 'asc']], - lengthMenu: [[10, 25, 50, 100, -1], [10, 25, 50, 100, "All"]], - columnDefs: [ - {{ - targets: [0, 1, 2, 3], // Only these columns are not sortable - orderable: true - }} - ] - }}); - }}); - </script> - </body> - </html> - """) - -@app.get("/visualize") -async def visualize(): - try: - # Create a cache path for this structure - structure_id = f"AF-{PROTEIN_ID}-F1-model_v4.pdb" - cache_path = CACHE_DIR / structure_id - - # Check if structure is already cached - if not cache_path.exists(): - # Fetch PDB data from AlphaFold - response = requests.get(ALPHAFOLD_URL) - response.raise_for_status() - pdb_data = response.text - - # Basic validation - if not pdb_data.strip().startswith('ATOM') and not pdb_data.strip().startswith('HETATM'): - raise ValueError("Invalid PDB data received") - - # Save to cache - cache_path.write_text(pdb_data) - else: - pdb_data = cache_path.read_text() - if not pdb_data.strip(): - raise ValueError("Empty PDB data in cache") - - # Return with correct content type - return HTMLResponse( - content=pdb_data, - headers={"Content-Type": "chemical/x-pdb"} - ) - - except Exception as e: - return HTMLResponse( - content=f"Error loading structure: {str(e)}", - status_code=500 - ) - -@app.get("/widget") -async def widget(): - view = py3Dmol.view(width=800, height=600) - view.setViewStyle({'style':'outline'}) - - structure_id = f"AF-{PROTEIN_ID}-F1-model_v4.pdb" - cache_path = CACHE_DIR / structure_id - - if not cache_path.exists(): - pdb_data = requests.get(ALPHAFOLD_URL).text - cache_path.write_text(pdb_data) - else: - pdb_data = cache_path.read_text() - - view.addModel(pdb_data, "pdb") - view.setStyle({'model': -1}, { - 'cartoon': { - 'color': 'spectrum', - 'thickness': 0.2, - 'detail': 'low' - } - }) - view.zoomTo() - - 
return HTMLResponse(view._make_html()) - -@app.get("/health") -async def health_check(): - return {"status": "healthy"} - -@app.get("/test") -async def test(): - return {"message": "Server is running"} - -@app.get("/storage-info") -async def storage_info(): - cache_files = list(CACHE_DIR.glob("*")) - structure_files = list(STRUCTURES_DIR.glob("*")) - pdb_files = list(PDB_DATABASE_DIR.glob("*.pdb")) - metadata_files = list(METADATA_DIR.glob("*.json")) - - return { - "cache_count": len(cache_files), - "cache_files": [f.name for f in cache_files], - "structures_count": len(structure_files), - "structures_files": [f.name for f in structure_files], - "protein_db_count": len(pdb_files), - "metadata_count": len(metadata_files), - "total_size_mb": sum( - f.stat().st_size for f in - (cache_files + structure_files + pdb_files + metadata_files) - ) / (1024 * 1024) - } - -@app.get("/protein/{protein_id}") -async def protein_view(protein_id: str): - try: - logger.info(f"Loading protein structure for {protein_id}") - pdb_data = protein_service.get_structure(protein_id) - - # Create viewers with different types - viewer_html = [ - protein_service.create_viewer(pdb_data), # Whole chain search - protein_service.create_viewer(pdb_data, "disordered", protein_id), # Disordered cut out search - protein_service.create_viewer(pdb_data, "regions", protein_id) # Stable regions search - ] - - # Get results if available - tables_html = [] - has_results = False - comparative_html = "" - - # Define result files for each search type - result_files = [ - f"full_pdbs/AF-{protein_id}-F1-model_v4.json", # Whole chain search - f"high_plddt/AF-{protein_id}-F1-model_v4.json", # Disordered regions - f"metaregions/AF-{protein_id}-F1-model_v4.json" # Stable regions - ] - - all_results = [] # Store all results for each type - - # Load results from each file - for result_file in result_files: - try: - # Look in both production and test directories - file_paths = [ - Path(protein_directory) / result_file, # 
Production path - Path("test/query_target_search") / result_file # Test path - ] - - result_path = next( - (path for path in file_paths if path.exists()), - None - ) - - if result_path: - with open(result_path) as f: - results_data = json.load(f) - - # Extract results based on file structure - if isinstance(results_data, list) and len(results_data) > 0: - results = results_data[0]['results'] - else: - results = results_data['results'] - - all_results.append(results) - else: - all_results.append([]) # Empty results if file not found - - except Exception as e: - logger.error(f"Error loading results from {result_file}: {str(e)}") - all_results.append([]) # Empty results on error - - # Create tables if we have any results - if any(all_results): - has_results = True - tables_html = [] - - # First pass: Create DataFrames and compute indices - results_dfs = [] - indices_df = pd.DataFrame(columns=["target_id", "results1", "results2", "results3"]) - - for i, results in enumerate(all_results): - if results: - # Create DataFrame - results_df = pd.DataFrame(results) - - # Replace NaN with negative infinity in query_tmscore for sorting - results_df['query_tmscore'] = results_df['query_tmscore'].fillna(float('-inf')) - - # Sort by query_tmscore descending and drop duplicates keeping first occurrence - results_df = (results_df.sort_values('query_tmscore', ascending=False) - .drop_duplicates(subset=['target_id'], keep='first') - .reset_index(drop=True)) - - # Format results after sorting - results_df['similarity_score'] = results_df['similarity_score'].round(4) - results_df['query_tmscore'] = results_df['query_tmscore'].round(4) - results_df['target_tmscore'] = results_df['target_tmscore'].round(4) - - # rename target_id values to have just the protein id - results_df['target_id'] = results_df['target_id'].str.split('-').str[1] - - # Store the DataFrame - results_dfs.append(results_df) - - # Process target_id and store index - if i == 0: - # For first table, create initial 
DataFrame with indices - indices_df = pd.DataFrame({ - "target_id": results_df['target_id'], - "results1": results_df.index - }) - else: - # For other tables, create temporary mapping - temp_indices = pd.DataFrame({ - "target_id": results_df['target_id'], - f"results{i+1}": results_df.index - }) - # Merge with existing indices - indices_df = indices_df.merge( - temp_indices, - on="target_id", - how="outer" - ).drop_duplicates(subset=['target_id'], keep='first') - else: - results_dfs.append(pd.DataFrame()) - - # Fill NaN values with '--' - indices_df = indices_df.fillna('--') - - # Log the indices DataFrame - logger.info(f"Indices DataFrame:\n{indices_df.head(3)}") - - # Second pass: Create display tables with indices - for i, results_df in enumerate(results_dfs): - if not results_df.empty: - # Define columns based on table index - if i == 2: # For the third table (multimer results) - base_columns = ['match', 'TM-Score', 'overlap', 'qparts', 'mparts'] - else: - base_columns = ['match', 'TM-Score', 'overlap'] - - # Create a copy of the DataFrame for display - display_df = results_df.copy() - - # Rename qchains and tchains columns to qparts and mparts - if 'qchains' in display_df.columns: - display_df = display_df.rename(columns={'qchains': 'qparts'}) - if 'tchains' in display_df.columns: - display_df = display_df.rename(columns={'tchains': 'mparts'}) - - # Parse rotation and translation strings to numpy arrays if they exist - if 'rotation' in display_df.columns: - display_df['rotation'] = display_df['rotation'].apply( - lambda x: np.array(eval(x)).reshape(3, 3) if isinstance(x, str) else x - ) - if 'translation' in display_df.columns: - display_df['translation'] = display_df['translation'].apply( - lambda x: np.array(eval(x)).reshape(3,) if isinstance(x, str) else x - ) - - # Add detailed logging - if not display_df.empty: - sample_row = display_df.iloc[0] - logger.debug("Transformation data for first row:") - if 'rotation' in display_df.columns: - 
logger.debug(f"Original rotation data: {sample_row['rotation']}") - try: - rot_array = sample_row['rotation'] - logger.debug(f"Rotation type: {type(rot_array)}") - logger.debug(f"Rotation shape: {rot_array.shape}") - logger.debug(f"Rotation matrix:\n{rot_array}") - except Exception as e: - logger.error(f"Error processing rotation: {e}") - - if 'translation' in display_df.columns: - logger.debug(f"Original translation data: {sample_row['translation']}") - try: - trans_array = sample_row['translation'] - logger.debug(f"Translation type: {type(trans_array)}") - logger.debug(f"Translation shape: {trans_array.shape}") - logger.debug(f"Translation vector: {trans_array}") - except Exception as e: - logger.error(f"Error processing translation: {e}") - - # Format the chain information before creating the table - if 'qparts' in display_df.columns: - # Add debug logging - logger.debug("Sample qparts before processing:") - if not display_df.empty: - logger.debug(f"First row qparts: {display_df.iloc[0]['qparts']}") - logger.debug(f"Type: {type(display_df.iloc[0]['qparts'])}") - - # Handle chains that are already comma-separated strings - display_df['qparts'] = display_df['qparts'].apply( - lambda x: ( - logger.debug(f"Processing qparts: {x} of type {type(x)}") or - (x if isinstance(x, str) else - ', '.join(x) if isinstance(x, list) else '') - ) - ) - - # Apply colorization to qparts - display_df['qparts'] = display_df['qparts'].apply(colorize_parts) - - if 'mparts' in display_df.columns: - logger.debug("Sample mparts before processing:") - if not display_df.empty: - logger.debug(f"First row mparts: {display_df.iloc[0]['mparts']}") - logger.debug(f"Type: {type(display_df.iloc[0]['mparts'])}") - - # Handle chains that are already comma-separated strings - display_df['mparts'] = display_df['mparts'].apply( - lambda x: ( - logger.debug(f"Processing mparts: {x} of type {type(x)}") or - (x if isinstance(x, str) else - ', '.join(x) if isinstance(x, list) else '') - ) - ) - - # Do not 
apply colorization to mparts - - # Add overlap button with view type based on result set - view_types = ["basic", "disordered", "regions"] - current_view_type = view_types[i] if i < len(view_types) else "basic" - - # Add overlap button column - logger.debug("Creating overlap buttons with chain information") - display_df[' '] = display_df.apply( - lambda row: ( - logger.debug(f"Creating button for row with qparts={row.get('qparts')}, mparts={row.get('mparts')}") or - '<button onclick=\'showOverlap(' + - f'"{protein_id}", ' + - f'"{row["target_id"]}", ' + - f'{json.dumps(row["rotation"].tolist()) if isinstance(row.get("rotation"), np.ndarray) else "null"}, ' + - f'{json.dumps(row["translation"].tolist()) if isinstance(row.get("translation"), np.ndarray) else "null"}, ' + - f'"{current_view_type}", ' + - f'{json.dumps([x.strip() for x in row.get("qparts", "").split(",") if x.strip()])}, ' + - f'{json.dumps([x.strip() for x in row.get("mparts", "").split(",") if x.strip()])}' + - ')\' class="btn btn-primary btn-sm">Overlap</button>' - ), - axis=1 - ) - - # Add position columns from other tables based on the current table - if i == 0: # First table shows positions from tables 2 and 3 - if 'results2' in indices_df.columns and 'results3' in indices_df.columns: - display_df = display_df.merge( - indices_df[['target_id', 'results2', 'results3']], - left_on='target_id', - right_on='target_id', - how='left' - ).drop_duplicates(subset=['target_id'], keep='first') - # Handle fillna with type safety - display_df['results2'] = display_df['results2'].astype(str).replace('nan', '--') - display_df['results3'] = display_df['results3'].astype(str).replace('nan', '--') - position_columns = ['results2', 'results3'] - elif 'results2' in indices_df.columns: - display_df = display_df.merge( - indices_df[['target_id', 'results2']], - left_on='target_id', - right_on='target_id', - how='left' - ).drop_duplicates(subset=['target_id'], keep='first') - position_columns = ['results2'] - 
display_df['results2'] = display_df['results2'].astype(str).replace('nan', '--') - elif 'results3' in indices_df.columns: - display_df = display_df.merge( - indices_df[['target_id', 'results3']], - left_on='target_id', - right_on='target_id', - how='left' - ).drop_duplicates(subset=['target_id'], keep='first') - position_columns = ['results3'] - display_df['results3'] = display_df['results3'].astype(str).replace('nan', '--') - else: - position_columns = [] - elif i == 1: # Second table shows positions from tables 1 and 3 - if 'results1' in indices_df.columns and 'results3' in indices_df.columns: - display_df = display_df.merge( - indices_df[['target_id', 'results1', 'results3']], - left_on='target_id', - right_on='target_id', - how='left' - ).drop_duplicates(subset=['target_id'], keep='first') - position_columns = ['results1', 'results3'] - display_df['results1'] = display_df['results1'].astype(str).replace('nan', '--') - display_df['results3'] = display_df['results3'].astype(str).replace('nan', '--') - elif 'results1' in indices_df.columns: - display_df = display_df.merge( - indices_df[['target_id', 'results1']], - left_on='target_id', - right_on='target_id', - how='left' - ).drop_duplicates(subset=['target_id'], keep='first') - position_columns = ['results1'] - display_df['results1'] = display_df['results1'].astype(str).replace('nan', '--') - elif 'results3' in indices_df.columns: - display_df = display_df.merge( - indices_df[['target_id', 'results3']], - left_on='target_id', - right_on='target_id', - how='left' - ).drop_duplicates(subset=['target_id'], keep='first') - position_columns = ['results3'] - display_df['results3'] = display_df['results3'].astype(str).replace('nan', '--') - else: - position_columns = [] - - else: # Third table shows positions from tables 1 and 2 - display_df = display_df.merge( - indices_df[['target_id', 'results1', 'results2']], - left_on='target_id', - right_on='target_id', - how='left' - ).drop_duplicates(subset=['target_id'], 
keep='first') - display_df['results1'] = display_df['results1'].astype(str).replace('nan', '--') - display_df['results2'] = display_df['results2'].astype(str).replace('nan', '--') - position_columns = ['results1', 'results2'] - - # Create target link column - display_df['match'] = display_df.apply( - lambda row: f'<a href="https://alphafold.ebi.ac.uk/entry/{row["target_id"]}" target="_blank" class="protein-id" data-protein-id="{row["target_id"]}">{row["target_id"]}</a>', - axis=1 - ) - - # Add magnifying glass icon column - display_df['đ'] = display_df.apply( - lambda row: f'<a href="/protein/{row["target_id"]}" class="text-decoration-none">đ</a>', - axis=1 - ) - - # Renaming to save space - display_df = display_df.rename(columns={ - 'query_tmscore': 'TM-Score', - 'target_tmscore': 't_tm', - 'similarity_score': 'score', - }, errors='ignore') - - # Define tooltip texts based on table index - if i == 0: - tm_score_tooltip = "TM-Score computed on the whole chain" - elif i == 1: - tm_score_tooltip = "TM-Score computed on the kept residues" - else: # i == 2, multimer results - tm_score_tooltip = "TM-Score computed on the kept residues after alignment on matched parts" - - # Define tooltips for result columns - results1_tooltip = "Position of the match in Table 1 (Current AlphaFind)" - results2_tooltip = "Position of the match in Table 2 (Disordered parts cut out)" - results3_tooltip = "Position of the match in Table 3 (Disordered parts cut out, stable parts joined)" - - # Define tooltips for parts columns - qparts_tooltip = "Query parts considered" - mparts_tooltip = "Parts from the matches proteins considered" - - if 'results2' in display_df.columns: - display_df['results2'] = display_df['results2'].str.split('.').str[0] - if 'results3' in display_df.columns: - display_df['results3'] = display_df['results3'].str.split('.').str[0] - if 'results1' in display_df.columns: - display_df['results1'] = display_df['results1'].str.split('.').str[0] - - # Ensure all required 
columns exist before creating table - for col in base_columns: - if col not in display_df.columns: - display_df[col] = '' - - # Create final column list ensuring all columns exist - columns = [col for col in base_columns + position_columns if col in display_df.columns] - if 'qparts' in columns: - # reorder columns - columns = ['match', 'TM-Score', ' ', 'qparts', 'mparts', 'results1', 'results2', 'đ'] - elif 'results2' in columns and 'results3' in columns: - columns = ['match', 'TM-Score', ' ', 'results2', 'results3', 'đ'] - elif 'results1' in columns and 'results3' in columns: - columns = ['match', 'TM-Score', ' ', 'results1', 'results3', 'đ'] - else: - # Default case for the first table - columns = ['match', 'TM-Score', ' ', 'results1', 'đ'] - - table_html = display_df.to_html( - classes=f'table table-striped table-hover table-{i}', - escape=False, - index=True, - columns=columns, - float_format=lambda x: '{:.3f}'.format(x) if isinstance(x, float) and x != float('-inf') else 'N/A' - ) - - # Add tooltip to TM-Score column header - table_html = table_html.replace('<th>TM-Score</th>', f'<th><span title="{tm_score_tooltip}">TM-Score âšī¸</span></th>') - - # Add tooltips to results column headers - if 'results1' in columns: - table_html = table_html.replace('<th>results1</th>', f'<th><span title="{results1_tooltip}">results1 âšī¸</span></th>') - if 'results2' in columns: - table_html = table_html.replace('<th>results2</th>', f'<th><span title="{results2_tooltip}">results2 âšī¸</span></th>') - if 'results3' in columns: - table_html = table_html.replace('<th>results3</th>', f'<th><span title="{results3_tooltip}">results3 âšī¸</span></th>') - - # Add tooltips to parts column headers - if 'qparts' in columns: - table_html = table_html.replace('<th>qparts</th>', f'<th><span title="{qparts_tooltip}">qparts âšī¸</span></th>') - if 'mparts' in columns: - table_html = table_html.replace('<th>mparts</th>', f'<th><span title="{mparts_tooltip}">mparts âšī¸</span></th>') - - # 
Remove the magnifying glass icon from the header - table_html = table_html.replace('<th>đ</th>', '<th></th>') - - tables_html.append(table_html) - else: - tables_html.append("<div class='alert alert-info'>No results available</div>") - - # Add comparative visualization - if has_results: - # Create list of DataFrames for visualization - viz_dataframes = [] - for results in all_results: - if results: - df = pd.DataFrame(results) - viz_dataframes.append(df) - else: - viz_dataframes.append(pd.DataFrame()) # Empty DataFrame for missing results - - # Remove comparative visualization HTML generation - comparative_html = "" - - start_html = f""" - <!-- Overlap Modal --> - <div class="modal" id="overlapModal" tabindex="-1" role="dialog" data-bs-backdrop="static"> - <div class="modal-dialog modal-xl"> - <div class="modal-content"> - <div class="modal-header"> - <h5 class="modal-title" id="overlapModalLabel"> - Protein Overlap: <span style="color:#32CD32">{protein_id}</span> - </h5> - <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button> - </div> - <div class="modal-body" id="overlapContent"> - Loading... 
- </div> - </div> - </div> - </div> - """ - # Modify the HTMLResponse to include modal and JavaScript - - return HTMLResponse( - get_protein_page_html( - protein_id=protein_id, - viewers=viewer_html, - tables_html=tables_html if tables_html else None, - has_results=has_results, - comparative_html="", # Set to empty string - additional_html=start_html+""" - <script> - // Initialize the modal - const overlapModal = new bootstrap.Modal(document.getElementById('overlapModal'), { - backdrop: 'static', - keyboard: false - }); - - async function showOverlap(protein1, protein2, rotation = null, translation = null, - viewType = 'basic', regions_to_display1 = [], regions_to_display2 = []) { - const modalEl = document.getElementById('overlapModal'); - const contentDiv = document.getElementById('overlapContent'); - const titleEl = modalEl.querySelector('.modal-title'); - - // Show the modal first - overlapModal.show(); - - // Show loading state with basic title - titleEl.innerHTML = `Protein Overlap: <span style="color:#32CD32">${protein1}</span> & <span style="color:#FFB3B3">${protein2}</span>`; - contentDiv.innerHTML = '<div class="text-center"><div class="spinner-border" role="status"></div><p>Loading...</p></div>'; - - try { - // Build URL with optional parameters - let url = `/overlap/${protein1}/${protein2}`; - const params = new URLSearchParams(); - if (rotation) params.append('rotation', JSON.stringify(rotation)); - if (translation) params.append('translation', JSON.stringify(translation)); - if (viewType !== 'basic') { - params.append('view_type', viewType); - if (regions_to_display1.length > 0) { - params.append('regions_to_display1', JSON.stringify(regions_to_display1)); - console.log('Adding regions to display 1:', regions_to_display1); - } - if (regions_to_display2.length > 0) { - params.append('regions_to_display2', JSON.stringify(regions_to_display2)); - console.log('Adding regions to display 2:', regions_to_display2); - } - } - if (params.toString()) url += '?' 
+ params.toString(); - - console.log('Overlap URL:', url); - - const response = await fetch(url); - const contentType = response.headers.get('content-type'); - - if (!response.ok) { - const text = await response.text(); - console.error('Error response:', text); - throw new Error(text || 'Unknown error occurred'); - } - - const html = await response.text(); - console.log('Response content type:', contentType); - console.log('Response length:', html.length); - - if (!html.trim()) { - throw new Error('Empty response received'); - } - - // Create a temporary container to parse the HTML - const tempDiv = document.createElement('div'); - tempDiv.innerHTML = html; - - // Find and extract the viewer content - const viewerContent = tempDiv.querySelector('#viewer') || tempDiv.querySelector('.viewer-container'); - if (viewerContent) { - contentDiv.innerHTML = viewerContent.outerHTML; - - // Execute any scripts in the content - const scripts = tempDiv.getElementsByTagName('script'); - Array.from(scripts).forEach(script => { - const newScript = document.createElement('script'); - // Wrap script content in an IIFE to avoid variable collision - newScript.textContent = `(function() { ${script.textContent} })();`; - Array.from(script.attributes).forEach(attr => { - newScript.setAttribute(attr.name, attr.value); - }); - contentDiv.appendChild(newScript); - }); - } else { - console.error('Response HTML:', html); - throw new Error('Could not find viewer content in response'); - } - } catch (error) { - console.error('Error:', error); - contentDiv.innerHTML = ` - <div class="alert alert-danger"> - <h5>Error</h5> - <p>${error.message}</p> - </div>`; - } - } - - // Add protein name tooltip functionality - document.addEventListener('DOMContentLoaded', function() { - const proteinLinks = document.querySelectorAll('.protein-id'); - const tooltipCache = new Map(); - - proteinLinks.forEach(link => { - link.addEventListener('mouseenter', async function() { - const proteinId = 
this.dataset.proteinId; - - if (tooltipCache.has(proteinId)) { - this.title = tooltipCache.get(proteinId); - return; - } - - try { - const response = await fetch(`/api/protein-name/${proteinId}`); - if (!response.ok) throw new Error('Failed to fetch protein name'); - const data = await response.json(); - tooltipCache.set(proteinId, data.name); - this.title = data.name; - } catch (error) { - console.error('Error fetching protein name:', error); - } - }); - }); - }); - </script> - """ - ) - ) - except Exception as e: - error_msg = f"Error: {str(e)}" - logger.error(error_msg, exc_info=True) - return HTMLResponse( - content=f""" - <html> - <head> - <title>Error</title> - <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"> - </head> - <body> - <div class="container mt-5"> - <div class="alert alert-danger"> - <h4 class="alert-heading">Error</h4> - <p>{error_msg}</p> - </div> - </div> - </body> - </html> - """, - status_code=500 - ) +# Include routers +app.include_router(home.router) +app.include_router(ground_truth.router) +app.include_router(protein.router) +app.include_router(visualization.router) +app.include_router(api.router) +# Add overlap endpoint directly (this will be moved to visualization router in the future) @app.get("/overlap/{protein_id1}/{protein_id2}") async def get_overlap_visualization( protein_id1: str, @@ -1006,1155 +62,28 @@ async def get_overlap_visualization( regions_to_display1: str = None, regions_to_display2: str = None ): - """Get overlap visualization for two proteins.""" - try: - logger.info(f"Creating overlap visualization for {protein_id1} and {protein_id2}") - logger.info(f"View type: {view_type}") - logger.debug(f"Regions to display 1: {regions_to_display1}") - logger.debug(f"Regions to display 2: {regions_to_display2}") - - # Parse regions if provided - regions_filter1 = json.loads(regions_to_display1) if regions_to_display1 else None - regions_filter2 = json.loads(regions_to_display2) 
if regions_to_display2 else None - - response = visualization_service.create_overlap_visualization( - protein_id1=protein_id1, - protein_id2=protein_id2, - pdb_dir=PDB_DATABASE_DIR, - rotation=rotation, - translation=translation, - view_type=view_type, - regions_filter1=regions_filter1, - regions_filter2=regions_filter2 - ) - - # Ensure we're returning HTMLResponse - if not isinstance(response, HTMLResponse): - logger.error(f"Invalid response type: {type(response)}") - return HTMLResponse( - content=f""" - <div class="alert alert-danger"> - Error: Invalid visualization response - </div> - """, - status_code=500 - ) - - return response - - except Exception as e: - logger.error(f"Error in overlap visualization: {str(e)}", exc_info=True) - return HTMLResponse( - content=f""" - <div class="alert alert-danger"> - Error creating visualization: {str(e)} - </div> - """, - status_code=500 - ) - -@app.get("/list-pdb-files") -async def list_pdb_files(): - """List all PDB files in the protein database directory""" - try: - files = list(PDB_DATABASE_DIR.glob("*.pdb")) - return { - "directory": str(PDB_DATABASE_DIR), - "file_count": len(files), - "files": [f.name for f in files], - "exists": PDB_DATABASE_DIR.exists(), - "is_dir": PDB_DATABASE_DIR.is_dir() if PDB_DATABASE_DIR.exists() else False - } - except Exception as e: - logger.error(f"Error listing PDB files: {str(e)}") - return {"error": str(e)} - -@app.get("/api/more-results/{protein_id}/{start}/{length}") -async def get_more_results(protein_id: str, start: int, length: int): - try: - if hasattr(app.state, 'results_data'): - # Get the slice of results - results = app.state.results_data[start:start+length] - # Add index to each result - for i, result in enumerate(results, start=start+1): - result['#'] = i # Add row number - return {"data": results} - return {"error": "No results data found"} - except Exception as e: - logger.error(f"Error fetching more results: {str(e)}") - return {"error": str(e)} - 
-@app.get("/results_visualization/{protein_id}") -async def results_visualization(protein_id: str): - """Handle the results visualization part of the protein view.""" - try: - # Load results from each search type - result_files = [ - f"full_pdbs/AF-{protein_id}-F1-model_v4.json", # Whole chain search - f"high_plddt/AF-{protein_id}-F1-model_v4.json", # Disordered regions - f"metaregions/AF-{protein_id}-F1-model_v4.json" # Stable regions - ] - - all_results = [] # Store all results for each type - - # Load results from each file - for result_file in result_files: - try: - # Look in both production and test directories - file_paths = [ - Path(protein_directory) / result_file, # Production path - Path("test/query_target_search") / result_file # Test path - ] - - result_path = next( - (path for path in file_paths if path.exists()), - None - ) - - if result_path: - with open(result_path) as f: - results_data = json.load(f) - - # Extract results based on file structure - if isinstance(results_data, list) and len(results_data) > 0: - results = results_data[0]['results'] - else: - results = results_data['results'] - - # Convert to DataFrame and process - results_df = pd.DataFrame(results) - results_df['target_id'] = results_df['target_id'].str.split('-').str[1] - - # Sort by query_tmscore and take top 10 - results_df = results_df.sort_values('query_tmscore', ascending=False).head(10) - - all_results.append(results_df) - else: - all_results.append(pd.DataFrame()) - - except Exception as e: - logger.error(f"Error loading results from {result_file}: {str(e)}") - all_results.append(pd.DataFrame()) - - # Create comparative visualization - visualization_html = visualization_service.create_comparative_visualization( - all_results_dfs=all_results, - overlap_service=protein_service, - pdb_dir=PDB_DATABASE_DIR, - query_id=protein_id - ) - - # Wrap the visualization in a proper HTML structure - html_content = f""" - <!DOCTYPE html> - <html> - <head> - <title>Results Visualization - 
{protein_id}</title> - <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"> - <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script> - <script src="https://3dmol.org/build/3Dmol-min.js"></script> - <style> - .results-container {{ - padding: 20px; - }} - .visualization-section {{ - margin-bottom: 20px; - }} - .viewer-container {{ - margin: 1rem auto; - max-width: 78%; - padding: 0 0.5rem; - }} - .container-fluid {{ - max-width: 1400px; - margin: 0 auto; - padding: 0 4rem; - }} - .viewer-body {{ - padding: 0.5rem; - display: flex; - justify-content: center; - max-height: 470px; - min-height: 470px; - overflow: hidden; - }} - /* Force both viewers to have identical dimensions */ - .viewer-body iframe {{ - width: 100% !important; - height: 450px !important; - min-width: 450px !important; - border: none !important; - overflow: hidden !important; - max-width: 100% !important; - }} - /* Ensure both molecule panels are equal size */ - .col-md-6 {{ - display: flex; - flex-direction: column; - }} - .molecules-row {{ - display: flex; - flex-wrap: wrap; - margin-top: 0.25rem; - margin-bottom: 0.25rem; - }} - .info-panel {{ - background-color: #f8f9fa; - border: 1px solid #e9ecef; - border-radius: 8px; - padding: 0.5rem; - margin-bottom: 0.5rem; - font-size: 0.8rem; - box-shadow: 0 2px 4px rgba(0,0,0,0.05); - }} - .info-panel-title {{ - font-weight: bold; - margin-bottom: 0.2rem; - font-size: 0.9rem; - color: #495057; - }} - </style> - </head> - <body> - <div class="results-container"> - <h2>Results Visualization for {protein_id}</h2> - <div class="visualization-section"> - {visualization_html.body.decode()} - </div> - </div> - </body> - </html> - """ - - return HTMLResponse(content=html_content) - - except Exception as e: - logger.error(f"Error in results visualization: {str(e)}", exc_info=True) - return HTMLResponse( - content=f""" - <div class="alert alert-danger"> - Error loading results: {str(e)} - </div> - 
""", - status_code=500 - ) + """Redirects to the overlap visualization endpoint in the visualization router.""" + # This is needed to maintain backwards compatibility during refactoring + return await visualization.get_overlap_visualization( + protein_id1, + protein_id2, + rotation, + translation, + view_type, + regions_to_display1, + regions_to_display2 + ) + +# Add protein name API endpoint directly (this will be moved to api router in the future) +@app.get("/api/protein-name/{protein_id}") +async def get_protein_name_api(protein_id: str): + """Redirects to the protein name API endpoint in the api router.""" + # This is needed to maintain backwards compatibility during refactoring + return await api.get_protein_name_api(protein_id) @app.get("/regions/{protein_id}") async def regions_view(protein_id: str): - """ - Displays two side-by-side protein structure visualizations with regions highlighted, - without the results table, each using different metadata sources. - """ - try: - logger.info(f"Loading protein structure for regions view: {protein_id}") - pdb_data = protein_service.get_structure(protein_id) - - # Create the regions viewer for the first panel (standard metadata) - viewer_html_1, description_1 = protein_service.create_viewer(pdb_data, "regions", protein_id) - - # Log the description to verify it's being received correctly - logger.info(f"Description from protein service (standard): {description_1}") - - # Process metadata for first panel (standard metadata from METADATA_DIR) - regions_1 = [] - regions_1_detail = [] - - try: - regions_file_1 = Path(f'{METADATA_DIR}/AF-{protein_id}-F1-model_v4.json') - - if regions_file_1.exists(): - with open(regions_file_1) as f: - json_data_1 = json.load(f) - - # Handle different JSON structures - if isinstance(json_data_1, dict): - regions_1 = json_data_1.get('regions', []) - elif isinstance(json_data_1, list): - regions_1 = json_data_1 - else: - regions_1 = [] - - # Store regions info for comparison - 
regions_1_detail = regions_1.copy() - - # Define colors for regions - colors = [ - '#4169E1', # Royal Blue - '#800080', # Purple - '#FFA500', # Orange - '#00CED1', # Dark Turquoise - '#FF1493', # Deep Pink - '#DAA520', # Goldenrod - '#8B4513', # Saddle Brown - '#4B0082', # Indigo - ] - - # Generate colored names HTML for first panel - colored_names_1 = [] - fs_mm_region_name = 'A' - - for i, region in enumerate(regions_1): - if i >= len(colors): - break - - color = colors[i] - name = region.get('region_name', f'Region {i + 1}') - colored_names_1.append(f'{name} -> <span style="color: {color};"><strong>{fs_mm_region_name}</strong></span>') - fs_mm_region_name = chr(ord(fs_mm_region_name) + 1) - - # Create a custom description with colored parts for first panel - if colored_names_1: - description_1 = f"Identified parts {', '.join(colored_names_1)}." - - except Exception as e: - logger.warning(f"Could not generate custom description for panel 1: {str(e)}") - - # Process metadata for second panel (metadata_2 directory) - # Create a copy of the viewer for the second panel - viewer_html_2 = viewer_html_1 # Start with same view, will be changed if alt metadata exists - description_2 = "No alternative metadata available" - regions_2 = [] - regions_2_detail = [] - - try: - # Define the metadata_2 directory path similar to METADATA_DIR - try: - metadata_2_dir = Path(METADATA_2_DIR) # Try to use the defined constant - except NameError: - # Fallback to a default path if METADATA_2_DIR is not defined - metadata_2_dir = Path('metadata_2') - - if not metadata_2_dir.exists(): - os.makedirs(metadata_2_dir, exist_ok=True) - - regions_file_2 = metadata_2_dir / f'AF-{protein_id}-F1-model_v4.json' - - if regions_file_2.exists(): - with open(regions_file_2) as f: - json_data_2 = json.load(f) - - # Handle different JSON structures - if isinstance(json_data_2, dict): - regions_2 = json_data_2.get('regions', []) - elif isinstance(json_data_2, list): - regions_2 = json_data_2 - else: - 
regions_2 = [] - - # Store regions info for comparison - regions_2_detail = regions_2.copy() - - # If we have alternative metadata, create a new viewer - if regions_2: - # Create a new viewer for the second panel using the regions from metadata_2 - # Create a temporary visualization service for this - view = py3Dmol.view(width=450, height=450, js='https://3dmol.org/build/3Dmol-min.js') - view.addModel(pdb_data, "pdb") - - # Set base style - view.setStyle({'cartoon': {'color': '#32CD32', 'opacity': 0.5}}) - - # Generate colored names HTML for second panel - colored_names_2 = [] - fs_mm_region_name = 'A' - - for i, region in enumerate(regions_2): - if i >= len(colors): - break - - color = colors[i] - name = region.get('region_name', f'Region {i + 1}') - colored_names_2.append(f'{name} -> <span style="color: {color};"><strong>{fs_mm_region_name}</strong></span>') - fs_mm_region_name = chr(ord(fs_mm_region_name) + 1) - - try: - # Process region ranges same as in protein_service.py - if 'start_residue' in region and 'end_residue' in region: - ranges = [(int(region['start_residue']), int(region['end_residue']))] - elif 'region_ranges' in region: - ranges = [(int(start), int(end)) for start, end in region['region_ranges']] - else: - continue - - # Process each range in the region - for start, end in ranges: - # Color the range - view.setStyle({'resi': list(range(start, end + 1))}, - {'cartoon': {'color': color, 'opacity': 1.0}}) - - # Add range boundary labels - view.addLabel(f"{start}", {'position': {'resi': start}, - 'backgroundColor': color, - 'fontColor': 'white', - 'fontSize': 10}) - view.addLabel(f"{end}", {'position': {'resi': end}, - 'backgroundColor': color, - 'fontColor': 'white', - 'fontSize': 10}) - - except Exception as e: - logger.error(f"Error processing region for second panel: {str(e)}") - continue - - # Create description for second panel - if colored_names_2: - description_2 = f"Identified parts {', '.join(colored_names_2)}." 
- - # Finalize viewer - view.zoomTo() - view.zoom(1.3) - view.setBackgroundColor('white') - - # Get HTML - viewer_html_2 = view._make_html() - viewer_html_2 = viewer_html_2.replace('async src="', 'src="') - else: - description_2 = "No regions found in alternative metadata" - else: - description_2 = "Alternative metadata file not found" - - except Exception as e: - logger.warning(f"Could not generate view for second panel: {str(e)}") - description_2 = f"Error loading alternative metadata: {str(e)}" - - # Process domain data for third panel from TED API - viewer_html_3 = viewer_html_1 # Start with same view, will be changed when domains are found - description_3 = "Loading domain data from TED..." - domain_data = [] - - try: - # Fetch domain data from TED API - ted_api_url = f"https://ted.cathdb.info/api/v1/uniprot/summary/{protein_id}" - logger.info(f"Fetching domain data from TED API: {ted_api_url}") - - async with httpx.AsyncClient(timeout=30.0) as client: - response = await client.get(ted_api_url) - if response.status_code == 200: - ted_data = response.json() - - # Extract domain chopping information - if 'data' in ted_data and len(ted_data['data']) > 0: - domain_data = ted_data['data'] - - # Create a new viewer for domain visualization - view = py3Dmol.view(width=450, height=450, js='https://3dmol.org/build/3Dmol-min.js') - view.addModel(pdb_data, "pdb") - - # Set base style - view.setStyle({'cartoon': {'color': '#32CD32', 'opacity': 0.5}}) - - # Generate colored names for domains - domain_names = [] - domain_letter = 'A' - - # Process each domain entry - for i, domain in enumerate(domain_data): - if i >= len(colors): - break - - color = colors[i] - - # Extract domain information - domain_id = domain.get('id', f'') - chopping = domain.get('chopping', '') - - if not chopping: - continue - - domain_names.append(f'<span style="color: {color};"><strong>{domain_letter}</strong></span>') - domain_letter = chr(ord(domain_letter) + 1) - - try: - # Parse chopping 
information (e.g., "89-166_177-222") - segments = chopping.split('_') - for segment in segments: - if '-' in segment: - start_str, end_str = segment.split('-') - start = int(start_str) - end = int(end_str) - - # Color the segment - view.setStyle({'resi': list(range(start, end + 1))}, - {'cartoon': {'color': color, 'opacity': 1.0}}) - - # Add range boundary labels - view.addLabel(f"{start}", {'position': {'resi': start}, - 'backgroundColor': color, - 'fontColor': 'white', - 'fontSize': 10}) - view.addLabel(f"{end}", {'position': {'resi': end}, - 'backgroundColor': color, - 'fontColor': 'white', - 'fontSize': 10}) - except Exception as e: - logger.error(f"Error processing domain segment: {str(e)}") - continue - - # Create description for domain panel - if domain_names: - description_3 = f"Domains: {', '.join(domain_names)}" - else: - description_3 = "No domain information found in TED database" - - # Finalize viewer - view.zoomTo() - view.zoom(1.3) - view.setBackgroundColor('white') - - # Get HTML - viewer_html_3 = view._make_html() - viewer_html_3 = viewer_html_3.replace('async src="', 'src="') - else: - description_3 = "No domain data found for this protein" - else: - description_3 = f"Error fetching domain data: {response.status_code}" - except Exception as e: - logger.warning(f"Could not fetch or process domain data: {str(e)}") - description_3 = f"Error loading domain data: {str(e)}" - - # Create the comparison table between the two panels - # Map regions from left to right based on names - comparison_html = generate_region_comparison_html(regions_1_detail, regions_2_detail, colors) - - # Create a simplified HTML template for this view with side-by-side panels - html_content = f""" - <html> - <head> - <title>Protein Regions - {protein_id}</title> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- Load CSS --> - <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"> - <link rel="stylesheet" 
href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.7.2/font/bootstrap-icons.css"> - - <!-- Load Scripts --> - <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script> - <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js"></script> - <script src="https://3dmol.org/build/3Dmol-min.js"></script> - <script src="https://3dmol.org/build/3Dmol.ui-min.js"></script> - - <style> - body {{ - font-family: Arial, sans-serif; - padding: 0.5rem; - max-width: 1500px; - margin: 0 auto; - }} - .home-button-container {{ - padding: 0.5rem; - position: absolute; - top: 0; - left: 0; - z-index: 100; - }} - .viewer-container {{ - margin: 1rem auto; - max-width: 95%; - padding: 0 0.5rem; - }} - .container-fluid {{ - max-width: 1500px; - margin: 0 auto; - padding: 0 2rem; - }} - .viewer-card {{ - box-shadow: 0 2px 4px rgba(0,0,0,0.1); - margin-bottom: 1rem; - border-radius: 8px; - overflow: hidden; - height: 100%; - }} - .viewer-heading {{ - background-color: #f8f9fa; - padding: 0.5rem 1rem; - border-bottom: 1px solid #e9ecef; - }} - .viewer-heading h3 {{ - margin: 0; - font-size: 1.1rem; - }} - .viewer-body {{ - padding: 0.5rem; - display: flex; - justify-content: center; - max-height: 470px; - min-height: 470px; - overflow: hidden; - }} - /* Force all viewers to have identical dimensions */ - .viewer-body iframe {{ - width: 100% !important; - height: 450px !important; - min-width: 100% !important; - border: none !important; - overflow: hidden !important; - max-width: 100% !important; - }} - /* Ensure all molecule panels have appropriate size */ - .col-md-4 {{ - display: flex; - flex-direction: column; - }} - .molecules-row {{ - display: flex; - flex-wrap: wrap; - margin-top: 0.25rem; - margin-bottom: 0.25rem; - }} - .info-panel {{ - background-color: #f8f9fa; - border: 1px solid #e9ecef; - border-radius: 8px; - padding: 0.5rem; - margin-bottom: 0.5rem; - font-size: 0.8rem; - box-shadow: 0 2px 4px rgba(0,0,0,0.05); - }} - 
.info-panel-title {{ - font-weight: bold; - margin-bottom: 0.2rem; - font-size: 0.9rem; - color: #495057; - }} - .comparison-table {{ - width: 100%; - border-collapse: collapse; - margin-top: 20px; - margin-bottom: 20px; - border: 1px solid #e9ecef; - border-radius: 8px; - overflow: hidden; - }} - .comparison-table th, .comparison-table td {{ - padding: 8px 12px; - text-align: left; - vertical-align: middle; - border: 1px solid #e9ecef; - }} - .comparison-table th {{ - background-color: #f8f9fa; - font-weight: bold; - }} - .comparison-table tr:nth-child(even) {{ - background-color: #f8f9fa; - }} - .region-comparison-wrapper {{ - margin-top: 20px; - margin-bottom: 20px; - padding: 0.5rem; - background-color: white; - border-radius: 8px; - box-shadow: 0 2px 4px rgba(0,0,0,0.1); - }} - .region-arrow {{ - font-size: 1.5rem; - color: #6c757d; - text-align: center; - }} - @media (max-width: 992px) {{ - .viewer-body iframe {{ - height: 400px !important; - }} - .viewer-body {{ - max-height: 420px; - min-height: 420px; - }} - .container-fluid {{ - padding: 0 1rem; - }} - .viewer-container {{ - max-width: 98%; - }} - .col-md-4 {{ - width: 100%; - }} - }} - </style> - </head> - <body> - <div class="home-button-container"> - <a href="/" class="btn btn-outline-primary btn-sm"> - <i class="bi bi-house-door"></i> Home - </a> - </div> - - <div class="container-fluid"> - <div class="row justify-content-center"> - <div class="col-12 text-center"> - <h3 class="mt-3 mb-2">Analysis of Regions for {protein_id}</h3> - <p class="small text-muted mb-3">Disordered parts cut out, stable parts joined</p> - </div> - </div> - - <div class="viewer-container"> - <div class="row molecules-row"> - <!-- First Panel --> - <div class="col-md-4 mb-3"> - <!-- Info Panel --> - <div class="info-panel mb-2"> - <div class="info-panel-title">PAE<15 and region size>10</div> - <div>{description_1}</div> - </div> - - <div class="viewer-card"> - <div class="viewer-heading"> - <h3>PAE<15 and region size>10</h3> - 
</div> - <div class="viewer-body"> - {viewer_html_1} - </div> - </div> - </div> - - <!-- Second Panel --> - <div class="col-md-4 mb-3"> - <!-- Info Panel --> - <div class="info-panel mb-2"> - <div class="info-panel-title">PAE<8 and region size>30</div> - <div>{description_2}</div> - </div> - - <div class="viewer-card"> - <div class="viewer-heading"> - <h3>PAE<8 and region size>30</h3> - </div> - <div class="viewer-body"> - {viewer_html_2} - </div> - </div> - </div> - - <!-- Third Panel (Domain Data) --> - <div class="col-md-4 mb-3"> - <!-- Info Panel --> - <div class="info-panel mb-2"> - <div class="info-panel-title">Protein Domains from TED <a href="https://ted.cathdb.info/uniprot/{protein_id}" target="_blank" class="small ms-2">(View in TED <i class="bi bi-box-arrow-up-right"></i>)</a></div> - <div>{description_3}</div> - </div> - - <div class="viewer-card"> - <div class="viewer-heading"> - <h3>Protein Domains</h3> - </div> - <div class="viewer-body"> - {viewer_html_3} - </div> - </div> - </div> - </div> - - <!-- Region Comparison Section --> - <div class="region-comparison-wrapper"> - <h4 class="text-center mb-3">Region Comparisons</h4> - {comparison_html} - </div> - </div> - - <div class="row justify-content-center mt-2"> - <div class="col-auto"> - <a href="/protein/{protein_id}" class="btn btn-primary btn-sm"> - View Full Protein Page - </a> - </div> - </div> - </div> - </body> - </html> - """ - - return HTMLResponse(content=html_content) - except Exception as e: - logger.error(f"Error in regions view: {str(e)}", exc_info=True) - return HTMLResponse( - content=f""" - <html> - <head> - <title>Error</title> - <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"> - </head> - <body> - <div class="container mt-5"> - <div class="alert alert-danger"> - <h4 class="alert-heading">Error</h4> - <p>{str(e)}</p> - </div> - </div> - </body> - </html> - """, - status_code=500 - ) - -def 
generate_region_comparison_html(regions_1, regions_2, colors): - """ - Generate HTML table to compare regions between the two panels. - - Args: - regions_1: Region data from the first panel (PAE<15) - regions_2: Region data from the second panel (PAE<8) - colors: List of colors used for region highlighting - - Returns: - HTML string for the comparison table - """ - if not regions_1 or not regions_2: - return "<p class='text-center'>No region data available for comparison</p>" - - # Map letters A, B, C, etc. to the regions - left_panel_map = {} - right_panel_map = {} - - # Store the complete region data for later analysis - left_regions_by_name = {} - right_regions_by_name = {} - - # Process left panel regions - for i, region in enumerate(regions_1): - if i < 26: # Limit to 26 letters - letter = chr(ord('A') + i) - name = region.get('region_name', f'Region {i+1}') - left_panel_map[name] = letter - left_regions_by_name[name] = region - - # Process right panel regions - for i, region in enumerate(regions_2): - if i < 26: # Limit to 26 letters - letter = chr(ord('A') + i) - name = region.get('region_name', f'Region {i+1}') - right_panel_map[name] = letter - right_regions_by_name[name] = region - - # Try to load discarded regions information from metadata_2/discarded - discarded_regions = {} - try: - # Get protein_id from regions data if available - protein_id = "P10911" # Default fallback - if regions_1 and isinstance(regions_1, list) and len(regions_1) > 0: - # Try to extract from the first region's filename if available - if hasattr(regions_1[0], 'get') and regions_1[0].get('protein_id'): - protein_id = regions_1[0].get('protein_id') - - # Use the global variables for path construction - discarded_file = os.path.join(DISCARDED_DIR, f'AF-{protein_id}-F1-model_v4_discarded.json') - - if os.path.exists(discarded_file): - with open(discarded_file) as f: - discarded_data = json.load(f) - if 'discarded_regions' in discarded_data: - for region in 
discarded_data['discarded_regions']: - if 'region_name' in region and 'reason' in region: - discarded_regions[region['region_name']] = region['reason'] - except Exception as e: - logger.warning(f"Could not load discarded regions info: {str(e)}") - - # Create an HTML table for the comparison - html = """ - <table class="comparison-table"> - <thead> - <tr> - <th>Region</th> - <th>Start</th> - <th>End</th> - <th>Size</th> - <th>Forms Metaregion (Left Panel)</th> - <th>Forms Metaregion (Right Panel)</th> - </tr> - </thead> - <tbody> - """ - - # Analyze the regions to determine which individual regions are part of which metaregions - # First, find all unique region names across both panels - all_region_names = set() - - # Look for individual regions in left panel - for name, region in left_regions_by_name.items(): - # Check if this is a composed region (like D-E or F-G-H) - if '-' in name: - # Add each individual part - for part in name.split('-'): - all_region_names.add(part) - else: - all_region_names.add(name) - - # Look for individual regions in right panel - for name, region in right_regions_by_name.items(): - # Check if this is a composed region (like D-E or F-G-H) - if '-' in name: - # Add each individual part - for part in name.split('-'): - all_region_names.add(part) - else: - all_region_names.add(name) - - # Add any discarded regions - for name in discarded_regions.keys(): - all_region_names.add(name) - - # Sort the regions alphabetically - all_region_names = sorted(list(all_region_names)) - - # Now determine which metaregion each individual region belongs to in each panel - region_mappings = {} - - # Store PAE values for metaregions - right_panel_pae = {} - for name, region in right_regions_by_name.items(): - if 'pae' in region: - right_panel_pae[name] = region['pae'] - - # Extract region boundary information from discarded regions - region_boundaries = {} - for region_name, region_data in discarded_regions.items(): - if isinstance(region_data, dict) and 
'start_residue' in region_data and 'end_residue' in region_data: - region_boundaries[region_name] = { - 'start': region_data['start_residue'], - 'end': region_data['end_residue'] - } - - # Also check the JSON file directly for region information - try: - # Use the global variables for path construction - discarded_file = os.path.join(DISCARDED_DIR, f'AF-{protein_id}-F1-model_v4_discarded.json') - - if os.path.exists(discarded_file): - with open(discarded_file) as f: - discarded_data = json.load(f) - if 'discarded_regions' in discarded_data: - for region in discarded_data['discarded_regions']: - if ('region_name' in region and 'start_residue' in region and - 'end_residue' in region): - region_boundaries[region['region_name']] = { - 'start': region['start_residue'], - 'end': region['end_residue'] - } - except Exception as e: - logger.warning(f"Could not load region boundaries from discarded file: {str(e)}") - - for region_name in all_region_names: - left_metaregion = "Not present" - right_metaregion = "Not present" - right_pae = None - - # Check left panel - for meta_name, letter in left_panel_map.items(): - # Split the metaregion name and check if region_name is one of the components - is_part_of_metaregion = False - if '-' in meta_name: - parts = meta_name.split('-') - if region_name in parts: - is_part_of_metaregion = True - else: - if region_name == meta_name: - is_part_of_metaregion = True - - if is_part_of_metaregion: - left_metaregion = f"{meta_name} -> {letter}" - # If this is a direct match and we have boundary info in left_regions_by_name - if region_name == meta_name and meta_name in left_regions_by_name: - region_info = left_regions_by_name[meta_name] - if 'start_residue' in region_info and 'end_residue' in region_info: - if region_name not in region_boundaries: - region_boundaries[region_name] = {} - region_boundaries[region_name]['start'] = region_info['start_residue'] - region_boundaries[region_name]['end'] = region_info['end_residue'] - break - - # 
Check right panel - for meta_name, letter in right_panel_map.items(): - # Split the metaregion name and check if region_name is one of the components - is_part_of_metaregion = False - if '-' in meta_name: - parts = meta_name.split('-') - if region_name in parts: - is_part_of_metaregion = True - else: - if region_name == meta_name: - is_part_of_metaregion = True - - if is_part_of_metaregion: - right_metaregion = f"{meta_name} -> {letter}" - if meta_name in right_panel_pae: - right_pae = right_panel_pae[meta_name] - - # If this is a direct match and we have boundary info in right_regions_by_name - if region_name == meta_name and meta_name in right_regions_by_name: - region_info = right_regions_by_name[meta_name] - if 'start_residue' in region_info and 'end_residue' in region_info: - if region_name not in region_boundaries: - region_boundaries[region_name] = {} - region_boundaries[region_name]['start'] = region_info['start_residue'] - region_boundaries[region_name]['end'] = region_info['end_residue'] - break - - # Add to mappings - region_mappings[region_name] = { - "left": left_metaregion, - "right": right_metaregion, - "right_pae": right_pae - } - - # Update with reasons for missing regions in right panel - for region, reason in discarded_regions.items(): - if region in region_mappings and region_mappings[region]["right"] == "Not present": - reason_display = "" - pae_value = None - - if "pae" in reason: - # Try to extract the actual PAE value - reason_display = "PAE above threshold" - try: - # Check if this format: pae_8.92_above_threshold_8.0 - if '_above_threshold_' in reason: - parts = reason.split('_') - for i, part in enumerate(parts): - if part == 'pae' and i + 1 < len(parts): - pae_value = parts[i + 1] - break - except: - pass - elif "size" in reason: - reason_display = "Size below threshold" - else: - reason_display = reason - - region_mappings[region]["right"] = f"Not present ({reason_display})" - # Add PAE value if available - if pae_value: - 
region_mappings[region]["pae_value"] = pae_value - - # Create rows for each individual region - for region, mappings in region_mappings.items(): - left_metaregion = mappings["left"] - right_metaregion = mappings["right"] - - # Get region boundaries - region_start = "-" - region_end = "-" - if region in region_boundaries: - if 'start' in region_boundaries[region]: - region_start = str(region_boundaries[region]['start']) - if 'end' in region_boundaries[region]: - region_end = str(region_boundaries[region]['end']) - - # Hardcoded region boundaries for this example if not found - if region_start == "-": - if region == "A": - region_start = "56" - region_end = "124" - elif region == "B": - region_start = "126" - region_end = "162" - elif region == "C": - region_start = "183" - region_end = "346" - elif region == "D": - region_start = "355" - region_end = "378" - elif region == "E": - region_start = "392" - region_end = "418" - elif region == "F": - region_start = "490" - region_end = "704" - elif region == "G": - region_start = "720" - region_end = "738" - elif region == "H": - region_start = "749" - region_end = "819" - - # Calculate region size - region_size = "-" - try: - if region_start != "-" and region_end != "-": - start_val = int(region_start) - end_val = int(region_end) - region_size = str(end_val - start_val + 1) # +1 because ranges are inclusive - except ValueError: - # If conversion to int fails, keep the default "-" - pass - - # Extract just the letter after the arrow for left panel color coding - left_color = "#cccccc" - if "->" in left_metaregion: - left_letter = left_metaregion.split("->")[1].strip() - left_index = ord(left_letter) - ord('A') - if 0 <= left_index < len(colors): - left_color = colors[left_index] - - # Extract just the letter after the arrow for right panel color coding - right_color = "#cccccc" - if "->" in right_metaregion: - right_letter = right_metaregion.split("->")[1].strip() - right_index = ord(right_letter) - ord('A') - if 0 <= 
right_index < len(colors): - right_color = colors[right_index] - - # Modify right_metaregion to include PAE if available - if "->" in right_metaregion and "right_pae" in mappings and mappings["right_pae"] is not None: - right_metaregion = f"{right_metaregion} (PAE: {mappings['right_pae']:.2f})" - elif "Not present" in right_metaregion and "pae_value" in mappings: - right_metaregion = f"{right_metaregion.replace(')', '')}, PAE: {mappings['pae_value']})" - - # Add row to table - html += f""" - <tr> - <td><strong>{region}</strong></td> - <td>{region_start}</td> - <td>{region_end}</td> - <td>{region_size}</td> - <td>""" - - # Format left metaregion with color only on the letter after arrow - if "->" in left_metaregion: - parts = left_metaregion.split("->") - left_letter = parts[1].strip() - html += f"""{parts[0].strip()} -> <span style="color: {left_color}; font-weight: bold;">{left_letter}</span>""" - if "PAE:" in left_metaregion: - pae_part = left_metaregion.split("(PAE:")[1] - html += f""" (PAE:{pae_part}""" - else: - html += f"""<span style="color: #999; font-style: italic;">{left_metaregion}</span>""" - - html += """</td> - <td>""" - - # Format right metaregion with color only on the letter after arrow - if "->" in right_metaregion: - parts = right_metaregion.split("->") - right_base = parts[0].strip() - - # Handle the case where there's PAE information - if " (PAE:" in parts[1]: - right_parts = parts[1].split(" (PAE:") - right_letter = right_parts[0].strip() - pae_info = "(PAE:" + right_parts[1] - html += f"""{right_base} -> <span style="color: {right_color}; font-weight: bold;">{right_letter}</span> {pae_info}""" - else: - right_letter = parts[1].strip() - html += f"""{right_base} -> <span style="color: {right_color}; font-weight: bold;">{right_letter}</span>""" - else: - html += f"""<span style="color: #999; font-style: italic;">{right_metaregion}</span>""" - - html += """</td> - </tr> - """ - - html += """ - </tbody> - </table> - """ - - return html - -# 
Include routers -app.include_router(ground_truth.router) -# app.include_router(protein.router) # Comment out until created -# app.include_router(api.router) # Comment out until created - -# Add new endpoint to fetch protein name -@app.get("/api/protein-name/{protein_id}") -async def get_protein_name_api(protein_id: str): - name = get_protein_name(protein_id) - return {"name": name} + """Redirects to the regions view endpoint in the visualization router.""" + # This is needed to maintain backwards compatibility during refactoring + return await visualization.regions_view(protein_id) diff --git a/app/routes/__init__.py b/app/routes/__init__.py index 166ce0b47d9272d500977e4d0057dbce06f9f84f..46b4722eb9934d760c550e5665f2560962cfe4e0 100644 --- a/app/routes/__init__.py +++ b/app/routes/__init__.py @@ -1 +1,6 @@ -# Empty file to make the directory a Python package \ No newline at end of file +# Empty file to make the directory a Python package +from app.routes import ground_truth +from app.routes import protein +from app.routes import api +from app.routes import home +from app.routes import visualization \ No newline at end of file diff --git a/app/routes/api.py b/app/routes/api.py new file mode 100644 index 0000000000000000000000000000000000000000..1a654ddce01bb396875076876a2253860b3e97b6 --- /dev/null +++ b/app/routes/api.py @@ -0,0 +1,188 @@ +from fastapi import APIRouter, HTTPException +from fastapi.responses import JSONResponse +import logging +import json +from pathlib import Path +import os +from typing import Dict, Any, List + +from app.utils.api import get_protein_name +from app.config.settings import PDB_DATABASE_DIR, METADATA_DIR, METADATA_2_DIR + +# Set up logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +router = APIRouter( + prefix="/api", + tags=["api"], +) + +@router.get("/protein-name/{protein_id}") +async def get_protein_name_api(protein_id: str) -> 
Dict[str, str]: + """Fetch the protein name for a given protein ID. + + Args: + protein_id: The ID of the protein + + Returns: + Dict with the protein name + """ + try: + name = get_protein_name(protein_id) + return {"name": name} + except Exception as e: + logger.error(f"Error fetching protein name for {protein_id}: {e}") + raise HTTPException(status_code=500, detail=f"Error fetching protein name: {str(e)}") + +@router.get("/more-results/{protein_id}/{start}/{length}") +async def get_more_results(protein_id: str, start: int, length: int) -> Dict[str, Any]: + """Get a paginated slice of results data. + + Args: + protein_id: The ID of the protein + start: The starting index + length: Number of results to return + + Returns: + Dict with paginated results data + """ + try: + # This is a simplified implementation + # In the actual implementation, you would load results from files + return {"data": [], "error": "Not implemented yet"} + except Exception as e: + logger.error(f"Error fetching more results for {protein_id}: {e}") + return {"error": str(e)} + +@router.get("/metadata/{protein_id}") +async def get_protein_metadata(protein_id: str) -> Dict[str, Any]: + """Get metadata for a specific protein. + + Args: + protein_id: The ID of the protein + + Returns: + Dict with protein metadata + """ + try: + json_path = Path(METADATA_DIR) / f"AF-{protein_id}-F1-model_v4.json" + if not json_path.exists(): + raise HTTPException( + status_code=404, + detail=f"Metadata not found for protein {protein_id}" + ) + + with open(json_path) as f: + metadata = json.load(f) + + return metadata + except HTTPException: + raise + except Exception as e: + logger.error(f"Error fetching metadata for {protein_id}: {e}") + raise HTTPException(status_code=500, detail=f"Error fetching metadata: {str(e)}") + +@router.get("/structure-info/{protein_id}") +async def get_structure_info(protein_id: str) -> Dict[str, Any]: + """Get information about a protein structure file. 
+ + Args: + protein_id: The ID of the protein + + Returns: + Dict with structure information + """ + try: + pdb_path = Path(PDB_DATABASE_DIR) / f"AF-{protein_id}-F1-model_v4.pdb" + if not pdb_path.exists(): + raise HTTPException( + status_code=404, + detail=f"Structure not found for protein {protein_id}" + ) + + file_size = pdb_path.stat().st_size + + # Count atoms in PDB file (simplified) + atom_count = 0 + with open(pdb_path, 'r') as f: + for line in f: + if line.startswith('ATOM'): + atom_count += 1 + + return { + "protein_id": protein_id, + "filename": pdb_path.name, + "file_size_bytes": file_size, + "file_size_mb": file_size / (1024 * 1024), + "atom_count": atom_count + } + except HTTPException: + raise + except Exception as e: + logger.error(f"Error fetching structure info for {protein_id}: {e}") + raise HTTPException(status_code=500, detail=f"Error fetching structure info: {str(e)}") + +@router.get("/storage-info") +async def storage_info() -> Dict[str, Any]: + """Get information about the storage directories and files. + + Returns: + Dict with storage information + """ + try: + cache_dir = Path("cache") + structures_dir = Path("structures") + pdb_files = list(Path(PDB_DATABASE_DIR).glob("*.pdb")) + metadata_files = list(Path(METADATA_DIR).glob("*.json")) + + return { + "pdb_database_count": len(pdb_files), + "pdb_database_files": [f.name for f in pdb_files[:10]], # Only return first 10 to avoid large response + "metadata_count": len(metadata_files), + "metadata_files": [f.name for f in metadata_files[:10]], # Only return first 10 to avoid large response + "total_size_mb": sum( + f.stat().st_size for f in + (pdb_files + metadata_files) + ) / (1024 * 1024) + } + except Exception as e: + logger.error(f"Error fetching storage info: {e}") + raise HTTPException(status_code=500, detail=f"Error fetching storage info: {str(e)}") + +@router.get("/list-proteins") +async def list_proteins(directory: str = None) -> Dict[str, Any]: + """List protein files in a directory. 
+ + Args: + directory: Optional directory path, defaults to PDB database directory + + Returns: + Dict with protein files information + """ + try: + if directory is None: + directory = str(PDB_DATABASE_DIR) + + dir_path = Path(directory) + if not dir_path.exists() or not dir_path.is_dir(): + raise HTTPException( + status_code=404, + detail=f"Directory not found: {directory}" + ) + + files = [f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f))] + + return { + "directory": str(dir_path), + "file_count": len(files), + "files": files[:100] # Limit to first 100 files + } + except HTTPException: + raise + except Exception as e: + logger.error(f"Error listing proteins in {directory}: {e}") + raise HTTPException(status_code=500, detail=f"Error listing proteins: {str(e)}") \ No newline at end of file diff --git a/app/routes/home.py b/app/routes/home.py new file mode 100644 index 0000000000000000000000000000000000000000..c0607d1e2c4f268d407b5ddbd328110a5a9535c0 --- /dev/null +++ b/app/routes/home.py @@ -0,0 +1,250 @@ +from fastapi import APIRouter, HTTPException +from fastapi.responses import HTMLResponse +import py3Dmol +import requests +import json +import logging +import os +from pathlib import Path +import pandas as pd + +from app.utils.api import get_protein_name +from app.config.settings import ( + CACHE_DIR, ALPHAFOLD_BASE_URL, PROTEIN_ID, PDB_DATABASE_DIR, METADATA_DIR +) + +# Set up logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +router = APIRouter(tags=["home"]) + +@router.get("/", response_class=HTMLResponse) +async def root(): + """Render the home page with protein search demo.""" + # Create a py3Dmol viewer with optimized settings + view = py3Dmol.view(width=800, height=600) + view.setViewStyle({'style':'outline'}) # Optimizes initial rendering + + # Get structure from cache or download + structure_id = f"AF-{PROTEIN_ID}-F1-model_v4.pdb" 
+ cache_path = CACHE_DIR / structure_id + + if not cache_path.exists(): + pdb_data = requests.get(f"{ALPHAFOLD_BASE_URL}/files/{structure_id}").text + cache_path.write_text(pdb_data) + else: + pdb_data = cache_path.read_text() + + # Set up the viewer with optimized settings + view.addModel(pdb_data, "pdb") + view.setStyle({'model': -1}, { + 'cartoon': { + 'color': 'spectrum', + 'thickness': 0.2, # Reduced thickness for better performance + 'detail': 'low' # Lower detail for better performance + } + }) + view.zoomTo() + + # Define directory paths for protein listing + ground_truth_directory = 'test/ground_truth' # Local path, adjust as needed + + # Get the list of proteins + category = "full_pdbs" + ground_truth_files = list_proteins(f'{ground_truth_directory}/{category}') + high_plddt_files = list_proteins(f'{ground_truth_directory}/high_plddt') + metaregion_files = list_proteins(f'{ground_truth_directory}/metaregions') + + # Create table data + ground_truth_table = [] + for i, file in enumerate(ground_truth_files): + if file.startswith("ground_truth_AF-") and file.endswith(".tsv"): + protein_id = file.split("-")[1] + + # Get PDB file size + pdb_path = PDB_DATABASE_DIR / f"AF-{protein_id}-F1-model_v4.pdb" + try: + pdb_size = os.path.getsize(pdb_path) + pdb_size_mb = pdb_size / (1024 * 1024) # Store as float + except (FileNotFoundError, OSError): + pdb_size_mb = 0 # Use 0 for sorting purposes + + # Get number of regions from metadata + metadata_path = METADATA_DIR / f"AF-{protein_id}-F1-model_v4.json" + try: + with open(metadata_path) as f: + metadata = json.load(f) + n_regions = len(metadata) # Count the number of entries in the metadata + except (FileNotFoundError, json.JSONDecodeError, KeyError) as e: + logger.error(f"Error reading metadata for {protein_id}: {e}") + n_regions = "N/A" + + ground_truth_table.append({ + "#": i+1, + "protein_id": protein_id, + "chain_gt": f'<a href="/ground_truth/{protein_id}">View</a>', + "pdb_filesize": pdb_size_mb, # Store as 
float + "n_regions": n_regions + }) + + # Create the table HTML + table_html = """ + <table id="ground-truth-table" class="table table-striped"> + <thead> + <tr> + <th>#</th> + <th>Protein ID</th> + <th>Protein name</th> + <th>PDB File Size</th> + <th>Number of Regions</th> + </tr> + </thead> + <tbody> + """ + + for row in ground_truth_table: + protein_name = get_protein_name(row['protein_id']) + # Format the file size for display + file_size_display = f"{row['pdb_filesize']:.2f} MB" if row['pdb_filesize'] > 0 else "N/A" + + table_html += f""" + <tr> + <td>{row['#']}</td> + <td><a href="/protein/{row['protein_id']}">{row['protein_id']}</a></td> + <td>{protein_name}</td> + <td data-order="{row['pdb_filesize']}">{file_size_display}</td> + <td>{row['n_regions']}</td> + </tr> + """ + + table_html += """ + </tbody> + </table> + """ + + return HTMLResponse(f""" + <html> + <head> + <title>Protein Search Demo</title> + <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"> + <link href="https://cdn.datatables.net/v/bs5/dt-1.13.4/datatables.min.css" rel="stylesheet"> + <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script> + <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js"></script> + <script src="https://cdn.datatables.net/v/bs5/dt-1.13.4/datatables.min.js"></script> + </head> + <body> + <div class="container mt-5"> + <h1>AlphaFind 2.0 Search Demo</h1> + <p>The main goal of the AlphaFind 2.0 version is to work with the pLDDT and PAE metrics through identifying stable regions in the protein. 
</p> + + <h2>Available proteins</h2> + <div class="table-responsive"> + {table_html} + </div> + </div> + + <script> + $(document).ready(function() {{ + $('#ground-truth-table').DataTable({{ + pageLength: 25, + order: [[0, 'asc']], + lengthMenu: [[10, 25, 50, 100, -1], [10, 25, 50, 100, "All"]], + columnDefs: [ + {{ + targets: [0, 1, 2, 3], // Only these columns are not sortable + orderable: true + }} + ] + }}); + }}); + </script> + </body> + </html> + """) + +@router.get("/health") +async def health_check(): + """Health check endpoint.""" + return {"status": "healthy"} + +@router.get("/test") +async def test(): + """Test endpoint to verify the server is running.""" + return {"message": "Server is running"} + +@router.get("/widget") +async def widget(): + """Render a protein structure widget.""" + view = py3Dmol.view(width=800, height=600) + view.setViewStyle({'style':'outline'}) + + structure_id = f"AF-{PROTEIN_ID}-F1-model_v4.pdb" + cache_path = CACHE_DIR / structure_id + + if not cache_path.exists(): + pdb_data = requests.get(f"{ALPHAFOLD_BASE_URL}/files/{structure_id}").text + cache_path.write_text(pdb_data) + else: + pdb_data = cache_path.read_text() + + view.addModel(pdb_data, "pdb") + view.setStyle({'model': -1}, { + 'cartoon': { + 'color': 'spectrum', + 'thickness': 0.2, + 'detail': 'low' + } + }) + view.zoomTo() + + return HTMLResponse(view._make_html()) + +@router.get("/visualize") +async def visualize(): + """Visualize a protein structure.""" + try: + # Create a cache path for this structure + structure_id = f"AF-{PROTEIN_ID}-F1-model_v4.pdb" + cache_path = CACHE_DIR / structure_id + + # Check if structure is already cached + if not cache_path.exists(): + # Fetch PDB data from AlphaFold + response = requests.get(f"{ALPHAFOLD_BASE_URL}/files/{structure_id}") + response.raise_for_status() + pdb_data = response.text + + # Basic validation + if not pdb_data.strip().startswith('ATOM') and not pdb_data.strip().startswith('HETATM'): + raise ValueError("Invalid 
PDB data received") + + # Save to cache + cache_path.write_text(pdb_data) + else: + pdb_data = cache_path.read_text() + if not pdb_data.strip(): + raise ValueError("Empty PDB data in cache") + + # Return with correct content type + return HTMLResponse( + content=pdb_data, + headers={"Content-Type": "chemical/x-pdb"} + ) + + except Exception as e: + return HTMLResponse( + content=f"Error loading structure: {str(e)}", + status_code=500 + ) + +def list_proteins(directory): + """List all protein files in the specified directory.""" + try: + return [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))] + except Exception as e: + logger.error(f"Error reading directory {directory}: {e}") + return [] \ No newline at end of file diff --git a/app/routes/protein.py b/app/routes/protein.py new file mode 100644 index 0000000000000000000000000000000000000000..d431eab79021d4a6281c2f463e6b737f0543b351 --- /dev/null +++ b/app/routes/protein.py @@ -0,0 +1,683 @@ +from fastapi import APIRouter, HTTPException +from fastapi.responses import HTMLResponse +import json +import pandas as pd +import logging +import os +from pathlib import Path +import numpy as np +from typing import Optional, List + +from app.services.protein_service import ProteinService +from app.services.visualization_service import VisualizationService +from app.templates.protein_view import get_protein_page_html +from app.config.settings import METADATA_DIR, PDB_DATABASE_DIR + +# Set up logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +router = APIRouter( + prefix="/protein", + tags=["protein"], +) + +# Part colors - moved from main.py +PART_COLORS = { + 'A': '#4169E1', # Royal Blue + 'B': '#800080', # Purple + 'C': '#FFA500', # Orange + 'D': '#00CED1', # Dark Turquoise + 'E': '#FF1493', # Deep Pink + 'F': '#DAA520', # Goldenrod + 'G': '#8B4513', # Saddle Brown + 'H': '#4B0082', # Indigo + 'I': 
'#20B2AA', # Light Sea Blue + 'J': '#FF6347', # Tomato + 'K': '#9370DB', # Medium Purple + 'L': '#B8860B', # Dark Goldenrod + 'M': '#1E90FF', # Dodger Blue + 'N': '#DDA0DD', # Plum + 'O': '#FFD700', # Gold +} + +def colorize_parts(parts_str): + """ + Colorize protein parts based on the predefined color mapping. + + Args: + parts_str: A comma-separated string of parts (e.g., "A, B, C") + + Returns: + HTML-formatted string with colored parts + """ + if not parts_str or not isinstance(parts_str, str): + return parts_str + + # Split the parts string and colorize each part + parts = [p.strip() for p in parts_str.split(',')] + colored_parts = [] + + for part in parts: + if not part: + continue + + # Handle multi-character parts (like "A1", "B-C", etc.) + if '-' in part: + # For ranges like "A-B", color each letter + subparts = part.split('-') + colored_subparts = [] + + for i, subpart in enumerate(subparts): + if not subpart: + colored_subparts.append('') + continue + + if subpart[0] in PART_COLORS: + colored_subparts.append(f'<span style="color:{PART_COLORS[subpart[0]]}; font-weight:bold;">{subpart[0]}</span>{subpart[1:]}') + else: + colored_subparts.append(subpart) + + colored_parts.append('-'.join(colored_subparts)) + else: + # For single parts like "A", "B1", etc. 
+ if part[0] in PART_COLORS: + colored_parts.append(f'<span style="color:{PART_COLORS[part[0]]}; font-weight:bold;">{part[0]}</span>{part[1:]}') + else: + colored_parts.append(part) + + return ', '.join(colored_parts) + +# Initialize services - these will be shared across route handlers +protein_service = ProteinService() +visualization_service = None # Will be set from main.py + +# Determine the environment and set up directories +ENVIRONMENT = os.getenv("ENVIRONMENT", "local") # Default to 'local' if not set + +# Add the directory path for proteins based on the environment +if ENVIRONMENT == "production": + protein_directory = '/app/data/structures' # Production path + ground_truth_directory = '/app/data/ground_truth' +else: + protein_directory = 'test/query_target_search' # Local path + ground_truth_directory = 'test/ground_truth' +logger.info(f"Protein directory: {protein_directory}") + +@router.get("/{protein_id}") +async def protein_view(protein_id: str): + try: + logger.info(f"Loading protein structure for {protein_id}") + pdb_data = protein_service.get_structure(protein_id) + + # Create viewers with different types + viewer_html = [ + protein_service.create_viewer(pdb_data), # Whole chain search + protein_service.create_viewer(pdb_data, "disordered", protein_id), # Disordered cut out search + protein_service.create_viewer(pdb_data, "regions", protein_id) # Stable regions search + ] + + # Get results if available + tables_html = [] + has_results = False + comparative_html = "" + + # Define result files for each search type + result_files = [ + f"full_pdbs/AF-{protein_id}-F1-model_v4.json", # Whole chain search + f"high_plddt/AF-{protein_id}-F1-model_v4.json", # Disordered regions + f"metaregions/AF-{protein_id}-F1-model_v4.json" # Stable regions + ] + + all_results = [] # Store all results for each type + + # Load results from each file + for result_file in result_files: + try: + # Look in both production and test directories + file_paths = [ + 
Path(protein_directory) / result_file, # Production path + Path("test/query_target_search") / result_file # Test path + ] + + result_path = next( + (path for path in file_paths if path.exists()), + None + ) + + if result_path: + with open(result_path) as f: + results_data = json.load(f) + + # Extract results based on file structure + if isinstance(results_data, list) and len(results_data) > 0: + results = results_data[0]['results'] + else: + results = results_data['results'] + + all_results.append(results) + else: + all_results.append([]) # Empty results if file not found + + except Exception as e: + logger.error(f"Error loading results from {result_file}: {str(e)}") + all_results.append([]) # Empty results on error + + # Create tables if we have any results + if any(all_results): + has_results = True + tables_html = [] + + # First pass: Create DataFrames and compute indices + results_dfs = [] + indices_df = pd.DataFrame(columns=["target_id", "results1", "results2", "results3"]) + + for i, results in enumerate(all_results): + if results: + # Create DataFrame + results_df = pd.DataFrame(results) + + # Replace NaN with negative infinity in query_tmscore for sorting + results_df['query_tmscore'] = results_df['query_tmscore'].fillna(float('-inf')) + + # Sort by query_tmscore descending and drop duplicates keeping first occurrence + results_df = (results_df.sort_values('query_tmscore', ascending=False) + .drop_duplicates(subset=['target_id'], keep='first') + .reset_index(drop=True)) + + # Format results after sorting + results_df['similarity_score'] = results_df['similarity_score'].round(4) + results_df['query_tmscore'] = results_df['query_tmscore'].round(4) + results_df['target_tmscore'] = results_df['target_tmscore'].round(4) + + # rename target_id values to have just the protein id + results_df['target_id'] = results_df['target_id'].str.split('-').str[1] + + # Store the DataFrame + results_dfs.append(results_df) + + # Process target_id and store index + if i == 0: + 
# For first table, create initial DataFrame with indices + indices_df = pd.DataFrame({ + "target_id": results_df['target_id'], + "results1": results_df.index + }) + else: + # For other tables, create temporary mapping + temp_indices = pd.DataFrame({ + "target_id": results_df['target_id'], + f"results{i+1}": results_df.index + }) + # Merge with existing indices + indices_df = indices_df.merge( + temp_indices, + on="target_id", + how="outer" + ).drop_duplicates(subset=['target_id'], keep='first') + else: + results_dfs.append(pd.DataFrame()) + + # Fill NaN values with '--' + indices_df = indices_df.fillna('--') + + # Log the indices DataFrame + logger.info(f"Indices DataFrame:\n{indices_df.head(3)}") + + # Second pass: Create display tables with indices + for i, results_df in enumerate(results_dfs): + if not results_df.empty: + # Define columns based on table index + if i == 2: # For the third table (multimer results) + base_columns = ['match', 'TM-Score', 'overlap', 'qparts', 'mparts'] + else: + base_columns = ['match', 'TM-Score', 'overlap'] + + # Create a copy of the DataFrame for display + display_df = results_df.copy() + + # Rename qchains and tchains columns to qparts and mparts + if 'qchains' in display_df.columns: + display_df = display_df.rename(columns={'qchains': 'qparts'}) + if 'tchains' in display_df.columns: + display_df = display_df.rename(columns={'tchains': 'mparts'}) + + # Parse rotation and translation strings to numpy arrays if they exist + if 'rotation' in display_df.columns: + display_df['rotation'] = display_df['rotation'].apply( + lambda x: np.array(eval(x)).reshape(3, 3) if isinstance(x, str) else x + ) + if 'translation' in display_df.columns: + display_df['translation'] = display_df['translation'].apply( + lambda x: np.array(eval(x)).reshape(3,) if isinstance(x, str) else x + ) + + # Add detailed logging + if not display_df.empty: + sample_row = display_df.iloc[0] + logger.debug("Transformation data for first row:") + if 'rotation' in 
display_df.columns: + logger.debug(f"Original rotation data: {sample_row['rotation']}") + try: + rot_array = sample_row['rotation'] + logger.debug(f"Rotation type: {type(rot_array)}") + logger.debug(f"Rotation shape: {rot_array.shape}") + logger.debug(f"Rotation matrix:\n{rot_array}") + except Exception as e: + logger.error(f"Error processing rotation: {e}") + + if 'translation' in display_df.columns: + logger.debug(f"Original translation data: {sample_row['translation']}") + try: + trans_array = sample_row['translation'] + logger.debug(f"Translation type: {type(trans_array)}") + logger.debug(f"Translation shape: {trans_array.shape}") + logger.debug(f"Translation vector: {trans_array}") + except Exception as e: + logger.error(f"Error processing translation: {e}") + + # Format the chain information before creating the table + if 'qparts' in display_df.columns: + # Add debug logging + logger.debug("Sample qparts before processing:") + if not display_df.empty: + logger.debug(f"First row qparts: {display_df.iloc[0]['qparts']}") + logger.debug(f"Type: {type(display_df.iloc[0]['qparts'])}") + + # Handle chains that are already comma-separated strings + display_df['qparts'] = display_df['qparts'].apply( + lambda x: ( + logger.debug(f"Processing qparts: {x} of type {type(x)}") or + (x if isinstance(x, str) else + ', '.join(x) if isinstance(x, list) else '') + ) + ) + + # Apply colorization to qparts + display_df['qparts'] = display_df['qparts'].apply(colorize_parts) + + if 'mparts' in display_df.columns: + logger.debug("Sample mparts before processing:") + if not display_df.empty: + logger.debug(f"First row mparts: {display_df.iloc[0]['mparts']}") + logger.debug(f"Type: {type(display_df.iloc[0]['mparts'])}") + + # Handle chains that are already comma-separated strings + display_df['mparts'] = display_df['mparts'].apply( + lambda x: ( + logger.debug(f"Processing mparts: {x} of type {type(x)}") or + (x if isinstance(x, str) else + ', '.join(x) if isinstance(x, list) else '') 
+ ) + ) + + # Do not apply colorization to mparts + + # Add overlap button with view type based on result set + view_types = ["basic", "disordered", "regions"] + current_view_type = view_types[i] if i < len(view_types) else "basic" + + # Add overlap button column + logger.debug("Creating overlap buttons with chain information") + display_df[' '] = display_df.apply( + lambda row: ( + logger.debug(f"Creating button for row with qparts={row.get('qparts')}, mparts={row.get('mparts')}") or + '<button onclick=\'showOverlap(' + + f'"{protein_id}", ' + + f'"{row["target_id"]}", ' + + f'{json.dumps(row["rotation"].tolist()) if isinstance(row.get("rotation"), np.ndarray) else "null"}, ' + + f'{json.dumps(row["translation"].tolist()) if isinstance(row.get("translation"), np.ndarray) else "null"}, ' + + f'"{current_view_type}", ' + + f'{json.dumps([x.strip() for x in row.get("qparts", "").split(",") if x.strip()])}, ' + + f'{json.dumps([x.strip() for x in row.get("mparts", "").split(",") if x.strip()])}' + + ')\' class="btn btn-primary btn-sm">Overlap</button>' + ), + axis=1 + ) + + # Add position columns from other tables based on the current table + if i == 0: # First table shows positions from tables 2 and 3 + if 'results2' in indices_df.columns and 'results3' in indices_df.columns: + display_df = display_df.merge( + indices_df[['target_id', 'results2', 'results3']], + left_on='target_id', + right_on='target_id', + how='left' + ).drop_duplicates(subset=['target_id'], keep='first') + # Handle fillna with type safety + display_df['results2'] = display_df['results2'].astype(str).replace('nan', '--') + display_df['results3'] = display_df['results3'].astype(str).replace('nan', '--') + position_columns = ['results2', 'results3'] + elif 'results2' in indices_df.columns: + display_df = display_df.merge( + indices_df[['target_id', 'results2']], + left_on='target_id', + right_on='target_id', + how='left' + ).drop_duplicates(subset=['target_id'], keep='first') + position_columns = 
['results2'] + display_df['results2'] = display_df['results2'].astype(str).replace('nan', '--') + elif 'results3' in indices_df.columns: + display_df = display_df.merge( + indices_df[['target_id', 'results3']], + left_on='target_id', + right_on='target_id', + how='left' + ).drop_duplicates(subset=['target_id'], keep='first') + position_columns = ['results3'] + display_df['results3'] = display_df['results3'].astype(str).replace('nan', '--') + else: + position_columns = [] + elif i == 1: # Second table shows positions from tables 1 and 3 + if 'results1' in indices_df.columns and 'results3' in indices_df.columns: + display_df = display_df.merge( + indices_df[['target_id', 'results1', 'results3']], + left_on='target_id', + right_on='target_id', + how='left' + ).drop_duplicates(subset=['target_id'], keep='first') + position_columns = ['results1', 'results3'] + display_df['results1'] = display_df['results1'].astype(str).replace('nan', '--') + display_df['results3'] = display_df['results3'].astype(str).replace('nan', '--') + elif 'results1' in indices_df.columns: + display_df = display_df.merge( + indices_df[['target_id', 'results1']], + left_on='target_id', + right_on='target_id', + how='left' + ).drop_duplicates(subset=['target_id'], keep='first') + position_columns = ['results1'] + display_df['results1'] = display_df['results1'].astype(str).replace('nan', '--') + elif 'results3' in indices_df.columns: + display_df = display_df.merge( + indices_df[['target_id', 'results3']], + left_on='target_id', + right_on='target_id', + how='left' + ).drop_duplicates(subset=['target_id'], keep='first') + position_columns = ['results3'] + display_df['results3'] = display_df['results3'].astype(str).replace('nan', '--') + else: + position_columns = [] + + else: # Third table shows positions from tables 1 and 2 + display_df = display_df.merge( + indices_df[['target_id', 'results1', 'results2']], + left_on='target_id', + right_on='target_id', + how='left' + 
).drop_duplicates(subset=['target_id'], keep='first') + display_df['results1'] = display_df['results1'].astype(str).replace('nan', '--') + display_df['results2'] = display_df['results2'].astype(str).replace('nan', '--') + position_columns = ['results1', 'results2'] + + # Create target link column + display_df['match'] = display_df.apply( + lambda row: f'<a href="https://alphafold.ebi.ac.uk/entry/{row["target_id"]}" target="_blank" class="protein-id" data-protein-id="{row["target_id"]}">{row["target_id"]}</a>', + axis=1 + ) + + # Add magnifying glass icon column + display_df['đ'] = display_df.apply( + lambda row: f'<a href="/protein/{row["target_id"]}" class="text-decoration-none">đ</a>', + axis=1 + ) + + # Renaming to save space + display_df = display_df.rename(columns={ + 'query_tmscore': 'TM-Score', + 'target_tmscore': 't_tm', + 'similarity_score': 'score', + }, errors='ignore') + + # Define tooltip texts based on table index + if i == 0: + tm_score_tooltip = "TM-Score computed on the whole chain" + elif i == 1: + tm_score_tooltip = "TM-Score computed on the kept residues" + else: # i == 2, multimer results + tm_score_tooltip = "TM-Score computed on the kept residues after alignment on matched parts" + + # Define tooltips for result columns + results1_tooltip = "Position of the match in Table 1 (Current AlphaFind)" + results2_tooltip = "Position of the match in Table 2 (Disordered parts cut out)" + results3_tooltip = "Position of the match in Table 3 (Disordered parts cut out, stable parts joined)" + + # Define tooltips for parts columns + qparts_tooltip = "Query parts considered" + mparts_tooltip = "Parts from the matches proteins considered" + + if 'results2' in display_df.columns: + display_df['results2'] = display_df['results2'].str.split('.').str[0] + if 'results3' in display_df.columns: + display_df['results3'] = display_df['results3'].str.split('.').str[0] + if 'results1' in display_df.columns: + display_df['results1'] = 
display_df['results1'].str.split('.').str[0] + + # Ensure all required columns exist before creating table + for col in base_columns: + if col not in display_df.columns: + display_df[col] = '' + + # Create final column list ensuring all columns exist + columns = [col for col in base_columns + position_columns if col in display_df.columns] + if 'qparts' in columns: + # reorder columns + columns = ['match', 'TM-Score', ' ', 'qparts', 'mparts', 'results1', 'results2', 'đ'] + elif 'results2' in columns and 'results3' in columns: + columns = ['match', 'TM-Score', ' ', 'results2', 'results3', 'đ'] + elif 'results1' in columns and 'results3' in columns: + columns = ['match', 'TM-Score', ' ', 'results1', 'results3', 'đ'] + else: + # Default case for the first table + columns = ['match', 'TM-Score', ' ', 'results1', 'đ'] + + table_html = display_df.to_html( + classes=f'table table-striped table-hover table-{i}', + escape=False, + index=True, + columns=columns, + float_format=lambda x: '{:.3f}'.format(x) if isinstance(x, float) and x != float('-inf') else 'N/A' + ) + + # Add tooltip to TM-Score column header + table_html = table_html.replace('<th>TM-Score</th>', f'<th><span title="{tm_score_tooltip}">TM-Score âšī¸</span></th>') + + # Add tooltips to results column headers + if 'results1' in columns: + table_html = table_html.replace('<th>results1</th>', f'<th><span title="{results1_tooltip}">results1 âšī¸</span></th>') + if 'results2' in columns: + table_html = table_html.replace('<th>results2</th>', f'<th><span title="{results2_tooltip}">results2 âšī¸</span></th>') + if 'results3' in columns: + table_html = table_html.replace('<th>results3</th>', f'<th><span title="{results3_tooltip}">results3 âšī¸</span></th>') + + # Add tooltips to parts column headers + if 'qparts' in columns: + table_html = table_html.replace('<th>qparts</th>', f'<th><span title="{qparts_tooltip}">qparts âšī¸</span></th>') + if 'mparts' in columns: + table_html = table_html.replace('<th>mparts</th>', 
f'<th><span title="{mparts_tooltip}">mparts âšī¸</span></th>') + + # Remove the magnifying glass icon from the header + table_html = table_html.replace('<th>đ</th>', '<th></th>') + + tables_html.append(table_html) + else: + tables_html.append("<div class='alert alert-info'>No results available</div>") + + start_html = f""" + <!-- Overlap Modal --> + <div class="modal" id="overlapModal" tabindex="-1" role="dialog" data-bs-backdrop="static"> + <div class="modal-dialog modal-xl"> + <div class="modal-content"> + <div class="modal-header"> + <h5 class="modal-title" id="overlapModalLabel"> + Protein Overlap: <span style="color:#32CD32">{protein_id}</span> + </h5> + <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button> + </div> + <div class="modal-body" id="overlapContent"> + Loading... + </div> + </div> + </div> + </div> + """ + + # Create modal and JavaScript for overlap visualization + additional_js = """ + <script> + // Initialize the modal + const overlapModal = new bootstrap.Modal(document.getElementById('overlapModal'), { + backdrop: 'static', + keyboard: false + }); + + async function showOverlap(protein1, protein2, rotation = null, translation = null, + viewType = 'basic', regions_to_display1 = [], regions_to_display2 = []) { + const modalEl = document.getElementById('overlapModal'); + const contentDiv = document.getElementById('overlapContent'); + const titleEl = modalEl.querySelector('.modal-title'); + + // Show the modal first + overlapModal.show(); + + // Show loading state with basic title + titleEl.innerHTML = `Protein Overlap: <span style="color:#32CD32">${protein1}</span> & <span style="color:#FFB3B3">${protein2}</span>`; + contentDiv.innerHTML = '<div class="text-center"><div class="spinner-border" role="status"></div><p>Loading...</p></div>'; + + try { + // Build URL with optional parameters + let url = `/overlap/${protein1}/${protein2}`; + const params = new URLSearchParams(); + if (rotation) 
params.append('rotation', JSON.stringify(rotation)); + if (translation) params.append('translation', JSON.stringify(translation)); + if (viewType !== 'basic') { + params.append('view_type', viewType); + if (regions_to_display1.length > 0) { + params.append('regions_to_display1', JSON.stringify(regions_to_display1)); + console.log('Adding regions to display 1:', regions_to_display1); + } + if (regions_to_display2.length > 0) { + params.append('regions_to_display2', JSON.stringify(regions_to_display2)); + console.log('Adding regions to display 2:', regions_to_display2); + } + } + if (params.toString()) url += '?' + params.toString(); + + console.log('Overlap URL:', url); + + const response = await fetch(url); + const contentType = response.headers.get('content-type'); + + if (!response.ok) { + const text = await response.text(); + console.error('Error response:', text); + throw new Error(text || 'Unknown error occurred'); + } + + const html = await response.text(); + console.log('Response content type:', contentType); + console.log('Response length:', html.length); + + if (!html.trim()) { + throw new Error('Empty response received'); + } + + // Create a temporary container to parse the HTML + const tempDiv = document.createElement('div'); + tempDiv.innerHTML = html; + + // Find and extract the viewer content + const viewerContent = tempDiv.querySelector('#viewer') || tempDiv.querySelector('.viewer-container'); + if (viewerContent) { + contentDiv.innerHTML = viewerContent.outerHTML; + + // Execute any scripts in the content + const scripts = tempDiv.getElementsByTagName('script'); + Array.from(scripts).forEach(script => { + const newScript = document.createElement('script'); + // Wrap script content in an IIFE to avoid variable collision + newScript.textContent = `(function() { ${script.textContent} })();`; + Array.from(script.attributes).forEach(attr => { + newScript.setAttribute(attr.name, attr.value); + }); + contentDiv.appendChild(newScript); + }); + } else { + 
console.error('Response HTML:', html); + throw new Error('Could not find viewer content in response'); + } + } catch (error) { + console.error('Error:', error); + contentDiv.innerHTML = ` + <div class="alert alert-danger"> + <h5>Error</h5> + <p>${error.message}</p> + </div>`; + } + } + + // Add protein name tooltip functionality + document.addEventListener('DOMContentLoaded', function() { + const proteinLinks = document.querySelectorAll('.protein-id'); + const tooltipCache = new Map(); + + proteinLinks.forEach(link => { + link.addEventListener('mouseenter', async function() { + const proteinId = this.dataset.proteinId; + + if (tooltipCache.has(proteinId)) { + this.title = tooltipCache.get(proteinId); + return; + } + + try { + const response = await fetch(`/api/protein-name/${proteinId}`); + if (!response.ok) throw new Error('Failed to fetch protein name'); + const data = await response.json(); + tooltipCache.set(proteinId, data.name); + this.title = data.name; + } catch (error) { + console.error('Error fetching protein name:', error); + } + }); + }); + }); + </script> + """ + + return HTMLResponse( + get_protein_page_html( + protein_id=protein_id, + viewers=viewer_html, + tables_html=tables_html if tables_html else None, + has_results=has_results, + comparative_html="", # Set to empty string + additional_html=start_html + additional_js + ) + ) + except Exception as e: + error_msg = f"Error: {str(e)}" + logger.error(error_msg, exc_info=True) + return HTMLResponse( + content=f""" + <html> + <head> + <title>Error</title> + <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"> + </head> + <body> + <div class="container mt-5"> + <div class="alert alert-danger"> + <h4 class="alert-heading">Error</h4> + <p>{error_msg}</p> + </div> + </div> + </body> + </html> + """, + status_code=500 + ) \ No newline at end of file diff --git a/app/routes/visualization.py b/app/routes/visualization.py new file mode 100644 index 
0000000000000000000000000000000000000000..ff4f5845dfe1b63c77be3ae70e0cd44ad393bfb1 --- /dev/null +++ b/app/routes/visualization.py @@ -0,0 +1,1161 @@ +from fastapi import APIRouter, HTTPException, Query +from fastapi.responses import HTMLResponse +import json +import logging +import os +from pathlib import Path +import httpx +from typing import Optional, List +import py3Dmol + +from app.services.visualization_service import VisualizationService +from app.services.protein_service import ProteinService +from app.config.settings import PDB_DATABASE_DIR, METADATA_DIR, METADATA_2_DIR, DISCARDED_DIR + +# Set up logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +router = APIRouter(tags=["visualization"]) + +# Initialize service placeholders - these will be populated from main.py +visualization_service = None # Will be set from main.py +protein_service = ProteinService() # This one doesn't require app parameter + +@router.get("/overlap/{protein_id1}/{protein_id2}") +async def get_overlap_visualization( + protein_id1: str, + protein_id2: str, + rotation: str = None, + translation: str = None, + view_type: str = "basic", + regions_to_display1: str = None, + regions_to_display2: str = None +): + """Get overlap visualization for two proteins.""" + try: + logger.info(f"Creating overlap visualization for {protein_id1} and {protein_id2}") + logger.info(f"View type: {view_type}") + logger.debug(f"Regions to display 1: {regions_to_display1}") + logger.debug(f"Regions to display 2: {regions_to_display2}") + + # Parse regions if provided + regions_filter1 = json.loads(regions_to_display1) if regions_to_display1 else None + regions_filter2 = json.loads(regions_to_display2) if regions_to_display2 else None + + response = visualization_service.create_overlap_visualization( + protein_id1=protein_id1, + protein_id2=protein_id2, + pdb_dir=PDB_DATABASE_DIR, + rotation=rotation, + 
translation=translation, + view_type=view_type, + regions_filter1=regions_filter1, + regions_filter2=regions_filter2 + ) + + # Ensure we're returning HTMLResponse + if not isinstance(response, HTMLResponse): + logger.error(f"Invalid response type: {type(response)}") + return HTMLResponse( + content=f""" + <div class="alert alert-danger"> + Error: Invalid visualization response + </div> + """, + status_code=500 + ) + + return response + + except Exception as e: + logger.error(f"Error in overlap visualization: {str(e)}", exc_info=True) + return HTMLResponse( + content=f""" + <div class="alert alert-danger"> + Error creating visualization: {str(e)} + </div> + """, + status_code=500 + ) + +@router.get("/regions/{protein_id}") +async def regions_view(protein_id: str): + """ + Displays two side-by-side protein structure visualizations with regions highlighted, + without the results table, each using different metadata sources. + """ + try: + # Debug logging + logger.info(f"Loading regions view for protein ID: {protein_id}") + logger.info(f"METADATA_DIR path: {METADATA_DIR}") + logger.info(f"METADATA_2_DIR path: {METADATA_2_DIR}") + logger.info(f"Checking if metadata_2 file exists: {Path(METADATA_2_DIR) / f'AF-{protein_id}-F1-model_v4.json'}") + logger.info(f"File exists: {(Path(METADATA_2_DIR) / f'AF-{protein_id}-F1-model_v4.json').exists()}") + + logger.info(f"Loading protein structure for regions view: {protein_id}") + pdb_data = protein_service.get_structure(protein_id) + + # Create the regions viewer for the first panel (standard metadata) + viewer_html_1, description_1 = protein_service.create_viewer(pdb_data, "regions", protein_id) + + # Log the description to verify it's being received correctly + logger.info(f"Description from protein service (standard): {description_1}") + + # Process metadata for first panel (standard metadata from METADATA_DIR) + regions_1 = [] + regions_1_detail = [] + + try: + regions_file_1 = 
Path(f'{METADATA_DIR}/AF-{protein_id}-F1-model_v4.json') + + if regions_file_1.exists(): + with open(regions_file_1) as f: + json_data_1 = json.load(f) + + # Handle different JSON structures + if isinstance(json_data_1, dict): + regions_1 = json_data_1.get('regions', []) + elif isinstance(json_data_1, list): + regions_1 = json_data_1 + else: + regions_1 = [] + + # Store regions info for comparison + regions_1_detail = regions_1.copy() + + # Define colors for regions + colors = [ + '#4169E1', # Royal Blue + '#800080', # Purple + '#FFA500', # Orange + '#00CED1', # Dark Turquoise + '#FF1493', # Deep Pink + '#DAA520', # Goldenrod + '#8B4513', # Saddle Brown + '#4B0082', # Indigo + '#20B2AA', # Light Sea Blue + '#FF6347', # Tomato + '#9370DB', # Medium Purple + '#B8860B', # Dark Goldenrod + '#1E90FF', # Dodger Blue + '#DDA0DD', # Plum + '#FFD700', # Gold + ] + + # Generate colored names HTML for first panel + colored_names_1 = [] + fs_mm_region_name = 'A' + + for i, region in enumerate(regions_1): + if i >= len(colors): + break + + color = colors[i] + name = region.get('region_name', f'Region {i + 1}') + colored_names_1.append(f'{name} -> <span style="color: {color};"><strong>{fs_mm_region_name}</strong></span>') + fs_mm_region_name = chr(ord(fs_mm_region_name) + 1) + + # Create a custom description with colored parts for first panel + if colored_names_1: + description_1 = f"Identified parts {', '.join(colored_names_1)}." 
+ + except Exception as e: + logger.warning(f"Could not generate custom description for panel 1: {str(e)}") + + # Process metadata for second panel (metadata_2 directory) + # Create a copy of the viewer for the second panel + viewer_html_2 = viewer_html_1 # Start with same view, will be changed if alt metadata exists + description_2 = "No alternative metadata available" + regions_2 = [] + regions_2_detail = [] + + try: + # Define the metadata_2 directory path similar to METADATA_DIR + metadata_2_dir = Path(METADATA_2_DIR) + + if not metadata_2_dir.exists(): + os.makedirs(metadata_2_dir, exist_ok=True) + + regions_file_2 = metadata_2_dir / f'AF-{protein_id}-F1-model_v4.json' + + if regions_file_2.exists(): + logger.info(f"Found alternative metadata file: {regions_file_2}") + with open(regions_file_2) as f: + json_data_2 = json.load(f) + + # Handle different JSON structures + if isinstance(json_data_2, dict): + regions_2 = json_data_2.get('regions', []) + elif isinstance(json_data_2, list): + regions_2 = json_data_2 + else: + regions_2 = [] + + # Store regions info for comparison + regions_2_detail = regions_2.copy() + + # Define colors for regions (same as for first panel) + colors = [ + '#4169E1', # Royal Blue + '#800080', # Purple + '#FFA500', # Orange + '#00CED1', # Dark Turquoise + '#FF1493', # Deep Pink + '#DAA520', # Goldenrod + '#8B4513', # Saddle Brown + '#4B0082', # Indigo + '#20B2AA', # Light Sea Blue + '#FF6347', # Tomato + '#9370DB', # Medium Purple + '#B8860B', # Dark Goldenrod + '#1E90FF', # Dodger Blue + '#DDA0DD', # Plum + '#FFD700', # Gold + ] + + # Generate colored names HTML for second panel + colored_names_2 = [] + fs_mm_region_name = 'A' + + for i, region in enumerate(regions_2): + if i >= len(colors): + break + + color = colors[i] + name = region.get('region_name', f'Region {i + 1}') + colored_names_2.append(f'{name} -> <span style="color: {color};"><strong>{fs_mm_region_name}</strong></span>') + fs_mm_region_name = chr(ord(fs_mm_region_name) + 
1) + + # Create a custom description with colored parts for second panel + if colored_names_2: + description_2 = f"Identified parts {', '.join(colored_names_2)}." + else: + description_2 = "Alternative metadata available but no regions identified." + + # Create viewer for second panel + logger.info(f"Creating viewer for second panel with regions: {len(regions_2)}") + viewer_html_2, _ = protein_service.create_viewer(pdb_data, "regions", protein_id, metadata_dir=metadata_2_dir) + logger.info(f"Successfully created viewer for second panel") + else: + logger.warning(f"Alternative metadata file not found: {regions_file_2}") + description_2 = "No alternative metadata available" + + except Exception as e: + logger.warning(f"Could not generate view for second panel: {str(e)}") + description_2 = f"Error loading alternative metadata: {str(e)}" + + # Process domain data for third panel from TED API + viewer_html_3 = viewer_html_1 # Start with same view, will be changed when domains are found + description_3 = "Loading domain data from TED..." 
+ + try: + # Fetch domain data from TED API using the summary endpoint + ted_api_url = f"https://ted.cathdb.info/api/v1/uniprot/summary/{protein_id}" + logger.info(f"Fetching domain data from TED API: {ted_api_url}") + + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(ted_api_url) + if response.status_code == 200: + # Process domain data and create viewer + ted_data = response.json() + logger.info(f"Successfully fetched domain data from TED API summary endpoint") + + # Debug log the API response structure + if isinstance(ted_data, dict): + logger.info(f"TED API response keys: {ted_data.keys()}") + if 'data' in ted_data and isinstance(ted_data['data'], list): + logger.info(f"Found {len(ted_data['data'])} domains in summary data") + + # For debugging, log the first domain details + if ted_data['data'] and isinstance(ted_data['data'][0], dict): + logger.info(f"First domain keys: {ted_data['data'][0].keys()}") + if 'chopping' in ted_data['data'][0]: + logger.info(f"Chopping format example: {ted_data['data'][0]['chopping']}") + + # Create the viewer for domain visualization + view = py3Dmol.view(width=450, height=450, js='https://3dmol.org/build/3Dmol-min.js') + view.addModel(pdb_data, "pdb") + + # Set base style + view.setStyle({'cartoon': {'color': '#EEEEEE', 'opacity': 0.5}}) + + # Define domain colors - use the same colors as region visualizer + colors = [ + '#4169E1', # Royal Blue + '#800080', # Purple + '#FFA500', # Orange + '#00CED1', # Dark Turquoise + '#FF1493', # Deep Pink + '#DAA520', # Goldenrod + '#8B4513', # Saddle Brown + '#4B0082', # Indigo + '#20B2AA', # Light Sea Blue + '#FF6347', # Tomato + '#9370DB', # Medium Purple + '#B8860B', # Dark Goldenrod + '#1E90FF', # Dodger Blue + '#DDA0DD', # Plum + '#FFD700', # Gold + ] + + # Parse domain data directly from the data array + domains = [] + + if isinstance(ted_data, dict) and 'data' in ted_data and isinstance(ted_data['data'], list): + for i, domain_data in 
enumerate(ted_data['data']): + try: + if isinstance(domain_data, dict) and 'chopping' in domain_data: + chopping = domain_data['chopping'] + + # Extract domain ID from ted_id field (e.g., "AF-P10911-F1-model_v4_TED01" -> "TED01") + domain_id = "Unknown" + if 'ted_id' in domain_data and isinstance(domain_data['ted_id'], str): + parts = domain_data['ted_id'].split('_') + if len(parts) > 0: + domain_id = parts[-1] # Get the last part + + # Parse the chopping field (format could be like "28-81" or "89-166_177-222") + if '_' in chopping: + # Handle ranges with gaps + sub_ranges = chopping.split('_') + for j, sub_range in enumerate(sub_ranges): + if '-' in sub_range: + start, end = map(int, sub_range.split('-')) + suffix = chr(97+j) if j > 0 else '' # 'a', 'b', etc. for split domains + domains.append({ + 'name': f"{domain_id}{suffix}", + 'type': domain_data.get('cath_label', 'Unknown'), + 'start': start, + 'end': end, + 'description': f"pLDDT: {domain_data.get('plddt', 'N/A')}" + }) + elif '-' in chopping: + # Handle simple ranges + start, end = map(int, chopping.split('-')) + domains.append({ + 'name': domain_id, + 'type': domain_data.get('cath_label', 'Unknown'), + 'start': start, + 'end': end, + 'description': f"pLDDT: {domain_data.get('plddt', 'N/A')}" + }) + except Exception as e: + logger.error(f"Error parsing domain {i}: {e}") + + logger.info(f"Processed {len(domains)} domains from summary data") + + # Process domains (if any) + if domains: + # Generate domain letters (A, B, C, ...) 
for display + fs_mm_region_name = 'A' + colored_names = [] + + for i, domain in enumerate(domains): + color = colors[i % len(colors)] + start = domain['start'] + end = domain['end'] + domain_letter = fs_mm_region_name + fs_mm_region_name = chr(ord(fs_mm_region_name) + 1) + + # Color the domain + view.setStyle({'resi': list(range(start, end + 1))}, + {'cartoon': {'color': color, 'opacity': 1.0}}) + + # Add domain boundary labels + view.addLabel(f"{start}", {'position': {'resi': start}, + 'backgroundColor': color, + 'fontColor': 'white', + 'fontSize': 10}) + view.addLabel(f"{end}", {'position': {'resi': end}, + 'backgroundColor': color, + 'fontColor': 'white', + 'fontSize': 10}) + + # Add to colored names list for description + colored_names.append(f"{domain['name']} -> <span style=\"color: {color};\"><strong>{domain_letter}</strong></span> ({start}-{end})") + + # Generate domain description showing all domains with colors + description_3 = f"Found {len(domains)} domains: {', '.join(colored_names)}" + else: + # Fallback to a basic viewer with a message + view.setStyle({'cartoon': {'color': '#32CD32'}}) + description_3 = "No domain information found in TED API data - showing basic structure" + + # Finalize viewer + view.zoomTo() + view.zoom(1.3) + view.setBackgroundColor('white') + + # Get HTML + viewer_html_3 = view._make_html() + viewer_html_3 = viewer_html_3.replace('async src="', 'src="') + else: + # Create fallback viewer if API request failed + view = py3Dmol.view(width=450, height=450, js='https://3dmol.org/build/3Dmol-min.js') + view.addModel(pdb_data, "pdb") + view.setStyle({'cartoon': {'color': '#32CD32'}}) + view.zoomTo() + view.setBackgroundColor('white') + viewer_html_3 = view._make_html() + viewer_html_3 = viewer_html_3.replace('async src="', 'src="') + description_3 = f"Error fetching domain data: {response.status_code}" + except Exception as e: + logger.warning(f"Could not fetch or process domain data: {str(e)}", exc_info=True) + description_3 = 
f"Error loading domain data: {str(e)}" + # Keep using the basic viewer for the domain panel + viewer_html_3 = viewer_html_1 + + # Generate region comparison HTML + comparison_html = generate_region_comparison_html(regions_1_detail, regions_2_detail, colors, protein_id) + + # Create a simplified HTML template for this view with side-by-side panels + html_content = f""" + <html> + <head> + <title>Protein Regions - {protein_id}</title> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <!-- Load CSS --> + <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"> + <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.7.2/font/bootstrap-icons.css"> + + <!-- Load Scripts --> + <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script> + <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js"></script> + <script src="https://3dmol.org/build/3Dmol-min.js"></script> + <script src="https://3dmol.org/build/3Dmol.ui-min.js"></script> + + <style> + body {{ + font-family: Arial, sans-serif; + padding: 0.5rem; + max-width: 1500px; + margin: 0 auto; + }} + .home-button-container {{ + padding: 0.5rem; + position: absolute; + top: 0; + left: 0; + z-index: 100; + }} + .protein-button-container {{ + padding: 0.5rem; + position: absolute; + top: 0; + right: 0; + z-index: 100; + }} + .viewer-container {{ + margin: 1rem auto; + max-width: 95%; + padding: 0 0.5rem; + }} + .container-fluid {{ + max-width: 1500px; + margin: 0 auto; + padding: 0 2rem; + }} + .viewer-card {{ + box-shadow: 0 2px 4px rgba(0,0,0,0.1); + margin-bottom: 1rem; + border-radius: 8px; + overflow: hidden; + height: 100%; + }} + .viewer-heading {{ + background-color: #f8f9fa; + padding: 0.5rem 1rem; + border-bottom: 1px solid #e9ecef; + }} + .viewer-heading h3 {{ + margin: 0; + font-size: 1.1rem; + }} + .viewer-body {{ + padding: 0.5rem; + display: flex; + justify-content: center; + 
max-height: 470px; + min-height: 470px; + overflow: hidden; + }} + /* Force all viewers to have identical dimensions */ + .viewer-body iframe {{ + width: 100% !important; + height: 450px !important; + min-width: 100% !important; + border: none !important; + overflow: hidden !important; + max-width: 100% !important; + }} + /* Ensure all molecule panels have appropriate size */ + .col-md-4 {{ + display: flex; + flex-direction: column; + }} + .molecules-row {{ + display: flex; + flex-wrap: wrap; + margin-top: 0.25rem; + margin-bottom: 0.25rem; + }} + .info-panel {{ + background-color: #f8f9fa; + border: 1px solid #e9ecef; + border-radius: 8px; + padding: 0.5rem; + margin-bottom: 0.5rem; + font-size: 0.8rem; + box-shadow: 0 2px 4px rgba(0,0,0,0.05); + }} + .info-panel-title {{ + font-weight: bold; + margin-bottom: 0.2rem; + font-size: 0.9rem; + color: #495057; + }} + .comparison-table {{ + width: 100%; + border-collapse: collapse; + margin-top: 20px; + margin-bottom: 20px; + border: 1px solid #e9ecef; + border-radius: 8px; + overflow: hidden; + }} + .comparison-table th, .comparison-table td {{ + padding: 8px 12px; + text-align: left; + vertical-align: middle; + border: 1px solid #e9ecef; + }} + .comparison-table th {{ + background-color: #f8f9fa; + font-weight: bold; + }} + .comparison-table tr:nth-child(even) {{ + background-color: #f8f9fa; + }} + .region-comparison-wrapper {{ + margin-top: 20px; + margin-bottom: 20px; + padding: 0.5rem; + background-color: white; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); + }} + .region-arrow {{ + font-size: 1.5rem; + color: #6c757d; + text-align: center; + }} + .btn-outline-info {{ + color: #00bcd4; + border-color: #00bcd4; + }} + .btn-outline-info:hover {{ + background-color: #00bcd4; + color: white; + }} + </style> + </head> + <body> + <div class="home-button-container"> + <a href="/" class="btn btn-outline-primary btn-sm"> + <i class="bi bi-house-door"></i> Home + </a> + </div> + + <div 
class="protein-button-container"> + <a href="/protein/{protein_id}" class="btn btn-outline-info btn-sm"> + <i class="bi bi-search"></i> Protein Search + </a> + </div> + + <div class="container-fluid"> + <div class="row justify-content-center"> + <div class="col-12 text-center"> + <h3 class="mt-3 mb-2">Analysis of Regions for {protein_id}</h3> + <p class="small text-muted mb-3">Disordered parts cut out, stable parts joined</p> + </div> + </div> + + <div class="viewer-container"> + <div class="row molecules-row"> + <!-- First Panel --> + <div class="col-md-4 mb-3"> + <!-- Info Panel --> + <div class="info-panel mb-2"> + <div class="info-panel-title">PAE<15 and region size>10</div> + <div>{description_1}</div> + </div> + + <div class="viewer-card"> + <div class="viewer-heading"> + <h3>PAE<15 and region size>10</h3> + </div> + <div class="viewer-body"> + {viewer_html_1} + </div> + </div> + </div> + + <!-- Second Panel --> + <div class="col-md-4 mb-3"> + <!-- Info Panel --> + <div class="info-panel mb-2"> + <div class="info-panel-title">PAE<8 and region size>30</div> + <div>{description_2}</div> + </div> + + <div class="viewer-card"> + <div class="viewer-heading"> + <h3>PAE<8 and region size>30</h3> + </div> + <div class="viewer-body"> + {viewer_html_2} + </div> + </div> + </div> + + <!-- Third Panel (Domain Data) --> + <div class="col-md-4 mb-3"> + <!-- Info Panel --> + <div class="info-panel mb-2"> + <div class="info-panel-title">Protein Domains from TED <a href="https://ted.cathdb.info/uniprot/{protein_id}" target="_blank" class="small ms-2">(View in TED <i class="bi bi-box-arrow-up-right"></i>)</a></div> + <div>{description_3}</div> + </div> + + <div class="viewer-card"> + <div class="viewer-heading"> + <h3>Protein Domains</h3> + </div> + <div class="viewer-body"> + {viewer_html_3} + </div> + </div> + </div> + </div> + + <!-- Region Comparison Section --> + <div class="region-comparison-wrapper"> + <h4 class="text-center mb-3">Region Comparisons</h4> + 
{comparison_html} + </div> + </div> + + <div class="row justify-content-center mt-2"> + <div class="col-auto"> + <a href="/protein/{protein_id}" class="btn btn-primary btn-sm"> + View Full Protein Page + </a> + </div> + </div> + </div> + </body> + </html> + """ + + return HTMLResponse(content=html_content) + except Exception as e: + logger.error(f"Error in regions view: {str(e)}", exc_info=True) + return HTMLResponse( + content=f""" + <html> + <head> + <title>Error</title> + <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"> + </head> + <body> + <div class="container mt-5"> + <div class="alert alert-danger"> + <h4 class="alert-heading">Error</h4> + <p>{str(e)}</p> + </div> + </div> + </body> + </html> + """, + status_code=500 + ) + +@router.get("/results_visualization/{protein_id}") +async def results_visualization(protein_id: str): + """Handle the results visualization part of the protein view.""" + try: + # This is a placeholder for the results_visualization route + # The actual implementation would load results and create visualizations + # For now, we'll return a simple HTML message + return HTMLResponse( + content=f""" + <html> + <head> + <title>Results Visualization</title> + <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"> + </head> + <body> + <div class="container mt-5"> + <div class="alert alert-info"> + <h4 class="alert-heading">Results Visualization</h4> + <p>Results visualization for {protein_id}</p> + </div> + </div> + </body> + </html> + """ + ) + except Exception as e: + logger.error(f"Error in results visualization: {str(e)}", exc_info=True) + return HTMLResponse( + content=f""" + <div class="alert alert-danger"> + Error loading results: {str(e)} + </div> + """, + status_code=500 + ) + +def generate_region_comparison_html(regions_1, regions_2, colors, protein_id=None): + """ + Generate HTML table to compare regions between the two panels. 
# Thresholds used only to word the "why is this region missing" reason for
# regions that appear in panel 1 but not in panel 2.
_PAE_THRESHOLD = 8
_MIN_REGION_SIZE = 30

# Colour used when a metaregion label cannot be mapped to a palette entry.
_DEFAULT_REGION_COLOR = "#cccccc"


def _split_region_name(region_name):
    """Split a compound region name such as "A-B-C" into its parts."""
    if '-' in region_name:
        return region_name.split('-')
    return [region_name]


def _load_discarded_regions(protein_id):
    """Best-effort read of the discarded-regions JSON for one protein.

    Returns a dict mapping each individual region letter to
    ``{'pae': <value>, 'reason': "PAE: x.xx"}``. Any failure is logged as a
    warning and whatever was collected up to that point is returned.
    """
    discarded = {}
    if protein_id is None:
        # Without an ID there is no file to look for.
        return discarded

    try:
        discarded_file = Path(DISCARDED_DIR) / f'AF-{protein_id}-F1-model_v4_discarded.json'

        if not discarded_file.exists():
            logger.info(f"No discarded regions file found at: {discarded_file}")
            return discarded

        logger.info(f"Loading discarded regions from: {discarded_file}")
        with open(discarded_file, encoding="utf-8") as f:
            discarded_data = json.load(f)

        if 'discarded_regions' in discarded_data and isinstance(discarded_data['discarded_regions'], list):
            for region in discarded_data['discarded_regions']:
                if 'region_name' not in region or 'pae' not in region:
                    continue
                pae_value = region['pae']
                # Compound regions are stored once per individual letter.
                for letter in _split_region_name(region['region_name']):
                    discarded[letter] = {
                        'pae': pae_value,
                        'reason': f"PAE: {pae_value:.2f}",
                    }
                    logger.info(f"Found discarded region {letter} with PAE: {pae_value}")
    except Exception as e:
        # Deliberately broad: the discarded file is optional decoration and a
        # malformed file must not break the comparison table.
        logger.warning(f"Error reading discarded regions JSON: {str(e)}")

    return discarded


def _range_for_part(region_ranges, part_index):
    """Return the (start, end) pair for the ``part_index``-th sub-region.

    Falls back to the last available range when there are fewer ranges than
    name parts, and to (0, 0) when the range is missing or malformed.
    """
    if part_index < len(region_ranges):
        range_pair = region_ranges[part_index]
        if len(range_pair) == 2:
            return range_pair[0], range_pair[1]
        return 0, 0
    if region_ranges:
        # part_index >= len(region_ranges) >= 1 here, so this is never the
        # first part; reuse the last range.
        return region_ranges[-1][0], region_ranges[-1][1]
    return 0, 0


def _index_regions_by_name(regions, panel_number):
    """Flatten one panel's regions into a dict keyed by individual region name.

    Compound regions (those carrying a ``region_ranges`` list) contribute one
    entry per name part; simple regions contribute a single entry.
    """
    indexed = {}
    for i, region in enumerate(regions):
        region_name = region.get('region_name', f'Region {i+1}')
        region_ranges = region.get('region_ranges')

        if isinstance(region_ranges, list):
            named_ranges = [
                (letter, _range_for_part(region_ranges, j))
                for j, letter in enumerate(_split_region_name(region_name))
            ]
            log_template = "Panel {panel} region part {key} (from {full}): Start: {start}, End: {end}"
        else:
            start = region.get('start_residue', region.get('start', 0))
            end = region.get('end_residue', region.get('end', 0))
            named_ranges = [(region_name, (start, end))]
            log_template = "Panel {panel} region: {key}, Start: {start}, End: {end}"

        for key, (start, end) in named_ranges:
            indexed[key] = {
                'name': key,
                'full_name': region_name,
                'start': start,
                'end': end,
                'size': end - start + 1,
                'mean_plddt': region.get('mean_plddt', 0),
                'pae': region.get('mean_pae', region.get('pae', 0)),
                'original_region': region,
            }
            logger.info(log_template.format(
                panel=panel_number, key=key, full=region_name, start=start, end=end
            ))
    return indexed


def _assign_metaregion_letters(regions, max_regions):
    """Map every region name part to the letter (A, B, ...) of its metaregion.

    Letters are handed out in list order, one per region (compound or simple),
    and only for the first ``max_regions`` regions.
    """
    letters = {}
    for i, region in enumerate(regions):
        if i >= max_regions:
            break
        name = region.get('region_name', f'Region {i + 1}')
        assigned_letter = chr(ord('A') + i)
        # All parts of a compound region share the same assigned letter.
        for part in _split_region_name(name):
            letters[part] = assigned_letter
    return letters


def _infer_discard_info(region_data):
    """Derive a discard reason for a panel-1 region that is absent from panel 2.

    Returns ``None`` when the region carries no PAE value.
    """
    pae = region_data.get('pae')
    if pae is None:
        return None

    size = region_data.get('size', 0)
    reasons = []
    if pae > _PAE_THRESHOLD:
        reasons.append(f"PAE {pae:.2f} > {_PAE_THRESHOLD}")
    if size < _MIN_REGION_SIZE:
        reasons.append(f"Size {size} < {_MIN_REGION_SIZE}")
    if not reasons:
        reasons.append(f"PAE: {pae:.2f}")
    return {'reason': ", ".join(reasons), 'pae': pae}


def _format_metaregion_cell(source_name, label, colors):
    """Render "<source> -> <label>" with the label coloured by its letter.

    Only single-character labels are mapped to a palette index (A=0, B=1, ...);
    anything else, such as a fallback multi-character region name, is shown
    in the default grey instead of raising from ``ord()``.
    """
    color = _DEFAULT_REGION_COLOR
    if isinstance(label, str) and len(label) == 1:
        index = ord(label) - ord('A')
        if 0 <= index < len(colors):
            color = colors[index]
    base = str(source_name).strip()
    return f"{base} -> <span style='color: {color};'><strong>{label}</strong></span>"


def generate_region_comparison_html(regions_1, regions_2, colors, protein_id=None):
    """
    Generate HTML table to compare regions between the two panels.

    Args:
        regions_1: Region data from the first panel (PAE<15)
        regions_2: Region data from the second panel (PAE<8)
        colors: List of colors used for region highlighting
        protein_id: The ID of the protein being visualized (needed for discarded regions)

    Returns:
        HTML string for the comparison table, or a short "no data" paragraph
        when neither panel has any regions.
    """
    if not regions_1 and not regions_2:
        return "<p class='text-center'>No region data available for comparison</p>"

    # Only one panel may be populated; treat a missing panel/palette as empty
    # instead of failing on len(None).
    regions_1 = regions_1 or []
    regions_2 = regions_2 or []
    colors = colors or []

    logger.info(f"Generating region comparison with {len(regions_1)} regions from panel 1 and {len(regions_2)} regions from panel 2")

    discarded_regions_info = _load_discarded_regions(protein_id)
    regions_1_by_name = _index_regions_by_name(regions_1, 1)
    regions_2_by_name = _index_regions_by_name(regions_2, 2)

    all_region_keys = sorted(regions_1_by_name.keys() | regions_2_by_name.keys())
    logger.info(f"All unique region keys: {all_region_keys}")

    # Recreate the letter assignment used by the visualization panels; the
    # palette size caps how many regions receive a letter.
    panel1_metaregions = _assign_metaregion_letters(regions_1, len(colors))
    panel2_metaregions = _assign_metaregion_letters(regions_2, len(colors))
    logger.info(f"Panel 1 metaregions: {panel1_metaregions}")
    logger.info(f"Panel 2 metaregions: {panel2_metaregions}")

    rows = []
    for region_key in all_region_keys:
        left_data = regions_1_by_name.get(region_key)
        right_data = regions_2_by_name.get(region_key)

        # Region boundaries: prefer panel 1, then panel 2.
        boundaries = left_data if left_data is not None else (right_data or {})
        region_start = boundaries.get('start', '-')
        region_end = boundaries.get('end', '-')
        region_size = boundaries.get('size', '-')

        # Left panel: no PAE information is shown.
        if left_data is not None:
            left_colored = _format_metaregion_cell(
                left_data.get('full_name', region_key),
                panel1_metaregions.get(region_key, region_key),
                colors,
            )
        else:
            left_colored = "Not present"

        if right_data is not None:
            right_colored = _format_metaregion_cell(
                right_data.get('full_name', region_key),
                panel2_metaregions.get(region_key, region_key),
                colors,
            )
            right_pae = right_data.get('pae')
            if right_pae is not None:
                right_colored += f" (PAE: {right_pae:.2f})"
        else:
            right_colored = "Not present"
            # Explain the absence: prefer the discarded file, otherwise infer
            # a reason from the panel-1 data.
            discard_info = discarded_regions_info.get(region_key)
            if discard_info is None and left_data is not None:
                discard_info = _infer_discard_info(left_data)
            if discard_info is not None:
                pae_value = discard_info.get('pae')
                # Show only the PAE value when available, else the reason text.
                if pae_value is not None:
                    note = f"PAE: {pae_value:.2f}"
                else:
                    note = discard_info['reason']
                right_colored += f" <span style='color: #FF6347; font-size: 0.9em;'>({note})</span>"

        rows.append(f"""
            <tr>
                <td><strong>{region_key}</strong></td>
                <td>{region_start}</td>
                <td>{region_end}</td>
                <td>{region_size}</td>
                <td>{left_colored}</td>
                <td>{right_colored}</td>
            </tr>
        """)

    table_head = """
    <div class="table-responsive">
        <table class="comparison-table">
            <thead>
                <tr>
                    <th>Region</th>
                    <th>Start</th>
                    <th>End</th>
                    <th>Size</th>
                    <th>Forms Metaregion (Left Panel)</th>
                    <th>Forms Metaregion (Right Panel)</th>
                </tr>
            </thead>
            <tbody>
    """
    table_tail = """
            </tbody>
        </table>
    </div>
    """

    return table_head + "".join(rows) + table_tail
try: - regions_file = Path(f'{METADATA_DIR}/AF-{protein_id}-F1-model_v4.json') + # Use provided metadata_dir or default to METADATA_DIR + metadata_directory = metadata_dir if metadata_dir is not None else METADATA_DIR + regions_file = Path(f'{metadata_directory}/AF-{protein_id}-F1-model_v4.json') if not regions_file.exists(): logger.warning(f"No metadata file found for protein {protein_id}, looked in {regions_file}") @@ -316,10 +318,10 @@ class ProteinService: html = html.replace('async src="', 'src="') return html, "Basic structure view (regions not available)" - def create_viewer(self, pdb_data: str, view_type: str = "basic", protein_id: str = None) -> tuple[str, str]: + def create_viewer(self, pdb_data: str, view_type: str = "basic", protein_id: str = None, metadata_dir: Path = None) -> tuple[str, str]: """Create a protein viewer based on view type""" if view_type == "regions" and protein_id: - return self.create_region_viewer(pdb_data, protein_id) + return self.create_region_viewer(pdb_data, protein_id, metadata_dir=metadata_dir) if view_type == "disordered": try: # Get both the high pLDDT structure and full structure