From a38cb2d5aed419ac20f74506232aa4fd129519ae Mon Sep 17 00:00:00 2001
From: Carson Berry <carson.berry@carsonb-ux1.corp.alleninstitute.org>
Date: Thu, 6 Nov 2025 10:09:50 -0800
Subject: [PATCH 01/20] feat: load processing_manifest.json from main folder if
 necessary

---
 src/see_spot/app.py      | 39 ++++++++++++++++++++++++---------------
 src/see_spot/s3_utils.py | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index ceed09e..492da75 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -19,7 +19,8 @@
 from see_spot.s3_utils import (
     find_unmixed_spots_file, find_related_files,
     load_ratios_from_s3, load_summary_stats_from_s3,
-    load_processing_manifest_from_s3, load_and_merge_spots_from_s3
+    load_processing_manifest_from_s3, load_and_merge_spots_from_s3,
+    find_processing_manifest
 )
 
 logging.basicConfig(level=logging.INFO)
@@ -265,10 +266,14 @@ async def get_real_spots_data(
         processing_manifest = df_cache.get("processing_manifest")
         spot_channels_from_manifest = df_cache.get("spot_channels_from_manifest")
         if not processing_manifest or not spot_channels_from_manifest:
-            # Construct manifest path and load
-            manifest_key = f"{DATA_PREFIX}/derived/processing_manifest.json"
-            logger.info(f"Attempting to load processing manifest from: s3://{S3_BUCKET}/{manifest_key}")
-            processing_manifest = load_processing_manifest_from_s3(S3_BUCKET, manifest_key)
+            # Find manifest in either top level or derived folder
+            manifest_key = find_processing_manifest(S3_BUCKET, DATA_PREFIX)
+            if not manifest_key:
+                logger.error(f"Could not find processing_manifest.json for dataset {DATA_PREFIX}")
+                spot_channels_from_manifest = []
+            else:
+                logger.info(f"Attempting to load processing manifest from: s3://{S3_BUCKET}/{manifest_key}")
+                processing_manifest = load_processing_manifest_from_s3(S3_BUCKET, manifest_key)
             if processing_manifest and "spot_channels" in processing_manifest:
                 spot_channels_from_manifest = processing_manifest["spot_channels"]
                 df_cache["processing_manifest"] = processing_manifest
@@ -280,7 +285,11 @@ async def get_real_spots_data(
     else:
         # Need to load DataFrame from S3
         # 1. Load processing manifest to determine paths and channels
-        manifest_key = f"{DATA_PREFIX}/derived/processing_manifest.json"
+        manifest_key = find_processing_manifest(S3_BUCKET, DATA_PREFIX)
+        if not manifest_key:
+            logger.error(f"Could not find processing_manifest.json for dataset {DATA_PREFIX}.")
+            return JSONResponse(status_code=500, content={'error': 'Failed to find processing manifest'})
+        
         logger.info(f"Attempting to load processing manifest from: s3://{S3_BUCKET}/{manifest_key}")
         processing_manifest = load_processing_manifest_from_s3(S3_BUCKET, manifest_key)
 
@@ -586,22 +595,22 @@ async def download_dataset(request: Request):
             return JSONResponse(status_code=400, content={"error": "Dataset name is required"})
         
         # Check if dataset exists on S3 by looking for the processing manifest
-        manifest_key = f"{dataset_name}/derived/processing_manifest.json"
+        manifest_key = find_processing_manifest(S3_BUCKET, dataset_name)
         
-        logger.info(f"Checking if dataset exists: s3://{S3_BUCKET}/{manifest_key}")
-        
-        # Try to get the manifest to verify the dataset exists
-        manifest_content = s3_handler.get_object(key=manifest_key, bucket_name=S3_BUCKET)
-        
-        if manifest_content is None:
+        if not manifest_key:
             return JSONResponse(
                 status_code=404, 
                 content={
-                    "error": f"Dataset not found on S3",
-                    "checked_path": f"s3://{S3_BUCKET}/{manifest_key}"
+                    "error": f"Dataset not found on S3 - processing_manifest.json not found",
+                    "checked_paths": [
+                        f"s3://{S3_BUCKET}/{dataset_name}/processing_manifest.json",
+                        f"s3://{S3_BUCKET}/{dataset_name}/derived/processing_manifest.json"
+                    ]
                 }
             )
         
+        logger.info(f"Found dataset manifest at: s3://{S3_BUCKET}/{manifest_key}")
+        
         # Download the processing manifest first
         manifest_local_path = s3_handler.download_file(
             key=manifest_key,
diff --git a/src/see_spot/s3_utils.py b/src/see_spot/s3_utils.py
index 93f55fa..f65f655 100644
--- a/src/see_spot/s3_utils.py
+++ b/src/see_spot/s3_utils.py
@@ -14,6 +14,41 @@
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
+
+def find_processing_manifest(bucket: str, dataset_name: str) -> Optional[str]:
+    """
+    Find the processing_manifest.json file in either the top level or derived folder.
+    
+    Args:
+        bucket: S3 bucket name
+        dataset_name: Dataset name/prefix
+        
+    Returns:
+        Full S3 key to the manifest file, or None if not found
+    """
+    # Try both possible locations
+    possible_paths = [
+        f"{dataset_name}/processing_manifest.json",  # Top level
+        f"{dataset_name}/derived/processing_manifest.json"  # Derived folder
+    ]
+    
+    logger.info(f"Searching for processing_manifest.json in dataset '{dataset_name}'")
+    
+    for manifest_key in possible_paths:
+        logger.info(f"Checking: s3://{bucket}/{manifest_key}")
+        try:
+            # Try to get metadata (faster than downloading)
+            metadata = s3_handler.get_object_metadata(key=manifest_key, bucket_name=bucket)
+            if metadata is not None:
+                logger.info(f"Found processing manifest at: {manifest_key}")
+                return manifest_key
+        except Exception as e:
+            logger.debug(f"Manifest not found at {manifest_key}: {e}")
+            continue
+    
+    logger.warning(f"Could not find processing_manifest.json in any expected location for dataset '{dataset_name}'")
+    return None
+
 def optimize_dtypes(df: pl.DataFrame) -> pl.DataFrame:
     """Optimize DataFrame dtypes to reduce memory usage.
     

From 1f52f60a91d6016c05a0161d00ac1fddaf61c864 Mon Sep 17 00:00:00 2001
From: Carson Berry <carson.berry@carsonb-ux1.corp.alleninstitute.org>
Date: Thu, 6 Nov 2025 14:37:20 -0800
Subject: [PATCH 02/20] added pointer to new neuroglancer file (stitched)

---
 src/see_spot/app.py      |  68 ++++++++++++++++++-----
 src/see_spot/ng_utils.py | 116 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 169 insertions(+), 15 deletions(-)

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index 492da75..22f693a 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -49,7 +49,8 @@
     "target_key": None,
     "processing_manifest": None,
     "spot_channels_from_manifest": None,
-    "sankey_data": None  # Cache Sankey data to avoid recalculation
+    "sankey_data": None,  # Cache Sankey data to avoid recalculation
+    "unmixed_spots_filename": None  # Store unmixed spots filename for neuroglancer logic
 }
 
 
@@ -344,6 +345,11 @@ async def get_real_spots_data(
         S3_BUCKET, related_files_prefix, "unmixed_spots_*.pkl"
     )
     
+    # Store the unmixed spots filename in cache for neuroglancer logic
+    if unmixed_target_key:
+        df_cache["unmixed_spots_filename"] = Path(unmixed_target_key).name
+        logger.info(f"Cached unmixed spots filename: {df_cache['unmixed_spots_filename']}")
+    
     if unmixed_target_key:
         related_files = find_related_files(S3_BUCKET, related_files_prefix, unmixed_target_key)
         logger.info(f"Searching for related files in '{related_files_prefix}'. Found: {related_files}")
@@ -518,29 +524,61 @@ async def create_neuroglancer_link(request: Request):
     cell_id = data.get("cell_id", 42)  # Default value if not provided
     spot_id = data.get("spot_id")
     annotation_color = data.get("annotation_color", "#FFFF00")
-    cross_section_scale = data.get("cross_section_scale", 1.0)
+    cross_section_scale = data.get("cross_section_scale", "0.135")
     
     # Input validation
-    if not fused_s3_paths or not position or not point_annotation or not spot_id:
+    if not position or not point_annotation or not spot_id:
         return JSONResponse(
             status_code=400,
-            content={"error": "Missing required parameters: fused_s3_paths, position, point_annotation, or spot_id"}
+            content={"error": "Missing required parameters: position, point_annotation, or spot_id"}
         )
     
     try:
         # Import the ng_utils module
         from see_spot import ng_utils
         
-        # Create the neuroglancer link
-        ng_link = ng_utils.create_link_no_upload(
-            fused_s3_paths,
-            annotation_color=annotation_color,
-            cross_section_scale=cross_section_scale,
-            cell_id=cell_id,
-            spot_id=spot_id,
-            position=position,
-            point_annotation=point_annotation
-        )
+        # Check if we should use the JSON-based method (when "merged" is in the pkl filename)
+        unmixed_spots_filename = df_cache.get("unmixed_spots_filename") or ""
+        use_json_method = "merged" in unmixed_spots_filename.lower()
+        
+        if use_json_method:
+            # Use the JSON-based method for merged datasets
+            logger.info(f"Using create_link_from_json method for merged dataset (filename: {unmixed_spots_filename})")
+            
+            # Construct the neuroglancer JSON path
+            ng_json_path = f"s3://{S3_BUCKET}/{DATA_PREFIX}/phase_correlation_stitching_neuroglancer.json"
+            logger.info(f"Neuroglancer JSON path: {ng_json_path}")
+            
+            # Create the neuroglancer link from JSON
+            ng_link = ng_utils.create_link_from_json(
+                ng_json_path=ng_json_path,
+                position=position,
+                spot_id=spot_id,
+                point_annotation=point_annotation,
+                annotation_color=annotation_color,
+                spacing=3.0,
+                cross_section_scale=cross_section_scale
+            )
+        else:
+            # Use the traditional method for non-merged datasets
+            logger.info(f"Using create_link_no_upload method for non-merged dataset (filename: {unmixed_spots_filename})")
+            
+            if not fused_s3_paths:
+                return JSONResponse(
+                    status_code=400,
+                    content={"error": "Missing required parameter: fused_s3_paths (required for non-merged datasets)"}
+                )
+            
+            # Create the neuroglancer link
+            ng_link = ng_utils.create_link_no_upload(
+                fused_s3_paths,
+                annotation_color=annotation_color,
+                cross_section_scale=cross_section_scale,
+                cell_id=cell_id,
+                spot_id=spot_id,
+                position=position,
+                point_annotation=point_annotation
+            )
         
         return {"url": ng_link}
     except Exception as e:
@@ -631,7 +669,7 @@ async def download_dataset(request: Request):
                 content={
                     "error": "Spots data file not found",
                     "checked_path": f"s3://{S3_BUCKET}/{spots_key}unmixed_spots_*.pkl"
-                }
+                
             )
         
         # Try to create the merged parquet file by calling our new merge function
diff --git a/src/see_spot/ng_utils.py b/src/see_spot/ng_utils.py
index 2dc8f4c..4c74eb4 100644
--- a/src/see_spot/ng_utils.py
+++ b/src/see_spot/ng_utils.py
@@ -239,6 +239,122 @@ def create_link_no_upload(fused_s3_path, resolution_zyx=None,
 
     return direct_url
 
+
+def create_link_from_json(ng_json_path, position, spot_id, point_annotation, 
+                          annotation_color="#FFFF00", spacing=3.0, 
+                          cross_section_scale= None, base_url="https://neuroglancer-demo.appspot.com"):
+    """
+    Create a Neuroglancer link from an existing JSON file with updated position and annotation.
+    
+    Parameters:
+    -----------
+    ng_json_path (str or Path): Path to the neuroglancer JSON file (can be local or S3 path)
+    position (list): New position coordinates [x, y, z, t]
+    spot_id (int or str): ID for the spot annotation
+    point_annotation (list): Point annotation coordinates [x, y, z, ...] 
+    annotation_color (str, optional): Hex color for the annotation. Default: "#FFFF00"
+    spacing (float, optional): Spacing for annotations in cross-section view. Default: 3.0
+    cross_section_scale (float, optional): Scale for cross-section view. If None, keeps existing value
+    base_url (str, optional): Base Neuroglancer URL. Default: "https://neuroglancer-demo.appspot.com"
+    
+    Returns:
+    --------
+    str: Direct Neuroglancer URL with updated state
+    """
+    import json
+    from pathlib import Path
+    
+    # Convert to Path object for easier handling
+    json_path = Path(ng_json_path) if not isinstance(ng_json_path, Path) else ng_json_path
+    
+    # Load the JSON file
+    try:
+        if str(json_path).startswith('s3://'):
+            # Handle S3 paths
+            import boto3
+            s3_path = str(json_path)[5:]  # Remove 's3://'
+            parts = s3_path.split('/')
+            bucket = parts[0]
+            key = '/'.join(parts[1:])
+            
+            s3_client = boto3.client('s3')
+            response = s3_client.get_object(Bucket=bucket, Key=key)
+            json_content = response['Body'].read().decode('utf-8')
+            state_dict = json.loads(json_content)
+            print(f"Loaded Neuroglancer state from S3: s3://{bucket}/{key}")
+        else:
+            # Handle local file paths
+            with open(json_path, 'r') as f:
+                state_dict = json.load(f)
+            print(f"Loaded Neuroglancer state from local file: {json_path}")
+    except FileNotFoundError:
+        raise FileNotFoundError(f"Neuroglancer JSON file not found: {json_path}")
+    except json.JSONDecodeError as e:
+        raise ValueError(f"Invalid JSON in file {json_path}: {e}")
+    except Exception as e:
+        raise Exception(f"Error loading Neuroglancer JSON from {json_path}: {e}")
+    
+    # Update position
+    state_dict["position"] = position
+    print(f"Updated position to: {position}")
+    
+    # Update cross-section scale if provided
+    if cross_section_scale is not None:
+        state_dict["crossSectionScale"] = cross_section_scale
+        print(f"Updated crossSectionScale to: {cross_section_scale}")
+    
+    # Find or create annotation layer
+    annotation_layer_found = False
+    
+    if "layers" in state_dict:
+        # Look for existing annotation layer
+        for i, layer in enumerate(state_dict["layers"]):
+            if layer.get("type") == "annotation":
+                # Update existing annotation layer
+                annotation = {
+                    "type": "point",
+                    "id": str(spot_id),
+                    "point": point_annotation,
+                }
+                
+                # Update the layer properties
+                state_dict["layers"][i]["name"] = f"Spot {spot_id}"
+                state_dict["layers"][i]["annotationColor"] = annotation_color
+                state_dict["layers"][i]["crossSectionAnnotationSpacing"] = spacing
+                state_dict["layers"][i]["annotations"] = [annotation]
+                
+                annotation_layer_found = True
+                print(f"Updated existing annotation layer with spot {spot_id}")
+                break
+        
+        # If no annotation layer exists, create one
+        if not annotation_layer_found:
+            annotation_layer = {
+                "type": "annotation",
+                "name": f"Spot {spot_id}",
+                "tab": "annotations",
+                "visible": True,
+                "annotationColor": annotation_color,
+                "crossSectionAnnotationSpacing": spacing,
+                "projectionAnnotationSpacing": 10,
+                "tool": "annotatePoint",
+                "annotations": [{
+                    "type": "point",
+                    "id": str(spot_id),
+                    "point": point_annotation,
+                }]
+            }
+            state_dict["layers"].append(annotation_layer)
+            print(f"Created new annotation layer with spot {spot_id}")
+    else:
+        print("Warning: No 'layers' found in Neuroglancer state")
+    
+    # Generate direct URL
+    direct_url = create_direct_neuroglancer_url(state_dict, base_url=base_url)
+    
+    return direct_url
+
+
 def read_zarr_resolution_boto(s3_path):
     """
     Read resolution from zarr using direct S3 access via boto3

From 903c6bd8becc8292e93ddd4b3b4e48b1cd45f1d6 Mon Sep 17 00:00:00 2001
From: Carson Berry <carson.berry@carsonb-ux1.corp.alleninstitute.org>
Date: Fri, 7 Nov 2025 11:09:29 -0800
Subject: [PATCH 03/20] refactor: linting etc, + fixing zag's mess

---
 src/see_spot/app.py        |  69 +++----
 src/see_spot/ng_utils.py   | 334 ++++++++++++++++++---------------
 src/see_spot/s3_handler.py | 219 ++++++++++++----------
 src/see_spot/s3_utils.py   | 374 +++++++++++++++++++++++--------------
 4 files changed, 579 insertions(+), 417 deletions(-)

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index 22f693a..4d4ef88 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pandas as pd
 from datetime import datetime, timedelta
+from see_spot import ng_utils
 import uvicorn
 import logging
 import os
@@ -51,6 +52,7 @@
     "spot_channels_from_manifest": None,
     "sankey_data": None,  # Cache Sankey data to avoid recalculation
     "unmixed_spots_filename": None  # Store unmixed spots filename for neuroglancer logic
+    "unmixed_spots_filename": None  # Store unmixed spots filename for neuroglancer logic
 }
 
 
@@ -344,6 +346,11 @@ async def get_real_spots_data(
     unmixed_target_key = find_unmixed_spots_file(
         S3_BUCKET, related_files_prefix, "unmixed_spots_*.pkl"
     )
+    # Store the unmixed spots filename in cache for neuroglancer logic
+    if unmixed_target_key:
+        df_cache["unmixed_spots_filename"] = Path(unmixed_target_key).name
+        logger.info(f"Cached unmixed spots filename: {df_cache['unmixed_spots_filename']}")
+    
     
     # Store the unmixed spots filename in cache for neuroglancer logic
     if unmixed_target_key:
@@ -518,13 +525,12 @@ async def create_neuroglancer_link(request: Request):
     data = await request.json()
     
     # Extract the parameters from the request
-    fused_s3_paths = data.get("fused_s3_paths")
+    cross_section_scale = data.get("cross_section_scale", "0.135")
+    spot_id = data.get("spot_id")
     position = data.get("position")
     point_annotation = data.get("point_annotation")
-    cell_id = data.get("cell_id", 42)  # Default value if not provided
-    spot_id = data.get("spot_id")
-    annotation_color = data.get("annotation_color", "#FFFF00")
-    cross_section_scale = data.get("cross_section_scale", "0.135")
+    if not position or not point_annotation or not spot_id:
+        annotation_color = data.get("annotation_color", "#FFFF00")
     
     # Input validation
     if not position or not point_annotation or not spot_id:
@@ -532,37 +538,34 @@ async def create_neuroglancer_link(request: Request):
             status_code=400,
             content={"error": "Missing required parameters: position, point_annotation, or spot_id"}
         )
-    
-    try:
-        # Import the ng_utils module
-        from see_spot import ng_utils
-        
-        # Check if we should use the JSON-based method (when "merged" is in the pkl filename)
-        unmixed_spots_filename = df_cache.get("unmixed_spots_filename") or ""
-        use_json_method = "merged" in unmixed_spots_filename.lower()
-        
+    # Check if we should use the JSON-based method (when "merged" is in the pkl filename)
+    unmixed_spots_filename = df_cache.get("unmixed_spots_filename") or ""
+    use_json_method = "merged" in unmixed_spots_filename.lower()
+    try: 
         if use_json_method:
-            # Use the JSON-based method for merged datasets
-            logger.info(f"Using create_link_from_json method for merged dataset (filename: {unmixed_spots_filename})")
-            
-            # Construct the neuroglancer JSON path
-            ng_json_path = f"s3://{S3_BUCKET}/{DATA_PREFIX}/phase_correlation_stitching_neuroglancer.json"
-            logger.info(f"Neuroglancer JSON path: {ng_json_path}")
-            
-            # Create the neuroglancer link from JSON
-            ng_link = ng_utils.create_link_from_json(
-                ng_json_path=ng_json_path,
-                position=position,
-                spot_id=spot_id,
-                point_annotation=point_annotation,
-                annotation_color=annotation_color,
-                spacing=3.0,
-                cross_section_scale=cross_section_scale
-            )
+                # Use the JSON-based method for merged datasets
+                logger.info(f"Using create_link_from_json method for merged dataset (filename: {unmixed_spots_filename})")
+                
+                # Construct the neuroglancer JSON path
+                ng_json_path = f"s3://{S3_BUCKET}/{DATA_PREFIX}/phase_correlation_stitching_neuroglancer.json"
+                logger.info(f"Neuroglancer JSON path: {ng_json_path}")
+                
+                # Create the neuroglancer link from JSON
+                ng_link = ng_utils.create_link_from_json(
+                    ng_json_path=ng_json_path,
+                    position=position,
+                    spot_id=spot_id,
+                    point_annotation=point_annotation,
+                    annotation_color=annotation_color,
+                    spacing=3.0,
+                    cross_section_scale=cross_section_scale
+                )
         else:
             # Use the traditional method for non-merged datasets
             logger.info(f"Using create_link_no_upload method for non-merged dataset (filename: {unmixed_spots_filename})")
-            
+            fused_s3_paths = data.get("fused_s3_paths")
+            cell_id = data.get("cell_id", 42)  # Default value if not provided
+
             if not fused_s3_paths:
                 return JSONResponse(
                     status_code=400,
@@ -579,7 +582,7 @@ async def create_neuroglancer_link(request: Request):
                 position=position,
                 point_annotation=point_annotation
             )
-        
+            
         return {"url": ng_link}
     except Exception as e:
         logger.error(f"Error creating neuroglancer link: {str(e)}")
diff --git a/src/see_spot/ng_utils.py b/src/see_spot/ng_utils.py
index 4c74eb4..49e6498 100644
--- a/src/see_spot/ng_utils.py
+++ b/src/see_spot/ng_utils.py
@@ -16,15 +16,19 @@
 
 import json
 import urllib.parse
-def create_direct_neuroglancer_url(json_data, base_url="https://neuroglancer-demo.appspot.com"):
+
+
+def create_direct_neuroglancer_url(
+    json_data, base_url="https://neuroglancer-demo.appspot.com"
+):
     """
-    Creates a direct Neuroglancer URL by removing the ng_link field 
+    Creates a direct Neuroglancer URL by removing the ng_link field
     and encoding the remaining JSON as part of the URL.
-    
+
     Args:
         json_data: Either a JSON string or a Python dictionary containing the Neuroglancer state
         base_url: The base Neuroglancer URL to use
-    
+
     Returns:
         str: A complete Neuroglancer URL with the JSON state encoded in the fragment
     """
@@ -33,42 +37,54 @@ def create_direct_neuroglancer_url(json_data, base_url="https://neuroglancer-dem
         data = json.loads(json_data)
     else:
         data = json_data.copy()
-    
+
     # Remove the ng_link key if it exists
     if "ng_link" in data:
         del data["ng_link"]
-    
+
     # Convert to JSON string and encode for URL
     json_str = json.dumps(data)
     encoded_json = urllib.parse.quote(json_str)
-    
+
     # Ensure base URL ends with /
-    if not base_url.endswith('/'):
-        base_url += '/'
-    
+    if not base_url.endswith("/"):
+        base_url += "/"
+
     # Create the full URL
     full_url = f"{base_url}#!{encoded_json}"
-    
+
     # Check URL length and print warning if too long
     url_length = len(full_url)
     print(f"URL character count: {url_length}")
-    
+
     if url_length > 5000:
-        print(f"WARNING: URL length ({url_length} characters) exceeds 5000 characters.")
+        print(
+            f"WARNING: URL length ({url_length} characters) exceeds 5000 characters."
+        )
         print("This may cause issues in some browsers or web servers.")
         print("Consider reducing JSON complexity or using a URL shortener.")
-    
+
     return full_url
 
 
-def create_link_no_upload(fused_s3_path, resolution_zyx=None,
-                max_dr=1200, opacity=1.0, blend="additive", 
-                annotation_color="#ff0000", spacing=3.0, cross_section_scale=1.0,
-                position=None, cell_id: int = 0, spot_id=None, point_annotation=None,
-                output_folder=None):
+def create_link_no_upload(
+    fused_s3_path,
+    resolution_zyx=None,
+    max_dr=1200,
+    opacity=1.0,
+    blend="additive",
+    annotation_color="#ff0000",
+    spacing=3.0,
+    cross_section_scale=1.0,
+    position=None,
+    cell_id: int = 0,
+    spot_id=None,
+    point_annotation=None,
+    output_folder=None,
+):
     """
     Create a Neuroglancer JSON file for multiple channels with a single point annotation.
-    
+
     Parameters:
     fused_s3_path (str or list): S3 path(s) to the fused dataset(s). Can be a single string or list of strings.
     resolution_zyx (list, optional): Resolution in z,y,x order. If None, attempts to read from YAML.
@@ -82,85 +98,82 @@ def create_link_no_upload(fused_s3_path, resolution_zyx=None,
     cell_id (int): cell id to plot (gets added to NG json filename)
     spot_id (str or int, optional): ID for the spot annotation
     point_annotation (dict or list, optional): Coordinates [x,y,z] for a single point annotation
-    
+
     Returns:
     str: URL to the Neuroglancer link
     """
     # Convert single paths to lists for consistent processing
     if isinstance(fused_s3_path, str):
         fused_s3_path = [fused_s3_path]
-    
+
     # If resolution not provided, try to read from first zarr file
     if resolution_zyx is None:
         try:
             resolution_zyx = read_zarr_resolution_boto(fused_s3_path[0])
             print(f"Found resolution from zarr: {resolution_zyx}")
         except Exception as e:
-            print(f"Warning: Could not read resolution from zarr file: {str(e)}")
+            print(
+                f"Warning: Could not read resolution from zarr file: {str(e)}"
+            )
             # Provide a default resolution if we can't read it
             resolution_zyx = [1.0, 1.0, 1.0]
             print(f"Using default resolution: {resolution_zyx}")
 
     output_dimensions = {
-            "x": {"voxel_size": resolution_zyx[2], "unit": "microns"},
-            "y": {"voxel_size": resolution_zyx[1], "unit": "microns"},
-            "z": {"voxel_size": resolution_zyx[0], "unit": "microns"},
-            "c'": {"voxel_size": 1, "unit": ""},
-            "t": {"voxel_size": 0.001, "unit": "seconds"},
-        }
+        "x": {"voxel_size": resolution_zyx[2], "unit": "microns"},
+        "y": {"voxel_size": resolution_zyx[1], "unit": "microns"},
+        "z": {"voxel_size": resolution_zyx[0], "unit": "microns"},
+        "c'": {"voxel_size": 1, "unit": ""},
+        "t": {"voxel_size": 0.001, "unit": "seconds"},
+    }
 
     # Initialize layers list
     layers = []  # Represent Neuroglancer Tabs
-    
+
     # Process each fused path
     for idx, fused_path in enumerate(fused_s3_path):
         # Extract channel number from fused path
         pattern = r"(ch|CH|channel)_(\d+)"
         match = re.search(pattern, fused_path)
         if not match:
-            raise ValueError(f"Could not extract channel number from path: {fused_path}")
-        
+            raise ValueError(
+                f"Could not extract channel number from path: {fused_path}"
+            )
+
         channel = int(match.group(2))
         hex_val = wavelength_to_hex_pure_colours(channel)
         hex_str = f"#{hex_val:06x}"
-        
+
         # Add image layer
         image_layer = {
             "type": "image",
             "source": fused_path,
             "channel": 0,
-            "shaderControls": {
-                "normalized": {"range": [90, max_dr]}
-            },
+            "shaderControls": {"normalized": {"range": [90, max_dr]}},
             "shader": {
                 "color": hex_str,
                 "emitter": "RGB",
                 "vec": "vec3",
             },
-            "localPosition": [
-                0.5
-            ],
+            "localPosition": [0.5],
             "visible": True,
             "opacity": opacity,
             "name": f"CH_{channel}",
             "blend": blend,
         }
         layers.append(image_layer)
-    
-   
-    
+
     # Add specific point annotation if provided
     if point_annotation is not None:
-
         # convert output_dimensions to a meter]}
-         # Create a single annotation layer for the point
+        # Create a single annotation layer for the point
         annotation_layer = {
             "type": "annotation",
             # "source": {
             #     "url":"local://annotations",
             #     "transform": output_dimensions
             # },
-            #"source": "local://annotations",
+            # "source": "local://annotations",
             "name": f"Spot {spot_id}",
             "tab": "annotations",
             "visible": True,
@@ -168,25 +181,23 @@ def create_link_no_upload(fused_s3_path, resolution_zyx=None,
             "crossSectionAnnotationSpacing": spacing,
             "projectionAnnotationSpacing": 10,
             "tool": "annotatePoint",
-            
         }
 
-        #point = {"x":point_annotation[0], "y":point_annotation[1], "z":point_annotation[2], "t":point_annotation[3]}
-
+        # point = {"x":point_annotation[0], "y":point_annotation[1], "z":point_annotation[2], "t":point_annotation[3]}
 
         annotation = {
             "type": "point",
             "id": str(spot_id) if spot_id is not None else "spot",
             "point": point_annotation,
-            #"description": f"Spot ID: {spot_id}" if spot_id is not None else "Point annotation"
+            # "description": f"Spot ID: {spot_id}" if spot_id is not None else "Point annotation"
         }
 
         annotation_layer["annotations"] = [annotation]
-        
+
         # Use the point coordinates as the position if no position is specified
         if position is None:
             position = point + [0]  # Add time dimension (t=0)
-        
+
         # Add the annotation layer
         annotation_layer
     print(f"annotation_layer: {annotation_layer}")
@@ -199,38 +210,37 @@ def create_link_no_upload(fused_s3_path, resolution_zyx=None,
         "showAxisLines": False,
     }
 
-    
     # Extract bucket and dataset from first fused path
-    parts = fused_s3_path[0].split('/')
+    parts = fused_s3_path[0].split("/")
     bucket_name = parts[2]
     dataset_name = parts[3]
-    
+
     # Set up output folder
     if output_folder is None:
         cd = os.getcwd()
         output_folder = f"{cd}/{dataset_name}/"
     if not pathlib.Path(output_folder).exists():
         pathlib.Path(output_folder).mkdir(parents=True, exist_ok=True)
-    
+
     # Create JSON file name
     json_name = f"point_annotation_ng_link_{spot_id if spot_id is not None else 'spot'}.json"
-    
+
     # Generate the Neuroglancer state
     neuroglancer_link = NgState(
         input_config,
         "s3",
         bucket_name,
         output_folder,
-        dataset_name = pathlib.Path(output_folder).stem,
-        base_url="https://neuroglancer-demo.appspot.com", 
+        dataset_name=pathlib.Path(output_folder).stem,
+        base_url="https://neuroglancer-demo.appspot.com",
         json_name=json_name,
     )
 
     state_dict = neuroglancer_link.state
     # add crossSectionScale to state_dict
     # append annotation_layer to state_dict["layers"]
-    #annotation_layer["source"]["transform"] = state_dict["dimensions"] # THIS BRINGS METERS IN
-    
+    # annotation_layer["source"]["transform"] = state_dict["dimensions"] # THIS BRINGS METERS IN
+
     state_dict["layers"].append(annotation_layer)
     state_dict["crossSectionScale"] = cross_section_scale
     state_dict["position"] = position
@@ -240,72 +250,88 @@ def create_link_no_upload(fused_s3_path, resolution_zyx=None,
     return direct_url
 
 
-def create_link_from_json(ng_json_path, position, spot_id, point_annotation, 
-                          annotation_color="#FFFF00", spacing=3.0, 
-                          cross_section_scale= None, base_url="https://neuroglancer-demo.appspot.com"):
+def create_link_from_json(
+    ng_json_path,
+    position,
+    spot_id,
+    point_annotation,
+    annotation_color="#FFFF00",
+    spacing=3.0,
+    cross_section_scale=None,
+    base_url="https://neuroglancer-demo.appspot.com",
+):
     """
     Create a Neuroglancer link from an existing JSON file with updated position and annotation.
-    
+
     Parameters:
     -----------
     ng_json_path (str or Path): Path to the neuroglancer JSON file (can be local or S3 path)
     position (list): New position coordinates [x, y, z, t]
     spot_id (int or str): ID for the spot annotation
-    point_annotation (list): Point annotation coordinates [x, y, z, ...] 
+    point_annotation (list): Point annotation coordinates [x, y, z, ...]
     annotation_color (str, optional): Hex color for the annotation. Default: "#FFFF00"
     spacing (float, optional): Spacing for annotations in cross-section view. Default: 3.0
     cross_section_scale (float, optional): Scale for cross-section view. If None, keeps existing value
     base_url (str, optional): Base Neuroglancer URL. Default: "https://neuroglancer-demo.appspot.com"
-    
+
     Returns:
     --------
     str: Direct Neuroglancer URL with updated state
     """
     import json
     from pathlib import Path
-    
+
     # Convert to Path object for easier handling
-    json_path = Path(ng_json_path) if not isinstance(ng_json_path, Path) else ng_json_path
-    
+    json_path = (
+        Path(ng_json_path)
+        if not isinstance(ng_json_path, Path)
+        else ng_json_path
+    )
+
     # Load the JSON file
     try:
-        if str(json_path).startswith('s3://'):
+        if str(json_path).startswith("s3://"):
             # Handle S3 paths
             import boto3
+
             s3_path = str(json_path)[5:]  # Remove 's3://'
-            parts = s3_path.split('/')
+            parts = s3_path.split("/")
             bucket = parts[0]
-            key = '/'.join(parts[1:])
-            
-            s3_client = boto3.client('s3')
+            key = "/".join(parts[1:])
+
+            s3_client = boto3.client("s3")
             response = s3_client.get_object(Bucket=bucket, Key=key)
-            json_content = response['Body'].read().decode('utf-8')
+            json_content = response["Body"].read().decode("utf-8")
             state_dict = json.loads(json_content)
             print(f"Loaded Neuroglancer state from S3: s3://{bucket}/{key}")
         else:
             # Handle local file paths
-            with open(json_path, 'r') as f:
+            with open(json_path, "r") as f:
                 state_dict = json.load(f)
             print(f"Loaded Neuroglancer state from local file: {json_path}")
     except FileNotFoundError:
-        raise FileNotFoundError(f"Neuroglancer JSON file not found: {json_path}")
+        raise FileNotFoundError(
+            f"Neuroglancer JSON file not found: {json_path}"
+        )
     except json.JSONDecodeError as e:
         raise ValueError(f"Invalid JSON in file {json_path}: {e}")
     except Exception as e:
-        raise Exception(f"Error loading Neuroglancer JSON from {json_path}: {e}")
-    
+        raise Exception(
+            f"Error loading Neuroglancer JSON from {json_path}: {e}"
+        )
+
     # Update position
     state_dict["position"] = position
     print(f"Updated position to: {position}")
-    
+
     # Update cross-section scale if provided
     if cross_section_scale is not None:
         state_dict["crossSectionScale"] = cross_section_scale
         print(f"Updated crossSectionScale to: {cross_section_scale}")
-    
+
     # Find or create annotation layer
     annotation_layer_found = False
-    
+
     if "layers" in state_dict:
         # Look for existing annotation layer
         for i, layer in enumerate(state_dict["layers"]):
@@ -316,17 +342,19 @@ def create_link_from_json(ng_json_path, position, spot_id, point_annotation,
                     "id": str(spot_id),
                     "point": point_annotation,
                 }
-                
+
                 # Update the layer properties
                 state_dict["layers"][i]["name"] = f"Spot {spot_id}"
                 state_dict["layers"][i]["annotationColor"] = annotation_color
-                state_dict["layers"][i]["crossSectionAnnotationSpacing"] = spacing
+                state_dict["layers"][i][
+                    "crossSectionAnnotationSpacing"
+                ] = spacing
                 state_dict["layers"][i]["annotations"] = [annotation]
-                
+
                 annotation_layer_found = True
                 print(f"Updated existing annotation layer with spot {spot_id}")
                 break
-        
+
         # If no annotation layer exists, create one
         if not annotation_layer_found:
             annotation_layer = {
@@ -338,20 +366,22 @@ def create_link_from_json(ng_json_path, position, spot_id, point_annotation,
                 "crossSectionAnnotationSpacing": spacing,
                 "projectionAnnotationSpacing": 10,
                 "tool": "annotatePoint",
-                "annotations": [{
-                    "type": "point",
-                    "id": str(spot_id),
-                    "point": point_annotation,
-                }]
+                "annotations": [
+                    {
+                        "type": "point",
+                        "id": str(spot_id),
+                        "point": point_annotation,
+                    }
+                ],
             }
             state_dict["layers"].append(annotation_layer)
             print(f"Created new annotation layer with spot {spot_id}")
     else:
         print("Warning: No 'layers' found in Neuroglancer state")
-    
+
     # Generate direct URL
     direct_url = create_direct_neuroglancer_url(state_dict, base_url=base_url)
-    
+
     return direct_url
 
 
@@ -359,66 +389,75 @@ def read_zarr_resolution_boto(s3_path):
     """
     Read resolution from zarr using direct S3 access via boto3
     found s3fs/zarr was not working, so using boto3 (MD)
-    
+
     Parameters:
     s3_path (str): S3 path to the zarr dataset
-    
+
     Returns:
     list: Resolution in z,y,x order in micrometers
     """
     import boto3
     import json
-    
+
     # Parse the S3 path
-    if s3_path.startswith('s3://'):
+    if s3_path.startswith("s3://"):
         s3_path = s3_path[5:]  # Remove 's3://'
-    
-    parts = s3_path.split('/')
+
+    parts = s3_path.split("/")
     bucket = parts[0]
-    prefix = '/'.join(parts[1:])
-    
+    prefix = "/".join(parts[1:])
+
     # Create boto3 client
-    s3_client = boto3.client('s3')
-    
+    s3_client = boto3.client("s3")
+
     try:
         # Try to get the .zattrs file which should contain resolution metadata
         zattrs_key = f"{prefix}/.zattrs"
         print(f"Reading {zattrs_key} from bucket {bucket}")
         response = s3_client.get_object(Bucket=bucket, Key=zattrs_key)
-        zattrs_content = response['Body'].read().decode('utf-8')
+        zattrs_content = response["Body"].read().decode("utf-8")
         zattrs = json.loads(zattrs_content)
-        
+
         # Look for resolution in multiscales metadata
-        if 'multiscales' in zattrs and zattrs['multiscales']:
-            multiscale = zattrs['multiscales'][0]
-            
-            if 'axes' in multiscale:
-                axes = multiscale['axes']
-                axes_map = {axis['name']: i for i, axis in enumerate(axes)}
-                
-                z_idx = axes_map.get('z')
-                y_idx = axes_map.get('y') 
-                x_idx = axes_map.get('x')
-                
-                if 'datasets' in multiscale and multiscale['datasets']:
-                    dataset = multiscale['datasets'][0]
-                    if 'coordinateTransformations' in dataset:
-                        for transform in dataset['coordinateTransformations']:
-                            if transform.get('type') == 'scale':
-                                scale = transform['scale']
-                                
-                                if all(idx is not None for idx in [z_idx, y_idx, x_idx]):
-                                    print(f"Found resolution from multiscales: {[scale[z_idx], scale[y_idx], scale[x_idx]]}")
-                                    return [scale[z_idx], scale[y_idx], scale[x_idx]]
-        
+        if "multiscales" in zattrs and zattrs["multiscales"]:
+            multiscale = zattrs["multiscales"][0]
+
+            if "axes" in multiscale:
+                axes = multiscale["axes"]
+                axes_map = {axis["name"]: i for i, axis in enumerate(axes)}
+
+                z_idx = axes_map.get("z")
+                y_idx = axes_map.get("y")
+                x_idx = axes_map.get("x")
+
+                if "datasets" in multiscale and multiscale["datasets"]:
+                    dataset = multiscale["datasets"][0]
+                    if "coordinateTransformations" in dataset:
+                        for transform in dataset["coordinateTransformations"]:
+                            if transform.get("type") == "scale":
+                                scale = transform["scale"]
+
+                                if all(
+                                    idx is not None
+                                    for idx in [z_idx, y_idx, x_idx]
+                                ):
+                                    print(
+                                        f"Found resolution from multiscales: {[scale[z_idx], scale[y_idx], scale[x_idx]]}"
+                                    )
+                                    return [
+                                        scale[z_idx],
+                                        scale[y_idx],
+                                        scale[x_idx],
+                                    ]
+
         # Check for direct resolution attribute
-        if 'resolution' in zattrs:
+        if "resolution" in zattrs:
             print(f"Found direct resolution attribute: {zattrs['resolution']}")
-            return list(zattrs['resolution'])
-            
+            return list(zattrs["resolution"])
+
     except Exception as e:
         print(f"Error reading .zattrs: {str(e)}")
-    
+
     print(f"Using default resolution for {s3_path}")
     return [1.0, 1.0, 1.0]
 
@@ -441,29 +480,26 @@ def wavelength_to_hex_pure_colours(wavelength: int) -> int:
     # Color map wavelength/hex pairs are generated
     # by sampling along a CIE diagram arc.
     color_map = {
-        0:   0xFFFFFF, #white 
-        1:   0x00FF00,  # Blue
-        2:   0xFF0000,  # Red
-        3:   0x0000FF,  # Blue
-        4:   0x00FFFF,  #cyan   
-        5:   0xFF00FF,  # magenta   #638
-        
-        
-        #420: 0xFFFFFF, #white       #405
-        #490: 0x5DF8D6,  # Green     #488
-        #520: 0x4B90FE,  # Blue      #515
-        #570: 0xE9EC02,  # Yellow    #561
-        #600: 0xF00050,  # Pink      #594
-        #650: 0xF0121E,  # Red       #638
-
-        420: 0xFFFFFF, #white       #405
+        0: 0xFFFFFF,  # white
+        1: 0x00FF00,  # Green
+        2: 0xFF0000,  # Red
+        3: 0x0000FF,  # Blue
+        4: 0x00FFFF,  # cyan
+        5: 0xFF00FF,  # magenta   #638
+        # 420: 0xFFFFFF, #white       #405
+        # 490: 0x5DF8D6,  # Green     #488
+        # 520: 0x4B90FE,  # Blue      #515
+        # 570: 0xE9EC02,  # Yellow    #561
+        # 600: 0xF00050,  # Pink      #594
+        # 650: 0xF0121E,  # Red       #638
+        420: 0xFFFFFF,  # white       #405
         490: 0x00FF00,  # Green     #488
         520: 0xFF0000,  # Red       #515
         570: 0x0000FF,  # Blue      #561
-        600: 0x00FFFF,  #cyan       #594 #600: 0xFFF000,  # Orange    #594 #or should be cyan? 
+        600: 0x00FFFF,  # cyan       #594 #600: 0xFFF000,  # Orange    #594 #or should be cyan?
         650: 0xFF00FF,  # magenta   #638
     }
     for ub, hex_val in color_map.items():
         if wavelength < ub:  # Exclusive
             return hex_val
-    return hex_val  # hex_val is set to the last color in for loop
\ No newline at end of file
+    return hex_val  # hex_val is set to the last color in for loop
diff --git a/src/see_spot/s3_handler.py b/src/see_spot/s3_handler.py
index 61dfec4..50783aa 100644
--- a/src/see_spot/s3_handler.py
+++ b/src/see_spot/s3_handler.py
@@ -9,16 +9,18 @@
 from botocore.exceptions import ClientError
 from pathlib import Path
 from typing import Optional, Union
+
 # Configure logging
 logger = logging.getLogger(__name__)
 
+
 class S3Handler:
     """Handler for S3 operations."""
-    
+
     def __init__(self, bucket_name=None):
         """
         Initialize S3 client using environment credentials.
-        
+
         Args:
             bucket_name (str, optional): Default S3 bucket name.
         """
@@ -26,149 +28,142 @@ def __init__(self, bucket_name=None):
         self.s3_resource = None
         self.bucket_name = bucket_name
         self.init_s3_client()
-    
+
     def init_s3_client(self):
         """Initialize the S3 client using credentials from environment variables."""
         try:
-            # Create S3 client - boto3 will automatically use AWS_ACCESS_KEY_ID, 
+            # Create S3 client - boto3 will automatically use AWS_ACCESS_KEY_ID,
             # AWS_SECRET_ACCESS_KEY, and AWS_SESSION_TOKEN from environment
-            self.s3_client = boto3.client('s3')
-            self.s3_resource = boto3.resource('s3')
+            self.s3_client = boto3.client("s3")
+            self.s3_resource = boto3.resource("s3")
             logger.info("S3 client initialized successfully")
         except Exception as e:
             logger.error(f"Failed to initialize S3 client: {e}")
             raise
-    
+
     def test_connection(self, bucket_name=None):
         """
         Test connection to S3 by listing objects in a bucket.
-        
+
         Args:
             bucket_name (str, optional): S3 bucket name to test. Uses default if not provided.
-            
+
         Returns:
             dict: Test results with success status and message
         """
         bucket = bucket_name or self.bucket_name
-        
+
         if not bucket:
             return {
                 "success": False,
-                "message": "No bucket name provided for test"
+                "message": "No bucket name provided for test",
             }
-        
+
         try:
             # Try to list objects (limited to 5 for test)
-            response = self.s3_client.list_objects_v2(
-                Bucket=bucket,
-                MaxKeys=5
-            )
-            
+            response = self.s3_client.list_objects_v2(Bucket=bucket, MaxKeys=5)
+
             # Check if we can access the bucket
-            if 'Contents' in response:
-                object_count = len(response['Contents'])
-                objects = [obj['Key'] for obj in response['Contents']]
-                
+            if "Contents" in response:
+                object_count = len(response["Contents"])
+                objects = [obj["Key"] for obj in response["Contents"]]
+
                 return {
                     "success": True,
                     "message": f"Successfully connected to bucket '{bucket}'",
                     "object_count": object_count,
-                    "sample_objects": objects
+                    "sample_objects": objects,
                 }
             else:
                 return {
                     "success": True,
-                    "message": f"Successfully connected to bucket '{bucket}' but it appears to be empty"
+                    "message": f"Successfully connected to bucket '{bucket}' but it appears to be empty",
                 }
-                
+
         except ClientError as e:
-            error_code = e.response['Error']['Code']
-            error_message = e.response['Error']['Message']
-            
-            if error_code == 'NoSuchBucket':
+            error_code = e.response["Error"]["Code"]
+            error_message = e.response["Error"]["Message"]
+
+            if error_code == "NoSuchBucket":
                 return {
                     "success": False,
-                    "message": f"Bucket '{bucket}' does not exist"
+                    "message": f"Bucket '{bucket}' does not exist",
                 }
-            elif error_code == 'AccessDenied':
+            elif error_code == "AccessDenied":
                 return {
                     "success": False,
-                    "message": f"Access denied to bucket '{bucket}'. Check your credentials and permissions."
+                    "message": f"Access denied to bucket '{bucket}'. Check your credentials and permissions.",
                 }
             else:
                 return {
                     "success": False,
-                    "message": f"Error accessing bucket '{bucket}': {error_message}"
+                    "message": f"Error accessing bucket '{bucket}': {error_message}",
                 }
         except Exception as e:
-            return {
-                "success": False,
-                "message": f"Unexpected error: {str(e)}"
-            }
+            return {"success": False, "message": f"Unexpected error: {str(e)}"}
 
     def list_objects(self, bucket_name=None, prefix="", max_keys=1000):
         """
         List objects in a bucket with optional prefix filtering.
-        
+
         Args:
             bucket_name (str, optional): S3 bucket name. Uses default if not provided.
             prefix (str, optional): Filter objects by prefix
             max_keys (int, optional): Maximum number of keys to return
-            
+
         Returns:
             list: List of object keys
         """
         bucket = bucket_name or self.bucket_name
-        
+
         if not bucket:
             logger.error("No bucket name provided")
             return []
-        
+
         try:
-            paginator = self.s3_client.get_paginator('list_objects_v2')
+            paginator = self.s3_client.get_paginator("list_objects_v2")
             objects = []
-            
+
             # Paginate through results
             for page in paginator.paginate(
                 Bucket=bucket,
                 Prefix=prefix,
-                PaginationConfig={"MaxItems": max_keys}
+                PaginationConfig={"MaxItems": max_keys},
             ):
-                if 'Contents' in page:
-                    for obj in page['Contents']:
-                        objects.append(obj['Key'])
-            
+                if "Contents" in page:
+                    for obj in page["Contents"]:
+                        objects.append(obj["Key"])
+
             return objects
-            
+
         except Exception as e:
             logger.error(f"Error listing objects in bucket '{bucket}': {e}")
             return []
-    
+
     def get_object(self, key, bucket_name=None):
         """
         Get an object from S3.
-        
+
         Args:
             key (str): Object key
             bucket_name (str, optional): S3 bucket name. Uses default if not provided.
-            
+
         Returns:
             bytes: Object data or None if error
         """
         bucket = bucket_name or self.bucket_name
-        
+
         if not bucket:
             logger.error("No bucket name provided")
             return None
-        
+
         try:
-            response = self.s3_client.get_object(
-                Bucket=bucket,
-                Key=key
-            )
-            return response['Body'].read()
+            response = self.s3_client.get_object(Bucket=bucket, Key=key)
+            return response["Body"].read()
         except Exception as e:
-            logger.error(f"Error getting object '{key}' from bucket '{bucket}': {e}")
+            logger.error(
+                f"Error getting object '{key}' from bucket '{bucket}': {e}"
+            )
             return None
 
     def get_object_metadata(self, key, bucket_name=None):
@@ -190,37 +185,37 @@ def get_object_metadata(self, key, bucket_name=None):
 
         try:
             # Use head_object to get metadata without downloading the body
-            response = self.s3_client.head_object(
-                Bucket=bucket,
-                Key=key
-            )
+            response = self.s3_client.head_object(Bucket=bucket, Key=key)
             # Return relevant metadata
             return {
-                'ContentLength': response.get('ContentLength'),
-                'LastModified': response.get('LastModified'),
-                'ContentType': response.get('ContentType'),
-                'ETag': response.get('ETag')
+                "ContentLength": response.get("ContentLength"),
+                "LastModified": response.get("LastModified"),
+                "ContentType": response.get("ContentType"),
+                "ETag": response.get("ETag")
                 # Add other metadata fields from response if needed
             }
         except ClientError as e:
             # Handle common errors like Not Found
-            if e.response['Error']['Code'] == '404':
-                 logger.warning(f"Object '{key}' not found in bucket '{bucket}'.")
+            if e.response["Error"]["Code"] == "404":
+                logger.warning(
+                    f"Object '{key}' not found in bucket '{bucket}'."
+                )
             else:
-                logger.error(f"Error getting metadata for object '{key}' from bucket '{bucket}': {e}")
+                logger.error(
+                    f"Error getting metadata for object '{key}' from bucket '{bucket}': {e}"
+                )
             return None
         except Exception as e:
             logger.error(f"Unexpected error getting metadata for '{key}': {e}")
             return None
 
-
     def download_file(
         self,
         key: str,
         bucket_name: Optional[str] = None,
         local_path: Optional[Union[str, Path]] = None,
         use_cache: bool = True,
-        cache_dir: Union[str, Path] = '/s3-cache'
+        cache_dir: Union[str, Path] = "/s3-cache",
     ) -> Optional[Path]:
         """
         Downloads a file from S3, optionally using a local cache.
@@ -244,39 +239,53 @@ def download_file(
             return None
 
         if self.s3_client is None:
-             logger.error("S3 client is not initialized.")
-             return None
+            logger.error("S3 client is not initialized.")
+            return None
 
         effective_local_path: Path
         is_cache_path = False
 
         if local_path:
             # User specified an exact download location
-            effective_local_path = Path(local_path).resolve() # Resolve to absolute path
-            logger.info(f"Direct download requested to: {effective_local_path}")
-            use_cache = False # Explicit path overrides cache usage check
+            effective_local_path = Path(
+                local_path
+            ).resolve()  # Resolve to absolute path
+            logger.info(
+                f"Direct download requested to: {effective_local_path}"
+            )
+            use_cache = False  # Explicit path overrides cache usage check
         else:
             # Construct path within the cache directory
             is_cache_path = True
-            base_cache_dir = Path(cache_dir).resolve() # Resolve cache dir path
+            base_cache_dir = Path(
+                cache_dir
+            ).resolve()  # Resolve cache dir path
             # Combine cache base, bucket, and key to form path
             # Ensure key is treated as relative within the bucket folder
-            safe_key_part = key.lstrip('/')
+            safe_key_part = key.lstrip("/")
             effective_local_path = base_cache_dir / bucket / safe_key_part
             logger.debug(f"Cache path constructed: {effective_local_path}")
 
             # Check cache if requested and applicable
-            if use_cache and effective_local_path.is_file(): # Check if it's actually a file
-                logger.info(f"Cache hit! Using local file: {effective_local_path}")
+            if (
+                use_cache and effective_local_path.is_file()
+            ):  # Check if it's actually a file
+                logger.info(
+                    f"Cache hit! Using local file: {effective_local_path}"
+                )
                 # Optional: Could add check here to compare S3 etag/last_modified
                 # with cached file metadata if cache invalidation is needed.
                 return effective_local_path
             elif use_cache:
-                 logger.info(f"Cache miss or not a file: {effective_local_path}")
+                logger.info(
+                    f"Cache miss or not a file: {effective_local_path}"
+                )
             # If not using cache or file not found, proceed to download
 
         # --- Download required ---
-        logger.info(f"Attempting to download s3://{bucket}/{key} to {effective_local_path}")
+        logger.info(
+            f"Attempting to download s3://{bucket}/{key} to {effective_local_path}"
+        )
 
         try:
             # Ensure parent directory exists
@@ -286,20 +295,28 @@ def download_file(
             self.s3_client.download_file(
                 Bucket=bucket,
                 Key=key,
-                Filename=str(effective_local_path) # download_file expects a string path
+                Filename=str(
+                    effective_local_path
+                ),  # download_file expects a string path
             )
             logger.info(f"Successfully downloaded to: {effective_local_path}")
             return effective_local_path
 
         except ClientError as e:
             # Check for specific errors like Not Found
-            error_code = e.response.get('Error', {}).get('Code')
-            if error_code == '404' or 'NoSuchKey' in str(e): # Check common variations
-                logger.error(f"Error: Object not found on S3: s3://{bucket}/{key}")
-            elif error_code == 'NoSuchBucket':
-                 logger.error(f"Error: Bucket not found: {bucket}")
+            error_code = e.response.get("Error", {}).get("Code")
+            if error_code == "404" or "NoSuchKey" in str(
+                e
+            ):  # Check common variations
+                logger.error(
+                    f"Error: Object not found on S3: s3://{bucket}/{key}"
+                )
+            elif error_code == "NoSuchBucket":
+                logger.error(f"Error: Bucket not found: {bucket}")
             else:
-                logger.error(f"S3 ClientError during download for key '{key}': {e}")
+                logger.error(
+                    f"S3 ClientError during download for key '{key}': {e}"
+                )
             # Consider removing partially downloaded file if download_file guarantees creation
             # Check if file exists and maybe size is 0 before unlinking
             # try:
@@ -310,30 +327,38 @@ def download_file(
             #      logger.error(f"Error removing incomplete file {effective_local_path}: {unlink_err}")
             return None
         except Exception as e:
-            logger.error(f"Unexpected error during download of '{key}': {e}", exc_info=True)
+            logger.error(
+                f"Unexpected error during download of '{key}': {e}",
+                exc_info=True,
+            )
             return None
 
+
 # Create a global instance for easy access
 # s3_handler = S3Handler('aind-open-data')
-s3_handler = S3Handler('codeocean-s3resultsbucket-1182nktl2bh9f')
+s3_handler = S3Handler("codeocean-s3resultsbucket-1182nktl2bh9f")
+
 
 # Test function that can be called to verify connection
 def test_s3_connection():
     """Test S3 connection and print results."""
     results = s3_handler.test_connection()
-    
+
     if results["success"]:
         print(f"✅ {results['message']}")
-        
+
         if "sample_objects" in results:
-            print(f"\nFound {results['object_count']} objects. Sample objects:")
+            print(
+                f"\nFound {results['object_count']} objects. Sample objects:"
+            )
             for obj in results["sample_objects"]:
                 print(f"  - {obj}")
     else:
         print(f"❌ {results['message']}")
-    
+
     return results
 
+
 # For direct testing
 if __name__ == "__main__":
-    test_s3_connection() 
\ No newline at end of file
+    test_s3_connection()
diff --git a/src/see_spot/s3_utils.py b/src/see_spot/s3_utils.py
index f65f655..10b2ae5 100644
--- a/src/see_spot/s3_utils.py
+++ b/src/see_spot/s3_utils.py
@@ -11,75 +11,87 @@
 import logging
 import tempfile
 import os
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 logger = logging.getLogger(__name__)
 
 
 def find_processing_manifest(bucket: str, dataset_name: str) -> Optional[str]:
     """
     Find the processing_manifest.json file in either the top level or derived folder.
-    
+
     Args:
         bucket: S3 bucket name
         dataset_name: Dataset name/prefix
-        
+
     Returns:
         Full S3 key to the manifest file, or None if not found
     """
     # Try both possible locations
     possible_paths = [
         f"{dataset_name}/processing_manifest.json",  # Top level
-        f"{dataset_name}/derived/processing_manifest.json"  # Derived folder
+        f"{dataset_name}/derived/processing_manifest.json",  # Derived folder
     ]
-    
-    logger.info(f"Searching for processing_manifest.json in dataset '{dataset_name}'")
-    
+
+    logger.info(
+        f"Searching for processing_manifest.json in dataset '{dataset_name}'"
+    )
+
     for manifest_key in possible_paths:
         logger.info(f"Checking: s3://{bucket}/{manifest_key}")
         try:
             # Try to get metadata (faster than downloading)
-            metadata = s3_handler.get_object_metadata(key=manifest_key, bucket_name=bucket)
+            metadata = s3_handler.get_object_metadata(
+                key=manifest_key, bucket_name=bucket
+            )
             if metadata is not None:
                 logger.info(f"Found processing manifest at: {manifest_key}")
                 return manifest_key
         except Exception as e:
             logger.debug(f"Manifest not found at {manifest_key}: {e}")
             continue
-    
-    logger.warning(f"Could not find processing_manifest.json in any expected location for dataset '{dataset_name}'")
+
+    logger.warning(
+        f"Could not find processing_manifest.json in any expected location for dataset '{dataset_name}'"
+    )
     return None
 
+
 def optimize_dtypes(df: pl.DataFrame) -> pl.DataFrame:
     """Optimize DataFrame dtypes to reduce memory usage.
-    
+
     Args:
         df: Input Polars DataFrame
-        
+
     Returns:
         DataFrame with optimized dtypes
     """
     logger.info("Optimizing data types for memory efficiency...")
-    
+
     # Define columns that should remain as specific types
-    string_cols = ['chan', 'unmixed_chan', 'cell_id'] 
-    int_cols = ['spot_id', 'chan_spot_id', 'round']
-    bool_cols = ['valid_spot', 'reassigned', 'unmixed_removed']
-    
+    string_cols = ["chan", "unmixed_chan", "cell_id"]
+    int_cols = ["spot_id", "chan_spot_id", "round"]
+    bool_cols = ["valid_spot", "reassigned", "unmixed_removed"]
+
     # Get current columns
     current_cols = df.columns
-    
+
     # Build casting dictionary
     cast_dict = {}
-    
+
     for col in current_cols:
         if col in string_cols:
             cast_dict[col] = pl.Utf8
         elif col in int_cols:
             # Use smaller int types where possible
-            if col in ['round']:
+            if col in ["round"]:
                 cast_dict[col] = pl.Int8  # rounds typically 1-10
             else:
-                cast_dict[col] = pl.Int32  # spot_ids can be large but usually fit in Int32
+                cast_dict[
+                    col
+                ] = pl.Int32  # spot_ids can be large but usually fit in Int32
         elif col in bool_cols:
             cast_dict[col] = pl.Boolean
         elif df[col].dtype in [pl.Float64, pl.Float32]:
@@ -88,11 +100,15 @@ def optimize_dtypes(df: pl.DataFrame) -> pl.DataFrame:
             max_val = df[col].max()
             min_val = df[col].min()
             if max_val is not None and min_val is not None:
-                if abs(max_val) < 3.4e38 and abs(min_val) < 3.4e38:  # Float32 range
+                if (
+                    abs(max_val) < 3.4e38 and abs(min_val) < 3.4e38
+                ):  # Float32 range
                     cast_dict[col] = pl.Float32
                 else:
-                    cast_dict[col] = pl.Float64  # Keep as Float64 if values are too large
-    
+                    cast_dict[
+                        col
+                    ] = pl.Float64  # Keep as Float64 if values are too large
+
     # Apply casting
     if cast_dict:
         df_optimized = df.cast(cast_dict)
@@ -112,8 +128,8 @@ def merge_spots_tables(spots_mixed, spots_unmixed):
     Returns:
         pl.DataFrame: Merged DataFrame with unmixed_removed column
     """
-    mixed_clean = spots_mixed.drop('spot_id', strict=False)
-    unmixed_clean = spots_unmixed.drop('spot_id', strict=False)
+    mixed_clean = spots_mixed.drop("spot_id", strict=False)
+    unmixed_clean = spots_unmixed.drop("spot_id", strict=False)
 
     # Get columns that are unique to unmixed table
     mixed_cols = set(mixed_clean.columns)
@@ -121,33 +137,41 @@ def merge_spots_tables(spots_mixed, spots_unmixed):
     unique_unmixed_cols = list(unmixed_cols - mixed_cols)
 
     # Keep only merge keys and unique columns from unmixed
-    merge_keys = ['chan', 'chan_spot_id']
+    merge_keys = ["chan", "chan_spot_id"]
     select_cols = merge_keys + unique_unmixed_cols
     unmixed_subset = unmixed_clean.select(select_cols)
-    merged = mixed_clean.join(unmixed_subset, on=merge_keys, how='left')
+    merged = mixed_clean.join(unmixed_subset, on=merge_keys, how="left")
 
     # Add unmixed_removed column - True where any unique unmixed column is null
     if unique_unmixed_cols:
         # Create condition: all unique unmixed columns are null
-        null_conditions = [pl.col(col).is_null() for col in unique_unmixed_cols]
+        null_conditions = [
+            pl.col(col).is_null() for col in unique_unmixed_cols
+        ]
         all_null = pl.fold(True, lambda acc, x: acc & x, null_conditions)
         merged = merged.with_columns(unmixed_removed=all_null)
     else:
         merged = merged.with_columns(unmixed_removed=pl.lit(False))
 
-    merged_with_id = merged.with_row_index(name='spot_id', offset=1)
+    merged_with_id = merged.with_row_index(name="spot_id", offset=1)
     merged_optimized = optimize_dtypes(merged_with_id)
 
     logger.info(f"Merge completed. Final shape: {merged_optimized.shape}")
     return merged_optimized
 
 
-def find_mixed_spots_file(bucket: str, prefix: str, pattern: str) -> Optional[str]:
+def find_mixed_spots_file(
+    bucket: str, prefix: str, pattern: str
+) -> Optional[str]:
     """Finds the first mixed spots file matching the pattern within the prefix."""
-    logger.info(f"Searching for mixed spots pattern '{pattern}' in bucket '{bucket}' with prefix '{prefix}'...")
+    logger.info(
+        f"Searching for mixed spots pattern '{pattern}' in bucket '{bucket}' with prefix '{prefix}'..."
+    )
     try:
         # List objects - consider increasing max_keys if many files share the prefix
-        objects = s3_handler.list_objects(bucket_name=bucket, prefix=prefix, max_keys=200)
+        objects = s3_handler.list_objects(
+            bucket_name=bucket, prefix=prefix, max_keys=200
+        )
         if not objects:
             logger.warning(f"No objects found with prefix '{prefix}'.")
             return None
@@ -161,16 +185,22 @@ def find_mixed_spots_file(bucket: str, prefix: str, pattern: str) -> Optional[st
                 found_files.append(key)
 
         if not found_files:
-            logger.warning(f"No mixed spots files matching pattern '{pattern}' found within the first {len(objects)} objects listed under prefix '{prefix}'.")
+            logger.warning(
+                f"No mixed spots files matching pattern '{pattern}' found within the first {len(objects)} objects listed under prefix '{prefix}'."
+            )
             return None
 
         if len(found_files) > 1:
-             logger.warning(f"Multiple mixed spots files ({len(found_files)}) matching pattern found. Using the first one: {found_files[0]}")
+            logger.warning(
+                f"Multiple mixed spots files ({len(found_files)}) matching pattern found. Using the first one: {found_files[0]}"
+            )
 
-        return found_files[0] # Return the full key of the first match
+        return found_files[0]  # Return the full key of the first match
 
     except Exception as e:
-        logger.error(f"Error listing or searching objects: {e}", exc_info=True) # Log traceback
+        logger.error(
+            f"Error listing or searching objects: {e}", exc_info=True
+        )  # Log traceback
         return None
 
 
@@ -178,34 +208,34 @@ def get_base_pattern_from_unmixed(unmixed_key: str) -> str:
     """Extract the round pattern (e.g., R3) from unmixed_spots_R3_minDist_3.pkl to find mixed_spots_R3.pkl"""
     filename = Path(unmixed_key).name
     # Extract pattern like R3 from unmixed_spots_R3_minDist_3.pkl
-    parts = filename.split('_')
+    parts = filename.split("_")
     for part in parts:
-        if part.startswith('R') and part[1:].isdigit():
+        if part.startswith("R") and part[1:].isdigit():
             return part
-    return 'R3'  # Default fallback
+    return "R3"  # Default fallback
 
 
 def load_and_merge_spots_from_s3(
-    bucket: str, 
-    dataset_name: str, 
-    unmixed_spots_prefix: str, 
-    valid_spots_only: bool = True
+    bucket: str,
+    dataset_name: str,
+    unmixed_spots_prefix: str,
+    valid_spots_only: bool = True,
 ) -> Optional[pl.DataFrame]:
     """
     Load both mixed and unmixed spots files, merge them, cache as parquet, and return merged DataFrame.
-    
+
     Args:
         bucket: S3 bucket name
         dataset_name: Dataset name (used for parquet filename)
         unmixed_spots_prefix: S3 prefix where spots files are located
         valid_spots_only: If True, filter to only valid spots. If False, return all spots.
-        
+
     Returns:
         Merged Polars DataFrame or None if loading failed
     """
     cache_dir = Path("/s3-cache") / bucket / dataset_name
     parquet_file = cache_dir / f"{dataset_name}.parquet"
-    
+
     # Check if merged parquet file already exists
     if parquet_file.exists():
         logger.info(f"Loading merged data from cached parquet: {parquet_file}")
@@ -214,40 +244,54 @@ def load_and_merge_spots_from_s3(
             # Optimize data types and filter for valid spots
             df_optimized = optimize_dtypes(df)
             if valid_spots_only:
-                df_final = df_optimized.filter(pl.col('valid_spot'))
-                logger.info(f"Loaded DataFrame from parquet (valid spots only). Shape: {df_final.shape}")
+                df_final = df_optimized.filter(pl.col("valid_spot"))
+                logger.info(
+                    f"Loaded DataFrame from parquet (valid spots only). Shape: {df_final.shape}"
+                )
             else:
                 df_final = df_optimized
-                logger.info(f"Loaded DataFrame from parquet (all spots). Shape: {df_final.shape}")
+                logger.info(
+                    f"Loaded DataFrame from parquet (all spots). Shape: {df_final.shape}"
+                )
             return df_final
         except Exception as e:
             logger.error(f"Error loading parquet file: {e}", exc_info=True)
             # Fall through to regenerate the file
-    
+
     # Need to download, merge, and cache
-    logger.info(f"Parquet file not found or corrupted. Downloading and merging spots files...")
-    
+    logger.info(
+        f"Parquet file not found or corrupted. Downloading and merging spots files..."
+    )
+
     # 1. Find unmixed spots file
-    unmixed_key = find_unmixed_spots_file(bucket, unmixed_spots_prefix, "unmixed_spots_*.pkl")
+    unmixed_key = find_unmixed_spots_file(
+        bucket, unmixed_spots_prefix, "unmixed_spots_*.pkl"
+    )
     if not unmixed_key:
-        logger.error(f"Could not find unmixed spots file in {unmixed_spots_prefix}")
+        logger.error(
+            f"Could not find unmixed spots file in {unmixed_spots_prefix}"
+        )
         return None
-    
+
     # 2. Find mixed spots file based on pattern from unmixed file
     base_pattern = get_base_pattern_from_unmixed(unmixed_key)
     mixed_pattern = f"mixed_spots_{base_pattern}.pkl"
-    mixed_key = find_mixed_spots_file(bucket, unmixed_spots_prefix, mixed_pattern)
+    mixed_key = find_mixed_spots_file(
+        bucket, unmixed_spots_prefix, mixed_pattern
+    )
     if not mixed_key:
-        logger.error(f"Could not find mixed spots file matching pattern {mixed_pattern} in {unmixed_spots_prefix}")
+        logger.error(
+            f"Could not find mixed spots file matching pattern {mixed_pattern} in {unmixed_spots_prefix}"
+        )
         return None
-    
+
     logger.info(f"Found unmixed file: {unmixed_key}")
     logger.info(f"Found mixed file: {mixed_key}")
-    
+
     # 3. Download both files to /tmp
     with tempfile.TemporaryDirectory() as tmp_dir:
         tmp_dir_path = Path(tmp_dir)
-        
+
         # Download unmixed file
         unmixed_tmp_path = tmp_dir_path / f"unmixed_{os.getpid()}.pkl"
         logger.info(f"Downloading unmixed file to {unmixed_tmp_path}")
@@ -255,12 +299,12 @@ def load_and_merge_spots_from_s3(
             key=unmixed_key,
             bucket_name=bucket,
             local_path=str(unmixed_tmp_path),
-            use_cache=False
+            use_cache=False,
         )
         if not unmixed_local:
             logger.error("Failed to download unmixed spots file")
             return None
-        
+
         # Download mixed file
         mixed_tmp_path = tmp_dir_path / f"mixed_{os.getpid()}.pkl"
         logger.info(f"Downloading mixed file to {mixed_tmp_path}")
@@ -268,19 +312,19 @@ def load_and_merge_spots_from_s3(
             key=mixed_key,
             bucket_name=bucket,
             local_path=str(mixed_tmp_path),
-            use_cache=False
+            use_cache=False,
         )
         if not mixed_local:
             logger.error("Failed to download mixed spots file")
             return None
-        
+
         # 4. Load both DataFrames using Polars (via pandas for pickle support)
         try:
             logger.info("Loading unmixed spots DataFrame...")
             df_unmixed_pd = pd.read_pickle(unmixed_local)
             df_unmixed = pl.from_pandas(df_unmixed_pd)
             logger.info(f"Loaded unmixed DataFrame. Shape: {df_unmixed.shape}")
-            
+
             logger.info("Loading mixed spots DataFrame...")
             df_mixed_pd = pd.read_pickle(mixed_local)
             df_mixed = pl.from_pandas(df_mixed_pd)
@@ -288,7 +332,7 @@ def load_and_merge_spots_from_s3(
         except Exception as e:
             logger.error(f"Error loading pickle files: {e}", exc_info=True)
             return None
-        
+
         # 5. Merge the DataFrames
         try:
             logger.info("Merging DataFrames...")
@@ -302,30 +346,40 @@ def load_and_merge_spots_from_s3(
         try:
             # Ensure cache directory exists
             cache_dir.mkdir(parents=True, exist_ok=True)
-            
+
             logger.info(f"Saving merged DataFrame to parquet: {parquet_file}")
-            df_merged.write_parquet(parquet_file, compression='snappy')
+            df_merged.write_parquet(parquet_file, compression="snappy")
             logger.info(f"Successfully saved merged data to {parquet_file}")
         except Exception as e:
             logger.error(f"Error saving parquet file: {e}", exc_info=True)
             # Continue anyway - we have the data in memory
-        
+
         # 8. Filter for valid spots (if requested) and return
         if valid_spots_only:
-            df_final = df_merged.filter(pl.col('valid_spot'))
-            logger.info(f"Returning valid spots DataFrame. Shape: {df_final.shape}")
+            df_final = df_merged.filter(pl.col("valid_spot"))
+            logger.info(
+                f"Returning valid spots DataFrame. Shape: {df_final.shape}"
+            )
         else:
             df_final = df_merged
-            logger.info(f"Returning all spots DataFrame. Shape: {df_final.shape}")
+            logger.info(
+                f"Returning all spots DataFrame. Shape: {df_final.shape}"
+            )
         return df_final
 
 
-def find_unmixed_spots_file(bucket: str, prefix: str, pattern: str) -> Optional[str]:
+def find_unmixed_spots_file(
+    bucket: str, prefix: str, pattern: str
+) -> Optional[str]:
     """Finds the first S3 object key matching the pattern within the prefix."""
-    logger.info(f"Searching for pattern '{pattern}' in bucket '{bucket}' with prefix '{prefix}'...")
+    logger.info(
+        f"Searching for pattern '{pattern}' in bucket '{bucket}' with prefix '{prefix}'..."
+    )
     try:
         # List objects - consider increasing max_keys if many files share the prefix
-        objects = s3_handler.list_objects(bucket_name=bucket, prefix=prefix, max_keys=200)
+        objects = s3_handler.list_objects(
+            bucket_name=bucket, prefix=prefix, max_keys=200
+        )
         if not objects:
             logger.warning(f"No objects found with prefix '{prefix}'.")
             return None
@@ -339,23 +393,32 @@ def find_unmixed_spots_file(bucket: str, prefix: str, pattern: str) -> Optional[
                 found_files.append(key)
 
         if not found_files:
-            logger.warning(f"No files matching pattern '{pattern}' found within the first {len(objects)} objects listed under prefix '{prefix}'.")
+            logger.warning(
+                f"No files matching pattern '{pattern}' found within the first {len(objects)} objects listed under prefix '{prefix}'."
+            )
             # Consider adding logic here to list more objects if needed (pagination)
             return None
 
         if len(found_files) > 1:
-             logger.warning(f"Multiple files ({len(found_files)}) matching pattern found. Using the first one: {found_files[0]}")
+            logger.warning(
+                f"Multiple files ({len(found_files)}) matching pattern found. Using the first one: {found_files[0]}"
+            )
 
-        return found_files[0] # Return the full key of the first match
+        return found_files[0]  # Return the full key of the first match
 
     except Exception as e:
-        logger.error(f"Error listing or searching objects: {e}", exc_info=True) # Log traceback
+        logger.error(
+            f"Error listing or searching objects: {e}", exc_info=True
+        )  # Log traceback
         return None
 
-def find_related_files(bucket: str, prefix: str, spots_file: str) -> Dict[str, str]:
+
+def find_related_files(
+    bucket: str, prefix: str, spots_file: str
+) -> Dict[str, str]:
     """
     Find related ratios.txt and summary_stats.csv files based on the unmixed spots file pattern.
-    
+
     Parameters:
     -----------
     bucket: str
@@ -364,55 +427,58 @@ def find_related_files(bucket: str, prefix: str, spots_file: str) -> Dict[str, s
         S3 prefix (folder path)
     spots_file: str
         Full key of the spots file that was found
-    
+
     Returns:
     --------
     Dict[str, str]
         Dictionary with keys 'ratios' and 'summary_stats' pointing to file keys if found
     """
-    result = {'ratios': None, 'summary_stats': None}
-    
+    result = {"ratios": None, "summary_stats": None}
+
     try:
         # Extract base filename without extension
         spots_filename = Path(spots_file).stem
-        base_pattern = spots_filename.replace('unmixed_spots', '*')
-        
+        base_pattern = spots_filename.replace("unmixed_spots", "*")
+
         # List objects in the same directory
-        objects = s3_handler.list_objects(bucket_name=bucket, prefix=prefix, max_keys=200)
+        objects = s3_handler.list_objects(
+            bucket_name=bucket, prefix=prefix, max_keys=200
+        )
         print(objects)
-        
+
         # Look for ratios.txt
         for key in objects:
             filename = Path(key).name
-            if '_ratios.txt' in filename:
+            if "_ratios.txt" in filename:
                 logger.info(f"Found ratios file: {key}")
-                result['ratios'] = key
+                result["ratios"] = key
                 break
-                
+
         # Look for summary_stats.csv
         for key in objects:
             filename = Path(key).name
-            if 'summary_stats.csv' in filename:
+            if "summary_stats.csv" in filename:
                 logger.info(f"Found summary stats file: {key}")
-                result['summary_stats'] = key
+                result["summary_stats"] = key
                 break
-    
+
     except Exception as e:
         logger.error(f"Error finding related files: {e}", exc_info=True)
-    
+
     return result
 
+
 def load_ratios_from_s3(bucket: str, key: str) -> Optional[np.ndarray]:
     """
     Load a ratios.txt file from S3.
-    
+
     Parameters:
     -----------
     bucket: str
         S3 bucket name
     key: str
         S3 key for the ratios file
-    
+
     Returns:
     --------
     Optional[np.ndarray]
@@ -421,43 +487,46 @@ def load_ratios_from_s3(bucket: str, key: str) -> Optional[np.ndarray]:
     if not key:
         logger.warning("No ratios file key provided")
         return None
-        
+
     logger.info(f"Loading ratios from s3://{bucket}/{key}")
-    
+
     try:
         # Download the file content
         content = s3_handler.get_object(key=key, bucket_name=bucket)
         if content is None:
             logger.error(f"Failed to get object content for {key}")
             return None
-            
+
         # Parse the content as a matrix of numbers
-        content_str = content.decode('utf-8')
-        rows = content_str.strip().split('\n')
+        content_str = content.decode("utf-8")
+        rows = content_str.strip().split("\n")
         ratios_matrix = []
-        
+
         for row in rows:
             # Split by tabs and convert to integers
             values = [int(val) for val in row.strip().split()]
             ratios_matrix.append(values)
-            
+
         return np.array(ratios_matrix)
-        
+
     except Exception as e:
         logger.error(f"Error loading ratios file: {e}", exc_info=True)
         return None
 
-def load_summary_stats_from_s3(bucket: str, key: str) -> Optional[pd.DataFrame]:
+
+def load_summary_stats_from_s3(
+    bucket: str, key: str
+) -> Optional[pd.DataFrame]:
     """
     Load a summary_stats.csv file from S3.
-    
+
     Parameters:
     -----------
     bucket: str
         S3 bucket name
     key: str
         S3 key for the summary stats file
-    
+
     Returns:
     --------
     Optional[pd.DataFrame]
@@ -466,26 +535,26 @@ def load_summary_stats_from_s3(bucket: str, key: str) -> Optional[pd.DataFrame]:
     if not key:
         logger.warning("No summary stats file key provided")
         return None
-        
+
     logger.info(f"Loading summary stats from s3://{bucket}/{key}")
-    
+
     try:
         # Download the file content
         content = s3_handler.get_object(key=key, bucket_name=bucket)
         if content is None:
             logger.error(f"Failed to get object content for {key}")
             return None
-            
+
         # Parse CSV
         df = pd.read_csv(io.BytesIO(content))
-        
+
         # Add 'removed_spots' column
-        if 'total_spots' in df.columns and 'kept_spots' in df.columns:
-            df['removed_spots'] = df['total_spots'] - df['kept_spots']
-            df['unchanged_spots'] = df['kept_spots'] - df['reassigned_spots']
+        if "total_spots" in df.columns and "kept_spots" in df.columns:
+            df["removed_spots"] = df["total_spots"] - df["kept_spots"]
+            df["unchanged_spots"] = df["kept_spots"] - df["reassigned_spots"]
 
         return df
-        
+
     except Exception as e:
         logger.error(f"Error loading summary stats file: {e}", exc_info=True)
         return None
@@ -497,28 +566,41 @@ def get_s3_object_size(bucket: str, key: str) -> Optional[int]:
     try:
         # Use the get_object_metadata method (assumes it was added to S3Handler)
         # Check if the method exists before calling
-        if not hasattr(s3_handler, 'get_object_metadata'):
-             logger.error("Error: S3Handler instance does not have 'get_object_metadata' method. Please add it to s3_handler.py.")
-             return None
+        if not hasattr(s3_handler, "get_object_metadata"):
+            logger.error(
+                "Error: S3Handler instance does not have 'get_object_metadata' method. Please add it to s3_handler.py."
+            )
+            return None
 
         metadata = s3_handler.get_object_metadata(key=key, bucket_name=bucket)
 
-        if metadata and 'ContentLength' in metadata and metadata['ContentLength'] is not None:
-            size_bytes = metadata['ContentLength']
+        if (
+            metadata
+            and "ContentLength" in metadata
+            and metadata["ContentLength"] is not None
+        ):
+            size_bytes = metadata["ContentLength"]
             # Convert size to MB for readability
             size_mb = size_bytes / (1024 * 1024)
             logger.info(f"Object size: {size_bytes} bytes ({size_mb:.2f} MB)")
             return size_bytes
         else:
-            logger.warning(f"Could not retrieve valid 'ContentLength' metadata for {key}. Metadata received: {metadata}")
+            logger.warning(
+                f"Could not retrieve valid 'ContentLength' metadata for {key}. Metadata received: {metadata}"
+            )
             return None
     except Exception as e:
-        logger.error(f"Error getting object metadata for {key}: {e}", exc_info=True)
+        logger.error(
+            f"Error getting object metadata for {key}: {e}", exc_info=True
+        )
         return None
 
+
 def load_pkl_from_s3(bucket: str, key: str) -> Optional[pd.DataFrame]:
     """Loads a pickle file from S3 into a pandas DataFrame, using local caching."""
-    logger.info(f"Attempting to load pickle file: s3://{bucket}/{key} (using cache)")
+    logger.info(
+        f"Attempting to load pickle file: s3://{bucket}/{key} (using cache)"
+    )
 
     # 1. Get object size for context (optional, still useful)
     get_s3_object_size(bucket, key)
@@ -533,11 +615,13 @@ def load_pkl_from_s3(bucket: str, key: str) -> Optional[pd.DataFrame]:
         if local_file_path is None:
             logger.error("Failed to download or retrieve file from cache.")
             return None
-        
+
         logger.info(f"File available locally at: {local_file_path}")
 
     except Exception as e:
-        logger.error(f"Error during file download/cache check: {e}", exc_info=True)
+        logger.error(
+            f"Error during file download/cache check: {e}", exc_info=True
+        )
         return None
 
     # 3. Load the pickle data using pandas from the local path
@@ -546,22 +630,32 @@ def load_pkl_from_s3(bucket: str, key: str) -> Optional[pd.DataFrame]:
     try:
         df = pd.read_pickle(local_file_path)
         n_all = df.shape[0]
-        df = df[df['valid_spot'] == True]
+        df = df[df["valid_spot"] == True]
         n_valid = df.shape[0]
         logger.info(f"Successfully loaded DataFrame. Shape: {df.shape}")
         logger.info(f"Total spots: {n_all}, Valid spots: {n_valid}")
         return df
     except pd.errors.EmptyDataError:
-         logger.error(f"Error loading pickle from {local_file_path}: The file seems to be empty or contains no data.")
-         return None
+        logger.error(
+            f"Error loading pickle from {local_file_path}: The file seems to be empty or contains no data."
+        )
+        return None
     except FileNotFoundError:
-         logger.error(f"Error loading pickle: Local file not found at {local_file_path} (should not happen if download succeeded)." )
-         return None
+        logger.error(
+            f"Error loading pickle: Local file not found at {local_file_path} (should not happen if download succeeded)."
+        )
+        return None
     except Exception as e:
-        logger.error(f"Error loading pickle data from {local_file_path}: {e}", exc_info=True)
+        logger.error(
+            f"Error loading pickle data from {local_file_path}: {e}",
+            exc_info=True,
+        )
         return None
 
-def load_processing_manifest_from_s3(bucket: str, key: str) -> Optional[Dict[str, Any]]:
+
+def load_processing_manifest_from_s3(
+    bucket: str, key: str
+) -> Optional[Dict[str, Any]]:
     """
     Loads a processing_manifest.json file from S3.
 
@@ -591,13 +685,17 @@ def load_processing_manifest_from_s3(bucket: str, key: str) -> Optional[Dict[str
             return None
 
         # Parse the JSON content
-        manifest_data = json.loads(content.decode('utf-8'))
-        logger.info(f"Successfully loaded and parsed processing manifest: {key}")
+        manifest_data = json.loads(content.decode("utf-8"))
+        logger.info(
+            f"Successfully loaded and parsed processing manifest: {key}"
+        )
         return manifest_data
 
     except json.JSONDecodeError as e:
         logger.error(f"Error decoding JSON from {key}: {e}", exc_info=True)
         return None
     except Exception as e:
-        logger.error(f"Error loading processing manifest file: {e}", exc_info=True)
-        return None
\ No newline at end of file
+        logger.error(
+            f"Error loading processing manifest file: {e}", exc_info=True
+        )
+        return None

From 8c6aab56772a2ce27a3f954e901c86217e8d7335 Mon Sep 17 00:00:00 2001
From: Carson Berry <carson.berry@carsonb-ux1.corp.alleninstitute.org>
Date: Fri, 7 Nov 2025 11:32:39 -0800
Subject: [PATCH 04/20] bugfix: remove duplicated cache key and close 404 content dict in download_dataset

---
 src/see_spot/app.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index 4d4ef88..942a012 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -52,7 +52,6 @@
     "spot_channels_from_manifest": None,
     "sankey_data": None,  # Cache Sankey data to avoid recalculation
     "unmixed_spots_filename": None  # Store unmixed spots filename for neuroglancer logic
-    "unmixed_spots_filename": None  # Store unmixed spots filename for neuroglancer logic
 }
 
 
@@ -671,7 +670,7 @@ async def download_dataset(request: Request):
                 status_code=404, 
                 content={
                     "error": "Spots data file not found",
-                    "checked_path": f"s3://{S3_BUCKET}/{spots_key}unmixed_spots_*.pkl"
+                    "checked_path": f"s3://{S3_BUCKET}/{spots_key}unmixed_spots_*.pkl"}
                 
             )
         

From ab94721a9e63c6fc71518f682b6c5fc0f4b79185 Mon Sep 17 00:00:00 2001
From: Carson Berry <carson.berry@carsonb-ux1.corp.alleninstitute.org>
Date: Fri, 7 Nov 2025 11:45:05 -0800
Subject: [PATCH 05/20] feat: add support for -1 round in base_pattern

---
 src/see_spot/s3_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/see_spot/s3_utils.py b/src/see_spot/s3_utils.py
index 10b2ae5..feca955 100644
--- a/src/see_spot/s3_utils.py
+++ b/src/see_spot/s3_utils.py
@@ -210,7 +210,8 @@ def get_base_pattern_from_unmixed(unmixed_key: str) -> str:
     # Extract pattern like R3 from unmixed_spots_R3_minDist_3.pkl
     parts = filename.split("_")
     for part in parts:
-        if part.startswith("R") and part[1:].isdigit():
+        # add support for R-1 (default round for datasets without metadata)
+        if part.startswith("R") and (part[1:].isdigit() or part[1:] == '-1'): 
             return part
     return "R3"  # Default fallback
 

From 390d67fd37ce950b3f1fa45517efc4f50b12e30e Mon Sep 17 00:00:00 2001
From: Carson Berry <carson.berry@carsonb-ux1.corp.alleninstitute.org>
Date: Fri, 7 Nov 2025 11:52:41 -0800
Subject: [PATCH 06/20] bugfix: change mixed pattern

---
 src/see_spot/s3_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/see_spot/s3_utils.py b/src/see_spot/s3_utils.py
index feca955..85747f4 100644
--- a/src/see_spot/s3_utils.py
+++ b/src/see_spot/s3_utils.py
@@ -276,7 +276,7 @@ def load_and_merge_spots_from_s3(
 
     # 2. Find mixed spots file based on pattern from unmixed file
     base_pattern = get_base_pattern_from_unmixed(unmixed_key)
-    mixed_pattern = f"mixed_spots_{base_pattern}.pkl"
+    mixed_pattern = f"mixed_spots_{base_pattern}*.pkl"
     mixed_key = find_mixed_spots_file(
         bucket, unmixed_spots_prefix, mixed_pattern
     )

From adfa208a87d9ed42b1c1bcbdb557c955c17640b5 Mon Sep 17 00:00:00 2001
From: Carson Berry <carson.berry@carsonb-ux1.corp.alleninstitute.org>
Date: Fri, 7 Nov 2025 11:57:46 -0800
Subject: [PATCH 07/20] feat: only search image_spot_spectral_unmixing top
 level

---
 src/see_spot/s3_utils.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/see_spot/s3_utils.py b/src/see_spot/s3_utils.py
index 85747f4..19d9df4 100644
--- a/src/see_spot/s3_utils.py
+++ b/src/see_spot/s3_utils.py
@@ -163,7 +163,7 @@ def merge_spots_tables(spots_mixed, spots_unmixed):
 def find_mixed_spots_file(
     bucket: str, prefix: str, pattern: str
 ) -> Optional[str]:
-    """Finds the first mixed spots file matching the pattern within the prefix."""
+    """Finds the first mixed spots file matching the pattern within the prefix (top level only, non-recursive)."""
     logger.info(
         f"Searching for mixed spots pattern '{pattern}' in bucket '{bucket}' with prefix '{prefix}'..."
     )
@@ -178,6 +178,13 @@ def find_mixed_spots_file(
 
         found_files = []
         for key in objects:
+            # Skip objects in subdirectories - only check files at the top level
+            # Remove the prefix and check if there are any additional slashes
+            relative_path = key[len(prefix):] if key.startswith(prefix) else key
+            # If there's a slash in the relative path, it's in a subdirectory
+            if '/' in relative_path.lstrip('/'):
+                continue
+            
             # Use Pathlib to easily get the filename part of the key
             filename = Path(key).name
             if fnmatch.fnmatch(filename, pattern):
@@ -372,7 +379,7 @@ def load_and_merge_spots_from_s3(
 def find_unmixed_spots_file(
     bucket: str, prefix: str, pattern: str
 ) -> Optional[str]:
-    """Finds the first S3 object key matching the pattern within the prefix."""
+    """Finds the first S3 object key matching the pattern within the prefix (top level only, non-recursive)."""
     logger.info(
         f"Searching for pattern '{pattern}' in bucket '{bucket}' with prefix '{prefix}'..."
     )
@@ -387,6 +394,13 @@ def find_unmixed_spots_file(
 
         found_files = []
         for key in objects:
+            # Skip objects in subdirectories - only check files at the top level
+            # Remove the prefix and check if there are any additional slashes
+            relative_path = key[len(prefix):] if key.startswith(prefix) else key
+            # If there's a slash in the relative path, it's in a subdirectory
+            if '/' in relative_path.lstrip('/'):
+                continue
+            
             # Use Pathlib to easily get the filename part of the key
             filename = Path(key).name
             if fnmatch.fnmatch(filename, pattern):

From 44ef2d630b16c87408476b1ab5825e7eff3b4e1d Mon Sep 17 00:00:00 2001
From: Carson Berry <carson.berry@carsonb-ux1.corp.alleninstitute.org>
Date: Fri, 7 Nov 2025 12:22:12 -0800
Subject: [PATCH 08/20] fix: logic in create_neuroglancer_link()

---
 src/see_spot/app.py      | 3 +--
 src/see_spot/ng_utils.py | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index 942a012..bdeb879 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -528,8 +528,7 @@ async def create_neuroglancer_link(request: Request):
     spot_id = data.get("spot_id")
     position = data.get("position")
     point_annotation = data.get("point_annotation")
-    if not position or not point_annotation or not spot_id:
-        annotation_color = data.get("annotation_color", "#FFFF00")
+    annotation_color = data.get("annotation_color", "#FFFF00")
     
     # Input validation
     if not position or not point_annotation or not spot_id:
diff --git a/src/see_spot/ng_utils.py b/src/see_spot/ng_utils.py
index 49e6498..a17c15f 100644
--- a/src/see_spot/ng_utils.py
+++ b/src/see_spot/ng_utils.py
@@ -255,7 +255,7 @@ def create_link_from_json(
     position,
     spot_id,
     point_annotation,
-    annotation_color="#FFFF00",
+    annotation_color="#FF0000",
     spacing=3.0,
     cross_section_scale=None,
     base_url="https://neuroglancer-demo.appspot.com",

From 30f8a596524b1fd6e0ac13b7f81f5d020c97e4ba Mon Sep 17 00:00:00 2001
From: Carson Berry <carson.berry@carsonb-ux1.corp.alleninstitute.org>
Date: Fri, 7 Nov 2025 12:36:07 -0800
Subject: [PATCH 09/20] add some logging to ng function in app.py

---
 src/see_spot/app.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index bdeb879..4050100 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -537,8 +537,10 @@ async def create_neuroglancer_link(request: Request):
             content={"error": "Missing required parameters: position, point_annotation, or spot_id"}
         )
     # Check if we should use the JSON-based method (when "merged" is in the pkl filename)
-    unmixed_spots_filename = df_cache.get("unmixed_spots_filename") or ""
+    unmixed_spots_filename = df_cache.get("unmixed_spots_filename", "no filename found")
+    logger.info(f"Unmixed spots filename: {unmixed_spots_filename}")
     use_json_method = "merged" in unmixed_spots_filename.lower()
+    logger.info(f"Using JSON-based method: {use_json_method}")
     try: 
         if use_json_method:
                 # Use the JSON-based method for merged datasets

From 272182ee0176b4f76e9561c2e1659f065473eb6f Mon Sep 17 00:00:00 2001
From: Carson Berry <carson.berry@carsonb-ux1.corp.alleninstitute.org>
Date: Fri, 7 Nov 2025 14:55:11 -0800
Subject: [PATCH 10/20] try except for ng mode creation instead of using
 filename

---
 src/see_spot/app.py | 53 +++++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index 4050100..58f839f 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -521,8 +521,18 @@ async def get_real_spots_data(
 async def create_neuroglancer_link(request: Request):
     """Creates a neuroglancer link with a point annotation at specified coordinates."""
     # Parse the JSON data from the request
+    """const requestData = {
+            fused_s3_paths: fusedS3Paths,
+            position: [details.x, details.y, details.z, 0],
+            point_annotation: [details.x, details.y, details.z, 0.5, 0],
+            cell_id: details.cell_id || 42,
+            spot_id: spotId,
+            annotation_color: "#FFFF00",
+            cross_section_scale: 0.2
+        };"""
     data = await request.json()
     
+    
     # Extract the parameters from the request
     cross_section_scale = data.get("cross_section_scale", "0.135")
     spot_id = data.get("spot_id")
@@ -539,28 +549,29 @@ async def create_neuroglancer_link(request: Request):
     # Check if we should use the JSON-based method (when "merged" is in the pkl filename)
     unmixed_spots_filename = df_cache.get("unmixed_spots_filename", "no filename found")
     logger.info(f"Unmixed spots filename: {unmixed_spots_filename}")
-    use_json_method = "merged" in unmixed_spots_filename.lower()
-    logger.info(f"Using JSON-based method: {use_json_method}")
+    # use_json_method = "merged" in unmixed_spots_filename.lower()
+    # logger.info(f"Using JSON-based method: {use_json_method}")
     try: 
-        if use_json_method:
-                # Use the JSON-based method for merged datasets
-                logger.info(f"Using create_link_from_json method for merged dataset (filename: {unmixed_spots_filename})")
-                
-                # Construct the neuroglancer JSON path
-                ng_json_path = f"s3://{S3_BUCKET}/{DATA_PREFIX}/phase_correlation_stitching_neuroglancer.json"
-                logger.info(f"Neuroglancer JSON path: {ng_json_path}")
-                
-                # Create the neuroglancer link from JSON
-                ng_link = ng_utils.create_link_from_json(
-                    ng_json_path=ng_json_path,
-                    position=position,
-                    spot_id=spot_id,
-                    point_annotation=point_annotation,
-                    annotation_color=annotation_color,
-                    spacing=3.0,
-                    cross_section_scale=cross_section_scale
-                )
-        else:
+        # if use_json_method:
+        try: 
+            # Use the JSON-based method for merged datasets
+            logger.info(f"Using create_link_from_json method for merged dataset (filename: {unmixed_spots_filename})")
+            
+            # Construct the neuroglancer JSON path
+            ng_json_path = f"s3://{S3_BUCKET}/{DATA_PREFIX}/phase_correlation_stitching_neuroglancer.json"
+            logger.info(f"Neuroglancer JSON path: {ng_json_path}")
+            
+            # Create the neuroglancer link from JSON
+            ng_link = ng_utils.create_link_from_json(
+                ng_json_path=ng_json_path,
+                position=position,
+                spot_id=spot_id,
+                point_annotation=point_annotation,
+                annotation_color=annotation_color,
+                spacing=3.0,
+                cross_section_scale=cross_section_scale
+            )
+        except Exception as e:
             # Use the traditional method for non-merged datasets
             logger.info(f"Using create_link_no_upload method for non-merged dataset (filename: {unmixed_spots_filename})")
             fused_s3_paths = data.get("fused_s3_paths")

From 54b0ed7fe04edf8fc2fcf9614123a86bbae0eee8 Mon Sep 17 00:00:00 2001
From: Matt Davis <mattjdavis@gmail.com>
Date: Fri, 7 Nov 2025 16:43:40 -0800
Subject: [PATCH 11/20] feat: fixes for single channel

---
 src/see_spot/app.py            | 195 +++++++++++++++++++++++----------
 src/see_spot/logging_config.py |  57 ++++++++++
 src/see_spot/ng_utils.py       |  41 +++----
 src/sessions.db                | Bin 0 -> 16384 bytes
 tests/test_dataset_loading.py  |  76 +++++++++++++
 5 files changed, 288 insertions(+), 81 deletions(-)
 create mode 100644 src/see_spot/logging_config.py
 create mode 100644 src/sessions.db
 create mode 100644 tests/test_dataset_loading.py

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index 58f839f..201289c 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -2,14 +2,11 @@
 from fastapi.responses import JSONResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
-import numpy as np
-import pandas as pd
-from datetime import datetime, timedelta
+from datetime import datetime
 from see_spot import ng_utils
 import uvicorn
 import logging
 import os
-import json
 from pathlib import Path
 import polars as pl
 import itertools
@@ -17,6 +14,7 @@
 
 # Import your modules
 from see_spot.s3_handler import s3_handler
+from see_spot.logging_config import setup_logging
 from see_spot.s3_utils import (
     find_unmixed_spots_file, find_related_files,
     load_ratios_from_s3, load_summary_stats_from_s3,
@@ -24,7 +22,8 @@
     find_processing_manifest
 )
 
-logging.basicConfig(level=logging.INFO)
+# Initialize logging using central utility (idempotent)
+setup_logging(os.getenv("SEE_SPOT_LOG_LEVEL", "INFO"))
 logger = logging.getLogger(__name__)
 
 app = FastAPI()
@@ -61,6 +60,7 @@ def get_channel_pairs(df: pl.DataFrame) -> List[Tuple[str, str]]:
     channels = sorted([col.split('_')[1] for col in intensity_cols])
     return list(itertools.combinations(channels, 2))
 
+
 def calculate_sankey_data_from_polars(df_polars: pl.DataFrame) -> Dict[str, Any]:
     """
     Calculate Sankey diagram data directly from Polars DataFrame for maximum performance.
@@ -421,17 +421,35 @@ async def get_real_spots_data(
     detail_cols = ['spot_id', 'cell_id', 'round', 'z', 'y', 'x']
     available_detail_cols = [col for col in detail_cols if col in plot_df.columns]
     
-    if len(available_detail_cols) > 1:
-        logger.info(f"Creating spot_details with columns: {available_detail_cols}")
+    # Normalize spot_id to int if float to avoid '63.0' style keys
+    if 'spot_id' in plot_df.columns and plot_df['spot_id'].dtype.kind == 'f':
+        if plot_df['spot_id'].isna().any():
+            logger.warning("spot_id column has NaNs before coercion; keys may be inconsistent")
+        logger.info("Coercing float spot_id column to int64 for clean spot_details keys")
+        try:
+            plot_df['spot_id'] = plot_df['spot_id'].astype('int64')
+        except Exception as e:
+            logger.error(f"Failed coercing spot_id to int64: {e}")
+
+    if len(available_detail_cols) >= 1:
+        logger.info(f"Building spot_details from columns: {available_detail_cols}")
         spot_details_df = plot_df[available_detail_cols].copy()
-        
-        spot_details = {
-            str(row['spot_id']): {
-                col: row[col] for col in available_detail_cols if col != 'spot_id'
+        # Ensure spot_id is present
+        if 'spot_id' not in spot_details_df.columns:
+            logger.warning("spot_id column missing; cannot build spot_details")
+            spot_details = {}
+        else:
+            spot_details = {
+                str(int(row['spot_id'])): {
+                    col: row[col] for col in available_detail_cols if col != 'spot_id'
+                }
+                for _, row in spot_details_df.iterrows()
             }
-            for _, row in spot_details_df.iterrows()
-        }
-        logger.info(f"Created spot_details dictionary with {len(spot_details)} entries")
+            logger.info(
+                "spot_details built: %d entries | sample keys: %s",
+                len(spot_details),
+                list(spot_details.keys())[:5]
+            )
     else:
         spot_details = {}
         logger.warning("Could not create spot_details: required columns not found in DataFrame")
@@ -531,8 +549,6 @@ async def create_neuroglancer_link(request: Request):
             cross_section_scale: 0.2
         };"""
     data = await request.json()
-    
-    
     # Extract the parameters from the request
     cross_section_scale = data.get("cross_section_scale", "0.135")
     spot_id = data.get("spot_id")
@@ -546,44 +562,100 @@ async def create_neuroglancer_link(request: Request):
             status_code=400,
             content={"error": "Missing required parameters: position, point_annotation, or spot_id"}
         )
-    # Check if we should use the JSON-based method (when "merged" is in the pkl filename)
-    unmixed_spots_filename = df_cache.get("unmixed_spots_filename", "no filename found")
-    logger.info(f"Unmixed spots filename: {unmixed_spots_filename}")
-    # use_json_method = "merged" in unmixed_spots_filename.lower()
-    # logger.info(f"Using JSON-based method: {use_json_method}")
-    try: 
-        # if use_json_method:
-        try: 
-            # Use the JSON-based method for merged datasets
-            logger.info(f"Using create_link_from_json method for merged dataset (filename: {unmixed_spots_filename})")
-            
-            # Construct the neuroglancer JSON path
-            ng_json_path = f"s3://{S3_BUCKET}/{DATA_PREFIX}/phase_correlation_stitching_neuroglancer.json"
-            logger.info(f"Neuroglancer JSON path: {ng_json_path}")
-            
-            # Create the neuroglancer link from JSON
-            ng_link = ng_utils.create_link_from_json(
-                ng_json_path=ng_json_path,
-                position=position,
-                spot_id=spot_id,
-                point_annotation=point_annotation,
-                annotation_color=annotation_color,
-                spacing=3.0,
-                cross_section_scale=cross_section_scale
+    # Enhanced logging & dynamic JSON path logic
+    unmixed_spots_filename = df_cache.get("unmixed_spots_filename", "<unknown>")
+    logger.info(
+        "Neuroglancer link request | spot_id=%s | unmixed_spots_file=%s | json_override_env=%s",
+        spot_id,
+        unmixed_spots_filename,
+        os.getenv("SEE_SPOT_NG_JSON_NAME", "<unset>")
+    )
+    logger.debug(
+        "Raw request data keys: %s", ",".join(sorted(data.keys()))
+    )
+    logger.debug(
+        "Position=%s point_annotation=%s annotation_color=%s cross_section_scale=%s",
+        position,
+        point_annotation,
+        annotation_color,
+        cross_section_scale,
+    )
+
+    # Determine JSON file name (env override allowed) and full S3 path
+    ng_json_filename = os.getenv(
+        "SEE_SPOT_NG_JSON_NAME", "phase_correlation_stitching_neuroglancer.json"
+    )
+    ng_json_path = f"s3://{S3_BUCKET}/{DATA_PREFIX}/{ng_json_filename}"
+    s3_key_for_json = f"{DATA_PREFIX}/{ng_json_filename}"  # key relative to bucket
+    logger.info("Constructed Neuroglancer JSON path: %s", ng_json_path)
+
+    # Check existence of JSON on S3 (metadata only) for better diagnostics
+    json_metadata = None
+    try:
+        json_metadata = s3_handler.get_object_metadata(
+            s3_key_for_json, bucket_name=S3_BUCKET
+        )
+        if json_metadata:
+            logger.info(
+                "Neuroglancer JSON found on S3 (ContentLength=%s LastModified=%s)",
+                json_metadata.get("ContentLength"),
+                json_metadata.get("LastModified"),
             )
-        except Exception as e:
-            # Use the traditional method for non-merged datasets
-            logger.info(f"Using create_link_no_upload method for non-merged dataset (filename: {unmixed_spots_filename})")
-            fused_s3_paths = data.get("fused_s3_paths")
-            cell_id = data.get("cell_id", 42)  # Default value if not provided
+        else:
+            logger.warning(
+                "Neuroglancer JSON NOT found on S3: s3://%s/%s", S3_BUCKET, s3_key_for_json
+            )
+    except Exception as meta_err:
+        logger.error(
+            "Error checking Neuroglancer JSON metadata: %s", meta_err, exc_info=True
+        )
+
+    # Decide strategy: prefer JSON-based method when file exists; fall back otherwise
+    use_json_method = json_metadata is not None or "merged" in unmixed_spots_filename.lower()
+    logger.info("Use JSON method decision: %s", use_json_method)
+
+    try:
+        if use_json_method:
+            logger.info(
+                "Attempting create_link_from_json(spot_id=%s, path=%s)",
+                spot_id,
+                ng_json_path,
+            )
+            try:
+                ng_link = ng_utils.create_link_from_json(
+                    ng_json_path=ng_json_path,
+                    position=position,
+                    spot_id=spot_id,
+                    point_annotation=point_annotation,
+                    annotation_color=annotation_color,
+                    spacing=3.0,
+                    cross_section_scale=cross_section_scale,
+                )
+                logger.info("Successfully built Neuroglancer link from JSON")
+            except Exception as json_err:
+                logger.error(
+                    "JSON link creation failed: %s | Falling back to direct method",
+                    json_err,
+                    exc_info=True,
+                )
+                use_json_method = False  # force fallback
 
+        if not use_json_method:
+            logger.info("Falling back to create_link_no_upload() pathway")
+            fused_s3_paths = data.get("fused_s3_paths")
+            cell_id = data.get("cell_id", 42)
             if not fused_s3_paths:
+                logger.error(
+                    "Missing fused_s3_paths for fallback method; cannot proceed"
+                )
                 return JSONResponse(
                     status_code=400,
-                    content={"error": "Missing required parameter: fused_s3_paths (required for non-merged datasets)"}
+                    content={
+                        "error": "Missing required parameter: fused_s3_paths for fallback Neuroglancer generation",
+                        "attempted_json_path": ng_json_path,
+                        "json_exists": json_metadata is not None,
+                    },
                 )
-            
-            # Create the neuroglancer link
             ng_link = ng_utils.create_link_no_upload(
                 fused_s3_paths,
                 annotation_color=annotation_color,
@@ -591,15 +663,22 @@ async def create_neuroglancer_link(request: Request):
                 cell_id=cell_id,
                 spot_id=spot_id,
                 position=position,
-                point_annotation=point_annotation
+                point_annotation=point_annotation,
             )
-            
-        return {"url": ng_link}
+            logger.info(
+                "Successfully built Neuroglancer link via fallback direct method"
+            )
+
+        return {"url": ng_link, "used_json_method": use_json_method}
     except Exception as e:
-        logger.error(f"Error creating neuroglancer link: {str(e)}")
+        logger.error("Error creating neuroglancer link: %s", e, exc_info=True)
         return JSONResponse(
             status_code=500,
-            content={"error": f"Failed to create neuroglancer link: {str(e)}"}
+            content={
+                "error": f"Failed to create neuroglancer link: {str(e)}",
+                "attempted_json_path": ng_json_path,
+                "json_exists": json_metadata is not None,
+            },
         )
 
 @app.get("/api/datasets")
@@ -636,6 +715,8 @@ async def list_datasets():
         logger.error(f"Error listing datasets: {e}", exc_info=True)
         return JSONResponse(status_code=500, content={"error": str(e)})
 
+
+
 @app.post("/api/datasets/download")
 async def download_dataset(request: Request):
     """Download a dataset from S3 to local cache."""
@@ -651,9 +732,9 @@ async def download_dataset(request: Request):
         
         if not manifest_key:
             return JSONResponse(
-                status_code=404, 
+                status_code=404,
                 content={
-                    "error": f"Dataset not found on S3 - processing_manifest.json not found",
+                    "error": "Dataset not found on S3 - processing_manifest.json not found",
                     "checked_paths": [
                         f"s3://{S3_BUCKET}/{dataset_name}/processing_manifest.json",
                         f"s3://{S3_BUCKET}/{dataset_name}/derived/processing_manifest.json"
@@ -679,7 +760,7 @@ async def download_dataset(request: Request):
         
         if not spots_file:
             return JSONResponse(
-                status_code=404, 
+                status_code=404,
                 content={
                     "error": "Spots data file not found",
                     "checked_path": f"s3://{S3_BUCKET}/{spots_key}unmixed_spots_*.pkl"}
@@ -735,6 +816,8 @@ async def download_dataset(request: Request):
         logger.error(f"Error downloading dataset: {e}", exc_info=True)
         return JSONResponse(status_code=500, content={"error": str(e)})
 
+
+
 @app.post("/api/datasets/set-active")
 async def set_active_dataset(request: Request):
     """Set the active dataset for the application."""
diff --git a/src/see_spot/logging_config.py b/src/see_spot/logging_config.py
new file mode 100644
index 0000000..852fc57
--- /dev/null
+++ b/src/see_spot/logging_config.py
@@ -0,0 +1,57 @@
+import sys
+import logging
+from logging.config import dictConfig
+
+
+def setup_logging(level: str = "DEBUG") -> None:
+    """Configure application, uvicorn and FastAPI logging to stdout.
+
+    Idempotent: only the first successful call installs the configuration;
+    later calls return immediately, so calling it at import time is safe.
+
+    Args:
+        level: Level name applied to every handler and logger configured
+            here. Matched case-insensitively ("info" is treated as "INFO").
+
+    Raises:
+        ValueError: If ``level`` is not a level known to ``logging``.
+    """
+    if getattr(setup_logging, "_configured", False):  # idempotent guard
+        return
+    level = level.strip().upper() if isinstance(level, str) else level
+
+    def _handler(formatter):
+        stream = {"class": "logging.StreamHandler", "stream": sys.stdout}
+        return {**stream, "formatter": formatter, "level": level}
+
+    def _logger(handler):
+        # propagate=False: root also owns the console handler, so letting
+        # records propagate would print each of them twice.
+        return {"handlers": [handler], "level": level, "propagate": False}
+
+    prefix = "%(asctime)s | %(levelname)-8s | "
+    dictConfig({
+        "version": 1,
+        "disable_existing_loggers": False,
+        "formatters": {
+            "default": {"format": prefix + "%(name)s | %(message)s"},
+            # Stock Formatter lacks uvicorn's client_addr/request_line/
+            # status_code fields; the access message already contains them.
+            "access": {"format": prefix + "uvicorn.access | %(message)s"},
+        },
+        "handlers": {
+            "console": _handler("default"),
+            "access_console": _handler("access"),
+        },
+        "loggers": {
+            "see_spot": _logger("console"),
+            "uvicorn": _logger("console"),
+            "uvicorn.error": _logger("console"),
+            "uvicorn.access": _logger("access_console"),
+            "fastapi": _logger("console"),
+        },
+        "root": {"handlers": ["console"], "level": level},
+    })
+
+    setup_logging._configured = True
+    logging.getLogger("see_spot.logging_config").debug("Logging configured (level=%s)", level)
diff --git a/src/see_spot/ng_utils.py b/src/see_spot/ng_utils.py
index a17c15f..d48e72a 100644
--- a/src/see_spot/ng_utils.py
+++ b/src/see_spot/ng_utils.py
@@ -281,44 +281,33 @@ def create_link_from_json(
     import json
     from pathlib import Path
 
-    # Convert to Path object for easier handling
-    json_path = (
-        Path(ng_json_path)
-        if not isinstance(ng_json_path, Path)
-        else ng_json_path
-    )
+    # Robust handling of S3 vs local paths: avoid Path() on s3:// to prevent scheme collapse
+    is_s3 = isinstance(ng_json_path, str) and ng_json_path.startswith("s3://")
+    json_path_str = ng_json_path if is_s3 else str(Path(ng_json_path))
 
-    # Load the JSON file
     try:
-        if str(json_path).startswith("s3://"):
-            # Handle S3 paths
-            import boto3
-
-            s3_path = str(json_path)[5:]  # Remove 's3://'
+        if is_s3:
+            s3_path = json_path_str[5:]  # strip 's3://'
             parts = s3_path.split("/")
             bucket = parts[0]
             key = "/".join(parts[1:])
-
+            print(f"[ng_utils] Fetching Neuroglancer JSON from S3: bucket={bucket} key={key}")
             s3_client = boto3.client("s3")
             response = s3_client.get_object(Bucket=bucket, Key=key)
             json_content = response["Body"].read().decode("utf-8")
             state_dict = json.loads(json_content)
             print(f"Loaded Neuroglancer state from S3: s3://{bucket}/{key}")
         else:
-            # Handle local file paths
-            with open(json_path, "r") as f:
+            print(f"[ng_utils] Loading Neuroglancer JSON from local path: {json_path_str}")
+            with open(json_path_str, "r") as f:
                 state_dict = json.load(f)
-            print(f"Loaded Neuroglancer state from local file: {json_path}")
+            print(f"Loaded Neuroglancer state from local file: {json_path_str}")
     except FileNotFoundError:
-        raise FileNotFoundError(
-            f"Neuroglancer JSON file not found: {json_path}"
-        )
+        raise FileNotFoundError(f"Neuroglancer JSON file not found: {json_path_str}")
     except json.JSONDecodeError as e:
-        raise ValueError(f"Invalid JSON in file {json_path}: {e}")
+        raise ValueError(f"Invalid JSON in file {json_path_str}: {e}")
     except Exception as e:
-        raise Exception(
-            f"Error loading Neuroglancer JSON from {json_path}: {e}"
-        )
+        raise Exception(f"Error loading Neuroglancer JSON from {json_path_str}: {e}")
 
     # Update position
     state_dict["position"] = position
@@ -396,7 +385,6 @@ def read_zarr_resolution_boto(s3_path):
     Returns:
     list: Resolution in z,y,x order in micrometers
     """
-    import boto3
     import json
 
     # Parse the S3 path
@@ -442,7 +430,10 @@ def read_zarr_resolution_boto(s3_path):
                                     for idx in [z_idx, y_idx, x_idx]
                                 ):
                                     print(
-                                        f"Found resolution from multiscales: {[scale[z_idx], scale[y_idx], scale[x_idx]]}"
+                                        (
+                                            f"Found resolution from multiscales: "
+                                            f"{[scale[z_idx], scale[y_idx], scale[x_idx]]}"
+                                        )
                                     )
                                     return [
                                         scale[z_idx],
diff --git a/src/sessions.db b/src/sessions.db
new file mode 100644
index 0000000000000000000000000000000000000000..20147ffdb0dcb7ac6e4f7714f64c169ebd825103
GIT binary patch
literal 16384
zcmeI2&u`pB6vyp&vzwIeI;E=i6xBCUiJ|k;WBbH5EFvNa?ZzuL5@2TRv9wZ}K-QId
zLW&Ul3H$+wJAXtD9N@-<3zr_aazTP4<K4JKX@XW1Aw=J3SDtzEJoEMEjV;a7>^<0?
zR3<2ojvuKiV9uK3y3U(H;5g2y{kQDDDXy(tY&U&ZT~D1iKHcy7zdK&9>-4(*xBiz`
zxq=-C00AHX1b_e#00KY&2mk>f00jOu0-v?rPJeyf{XDCbJ}^@=olcGpr_FU~b2J>J
z!yp~p+a3muHQ2ZoTvE=uvq=%8!-r{bf3&qT80`n|4)<Tb^oGY%b9|^CnYsJ!dugyc
z-rm04H$OJ2GDT*$tm)Rya4#L~+z;LwzC9Rkr@`iUG#c)v*|VO@$2d^aDpPrGAJSNj
ze|MNdRVw@V*p!nebLEYUtJRNpw;qg#8_fgXc@D>&yI#A$x$ZiX!@@k7esW;9=$U$4
z9nIQVLq4NTt#x;4p&LkZgJ;*&O1-YSe(H3t-*9@r`d{@v>VDsq*T1$+C_n%R00AHX
z1b_e#00KY&2)qmgPS!e|@lD1~mRUr#RI)^+h$Z68FceG9P>Fe12oW01WmQ!aQ-N@d
zs0c`uaFSqg-V`T<B_w=jbCk(YViILdFCiIW2xI%R8>plCM(odCW~+!(#PP{m`%IE#
zyi_Jsh#MXuuC+pnYO9MEIMNaeg^RQ(ni=MjSY+p)nFvON1ehk6CzM|z35&m!gfpv_
z*{8)PuP%4S{hCsw;ymZtpi+gU)g?j{$v8&F;E0es3R#)&sH)n#AW9}!K4&6iCW^<5
zh%*v)wkRX~3QhtMnNf;}K#ZK+Xm!T7YEh)jBU!Kz<t7xCla;N?B1DCfoC`t)Wx6uc
zXR#x3KuIE)4PT?FHc<;MVe-W^d1soG)U=Q$LIyJ%K*1<d5-UVY!*xlNq*~RW&I_=b
zY_Q_SWaDXNGOH<ik>Gvkw(Hp4E<~X@ElMPEY(s|?MOxY=#YJIqsY;R;QXNck)674|
z#D!fv7ffM7qB;{WHB~mSpNU=j205D2xS%3ODvUMaB-bc5R3aK{p^2e|B0?YO3qxHT
z{r#nO{XX9QXGS9lKd<%PH7mB$d+PWf^q%^!_}u@(|Hl9EW%$s+GXeo100e*l5C8%|
z00;m9AOHk_!2gTDhBtN>No93v$3+rM?b>mXw9j(wxJW9dRXZ+{AMt9(MG^r^uX*Fu
zMe-Z7{l9<e_&@tU_}}@z`M>x-*(MYq00e*l5C8%|00;m9AOHk_01yBIK;XYcpyS<i
t?KzxIeYJ;Q+N)mQot-#YUh!_Z^)ZiDU8zl8?PHG|EUkF`)!EU8KL7?{=SKhl

literal 0
HcmV?d00001

diff --git a/tests/test_dataset_loading.py b/tests/test_dataset_loading.py
new file mode 100644
index 0000000..e9f2c23
--- /dev/null
+++ b/tests/test_dataset_loading.py
@@ -0,0 +1,76 @@
+import sys
+from pathlib import Path
+import unittest
+sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
+
+from see_spot.s3_utils import (  # noqa: E402
+    load_and_merge_spots_from_s3,
+    find_processing_manifest,
+    load_processing_manifest_from_s3,
+)
+from see_spot.s3_handler import s3_handler  # noqa: E402
+
+
+DATASET = "HCR_799211_2025-10-02_17-50-00_processed_2025-11-06_22-50-31"
+BUCKET = "aind-open-data"
+SPOTS_PREFIX = f"{DATASET}/image_spot_spectral_unmixing/"
+
+
+class TestProblemDatasetLoading(unittest.TestCase):
+    """Integration-style checks for problematic dataset loading.
+
+    Covers the manifest lookup, the merged spots table (which needs at least
+    two of x, y, z, cell_id, round), the parquet cache and S3 object metadata.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        # Create the cache root if it is missing; an existing directory is kept
+        Path("/s3-cache").mkdir(exist_ok=True)
+
+    def test_processing_manifest_exists(self):
+        manifest_key = find_processing_manifest(BUCKET, DATASET)
+        self.assertIsNotNone(
+            manifest_key,
+            msg="processing_manifest.json not found in top-level or derived directory",
+        )
+        if manifest_key:
+            manifest = load_processing_manifest_from_s3(BUCKET, manifest_key)
+            self.assertIsInstance(manifest, dict, "Manifest did not parse to dict")
+            self.assertIn(
+                "spot_channels", manifest, "Manifest missing 'spot_channels' key"
+            )
+
+    def test_merge_dataframe_columns(self):
+        df = load_and_merge_spots_from_s3(BUCKET, DATASET, SPOTS_PREFIX, valid_spots_only=False)
+        self.assertIsNotNone(df, "Merged DataFrame is None")
+        cols = set(df.columns)
+        # Columns the spot_details logic looks for
+        required_detail_cols = {"x", "y", "z", "cell_id", "round"}
+        missing = required_detail_cols - cols
+        # At least 2 of them must be present (backend requires >1 to build details)
+        present_count = len(required_detail_cols & cols)
+        self.assertGreater(
+            present_count,
+            1,
+            msg=f"Insufficient detail columns for spot_details (have {present_count}, missing: {missing})",
+        )
+
+    def test_parquet_cached(self):
+        parquet_path = Path(f"/s3-cache/{BUCKET}/{DATASET}/{DATASET}.parquet")
+        # Run the merge first; it is expected to write the parquet file checked below
+        _ = load_and_merge_spots_from_s3(BUCKET, DATASET, SPOTS_PREFIX, valid_spots_only=True)
+        self.assertTrue(parquet_path.exists(), "Merged parquet file not cached")
+        self.assertGreater(parquet_path.stat().st_size, 0, "Parquet file size is zero")
+
+    def test_s3_paths_accessible(self):
+        # Metadata check for a representative object (here: the manifest file)
+        manifest_key = find_processing_manifest(BUCKET, DATASET)
+        self.assertIsNotNone(manifest_key, "Manifest key missing for accessibility check")
+        meta = s3_handler.get_object_metadata(manifest_key, bucket_name=BUCKET)
+        self.assertIsNotNone(meta, "Unable to retrieve metadata for manifest file")
+        self.assertIn("ContentLength", meta, "Metadata lacks ContentLength")
+
+
+if __name__ == "__main__":
+    unittest.main()

From 9ebe516972aa48fab6be97dbbcb77bdcdfd7d5a5 Mon Sep 17 00:00:00 2001
From: Matt Davis <mattjdavis@gmail.com>
Date: Thu, 13 Nov 2025 14:33:50 -0800
Subject: [PATCH 12/20] feat: DataTables

---
 src/see_spot/app.py                       |  10 +-
 src/see_spot/logging_config.py            |   5 +-
 src/see_spot/static/js/unmixed_spots.js   | 165 +++++++++++++++----
 src/see_spot/templates/unmixed_spots.html | 183 ++++++++++++++++++----
 4 files changed, 296 insertions(+), 67 deletions(-)

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index 201289c..c4729cc 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -520,7 +520,8 @@ async def get_real_spots_data(
         "channel_pairs": channel_pairs,
         "spots_data": data_for_frontend,
         "spot_details": spot_details,
-        "fused_s3_paths": fused_s3_paths
+        "fused_s3_paths": fused_s3_paths,
+        "current_dataset": DATA_PREFIX  # Include current dataset name
     }
 
     if ratios_json:
@@ -681,6 +682,7 @@ async def create_neuroglancer_link(request: Request):
             },
         )
 
+
 @app.get("/api/datasets")
 async def list_datasets():
     """List all available datasets in the local cache."""
@@ -716,7 +718,6 @@ async def list_datasets():
         return JSONResponse(status_code=500, content={"error": str(e)})
 
 
-
 @app.post("/api/datasets/download")
 async def download_dataset(request: Request):
     """Download a dataset from S3 to local cache."""
@@ -817,7 +818,6 @@ async def download_dataset(request: Request):
         return JSONResponse(status_code=500, content={"error": str(e)})
 
 
-
 @app.post("/api/datasets/set-active")
 async def set_active_dataset(request: Request):
     """Set the active dataset for the application."""
@@ -856,11 +856,13 @@ async def set_active_dataset(request: Request):
         logger.error(f"Error setting active dataset: {e}", exc_info=True)
         return JSONResponse(status_code=500, content={"error": str(e)})
 
+
 @app.get("/")
 @app.get("/unmixed-spots")
 async def unmixed_spots_page(request: Request):
     logger.info("Unmixed spots page accessed")
     return templates.TemplateResponse("unmixed_spots.html", {"request": request})
 
+
 if __name__ == '__main__':
-    uvicorn.run(app, host="0.0.0.0", port=8000)
\ No newline at end of file
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/src/see_spot/logging_config.py b/src/see_spot/logging_config.py
index 852fc57..46c90d0 100644
--- a/src/see_spot/logging_config.py
+++ b/src/see_spot/logging_config.py
@@ -20,10 +20,7 @@ def setup_logging(level: str = "DEBUG") -> None:
                 "format": "%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
             },
             "access": {
-                "format": (
-                    "%(asctime)s | %(levelname)-8s | uvicorn.access | "
-                    "%(client_addr)s - %(request_line)s -> %(status_code)s"
-                ),
+                "format": "%(asctime)s | %(levelname)-8s | uvicorn.access | %(message)s",
             },
         },
         "handlers": {
diff --git a/src/see_spot/static/js/unmixed_spots.js b/src/see_spot/static/js/unmixed_spots.js
index 71880a0..07576ee 100644
--- a/src/see_spot/static/js/unmixed_spots.js
+++ b/src/see_spot/static/js/unmixed_spots.js
@@ -21,8 +21,8 @@ document.addEventListener('DOMContentLoaded', function () {
     const highlightRemovedToggle = document.getElementById('highlight_removed_toggle');
     const highlightRemovedStatus = document.getElementById('highlight_removed_status');
     const displayChanSelect = document.getElementById('display_chan_select');
-    const validSpotToggle = document.getElementById('valid_spot_toggle');
-    const validSpotStatus = document.getElementById('valid_spot_status');
+    // const validSpotToggle = document.getElementById('valid_spot_toggle');
+    // const validSpotStatus = document.getElementById('valid_spot_status');
     const xlimMin = document.getElementById('xlim_min');
     const xlimMax = document.getElementById('xlim_max');
     const ylimMin = document.getElementById('ylim_min');
@@ -34,6 +34,10 @@ document.addEventListener('DOMContentLoaded', function () {
     const summaryBarChartDom = document.getElementById('summary-bar-chart');
     const summaryHeatmapDom = document.getElementById('summary-heatmap');
     const futureChartDom = document.getElementById('future-chart');
+    const spotsContainerHeader = document.getElementById('spots_container_header');
+    const spotsContainerContent = document.getElementById('spots_container_content');
+    const spotsContainerToggle = document.getElementById('spots_container_toggle');
+    const selectedSpotsCount = document.getElementById('selected_spots_count');
     
     const myChart = echarts.init(chartDom);
     const summaryBarChart = echarts.init(summaryBarChartDom);
@@ -49,7 +53,7 @@ document.addEventListener('DOMContentLoaded', function () {
     let currentSampleSize = parseInt(sampleSizeInput.value) || 10000;
     let highlightReassigned = false;
     let highlightRemoved = false;
-    let displayChanMode = 'unmixed'; // 'unmixed' or 'mixed'
+    let displayChanMode = 'mixed'; // 'unmixed' or 'mixed'
     let isNeuroglancerMode = false;
     let spotDetails = {}; // Will store the spot details for neuroglancer lookup
     let fusedS3Paths = {}; // Will store the fused S3 paths from the API
@@ -57,6 +61,7 @@ document.addEventListener('DOMContentLoaded', function () {
     let ratiosMatrix = null; // Will store the ratios matrix from the API
     let sankeyData = null; // Will store the sankey flow data from the API
     let selectedSpots = new Set();
+    let currentDatasetName = 'Unknown Dataset'; // Track current dataset name
     
     // Chart limits variables
     let chartLimitsMode = 'auto'; // 'auto', 'fixed', 'minmax', 'percentile'
@@ -111,7 +116,15 @@ document.addEventListener('DOMContentLoaded', function () {
             });
     }
 
+    let dataTable = null; // Store DataTables instance
+    
     function updateDatasetTable() {
+        // Destroy existing DataTable if it exists
+        if (dataTable) {
+            dataTable.destroy();
+        }
+        
+        // Clear table body
         datasetTableBody.innerHTML = '';
         
         datasetList.forEach(dataset => {
@@ -135,19 +148,36 @@ document.addEventListener('DOMContentLoaded', function () {
                 statusText = 'Missing';
             }
             
-            // Truncate long dataset names for display
-            const displayName = dataset.name.length > 35 ? 
-                dataset.name.substring(0, 32) + '...' : dataset.name;
-            
+            // No truncation - show full dataset name
+            // Format date to show only date (YYYY-MM-DD) without time
+            const dateOnly = dataset.creation_date.split(' ')[0];
             row.innerHTML = `
-                <td title="${dataset.name}">${displayName}</td>
-                <td>${dataset.creation_date}</td>
+                <td title="${dataset.name}">${dataset.name}</td>
+                <td title="${dataset.creation_date}">${dateOnly}</td>
                 <td><span class="status-indicator ${statusClass}"></span>${statusText}</td>
             `;
             
             row.addEventListener('click', () => selectDataset(dataset.name, row));
             datasetTableBody.appendChild(row);
         });
+        
+        // Initialize DataTables with custom configuration
+        dataTable = $('#dataset_table').DataTable({
+            paging: false, // Disable pagination since we have limited datasets
+            searching: true, // Enable search box
+            ordering: true, // Enable column sorting
+            info: false, // Hide "Showing X to Y of Z entries" text
+            columnDefs: [
+                { width: "45%", targets: 0 }, // Dataset Name column
+                { width: "35%", targets: 1 }, // Date Added column
+                { width: "20%", targets: 2, orderable: false } // Status column (no sorting)
+            ],
+            language: {
+                search: "Filter datasets:",
+                searchPlaceholder: "e.g., HCR_76710"
+            },
+            order: [[1, 'desc']] // Sort by Date Added (newest first) by default
+        });
     }
 
     function selectDataset(datasetName, rowElement) {
@@ -206,6 +236,47 @@ document.addEventListener('DOMContentLoaded', function () {
         }, 5000);
     }
 
+    /**
+     * Show the active dataset's name in the banner above the main chart.
+     *
+     * A missing name, or the 'Unknown Dataset' placeholder, switches the banner
+     * back to its pulsing "Loading dataset..." state instead.
+     *
+     * @param {string} datasetName - Full dataset name as reported by the API.
+     */
+    function updateDatasetTitle(datasetName) {
+        const titleElement = document.getElementById('dataset-title');
+        const nameSpan = titleElement
+            ? titleElement.querySelector('.dataset-name')
+            : null;
+
+        // This is called from inside the data-fetch promise handler, so a
+        // TypeError on a missing banner element would skip the rest of that
+        // handler. Warn and return instead of throwing.
+        if (!titleElement || !nameSpan) {
+            console.warn('Dataset title element not found; skipping title update');
+            return;
+        }
+
+        if (!datasetName || datasetName === 'Unknown Dataset') {
+            titleElement.classList.add('loading');
+            nameSpan.textContent = 'Loading dataset...';
+            return;
+        }
+
+        // Remove loading state
+        titleElement.classList.remove('loading');
+
+        // Display the full, unabbreviated dataset name and expose the same
+        // text as the tooltip.
+        nameSpan.textContent = datasetName;
+        nameSpan.title = datasetName;
+
+        // Store current dataset name
+        currentDatasetName = datasetName;
+        console.log(`Dataset title updated: ${datasetName}`);
+    }
+
     // Dataset management event listeners
     downloadDatasetBtn.addEventListener('click', function() {
         const datasetName = datasetNameInput.value.trim();
@@ -329,6 +400,25 @@ document.addEventListener('DOMContentLoaded', function () {
     // Initialize dataset management
     loadDatasetList();
 
+    // Toggle collapsible Selected Spots section
+    spotsContainerHeader.addEventListener('click', function() {
+        // The content panel's current state decides the next one; the arrow
+        // indicator is then forced into that same state, exactly as the
+        // content panel is.
+        const shouldCollapse = !spotsContainerContent.classList.contains('collapsed');
+
+        [spotsContainerContent, spotsContainerToggle].forEach(function(el) {
+            // Second argument forces add (true) or remove (false)
+            el.classList.toggle('collapsed', shouldCollapse);
+        });
+    });
+
+    // Keep the "Selected Spots (N)" header in sync with the table:
+    // N is the number of rows currently in the selected-spots table body.
+    function updateSelectedSpotsCount() {
+        selectedSpotsCount.textContent = spotsTableBody.rows.length;
+    }
+
     // Update current label when input changes
     labelInput.addEventListener('input', function() {
         currentLabel = labelInput.value.trim();
@@ -430,7 +520,7 @@ document.addEventListener('DOMContentLoaded', function () {
     
     // Fetch data function
     function fetchData(sampleSize, forceRefresh = false) {
-        const validSpotsOnly = validSpotToggle.checked;
+        const validSpotsOnly = false; // validSpotToggle.checked; // Toggle disabled
         const url = `/api/real_spots_data?sample_size=${sampleSize}${forceRefresh ? '&force_refresh=true' : ''}${validSpotsOnly ? '&valid_spots_only=true' : '&valid_spots_only=false'}`;
         console.log(`Fetching data with URL: ${url}`);
         
@@ -442,11 +532,20 @@ document.addEventListener('DOMContentLoaded', function () {
                 return response.json();
             })
             .then(data => {
-                console.log(`Fetched unmixed spots data with sample size ${sampleSize}:`, data);
+                console.log(`Fetched spots data with sample size ${sampleSize}:`, data);
+                console.log('Current dataset from API:', data.current_dataset);
                 
                 if (!data.spots_data || !data.channel_pairs || data.spots_data.length === 0) {
                     throw new Error("Invalid or empty data received from API");
                 }
+                
+                // Update dataset title if available
+                if (data.current_dataset) {
+                    console.log('Calling updateDatasetTitle with:', data.current_dataset);
+                    updateDatasetTitle(data.current_dataset);
+                } else {
+                    console.warn('No current_dataset field in API response');
+                }
 
                 channelPairs = data.channel_pairs;
                 const spotsData = data.spots_data;
@@ -1102,6 +1201,9 @@ document.addEventListener('DOMContentLoaded', function () {
         
         const labelCell = newRow.insertCell(); 
         labelCell.textContent = label || ''; // Label
+        
+        // Update the count
+        updateSelectedSpotsCount();
     }
 
     // Event listener for the clear button
@@ -1111,6 +1213,9 @@ document.addEventListener('DOMContentLoaded', function () {
             spotsTableBody.removeChild(spotsTableBody.firstChild);
         }
         console.log("Cleared selected spots table.");
+        
+        // Update the count
+        updateSelectedSpotsCount();
     });
 
     // Event listener for adding lasso selection to table
@@ -1390,25 +1495,25 @@ document.addEventListener('DOMContentLoaded', function () {
         updateChart();
     });
 
-    // Event listener for valid spot toggle
-    validSpotToggle.addEventListener('change', function() {
-        validSpotStatus.textContent = this.checked ? 'On' : 'Off';
-        
-        // Update toggle style
-        const toggleLabel = this.nextElementSibling;
-        const toggleSpan = toggleLabel.querySelector('span');
-        
-        if (this.checked) {
-            toggleLabel.style.backgroundColor = '#4CAF50'; // Green when active
-            toggleSpan.style.left = '22px';
-        } else {
-            toggleLabel.style.backgroundColor = '#ccc'; // Gray when inactive
-            toggleSpan.style.left = '2px';
-        }
-        
-        // Reload data with new filter setting
-        fetchData(currentSampleSize, false);
-    });
+    // Event listener for valid spot toggle - COMMENTED OUT
+    // validSpotToggle.addEventListener('change', function() {
+    //     validSpotStatus.textContent = this.checked ? 'On' : 'Off';
+    //     
+    //     // Update toggle style
+    //     const toggleLabel = this.nextElementSibling;
+    //     const toggleSpan = toggleLabel.querySelector('span');
+    //     
+    //     if (this.checked) {
+    //         toggleLabel.style.backgroundColor = '#4CAF50'; // Green when active
+    //         toggleSpan.style.left = '22px';
+    //     } else {
+    //         toggleLabel.style.backgroundColor = '#ccc'; // Gray when inactive
+    //         toggleSpan.style.left = '2px';
+    //     }
+    //     
+    //     // Reload data with new filter setting
+    //     fetchData(currentSampleSize, false);
+    // });
 
     // Chart limits event listeners
     function updateButtonStates() {
diff --git a/src/see_spot/templates/unmixed_spots.html b/src/see_spot/templates/unmixed_spots.html
index 83d3cad..1aff735 100644
--- a/src/see_spot/templates/unmixed_spots.html
+++ b/src/see_spot/templates/unmixed_spots.html
@@ -6,6 +6,10 @@
     <title>Unmixed Spots Visualization</title>
     <!-- Include ECharts -->
     <script src="https://cdn.jsdelivr.net/npm/echarts@5.5.0/dist/echarts.min.js"></script>
+    <!-- Include DataTables -->
+    <link rel="stylesheet" href="https://cdn.datatables.net/1.13.7/css/jquery.dataTables.min.css">
+    <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
+    <script src="https://cdn.datatables.net/1.13.7/js/jquery.dataTables.min.js"></script>
     <style>
         body { font-family: sans-serif; display: flex; margin: 0; }
         #sidebar { width: 200px; background-color: #ffffff; padding: 15px; height: 100vh; border-right: 1px solid #ddd; }
@@ -84,10 +88,46 @@
         }
         #spots_container { 
             border: 1px solid #ccc; 
-            padding: 10px; 
-            flex-grow: 1; 
-            overflow-y: auto; 
+            border-radius: 4px;
             margin-top: 10px;
+            display: flex;
+            flex-direction: column;
+            max-height: 400px; /* Limit height when expanded */
+        }
+        
+        #spots_container_header {
+            padding: 10px;
+            background-color: #f2f2f2;
+            border-bottom: 1px solid #ccc;
+            cursor: pointer;
+            user-select: none;
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            font-weight: bold;
+        }
+        
+        #spots_container_header:hover {
+            background-color: #e8e8e8;
+        }
+        
+        #spots_container_toggle {
+            font-size: 18px;
+            transition: transform 0.3s;
+        }
+        
+        #spots_container_toggle.collapsed {
+            transform: rotate(-90deg);
+        }
+        
+        #spots_container_content {
+            padding: 10px;
+            overflow-y: auto;
+            flex-grow: 1;
+        }
+        
+        #spots_container_content.collapsed {
+            display: none;
         }
         #data-table { width: 100%; border-collapse: collapse; margin-top: 10px; }
         #data-table th, #data-table td { border: 1px solid #ddd; padding: 8px; text-align: left; }
@@ -99,9 +139,7 @@
             border: 1px solid #eee;
             background-color: #f9f9f9;
             border-radius: 4px;
-            overflow-y: auto; /* Add scrolling to controls if they get too tall */
             flex-shrink: 0; /* Prevent shrinking */
-            height: 305px; /* Reduced height to accommodate dataset panel */
         }
         #spots_controls > div { 
              display: flex;
@@ -114,6 +152,45 @@
             gap: 10px;
             margin-top: 10px;
         }
+        
+        /* Current Dataset Display Styles */
+        #current-dataset-display {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            padding: 12px 20px;
+            margin-bottom: 15px;
+            border-radius: 8px;
+            box-shadow: 0 2px 8px rgba(0,0,0,0.15);
+            text-align: center;
+        }
+        
+        #dataset-title {
+            margin: 0;
+            color: white;
+            font-size: 16px;
+            font-weight: 600;
+            text-shadow: 0 1px 2px rgba(0,0,0,0.2);
+            letter-spacing: 0.3px;
+        }
+        
+        #dataset-title .dataset-icon {
+            font-size: 18px;
+            margin-right: 8px;
+        }
+        
+        #dataset-title .dataset-name {
+            font-family: 'Courier New', monospace;
+            font-weight: 500;
+        }
+        
+        #dataset-title.loading {
+            opacity: 0.7;
+            animation: pulse 1.5s ease-in-out infinite;
+        }
+        
+        @keyframes pulse {
+            0%, 100% { opacity: 0.7; }
+            50% { opacity: 1; }
+        }
         #channel_navigation {
             display: flex;
             align-items: center;
@@ -146,7 +223,7 @@
             border-radius: 6px;
             padding: 12px;
             margin-bottom: 15px;
-            height: 300px;
+            height: 400px;
             display: flex;
             flex-direction: column;
         }
@@ -197,6 +274,7 @@
             border: 1px solid #ddd;
             border-radius: 4px;
             margin-bottom: 10px;
+            padding: 10px;
         }
         
         #dataset_table {
@@ -298,6 +376,40 @@
         .status-missing {
             background-color: #f44336;
         }
+        
+        /* DataTables custom styling */
+        #dataset_table_wrapper .dataTables_filter {
+            margin-bottom: 10px;
+        }
+        
+        #dataset_table_wrapper .dataTables_filter input {
+            border: 1px solid #ccc;
+            border-radius: 4px;
+            padding: 4px 8px;
+            margin-left: 5px;
+        }
+        
+        #dataset_table_wrapper .dataTables_length {
+            margin-bottom: 10px;
+        }
+        
+        #dataset_table_wrapper .dataTables_info {
+            font-size: 11px;
+            padding-top: 8px;
+        }
+        
+        #dataset_table_wrapper .dataTables_paginate {
+            padding-top: 8px;
+        }
+        
+        /* Ensure DataTables doesn't override our custom row styles */
+        #dataset_table tbody tr.current-dataset {
+            background-color: #e8f5e9 !important;
+        }
+        
+        #dataset_table tbody tr.selected {
+            background-color: #cce7ff !important;
+        }
     </style>
 </head>
 <body>
@@ -368,6 +480,14 @@ <h4>Chart Limits</h4>
 
     <div id="main-content">
         <div id="charts-column">
+            <!-- Current Dataset Display -->
+            <div id="current-dataset-display">
+                <h2 id="dataset-title" class="loading">
+                    <span class="dataset-icon">📊</span>
+                    <span class="dataset-name">Loading dataset...</span>
+                </h2>
+            </div>
+            
             <div id="main-chart"></div>
             <div id="channel_navigation">
                 <button id="prev_channel_pair">&lt;</button>
@@ -397,7 +517,7 @@ <h4>📊 Dataset Management</h4>
                         <thead>
                             <tr>
                                 <th>Dataset Name</th>
-                                <th>Date</th>
+                                <th>Added</th>
                                 <th>Status</th>
                             </tr>
                         </thead>
@@ -421,7 +541,7 @@ <h4>📊 Dataset Management</h4>
                         <span style="display: inline-block; transform: rotate(90deg);">↻</span> Refresh Cache
                     </button>
                 </div>
-                <div id="valid_spot_controls" style="display: flex; align-items: center; gap: 10px; margin-bottom: 10px;">
+                <!-- <div id="valid_spot_controls" style="display: flex; align-items: center; gap: 10px; margin-bottom: 10px;">
                     <label for="valid_spot_toggle">Valid Spots Only:</label>
                     <div class="toggle-switch">
                         <input type="checkbox" id="valid_spot_toggle" style="display: none;">
@@ -431,7 +551,7 @@ <h4>📊 Dataset Management</h4>
                     </div>
                     <span id="valid_spot_status" style="margin-left: 10px; font-size: 13px;">Off</span>
                     <span style="font-size: 12px; color: #666; margin-left: 10px;">(Turn off to see removed spots)</span>
-                </div>
+                </div> -->
                 <div id="sample_size_note" style="margin-bottom: 15px; padding: 8px; border-radius: 4px; background-color: #e8f5e9; font-size: 13px; display: flex; align-items: center;">
                     <span id="sample_size_icon" style="margin-right: 8px; font-weight: bold; color: #4caf50;">✓</span>
                     <span id="sample_size_text">Small sample: full feature set</span>
@@ -441,8 +561,8 @@ <h4>📊 Dataset Management</h4>
                     <div style="display: flex; align-items: center; margin-bottom: 10px;">
                         <label for="display_chan_select" style="margin-right: 10px;">Display chan:</label>
                         <select id="display_chan_select" style="padding: 4px 8px; border: 1px solid #ccc; border-radius: 4px;">
-                            <option value="unmixed">Unmixed</option>
                             <option value="mixed">Mixed</option>
+                            <option value="unmixed">Unmixed</option>
                         </select>
                     </div>
                     <div style="display: flex; align-items: center; margin-bottom: 10px;">
@@ -484,25 +604,30 @@ <h4>📊 Dataset Management</h4>
                 </div>
             </div>
              <div id="spots_container">
-                <h3>Selected Spots</h3>
-                <table id="data-table">
-                    <thead>
-                        <tr>
-                            <th>Spot ID</th>
-                            <th>Channel</th>
-                            <th>X-Channel</th>
-                            <th>Y-Channel</th>
-                            <th>R</th>
-                            <th>Dist</th>
-                            <th>Unmixed</th>
-                            <th>Reassigned</th>
-                            <th>Label</th>
-                        </tr>
-                    </thead>
-                    <tbody id="data-table-body">
-                        <!-- Clicked spot details will be added here -->
-                    </tbody>
-                </table>
+                <div id="spots_container_header">
+                    <span>Selected Spots (<span id="selected_spots_count">0</span>)</span>
+                    <span id="spots_container_toggle" class="collapsed">▼</span>
+                </div>
+                <div id="spots_container_content" class="collapsed">
+                    <table id="data-table">
+                        <thead>
+                            <tr>
+                                <th>Spot ID</th>
+                                <th>Channel</th>
+                                <th>X-Channel</th>
+                                <th>Y-Channel</th>
+                                <th>R</th>
+                                <th>Dist</th>
+                                <th>Unmixed</th>
+                                <th>Reassigned</th>
+                                <th>Label</th>
+                            </tr>
+                        </thead>
+                        <tbody id="data-table-body">
+                            <!-- Clicked spot details will be added here -->
+                        </tbody>
+                    </table>
+                </div>
             </div>
         </div>
     </div>

From e94a3bac803a143fd8b772677b786cdf34844b1f Mon Sep 17 00:00:00 2001
From: Matt Davis <mattjdavis@gmail.com>
Date: Thu, 13 Nov 2025 14:51:54 -0800
Subject: [PATCH 13/20] feat: range sliders for metrics

---
 src/see_spot/static/js/unmixed_spots.js   | 221 ++++++++++++++++------
 src/see_spot/templates/unmixed_spots.html |  59 +++++-
 2 files changed, 224 insertions(+), 56 deletions(-)

diff --git a/src/see_spot/static/js/unmixed_spots.js b/src/see_spot/static/js/unmixed_spots.js
index 07576ee..b40cbe2 100644
--- a/src/see_spot/static/js/unmixed_spots.js
+++ b/src/see_spot/static/js/unmixed_spots.js
@@ -68,6 +68,12 @@ document.addEventListener('DOMContentLoaded', function () {
     let currentXLimits = [0, 2000];
     let currentYLimits = [0, 2000];
     
+    // Filter ranges for R Value and Distance
+    let rValueRange = [0, 100];
+    let distanceRange = [0, 100];
+    let rValueFilter = [0, 100];
+    let distanceFilter = [0, 100];
+    
     // Neuroglancer click debounce variables
     let lastNeuroglancerClickTime = 0;
     let lastNeuroglancerSpotId = null;
@@ -399,6 +405,129 @@ document.addEventListener('DOMContentLoaded', function () {
 
     // Initialize dataset management
     loadDatasetList();
+    
+    // Setup noUiSlider elements
+    const rValueSliderEl = document.getElementById('r_value_slider');
+    const distanceSliderEl = document.getElementById('distance_slider');
+    const rValueMinLabel = document.getElementById('r_value_min_label');
+    const rValueMaxLabel = document.getElementById('r_value_max_label');
+    const distanceMinLabel = document.getElementById('distance_min_label');
+    const distanceMaxLabel = document.getElementById('distance_max_label');
+    const resetFiltersBtn = document.getElementById('reset_filters_btn');
+    
+    let rValueSlider = null;
+    let distanceSlider = null;
+    
+    // Function to update filter slider ranges based on data
+    function updateFilterSliderRanges() {
+        if (!allChartData || allChartData.length === 0) return;
+        if (typeof noUiSlider === 'undefined') {
+            console.error('noUiSlider library not loaded');
+            return;
+        }
+        
+        const rValues = allChartData.typedArrays.r;
+        const distValues = allChartData.typedArrays.dist;
+        
+        // Calculate 99th percentile for better range
+        const rSorted = new Float32Array(rValues).sort();
+        const distSorted = new Float32Array(distValues).sort();
+        
+        const r99 = rSorted[Math.floor(rSorted.length * 0.99)] || 1.0;
+        const dist99 = distSorted[Math.floor(distSorted.length * 0.99)] || 5.0;
+        
+        // Cap at reasonable maximums: R at 1.0, Distance at 5.0
+        const rMax = Math.min(r99, 1.0);
+        const distMax = Math.min(dist99, 5.0);
+        
+        rValueRange = [0, rMax];
+        distanceRange = [0, distMax];
+        rValueFilter = [0, rMax];
+        distanceFilter = [0, distMax];
+        
+        // Destroy existing sliders if they exist
+        if (rValueSlider) {
+            rValueSlider.destroy();
+        }
+        if (distanceSlider) {
+            distanceSlider.destroy();
+        }
+        
+        // Create R Value slider
+        rValueSlider = noUiSlider.create(rValueSliderEl, {
+            start: [0, rMax],
+            connect: true,
+            range: {
+                'min': 0,
+                'max': rMax
+            },
+            step: 0.01,
+            tooltips: [true, true],
+            format: {
+                to: function(value) {
+                    return value.toFixed(2);
+                },
+                from: function(value) {
+                    return Number(value);
+                }
+            }
+        });
+        
+        // Create Distance slider
+        distanceSlider = noUiSlider.create(distanceSliderEl, {
+            start: [0, distMax],
+            connect: true,
+            range: {
+                'min': 0,
+                'max': distMax
+            },
+            step: 0.05,
+            tooltips: [true, true],
+            format: {
+                to: function(value) {
+                    return value.toFixed(2);
+                },
+                from: function(value) {
+                    return Number(value);
+                }
+            }
+        });
+        
+        // Update labels
+        rValueMinLabel.textContent = '0.00';
+        rValueMaxLabel.textContent = rMax.toFixed(2);
+        distanceMinLabel.textContent = '0.00';
+        distanceMaxLabel.textContent = distMax.toFixed(2);
+        
+        // Add event listeners for R Value slider - use 'set' event to avoid excessive updates
+        rValueSlider.on('set', function(values, handle) {
+            rValueFilter[0] = parseFloat(values[0]);
+            rValueFilter[1] = parseFloat(values[1]);
+            updateChart();
+        });
+        
+        // Add event listeners for Distance slider - use 'set' event to avoid excessive updates
+        distanceSlider.on('set', function(values, handle) {
+            distanceFilter[0] = parseFloat(values[0]);
+            distanceFilter[1] = parseFloat(values[1]);
+            updateChart();
+        });
+        
+        console.log(`Filter ranges - R: [0, ${rMax.toFixed(2)}], Distance: [0, ${distMax.toFixed(2)}]`);
+    }
+    
+    // Reset filters button
+    resetFiltersBtn.addEventListener('click', function() {
+        if (rValueSlider && distanceSlider) {
+            rValueSlider.set([0, rValueRange[1]], false); // false: skip the 'set' event; filters are assigned and the chart redrawn once below
+            distanceSlider.set([0, distanceRange[1]], false); // false: same reason, avoids a redundant updateChart() per slider
+            
+            rValueFilter = [0, rValueRange[1]];
+            distanceFilter = [0, distanceRange[1]];
+            
+            updateChart();
+        }
+    });
 
     // Toggle collapsible Selected Spots section
     spotsContainerHeader.addEventListener('click', function() {
@@ -609,6 +738,9 @@ document.addEventListener('DOMContentLoaded', function () {
 
         // Create channel selector buttons
         createChannelSelector();
+        
+        // Update filter slider ranges based on data
+        updateFilterSliderRanges();
 
         // Set initial channel pair
         currentPairIndex = 0;
@@ -720,6 +852,26 @@ document.addEventListener('DOMContentLoaded', function () {
         spotsData.removed = removed;
     }
 
+    // Return the indices i (0..data.length-1) whose r and dist values both fall inside rValueFilter / distanceFilter (bounds inclusive)
+    function applyFilters(data) {
+        const filtered = [];
+        const rValues = data.typedArrays.r;
+        const distValues = data.typedArrays.dist;
+        
+        for (let i = 0; i < data.length; i++) {
+            const r = rValues[i];
+            const dist = distValues[i];
+            
+            // Keep the point only if it passes both filters. NOTE(review): values above the upper bound are dropped even when the slider sits at its maximum - confirm intended
+            if (r >= rValueFilter[0] && r <= rValueFilter[1] &&
+                dist >= distanceFilter[0] && dist <= distanceFilter[1]) {
+                filtered.push(i);
+            }
+        }
+        
+        return filtered;
+    }
+    
     function updateChart(newData = null) {
         if (newData) {
             allChartData = newData;
@@ -753,6 +905,15 @@ document.addEventListener('DOMContentLoaded', function () {
         const xField = `chan_${xChan}_intensity`;
         const yField = `chan_${yChan}_intensity`;
         
+        // Apply filters to get indices of points that pass
+        const filteredIndices = applyFilters(allChartData);
+        
+        // Update filter count display
+        const filterCountEl = document.getElementById('filter_count');
+        if (filterCountEl) {
+            filterCountEl.textContent = `Showing ${filteredIndices.length.toLocaleString()} of ${allChartData.length.toLocaleString()} points`;
+        }
+        
         // Create series data grouped by display channel (mixed or unmixed)
         const seriesData = {};
         const uniqueChannels = [];
@@ -768,7 +929,9 @@ document.addEventListener('DOMContentLoaded', function () {
         const reassigned = allChartData.reassigned;
         const removed = allChartData.removed;
         
-        for (let i = 0; i < allChartData.length; i++) {
+        // Only process filtered indices
+        for (let idx = 0; idx < filteredIndices.length; idx++) {
+            const i = filteredIndices[idx];
             // Use either unmixed channel or original channel based on display mode
             let displayChan = displayChanMode === 'mixed' ? channels[i] : unmixedChans[i];
             
@@ -954,8 +1117,8 @@ document.addEventListener('DOMContentLoaded', function () {
                 }, {})
             },
             grid: {
-                right: totalSliderWidth + sliderConfig.startRight + 40, // Make room for sliders and legend
-                bottom: 70 // Still need some bottom space for axis labels
+                right: 120, // Space for legend only
+                bottom: 70 // Space for axis labels
             },
             tooltip: {
                 trigger: 'item',
@@ -1053,58 +1216,6 @@ document.addEventListener('DOMContentLoaded', function () {
                     filterMode: 'empty'
                 }
             ],
-            visualMap: [
-                {
-                    // R-value filter
-                    right: sliderConfig.startRight,
-                    top: 'center',
-                    dimension: 2, // The 'r' value is at index 2 in each data point array
-                    min: 0,
-                    max: r99Percentile,
-                    precision: 2,
-                    text: ['R Value'],
-                    textStyle: {
-                        fontSize: 12
-                    },
-                    ...sliderConfig,
-                    handleStyle: {
-                        color: '#4285f4'
-                    },
-                    inRange: {
-                        opacity: 1
-                    },
-                    outOfRange: {
-                        opacity: 0.01
-                    },
-                    seriesIndex: seriesIndices, // Explicitly set which series this visualMap controls
-                    hoverLink: false // Disable hover highlight when using the slider
-                },
-                {
-                    // Distance filter
-                    right: sliderConfig.startRight + sliderConfig.width + sliderConfig.gap,
-                    top: 'center',
-                    dimension: 6, // The 'dist' value is at index 6 in each data point array
-                    min: 0,
-                    max: dist99Percentile,
-                    precision: 2,
-                    text: ['Distance'],
-                    textStyle: {
-                        fontSize: 12
-                    },
-                    ...sliderConfig,
-                    handleStyle: {
-                        color: '#f83628ff'
-                    },
-                    inRange: {
-                        opacity: 1
-                    },
-                    outOfRange: {
-                        opacity: 0.01
-                    },
-                    seriesIndex: seriesIndices, // Explicitly set which series this visualMap controls
-                    hoverLink: false // Disable hover highlight when using the slider
-                }
-            ],
             series: series
         };
 
diff --git a/src/see_spot/templates/unmixed_spots.html b/src/see_spot/templates/unmixed_spots.html
index 1aff735..e4457a9 100644
--- a/src/see_spot/templates/unmixed_spots.html
+++ b/src/see_spot/templates/unmixed_spots.html
@@ -10,9 +10,12 @@
     <link rel="stylesheet" href="https://cdn.datatables.net/1.13.7/css/jquery.dataTables.min.css">
     <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
     <script src="https://cdn.datatables.net/1.13.7/js/jquery.dataTables.min.js"></script>
+    <!-- Include noUiSlider -->
+    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/nouislider@15.7.1/dist/nouislider.min.css">
+    <script src="https://cdn.jsdelivr.net/npm/nouislider@15.7.1/dist/nouislider.min.js"></script>
     <style>
         body { font-family: sans-serif; display: flex; margin: 0; }
-        #sidebar { width: 200px; background-color: #ffffff; padding: 15px; height: 100vh; border-right: 1px solid #ddd; }
+        #sidebar { width: 260px; background-color: #ffffff; padding: 15px; height: 100vh; border-right: 1px solid #ddd; overflow-y: auto; }
         #main-content { 
             flex-grow: 1; 
             padding: 20px; 
@@ -377,6 +380,38 @@
             background-color: #f44336;
         }
         
+        /* noUiSlider custom styling */
+        .noUi-connect {
+            background: #4285f4;
+        }
+        
+        .noUi-handle {
+            border: 1px solid #d9d9d9;
+            border-radius: 3px;
+            background: #fff;
+            cursor: pointer;
+            box-shadow: inset 0 0 1px #fff, inset 0 1px 7px #ebebeb, 0 3px 6px -3px #bbb;
+        }
+        
+        .noUi-handle:before,
+        .noUi-handle:after {
+            background: #4285f4;
+        }
+        
+        .noUi-tooltip {
+            display: block;
+            position: absolute;
+            border: 1px solid #d9d9d9;
+            border-radius: 3px;
+            background: #fff;
+            color: #000;
+            padding: 2px 5px;
+            text-align: center;
+            white-space: nowrap;
+            font-size: 11px;
+            bottom: 120%;
+        }
+        
         /* DataTables custom styling */
         #dataset_table_wrapper .dataTables_filter {
             margin-bottom: 10px;
@@ -456,6 +491,28 @@ <h3>Hotkeys</h3>
             </div>
         </div>
         
+        <h3>Data Filters</h3>
+        <div style="padding: 10px; background-color: #f5f5f5; border-radius: 4px; margin-bottom: 15px;">
+            <div style="margin-bottom: 20px;">
+                <label style="font-weight: bold; display: block; margin-bottom: 8px;">R Value</label>
+                <div id="r_value_slider" style="margin: 0 10px 15px 10px;"></div>
+                <div style="display: flex; justify-content: space-between; font-size: 11px; color: #666;">
+                    <span id="r_value_min_label">0.00</span>
+                    <span id="r_value_max_label">1.00</span>
+                </div>
+            </div>
+            <div style="margin-bottom: 20px;">
+                <label style="font-weight: bold; display: block; margin-bottom: 8px;">Distance</label>
+                <div id="distance_slider" style="margin: 0 10px 15px 10px;"></div>
+                <div style="display: flex; justify-content: space-between; font-size: 11px; color: #666;">
+                    <span id="distance_min_label">0.00</span>
+                    <span id="distance_max_label">5.00</span>
+                </div>
+            </div>
+            <button id="reset_filters_btn" style="width: 100%; padding: 6px; background-color: #ff9800; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">Reset Filters</button>
+            <div id="filter_count" style="margin-top: 10px; font-size: 12px; color: #666; text-align: center;"></div>
+        </div>
+        
         <h3>Chart Limits</h3>
                     <div class="chart-limits-section">
                 <h4>Chart Limits</h4>

From 3b8b02516a828742aebe466e65688f3b4addc524 Mon Sep 17 00:00:00 2001
From: Matt Davis <mattjdavis@gmail.com>
Date: Thu, 13 Nov 2025 15:19:42 -0800
Subject: [PATCH 14/20] feat: add pin to clicked spots for NG mode

---
 src/see_spot/static/js/unmixed_spots.js   | 108 +++++++++++++++++++++-
 src/see_spot/templates/unmixed_spots.html |  20 +++-
 2 files changed, 121 insertions(+), 7 deletions(-)

diff --git a/src/see_spot/static/js/unmixed_spots.js b/src/see_spot/static/js/unmixed_spots.js
index b40cbe2..9b3262c 100644
--- a/src/see_spot/static/js/unmixed_spots.js
+++ b/src/see_spot/static/js/unmixed_spots.js
@@ -38,6 +38,8 @@ document.addEventListener('DOMContentLoaded', function () {
     const spotsContainerContent = document.getElementById('spots_container_content');
     const spotsContainerToggle = document.getElementById('spots_container_toggle');
     const selectedSpotsCount = document.getElementById('selected_spots_count');
+    const neuroglancerClickedCount = document.getElementById('neuroglancer_clicked_count');
+    const clearNeuroglancerClicksBtn = document.getElementById('clear_neuroglancer_clicks_btn');
     
     const myChart = echarts.init(chartDom);
     const summaryBarChart = echarts.init(summaryBarChartDom);
@@ -78,6 +80,7 @@ document.addEventListener('DOMContentLoaded', function () {
     let lastNeuroglancerClickTime = 0;
     let lastNeuroglancerSpotId = null;
     const NEUROGLANCER_CLICK_DEBOUNCE_MS = 1000; // Prevent duplicate clicks within 1 second
+    let neuroglancerClickedSpots = new Set(); // Track clicked spot IDs for visual indication
     
     // Large data threshold - samples above this will use optimized rendering
     const LARGE_DATA_THRESHOLD = 25001;
@@ -409,14 +412,19 @@ document.addEventListener('DOMContentLoaded', function () {
     // Setup noUiSlider elements
     const rValueSliderEl = document.getElementById('r_value_slider');
     const distanceSliderEl = document.getElementById('distance_slider');
+    const markerSizeSliderEl = document.getElementById('marker_size_slider');
     const rValueMinLabel = document.getElementById('r_value_min_label');
     const rValueMaxLabel = document.getElementById('r_value_max_label');
     const distanceMinLabel = document.getElementById('distance_min_label');
     const distanceMaxLabel = document.getElementById('distance_max_label');
+    const markerSizeMinLabel = document.getElementById('marker_size_min_label');
+    const markerSizeMaxLabel = document.getElementById('marker_size_max_label');
     const resetFiltersBtn = document.getElementById('reset_filters_btn');
     
     let rValueSlider = null;
     let distanceSlider = null;
+    let markerSizeSlider = null;
+    let markerSizeMultiplier = 1.0; // Default marker size multiplier
     
     // Function to update filter slider ranges based on data
     function updateFilterSliderRanges() {
@@ -452,6 +460,9 @@ document.addEventListener('DOMContentLoaded', function () {
         if (distanceSlider) {
             distanceSlider.destroy();
         }
+        if (markerSizeSlider) {
+            markerSizeSlider.destroy();
+        }
         
         // Create R Value slider
         rValueSlider = noUiSlider.create(rValueSliderEl, {
@@ -513,9 +524,43 @@ document.addEventListener('DOMContentLoaded', function () {
             updateChart();
         });
         
+        // Create Marker Size slider (0.5x to 3.0x), positioned at the multiplier the chart is currently using
+        markerSizeSlider = noUiSlider.create(markerSizeSliderEl, {
+            start: [markerSizeMultiplier], // slider is rebuilt on every data load; starting at 1.0 would desync it from markerSizeMultiplier
+            connect: [true, false],
+            range: {
+                'min': 0.5,
+                'max': 3.0
+            },
+            step: 0.1,
+            tooltips: [true],
+            format: {
+                to: function(value) {
+                    return value.toFixed(1) + '×';
+                },
+                from: function(value) {
+                    return Number(value.replace('×', ''));
+                }
+            }
+        });
+        
+        // Add event listener for Marker Size slider
+        markerSizeSlider.on('set', function(values, handle) {
+            markerSizeMultiplier = parseFloat(values[0]);
+            updateChart();
+        });
+        
         console.log(`Filter ranges - R: [0, ${rMax.toFixed(2)}], Distance: [0, ${distMax.toFixed(2)}]`);
     }
     
+    // Clear neuroglancer clicked spots button
+    clearNeuroglancerClicksBtn.addEventListener('click', function() {
+        neuroglancerClickedSpots.clear();
+        neuroglancerClickedCount.textContent = '0';
+        updateChart();
+        console.log('Cleared all neuroglancer clicked spots');
+    });
+    
     // Reset filters button
     resetFiltersBtn.addEventListener('click', function() {
         if (rValueSlider && distanceSlider) {
@@ -525,6 +570,11 @@ document.addEventListener('DOMContentLoaded', function () {
             rValueFilter = [0, rValueRange[1]];
             distanceFilter = [0, distanceRange[1]];
             
+            if (markerSizeSlider) {
+                markerSizeSlider.set([1.0]);
+                markerSizeMultiplier = 1.0;
+            }
+            
             updateChart();
         }
     });
@@ -980,9 +1030,28 @@ document.addEventListener('DOMContentLoaded', function () {
         const series = sortedChannels.map(channel => ({
             name: channel, // Remove the Mixed/Unmixed prefix from individual labels
             type: 'scatter',
-            data: seriesData[channel],
-            symbol: channel === 'Removed' ? 'triangle' : 'circle', // Use triangle symbol for removed spots 
-            symbolSize: channel === 'Removed' ? 8 : 5, // Larger size for removed symbols
+            data: seriesData[channel].map(point => {
+                const spotId = point.value[4];
+                const isClicked = neuroglancerClickedSpots.has(spotId);
+                const baseSize = (channel === 'Removed' ? 8 : 5) * markerSizeMultiplier;
+                
+                // Add symbol, symbolSize, and itemStyle to each data point
+                const dataPoint = {
+                    ...point,
+                    symbol: isClicked ? 'pin' : (channel === 'Removed' ? 'triangle' : 'circle'),
+                    symbolSize: isClicked ? baseSize * 4 : baseSize
+                };
+                
+                // Add itemStyle overrides for clicked spots
+                if (isClicked) {
+                    dataPoint.itemStyle = {
+                        borderWidth: 5,
+                        borderColor: '#000000'
+                    };
+                }
+                
+                return dataPoint;
+            }),
             // Add large dataset mode optimizations (but disable for Removed series to ensure visibility)
             large: channel !== 'Removed',
             largeThreshold: LARGE_DATA_THRESHOLD,
@@ -1010,9 +1079,15 @@ document.addEventListener('DOMContentLoaded', function () {
                 },
                 // Add visual styling for reassigned and removed spots
                 borderWidth: function(params) {
+                    const spotId = params.data.value[4];
                     const isReassigned = params.data.value[9];
                     const isRemoved = params.data.value[10];
                     
+                    // Thick border for clicked spots
+                    if (neuroglancerClickedSpots.has(spotId)) {
+                        return 5;
+                    }
+                    
                     // Add border if reassigned or removed
                     if (isReassigned || isRemoved) {
                         return 2;
@@ -1020,9 +1095,15 @@ document.addEventListener('DOMContentLoaded', function () {
                     return 0;
                 },
                 borderColor: function(params) {
+                    const spotId = params.data.value[4];
                     const isReassigned = params.data.value[9];
                     const isRemoved = params.data.value[10];
                     
+                    // Black border for clicked spots
+                    if (neuroglancerClickedSpots.has(spotId)) {
+                        return '#000000';
+                    }
+                    
                     // Different border colors for different states
                     if (isReassigned && isRemoved) {
                         return '#ff00ff'; // Magenta for both reassigned and removed
@@ -1034,6 +1115,20 @@ document.addEventListener('DOMContentLoaded', function () {
                     return '#ffffff';
                 },
                 borderType: 'solid',
+                shadowBlur: function(params) {
+                    const spotId = params.data.value[4];
+                    if (neuroglancerClickedSpots.has(spotId)) {
+                        return 15; // Glow effect for clicked spots
+                    }
+                    return 0;
+                },
+                shadowColor: function(params) {
+                    const spotId = params.data.value[4];
+                    if (neuroglancerClickedSpots.has(spotId)) {
+                        return 'rgba(255, 255, 255, 0.8)'; // White glow
+                    }
+                    return 'transparent';
+                },
                 opacity: function(params) {
                     // If highlighting reassigned, make non-reassigned more transparent
                     if (highlightReassigned) {
@@ -1412,6 +1507,10 @@ document.addEventListener('DOMContentLoaded', function () {
         lastNeuroglancerClickTime = currentTime;
         lastNeuroglancerSpotId = spotId;
         
+        // Add spot to clicked set and update counter
+        neuroglancerClickedSpots.add(spotId);
+        neuroglancerClickedCount.textContent = neuroglancerClickedSpots.size;
+        
         console.log("Handling Neuroglancer click for spot ID:", spotId);
         
         if (spotDetails[spotId]) {
@@ -1468,6 +1567,9 @@ document.addEventListener('DOMContentLoaded', function () {
             // Automatically create and open the neuroglancer link
             createAndOpenNeuroglancerLink(spotId, details);
             
+            // Update chart to show clicked spot styling
+            updateChart();
+            
             // Remove the notification after 5 seconds (increased from 3 seconds to give more time)
             setTimeout(() => {
                 notification.style.opacity = '0';
diff --git a/src/see_spot/templates/unmixed_spots.html b/src/see_spot/templates/unmixed_spots.html
index e4457a9..d465e4b 100644
--- a/src/see_spot/templates/unmixed_spots.html
+++ b/src/see_spot/templates/unmixed_spots.html
@@ -15,7 +15,7 @@
     <script src="https://cdn.jsdelivr.net/npm/nouislider@15.7.1/dist/nouislider.min.js"></script>
     <style>
         body { font-family: sans-serif; display: flex; margin: 0; }
-        #sidebar { width: 260px; background-color: #ffffff; padding: 15px; height: 100vh; border-right: 1px solid #ddd; overflow-y: auto; }
+        #sidebar { width: 260px; background-color: #ffffff; padding: 15px; height: 100vh; border-right: 1px solid #ddd; overflow-y: auto; display: flex; flex-direction: column; }
         #main-content { 
             flex-grow: 1; 
             padding: 20px; 
@@ -226,7 +226,8 @@
             border-radius: 6px;
             padding: 12px;
             margin-bottom: 15px;
-            height: 400px;
+            flex: 0 0 auto;
+            max-height: 35vh;
             display: flex;
             flex-direction: column;
         }
@@ -488,6 +489,10 @@ <h3>Hotkeys</h3>
                 <div style="font-size: 12px; color: #666;">
                     Click on points to see coordinates
                 </div>
+                <div style="font-size: 12px; color: #666; margin-top: 5px;">
+                    Clicked: <span id="neuroglancer_clicked_count" style="font-weight: bold; color: #4CAF50;">0</span> spots
+                </div>
+                <button id="clear_neuroglancer_clicks_btn" style="width: 100%; margin-top: 8px; padding: 4px; background-color: #ff5722; color: white; border: none; border-radius: 3px; cursor: pointer; font-size: 11px;">Clear Clicked Spots</button>
             </div>
         </div>
         
@@ -509,13 +514,20 @@ <h3>Data Filters</h3>
                     <span id="distance_max_label">5.00</span>
                 </div>
             </div>
+            <div style="margin-bottom: 20px;">
+                <label style="font-weight: bold; display: block; margin-bottom: 8px;">Marker Size</label>
+                <div id="marker_size_slider" style="margin: 0 10px 15px 10px;"></div>
+                <div style="display: flex; justify-content: space-between; font-size: 11px; color: #666;">
+                    <span id="marker_size_min_label">0.5×</span>
+                    <span id="marker_size_max_label">3.0×</span>
+                </div>
+            </div>
             <button id="reset_filters_btn" style="width: 100%; padding: 6px; background-color: #ff9800; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">Reset Filters</button>
             <div id="filter_count" style="margin-top: 10px; font-size: 12px; color: #666; text-align: center;"></div>
         </div>
         
         <h3>Chart Limits</h3>
-                    <div class="chart-limits-section">
-                <h4>Chart Limits</h4>
+        <div class="chart-limits-section">
                 <div class="limit-inputs">
                     <label>Xlims:</label>
                     <input type="number" id="xlim_min" value="0" step="10" style="width: 50px;">

From a9e7f47efac9e1191e6a99777d6ee7c5a14b5c22 Mon Sep 17 00:00:00 2001
From: Matt Davis <mattjdavis@gmail.com>
Date: Thu, 13 Nov 2025 16:00:51 -0800
Subject: [PATCH 15/20] feat: support single tile loading

---
 README.md                                 |  37 ++++
 src/see_spot/app.py                       | 217 ++++++++++++++--------
 src/see_spot/s3_utils.py                  | 105 +++++++++--
 src/see_spot/static/js/unmixed_spots.js   |   2 +-
 src/see_spot/templates/unmixed_spots.html |   3 +-
 test_tile_detection.py                    |  30 +++
 6 files changed, 300 insertions(+), 94 deletions(-)
 create mode 100644 test_tile_detection.py

diff --git a/README.md b/README.md
index 837ddc9..5187ca5 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,43 @@
 cd /home/matt.davis/code/see-spot && source .venv/bin/activate && cd src && uvicorn see_spot.app:app --host 0.0.0.0 --port 9999 --reload
 ```
 
+## Dataset Types
+
+### Regular (Fused) Datasets
+Standard datasets with fused image data at the top level:
+```
+dataset_name/
+  image_spot_spectral_unmixing/
+    mixed_spots_*.pkl
+    unmixed_spots_*.pkl
+  image_tile_fusing/fused/
+    channel_*.zarr
+```
+
+### Tiled (Non-Fused) Datasets
+Datasets with independent tile processing, where each tile has separate spot data:
+```
+dataset_name/
+  image_spot_spectral_unmixing/
+    Tile_X_0001_Y_0000_Z_0000/
+      mixed_spots_*_tile_*.pkl
+      unmixed_spots_*_tile_*.pkl
+    Tile_X_0002_Y_0000_Z_0000/
+      ...
+```
+
+When downloading a tiled dataset, the system automatically:
+- Detects tile subfolders (beginning with "Tile")
+- Creates virtual dataset entries for each tile
+- Names them as: `{dataset_name}_X_####_Y_####_Z_####`
+
+Example: Downloading `HCR_799211_2025-10-02_15-10-00_processed_2025-11-06_22-50-54` with tiles creates:
+- `HCR_799211_2025-10-02_15-10-00_processed_2025-11-06_22-50-54_X_0001_Y_0000_Z_0000`
+- `HCR_799211_2025-10-02_15-10-00_processed_2025-11-06_22-50-54_X_0002_Y_0000_Z_0000`
+- etc.
+
+Each virtual tile dataset appears as a separate entry in the dataset management table and can be loaded independently.
+
 ## Changelog
 + v0.5.0 (09-19-2025)
   + backend downloads mixed + unmixed tables, merges and saves as .parquet (massive compression)
diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index c4729cc..7852d74 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -19,7 +19,7 @@
     find_unmixed_spots_file, find_related_files,
     load_ratios_from_s3, load_summary_stats_from_s3,
     load_processing_manifest_from_s3, load_and_merge_spots_from_s3,
-    find_processing_manifest
+    find_processing_manifest, detect_tile_structure, extract_tile_suffix
 )
 
 # Initialize logging using central utility (idempotent)
@@ -260,6 +260,21 @@ async def get_real_spots_data(
     logger.info(f"Real spots data requested with sample size: {sample_size}, "
                 f"force_refresh: {force_refresh}, valid_spots_only: {valid_spots_only}")
 
+    # Detect if this is a tile dataset at the beginning - needed for all operations
+    import re
+    tile_pattern = re.compile(r'_X_\d+_Y_\d+_Z_\d+$')
+    tile_folder = None
+    base_dataset_name = DATA_PREFIX
+    
+    if tile_pattern.search(DATA_PREFIX):
+        # Extract tile suffix and reconstruct tile folder name
+        parts = DATA_PREFIX.rsplit('_', 6)  # Split last 6 parts (X_####_Y_####_Z_####)
+        if len(parts) > 1:
+            base_dataset_name = parts[0]
+            tile_suffix = '_'.join(parts[1:])
+            tile_folder = f"Tile_{tile_suffix}"
+            logger.info(f"Detected tile dataset: base={base_dataset_name}, tile={tile_folder}")
+
     # Check if we can use cached DataFrame
     if not force_refresh and df_cache["data"] is not None:
         logger.info(f"Using cached DataFrame from {df_cache['last_loaded']}. Shape: {df_cache['data'].shape}")
@@ -267,11 +282,12 @@ async def get_real_spots_data(
         # Load manifest and channels from cache if available, or re-fetch
         processing_manifest = df_cache.get("processing_manifest")
         spot_channels_from_manifest = df_cache.get("spot_channels_from_manifest")
+        
         if not processing_manifest or not spot_channels_from_manifest:
-            # Find manifest in either top level or derived folder
-            manifest_key = find_processing_manifest(S3_BUCKET, DATA_PREFIX)
+            # Find manifest in base dataset folder
+            manifest_key = find_processing_manifest(S3_BUCKET, base_dataset_name)
             if not manifest_key:
-                logger.error(f"Could not find processing_manifest.json for dataset {DATA_PREFIX}")
+                logger.error(f"Could not find processing_manifest.json for dataset {base_dataset_name}")
                 spot_channels_from_manifest = []
             else:
                 logger.info(f"Attempting to load processing manifest from: s3://{S3_BUCKET}/{manifest_key}")
@@ -282,14 +298,17 @@ async def get_real_spots_data(
                 df_cache["spot_channels_from_manifest"] = spot_channels_from_manifest
                 logger.info(f"Loaded spot channels from manifest: {spot_channels_from_manifest}")
             else:
-                logger.error(f"Could not load processing manifest or find 'spot_channels'. Manifest: {processing_manifest}")
+                logger.error(
+                    f"Could not load processing manifest or find 'spot_channels'. "
+                    f"Manifest: {processing_manifest}"
+                )
                 spot_channels_from_manifest = []
     else:
         # Need to load DataFrame from S3
-        # 1. Load processing manifest to determine paths and channels
-        manifest_key = find_processing_manifest(S3_BUCKET, DATA_PREFIX)
+        # 1. Load processing manifest from base dataset location
+        manifest_key = find_processing_manifest(S3_BUCKET, base_dataset_name)
         if not manifest_key:
-            logger.error(f"Could not find processing_manifest.json for dataset {DATA_PREFIX}.")
+            logger.error(f"Could not find processing_manifest.json for dataset {base_dataset_name}.")
             return JSONResponse(status_code=500, content={'error': 'Failed to find processing manifest'})
         
         logger.info(f"Attempting to load processing manifest from: s3://{S3_BUCKET}/{manifest_key}")
@@ -306,15 +325,17 @@ async def get_real_spots_data(
             logger.info(f"Loaded spot channels from manifest: {spot_channels_from_manifest}")
 
         # 2. Find and load the merged data
-        unmixed_spots_prefix = f"{DATA_PREFIX}/image_spot_spectral_unmixing/"
+        
+        unmixed_spots_prefix = f"{base_dataset_name}/image_spot_spectral_unmixing/"
         
         logger.info(f"Loading merged spots data for dataset: {DATA_PREFIX}")
         try:
             df_polars = load_and_merge_spots_from_s3(
-                S3_BUCKET, 
-                DATA_PREFIX, 
+                S3_BUCKET,
+                DATA_PREFIX,
                 unmixed_spots_prefix,
-                valid_spots_only
+                valid_spots_only,
+                tile_folder=tile_folder
             )
             if df_polars is None:
                 logger.error("Failed to load merged DataFrame from S3/cache.")
@@ -341,7 +362,11 @@ async def get_real_spots_data(
     ratios_data = None
     summary_stats_data = None
 
-    related_files_prefix = f"{DATA_PREFIX}/image_spot_spectral_unmixing/"
+    # Use base_dataset_name for related files prefix (tile or regular)
+    related_files_prefix = f"{base_dataset_name}/image_spot_spectral_unmixing/"
+    if tile_folder:
+        related_files_prefix = f"{related_files_prefix}{tile_folder}/"
+    
     unmixed_target_key = find_unmixed_spots_file(
         S3_BUCKET, related_files_prefix, "unmixed_spots_*.pkl"
     )
@@ -349,13 +374,7 @@ async def get_real_spots_data(
     if unmixed_target_key:
         df_cache["unmixed_spots_filename"] = Path(unmixed_target_key).name
         logger.info(f"Cached unmixed spots filename: {df_cache['unmixed_spots_filename']}")
-    
-    
-    # Store the unmixed spots filename in cache for neuroglancer logic
-    if unmixed_target_key:
-        df_cache["unmixed_spots_filename"] = Path(unmixed_target_key).name
-        logger.info(f"Cached unmixed spots filename: {df_cache['unmixed_spots_filename']}")
-    
+
     if unmixed_target_key:
         related_files = find_related_files(S3_BUCKET, related_files_prefix, unmixed_target_key)
         logger.info(f"Searching for related files in '{related_files_prefix}'. Found: {related_files}")
@@ -685,7 +704,7 @@ async def create_neuroglancer_link(request: Request):
 
 @app.get("/api/datasets")
 async def list_datasets():
-    """List all available datasets in the local cache."""
+    """List all available datasets in the local cache, including virtual tile datasets."""
     try:
         cache_path = Path("/s3-cache") / S3_BUCKET
         datasets = []
@@ -697,9 +716,12 @@ async def list_datasets():
                     stat = dataset_dir.stat()
                     creation_time = datetime.fromtimestamp(stat.st_mtime)
                     
-                    # Check if dataset has the required structure
+                    # Check if dataset has data - for virtual tile datasets, check for parquet file
+                    parquet_file = dataset_dir / f"{dataset_dir.name}.parquet"
                     spots_dir = dataset_dir / "image_spot_spectral_unmixing"
-                    has_data = spots_dir.exists()
+                    
+                    # Dataset has data if it has either the spots directory OR a parquet file
+                    has_data = spots_dir.exists() or parquet_file.exists()
                     
                     datasets.append({
                         "name": dataset_dir.name,
@@ -720,7 +742,7 @@ async def list_datasets():
 
 @app.post("/api/datasets/download")
 async def download_dataset(request: Request):
-    """Download a dataset from S3 to local cache."""
+    """Download a dataset from S3 to local cache. Detects tiled datasets and creates virtual entries."""
     try:
         data = await request.json()
         dataset_name = data.get("dataset_name")
@@ -755,63 +777,102 @@ async def download_dataset(request: Request):
         if manifest_local_path is None:
             return JSONResponse(status_code=500, content={"error": "Failed to download processing manifest"})
         
-        # Download the unmixed spots file (for merging and related files)
-        spots_key = f"{dataset_name}/image_spot_spectral_unmixing/"
-        spots_file = find_unmixed_spots_file(S3_BUCKET, spots_key, "unmixed_spots_*.pkl")
-        
-        if not spots_file:
-            return JSONResponse(
-                status_code=404,
-                content={
-                    "error": "Spots data file not found",
-                    "checked_path": f"s3://{S3_BUCKET}/{spots_key}unmixed_spots_*.pkl"}
-                
-            )
-        
-        # Try to create the merged parquet file by calling our new merge function
-        try:
-            merged_df = load_and_merge_spots_from_s3(S3_BUCKET, dataset_name, spots_key)
-            if merged_df is not None:
-                logger.info(f"Successfully created merged parquet file for dataset {dataset_name}")
-            else:
-                logger.warning(f"Could not create merged parquet file for dataset {dataset_name}")
-        except Exception as e:
-            logger.warning(f"Error creating merged parquet file: {e}")
-            # Continue anyway - the individual files will still be available
-        
-        # Try to download related files (ratios and summary stats)
-        related_files = find_related_files(S3_BUCKET, spots_key, spots_file)
-        
         downloaded_files = [str(manifest_local_path)]
         
-        # Add the parquet file to downloaded files if it was created
-        parquet_file = Path("/s3-cache") / S3_BUCKET / dataset_name / f"{dataset_name}.parquet"
-        if parquet_file.exists():
-            downloaded_files.append(str(parquet_file))
-        
-        if related_files['ratios']:
-            ratios_local_path = s3_handler.download_file(
-                key=related_files['ratios'],
-                bucket_name=S3_BUCKET,
-                use_cache=True
-            )
-            if ratios_local_path:
-                downloaded_files.append(str(ratios_local_path))
-        
-        if related_files['summary_stats']:
-            stats_local_path = s3_handler.download_file(
-                key=related_files['summary_stats'],
-                bucket_name=S3_BUCKET,
-                use_cache=True
-            )
-            if stats_local_path:
-                downloaded_files.append(str(stats_local_path))
+        # Check for tile structure
+        tile_folders = detect_tile_structure(S3_BUCKET, dataset_name)
         
-        return {
-            "success": True,
-            "dataset_name": dataset_name,
-            "downloaded_files": downloaded_files
-        }
+        if tile_folders:
+            # Tiled dataset - create virtual datasets for each tile
+            logger.info(f"Detected {len(tile_folders)} tiles in dataset {dataset_name}")
+            
+            for tile_folder in tile_folders:
+                tile_suffix = extract_tile_suffix(tile_folder)
+                virtual_dataset_name = f"{dataset_name}_{tile_suffix}"
+                spots_key = f"{dataset_name}/image_spot_spectral_unmixing/"
+                
+                logger.info(f"Creating virtual tile dataset: {virtual_dataset_name}")
+                
+                try:
+                    # Load and merge this tile's data
+                    merged_df = load_and_merge_spots_from_s3(
+                        S3_BUCKET, dataset_name, spots_key, tile_folder=tile_folder
+                    )
+                    if merged_df is not None:
+                        logger.info(f"Successfully created parquet for tile {virtual_dataset_name}")
+                        parquet_path = (
+                            Path("/s3-cache") / S3_BUCKET / virtual_dataset_name /
+                            f"{virtual_dataset_name}.parquet"
+                        )
+                        if parquet_path.exists():
+                            downloaded_files.append(str(parquet_path))
+                    else:
+                        logger.warning(f"Could not create parquet for tile {virtual_dataset_name}")
+                except Exception as e:
+                    logger.error(f"Error processing tile {tile_folder}: {e}", exc_info=True)
+            
+            return {
+                "success": True,
+                "dataset_name": dataset_name,
+                "is_tiled": True,
+                "tile_count": len(tile_folders),
+                "virtual_datasets": [f"{dataset_name}_{extract_tile_suffix(tf)}" for tf in tile_folders],
+                "downloaded_files": downloaded_files
+            }
+        else:
+            # Regular fused dataset - original behavior
+            spots_key = f"{dataset_name}/image_spot_spectral_unmixing/"
+            spots_file = find_unmixed_spots_file(S3_BUCKET, spots_key, "unmixed_spots_*.pkl")
+            
+            if not spots_file:
+                return JSONResponse(
+                    status_code=404,
+                    content={
+                        "error": "Spots data file not found",
+                        "checked_path": f"s3://{S3_BUCKET}/{spots_key}unmixed_spots_*.pkl"}
+                )
+            
+            # Create the merged parquet file
+            try:
+                merged_df = load_and_merge_spots_from_s3(S3_BUCKET, dataset_name, spots_key)
+                if merged_df is not None:
+                    logger.info(f"Successfully created merged parquet file for dataset {dataset_name}")
+                else:
+                    logger.warning(f"Could not create merged parquet file for dataset {dataset_name}")
+            except Exception as e:
+                logger.warning(f"Error creating merged parquet file: {e}")
+            
+            # Download related files (ratios and summary stats)
+            related_files = find_related_files(S3_BUCKET, spots_key, spots_file)
+            
+            parquet_file = Path("/s3-cache") / S3_BUCKET / dataset_name / f"{dataset_name}.parquet"
+            if parquet_file.exists():
+                downloaded_files.append(str(parquet_file))
+            
+            if related_files['ratios']:
+                ratios_local_path = s3_handler.download_file(
+                    key=related_files['ratios'],
+                    bucket_name=S3_BUCKET,
+                    use_cache=True
+                )
+                if ratios_local_path:
+                    downloaded_files.append(str(ratios_local_path))
+            
+            if related_files['summary_stats']:
+                stats_local_path = s3_handler.download_file(
+                    key=related_files['summary_stats'],
+                    bucket_name=S3_BUCKET,
+                    use_cache=True
+                )
+                if stats_local_path:
+                    downloaded_files.append(str(stats_local_path))
+            
+            return {
+                "success": True,
+                "dataset_name": dataset_name,
+                "is_tiled": False,
+                "downloaded_files": downloaded_files
+            }
     
     except Exception as e:
         logger.error(f"Error downloading dataset: {e}", exc_info=True)
diff --git a/src/see_spot/s3_utils.py b/src/see_spot/s3_utils.py
index 19d9df4..9ae6f3c 100644
--- a/src/see_spot/s3_utils.py
+++ b/src/see_spot/s3_utils.py
@@ -1,4 +1,4 @@
-from typing import Optional, Dict, List, Tuple, Any
+from typing import Optional, Dict, List, Any
 import polars as pl
 import pandas as pd  # Keep for compatibility where needed
 import numpy as np
@@ -18,6 +18,73 @@
 logger = logging.getLogger(__name__)
 
 
+def detect_tile_structure(bucket: str, dataset_name: str) -> List[str]:
+    """
+    Check if dataset has tile subfolders in image_spot_spectral_unmixing.
+    
+    Args:
+        bucket: S3 bucket name
+        dataset_name: Dataset name/prefix
+        
+    Returns:
+        List of tile folder names (e.g., ["Tile_X_0001_Y_0000_Z_0000", ...])
+        Empty list if no tiles found or if it's a regular fused dataset.
+    """
+    logger.info(f"Checking for tile structure in dataset '{dataset_name}'")
+    
+    spots_prefix = f"{dataset_name}/image_spot_spectral_unmixing/"
+    
+    try:
+        # List objects to check for tile folders
+        objects = s3_handler.list_objects(
+            bucket_name=bucket, prefix=spots_prefix, max_keys=500
+        )
+        
+        if not objects:
+            logger.info(f"No objects found at {spots_prefix}")
+            return []
+        
+        # Look for folders whose name starts with "Tile" or "tile"
+        tile_folders = set()
+        for key in objects:
+            # Remove prefix to get relative path
+            relative_path = key[len(spots_prefix):] if key.startswith(spots_prefix) else key
+            
+            # Check if path contains a Tile folder
+            parts = relative_path.split('/')
+            if parts and parts[0].startswith(('Tile', 'tile')):
+                tile_folders.add(parts[0])
+        
+        tile_list = sorted(list(tile_folders))
+        
+        if tile_list:
+            logger.info(f"Found {len(tile_list)} tile folders: {tile_list[:5]}{'...' if len(tile_list) > 5 else ''}")
+        else:
+            logger.info("No tile folders found - appears to be a regular fused dataset")
+        
+        return tile_list
+        
+    except Exception as e:
+        logger.error(f"Error detecting tile structure: {e}", exc_info=True)
+        return []
+
+
+def extract_tile_suffix(tile_folder_name: str) -> str:
+    """
+    Extract coordinate suffix from tile folder name.
+    
+    Args:
+        tile_folder_name: e.g., "Tile_X_0001_Y_0000_Z_0000"
+        
+    Returns:
+        Coordinate suffix: e.g., "X_0001_Y_0000_Z_0000"
+    """
+    # Remove "Tile_" or "tile_" prefix
+    if tile_folder_name.lower().startswith('tile_'):
+        return tile_folder_name[5:]
+    return tile_folder_name
+
+
 def find_processing_manifest(bucket: str, dataset_name: str) -> Optional[str]:
     """
     Find the processing_manifest.json file in either the top level or derived folder.
@@ -193,13 +260,15 @@ def find_mixed_spots_file(
 
         if not found_files:
             logger.warning(
-                f"No mixed spots files matching pattern '{pattern}' found within the first {len(objects)} objects listed under prefix '{prefix}'."
+                f"No mixed spots files matching pattern '{pattern}' found within "
+                f"the first {len(objects)} objects listed under prefix '{prefix}'."
             )
             return None
 
         if len(found_files) > 1:
             logger.warning(
-                f"Multiple mixed spots files ({len(found_files)}) matching pattern found. Using the first one: {found_files[0]}"
+                f"Multiple mixed spots files ({len(found_files)}) matching pattern found. "
+                f"Using the first one: {found_files[0]}"
             )
 
         return found_files[0]  # Return the full key of the first match
@@ -218,7 +287,7 @@ def get_base_pattern_from_unmixed(unmixed_key: str) -> str:
     parts = filename.split("_")
     for part in parts:
         # add support for R-1 (default round for datasets without metadata)
-        if part.startswith("R") and (part[1:].isdigit() or part[1:] == '-1'): 
+        if part.startswith("R") and (part[1:].isdigit() or part[1:] == '-1'):
             return part
     return "R3"  # Default fallback
 
@@ -228,6 +297,7 @@ def load_and_merge_spots_from_s3(
     dataset_name: str,
     unmixed_spots_prefix: str,
     valid_spots_only: bool = True,
+    tile_folder: Optional[str] = None,
 ) -> Optional[pl.DataFrame]:
     """
     Load both mixed and unmixed spots files, merge them, cache as parquet, and return merged DataFrame.
@@ -237,12 +307,21 @@ def load_and_merge_spots_from_s3(
         dataset_name: Dataset name (used for parquet filename)
         unmixed_spots_prefix: S3 prefix where spots files are located
         valid_spots_only: If True, filter to only valid spots. If False, return all spots.
+        tile_folder: Optional tile folder name (e.g., "Tile_X_0001_Y_0000_Z_0000") for tiled datasets
 
     Returns:
         Merged Polars DataFrame or None if loading failed
     """
-    cache_dir = Path("/s3-cache") / bucket / dataset_name
-    parquet_file = cache_dir / f"{dataset_name}.parquet"
+    # Adjust paths for tile folder if provided
+    if tile_folder:
+        cache_dir = Path("/s3-cache") / bucket / f"{dataset_name}_{extract_tile_suffix(tile_folder)}"
+        parquet_file = cache_dir / f"{dataset_name}_{extract_tile_suffix(tile_folder)}.parquet"
+        # Update prefix to look inside tile folder
+        unmixed_spots_prefix = f"{unmixed_spots_prefix}{tile_folder}/"
+        logger.info(f"Loading tile dataset from: {unmixed_spots_prefix}")
+    else:
+        cache_dir = Path("/s3-cache") / bucket / dataset_name
+        parquet_file = cache_dir / f"{dataset_name}.parquet"
 
     # Check if merged parquet file already exists
     if parquet_file.exists():
@@ -268,7 +347,7 @@ def load_and_merge_spots_from_s3(
 
     # Need to download, merge, and cache
     logger.info(
-        f"Parquet file not found or corrupted. Downloading and merging spots files..."
+        "Parquet file not found or corrupted. Downloading and merging spots files..."
     )
 
     # 1. Find unmixed spots file
@@ -409,7 +488,8 @@ def find_unmixed_spots_file(
 
         if not found_files:
             logger.warning(
-                f"No files matching pattern '{pattern}' found within the first {len(objects)} objects listed under prefix '{prefix}'."
+                f"No files matching pattern '{pattern}' found within "
+                f"the first {len(objects)} objects listed under prefix '{prefix}'."
             )
             # Consider adding logic here to list more objects if needed (pagination)
             return None
@@ -451,10 +531,6 @@ def find_related_files(
     result = {"ratios": None, "summary_stats": None}
 
     try:
-        # Extract base filename without extension
-        spots_filename = Path(spots_file).stem
-        base_pattern = spots_filename.replace("unmixed_spots", "*")
-
         # List objects in the same directory
         objects = s3_handler.list_objects(
             bucket_name=bucket, prefix=prefix, max_keys=200
@@ -645,7 +721,7 @@ def load_pkl_from_s3(bucket: str, key: str) -> Optional[pd.DataFrame]:
     try:
         df = pd.read_pickle(local_file_path)
         n_all = df.shape[0]
-        df = df[df["valid_spot"] == True]
+        df = df[df["valid_spot"]]
         n_valid = df.shape[0]
         logger.info(f"Successfully loaded DataFrame. Shape: {df.shape}")
         logger.info(f"Total spots: {n_all}, Valid spots: {n_valid}")
@@ -657,7 +733,8 @@ def load_pkl_from_s3(bucket: str, key: str) -> Optional[pd.DataFrame]:
         return None
     except FileNotFoundError:
         logger.error(
-            f"Error loading pickle: Local file not found at {local_file_path} (should not happen if download succeeded)."
+            f"Error loading pickle: Local file not found at {local_file_path} "
+            "(should not happen if download succeeded)."
         )
         return None
     except Exception as e:
diff --git a/src/see_spot/static/js/unmixed_spots.js b/src/see_spot/static/js/unmixed_spots.js
index 9b3262c..88f6cc4 100644
--- a/src/see_spot/static/js/unmixed_spots.js
+++ b/src/see_spot/static/js/unmixed_spots.js
@@ -346,7 +346,7 @@ document.addEventListener('DOMContentLoaded', function () {
         .then(response => response.json())
         .then(data => {
             if (data.success) {
-                showDatasetMessage(`Successfully loaded dataset: ${data.dataset_name}`, 'success');
+                // showDatasetMessage(`Successfully loaded dataset: ${data.dataset_name}`, 'success');
                 loadDatasetList(); // Refresh the list to show new current dataset
                 
                 // Refresh the main data display
diff --git a/src/see_spot/templates/unmixed_spots.html b/src/see_spot/templates/unmixed_spots.html
index d465e4b..9fbfb4e 100644
--- a/src/see_spot/templates/unmixed_spots.html
+++ b/src/see_spot/templates/unmixed_spots.html
@@ -15,7 +15,7 @@
     <script src="https://cdn.jsdelivr.net/npm/nouislider@15.7.1/dist/nouislider.min.js"></script>
     <style>
         body { font-family: sans-serif; display: flex; margin: 0; }
-        #sidebar { width: 260px; background-color: #ffffff; padding: 15px; height: 100vh; border-right: 1px solid #ddd; overflow-y: auto; display: flex; flex-direction: column; }
+        #sidebar { width: 260px; flex-shrink: 0; background-color: #ffffff; padding: 15px; height: 100vh; border-right: 1px solid #ddd; overflow-y: auto; display: flex; flex-direction: column; }
         #main-content { 
             flex-grow: 1; 
             padding: 20px; 
@@ -23,6 +23,7 @@
         }
         #charts-column {
             width: 55%;
+            flex-shrink: 0;
             display: flex;
             flex-direction: column;
         }
diff --git a/test_tile_detection.py b/test_tile_detection.py
new file mode 100644
index 0000000..0d0e6f0
--- /dev/null
+++ b/test_tile_detection.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+"""
+Quick test script to verify tile detection functionality.
+"""
+
+from see_spot.s3_utils import detect_tile_structure, extract_tile_suffix
+
+# Test tile suffix extraction
+test_cases = [
+    "Tile_X_0001_Y_0000_Z_0000",
+    "Tile_X_0002_Y_0001_Z_0000",
+    "tile_X_0003_Y_0002_Z_0001",  # lowercase
+]
+
+print("Testing extract_tile_suffix():")
+for tile_folder in test_cases:
+    suffix = extract_tile_suffix(tile_folder)
+    expected_dataset = f"HCR_799211_2025-10-02_15-10-00_processed_2025-11-06_22-50-54_{suffix}"
+    print(f"  {tile_folder} -> {suffix}")
+    print(f"    Virtual dataset name: {expected_dataset}")
+
+print("\n" + "="*80)
+print("Tile detection test complete!")
+print("="*80)
+print("\nTo test with a real dataset:")
+print("1. In the UI, enter: HCR_799211_2025-10-02_15-10-00_processed_2025-11-06_22-50-54")
+print("2. Click 'Download' button")
+print("3. Check the response - it should show 'is_tiled: true' and list virtual datasets")
+print("4. Refresh the dataset table - you should see individual tile entries")
+print("5. Select a tile dataset and click 'Load' to visualize that tile's data")

From 670f3771a680ac3216d5edfc54a51dcc1098138e Mon Sep 17 00:00:00 2001
From: Matt Davis <mattjdavis@gmail.com>
Date: Fri, 14 Nov 2025 12:13:49 -0800
Subject: [PATCH 16/20] feat: plot dye lines

---
 dye-lines-clipping.md                     |  41 +++
 src/see_spot/static/js/unmixed_spots.js   | 319 +++++++++++++++++++++-
 src/see_spot/templates/unmixed_spots.html |  15 +
 3 files changed, 361 insertions(+), 14 deletions(-)
 create mode 100644 dye-lines-clipping.md

diff --git a/dye-lines-clipping.md b/dye-lines-clipping.md
new file mode 100644
index 0000000..06380ab
--- /dev/null
+++ b/dye-lines-clipping.md
@@ -0,0 +1,41 @@
+Looking at the images, I can see the problem clearly. When you zoom in or change the axis limits, the dye lines disappear because one or both endpoints fall outside the visible range, and ECharts stops rendering the entire line.
+
+## Analysis of the Problem
+**Current implementation:**
+
+- Dye lines are drawn from the origin (0, 0) to an endpoint (x1, y1).
+- When the axis limits change (e.g., x starts at 10 instead of 0), the origin point is outside the visible area.
+- ECharts doesn't render lines with endpoints outside the axis range.
+## Potential Solutions
+### Option 1: Clip lines to axis boundaries (most robust)
+- Calculate the intersection of the dye line with the current axis boundaries.
+- Always provide two points that are within or on the edge of the visible area.
+- This requires line-rectangle intersection math.
+- **Pros:** Lines always visible, mathematically correct.
+- **Cons:** More complex calculation.
+### Option 2: Extend lines beyond axis limits
+- Instead of calculating endpoints based on axis limits, always use very large coordinates.
+- Make lines extend from (-1000, -1000) to (10000, 10000) or similar.
+- Let ECharts clip the rendering.
+- **Pros:** Simple, no recalculation needed.
+- **Cons:** May have rendering issues, less precise.
+### Option 3: Use visualMap or dataZoom with `clip: false`
+- Try setting `clip: false` on the line series.
+- This tells ECharts to render even if points are outside the axis range.
+- **Pros:** Simplest - just one property change.
+- **Cons:** May not work reliably, depends on ECharts version.
+### Option 4: Use graphic elements instead of series
+- Add dye lines as graphic elements (like the "Mixed/Unmixed" text).
+- Graphic elements use pixel coordinates relative to the grid.
+- Would require converting data coordinates to pixel coordinates.
+- **Pros:** Full control over rendering.
+- **Cons:** More complex, harder to maintain.
+## Recommendation
+I'd suggest trying Option 3 first (adding `clip: false`) - it's the simplest and might just work.
+
+If that doesn't work, go with Option 1 - calculate the intersection of the dye line (which extends infinitely in both directions from the origin along the dye direction) with the current axis boundaries. The math would be:
+
+- Dye line equation: points along direction (dx, dy) are `t * (dx, dy)` for any scalar t.
+- Find the t values where this line intersects the four axis boundaries (xMin, xMax, yMin, yMax).
+- Use the two intersection points that are within the visible rectangle.
+This way the line always extends across the visible chart area regardless of zoom level.
\ No newline at end of file
diff --git a/src/see_spot/static/js/unmixed_spots.js b/src/see_spot/static/js/unmixed_spots.js
index 88f6cc4..fb6813d 100644
--- a/src/see_spot/static/js/unmixed_spots.js
+++ b/src/see_spot/static/js/unmixed_spots.js
@@ -57,6 +57,7 @@ document.addEventListener('DOMContentLoaded', function () {
     let highlightRemoved = false;
     let displayChanMode = 'mixed'; // 'unmixed' or 'mixed'
     let isNeuroglancerMode = false;
+    let showDyeLines = false; // Toggle state for dye lines
     let spotDetails = {}; // Will store the spot details for neuroglancer lookup
     let fusedS3Paths = {}; // Will store the fused S3 paths from the API
     let summaryStats = null; // Will store the summary stats from the API
@@ -420,6 +421,8 @@ document.addEventListener('DOMContentLoaded', function () {
     const markerSizeMinLabel = document.getElementById('marker_size_min_label');
     const markerSizeMaxLabel = document.getElementById('marker_size_max_label');
     const resetFiltersBtn = document.getElementById('reset_filters_btn');
+    const dyeLinesToggle = document.getElementById('dye_lines_toggle');
+    const dyeLinesStatus = document.getElementById('dye_lines_status');
     
     let rValueSlider = null;
     let distanceSlider = null;
@@ -579,6 +582,27 @@ document.addEventListener('DOMContentLoaded', function () {
         }
     });
 
+    // Dye lines toggle event listener
+    dyeLinesToggle.addEventListener('change', function() {
+        showDyeLines = this.checked;
+        dyeLinesStatus.textContent = showDyeLines ? 'On' : 'Off';
+        
+        // Update toggle style
+        const toggleLabel = this.nextElementSibling;
+        const toggleSpan = toggleLabel.querySelector('span');
+        
+        if (showDyeLines) {
+            toggleLabel.style.backgroundColor = '#2196F3'; // Blue when active
+            toggleSpan.style.left = '22px';
+        } else {
+            toggleLabel.style.backgroundColor = '#ccc'; // Gray when inactive
+            toggleSpan.style.left = '2px';
+        }
+        
+        updateChart();
+        console.log(`Dye lines toggle: ${showDyeLines ? 'ON' : 'OFF'}`);
+    });
+
     // Toggle collapsible Selected Spots section
     spotsContainerHeader.addEventListener('click', function() {
         const isCollapsed = spotsContainerContent.classList.contains('collapsed');
@@ -921,6 +945,158 @@ document.addEventListener('DOMContentLoaded', function () {
         
         return filtered;
     }
+
+    /**
+     * Clip a line passing through origin with direction (dx, dy) to axis boundaries.
+     * Returns the two points where the line enters/exits the visible rectangle.
+     *
+     * @param {number} dx - X component of line direction (normalized)
+     * @param {number} dy - Y component of line direction (normalized)
+     * @param {Array<number>} xLimits - [xMin, xMax] for x-axis
+     * @param {Array<number>} yLimits - [yMin, yMax] for y-axis
+     * @returns {Object|null} Clipped {x0, y0, x1, y1}, or null when the line
+     *     misses the rectangle or only touches it at a single point
+     */
+    function clipLineToAxes(dx, dy, xLimits, yLimits) {
+        const [xMin, xMax] = xLimits;
+        const [yMin, yMax] = yLimits;
+        const EPS = 1e-9;
+
+        // Edge tests use a tolerance scaled to the limits so that rounding in
+        // t * d cannot reject a hit lying exactly on an edge or corner.
+        const tolX = EPS * Math.max(1, Math.abs(xMin), Math.abs(xMax));
+        const tolY = EPS * Math.max(1, Math.abs(yMin), Math.abs(yMax));
+        const clamp = (v, lo, hi) => Math.min(Math.max(v, lo), hi);
+
+        // Line through origin: (x, y) = t * (dx, dy) for any scalar t
+        // Collect every hit of the line on the four rectangle edges
+        const tValues = [];
+
+        // Vertical edges x = xMin and x = xMax: t = x / dx (if dx != 0)
+        if (Math.abs(dx) > EPS) {
+            for (const x of [xMin, xMax]) {
+                const t = x / dx;
+                const y = t * dy;
+                if (y >= yMin - tolY && y <= yMax + tolY) {
+                    tValues.push({ t, x, y: clamp(y, yMin, yMax) });
+                }
+            }
+        }
+
+        // Horizontal edges y = yMin and y = yMax: t = y / dy (if dy != 0)
+        if (Math.abs(dy) > EPS) {
+            for (const y of [yMin, yMax]) {
+                const t = y / dy;
+                const x = t * dx;
+                if (x >= xMin - tolX && x <= xMax + tolX) {
+                    tValues.push({ t, x: clamp(x, xMin, xMax), y });
+                }
+            }
+        }
+
+        // Need at least 2 intersections (line enters and exits rectangle)
+        if (tValues.length < 2) {
+            return null;
+        }
+
+        // Sort by t value and take the two extremes (smallest and largest t)
+        tValues.sort((a, b) => a.t - b.t);
+        const start = tValues[0];
+        const end = tValues[tValues.length - 1];
+
+        // When both extremes coincide the line only touches a corner of the
+        // rectangle, so there is no visible segment to draw.
+        if (Math.abs(end.x - start.x) <= tolX && Math.abs(end.y - start.y) <= tolY) {
+            return null;
+        }
+
+        return {
+            x0: start.x,
+            y0: start.y,
+            x1: end.x,
+            y1: end.y
+        };
+    }
+
+    /**
+     * Calculate dye line endpoints for the current channel pair.
+     * Each matrix row is projected onto (xChan, yChan), normalized to unit length,
+     * and clipped to the axis limits; rows that project to ~zero or miss the view are skipped.
+     *
+     * @param {string} xChan - X-axis channel (e.g., "488"); returns [] if not in `channels`
+     * @param {string} yChan - Y-axis channel (e.g., "514"); returns [] if not in `channels`
+     * @param {Array<Array<number>>} ratiosMatrix - Dye coefficients; row d is dye d, columns are indexed like `channels`
+     * @param {Array<string>} channels - Ordered list of channel names; channels[d] also labels and colors dye d
+     * @param {Array<number>} xLimits - [min, max] for x-axis
+     * @param {Array<number>} yLimits - [min, max] for y-axis
+     * @returns {Array<Object>} Lines as {dyeIndex, channel, x0, y0, x1, y1, color, dx, dy, norm}; [] if the matrix is missing/empty
+     */
+    function calculateDyeLines(xChan, yChan, ratiosMatrix, channels, xLimits, yLimits) {
+        if (!ratiosMatrix || ratiosMatrix.length === 0) {
+            console.warn('No ratios matrix available for dye lines');
+            return [];
+        }
+        
+        // Column indices of the two plotted channels within each matrix row
+        const xIdx = channels.indexOf(xChan);
+        const yIdx = channels.indexOf(yChan);
+        
+        if (xIdx === -1 || yIdx === -1) {
+            console.warn(`Channel indices not found: x=${xChan} (idx=${xIdx}), y=${yChan} (idx=${yIdx})`);
+            return [];
+        }
+        
+        const dyeLines = [];
+        const numDyes = ratiosMatrix.length;
+        
+        console.log(`Calculating dye lines: xChan=${xChan} (idx=${xIdx}), yChan=${yChan} (idx=${yIdx})`);
+        console.log(`Axis limits: x=[${xLimits[0]}, ${xLimits[1]}], y=[${yLimits[0]}, ${yLimits[1]}]`);
+        
+        // For each dye (each row represents a dye's spectral signature)
+        for (let d = 0; d < numDyes; d++) {
+            // Extract 2D projection of this dye's direction
+            // ratiosMatrix[d] is the d-th dye's coefficients across all channels
+            const dx = ratiosMatrix[d][xIdx]; // Coefficient for x-channel
+            const dy = ratiosMatrix[d][yIdx]; // Coefficient for y-channel
+            
+            // Normalize to unit length in this 2D subspace
+            const norm = Math.sqrt(dx * dx + dy * dy);
+            if (norm < 1e-9) {
+                console.log(`Skipping dye ${d} (channel ${channels[d]}): near-zero norm (${norm})`);
+                continue; // Skip near-zero vectors
+            }
+            
+            const ux = dx / norm;
+            const uy = dy / norm;
+            
+            // Clip the line to the visible axis boundaries (null when nothing is visible)
+            const clipped = clipLineToAxes(ux, uy, xLimits, yLimits);
+            
+            if (!clipped) {
+                console.log(`Skipping dye ${d} (channel ${channels[d]}): no intersection with visible area`);
+                continue;
+            }
+            
+            dyeLines.push({
+                dyeIndex: d,
+                channel: channels[d],
+                x0: clipped.x0,
+                y0: clipped.y0,
+                x1: clipped.x1,
+                y1: clipped.y1,
+                color: COLORS[channels[d]] || COLORS.default, // fallback color for channels without a COLORS entry
+                dx: dx, // raw (un-normalized) x coefficient
+                dy: dy, // raw (un-normalized) y coefficient
+                norm: norm
+            });
+            
+            console.log(`Dye line ${d} (${channels[d]}): dx=${dx.toFixed(3)}, dy=${dy.toFixed(3)}, ` +
+                       `clipped to [(${clipped.x0.toFixed(1)}, ${clipped.y0.toFixed(1)}) -> ` +
+                       `(${clipped.x1.toFixed(1)}, ${clipped.y1.toFixed(1)})]`);
+        }
+        
+        return dyeLines;
+    }
     
     function updateChart(newData = null) {
         if (newData) {
@@ -1176,6 +1352,121 @@ document.addEventListener('DOMContentLoaded', function () {
         // Get series indices for visualMap
         const seriesIndices = series.map((_, index) => index);
         
+        // Add dye lines if enabled
+        if (showDyeLines && ratiosMatrix && channelPairs.length > 0) {
+            // Get all channels for matrix lookup
+            let allChannels = [];
+            if (typeof summaryStats !== 'undefined' && summaryStats && summaryStats.length > 0) {
+                allChannels = summaryStats.map(stat => stat.channel.toString());
+            } else {
+                // Derive from channel pairs
+                const uniqueChans = new Set();
+                channelPairs.forEach(pair => {
+                    uniqueChans.add(pair[0].toString());
+                    uniqueChans.add(pair[1].toString());
+                });
+                allChannels = Array.from(uniqueChans).sort();
+            }
+            
+            // Only plot dye lines for the current pair
+            const currentPairChannels = [xChan.toString(), yChan.toString()];
+            console.log(`Drawing dye lines for current pair only: ${currentPairChannels}`);
+            
+            // Determine axis limits for dye line scaling
+            let xLims, yLims;
+            if (chartLimitsMode === 'auto') {
+                // Use data range for auto mode
+                const xField = `chan_${xChan}_intensity`;
+                const yField = `chan_${yChan}_intensity`;
+                const xValues = allChartData.typedArrays[xField];
+                const yValues = allChartData.typedArrays[yField];
+                
+                const xMax = Math.max(...xValues);
+                const yMax = Math.max(...yValues);
+                xLims = [0, xMax * 1.1]; // Add 10% padding
+                yLims = [0, yMax * 1.1];
+            } else {
+                // Use the actual axis limits that will be applied
+                xLims = currentXLimits;
+                yLims = currentYLimits;
+            }
+            
+            console.log(`Dye line scaling: x=[${xLims[0]}, ${xLims[1].toFixed(1)}], y=[${yLims[0]}, ${yLims[1].toFixed(1)}]`);
+            
+            const dyeLines = calculateDyeLines(
+                xChan.toString(),
+                yChan.toString(),
+                ratiosMatrix,
+                allChannels,
+                xLims,
+                yLims
+            );
+            
+            // Filter to only include dye lines for the current pair
+            const filteredDyeLines = dyeLines.filter(line => 
+                currentPairChannels.includes(line.channel)
+            );
+            
+            if (filteredDyeLines.length > 0) {
+                console.log(`Adding ${filteredDyeLines.length} dye lines to chart (filtered from ${dyeLines.length} total)`);
+                
+                // Add each dye line as a separate series
+                filteredDyeLines.forEach(line => {
+                    series.push({
+                        name: `Dye: ${line.channel}`,
+                        type: 'line',
+                        data: [[line.x0, line.y0], [line.x1, line.y1]],
+                        lineStyle: {
+                            color: line.color,
+                            width: 4,
+                            type: 'solid',
+                            opacity: 0.9
+                        },
+                        itemStyle: {
+                            color: line.color  // Ensure marker color matches line color
+                        },
+                        symbol: 'none',
+                        symbolSize: 0,
+                        emphasis: {
+                            disabled: true
+                        },
+                        zlevel: 10, // Render on top of scatter points
+                        silent: true, // Don't respond to mouse events
+                        animation: false,
+                        clip: false, // Keep rendering even when points are outside axis range
+                        // Show only line in legend (no marker)
+                        legendHoverLink: false,
+                        showSymbol: false,
+                        // Add text label at endpoint
+                        markPoint: {
+                            symbol: 'none',
+                            data: [{
+                                coord: [line.x1, line.y1],
+                                symbol: 'none',
+                                symbolSize: 0,
+                                itemStyle: {
+                                    opacity: 0
+                                },
+                                label: {
+                                    show: true,
+                                    formatter: line.channel,
+                                    position: 'top',
+                                    fontSize: 14,
+                                    fontWeight: 'bold',
+                                    color: line.color,
+                                    backgroundColor: 'rgba(255, 255, 255, 0.9)',
+                                    padding: 3,
+                                    borderRadius: 3
+                                }
+                            }]
+                        }
+                    });
+                });
+            } else {
+                console.log('No valid dye lines calculated for current pair');
+            }
+        }
+
         option = {
             // title: {
             //     text: `Intensity Scatter Plot: Channel ${xChan} vs ${yChan}`,
@@ -2014,14 +2305,19 @@ document.addEventListener('DOMContentLoaded', function () {
             channels = Array.from({length: ratiosMatrix.length}, (_, i) => `CH_${i}`);
         }
         
+        // Reverse the channels array for vertical flip
+        const reversedChannels = [...channels].reverse();
+        
         // Prepare data for heatmap
         const data = [];
         const maxValue = 100; // Maximum percentage in ratios matrix
         
-        // Transform matrix into heatmap data format
+        // Transform matrix into heatmap data format with vertical flip
         for (let i = 0; i < ratiosMatrix.length; i++) {
             for (let j = 0; j < ratiosMatrix[i].length; j++) {
-                data.push([i, j, ratiosMatrix[i][j]]);
+                // Flip vertically: map i to (n - 1 - i)
+                const flippedI = ratiosMatrix.length - 1 - i;
+                data.push([j, flippedI, ratiosMatrix[i][j]]);
             }
         }
         
@@ -2034,9 +2330,10 @@ document.addEventListener('DOMContentLoaded', function () {
             tooltip: {
                 position: 'top',
                 formatter: function(params) {
-                    const original = ratiosMatrix[params.data[0]][params.data[1]];
-                    const sourceChannel = channels[params.data[0]];
-                    const targetChannel = channels[params.data[1]];
+                    const flippedI = ratiosMatrix.length - 1 - params.data[1];
+                    const original = ratiosMatrix[flippedI][params.data[0]];
+                    const sourceChannel = channels[flippedI];
+                    const targetChannel = channels[params.data[0]];
                     return `Original: ${sourceChannel}<br>Reassigned: ${targetChannel}<br>Ratio: ${original}%`;
                 }
             },
@@ -2049,20 +2346,14 @@ document.addEventListener('DOMContentLoaded', function () {
                 data: channels,
                 splitArea: {
                     show: true
-                },
-                name: 'Reassigned',
-                nameLocation: 'middle',
-                nameGap: 30
+                }
             },
             yAxis: {
                 type: 'category',
-                data: channels,
+                data: reversedChannels,
                 splitArea: {
                     show: true
-                },
-                name: 'Original',
-                nameLocation: 'middle',
-                nameGap: 40
+                }
             },
             visualMap: {
                 min: 0,
diff --git a/src/see_spot/templates/unmixed_spots.html b/src/see_spot/templates/unmixed_spots.html
index 9fbfb4e..a977eb4 100644
--- a/src/see_spot/templates/unmixed_spots.html
+++ b/src/see_spot/templates/unmixed_spots.html
@@ -523,6 +523,21 @@ <h3>Data Filters</h3>
                     <span id="marker_size_max_label">3.0×</span>
                 </div>
             </div>
+            <div style="margin-bottom: 20px;">
+                <label style="font-weight: bold; display: block; margin-bottom: 8px;">Show Dye Lines</label>
+                <div style="display: flex; align-items: center; gap: 10px;">
+                    <div class="toggle-switch">
+                        <input type="checkbox" id="dye_lines_toggle" style="display: none;">
+                        <label for="dye_lines_toggle" style="position: relative; display: inline-block; width: 40px; height: 20px; background-color: #ccc; border-radius: 20px; cursor: pointer; transition: background-color 0.3s;">
+                            <span style="position: absolute; top: 2px; left: 2px; width: 16px; height: 16px; border-radius: 50%; background-color: white; transition: left 0.3s;"></span>
+                        </label>
+                    </div>
+                    <span id="dye_lines_status" style="font-size: 13px;">Off</span>
+                </div>
+                <div style="font-size: 11px; color: #666; margin-top: 5px;">
+                    Display learned dye directions
+                </div>
+            </div>
             <button id="reset_filters_btn" style="width: 100%; padding: 6px; background-color: #ff9800; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">Reset Filters</button>
             <div id="filter_count" style="margin-top: 10px; font-size: 12px; color: #666; text-align: center;"></div>
         </div>

From e25a7f036e38ab4cc71fe06dbf87720232a8c36f Mon Sep 17 00:00:00 2001
From: Matt Davis <mattjdavis@gmail.com>
Date: Fri, 14 Nov 2025 12:30:29 -0800
Subject: [PATCH 17/20] fix: duplicate tile dataset names

---
 src/see_spot/app.py                       | 33 +++++++++++++++++++++--
 src/see_spot/s3_utils.py                  | 23 ++++++++++++++--
 src/see_spot/static/js/unmixed_spots.js   | 26 +++++++++++++++---
 src/see_spot/templates/unmixed_spots.html |  4 +--
 4 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index 7852d74..b83b267 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -39,7 +39,7 @@
 
 # Configuration for the spots data
 S3_BUCKET = "aind-open-data"
-DATA_PREFIX = "HCR_749315_2025-05-08_14-00-00_processed_2025-05-17_22-15-31"  # set default for app load
+DATA_PREFIX = None  # No dataset loaded by default - user must select one
 SAMPLE_SIZE = 5000
 
 # In-memory cache for DataFrame to avoid reloading on every request
@@ -260,6 +260,17 @@ async def get_real_spots_data(
     logger.info(f"Real spots data requested with sample size: {sample_size}, "
                 f"force_refresh: {force_refresh}, valid_spots_only: {valid_spots_only}")
 
+    # Check if a dataset has been selected
+    if DATA_PREFIX is None:
+        logger.info("No dataset selected - returning empty response")
+        return JSONResponse(
+            status_code=200,
+            content={
+                "no_dataset_selected": True,
+                "message": "Please select a dataset from the Dataset Management panel"
+            }
+        )
+
     # Detect if this is a tile dataset at the beginning - needed for all operations
     import re
     tile_pattern = re.compile(r'_X_\d+_Y_\d+_Z_\d+$')
@@ -727,7 +738,7 @@ async def list_datasets():
                         "name": dataset_dir.name,
                         "creation_date": creation_time.strftime("%Y-%m-%d %H:%M:%S"),
                         "has_data": has_data,
-                        "is_current": dataset_dir.name == DATA_PREFIX
+                        "is_current": DATA_PREFIX is not None and dataset_dir.name == DATA_PREFIX
                     })
         
         # Sort by creation date (newest first)
@@ -750,6 +761,24 @@ async def download_dataset(request: Request):
         if not dataset_name:
             return JSONResponse(status_code=400, content={"error": "Dataset name is required"})
         
+        # Check if this is already a virtual tile dataset (ends with _X_####_Y_####_Z_####)
+        import re
+        tile_pattern = re.compile(r'_X_\d+_Y_\d+_Z_\d+$')
+        if tile_pattern.search(dataset_name):
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "error": (
+                        f"Cannot download virtual tile dataset '{dataset_name}'. "
+                        "Please download the base dataset instead."
+                    ),
+                    "hint": (
+                        "Remove the tile suffix "
+                        "(e.g., '_X_0000_Y_0000_Z_0000') and try again."
+                    )
+                }
+            )
+        
         # Check if dataset exists on S3 by looking for the processing manifest
         manifest_key = find_processing_manifest(S3_BUCKET, dataset_name)
         
diff --git a/src/see_spot/s3_utils.py b/src/see_spot/s3_utils.py
index 9ae6f3c..19582c0 100644
--- a/src/see_spot/s3_utils.py
+++ b/src/see_spot/s3_utils.py
@@ -314,8 +314,27 @@ def load_and_merge_spots_from_s3(
     """
     # Adjust paths for tile folder if provided
     if tile_folder:
-        cache_dir = Path("/s3-cache") / bucket / f"{dataset_name}_{extract_tile_suffix(tile_folder)}"
-        parquet_file = cache_dir / f"{dataset_name}_{extract_tile_suffix(tile_folder)}.parquet"
+        # Check if dataset_name already has a tile suffix (virtual tile dataset)
+        import re
+        tile_pattern = re.compile(r'_X_\d+_Y_\d+_Z_\d+$')
+        if tile_pattern.search(dataset_name):
+            # dataset_name is already a virtual tile dataset - use it as-is
+            cache_dir = Path("/s3-cache") / bucket / dataset_name
+            parquet_file = cache_dir / f"{dataset_name}.parquet"
+            logger.info(
+                f"Virtual tile dataset detected: {dataset_name}, "
+                f"using as-is"
+            )
+        else:
+            # dataset_name is a base dataset - append tile suffix
+            tile_suffix = extract_tile_suffix(tile_folder)
+            cache_dir = (
+                Path("/s3-cache") / bucket / f"{dataset_name}_{tile_suffix}"
+            )
+            parquet_file = cache_dir / f"{dataset_name}_{tile_suffix}.parquet"
+            logger.info(
+                f"Base dataset with tile: {dataset_name}, tile: {tile_suffix}"
+            )
         # Update prefix to look inside tile folder
         unmixed_spots_prefix = f"{unmixed_spots_prefix}{tile_folder}/"
         logger.info(f"Loading tile dataset from: {unmixed_spots_prefix}")
diff --git a/src/see_spot/static/js/unmixed_spots.js b/src/see_spot/static/js/unmixed_spots.js
index fb6813d..44ed9de 100644
--- a/src/see_spot/static/js/unmixed_spots.js
+++ b/src/see_spot/static/js/unmixed_spots.js
@@ -255,6 +255,14 @@ document.addEventListener('DOMContentLoaded', function () {
         console.log('titleElement:', titleElement);
         console.log('nameSpan:', nameSpan);
         
+        if (datasetName === null) {
+            // No dataset selected - show selection prompt
+            console.log('No dataset selected, showing prompt');
+            titleElement.classList.remove('loading');
+            nameSpan.textContent = 'Please select a dataset ➡️';
+            return;
+        }
+        
         if (!datasetName || datasetName === 'Unknown Dataset') {
             console.log('No valid dataset name, showing loading state');
             titleElement.classList.add('loading');
@@ -715,8 +723,8 @@ document.addEventListener('DOMContentLoaded', function () {
     // Initial sample size note update
     updateSampleSizeNote(currentSampleSize);
     
-    // Initial data fetch
-    fetchData(currentSampleSize, false);
+    // Don't fetch data on initial load - wait for user to select a dataset
+    // fetchData(currentSampleSize, false);
     
     // Initialize button states
     updateButtonStates();
@@ -738,6 +746,14 @@ document.addEventListener('DOMContentLoaded', function () {
                 console.log(`Fetched spots data with sample size ${sampleSize}:`, data);
                 console.log('Current dataset from API:', data.current_dataset);
                 
+                // Check if no dataset is selected
+                if (data.no_dataset_selected) {
+                    console.log('No dataset selected:', data.message);
+                    updateDatasetTitle(null);  // Show "Please select a dataset" message
+                    myChart.hideLoading();
+                    return;
+                }
+                
                 if (!data.spots_data || !data.channel_pairs || data.spots_data.length === 0) {
                     throw new Error("Invalid or empty data received from API");
                 }
@@ -810,6 +826,10 @@ document.addEventListener('DOMContentLoaded', function () {
         // Always convert to typed arrays for better performance
         convertToTypedArrays(spotsData);
 
+        // Set allChartData before calling updateFilterSliderRanges
+        // so the sliders have data to work with
+        allChartData = spotsData;
+
         // Create channel selector buttons
         createChannelSelector();
         
@@ -818,7 +838,7 @@ document.addEventListener('DOMContentLoaded', function () {
 
         // Set initial channel pair
         currentPairIndex = 0;
-        updateChart(spotsData);
+        updateChart();  // Don't pass spotsData since allChartData is already set
     }
 
     function createChannelSelector() {
diff --git a/src/see_spot/templates/unmixed_spots.html b/src/see_spot/templates/unmixed_spots.html
index a977eb4..f70a4c5 100644
--- a/src/see_spot/templates/unmixed_spots.html
+++ b/src/see_spot/templates/unmixed_spots.html
@@ -567,9 +567,9 @@ <h3>Chart Limits</h3>
         <div id="charts-column">
             <!-- Current Dataset Display -->
             <div id="current-dataset-display">
-                <h2 id="dataset-title" class="loading">
+                <h2 id="dataset-title">
                     <span class="dataset-icon">📊</span>
-                    <span class="dataset-name">Loading dataset...</span>
+                    <span class="dataset-name">Please select a dataset ➡️</span>
                 </h2>
             </div>
             

From feab3fad1c3e7f79de09425b443e823c0389355a Mon Sep 17 00:00:00 2001
From: Matt Davis <mattjdavis@gmail.com>
Date: Fri, 14 Nov 2025 12:48:17 -0800
Subject: [PATCH 18/20] feat: multi annotate neuroglancer from table

---
 src/see_spot/app.py                       | 148 ++++++++++++++++++
 src/see_spot/ng_utils.py                  | 178 ++++++++++++++++++++++
 src/see_spot/static/js/unmixed_spots.js   |  81 ++++++++++
 src/see_spot/templates/unmixed_spots.html |   1 +
 4 files changed, 408 insertions(+)

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index b83b267..eb9554c 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -545,6 +545,11 @@ async def get_real_spots_data(
         except Exception as e:
             logger.error(f"Error calculating Sankey data: {e}", exc_info=True)
 
+    # Cache spot_details and fused_s3_paths for use by other endpoints
+    df_cache["spot_details"] = spot_details
+    df_cache["fused_s3_paths"] = fused_s3_paths
+    logger.info(f"Cached {len(spot_details)} spot_details and {len(fused_s3_paths)} fused_s3_paths")
+
     # 11. Build the response
     response = {
         "channel_pairs": channel_pairs,
@@ -713,6 +718,146 @@ async def create_neuroglancer_link(request: Request):
         )
 
 
+def _find_spot_details(cache, spot_id):
+    """Return cached details for spot_id, trying int/str key spellings; None if absent."""
+    candidates = [spot_id]
+    if isinstance(spot_id, str) and spot_id.isdigit():
+        try:
+            candidates.append(int(spot_id))
+        except ValueError:
+            pass  # isdigit() accepts text such as "²" that int() rejects
+    elif isinstance(spot_id, int):
+        candidates.append(str(spot_id))
+    try:
+        return next((cache[key] for key in candidates if key in cache), None)
+    except TypeError:  # unhashable ID (e.g. a list from the JSON body) cannot be a key
+        return None
+
+
+@app.post("/api/create-neuroglancer-multi-annotations")
+async def create_neuroglancer_multi_annotations(request: Request):
+    """Creates a neuroglancer link with multiple point annotations (max 1000).
+
+    Expects a JSON object with ``spot_ids`` (non-empty list) and optional
+    ``annotation_color``, ``cross_section_scale`` and ``layer_name``. Spot
+    coordinates and fused S3 paths are read from ``df_cache``; invalid input or
+    an empty cache yields a 400 response, any unexpected error a 500 response.
+    """
+    try:
+        try:
+            data = await request.json()
+        except ValueError:  # malformed JSON is a client error, not a server error
+            data = None
+        if not isinstance(data, dict):
+            return JSONResponse(
+                status_code=400,
+                content={"error": "Request body must be a JSON object"}
+            )
+
+        # Extract parameters
+        spot_ids = data.get("spot_ids", [])
+        annotation_color = data.get("annotation_color", "#00FF00")  # Green for SeeSpot
+        cross_section_scale = data.get("cross_section_scale", 0.2)
+        layer_name = data.get("layer_name", "SeeSpot")
+
+        # Validate input
+        if not spot_ids or not isinstance(spot_ids, list):
+            return JSONResponse(
+                status_code=400,
+                content={"error": "spot_ids must be a non-empty list"}
+            )
+
+        # Limit to 1000 annotations
+        if len(spot_ids) > 1000:
+            logger.warning(f"Limiting annotations from {len(spot_ids)} to 1000")
+            spot_ids = spot_ids[:1000]
+
+        logger.info(f"Creating Neuroglancer link with {len(spot_ids)} annotations in layer '{layer_name}'")
+
+        # Get spot details from cache
+        spot_details_cache = df_cache.get("spot_details", {})
+        if not spot_details_cache:
+            logger.error("No spot details in cache")
+            return JSONResponse(
+                status_code=400,
+                content={"error": "No spot details available in cache. Please load data first."}
+            )
+
+        # Sampling the cache keys copies them all, so only do it when debugging
+        if logger.isEnabledFor(logging.DEBUG):
+            logger.debug(f"Cache has {len(spot_details_cache)} spot details")
+            logger.debug(f"First few spot_ids from request: {spot_ids[:5]}")
+            logger.debug(f"First few keys in cache: {list(spot_details_cache.keys())[:5]}")
+
+        # Build annotations list
+        annotations = []
+        missing_spots = []
+        for spot_id in spot_ids:
+            details = _find_spot_details(spot_details_cache, spot_id)
+            if details:
+                # Point annotation in [x, y, z, t, 0] format (t and the extra dimension are 0)
+                point = [details.get(axis, 0) for axis in ("x", "y", "z")] + [0, 0]
+                annotations.append({"spot_id": spot_id, "point": point})
+                logger.debug(f"Found spot {spot_id}: {point}")
+            else:
+                missing_spots.append(spot_id)
+                logger.debug(f"Missing spot {spot_id}")
+
+        if not annotations:
+            logger.error(f"No valid spot details found. Missing all {len(missing_spots)} spots")
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "error": "No valid spot details found for provided spot_ids",
+                    "missing_spots": missing_spots[:10],
+                    "cache_sample_keys": list(spot_details_cache.keys())[:10]
+                }
+            )
+
+        if missing_spots:
+            logger.warning(f"Missing spot details for {len(missing_spots)} spots: {missing_spots[:10]}...")
+
+        # Get fused S3 paths from cache
+        fused_s3_paths = df_cache.get("fused_s3_paths", {})
+        if not fused_s3_paths:
+            return JSONResponse(
+                status_code=400,
+                content={"error": "No fused S3 paths available in cache. Please load data first."}
+            )
+
+        # Center the view on the mean x/y/z of all annotations (the list is non-empty here)
+        position = [sum(a["point"][i] for a in annotations) / len(annotations) for i in range(3)] + [0]
+
+        logger.info(f"Center position: {position}")
+
+        # Create the neuroglancer link
+        ng_link = ng_utils.create_link_with_multiple_annotations(
+            fused_s3_paths=fused_s3_paths,
+            annotations=annotations,
+            position=position,
+            layer_name=layer_name,
+            annotation_color=annotation_color,
+            spacing=3.0,
+            cross_section_scale=cross_section_scale
+        )
+
+        logger.info(f"Successfully created Neuroglancer link with {len(annotations)} annotations")
+
+        return {
+            "url": ng_link,
+            "annotation_count": len(annotations),
+            "missing_spots": len(missing_spots),
+            "layer_name": layer_name
+        }
+
+    except Exception as e:
+        logger.error(f"Error creating multi-annotation neuroglancer link: {e}", exc_info=True)
+        return JSONResponse(
+            status_code=500,
+            content={"error": f"Failed to create neuroglancer link: {str(e)}"}
+        )
+
+
 @app.get("/api/datasets")
 async def list_datasets():
     """List all available datasets in the local cache, including virtual tile datasets."""
@@ -933,6 +1078,9 @@ async def set_active_dataset(request: Request):
         df_cache["target_key"] = None
         df_cache["processing_manifest"] = None
         df_cache["spot_channels_from_manifest"] = None
+        df_cache["spot_details"] = None
+        df_cache["fused_s3_paths"] = None
+        df_cache["sankey_data"] = None
         
         logger.info(f"Active dataset changed to: {dataset_name}")
         
diff --git a/src/see_spot/ng_utils.py b/src/see_spot/ng_utils.py
index d48e72a..f760612 100644
--- a/src/see_spot/ng_utils.py
+++ b/src/see_spot/ng_utils.py
@@ -494,3 +494,181 @@ def wavelength_to_hex_pure_colours(wavelength: int) -> int:
         if wavelength < ub:  # Exclusive
             return hex_val
     return hex_val  # hex_val is set to the last color in for loop
+
+
+def create_link_with_multiple_annotations(
+    fused_s3_paths,
+    annotations,
+    position=None,
+    layer_name="SeeSpot",
+    annotation_color="#00FF00",
+    spacing=3.0,
+    cross_section_scale=1.0,
+    resolution_zyx=None,
+    max_dr=1200,
+    opacity=1.0,
+    blend="additive",
+    output_folder=None,
+):
+    """
+    Build a direct Neuroglancer URL with one image layer per fused path plus one point-annotation layer.
+
+    Parameters:
+    -----------
+    fused_s3_paths (dict, list or str): Dict whose values are S3 paths, a list of S3 paths, or a single path string
+    annotations (list): List of annotation dicts, each containing:
+        - spot_id: Unique identifier for the spot (stringified and used as the annotation id)
+        - point: Coordinates [x, y, z, t, ...] for the annotation
+    position (list, optional): Initial position to view [x, y, z, t]. If None, the first annotation's point is used, truncated or zero-padded to 4 values
+    layer_name (str): Name for the annotation layer. Default: "SeeSpot"
+    annotation_color (str): Hex color for annotations. Default: "#00FF00" (green)
+    spacing (float): Value for the layer's crossSectionAnnotationSpacing. Default: 3.0
+    cross_section_scale (float): Value written to the state's crossSectionScale. Default: 1.0
+    resolution_zyx (list, optional): Resolution in z,y,x order. If None, read from the first zarr path; falls back to [1.0, 1.0, 1.0] if that fails
+    max_dr (int): Upper bound of the normalized shader range [90, max_dr]. Default: 1200
+    opacity (float): Opacity value for each image layer. Default: 1.0
+    blend (str): Blending mode for each image layer. Default: "additive"
+    output_folder (str, optional): Folder passed to NgState; defaults to <cwd>/<dataset_name>/ and is created if missing
+
+    Returns:
+    --------
+    str: Direct Neuroglancer URL with multiple annotations
+    """
+    # Normalize fused_s3_paths to a list (dict -> its values, str -> single-item list)
+    if isinstance(fused_s3_paths, dict):
+        fused_s3_path = list(fused_s3_paths.values())
+    elif isinstance(fused_s3_paths, str):
+        fused_s3_path = [fused_s3_paths]
+    else:
+        fused_s3_path = fused_s3_paths
+
+    # If resolution not provided, try to read from first zarr file
+    if resolution_zyx is None:
+        try:
+            resolution_zyx = read_zarr_resolution_boto(fused_s3_path[0])
+            print(f"Found resolution from zarr: {resolution_zyx}")
+        except Exception as e:
+            print(
+                f"Warning: Could not read resolution from zarr file: {str(e)}"
+            )
+            # Provide a default resolution if we can't read it
+            resolution_zyx = [1.0, 1.0, 1.0]
+            print(f"Using default resolution: {resolution_zyx}")
+
+    output_dimensions = {
+        "x": {"voxel_size": resolution_zyx[2], "unit": "microns"},
+        "y": {"voxel_size": resolution_zyx[1], "unit": "microns"},
+        "z": {"voxel_size": resolution_zyx[0], "unit": "microns"},
+        "c'": {"voxel_size": 1, "unit": ""},
+        "t": {"voxel_size": 0.001, "unit": "seconds"},
+    }
+
+    # Initialize layers list
+    layers = []
+
+    # Process each fused path to create image layers
+    for idx, fused_path in enumerate(fused_s3_path):
+        # Extract channel number from fused path (ValueError if no ch_/CH_/channel_ token is present)
+        pattern = r"(ch|CH|channel)_(\d+)"
+        match = re.search(pattern, fused_path)
+        if not match:
+            raise ValueError(
+                f"Could not extract channel number from path: {fused_path}"
+            )
+
+        channel = int(match.group(2))
+        hex_val = wavelength_to_hex_pure_colours(channel)
+        hex_str = f"#{hex_val:06x}"
+
+        # Add image layer, colored by passing the channel number to the wavelength-to-color helper
+        image_layer = {
+            "type": "image",
+            "source": fused_path,
+            "channel": 0,
+            "shaderControls": {"normalized": {"range": [90, max_dr]}},
+            "shader": {
+                "color": hex_str,
+                "emitter": "RGB",
+                "vec": "vec3",
+            },
+            "localPosition": [0.5],
+            "visible": True,
+            "opacity": opacity,
+            "name": f"CH_{channel}",
+            "blend": blend,
+        }
+        layers.append(image_layer)
+
+    # Create annotation layer with multiple points
+    annotation_layer = {
+        "type": "annotation",
+        "name": layer_name,
+        "tab": "annotations",
+        "visible": True,
+        "annotationColor": annotation_color,
+        "crossSectionAnnotationSpacing": spacing,
+        "projectionAnnotationSpacing": 10,
+        "tool": "annotatePoint",
+        "annotations": []
+    }
+
+    # Add all annotations to the layer
+    for annot in annotations:
+        annotation = {
+            "type": "point",
+            "id": str(annot["spot_id"]),
+            "point": annot["point"],
+        }
+        annotation_layer["annotations"].append(annotation)
+
+    print(f"Created annotation layer '{layer_name}' with {len(annotations)} points")
+
+    # Default the view position to the first annotation's point, truncated or zero-padded to 4 values
+    if position is None and len(annotations) > 0:
+        first_point = annotations[0]["point"]
+        position = first_point[:4] if len(first_point) >= 4 else first_point + [0] * (4 - len(first_point))
+
+    # Set input config with dimensions from resolution_zyx
+    input_config = {
+        "dimensions": output_dimensions,
+        "layers": layers,
+        "showScaleBar": False,
+        "showAxisLines": False,
+    }
+
+    # Extract bucket and dataset from first fused path (assumes an s3://<bucket>/<dataset>/... layout)
+    parts = fused_s3_path[0].split("/")
+    bucket_name = parts[2]
+    dataset_name = parts[3]
+
+    # Set up output folder
+    if output_folder is None:
+        cd = os.getcwd()
+        output_folder = f"{cd}/{dataset_name}/"
+    if not pathlib.Path(output_folder).exists():
+        pathlib.Path(output_folder).mkdir(parents=True, exist_ok=True)
+
+    # Create JSON file name
+    json_name = f"multi_annotation_ng_link_{len(annotations)}_spots.json"
+
+    # Generate the Neuroglancer state
+    neuroglancer_link = NgState(
+        input_config,
+        "s3",
+        bucket_name,
+        output_folder,
+        dataset_name=pathlib.Path(output_folder).stem,
+        base_url="https://neuroglancer-demo.appspot.com",
+        json_name=json_name,
+    )
+
+    state_dict = neuroglancer_link.state
+
+    # Add annotation layer and other state properties
+    state_dict["layers"].append(annotation_layer)
+    state_dict["crossSectionScale"] = cross_section_scale
+    state_dict["position"] = position
+
+    direct_url = create_direct_neuroglancer_url(state_dict)
+
+    return direct_url
diff --git a/src/see_spot/static/js/unmixed_spots.js b/src/see_spot/static/js/unmixed_spots.js
index 44ed9de..f3f392d 100644
--- a/src/see_spot/static/js/unmixed_spots.js
+++ b/src/see_spot/static/js/unmixed_spots.js
@@ -5,6 +5,7 @@ document.addEventListener('DOMContentLoaded', function () {
     const clearButton = document.getElementById('clear_spots_button');
     const addLassoButton = document.getElementById('add_lasso_selection_button');
     const exportCsvButton = document.getElementById('export_csv_button');
+    const annotateNeuroglancerButton = document.getElementById('annotate_neuroglancer_button');
     const labelInput = document.getElementById('label_input');
     const activeLabelDisplay = document.getElementById('active_label_display');
     const prevChannelButton = document.getElementById('prev_channel_pair');
@@ -1760,6 +1761,86 @@ document.addEventListener('DOMContentLoaded', function () {
         exportTableToCSV('unmixed_spots_selection.csv');
     });
 
+    // Annotate Neuroglancer: POST the table's spot IDs to the backend and open the returned link in a new tab
+    annotateNeuroglancerButton.addEventListener('click', function() {
+        if (spotsTableBody.rows.length === 0) {
+            alert("Table is empty. Add some spots first.");
+            return;
+        }
+        
+        // Collect spot IDs from the table
+        const spotIds = [];
+        for (let i = 0; i < spotsTableBody.rows.length; i++) {
+            const row = spotsTableBody.rows[i];
+            const spotId = row.cells[0].textContent; // First column is Spot ID
+            spotIds.push(spotId);
+        }
+        
+        // Warn above 1000 spots; spotIds is NOT truncated here — NOTE(review): confirm the backend enforces the 1000 cap
+        if (spotIds.length > 1000) {
+            if (!confirm(`You have ${spotIds.length} spots selected. Only the first 1000 will be annotated in Neuroglancer. Continue?`)) {
+                return;
+            }
+        }
+        
+        console.log(`Creating Neuroglancer link with ${spotIds.length} annotations`);
+        
+        // Disable button and show loading state
+        annotateNeuroglancerButton.disabled = true;
+        annotateNeuroglancerButton.textContent = 'Creating link...';
+        
+        // Prepare request data
+        const requestData = {
+            spot_ids: spotIds,
+            annotation_color: "#00FF00",  // Green for SeeSpot
+            cross_section_scale: 0.2,
+            layer_name: "SeeSpot"
+        };
+        
+        // Make API request; non-2xx responses are reported by status code only (the JSON error body is not read)
+        fetch('/api/create-neuroglancer-multi-annotations', {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json',
+            },
+            body: JSON.stringify(requestData)
+        })
+        .then(response => {
+            if (!response.ok) {
+                throw new Error(`HTTP error! Status: ${response.status}`);
+            }
+            return response.json();
+        })
+        .then(data => {
+            if (data.url) {
+                console.log(`Neuroglancer link created with ${data.annotation_count} annotations`);
+                
+                // Show success message, noting any spots the backend reported as missing
+                const message = `Created Neuroglancer link with ${data.annotation_count} annotations!`;
+                if (data.missing_spots > 0) {
+                    alert(`${message}\n\nNote: ${data.missing_spots} spots were missing coordinate data and were skipped.`);
+                } else {
+                    alert(message);
+                }
+                
+                // Open URL in new tab
+                window.open(data.url, '_blank');
+            } else {
+                console.error("No URL returned from API");
+                alert("Failed to create Neuroglancer link. No URL returned.");
+            }
+        })
+        .catch(error => {
+            console.error("Error creating multi-annotation neuroglancer link:", error);
+            alert(`Error creating Neuroglancer link: ${error.message}`);
+        })
+        .finally(() => {
+            // Re-enable button and restore text whether the request succeeded or failed
+            annotateNeuroglancerButton.disabled = false;
+            annotateNeuroglancerButton.textContent = 'Annotate Neuroglancer';
+        });
+    });
+
     function escapeCsvCell(cellData) {
         const dataString = String(cellData || '');
         if (dataString.includes(',') || dataString.includes('"') || dataString.includes('\n')) {
diff --git a/src/see_spot/templates/unmixed_spots.html b/src/see_spot/templates/unmixed_spots.html
index f70a4c5..68dad1d 100644
--- a/src/see_spot/templates/unmixed_spots.html
+++ b/src/see_spot/templates/unmixed_spots.html
@@ -685,6 +685,7 @@ <h4>📊 Dataset Management</h4>
                         <button id="add_lasso_selection_button">Add Lasso Selection</button>
                         <button id="clear_spots_button">Clear Selected Spots</button>
                         <button id="export_csv_button">Export Table as CSV</button>
+                        <button id="annotate_neuroglancer_button" style="background-color: #2196F3; color: white;">Annotate Neuroglancer</button>
                     </div>
                 </div>
             </div>

From d37e0e293a69bcd36cf372a87b3df7eb9af1d227 Mon Sep 17 00:00:00 2001
From: Matt Davis <mattjdavis@gmail.com>
Date: Fri, 14 Nov 2025 13:24:49 -0800
Subject: [PATCH 19/20] fix: table display limits

---
 src/see_spot/app.py                       |  3 +-
 src/see_spot/static/js/unmixed_spots.js   |  8 +-
 src/see_spot/templates/unmixed_spots.html | 91 ++++++++++++-----------
 3 files changed, 56 insertions(+), 46 deletions(-)

diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index eb9554c..622add8 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -4,6 +4,7 @@
 from fastapi.templating import Jinja2Templates
 from datetime import datetime
 from see_spot import ng_utils
+from see_spot import __version__
 import uvicorn
 import logging
 import os
@@ -1099,7 +1100,7 @@ async def set_active_dataset(request: Request):
 @app.get("/unmixed-spots")
 async def unmixed_spots_page(request: Request):
     logger.info("Unmixed spots page accessed")
-    return templates.TemplateResponse("unmixed_spots.html", {"request": request})
+    return templates.TemplateResponse("unmixed_spots.html", {"request": request, "version": __version__})
 
 
 if __name__ == '__main__':
diff --git a/src/see_spot/static/js/unmixed_spots.js b/src/see_spot/static/js/unmixed_spots.js
index f3f392d..3dcdba1 100644
--- a/src/see_spot/static/js/unmixed_spots.js
+++ b/src/see_spot/static/js/unmixed_spots.js
@@ -178,10 +178,12 @@ document.addEventListener('DOMContentLoaded', function () {
             searching: true, // Enable search box
             ordering: true, // Enable column sorting
             info: false, // Hide "Showing X to Y of Z entries" text
+            scrollX: false, // Disable horizontal scrolling
+            autoWidth: false, // Disable auto width calculation
             columnDefs: [
-                { width: "45%", targets: 0 }, // Dataset Name column
-                { width: "35%", targets: 1 }, // Date Added column
-                { width: "20%", targets: 2, orderable: false } // Status column (no sorting)
+                { width: "80%", targets: 0, className: "text-wrap" }, // Dataset Name column
+                { width: "10%", targets: 1 }, // Date Added column
+                { width: "10%", targets: 2, orderable: false } // Status column (no sorting)
             ],
             language: {
                 search: "Filter datasets:",
diff --git a/src/see_spot/templates/unmixed_spots.html b/src/see_spot/templates/unmixed_spots.html
index 68dad1d..3c1e3b3 100644
--- a/src/see_spot/templates/unmixed_spots.html
+++ b/src/see_spot/templates/unmixed_spots.html
@@ -3,7 +3,7 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Unmixed Spots Visualization</title>
+    <title>SeeSpot - HCR spot visualization</title>
     <!-- Include ECharts -->
     <script src="https://cdn.jsdelivr.net/npm/echarts@5.5.0/dist/echarts.min.js"></script>
     <!-- Include DataTables -->
@@ -286,6 +286,7 @@
             width: 100%;
             border-collapse: collapse;
             font-size: 12px;
+            table-layout: fixed;
         }
         
         #dataset_table th {
@@ -301,6 +302,10 @@
         #dataset_table td {
             padding: 6px 8px;
             border-bottom: 1px solid #eee;
+            word-wrap: break-word;
+            overflow-wrap: break-word;
+            white-space: normal;
+            overflow: hidden;
         }
         
         #dataset_table tbody tr {
@@ -473,7 +478,7 @@ <h3>Dataset Controls</h3>
         <div style="text-align: center; margin-bottom: 20px;">
             <img src="/static/img/see-spot-logo.png" alt="SeeSpot Logo" style="width: 180px; margin-bottom: 10px;">
             <div style="font-size: 32px; font-weight: bold;">SeeSpot</div>
-            <div style="font-size: 16px; color: #666;">by @mattjdavis</div>
+            <div style="font-size: 12px; color: #999; margin-top: 2px;">v{{ version }} by @mattjdavis</div>
         </div>
         <div style="height: 20px;"></div>
         <h3>Hotkeys</h3>
@@ -515,6 +520,12 @@ <h3>Data Filters</h3>
                     <span id="distance_max_label">5.00</span>
                 </div>
             </div>
+            <button id="reset_filters_btn" style="width: 100%; padding: 6px; background-color: #ff9800; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">Reset Filters</button>
+            <div id="filter_count" style="margin-top: 10px; font-size: 12px; color: #666; text-align: center;"></div>
+        </div>
+        
+        <h3>Display</h3>
+        <div style="padding: 10px; background-color: #f5f5f5; border-radius: 4px; margin-bottom: 15px;">
             <div style="margin-bottom: 20px;">
                 <label style="font-weight: bold; display: block; margin-bottom: 8px;">Marker Size</label>
                 <div id="marker_size_slider" style="margin: 0 10px 15px 10px;"></div>
@@ -538,8 +549,37 @@ <h3>Data Filters</h3>
                     Display learned dye directions
                 </div>
             </div>
-            <button id="reset_filters_btn" style="width: 100%; padding: 6px; background-color: #ff9800; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">Reset Filters</button>
-            <div id="filter_count" style="margin-top: 10px; font-size: 12px; color: #666; text-align: center;"></div>
+            <div style="margin-bottom: 15px;">
+                <label style="font-weight: bold; display: block; margin-bottom: 8px;">Display chan</label>
+                <select id="display_chan_select" style="width: 100%; padding: 6px 8px; border: 1px solid #ccc; border-radius: 4px; font-size: 13px;">
+                    <option value="mixed">Mixed</option>
+                    <option value="unmixed">Unmixed</option>
+                </select>
+            </div>
+            <div style="margin-bottom: 15px;">
+                <label style="font-weight: bold; display: block; margin-bottom: 8px;">Show reassigned</label>
+                <div style="display: flex; align-items: center; gap: 10px;">
+                    <div class="toggle-switch">
+                        <input type="checkbox" id="highlight_reassigned_toggle" style="display: none;">
+                        <label for="highlight_reassigned_toggle" style="position: relative; display: inline-block; width: 40px; height: 20px; background-color: #ccc; border-radius: 20px; cursor: pointer; transition: background-color 0.3s;">
+                            <span style="position: absolute; top: 2px; left: 2px; width: 16px; height: 16px; border-radius: 50%; background-color: white; transition: left 0.3s;"></span>
+                        </label>
+                    </div>
+                    <span id="highlight_status" style="font-size: 13px;">Off</span>
+                </div>
+            </div>
+            <div style="margin-bottom: 15px;">
+                <label style="font-weight: bold; display: block; margin-bottom: 8px;">Show removed</label>
+                <div style="display: flex; align-items: center; gap: 10px;">
+                    <div class="toggle-switch">
+                        <input type="checkbox" id="highlight_removed_toggle" style="display: none;">
+                        <label for="highlight_removed_toggle" style="position: relative; display: inline-block; width: 40px; height: 20px; background-color: #ccc; border-radius: 20px; cursor: pointer; transition: background-color 0.3s;">
+                            <span style="position: absolute; top: 2px; left: 2px; width: 16px; height: 16px; border-radius: 50%; background-color: white; transition: left 0.3s;"></span>
+                        </label>
+                    </div>
+                    <span id="highlight_removed_status" style="font-size: 13px;">Off</span>
+                </div>
+            </div>
         </div>
         
         <h3>Chart Limits</h3>
@@ -641,46 +681,13 @@ <h4>📊 Dataset Management</h4>
                     <span id="sample_size_icon" style="margin-right: 8px; font-weight: bold; color: #4caf50;">✓</span>
                     <span id="sample_size_text">Small sample: full feature set</span>
                 </div>
-                <div id="filters" style="display: flex; flex-direction: column; gap: 10px; margin-bottom: 15px; background-color: #ffebee; padding: 10px; border-radius: 4px;">
-                    <div style="font-weight: bold; margin-bottom: 5px;">Filters</div>
-                    <div style="display: flex; align-items: center; margin-bottom: 10px;">
-                        <label for="display_chan_select" style="margin-right: 10px;">Display chan:</label>
-                        <select id="display_chan_select" style="padding: 4px 8px; border: 1px solid #ccc; border-radius: 4px;">
-                            <option value="mixed">Mixed</option>
-                            <option value="unmixed">Unmixed</option>
-                        </select>
-                    </div>
-                    <div style="display: flex; align-items: center; margin-bottom: 10px;">
-                        <label for="highlight_reassigned_toggle" style="margin-right: 10px;">Show reassigned:</label>
-                        <div class="toggle-switch">
-                            <input type="checkbox" id="highlight_reassigned_toggle" style="display: none;">
-                            <label for="highlight_reassigned_toggle" style="position: relative; display: inline-block; width: 40px; height: 20px; background-color: #ccc; border-radius: 20px; cursor: pointer; transition: background-color 0.3s;">
-                                <span style="position: absolute; top: 2px; left: 2px; width: 16px; height: 16px; border-radius: 50%; background-color: white; transition: left 0.3s;"></span>
-                            </label>
-                        </div>
-                        <span id="highlight_status" style="margin-left: 10px; font-size: 13px;">Off</span>
-                    </div>
-                    <div style="display: flex; align-items: center;">
-                        <label for="highlight_removed_toggle" style="margin-right: 10px;">Show removed:</label>
-                        <div class="toggle-switch">
-                            <input type="checkbox" id="highlight_removed_toggle" style="display: none;">
-                            <label for="highlight_removed_toggle" style="position: relative; display: inline-block; width: 40px; height: 20px; background-color: #ccc; border-radius: 20px; cursor: pointer; transition: background-color 0.3s;">
-                                <span style="position: absolute; top: 2px; left: 2px; width: 16px; height: 16px; border-radius: 50%; background-color: white; transition: left 0.3s;"></span>
-                            </label>
-                        </div>
-                        <span id="highlight_removed_status" style="margin-left: 10px; font-size: 13px;">Off</span>
-                    </div>
-                </div>
-                <div id="label_controls" style="display: flex; flex-direction: column; gap: 10px; margin-bottom: 15px; background-color: #fff3e0; padding: 10px; border-radius: 4px;">
-                    <div style="font-weight: bold; margin-bottom: 5px;">Labeling Controls</div>
-                    <div>
-                        <label for="label_input">Current Label:</label>
-                        <input type="text" id="label_input" placeholder="Enter label (optional)">
-                        <span id="active_label_display" style="font-style: italic;">(None)</span>
-                    </div>
-                </div>
                 <div id="table_controls" style="display: flex; flex-direction: column; gap: 10px; margin-bottom: 15px; background-color: #e8f5e9; padding: 10px; border-radius: 4px;">
                     <div style="font-weight: bold; margin-bottom: 5px;">Table Controls</div>
+                    <div style="margin-bottom: 10px;">
+                        <label for="label_input" style="font-weight: normal; margin-right: 5px;">Current Label:</label>
+                        <input type="text" id="label_input" placeholder="Enter label (optional)" style="width: 120px; padding: 3px 6px; border: 1px solid #ccc; border-radius: 3px; font-size: 12px;">
+                        <span id="active_label_display" style="font-style: italic; margin-left: 5px; font-size: 12px;">(None)</span>
+                    </div>
                     <div id="spots_buttons_container">
                         <button id="add_lasso_selection_button">Add Lasso Selection</button>
                         <button id="clear_spots_button">Clear Selected Spots</button>

From 9185ae21652bb3b44b599295d633c48d7328f1bf Mon Sep 17 00:00:00 2001
From: Matt Davis <mattjdavis@gmail.com>
Date: Fri, 14 Nov 2025 14:05:56 -0800
Subject: [PATCH 20/20] fix: install script

---
 INSTALL.md          | 210 +++++++++++++++++
 README.md           |  29 ++-
 config.yaml.example |  35 +++
 install.sh          | 540 ++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt    |   1 +
 src/see_spot/app.py |  43 +++-
 uninstall.sh        | 111 +++++++++
 7 files changed, 965 insertions(+), 4 deletions(-)
 create mode 100644 INSTALL.md
 create mode 100644 config.yaml.example
 create mode 100755 install.sh
 create mode 100755 uninstall.sh

diff --git a/INSTALL.md b/INSTALL.md
new file mode 100644
index 0000000..02a3274
--- /dev/null
+++ b/INSTALL.md
@@ -0,0 +1,210 @@
+# SeeSpot Installation Guide
+
+This guide walks you through installing SeeSpot on your system.
+
+## Prerequisites
+
+- **Operating System**: Linux (Ubuntu 20.04+, CentOS 7+, or similar)
+- **Disk Space**: At least 10 GB free for cache directory (more recommended for large datasets)
+- **AWS Credentials**: Valid AWS credentials with read access to `s3://aind-open-data`
+
+The installer will automatically install `uv` (Python package manager) if not present. Python 3.11+ will be installed by `uv` during the installation process.
+
+## Quick Start (Recommended)
+
+Run the installer with default settings:
+
+```bash
+cd /path/to/see-spot  # wherever you cloned the repository
+./install.sh
+```
+
+The installer will:
+1. Check system requirements
+2. Install `uv` if needed
+3. Validate AWS credentials
+4. Use default configuration (cache directory, server port, etc.)
+5. Set up Python environment
+6. Install dependencies
+7. Create launcher script
+
+## Interactive Installation
+
+If you want to customize settings during installation, use the `--interactive` flag:
+
+```bash
+./install.sh --interactive
+```
+
+This will prompt you for the following configuration values (defaults shown):
+- Cache directory: `~/.seespot/cache`
+- Server host: `0.0.0.0`
+- Server port: `5555`
+
+Examples:
+
+```bash
+# Standard installation (non-interactive, default)
+./install.sh
+
+# Interactive installation with prompts
+./install.sh --interactive
+
+# Verbose installation
+./install.sh --verbose
+
+# Dry run to preview changes
+./install.sh --dry-run
+
+# Interactive with verbose output
+./install.sh --interactive --verbose
+```
+
+## Installation Options
+
+The `install.sh` script supports the following options:
+
+- `--interactive` or `-i`: Prompt for configuration values
+- `--yes` or `-y`: Explicitly use non-interactive mode (default behavior)
+- `--verbose` or `-v`: Show detailed output
+- `--dry-run`: Show what would be done without making changes
+- `--help` or `-h`: Display help message
+
+**Default values used in non-interactive mode:** cache directory `~/.seespot/cache`, server host `0.0.0.0`, server port `5555`.
+
+## AWS Credentials Setup
+
+SeeSpot requires AWS credentials to access data. The installer checks for credentials in this order:
+
+1. **Environment Variables**: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`
+2. **AWS CLI Config**: `~/.aws/credentials` (default profile)
+3. **IAM Role**: For EC2 instances with attached IAM roles
+
+### Setting up AWS CLI credentials:
+
+```bash
+# Install AWS CLI if not present
+curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
+unzip awscliv2.zip
+sudo ./aws/install
+
+# Configure credentials
+aws configure
+```
+
+You'll be prompted for:
+- AWS Access Key ID
+- AWS Secret Access Key
+- Default region (e.g., `us-west-2`)
+- Output format (e.g., `json`)
+
+### Using environment variables:
+
+```bash
+export AWS_ACCESS_KEY_ID="your-access-key"
+export AWS_SECRET_ACCESS_KEY="your-secret-key"
+export AWS_SESSION_TOKEN="your-session-token"  # Optional
+```
+
+## Configuration
+
+After installation, SeeSpot's configuration is stored in `~/.seespot/config.yaml`.
+
+### Configuration Options
+
+```yaml
+# Cache directory for downloaded S3 data
+cache_dir: /path/to/cache
+
+# Server settings
+server:
+  host: 0.0.0.0  # Listen on all interfaces
+  port: 5555     # Server port
+
+# AWS settings (optional overrides)
+aws:
+  profile: default
+  region: us-west-2
+  bucket: aind-open-data
+```
+
+### Configuration Precedence
+
+Configuration values are resolved in this order (highest to lowest priority):
+
+1. **Environment Variables**: `SEESPOT_CACHE_DIR`, `SEESPOT_HOST`, `SEESPOT_PORT`, `SEESPOT_BUCKET`
+2. **Command-Line Arguments**: Passed to the `seespot` launcher
+3. **Config File**: `~/.seespot/config.yaml` (user config) or `/etc/seespot/config.yaml` (system config)
+4. **Defaults**: Built-in fallback values
+
+### Example: Override port via environment variable
+
+```bash
+SEESPOT_PORT=8080 seespot start
+```
+
+## Using SeeSpot
+
+After installation, use the `seespot` command to manage the server:
+
+```bash
+# Start the server
+seespot start
+
+# Stop the server
+seespot stop
+
+# Check server status
+seespot status
+
+# View logs
+seespot logs
+```
+
+The server will be available at `http://localhost:5555` (or your configured port).
+
+## Accessing the Web Interface
+
+Once the server is running:
+
+1. Open a web browser
+2. Navigate to `http://localhost:5555`
+3. You'll see the SeeSpot visualization interface
+
+### First-Time Setup
+
+On first use:
+1. Click "Manage Datasets" in the sidebar
+2. Download a dataset from the list
+3. Set it as active
+4. The visualization will load automatically
+
+
+## Uninstallation
+
+To remove SeeSpot:
+
+```bash
+cd /path/to/see-spot  # wherever you cloned the repository
+./uninstall.sh
+```
+
+The uninstaller will:
+1. Stop any running servers
+2. Remove the launcher script
+3. Optionally remove cache directory
+4. Remove configuration directory
+
+
+## What Gets Installed
+
+After installation, you'll have:
+
+- **Python Environment**: `.venv/` in the repository
+- **Launcher Script**: `~/.local/bin/seespot`
+- **Config Directory**: `~/.seespot/`
+  - `config.yaml` - Configuration file
+  - `seespot.pid` - Server process ID
+  - `seespot.log` - Server logs
+- **Cache Directory**: `~/.seespot/cache/` (or your configured location)
+  - Downloads from S3 are cached here
diff --git a/README.md b/README.md
index 5187ca5..4335549 100644
--- a/README.md
+++ b/README.md
@@ -7,14 +7,39 @@
 ![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?logo=codecov)
 ![Python](https://img.shields.io/badge/python->=3.10-blue?logo=python)
 
+## Quick Start
+
+### Installation
+```bash
+# Clone repository
+git clone https://github.com/AllenNeuralDynamics/see-spot.git
+cd see-spot
+
+# Run installer (uses defaults: port 5555, cache at ~/.seespot/cache)
+./install.sh
+
+# Or customize settings interactively
+./install.sh --interactive
+```
+
+### Launch
+```bash
+# Start the server
+seespot start
+
+# Access at http://localhost:5555
+```
+
+For detailed installation instructions, AWS credentials setup, and troubleshooting, see [INSTALL.md](INSTALL.md).
+
 ## App UI
 ![Spot Visualization](img/seespot-app-v.png)
 *Interactive dashboard showing spot channel analysis with Sankey flow diagram, scatter plot, and summary statistics*
 
-## start-up (local)
+## Development Setup
 + Install
   + `uv sync`
-+ Launch: 
++ Launch with auto-reload: 
 ```bash
 cd /home/matt.davis/code/see-spot && source .venv/bin/activate && cd src && uvicorn see_spot.app:app --host 0.0.0.0 --port 9999 --reload
 ```
diff --git a/config.yaml.example b/config.yaml.example
new file mode 100644
index 0000000..b2b4876
--- /dev/null
+++ b/config.yaml.example
@@ -0,0 +1,35 @@
+# SeeSpot Configuration Example
+# Copy this to ~/.seespot/config.yaml and customize as needed
+
+# Directory where S3 data will be cached locally
+# This should have at least 50GB of free space
+cache_dir: ~/.seespot/cache
+
+# Server configuration
+server:
+  # Host address to bind to
+  # Use 0.0.0.0 to allow external connections
+  # Use 127.0.0.1 for localhost only
+  host: 0.0.0.0
+  
+  # Port number for the web server
+  port: 5555
+
+# AWS configuration (optional)
+aws:
+  # AWS profile name from ~/.aws/credentials
+  # If not specified, uses default profile or environment variables
+  # profile: default
+  
+  # AWS region
+  # If not specified, uses AWS_DEFAULT_REGION or us-west-2
+  # region: us-west-2
+  
+  # S3 bucket containing the data
+  # Default: aind-open-data
+  # bucket: aind-open-data
+
+# Logging configuration (optional)
+# logging:
+#   level: INFO  # Options: DEBUG, INFO, WARNING, ERROR
+#   file: ~/.seespot/seespot.log
diff --git a/install.sh b/install.sh
new file mode 100755
index 0000000..6c4e986
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,540 @@
+#!/bin/bash
+set -e
+
+# SeeSpot Installer
+# Linux only - installs SeeSpot visualization server
+# Usage: ./install.sh [--interactive] [--verbose] [--dry-run]
+
+VERSION="0.5.1"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Parse command line arguments
+YES_FLAG=true
+VERBOSE=false
+DRY_RUN=false
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --interactive|-i)
+            YES_FLAG=false
+            shift
+            ;;
+        --yes|-y)
+            YES_FLAG=true
+            shift
+            ;;
+        --verbose|-v)
+            VERBOSE=true
+            shift
+            ;;
+        --dry-run)
+            DRY_RUN=true
+            shift
+            ;;
+        --help|-h)
+            echo "SeeSpot Installer"
+            echo "Usage: $0 [OPTIONS]"
+            echo ""
+            echo "Options:"
+            echo "  --interactive, -i  Interactive mode (prompt for configuration)"
+            echo "  --yes, -y          Non-interactive mode (use defaults, same as default)"
+            echo "  --verbose, -v      Verbose output"
+            echo "  --dry-run          Show what would be done without making changes"
+            echo "  --help, -h         Show this help message"
+            echo ""
+            echo "Note: Non-interactive mode is the default. Use --interactive for prompts."
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1"
+            echo "Use --help for usage information"
+            exit 1
+            ;;
+    esac
+done
+
+# Logging functions
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+log_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+log_verbose() {
+    if [ "$VERBOSE" = true ]; then
+        echo -e "${BLUE}[VERBOSE]${NC} $1"
+    fi
+}
+
+# Check if running on Linux
+check_os() {
+    log_info "Checking operating system..."
+    if [[ "$OSTYPE" != "linux-gnu"* ]]; then
+        log_error "This installer currently supports Linux only."
+        log_error "Detected OS: $OSTYPE"
+        exit 1
+    fi
+    log_success "Linux detected"
+}
+
+# Check for uv and install if missing
+check_uv() {
+    log_info "Checking for uv..."
+    if command -v uv &> /dev/null; then
+        UV_VERSION=$(uv --version | awk '{print $2}')
+        log_success "uv is installed (version $UV_VERSION)"
+        return 0
+    else
+        log_warning "uv is not installed"
+        if [ "$YES_FLAG" = true ]; then
+            INSTALL_UV="y"
+        else
+            read -p "Would you like to install uv? [Y/n]: " INSTALL_UV
+            INSTALL_UV=${INSTALL_UV:-y}
+        fi
+
+        if [[ "$INSTALL_UV" =~ ^[Yy]$ ]]; then
+            log_info "Installing uv..."
+            if [ "$DRY_RUN" = true ]; then
+                log_info "[DRY RUN] Would execute: curl -LsSf https://astral.sh/uv/install.sh | sh"
+                return 0
+            fi
+            curl -LsSf https://astral.sh/uv/install.sh | sh
+            # Add to current session PATH (uv installs to ~/.local/bin; older releases used ~/.cargo/bin)
+            export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
+            log_success "uv installed successfully"
+        else
+            log_error "uv is required to install SeeSpot. Please install it manually:"
+            log_error "  curl -LsSf https://astral.sh/uv/install.sh | sh"
+            exit 1
+        fi
+    fi
+}
+
+# Check AWS credentials
+check_aws_credentials() {
+    log_info "Checking AWS credentials..."
+    
+    HAS_CREDS=false
+    
+    # Check environment variables
+    if [ ! -z "$AWS_ACCESS_KEY_ID" ] && [ ! -z "$AWS_SECRET_ACCESS_KEY" ]; then
+        log_success "AWS credentials found in environment variables"
+        HAS_CREDS=true
+        return 0
+    fi
+    
+    # Check ~/.aws/credentials
+    if [ -f "$HOME/.aws/credentials" ]; then
+        log_success "AWS credentials file found at ~/.aws/credentials"
+        HAS_CREDS=true
+        return 0
+    fi
+    
+    # Check for IAM role (EC2 instance)
+    if curl -s -f -m 2 http://169.254.169.254/latest/meta-data/iam/security-credentials/ &> /dev/null; then
+        log_success "IAM instance role detected (EC2)"
+        HAS_CREDS=true
+        return 0
+    fi
+    
+    if [ "$HAS_CREDS" = false ]; then
+        log_warning "No AWS credentials detected"
+        log_warning "SeeSpot requires access to S3 bucket: aind-open-data"
+        log_warning "Please set up AWS credentials before using SeeSpot:"
+        log_warning "  1. Environment variables: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY"
+        log_warning "  2. AWS CLI: aws configure"
+        log_warning "  3. IAM role (if running on EC2)"
+        log_warning ""
+        log_warning "You can continue installation and set up credentials later."
+        
+        if [ "$YES_FLAG" = false ]; then
+            read -p "Continue anyway? [y/N]: " CONTINUE
+            if [[ ! "$CONTINUE" =~ ^[Yy]$ ]]; then
+                exit 1
+            fi
+        fi
+    fi
+}
+
+# Check disk space
+check_disk_space() {
+    local TARGET_DIR=$1
+    # The target may not exist yet on a fresh install and df would fail: use the nearest existing ancestor
+    while [ ! -e "$TARGET_DIR" ]; do TARGET_DIR=$(dirname "$TARGET_DIR"); done
+    log_info "Checking disk space at $TARGET_DIR..."
+    # Get available space in GB
+    AVAILABLE=$(df -BG "$TARGET_DIR" | tail -1 | awk '{print $4}' | sed 's/G//')
+    log_verbose "Available space: ${AVAILABLE}GB"
+    if [ "${AVAILABLE:-0}" -lt 50 ]; then
+        log_warning "Low disk space detected: ${AVAILABLE}GB available"
+        log_warning "Recommended: 50GB+ for S3 cache"
+        if [ "$YES_FLAG" = false ]; then
+            read -p "Continue anyway? [y/N]: " CONTINUE
+            if [[ ! "$CONTINUE" =~ ^[Yy]$ ]]; then
+                exit 1
+            fi
+        fi
+    else
+        log_success "Sufficient disk space available: ${AVAILABLE}GB"
+    fi
+}
+
+# Interactive configuration
+configure() {
+    log_info "Configuring SeeSpot..."
+    
+    # Default values
+    DEFAULT_CACHE_DIR="$HOME/.seespot/cache"
+    DEFAULT_PORT=5555
+    DEFAULT_HOST="0.0.0.0"
+    
+    if [ "$YES_FLAG" = true ]; then
+        CACHE_DIR=$DEFAULT_CACHE_DIR
+        SERVER_PORT=$DEFAULT_PORT
+        SERVER_HOST=$DEFAULT_HOST
+    else
+        echo ""
+        read -p "Cache directory for S3 data [$DEFAULT_CACHE_DIR]: " CACHE_DIR
+        CACHE_DIR=${CACHE_DIR:-$DEFAULT_CACHE_DIR}
+        
+        read -p "Server port [$DEFAULT_PORT]: " SERVER_PORT
+        SERVER_PORT=${SERVER_PORT:-$DEFAULT_PORT}
+        
+        read -p "Server host [$DEFAULT_HOST]: " SERVER_HOST
+        SERVER_HOST=${SERVER_HOST:-$DEFAULT_HOST}
+    fi
+    
+    # Expand ~ to actual home directory
+    CACHE_DIR="${CACHE_DIR/#\~/$HOME}"
+    
+    log_verbose "Configuration:"
+    log_verbose "  Cache directory: $CACHE_DIR"
+    log_verbose "  Server host: $SERVER_HOST"
+    log_verbose "  Server port: $SERVER_PORT"
+}
+
+# Create directories and config
+setup_directories() {
+    log_info "Setting up directories..."
+    
+    SEESPOT_DIR="$HOME/.seespot"
+    CONFIG_FILE="$SEESPOT_DIR/config.yaml"
+    
+    if [ "$DRY_RUN" = true ]; then
+        log_info "[DRY RUN] Would create: $SEESPOT_DIR"
+        log_info "[DRY RUN] Would create: $CACHE_DIR"
+        return 0
+    fi
+    
+    # Create SeeSpot directory
+    mkdir -p "$SEESPOT_DIR"
+    log_success "Created $SEESPOT_DIR"
+    
+    # Create cache directory
+    mkdir -p "$CACHE_DIR"
+    log_success "Created cache directory: $CACHE_DIR"
+    
+    # Create config file
+    cat > "$CONFIG_FILE" << EOF
+# SeeSpot Configuration
+# Generated by installer on $(date)
+
+cache_dir: $CACHE_DIR
+
+server:
+  host: $SERVER_HOST
+  port: $SERVER_PORT
+
+aws:
+  # Optional: specify AWS profile name
+  # profile: default
+  
+  # Optional: specify region
+  # region: us-west-2
+EOF
+    
+    log_success "Created config file: $CONFIG_FILE"
+    
+    # Create install log
+    INSTALL_LOG="$SEESPOT_DIR/install.log"
+    echo "SeeSpot installation started at $(date)" > "$INSTALL_LOG"
+    echo "Version: $VERSION" >> "$INSTALL_LOG"
+    echo "Install directory: $SCRIPT_DIR" >> "$INSTALL_LOG"
+    echo "Cache directory: $CACHE_DIR" >> "$INSTALL_LOG"
+}
+
+# Install Python environment with uv
+install_environment() {
+    log_info "Installing SeeSpot environment..."
+    
+    if [ "$DRY_RUN" = true ]; then
+        log_info "[DRY RUN] Would create virtual environment with uv"
+        log_info "[DRY RUN] Would install dependencies"
+        return 0
+    fi
+    
+    cd "$SCRIPT_DIR"
+    
+    # Create virtual environment with Python 3.11+
+    log_info "Creating virtual environment..."
+    uv venv .venv --python 3.11
+    
+    log_success "Virtual environment created"
+    
+    # Install package
+    log_info "Installing SeeSpot package and dependencies..."
+    uv pip install -e .
+    
+    log_success "SeeSpot package installed"
+}
+
+# Create launcher script
+create_launcher() {
+    log_info "Creating launcher script..."
+    
+    LAUNCHER="$HOME/.local/bin/seespot"
+    
+    if [ "$DRY_RUN" = true ]; then
+        log_info "[DRY RUN] Would create launcher: $LAUNCHER"
+        return 0
+    fi
+    
+    # Create .local/bin if it doesn't exist
+    mkdir -p "$HOME/.local/bin"
+    
+    # Create launcher script
+    cat > "$LAUNCHER" << 'EOF'
+#!/bin/bash
+# SeeSpot Launcher
+
+SEESPOT_DIR="$HOME/.seespot"
+CONFIG="$SEESPOT_DIR/config.yaml"
+INSTALL_DIR="__INSTALL_DIR__"
+VENV="$INSTALL_DIR/.venv"
+PID_FILE="$SEESPOT_DIR/seespot.pid"
+
+# Load config
+load_config() {
+    if [ ! -f "$CONFIG" ]; then
+        echo "Error: Config file not found at $CONFIG"
+        exit 1
+    fi
+
+    # Parse YAML (simple parsing for our specific config): take the first uncommented
+    # "port:"/"host:" key so commented-out examples or repeated keys cannot leak into the value
+    SERVER_PORT=$(awk '$1 == "port:" { print $2; exit }' "$CONFIG")
+    SERVER_HOST=$(awk '$1 == "host:" { print $2; exit }' "$CONFIG")
+    SERVER_PORT=${SERVER_PORT:-5555}
+    SERVER_HOST=${SERVER_HOST:-0.0.0.0}
+}
+
+start_server() {
+    if [ -f "$PID_FILE" ] && kill -0 $(cat "$PID_FILE") 2>/dev/null; then
+        echo "SeeSpot is already running (PID: $(cat "$PID_FILE"))"
+        exit 1
+    fi
+    
+    load_config
+    
+    echo "Starting SeeSpot on $SERVER_HOST:$SERVER_PORT..."
+    
+    cd "$INSTALL_DIR/src"
+    source "$VENV/bin/activate"
+    
+    nohup uvicorn see_spot.app:app --host "$SERVER_HOST" --port "$SERVER_PORT" > "$SEESPOT_DIR/seespot.log" 2>&1 &
+    echo $! > "$PID_FILE"
+    
+    echo "SeeSpot started (PID: $(cat "$PID_FILE"))"
+    echo "Log file: $SEESPOT_DIR/seespot.log"
+    echo "Visit: http://localhost:$SERVER_PORT/unmixed-spots"
+}
+
+stop_server() {
+    if [ ! -f "$PID_FILE" ]; then
+        echo "SeeSpot is not running (no PID file)"
+        exit 1
+    fi
+    
+    PID=$(cat "$PID_FILE")
+    
+    if kill -0 "$PID" 2>/dev/null; then
+        echo "Stopping SeeSpot (PID: $PID)..."
+        kill "$PID"
+        rm "$PID_FILE"
+        echo "SeeSpot stopped"
+    else
+        echo "SeeSpot is not running (stale PID file)"
+        rm "$PID_FILE"
+    fi
+}
+
+status_server() {
+    if [ ! -f "$PID_FILE" ]; then
+        echo "SeeSpot is not running"
+        exit 1
+    fi
+    
+    PID=$(cat "$PID_FILE")
+    
+    if kill -0 "$PID" 2>/dev/null; then
+        load_config
+        echo "SeeSpot is running (PID: $PID)"
+        echo "URL: http://localhost:$SERVER_PORT/unmixed-spots"
+    else
+        echo "SeeSpot is not running (stale PID file)"
+        rm "$PID_FILE"
+        exit 1
+    fi
+}
+
+show_config() {
+    if [ -f "$CONFIG" ]; then
+        cat "$CONFIG"
+    else
+        echo "Config file not found: $CONFIG"
+        exit 1
+    fi
+}
+
+show_logs() {
+    LOG_FILE="$SEESPOT_DIR/seespot.log"
+    if [ -f "$LOG_FILE" ]; then
+        tail -f "$LOG_FILE"
+    else
+        echo "Log file not found: $LOG_FILE"
+        exit 1
+    fi
+}
+
+case "$1" in
+    start)
+        start_server
+        ;;
+    stop)
+        stop_server
+        ;;
+    restart)
+        (stop_server) || true  # subshell: stop_server calls "exit 1" when nothing is running; that must not abort the restart
+        sleep 1
+        start_server
+        ;;
+    status)
+        status_server
+        ;;
+    config)
+        show_config
+        ;;
+    logs)
+        show_logs
+        ;;
+    *)
+        echo "SeeSpot Launcher"
+        echo "Usage: $0 {start|stop|restart|status|config|logs}"
+        echo ""
+        echo "Commands:"
+        echo "  start    - Start the SeeSpot server"
+        echo "  stop     - Stop the SeeSpot server"
+        echo "  restart  - Restart the SeeSpot server"
+        echo "  status   - Check if SeeSpot is running"
+        echo "  config   - Show configuration"
+        echo "  logs     - Tail the server logs"
+        exit 1
+        ;;
+esac
+EOF
+    
+    # Replace placeholder with actual install directory
+    sed -i "s|__INSTALL_DIR__|$SCRIPT_DIR|g" "$LAUNCHER"
+    
+    # Make executable
+    chmod +x "$LAUNCHER"
+    
+    log_success "Launcher script created: $LAUNCHER"
+    
+    # Check if ~/.local/bin is in PATH
+    if [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
+        log_warning "~/.local/bin is not in your PATH"
+        log_warning "Add this to your ~/.bashrc or ~/.zshrc:"
+        log_warning "  export PATH=\"\$HOME/.local/bin:\$PATH\""
+    fi
+}
+
+# Print success message
+print_success() {
+    echo ""
+    log_success "=========================================="
+    log_success "SeeSpot installation complete!"
+    log_success "=========================================="
+    echo ""
+    log_info "Configuration file: $HOME/.seespot/config.yaml"
+    log_info "Cache directory: $CACHE_DIR"
+    echo ""
+    log_info "To start SeeSpot:"
+    echo "  seespot start"
+    echo ""
+    log_info "Other commands:"
+    echo "  seespot stop     - Stop the server"
+    echo "  seespot status   - Check server status"
+    echo "  seespot logs     - View server logs"
+    echo "  seespot config   - Show configuration"
+    echo ""
+    log_info "Server will be available at:"
+    echo "  http://localhost:$SERVER_PORT/unmixed-spots"
+    echo ""
+    
+    if [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
+        log_warning "Note: Add ~/.local/bin to your PATH:"
+        echo "  export PATH=\"\$HOME/.local/bin:\$PATH\""
+    fi
+}
+
+# Main installation flow
+main() {
+    echo ""
+    echo "=========================================="
+    echo "  SeeSpot Installer v${VERSION}"
+    echo "  Visualization for HCR Spot Data"
+    echo "=========================================="
+    echo ""
+    
+    if [ "$DRY_RUN" = true ]; then
+        log_warning "Running in DRY RUN mode - no changes will be made"
+        echo ""
+    fi
+    
+    check_os
+    check_uv
+    check_aws_credentials
+    configure
+    check_disk_space "$(dirname "$CACHE_DIR")"
+    setup_directories
+    install_environment
+    create_launcher
+    
+    if [ "$DRY_RUN" = false ]; then
+        print_success
+    else
+        log_info "[DRY RUN] Installation simulation complete"
+    fi
+}
+
+# Run installer
+main
diff --git a/requirements.txt b/requirements.txt
index 6f569ef..fb87c08 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ pathlib
 python-multipart
 s3fs
 zarr
+pyyaml
 aind-ng-link @ git+https://github.com/AllenNeuralDynamics/aind-ng-link.git
diff --git a/src/see_spot/app.py b/src/see_spot/app.py
index 622add8..3bb9a76 100644
--- a/src/see_spot/app.py
+++ b/src/see_spot/app.py
@@ -12,6 +12,7 @@
 import polars as pl
 import itertools
 from typing import List, Tuple, Dict, Any
+import yaml
 
 # Import your modules
 from see_spot.s3_handler import s3_handler
@@ -23,10 +24,45 @@
     find_processing_manifest, detect_tile_structure, extract_tile_suffix
 )
 
+
+def load_config():
+    """Load config from $SEESPOT_CONFIG, ~/.seespot/config.yaml or /etc/seespot/config.yaml, else return defaults."""
+    config_paths = [
+        os.getenv('SEESPOT_CONFIG'),
+        str(Path.home() / '.seespot' / 'config.yaml'),
+        str(Path('/etc/seespot/config.yaml')),
+    ]
+    for config_path in config_paths:
+        if config_path and Path(config_path).exists():
+            try:
+                with open(config_path, 'r') as f:
+                    config = yaml.safe_load(f) or {}  # an empty file parses as None
+                for section in ('server', 'aws'):
+                    # A section holding only commented-out entries parses as None; callers chain .get() on it
+                    config[section] = config.get(section) or {}
+                logger.info(f"Loaded configuration from: {config_path}")
+                return config
+            except Exception as e:  # unreadable, invalid or non-mapping file: warn and try the next path
+                logger.warning(f"Failed to load config from {config_path}: {e}")
+    logger.info("No config file found, using defaults")
+    return {
+        'cache_dir': str(Path.home() / '.seespot' / 'cache'),
+        'server': {'host': '0.0.0.0', 'port': 5555},
+        'aws': {}
+    }
+
+
 # Initialize logging using central utility (idempotent)
 setup_logging(os.getenv("SEE_SPOT_LOG_LEVEL", "INFO"))
 logger = logging.getLogger(__name__)
 
+# Load config
+config = load_config()
+
+# Get S3 cache directory from config or environment
+S3_CACHE_BASE = os.getenv('SEESPOT_CACHE_DIR') or config.get('cache_dir', str(Path.home() / '.seespot' / 'cache'))
+S3_CACHE_BASE = Path(S3_CACHE_BASE).expanduser()
+
 app = FastAPI()
 
 # Get the directory of the current file (app.py)
@@ -39,7 +75,7 @@
 templates = Jinja2Templates(directory=templates_dir)
 
 # Configuration for the spots data
-S3_BUCKET = "aind-open-data"
+S3_BUCKET = os.getenv('SEESPOT_BUCKET') or (config.get('aws') or {}).get('bucket') or 'aind-open-data'  # "aws:" may parse as None
 DATA_PREFIX = None  # No dataset loaded by default - user must select one
 SAMPLE_SIZE = 5000
 
@@ -1104,4 +1140,7 @@ async def unmixed_spots_page(request: Request):
 
 
 if __name__ == '__main__':
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    # Env var wins over config; `or {}` guards a "server:" key that parsed as None (no entries under it)
+    host = os.getenv('SEESPOT_HOST') or (config.get('server') or {}).get('host', '0.0.0.0')
+    port = int(os.getenv('SEESPOT_PORT') or (config.get('server') or {}).get('port', 5555))
+    uvicorn.run(app, host=host, port=port)
diff --git a/uninstall.sh b/uninstall.sh
new file mode 100755
index 0000000..2e0346f
--- /dev/null
+++ b/uninstall.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+set -e
+
+# SeeSpot Uninstaller
+# Removes SeeSpot installation
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+log_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+SEESPOT_DIR="$HOME/.seespot"
+LAUNCHER="$HOME/.local/bin/seespot"
+PID_FILE="$SEESPOT_DIR/seespot.pid"
+
+echo ""
+echo "=========================================="
+echo "  SeeSpot Uninstaller"
+echo "=========================================="
+echo ""
+
+# Check if SeeSpot is installed
+if [ ! -d "$SEESPOT_DIR" ]; then
+    log_error "SeeSpot does not appear to be installed"
+    log_error "Directory not found: $SEESPOT_DIR"
+    exit 1
+fi
+
+# Show what will be removed
+echo "This will remove:"
+echo "  - SeeSpot configuration: $SEESPOT_DIR"
+echo "  - Launcher script: $LAUNCHER"
+echo ""
+
+# Load config to show cache directory
+CONFIG_FILE="$SEESPOT_DIR/config.yaml"
+if [ -f "$CONFIG_FILE" ]; then
+    CACHE_DIR=$(grep "cache_dir:" "$CONFIG_FILE" | awk '{print $2}')
+    if [ ! -z "$CACHE_DIR" ]; then
+        CACHE_DIR="${CACHE_DIR/#\~/$HOME}"
+        echo "  - Cache directory: $CACHE_DIR"
+        echo ""
+    fi
+fi
+
+read -p "Are you sure you want to uninstall SeeSpot? [y/N]: " CONFIRM
+if [[ ! "$CONFIRM" =~ ^[Yy]$ ]]; then
+    log_info "Uninstall cancelled"
+    exit 0
+fi
+
+# Stop server if running
+if [ -f "$PID_FILE" ]; then
+    PID=$(cat "$PID_FILE")
+    if kill -0 "$PID" 2>/dev/null; then
+        log_info "Stopping SeeSpot server..."
+        kill "$PID"
+        log_success "Server stopped"
+    fi
+fi
+
+# Remove launcher
+if [ -f "$LAUNCHER" ]; then
+    log_info "Removing launcher script..."
+    rm "$LAUNCHER"
+    log_success "Launcher removed"
+fi
+
+# Ask about cache directory
+if [ ! -z "$CACHE_DIR" ] && [ -d "$CACHE_DIR" ]; then
+    echo ""
+    read -p "Remove cache directory ($CACHE_DIR)? This contains downloaded S3 data. [y/N]: " REMOVE_CACHE
+    if [[ "$REMOVE_CACHE" =~ ^[Yy]$ ]]; then
+        log_info "Removing cache directory..."
+        rm -rf "$CACHE_DIR"
+        log_success "Cache directory removed"
+    else
+        log_info "Cache directory preserved: $CACHE_DIR"
+    fi
+fi
+# Remove SeeSpot files; a cache the user chose to keep inside $SEESPOT_DIR (the default layout) must survive
+log_info "Removing SeeSpot configuration..."
+rm -f "$SEESPOT_DIR"/{config.yaml,seespot.pid,seespot.log,install.log}
+[ -d "$CACHE_DIR" ] && [[ "$CACHE_DIR/" == "$SEESPOT_DIR"/* ]] || rm -rf "$SEESPOT_DIR"
+log_success "Configuration removed"
+
+echo ""
+log_success "=========================================="
+log_success "SeeSpot uninstalled successfully"
+log_success "=========================================="
+echo ""
+log_info "To reinstall, run: ./install.sh"
+echo ""