jayin92 · Hunter5Thompson · Nov 23, 2025 · Copilot · Nov 24, 2025 · Copilot
diff --git a/render_video.py b/render_video.py
@@ -156,7 +156,12 @@ def colorize_depth_torch(depth_tensor, mask=None, normalize=True, cmap='Spectral
     if normalize:
         min_disp = np.nanquantile(disp, 0.01)
         max_disp = np.nanquantile(disp, 0.99)
-        disp = (disp - min_disp) / (max_disp - min_disp)
+        # Avoid division by zero for constant depth
+        disp_range = max_disp - min_disp
+        if disp_range > 1e-6:
+            disp = (disp - min_disp) / disp_range
+        else:
+            disp = np.zeros_like(disp)
 
     # Apply colormap
     colored = plt.get_cmap(cmap)(1.0 - disp)

diff --git a/render_video_from_ply.py b/render_video_from_ply.py
@@ -153,7 +153,12 @@ def colorize_depth_torch(depth_tensor, mask=None, normalize=True, cmap='Spectral
     if normalize:
         min_disp = np.nanquantile(disp, 0.01)
         max_disp = np.nanquantile(disp, 0.99)
-        disp = (disp - min_disp) / (max_disp - min_disp)
+        # Avoid division by zero for constant depth
+        disp_range = max_disp - min_disp
+        if disp_range > 1e-6:
+            disp = (disp - min_disp) / disp_range
+        else:
+            disp = np.zeros_like(disp)
 
     # Apply colormap
     colored = plt.get_cmap(cmap)(1.0 - disp)

diff --git a/scene/dataset_readers.py b/scene/dataset_readers.py
@@ -111,13 +111,19 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
         image = copy.deepcopy(image)
         cx = (cx - width / 2) / width * 2
         cy = (cy - height / 2) / height * 2
-
-        cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, 
+
+        # For COLMAP data, depth and mask are not available
+        depth = None
+        mask = None
+
+        cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX,
                               cx=cx, cy=cy,
                               image=image,
-                              image_path=image_path, 
-                              image_name=image_name, 
-                              width=width, 
+                              image_path=image_path,
+                              image_name=image_name,
+                              depth=depth,
+                              mask=mask,
+                              width=width,
                               height=height)
         cam_infos.append(cam_info)
     sys.stdout.write('\n')
@@ -229,11 +235,22 @@ def readCamerasFromTransforms(path, transformsfile, white_background, extension=
             image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB")
 
             fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1])
-            FovY = fovy 
+            FovY = fovy
             FovX = fovx
 
-            cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
-                            image_path=image_path, image_name=image_name, width=image.size[0], height=image.size[1]))
+            # For NeRF synthetic data, we don't have cx, cy, depth, mask in the JSON
+            # Assume centered principal point and no depth/mask data
+            cx = 0.0
+            cy = 0.0
+            depth = None
+            mask = None
+
+            cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX,
+                            cx=cx, cy=cy,
+                            image=image,
+                            image_path=image_path, image_name=image_name,
+                            depth=depth, mask=mask,
+                            width=image.size[0], height=image.size[1]))
 
     return cam_infos
 
@@ -311,11 +328,21 @@ def readMultiScale(path, white_background,split, only_highres=False):
 
         fovx = focal2fov(meta["focal"][idx], image.size[0])
         fovy = focal2fov(meta["focal"][idx], image.size[1])
-        FovY = fovy 
+        FovY = fovy
         FovX = fovx
 
-        cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
-                        image_path=image_path, image_name=image_name, width=image.size[0], height=image.size[1]))
+        # For multi-scale data, assume centered principal point and no depth/mask data
+        cx = 0.0
+        cy = 0.0
+        depth = None
+        mask = None
+
+        cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX,
+                        cx=cx, cy=cy,
+                        image=image,
+                        image_path=image_path, image_name=image_name,
+                        depth=depth, mask=mask,
+                        width=image.size[0], height=image.size[1]))
     return cam_infos
 
 

diff --git a/train.py b/train.py
@@ -69,8 +69,9 @@ def create_offset_gt(image, offset):
     id_coords = torch.from_numpy(id_coords).cuda()
 
     id_coords = id_coords.permute(1, 2, 0) + offset
-    id_coords[..., 0] /= (width - 1)
-    id_coords[..., 1] /= (height - 1)
+    # Avoid division by zero for single-pixel dimensions
+    id_coords[..., 0] /= max(width - 1, 1)
+    id_coords[..., 1] /= max(height - 1, 1)
     id_coords = id_coords * 2 - 1
 
     image = torch.nn.functional.grid_sample(image[None], id_coords[None], align_corners=True, padding_mode="border")[0]
@@ -235,10 +236,9 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
 
         opacity_loss = 0.0
         if opt.lambda_opacity > 0:
-            # Get each gaussians' opacity and use cross entropy loss
+            # Entropy-style penalty on each Gaussian's opacity to encourage binary values; the clamp below keeps log() finite
             opacity = gaussians.get_opacity.clamp(1.0e-3, 1.0 - 1.0e-3)
-            opacity_loss = torch.nn.functional.binary_cross_entropy(opacity, opacity)
-            # opacity_loss = torch.mean(-opacity * torch.log(opacity + 1e-6))
+            opacity_loss = torch.mean(-opacity * torch.log(opacity))
             loss += opt.lambda_opacity * opacity_loss
 
 
@@ -272,7 +272,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
             depth_loss_pseudo = depth_loss_func(gt_depth, render_depth)
 
             if torch.isnan(depth_loss_pseudo).sum() == 0:
-                loss_scale = min((iteration - args.start_sample_pseudo) / 500., 1)
+                loss_scale = min((iteration - opt.start_sample_pseudo) / 500., 1)
                 loss += loss_scale * opt.lambda_pseudo_depth * depth_loss_pseudo
                 depth_loss += depth_loss_pseudo
 
@@ -459,28 +459,9 @@ def generate_idu_training_set(
                 n_avg=flow_edit_n_avg
             )
         elif use_difix3d:
-            refine_pipe = Difix3DRefineIDU(
-                save_path=refine_path,
-                device="cuda:0",
-                model_name=difix3d_model,
-                use_reference=difix3d_use_reference
-            )
-            final_imgs = refine_pipe.run(
-                imgs,
-                prompt=difix3d_prompt,
-                num_inference_steps=difix3d_steps,
-                timesteps=difix3d_timesteps,
-                guidance_scale=difix3d_guidance
-            )
+            raise NotImplementedError("Difix3D refine is not yet implemented. Please use FlowEdit refine instead.")
         elif use_dreamscene:
-            refine_pipe = DreamSceneRefineIDU(
-                save_path=refine_path,
-                device="cuda:0",
-                model="sd21" if use_sd21 else "diffusionsat",
-            )
-            final_imgs = refine_pipe.run(
-                imgs,
-            )
+            raise NotImplementedError("DreamScene refine is not yet implemented. Please use FlowEdit refine instead.")
         else:
             raise NotImplementedError("DiffusionSat refine is deprecated")
         if refine_pipe:
@@ -833,10 +814,9 @@ def training_idu_episode(
 
         opacity_loss = 0.0
         if opt.lambda_opacity > 0:
-            # Get each gaussians' opacity and use cross entropy loss
+            # Entropy-style penalty on each Gaussian's opacity to encourage binary values; the clamp below keeps log() finite
             opacity = gaussians.get_opacity.clamp(1.0e-3, 1.0 - 1.0e-3)
-            opacity_loss = torch.nn.functional.binary_cross_entropy(opacity, opacity)
-            # opacity_loss = torch.mean(-opacity * torch.log(opacity + 1e-6))
+            opacity_loss = torch.mean(-opacity * torch.log(opacity))
             if loss:
                 loss += opt.lambda_opacity * opacity_loss
             else:
@@ -1027,7 +1007,12 @@ def colorize_depth_torch(depth_tensor, mask=None, normalize=True, cmap='Spectral
     if normalize:
         min_disp = np.nanquantile(disp, 0.01)
         max_disp = np.nanquantile(disp, 0.99)
-        disp = (disp - min_disp) / (max_disp - min_disp)
+        # Avoid division by zero for constant depth
+        disp_range = max_disp - min_disp
+        if disp_range > 1e-6:
+            disp = (disp - min_disp) / disp_range
+        else:
+            disp = np.zeros_like(disp)
 
     # Apply colormap
     colored = plt.get_cmap(cmap)(1.0 - disp)

diff --git a/utils/image_utils.py b/utils/image_utils.py
@@ -16,4 +16,5 @@ def mse(img1, img2):
 
 def psnr(img1, img2):
     mse = (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True)
-    return 20 * torch.log10(1.0 / torch.sqrt(mse))
+    # Add epsilon to avoid division by zero when images are identical
+    return 20 * torch.log10(1.0 / torch.sqrt(mse + 1e-10))
diff --git a/utils/system_utils.py b/utils/system_utils.py
@@ -24,5 +24,14 @@ def mkdir_p(folder_path):
             raise
 
 def searchForMaxIteration(folder):
-    saved_iters = [int(fname.split("_")[-1]) for fname in os.listdir(folder)]
+    saved_iters = []
+    for fname in os.listdir(folder):
+        try:
+            # Try to parse the iteration number from the filename
+            saved_iters.append(int(fname.split("_")[-1]))
+        except ValueError:
+            # Skip entries whose last "_"-separated token is not an integer
+            continue
+    if not saved_iters:
+        return 0
     return max(saved_iters)