diff --git a/render_video.py b/render_video.py index 7b3c365..a795514 100644 --- a/render_video.py +++ b/render_video.py @@ -156,7 +156,12 @@ def colorize_depth_torch(depth_tensor, mask=None, normalize=True, cmap='Spectral if normalize: min_disp = np.nanquantile(disp, 0.01) max_disp = np.nanquantile(disp, 0.99) - disp = (disp - min_disp) / (max_disp - min_disp) + # Avoid division by zero for constant depth + disp_range = max_disp - min_disp + if disp_range > 1e-6: + disp = (disp - min_disp) / disp_range + else: + disp = np.zeros_like(disp) # Apply colormap colored = plt.get_cmap(cmap)(1.0 - disp) diff --git a/render_video_from_ply.py b/render_video_from_ply.py index 07d742c..9d7c929 100644 --- a/render_video_from_ply.py +++ b/render_video_from_ply.py @@ -153,7 +153,12 @@ def colorize_depth_torch(depth_tensor, mask=None, normalize=True, cmap='Spectral if normalize: min_disp = np.nanquantile(disp, 0.01) max_disp = np.nanquantile(disp, 0.99) - disp = (disp - min_disp) / (max_disp - min_disp) + # Avoid division by zero for constant depth + disp_range = max_disp - min_disp + if disp_range > 1e-6: + disp = (disp - min_disp) / disp_range + else: + disp = np.zeros_like(disp) # Apply colormap colored = plt.get_cmap(cmap)(1.0 - disp) diff --git a/scene/dataset_readers.py b/scene/dataset_readers.py index 1aa78ed..32c8a80 100755 --- a/scene/dataset_readers.py +++ b/scene/dataset_readers.py @@ -111,13 +111,19 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder): image = copy.deepcopy(image) cx = (cx - width / 2) / width * 2 cy = (cy - height / 2) / height * 2 - - cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, + + # For COLMAP data, depth and mask are not available + depth = None + mask = None + + cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, cx=cx, cy=cy, image=image, - image_path=image_path, - image_name=image_name, - width=width, + image_path=image_path, + image_name=image_name, + depth=depth, + mask=mask, + width=width, height=height) cam_infos.append(cam_info) sys.stdout.write('\n') @@ -229,11 +235,22 @@ def readCamerasFromTransforms(path, transformsfile, white_background, extension= image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB") fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1]) - FovY = fovy + FovY = fovy FovX = fovx - cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image, - image_path=image_path, image_name=image_name, width=image.size[0], height=image.size[1])) + # For NeRF synthetic data, we don't have cx, cy, depth, mask in the JSON + # Assume centered principal point and no depth/mask data + cx = 0.0 + cy = 0.0 + depth = None + mask = None + + cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, + cx=cx, cy=cy, + image=image, + image_path=image_path, image_name=image_name, + depth=depth, mask=mask, + width=image.size[0], height=image.size[1])) return cam_infos @@ -311,11 +328,21 @@ def readMultiScale(path, white_background,split, only_highres=False): fovx = focal2fov(meta["focal"][idx], image.size[0]) fovy = focal2fov(meta["focal"][idx], image.size[1]) - FovY = fovy + FovY = fovy FovX = fovx - cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image, - image_path=image_path, image_name=image_name, width=image.size[0], height=image.size[1])) + # For LLFF data, assume centered principal point and no depth/mask data + cx = 0.0 + cy = 0.0 + depth = None + mask = None + + cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, + cx=cx, cy=cy, + image=image, + image_path=image_path, image_name=image_name, + depth=depth, mask=mask, + width=image.size[0], height=image.size[1])) return cam_infos diff --git a/train.py b/train.py index 8ddba33..f85a1fc 100644 --- a/train.py +++ b/train.py @@ -69,8 +69,9 @@ def create_offset_gt(image, offset): id_coords = torch.from_numpy(id_coords).cuda() id_coords = id_coords.permute(1, 2, 0) + offset - id_coords[..., 0] /= (width - 1) - id_coords[..., 1] /= (height - 1) + # Avoid division by zero for single-pixel dimensions + id_coords[..., 0] /= max(width - 1, 1) + id_coords[..., 1] /= max(height - 1, 1) id_coords = id_coords * 2 - 1 image = torch.nn.functional.grid_sample(image[None], id_coords[None], align_corners=True, padding_mode="border")[0] @@ -235,10 +236,9 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi opacity_loss = 0.0 if opt.lambda_opacity > 0: - # Get each gaussians' opacity and use cross entropy loss + # Get each gaussians' opacity and use entropy loss to encourage binary values opacity = gaussians.get_opacity.clamp(1.0e-3, 1.0 - 1.0e-3) - opacity_loss = torch.nn.functional.binary_cross_entropy(opacity, opacity) - # opacity_loss = torch.mean(-opacity * torch.log(opacity + 1e-6)) + opacity_loss = torch.mean(-opacity * torch.log(opacity + 1e-6)) loss += opt.lambda_opacity * opacity_loss @@ -272,7 +272,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi depth_loss_pseudo = depth_loss_func(gt_depth, render_depth) if torch.isnan(depth_loss_pseudo).sum() == 0: - loss_scale = min((iteration - args.start_sample_pseudo) / 500., 1) + loss_scale = min((iteration - opt.start_sample_pseudo) / 500., 1) loss += loss_scale * opt.lambda_pseudo_depth * depth_loss_pseudo depth_loss += depth_loss_pseudo @@ -459,28 +459,9 @@ def generate_idu_training_set( n_avg=flow_edit_n_avg ) elif use_difix3d: - refine_pipe = Difix3DRefineIDU( - save_path=refine_path, - device="cuda:0", - model_name=difix3d_model, - use_reference=difix3d_use_reference - ) - final_imgs = refine_pipe.run( - imgs, - prompt=difix3d_prompt, - num_inference_steps=difix3d_steps, - timesteps=difix3d_timesteps, - guidance_scale=difix3d_guidance - ) + raise NotImplementedError("Difix3D refine is not yet implemented. Please use FlowEdit refine instead.") elif use_dreamscene: - refine_pipe = DreamSceneRefineIDU( - save_path=refine_path, - device="cuda:0", - model="sd21" if use_sd21 else "diffusionsat", - ) - final_imgs = refine_pipe.run( - imgs, - ) + raise NotImplementedError("DreamScene refine is not yet implemented. Please use FlowEdit refine instead.") else: raise NotImplementedError("DiffusionSat refine is deprecated") if refine_pipe: @@ -833,10 +814,9 @@ def training_idu_episode( opacity_loss = 0.0 if opt.lambda_opacity > 0: - # Get each gaussians' opacity and use cross entropy loss + # Get each gaussians' opacity and use entropy loss to encourage binary values opacity = gaussians.get_opacity.clamp(1.0e-3, 1.0 - 1.0e-3) - opacity_loss = torch.nn.functional.binary_cross_entropy(opacity, opacity) - # opacity_loss = torch.mean(-opacity * torch.log(opacity + 1e-6)) + opacity_loss = torch.mean(-opacity * torch.log(opacity + 1e-6)) if loss: loss += opt.lambda_opacity * opacity_loss else: @@ -1027,7 +1007,12 @@ def colorize_depth_torch(depth_tensor, mask=None, normalize=True, cmap='Spectral if normalize: min_disp = np.nanquantile(disp, 0.01) max_disp = np.nanquantile(disp, 0.99) - disp = (disp - min_disp) / (max_disp - min_disp) + # Avoid division by zero for constant depth + disp_range = max_disp - min_disp + if disp_range > 1e-6: + disp = (disp - min_disp) / disp_range + else: + disp = np.zeros_like(disp) # Apply colormap colored = plt.get_cmap(cmap)(1.0 - disp) diff --git a/utils/image_utils.py b/utils/image_utils.py index cdeaa1b..5a1995b 100755 --- a/utils/image_utils.py +++ b/utils/image_utils.py @@ -16,4 +16,5 @@ def mse(img1, img2): def psnr(img1, img2): mse = (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) - return 20 * torch.log10(1.0 / torch.sqrt(mse)) + # Add epsilon to avoid division by zero when images are identical + return 20 * torch.log10(1.0 / torch.sqrt(mse + 1e-10)) diff --git a/utils/system_utils.py b/utils/system_utils.py index 90ca6d7..e0e9104 100755 --- a/utils/system_utils.py +++ b/utils/system_utils.py @@ -24,5 +24,14 @@ def mkdir_p(folder_path): raise def searchForMaxIteration(folder): - saved_iters = [int(fname.split("_")[-1]) for fname in os.listdir(folder)] + saved_iters = [] + for fname in os.listdir(folder): + try: + # Try to parse the iteration number from the filename + saved_iters.append(int(fname.split("_")[-1])) + except (ValueError, IndexError): + # Skip files that don't match the expected pattern + continue + if not saved_iters: + return 0 return max(saved_iters)