Skip to content

[Bug?] viscoelastic_instability contains a large number of duplicate frames #74

@till-m

Description

@till-m

I was plotting some rollout visualizations on the viscoelastic_instability dataset and noticed that there is a repeated frame in the sequence, which then leads to an offset between the rollout prediction and the ground truth. See the following figure, where the duplication happens at timesteps 7 and 8.

Image

Based on this, I checked and found a total of 128 cases across the 257 trajectories contained in the viscoelastic_instability dataset. I've plotted a number of frames around these instances, and I think the duplicate frames can simply be removed. Still, this seems to be a rather critical issue given its impact on the rollout evaluation, and also on training.

JSON of duplicates
{
  "data_dir": "viscoelastic_instability/data",
  "files_checked": 21,
  "trajectories_checked": 257,
  "total_duplicates": 128,
  "duplicates": [
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_AH.hdf5",
      "trajectory": 9,
      "frame_idx": 0,
      "n_timesteps": 20
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 0,
      "frame_idx": 28,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 0,
      "frame_idx": 54,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 1,
      "frame_idx": 20,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 1,
      "frame_idx": 42,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 2,
      "frame_idx": 15,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 2,
      "frame_idx": 51,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 3,
      "frame_idx": 5,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 3,
      "frame_idx": 39,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 4,
      "frame_idx": 17,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 5,
      "frame_idx": 17,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 5,
      "frame_idx": 33,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 6,
      "frame_idx": 28,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 7,
      "frame_idx": 11,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 7,
      "frame_idx": 35,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 8,
      "frame_idx": 2,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 8,
      "frame_idx": 31,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 9,
      "frame_idx": 24,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 9,
      "frame_idx": 25,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 10,
      "frame_idx": 4,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 10,
      "frame_idx": 30,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 10,
      "frame_idx": 58,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 11,
      "frame_idx": 23,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 11,
      "frame_idx": 51,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 12,
      "frame_idx": 4,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 12,
      "frame_idx": 47,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 13,
      "frame_idx": 16,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 13,
      "frame_idx": 40,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 14,
      "frame_idx": 10,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 15,
      "frame_idx": 10,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 15,
      "frame_idx": 56,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 16,
      "frame_idx": 57,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 17,
      "frame_idx": 26,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 18,
      "frame_idx": 14,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 19,
      "frame_idx": 50,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 21,
      "frame_idx": 47,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 22,
      "frame_idx": 1,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 23,
      "frame_idx": 22,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 24,
      "frame_idx": 19,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 24,
      "frame_idx": 54,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 25,
      "frame_idx": 14,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 25,
      "frame_idx": 43,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 26,
      "frame_idx": 20,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 27,
      "frame_idx": 29,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 28,
      "frame_idx": 11,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 29,
      "frame_idx": 5,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 30,
      "frame_idx": 0,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 30,
      "frame_idx": 41,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 31,
      "frame_idx": 25,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 31,
      "frame_idx": 46,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_CAR.hdf5",
      "trajectory": 32,
      "frame_idx": 23,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 1,
      "frame_idx": 12,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 2,
      "frame_idx": 33,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 3,
      "frame_idx": 5,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 3,
      "frame_idx": 45,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 4,
      "frame_idx": 20,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 5,
      "frame_idx": 35,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 5,
      "frame_idx": 55,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 6,
      "frame_idx": 7,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 6,
      "frame_idx": 18,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 6,
      "frame_idx": 27,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 6,
      "frame_idx": 38,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 6,
      "frame_idx": 49,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 6,
      "frame_idx": 58,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 7,
      "frame_idx": 11,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 7,
      "frame_idx": 21,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 7,
      "frame_idx": 32,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 7,
      "frame_idx": 38,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 7,
      "frame_idx": 50,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 8,
      "frame_idx": 4,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 8,
      "frame_idx": 15,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 8,
      "frame_idx": 34,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 8,
      "frame_idx": 53,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 9,
      "frame_idx": 23,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 9,
      "frame_idx": 41,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 10,
      "frame_idx": 0,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 10,
      "frame_idx": 18,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 10,
      "frame_idx": 47,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 11,
      "frame_idx": 8,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 11,
      "frame_idx": 38,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 11,
      "frame_idx": 57,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 12,
      "frame_idx": 19,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 12,
      "frame_idx": 35,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 13,
      "frame_idx": 21,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 13,
      "frame_idx": 32,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 14,
      "frame_idx": 55,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 15,
      "frame_idx": 16,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 16,
      "frame_idx": 28,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 18,
      "frame_idx": 53,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 19,
      "frame_idx": 52,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 20,
      "frame_idx": 52,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 21,
      "frame_idx": 52,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 22,
      "frame_idx": 55,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 23,
      "frame_idx": 37,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 24,
      "frame_idx": 34,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 25,
      "frame_idx": 19,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 25,
      "frame_idx": 55,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 26,
      "frame_idx": 33,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 27,
      "frame_idx": 11,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_EIT.hdf5",
      "trajectory": 27,
      "frame_idx": 38,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_transtochaos_EIT_L.hdf5",
      "trajectory": 20,
      "frame_idx": 12,
      "n_timesteps": 20
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_transtochaos_EIT_L.hdf5",
      "trajectory": 26,
      "frame_idx": 14,
      "n_timesteps": 20
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_transtochaos_EIT_SAR.hdf5",
      "trajectory": 26,
      "frame_idx": 17,
      "n_timesteps": 20
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_transtochaos_EIT_SAR.hdf5",
      "trajectory": 28,
      "frame_idx": 9,
      "n_timesteps": 20
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_transtononchaos_EIT_L.hdf5",
      "trajectory": 25,
      "frame_idx": 7,
      "n_timesteps": 20
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_transtononchaos_EIT_SAR.hdf5",
      "trajectory": 0,
      "frame_idx": 8,
      "n_timesteps": 20
    },
    {
      "file": "viscoelastic_instability/data/train/viscoelastic_instability_transtononchaos_EIT_SAR.hdf5",
      "trajectory": 24,
      "frame_idx": 3,
      "n_timesteps": 20
    },
    {
      "file": "viscoelastic_instability/data/valid/viscoelastic_instability_CAR.hdf5",
      "trajectory": 0,
      "frame_idx": 20,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/valid/viscoelastic_instability_CAR.hdf5",
      "trajectory": 1,
      "frame_idx": 50,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/valid/viscoelastic_instability_CAR.hdf5",
      "trajectory": 2,
      "frame_idx": 44,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/valid/viscoelastic_instability_EIT.hdf5",
      "trajectory": 0,
      "frame_idx": 50,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/valid/viscoelastic_instability_EIT.hdf5",
      "trajectory": 1,
      "frame_idx": 12,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/valid/viscoelastic_instability_EIT.hdf5",
      "trajectory": 1,
      "frame_idx": 39,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/valid/viscoelastic_instability_EIT.hdf5",
      "trajectory": 1,
      "frame_idx": 57,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_CAR.hdf5",
      "trajectory": 0,
      "frame_idx": 12,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_CAR.hdf5",
      "trajectory": 0,
      "frame_idx": 50,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_CAR.hdf5",
      "trajectory": 1,
      "frame_idx": 18,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_CAR.hdf5",
      "trajectory": 1,
      "frame_idx": 36,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_CAR.hdf5",
      "trajectory": 2,
      "frame_idx": 26,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_EIT.hdf5",
      "trajectory": 0,
      "frame_idx": 6,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_EIT.hdf5",
      "trajectory": 0,
      "frame_idx": 18,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_EIT.hdf5",
      "trajectory": 0,
      "frame_idx": 39,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_EIT.hdf5",
      "trajectory": 0,
      "frame_idx": 40,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_EIT.hdf5",
      "trajectory": 0,
      "frame_idx": 56,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_EIT.hdf5",
      "trajectory": 1,
      "frame_idx": 34,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_EIT.hdf5",
      "trajectory": 2,
      "frame_idx": 23,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_EIT.hdf5",
      "trajectory": 2,
      "frame_idx": 57,
      "n_timesteps": 60
    },
    {
      "file": "viscoelastic_instability/data/test/viscoelastic_instability_transtochaos_EIT_SAR.hdf5",
      "trajectory": 1,
      "frame_idx": 4,
      "n_timesteps": 20
    }
  ]
}
Duplication finding script

(The script was written by Claude Code.)

#!/usr/bin/env python
"""Scan viscoelastic_instability dataset for duplicate consecutive frames."""

import argparse
import json
import os

import h5py
import numpy as np


def check_file_for_duplicates(filepath: str) -> list[dict]:
    """Find consecutive identical pressure frames in every trajectory of a file.

    Only the ``t0_fields/pressure`` dataset is inspected. Its first axis is
    treated as the trajectory index and its second axis as the timestep.
    Frame ``t`` is reported when the largest absolute element-wise difference
    between frame ``t + 1`` and frame ``t`` is exactly zero.

    Trajectories are read from disk one at a time, so peak memory is bounded
    by a single trajectory (plus its frame-to-frame differences) instead of
    the whole dataset.

    Args:
        filepath: Path of the HDF5 file to scan.

    Returns:
        One dict per duplicate, in (trajectory, frame) order, with keys
        ``file``, ``trajectory``, ``frame_idx`` (index of the first frame of
        the identical pair) and ``n_timesteps``.
    """
    duplicates = []

    with h5py.File(filepath, "r") as f:
        # Keep the dataset handle only; data is pulled in per trajectory below.
        # NOTE(review): layout is expected to be (n_traj, n_timesteps, H, W).
        pressure = f["t0_fields/pressure"]
        n_trajectories, n_timesteps = pressure.shape[:2]

        for traj_idx in range(n_trajectories):
            traj_data = pressure[traj_idx]  # one trajectory, loaded into memory

            # Difference between every frame and its successor, all at once.
            frame_deltas = traj_data[1:] - traj_data[:-1]

            # Reduce over every axis except time to get one value per frame pair.
            non_time_axes = tuple(range(1, frame_deltas.ndim))
            max_diffs = np.abs(frame_deltas).max(axis=non_time_axes)

            for t in np.flatnonzero(max_diffs == 0):
                duplicates.append(
                    {
                        "file": filepath,
                        "trajectory": int(traj_idx),
                        "frame_idx": int(t),
                        "n_timesteps": int(n_timesteps),
                    }
                )

    return duplicates


def main():
    """Scan the dataset splits for duplicate frames and report the results.

    Command-line options:
        --data-dir: Base directory containing one sub-directory per split.
        --output:   Path of the JSON report to write.
        --splits:   Names of the split sub-directories to scan.

    For every ``*.hdf5`` file in each existing split directory, the file is
    checked with ``check_file_for_duplicates``. All findings are written to
    the JSON report and a summary is printed to stdout. The summary is grouped
    by file path relative to ``--data-dir`` so that files sharing a name
    across splits (e.g. ``train/x.hdf5`` and ``test/x.hdf5``) are listed
    separately instead of being merged.
    """
    parser = argparse.ArgumentParser(
        description="Scan viscoelastic_instability dataset for duplicate frames"
    )
    parser.add_argument(
        "--data-dir",
        default="viscoelastic_instability/data",
        help="Base data directory",
    )
    parser.add_argument(
        "--output",
        default="duplicates.json",
        help="Output JSON file path",
    )
    parser.add_argument(
        "--splits",
        nargs="+",
        default=["train", "valid", "test"],
        help="Splits to check",
    )
    args = parser.parse_args()

    all_duplicates = []
    files_checked = 0
    trajectories_checked = 0

    for split in args.splits:
        split_path = os.path.join(args.data_dir, split)
        if not os.path.exists(split_path):
            print(f"Warning: {split_path} does not exist, skipping")
            continue

        # Sorted so the scan order (and therefore the report) is deterministic.
        hdf5_files = sorted(
            f for f in os.listdir(split_path) if f.endswith(".hdf5")
        )

        for fname in hdf5_files:
            filepath = os.path.join(split_path, fname)
            print(f"Checking {split}/{fname}...", end=" ", flush=True)

            duplicates = check_file_for_duplicates(filepath)
            all_duplicates.extend(duplicates)
            files_checked += 1

            # Count trajectories: prefer the file's "n_trajectories" attribute,
            # falling back to the leading dimension of the pressure dataset.
            with h5py.File(filepath, "r") as f:
                n_traj = int(
                    f.attrs.get("n_trajectories", f["t0_fields/pressure"].shape[0])
                )
                trajectories_checked += n_traj

            if duplicates:
                print(f"FOUND {len(duplicates)} duplicates!")
            else:
                print("OK")

    # Write results
    result = {
        "data_dir": args.data_dir,
        "files_checked": files_checked,
        "trajectories_checked": trajectories_checked,
        "total_duplicates": len(all_duplicates),
        "duplicates": all_duplicates,
    }

    with open(args.output, "w") as f:
        json.dump(result, f, indent=2)

    print()
    print("=" * 50)
    print(f"Files checked: {files_checked}")
    print(f"Trajectories checked: {trajectories_checked}")
    print(f"Total duplicates found: {len(all_duplicates)}")
    print(f"Results saved to: {args.output}")

    if all_duplicates:
        print()
        print("Duplicate summary:")
        # Group by path relative to the data directory (split + file name);
        # grouping by bare file name would merge same-named files from
        # different splits into a single entry.
        by_file = {}
        for dup in all_duplicates:
            rel_path = os.path.relpath(dup["file"], args.data_dir)
            by_file.setdefault(rel_path, []).append(dup)

        for rel_path, dups in sorted(by_file.items()):
            print(f"  {rel_path}: {len(dups)} duplicates")
            for d in dups[:5]:  # Show first 5
                print(f"    - trajectory {d['trajectory']}, frame {d['frame_idx']}")
            if len(dups) > 5:
                print(f"    ... and {len(dups) - 5} more")

# Run the scan only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions