However, when I try to use multiple GPUs, it does not work.
Here is the script, where I only added the GPU IDs (`--gpus 0,1,2,3`):
python main.py \
-t \
--base configs/objaverse.yaml \
--logdir /work/cxzheng/diff3d/test/logs \
--name test \
--gpus 0,1,2,3 \
--scale_lr False \
--num_nodes 1 \
--seed 42 \
--check_val_every_n_epoch 10 \
--finetune_from /work/cxzheng/code/zero123_old/zero123/105000.ckpt
Process 1 terminated with the following error:
Traceback (most recent call last):
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 75, in _wrap
fn(i, *args)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/strategies/launchers/multiprocessing.py", line 139, in _wrapping_function
results = function(*args, **kwargs)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 650, in _fit_impl
self._run(model, ckpt_path=self.ckpt_path)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1103, in _run
results = self._run_stage()
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1182, in _run_stage
self._run_train()
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1205, in _run_train
self.fit_loop.run()
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/loops/loop.py", line 199, in run
self.advance(*args, **kwargs)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/loops/fit_loop.py", line 267, in advance
self._outputs = self.epoch_loop.run(self._data_fetcher)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/loops/loop.py", line 194, in run
self.on_run_start(*args, **kwargs)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py", line 160, in on_run_start
_ = iter(data_fetcher) # creates the iterator inside the fetcher
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/utilities/fetching.py", line 179, in __iter__
self._apply_patch()
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/utilities/fetching.py", line 120, in _apply_patch
apply_to_collections(self.loaders, self.loader_iters, (Iterator, DataLoader), _apply_patch_fn)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/utilities/fetching.py", line 156, in loader_iters
return self.dataloader_iter.loader_iters
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/trainer/supporters.py", line 556, in loader_iters
self._loader_iters = self.create_loader_iters(self.loaders)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/pytorch_lightning/trainer/supporters.py", line 596, in create_loader_iters
return apply_to_collection(loaders, Iterable, iter, wrong_dtype=(Sequence, Mapping))
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/lightning_utilities/core/apply_func.py", line 52, in apply_to_collection
return _apply_to_collection_slow(
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/lightning_utilities/core/apply_func.py", line 96, in _apply_to_collection_slow
return function(data, *args, **kwargs)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 439, in __iter__
return self._get_iterator()
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 387, in _get_iterator
return _MultiProcessingDataLoaderIter(self)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1040, in __init__
w.start()
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/home/dense/miniconda3/envs/free3d/lib/python3.9/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
AttributeError: Can't pickle local object 'ObjaverseDataset.__init__.<locals>.<lambda>'
Hi,
great work and thanks for sharing the code.
I am trying to run the training script. With the original command, it works fine.
However, when I try to use multiple GPUs, it does not work.
Here is the script, where I only added the GPU IDs:
I get the following error:
As mentioned above, with a single GPU the training starts smoothly.
Thanks for the help