Skip to content

Commit

Permalink
Improve DDP on Windows (#376)
Browse files: browse the repository at this point in the history
* Update DDP-script.py for Windows

* Windows handling

---------

Co-authored-by: Nathan Brown <nathan@nkbrown.us>
  • Loading branch information
rasbt and ngbrown authored Sep 29, 2024
1 parent bfa4215 commit 4caafdd
Showing 1 changed file with 12 additions and 3 deletions.
15 changes: 12 additions & 3 deletions appendix-A/01_main-chapter-code/DDP-script.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

# NEW imports:
import os
import platform
import torch.multiprocessing as mp
from torch.utils.data.distributed import DistributedSampler
from torch.nn.parallel import DistributedDataParallel as DDP
Expand All @@ -30,11 +31,19 @@ def ddp_setup(rank, world_size):
os.environ["MASTER_ADDR"] = "localhost"
# any free port on the machine
os.environ["MASTER_PORT"] = "12345"
if platform.system() == "Windows":
# Disable libuv because PyTorch for Windows isn't built with libuv support
os.environ["USE_LIBUV"] = "0"

# initialize process group
# Windows users may have to use "gloo" instead of "nccl" as backend
# nccl: NVIDIA Collective Communication Library
init_process_group(backend="nccl", rank=rank, world_size=world_size)
if platform.system() == "Windows":
# Windows users may have to use "gloo" instead of "nccl" as backend
# gloo: Facebook Collective Communication Library
init_process_group(backend="gloo", rank=rank, world_size=world_size)
else:
# nccl: NVIDIA Collective Communication Library
init_process_group(backend="nccl", rank=rank, world_size=world_size)

torch.cuda.set_device(rank)


Expand Down

0 comments on commit 4caafdd

Please sign in to comment.