Step-by-Step Guide to Building and Examining FedAvg and FedProx Federated Learning on Non-IID CIFAR-10 with NVIDIA FLARE

Step-by-Step Guide to Building and Examining FedAvg and FedProx Federated Learning on Non-IID CIFAR-10 with NVIDIA FLARE


CLIENT_SCRIPT += r'''
def fundamental():
    """Entry point of the federated client training script.

    Parses the command-line options, loads CIFAR-10 from ``--data_root``,
    picks this site's share of the training set via ``dirichlet_partition``
    and then serves rounds for as long as ``flare.is_running()`` is true:
    receive the global model, score it on the test set, train locally
    (adding the FedProx proximal term when ``--mu`` > 0) and send the
    updated weights back.

    ``Web``, ``consider`` and ``dirichlet_partition`` are not defined here;
    they are expected to be provided by the earlier part of this script.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--num_sites", type=int, default=3)
    p.add_argument("--alpha", type=float, default=0.3)
    p.add_argument("--local_epochs", type=int, default=1)
    p.add_argument("--mu", type=float, default=0.0)
    p.add_argument("--max_samples", type=int, default=4000)
    p.add_argument("--batch_size", type=int, default=64)
    p.add_argument("--lr", type=float, default=0.01)
    p.add_argument("--data_root", type=str, default="/tmp/nvflare/knowledge")
    p.add_argument("--results_dir", type=str, default="/tmp/nvflare/outcomes")
    p.add_argument("--tag", type=str, default="fedavg")
    args = p.parse_args()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    tf = T.Compose([T.ToTensor(),
                    T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # download=False: the dataset must already be present under data_root.
    train_set = torchvision.datasets.CIFAR10(args.data_root, train=True, download=False, transform=tf)
    test_set = torchvision.datasets.CIFAR10(args.data_root, train=False, download=False, transform=tf)

    flare.init()
    site_name = flare.get_site_name()
    # Assumes site names end in "-<1-based index>" (e.g. "site-1" -> 0).
    site_id = int(site_name.split("-")[-1]) - 1

    labels = np.array(train_set.targets)
    my_idx = dirichlet_partition(labels, args.num_sites, args.alpha)[site_id]
    # Cap the local shard so a single site cannot dominate the run time.
    if len(my_idx) > args.max_samples:
        my_idx = my_idx[:args.max_samples]
    train_loader = DataLoader(Subset(train_set, my_idx), batch_size=args.batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=512, shuffle=False)
    print(f"[{site_name}] mu={args.mu}  local samples={len(my_idx)}", flush=True)

    model = Web().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    while flare.is_running():
        input_model = flare.receive()
        rnd = input_model.current_round
        global_state = {k: torch.as_tensor(v) for k, v in input_model.params.items()}
        model.load_state_dict(global_state)

        # Scored before any local training, so acc describes the incoming
        # global model rather than this site's update.
        acc = consider(model, test_loader, device)
        if site_id == 0:
            # Only the first site appends to the CSV, giving one row per round.
            with open(os.path.join(args.results_dir, args.tag + ".csv"), "a", newline="") as f:
                csv.writer(f).writerow([rnd, acc])
        print(f"[{site_name}] round {rnd}: global test acc = {acc:.4f}", flush=True)

        # Frozen copy of the global weights, used as the anchor of the
        # proximal term below.
        global_w = [w.detach().clone() for w in model.parameters()]
        model.train()
        steps = 0
        for _ in range(args.local_epochs):
            for x, y in train_loader:
                x, y = x.to(device), y.to(device)
                optimizer.zero_grad()
                loss = criterion(model(x), y)
                if args.mu > 0:
                    # Proximal penalty: (mu / 2) * ||w - w_global||^2.
                    prox = sum(((w - g) ** 2).sum() for w, g in zip(model.parameters(), global_w))
                    loss = loss + (args.mu / 2.0) * prox
                loss.backward()
                optimizer.step()
                steps += 1

        out = flare.FLModel(
            params={k: v.cpu().numpy() for k, v in model.state_dict().items()},
            metrics={"test_accuracy": acc},
            meta={"NUM_STEPS_CURRENT_ROUND": steps},
        )
        flare.send(out)


if __name__ == "__main__":
    fundamental()
'''
# Write the accumulated client script to client_train.py in the current
# working directory.
with open("client_train.py", "w", encoding="utf-8") as f:
    f.write(CLIENT_SCRIPT)
# Put the current directory first on the import path so the file that was
# just written is the one that gets imported.
sys.path.insert(0, os.getcwd())
# The module was created after the interpreter started; drop the import
# system's cached directory listings so the new file is guaranteed to be seen.
import importlib
importlib.invalidate_caches()
from client_train import Web



Source link

Leave a Reply

Your email address will not be published. Required fields are marked *