diff --git a/LICENSE b/LICENSE new file mode 100755 index 0000000..54c82dd --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Shaoxiong Ji + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/data/README.md b/data/README.md new file mode 100755 index 0000000..b044379 --- /dev/null +++ b/data/README.md @@ -0,0 +1,3 @@ +# Data + +MNIST & CIFAR-10 datasets will be downloaded automatically by the torchvision package. diff --git a/data/__init__.py b/data/__init__.py new file mode 100755 index 0000000..3918c7d --- /dev/null +++ b/data/__init__.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @python: 3.6 + diff --git a/data/mnist/.gitkeep b/data/mnist/.gitkeep new file mode 100755 index 0000000..e69de29 diff --git a/data/mnist/processed/test.pt b/data/mnist/processed/test.pt new file mode 100755 index 0000000..d654e9d Binary files /dev/null and b/data/mnist/processed/test.pt differ diff --git a/data/mnist/processed/training.pt b/data/mnist/processed/training.pt new file mode 100755 index 0000000..5529768 Binary files /dev/null and b/data/mnist/processed/training.pt differ diff --git a/data/mnist/raw/t10k-images-idx3-ubyte b/data/mnist/raw/t10k-images-idx3-ubyte new file mode 100755 index 0000000..1170b2c Binary files /dev/null and b/data/mnist/raw/t10k-images-idx3-ubyte differ diff --git a/data/mnist/raw/t10k-labels-idx1-ubyte b/data/mnist/raw/t10k-labels-idx1-ubyte new file mode 100755 index 0000000..d1c3a97 Binary files /dev/null and b/data/mnist/raw/t10k-labels-idx1-ubyte differ diff --git a/data/mnist/raw/train-images-idx3-ubyte b/data/mnist/raw/train-images-idx3-ubyte new file mode 100755 index 0000000..bbce276 Binary files /dev/null and b/data/mnist/raw/train-images-idx3-ubyte differ diff --git a/data/mnist/raw/train-labels-idx1-ubyte b/data/mnist/raw/train-labels-idx1-ubyte new file mode 100755 index 0000000..d6b4c5d Binary files /dev/null and b/data/mnist/raw/train-labels-idx1-ubyte differ diff --git a/global.py b/global.py new file mode 100755 index 0000000..c4b5c4d --- /dev/null +++ b/global.py @@ -0,0 +1,363 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Python version: 3.6 + +import copy +import os +import itertools +import numpy as np +from scipy.stats import mode +from torchvision import datasets, transforms, models +import torch +from torch import nn +import torch.optim as optim +from utils.sampling import fair_iid, fair_noniid +from utils.options import args_parser +from models.Update import LocalUpdate, LocalUpdate_noLG +from models.Nets 
import MLP, CNNMnist, CNNCifar, ResnetCifar +from models.Fed import FedAvg +from models.test import test_img, test_img_local + +import pandas as pd + +from sklearn import metrics +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.utils.class_weight import compute_class_weight +from torch.utils.data import TensorDataset +from torch.utils.data import DataLoader + +from helpers import load_ICU_data, plot_distributions, _performance_text + +os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"]="0" + +import pdb + +def run_all(clf_all1, clf_all2, adv_all1, adv_all2, adv_all3): + # parse args + args = args_parser() + args.device = torch.device('cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu') + + # load ICU dataset and split users + # load ICU data set + X, y, Z = load_ICU_data('../fairness-in-ml/data/adult.data') + + if not args.iid: + X = X[:30000] + y = y[:30000] + Z = Z[:30000] + + n_points = X.shape[0] + n_features = X.shape[1] + n_sensitive = Z.shape[1] + + print (n_features) + + # split into train/test set + (X_train, X_test, y_train, y_test, Z_train, Z_test) = train_test_split(X, y, Z, test_size=0.5, stratify=y, random_state=7) + + # standardize the data + scaler = StandardScaler().fit(X_train) + scale_df = lambda df, scaler: pd.DataFrame(scaler.transform(df), columns=df.columns, index=df.index) + X_train = X_train.pipe(scale_df, scaler) + X_test = X_test.pipe(scale_df, scaler) + + class PandasDataSet(TensorDataset): + def __init__(self, *dataframes): + tensors = (self._df_to_tensor(df) for df in dataframes) + super(PandasDataSet, self).__init__(*tensors) + + def _df_to_tensor(self, df): + if isinstance(df, pd.Series): + df = df.to_frame('dummy') + return torch.from_numpy(df.values).float() + + def _df_to_tensor(df): + if isinstance(df, pd.Series): + df = df.to_frame('dummy') + return torch.from_numpy(df.values).float() + + train_data = PandasDataSet(X_train, y_train, Z_train) + test_data = PandasDataSet(X_test, y_test, Z_test) + + print('# train samples:', len(train_data)) # 15470 + print('# test samples:', len(test_data)) + + batch_size = 32 + + train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=True) + test_loader = DataLoader(test_data, batch_size=len(test_data), shuffle=True, drop_last=True) + + # sample users + if args.iid: + dict_users_train = fair_iid(train_data, args.num_users) + dict_users_test = fair_iid(test_data, args.num_users) + else: + train_data = [_df_to_tensor(X_train), _df_to_tensor(y_train), _df_to_tensor(Z_train)] + test_data = [_df_to_tensor(X_test), _df_to_tensor(y_test), _df_to_tensor(Z_test)] + #import pdb; pdb.set_trace() + dict_users_train, rand_set_all = fair_noniid(train_data, args.num_users, num_shards=100, num_imgs=150, train=True) + dict_users_test, _ = fair_noniid(test_data, args.num_users, num_shards=100, num_imgs=150, train=False, rand_set_all=rand_set_all) + + train_data = [_df_to_tensor(X_train), _df_to_tensor(y_train), _df_to_tensor(Z_train)] + test_data = [_df_to_tensor(X_test), _df_to_tensor(y_test), _df_to_tensor(Z_test)] + + class LocalClassifier(nn.Module): + def __init__(self, n_features, n_hidden=32, p_dropout=0.2): + super(LocalClassifier, self).__init__() + self.network1 = nn.Sequential( + nn.Linear(n_features, n_hidden), + nn.ReLU(), + nn.Dropout(p_dropout), + nn.Linear(n_hidden, n_hidden), + nn.ReLU(), + nn.Dropout(p_dropout), + nn.Linear(n_hidden, n_hidden) + ) + 
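+            # network1 produces the intermediate representation returned as `mid` by
+            # forward(); the adversary is trained on this representation, while
+            # network2 is the head that turns it into the final sigmoid prediction.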
self.network2 = nn.Sequential( + nn.ReLU(), + nn.Dropout(p_dropout), + nn.Linear(n_hidden, 1) + ) + + def forward(self, x): + mid = self.network1(x) + final = torch.sigmoid(self.network2(mid)) + return mid, final + + def pretrain_classifier(clf, data_loader, optimizer, criterion): + losses = 0.0 + for x, y, _ in data_loader: + x = x.to(args.device) + y = y.to(args.device) + clf.zero_grad() + mid, p_y = clf(x) + loss = criterion(p_y, y) + loss.backward() + optimizer.step() + losses += loss.item() + print ('loss', losses/len(data_loader)) + return clf + + def test_classifier(clf, data_loader): + losses = 0 + assert len(data_loader) == 1 + with torch.no_grad(): + for x, y_test, _ in data_loader: + x = x.to(args.device) + mid, y_pred = clf(x) + y_pred = y_pred.cpu() + clf_accuracy = metrics.accuracy_score(y_test, y_pred > 0.5) * 100 + return clf_accuracy + + class Adversary(nn.Module): + + def __init__(self, n_sensitive, n_hidden=32): + super(Adversary, self).__init__() + self.network = nn.Sequential( + nn.Linear(n_hidden, n_hidden), + nn.ReLU(), + nn.Linear(n_hidden, n_hidden), + nn.ReLU(), + nn.Linear(n_hidden, n_hidden), + nn.ReLU(), + nn.Linear(n_hidden, n_sensitive), + ) + + def forward(self, x): + return torch.sigmoid(self.network(x)) + + def pretrain_adversary(adv, clf, data_loader, optimizer, criterion): + losses = 0.0 + for x, _, z in data_loader: + x = x.to(args.device) + z = z.to(args.device) + mid, p_y = clf(x) + mid = mid.detach() + p_y = p_y.detach() + adv.zero_grad() + p_z = adv(mid) + loss = (criterion(p_z.to(args.device), z.to(args.device)) * lambdas.to(args.device)).mean() + loss.backward() + optimizer.step() + losses += loss.item() + print ('loss', losses/len(data_loader)) + return adv + + def test_adversary(adv, clf, data_loader): + losses = 0 + adv_accuracies = [] + assert len(data_loader) == 1 + with torch.no_grad(): + for x, _, z_test in data_loader: + x = x.to(args.device) + mid, p_y = clf(x) + mid = mid.detach() + p_y = p_y.detach() + p_z = adv(mid) + for i in range(p_z.shape[1]): + z_test_i = z_test[:,i] + z_pred_i = p_z[:,i] + z_pred_i = z_pred_i.cpu() + adv_accuracy = metrics.accuracy_score(z_test_i, z_pred_i > 0.5) * 100 + adv_accuracies.append(adv_accuracy) + return adv_accuracies + + def train_both(clf, adv, data_loader, clf_criterion, adv_criterion, clf_optimizer, adv_optimizer, lambdas): + # Train adversary + adv_losses = 0.0 + for x, y, z in data_loader: + x = x.to(args.device) + z = z.to(args.device) + local, p_y = clf(x) + adv.zero_grad() + p_z = adv(local) + loss_adv = (adv_criterion(p_z.to(args.device), z.to(args.device)) * lambdas.to(args.device)).mean() + loss_adv.backward() + adv_optimizer.step() + adv_losses += loss_adv.item() + print ('adversarial loss', adv_losses/len(data_loader)) + + # Train classifier on single batch + clf_losses = 0.0 + for x, y, z in data_loader: + x = x.to(args.device) + y = y.to(args.device) + z = z.to(args.device) + local, p_y = clf(x) + p_z = adv(local) + clf.zero_grad() + if args.adv: + clf_loss = clf_criterion(p_y.to(args.device), y.to(args.device)) - (adv_criterion(p_z.to(args.device), z.to(args.device)) * lambdas.to(args.device)).mean() + else: + clf_loss = clf_criterion(p_y.to(args.device), y.to(args.device)) + clf_loss.backward() + clf_optimizer.step() + clf_losses += clf_loss.item() + print ('classifier loss', clf_losses/len(data_loader)) + return clf, adv + + def eval_global_performance_text(test_loader_i, global_model, adv_model): + with torch.no_grad(): + for test_x, test_y, test_z in test_loader_i: + test_x = 
test_x.to(args.device) + local_pred, clf_pred = global_model(test_x) + adv_pred = adv_model(local_pred) + + y_post_clf = pd.Series(clf_pred.cpu().numpy().ravel(), index=y_test[list(dict_users_train[idx])].index) + Z_post_adv = pd.DataFrame(adv_pred.cpu().numpy(), columns=Z_test.columns) + + clf_roc_auc,clf_accuracy,adv_acc1,adv_acc2,adv_roc_auc = _performance_text(test_y, test_z, y_post_clf, Z_post_adv, epoch=None) + return clf_roc_auc,clf_accuracy,adv_acc1,adv_acc2,adv_roc_auc + + lambdas = torch.Tensor([30.0, 30.0]) + net_local_list = [] + + print ('\n\n======================== STARTING LOCAL TRAINING ========================\n\n\n') + + for idx in range(args.num_users): + train_data_i_raw = [torch.FloatTensor(bb[list(dict_users_train[idx])]) for bb in train_data] + train_data_i = TensorDataset(train_data_i_raw[0],train_data_i_raw[1],train_data_i_raw[2]) + train_loader_i = torch.utils.data.DataLoader(train_data_i, batch_size=batch_size, shuffle=False, num_workers=4) + + test_data_i_raw = [torch.FloatTensor(bb[list(dict_users_train[idx])]) for bb in test_data] + test_data_i = TensorDataset(test_data_i_raw[0],test_data_i_raw[1],test_data_i_raw[2]) + test_loader_i = torch.utils.data.DataLoader(test_data_i, batch_size=len(test_data_i), shuffle=False, num_workers=4) + + net_local_list.append([train_loader_i,test_loader_i]) + + class GlobalClassifier(nn.Module): + def __init__(self, n_features, n_hidden=32, p_dropout=0.2): + super(GlobalClassifier, self).__init__() + self.network1 = nn.Sequential( + nn.Linear(n_features, n_hidden), + nn.ReLU(), + nn.Dropout(p_dropout), + nn.Linear(n_hidden, n_hidden), + nn.ReLU(), + nn.Dropout(p_dropout), + nn.Linear(n_hidden, n_hidden) + ) + self.network2 = nn.Sequential( + nn.ReLU(), + nn.Dropout(p_dropout), + nn.Linear(n_hidden, 1) + ) + + def forward(self, x): + mid = self.network1(x) + final = torch.sigmoid(self.network2(mid)) + return mid, final + + # build global model + global_clf = GlobalClassifier(n_features=n_features).to(args.device) + global_clf_criterion = nn.BCELoss().to(args.device) + global_clf_optimizer = optim.Adam(global_clf.parameters(), lr=0.01) + + adv_model = Adversary(Z_train.shape[1]).to(args.device) + adv_criterion = nn.BCELoss(reduce=False).to(args.device) + adv_optimizer = optim.Adam(adv_model.parameters(), lr=0.01) + + # copy weights + w_glob = global_clf.state_dict() + adv_glob = adv_model.state_dict() + + print ('\n\n======================== STARTING GLOBAL TRAINING ========================\n\n\n') + + global_epochs = 10 + for iter in range(global_epochs): + w_locals, adv_locals, w_loss_locals, adv_loss_locals = [], [], [], [] + for idx in range(args.num_users): + print ('\n\n======================== GLOBAL TRAINING, ITERATION %d, USER %d ========================\n\n\n' %(iter,idx)) + train_loader_i,test_loader_i = net_local_list[idx] + + local = LocalUpdate_noLG(args=args, dataset=train_loader_i) + w, w_loss, adv, adv_loss = local.train(global_net=copy.deepcopy(global_clf).to(args.device), adv_model=copy.deepcopy(adv_model).to(args.device), lambdas=lambdas) + + w_locals.append(copy.deepcopy(w)) + w_loss_locals.append(copy.deepcopy(w_loss)) + + adv_locals.append(copy.deepcopy(adv)) + adv_loss_locals.append(copy.deepcopy(adv_loss)) + + w_glob = FedAvg(w_locals) + # copy weight to net_glob + global_clf.load_state_dict(w_glob) + + adv_glob = FedAvg(adv_locals) + # copy weight to net_glob + adv_model.load_state_dict(adv_glob) + + for idx in range(args.num_users): + train_loader_i,test_loader_i = net_local_list[idx] + + print 
('======================== local and global training: evaluating _global_performance_text on device %d ========================' %idx) + clf_roc_auc,clf_accuracy,adv_acc1,adv_acc2,adv_roc_auc = eval_global_performance_text(test_loader_i, global_clf, adv_model) + print ('======================== by now the global classifier should work better than local classifier ========================') + + clf_all1.append(clf_roc_auc) + clf_all2.append(clf_accuracy) + adv_all1.append(adv_acc1) + adv_all2.append(adv_acc2) + adv_all3.append(adv_roc_auc) + + print ('clf_all1', np.mean(np.array(clf_all1)), np.std(np.array(clf_all1))) + print ('clf_all2', np.mean(np.array(clf_all2)), np.std(np.array(clf_all2))) + print ('adv_all1', np.mean(np.array(adv_all1)), np.std(np.array(adv_all1))) + print ('adv_all2', np.mean(np.array(adv_all2)), np.std(np.array(adv_all2))) + print ('adv_all3', np.mean(np.array(adv_all3)), np.std(np.array(adv_all3))) + return clf_all1, clf_all2, adv_all1, adv_all2, adv_all3 + + +if __name__ == '__main__': + clf_all1, clf_all2, adv_all1, adv_all2, adv_all3 = [], [], [], [], [] + for _ in range(10): + clf_all1, clf_all2, adv_all1, adv_all2, adv_all3 = run_all(clf_all1, clf_all2, adv_all1, adv_all2, adv_all3) + print ('final') + print ('clf_all1', np.mean(np.array(clf_all1)), np.std(np.array(clf_all1))) + print ('clf_all2', np.mean(np.array(clf_all2)), np.std(np.array(clf_all2))) + print ('adv_all1', np.mean(np.array(adv_all1)), np.std(np.array(adv_all1))) + print ('adv_all2', np.mean(np.array(adv_all2)), np.std(np.array(adv_all2))) + print ('adv_all3', np.mean(np.array(adv_all3)), np.std(np.array(adv_all3))) + diff --git a/helpers.py b/helpers.py new file mode 100755 index 0000000..56f48ef --- /dev/null +++ b/helpers.py @@ -0,0 +1,95 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from sklearn import metrics + + +def load_ICU_data(path): + column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education_num', + 'martial_status', 'occupation', 'relationship', 'race', 'sex', + 'capital_gain', 'capital_loss', 'hours_per_week', 'country', 'target'] + input_data = (pd.read_csv(path, names=column_names, + na_values="?", sep=r'\s*,\s*', engine='python') + .loc[lambda df: df['race'].isin(['White', 'Black'])]) + # sensitive attributes; we identify 'race' and 'sex' as sensitive attributes + sensitive_attribs = ['race', 'sex'] + Z = (input_data.loc[:, sensitive_attribs] + .assign(race=lambda df: (df['race'] == 'White').astype(int), + sex=lambda df: (df['sex'] == 'Male').astype(int))) + + # targets; 1 when someone makes over 50K, otherwise 0 + y = (input_data['target'] == '>50K').astype(int) + + # features; note that the 'target' and sensitive attribute columns are dropped + X = (input_data + .drop(columns=['target', 'race', 'sex', 'fnlwgt']) + .fillna('Unknown') + .pipe(pd.get_dummies, drop_first=True)) + + print(f"features X: {X.shape[0]} samples, {X.shape[1]} attributes") + print(f"targets y: {y.shape} samples") + print(f"sensitives Z: {Z.shape[0]} samples, {Z.shape[1]} attributes") + return X, y, Z + + +def p_rule(y_pred, z_values, threshold=0.5): + y_z_1 = y_pred[z_values == 1] > threshold if threshold else y_pred[z_values == 1] + y_z_0 = y_pred[z_values == 0] > threshold if threshold else y_pred[z_values == 0] + odds = y_z_1.mean() / y_z_0.mean() + return np.min([odds, 1/odds]) * 100 + + +def plot_distributions(y_true, Z_true, y_pred, Z_pred=None, epoch=None): + + fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharey=True) + + subplot_df = ( + Z_true + .assign(race=lambda x: x['race'].map({1: 
'white', 0: 'black'})) + .assign(sex=lambda x: x['sex'].map({1: 'male', 0: 'female'})) + .assign(y_pred=y_pred) + ) + _subplot(subplot_df, 'race', ax=axes[0]) + _subplot(subplot_df, 'sex', ax=axes[1]) + _performance_text(y_true, Z_true, y_pred, Z_pred, epoch) + fig.tight_layout() + return fig + + +def _subplot(subplot_df, col, ax): + ax.set_title(f'Sensitive attribute: {col}') + ax.set_xlim(0, 1) + ax.set_ylim(0, 7) + ax.set_yticks([]) + ax.set_ylabel('Prediction distribution') + ax.set_xlabel(r'$P({{income>50K}}|z_{{{}}})$'.format(col)) + + +def _performance_text(y_test, Z_test, y_pred, Z_pred=None, epoch=None): + + if epoch is not None: + print ("Training epoch %d" %epoch) + + clf_roc_auc = metrics.roc_auc_score(y_test, y_pred) + clf_accuracy = metrics.accuracy_score(y_test, y_pred > 0.5) * 100 + #p_rules = {'race': p_rule(y_pred, Z_test['race']), + # 'sex': p_rule(y_pred, Z_test['sex']),} + print ("Classifier performance: ROC AUC: %f Accuracy: %f" %(clf_roc_auc,clf_accuracy)) + #print ('\n'.join(["Satisfied p%-rules:"] + + # ["- {attr}: {p_rules[attr]:.0f}%-rule" + # for attr in p_rules.keys()])) + if Z_pred is not None: + adv_acc1 = metrics.accuracy_score(Z_test[:,0], Z_pred['race'] > 0.5) * 100 + adv_acc2 = metrics.accuracy_score(Z_test[:,1], Z_pred['sex'] > 0.5) * 100 + adv_roc_auc = metrics.roc_auc_score(Z_test, Z_pred) + print ("Adversary performance: ROC AUC: %f acc1: %f, acc2: %f" %(adv_roc_auc, adv_acc1, adv_acc2)) + return clf_roc_auc,clf_accuracy,adv_acc1,adv_acc2,adv_roc_auc + + + + + + + + + diff --git a/main_fair.py b/main_fair.py new file mode 100755 index 0000000..a14eed0 --- /dev/null +++ b/main_fair.py @@ -0,0 +1,422 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Python version: 3.6 + +import copy +import os +import itertools +import numpy as np +from scipy.stats import mode +from torchvision import datasets, transforms, models +import torch +from torch import nn +import torch.optim as optim +from utils.sampling import fair_iid, fair_noniid +from utils.options import args_parser +from models.Update import LocalUpdate +from models.Nets import MLP, CNNMnist, CNNCifar, ResnetCifar +from models.Fed import FedAvg +from models.test import test_img, test_img_local + +import pandas as pd + +from sklearn import metrics +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.utils.class_weight import compute_class_weight +from torch.utils.data import TensorDataset +from torch.utils.data import DataLoader + +from helpers import load_ICU_data, plot_distributions, _performance_text + +os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"]="3" + +import pdb + +def run_all(clf_all1, clf_all2, adv_all1, adv_all2, adv_all3): + # parse args + args = args_parser() + args.device = torch.device('cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu') + + # load ICU dataset and split users + # load ICU data set + X, y, Z = load_ICU_data('../fairness-in-ml/data/adult.data') + + if not args.iid: + X = X[:30000] + y = y[:30000] + Z = Z[:30000] + + n_points = X.shape[0] + n_features = X.shape[1] + n_sensitive = Z.shape[1] + + # split into train/test set + (X_train, X_test, y_train, y_test, Z_train, Z_test) = train_test_split(X, y, Z, test_size=0.5, stratify=y, random_state=7) + + # standardize the data + scaler = StandardScaler().fit(X_train) + scale_df = lambda df, scaler: pd.DataFrame(scaler.transform(df), columns=df.columns, index=df.index) + X_train = 
X_train.pipe(scale_df, scaler) + X_test = X_test.pipe(scale_df, scaler) + + class PandasDataSet(TensorDataset): + def __init__(self, *dataframes): + tensors = (self._df_to_tensor(df) for df in dataframes) + super(PandasDataSet, self).__init__(*tensors) + + def _df_to_tensor(self, df): + if isinstance(df, pd.Series): + df = df.to_frame('dummy') + return torch.from_numpy(df.values).float() + + def _df_to_tensor(df): + if isinstance(df, pd.Series): + df = df.to_frame('dummy') + return torch.from_numpy(df.values).float() + + train_data = PandasDataSet(X_train, y_train, Z_train) + test_data = PandasDataSet(X_test, y_test, Z_test) + + print('# train samples:', len(train_data)) # 15470 + print('# test samples:', len(test_data)) + + batch_size = 32 + + train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=True) + test_loader = DataLoader(test_data, batch_size=len(test_data), shuffle=True, drop_last=True) + + # sample users + if args.iid: + dict_users_train = fair_iid(train_data, args.num_users) + dict_users_test = fair_iid(test_data, args.num_users) + else: + train_data = [_df_to_tensor(X_train), _df_to_tensor(y_train), _df_to_tensor(Z_train)] + test_data = [_df_to_tensor(X_test), _df_to_tensor(y_test), _df_to_tensor(Z_test)] + #import pdb; pdb.set_trace() + dict_users_train, rand_set_all = fair_noniid(train_data, args.num_users, num_shards=100, num_imgs=150, train=True) + dict_users_test, _ = fair_noniid(test_data, args.num_users, num_shards=100, num_imgs=150, train=False, rand_set_all=rand_set_all) + + train_data = [_df_to_tensor(X_train), _df_to_tensor(y_train), _df_to_tensor(Z_train)] + test_data = [_df_to_tensor(X_test), _df_to_tensor(y_test), _df_to_tensor(Z_test)] + + class LocalClassifier(nn.Module): + def __init__(self, n_features, n_hidden=32, p_dropout=0.2): + super(LocalClassifier, self).__init__() + self.network1 = nn.Sequential( + nn.Linear(n_features, n_hidden), + nn.ReLU(), + nn.Dropout(p_dropout), + nn.Linear(n_hidden, n_hidden), + nn.ReLU(), + nn.Dropout(p_dropout), + nn.Linear(n_hidden, n_hidden) + ) + self.network2 = nn.Sequential( + nn.ReLU(), + nn.Dropout(p_dropout), + nn.Linear(n_hidden, 1) + ) + + def forward(self, x): + mid = self.network1(x) + final = torch.sigmoid(self.network2(mid)) + return mid, final + + def pretrain_classifier(clf, data_loader, optimizer, criterion): + losses = 0.0 + for x, y, _ in data_loader: + x = x.to(args.device) + y = y.to(args.device) + clf.zero_grad() + mid, p_y = clf(x) + loss = criterion(p_y, y) + loss.backward() + optimizer.step() + losses += loss.item() + print ('loss', losses/len(data_loader)) + return clf + + def test_classifier(clf, data_loader): + losses = 0 + assert len(data_loader) == 1 + with torch.no_grad(): + for x, y_test, _ in data_loader: + x = x.to(args.device) + mid, y_pred = clf(x) + y_pred = y_pred.cpu() + clf_accuracy = metrics.accuracy_score(y_test, y_pred > 0.5) * 100 + return clf_accuracy + + class Adversary(nn.Module): + + def __init__(self, n_sensitive, n_hidden=32): + super(Adversary, self).__init__() + self.network = nn.Sequential( + nn.Linear(n_hidden, n_hidden), + nn.ReLU(), + nn.Linear(n_hidden, n_hidden), + nn.ReLU(), + nn.Linear(n_hidden, n_hidden), + nn.ReLU(), + nn.Linear(n_hidden, n_sensitive), + ) + + def forward(self, x): + return torch.sigmoid(self.network(x)) + + def pretrain_adversary(adv, clf, data_loader, optimizer, criterion): + losses = 0.0 + for x, _, z in data_loader: + x = x.to(args.device) + z = z.to(args.device) + mid, p_y = clf(x) + mid = mid.detach() + p_y = 
p_y.detach() + adv.zero_grad() + p_z = adv(mid) + loss = (criterion(p_z.to(args.device), z.to(args.device)) * lambdas.to(args.device)).mean() + loss.backward() + optimizer.step() + losses += loss.item() + print ('loss', losses/len(data_loader)) + return adv + + def test_adversary(adv, clf, data_loader): + losses = 0 + adv_accuracies = [] + assert len(data_loader) == 1 + with torch.no_grad(): + for x, _, z_test in data_loader: + x = x.to(args.device) + mid, p_y = clf(x) + mid = mid.detach() + p_y = p_y.detach() + p_z = adv(mid) + for i in range(p_z.shape[1]): + z_test_i = z_test[:,i] + z_pred_i = p_z[:,i] + z_pred_i = z_pred_i.cpu() + adv_accuracy = metrics.accuracy_score(z_test_i, z_pred_i > 0.5) * 100 + adv_accuracies.append(adv_accuracy) + return adv_accuracies + + def train_both(clf, adv, data_loader, clf_criterion, adv_criterion, clf_optimizer, adv_optimizer, lambdas): + # Train adversary + adv_losses = 0.0 + for x, y, z in data_loader: + x = x.to(args.device) + z = z.to(args.device) + local, p_y = clf(x) + adv.zero_grad() + p_z = adv(local) + loss_adv = (adv_criterion(p_z.to(args.device), z.to(args.device)) * lambdas.to(args.device)).mean() + loss_adv.backward() + adv_optimizer.step() + adv_losses += loss_adv.item() + print ('adversarial loss', adv_losses/len(data_loader)) + + # Train classifier on single batch + clf_losses = 0.0 + for x, y, z in data_loader: + pass + x = x.to(args.device) + y = y.to(args.device) + z = z.to(args.device) + local, p_y = clf(x) + p_z = adv(local) + clf.zero_grad() + if args.adv: + clf_loss = clf_criterion(p_y.to(args.device), y.to(args.device)) - (adv_criterion(p_z.to(args.device), z.to(args.device)) * lambdas.to(args.device)).mean() + else: + clf_loss = clf_criterion(p_y.to(args.device), y.to(args.device)) + clf_loss.backward() + clf_optimizer.step() + clf_losses += clf_loss.item() + print ('classifier loss', clf_losses/len(data_loader)) + return clf, adv + + def eval_performance_text(test_loader_i, local_clf_i, adv_i): + with torch.no_grad(): + for test_x, test_y, test_z in test_loader_i: + test_x = test_x.to(args.device) + local_pred, clf_pred = local_clf_i(test_x) + adv_pred = adv_i(local_pred) + + y_post_clf = pd.Series(clf_pred.cpu().numpy().ravel(), index=y_test[list(dict_users_train[idx])].index) + Z_post_adv = pd.DataFrame(adv_pred.cpu().numpy(), columns=Z_test.columns) + + clf_roc_auc,clf_accuracy,adv_acc1,adv_acc2,adv_roc_auc = _performance_text(test_y, test_z, y_post_clf, Z_post_adv, epoch=None) + return clf_roc_auc,clf_accuracy,adv_acc1,adv_acc2,adv_roc_auc + + def eval_global_performance_text(test_loader_i, local_clf_i, adv_i, global_clf): + with torch.no_grad(): + for test_x, test_y, test_z in test_loader_i: + test_x = test_x.to(args.device) + local_pred, clf_pred = local_clf_i(test_x) + adv_pred = adv_i(local_pred) + global_pred = global_clf(local_pred) + + y_post_clf = pd.Series(global_pred.cpu().numpy().ravel(), index=y_test[list(dict_users_train[idx])].index) + Z_post_adv = pd.DataFrame(adv_pred.cpu().numpy(), columns=Z_test.columns) + + clf_roc_auc,clf_accuracy,adv_acc1,adv_acc2,adv_roc_auc = _performance_text(test_y, test_z, y_post_clf, Z_post_adv, epoch=None) + return clf_roc_auc,clf_accuracy,adv_acc1,adv_acc2,adv_roc_auc + + lambdas = torch.Tensor([30.0, 30.0]) + net_local_list = [] + + print ('\n\n======================== STARTING LOCAL TRAINING ========================\n\n\n') + + for idx in range(args.num_users): + print ('\n======================== LOCAL TRAINING, USER %d ========================\n\n\n' %idx) + train_data_i_raw 
= [torch.FloatTensor(bb[list(dict_users_train[idx])]) for bb in train_data] + train_data_i = TensorDataset(train_data_i_raw[0],train_data_i_raw[1],train_data_i_raw[2]) + train_loader_i = torch.utils.data.DataLoader(train_data_i, batch_size=batch_size, shuffle=False, num_workers=4) + + test_data_i_raw = [torch.FloatTensor(bb[list(dict_users_train[idx])]) for bb in test_data] + test_data_i = TensorDataset(test_data_i_raw[0],test_data_i_raw[1],test_data_i_raw[2]) + test_loader_i = torch.utils.data.DataLoader(test_data_i, batch_size=len(test_data_i), shuffle=False, num_workers=4) + + local_clf_i = LocalClassifier(n_features=n_features).to(args.device) + local_clf_criterion_i = nn.BCELoss().to(args.device) + local_clf_optimizer_i = optim.SGD(local_clf_i.parameters(), lr=0.1) + + adv_i = Adversary(Z_train.shape[1]).to(args.device) + adv_criterion_i = nn.BCELoss(reduce=False).to(args.device) + adv_optimizer_i = optim.SGD(adv_i.parameters(), lr=0.1) + + net_local_list.append([train_loader_i,test_loader_i,local_clf_i,local_clf_optimizer_i,local_clf_criterion_i,adv_i,adv_criterion_i,adv_optimizer_i]) + + N_CLF_EPOCHS = 10 + for epoch in range(N_CLF_EPOCHS): + print ('======================== pretrain_classifier epoch %d ========================' %epoch) + local_clf = pretrain_classifier(local_clf_i, train_loader_i, local_clf_optimizer_i, local_clf_criterion_i) + # test classifier + # print ('\npretrained test accuracy on income prediction', test_classifier(local_clf_i, test_loader)) + # print () + print ('======================== local classifier pretraining: evaluating _performance_text on device %d ========================' %idx) + eval_performance_text(test_loader_i, local_clf_i, adv_i) + + N_ADV_EPOCHS = 10 + for epoch in range(N_ADV_EPOCHS): + print ('======================== pretrain_adversary epoch %d ========================' %epoch) + pretrain_adversary(adv_i, local_clf_i, train_loader_i, adv_optimizer_i, adv_criterion_i) + + # test adversary + # print ('\npretrained adversary accuracy on race, sex prediction', test_adversary(adv_i, local_clf_i, test_loader)) + # print () + print ('======================== local adversary pretraining: evaluating _performance_text on device %d ========================' %idx) + eval_performance_text(test_loader_i, local_clf_i, adv_i) + print ('======================== by now both the local classifier and the local adversary should do well ========================') + + # train both + N_EPOCH_COMBINED = 0 #250 + for epoch in range(N_EPOCH_COMBINED): + print ('======================== combined training epoch %d ========================' %epoch) + clf, adv = train_both(local_clf_i, adv_i, train_loader_i, local_clf_criterion_i, adv_criterion_i, + local_clf_optimizer_i, adv_optimizer_i, lambdas) + # test classifier + #print ('final test accuracy on income prediction', test_classifier(clf, test_loader)) + # test adversary + #print ('final adversary accuracy on race, sex prediction', test_adversary(adv, clf, test_loader)) + + print ('======================== local classifier and adversary pretraining: evaluating _performance_text on device %d ========================' %idx) + eval_performance_text(test_loader_i, local_clf_i, adv_i) + + print ('======================== by now the local classifier should do well but the local adversary should not do well ========================') + + print ('======================== done pretraining local classifiers and adversaries ========================') + + class GlobalClassifier(nn.Module): + def __init__(self, n_hidden=32, 
p_dropout=0.2): + super(GlobalClassifier, self).__init__() + self.global_network = nn.Sequential( + nn.Linear(n_hidden, n_hidden), + nn.ReLU(), + nn.Dropout(p_dropout), + nn.Linear(n_hidden, n_hidden), + nn.ReLU(), + nn.Dropout(p_dropout), + nn.Linear(n_hidden, 1), + ) + + def forward(self, local): + final = torch.sigmoid(self.global_network(local)) + return final + + # build global model + global_clf = GlobalClassifier().to(args.device) + global_clf_criterion = nn.BCELoss().to(args.device) + global_clf_optimizer = optim.Adam(global_clf.parameters(), lr=0.01) + + # copy weights + w_glob = global_clf.state_dict() + + print ('\n\n======================== STARTING GLOBAL TRAINING ========================\n\n\n') + + global_epochs = 10 + for iter in range(global_epochs): + w_locals, loss_locals = [], [] + for idx in range(args.num_users): + print ('\n\n======================== GLOBAL TRAINING, ITERATION %d, USER %d ========================\n\n\n' %(iter,idx)) + train_loader_i,test_loader_i,local_clf_i,local_clf_optimizer_i,local_clf_criterion_i,adv_i,adv_criterion_i,adv_optimizer_i = net_local_list[idx] + # train both local models: classifier and adversary + if iter % 2 == 0: + N_EPOCH_COMBINED = 0 #65 + for epoch in range(N_EPOCH_COMBINED): + print ('======================== combined training epoch %d ========================' %epoch) + local_clf_i, adv_i = train_both(local_clf_i, adv_i, train_loader_i, local_clf_criterion_i, adv_criterion_i, + local_clf_optimizer_i, adv_optimizer_i, lambdas) + + local = LocalUpdate(args=args, dataset=train_loader_i) + w, loss = local.train(local_net=local_clf_i, local_opt=local_clf_optimizer_i, local_adv=adv_i, adv_opt=adv_optimizer_i, global_net=copy.deepcopy(global_clf).to(args.device), global_opt=global_clf_optimizer) + + w_locals.append(copy.deepcopy(w)) + loss_locals.append(copy.deepcopy(loss)) + + w_glob = FedAvg(w_locals) + # copy weight to net_glob + global_clf.load_state_dict(w_glob) + + + for idx in range(args.num_users): + train_loader_i,test_loader_i,local_clf_i,local_clf_optimizer_i,local_clf_criterion_i,adv_i,adv_criterion_i,adv_optimizer_i = net_local_list[idx] + + print ('======================== local and global training: evaluating _performance_text on device %d ========================' %idx) + eval_performance_text(test_loader_i, local_clf_i, adv_i) + print ('======================== by now the local classifier should do well but the local adversary should not do well ========================') + + print ('======================== local and global training: evaluating _global_performance_text on device %d ========================' %idx) + clf_roc_auc,clf_accuracy,adv_acc1,adv_acc2,adv_roc_auc = eval_global_performance_text(test_loader_i, local_clf_i, adv_i, global_clf) + print ('======================== by now the global classifier should work better than local classifier ========================') + + clf_all1.append(clf_roc_auc) + clf_all2.append(clf_accuracy) + adv_all1.append(adv_acc1) + adv_all2.append(adv_acc2) + adv_all3.append(adv_roc_auc) + print ('clf_all1', np.mean(np.array(clf_all1)), np.std(np.array(clf_all1))) + print ('clf_all2', np.mean(np.array(clf_all2)), np.std(np.array(clf_all2))) + print ('adv_all1', np.mean(np.array(adv_all1)), np.std(np.array(adv_all1))) + print ('adv_all2', np.mean(np.array(adv_all2)), np.std(np.array(adv_all2))) + print ('adv_all3', np.mean(np.array(adv_all3)), np.std(np.array(adv_all3))) + return clf_all1, clf_all2, adv_all1, adv_all2, adv_all3 + + +if __name__ == '__main__': + clf_all1, 
clf_all2, adv_all1, adv_all2, adv_all3 = [], [], [], [], [] + for _ in range(10): + clf_all1, clf_all2, adv_all1, adv_all2, adv_all3 = run_all(clf_all1, clf_all2, adv_all1, adv_all2, adv_all3) + print ('final') + print ('clf_all1', np.mean(np.array(clf_all1)), np.std(np.array(clf_all1))) + print ('clf_all2', np.mean(np.array(clf_all2)), np.std(np.array(clf_all2))) + print ('adv_all1', np.mean(np.array(adv_all1)), np.std(np.array(adv_all1))) + print ('adv_all2', np.mean(np.array(adv_all2)), np.std(np.array(adv_all2))) + print ('adv_all3', np.mean(np.array(adv_all3)), np.std(np.array(adv_all3))) + + + + + diff --git a/main_fed.py b/main_fed.py new file mode 100755 index 0000000..1902540 --- /dev/null +++ b/main_fed.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Python version: 3.6 + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import copy +import numpy as np +from torchvision import datasets, transforms, models +import torch + +from utils.sampling import mnist_iid, mnist_noniid, cifar10_iid, cifar10_noniid +from utils.options import args_parser +from models.Update import LocalUpdate +from models.Nets import MLP, CNNMnist, CNNCifar, ResnetCifar +from models.Fed import FedAvg +from models.test import test_img + +import pdb + +if __name__ == '__main__': + # parse args + args = args_parser() + args.device = torch.device('cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu') + + trans_mnist = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) + + if args.model == 'resnet': + trans_cifar_train = transforms.Compose([transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.Resize([256,256]), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225])]) + trans_cifar_val = transforms.Compose([transforms.Resize([256,256]), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225])]) + else: + trans_cifar_train = transforms.Compose([transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225])]) + trans_cifar_val = transforms.Compose([transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225])]) + + # load dataset and split users + if args.dataset == 'mnist': + dataset_train = datasets.MNIST('data/mnist/', train=True, download=True, transform=trans_mnist) + dataset_test = datasets.MNIST('data/mnist/', train=False, download=True, transform=trans_mnist) + # sample users + if args.iid: + dict_users = mnist_iid(dataset_train, args.num_users) + else: + dict_users, _ = mnist_noniid(dataset_train, args.num_users) + elif args.dataset == 'cifar10': + dataset_train = datasets.CIFAR10('data/cifar10', train=True, download=True, transform=trans_cifar_train) + dataset_test = datasets.CIFAR10('data/cifar10', train=False, download=True, transform=trans_cifar_val) + if args.iid: + dict_users = cifar10_iid(dataset_train, args.num_users) + else: + dict_users, _ = cifar10_noniid(dataset_train, args.num_users) + # exit('Error: only consider IID setting in CIFAR10') + elif args.dataset == 'cifar100': + dataset_train = datasets.CIFAR100('data/cifar100', train=True, download=True, transform=trans_cifar_train) + dataset_test = datasets.CIFAR100('data/cifar100', 
train=False, download=True, transform=trans_cifar_val) + if args.iid: + dict_users = cifar10_iid(dataset_train, args.num_users) + else: + exit('Error: only consider IID setting in CIFAR10') + else: + exit('Error: unrecognized dataset') + img_size = dataset_train[0][0].shape + + # build model + if args.model == 'cnn' and args.dataset in ['cifar10', 'cifar100']: + net_glob = CNNCifar(args=args).to(args.device) + elif args.model == 'cnn' and args.dataset == 'mnist': + net_glob = CNNMnist(args=args).to(args.device) + elif args.model == 'resnet' and args.dataset in ['cifar10', 'cifar100']: + net_glob = ResnetCifar(args=args).to(args.device) + elif args.model == 'mlp': + len_in = 1 + for x in img_size: + len_in *= x + net_glob = MLP(dim_in=len_in, dim_hidden=256, dim_out=args.num_classes).to(args.device) + elif args.model == 'mlp_orig': + len_in = 1 + for x in img_size: + len_in *= x + net_glob = MLP(dim_in=len_in, dim_hidden=256, dim_out=args.num_classes).to(args.device) + else: + exit('Error: unrecognized model') + print(net_glob) + net_glob.train() + + # training + loss_train = [] + cv_loss, cv_acc = [], [] + val_loss_pre, counter = 0, 0 + net_best = None + best_loss = None + val_acc_list, net_list = [], [] + + lr = args.lr + results = [] + + for iter in range(args.epochs): + w_glob = None + loss_locals, grads_local = [], [] + m = max(int(args.frac * args.num_users), 1) + idxs_users = np.random.choice(range(args.num_users), m, replace=False) + print("Round {}, lr: {:.6f}, {}".format(iter, lr, idxs_users)) + + for idx in idxs_users: + local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx]) + net_local = copy.deepcopy(net_glob) + + w_local, loss = local.train(net=net_local.to(args.device)) + loss_locals.append(copy.deepcopy(loss)) + + if not args.grad_norm: + grads = 1.0 + else: + grads = [] + for grad in [param.grad for param in net_local.parameters()]: + if grad is not None: + grads.append(grad.view(-1)) + grads = torch.cat(grads).norm().item() + # print(grads) + grads_local.append(grads) + + if w_glob is None: + w_glob = copy.deepcopy(w_local) + for k in w_glob.keys(): + w_glob[k] *= grads + else: + for k in w_glob.keys(): + w_glob[k] += w_local[k] * grads + + lr *= args.lr_decay + + # update global weights + for k in w_glob.keys(): + w_glob[k] = torch.div(w_glob[k], sum(grads_local)) + + # copy weight to net_glob + net_glob.load_state_dict(w_glob) + + # print loss + loss_avg = sum(loss_locals) / len(loss_locals) + loss_train.append(loss_avg) + + if (iter + 1) % 1 == 0: + net_glob.eval() + acc_test, loss_test = test_img(net_glob, dataset_test, args) + print('Round {:3d}, Average loss {:.3f}, Test loss {:.3f}, Test accuracy: {:.2f}'.format( + iter, loss_avg, loss_test, acc_test)) + + results.append(np.array([iter, loss_avg, loss_test, acc_test])) + final_results = np.array(results) + + results_save_path = './log/fed_{}_{}_iid{}_num{}_C{}_le{}_gn{}.npy'.format( + args.dataset, args.model, args.iid, args.num_users, args.frac, args.local_ep, args.grad_norm) + np.save(results_save_path, final_results) + + model_save_path = './save/fed_{}_{}_iid{}_num{}_C{}_le{}_gn{}.pt'.format( + args.dataset, args.model, args.iid, args.num_users, args.frac, args.local_ep, args.grad_norm) + if best_loss is None or loss_test < best_loss: + best_loss = loss_test + torch.save(net_glob.state_dict(), model_save_path) + + # plot loss curve + plt.figure() + plt.plot(range(len(loss_train)), loss_train) + plt.ylabel('train_loss') + plt.savefig('./log/fed_{}_{}_{}_C{}_iid{}.png'.format(args.dataset, 
args.model, args.epochs, args.frac, args.iid)) + + # testing + net_glob.eval() + acc_train, loss_train = test_img(net_glob, dataset_train, args) + acc_test, loss_test = test_img(net_glob, dataset_test, args) + print("Training accuracy: {:.2f}".format(acc_train)) + print("Testing accuracy: {:.2f}".format(acc_test)) \ No newline at end of file diff --git a/main_lg.py b/main_lg.py new file mode 100755 index 0000000..702f0aa --- /dev/null +++ b/main_lg.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Python version: 3.6 + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import copy +import os +import itertools +import numpy as np +from scipy.stats import mode +from torchvision import datasets, transforms, models +import torch +from torch import nn + +from utils.sampling import mnist_iid, mnist_noniid, cifar10_iid, cifar10_noniid +from utils.options import args_parser +from models.Update import LocalUpdate +from models.Nets import MLP, CNNMnist, CNNCifar, ResnetCifar +from models.Fed import FedAvg +from models.test import test_img, test_img_local + +import pdb + +if __name__ == '__main__': + # parse args + args = args_parser() + args.device = torch.device('cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu') + + trans_mnist = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) + if args.model == 'resnet': + trans_cifar_train = transforms.Compose([transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.Resize([256,256]), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225])]) + trans_cifar_val = transforms.Compose([transforms.Resize([256,256]), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225])]) + else: + trans_cifar_train = transforms.Compose([transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225])]) + trans_cifar_val = transforms.Compose([transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225])]) + + # load dataset and split users + if args.dataset == 'mnist': + dataset_train = datasets.MNIST('data/mnist/', train=True, download=True, transform=trans_mnist) + dataset_test = datasets.MNIST('data/mnist/', train=False, download=True, transform=trans_mnist) + # sample users + if args.iid: + dict_users_train = mnist_iid(dataset_train, args.num_users) + dict_users_test = mnist_iid(dataset_test, args.num_users) + else: + dict_users_train, rand_set_all = mnist_noniid(dataset_train, args.num_users, num_shards=200, num_imgs=300, train=True) + dict_users_test, _ = mnist_noniid(dataset_test, args.num_users, num_shards=200, num_imgs=50, train=False, rand_set_all=rand_set_all) + + elif args.dataset == 'cifar10': + dataset_train = datasets.CIFAR10('data/cifar10', train=True, download=True, transform=trans_cifar_train) + dataset_test = datasets.CIFAR10('data/cifar10', train=False, download=True, transform=trans_cifar_val) + if args.iid: + dict_users_train = cifar10_iid(dataset_train, args.num_users) + dict_users_test = cifar10_iid(dataset_test, args.num_users) + else: + dict_users_train, rand_set_all = cifar10_noniid(dataset_train, args.num_users, num_shards=200, num_imgs=250, train=True) + dict_users_test, _ = 
cifar10_noniid(dataset_test, args.num_users, num_shards=200, num_imgs=50, train=False, rand_set_all=rand_set_all) + + elif args.dataset == 'cifar100': + dataset_train = datasets.CIFAR100('data/cifar100', train=True, download=True, transform=trans_cifar_train) + dataset_test = datasets.CIFAR100('data/cifar100', train=False, download=True, transform=trans_cifar_val) + if args.iid: + dict_users_train = cifar10_iid(dataset_train, args.num_users) + else: + exit('Error: only consider IID setting in CIFAR10') + else: + exit('Error: unrecognized dataset') + + import pdb; pdb.set_trace() + + img_size = dataset_train[0][0].shape + + # build model + if args.model == 'cnn' and args.dataset in ['cifar10', 'cifar100']: + net_glob = CNNCifar(args=args).to(args.device) + elif args.model == 'cnn' and args.dataset == 'mnist': + net_glob = CNNMnist(args=args).to(args.device) + elif args.model == 'resnet' and args.dataset in ['cifar10', 'cifar100']: + net_glob = ResnetCifar(args=args).to(args.device) + elif args.model == 'mlp': + len_in = 1 + for x in img_size: + len_in *= x + net_glob = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes).to(args.device) + else: + exit('Error: unrecognized model') + + print(net_glob) + net_glob.train() + if args.load_fed: + fed_model_path = './save/keep/fed_{}_{}_iid{}_num{}_C{}_le{}_gn{}.npy'.format( + args.dataset, args.model, args.iid, args.num_users, args.frac, args.local_ep, args.grad_norm) + if len(args.load_fed_name) > 0: + fed_model_path = './save/keep/{}'.format(args.load_fed_name) + net_glob.load_state_dict(torch.load(fed_model_path)) + + total_num_layers = len(net_glob.weight_keys) + w_glob_keys = net_glob.weight_keys[total_num_layers - args.num_layers_keep:] + w_glob_keys = list(itertools.chain.from_iterable(w_glob_keys)) + + num_param_glob = 0 + num_param_local = 0 + for key in net_glob.state_dict().keys(): + num_param_local += net_glob.state_dict()[key].numel() + if key in w_glob_keys: + num_param_glob += net_glob.state_dict()[key].numel() + percentage_param = 100 * float(num_param_glob) / num_param_local + print('# Params: {} (local), {} (global); Percentage {:.2f} ({}/{})'.format( + num_param_local, num_param_glob, percentage_param, num_param_glob, num_param_local)) + # generate list of local models for each user + net_local_list = [] + for user_ix in range(args.num_users): + net_local_list.append(copy.deepcopy(net_glob)) + + criterion = nn.CrossEntropyLoss() + + + def test_img_ensemble_all(): + probs_all = [] + preds_all = [] + for idx in range(args.num_users): + net_local = net_local_list[idx] + net_local.eval() + _, _, probs = test_img(net_local, dataset_test, args, return_probs=True, user_idx=idx) + probs_all.append(probs.detach()) + + preds = probs.data.max(1, keepdim=True)[1].cpu().numpy().reshape(-1) + preds_all.append(preds) + + labels = np.array(dataset_test.test_labels) + preds_probs = torch.mean(torch.stack(probs_all), dim=0) + + # ensemble metrics + preds_avg = preds_probs.data.max(1, keepdim=True)[1].cpu().numpy().reshape(-1) + loss_test = criterion(preds_probs, torch.tensor(labels).cuda()).item() + acc_test = (preds_avg == labels).mean() * 100 + + return loss_test, acc_test + + def test_img_local_all(): + acc_test_local = 0 + loss_test_local = 0 + for idx in range(args.num_users): + net_local = net_local_list[idx] + net_local.eval() + a, b = test_img_local(net_local, dataset_test, args, user_idx=idx, idxs=dict_users_test[idx]) + + acc_test_local += a + loss_test_local += b + acc_test_local /= args.num_users + loss_test_local /= 
args.num_users + + return acc_test_local, loss_test_local + + def test_img_avg_all(): + net_glob_temp = copy.deepcopy(net_glob) + w_keys_epoch = net_glob.state_dict().keys() + w_glob_temp = {} + for idx in range(args.num_users): + net_local = net_local_list[idx] + w_local = net_local.state_dict() + + if len(w_glob_temp) == 0: + w_glob_temp = copy.deepcopy(w_local) + else: + for k in w_keys_epoch: + w_glob_temp[k] += w_local[k] + + for k in w_keys_epoch: + w_glob_temp[k] = torch.div(w_glob_temp[k], args.num_users) + net_glob_temp.load_state_dict(w_glob_temp) + acc_test_avg, loss_test_avg = test_img(net_glob_temp, dataset_test, args) + + return acc_test_avg, loss_test_avg + + if args.local_ep_pretrain > 0: + # pretrain each local model + pretrain_save_path = 'pretrain/{}/{}_{}/user_{}/ep_{}/'.format(args.model, args.dataset, + 'iid' if args.iid else 'noniid', args.num_users, + args.local_ep_pretrain) + if not os.path.exists(pretrain_save_path): + os.makedirs(pretrain_save_path) + + print("\nPretraining local models...") + for idx in range(args.num_users): + net_local = net_local_list[idx] + net_local_path = os.path.join(pretrain_save_path, '{}.pt'.format(idx)) + if os.path.exists(net_local_path): # check if we have a saved model + net_local.load_state_dict(torch.load(net_local_path)) + else: + local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users_train[idx], pretrain=True) + w_local, loss = local.train(net=net_local.to(args.device)) + print('Local model {}, Train Epoch Loss: {:.4f}'.format(idx, loss)) + torch.save(net_local.state_dict(), net_local_path) + + print("Getting initial loss and acc...") + acc_test_local, loss_test_local = test_img_local_all() + acc_test_avg, loss_test_avg = test_img_avg_all() + loss_test, acc_test = test_img_ensemble_all() + + print('Initial Ensemble: Loss (local): {:.3f}, Acc (local): {:.2f}, Loss (avg): {:.3}, Acc (avg): {:.2f}, Loss (ens) {:.3f}, Acc: (ens) {:.2f}, '.format( + loss_test_local, acc_test_local, loss_test_avg, acc_test_avg, loss_test, acc_test)) + + # training + loss_train = [] + cv_loss, cv_acc = [], [] + val_loss_pre, counter = 0, 0 + net_best = None + best_loss = None + val_acc_list, net_list = [], [] + + lr = args.lr + results = [] + + for iter in range(args.epochs): + w_glob = {} + loss_locals, grads_local = [], [] + m = max(int(args.frac * args.num_users), 1) + idxs_users = np.random.choice(range(args.num_users), m, replace=False) + # w_keys_epoch = net_glob.state_dict().keys() if (iter + 1) % 25 == 0 else w_glob_keys + w_keys_epoch = w_glob_keys + + if args.verbose: + print("Round {}: lr: {:.6f}, {}".format(iter, lr, idxs_users)) + for idx in idxs_users: + local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users_train[idx]) + net_local = net_local_list[idx] + + w_local, loss = local.train(net=net_local.to(args.device), lr=lr) + loss_locals.append(copy.deepcopy(loss)) + + modules_glob = set([x.split('.')[0] for x in w_keys_epoch]) + modules_all = net_local.__dict__['_modules'] + + # use grads to calculate a weighted average + if not args.grad_norm: + grads = 1.0 + else: + grads = [] + for key in modules_glob: + module = modules_all[key] + grad = module.weight.grad + if grad is not None: + grads.append(grad.view(-1)) + + try: + grad = module.bias.grad + if grad is not None: + grads.append(grad.view(-1)) + except: + pass + grads = torch.cat(grads).norm().item() + # print(grads) + grads_local.append(grads) + + # sum up weights + if len(w_glob) == 0: + w_glob = copy.deepcopy(net_glob.state_dict()) + for k in 
w_keys_epoch: # this depends on the layers being named the same (in Nets.py) + w_glob[k] = w_local[k] * grads + else: + for k in w_keys_epoch: + w_glob[k] += w_local[k] * grads + + if (iter+1) % int(args.num_users * args.frac): + lr *= args.lr_decay + + # get weighted average for global weights + for k in w_keys_epoch: + w_glob[k] = torch.div(w_glob[k], sum(grads_local)) + + # copy weight to the global model (not really necessary) + net_glob.load_state_dict(w_glob) + + # copy weights to each local model + for idx in range(args.num_users): + net_local = net_local_list[idx] + w_local = net_local.state_dict() + for k in w_keys_epoch: + w_local[k] = w_glob[k] + + net_local.load_state_dict(w_local) + + loss_avg = sum(loss_locals) / len(loss_locals) + loss_train.append(loss_avg) + + # eval + acc_test_local, loss_test_local = test_img_local_all() + acc_test_avg, loss_test_avg = test_img_avg_all() + + if (iter + 1) % args.test_freq == 0: # this takes too much time, so we run it less frequently + loss_test, acc_test = test_img_ensemble_all() + print('Round {:3d}, Avg Loss {:.3f}, Loss (local): {:.3f}, Acc (local): {:.2f}, Loss (avg): {:.3}, Acc (avg): {:.2f}, Loss (ens) {:.3f}, Acc: (ens) {:.2f}, '.format( + iter, loss_avg, loss_test_local, acc_test_local, loss_test_avg, acc_test_avg, loss_test, acc_test)) + results.append(np.array([iter, loss_avg, loss_test_local, acc_test_local, loss_test_avg, acc_test_avg, loss_test, acc_test])) + + else: + print('Round {:3d}, Avg Loss {:.3f}, Loss (local): {:.3f}, Acc (local): {:.2f}, Loss (avg): {:.3}, Acc (avg): {:.2f}'.format( + iter, loss_avg, loss_test_local, acc_test_local, loss_test_avg, acc_test_avg)) + results.append(np.array([iter, loss_avg, loss_test_local, acc_test_local, loss_test_avg, acc_test_avg, np.nan, np.nan])) + + final_results = np.array(results) + results_save_path = './log/lg_{}_{}_keep{}_iid{}_num{}_C{}_le{}_gn{}_pt{}_load{}_tfreq{}.npy'.format( + args.dataset, args.model, args.num_layers_keep, args.iid, args.num_users, args.frac, + args.local_ep, args.grad_norm, args.local_ep_pretrain, args.load_fed, args.test_freq) + np.save(results_save_path, final_results) + + # plot loss curve + plt.figure() + plt.plot(range(len(loss_train)), loss_train) + plt.ylabel('train_loss') + plt.savefig('./log/fed_{}_{}_{}_C{}_iid{}.png'.format(args.dataset, args.model, args.epochs, args.frac, args.iid)) + + # testing + net_glob.eval() + acc_train, loss_train = test_img(net_glob, dataset_train, args) + acc_test, loss_test = test_img(net_glob, dataset_test, args) + print("Training accuracy: {:.2f}".format(acc_train)) + print("Testing accuracy: {:.2f}".format(acc_test)) \ No newline at end of file diff --git a/main_nn.py b/main_nn.py new file mode 100755 index 0000000..7e23435 --- /dev/null +++ b/main_nn.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Python version: 3.6 + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt + +import torch +import torch.nn.functional as F +from torch.utils.data import DataLoader +import torch.optim as optim +from torchvision import datasets, transforms + +from utils.options import args_parser +from models.Nets import MLP, CNNMnist, CNNCifar + + +def test(net_g, data_loader): + # testing + net_g.eval() + test_loss = 0 + correct = 0 + l = len(data_loader) + for idx, (data, target) in enumerate(data_loader): + data, target = data.to(args.device), target.to(args.device) + log_probs = net_g(data) + test_loss += F.cross_entropy(log_probs, target).item() + y_pred = log_probs.data.max(1, 
keepdim=True)[1] + correct += y_pred.eq(target.data.view_as(y_pred)).long().cpu().sum() + + test_loss /= len(data_loader.dataset) + test_acc = 100. * float(correct) / len(data_loader.dataset) + if args.verbose: + print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format( + test_loss, correct, len(data_loader.dataset), test_acc)) + + return test_acc, test_loss + + +if __name__ == '__main__': + # parse args + args = args_parser() + args.device = torch.device('cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu') + + torch.manual_seed(args.seed) + + # load dataset and split users + if args.dataset == 'mnist': + dataset_train = datasets.MNIST('./data/mnist/', train=True, download=True, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + img_size = dataset_train[0][0].shape + elif args.dataset == 'cifar': + transform = transforms.Compose( + [transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + dataset_train = datasets.CIFAR10('./data/cifar', train=True, transform=transform, target_transform=None, download=True) + img_size = dataset_train[0][0].shape + else: + exit('Error: unrecognized dataset') + + # testing + if args.dataset == 'mnist': + dataset_test = datasets.MNIST('./data/mnist/', train=False, download=True, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + test_loader = DataLoader(dataset_test, batch_size=1000, shuffle=False) + elif args.dataset == 'cifar': + transform = transforms.Compose( + [transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + dataset_test = datasets.CIFAR10('./data/cifar', train=False, transform=transform, target_transform=None, download=True) + test_loader = DataLoader(dataset_test, batch_size=1000, shuffle=False) + else: + exit('Error: unrecognized dataset') + + # build model + if args.model == 'cnn' and args.dataset == 'cifar': + net_glob = CNNCifar(args=args).to(args.device) + elif args.model == 'cnn' and args.dataset == 'mnist': + net_glob = CNNMnist(args=args).to(args.device) + elif args.model == 'mlp': + len_in = 1 + for x in img_size: + len_in *= x + net_glob = MLP(dim_in=len_in, dim_hidden=256, dim_out=args.num_classes).to(args.device) + else: + exit('Error: unrecognized model') + print(net_glob) + + # training + optimizer = optim.SGD(net_glob.parameters(), lr=args.lr, momentum=args.momentum) + train_loader = DataLoader(dataset_train, batch_size=64, shuffle=True) + + list_loss = [] + net_glob.train() + for epoch in range(args.epochs): + batch_loss = [] + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.to(args.device), target.to(args.device) + optimizer.zero_grad() + output = net_glob(data) + loss = F.cross_entropy(output, target) + loss.backward() + optimizer.step() + if args.verbose and batch_idx % 50 == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_loader.dataset), + 100. 
* batch_idx / len(train_loader), loss.item())) + batch_loss.append(loss.item()) + loss_avg = sum(batch_loss)/len(batch_loss) + list_loss.append(loss_avg) + test_acc, test_loss = test(net_glob, test_loader) + print('Train Epoch: {}, Train loss: {}, Test loss: {}, Test acc: {}'.format( + epoch, loss_avg, test_loss, test_acc)) + + # plot loss + plt.figure() + plt.plot(range(len(list_loss)), list_loss) + plt.xlabel('epochs') + plt.ylabel('train loss') + plt.savefig('./log/nn_{}_{}_{}.png'.format(args.dataset, args.model, args.epochs)) + + + print('test on', len(dataset_test), 'samples') + test_acc, test_loss = test(net_glob, test_loader) diff --git a/models/Fed.py b/models/Fed.py new file mode 100755 index 0000000..29a03d1 --- /dev/null +++ b/models/Fed.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Python version: 3.6 + +import copy +import torch +from torch import nn + + +def FedAvg(w): + w_avg = copy.deepcopy(w[0]) + for k in w_avg.keys(): + for i in range(1, len(w)): + w_avg[k] += w[i][k] + w_avg[k] = torch.div(w_avg[k], len(w)) + return w_avg diff --git a/models/Nets.py b/models/Nets.py new file mode 100755 index 0000000..7b6b2b5 --- /dev/null +++ b/models/Nets.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Python version: 3.6 + +import torch +from torch import nn +import torch.nn.functional as F +from torchvision import models + + +class MLP(nn.Module): + def __init__(self, dim_in, dim_hidden, dim_out): + super(MLP, self).__init__() + self.layer_input = nn.Linear(dim_in, 512) + self.relu = nn.ReLU() + self.dropout = nn.Dropout() + self.layer_hidden1 = nn.Linear(512, 256) + self.layer_hidden2 = nn.Linear(256, 256) + self.layer_hidden3 = nn.Linear(256, 128) + self.layer_out = nn.Linear(128, dim_out) + self.softmax = nn.Softmax(dim=1) + self.weight_keys = [['layer_input.weight', 'layer_input.bias'], + ['layer_hidden1.weight', 'layer_hidden1.bias'], + ['layer_hidden2.weight', 'layer_hidden2.bias'], + ['layer_hidden3.weight', 'layer_hidden3.bias'], + ['layer_out.weight', 'layer_out.bias'] + ] + + def forward(self, x): + x = x.view(-1, x.shape[1]*x.shape[-2]*x.shape[-1]) + x = self.layer_input(x) + x = self.relu(x) + + x = self.layer_hidden1(x) + x = self.relu(x) + + x = self.layer_hidden2(x) + x = self.relu(x) + + x = self.layer_hidden3(x) + x = self.relu(x) + + x = self.layer_out(x) + return self.softmax(x) + + +class CNNMnist(nn.Module): + def __init__(self, args): + super(CNNMnist, self).__init__() + self.conv1 = nn.Conv2d(args.num_channels, 10, kernel_size=5) + self.conv2 = nn.Conv2d(10, 20, kernel_size=5) + self.conv2_drop = nn.Dropout2d() + self.fc1 = nn.Linear(320, 50) + self.fc2 = nn.Linear(50, args.num_classes) + + def forward(self, x): + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = x.view(-1, x.shape[1]*x.shape[2]*x.shape[3]) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + return F.log_softmax(x, dim=1) + + +class CNNCifar(nn.Module): + def __init__(self, args): + super(CNNCifar, self).__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.pool = nn.MaxPool2d(2, 2) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16 * 5 * 5, 120) + self.fc2 = nn.Linear(120, 100) + self.fc3 = nn.Linear(100, args.num_classes) + + # self.weight_keys = [['fc3.weight', 'fc3.bias'], + # ['fc2.weight', 'fc2.bias'], + # ['fc1.weight', 'fc1.bias'], + # ['conv2.weight', 'conv2.bias'], + # ['conv1.weight', 'conv1.bias'], + # ] + + # self.weight_keys = 
[['conv1.weight', 'conv1.bias'], + # ['conv2.weight', 'conv2.bias'], + # ['fc2.weight', 'fc2.bias'], + # ['fc3.weight', 'fc3.bias'], + # ['fc1.weight', 'fc1.bias'], + # ] + + self.weight_keys = [['fc1.weight', 'fc1.bias'], + ['fc2.weight', 'fc2.bias'], + ['fc3.weight', 'fc3.bias'], + ['conv2.weight', 'conv2.bias'], + ['conv1.weight', 'conv1.bias'], + ] + + def forward(self, x): + x = self.pool(F.relu(self.conv1(x))) + x = self.pool(F.relu(self.conv2(x))) + x = x.view(-1, 16 * 5 * 5) + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = self.fc3(x) + return F.log_softmax(x, dim=1) + + +class ResnetCifar(nn.Module): + def __init__(self, args): + super(ResnetCifar, self).__init__() + self.extractor = models.resnet18(pretrained=False) + self.fflayer = nn.Sequential(nn.Linear(1000, args.num_classes)) + + def forward(self, x): + x = self.extractor(x) + x = self.fflayer(x) + return F.log_softmax(x, dim=1) \ No newline at end of file diff --git a/models/Update.py b/models/Update.py new file mode 100755 index 0000000..f275497 --- /dev/null +++ b/models/Update.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Python version: 3.6 + +import torch +from torch import nn, autograd +from torch.utils.data import DataLoader, Dataset +import numpy as np +import random +from sklearn import metrics + + +class DatasetSplit(Dataset): + def __init__(self, dataset, idxs): + self.dataset = dataset + self.idxs = list(idxs) + + def __len__(self): + return len(self.idxs) + + def __getitem__(self, item): + image, label = self.dataset[self.idxs[item]] + return image, label + +class LocalUpdate_noLG(object): + def __init__(self, args, dataset=None, pretrain=False): + self.args = args + self.global_loss_func = nn.BCELoss().to(args.device) + self.adv_loss_func = nn.BCELoss(reduce=False).to(args.device) + self.selected_clients = [] + self.ldr_train = dataset + self.pretrain = pretrain + + def train(self, global_net, adv_model, lambdas, idx=-1, lr=0.1): + global_net.train() + adv_model.train() + # train and update + global_optimizer = torch.optim.SGD(global_net.parameters(), lr=lr, momentum=0.5) + adv_optimizer = torch.optim.SGD(adv_model.parameters(), lr=lr, momentum=0.5) + + global_epoch_loss = [] + adv_epoch_loss = [] + if self.pretrain: + local_eps = self.args.local_ep_pretrain + else: + local_eps = self.args.local_ep + for iter in range(local_eps): + global_batch_loss = [] + adv_batch_loss = [] + for (batch_idx, data) in enumerate(self.ldr_train): + (images, labels, protected) = data + images, labels, protected = images.to(self.args.device), labels.to(self.args.device), protected.to(self.args.device) + global_net.zero_grad() + adv_model.zero_grad() + + mid, log_probs = global_net(images) + pred_protected = adv_model(mid) + + global_loss = self.global_loss_func(log_probs, labels) + global_loss.backward(retain_graph=True) + global_optimizer.step() + + adv_loss = (self.adv_loss_func(pred_protected, protected) * lambdas.to(self.args.device)).mean() + adv_loss.backward() + adv_optimizer.step() + + global_batch_loss.append(global_loss.item()) + adv_batch_loss.append(adv_loss.item()) + + global_epoch_loss.append(sum(global_batch_loss)/len(global_batch_loss)) +
adv_epoch_loss.append(sum(adv_batch_loss)/len(adv_batch_loss)) + + w = global_net.state_dict() + w_loss = sum(global_epoch_loss) / len(global_epoch_loss) + adv = adv_model.state_dict() + adv_loss = sum(adv_epoch_loss) / len(adv_epoch_loss) + + return w, w_loss, adv, adv_loss + + +class LocalUpdate(object): + def __init__(self, args, dataset=None, pretrain=False): + self.args = args + self.loss_func = nn.BCELoss().to(args.device) + self.adv_criterion = nn.BCELoss(reduce=False).to(args.device) + self.selected_clients = [] + self.ldr_train = dataset + self.pretrain = pretrain + self.lambdas = torch.Tensor([0.03, 0.03]) + + def train(self, local_net, local_opt, local_adv, adv_opt, global_net, global_opt, idx=-1, lr=0.1): + global_net.train() + local_net.train() + local_adv.train() + # train and update + # optimizer = torch.optim.SGD(global_net.parameters(), lr=lr, momentum=0.5) + + epoch_loss = [] + if self.pretrain: + local_eps = self.args.local_ep_pretrain + else: + local_eps = self.args.local_ep + for iter in range(local_eps): + batch_loss = [] + for (batch_idx, data) in enumerate(self.ldr_train): + (images, labels, protected) = data + images, labels = images.to(self.args.device), labels.to(self.args.device) + local_net.zero_grad() + global_net.zero_grad() + mid, _ = local_net(images) + log_protected = local_adv(mid) + log_probs = global_net(mid) + # import pdb + # pdb.set_trace() + loss = self.loss_func(log_probs, labels) + adv_loss = (self.adv_criterion(log_protected.to(self.args.device), protected.to(self.args.device)) * self.lambdas.to(self.args.device)).mean() + if self.args.adv: + loss = loss - adv_loss + loss.backward(retain_graph=True) + adv_loss.backward() + local_opt.step() + global_opt.step() + adv_opt.step() + + batch_loss.append(loss.item()) + + if not self.pretrain and self.args.verbose and batch_idx % 300 == 0: + if idx < 0: + print('Update Epoch: {} [{}/{} ({:.0f}%)], Epoch Loss: {:.4f}, Batch Loss: {:.4f}'.format( + iter, batch_idx * len(images), len(self.ldr_train.dataset), 100. * batch_idx / len(self.ldr_train), + sum(batch_loss)/len(batch_loss), loss.item())) + else: + print('Local model {}, Update Epoch: {} [{}/{} ({:.0f}%)], Epoch Loss: {:.4f}, Batch Loss: {:.4f}'.format( + idx, iter, batch_idx * len(images), len(self.ldr_train.dataset), 100. 
* batch_idx / len(self.ldr_train), + sum(batch_loss)/len(batch_loss), loss.item())) + + epoch_loss.append(sum(batch_loss)/len(batch_loss)) + + return global_net.state_dict(), sum(epoch_loss) / len(epoch_loss) + diff --git a/models/__init__.py b/models/__init__.py new file mode 100755 index 0000000..3918c7d --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @python: 3.6 + diff --git a/models/test.py b/models/test.py new file mode 100755 index 0000000..660544e --- /dev/null +++ b/models/test.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @python: 3.6 + +import torch +from torch import nn +import torch.nn.functional as F +from torch.utils.data import DataLoader, Dataset + +class DatasetSplit(Dataset): + def __init__(self, dataset, idxs): + self.dataset = dataset + self.idxs = list(idxs) + + def __len__(self): + return len(self.idxs) + + def __getitem__(self, item): + image, label = self.dataset[self.idxs[item]] + return image, label + + +def test_img(net_g, datatest, args, return_probs=False, user_idx=-1): + net_g.eval() + # testing + test_loss = 0 + correct = 0 + data_loader = DataLoader(datatest, batch_size=args.bs) + l = len(data_loader) + + probs = [] + + for idx, (data, target) in enumerate(data_loader): + if args.gpu != -1: + data, target = data.cuda(), target.cuda() + log_probs = net_g(data) + probs.append(log_probs) + + # sum up batch loss + test_loss += F.cross_entropy(log_probs, target, reduction='sum').item() + # get the index of the max log-probability + y_pred = log_probs.data.max(1, keepdim=True)[1] + correct += y_pred.eq(target.data.view_as(y_pred)).long().cpu().sum() + + test_loss /= len(data_loader.dataset) + accuracy = 100.00 * float(correct) / len(data_loader.dataset) + if args.verbose: + if user_idx < 0: + print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format( + test_loss, correct, len(data_loader.dataset), accuracy)) + else: + print('Local model {}: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format( + user_idx, test_loss, correct, len(data_loader.dataset), accuracy)) + + if return_probs: + return accuracy, test_loss, torch.cat(probs) + else: + return accuracy, test_loss + + +def test_img_local(net_g, dataset, args, user_idx=-1, idxs=None): + net_g.eval() + # testing + test_loss = 0 + correct = 0 + # data_loader = DataLoader(dataset, batch_size=args.bs) + data_loader = DataLoader(DatasetSplit(dataset, idxs), batch_size=args.bs, shuffle=True) + l = len(data_loader) + + for idx, (data, target) in enumerate(data_loader): + if args.gpu != -1: + data, target = data.cuda(), target.cuda() + log_probs = net_g(data) + + # sum up batch loss + test_loss += F.cross_entropy(log_probs, target, reduction='sum').item() + # get the index of the max log-probability + y_pred = log_probs.data.max(1, keepdim=True)[1] + correct += y_pred.eq(target.data.view_as(y_pred)).long().cpu().sum() + + test_loss /= len(data_loader.dataset) + accuracy = 100.00 * float(correct) / len(data_loader.dataset) + if args.verbose: + print('Local model {}: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format( + user_idx, test_loss, correct, len(data_loader.dataset), accuracy)) + + return accuracy, test_loss diff --git a/requirements.txt b/requirements.txt new file mode 100755 index 0000000..bb0425d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +torch==0.4.1 +torchvision==0.2.1 + diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100755 index 0000000..3918c7d --- /dev/null +++ 
b/utils/__init__.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @python: 3.6 + diff --git a/utils/options.py b/utils/options.py new file mode 100755 index 0000000..11859cb --- /dev/null +++ b/utils/options.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Python version: 3.6 + +import argparse + +def args_parser(): + parser = argparse.ArgumentParser() + # federated arguments + parser.add_argument('--epochs', type=int, default=10, help="rounds of training") + parser.add_argument('--adv', type=int, default=1, help="whether to use adversarial training") + parser.add_argument('--num_users', type=int, default=2, help="number of users: K") + parser.add_argument('--frac', type=float, default=0.1, help="the fraction of clients: C") + parser.add_argument('--local_ep', type=int, default=5, help="the number of local epochs: E") + parser.add_argument('--local_bs', type=int, default=10, help="local batch size: B") + parser.add_argument('--bs', type=int, default=128, help="test batch size") + parser.add_argument('--lr', type=float, default=0.01, help="learning rate") + parser.add_argument('--momentum', type=float, default=0.5, help="SGD momentum (default: 0.5)") + parser.add_argument('--split', type=str, default='user', help="train-test split type, user or sample") + parser.add_argument('--grad_norm', action='store_true', help='use_gradnorm_avging') + parser.add_argument('--local_ep_pretrain', type=int, default=0, help="the number of pretraining local epochs") + parser.add_argument('--lr_decay', type=float, default=1.0, help="learning rate decay per round") + + # model arguments + parser.add_argument('--model', type=str, default='mlp', help='model name') + parser.add_argument('--kernel_num', type=int, default=9, help='number of each kind of kernel') + parser.add_argument('--kernel_sizes', type=str, default='3,4,5', + help='comma-separated kernel size to use for convolution') + parser.add_argument('--norm', type=str, default='batch_norm', help="batch_norm, layer_norm, or None") + parser.add_argument('--num_filters', type=int, default=32, help="number of filters for conv nets") + parser.add_argument('--max_pool', type=str, default='True', + help="whether to use max pooling rather than strided convolutions") + parser.add_argument('--num_layers_keep', type=int, default=1, help='number of layers to keep') + + # other arguments + parser.add_argument('--dataset', type=str, default='mnist', help="name of dataset") + parser.add_argument('--iid', type=int, default=0, help='whether i.i.d. or not') + parser.add_argument('--num_classes', type=int, default=10, help="number of classes") + parser.add_argument('--num_channels', type=int, default=3, help="number of channels of images") + parser.add_argument('--gpu', type=int, default=0, help="GPU ID, -1 for CPU") + parser.add_argument('--stopping_rounds', type=int, default=10, help='rounds of early stopping') + parser.add_argument('--verbose', action='store_true', help='verbose print') + parser.add_argument('--print_freq', type=int, default=100, help="print loss frequency during training") + parser.add_argument('--seed', type=int, default=1, help='random seed (default: 1)') + parser.add_argument('--test_freq', type=int, default=1, help='how often to test on val set') + parser.add_argument('--load_fed', action='store_true', help='load pretrained federated model for local_global') + parser.add_argument('--load_fed_name', type=str, default='', help='define pretrained federated model path') + + args = parser.parse_args() + return args diff --git 
a/utils/sampling.py b/utils/sampling.py new file mode 100755 index 0000000..67f50f8 --- /dev/null +++ b/utils/sampling.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Python version: 3.6 + + +import numpy as np +from torchvision import datasets, transforms + +def fair_iid(dataset, num_users): + """ + Sample I.I.D. client data from fairness dataset + :param dataset: + :param num_users: + :return: dict of image index + """ + num_items = int(len(dataset)/num_users) + dict_users, all_idxs = {}, [i for i in range(len(dataset))] + for i in range(num_users): + dict_users[i] = set(np.random.choice(all_idxs, num_items, replace=False)) + all_idxs = list(set(all_idxs) - dict_users[i]) + return dict_users + +def fair_noniid(train_data, num_users, num_shards=200, num_imgs=300, train=True, rand_set_all=[]): + """ + Sample non-I.I.D client data from fairness dataset + :param dataset: + :param num_users: + :return: + """ + assert num_shards % num_users == 0 + shard_per_user = int(num_shards / num_users) + + idx_shard = [i for i in range(num_shards)] + dict_users = {i: np.array([], dtype='int64') for i in range(num_users)} + idxs = np.arange(num_shards*num_imgs) + + #import pdb; pdb.set_trace() + + labels = train_data[1].numpy().reshape(len(train_data[0]),) + assert num_shards * num_imgs == len(labels) + #import pdb; pdb.set_trace() + + # sort labels + idxs_labels = np.vstack((idxs, labels)) + idxs_labels = idxs_labels[:,idxs_labels[1,:].argsort()] + idxs = idxs_labels[0,:] + + # divide and assign + if len(rand_set_all) == 0: + for i in range(num_users): + rand_set = set(np.random.choice(idx_shard, shard_per_user, replace=False)) + for rand in rand_set: + rand_set_all.append(rand) + + idx_shard = list(set(idx_shard) - rand_set) # remove shards from possible choices for other users + for rand in rand_set: + dict_users[i] = np.concatenate((dict_users[i], idxs[rand*num_imgs:(rand+1)*num_imgs]), axis=0) + + else: # this only works if the train and test set have the same distribution of labels + for i in range(num_users): + rand_set = rand_set_all[i*shard_per_user: (i+1)*shard_per_user] + for rand in rand_set: + dict_users[i] = np.concatenate((dict_users[i], idxs[rand*num_imgs:(rand+1)*num_imgs]), axis=0) + + return dict_users, rand_set_all + +def mnist_iid(dataset, num_users): + """ + Sample I.I.D. 
client data from MNIST dataset + :param dataset: + :param num_users: + :return: dict of image index + """ + num_items = int(len(dataset)/num_users) + dict_users, all_idxs = {}, [i for i in range(len(dataset))] + for i in range(num_users): + dict_users[i] = set(np.random.choice(all_idxs, num_items, replace=False)) + all_idxs = list(set(all_idxs) - dict_users[i]) + return dict_users + + +def mnist_noniid(dataset, num_users, num_shards=200, num_imgs=300, train=True, rand_set_all=[]): + """ + Sample non-I.I.D client data from MNIST dataset + :param dataset: + :param num_users: + :return: + """ + + assert num_shards % num_users == 0 + shard_per_user = int(num_shards / num_users) + + idx_shard = [i for i in range(num_shards)] + dict_users = {i: np.array([], dtype='int64') for i in range(num_users)} + idxs = np.arange(num_shards*num_imgs) + if train: + labels = dataset.train_labels.numpy() + else: + labels = dataset.test_labels.numpy() + + assert num_shards * num_imgs == len(labels) + + # sort labels + idxs_labels = np.vstack((idxs, labels)) + idxs_labels = idxs_labels[:,idxs_labels[1,:].argsort()] + idxs = idxs_labels[0,:] + + # divide and assign + if len(rand_set_all) == 0: + for i in range(num_users): + rand_set = set(np.random.choice(idx_shard, shard_per_user, replace=False)) + for rand in rand_set: + rand_set_all.append(rand) + + idx_shard = list(set(idx_shard) - rand_set) # remove shards from possible choices for other users + for rand in rand_set: + dict_users[i] = np.concatenate((dict_users[i], idxs[rand*num_imgs:(rand+1)*num_imgs]), axis=0) + + else: # this only works if the train and test set have the same distribution of labels + for i in range(num_users): + rand_set = rand_set_all[i*shard_per_user: (i+1)*shard_per_user] + for rand in rand_set: + dict_users[i] = np.concatenate((dict_users[i], idxs[rand*num_imgs:(rand+1)*num_imgs]), axis=0) + + return dict_users, rand_set_all + + +def cifar10_iid(dataset, num_users): + """ + Sample I.I.D. 
client data from CIFAR10 dataset + :param dataset: + :param num_users: + :return: dict of image index + """ + num_items = int(len(dataset)/num_users) + dict_users, all_idxs = {}, [i for i in range(len(dataset))] + for i in range(num_users): + dict_users[i] = set(np.random.choice(all_idxs, num_items, replace=False)) + all_idxs = list(set(all_idxs) - dict_users[i]) + return dict_users + +''' +def cifar10_noniid(dataset, num_users): + """ + Sample non-I.I.D. client data from CIFAR-10 dataset + :param dataset: + :param num_users: + :return: + """ + num_shards, num_imgs = 200, 250 + idx_shard = [i for i in range(num_shards)] + dict_users = {i: np.array([], dtype='int64') for i in range(num_users)} + idxs = np.arange(num_shards*num_imgs) + labels = np.array(dataset.train_labels) + + # sort labels + idxs_labels = np.vstack((idxs, labels)) + idxs_labels = idxs_labels[:,idxs_labels[1,:].argsort()] + idxs = idxs_labels[0,:] + + # divide and assign + for i in range(num_users): + rand_set = set(np.random.choice(idx_shard, 2, replace=False)) + idx_shard = list(set(idx_shard) - rand_set) + for rand in rand_set: + dict_users[i] = np.concatenate((dict_users[i], idxs[rand*num_imgs:(rand+1)*num_imgs]), axis=0) + return dict_users +''' + +def cifar10_noniid(dataset, num_users, num_shards=200, num_imgs=250, train=True, rand_set_all=[]): + """ + Sample non-I.I.D. client data from CIFAR-10 dataset + :param dataset: + :param num_users: + :return: + """ + + assert num_shards % num_users == 0 + shard_per_user = int(num_shards / num_users) + + idx_shard = [i for i in range(num_shards)] + dict_users = {i: np.array([], dtype='int64') for i in range(num_users)} + idxs = np.arange(num_shards*num_imgs) + if train: + labels = np.array(dataset.train_labels) + else: + labels = np.array(dataset.test_labels) + + assert num_shards * num_imgs == len(labels) + + # sort labels + idxs_labels = np.vstack((idxs, labels)) + idxs_labels = idxs_labels[:,idxs_labels[1,:].argsort()] + idxs = idxs_labels[0,:] + + # divide and assign + if len(rand_set_all) == 0: + for i in range(num_users): + rand_set = set(np.random.choice(idx_shard, shard_per_user, replace=False)) + for rand in rand_set: + rand_set_all.append(rand) + + idx_shard = list(set(idx_shard) - rand_set) # remove shards from possible choices for other users + for rand in rand_set: + dict_users[i] = np.concatenate((dict_users[i], idxs[rand*num_imgs:(rand+1)*num_imgs]), axis=0) + + else: # this only works if the train and test set have the same distribution of labels + for i in range(num_users): + rand_set = rand_set_all[i*shard_per_user: (i+1)*shard_per_user] + for rand in rand_set: + dict_users[i] = np.concatenate((dict_users[i], idxs[rand*num_imgs:(rand+1)*num_imgs]), axis=0) + + return dict_users, rand_set_all + + +if __name__ == '__main__': + dataset_train = datasets.MNIST('../data/mnist/', train=True, download=True, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ])) + num = 100 + d, _ = mnist_noniid(dataset_train, num)
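
A minimal usage sketch for the FedAvg helper defined in models/Fed.py above (not part of the diff; the two toy state dicts and the 'fc.weight'/'fc.bias' key names are made up for illustration, and it assumes the script is run from the repository root so that models.Fed is importable). It checks that FedAvg returns the per-key element-wise mean of the clients' weights:

# toy check: averaging two clients' state dicts with models.Fed.FedAvg
import torch
from models.Fed import FedAvg

w0 = {'fc.weight': torch.zeros(2, 2), 'fc.bias': torch.zeros(2)}        # client 0 weights
w1 = {'fc.weight': 2 * torch.ones(2, 2), 'fc.bias': 2 * torch.ones(2)}  # client 1 weights

w_avg = FedAvg([w0, w1])  # deep-copies w0, accumulates w1, then divides every entry by the number of clients
assert torch.allclose(w_avg['fc.weight'], torch.ones(2, 2))  # (0 + 2) / 2 = 1
assert torch.allclose(w_avg['fc.bias'], torch.ones(2))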