diff --git a/datasets/mnist.py b/datasets/mnist.py index 4d243c0..03e64ca 100644 --- a/datasets/mnist.py +++ b/datasets/mnist.py @@ -8,14 +8,15 @@ import os def get_mnist(dataset_root, batch_size, train): """Get MNIST datasets loader.""" # image pre-processing - pre_process = transforms.Compose([transforms.ToTensor(), + pre_process = transforms.Compose([transforms.Resize(32), # different img size settings for mnist(28) and svhn(32). + transforms.ToTensor(), transforms.Normalize( mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5) )]) # datasets and data loader - mnist_dataset = datasets.MNIST(root=os.path.join(dataset_root,'mnist'), + mnist_dataset = datasets.MNIST(root=os.path.join(dataset_root), train=train, transform=pre_process, download=False) diff --git a/datasets/svhn.py b/datasets/svhn.py index 64b6e25..df7d231 100644 --- a/datasets/svhn.py +++ b/datasets/svhn.py @@ -8,7 +8,7 @@ import os def get_svhn(dataset_root, batch_size, train): """Get SVHN datasets loader.""" # image pre-processing - pre_process = transforms.Compose([transforms.Resize(28), + pre_process = transforms.Compose([transforms.Resize(32), transforms.ToTensor(), transforms.Normalize( mean=(0.5, 0.5, 0.5), @@ -17,12 +17,12 @@ def get_svhn(dataset_root, batch_size, train): # datasets and data loader if train: - svhn_dataset = datasets.SVHN(root=os.path.join(dataset_root,'svhn'), + svhn_dataset = datasets.SVHN(root=os.path.join(dataset_root), split='train', transform=pre_process, download=True) else: - svhn_dataset = datasets.SVHN(root=os.path.join(dataset_root,'svhn'), + svhn_dataset = datasets.SVHN(root=os.path.join(dataset_root), split='test', transform=pre_process, download=True) diff --git a/experiments/svhn_mnist.py b/experiments/svhn_mnist.py index 2158e6f..a545535 100644 --- a/experiments/svhn_mnist.py +++ b/experiments/svhn_mnist.py @@ -1,8 +1,9 @@ import os import sys +import datetime +from tensorboardX import SummaryWriter import torch - sys.path.append('../') from models.model import SVHNmodel from core.train import train_dann @@ -11,29 +12,32 @@ from utils.utils import get_data_loader, init_model, init_random_seed class Config(object): # params for path - dataset_root = os.path.expanduser(os.path.join('~', 'Datasets')) model_name = "svhn-mnist" + model_base = '/home/wogong/models/pytorch-dann' model_root = os.path.expanduser(os.path.join('~', 'Models', 'pytorch-DANN', model_name)) + note = 'paper-structure' + model_root = os.path.join(model_base, model_name, note + '_' + datetime.datetime.now().strftime('%m%d_%H%M%S')) + os.makedirs(model_root) + config = os.path.join(model_root, 'config.txt') + finetune_flag = False + lr_adjust_flag = 'simple' + src_only_flag = False # params for datasets and data loader batch_size = 128 # params for source dataset src_dataset = "svhn" + src_image_root = os.path.join('/home/wogong/datasets', 'svhn') src_model_trained = True src_classifier_restore = os.path.join(model_root, src_dataset + '-source-classifier-final.pt') # params for target dataset tgt_dataset = "mnist" + tgt_image_root = os.path.join('/home/wogong/datasets', 'mnist') tgt_model_trained = True dann_restore = os.path.join(model_root, src_dataset + '-' + tgt_dataset + '-dann-final.pt') - # params for pretrain - num_epochs_src = 100 - log_step_src = 10 - save_step_src = 50 - eval_step_src = 20 - # params for training dann gpu_id = '0' @@ -41,7 +45,7 @@ class Config(object): num_epochs = 200 log_step = 50 save_step = 100 - eval_step = 5 + eval_step = 1 ## for office # num_epochs = 1000 @@ -53,21 +57,22 @@ class Config(object): alpha = 0 # params for optimizing models - lr = 2e-4 + lr = 0.01 + momentum = 0.9 + weight_decay = 1e-6 params = Config() +logger = SummaryWriter(params.model_root) +device = torch.device("cuda:" + params.gpu_id if torch.cuda.is_available() else "cpu") # init random seed init_random_seed(params.manual_seed) -# init device -device = torch.device("cuda:" + params.gpu_id if torch.cuda.is_available() else "cpu") - # load dataset -src_data_loader = get_data_loader(params.src_dataset, params.dataset_root, params.batch_size, train=True) -src_data_loader_eval = get_data_loader(params.src_dataset, params.dataset_root, params.batch_size, train=False) -tgt_data_loader = get_data_loader(params.tgt_dataset, params.dataset_root, params.batch_size, train=True) -tgt_data_loader_eval = get_data_loader(params.tgt_dataset, params.dataset_root, params.batch_size, train=False) +src_data_loader = get_data_loader(params.src_dataset, params.src_image_root, params.batch_size, train=True) +src_data_loader_eval = get_data_loader(params.src_dataset, params.src_image_root, params.batch_size, train=False) +tgt_data_loader = get_data_loader(params.tgt_dataset, params.tgt_image_root, params.batch_size, train=True) +tgt_data_loader_eval = get_data_loader(params.tgt_dataset, params.tgt_image_root, params.batch_size, train=False) # load dann model dann = init_model(net=SVHNmodel(), restore=None) @@ -75,4 +80,4 @@ dann = init_model(net=SVHNmodel(), restore=None) # train dann model print("Training dann model") if not (dann.restored and params.dann_restore): - dann = train_dann(dann, params, src_data_loader, tgt_data_loader, tgt_data_loader_eval, device) + dann = train_dann(dann, params, src_data_loader, tgt_data_loader, tgt_data_loader_eval, device, logger) diff --git a/models/model.py b/models/model.py index cbf891f..ebcdab0 100644 --- a/models/model.py +++ b/models/model.py @@ -150,8 +150,6 @@ class MNISTmodel_plain(nn.Module): class SVHNmodel(nn.Module): """ SVHN architecture - I don't know how to implement the paper's structure - """ def __init__(self): @@ -159,22 +157,21 @@ class SVHNmodel(nn.Module): self.restored = False self.feature = nn.Sequential( - nn.Conv2d(in_channels=3, out_channels=64, kernel_size=( - 5, 5), stride=(1, 1)), # 3 28 28, 64 24 24 + nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(5, 5)), # 28 nn.BatchNorm2d(64), nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=(2, 2)), # 64 12 12 - nn.Conv2d(in_channels=64, out_channels=64, - kernel_size=(5, 5)), # 64 8 8 + nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2)), # 14 + nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(5, 5)), # 10 nn.BatchNorm2d(64), nn.Dropout2d(), nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)), # 64 4 4 + nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2)), # 5 nn.ReLU(inplace=True), + nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4)), # 1 ) self.classifier = nn.Sequential( - nn.Linear(64*4*4, 1024), + nn.Linear(128 * 1 * 1, 1024), nn.BatchNorm1d(1024), nn.ReLU(inplace=True), nn.Linear(1024, 256), @@ -184,7 +181,7 @@ class SVHNmodel(nn.Module): ) self.discriminator = nn.Sequential( - nn.Linear(64*4*4, 1024), + nn.Linear(128 * 1 * 1, 1024), nn.BatchNorm1d(1024), nn.ReLU(inplace=True), nn.Linear(1024, 256), @@ -194,9 +191,9 @@ class SVHNmodel(nn.Module): ) def forward(self, input_data, alpha = 1.0): - input_data = input_data.expand(input_data.data.shape[0], 3, 28, 28) + input_data = input_data.expand(input_data.data.shape[0], 3, 32, 32) feature = self.feature(input_data) - feature = feature.view(-1, 64 * 4 * 4) + feature = feature.view(-1, 128 * 1 * 1) reverse_feature = ReverseLayerF.apply(feature, alpha) class_output = self.classifier(feature) domain_output = self.discriminator(reverse_feature)