def input_fn(mode, batch_size, params):
    """Build and return the dataset for the given mode.

    :param mode: One of {'train', 'eval'}.
    :param batch_size: Batch size used for the training dataset.
    :param params: Dictionary of user-supplied hyperparameters (unused here).
    :return: A dataset implementing __getitem__, __len__ and get_batch_size.
    """
    import os
    import torch
    import torch.utils.data as data
    from torchvision import transforms
    from PIL import Image
class TorchDataset(data.Dataset):
"""Must implement methods __getitem__, __len__ and get_batch_size. Following implementation are examples on how to implement them."""
        def __init__(self, data_file, batch_size, data_transforms=None, target_transforms=None):
"""Assumes data_file was saved using `torch.save`"""
self.data_file = data_file
self.batch_size = batch_size
self.data_transforms = data_transforms if data_transforms else lambda x: x
self.target_transforms = target_transforms if target_transforms else lambda x: x
            if not os.path.exists(self.data_file):
                raise FileNotFoundError(data_file + ' does not exist')
self.data, self.labels = torch.load(self.data_file)
def __getitem__(self, index):
img, target = self.data[index], self.labels[index]
# doing this so that it is consistent with all other datasets
# to return a PIL Image
img = Image.fromarray(img.numpy(), mode='L')
return self.data_transforms(img), self.target_transforms(target)
def __len__(self):
return len(self.data)
def get_batch_size(self):
return self.batch_size
data_transforms = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
data_dir = '/workspace/shared-dir/data'
    if mode == 'train':
        # use the caller-supplied batch size for training
        return TorchDataset(os.path.join(data_dir, 'training.pt'),
                            batch_size=batch_size,
                            data_transforms=data_transforms)
else:
return TorchDataset(os.path.join(data_dir, 'test.pt'),
batch_size = 1000,
data_transforms = data_transforms)
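# A minimal sketch of how the (data, labels) files that TorchDataset expects
# could be produced. MNIST from torchvision and the /tmp download path are
# assumptions, not part of the original; the only requirement is that each
# file holds a (data, labels) pair saved with torch.save.
def make_data_files(data_dir='/workspace/shared-dir/data'):
    import os
    import torch
    from torchvision import datasets
    os.makedirs(data_dir, exist_ok=True)
    for train, name in [(True, 'training.pt'), (False, 'test.pt')]:
        mnist = datasets.MNIST('/tmp/mnist', train=train, download=True)
        # mnist.data: uint8 tensor of shape (N, 28, 28); mnist.targets: labels of shape (N,)
        torch.save((mnist.data, mnist.targets), os.path.join(data_dir, name))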
def model_fn(params, cuda=True):
    """
    :param params: Dictionary. Any hyperparameters the user may want to supply, e.g. learning rate, momentum, or any permutation of a neural-network architecture.
    :param cuda: If True, move the model to the GPU.
    :return: model, optimizer, loss function
    """
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
self.conv2_drop = nn.Dropout2d()
self.fc1 = nn.Linear(320, 50)
self.fc2 = nn.Linear(50, 10)
def forward(self, x):
x = F.relu(F.max_pool2d(self.conv1(x), 2))
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
x = x.view(-1, 320)
x = F.relu(self.fc1(x))
x = F.dropout(x, training=self.training)
x = self.fc2(x)
            return F.log_softmax(x, dim=1)
model = Net()
if cuda:
# Move model to GPU.
model.cuda()
    # Horovod: when training across multiple GPUs, the learning rate is typically
    # scaled by the number of workers before being passed in as params['lr'].
    optimizer = optim.SGD(model.parameters(), lr=params['lr'],
                          momentum=params['momentum'])
def loss_fn(output, target):
return F.nll_loss(output, target)
return model, optimizer, loss_fn
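# A minimal sketch of how the optimizer returned by model_fn could be made
# distributed with Horovod, which the comment above alludes to. Whether the
# surrounding framework performs these steps itself is an assumption; this
# only shows the standard Horovod calls.
def make_distributed(model, optimizer, base_lr):
    import horovod.torch as hvd
    hvd.init()
    # Scale the learning rate by the number of workers.
    for group in optimizer.param_groups:
        group['lr'] = base_lr * hvd.size()
    # Average gradients across workers on every optimizer.step().
    optimizer = hvd.DistributedOptimizer(optimizer,
                                         named_parameters=model.named_parameters())
    # Start all workers from the same weights.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    return optimizer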
def train_step(model, optimizer, loss_fn, X, y):
optimizer.zero_grad() # clear gradient buffers
y_prime = model(X)
loss = loss_fn(y_prime, y)
    loss.backward()   # compute gradients d(loss)/d(parameter) for every parameter
    optimizer.step()  # SGD update: parameter <- parameter - lr * d(loss)/d(parameter)
return loss
def test_step(model, X, y):
import torch.nn.functional as F
y_prime = model(X)
    loss = F.nll_loss(y_prime, y, reduction='sum')  # sum, not average, over the batch
    # the index of the max log-probability is the most likely class
    y_pred = y_prime.data.max(1, keepdim=True)[1]
    # count of correct predictions in the batch (divide by the batch size for accuracy)
    correct = y_pred.eq(y.data.view_as(y_pred)).cpu().float().sum()
    return loss, correct
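# A minimal sketch of a driver loop that wires input_fn, model_fn, train_step
# and test_step together. The epoch count, the lr/momentum values and the use
# of a plain DataLoader are assumptions; the framework that calls these
# functions may wire them differently.
def main(epochs=2, cuda=False):
    import torch
    import torch.utils.data as data
    params = {'lr': 0.01, 'momentum': 0.5}
    train_set = input_fn('train', batch_size=64, params=params)
    test_set = input_fn('eval', batch_size=1000, params=params)
    model, optimizer, loss_fn = model_fn(params, cuda=cuda)
    train_loader = data.DataLoader(train_set, batch_size=train_set.get_batch_size(), shuffle=True)
    test_loader = data.DataLoader(test_set, batch_size=test_set.get_batch_size())
    for epoch in range(epochs):
        model.train()
        for X, y in train_loader:
            if cuda:
                X, y = X.cuda(), y.cuda()
            train_step(model, optimizer, loss_fn, X, y)
        model.eval()
        total_loss, total_correct = 0.0, 0.0
        with torch.no_grad():
            for X, y in test_loader:
                if cuda:
                    X, y = X.cuda(), y.cuda()
                loss, correct = test_step(model, X, y)
                total_loss += loss.item()
                total_correct += correct.item()
        n = len(test_set)
        print('epoch %d: test loss %.4f, accuracy %.4f' % (epoch, total_loss / n, total_correct / n))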