In [ ]:
from sbrain.learning.experiment import *
from sbrain.dataset.dataset import *
In [ ]:
import os
os.listdir('/home/jovyan/shared-dir/data')
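The input_fn below loads training.pt and test.pt as (data, labels) tuples saved with torch.save. If those files are not already in shared-dir, here is a minimal preparation sketch using torchvision's MNIST; the /tmp/mnist staging path is arbitrary, and the .data/.targets attribute names vary across torchvision releases.
In [ ]:
# Hypothetical one-time preparation step (not part of the original flow):
# save MNIST as the (data, labels) tuples that input_fn loads with torch.load.
import torch
from torchvision import datasets

mnist_train = datasets.MNIST('/tmp/mnist', train=True, download=True)
mnist_test = datasets.MNIST('/tmp/mnist', train=False, download=True)

# Recent torchvision exposes .data/.targets; older releases used
# train_data/train_labels and test_data/test_labels instead.
torch.save((mnist_train.data, mnist_train.targets),
           '/home/jovyan/shared-dir/data/training.pt')
torch.save((mnist_test.data, mnist_test.targets),
           '/home/jovyan/shared-dir/data/test.pt')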
In [ ]:
def input_fn(mode, batch_size, params):
    """Build the dataset for one phase of the run.

    :param mode: one of {'train', 'eval'}.
    :param batch_size: requested batch size (this example hardcodes its own below).
    :param params: dict of user-supplied hyper parameters.
    :return: a dataset implementing __getitem__, __len__ and get_batch_size.
    """
    import os
    import torch
    import torch.utils.data as data
    from torchvision import transforms
    from PIL import Image

    class TorchDataset(data.Dataset):
        """Must implement methods __getitem__, __len__ and get_batch_size. Following implementation are examples on how to implement them."""
        def __init__(self, data_file, batch_size, data_transforms = None, target_transforms = None, ):
            """Assumes data_file was saved using `torch.save`"""
            self.data_file = data_file
            self.batch_size = batch_size
            self.data_transforms = data_transforms if data_transforms else lambda x: x
            self.target_transforms = target_transforms if target_transforms else lambda x: x

            if not os.path.exists(self.data_file):
                raise FileNotFoundError(data_file + ' does not exist')
            self.data, self.labels = torch.load(self.data_file)


        def __getitem__(self, index):
            img, target = self.data[index], self.labels[index]

            # convert to a PIL Image for consistency with other torchvision datasets
            img = Image.fromarray(img.numpy(), mode='L')

            return self.data_transforms(img), self.target_transforms(target)


        def __len__(self):
            return len(self.data)


        def get_batch_size(self):
            return self.batch_size

    data_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    # shared-dir as mounted on the training workers (presumably the same
    # directory the notebook lists under /home/jovyan above)
    data_dir = '/workspace/shared-dir/data'

    if mode == 'train':
        return TorchDataset(os.path.join(data_dir, 'training.pt'),
                            batch_size=64,
                            data_transforms=data_transforms)
    else:
        return TorchDataset(os.path.join(data_dir, 'test.pt'),
                            batch_size=1000,
                            data_transforms=data_transforms)



def model_fn(params, cuda=True):
    """
    :param params: dict of user-supplied hyper parameters, e.g. any permutation of a neural network architecture.
    :return: model, optimizer, loss function
    """
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.optim as optim

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
            self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
            self.conv2_drop = nn.Dropout2d()
            self.fc1 = nn.Linear(320, 50)
            self.fc2 = nn.Linear(50, 10)

        def forward(self, x):
            x = F.relu(F.max_pool2d(self.conv1(x), 2))
            x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
            x = x.view(-1, 320)
            x = F.relu(self.fc1(x))
            x = F.dropout(x, training=self.training)
            x = self.fc2(x)
            return F.log_softmax(x, dim=1)

    model = Net()
    if cuda:
        # Move model to GPU.
        model.cuda()

    # Plain SGD; lr and momentum come from the experiment's hyper parameters.
    optimizer = optim.SGD(model.parameters(), lr=params['lr'],
                          momentum=params['momentum'])

    def loss_fn(output, target):
        return F.nll_loss(output, target)

    return model, optimizer, loss_fn


def train_step(model, optimizer, loss_fn, X, y):
    optimizer.zero_grad()  # clear gradient buffers from the previous step
    y_prime = model(X)
    loss = loss_fn(y_prime, y)
    loss.backward()   # compute d(loss)/d(p) for every parameter p
    optimizer.step()  # update every parameter: p <- p - lr * d(loss)/d(p)
    return loss


def test_step(model, X, y):
    import torch.nn.functional as F
    y_prime = model(X)
    # Sum (rather than average) the loss so it can be aggregated across batches.
    loss = F.nll_loss(y_prime, y, reduction='sum')
    # The index of the max log-probability is the most likely class.
    y_pred = y_prime.data.max(1, keepdim=True)[1]
    correct = y_pred.eq(y.data.view_as(y_pred)).cpu().float().sum()
    return loss, correct
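The pieces above can be smoke-tested locally on a single batch before handing them to SBrain. A minimal sketch, assuming the .pt files resolve at the /workspace path input_fn reads from, and running on CPU (cuda=False):
In [ ]:
# Local single-batch check of input_fn / model_fn / train_step / test_step.
import torch.utils.data as data

train_ds = input_fn('train', batch_size=64, params={})
loader = data.DataLoader(train_ds, batch_size=train_ds.get_batch_size(), shuffle=True)

model, optimizer, loss_fn = model_fn({'lr': 0.01, 'momentum': 0.5}, cuda=False)

X, y = next(iter(loader))
print('train loss:', train_step(model, optimizer, loss_fn, X, y).item())

model.eval()  # disable dropout for the eval pass
loss, correct = test_step(model, X, y)
print('eval loss: %.4f, correct: %d / %d' % (loss.item(), int(correct), len(y)))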
In [ ]:
import sbrain
sbrain._debug = True
In [ ]:
import time

estimator = Estimator.NewClassificationEstimator(pytorch_model_fn=model_fn)
name = "MyFirstEstimator" + str(time.time()).replace(".", "")  # timestamp keeps the name unique
estimator = Estimator.create(name, "Hello", estimator)


hyper_parameters = HParams(iterations=8000, batch_size=10, lr=0.01, momentum=0.5)
rc = RunConfig(no_of_ps=0, no_of_workers=2, summary_save_frequency=1000,
               run_eval=False, use_gpu=False, sync=True,
               benchmark_command="{\"cmd\":\"\"}")


exper = Experiment.run(experiment_name="MyFirstModel" + str(time.time()).replace(".", ""),
                       description="Really the first model",
                       estimator=estimator,
                       hyper_parameters=hyper_parameters,
                       run_config=rc,
                       dataset_version_split=None,
                       input_function=input_fn,
                       user_functions={'train_step': train_step, 'test_step': test_step})
job = exper.get_single_job()
print(job.__dict__)
In [ ]:
print(job.has_finished())
In [ ]:
# job.wait_until_finish()
import time
time.sleep(40)  # let the job run briefly...
job.cancel()    # ...then cancel it to demonstrate job control
In [ ]:
job.has_finished()
In [ ]:
time.sleep(10)  # give the cancellation a moment to propagate
exper.report_status()
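For unattended runs, a simple polling loop over has_finished() (or the wait_until_finish() call commented out above) replaces the fixed sleeps; a sketch:
In [ ]:
# Poll until the job completes, then print the final status.
while not job.has_finished():
    time.sleep(30)
exper.report_status()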
In [ ]: