I tried to run all the computations on the GPU, but the speedup wasn't as big as I expected. While the program is running, nvidia-smi shows this: [nvidia-smi output]. So does PyTorch actually "see" my GPU or not? And if not, how can I fix it?
I've installed the CUDA toolkit v11.1 and cuDNN, and merged the cuDNN files into the CUDA toolkit directory.
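For what it's worth, this is the minimal check I know of for what PyTorch itself reports about the GPU (all of these are standard torch.cuda calls; as far as I know, pip/conda builds of PyTorch bundle their own CUDA runtime, so the system-wide toolkit mainly matters when building from source):

import torch

print(torch.cuda.is_available())            # True if PyTorch can use a GPU
print(torch.cuda.device_count())            # number of visible GPUs
print(torch.version.cuda)                   # CUDA version this build was compiled with
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))    # name of the first GPU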
# In[1]:
import torch
import torch.nn.functional as F
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from timeit import default_timer as timer
from typing import Tuple, List, Type, Dict, Any
# In[2]:
torch.manual_seed(0)
np.random.seed(0)
# In[3]:
class Perceptron(torch.nn.Module):
    def __init__(self,
                 input_resolution: Tuple[int, int] = (28, 28),
                 input_channels: int = 1,
                 hidden_layer_features: List[int] = [256, 256, 256],
                 activation: Type[torch.nn.Module] = torch.nn.Tanh,
                 num_classes: int = 10):
        super().__init__()
        self.input_resolution = input_resolution
        self.input_channels = input_channels
        self.hidden_layer_features = hidden_layer_features  # currently unused: the layer sizes below are hardcoded
        self.activation = activation()
        self.num_classes = num_classes
        # Layers:
        self.fc1 = torch.nn.Linear(self.input_resolution[0] * self.input_resolution[1], 128)
        self.fc2 = torch.nn.Linear(128, self.num_classes)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        X = self.fc1(X)
        X = self.activation(X)
        X = self.fc2(X)
        # Return raw logits: CrossEntropyLoss applies log_softmax internally,
        # so an extra softmax here would be a bug.
        return X
# In[4]:
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
# In[5]:
model = Perceptron().to(device)
print(model)
print('Total number of trainable parameters:',
      sum(p.numel() for p in model.parameters() if p.requires_grad))
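# Sanity check that the model's weights actually ended up on the GPU
# (assuming the device selected above is cuda:0):
print(next(model.parameters()).device)  # expected: cuda:0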
# In[6]:
train_transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
valid_transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
# In[7]:
train_dataset = torchvision.datasets.MNIST(root='./mnist',
                                           train=True,
                                           download=True,
                                           transform=train_transforms)
valid_dataset = torchvision.datasets.MNIST(root='./mnist',
                                           train=False,
                                           download=True,
                                           transform=valid_transforms)
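# With a network this small, the data pipeline is often the bottleneck rather
# than the GPU itself, which alone would explain a modest speedup and low GPU
# utilization in nvidia-smi. A sketch of a loader tuned for that (num_workers
# and pin_memory are standard DataLoader arguments; 2 workers is just a guess
# to tune; example_loader is only for illustration, train_model below builds
# its own loaders):
example_loader = torch.utils.data.DataLoader(train_dataset,
                                             shuffle=True,
                                             batch_size=64,
                                             num_workers=2,
                                             pin_memory=True)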
# In[8]:
indices = np.random.randint(0, len(train_dataset), size=256)
fig, axes = plt.subplots(nrows=8, ncols=8, figsize=(10, 10))
for i, row in enumerate(axes):
    for j, ax in enumerate(row):
        sample_index = indices[i * 16 + j]
        sample, label = train_dataset[sample_index]
        # squeeze the channel dimension: imshow rejects (28, 28, 1) arrays
        ax.imshow(sample.squeeze(0).numpy(), cmap='gray')
        ax.set_title(label)
# In[9]:
def train_single_epoch(model: torch.nn.Module,
                       optimizer: torch.optim.Optimizer,
                       loss_function: torch.nn.Module,
                       data_loader: torch.utils.data.DataLoader):
    model.train()
    for X, y in data_loader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        output = model(X.view(-1, 784))
        loss = loss_function(output, y)
        loss.backward()
        optimizer.step()
# In[10]:
def validate_single_epoch(model: torch.nn.Module,
                          loss_function: torch.nn.Module,
                          data_loader: torch.utils.data.DataLoader):
    model.eval()
    loss_total = 0.0
    accuracy_total = 0
    # no_grad skips building the autograd graph during validation,
    # which saves both time and GPU memory
    with torch.no_grad():
        for X, y in data_loader:
            X, y = X.to(device), y.to(device)
            output = model(X.view(-1, 784))
            # .item() detaches the loss; accumulating the tensor itself would
            # keep every batch's graph alive. Weight by batch size so the
            # final average is per-sample.
            loss_total += loss_function(output, y).item() * X.size(0)
            y_pred = output.argmax(dim=1, keepdim=True)
            accuracy_total += y_pred.eq(y.view_as(y_pred)).sum().item()
    loss_avg = loss_total / len(data_loader.dataset)
    accuracy_avg = 100.0 * accuracy_total / len(data_loader.dataset)
    return {'loss': loss_avg, 'accuracy': accuracy_avg}
# In[11]:
def train_model(model: torch.nn.Module,
                train_dataset: torch.utils.data.Dataset,
                valid_dataset: torch.utils.data.Dataset,
                loss_function: torch.nn.Module = torch.nn.CrossEntropyLoss(),
                optimizer_class: Type[torch.optim.Optimizer] = torch.optim.Adam,
                optimizer_params: Dict = {},
                initial_lr: float = 0.01,
                lr_scheduler_class: Any = torch.optim.lr_scheduler.ReduceLROnPlateau,
                lr_scheduler_params: Dict = {},
                batch_size: int = 64,
                max_epochs: int = 1000,
                early_stopping_patience: int = 20):
    # use the optimizer_class argument instead of hardcoding Adam
    optimizer = optimizer_class(model.parameters(), lr=initial_lr, **optimizer_params)
    lr_scheduler = lr_scheduler_class(optimizer, **lr_scheduler_params)
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size)
    best_valid_loss = None
    best_epoch = None
    for epoch in range(max_epochs):
        print(f'Epoch {epoch}')
        start = timer()
        train_single_epoch(model, optimizer, loss_function, train_loader)
        valid_metrics = validate_single_epoch(model, loss_function, valid_loader)
        print('time:', timer() - start)
        print(f'Validation metrics: \n{valid_metrics}')
        lr_scheduler.step(valid_metrics['loss'])
        if best_valid_loss is None or best_valid_loss > valid_metrics['loss']:
            print('Best model yet, saving')
            best_valid_loss = valid_metrics['loss']
            best_epoch = epoch
            torch.save(model, './best_model.pth')
        if epoch - best_epoch > early_stopping_patience:
            print('Early stopping triggered')
            return
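# A caveat on the per-epoch timing above: CUDA kernels launch asynchronously,
# so a CPU-side timer can be misleading unless the GPU is synchronized before
# reading the clock. A minimal sketch of the pattern (any GPU workload would
# do; the random batch here is just an example):
if torch.cuda.is_available():
    torch.cuda.synchronize()  # wait for all pending GPU work
start = timer()
_ = model(torch.randn(64, 784, device=device))
if torch.cuda.is_available():
    torch.cuda.synchronize()  # make sure the forward pass has finished
print('synchronized forward-pass time:', timer() - start)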
# In[ ]:
train_model(model,
            train_dataset=train_dataset,
            valid_dataset=valid_dataset,
            loss_function=torch.nn.CrossEntropyLoss(),
            initial_lr=0.01, max_epochs=500)
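In case it helps diagnose this, here is how I can check from inside the process whether the GPU is actually holding the model and activations (both are standard torch.cuda calls):

if torch.cuda.is_available():
    print(torch.cuda.memory_allocated(device) / 2**20, 'MiB allocated')
    print(torch.cuda.memory_reserved(device) / 2**20, 'MiB reserved')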