Save and load a state dict
# File that will hold the serialized parameter dictionary
PATH = "state_dict_model.pt"
# Write only the parameters (the state dict) of `net`, not the module object
trained_weights = net.state_dict()
torch.save(trained_weights, PATH)
# Restore: build a fresh Net, read the saved dict back, then copy it in
model = Net()
restored_weights = torch.load(PATH)
model.load_state_dict(restored_weights)
# Put the restored module into evaluation mode before inference
model.eval()
Save and load entire model
# Specify a path
PATH = "entire_model.pt"
# Save the whole module object (pickled as-is), not just its state dict
torch.save(net, PATH)
# Load
# A fully pickled module can only be restored by the unrestricted unpickler.
# From PyTorch 2.6 on, torch.load defaults to weights_only=True and rejects
# such files, so the restriction is lifted explicitly here.
# NOTE: weights_only=False may execute arbitrary code embedded in the file;
# only load files that come from a trusted source.
model = torch.load(PATH, weights_only=False)
# Put the restored module into evaluation mode before inference
model.eval()
Save a checkpoint
# Example bookkeeping values stored next to the weights
EPOCH = 5
PATH = "model.pt"
LOSS = 0.4
# Collect everything needed to resume training into a single dictionary
checkpoint_payload = {
    'epoch': EPOCH,
    'model_state_dict': net.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': LOSS,
}
# Serialize the dictionary to disk in one call
torch.save(checkpoint_payload, PATH)
See more: https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
Load a checkpoint
# Build a fresh model to receive the saved weights
model = Net()
# The optimizer must track the parameters of the model being restored.
# Building it over `net.parameters()` would leave it updating the old network
# instead of `model`, so it is bound to `model.parameters()` here.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Read the checkpoint dictionary written by torch.save
checkpoint = torch.load(PATH)
# Restore model weights and optimizer state from their respective entries
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Recover the bookkeeping values stored alongside the weights
epoch = checkpoint['epoch']
loss = checkpoint['loss']
# Choose ONE of the two modes below: eval() for inference, train() to resume
# training. As written both calls run, so the model ends up in training mode.
model.eval()
# - or -
model.train()
See more: https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
Save on GPU, Load on CPU
# Destination file for the saved parameters
PATH = "model.pt"
# Write the parameter dictionary of `net` to disk
gpu_weights = net.state_dict()
torch.save(gpu_weights, PATH)
# Restore on the CPU: map_location remaps the stored tensors onto `device`
device = torch.device('cpu')
model = Net()
cpu_weights = torch.load(PATH, map_location=device)
model.load_state_dict(cpu_weights)
Save on GPU, Load on GPU
# Write the parameter dictionary of `net` to disk (PATH is set earlier in the file)
saved_weights = net.state_dict()
torch.save(saved_weights, PATH)
# Restore into a fresh model, then move that model onto the CUDA device
device = torch.device("cuda")
model = Net()
loaded_weights = torch.load(PATH)
model.load_state_dict(loaded_weights)
model.to(device)
Save on CPU, Load on GPU
# Write the parameter dictionary of `net` to disk (PATH is set earlier in the file)
saved_weights = net.state_dict()
torch.save(saved_weights, PATH)
# Restore onto a GPU
device = torch.device("cuda")
model = Net()
# map_location picks the GPU the stored tensors are remapped to; any index works
gpu_weights = torch.load(PATH, map_location="cuda:0")
model.load_state_dict(gpu_weights)
# Move the model itself to the device; remember to also call
# input = input.to(device) on every input tensor fed to the model
model.to(device)
Saving torch.nn.DataParallel Models
# Save the state dict of the module held in `net.module`, not of `net` itself
inner_weights = net.module.state_dict()
torch.save(inner_weights, PATH)
# Load to whatever device you want