Package gnnnas
Expand source code
from gnnnas.data import DataLoader
from gnnnas.models import MPNN
from gnnnas.util import Util
__all__ = ["DataLoader", "MPNN", "Util"]
Sub-modules
gnnnas.data
gnnnas.models
gnnnas.util
Classes
class DataLoader
-
Data Loader class
Expand source code
class DataLoader:
    """Loads MoleculeNet graph datasets and wraps them in dataloaders"""

    @staticmethod
    def load_molnet_phys_chem_data(
        name: str, batch_size: int, training_split: float, seed: int = 2022
    ):
        """
        Fetch a MoleculeNet dataset, shuffle it and split it into train/test loaders

        Parameters
        ----------
        arg1 | name: str
            Dataset name handed to the Pytorch Geometric MoleculeNet loader.
        arg2 | batch_size: int
            Batch size shared by the train and test dataloaders.
        arg3 | training_split: float
            Fraction of the samples assigned to the training set.
        arg4 | seed: int
            Value passed to torch.manual_seed before shuffling. Default value is 2022

        Returns
        -------
        Tuple
            (shuffled dataset, train dataloader, test dataloader)
        """
        # fetch the dataset (files are stored under /tmp/Molnet)
        molnet = pyg.datasets.MoleculeNet(root="/tmp/Molnet", name=name)

        # seed torch after loading and right before shuffling
        torch.manual_seed(seed)
        shuffled = molnet.shuffle()

        # number of leading samples that belong to the training partition
        n_train = int(np.floor(training_split * shuffled.len()))

        # training batches are shuffled, test batches keep their order
        train_loader = pyg.loader.DataLoader(
            shuffled[:n_train], batch_size=batch_size, shuffle=True
        )
        test_loader = pyg.loader.DataLoader(
            shuffled[n_train:], batch_size=batch_size, shuffle=False
        )

        return shuffled, train_loader, test_loader
Static methods
def load_molnet_phys_chem_data(name: str, batch_size: int, training_split: float, seed: int = 2022)
-
Load the specific Graph dataset from MoleculeNet Parameters
arg1 | name: str Name of the dataset to import from Pytorch Geometric MoleculeNet dataloader. arg2 | batch_size: int Batch size for creating the train/test dataloaders. arg3 | training_split: float Percentage of samples to be kept in training set. arg4 | seed: int Torch Random seed to ensure reproducibility. Default value is 2022 Returns
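Tuple of (dataset, train_loader, test_loader)
- the shuffled torch_geometric.datasets.molecule_net.MoleculeNet dataset, followed by the train and test torch_geometric.loader.DataLoader objects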
Expand source code
@staticmethod
def load_molnet_phys_chem_data(
    name: str, batch_size: int, training_split: float, seed: int = 2022
):
    """
    Fetch a MoleculeNet dataset, shuffle it and split it into train/test loaders

    Parameters
    ----------
    arg1 | name: str
        Dataset name handed to the Pytorch Geometric MoleculeNet loader.
    arg2 | batch_size: int
        Batch size shared by the train and test dataloaders.
    arg3 | training_split: float
        Fraction of the samples assigned to the training set.
    arg4 | seed: int
        Value passed to torch.manual_seed before shuffling. Default value is 2022

    Returns
    -------
    Tuple
        (shuffled dataset, train dataloader, test dataloader)
    """
    # fetch the dataset (files are stored under /tmp/Molnet)
    molnet = pyg.datasets.MoleculeNet(root="/tmp/Molnet", name=name)

    # seed torch after loading and right before shuffling
    torch.manual_seed(seed)
    shuffled = molnet.shuffle()

    # number of leading samples that belong to the training partition
    n_train = int(np.floor(training_split * shuffled.len()))

    # training batches are shuffled, test batches keep their order
    train_loader = pyg.loader.DataLoader(
        shuffled[:n_train], batch_size=batch_size, shuffle=True
    )
    test_loader = pyg.loader.DataLoader(
        shuffled[n_train:], batch_size=batch_size, shuffle=False
    )

    return shuffled, train_loader, test_loader
class MPNN (n_node_features: int, n_edge_features: int, n_hidden: int, n_output: int, MPNN_inp: torch.nn.modules.module.Module, MPNN_hidden: torch.nn.modules.module.Module, n_conv_blocks: int, skip_connection: str = 'plain')
-
Creates an MPNN model in pytorch geometric
Build the MPNN model Parameters
arg1 | n_node_features: int Number of features at node level arg2 | n_edge_features: int Number of features at edge level arg3 | n_hidden: int Number of hidden activations arg4 | n_output: int Number of output activations arg5 | n_conv_blocks: int Number of convolutional kernels Returns
Nothing
- None
Expand source code
class MPNN(torch.nn.Module):
    """Message passing network assembled from pytorch geometric layers"""

    def __init__(
        self,
        n_node_features: int,
        n_edge_features: int,
        n_hidden: int,
        n_output: int,
        MPNN_inp: torch.nn.Module,
        MPNN_hidden: torch.nn.Module,
        n_conv_blocks: int,
        skip_connection: str = "plain",
    ) -> None:
        """
        Assemble the encoder, the convolution blocks and the output head

        Parameters
        ----------
        arg1 | n_node_features: int
            Number of features at node level
        arg2 | n_edge_features: int
            Number of features at edge level (not used by this constructor)
        arg3 | n_hidden: int
            Number of hidden activations
        arg4 | n_output: int
            Number of output activations
        arg5 | MPNN_inp: torch.nn.Module
            Layer class called as MPNN_inp(n_node_features, n_hidden) to build
            the node encoder
        arg6 | MPNN_hidden: torch.nn.Module
            Layer class called as MPNN_hidden(in_channels, n_hidden) to build
            the convolution of every block
        arg7 | n_conv_blocks: int
            Number of convolutional blocks
        arg8 | skip_connection: str
            Value forwarded as ``block`` to pyg.nn.DeepGCNLayer; "dense" also
            widens the input of each successive block. Default is "plain"

        Returns
        -------
        Nothing
            None
        """
        super().__init__()

        # input width of the output head; only "dense" changes it below
        self.growth_dimension = n_hidden

        # node feature encoder
        self.node_encoder = MPNN_inp(n_node_features, n_hidden)

        # stack of DeepGCN blocks (convolution -> layer norm -> ReLU)
        blocks = torch.nn.ModuleList()
        for block_idx in range(n_conv_blocks):
            if skip_connection == "dense":
                # block k receives (k + 1) * n_hidden input channels
                self.growth_dimension = n_hidden * (block_idx + 1)
            blocks.append(
                pyg.nn.DeepGCNLayer(
                    MPNN_hidden(self.growth_dimension, n_hidden),
                    torch.nn.LayerNorm(n_hidden, elementwise_affine=True),
                    torch.nn.ReLU(inplace=True),
                    block=skip_connection,
                )
            )
        self.conv_layers = blocks

        # output head applied to the pooled graph representation
        self.flatten = torch.nn.Sequential(
            torch.nn.Linear(self.growth_dimension, n_hidden),
            torch.nn.ReLU(),
            torch.nn.Linear(n_hidden, n_output),
        )

    def forward(
        self, x: torch.Tensor, edge_index: torch.Tensor, batch_idx: torch.Tensor
    ) -> torch.Tensor:
        """
        Run the network on a batch of graphs

        Parameters
        ----------
        arg1 | x: torch.Tensor
            Input features at node level
        arg2 | edge_index: torch.Tensor
            Index pairs of vertices
        arg3 | batch_idx: torch.Tensor
            Batch index

        Returns
        -------
        Tensor
            torch.Tensor
        """
        # message passing encoders also need the connectivity
        encoder = self.node_encoder
        if isinstance(encoder, pyg.nn.MessagePassing):
            x = encoder(x, edge_index)
        else:
            x = encoder(x)

        # bare convolution of the first block (no norm / activation)
        x = self.conv_layers[0].conv(x, edge_index)

        # NOTE(review): every block except the last one is applied here, so
        # block 0 runs again as a full block and the final block is never
        # used. The "dense" channel sizes set in __init__ line up with this
        # indexing - confirm the intent before changing it.
        for conv_block in self.conv_layers[:-1]:
            x = conv_block(x, edge_index)

        # pool the node features with global_add_pool, grouped by batch_idx
        pooled = pyg.nn.global_add_pool(x, batch=batch_idx)

        # map the pooled features to the outputs
        return self.flatten(pooled)
Ancestors
- torch.nn.modules.module.Module
Methods
def forward(self, x: torch.Tensor, edge_index: torch.Tensor, batch_idx: torch.Tensor) ‑> torch.Tensor
-
Process the MPNN model Parameters
arg1 | x: torch.Tensor Input features at node level arg2 | edge_index: torch.Tensor Index pairs of vertices arg3 | batch_idx: torch.Tensor Batch index Returns
Tensor
- torch.Tensor
Expand source code
def forward(
    self, x: torch.Tensor, edge_index: torch.Tensor, batch_idx: torch.Tensor
) -> torch.Tensor:
    """
    Run the network on a batch of graphs

    Parameters
    ----------
    arg1 | x: torch.Tensor
        Input features at node level
    arg2 | edge_index: torch.Tensor
        Index pairs of vertices
    arg3 | batch_idx: torch.Tensor
        Batch index

    Returns
    -------
    Tensor
        torch.Tensor
    """
    # message passing encoders also need the connectivity
    encoder = self.node_encoder
    if isinstance(encoder, pyg.nn.MessagePassing):
        x = encoder(x, edge_index)
    else:
        x = encoder(x)

    # bare convolution of the first block (no norm / activation)
    x = self.conv_layers[0].conv(x, edge_index)

    # NOTE(review): every block except the last one is applied here, so
    # block 0 runs again as a full block and the final block is never used.
    # Confirm the intent against the constructor before changing the indexing.
    for conv_block in self.conv_layers[:-1]:
        x = conv_block(x, edge_index)

    # pool the node features with global_add_pool, grouped by batch_idx
    pooled = pyg.nn.global_add_pool(x, batch=batch_idx)

    # map the pooled features to the outputs
    return self.flatten(pooled)
class Util
-
Utility methods
Expand source code
class Util:
    """Utility methods"""

    # define the helper method to train
    @staticmethod
    def model_train(
        model: torch.nn.Module,
        dataloader: pyg.loader.DataLoader,
        learning_rate: float,
        ITERATIONS: int,
        optimizer: torch.optim.Optimizer,
        compute_loss: Callable,
        logging=False,
    ) -> torch.nn.Module:
        """
        Train the Pytorch Geometric model and return the model

        Parameters
        ----------
        arg1 | model: torch.nn.Module
            Neural network model to train
        arg2 | dataloader: DataLoader
            Training dataset as a DataLoader object
        arg3 | learning_rate: float
            Not used inside this method; kept for interface compatibility
        arg4 | ITERATIONS: int
            Number of passes (epochs) over the dataloader
        arg5 | optimizer: torch.optim.Optimizer
            Optimizer used to update the model weights
        arg6 | compute_loss: Callable
            Loss function called as compute_loss(predictions, targets)
        arg7 | logging: bool
            When True, print the loss of the last processed batch per epoch

        Returns
        -------
        Pytorch model
            torch.nn.Module
        """
        # stays None until a batch has been processed, so logging cannot hit
        # an unbound name when the dataloader yields nothing
        cost = None
        for iteration in range(ITERATIONS):
            # set the model for training
            model.train()
            # iterate in batches over the training dataset
            for data in dataloader:
                # set the gradients to zero
                optimizer.zero_grad()
                # forward pass and compute the y hat values
                y_hat = model(
                    data.x.float().to(DEVICE),
                    data.edge_index.long().to(DEVICE),
                    data.batch.long().to(DEVICE),
                )
                # compute the loss on the device that holds the predictions
                cost = compute_loss(y_hat, data.y.to(DEVICE))
                # backward pass
                cost.backward()
                # update the weights
                optimizer.step()
            # display the stats
            if logging and cost is not None:
                print(f"Epoch: {iteration:03d}, Loss: {cost:.4f}")
        # return the trained model
        return model

    # define the helper method to evaluate
    @staticmethod
    def model_evaluate(
        model: torch.nn.Module,
        test_loader: pyg.loader.DataLoader,
    ) -> "tuple[torch.Tensor, torch.Tensor]":
        """
        Evaluate the Pytorch model and return ground truth along with predictions

        Parameters
        ----------
        arg1 | model: torch.nn.Module
            Trained Neural network model
        arg2 | test_loader: DataLoader
            Dataset as a DataLoader object

        Returns
        -------
        Tuple
            (ground truth, predictions), each concatenated over all batches
        """
        # init an empty list to capture y hats
        y_preds = []
        # init an empty list to capture ground truth
        y_true = []
        # set the model to evaluate
        model.eval()
        # inference only: no autograd graph needs to be recorded
        with torch.no_grad():
            # iterate in batches over the test dataset
            for data in test_loader:
                # store the ground truth
                y_true.append(data.y)
                # gather the model prediction
                out = model(
                    data.x.float().to(DEVICE),
                    data.edge_index.long().to(DEVICE),
                    data.batch.long().to(DEVICE),
                )
                # store the model predictions
                y_preds.append(torch.flatten(out, start_dim=1))
        # concat the predictions obtained in batches
        y_preds = torch.cat(y_preds)
        # concat the ground truth obtained in batches
        y_true = torch.cat(y_true)
        # return the tuple [Ground truth, Predictions]
        return (y_true, y_preds)

    # define the helper method to obtain evaluation metrics
    @staticmethod
    def regression_evaluation_metrics(
        y_true: torch.Tensor, y_preds: torch.Tensor, metric: str
    ) -> "dict | float":
        """
        Compute regression metrics from the ground truth and the predictions

        Parameters
        ----------
        arg1 | y_true: torch.Tensor
            Ground truth values of the data
        arg2 | y_preds: torch.Tensor
            Model Predictions for the input data
        arg3 | metric: str
            "all" for every metric, or one of "mse", "mae", "rmse", "r2"

        Returns
        -------
        Metric(s)
            The dict of all metrics for "all", the single value for a known
            metric name, and an empty dict for any other name
        """
        # init an empty dict to store results
        results = defaultdict(dict)
        # move both tensors to the CPU before converting to numpy arrays
        y_true = y_true.detach().cpu().numpy()
        y_preds = y_preds.detach().cpu().numpy()
        # MSE
        results["mse"] = mean_squared_error(y_true, y_preds)
        # MAE
        results["mae"] = mean_absolute_error(y_true, y_preds)
        # RMSE: the ``squared`` keyword is deprecated in scikit-learn in favour
        # of root_mean_squared_error, so use that function when it exists
        # (assumes the metric helpers in scope come from sklearn.metrics)
        try:
            from sklearn.metrics import root_mean_squared_error
        except ImportError:
            results["rmse"] = mean_squared_error(y_true, y_preds, squared=False)
        else:
            results["rmse"] = root_mean_squared_error(y_true, y_preds)
        # R2
        results["r2"] = r2_score(y_true, y_preds)
        # return appropriate metric(s)
        if metric == "all":
            return results
        if metric in results:
            return results[metric]
        # unknown metric names yield an empty result instead of an error
        return defaultdict(dict)
Methods
def model_evaluate(model: torch.nn.modules.module.Module, test_loader: torch_geometric.loader.dataloader.DataLoader) ‑> (torch.Tensor, torch.Tensor)
-
Evaluate the Pytorch model and return ground truth along with predictions Parameters
arg1 | model: torch.nn.Module Trained Neural network model arg2 | test_loader: DataLoader Dataset as a DataLoader object
Expand source code
@staticmethod
def model_evaluate(
    model: torch.nn.Module,
    test_loader: pyg.loader.DataLoader,
) -> "tuple[torch.Tensor, torch.Tensor]":
    """
    Evaluate the Pytorch model and return ground truth along with predictions

    Parameters
    ----------
    arg1 | model: torch.nn.Module
        Trained Neural network model
    arg2 | test_loader: DataLoader
        Dataset as a DataLoader object

    Returns
    -------
    Tuple
        (ground truth, predictions), each concatenated over all batches
    """
    # init an empty list to capture y hats
    y_preds = []
    # init an empty list to capture ground truth
    y_true = []
    # set the model to evaluate
    model.eval()
    # inference only: no autograd graph needs to be recorded
    with torch.no_grad():
        # iterate in batches over the test dataset
        for data in test_loader:
            # store the ground truth
            y_true.append(data.y)
            # gather the model prediction
            out = model(
                data.x.float().to(DEVICE),
                data.edge_index.long().to(DEVICE),
                data.batch.long().to(DEVICE),
            )
            # store the model predictions
            y_preds.append(torch.flatten(out, start_dim=1))
    # concat the predictions obtained in batches
    y_preds = torch.cat(y_preds)
    # concat the ground truth obtained in batches
    y_true = torch.cat(y_true)
    # return the tuple [Ground truth, Predictions]
    return (y_true, y_preds)
def model_train(model: torch.nn.modules.module.Module, dataloader: torch_geometric.loader.dataloader.DataLoader, learning_rate: float, ITERATIONS: int, optimizer: torch.optim.optimizer.Optimizer, compute_loss: Callable, logging=False) ‑> torch.nn.modules.module.Module
-
Train the Pytorch Geometric model and return the model Parameters
arg1 | model: torch.nn.Module Neural network model to train arg2 | dataloader: DataLoader Training dataset as a DataLoader object arg3 | learning_rate: float Learning rate (not used inside this method) arg4 | ITERATIONS: int Number of training epochs arg5 | optimizer: torch.optim.Optimizer Optimizer used to update the model weights arg6 | compute_loss: Callable Loss function applied to the predictions and the targets Returns
Pytorch model
- torch.nn.Module
Expand source code
@staticmethod
def model_train(
    model: torch.nn.Module,
    dataloader: pyg.loader.DataLoader,
    learning_rate: float,
    ITERATIONS: int,
    optimizer: torch.optim.Optimizer,
    compute_loss: Callable,
    logging=False,
) -> torch.nn.Module:
    """
    Train the Pytorch Geometric model and return the model

    Parameters
    ----------
    arg1 | model: torch.nn.Module
        Neural network model to train
    arg2 | dataloader: DataLoader
        Training dataset as a DataLoader object
    arg3 | learning_rate: float
        Not used inside this method; kept for interface compatibility
    arg4 | ITERATIONS: int
        Number of passes (epochs) over the dataloader
    arg5 | optimizer: torch.optim.Optimizer
        Optimizer used to update the model weights
    arg6 | compute_loss: Callable
        Loss function called as compute_loss(predictions, targets)
    arg7 | logging: bool
        When True, print the loss of the last processed batch per epoch

    Returns
    -------
    Pytorch model
        torch.nn.Module
    """
    # stays None until a batch has been processed, so logging cannot hit an
    # unbound name when the dataloader yields nothing
    cost = None
    for iteration in range(ITERATIONS):
        # set the model for training
        model.train()
        # iterate in batches over the training dataset
        for data in dataloader:
            # set the gradients to zero
            optimizer.zero_grad()
            # forward pass and compute the y hat values
            y_hat = model(
                data.x.float().to(DEVICE),
                data.edge_index.long().to(DEVICE),
                data.batch.long().to(DEVICE),
            )
            # compute the loss on the device that holds the predictions
            cost = compute_loss(y_hat, data.y.to(DEVICE))
            # backward pass
            cost.backward()
            # update the weights
            optimizer.step()
        # display the stats
        if logging and cost is not None:
            print(f"Epoch: {iteration:03d}, Loss: {cost:.4f}")
    # return the trained model
    return model
def regression_evaluation_metrics(y_true: torch.Tensor, y_preds: torch.Tensor, metric: str) ‑> None
-
Compute regression metrics from the ground truth and the predictions and return the requested one(s) Parameters
arg1 | y_true: torch.Tensor Ground truth values of the data arg2 | y_preds: torch.Tensor Model Predictions for the input data
Expand source code
@staticmethod
def regression_evaluation_metrics(
    y_true: torch.Tensor, y_preds: torch.Tensor, metric: str
) -> "dict | float":
    """
    Compute regression metrics from the ground truth and the predictions

    Parameters
    ----------
    arg1 | y_true: torch.Tensor
        Ground truth values of the data
    arg2 | y_preds: torch.Tensor
        Model Predictions for the input data
    arg3 | metric: str
        "all" for every metric, or one of "mse", "mae", "rmse", "r2"

    Returns
    -------
    Metric(s)
        The dict of all metrics for "all", the single value for a known
        metric name, and an empty dict for any other name
    """
    # init an empty dict to store results
    results = defaultdict(dict)
    # move both tensors to the CPU before converting to numpy arrays
    y_true = y_true.detach().cpu().numpy()
    y_preds = y_preds.detach().cpu().numpy()
    # MSE
    results["mse"] = mean_squared_error(y_true, y_preds)
    # MAE
    results["mae"] = mean_absolute_error(y_true, y_preds)
    # RMSE: the ``squared`` keyword is deprecated in scikit-learn in favour of
    # root_mean_squared_error, so use that function when it exists
    # (assumes the metric helpers in scope come from sklearn.metrics)
    try:
        from sklearn.metrics import root_mean_squared_error
    except ImportError:
        results["rmse"] = mean_squared_error(y_true, y_preds, squared=False)
    else:
        results["rmse"] = root_mean_squared_error(y_true, y_preds)
    # R2
    results["r2"] = r2_score(y_true, y_preds)
    # return appropriate metric(s)
    if metric == "all":
        return results
    if metric in results:
        return results[metric]
    # unknown metric names yield an empty result instead of an error
    return defaultdict(dict)