Package `gnnnas`

Expand source code

from gnnnas.data import DataLoader
from gnnnas.models import MPNN
from gnnnas.util import Util

__all__ = ["DataLoader", "MPNN", "Util"]

Sub-modules

gnnnas.data
gnnnas.models
gnnnas.util

Classes

class DataLoader

Data Loader class

Expand source code

class DataLoader:
    """Helpers for loading MoleculeNet graph datasets"""

    @staticmethod
    def load_molnet_phys_chem_data(
        name: str, batch_size: int, training_split: float, seed: int = 2022
    ):
        """
        Load a MoleculeNet dataset, shuffle it and split it into
        train/test dataloaders
        Parameters
        ----------
        arg1 | name: str
            Name of the dataset to import from Pytorch Geometric MoleculeNet dataloader.
        arg2 | batch_size: int
            Batch size for creating the train/test dataloaders.
        arg3 | training_split: float
            Fraction of samples to be kept in training set; it is multiplied
            by the dataset length and floored to obtain the split position.
        arg4 | seed: int
            Torch Random seed set before shuffling. Default value is 2022
        Returns
        -------
        Tuple
            (shuffled dataset, train dataloader, test dataloader)
        """
        # fetch the requested dataset, stored under the /tmp/Molnet root
        molnet = pyg.datasets.MoleculeNet(root="/tmp/Molnet", name=name)

        # seed torch's global RNG first, then shuffle the samples
        torch.manual_seed(seed)
        molnet = molnet.shuffle()

        # number of leading samples that go to the training split
        n_train = int(np.floor(training_split * molnet.len()))

        # the training loader reshuffles, the test loader keeps its order
        train_loader = pyg.loader.DataLoader(
            molnet[:n_train], batch_size=batch_size, shuffle=True
        )
        test_loader = pyg.loader.DataLoader(
            molnet[n_train:], batch_size=batch_size, shuffle=False
        )

        return molnet, train_loader, test_loader

Static methods

def load_molnet_phys_chem_data(name: str, batch_size: int, training_split: float, seed: int = 2022)

Load the specified graph dataset from MoleculeNet, shuffle it and split it into train/test dataloaders.

Parameters

- name: str — Name of the dataset to import from the Pytorch Geometric MoleculeNet dataloader.
- batch_size: int — Batch size for creating the train/test dataloaders.
- training_split: float — Fraction of samples to be kept in the training set.
- seed: int — Torch random seed to ensure reproducibility. Default value is 2022.

Returns

The shuffled Pytorch Geometric dataset (torch_geometric.datasets.molecule_net.MoleculeNet) together with the train dataloader and the test dataloader.

Expand source code

@staticmethod
def load_molnet_phys_chem_data(
    name: str, batch_size: int, training_split: float, seed: int = 2022
):
    """
    Load a MoleculeNet dataset, shuffle it and split it into
    train/test dataloaders
    Parameters
    ----------
    arg1 | name: str
        Name of the dataset to import from Pytorch Geometric MoleculeNet dataloader.
    arg2 | batch_size: int
        Batch size for creating the train/test dataloaders.
    arg3 | training_split: float
        Fraction of samples to be kept in training set; it is multiplied
        by the dataset length and floored to obtain the split position.
    arg4 | seed: int
        Torch Random seed set before shuffling. Default value is 2022
    Returns
    -------
    Tuple
        (shuffled dataset, train dataloader, test dataloader)
    """
    # fetch the requested dataset, stored under the /tmp/Molnet root
    molnet = pyg.datasets.MoleculeNet(root="/tmp/Molnet", name=name)

    # seed torch's global RNG first, then shuffle the samples
    torch.manual_seed(seed)
    molnet = molnet.shuffle()

    # number of leading samples that go to the training split
    n_train = int(np.floor(training_split * molnet.len()))

    # the training loader reshuffles, the test loader keeps its order
    train_loader = pyg.loader.DataLoader(
        molnet[:n_train], batch_size=batch_size, shuffle=True
    )
    test_loader = pyg.loader.DataLoader(
        molnet[n_train:], batch_size=batch_size, shuffle=False
    )

    return molnet, train_loader, test_loader

class MPNN (n_node_features: int, n_edge_features: int, n_hidden: int, n_output: int, MPNN_inp: torch.nn.modules.module.Module, MPNN_hidden: torch.nn.modules.module.Module, n_conv_blocks: int, skip_connection: str = 'plain')

Creates an MPNN model in pytorch geometric

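Build the MPNN model.

Parameters

- n_node_features: int — Number of features at node level.
- n_edge_features: int — Number of features at edge level.
- n_hidden: int — Number of hidden activations.
- n_output: int — Number of output activations.
- MPNN_inp: torch.nn.Module — Layer class used to encode the node features; it is called as MPNN_inp(n_node_features, n_hidden).
- MPNN_hidden: torch.nn.Module — Layer class used as the convolution inside every block.
- n_conv_blocks: int — Number of convolutional kernels (blocks).
- skip_connection: str — Skip-connection type handed to every block. Default value is 'plain'.

Returns

Nothing: None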

Expand source code

class MPNN(torch.nn.Module):
    """Creates an MPNN model in pytorch geometric"""

    def __init__(
        self,
        n_node_features: int,
        n_edge_features: int,
        n_hidden: int,
        n_output: int,
        MPNN_inp: torch.nn.Module,
        MPNN_hidden: torch.nn.Module,
        n_conv_blocks: int,
        skip_connection: str = "plain",
    ) -> None:
        """
        Build the MPNN model
        Parameters
        ----------
        arg1 | n_node_features: int
            Number of features at node level
        arg2 | n_edge_features: int
            Number of features at edge level (accepted but not read by
            this class)
        arg3 | n_hidden: int
            Number of hidden activations
        arg4 | n_output: int
            Number of output activations
        arg5 | MPNN_inp: torch.nn.Module
            Layer class used as node encoder; instantiated as
            MPNN_inp(n_node_features, n_hidden)
        arg6 | MPNN_hidden: torch.nn.Module
            Layer class used as the convolution of every block;
            instantiated as MPNN_hidden(in_features, n_hidden)
        arg7 | n_conv_blocks: int
            Number of convolutional blocks
        arg8 | skip_connection: str
            Passed as `block` to pyg.nn.DeepGCNLayer. With "dense" every
            block concatenates its input to its output, so the feature
            width grows by n_hidden per block. Default value is "plain"
        Returns
        -------
        Nothing
            None
        """
        # super class the class structure
        super().__init__()

        dense = skip_connection == "dense"

        # encode the node information
        self.node_encoder = MPNN_inp(n_node_features, n_hidden)

        # ability to add one or more conv blocks
        conv_blocks = []
        for block in range(n_conv_blocks):
            # a dense block receives the concatenation of everything before it
            in_features = n_hidden * (block + 1) if dense else n_hidden
            conv = MPNN_hidden(in_features, n_hidden)
            norm = torch.nn.LayerNorm(n_hidden, elementwise_affine=True)
            act = torch.nn.ReLU(inplace=True)
            conv_blocks.append(
                pyg.nn.DeepGCNLayer(conv, norm, act, block=skip_connection)
            )

        # group all the conv layers
        self.conv_layers = torch.nn.ModuleList(conv_blocks)

        # feature width after the LAST block has run; this is what the
        # readout consumes (the last dense block adds n_hidden as well)
        self.growth_dimension = (
            n_hidden * (len(conv_blocks) + 1) if dense else n_hidden
        )

        # add the linear layers for flattening the output from MPNN
        self.flatten = torch.nn.Sequential(
            torch.nn.Linear(self.growth_dimension, n_hidden),
            torch.nn.ReLU(),
            torch.nn.Linear(n_hidden, n_output),
        )

    def forward(
        self, x: torch.Tensor, edge_index: torch.Tensor, batch_idx: torch.Tensor
    ) -> torch.Tensor:
        """
        Process the MPNN model
        Parameters
        ----------
        arg1 | x: torch.Tensor
            Input features at node level
        arg2 | edge_index: torch.Tensor
            Index pairs of vertices
        arg3 | batch_idx: torch.Tensor
            Batch index, used to sum-pool the nodes of each graph
        Returns
        -------
        Tensor
            torch.Tensor
        """
        # message-passing encoders also need the connectivity
        if isinstance(self.node_encoder, pyg.nn.MessagePassing):
            x = self.node_encoder(x, edge_index)
        else:
            x = self.node_encoder(x)

        # run every conv block exactly once, in order. The previous loop
        # iterated indices 0..n-2 after a bare call to block 0's conv, so
        # the last block was never used and block 0's conv ran twice.
        for layer in self.conv_layers:
            x = layer(x, edge_index)

        # obtain one vector per graph from the node embeddings
        y = pyg.nn.global_add_pool(x, batch=batch_idx)

        # pass the pooled output to the linear output layers
        return self.flatten(y)

Ancestors

torch.nn.modules.module.Module

Methods

def forward(self, x: torch.Tensor, edge_index: torch.Tensor, batch_idx: torch.Tensor) ‑> torch.Tensor

Process the MPNN model.

Parameters

- x: torch.Tensor — Input features at node level.
- edge_index: torch.Tensor — Index pairs of vertices.
- batch_idx: torch.Tensor — Batch index.

Returns

Tensor: torch.Tensor

Expand source code

def forward(
    self, x: torch.Tensor, edge_index: torch.Tensor, batch_idx: torch.Tensor
) -> torch.Tensor:
    """
    Run the MPNN on a batch of graphs
    Parameters
    ----------
    arg1 | x: torch.Tensor
        Input features at node level
    arg2 | edge_index: torch.Tensor
        Index pairs of vertices
    arg3 | batch_idx: torch.Tensor
        Batch index, used to sum-pool the nodes of each graph
    Returns
    -------
    Tensor
        torch.Tensor
    """
    encoder = self.node_encoder
    blocks = self.conv_layers

    # message-passing encoders take the connectivity, plain ones do not
    if isinstance(encoder, pyg.nn.MessagePassing):
        hidden = encoder(x, edge_index)
    else:
        hidden = encoder(x)

    # only the bare convolution of the first block is applied here
    hidden = blocks[0].conv(hidden, edge_index)

    # NOTE(review): the blocks applied below are 0 .. n-2, i.e. block 0 is
    # run again as a full block and the last block is never executed.
    # Confirm whether this is intended before changing it: the layer
    # widths built in __init__ for "dense" depend on this exact order.
    for position in range(len(blocks) - 1):
        hidden = blocks[position](hidden, edge_index)

    # sum the node embeddings of every graph, then apply the readout
    pooled = pyg.nn.global_add_pool(hidden, batch=batch_idx)
    return self.flatten(pooled)

class Util

Utility methods

Expand source code

class Util:
    """Utility methods"""

    # define the helper method to train
    @staticmethod
    def model_train(
        model: torch.nn.Module,
        dataloader: pyg.loader.DataLoader,
        learning_rate: float,
        ITERATIONS: int,
        optimizer: torch.optim.Optimizer,
        compute_loss: Callable,
        logging=False,
    ) -> torch.nn.Module:
        """
        Train the Pytorch Geometric model and return
        the model
        Parameters
        ----------
        arg1 | model: torch.nn.Module
            Neural network model to train
        arg2 | dataloader: DataLoader
            Training dataset as a DataLoader object
        arg3 | learning_rate: float
            Not read by this function; kept in the signature for
            backward compatibility
        arg4 | ITERATIONS: int
            Number of passes (epochs) over the dataloader
        arg5 | optimizer: torch.optim.Optimizer
            Optimizer used to update the model weights
        arg6 | compute_loss: Callable
            Loss function called as compute_loss(predictions, targets)
        arg7 | logging: bool
            Print the loss of the last batch after every epoch.
            Default value is False
        Returns
        -------
        Pytorch model
            torch.nn.Module
        """
        for iteration in range(ITERATIONS):
            # set the model for training
            model.train()

            # stays None when the dataloader yields no batch
            cost = None

            # iterate in batches over the training dataset
            for data in dataloader:
                # set the gradients to zero
                optimizer.zero_grad()

                # forward pass and compute the y hat values
                y_hat = model(
                    data.x.float().to(DEVICE),
                    data.edge_index.long().to(DEVICE),
                    data.batch.long().to(DEVICE),
                )

                # compute the loss against the ground truth
                cost = compute_loss(y_hat, data.y.to(DEVICE))

                # backward pass
                cost.backward()

                # update the weights
                optimizer.step()

            # display the loss of the last batch; skipped for an empty
            # dataloader, where no loss exists
            if logging and cost is not None:
                print(f"Epoch: {iteration:03d}, Loss: {cost:.4f}")

        # return the trained model
        return model

    # define the helper method to evaluate
    @staticmethod
    def model_evaluate(
        model: torch.nn.Module,
        test_loader: pyg.loader.DataLoader,
    ) -> "tuple[torch.Tensor, torch.Tensor]":
        """
        Evaluate the Pytorch model and return
        ground truth along with predictions
        Parameters
        ----------
        arg1 | model: torch.nn.Module
            Trained Neural network model
        arg2 | test_loader: DataLoader
            Dataset as a DataLoader object
        Returns
        -------
        Tuple
            (ground truth, predictions) concatenated over all batches
        """
        # init an empty list to capture y hats
        y_preds = []

        # init an empty list to capture ground truth
        y_true = []

        # set the model to evaluate
        model.eval()

        # inference only: do not record autograd graphs
        with torch.no_grad():
            # Iterate in batches over the test dataset.
            for data in test_loader:
                # store the ground truth
                y_true.append(data.y)

                # gather the model prediction
                out = model(
                    data.x.float().to(DEVICE),
                    data.edge_index.long().to(DEVICE),
                    data.batch.long().to(DEVICE),
                )

                # store the model predictions
                y_preds.append(torch.flatten(out, start_dim=1))

        # concat the predictions obtained in batches
        y_preds = torch.cat(y_preds)

        # concat the ground truth obtained in batches
        y_true = torch.cat(y_true)

        # return the tuple [Ground truth, Predictions]
        return (y_true, y_preds)

    # define the helper method to obtain evaluation metrics
    @staticmethod
    def regression_evaluation_metrics(
        y_true: torch.Tensor, y_preds: torch.Tensor, metric: str
    ) -> "dict | float":
        """
        Compute regression metrics of the Pytorch model based
        on the ground truth vs predictions
        Parameters
        ----------
        arg1 | y_true: torch.Tensor
            Ground truth values of the data
        arg2 | y_preds: torch.Tensor
            Model Predictions for the input data
        arg3 | metric: str
            One of "mse", "mae", "rmse", "r2", or "all"
        Returns
        -------
        Metric(s)
            The dict of all metrics for "all", the single value for a
            known metric name, an empty dict for any other name
        """
        # init an empty dict to store results
        results = defaultdict(dict)

        # store y_preds and y_true as numpy arrays (numpy needs CPU tensors)
        y_true = y_true.detach().cpu().numpy()
        y_preds = y_preds.detach().cpu().numpy()

        # MSE
        results["mse"] = mean_squared_error(y_true, y_preds)

        # MAE
        results["mae"] = mean_absolute_error(y_true, y_preds)

        # RMSE: root of the per-output MSE, averaged over the outputs.
        # Computed by hand because the `squared=False` argument is not
        # accepted by newer scikit-learn releases.
        per_output_mse = mean_squared_error(
            y_true, y_preds, multioutput="raw_values"
        )
        results["rmse"] = (per_output_mse ** 0.5).mean()

        # R2
        results["r2"] = r2_score(y_true, y_preds)

        # return appropriate metric(s)
        if metric == "all":
            return results
        if metric in results:
            return results[metric]
        # unknown metric name: keep returning an empty dict
        return defaultdict(dict)

Methods

def model_evaluate(model: torch.nn.modules.module.Module, test_loader: torch_geometric.loader.dataloader.DataLoader) ‑> (torch.Tensor, torch.Tensor)

Evaluate the Pytorch model and return the ground truth along with the predictions.

Parameters

- model: torch.nn.Module — Trained neural network model.
- test_loader: DataLoader — Dataset as a DataLoader object.

Returns

The tuple (ground truth, predictions), each concatenated over all batches.

Expand source code

def model_evaluate(
    model: torch.nn.Module,
    test_loader: pyg.loader.DataLoader,
) -> "tuple[torch.Tensor, torch.Tensor]":
    """
    Evaluate the Pytorch model and return
    ground truth along with predictions
    Parameters
    ----------
    arg1 | model: torch.nn.Module
        Trained Neural network model
    arg2 | test_loader: DataLoader
        Dataset as a DataLoader object
    Returns
    -------
    Tuple
        (ground truth, predictions) concatenated over all batches
    """
    # init an empty list to capture y hats
    y_preds = []

    # init an empty list to capture ground truth
    y_true = []

    # set the model to evaluate
    model.eval()

    # inference only: do not record autograd graphs
    with torch.no_grad():
        # Iterate in batches over the test dataset.
        for data in test_loader:
            # store the ground truth
            y_true.append(data.y)

            # gather the model prediction
            out = model(
                data.x.float().to(DEVICE),
                data.edge_index.long().to(DEVICE),
                data.batch.long().to(DEVICE),
            )

            # store the model predictions
            y_preds.append(torch.flatten(out, start_dim=1))

    # concat the predictions obtained in batches
    y_preds = torch.cat(y_preds)

    # concat the ground truth obtained in batches
    y_true = torch.cat(y_true)

    # return the tuple [Ground truth, Predictions]
    return (y_true, y_preds)

def model_train(model: torch.nn.modules.module.Module, dataloader: torch_geometric.loader.dataloader.DataLoader, learning_rate: float, ITERATIONS: int, optimizer: torch.optim.optimizer.Optimizer, compute_loss: Callable, logging=False) ‑> torch.nn.modules.module.Module

Train the Pytorch Geometric model and return the model.

Parameters

- model: torch.nn.Module — Neural network model to train.
- dataloader: DataLoader — Training dataset as a DataLoader object.
- learning_rate: float — Learning rate (accepted for compatibility; the function body does not read it).
- ITERATIONS: int — Number of passes over the dataloader.
- optimizer: torch.optim.Optimizer — Optimizer used to update the model weights.
- compute_loss: Callable — Loss function called with the predictions and the ground truth.
- logging: bool — Print the loss after every pass. Default value is False.

Returns

Pytorch model: torch.nn.Module

Expand source code

def model_train(
    model: torch.nn.Module,
    dataloader: pyg.loader.DataLoader,
    learning_rate: float,
    ITERATIONS: int,
    optimizer: torch.optim.Optimizer,
    compute_loss: Callable,
    logging=False,
) -> torch.nn.Module:
    """
    Train the Pytorch Geometric model and return
    the model
    Parameters
    ----------
    arg1 | model: torch.nn.Module
        Neural network model to train
    arg2 | dataloader: DataLoader
        Training dataset as a DataLoader object
    arg3 | learning_rate: float
        Not read by this function; kept in the signature for
        backward compatibility
    arg4 | ITERATIONS: int
        Number of passes (epochs) over the dataloader
    arg5 | optimizer: torch.optim.Optimizer
        Optimizer used to update the model weights
    arg6 | compute_loss: Callable
        Loss function called as compute_loss(predictions, targets)
    arg7 | logging: bool
        Print the loss of the last batch after every epoch.
        Default value is False
    Returns
    -------
    Pytorch model
        torch.nn.Module
    """
    for iteration in range(ITERATIONS):
        # set the model for training
        model.train()

        # stays None when the dataloader yields no batch
        cost = None

        # iterate in batches over the training dataset
        for data in dataloader:
            # set the gradients to zero
            optimizer.zero_grad()

            # forward pass and compute the y hat values
            y_hat = model(
                data.x.float().to(DEVICE),
                data.edge_index.long().to(DEVICE),
                data.batch.long().to(DEVICE),
            )

            # compute the loss against the ground truth
            cost = compute_loss(y_hat, data.y.to(DEVICE))

            # backward pass
            cost.backward()

            # update the weights
            optimizer.step()

        # display the loss of the last batch; skipped for an empty
        # dataloader, where no loss exists
        if logging and cost is not None:
            print(f"Epoch: {iteration:03d}, Loss: {cost:.4f}")

    # return the trained model
    return model

def regression_evaluation_metrics(y_true: torch.Tensor, y_preds: torch.Tensor, metric: str) ‑> None

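Compute the Pytorch model metrics based on the ground truth vs the predictions and return them.

Parameters

- y_true: torch.Tensor — Ground truth values of the data.
- y_preds: torch.Tensor — Model predictions for the input data.
- metric: str — Metric to return: "mse", "mae", "rmse", "r2", or "all" for the dictionary holding all of them.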

Expand source code

def regression_evaluation_metrics(
    y_true: torch.Tensor, y_preds: torch.Tensor, metric: str
) -> "dict | float":
    """
    Compute regression metrics of the Pytorch model based
    on the ground truth vs predictions
    Parameters
    ----------
    arg1 | y_true: torch.Tensor
        Ground truth values of the data
    arg2 | y_preds: torch.Tensor
        Model Predictions for the input data
    arg3 | metric: str
        One of "mse", "mae", "rmse", "r2", or "all"
    Returns
    -------
    Metric(s)
        The dict of all metrics for "all", the single value for a
        known metric name, an empty dict for any other name
    """
    # init an empty dict to store results
    results = defaultdict(dict)

    # store y_preds and y_true as numpy arrays (numpy needs CPU tensors)
    y_true = y_true.detach().cpu().numpy()
    y_preds = y_preds.detach().cpu().numpy()

    # MSE
    results["mse"] = mean_squared_error(y_true, y_preds)

    # MAE
    results["mae"] = mean_absolute_error(y_true, y_preds)

    # RMSE: root of the per-output MSE, averaged over the outputs.
    # Computed by hand because the `squared=False` argument is not
    # accepted by newer scikit-learn releases.
    per_output_mse = mean_squared_error(
        y_true, y_preds, multioutput="raw_values"
    )
    results["rmse"] = (per_output_mse ** 0.5).mean()

    # R2
    results["r2"] = r2_score(y_true, y_preds)

    # return appropriate metric(s)
    if metric == "all":
        return results
    if metric in results:
        return results[metric]
    # unknown metric name: keep returning an empty dict
    return defaultdict(dict)