Module gnnnas.data
Expand source code
# This Source Code Form is subject to the terms of the
# BSD 2-Clause "Simplified" License. If a copy of the same
# was not distributed with this file, You can obtain one at
# https://github.com/akhilpandey95/gnnNAS/blob/master/LICENSE.
import numpy as np
import torch
import torch_geometric as pyg
class DataLoader:
    """Data Loader class"""

    @staticmethod
    def load_molnet_phys_chem_data(
        name: str, batch_size: int, training_split: float, seed: int = 2022
    ):
        """
        Load a graph dataset from MoleculeNet and split it into
        train/test dataloaders.

        Parameters
        ----------
        arg1 | name: str
            Name of the dataset to import from the Pytorch Geometric
            MoleculeNet dataset class.
        arg2 | batch_size: int
            Batch size for creating the train/test dataloaders.
        arg3 | training_split: float
            Fraction of samples (between 0 and 1, inclusive) to be kept
            in the training set; the remainder forms the test set.
        arg4 | seed: int
            Torch random seed to ensure reproducibility. Default value is 2022

        Returns
        -------
        tuple
            (dataset, train_loader, test_loader) where dataset is the
            shuffled torch_geometric.datasets.molecule_net.MoleculeNet
            dataset, and the loaders are torch_geometric.loader.DataLoader
            objects over the train and test slices respectively.

        Raises
        ------
        ValueError
            If training_split lies outside the closed interval [0, 1].
        """
        # reject an invalid split before any download or processing;
        # NaN also fails this chained comparison and is rejected
        if not 0.0 <= training_split <= 1.0:
            raise ValueError(
                f"training_split must be within [0, 1], got {training_split!r}"
            )

        # load the dataset (cached under /tmp/Molnet)
        dataset = pyg.datasets.MoleculeNet(root="/tmp/Molnet", name=name)

        # seed the global torch RNG so that the shuffle is reproducible
        torch.manual_seed(seed)

        # shuffle the data
        dataset = dataset.shuffle()

        # index of the first test sample; int() truncates, which equals
        # the floor here because the product is non-negative
        stop_index = int(training_split * len(dataset))

        # separate training and test data
        train_dataset = dataset[:stop_index]
        test_dataset = dataset[stop_index:]

        # create dataloaders for train and test samples; only the
        # training loader shuffles its batches
        train_loader = pyg.loader.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True
        )
        test_loader = pyg.loader.DataLoader(
            test_dataset, batch_size=batch_size, shuffle=False
        )

        return dataset, train_loader, test_loader
Classes
class DataLoader
-
Data Loader class
Expand source code
class DataLoader:
    """Data Loader class"""

    @staticmethod
    def load_molnet_phys_chem_data(
        name: str, batch_size: int, training_split: float, seed: int = 2022
    ):
        """
        Load a graph dataset from MoleculeNet and split it into
        train/test dataloaders.

        Parameters
        ----------
        arg1 | name: str
            Name of the dataset to import from the Pytorch Geometric
            MoleculeNet dataset class.
        arg2 | batch_size: int
            Batch size for creating the train/test dataloaders.
        arg3 | training_split: float
            Fraction of samples (between 0 and 1, inclusive) to be kept
            in the training set; the remainder forms the test set.
        arg4 | seed: int
            Torch random seed to ensure reproducibility. Default value is 2022

        Returns
        -------
        tuple
            (dataset, train_loader, test_loader) where dataset is the
            shuffled torch_geometric.datasets.molecule_net.MoleculeNet
            dataset, and the loaders are torch_geometric.loader.DataLoader
            objects over the train and test slices respectively.

        Raises
        ------
        ValueError
            If training_split lies outside the closed interval [0, 1].
        """
        # fail fast on an invalid split (NaN is rejected as well)
        if not 0.0 <= training_split <= 1.0:
            raise ValueError(
                f"training_split must be within [0, 1], got {training_split!r}"
            )

        # load the dataset (cached under /tmp/Molnet)
        dataset = pyg.datasets.MoleculeNet(root="/tmp/Molnet", name=name)

        # seed the global torch RNG so that the shuffle is reproducible
        torch.manual_seed(seed)

        # shuffle the data
        dataset = dataset.shuffle()

        # index of the first test sample (truncation == floor for
        # a non-negative product)
        stop_index = int(training_split * len(dataset))

        # separate training and test data
        train_dataset = dataset[:stop_index]
        test_dataset = dataset[stop_index:]

        # create dataloaders for train and test samples
        train_loader = pyg.loader.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True
        )
        test_loader = pyg.loader.DataLoader(
            test_dataset, batch_size=batch_size, shuffle=False
        )

        return dataset, train_loader, test_loader
Static methods
def load_molnet_phys_chem_data(name: str, batch_size: int, training_split: float, seed: int = 2022)
-
Load the specified graph dataset from MoleculeNet, shuffle it with a fixed seed, and split it into train and test dataloaders.
Parameters
- name: str — Name of the dataset to import from the PyTorch Geometric MoleculeNet dataset class.
- batch_size: int — Batch size for creating the train/test dataloaders.
- training_split: float — Fraction of samples (between 0 and 1) to be kept in the training set.
- seed: int — Torch random seed to ensure reproducibility. Default value is 2022.
Returns
- A tuple of the shuffled PyTorch Geometric dataset (torch_geometric.datasets.molecule_net.MoleculeNet), the training dataloader, and the test dataloader.
Expand source code
@staticmethod
def load_molnet_phys_chem_data(
    name: str, batch_size: int, training_split: float, seed: int = 2022
):
    """
    Load a graph dataset from MoleculeNet and split it into
    train/test dataloaders.

    Parameters
    ----------
    arg1 | name: str
        Name of the dataset to import from the Pytorch Geometric
        MoleculeNet dataset class.
    arg2 | batch_size: int
        Batch size for creating the train/test dataloaders.
    arg3 | training_split: float
        Fraction of samples (between 0 and 1, inclusive) to be kept
        in the training set; the remainder forms the test set.
    arg4 | seed: int
        Torch random seed to ensure reproducibility. Default value is 2022

    Returns
    -------
    tuple
        (dataset, train_loader, test_loader) where dataset is the
        shuffled torch_geometric.datasets.molecule_net.MoleculeNet
        dataset, and the loaders are torch_geometric.loader.DataLoader
        objects over the train and test slices respectively.

    Raises
    ------
    ValueError
        If training_split lies outside the closed interval [0, 1].
    """
    # fail fast on an invalid split (NaN is rejected as well)
    if not 0.0 <= training_split <= 1.0:
        raise ValueError(
            f"training_split must be within [0, 1], got {training_split!r}"
        )

    # load the dataset (cached under /tmp/Molnet)
    dataset = pyg.datasets.MoleculeNet(root="/tmp/Molnet", name=name)

    # seed the global torch RNG so that the shuffle is reproducible
    torch.manual_seed(seed)

    # shuffle the data
    dataset = dataset.shuffle()

    # index of the first test sample (truncation == floor for
    # a non-negative product)
    stop_index = int(training_split * len(dataset))

    # separate training and test data
    train_dataset = dataset[:stop_index]
    test_dataset = dataset[stop_index:]

    # create dataloaders for train and test samples
    train_loader = pyg.loader.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True
    )
    test_loader = pyg.loader.DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False
    )

    return dataset, train_loader, test_loader