Skip to content

ModelNet

ModelNet is a dataset containing 3D grids of voxelized objects.

The objects are available either as 20×20×20 or 32×32×32 voxel grids, and are classified into 10 classes.

A visualization of a single object of every class is available both for the 20×20×20 and the 32×32×32 resolutions.

Each dataset element is a Python dictionary with the following keys:

  • "grid": a torch.Tensor with shape [1, 20, 20, 20] or [1, 32, 32, 32] of type torch.uint8 with values 0 or 1 indicating whether the corresponding voxel is empty or occupied by the object,
  • "label": a torch.uint8 scalar containing the gold class label.

The dataset is split into:

  • train: 3,718 objects for training;
  • dev: 273 objects for development (validation);
  • test: 908 objects for testing.

npfl138.datasets.modelnet.ModelNet

Source code in npfl138/datasets/modelnet.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
class ModelNet:
    """The ModelNet dataset of voxelized 3D objects.

    Objects are binary 20×20×20 or 32×32×32 voxel grids classified into
    10 classes; the train/dev/test splits are exposed as torch datasets
    in the `train`, `dev`, and `test` attributes.
    """

    C: int = 1
    """The number of 3D grid channels."""
    D: int
    """The depth of the 3D grid, set in the constructor to 20 or 32."""
    H: int
    """The height of the 3D grid, set in the constructor to 20 or 32."""
    W: int
    """The width of the 3D grid, set in the constructor to 20 or 32."""
    LABELS: int = 10
    """The number of object classes."""
    LABEL_NAMES: list[str] = [
        "bathtub", "bed", "chair", "desk", "dresser", "monitor", "night_stand", "sofa", "table", "toilet",
    ]
    """The names of the object classes."""

    Element = TypedDict("Element", {"grid": np.ndarray, "label": np.ndarray})
    """The type of a single dataset element."""
    Elements = TypedDict("Elements", {"grids": np.ndarray, "labels": np.ndarray})
    """The type of the whole dataset."""

    URL: str = "https://ufal.mff.cuni.cz/~straka/courses/npfl138/2526/datasets"

    class Dataset(torch.utils.data.Dataset):
        def __init__(self, data: "ModelNet.Elements") -> None:
            # Convert the numpy arrays to torch tensors and move the channel
            # dimension of the grids from last to second (NDHWC -> NCDHW).
            self._data = {key: torch.as_tensor(value) for key, value in data.items()}
            self._data["grids"] = self._data["grids"].movedim(-1, 1)

        @property
        def data(self) -> "ModelNet.Elements":
            """Return the whole dataset as a `ModelNet.Elements` object."""
            return self._data

        def __len__(self) -> int:
            """Return the number of elements in the dataset."""
            return len(self._data["grids"])

        def __getitem__(self, index: int) -> "ModelNet.Element":
            """Return the `index`-th element of the dataset."""
            # Batched keys are plural ("grids"); element keys are singular ("grid").
            return {key.removesuffix("s"): value[index] for key, value in self._data.items()}

    def __init__(self, resolution: Literal[20, 32]) -> None:
        """Load the ModelNet dataset, downloading it if necessary.

        Parameters:
          resolution: The resolution of the dataset to load.
        """
        assert resolution in [20, 32], "Only 20 or 32 resolution is supported"

        setattr(self, "D", resolution)  # use setattr to prevent mkdocs
        setattr(self, "H", resolution)  # moving the attribute docstring
        setattr(self, "W", resolution)  # after the __init__ method

        path = download_url_to_file(self.URL, f"modelnet{resolution}.npz")
        modelnet = np.load(path)
        for dataset, _size in [("train", 3_718), ("dev", 273), ("test", 908)]:
            # Strip the "<split>" prefix plus one separator character from the
            # archive keys, leaving plain "grids"/"labels" keys for each split.
            data = {key[len(dataset) + 1:]: modelnet[key] for key in modelnet if key.startswith(dataset)}
            setattr(self, dataset, self.Dataset(data))

    train: Dataset
    """The training dataset."""
    dev: Dataset
    """The development dataset."""
    test: Dataset
    """The test dataset."""

    # Evaluation infrastructure.
    @staticmethod
    def evaluate(gold_dataset: Dataset, predictions: Sequence[int]) -> float:
        """Evaluate the `predictions` labels against the gold dataset.

        Returns:
          accuracy: The average accuracy of the predicted labels.

        Raises:
          RuntimeError: If the number of predictions differs from the gold data.
        """
        gold = gold_dataset.data["labels"]

        if len(predictions) != len(gold):
            raise RuntimeError("The predictions are of different size than gold data: {} vs {}".format(
                len(predictions), len(gold)))

        # Compare as Python ints so the result is a plain float rather than
        # a zero-dimensional torch tensor (the declared return type is float).
        correct = sum(int(g) == int(p) for g, p in zip(gold, predictions))
        return correct / len(gold)

    @staticmethod
    def evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
        """Evaluate the file with predictions against the gold dataset.

        Returns:
          accuracy: The average accuracy of the predicted labels.
        """
        predictions = [int(line) for line in predictions_file]
        return ModelNet.evaluate(gold_dataset, predictions)

C class-attribute instance-attribute

C: int = 1

The number of 3D grid channels.

D instance-attribute

D: int

The depth of the 3D grid, set in the constructor to 20 or 32.

H instance-attribute

H: int

The height of the 3D grid, set in the constructor to 20 or 32.

W instance-attribute

W: int

The width of the 3D grid, set in the constructor to 20 or 32.

LABELS class-attribute instance-attribute

LABELS: int = 10

The number of object classes.

LABEL_NAMES class-attribute instance-attribute

LABEL_NAMES: list[str] = [
    "bathtub",
    "bed",
    "chair",
    "desk",
    "dresser",
    "monitor",
    "night_stand",
    "sofa",
    "table",
    "toilet",
]

The names of the object classes.

Element class-attribute instance-attribute

Element = TypedDict('Element', {'grid': ndarray, 'label': ndarray})

The type of a single dataset element.

Elements class-attribute instance-attribute

Elements = TypedDict('Elements', {'grids': ndarray, 'labels': ndarray})

The type of the whole dataset.

Dataset

Bases: Dataset

Source code in npfl138/datasets/modelnet.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
class Dataset(torch.utils.data.Dataset):
    """A torch dataset holding the grids and labels of one ModelNet split."""

    def __init__(self, data: "ModelNet.Elements") -> None:
        # Turn every input array into a torch tensor, then move the channel
        # dimension of the grids from last to second (NDHWC -> NCDHW).
        tensors = {}
        for name, array in data.items():
            tensors[name] = torch.as_tensor(array)
        tensors["grids"] = tensors["grids"].movedim(-1, 1)
        self._data = tensors

    @property
    def data(self) -> "ModelNet.Elements":
        """Return the whole dataset as a `ModelNet.Elements` object."""
        return self._data

    def __len__(self) -> int:
        """Return the number of elements in the dataset."""
        grids = self._data["grids"]
        return len(grids)

    def __getitem__(self, index: int) -> "ModelNet.Element":
        """Return the `index`-th element of the dataset."""
        # Batched keys are plural ("grids"); the element uses singular keys.
        element = {}
        for name, tensor in self._data.items():
            element[name.removesuffix("s")] = tensor[index]
        return element

data property

data: Elements

Return the whole dataset as a ModelNet.Elements object.

__len__

__len__() -> int

Return the number of elements in the dataset.

Source code in npfl138/datasets/modelnet.py
69
70
71
def __len__(self) -> int:
    """Return the number of elements in the dataset."""
    # The first dimension of the "grids" tensor is the number of objects,
    # so its length is the dataset size.
    return len(self._data["grids"])

__getitem__

__getitem__(index: int) -> Element

Return the index-th element of the dataset.

Source code in npfl138/datasets/modelnet.py
73
74
75
def __getitem__(self, index: int) -> "ModelNet.Element":
    """Return the `index`-th element of the dataset."""
    # The batched data uses plural keys ("grids", "labels"); the returned
    # element uses the singular form ("grid", "label").
    return {key.removesuffix("s"): value[index] for key, value in self._data.items()}

__init__

__init__(resolution: Literal[20, 32]) -> None

Load the ModelNet dataset, downloading it if necessary.

Parameters:

  • resolution (Literal[20, 32]) –

    The resolution of the dataset to load.

Source code in npfl138/datasets/modelnet.py
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def __init__(self, resolution: Literal[20, 32]) -> None:
    """Load the ModelNet dataset, downloading it if necessary.

    Parameters:
      resolution: The resolution of the dataset to load.
    """
    assert resolution in [20, 32], "Only 20 or 32 resolution is supported"

    setattr(self, "D", resolution)  # use setattr to prevent mkdocs
    setattr(self, "H", resolution)  # moving the attribute docstring
    setattr(self, "W", resolution)  # after the __init__ method

    # Download the .npz archive if not already cached and open it.
    path = download_url_to_file(self.URL, f"modelnet{resolution}.npz")
    modelnet = np.load(path)
    for dataset, _size in [("train", 3_718), ("dev", 273), ("test", 908)]:
        # Strip the split name plus one separator character (presumably "_")
        # from the archive keys, leaving plain "grids"/"labels" per split.
        data = dict((key[len(dataset) + 1:], modelnet[key]) for key in modelnet if key.startswith(dataset))
        setattr(self, dataset, self.Dataset(data))

train instance-attribute

train: Dataset

The training dataset.

dev instance-attribute

dev: Dataset

The development dataset.

test instance-attribute

test: Dataset

The test dataset.

evaluate staticmethod

evaluate(gold_dataset: Dataset, predictions: Sequence[int]) -> float

Evaluate the predictions labels against the gold dataset.

Returns:

  • accuracy ( float ) –

    The average accuracy of the predicted labels.

Source code in npfl138/datasets/modelnet.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
@staticmethod
def evaluate(gold_dataset: Dataset, predictions: Sequence[int]) -> float:
    """Evaluate the `predictions` labels against the gold dataset.

    Returns:
      accuracy: The average accuracy of the predicted labels.
    """
    gold = gold_dataset.data["labels"]

    if len(predictions) != len(gold):
        raise RuntimeError("The predictions are of different size than gold data: {} vs {}".format(
            len(predictions), len(gold)))

    correct = sum(gold[i] == predictions[i] for i in range(len(gold)))
    return correct / len(gold)

evaluate_file staticmethod

evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float

Evaluate the file with predictions against the gold dataset.

Returns:

  • accuracy ( float ) –

    The average accuracy of the predicted labels.

Source code in npfl138/datasets/modelnet.py
119
120
121
122
123
124
125
126
127
@staticmethod
def evaluate_file(gold_dataset: Dataset, predictions_file: TextIO) -> float:
    """Evaluate the file with predictions against the gold dataset.

    Returns:
      accuracy: The average accuracy of the predicted labels.
    """
    # Each line of the file holds a single predicted label.
    parsed_predictions = list(map(int, predictions_file))
    return ModelNet.evaluate(gold_dataset, parsed_predictions)