shap-e/shap_e/rendering/view_data.py


								from abc import ABC, abstractmethod

								from dataclasses import dataclass

								from typing import Dict, List, Tuple


								import numpy as np


								@dataclass
								class Camera(ABC):
								    """
								    Abstract description of how a camera maps onto the pixels of an image.

								    Subclasses provide the pixel grid, the ray for each pixel, and the
								    operations (crop, resize, scene rescale) that derive a new camera.
								    """

								    @abstractmethod
								    def image_coords(self) -> np.ndarray:
								        """
								        Enumerate the image coordinates of every pixel.

								        :return: the image coordinates, i.e. a [self.height, self.width, 2]
								                 grid reshaped to (self.height * self.width, 2).
								        """

								    @abstractmethod
								    def camera_rays(self, coords: np.ndarray) -> np.ndarray:
								        """
								        Compute the ray passing through each requested (x, y) pixel of a
								        rendered image.

								        :param coords: an [N x 2] integer array of 2D image coordinates.
								        :return: an [N x 2 x 3] array holding one [2 x 3] (origin, direction)
								                 pair per coordinate. Directions should always be unit length.
								        """

								    def depth_directions(self, coords: np.ndarray) -> np.ndarray:
								        """
								        Get, for each (x, y) pixel of a rendered image, the direction along
								        which "depth" is measured in an RGBD rendering.

								        This may raise an exception if the corresponding ViewData has no "D"
								        channel. The base implementation always raises NotImplementedError.

								        :param coords: an [N x 2] integer array of 2D image coordinates.
								        :return: an [N x 3] array of normalized depth directions.
								        """
								        del coords  # Unused here; subclasses that support depth override this.
								        raise NotImplementedError()

								    @abstractmethod
								    def center_crop(self) -> "Camera":
								        """
								        Derive a camera with the same intrinsics and direction, center
								        cropped to a square whose side is the smaller image dimension.
								        """

								    @abstractmethod
								    def resize_image(self, width: int, height: int) -> "Camera":
								        """
								        Derive a camera with the same intrinsics and direction, but with
								        the image resized to the given dimensions.
								        """

								    @abstractmethod
								    def scale_scene(self, factor: float) -> "Camera":
								        """
								        Derive a camera with the same intrinsics and direction, but with
								        the scene rescaled by the given factor.
								        """


								@dataclass
								class ProjectiveCamera(Camera):
								    """
								    A Camera implementation for a standard pinhole camera.

								    Rays leave the origin along the z direction, while the x and y
								    directions correspond to the positive horizontal and vertical axes in
								    image space. The fields of view are fed straight to np.tan, so they
								    are interpreted as radians.
								    """

								    origin: np.ndarray
								    x: np.ndarray
								    y: np.ndarray
								    z: np.ndarray
								    width: int
								    height: int
								    x_fov: float
								    y_fov: float

								    def _derive(self, **overrides) -> "ProjectiveCamera":
								        """
								        Build a ProjectiveCamera that copies every field of this camera,
								        except for the fields passed as keyword arguments.
								        """
								        params = dict(
								            origin=self.origin,
								            x=self.x,
								            y=self.y,
								            z=self.z,
								            width=self.width,
								            height=self.height,
								            x_fov=self.x_fov,
								            y_fov=self.y_fov,
								        )
								        params.update(overrides)
								        return ProjectiveCamera(**params)

								    def image_coords(self) -> np.ndarray:
								        """
								        Enumerate all pixels in row-major order (x varies fastest).

								        :return: a float32 [(width * height) x 2] array of (x, y) coordinates.
								        """
								        flat_index = np.arange(self.width * self.height)
								        columns = flat_index % self.width
								        rows = flat_index // self.width
								        return np.stack([columns, rows], axis=1).astype(np.float32)

								    def camera_rays(self, coords: np.ndarray) -> np.ndarray:
								        """
								        Compute the (origin, direction) ray for each image coordinate.

								        :param coords: an [N x 2] array of (x, y) image coordinates.
								        :return: an array stacking, along axis 1, the camera origin
								                 (broadcast to every ray) and the unit-length direction.
								        """
								        # Pixel 0 maps to -1 and pixel (size - 1) maps to +1 on each axis.
								        # NOTE: a width or height of 1 makes this divisor zero.
								        last_index = np.array([self.width, self.height], dtype=np.float32) - 1
								        half_fov_tan = np.tan(np.array([self.x_fov, self.y_fov]) / 2)
								        offsets = ((coords / last_index) * 2 - 1) * half_fov_tan

								        horizontal = offsets[:, :1]
								        vertical = offsets[:, 1:]
								        unnormalized = self.z + self.x * horizontal + self.y * vertical
								        lengths = np.linalg.norm(unnormalized, axis=-1, keepdims=True)
								        directions = unnormalized / lengths

								        origins = np.broadcast_to(self.origin, directions.shape)
								        return np.stack([origins, directions], axis=1)

								    def depth_directions(self, coords: np.ndarray) -> np.ndarray:
								        """
								        Return the depth direction for each coordinate. Every pixel shares
								        the same one: the normalized z axis.

								        :param coords: a sequence of image coordinates; only its length is used.
								        :return: the normalized z vector repeated len(coords) times.
								        """
								        unit_z = self.z / np.linalg.norm(self.z)
								        return np.tile(unit_z[None], [len(coords), 1])

								    def resize_image(self, width: int, height: int) -> "ProjectiveCamera":
								        """
								        Creates a new camera for the resized view. The aspect ratio must
								        stay the same; only the width and height fields change.
								        """
								        assert width * self.height == height * self.width, "The aspect ratio should not change."
								        return self._derive(width=width, height=height)

								    def center_crop(self) -> "ProjectiveCamera":
								        """
								        Creates a new camera for the center-cropped view: a square image
								        whose side is the smaller dimension, using the smaller of the two
								        fields of view on both axes.
								        """
								        side = min(self.width, self.height)
								        narrow_fov = min(self.x_fov, self.y_fov)
								        return self._derive(width=side, height=side, x_fov=narrow_fov, y_fov=narrow_fov)

								    def scale_scene(self, factor: float) -> "ProjectiveCamera":
								        """
								        Creates a new camera with the same intrinsics and direction as this
								        one, but with the camera origin multiplied by the given factor.
								        """
								        return self._derive(origin=self.origin * factor)


								class ViewData(ABC):
								    """
								    Abstract collection of rendered camera views of a scene or object.

								    This generalizes a NeRF dataset: NeRF datasets only encode RGB or RGBA
								    data, whereas a ViewData may expose arbitrary channels.
								    """

								    @property
								    @abstractmethod
								    def num_views(self) -> int:
								        """
								        How many rendered views are available.
								        """

								    @property
								    @abstractmethod
								    def channel_names(self) -> List[str]:
								        """
								        The names of every channel that can be loaded for the views.

								        Names are arbitrary, but a few are standard:
								        "R", "G", "B", "A" (alpha), and "D" (depth).
								        """

								    @abstractmethod
								    def load_view(self, index: int, channels: List[str]) -> Tuple[Camera, np.ndarray]:
								        """
								        Load the requested channels of the view at the given index.

								        :param index: which view to load.
								        :param channels: the names of the channels to load.
								        :return: a tuple (camera_view, data), where data is a float array of
								                 shape [height x width x num_channels].
								        """


								class MemoryViewData(ViewData):
								    """
								    A ViewData backed entirely by in-memory arrays.
								    """

								    def __init__(self, channels: Dict[str, np.ndarray], cameras: List[Camera]):
								        """
								        :param channels: maps each channel name to an array whose first
								                         axis indexes the views.
								        :param cameras: one camera per view.
								        """
								        # Every channel must supply exactly one entry per camera.
								        assert all(frames.shape[0] == len(cameras) for frames in channels.values())
								        self.channels = channels
								        self.cameras = cameras

								    @property
								    def num_views(self) -> int:
								        """
								        The number of views, i.e. the number of stored cameras.
								        """
								        return len(self.cameras)

								    @property
								    def channel_names(self) -> List[str]:
								        """
								        The names of the stored channels, as a new list.
								        """
								        return list(self.channels)

								    def load_view(self, index: int, channels: List[str]) -> Tuple[Camera, np.ndarray]:
								        """
								        Fetch the camera and the requested channels for one view.

								        :param index: which view to load.
								        :param channels: the channel names to load, in output order.
								        :return: a tuple (camera, data), where data stacks the selected
								                 channels along a new last axis.
								        """
								        planes = [self.channels[name][index] for name in channels]
								        camera = self.cameras[index]
								        return camera, np.stack(planes, axis=-1)