You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
206 lines
6.4 KiB
206 lines
6.4 KiB
from abc import ABC, abstractmethod
|
|
from dataclasses import dataclass
|
|
from typing import Dict, List, Tuple
|
|
|
|
import numpy as np
|
|
|
|
|
|
@dataclass
class Camera(ABC):
    """
    Abstract description of how a camera maps onto the pixels of an image.

    Subclasses provide the pixel grid, the ray for each pixel, and the
    constructors for derived cameras (center crop, resize, scene rescale).
    """

    @abstractmethod
    def image_coords(self) -> np.ndarray:
        """
        Enumerate the image coordinates of this camera's pixels.

        :return: the image coordinates, i.e. a [self.height, self.width, 2]
                 grid flattened to [self.height * self.width, 2].
        """

    @abstractmethod
    def camera_rays(self, coords: np.ndarray) -> np.ndarray:
        """
        Compute the ray of the pixel at each (x, y) coordinate of a rendered
        image.

        :param coords: an [N x 2] integer array of 2D image coordinates.
        :return: an [N x 2 x 3] array whose [2 x 3] entries are
                 (origin, direction) pairs. Every direction should have unit
                 length.
        """

    def depth_directions(self, coords: np.ndarray) -> np.ndarray:
        """
        Get, for each (x, y) coordinate of a rendered image, the direction
        along which "depth" is measured in an RGBD rendering.

        Not abstract: this base implementation always raises, so cameras
        without depth support need not override it. An exception may also be
        raised when the corresponding ViewData has no "D" channel.

        :param coords: an [N x 2] integer array of 2D image coordinates.
        :return: an [N x 3] array of normalized depth directions.
        :raises NotImplementedError: always, in this base implementation.
        """
        del coords  # intentionally unused in the base implementation
        raise NotImplementedError()

    @abstractmethod
    def center_crop(self) -> "Camera":
        """
        Build a new camera sharing this one's intrinsics and direction, but
        center-cropped to a square whose side is the smaller image dimension.
        """

    @abstractmethod
    def resize_image(self, width: int, height: int) -> "Camera":
        """
        Build a new camera sharing this one's intrinsics and direction, but
        with the image dimensions changed to the given width and height.
        """

    @abstractmethod
    def scale_scene(self, factor: float) -> "Camera":
        """
        Build a new camera sharing this one's intrinsics and direction, but
        with the scene rescaled by the given factor.
        """
|
|
|
|
|
|
@dataclass
class ProjectiveCamera(Camera):
    """
    A Camera implementation for a standard pinhole camera.

    The camera rays shoot away from the origin in the z direction, with the x
    and y directions corresponding to the positive horizontal and vertical axes
    in image space.
    """

    # Position all rays start from.
    origin: np.ndarray
    # Basis vectors of the camera frame (image-horizontal, image-vertical,
    # viewing direction).
    x: np.ndarray
    y: np.ndarray
    z: np.ndarray
    # Image size in pixels.
    width: int
    height: int
    # Fields of view along x and y; passed to np.tan, so expressed in radians.
    x_fov: float
    y_fov: float

    def _derive(self, **overrides) -> "ProjectiveCamera":
        """
        Build a new ProjectiveCamera that copies every field of this one
        except those given as keyword overrides.
        """
        fields = dict(
            origin=self.origin,
            x=self.x,
            y=self.y,
            z=self.z,
            width=self.width,
            height=self.height,
            x_fov=self.x_fov,
            y_fov=self.y_fov,
        )
        fields.update(overrides)
        return ProjectiveCamera(**fields)

    def image_coords(self) -> np.ndarray:
        """
        List the (x, y) coordinate of every pixel in row-major order.

        :return: a float32 array of shape [width * height, 2].
        """
        ind = np.arange(self.width * self.height)
        coords = np.stack([ind % self.width, ind // self.width], axis=1).astype(np.float32)
        return coords

    def camera_rays(self, coords: np.ndarray) -> np.ndarray:
        """
        Compute the (origin, direction) ray for each image coordinate.

        Coordinates are mapped linearly so that pixel 0 and pixel (size - 1)
        land on -1 and +1 along each axis, then scaled by tan(fov / 2). An
        axis with a single pixel has no extent to normalize by, so its ray is
        centered on that axis instead of producing NaN from a 0 / 0 division.

        :param coords: an [N x 2] array of 2D image coordinates.
        :return: an [N x 2 x 3] array of (origin, direction) pairs with
                 unit-length directions.
        """
        # Largest pixel index per axis; zero when the axis holds one pixel.
        extent = np.array([self.width, self.height], dtype=np.float32) - 1
        has_span = extent > 0
        # Substitute a harmless divisor on degenerate axes; the quotient there
        # is discarded by the np.where below.
        safe_extent = np.where(has_span, extent, np.float32(1))
        fracs = np.where(has_span, (coords / safe_extent) * 2 - 1, 0.0)
        fracs = fracs * np.tan(np.array([self.x_fov, self.y_fov]) / 2)
        directions = self.z + self.x * fracs[:, :1] + self.y * fracs[:, 1:]
        directions = directions / np.linalg.norm(directions, axis=-1, keepdims=True)
        return np.stack([np.broadcast_to(self.origin, directions.shape), directions], axis=1)

    def depth_directions(self, coords: np.ndarray) -> np.ndarray:
        """
        Return the normalized z axis once per coordinate: depth is measured
        along the viewing direction for every pixel.

        :param coords: an [N x 2] array of image coordinates; only its length
                       is used.
        :return: an [N x 3] array of identical unit vectors.
        """
        return np.tile((self.z / np.linalg.norm(self.z))[None], [len(coords), 1])

    def resize_image(self, width: int, height: int) -> "ProjectiveCamera":
        """
        Creates a new camera for the resized view assuming the aspect ratio does not change.

        :param width: new image width in pixels.
        :param height: new image height in pixels.
        :return: a camera identical to this one apart from width and height.
        :raises AssertionError: if the new size has a different aspect ratio.
        """
        # Cross-multiplied comparison keeps the aspect-ratio check in exact
        # integer arithmetic.
        assert width * self.height == height * self.width, "The aspect ratio should not change."
        return self._derive(width=width, height=height)

    def center_crop(self) -> "ProjectiveCamera":
        """
        Creates a new camera for the center-cropped view.

        The result is square: both sides take the smaller image dimension and
        both fields of view take the smaller of x_fov and y_fov.
        """
        size = min(self.width, self.height)
        fov = min(self.x_fov, self.y_fov)
        return self._derive(width=size, height=size, x_fov=fov, y_fov=fov)

    def scale_scene(self, factor: float) -> "ProjectiveCamera":
        """
        Creates a new camera with the same intrinsics and direction as this one,
        but with the camera frame rescaled by the given factor.

        Only the origin is multiplied by the factor; the axes, image size and
        fields of view are copied unchanged.
        """
        return self._derive(origin=self.origin * factor)
|
|
|
|
|
|
class ViewData(ABC):
    """
    Abstract collection of rendered camera views of a scene or object.

    It generalizes a NeRF dataset: NeRF datasets only encode RGB or RGBA
    data, whereas this interface supports arbitrary channels.
    """

    @property
    @abstractmethod
    def num_views(self) -> int:
        """
        How many rendered views the collection holds.
        """

    @property
    @abstractmethod
    def channel_names(self) -> List[str]:
        """
        List every channel that is available for the views.

        Names can be arbitrary, but a few are standard:
        "R", "G", "B", "A" (alpha), and "D" (depth).
        """

    @abstractmethod
    def load_view(self, index: int, channels: List[str]) -> Tuple[Camera, np.ndarray]:
        """
        Fetch the requested channels of the view stored at the given index.

        :param index: which view to load.
        :param channels: names of the channels to load.
        :return: a tuple (camera_view, data), where data is a float array of
                 shape [height x width x num_channels].
        """
|
|
|
|
|
|
class MemoryViewData(ViewData):
    """
    A ViewData whose channels and cameras are all held in memory.
    """

    def __init__(self, channels: Dict[str, np.ndarray], cameras: List[Camera]):
        """
        :param channels: mapping from channel name to an array whose first
                         dimension has one entry per camera.
        :param cameras: the cameras of the views.
        """
        # Every channel must provide exactly one entry per camera.
        for per_view in channels.values():
            assert per_view.shape[0] == len(cameras)
        self.cameras = cameras
        self.channels = channels

    @property
    def num_views(self) -> int:
        """
        The number of views, which equals the number of stored cameras.
        """
        view_count = len(self.cameras)
        return view_count

    @property
    def channel_names(self) -> List[str]:
        """
        The names of the stored channels, as a new list.
        """
        return [*self.channels.keys()]

    def load_view(self, index: int, channels: List[str]) -> Tuple[Camera, np.ndarray]:
        """
        Return the camera at the given index together with the requested
        channels of that view, stacked along a new last axis in the order
        requested.

        :param index: which view to load.
        :param channels: names of the channels to load.
        :return: a tuple (camera, data) with one slice of data per channel.
        """
        planes = []
        for name in channels:
            planes.append(self.channels[name][index])
        camera = self.cameras[index]
        return camera, np.stack(planes, axis=-1)
|
|
|