Create a stereo agent

This example shows how to create an agent with a stereo camera pair.

This can be done by giving the agent two sensors (be it RGB, depth, or semantic) with different positions. Note that the cameras must have different UUIDs.

Example is runnable via

$ python examples/tutorials/stereo_agent.py
import numpy as np

import habitat_sim

cv2 = None


# Helper function to render observations from the stereo agent
def _render(sim, display, depth=False):
    for _ in range(100):
        # Just spin in a circle
        obs = sim.step("turn_right")
        # Put the two stereo observations next to each other
        stereo_pair = np.concatenate([obs["left_sensor"], obs["right_sensor"]], axis=1)

        # If it is a depth pair, manually normalize into [0, 1]
        # so that images are always consistent
        if depth:
            stereo_pair = np.clip(stereo_pair, 0, 10)
            stereo_pair /= 10.0

        # If in RGB/RGBA format, change first to RGB and change to BGR
        if len(stereo_pair.shape) > 2:
            stereo_pair = stereo_pair[..., 0:3][..., ::-1]

        # display=False is used for the smoke test
        if display:
            cv2.imshow("stereo_pair", stereo_pair)
            k = cv2.waitKey()
            if k == ord("q"):
                break


def main(display=True):
    global cv2
    # Only import cv2 if we are doing to display
    if display:
        import cv2 as _cv2

        cv2 = _cv2

        cv2.namedWindow("stereo_pair")

    backend_cfg = habitat_sim.SimulatorConfiguration()
    backend_cfg.scene_id = (
        "data/scene_datasets/habitat-test-scenes/skokloster-castle.glb"
    )

    # First, let's create a stereo RGB agent
    left_rgb_sensor = habitat_sim.bindings.CameraSensorSpec()
    # Give it the uuid of left_sensor, this will also be how we
    # index the observations to retrieve the rendering from this sensor
    left_rgb_sensor.uuid = "left_sensor"
    left_rgb_sensor.resolution = [512, 512]
    # The left RGB sensor will be 1.5 meters off the ground
    # and 0.25 meters to the left of the center of the agent
    left_rgb_sensor.position = 1.5 * habitat_sim.geo.UP + 0.25 * habitat_sim.geo.LEFT

    # Same deal with the right sensor
    right_rgb_sensor = habitat_sim.CameraSensorSpec()
    right_rgb_sensor.uuid = "right_sensor"
    right_rgb_sensor.resolution = [512, 512]
    # The right RGB sensor will be 1.5 meters off the ground
    # and 0.25 meters to the right of the center of the agent
    right_rgb_sensor.position = 1.5 * habitat_sim.geo.UP + 0.25 * habitat_sim.geo.RIGHT

    agent_config = habitat_sim.AgentConfiguration()
    # Now we simply set the agent's list of sensor specs to be the two specs for our two sensors
    agent_config.sensor_specifications = [left_rgb_sensor, right_rgb_sensor]

    sim = habitat_sim.Simulator(habitat_sim.Configuration(backend_cfg, [agent_config]))

    _render(sim, display)
    sim.close()

    # Now let's do the exact same thing but for a depth camera stereo pair!
    left_depth_sensor = habitat_sim.CameraSensorSpec()
    left_depth_sensor.uuid = "left_sensor"
    left_depth_sensor.resolution = [512, 512]
    left_depth_sensor.position = 1.5 * habitat_sim.geo.UP + 0.25 * habitat_sim.geo.LEFT
    # The only difference is that we set the sensor type to DEPTH
    left_depth_sensor.sensor_type = habitat_sim.SensorType.DEPTH

    right_depth_sensor = habitat_sim.CameraSensorSpec()
    right_depth_sensor.uuid = "right_sensor"
    right_depth_sensor.resolution = [512, 512]
    right_depth_sensor.position = (
        1.5 * habitat_sim.geo.UP + 0.25 * habitat_sim.geo.RIGHT
    )
    # The only difference is that we set the sensor type to DEPTH
    right_depth_sensor.sensor_type = habitat_sim.SensorType.DEPTH

    agent_config = habitat_sim.AgentConfiguration()
    agent_config.sensor_specifications = [left_depth_sensor, right_depth_sensor]

    sim = habitat_sim.Simulator(habitat_sim.Configuration(backend_cfg, [agent_config]))

    _render(sim, display, depth=True)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--no-display", dest="display", action="store_false")
    parser.set_defaults(display=True)
    args = parser.parse_args()
    main(display=args.display)