Commit ea1d12a2 authored by VIOLANTE Nicolas

Merge branch 'camera' into 'master'

EgoNet to EG3D

See merge request !1
parents d04c9b3c 2b6140de
@@ -11,11 +11,13 @@ from pathlib import Path
from libs.model.egonet import EgoNet
from libs.arguments.parse import read_yaml_file
from convert_to_eg3d import to_eg3d, get_box_height


def angles_to_coords(roll, yaw, pitch):
    x = -np.cos(yaw) * np.sin(pitch) * np.sin(roll) - np.sin(yaw) * np.cos(roll)
    y = -np.sin(yaw) * np.sin(pitch) * np.sin(roll) + np.cos(yaw) * np.cos(roll)
    z = np.cos(pitch) * np.sin(roll)
    return x, y, z
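
# Sanity check (illustrative, not exercised by the script below): these
# formulas map the Euler angles onto the unit sphere, since
# x**2 + y**2 + z**2 = sin(roll)**2 + cos(roll)**2 = 1 for any inputs; e.g.
#   angles_to_coords(roll=np.pi / 2, yaw=0.0, pitch=0.0) -> (0.0, 0.0, 1.0)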
@@ -25,20 +27,32 @@ def file_ext(name):
def is_image_ext(fname):
    ext = file_ext(fname).lower()
    return f".{ext}" in Image.EXTENSION
@click.command()
@click.option(
    "--cfg",
    type=str,
    help="Configuration .yaml file",
    default="./configs/custom.yml",
)
@click.option("--data", type=str, help="Dataset folder path", required=True)
@click.option(
    "--dest", type=str, help="Destination .json file", default="./dataset.json"
)
@click.option(
    "--stylegan-format",
    type=bool,
    help="True if images are stored following the StyleGAN format, i.e., data/00000/img00000.png",
)
@click.option("--max-images", type=int, default=None)
def main(cfg, data, dest, stylegan_format, max_images):
    cfg = read_yaml_file(cfg)
    # cudnn related setting
    torch.backends.cudnn.benchmark = cfg["cudnn"]["benchmark"]
    torch.backends.cudnn.deterministic = cfg["cudnn"]["deterministic"]
    torch.backends.cudnn.enabled = cfg["cudnn"]["enabled"]

    # Initialize Ego-Net and load the pre-trained checkpoint
    print("Loading models..")
@@ -49,42 +63,60 @@ def main(cfg, data, dest, stylegan_format):
    # ---------------------------------------------------------------------
    Image.init()
    image_paths = (
        str(f)
        for f in Path(data).rglob("*")
        if is_image_ext(f) and os.path.isfile(f)
    )
    dataset_json = {"labels": []}
    total = 0
    intrinsics = [4.2647, 0.0, 0.5, 0.0, 4.2647, 0.5, 0.0, 0.0, 1.0]
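    # EG3D-style label layout: each label appended below is the flattened 4x4
    # cam2world matrix (16 values) followed by this flattened 3x3 intrinsics
    # matrix (9 values), with focal length and principal point normalized by
    # the image size (fx = fy = 4.2647, cx = cy = 0.5).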
    for image_path in image_paths:
        total += 1
        print(f"Processing image {total}")
        try:
            rgb_image = np.array(Image.open(image_path))
        except Exception:
            continue
        # 2D bounding box detection; keep the largest-area detection, assumed
        # to be the main object in the frame
        image = rgb_image.transpose(2, 0, 1) / 255.0
        image_tensor = torch.FloatTensor(image)[None, ...].cuda()
        detections = model(image_tensor)[0]
        areas = box_area(detections["boxes"])
        idx = areas.argmax().item()
        detected_box = (
            detections["boxes"][idx].cpu().detach().numpy().astype(int)
        )
        # 3D bounding box detection
        annot = {"path": [image_path], "boxes": [[detected_box]]}
        records = model_3d(annot)
        record = model_3d.post_process(records, False)[image_path]
        height = get_box_height(record["kpts_3d_pred"][0])
        rotation = model_3d.get_rotation(record["kpts_3d_pred"])
        cam2world = to_eg3d(rotation, height).flatten().tolist()
        image_name = os.path.basename(image_path)
        if stylegan_format:
            folder_num = image_path.split("/")[-2]
            name = os.path.join(folder_num, image_name)
        else:
            name = image_name
        dataset_json["labels"].append([name, cam2world + intrinsics])
        # # ----
        # record['cam2world'] = cam2world
        # record['rotation'] = rotation
        # np.save(f"./camera_calib/results/{image_name.split('.')[0]}.npy", record)
        # # ---
        if max_images is not None and total >= max_images:
            break

    with open(dest, "w") as f:
        json.dump(dataset_json, f, indent=2)


if __name__ == "__main__":
    main()
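

# Second file in this merge request (presumably convert_to_eg3d.py, since the
# script above imports to_eg3d and get_box_height from that module):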
import numpy as np
from scipy.spatial.transform import Rotation
from collections import namedtuple
import polyscope as ps


ObjectBox = namedtuple("ObjectBox", ["coord_system", "vertices", "faces", "name"])
CoordSystem = namedtuple("CoordSystem", ["basis", "origin"])
# Triangulated faces of a box mesh (two triangles per side), indexing the
# 8 corners produced by create_canonical_box below
faces = np.array(
    [
        [3, 2, 0],
        [3, 1, 0],
        [3, 2, 6],
        [3, 7, 6],
        [1, 0, 4],
        [1, 5, 4],
        [7, 5, 4],
        [7, 6, 4],
        [7, 5, 1],
        [7, 3, 1],
        [6, 2, 0],
        [6, 4, 0],
    ]
)
interp_dict = {
    "bbox12": (
        # Parent/child corner indices (1-based) pairing the twelve box edges:
        # four along the h direction, four along l, four along w
        np.array([1, 3, 5, 7, 1, 2, 3, 4, 1, 2, 5, 6]),
        np.array([2, 4, 6, 8, 5, 6, 7, 8, 3, 4, 7, 8]),
    ),
    "bbox12l": (np.array([1, 2, 3, 4]), np.array([5, 6, 7, 8])),  # l direction
    "bbox12h": (np.array([1, 3, 5, 7]), np.array([2, 4, 6, 8])),  # h direction
    "bbox12w": (np.array([1, 2, 5, 6]), np.array([3, 4, 7, 8])),  # w direction
}
def get_box_height(prediction):
    parents = prediction[interp_dict["bbox12"][0] - 1]
    children = prediction[interp_dict["bbox12"][1] - 1]
    lines = parents - children
    lines = np.sqrt(np.sum(lines**2, axis=1))
    # box height: average length of the four h-direction edges
    h = np.sum(lines[:4]) / 4
    return h
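
# Worked example: for the canonical box defined further below with h = 1, the
# four h-direction edges all have length 1, so
#   get_box_height(create_canonical_box(l=2, h=1, w=1)) -> 1.0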
def get_transform_in_sys(object_sys, transformation):
    # Conjugate `transformation` so it is applied in the local frame defined
    # by `object_sys`: translate to the origin, align the axes, transform,
    # then map back to world coordinates
    to_world = np.eye(4)
    to_world[:3, -1] = -object_sys.origin
    to_sys_center = np.eye(4)
    to_sys_center[:3, -1] = object_sys.origin
    align_axes_to_world = np.eye(4)
    align_axes_to_world[:3, :3] = object_sys.basis
    return (
        to_sys_center
        @ np.linalg.inv(align_axes_to_world)
        @ transformation
        @ align_axes_to_world
        @ to_world
    )
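
# For instance, with an identity basis and origin o, the result reduces to
#   translate(+o) @ transformation @ translate(-o),
# i.e. the transformation is applied about o rather than the world origin.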
def to_eg3d(extrinsics, height):
    eg3d_camera_sys = CoordSystem(
        basis=np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
        origin=np.array([[0, 0, 0]]),
    )
    translation = np.eye(4, dtype=np.float32)
    translation[2, -1] = 2.7
    object_origin = transform_points(
        eg3d_camera_sys, eg3d_camera_sys.origin, translation @ extrinsics
    )
    object_basis = (
        transform_points(
            eg3d_camera_sys,
            eg3d_camera_sys.basis + eg3d_camera_sys.origin,
            translation @ extrinsics,
        )
        - object_origin
    )
    object_sys = CoordSystem(basis=object_basis, origin=object_origin)

    # Object system to EG3D object system
    rot_x = np.eye(4, dtype=np.float32)
    rot_x[:3, :3] = Rotation.from_rotvec([90, 0, 0], degrees=True).as_matrix()
    inv_z = np.eye(4, dtype=np.float32)
    inv_z[2, 2] = -1
    offset = np.eye(4, dtype=np.float32)
    offset[1, -1] = -0.5 * height
    M = get_transform_in_sys(object_sys, offset @ inv_z @ rot_x)
    cam2world = M @ translation @ extrinsics
    return cam2world
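
# Usage sketch (mirrors the __main__ demo at the bottom of this file):
#   rotation = record["rotation"].reshape(4, 4)     # 4x4 EgoNet pose
#   height = get_box_height(record["kpts_3d_pred"][0])
#   cam2world = to_eg3d(rotation, height)           # 4x4 EG3D camera pose
# The fixed translation of 2.7 along z places the object at a constant
# distance in front of the camera, and `offset` shifts the object frame by
# -h/2 along its y axis so it sits at the box's vertical center (the
# canonical box spans y in [-h, 0]).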
def transform_coord_system(coord_system: CoordSystem, transformation):
    new_origin = transformation @ np.block([coord_system.origin, 1.0]).T
    new_origin /= new_origin[-1]
    new_origin = new_origin[:3].T
    new_basis = (
        transformation
        @ np.block(
            [coord_system.basis, np.ones((coord_system.basis.shape[0], 1))]
        ).T
    ).T
    new_basis = new_basis[:, :3] - new_origin
    return CoordSystem(new_basis, new_origin)
def create_canonical_box(l=2, h=1, w=1, prediction=None):
    if prediction is not None:
        parents = prediction[interp_dict["bbox12"][0] - 1]
        children = prediction[interp_dict["bbox12"][1] - 1]
        lines = parents - children
        lines = np.sqrt(np.sum(lines**2, axis=1))
        # averaged over the four parallel line segments
        h, l, w = (
            np.sum(lines[:4]) / 4,
            np.sum(lines[4:8]) / 4,
            np.sum(lines[8:]) / 4,
        )
    x_corners = [l, l, l, l, 0, 0, 0, 0]
    y_corners = [0, h, 0, h, 0, h, 0, h]
    z_corners = [w, w, 0, 0, w, w, 0, 0]
    x_corners -= np.float32(l) / 2.0
    y_corners -= np.float32(h)
    z_corners -= np.float32(w) / 2.0
    corners_3d = np.array([x_corners, y_corners, z_corners])
    return corners_3d.T
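
# Returns an (8, 3) array of corners, centered in x and z with y in [-h, 0];
# the corner order matches interp_dict["bbox12"] and the `faces` array above.
# E.g. create_canonical_box(l=2, h=1, w=1)[0] -> [1.0, -1.0, 0.5].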
def create_box(transformation, coord_system, vertices, faces, name):
    new_vertices = (
        transformation @ np.block([vertices, np.ones((vertices.shape[0], 1))]).T
    ).T
    new_vertices = new_vertices[:, :3]
    new_coord_system = transform_coord_system(coord_system, transformation)
    return ObjectBox(new_coord_system, new_vertices, faces, name)
def add_box(box: ObjectBox, color):
    ps.register_surface_mesh(
        box.name, box.vertices, box.faces, color=color, transparency=0.6
    )
    add_coords(box.name, box.coord_system)
def add_coords(name: str, coord_system: CoordSystem):
    coord_sys = ps.register_point_cloud(
        f"{name} sys", coord_system.origin, radius=0.02, color=(1, 1, 1)
    )
    coord_sys.add_vector_quantity(
        "X",
        coord_system.basis[0][None, :],
        length=0.1,
        enabled=True,
        radius=0.01,
        color=(1, 0, 0),
    )
    coord_sys.add_vector_quantity(
        "Y",
        coord_system.basis[1][None, :],
        length=0.1,
        enabled=True,
        radius=0.01,
        color=(0, 1, 0),
    )
    coord_sys.add_vector_quantity(
        "Z",
        coord_system.basis[2][None, :],
        length=0.1,
        enabled=True,
        radius=0.01,
        color=(0, 0, 1),
    )
def create_egonet2eg3d():
    roty = np.zeros((4, 4), dtype=np.float32)
    rotx = np.zeros((4, 4), dtype=np.float32)
    roty[:3, :3] = Rotation.from_rotvec([0, 90, 0], degrees=True).as_matrix()
    roty[3, 3] = 1.0
    rotx[:3, :3] = Rotation.from_rotvec([-90, 0, 0], degrees=True).as_matrix()
    rotx[3, 3] = 1.0
    egonet2eg3d = rotx @ roty
    return egonet2eg3d
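
# Sanity check: the combined homogeneous rotation maps +x -> -y, +y -> -z and
# +z -> +x, e.g.
#   create_egonet2eg3d() @ np.array([0, 0, 1, 0]) -> [1, 0, 0, 0]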
def transform_points(object_sys, points, transformation):
    to_world = np.eye(4)
    to_world[:3, -1] = -object_sys.origin
    to_sys_center = np.eye(4)
    to_sys_center[:3, -1] = object_sys.origin
    align_axes_to_world = np.eye(4)
    align_axes_to_world[:3, :3] = object_sys.basis
    M = (
        to_sys_center
        @ np.linalg.inv(align_axes_to_world)
        @ transformation
        @ align_axes_to_world
        @ to_world
    )
    new_points = (M @ np.block([points, np.ones((points.shape[0], 1))]).T).T
    new_points = new_points[:, :3]
    return new_points
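
# Same conjugation as get_transform_in_sys, applied directly to an (N, 3)
# array of points. Example (as used in to_eg3d above): with the identity
# eg3d_camera_sys,
#   transform_points(eg3d_camera_sys, np.zeros((1, 3)), translation)
# simply translates the origin, giving [[0.0, 0.0, 2.7]].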
if __name__ == "__main__":
    record = np.load(
        "./camera_calib/results/img00001075.npy", allow_pickle=True
    ).item()
    rotation = record["rotation"].reshape(4, 4)
    rotation[:3, -1] = np.zeros(3)
    h = get_box_height(record["kpts_3d_pred"][0])
    cam2world = to_eg3d(rotation, h)

    eg3d_camera_sys = CoordSystem(
        basis=np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
        origin=np.array([[0, 0, 0]]),
    )
    translation = np.eye(4, dtype=np.float32)
    translation[2, -1] = 2.7
    box = create_box(
        translation @ rotation,
        eg3d_camera_sys,
        create_canonical_box(prediction=record["kpts_3d_pred"][0]),
        faces,
        "car 1075",
    )

    ps.init()
    ps.set_ground_plane_mode("none")
    ps.set_up_dir("neg_y_up")
    add_coords("EG3D", eg3d_camera_sys)
    add_coords(
        "EG3D object system", transform_coord_system(eg3d_camera_sys, cam2world)
    )
    add_box(box, color=(0, 1, 0))
    ps.show()