Commit 929c8b62 authored by nviolante

add conversion code with example

parent 7094ae45
......@@ -11,6 +11,8 @@ from pathlib import Path
from libs.model.egonet import EgoNet
from libs.arguments.parse import read_yaml_file
from convert_to_eg3d import to_eg3d, get_box_height
def angles_to_coords(roll, yaw, pitch):
    x = -np.cos(yaw) * np.sin(pitch) * np.sin(roll) - np.sin(yaw) * np.cos(roll)
......@@ -28,12 +30,14 @@ def is_image_ext(fname):
    return f'.{ext}' in Image.EXTENSION
@click.command()
@click.option("--cfg", help="Configuration .yaml file", default="./configs/custom.yml")
@click.option("--data", help="Dataset folder path", required=True)
@click.option("--dest", help="Destination .json file", default="./dataset.json")
@click.option("--stylegan-format", help="True is image are stored following the format of StyleGAN, i.e, data/00000/img00000.png")
def main(cfg, data, dest, stylegan_format):
@click.option("--cfg", type=str, help="Configuration .yaml file", default="./configs/custom.yml")
@click.option("--data", type=str, help="Dataset folder path", required=True)
@click.option("--dest", type=str, help="Destination .json file", default="./dataset.json")
@click.option("--stylegan-format", type=bool, help="True is image are stored following the format of StyleGAN, i.e, data/00000/img00000.png")
@click.option("--max-images" , type=int, default=None)
def main(cfg, data, dest, stylegan_format, max_images):
    cfg = read_yaml_file(cfg)
    # cudnn related setting
    torch.backends.cudnn.benchmark = cfg['cudnn']['benchmark']
......@@ -52,6 +56,7 @@ def main(cfg, data, dest, stylegan_format):
    image_paths = (str(f) for f in Path(data).rglob("*") if is_image_ext(f) and os.path.isfile(f))
    dataset_json = {"labels": []}
    total = 0
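    # EG3D-style label: a flattened 4x4 cam2world matrix (16 values) followed by 9 normalized intrinsics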
    intrinsics = [4.2647, 0.0, 0.5, 0.0, 4.2647, 0.5, 0.0, 0.0, 1.0]
    for image_path in image_paths:
        total += 1
        print(f'Processing image {total}')
......@@ -72,8 +77,11 @@ def main(cfg, data, dest, stylegan_format):
annot = {"path": [image_path],
"boxes": [[detected_box]]
}
record = model_3d(annot)[image_path]
extrinsics = model_3d.get_extrinsics_as_list(record['kpts_3d_pred'])
records = model_3d(annot)#[image_path]
record = model_3d.post_process(records, False)[image_path]
height = get_box_height(record['kpts_3d_pred'][0])
rotation = model_3d.get_rotation(record['kpts_3d_pred'])
cam2world = to_eg3d(rotation, height).flatten().tolist()
        image_name = os.path.basename(image_path)
        if stylegan_format:
......@@ -81,7 +89,16 @@ def main(cfg, data, dest, stylegan_format):
            name = os.path.join(folder_num, image_name)
        else:
            name = image_name
        dataset_json["labels"].append([name, extrinsics])
        dataset_json["labels"].append([name, cam2world + intrinsics])
        # ----
        # record['cam2world'] = cam2world
        # record['rotation'] = rotation
        # np.save(f"./camera_calib/results/{image_name.split('.')[0]}.npy", record)
        # ----
        if max_images is not None and total >= max_images:
            break
    with open(dest, 'w') as f:
        json.dump(dataset_json, f, indent=2)
......
import numpy as np
from scipy.spatial.transform import Rotation
from collections import namedtuple
import polyscope as ps
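# 1-based keypoint indices of the 12 box edges as (parent, child) pairs,
# grouped by the direction of the edge (height, length, width)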
interp_dict = {
    'bbox12': (np.array([1, 3, 5, 7,   # h direction
                         1, 2, 3, 4,   # l direction
                         1, 2, 5, 6]), # w direction
               np.array([2, 4, 6, 8,
                         5, 6, 7, 8,
                         3, 4, 7, 8])
               ),
    'bbox12l': (np.array([1, 2, 3, 4]),  # l direction
                np.array([5, 6, 7, 8])
                ),
    'bbox12h': (np.array([1, 3, 5, 7]),  # h direction
                np.array([2, 4, 6, 8])
                ),
    'bbox12w': (np.array([1, 2, 5, 6]),  # w direction
                np.array([3, 4, 7, 8])
                ),
}
def get_box_height(prediction):
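    """Return the box height: the mean length of the four h-direction edges."""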
    parents = prediction[interp_dict['bbox12'][0] - 1]
    children = prediction[interp_dict['bbox12'][1] - 1]
    lines = parents - children
    lines = np.sqrt(np.sum(lines**2, axis=1))
    h = np.sum(lines[:4]) / 4
    return h
def get_transform_in_sys(object_sys, transformation):
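    """Return the world-frame matrix that applies `transformation` inside the local frame of `object_sys`."""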
    to_world = np.eye(4)
    to_world[:3, -1] = -object_sys.origin
    to_sys_center = np.eye(4)
    to_sys_center[:3, -1] = object_sys.origin
    align_axes_to_world = np.eye(4)
    align_axes_to_world[:3, :3] = object_sys.basis
    return to_sys_center @ np.linalg.inv(align_axes_to_world) @ transformation @ align_axes_to_world @ to_world
def to_eg3d(extrinsics, height):
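    """Convert an EgoNet rotation (4x4, zero translation) to an EG3D cam2world matrix.

    Assumes the EG3D convention of a camera 2.7 units from the origin; the object
    system is rotated/flipped to match EG3D's and shifted down by half the box height.
    """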
    eg3d_camera_sys = CoordSystem(
        basis=np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
        origin=np.array([[0, 0, 0]])
    )
    translation = np.eye(4, dtype=np.float32)
    translation[2, -1] = 2.7
    object_origin = transform_points(eg3d_camera_sys, eg3d_camera_sys.origin, translation @ extrinsics)
    object_basis = transform_points(eg3d_camera_sys, eg3d_camera_sys.basis + eg3d_camera_sys.origin, translation @ extrinsics) - object_origin
    object_sys = CoordSystem(
        basis=object_basis,
        origin=object_origin
    )
    # Object system to EG3D object system
    rot_x = np.eye(4, dtype=np.float32)
    rot_x[:3, :3] = Rotation.from_rotvec([90, 0, 0], degrees=True).as_matrix()
    inv_z = np.eye(4, dtype=np.float32)
    inv_z[2, 2] = -1
    offset = np.eye(4, dtype=np.float32)
    offset[1, -1] = -0.5 * height
    M = get_transform_in_sys(object_sys, offset @ inv_z @ rot_x)
    cam2world = M @ translation @ extrinsics
    return cam2world
ObjectBox = namedtuple('ObjectBox', ['coord_system', 'vertices', 'faces', 'name'])
CoordSystem = namedtuple('CoordSystem', ['basis', 'origin'])
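# Triangle faces (two per side) for rendering the 8-corner box as a surface mesh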
faces = np.array([[3, 2, 0],
                  [3, 1, 0],
                  [3, 2, 6],
                  [3, 7, 6],
                  [1, 0, 4],
                  [1, 5, 4],
                  [7, 5, 4],
                  [7, 6, 4],
                  [7, 5, 1],
                  [7, 3, 1],
                  [6, 2, 0],
                  [6, 4, 0]
                  ])
def transform_coord_system(coord_system: CoordSystem, transformation):
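    """Apply a homogeneous transformation to both the origin and the basis vectors of a coordinate system."""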
    new_origin = (transformation @ np.block([coord_system.origin, 1.0]).T)
    new_origin /= new_origin[-1]
    new_origin = new_origin[:3].T
    new_basis = (transformation @ np.block([coord_system.basis, np.ones((coord_system.basis.shape[0], 1))]).T).T
    new_basis = new_basis[:, :3] - new_origin
    return CoordSystem(new_basis, new_origin)
def create_canonical_box(l=2, h=1, w=1, prediction=None):
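    """Build the 8 corners of a canonical box; dimensions are taken from `prediction` when given."""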
    if prediction is not None:
        parents = prediction[interp_dict['bbox12'][0] - 1]
        children = prediction[interp_dict['bbox12'][1] - 1]
        lines = parents - children
        lines = np.sqrt(np.sum(lines**2, axis=1))
        # averaged over the four parallel line segments
        h, l, w = np.sum(lines[:4]) / 4, np.sum(lines[4:8]) / 4, np.sum(lines[8:]) / 4
    x_corners = [l, l, l, l, 0, 0, 0, 0]
    y_corners = [0, h, 0, h, 0, h, 0, h]
    z_corners = [w, w, 0, 0, w, w, 0, 0]
    x_corners -= np.float32(l) / 2.0
    y_corners -= np.float32(h)
    z_corners -= np.float32(w) / 2.0
    corners_3d = np.array([x_corners, y_corners, z_corners])
    return corners_3d.T
def create_box(transformation, coord_system, vertices, faces, name):
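    """Transform the canonical box vertices and its coordinate system into an ObjectBox."""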
    new_vertices = (transformation @ np.block([vertices, np.ones((vertices.shape[0], 1))]).T).T
    new_vertices = new_vertices[:, :3]
    new_coord_system = transform_coord_system(coord_system, transformation)
    return ObjectBox(new_coord_system, new_vertices, faces, name)
def add_box(box: ObjectBox, color):
    ps.register_surface_mesh(box.name, box.vertices, box.faces, color=color, transparency=0.6)
    add_coords(box.name, box.coord_system)
def add_coords(name: str, coord_system: CoordSystem):
    coord_sys = ps.register_point_cloud(f"{name} sys", coord_system.origin, radius=0.02, color=(1, 1, 1))
    coord_sys.add_vector_quantity("X", coord_system.basis[0][None, :], length=0.1, enabled=True, radius=0.01, color=(1, 0, 0))
    coord_sys.add_vector_quantity("Y", coord_system.basis[1][None, :], length=0.1, enabled=True, radius=0.01, color=(0, 1, 0))
    coord_sys.add_vector_quantity("Z", coord_system.basis[2][None, :], length=0.1, enabled=True, radius=0.01, color=(0, 0, 1))
def create_egonet2eg3d():
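    """Return the fixed change-of-basis matrix from the EgoNet to the EG3D coordinate system."""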
    roty = np.zeros((4, 4), dtype=np.float32)
    rotx = np.zeros((4, 4), dtype=np.float32)
    roty[:3, :3] = Rotation.from_rotvec([0, 90, 0], degrees=True).as_matrix()
    roty[3, 3] = 1.0
    rotx[:3, :3] = Rotation.from_rotvec([-90, 0, 0], degrees=True).as_matrix()
    rotx[3, 3] = 1.0
    egonet2eg3d = rotx @ roty
    return egonet2eg3d
def transform_points(object_sys, points, transformation):
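    """Transform `points` by `transformation` expressed in the local frame of `object_sys`."""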
    to_world = np.eye(4)
    to_world[:3, -1] = -object_sys.origin
    to_sys_center = np.eye(4)
    to_sys_center[:3, -1] = object_sys.origin
    align_axes_to_world = np.eye(4)
    align_axes_to_world[:3, :3] = object_sys.basis
    M = to_sys_center @ np.linalg.inv(align_axes_to_world) @ transformation @ align_axes_to_world @ to_world
    new_points = (M @ np.block([points, np.ones((points.shape[0], 1))]).T).T
    new_points = new_points[:, :3]
    return new_points
if __name__ == "__main__":
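    # Example: rebuild the box for one saved record and visualize it with polyscope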
    record = np.load("./camera_calib/results/img00001075.npy", allow_pickle=True).item()
    rotation = record["rotation"].reshape(4, 4)
    rotation[:3, -1] = np.zeros(3)
    h = get_box_height(record['kpts_3d_pred'][0])
    cam2world = to_eg3d(rotation, h)
    eg3d_camera_sys = CoordSystem(
        basis=np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
        origin=np.array([[0, 0, 0]])
    )
    translation = np.eye(4, dtype=np.float32)
    translation[2, -1] = 2.7
    box1075 = create_box(translation @ rotation,
                         eg3d_camera_sys,
                         create_canonical_box(prediction=record['kpts_3d_pred'][0]),
                         faces,
                         "car 1075")
    ps.init()
    ps.set_ground_plane_mode("none")
    ps.set_up_dir('neg_y_up')
    add_coords('EG3D', eg3d_camera_sys)
    add_coords("EG3D object system", transform_coord_system(eg3d_camera_sys, cam2world))
    add_box(box1075, color=(0, 1, 0))
    ps.show()
\ No newline at end of file
......@@ -252,8 +252,8 @@ class EgoNet(nn.Module):
        z_corners = [w, w, 0, 0, w, w, 0, 0]
        x_corners += - np.float32(l) / 2
        # TODO: this differs from the original EgoNet code!
        y_corners += - np.float32(h)
        # y_corners += - np.float32(h/2)
        # y_corners += - np.float32(h)
        y_corners += - np.float32(h) / 2
        z_corners += - np.float32(w) / 2
        corners_3d = np.array([x_corners, y_corners, z_corners])
......@@ -298,19 +298,16 @@ class EgoNet(nn.Module):
        translation = predictions[:, 0, :]
        return angles, translation
    def get_extrinsics_as_list(self, kpts_3d_pred):
    def get_rotation(self, kpts_3d_pred):
        """Return the 4x4 extrinsic matrix (rotation only, zero translation)
        estimated from the predicted 3D keypoints in kpts_3d_pred.
        """
        kpts_3d_pred = kpts_3d_pred.reshape(len(kpts_3d_pred), -1, 3)
        prediction = kpts_3d_pred[0]
        template = self.get_template(prediction)
        # TODO: May need to change to yxz somehow
        rotation, _ = ltr.compute_rigid_transform(template, prediction.T)
        translation = kpts_3d_pred[0, 0, :]  # first element is the translation ???
        extrinsics = np.block([[rotation, translation[:, None]],
                               [np.array([0.0, 0.0, 0.0, 1.0])]
                               ])
        return extrinsics.flatten().tolist()
        extrinsics = np.eye(4, dtype=np.float32)
        extrinsics[:3, :3] = rotation
        return extrinsics
    def gather_lifting_results(self,
......@@ -399,9 +396,12 @@ class EgoNet(nn.Module):
        )
        # save KITTI-style prediction file in .txt format
        save_txt_file(img_path, record, save_dict)
        # save_txt_file(img_path, record, save_dict)
        return record
    def estimate_pose(self, record):
        self.get_6d_rep()

    def post_process(self,
                     records,
                     visualize=False,
......