Commit 323f3860 authored by nviolante25's avatar nviolante25
Browse files

new annotator

parent 9999ea17
import click
import json
import os
import torch
import numpy as np
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.ops import box_area
from PIL import Image
from pathlib import Path
from libs.model.egonet import EgoNet
from libs.arguments.parse import read_yaml_file
from convert_to_eg3d import get_box_height, generate_cam2world
def angles_to_coords(roll, yaw, pitch):
    """Map Euler angles (radians) to a 3D direction vector (x, y, z)."""
    sin_r, cos_r = np.sin(roll), np.cos(roll)
    sin_y, cos_y = np.sin(yaw), np.cos(yaw)
    sin_p, cos_p = np.sin(pitch), np.cos(pitch)
    return (
        -cos_y * sin_p * sin_r - sin_y * cos_r,
        -sin_y * sin_p * sin_r + cos_y * cos_r,
        cos_p * sin_r,
    )
def file_ext(name):
    """Return the extension of *name* without the leading dot ("" if none).

    Uses os.path.splitext instead of splitting on ".", so a dot inside a
    directory name (e.g. "dir.v2/img.png") no longer leaks into the result,
    and an extension-less name yields "" rather than the whole name.
    """
    return os.path.splitext(str(name))[1][1:]
def is_image_ext(fname):
    """Return True when *fname* carries an extension PIL registers as an image."""
    return "." + file_ext(fname).lower() in Image.EXTENSION
@click.command()
@click.option(
    "--cfg",
    type=str,
    help="Configuration .yaml file",
    default="./configs/custom.yml",
)
@click.option("--data", type=str, help="Dataset folder path", required=True)
@click.option(
    "--dest", type=str, help="Destination .json file", default="./dataset.json"
)
@click.option(
    "--stylegan-format",
    type=bool,
    help="True if images are stored following the format of StyleGAN, i.e, data/00000/img00000.png",
)
@click.option("--max-images", type=int, default=None)
def main(cfg, data, dest, stylegan_format, max_images):
    """Annotate images with EG3D-style camera labels.

    For each image under *data*: detect the largest 2D box with Mask R-CNN,
    lift it to 3D keypoints with Ego-Net, derive a cam2world pose, and append
    [name, cam2world + intrinsics] to the labels written to *dest* as JSON.
    """
    cfg = read_yaml_file(cfg)

    # cuDNN behavior is driven entirely by the yaml config.
    torch.backends.cudnn.benchmark = cfg["cudnn"]["benchmark"]
    torch.backends.cudnn.deterministic = cfg["cudnn"]["deterministic"]
    torch.backends.cudnn.enabled = cfg["cudnn"]["enabled"]

    # Initialize the 2D detector and Ego-Net from pre-trained checkpoints.
    print("Loading models..")
    model = maskrcnn_resnet50_fpn(pretrained=True).cuda()
    model.eval()
    model_3d = EgoNet(cfg, pre_trained=True)
    model_3d = model_3d.eval().cuda()
    # ---------------------------------------------------------------------

    Image.init()
    image_paths = (
        str(f)
        for f in Path(data).rglob("*")
        if is_image_ext(f) and os.path.isfile(f)
    )

    dataset_json = {"labels": []}
    total = 0
    # Fixed pinhole intrinsics (normalized 3x3, flattened row-major).
    intrinsics = [4.2647, 0.0, 0.5, 0.0, 4.2647, 0.5, 0.0, 0.0, 1.0]

    for image_path in image_paths:
        total += 1
        print(f"Processing {total} images")
        try:
            # convert("RGB") guards against grayscale/RGBA/palette images
            # that would otherwise break the HWC->CHW transpose below.
            rgb_image = np.array(Image.open(image_path).convert("RGB"))
        except OSError:
            # Unreadable or truncated file: skip it, don't abort the run.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
            continue

        # 2D bounding box detection; keep the largest-area detection.
        image = rgb_image.transpose(2, 0, 1) / 255.0
        image_tensor = torch.FloatTensor(image)[None, ...].cuda()
        with torch.no_grad():  # inference only — no autograd graph needed
            detections = model(image_tensor)[0]
        if len(detections["boxes"]) == 0:
            # argmax() on an empty tensor raises; skip undetected images.
            continue
        areas = box_area(detections["boxes"])
        idx = areas.argmax().item()
        detected_box = (
            detections["boxes"][idx].cpu().detach().numpy().astype(int)
        )

        # 3D bounding box lifting with Ego-Net, then cam2world derivation.
        annot = {"path": [image_path], "boxes": [[detected_box]]}
        records = model_3d(annot)  # [image_path]
        record = model_3d.post_process(records, False)[image_path]
        rotation = model_3d.get_rotation(record["kpts_3d_pred"])
        cam2world = generate_cam2world(rotation).flatten().tolist()

        image_name = os.path.basename(image_path)
        if stylegan_format:
            # Keep the immediate parent folder (StyleGAN shard layout).
            folder_num = image_path.split("/")[-2]
            name = os.path.join(folder_num, image_name)
        else:
            name = image_name
        dataset_json["labels"].append([name, cam2world + intrinsics])

        if max_images is not None and total >= max_images:
            break

    with open(dest, "w") as f:
        json.dump(dataset_json, f, indent=2)


if __name__ == "__main__":
    main()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment