Commit 5b8c754d authored by jphilion

release

parent 88fbfcba
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# other things
*.jpg
*.png
*.pt
*.json
runs/
goodmodel.pt.bak
*.gif
storage/
imagenet_pretrained/
*.csv
NVIDIA Source Code License for Lift, Splat, Shoot
1. Definitions
“Licensor” means any person or entity that distributes its Work.
“Software” means the original work of authorship made available under this License.
“Work” means the Software and any additions to or derivative works of the Software that are made available under this License.
The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work.
Works, including the Software, are “made available” under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License.
2. License Grant
2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form.
3. Limitations
3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work.
3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself.
3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative works commercially. As used herein, “non-commercially” means for research or evaluation purposes only.
3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately.
3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License.
3.6 Termination. If you violate any term of this License, then your rights under this License (including the grant in Section 2.1) will terminate immediately.
4. Disclaimer of Warranty.
THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE.
5. Limitation of Liability.
EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
# Lift, Splat, Shoot: Encoding Images From Arbitrary Camera Rigs by Implicitly Unprojecting to 3D
PyTorch code for Lift-Splat-Shoot (ECCV 2020).
**Lift, Splat, Shoot: Encoding Images From Arbitrary Camera Rigs by Implicitly Unprojecting to 3D**
Jonah Philion, [Sanja Fidler](http://www.cs.toronto.edu/~fidler/)\
ECCV, 2020 (Poster)\
**[[Paper](https://arxiv.org/abs/2008.05711)] [[Project Page](https://nv-tlabs.github.io/lift-splat-shoot/)] [[10-min video](https://youtu.be/oL5ISk6BnDE)] [[1-min video](https://youtu.be/ypQQUG4nFJY)]**
**Abstract:**
The goal of perception for autonomous vehicles is to extract semantic representations from multiple sensors and fuse these representations into a single "bird's-eye-view" coordinate frame for consumption by motion planning. We propose a new end-to-end architecture that directly extracts a bird's-eye-view representation of a scene given image data from an arbitrary number of cameras. The core idea behind our approach is to "lift" each image individually into a frustum of features for each camera, then "splat" all frustums into a rasterized bird's-eye-view grid. By training on the entire camera rig, we provide evidence that our model is able to learn not only how to represent images but how to fuse predictions from all cameras into a single cohesive representation of the scene while being robust to calibration error. On standard bird's-eye-view tasks such as object segmentation and map segmentation, our model outperforms all baselines and prior work. In pursuit of the goal of learning dense representations for motion planning, we show that the representations inferred by our model enable interpretable end-to-end motion planning by "shooting" template trajectories into a bird's-eye-view cost map output by our network. We benchmark our approach against models that use oracle depth from lidar. Project page: [https://nv-tlabs.github.io/lift-splat-shoot/](https://nv-tlabs.github.io/lift-splat-shoot/).
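For intuition, here is a minimal sketch of the "splat" step under simplifying assumptions (names, shapes, and bounds are illustrative, not the repository's implementation): features lifted into ego-frame 3D points are sum-pooled into a rasterized bird's-eye-view grid.
```
import torch

def splat(feats, coords, xbound=(-50.0, 50.0, 0.5), ybound=(-50.0, 50.0, 0.5)):
    # feats: (N, C) features lifted from image pixels.
    # coords: (N, 3) ego-frame locations of those features.
    nx = int((xbound[1] - xbound[0]) / xbound[2])
    ny = int((ybound[1] - ybound[0]) / ybound[2])
    ix = ((coords[:, 0] - xbound[0]) / xbound[2]).long().clamp(0, nx - 1)
    iy = ((coords[:, 1] - ybound[0]) / ybound[2]).long().clamp(0, ny - 1)
    bev = torch.zeros(nx * ny, feats.shape[1])
    bev.index_add_(0, ix * ny + iy, feats)  # all features landing in a cell are summed
    return bev.view(nx, ny, -1).permute(2, 0, 1)  # (C, nx, ny) BEV feature map
```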
### Citation
If you found this codebase useful in your research, please consider citing
```
@inproceedings{philion2020lift,
  title={Lift, Splat, Shoot: Encoding Images From Arbitrary Camera Rigs by Implicitly Unprojecting to 3D},
  author={Jonah Philion and Sanja Fidler},
  booktitle={Proceedings of the European Conference on Computer Vision},
  year={2020},
}
```
### Preparation
Download nuscenes data from [https://www.nuscenes.org/](https://www.nuscenes.org/). Install dependencies.
```
pip install nuscenes-devkit tensorboardX efficientnet_pytorch
```
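As an optional sanity check that the devkit can see your download (the `dataroot` path below is illustrative; for the `mini` split, `compile_data` expects the `v1.0-mini` folder to live under `dataroot/mini`):
```
from nuscenes.nuscenes import NuScenes

# Point dataroot at the directory containing the extracted v1.0-mini folder.
nusc = NuScenes(version='v1.0-mini', dataroot='/data/nuscenes/mini', verbose=True)
print(len(nusc.sample), 'keyframe samples found')
```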
### Pre-trained Model
Download a pre-trained BEV vehicle segmentation model from here: [https://drive.google.com/file/d/18fy-6beTFTZx5SrYLs9Xk7cY-fGSm7kw/view?usp=sharing](https://drive.google.com/file/d/18fy-6beTFTZx5SrYLs9Xk7cY-fGSm7kw/view?usp=sharing)
| Vehicle IOU (reported in paper) | Vehicle IOU (this repository) |
|:-------------:|:-------------:|
| 32.07 | 33.03 |
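A minimal sketch of loading the checkpoint outside of `main.py` (the config values below mirror the defaults in `src/explore.py`; `MODEL_LOCATION` is the path to your downloaded file, and it is an assumption here that the network itself only reads `final_dim` from the augmentation config):
```
import torch
from src.models import compile_model

grid_conf = {'xbound': [-50.0, 50.0, 0.5], 'ybound': [-50.0, 50.0, 0.5],
             'zbound': [-10.0, 10.0, 20.0], 'dbound': [4.0, 45.0, 1.0]}
data_aug_conf = {'final_dim': (128, 352)}  # assumption: only final_dim is needed by the model

model = compile_model(grid_conf, data_aug_conf, outC=1)
model.load_state_dict(torch.load('MODEL_LOCATION', map_location='cpu'))
model.eval()
```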
### Evaluate a model
Evaluate the IOU of a model on the nuScenes validation set:
```
python main.py eval_model_iou mini/trainval --modelf=MODEL_LOCATION --dataroot=NUSCENES_ROOT
```
### Visualize Predictions
Visualize the BEV segmentation output by a model:
```
python main.py viz_model_preds mini/trainval --modelf=MODEL_LOCATION --dataroot=NUSCENES_ROOT --map_folder=NUSCENES_MAP_ROOT
```
<img src="./imgs/eval.gif">
### Visualize Input/Output Data (optional)
Run a visual check to make sure extrinsics/intrinsics are being parsed correctly. Left: input images with LiDAR scans projected using the extrinsics and intrinsics. Middle: the LiDAR scan that is projected. Right: X-Y projection of the point cloud generated by the lift-splat model. Pass `--viz_train=True` to view data augmentation.
```
python main.py lidar_check mini/trainval --dataroot=NUSCENES_ROOT --viz_train=False
```
<img src="./imgs/check.gif">
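The left-panel overlay above uses the standard pinhole projection. A minimal standalone sketch of that math (variable names are illustrative, not the repository's `ego_to_cam`):
```
import torch

def project_to_image(pts_ego, rot, tran, intrin):
    # rot (3x3), tran (3,): camera-to-ego extrinsics; intrin (3x3): camera matrix.
    pts_cam = rot.t().matmul(pts_ego - tran.unsqueeze(1))  # ego frame -> camera frame
    pix = intrin.matmul(pts_cam)
    pix = pix[:2] / pix[2:3]           # perspective divide to pixel coordinates
    return pix, pts_cam[2] > 0         # pixels, plus a mask of points in front of the camera
```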
### Train a model (optional)
Train a model. Monitor with tensorboard.
```
python main.py train mini/trainval --dataroot=NUSCENES_ROOT --logdir=./runs --gpuid=0
tensorboard --logdir=./runs --bind_all
```
### Acknowledgements
Thank you to Sanja Fidler, as well as David Acuna, Daiqing Li, Amlan Kar, Jun Gao, Kevin Xie, Karan Sapra, the NVIDIA AV Team, and NVIDIA Research for their help in making this research possible.
"""
Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot.
Authors: Jonah Philion and Sanja Fidler
"""
from . import explore, train
"""
Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot.
Authors: Jonah Philion and Sanja Fidler
"""
import torch
import os
import numpy as np
from PIL import Image
import cv2
from pyquaternion import Quaternion
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.splits import create_splits_scenes
from nuscenes.utils.data_classes import Box
from glob import glob
from .tools import get_lidar_data, img_transform, normalize_img, gen_dx_bx
class NuscData(torch.utils.data.Dataset):
    def __init__(self, nusc, is_train, data_aug_conf, grid_conf):
        self.nusc = nusc
        self.is_train = is_train
        self.data_aug_conf = data_aug_conf
        self.grid_conf = grid_conf

        self.scenes = self.get_scenes()
        self.ixes = self.prepro()

        # dx: voxel size per axis, bx: center of the first voxel, nx: voxels per axis
        dx, bx, nx = gen_dx_bx(grid_conf['xbound'], grid_conf['ybound'], grid_conf['zbound'])
        self.dx, self.bx, self.nx = dx.numpy(), bx.numpy(), nx.numpy()

        self.fix_nuscenes_formatting()

        print(self)
    def fix_nuscenes_formatting(self):
        """If nuscenes is stored with trainval/1 trainval/2 ... structure, adjust the file paths
        stored in the nuScenes object.
        """
        # check if default file paths work
        rec = self.ixes[0]
        sampimg = self.nusc.get('sample_data', rec['data']['CAM_FRONT'])
        imgname = os.path.join(self.nusc.dataroot, sampimg['filename'])

        def find_name(f):
            d, fi = os.path.split(f)
            d, di = os.path.split(d)
            d, d0 = os.path.split(d)
            d, d1 = os.path.split(d)
            d, d2 = os.path.split(d)
            return di, fi, f'{d2}/{d1}/{d0}/{di}/{fi}'

        # adjust the image paths if needed
        if not os.path.isfile(imgname):
            print('adjusting nuscenes file paths')
            fs = glob(os.path.join(self.nusc.dataroot, 'samples/*/samples/CAM*/*.jpg'))
            fs += glob(os.path.join(self.nusc.dataroot, 'samples/*/samples/LIDAR_TOP/*.pcd.bin'))
            info = {}
            for f in fs:
                di, fi, fname = find_name(f)
                info[f'samples/{di}/{fi}'] = fname
            fs = glob(os.path.join(self.nusc.dataroot, 'sweeps/*/sweeps/LIDAR_TOP/*.pcd.bin'))
            for f in fs:
                di, fi, fname = find_name(f)
                info[f'sweeps/{di}/{fi}'] = fname
            for rec in self.nusc.sample_data:
                if rec['channel'] == 'LIDAR_TOP' or (rec['is_key_frame'] and rec['channel'] in self.data_aug_conf['cams']):
                    rec['filename'] = info[rec['filename']]
    def get_scenes(self):
        # filter by scene split
        split = {
            'v1.0-trainval': {True: 'train', False: 'val'},
            'v1.0-mini': {True: 'mini_train', False: 'mini_val'},
        }[self.nusc.version][self.is_train]

        scenes = create_splits_scenes()[split]

        return scenes
    def prepro(self):
        samples = [samp for samp in self.nusc.sample]

        # remove samples that aren't in this split
        samples = [samp for samp in samples if
                   self.nusc.get('scene', samp['scene_token'])['name'] in self.scenes]

        # sort by scene, timestamp (only to make chronological viz easier)
        samples.sort(key=lambda x: (x['scene_token'], x['timestamp']))

        return samples
    def sample_augmentation(self):
        H, W = self.data_aug_conf['H'], self.data_aug_conf['W']
        fH, fW = self.data_aug_conf['final_dim']
        if self.is_train:
            # random resize, then a crop biased toward the bottom of the image
            resize = np.random.uniform(*self.data_aug_conf['resize_lim'])
            resize_dims = (int(W*resize), int(H*resize))
            newW, newH = resize_dims
            crop_h = int((1 - np.random.uniform(*self.data_aug_conf['bot_pct_lim']))*newH) - fH
            crop_w = int(np.random.uniform(0, max(0, newW - fW)))
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = False
            if self.data_aug_conf['rand_flip'] and np.random.choice([0, 1]):
                flip = True
            rotate = np.random.uniform(*self.data_aug_conf['rot_lim'])
        else:
            # deterministic at eval time: smallest resize that covers final_dim, centered crop
            resize = max(fH/H, fW/W)
            resize_dims = (int(W*resize), int(H*resize))
            newW, newH = resize_dims
            crop_h = int((1 - np.mean(self.data_aug_conf['bot_pct_lim']))*newH) - fH
            crop_w = int(max(0, newW - fW) / 2)
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = False
            rotate = 0
        return resize, resize_dims, crop, flip, rotate
    def get_image_data(self, rec, cams):
        imgs = []
        rots = []
        trans = []
        intrins = []
        post_rots = []
        post_trans = []
        for cam in cams:
            samp = self.nusc.get('sample_data', rec['data'][cam])
            imgname = os.path.join(self.nusc.dataroot, samp['filename'])
            img = Image.open(imgname)
            post_rot = torch.eye(2)
            post_tran = torch.zeros(2)

            sens = self.nusc.get('calibrated_sensor', samp['calibrated_sensor_token'])
            intrin = torch.Tensor(sens['camera_intrinsic'])
            rot = torch.Tensor(Quaternion(sens['rotation']).rotation_matrix)
            tran = torch.Tensor(sens['translation'])

            # augmentation (resize, crop, horizontal flip, rotate)
            resize, resize_dims, crop, flip, rotate = self.sample_augmentation()
            img, post_rot2, post_tran2 = img_transform(img, post_rot, post_tran,
                                                       resize=resize,
                                                       resize_dims=resize_dims,
                                                       crop=crop,
                                                       flip=flip,
                                                       rotate=rotate,
                                                       )

            # for convenience, make augmentation matrices 3x3
            post_tran = torch.zeros(3)
            post_rot = torch.eye(3)
            post_tran[:2] = post_tran2
            post_rot[:2, :2] = post_rot2

            imgs.append(normalize_img(img))
            intrins.append(intrin)
            rots.append(rot)
            trans.append(tran)
            post_rots.append(post_rot)
            post_trans.append(post_tran)

        return (torch.stack(imgs), torch.stack(rots), torch.stack(trans),
                torch.stack(intrins), torch.stack(post_rots), torch.stack(post_trans))
    def get_lidar_data(self, rec, nsweeps):
        pts = get_lidar_data(self.nusc, rec,
                             nsweeps=nsweeps, min_distance=2.2)
        return torch.Tensor(pts)[:3]  # x,y,z
    def get_binimg(self, rec):
        # rasterize annotated vehicle boxes into a binary BEV image in the ego frame
        egopose = self.nusc.get('ego_pose',
                                self.nusc.get('sample_data', rec['data']['LIDAR_TOP'])['ego_pose_token'])
        trans = -np.array(egopose['translation'])
        rot = Quaternion(egopose['rotation']).inverse
        img = np.zeros((self.nx[0], self.nx[1]))
        for tok in rec['anns']:
            inst = self.nusc.get('sample_annotation', tok)
            # add category for lyft
            if not inst['category_name'].split('.')[0] == 'vehicle':
                continue
            # move the box from global coordinates into the ego frame
            box = Box(inst['translation'], inst['size'], Quaternion(inst['rotation']))
            box.translate(trans)
            box.rotate(rot)

            # convert the box's bottom corners to integer BEV pixel indices
            pts = box.bottom_corners()[:2].T
            pts = np.round(
                (pts - self.bx[:2] + self.dx[:2]/2.) / self.dx[:2]
            ).astype(np.int32)
            pts[:, [1, 0]] = pts[:, [0, 1]]  # swap axes: cv2.fillPoly indexes (column, row)
            cv2.fillPoly(img, [pts], 1.0)

        return torch.Tensor(img).unsqueeze(0)
    def choose_cams(self):
        if self.is_train and self.data_aug_conf['Ncams'] < len(self.data_aug_conf['cams']):
            cams = np.random.choice(self.data_aug_conf['cams'], self.data_aug_conf['Ncams'],
                                    replace=False)
        else:
            cams = self.data_aug_conf['cams']
        return cams
    def __str__(self):
        return f"""NuscData: {len(self)} samples. Split: {"train" if self.is_train else "val"}.
                   Augmentation Conf: {self.data_aug_conf}"""

    def __len__(self):
        return len(self.ixes)
class VizData(NuscData):
    def __init__(self, *args, **kwargs):
        super(VizData, self).__init__(*args, **kwargs)

    def __getitem__(self, index):
        rec = self.ixes[index]

        cams = self.choose_cams()
        imgs, rots, trans, intrins, post_rots, post_trans = self.get_image_data(rec, cams)
        lidar_data = self.get_lidar_data(rec, nsweeps=3)
        binimg = self.get_binimg(rec)

        return imgs, rots, trans, intrins, post_rots, post_trans, lidar_data, binimg
class SegmentationData(NuscData):
    def __init__(self, *args, **kwargs):
        super(SegmentationData, self).__init__(*args, **kwargs)

    def __getitem__(self, index):
        rec = self.ixes[index]

        cams = self.choose_cams()
        imgs, rots, trans, intrins, post_rots, post_trans = self.get_image_data(rec, cams)
        binimg = self.get_binimg(rec)

        return imgs, rots, trans, intrins, post_rots, post_trans, binimg
def worker_rnd_init(x):
    # give each dataloader worker its own numpy seed
    np.random.seed(13 + x)
def compile_data(version, dataroot, data_aug_conf, grid_conf, bsz,
                 nworkers, parser_name):
    nusc = NuScenes(version='v1.0-{}'.format(version),
                    dataroot=os.path.join(dataroot, version),
                    verbose=False)
    parser = {
        'vizdata': VizData,
        'segmentationdata': SegmentationData,
    }[parser_name]
    traindata = parser(nusc, is_train=True, data_aug_conf=data_aug_conf,
                       grid_conf=grid_conf)
    valdata = parser(nusc, is_train=False, data_aug_conf=data_aug_conf,
                     grid_conf=grid_conf)

    trainloader = torch.utils.data.DataLoader(traindata, batch_size=bsz,
                                              shuffle=True,
                                              num_workers=nworkers,
                                              drop_last=True,
                                              worker_init_fn=worker_rnd_init)
    valloader = torch.utils.data.DataLoader(valdata, batch_size=bsz,
                                            shuffle=False,
                                            num_workers=nworkers)

    return trainloader, valloader
"""
Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot.
Authors: Jonah Philion and Sanja Fidler
"""
import torch
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from PIL import Image
import matplotlib.patches as mpatches
from .data import compile_data
from .tools import (ego_to_cam, get_only_in_img_mask, denormalize_img,
SimpleLoss, get_val_info, add_ego, gen_dx_bx,
get_nusc_maps, plot_nusc_map)
from .models import compile_model
def lidar_check(version,
                dataroot='/data/nuscenes',
                show_lidar=True,
                viz_train=False,
                nepochs=1,

                H=900, W=1600,
                resize_lim=(0.193, 0.225),
                final_dim=(128, 352),
                bot_pct_lim=(0.0, 0.22),
                rot_lim=(-5.4, 5.4),
                rand_flip=True,

                xbound=[-50.0, 50.0, 0.5],
                ybound=[-50.0, 50.0, 0.5],
                zbound=[-10.0, 10.0, 20.0],
                dbound=[4.0, 45.0, 1.0],

                bsz=1,
                nworkers=10,
                ):
    grid_conf = {
        'xbound': xbound,
        'ybound': ybound,
        'zbound': zbound,
        'dbound': dbound,
    }
    cams = ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
            'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT']
    data_aug_conf = {
        'resize_lim': resize_lim,
        'final_dim': final_dim,
        'rot_lim': rot_lim,
        'H': H, 'W': W,
        'rand_flip': rand_flip,
        'bot_pct_lim': bot_pct_lim,
        'cams': cams,
        'Ncams': 5,
    }
    trainloader, valloader = compile_data(version, dataroot, data_aug_conf=data_aug_conf,
                                          grid_conf=grid_conf, bsz=bsz, nworkers=nworkers,
                                          parser_name='vizdata')
    loader = trainloader if viz_train else valloader

    model = compile_model(grid_conf, data_aug_conf, outC=1)

    rat = H / W
    val = 10.1
    fig = plt.figure(figsize=(val + val/3*2*rat*3, val/3*2*rat))
    gs = mpl.gridspec.GridSpec(2, 6, width_ratios=(1, 1, 1, 2*rat, 2*rat, 2*rat))
    gs.update(wspace=0.0, hspace=0.0, left=0.0, right=1.0, top=1.0, bottom=0.0)

    for epoch in range(nepochs):
        for batchi, (imgs, rots, trans, intrins, post_rots, post_trans, pts, binimgs) in enumerate(loader):

            img_pts = model.get_geometry(rots, trans, intrins, post_rots, post_trans)

            for si in range(imgs.shape[0]):
                plt.clf()
                final_ax = plt.subplot(gs[:, 5:6])
                for imgi, img in enumerate(imgs[si]):
                    ego_pts = ego_to_cam(pts[si], rots[si, imgi], trans[si, imgi], intrins[si, imgi])
                    mask = get_only_in_img_mask(ego_pts, H, W)
                    plot_pts = post_rots[si, imgi].matmul(ego_pts) + post_trans[si, imgi].unsqueeze(1)

                    ax = plt.subplot(gs[imgi // 3, imgi % 3])
                    showimg = denormalize_img(img)
                    plt.imshow(showimg)
                    if show_lidar:
                        plt.scatter(plot_pts[0, mask], plot_pts[1, mask], c=ego_pts[2, mask],
                                    s=5, alpha=0.1, cmap='jet')
                    # plot_pts = post_rots[si, imgi].matmul(img_pts[si, imgi].view(-1, 3).t()) + post_trans[si, imgi].unsqueeze(1)
                    # plt.scatter(img_pts[:, :, :, 0].view(-1), img_pts[:, :, :, 1].view(-1), s=1)
                    plt.axis('off')

                    plt.sca(final_ax)
                    plt.plot(img_pts[si, imgi, :, :, :, 0].view(-1), img_pts[si, imgi, :, :, :, 1].view(-1), '.', label=cams[imgi].replace('_', ' '))