data_storage.cpp 13.5 KB
Newer Older
1 2
/* ALTA --- Analysis of Bidirectional Reflectance Distribution Functions

3
   Copyright (C) 2013, 2014, 2015, 2016, 2017 Inria
4
   Copyright (C) 2017, CNRS
5 6 7 8 9 10 11 12 13 14 15 16 17

   This file is part of ALTA.

   This Source Code Form is subject to the terms of the Mozilla Public
   License, v. 2.0.  If a copy of the MPL was not distributed with this
   file, You can obtain one at http://mozilla.org/MPL/2.0/.  */

#include "data.h"
#include "data_storage.h"
#include "vertical_segment.h"

#include <iostream>
#include <limits>
18
#include <iomanip>
19
#include <cassert>
20

21 22 23 24
#ifdef __GLIBC__
# include <endian.h>
#endif

25
using namespace alta;
26
using namespace Eigen;
27

28 29 30 31 32 33 34 35 36 37
// A deleter for arrays, to work around the lack of array support in C++11's
// 'shared_ptr':
// <http://stackoverflow.com/questions/8947579/why-isnt-there-a-stdshared-ptrt-specialisation#8947700>.
static void delete_array(double *thing)
{
    delete[] thing;
}


static bool cosine_correction(vecref v, unsigned int dimX, unsigned int dimY,
38 39 40 41
                              alta::params::input in_param)
{
    using namespace alta;

42 43
    assert(v.size() == dimX + dimY);

44 45 46
    // Correction of the data by 1/cosine(theta_L)
    double factor = 1.0;
    double cart[6];
47
    params::convert(&v(0), in_param, params::CARTESIAN, cart);
48 49 50 51 52
    if(cart[5] > 0.0 && cart[2] > 0.0)
    {
        factor = 1.0/cart[5]*cart[2];
        for(unsigned int i = 0; i < dimY; ++i)
        {
53 54 55 56
            std::cout << "i = " << i << " dimx = " << dimX
                      << " + = " << i + dimX << " | " << v.size()
                      << std::endl;
            v(i + dimX) /= factor;
57 58 59 60 61 62 63
        }
        return true;
    }
    else return false;
}

// Return true if V is between MIN and MAX.
64 65
static bool within_bounds(const vecref v,
                          const vec& min, const vec& max)
66
{
67 68
    return (v.array() >= min.array()).all()
        && (v.array() <= max.array()).all();
69 70
}

71 72 73 74 75 76 77 78 79
// Return the 'ci_kind' value corresponding to VS_VALUE, an integer found in
// a '#VS' header.
static vertical_segment::ci_kind ci_kind_from_number(int vs_value)
{
    return vs_value == 2 ? vertical_segment::ASYMMETRICAL_CONFIDENCE_INTERVAL
        : (vs_value == 1 ? vertical_segment::SYMMETRICAL_CONFIDENCE_INTERVAL
           : vertical_segment::NO_CONFIDENCE_INTERVAL);
}

80 81 82 83 84 85 86 87
// Return the number used to represent KIND in '#VS' headers.
static int number_from_ci_kind(vertical_segment::ci_kind kind)
{
    return kind == vertical_segment::ASYMMETRICAL_CONFIDENCE_INTERVAL
        ? 2 : (kind == vertical_segment::SYMMETRICAL_CONFIDENCE_INTERVAL
               ? 1 : 0);
}

88 89
// Read a confidence interval on the output parameters from INPUT into V.
static void read_confidence_interval(std::istream& input,
90 91
                                     vecref v,
                                     vertical_segment::ci_kind kind,
92 93 94 95
                                     unsigned int dimX,
                                     unsigned int dimY,
                                     const alta::arguments& args)
{
96 97
    assert(v.size() == dimX + 3 * dimY);

98 99 100 101
    for(int i=0; i<dimY; ++i)
    {
        double min_dt = 0.0, max_dt = 0.0;

102
        if(i == 0 && kind == vertical_segment::ASYMMETRICAL_CONFIDENCE_INTERVAL)
103 104 105
        {
            input >> min_dt ;
            input >> max_dt ;
106 107 108
            //min_dt = min_dt-v(dimX + i);
            //max_dt = max_dt-v(dimX + i);
            min_dt = -min_dt;
109
        }
110
        else if(i == 0 && kind == vertical_segment::SYMMETRICAL_CONFIDENCE_INTERVAL)
111 112 113 114 115 116 117 118 119
        {
            double dt ;
            input >> dt ;
            min_dt = -dt;
            max_dt =  dt;
        }
        else
        {
            // Confidence interval data not provided in INPUT.
120
            double dt = args.get_double("dt", 0.1);
121 122 123 124 125 126
            min_dt = -dt;
            max_dt =  dt;
        }

        if(args.is_defined("dt-relative"))
        {
127 128
            v(dimX +   dimY+i) = v(dimX + i) * (1.0 + min_dt) ;
            v(dimX + 2*dimY+i) = v(dimX + i) * (1.0 + max_dt) ;
129 130 131
        }
        else if(args.is_defined("dt-max"))
        {
132 133
            v(dimX +   dimY+i) = v(dimX + i) + std::max(v(dimX + i) * min_dt, min_dt);
            v(dimX + 2*dimY+i) = v(dimX + i) + std::max(v(dimX + i) * max_dt, max_dt);
134 135 136
        }
        else
        {
137 138
            v(dimX +   dimY+i) = v(dimX + i) + min_dt ;
            v(dimX + 2*dimY+i) = v(dimX + i) + max_dt ;
139 140 141 142 143 144 145
        }

        // You can enforce the vertical segment to stay in the positive
        // region using the --data-positive command line argument. Note
        // that the data point is also clamped to zero if negative.
        if(args.is_defined("dt-positive"))
        {
146 147 148
            v(dimX +        i) = std::max(v(dimX +        i), 0.0);
            v(dimX +   dimY+i) = std::max(v(dimX +   dimY+i), 0.0);
            v(dimX + 2*dimY+i) = std::max(v(dimX + 2*dimY+i), 0.0);
149 150 151 152 153 154 155 156
        }

#ifdef DEBUG
        std::cout << "<<DEBUG>> vs = [" << v[dimX +   dimY+i] << ", " << v[dimX + 2*dimY+i] << "]" << std::endl;
#endif
    }
}

157
alta::data* alta::load_data_from_text(std::istream& input,
158 159
                                      const alta::arguments& header,
                                      const alta::arguments& args)
160
{
161 162
  vec min, max ;
  vec ymin, ymax;
163

164 165 166 167 168
  if(! header.is_defined("DIM")) {
    std::cerr << "<<ERROR>> Undefined dimensions ! ";
    std::cerr << "Please add DIM [int] [int] into the file header." << std::endl;
    throw;
  }
169

170 171
  params::input  in_param  = params::parse_input(header.get_string("PARAM_IN", "UNKNOWN_INPUT"));
  params::output out_param = params::parse_output(header.get_string("PARAM_OUT", "UNKNOWN_OUTPUT"));
172

173
  std::pair<int, int> dim = header.get_pair<int>("DIM");
174
  size_t row_count = dim.first + 3 * dim.second;
175

176 177
  min = args.get_vec("min", dim.first, -std::numeric_limits<float>::max()) ;
  max = args.get_vec("max", dim.first,  std::numeric_limits<float>::max()) ;
178
#ifdef DEBUG
179
  std::cout << "<<DEBUG>> data will remove outside of " << min << " -> " << max << " x-interval" << std::endl;
180 181
#endif

182 183
  ymin = args.get_vec("ymin", dim.second, -std::numeric_limits<float>::max()) ;
  ymax = args.get_vec("ymax", dim.second,  std::numeric_limits<float>::max()) ;
184
#ifdef DEBUG
185
  std::cout << "<<DEBUG>> data will remove outside of " << ymin << " -> " << ymax << " y-interval" << std::endl;
186 187
#endif

188
  auto kind = ci_kind_from_number(header.get_int("VS"));
189
  std::vector<double> content;
190

191 192
  std::cout << "<<INFO>> Starting to load file ... " << std::endl;

193 194 195 196 197 198 199 200 201 202 203 204 205 206
  // Now read the body.
  while(input.good())
  {
    std::string line ;
    std::getline(input, line) ;
    std::stringstream linestream(line) ;

    // Discard comments and empty lines.
    if(line.empty() || linestream.peek() == '#')
    {
      continue ;
    }
    else
    {
207 208
      auto start = content.size();

209
      // Read the data point x and y coordinates
210
      for(int i=0; i < dim.first + dim.second; ++i)
211
      {
212 213 214
          double item;
          linestream >> item;
          content.push_back(item);
215 216
      }

217 218 219 220 221
      // Save room for the confidence interval.
      for (int i = 0; i < 2 * dim.second; i++)
          content.push_back(0.);

      Map<VectorXd> v(&content[start], row_count);
222 223 224 225 226 227 228

      // Read the confidence interval data if available.
      read_confidence_interval(linestream, v, kind,
                               dim.first, dim.second, args);

      // Check if we need to filter out what we just read according to ARGS.
      // TODO: Move filtering to a post-parsing operation on 'data'.
229 230 231
      if (!(within_bounds(v.segment(0, dim.first), min, max)
            && within_bounds(v.segment(dim.first, dim.second),
                             ymin, ymax)))
232 233 234 235
      {
          content.resize(start);
      }
      else if (args.is_defined("data-correct-cosine"))
236
      {
237 238
          if (!cosine_correction(v.segment(0, dim.first + dim.second),
                                 dim.first, dim.second, in_param))
239
              content.resize(start);
240 241 242 243 244
      }
    }
  }

  std::cout << "<<INFO>> loaded input stream" << std::endl ;
245
  std::cout << "<<INFO>> loaded data input of R^"
246 247
            << dim.first
            << " -> R^" << dim.second << std::endl ;
248 249

  std::cout << "<<DEBUG>> " << content.size() << " double loaded. " << std::endl ;
250

251 252 253 254

  double *raw_content = new double[content.size()];
  memcpy(raw_content, content.data(), content.size() * sizeof(double));

255
  parameters param(dim.first, dim.second, in_param, out_param);
256 257 258 259
  size_t element_count = content.size() / row_count;
  data* result = new vertical_segment(param, element_count,
                                      std::shared_ptr<double>(raw_content,
                                                                            delete_array));
260 261 262
  if(args.is_defined("data-correct-cosine"))
      result->save("/tmp/data-corrected.dat");

263 264
  std::cout << "<<INFO>> " << element_count << " elements (rows) loaded" << std::endl ;

265
  return result;
266 267
}

268
void alta::save_data_as_text(std::ostream& out, const alta::data &data)
269
{
Romain Pacanowski's avatar
Romain Pacanowski committed
270
        using namespace alta;
271

272 273 274 275 276 277 278 279 280 281 282 283 284
    out << "#DIM " << data.parametrization().dimX() << " " << data.parametrization().dimY() << std::endl;
    out << "#PARAM_IN  "
        << params::get_name(data.parametrization().input_parametrization())
        << std::endl;
    out << "#PARAM_OUT "
        << params::get_name(data.parametrization().output_parametrization())
        << std::endl;

    for(int i=0; i < data.size(); ++i)
    {
        vec x = data.get(i);
        for(int j=0; j< data.parametrization().dimX() + data.parametrization().dimY(); ++j)
        {
Romain Pacanowski's avatar
Romain Pacanowski committed
285 286
                    out << std::setprecision(std::numeric_limits<double>::digits10)
                        << x[j] << "\t";
287 288 289
        }
        out << std::endl;
    }
290
}
291

292
void alta::save_data_as_binary(std::ostream &out, const alta::data& data)
293
{
294 295
    using namespace alta;

296 297 298 299
    auto maybe_vs = dynamic_cast<const vertical_segment*>(&data);
    auto kind = maybe_vs != NULL
        ? maybe_vs->confidence_interval_kind()
        : vertical_segment::NO_CONFIDENCE_INTERVAL;
300

301 302 303 304 305 306 307 308 309 310 311
    out << "#DIM " << data.parametrization().dimX() << " " << data.parametrization().dimY() << std::endl;
    out << "#PARAM_IN  "
        << params::get_name(data.parametrization().input_parametrization())
        << std::endl;
    out << "#PARAM_OUT "
        << params::get_name(data.parametrization().output_parametrization())
        << std::endl;
    out << "#FORMAT binary" << std::endl;
    out << "#VERSION 0" << std::endl;
    out << "#PRECISION ieee754-double" << std::endl;
    out << "#SAMPLE_COUNT " << data.size() << std::endl;
312
    out << "#VS " << number_from_ci_kind(kind) << std::endl;
313 314 315 316

    // FIXME: Note: on non-glibc systems, both macros may be undefined, so
    // the conditional is equivalent to "#if 0 == 0", which is usually what
    // we want.
317
#if __BYTE_ORDER == __LITTLE_ENDIAN
318
    out << "#ENDIAN little" << std::endl;
319
#else
320
    out << "#ENDIAN big" << std::endl;
321 322
#endif

323
    out << "#BEGIN_STREAM" << std::endl;
324

325 326 327 328 329 330 331 332 333 334 335 336 337
    if (kind == vertical_segment::NO_CONFIDENCE_INTERVAL)
    {
        // No confidence interval to be saved.
        for(int i=0; i < data.size(); ++i)
        {
            vec sample = data.get(i);
            const double *numbers = sample.data();

            assert(sample.size() == data.parametrization().dimX() + data.parametrization().dimY());
            out.write((const char *)numbers, sample.size() * sizeof(*numbers));
        }
    }
    else
338
    {
339 340 341
        // Saving data along with the confidence interval.
        auto matrix = maybe_vs->matrix_view();
        auto byte_count = matrix.cols() * matrix.rows() * sizeof(double);
342

343 344 345
        // MATRIX has no stride so we can access the underlying storage
        // directly.
        out.write((const char *)matrix.data(), byte_count);
346
    }
347

348
    out << std::endl << "#END_STREAM" << std::endl;
349
}
350

351
alta::data* alta::load_data_from_binary(std::istream& in, const alta::arguments& header)
352
{
353
    using namespace alta;
354

355 356 357 358
    // FIXME: For now we make a number of assumptions.
    assert(header["FORMAT"] == "binary");
    assert(header.get_int("VERSION") == 0);
    assert(header["PRECISION"] == "ieee754-double");
359
#if __BYTE_ORDER == __LITTLE_ENDIAN
360
    assert(header["ENDIAN"] == "little");
361
#else
362
    assert(header["ENDIAN"] == "big");
363 364
#endif

365 366
    std::pair<int, int> dim = header.get_pair<int>("DIM");
    assert(dim.first > 0 && dim.second > 0);
367

368 369
    auto kind = ci_kind_from_number(header.get_int("VS"));

370
    in.exceptions(std::ios_base::failbit);
371

372
    int sample_count = header.get_int("SAMPLE_COUNT");
373 374 375
      if(sample_count <= 0) {
         std::cerr << "<<ERROR>> Uncorrect or not samples count in the header, please check \'SAMPLE_COUNT\'" << std::endl;
      }
376

377 378 379 380 381 382 383 384 385
      size_t ci_rows =
          kind == vertical_segment::ASYMMETRICAL_CONFIDENCE_INTERVAL
          ? 2 : (kind == vertical_segment::SYMMETRICAL_CONFIDENCE_INTERVAL
                 ? 1 : 0);

      size_t rows = dim.first + dim.second + ci_rows * dim.second;
      size_t element_count = sample_count * rows;
      double *content = new double[element_count];
      size_t byte_count = element_count * sizeof *content;
386

387 388 389 390
      // TODO: Arrange to use mmap and make it zero-alloc and zero-copy.
      for (std::streamsize total = 0;
           total < byte_count && !in.eof();
           total += in.gcount())
391
      {
392
          in.read((char *) content + total, byte_count - total);
393
      }
394

395 396 397 398
    parameters param(dim.first, dim.second,
                     params::parse_input(header["PARAM_IN"]),
                     params::parse_output(header["PARAM_OUT"]));

399 400
    return new alta::vertical_segment(param, sample_count,
                                      std::shared_ptr<double>(content,
401 402
                                                              delete_array),
                                      kind);
403
}