data_storage.cpp 13.3 KB
Newer Older
1 2
/* ALTA --- Analysis of Bidirectional Reflectance Distribution Functions

3
   Copyright (C) 2013, 2014, 2015, 2016, 2017 Inria
4 5 6 7 8 9 10 11 12 13 14 15 16

   This file is part of ALTA.

   This Source Code Form is subject to the terms of the Mozilla Public
   License, v. 2.0.  If a copy of the MPL was not distributed with this
   file, You can obtain one at http://mozilla.org/MPL/2.0/.  */

#include "data.h"
#include "data_storage.h"
#include "vertical_segment.h"

#include <iostream>
#include <limits>
17
#include <iomanip>
18
#include <cassert>
19

20 21 22 23
#ifdef __GLIBC__
# include <endian.h>
#endif

24
using namespace alta;
25
using namespace Eigen;
26

27 28 29 30 31 32 33 34 35 36
// A deleter for arrays, to work around the lack of array support in C++11's
// 'shared_ptr':
// <http://stackoverflow.com/questions/8947579/why-isnt-there-a-stdshared-ptrt-specialisation#8947700>.
static void delete_array(double *thing)
{
    delete[] thing;
}


static bool cosine_correction(vecref v, unsigned int dimX, unsigned int dimY,
37 38 39 40
                              alta::params::input in_param)
{
    using namespace alta;

41 42
    assert(v.size() == dimX + dimY);

43 44 45
    // Correction of the data by 1/cosine(theta_L)
    double factor = 1.0;
    double cart[6];
46
    params::convert(&v(0), in_param, params::CARTESIAN, cart);
47 48 49 50 51
    if(cart[5] > 0.0 && cart[2] > 0.0)
    {
        factor = 1.0/cart[5]*cart[2];
        for(unsigned int i = 0; i < dimY; ++i)
        {
52 53 54 55
            std::cout << "i = " << i << " dimx = " << dimX
                      << " + = " << i + dimX << " | " << v.size()
                      << std::endl;
            v(i + dimX) /= factor;
56 57 58 59 60 61 62
        }
        return true;
    }
    else return false;
}

// Return true if V is between MIN and MAX.
63 64
static bool within_bounds(const vecref v,
                          const vec& min, const vec& max)
65
{
66 67
    return (v.array() >= min.array()).all()
        && (v.array() <= max.array()).all();
68 69
}

70 71 72 73 74 75 76 77 78
// Return the 'ci_kind' value corresponding to VS_VALUE, an integer found in
// a '#VS' header.
static vertical_segment::ci_kind ci_kind_from_number(int vs_value)
{
    return vs_value == 2 ? vertical_segment::ASYMMETRICAL_CONFIDENCE_INTERVAL
        : (vs_value == 1 ? vertical_segment::SYMMETRICAL_CONFIDENCE_INTERVAL
           : vertical_segment::NO_CONFIDENCE_INTERVAL);
}

79 80 81 82 83 84 85 86
// Return the number used to represent KIND in '#VS' headers.
static int number_from_ci_kind(vertical_segment::ci_kind kind)
{
    return kind == vertical_segment::ASYMMETRICAL_CONFIDENCE_INTERVAL
        ? 2 : (kind == vertical_segment::SYMMETRICAL_CONFIDENCE_INTERVAL
               ? 1 : 0);
}

87 88
// Read a confidence interval on the output parameters from INPUT into V.
static void read_confidence_interval(std::istream& input,
89 90
                                     vecref v,
                                     vertical_segment::ci_kind kind,
91 92 93 94
                                     unsigned int dimX,
                                     unsigned int dimY,
                                     const alta::arguments& args)
{
95 96
    assert(v.size() == dimX + 3 * dimY);

97 98 99 100
    for(int i=0; i<dimY; ++i)
    {
        double min_dt = 0.0, max_dt = 0.0;

101
        if(i == 0 && kind == vertical_segment::ASYMMETRICAL_CONFIDENCE_INTERVAL)
102 103 104
        {
            input >> min_dt ;
            input >> max_dt ;
105 106
            min_dt = min_dt-v(dimX + i);
            max_dt = max_dt-v(dimX + i);
107
        }
108
        else if(i == 0 && kind == vertical_segment::SYMMETRICAL_CONFIDENCE_INTERVAL)
109 110 111 112 113 114 115 116 117
        {
            double dt ;
            input >> dt ;
            min_dt = -dt;
            max_dt =  dt;
        }
        else
        {
            // Confidence interval data not provided in INPUT.
118
            double dt = args.get_double("dt", 0.1);
119 120 121 122 123 124
            min_dt = -dt;
            max_dt =  dt;
        }

        if(args.is_defined("dt-relative"))
        {
125 126
            v(dimX +   dimY+i) = v(dimX + i) * (1.0 + min_dt) ;
            v(dimX + 2*dimY+i) = v(dimX + i) * (1.0 + max_dt) ;
127 128 129
        }
        else if(args.is_defined("dt-max"))
        {
130 131
            v(dimX +   dimY+i) = v(dimX + i) + std::max(v(dimX + i) * min_dt, min_dt);
            v(dimX + 2*dimY+i) = v(dimX + i) + std::max(v(dimX + i) * max_dt, max_dt);
132 133 134
        }
        else
        {
135 136
            v(dimX +   dimY+i) = v(dimX + i) + min_dt ;
            v(dimX + 2*dimY+i) = v(dimX + i) + max_dt ;
137 138 139 140 141 142 143
        }

        // You can enforce the vertical segment to stay in the positive
        // region using the --data-positive command line argument. Note
        // that the data point is also clamped to zero if negative.
        if(args.is_defined("dt-positive"))
        {
144 145 146
            v(dimX +        i) = std::max(v(dimX +        i), 0.0);
            v(dimX +   dimY+i) = std::max(v(dimX +   dimY+i), 0.0);
            v(dimX + 2*dimY+i) = std::max(v(dimX + 2*dimY+i), 0.0);
147 148 149 150 151 152 153 154
        }

#ifdef DEBUG
        std::cout << "<<DEBUG>> vs = [" << v[dimX +   dimY+i] << ", " << v[dimX + 2*dimY+i] << "]" << std::endl;
#endif
    }
}

155
alta::data* alta::load_data_from_text(std::istream& input,
156 157
                                      const alta::arguments& header,
                                      const alta::arguments& args)
158
{
159 160
  vec min, max ;
  vec ymin, ymax;
161

162 163 164 165 166
  if(! header.is_defined("DIM")) {
    std::cerr << "<<ERROR>> Undefined dimensions ! ";
    std::cerr << "Please add DIM [int] [int] into the file header." << std::endl;
    throw;
  }
167

168 169
  params::input  in_param  = params::parse_input(header.get_string("PARAM_IN", "UNKNOWN_INPUT"));
  params::output out_param = params::parse_output(header.get_string("PARAM_OUT", "UNKNOWN_OUTPUT"));
170

171
  std::pair<int, int> dim = header.get_pair<int>("DIM");
172
  size_t row_count = dim.first + 3 * dim.second;
173

174 175
  min = args.get_vec("min", dim.first, -std::numeric_limits<float>::max()) ;
  max = args.get_vec("max", dim.first,  std::numeric_limits<float>::max()) ;
176
#ifdef DEBUG
177
  std::cout << "<<DEBUG>> data will remove outside of " << min << " -> " << max << " x-interval" << std::endl;
178 179
#endif

180 181
  ymin = args.get_vec("ymin", dim.second, -std::numeric_limits<float>::max()) ;
  ymax = args.get_vec("ymax", dim.second,  std::numeric_limits<float>::max()) ;
182
#ifdef DEBUG
183
  std::cout << "<<DEBUG>> data will remove outside of " << ymin << " -> " << ymax << " y-interval" << std::endl;
184 185
#endif

186
  auto kind = ci_kind_from_number(header.get_int("VS"));
187
  std::vector<double> content;
188

189 190 191 192 193 194 195 196 197 198 199 200 201 202
  // Now read the body.
  while(input.good())
  {
    std::string line ;
    std::getline(input, line) ;
    std::stringstream linestream(line) ;

    // Discard comments and empty lines.
    if(line.empty() || linestream.peek() == '#')
    {
      continue ;
    }
    else
    {
203 204
      auto start = content.size();

205
      // Read the data point x and y coordinates
206
      for(int i=0; i < dim.first + dim.second; ++i)
207
      {
208 209 210
          double item;
          linestream >> item;
          content.push_back(item);
211 212
      }

213 214 215 216 217
      // Save room for the confidence interval.
      for (int i = 0; i < 2 * dim.second; i++)
          content.push_back(0.);

      Map<VectorXd> v(&content[start], row_count);
218 219 220 221 222 223 224

      // Read the confidence interval data if available.
      read_confidence_interval(linestream, v, kind,
                               dim.first, dim.second, args);

      // Check if we need to filter out what we just read according to ARGS.
      // TODO: Move filtering to a post-parsing operation on 'data'.
225 226 227
      if (!(within_bounds(v.segment(0, dim.first), min, max)
            && within_bounds(v.segment(dim.first, dim.second),
                             ymin, ymax)))
228 229 230 231
      {
          content.resize(start);
      }
      else if (args.is_defined("data-correct-cosine"))
232
      {
233 234
          if (!cosine_correction(v.segment(0, dim.first + dim.second),
                                 dim.first, dim.second, in_param))
235
              content.resize(start);
236 237 238 239 240 241 242 243
      }
    }
  }

  std::cout << "<<INFO>> loaded input stream" << std::endl ;
  std::cout << "<<INFO>> loading data input of R^"
            << dim.first
            << " -> R^" << dim.second << std::endl ;
244
  std::cout << "<<INFO>> " << content.size() << " elements to fit" << std::endl ;
245

246 247 248 249

  double *raw_content = new double[content.size()];
  memcpy(raw_content, content.data(), content.size() * sizeof(double));

250
  parameters param(dim.first, dim.second, in_param, out_param);
251 252 253 254
  size_t element_count = content.size() / row_count;
  data* result = new vertical_segment(param, element_count,
                                      std::shared_ptr<double>(raw_content,
                                                                            delete_array));
255 256 257 258
  if(args.is_defined("data-correct-cosine"))
      result->save("/tmp/data-corrected.dat");

  return result;
259 260
}

261
void alta::save_data_as_text(std::ostream& out, const alta::data &data)
262
{
Romain Pacanowski's avatar
Romain Pacanowski committed
263
        using namespace alta;
264

265 266 267 268 269 270 271 272 273 274 275 276 277
    out << "#DIM " << data.parametrization().dimX() << " " << data.parametrization().dimY() << std::endl;
    out << "#PARAM_IN  "
        << params::get_name(data.parametrization().input_parametrization())
        << std::endl;
    out << "#PARAM_OUT "
        << params::get_name(data.parametrization().output_parametrization())
        << std::endl;

    for(int i=0; i < data.size(); ++i)
    {
        vec x = data.get(i);
        for(int j=0; j< data.parametrization().dimX() + data.parametrization().dimY(); ++j)
        {
Romain Pacanowski's avatar
Romain Pacanowski committed
278 279
                    out << std::setprecision(std::numeric_limits<double>::digits10)
                        << x[j] << "\t";
280 281 282
        }
        out << std::endl;
    }
283
}
284

285
void alta::save_data_as_binary(std::ostream &out, const alta::data& data)
286
{
287 288
    using namespace alta;

289 290 291 292
    auto maybe_vs = dynamic_cast<const vertical_segment*>(&data);
    auto kind = maybe_vs != NULL
        ? maybe_vs->confidence_interval_kind()
        : vertical_segment::NO_CONFIDENCE_INTERVAL;
293

294 295 296 297 298 299 300 301 302 303 304
    out << "#DIM " << data.parametrization().dimX() << " " << data.parametrization().dimY() << std::endl;
    out << "#PARAM_IN  "
        << params::get_name(data.parametrization().input_parametrization())
        << std::endl;
    out << "#PARAM_OUT "
        << params::get_name(data.parametrization().output_parametrization())
        << std::endl;
    out << "#FORMAT binary" << std::endl;
    out << "#VERSION 0" << std::endl;
    out << "#PRECISION ieee754-double" << std::endl;
    out << "#SAMPLE_COUNT " << data.size() << std::endl;
305
    out << "#VS " << number_from_ci_kind(kind) << std::endl;
306 307 308 309

    // FIXME: Note: on non-glibc systems, both macros may be undefined, so
    // the conditional is equivalent to "#if 0 == 0", which is usually what
    // we want.
310
#if __BYTE_ORDER == __LITTLE_ENDIAN
311
    out << "#ENDIAN little" << std::endl;
312
#else
313
    out << "#ENDIAN big" << std::endl;
314 315
#endif

316
    out << "#BEGIN_STREAM" << std::endl;
317

318 319 320 321 322 323 324 325 326 327 328 329 330
    if (kind == vertical_segment::NO_CONFIDENCE_INTERVAL)
    {
        // No confidence interval to be saved.
        for(int i=0; i < data.size(); ++i)
        {
            vec sample = data.get(i);
            const double *numbers = sample.data();

            assert(sample.size() == data.parametrization().dimX() + data.parametrization().dimY());
            out.write((const char *)numbers, sample.size() * sizeof(*numbers));
        }
    }
    else
331
    {
332 333 334
        // Saving data along with the confidence interval.
        auto matrix = maybe_vs->matrix_view();
        auto byte_count = matrix.cols() * matrix.rows() * sizeof(double);
335

336 337 338
        // MATRIX has no stride so we can access the underlying storage
        // directly.
        out.write((const char *)matrix.data(), byte_count);
339
    }
340

341
    out << std::endl << "#END_STREAM" << std::endl;
342
}
343

344
alta::data* alta::load_data_from_binary(std::istream& in, const alta::arguments& header)
345
{
346
    using namespace alta;
347

348 349 350 351
    // FIXME: For now we make a number of assumptions.
    assert(header["FORMAT"] == "binary");
    assert(header.get_int("VERSION") == 0);
    assert(header["PRECISION"] == "ieee754-double");
352
#if __BYTE_ORDER == __LITTLE_ENDIAN
353
    assert(header["ENDIAN"] == "little");
354
#else
355
    assert(header["ENDIAN"] == "big");
356 357
#endif

358 359
    std::pair<int, int> dim = header.get_pair<int>("DIM");
    assert(dim.first > 0 && dim.second > 0);
360

361 362
    auto kind = ci_kind_from_number(header.get_int("VS"));

363
    in.exceptions(std::ios_base::failbit);
364

365
    int sample_count = header.get_int("SAMPLE_COUNT");
366 367 368
      if(sample_count <= 0) {
         std::cerr << "<<ERROR>> Uncorrect or not samples count in the header, please check \'SAMPLE_COUNT\'" << std::endl;
      }
369

370 371 372 373 374 375 376 377 378
      size_t ci_rows =
          kind == vertical_segment::ASYMMETRICAL_CONFIDENCE_INTERVAL
          ? 2 : (kind == vertical_segment::SYMMETRICAL_CONFIDENCE_INTERVAL
                 ? 1 : 0);

      size_t rows = dim.first + dim.second + ci_rows * dim.second;
      size_t element_count = sample_count * rows;
      double *content = new double[element_count];
      size_t byte_count = element_count * sizeof *content;
379

380 381 382 383
      // TODO: Arrange to use mmap and make it zero-alloc and zero-copy.
      for (std::streamsize total = 0;
           total < byte_count && !in.eof();
           total += in.gcount())
384
      {
385
          in.read((char *) content + total, byte_count - total);
386
      }
387

388 389 390 391
    parameters param(dim.first, dim.second,
                     params::parse_input(header["PARAM_IN"]),
                     params::parse_output(header["PARAM_OUT"]));

392 393
    return new alta::vertical_segment(param, sample_count,
                                      std::shared_ptr<double>(content,
394 395
                                                              delete_array),
                                      kind);
396
}