mksqlite  2.5
A MATLAB interface to SQLite
number_compressor.hpp
Go to the documentation of this file.
1 
17 #pragma once
18 
30 extern "C"
31 {
32  #include "blosc/blosc.h"
33 }
34 //#include "global.hpp"
35 #include "locale.hpp"
36 
// Compressor name strings accepted by NumberCompressor::setCompressor().
// The three BLOSC entries forward to the compressor names provided by blosc.h.
42 #define BLOSC_LZ4_ID BLOSC_LZ4_COMPNAME
43 #define BLOSC_LZ4HC_ID BLOSC_LZ4HC_COMPNAME
44 #define BLOSC_DEFAULT_ID BLOSC_BLOSCLZ_COMPNAME
// Names of the two built-in 16 bit quantizers (linear and logarithmic, both lossy).
45 #define QLIN16_ID "QLIN16"
46 #define QLOG16_ID "QLOG16"
47 
// Compressor that setCompressor() falls back to when no name (or level 0) is given.
49 #define COMPRESSOR_DEFAULT_ID BLOSC_DEFAULT_ID
51 
54 {
55 public:
57  typedef enum
58  {
59  CT_NONE = 0,
64 
66  void* m_result;
67  size_t m_result_size;
68 
69 
70 private:
72 
73  const char* m_strCompressorType;
74  compressor_type_e m_eCompressorType;
76 public:
77  void* m_rdata;
78  size_t m_rdata_size;
81  void* m_cdata;
82  size_t m_cdata_size;
83 private:
84 
85  void* (*m_Allocator)( size_t szBytes );
86  void (*m_DeAllocator)( void* ptr );
87 
91  NumberCompressor& operator=( const NumberCompressor& );
93 
94 
95 public:
// Default constructor: installs the C allocator pair (malloc/free), then
// resets the input data pointers and the result state.
97  explicit
98  NumberCompressor() : m_result(0)
99  {
100  m_Allocator = malloc; // using C memory allocators
101  m_DeAllocator = free;
102 
103  // no compression is the default
// NOTE(review): this listing skips one source line right here; presumably it
// is the statement that selects the default compressor - confirm against the
// original header before relying on the initial compressor state.
105 
106  clear_data();
// m_result was initialised to 0 above, so this call has nothing to deallocate.
107  free_result();
108  }
109 
110 
112  void free_result()
113  {
114  if( m_result && !m_result_is_const )
115  {
116  m_DeAllocator( m_result );
117  m_result = NULL;
118  m_result_size = 0;
119  m_result_is_const = true;
120  }
121  }
122 
123 
125  void clear_data()
126  {
127  m_rdata = NULL;
128  m_rdata_size = 0;
129  m_cdata = NULL;
130  m_cdata_size = 0;
131  m_rdata_is_double_type = false;
132  }
133 
134 
136  void clear_err()
137  {
138  m_err.clear();
139  }
140 
141 
143  int get_err()
144  {
145  return m_err.getMsgId();
146  }
147 
148 
// NOTE(review): the signature line that belongs to this body is missing from
// this listing; presumably it is the destructor - confirm against the
// original header.
151  {
// Drop the input pointers first (no deallocation) ...
152  clear_data();
// ... then release a self allocated result, if there is one.
153  free_result();
154  }
155 
156 
163  void setAllocator( void* (*Allocator)(size_t), void (*DeAllocator)(void*) )
164  {
165  if( Allocator && DeAllocator )
166  {
167  m_Allocator = Allocator;
168  m_DeAllocator = DeAllocator;
169  }
170  else
171  {
172  assert( false );
173  }
174  }
175 
176 
183  bool setCompressor( const char *strCompressorType, int iCompressionLevel = -1 )
184  {
185  compressor_type_e eCompressorType = CT_NONE;
186 
187  m_err.clear();
188 
189  // if no compressor or compression is specified, use standard compressor
190  // which leads to no compression
191  if( 0 == iCompressionLevel || !strCompressorType || !*strCompressorType )
192  {
193  strCompressorType = COMPRESSOR_DEFAULT_ID;
194  iCompressionLevel = 0;
195  }
196 
197  // checking compressor names
198  if( 0 == _strcmpi( strCompressorType, BLOSC_LZ4_ID ) )
199  {
200  eCompressorType = CT_BLOSC;
201  }
202  else if( 0 == _strcmpi( strCompressorType, BLOSC_LZ4HC_ID ) )
203  {
204  eCompressorType = CT_BLOSC;
205  }
206  else if( 0 == _strcmpi( strCompressorType, BLOSC_DEFAULT_ID ) )
207  {
208  eCompressorType = CT_BLOSC;
209  }
210  else if( 0 == _strcmpi( strCompressorType, QLIN16_ID ) )
211  {
212  eCompressorType = CT_QLIN16;
213  }
214  else if( 0 == _strcmpi( strCompressorType, QLOG16_ID ) )
215  {
216  eCompressorType = CT_QLOG16;
217  }
218 
219  // check and acquire valid settings
220  if( CT_NONE != eCompressorType )
221  {
222  m_strCompressorType = strCompressorType;
223  m_eCompressorType = eCompressorType;
224 
225  if( iCompressionLevel >= 0 )
226  {
227  m_iCompressionLevel = iCompressionLevel;
228  }
229 
230  if( m_eCompressorType == CT_BLOSC )
231  {
232  blosc_set_compressor( m_strCompressorType );
233  }
234 
235  return true;
236  }
237  else return false;
238  }
239 
240 
242  const char* getCompressorName()
243  {
244  return m_strCompressorType;
245  }
246 
247 
249  bool isLossy()
250  {
251  return m_eCompressorType == CT_QLIN16 || m_eCompressorType == CT_QLOG16;
252  }
253 
254 
264  bool pack( void* rdata, size_t rdata_size, size_t rdata_element_size, bool isDoubleClass )
265  {
266  bool status = false;
267 
268  free_result();
269  clear_data();
270  clear_err();
271 
272  // acquire raw data
273  m_rdata = rdata;
274  m_rdata_size = rdata_size;
275  m_rdata_element_size = rdata_element_size;
276  m_rdata_is_double_type = isDoubleClass;
277 
278  // dispatch
279  switch( m_eCompressorType )
280  {
281  case CT_BLOSC:
282  status = bloscCompress();
283  break;
284 
285  case CT_QLIN16:
286  status = linlogQuantizerCompress( /* bDoLog*/ false );
287  break;
288 
289  case CT_QLOG16:
290  status = linlogQuantizerCompress( /* bDoLog*/ true );
291  break;
292 
293  default:
294  break;
295  }
296 
298  m_result_is_const = false;
299  m_result = m_cdata;
300  m_result_size = m_cdata_size;
301 
302  return status;
303  }
304 
305 
316  bool unpack( void* cdata, size_t cdata_size, void* rdata, size_t rdata_size, size_t rdata_element_size )
317  {
318  bool status = false;
319 
320  assert( rdata && rdata_size > 0 );
321 
322  free_result();
323  clear_data();
324  clear_err();
325 
327  m_cdata = cdata;
328  m_cdata_size = cdata_size;
329  m_rdata = rdata;
330  m_rdata_size = rdata_size;
331  m_rdata_element_size = rdata_element_size;
332 
334  switch( m_eCompressorType )
335  {
336  case CT_BLOSC:
337  status = bloscDecompress();
338  break;
339 
340  case CT_QLIN16:
341  status = linlogQuantizerDecompress( /* bDoLog*/ false );
342  break;
343 
344  case CT_QLOG16:
345  status = linlogQuantizerDecompress( /* bDoLog*/ true );
346  break;
347 
348  default:
349  break;
350  }
351 
353  m_result_is_const = true;
354  m_result = m_rdata;
355  m_result_size = m_rdata_size;
356 
357  return status;
358  }
359 
360 
361 private:
368  {
369  assert( m_rdata && !m_cdata );
370 
371  // BLOSC grants for that compressed data never
372  // exceeds original size + BLOSC_MAX_OVERHEAD
373  m_cdata_size = m_rdata_size + BLOSC_MAX_OVERHEAD;
374  m_cdata = m_Allocator( m_cdata_size );
375 
376  if( NULL == m_cdata )
377  {
378  m_err.set( MSG_ERRMEMORY );
379  return false;
380  }
381 
382  /* compress raw data (rdata) and store it in cdata */
383  m_cdata_size = blosc_compress(
384  /*clevel*/ m_iCompressionLevel,
385  /*doshuffle*/ BLOSC_DOSHUFFLE,
386  /*typesize*/ m_rdata_element_size,
387  /*nbytes*/ m_rdata_size,
388  /*src*/ m_rdata,
389  /*dest*/ m_cdata,
390  /*destsize*/ m_cdata_size );
391 
392  return NULL != m_cdata;
393  }
394 
395 
405  {
406  assert( m_rdata && m_cdata );
407 
408  size_t blosc_nbytes, blosc_cbytes, blosc_blocksize;
409 
410  // calculate necessary buffer sizes
411  blosc_cbuffer_sizes( m_cdata, &blosc_nbytes, &blosc_cbytes, &blosc_blocksize );
412 
413  // uncompressed data must fit into
414  if( blosc_nbytes != m_rdata_size )
415  {
416  m_err.set( MSG_ERRCOMPRESSION );
417  return false;
418  }
419 
420  // decompress directly into items memory space
421  if( blosc_decompress( m_cdata, m_rdata, m_rdata_size ) <= 0 )
422  {
423  m_err.set( MSG_ERRCOMPRESSION );
424  return false;
425  }
426 
427  return true;
428  }
429 
430 
439  bool linlogQuantizerCompress( bool bDoLog )
440  {
441  assert( m_rdata && !m_cdata &&
442  m_rdata_element_size == sizeof( double ) &&
443  m_rdata_size % m_rdata_element_size == 0 );
444 
445  double dOffset = 0.0, dScale = 1.0;
446  double dMinVal, dMaxVal;
447  bool bMinValSet = false, bMaxValSet = false;
448  double* rdata = (double*)m_rdata;
449  size_t cntElements = m_rdata_size / sizeof(*rdata);
450  float* pFloatData;
451  uint16_t* pUintData;
452 
453  // compressor works for double type only
454  if( !m_rdata_is_double_type )
455  {
456  m_err.set( MSG_ERRCOMPRARG );
457  return false;
458  }
459 
460  // seek data limits for quantization
461  for( size_t i = 0; i < cntElements; i++ )
462  {
463  if( DBL_ISFINITE( rdata[i] ) && rdata[i] != 0.0 )
464  {
465  if( !bMinValSet || rdata[i] < dMinVal )
466  {
467  dMinVal = rdata[i];
468  bMinValSet = true;
469  }
470 
471  if( !bMaxValSet || rdata[i] > dMaxVal )
472  {
473  dMaxVal = rdata[i];
474  bMaxValSet = true;
475  }
476  }
477  }
478 
479  // in logarithmic mode, no negative values are allowed
480  if( bDoLog && dMinVal < 0.0 )
481  {
482  m_err.set( MSG_ERRCOMPRLOGMINVALS );
483  return false;
484  }
485 
486  // compressor converts each value to uint16_t
487  // 2 additional floats for offset and scale
488  m_cdata_size = 2 * sizeof( float ) + cntElements * sizeof( uint16_t );
489  m_cdata = m_Allocator( m_cdata_size );
490 
491  if( !m_cdata )
492  {
493  m_err.set( MSG_ERRMEMORY );
494  return false;
495  }
496 
497  pFloatData = (float*)m_cdata;
498  pUintData = (uint16_t*)&pFloatData[2];
499 
500  // calculate offset information
501  if( bMinValSet )
502  {
503  dOffset = bDoLog ? log( dMinVal ) : dMinVal;
504  }
505 
506  // calculate scale information
507  if( bMaxValSet )
508  {
509  double dValue = bDoLog ? log( dMaxVal ) : dMaxVal;
510 
511  // data is mapped on 65529 (0xFFF8u) levels
512  dScale = ( dValue - dOffset ) / 0xFFF8u;
513 
514  // if dMaxValue == dMinValue, scale would be set to zero.
515  // to avoid division by zero on decompression, it is set to 1.0 here.
516  // this doesn't affect the result (0/1 = 0)
517  if( dScale == 0.0 )
518  {
519  dScale = 1.0;
520  }
521  }
522 
523  // store offset and scale information for decompression
524  pFloatData[0] = (float)dOffset;
525  pFloatData[1] = (float)dScale;
526 
527  // quantization
528  for( size_t i = 0; i < cntElements; i++ )
529  {
530  // non-finite values and zero are mapped to special values
531  if( DBL_ISFINITE( rdata[i] ) && rdata[i] != 0.0 )
532  {
533  double dValue = bDoLog ? log( rdata[i] ) : rdata[i];
534 
535  *pUintData++ = (uint16_t) ( (dValue - dOffset ) / dScale ) & 0xFFF8u;
536  }
537  else
538  {
539  // special values for zero, infinity and nan
540  if( fabs( rdata[i] ) == 0.0 )
541  {
542  *pUintData++ = 0xFFF8u + 1 + ( _copysign( 1.0, rdata[i] ) < 0.0 );
543  }
544  else if( DBL_ISINF( rdata[i] ) )
545  {
546  *pUintData++ = 0xFFF8u + 3 + ( _copysign( 1.0, rdata[i] ) < 0.0 );
547  }
548  else if( DBL_ISNAN( rdata[i] ) )
549  {
550  *pUintData++ = 0xFFF8u + 5;
551  }
552  }
553  }
554 
555  return true;
556  }
557 
558 
570  bool linlogQuantizerDecompress( bool bDoLog )
571  {
572  assert( m_rdata && m_cdata &&
573  m_rdata_element_size == sizeof( double ) &&
574  m_rdata_size % m_rdata_element_size == 0 );
575 
576  double dOffset = 0.0, dScale = 1.0;
577  double* rdata = (double*)m_rdata;
578  size_t cntElements = m_rdata_size / sizeof(*rdata);
579  float* pFloatData = (float*)m_cdata;
580  uint16_t* pUintData = (uint16_t*)&pFloatData[2];
581 
582  // compressor works for double type only
583  if( m_rdata_is_double_type )
584  {
585  m_err.set( MSG_ERRCOMPRARG );
586  return false;
587  }
588 
589  // restore offset and scale information
590  dOffset = pFloatData[0];
591  dScale = pFloatData[1];
592 
593  // rescale values to its originals
594  for( size_t i = 0; i < cntElements; i++ )
595  {
596  if( *pUintData > 0xFFF8u )
597  {
598  // handle special values for zero, infinity and nan
599  switch( *pUintData - 0xFFF8u )
600  {
601  case 1: *rdata = +0.0; break;
602  case 2: *rdata = -0.0; break;
603  case 3: *rdata = +DBL_INF; break; // pos. infinity
604  case 4: *rdata = -DBL_INF; break; // neg. infinity
605  case 5: *rdata = DBL_NAN; break; // not a number (NaN)
606  }
607 
608  pUintData++;
609  rdata++;
610  }
611  else
612  {
613  // all other values are rescaled respective to offset and scale
614  if( bDoLog )
615  {
616  *rdata++ = exp( (double)*pUintData++ * dScale + dOffset );
617  }
618  else
619  {
620  *rdata++ = (double)*pUintData++ * dScale + dOffset;
621  }
622  }
623  }
624 
625  return true;
626  }
627 
628 };
void * m_result
compressor output
bool linlogQuantizerDecompress(bool bDoLog)
void clear_data()
Reset input data (compressed and uncompressed) memory without deallocation!
bool linlogQuantizerCompress(bool bDoLog)
Lossy data compression by linear or logarithmic quantization (16 bit)
void setAllocator(void *(*Allocator)(size_t), void(*DeAllocator)(void *))
Set memory management.
void *(* m_Allocator)(size_t szBytes)
memory allocator
const char * m_strCompressorType
name of compressor to use
(Error) messages in English and German.
const char * getCompressorName()
Get compressor name.
bool bloscCompress()
Allocates memory for compressed data and uses it to store results (lossless data compression) ...
compressor_type_e m_eCompressorType
enum type of compressor to use
compressor_type_e
supported compressor types
size_t m_rdata_element_size
size of one element in bytes
bool pack(void *rdata, size_t rdata_size, size_t rdata_element_size, bool isDoubleClass)
Calls the qualified compressor (deflate) which always allocates sufficient memory (m_cdata) ...
size_t m_cdata_size
size of compressed data in bytes
bool isLossy()
Returns true if the current compressor modifies value data (lossy compression).
compressor class
void clear()
Reset error message.
Definition: locale.hpp:134
void * m_cdata
compressed data
size_t m_rdata_size
size of uncompressed data in bytes
int m_iCompressionLevel
compression level (0 to 9)
Helper class for error message transport.
Definition: locale.hpp:116
void clear_err()
Reset recent error message.
bool m_rdata_is_double_type
Flag type is mxDOUBLE_CLASS.
void set(const char *strMsg, const char *strId=NULL)
Set error message to a constant string (without translation)
Definition: locale.hpp:150
void free_result()
Clear self created results with memory deallocation.
void(* m_DeAllocator)(void *ptr)
memory deallocator
using linear quantization (lossy)
int get_err()
Get recent error message id.
using BLOSC compressor (lossless)
bool unpack(void *cdata, size_t cdata_size, void *rdata, size_t rdata_size, size_t rdata_element_size)
Calls the qualified compressor (inflate)
int getMsgId()
Get the current message identifier.
Definition: locale.hpp:278
Err m_err
recent error
void * m_rdata
uncompressed data
bool m_result_is_const
true, if result is const type
size_t m_result_size
size of compressor output in bytes
bool setCompressor(const char *strCompressorType, int iCompressionLevel=-1)
Converts compressor ID string to category enum.
#define COMPRESSOR_DEFAULT_ID
Which compression method to use if no compressor name is given.
using logarithmic quantization (lossy)
bool bloscDecompress()
Uncompress compressed data m_cdata to data m_rdata.