The Conley-Morse Graphs Software
bytecompr.h
Go to the documentation of this file.
1/////////////////////////////////////////////////////////////////////////////
2///
3/// @file bytecompr.h
4///
5/// Writing and reading binary data (int, double) with bzip2 compression.
6/// This file contains the definition of two classes "ByteCompressor"
7/// and "ByteDecompressor" which serve as simple wrappers for putting
8/// binary data into a previously opened binary stream, with compression,
9/// and getting the data back from a previously compressed stream.
10/// This may be useful e.g. for saving a memory dump to a bz2 file,
11/// or for saving and then retrieving temporary binary data structures
12/// to a bz2 compressed file.
13/// Note that the data is not portable, the sizes of types as well
14/// as byte order may depend on a particular architecture.
15/// These data structures are designed for storing temporary data
16/// and re-using it from the same (binary) program. Use with caution.
17///
18/// @author Pawel Pilarczyk
19///
20/////////////////////////////////////////////////////////////////////////////
21
22// Copyright (C) 1997-2014 by Pawel Pilarczyk.
23//
24// This file is part of my research software package. This is free software:
25// you can redistribute it and/or modify it under the terms of the GNU
26// General Public License as published by the Free Software Foundation,
27// either version 3 of the License, or (at your option) any later version.
28//
29// This software is distributed in the hope that it will be useful,
30// but WITHOUT ANY WARRANTY; without even the implied warranty of
31// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32// GNU General Public License for more details.
33//
34// You should have received a copy of the GNU General Public License
35// along with this software; see the file "license.txt". If not,
36// please, see <https://www.gnu.org/licenses/>.
37
38// Started on July 2, 2014. Last revision: July 7, 2014.
39
40
41#ifndef _PWP_BYTECOMPR_H_
42#define _PWP_BYTECOMPR_H_
43
44#include <iostream>
45#include <bzlib.h>
46
47// include selected header files from the CHomP library
48// (this is temporary and serves the debugging purpose only)
49//#include "chomp/system/textfile.h"
50//using chomp::homology::sout;
51
52
53// --------------------------------------------------
54// ---------------- Byte Compressor -----------------
55// --------------------------------------------------
56
57/// A simple wrapper for the bzip2 data compression to an output stream.
59{
60public:
61 /// The constructor of an output stream for storing binary data.
62 /// It also initializes the bzip2 compression structure.
63 ByteCompressor (std::ostream &out);
64
65 /// The destructor that ends the compression procedure
66 /// and releases the internal memory buffers.
68
69 /// Ends the compression and writes the remaining data to the stream.
70 /// It also deletes the bzip2 compression structure.
71 void close ();
72
73 /// Writes the given number of bytes to the compression stream.
74 /// Uses an internal buffer to avoid the inefficiency that comes
75 /// from passing small portions of data to the bzip2 procedures.
76 void write (const char *buf, size_t n);
77
78private:
79 /// The bzip2 data stream information.
80 bz_stream *bzStream;
81
82 /// A reference to the output stream to which the data
83 /// should be written.
84 std::ostream &outStream;
85
86 /// An internal buffer for caching data for compression.
87 char *inBuffer;
88
89 /// An internal buffer for caching compressed data.
90 char *outBuffer;
91
92 /// The size of each of the internal buffers.
93 size_t bufSize;
94
95 /// The current position in the internal buffer for compression.
96 size_t inPos;
97
98 /// Compresses the input buffer and writes the resulting data
99 /// to the output stream.
100 void compress (bool finishing);
101
102 // Copy constructor is not allowed.
104
105 // Assignement operator is not allowed.
107
108}; /* class ByteCompressor */
109
110// --------------------------------------------------
111
112inline ByteCompressor::ByteCompressor (std::ostream &out):
113 outStream (out)
114{
115 bufSize = 102400;
116 inBuffer = new char [bufSize];
117 outBuffer = new char [bufSize];
118 inPos = 0;
119 bzStream = new bz_stream;
120 bzStream -> next_in = inBuffer;
121 bzStream -> avail_in = 0;
122 bzStream -> total_in_lo32 = 0;
123 bzStream -> total_in_hi32 = 0;
124 bzStream -> next_out = outBuffer;
125 bzStream -> avail_out = bufSize;
126 bzStream -> total_out_lo32 = 0;
127 bzStream -> total_out_hi32 = 0;
128 bzStream -> state = 0;
129 bzStream -> bzalloc = 0;
130 bzStream -> bzfree = 0;
131 bzStream -> opaque = 0;
132 int code = BZ2_bzCompressInit (bzStream, 9, 0, 0);
133 switch (code)
134 {
135 case BZ_CONFIG_ERROR:
136 throw "BZ2_bzCompressInit config error.";
137 case BZ_PARAM_ERROR:
138 throw "BZ2_bzCompressInit wrong parameters.";
139 case BZ_MEM_ERROR:
140 throw "BZ2_bzCompressInit memory error.";
141 case BZ_OK:
142 break;
143 default:
144 throw "BZ2_bzCompressInit returned an unknown code.";
145 }
146 return;
147} /* ByteCompressor::ByteCompressor */
148
149inline void ByteCompressor::compress (bool finishing)
150{
151// sout << "DEBUG: compress ().\n";
152 if (!bzStream)
153 return;
154
155 // compress all the data gathered in the input buffer
156 size_t startPos (0);
157 bool finished (false);
158 while ((finishing && !finished) || (startPos < inPos))
159 {
160 // sout << "DEBUG: inPos = " << inPos << ".\n";
161 // sout << "DEBUG: startPos = " << startPos << ".\n";
162 // sout << "DEBUG: finishing = " << finishing << ".\n";
163 // sout << "DEBUG: finished = " << finished << ".\n";
164 // prepare buffer information in the bz2 data structure
165 bzStream -> next_in = inBuffer + startPos;
166 bzStream -> avail_in = inPos - startPos;
167 bzStream -> next_out = outBuffer;
168 bzStream -> avail_out = bufSize;
169
170 // compress the input buffer to the output buffer
171 int code = BZ2_bzCompress (bzStream,
172 finishing ? BZ_FINISH : BZ_RUN);
173 switch (code)
174 {
175 case BZ_SEQUENCE_ERROR:
176 throw "BZ2_bzCompress sequence error.";
177 case BZ_STREAM_END:
178 finished = true;
179 if (finishing)
180 break;
181 throw "BZ2_bzCompress unexpected stream end.";
182 case BZ_OK:
183 case BZ_RUN_OK:
184 case BZ_FINISH_OK:
185 break;
186 default:
187 // sout << "DEBUG: code = " << code << ".\n";
188 throw "BZ2_bzCompress returned an unknown code.";
189 }
190
191 // write the output to the output stream
192 size_t outSize (bzStream -> next_out - outBuffer);
193 if (outSize)
194 outStream. write (outBuffer, outSize);
195
196 // update the reading offset in the input buffer
197 startPos = bzStream -> next_in - inBuffer;
198
199 // exit the loop if this is the end of the procedure
200 if (finishing && finished)
201 break;
202 }
203
204 // make a note that the input buffer is now empty
205 inPos = 0;
206
207 return;
208} /* ByteCompressor::compress */
209
210void ByteCompressor::write (const char *buf, size_t n)
211{
212// sout << "DEBUG: write " << n << " byte(s).\n";
213 if (n > bufSize)
214 throw "Compression buffer size too small. Fix the code!";
215 if (inPos + n > bufSize)
216 compress (false);
217 if (inPos + n > bufSize)
218 throw "Not enough buffer space to compress data.";
219 for (size_t i = 0; i < n; ++ i)
220 inBuffer [inPos ++] = buf [i];
221 return;
222} /* ByteCompressor::write */
223
225{
226// sout << "DEBUG: close ().\n";
227 if (!bzStream)
228 return;
229
230 // finish off compressing all the data
231 this -> compress (true);
232
233 // release all memory associated with the compression stream
234 int code = BZ2_bzCompressEnd (bzStream);
235 switch (code)
236 {
237 case BZ_PARAM_ERROR:
238 throw "BZ2_bzCompressEnd: wrong bz stream data structure";
239 case BZ_OK:
240 break;
241 default:
242 throw "BZ2_bzCompressEnd returned an unknown code.";
243 }
244
245 // delete the compression structure
246 delete bzStream;
247 bzStream = 0;
248
249 return;
250} /* ByteCompressor::close */
251
253{
254// sout << "DEBUG: destructor.\n";
255 this -> close ();
256 if (bzStream)
257 delete bzStream;
258 if (inBuffer)
259 delete [] inBuffer;
260 if (outBuffer)
261 delete [] outBuffer;
262 return;
263} /* ByteCompressor::~ByteCompressor */
264
265// --------------------------------------------------
266
267/// Writes a variable to a byte compressor stream.
268#define DEFBYTEWRITE(var_type) \
269inline ByteCompressor &operator << (ByteCompressor &c, const var_type &n) \
270{ \
271 c. write (reinterpret_cast<const char *> (&n), sizeof (var_type)); \
272 return c; \
273} /* operator << */
274
275DEFBYTEWRITE(unsigned int)
276DEFBYTEWRITE(signed int)
277DEFBYTEWRITE(unsigned long)
278DEFBYTEWRITE(signed long)
279DEFBYTEWRITE(unsigned short)
280DEFBYTEWRITE(signed short)
281DEFBYTEWRITE(unsigned char)
282DEFBYTEWRITE(char)
283DEFBYTEWRITE(float)
284DEFBYTEWRITE(double)
285
286#undef DEFBYTEWRITE
287
288
289// --------------------------------------------------
290// --------------- Byte Decompressor ----------------
291// --------------------------------------------------
292
293/// A simple wrapper for the bzip2 data decompression from an input stream.
295{
296public:
297 /// The constructor of an input stream for retrieving binary data.
298 /// It also initializes the bzip2 decompression structure.
299 ByteDecompressor (std::istream &in);
300
301 /// The destructor that ends the decompression procedure
302 /// and releases the internal memory buffers.
304
305 /// Ends the decompression and deletes the bzip2 structure.
306 void close ();
307
308 /// Reads the given number of bytes from the decompressed stream.
309 /// Uses an internal buffer to avoid the inefficiency that comes
310 /// from taking small portions of data from the bzip2 procedures.
311 /// Throws an exception if there is not enough data.
312 void read (char *buf, size_t n);
313
314private:
315 /// The bzip2 data stream information.
316 bz_stream *bzStream;
317
318 /// A reference to the input stream from which the compressed data
319 /// should be read.
320 std::istream &inStream;
321
322 /// An internal buffer for caching data read from the input stream.
323 char *inBuffer;
324
325 /// An internal buffer for caching decompressed data.
327
328 /// The size of each of the internal buffers.
329 size_t bufSize;
330
331 /// Is the decompression procedure completed?
333
334 /// The current position in the internal buffer with compressed data.
335 size_t inPos;
336
337 /// The end of the compressed data in the internal buffer.
338 size_t inPosEnd;
339
340 /// The current position in the internal buffer
341 /// with decompressed data.
342 size_t outPos;
343
344 /// The position of the end of the decompressed data
345 /// in the internal buffer.
346 size_t outPosEnd;
347
348 /// Decompresses data from the input buffer (and the input stream)
349 /// to provide more data for reading if possible.
350 void decompress ();
351
352 // Copy constructor is not allowed.
354
355 // Assignement operator is not allowed.
357
358}; /* class ByteDecompressor */
359
360// --------------------------------------------------
361
362inline ByteDecompressor::ByteDecompressor (std::istream &in):
363 inStream (in)
364{
365 bufSize = 102400;
366 finished = false;
367 inBuffer = new char [bufSize];
368 outBuffer = new char [bufSize];
369 inPos = 0;
370 inPosEnd = 0;
371 outPos = 0;
372 outPosEnd = 0;
373 bzStream = new bz_stream;
374 bzStream -> next_in = inBuffer;
375 bzStream -> avail_in = 0;
376 bzStream -> total_in_lo32 = 0;
377 bzStream -> total_in_hi32 = 0;
378 bzStream -> next_out = outBuffer;
379 bzStream -> avail_out = bufSize;
380 bzStream -> total_out_lo32 = 0;
381 bzStream -> total_out_hi32 = 0;
382 bzStream -> state = 0;
383 bzStream -> bzalloc = 0;
384 bzStream -> bzfree = 0;
385 bzStream -> opaque = 0;
386 int code = BZ2_bzDecompressInit (bzStream, 0, 0);
387 switch (code)
388 {
389 case BZ_CONFIG_ERROR:
390 throw "BZ2_bzDecompressInit config error.";
391 case BZ_PARAM_ERROR:
392 throw "BZ2_bzDecompressInit wrong parameters.";
393 case BZ_MEM_ERROR:
394 throw "BZ2_bzDecompressInit memory error.";
395 case BZ_OK:
396 break;
397 default:
398 throw "BZ2_bzDecompressInit returned an unknown code.";
399 }
400 return;
401} /* ByteDecompressor::ByteDecompressor */
402
404{
405// sout << "DEBUG: decompress ().\n";
406 if (!bzStream)
407 return;
408
409 // if there is no data in the output buffer then make sure
410 // that the writing position is reset to the buffer start
411 if (outPos && (outPos == outPosEnd))
412 {
413 outPos = 0;
414 outPosEnd = 0;
415 }
416
417 // if the output data is close to the end of the buffer
418 // then move it to the front of the buffer to make sure
419 // that there is a lot of space available for new data
420 if (bufSize - outPos < (bufSize >> 8))
421 {
422 char *outBegin = outBuffer;
423 char *outCurrent = outBuffer + outPos;
424 for (size_t i = outPos; i < outPosEnd; ++ i)
425 *(outBegin ++) = *(outCurrent ++);
426 outPosEnd -= outPos;
427 outPos = 0;
428 }
429
430 while (!finished && (outPosEnd < bufSize))
431 {
432 // sout << "DEBUG: inPos = " << inPos << ".\n";
433 // sout << "DEBUG: inPosEnd = " << inPosEnd << ".\n";
434 // sout << "DEBUG: finished = " << finished << ".\n";
435
436 // if there is no data in the input buffer then make sure
437 // that the reading position is reset to the buffer start
438 if (inPos && (inPos == inPosEnd))
439 {
440 inPos = 0;
441 inPosEnd = 0;
442 }
443
444 // if the input data is close to the end of the buffer
445 // then move it to the front of the buffer to make sure
446 // that there is a lot of space available for new data
447 if (bufSize - inPos < (bufSize >> 8))
448 {
449 char *inBegin = inBuffer;
450 char *inCurrent = inBuffer + inPos;
451 for (size_t i = inPos; i < inPosEnd; ++ i)
452 *(inBegin ++) = *(inCurrent ++);
453 inPosEnd -= inPos;
454 inPos = 0;
455 }
456
457 // read more data from the input stream if possible
458 if (!inStream. eof () && (inPosEnd < bufSize))
459 {
461 bufSize - inPosEnd);
462 inPosEnd += inStream. gcount ();
463 }
464
465 // prepare buffer information in the bz2 data structure
466 bzStream -> next_in = inBuffer + inPos;
467 bzStream -> avail_in = inPosEnd - inPos;
468 bzStream -> next_out = outBuffer + outPosEnd;
469 bzStream -> avail_out = bufSize - outPosEnd;
470
471 // decompress the input buffer to the output buffer
472 int code = BZ2_bzDecompress (bzStream);
473 switch (code)
474 {
475 case BZ_PARAM_ERROR:
476 throw "BZ2_bzDecompress parameter error.";
477 case BZ_DATA_ERROR:
478 throw "BZ2_bzDecompress data integrity error.";
479 case BZ_DATA_ERROR_MAGIC:
480 throw "BZ2_bzDecompress magic data error.";
481 case BZ_MEM_ERROR:
482 throw "BZ2_bzDecompress not enough memory available.";
483 case BZ_STREAM_END:
484 finished = true;
485 break;
486 case BZ_OK:
487 break;
488 default:
489 // sout << "DEBUG: code = " << code << ".\n";
490 throw "BZ2_bzDecompress returned an unknown code.";
491 }
492
493 // update the reading offset in the input buffer
494 inPos = bzStream -> next_in - inBuffer;
495
496 // update the offset in the output buffer
497 outPosEnd = bzStream -> next_out - outBuffer;
498 }
499
500 return;
501} /* ByteDecompressor::decompress */
502
503void ByteDecompressor::read (char *buf, size_t n)
504{
505// sout << "DEBUG: read " << n << " byte(s).\n";
506 if (n > bufSize)
507 throw "Compression buffer size too small. Fix the code!";
508 if (outPos + n > outPosEnd)
509 decompress ();
510 if (outPos + n > outPosEnd)
511 throw "Not enough data read from the compressed stream.";
512 for (size_t i = 0; i < n; ++ i)
513 buf [i] = outBuffer [outPos ++];
514 return;
515} /* ByteDecompressor::read */
516
518{
519// sout << "DEBUG: close ().\n";
520 if (!bzStream)
521 return;
522
523 // release all memory associated with the compression stream
524 int code = BZ2_bzDecompressEnd (bzStream);
525 switch (code)
526 {
527 case BZ_PARAM_ERROR:
528 throw "BZ2_bzDecompressEnd: wrong bz stream data structure";
529 case BZ_OK:
530 break;
531 default:
532 throw "BZ2_bzDecompressEnd returned an unknown code.";
533 }
534
535 // delete the compression structure
536 delete bzStream;
537 bzStream = 0;
538
539 return;
540} /* ByteDecompressor::close */
541
543{
544// sout << "DEBUG: destructor.\n";
545 this -> close ();
546 if (bzStream)
547 delete bzStream;
548 if (inBuffer)
549 delete [] inBuffer;
550 if (outBuffer)
551 delete [] outBuffer;
552 return;
553} /* ByteDecompressor::~ByteDecompressor */
554
555// --------------------------------------------------
556
557/// Reads a variable from a byte decompressor stream.
558#define DEFBYTEREAD(var_type) \
559inline ByteDecompressor &operator >> (ByteDecompressor &c, var_type &n) \
560{ \
561 c. read (reinterpret_cast<char *> (&n), sizeof (var_type)); \
562 return c; \
563} /* operator >> */
564
565DEFBYTEREAD(unsigned int)
566DEFBYTEREAD(signed int)
567DEFBYTEREAD(unsigned long)
568DEFBYTEREAD(signed long)
569DEFBYTEREAD(unsigned short)
570DEFBYTEREAD(signed short)
571DEFBYTEREAD(unsigned char)
572DEFBYTEREAD(char)
573DEFBYTEREAD(float)
574DEFBYTEREAD(double)
575
576#undef DEFBYTEREAD
577
578
579#endif // _PWP_BYTECOMPR_H_
580
#define DEFBYTEWRITE(var_type)
Writes a variable to a byte compressor stream.
Definition: bytecompr.h:268
#define DEFBYTEREAD(var_type)
Reads a variable from a byte decompressor stream.
Definition: bytecompr.h:558
A simple wrapper for the bzip2 data compression to an output stream.
Definition: bytecompr.h:59
ByteCompressor(const ByteCompressor &)
ByteCompressor & operator=(const ByteCompressor &)
std::ostream & outStream
A reference to the output stream to which the data should be written.
Definition: bytecompr.h:84
void close()
Ends the compression and writes the remaining data to the stream.
Definition: bytecompr.h:224
char * inBuffer
An internal buffer for caching data for compression.
Definition: bytecompr.h:87
size_t inPos
The current position in the internal buffer for compression.
Definition: bytecompr.h:96
char * outBuffer
An internal buffer for caching compressed data.
Definition: bytecompr.h:90
ByteCompressor(std::ostream &out)
The constructor of an output stream for storing binary data.
Definition: bytecompr.h:112
bz_stream * bzStream
The bzip2 data stream information.
Definition: bytecompr.h:80
void write(const char *buf, size_t n)
Writes the given number of bytes to the compression stream.
Definition: bytecompr.h:210
void compress(bool finishing)
Compresses the input buffer and writes the resulting data to the output stream.
Definition: bytecompr.h:149
~ByteCompressor()
The destructor that ends the compression procedure and releases the internal memory buffers.
Definition: bytecompr.h:252
size_t bufSize
The size of each of the internal buffers.
Definition: bytecompr.h:93
A simple wrapper for the bzip2 data decompression from an input stream.
Definition: bytecompr.h:295
void close()
Ends the decompression and deletes the bzip2 structure.
Definition: bytecompr.h:517
bz_stream * bzStream
The bzip2 data stream information.
Definition: bytecompr.h:316
size_t inPos
The current position in the internal buffer with compressed data.
Definition: bytecompr.h:335
void read(char *buf, size_t n)
Reads the given number of bytes from the decompressed stream.
Definition: bytecompr.h:503
ByteDecompressor(std::istream &in)
The constructor of an input stream for retrieving binary data.
Definition: bytecompr.h:362
std::istream & inStream
A reference to the input stream from which the compressed data should be read.
Definition: bytecompr.h:320
char * inBuffer
An internal buffer for caching data read from the input stream.
Definition: bytecompr.h:323
~ByteDecompressor()
The destructor that ends the decompression procedure and releases the internal memory buffers.
Definition: bytecompr.h:542
ByteDecompressor & operator=(const ByteDecompressor &)
size_t bufSize
The size of each of the internal buffers.
Definition: bytecompr.h:329
size_t outPos
The current position in the internal buffer with decompressed data.
Definition: bytecompr.h:342
char * outBuffer
An internal buffer for caching decompressed data.
Definition: bytecompr.h:326
size_t inPosEnd
The end of the compressed data in the internal buffer.
Definition: bytecompr.h:338
bool finished
Is the decompression procedure completed?
Definition: bytecompr.h:332
ByteDecompressor(const ByteDecompressor &)
size_t outPosEnd
The position of the end of the decompressed data in the internal buffer.
Definition: bytecompr.h:346
void decompress()
Decompresses data from the input buffer (and the input stream) to provide more data for reading if possible.
Definition: bytecompr.h:403