Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
zipstream.ipp
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file utility/io/zipstream.ipp
11 /// @brief Altered zipstream library implementation
12 /// @author Jonathan de Halleux (dehalleux@pelikhan.com)
13 /// @author Stuart G. Mentzer (Stuart_Mentzer@objexx.com)
14 /// @author David Kim (dekim@u.washington.edu)
15 /// @author Yih-En Andrew Ban (yab@u.washington.edu)
16 
17 // zipstream Library License:
18 // --------------------------
19 //
20 // The zlib/libpng License Copyright (c) 2003 Jonathan de Halleux.
21 //
22 // This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
23 //
24 // Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
25 //
26 // 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
27 //
28 // 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
29 //
30 // 3. This notice may not be removed or altered from any source distribution
31 //
32 // Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003
33 
34 
35 #ifndef INCLUDED_utility_io_zipstream_IPP
36 #define INCLUDED_utility_io_zipstream_IPP
37 
38 
39 // Unit headers
40 #include <utility/io/zipstream.hpp>
41 
42 // C++ headers
43 #include <sstream>
44 #include <cstdio>
45 #include <istream>
46 
47 namespace zlib_stream {
48 
49 
/// @brief Constants describing the fixed part of a gzip member header.
/// @details gz_magic and the flag masks are consulted by the header-parsing
/// code further down in this file; gz_magic is also emitted when a header is written.
50 namespace detail {
51 
52 const int gz_magic[2] = { 0x1f, 0x8b }; // gzip magic header (first two bytes of a gzip stream)
53 
54 // gzip flag byte: bit masks applied to the flags byte read from the header
55 const int gz_ascii_flag = 0x01; // bit 0 set: file probably ascii text
56 const int gz_head_crc = 0x02; // bit 1 set: header CRC present
57 const int gz_extra_field = 0x04; // bit 2 set: extra field present
58 const int gz_orig_name = 0x08; // bit 3 set: original file name present
59 const int gz_comment = 0x10; // bit 4 set: file comment present
60 const int gz_reserved = 0xE0; // bits 5..7: reserved (a header with any of these set is rejected as Z_DATA_ERROR)
61 
62 } // namespace detail
63 
64 
/// @brief Construct a zip stream buffer that deflates into ostream_.
/// @param ostream_ stream that receives the compressed bytes
/// @param level_ compression level, clamped to at most 9
/// @param strategy_ compression strategy forwarded to deflateInit2
/// @param window_size_ window bits; passed negated to deflateInit2
/// @param memory_level_ zlib memory level, clamped to at most 9
/// @param buffer_size_ element count of both the put buffer and the output buffer
/// @note The result of deflateInit2 is stored in m_err but not checked here.
/// @note (The signature line is missing from this listing.)
65 template<
66 typename Elem,
67 typename Tr,
68 typename ElemA,
69 typename ByteT,
70 typename ByteAT
71 >
73  ostream_reference ostream_,
74  size_t level_,
75  EStrategy strategy_,
76  size_t window_size_,
77  size_t memory_level_,
78  size_t buffer_size_
79 ) :
80 m_ostream( ostream_ ),
81 m_output_buffer( buffer_size_, 0 ),
82 m_buffer( buffer_size_, 0 ),
83 m_crc( 0 )
84 {
85  m_zip_stream.zalloc = (alloc_func)0; // null allocator hooks (zlib documents that it then uses its defaults)
86  m_zip_stream.zfree = (free_func)0;
87 
88  m_zip_stream.next_in = NULL;
89  m_zip_stream.avail_in = 0;
90  m_zip_stream.avail_out = 0;
91  m_zip_stream.next_out = NULL;
92 
93  m_err = deflateInit2(
94  &m_zip_stream,
95  std::min( 9, static_cast< int >( level_ ) ),
96  Z_DEFLATED,
97  - static_cast< int >(window_size_), // <-- changed: negative windowBits; per the zlib manual this selects raw deflate output without a zlib header/trailer
98  std::min( 9, static_cast< int >( memory_level_ ) ),
99  static_cast< int >( strategy_ )
100  );
101 
102  this->setp( &(m_buffer[0]), &(m_buffer[m_buffer.size()-1]) ); // put area stops one element short of the buffer end; overflow() stores its extra char in that last slot
103 }
104 
/// @brief Destructor (presumably ~basic_zip_streambuf; the signature line is missing from this listing).
/// @details Finishes the deflate stream, flushes the underlying ostream and releases zlib state.
105 template<
106 typename Elem,
107 typename Tr,
108 typename ElemA,
109 typename ByteT,
110 typename ByteAT
111 >
113 {
114  flush_finalize(); // compress whatever is still in the put area with Z_FINISH
115  m_ostream.flush();
116  m_err=deflateEnd( &m_zip_stream ); // result is stored but cannot be observed after destruction
117 }
118 
/// @brief Push pending put-area contents through the compressor
/// (presumably basic_zip_streambuf::sync; the signature line is missing from this listing).
/// @return 0 on success or when nothing is pending, -1 if overflow() reports eof.
119 template<
120 typename Elem,
121 typename Tr,
122 typename ElemA,
123 typename ByteT,
124 typename ByteAT
125 >
126 int
128 {
129  if ( pptr() && pptr() > pbase() ) { // only act when the put area holds at least one char
130  if ( traits_type::eq_int_type( overflow( traits_type::eof() ), traits_type::eof() ) ) return -1; // overflow(eof) compresses the pending chars without appending one
131  }
132 
133  return 0;
134 }
135 
/// @brief Compress the pending put area (plus c, unless c is eof) and reset the put area.
/// @param c character that did not fit into the put area, or traits_type::eof()
/// @return traits_type::not_eof( c ) on success, traits_type::eof() if compression failed.
/// @note Fix: the put area is now re-armed with its original end pointer. The previous
/// code passed epptr() - 1, which shrank the put area by one element on every call and
/// eventually moved epptr() below pbase().
/// @note (The signature lines are missing from this listing.)
136 template<
137 typename Elem,
138 typename Tr,
139 typename ElemA,
140 typename ByteT,
141 typename ByteAT
142 >
146 )
147 {
148  bool const test_eof = traits_type::eq_int_type( c, traits_type::eof() );
149  int w = static_cast< int >( pptr() - pbase() ); // number of chars currently pending
150  if ( !test_eof ) {
151  *pptr() = c; // safe: the constructor keeps one spare element past epptr()
152  ++w;
153  }
154  if ( zip_to_stream( pbase(), w ) ) {
155  this->setp( pbase(), epptr() ); // reset pptr() to pbase() while keeping the put area size constant
156  return traits_type::not_eof( c );
157  } else {
158  return traits_type::eof();
159  }
160 }
161 
/// @brief Deflate buffer_size_ chars starting at buffer_ and write the produced bytes to m_ostream.
/// @details Updates m_crc over the input first, then calls deflate with flush argument 0
/// until all input is consumed or deflate stops returning Z_OK.
/// @return true if the last deflate call returned Z_OK.
/// @note (The signature line and the buffer_ parameter line are missing from this listing.)
162 template<
163 typename Elem,
164 typename Tr,
165 typename ElemA,
166 typename ByteT,
167 typename ByteAT
168 >
169 bool
172  std::streamsize buffer_size_
173 )
174 {
175  std::streamsize written_byte_size = 0/*, total_written_byte_size = 0*/;
176 
177  m_zip_stream.next_in = (byte_buffer_type)buffer_;
178  m_zip_stream.avail_in = static_cast< uInt >( buffer_size_ * sizeof(char_type) ); // input size in bytes
179  m_zip_stream.avail_out = static_cast< uInt >( m_output_buffer.size() );
180  m_zip_stream.next_out = &(m_output_buffer[0]);
181  size_t remainder = 0;
182 
183  // updating crc over the uncompressed input
184  m_crc = crc32(
185  m_crc,
186  m_zip_stream.next_in,
187  m_zip_stream.avail_in
188  );
189 
190  do {
191  m_err = deflate( &m_zip_stream, 0 ); // 0 is Z_NO_FLUSH in zlib.h
192 
193  if ( m_err == Z_OK || m_err == Z_STREAM_END ) {
194  written_byte_size =
195  static_cast< std::streamsize >( m_output_buffer.size() )
196  - m_zip_stream.avail_out;
197  //total_written_byte_size += written_byte_size;
198  // dump whatever deflate produced so far to the ostream (whole char_type units only)
199  // std::cerr << "dump to m_ostream " << std::endl;
200  m_ostream.write(
201  (const char_type*) &(m_output_buffer[0]),
202  static_cast< std::streamsize >( written_byte_size / sizeof( char_type ) )
203  );
204  // std::cerr << "dump to m_ostream -- done " << std::endl;
205  // checking if some bytes were not written (partial char_type at the end).
206  if ( ( remainder = written_byte_size%sizeof( char_type ) ) != 0 ) {
207  // copy the leftover bytes to the beginning of the output buffer
208  // std::cerr << "before memcopy in zipstream.ipp: 206" << std::endl;
209  memcpy(
210  &(m_output_buffer[0]),
211  &(m_output_buffer[written_byte_size-remainder]),
212  remainder
213  );
214  // std::cerr << "after memcopy in zipstream.ipp: 206" << std::endl;
215  }
216 
217  m_zip_stream.avail_out =
218  static_cast< uInt >( m_output_buffer.size() - remainder ); // reuse the output buffer after the leftover bytes
219  m_zip_stream.next_out = &m_output_buffer[remainder];
220  // std::cerr << "after static_cast: line 219" << std::endl;
221  }
222  } while ( m_zip_stream.avail_in != 0 && m_err == Z_OK );
223  // std::cerr << "line 222" << std::endl;
224  return m_err == Z_OK;
225 }
226 
/// @brief Deflate the current put-area contents with the given zlib flush mode and flush m_ostream.
/// @param flush_mode zlib flush constant passed to deflate (the overloads below pass Z_SYNC_FLUSH and Z_FINISH)
/// @return total number of compressed bytes produced by deflate during this call.
/// @note This function does not reset the put area; presumably callers sync first so it is empty — TODO confirm.
/// @note NOTE(review): the loop exits once avail_in reaches 0 even if deflate returned Z_OK with a
/// full output buffer. The zlib manual says deflate must then be called again with the same flush
/// value; with a small output buffer the stream tail could be lost — confirm against buffer sizes in use.
/// @note (The signature line is missing from this listing.)
227 template<
228 typename Elem,
229 typename Tr,
230 typename ElemA,
231 typename ByteT,
232 typename ByteAT
233 >
235  int flush_mode
236 )
237 {
238  std::streamsize written_byte_size = 0, total_written_byte_size = 0;
239 
240  int const buffer_size = static_cast< int >( pptr() - pbase() ); // amount of data currently in buffer
241 
242  m_zip_stream.next_in = (byte_buffer_type)pbase();
243  m_zip_stream.avail_in = static_cast< uInt >( buffer_size * sizeof(char_type) );
244  m_zip_stream.avail_out = static_cast< uInt >( m_output_buffer.size() );
245  m_zip_stream.next_out = &(m_output_buffer[0]);
246  size_t remainder = 0;
247 
248  // std::cerr << "flush() " << std::endl;
249 
250  // updating crc over the uncompressed input
251  m_crc = crc32(
252  m_crc,
253  m_zip_stream.next_in,
254  m_zip_stream.avail_in
255  );
256 
257  do {
258  m_err = deflate( &m_zip_stream, flush_mode );
259  if ( m_err == Z_OK || m_err == Z_STREAM_END ) {
260  written_byte_size =
261  static_cast< std::streamsize >( m_output_buffer.size() )
262  - m_zip_stream.avail_out;
263  total_written_byte_size += written_byte_size;
264  // dump whatever deflate produced so far to the ostream
265  m_ostream.write(
266  (const char_type*) &(m_output_buffer[0]),
267  static_cast< std::streamsize >( written_byte_size / sizeof( char_type ) * sizeof( byte_type ) ) // note: unlike zip_to_stream, the count is also scaled by sizeof( byte_type )
268  );
269 
270  // checking if some bytes were not written (partial char_type at the end).
271  if ( ( remainder = written_byte_size%sizeof( char_type ) ) != 0 ) {
272  // copy the leftover bytes to the beginning of the output buffer
273  memcpy(
274  &(m_output_buffer[0]),
275  &(m_output_buffer[written_byte_size-remainder]),
276  remainder
277  );
278 
279  }
280 
281  m_zip_stream.avail_out = static_cast< uInt >( m_output_buffer.size() - remainder );
282  m_zip_stream.next_out = &m_output_buffer[remainder];
283  }
284  } while ( m_zip_stream.avail_in != 0 && m_err == Z_OK );
285 
286  m_ostream.flush();
287  // std::cerr << "flush() -- done" << std::endl;
288 
289  return total_written_byte_size;
290 }
291 
292 
/// @brief Flush the zip buffer and the output stream using Z_SYNC_FLUSH.
/// @return number of compressed bytes produced, as reported by flush( int ).
/// @note (The signature line is missing from this listing.)
293 template<
294 typename Elem,
295 typename Tr,
296 typename ElemA,
297 typename ByteT,
298 typename ByteAT
299 >
301 {
302  std::streamsize const total_written_byte_size = flush( Z_SYNC_FLUSH );
303  return total_written_byte_size;
304 }
305 
306 
/// @brief Flush the zip buffer and the output stream using Z_FINISH, ending the deflate stream.
/// @return number of compressed bytes produced, as reported by flush( int ).
/// @note (The signature line is missing from this listing.)
307 template<
308 typename Elem,
309 typename Tr,
310 typename ElemA,
311 typename ByteT,
312 typename ByteAT
313 >
315 {
316  std::streamsize const total_written_byte_size = flush( Z_FINISH );
317  return total_written_byte_size;
318 }
319 
320 
/// @brief Reset the deflate stream and zero the running CRC.
/// @details Clears the z_stream input/output pointers and counts, then calls deflateReset;
/// its result is stored in m_err.
/// @note (The signature line is missing from this listing.)
321 template<
322 typename Elem,
323 typename Tr,
324 typename ElemA,
325 typename ByteT,
326 typename ByteAT
327 >
329 {
330  m_crc = 0;
331 
332  m_zip_stream.next_in = NULL;
333  m_zip_stream.avail_in = 0;
334  m_zip_stream.avail_out = 0;
335  m_zip_stream.next_out = NULL;
336 
337  m_err = deflateReset( &m_zip_stream );
338 }
339 
340 
/// @brief Construct an unzip stream buffer that inflates data read from istream_.
/// @param istream_ stream supplying the compressed bytes
/// @param window_size_ window bits; passed negated to inflateInit2
/// @param read_buffer_size_ element count of the get buffer (m_buffer)
/// @param input_buffer_size_ element count of the compressed-input buffer (m_input_buffer)
/// @note The result of inflateInit2 is stored in m_err but not checked here.
/// @note (The signature line is missing from this listing.)
341 template<
342 typename Elem,
343 typename Tr,
344 typename ElemA,
345 typename ByteT,
346 typename ByteAT
347 >
349  istream_reference istream_,
350  size_t window_size_,
351  size_t read_buffer_size_,
352  size_t input_buffer_size_
353 ) :
354 m_istream( istream_ ),
355 m_input_buffer( input_buffer_size_ ),
356 m_buffer( read_buffer_size_ ),
357 m_crc( 0 )
358 {
359  // setting zalloc and zfree to null; next_in/next_out start empty
360  m_zip_stream.zalloc = (alloc_func)0;
361  m_zip_stream.zfree = (free_func)0;
362 
363  m_zip_stream.next_in = NULL;
364  m_zip_stream.avail_in = 0;
365  m_zip_stream.avail_out = 0;
366  m_zip_stream.next_out = NULL;
367 
368  m_err = inflateInit2( &m_zip_stream, -static_cast< int >( window_size_ ) ); // negative windowBits: per the zlib manual, raw deflate input without zlib header/trailer
369 
370  // empty get area positioned 4 elements into m_buffer; the first 4 elements are kept for putback
371  this->setg(
372  &( m_buffer[0] ) + 4, // beginning of putback area
373  &( m_buffer[0] ) + 4, // read position
374  &( m_buffer[0] ) + 4 // end position
375  );
376 }
376 
/// @brief Refill m_input_buffer from m_istream and point the z_stream input at it.
/// @return number of bytes now available in m_zip_stream.avail_in (0 if nothing was read).
/// @note (The signature line is missing from this listing; the name is taken from the call
/// in unzip_from_stream.)
377 template<
378 typename Elem,
379 typename Tr,
380 typename ElemA,
381 typename ByteT,
382 typename ByteAT
383 >
385 {
386  m_zip_stream.next_in = &( m_input_buffer[0] );
387  m_istream.read(
388  (char_type*)( &( m_input_buffer[0] ) ),
389  static_cast< std::streamsize >( m_input_buffer.size() / sizeof( char_type ) )
390  );
391  return m_zip_stream.avail_in = m_istream.gcount() * sizeof( char_type ); // gcount() is in chars; avail_in is in bytes
392 }
393 
394 
/// @brief Destructor (presumably ~basic_unzip_streambuf; the signature line is missing from this listing).
/// @details Releases the zlib inflate state; the return value of inflateEnd is ignored.
395 template<
396 typename Elem,
397 typename Tr,
398 typename ElemA,
399 typename ByteT,
400 typename ByteAT
401 >
403 {
404  inflateEnd( &m_zip_stream );
405 }
406 
/// @brief Refill the get area with freshly inflated data
/// (presumably basic_unzip_streambuf::underflow; the signature lines are missing from this listing).
/// @details Keeps up to 4 already-read chars in front of the new data as a putback area,
/// then inflates into the rest of m_buffer.
/// @return the next available character, or EOF if no data could be produced.
/// @note Fix: the putback chars are now moved with memmove. Source and destination both
/// lie in m_buffer and overlap whenever the previous refill delivered fewer chars than
/// the putback count, which is undefined behaviour for memcpy.
/// @note NOTE(review): the size passed to unzip_from_stream is already scaled by
/// sizeof( char_type ), and that function scales it again — harmless only when
/// sizeof( char_type ) == 1; confirm.
407 template<
408 typename Elem,
409 typename Tr,
410 typename ElemA,
411 typename ByteT,
412 typename ByteAT
413 >
416 {
417  if ( gptr() && ( gptr() < egptr() ) ) return *reinterpret_cast< unsigned char * >( gptr() ); // data still buffered
418 
419  int n_putback = static_cast< int >( gptr() - eback() );
420  if ( n_putback > 4 ) n_putback = 4; // the putback area holds at most 4 chars
421  memmove(
422  &( m_buffer[0] ) + ( 4 - n_putback ),
423  gptr() - n_putback,
424  n_putback * sizeof( char_type )
425  );
426 
427  int num = unzip_from_stream(
428  &( m_buffer[0] ) + 4,
429  static_cast< std::streamsize >( ( m_buffer.size() - 4 ) * sizeof( char_type ) )
430  );
431  if ( num <= 0 ) return EOF; // ERROR or EOF
432 
433  // reset buffer pointers
434  this->setg(
435  &( m_buffer[0] ) + ( 4 - n_putback ), // beginning of putback area
436  &( m_buffer[0] ) + 4, // read position
437  &( m_buffer[0] ) + 4 + num // end of buffer
438  );
439 
440  // return next character
441  return *reinterpret_cast< unsigned char * >( gptr() );
442 }
443 
/// @brief Inflate compressed input into buffer_ until it is full, the input runs dry, or inflate stops returning Z_OK.
/// @param buffer_ destination for the decompressed chars
/// @param buffer_size_ capacity of buffer_ in chars
/// @return number of chars written to buffer_.
/// @details Updates m_crc over the decompressed data. When inflate reports Z_STREAM_END, the
/// unconsumed input is handed back to the underlying istream via put_back_from_zip_stream().
/// @note NOTE(review): the length given to crc32 is computed in char_type units, not bytes —
/// the two agree only when sizeof( char_type ) == 1; confirm.
/// @note (The signature line is missing from this listing.)
444 template<
445 typename Elem,
446 typename Tr,
447 typename ElemA,
448 typename ByteT,
449 typename ByteAT
450 >
452  char_type * buffer_,
453  std::streamsize buffer_size_
454 )
455 {
456  m_zip_stream.next_out = (byte_buffer_type)buffer_;
457  m_zip_stream.avail_out = static_cast< uInt >( buffer_size_ * sizeof( char_type ) );
458  size_t count = m_zip_stream.avail_in; // input left over from a previous call
459 
460  do {
461  if ( m_zip_stream.avail_in == 0 ) count = fill_input_buffer(); // count == 0 means the istream gave nothing more
462 
463  if ( m_zip_stream.avail_in ) {
464  m_err = inflate( &m_zip_stream, Z_SYNC_FLUSH );
465  }
466  } while ( m_err == Z_OK && m_zip_stream.avail_out != 0 && count != 0 );
467 
468  // updating crc over the decompressed output
469  m_crc = crc32(
470  m_crc,
471  (byte_buffer_type)buffer_,
472  buffer_size_ - m_zip_stream.avail_out / sizeof( char_type )
473  );
474  std::streamsize n_read = buffer_size_ - m_zip_stream.avail_out / sizeof( char_type );
475 
476  // check if it is the end of the compressed stream
477  if ( m_err == Z_STREAM_END ) put_back_from_zip_stream();
478 
479  return n_read;
480 }
481 
/// @brief Return unconsumed compressed input to the underlying istream.
/// @details Does nothing when avail_in is 0. Otherwise clears the istream state, seeks it
/// backwards by avail_in bytes relative to the current position, and zeroes avail_in, so the
/// bytes following the compressed data remain readable from the istream.
/// @note (The signature line is missing from this listing; the name is taken from the call
/// in unzip_from_stream.)
482 template<
483 typename Elem,
484 typename Tr,
485 typename ElemA,
486 typename ByteT,
487 typename ByteAT
488 >
490 {
491  if ( m_zip_stream.avail_in == 0 ) return;
492 
493  m_istream.clear( std::ios::goodbit ); // drop eof/fail bits so the seek below can succeed
494  m_istream.seekg(
495  -static_cast< int >( m_zip_stream.avail_in ),
496  std::ios_base::cur
497  );
498 
499  m_zip_stream.avail_in = 0;
500 }
501 
/// @brief Parse an optional gzip header from the underlying istream
/// (presumably basic_zip_istream::check_header; the signature line is missing from this listing).
/// @details If the two magic bytes do not match, the bytes read so far are ungot, m_is_gzip is
/// set to false, and the function returns Z_OK when the z_stream already holds input, else
/// Z_STREAM_END. If they match, m_is_gzip is set to true and the rest of the header is skipped
/// (method, flags, 6 bytes of time/xflags/OS, then the optional extra field, file name,
/// comment and header CRC).
/// @return Z_OK on success; Z_DATA_ERROR if the method is not Z_DEFLATED, a reserved flag bit
/// is set, or the istream is at eof after the header; otherwise as described above.
502 template<
503 typename Elem,
504 typename Tr,
505 typename ElemA,
506 typename ByteT,
507 typename ByteAT
508 >
509 int
511 {
512  int method; // method byte
513  int flags; // flags byte
514  uInt len;
515  int c;
516  int err = 0;
517  z_stream & zip_stream = rdbuf()->get_zip_stream();
518 
519  // Check the gzip magic header
520  for ( len = 0; len < 2; ++len ) {
521  c = (int)rdbuf()->get_istream().get();
522  if ( c != detail::gz_magic[len] ) {
523  if ( len != 0 ) rdbuf()->get_istream().unget(); // second byte mismatched: one unget for the first magic byte
524  if ( c != EOF ) {
525  rdbuf()->get_istream().unget(); // and one for the byte that just failed to match
526  }
527 
528  err = ( zip_stream.avail_in != 0 ? Z_OK : Z_STREAM_END );
529  m_is_gzip = false;
530  return err;
531  }
532  }
533 
534  m_is_gzip = true;
535  method = (int)rdbuf()->get_istream().get();
536  flags = (int)rdbuf()->get_istream().get();
537  if ( method != Z_DEFLATED || ( flags & detail::gz_reserved ) != 0 ) {
538  err = Z_DATA_ERROR;
539  return err;
540  }
541 
542  // Discard time, xflags and OS code:
543  for ( len = 0; len < 6; ++len ) rdbuf()->get_istream().get();
544 
545  if ( ( flags & detail::gz_extra_field ) != 0 ) {
546  // skip the extra field (its length is a 16-bit little-endian value)
547  len = (uInt)rdbuf()->get_istream().get();
548  len += ((uInt)rdbuf()->get_istream().get()) << 8;
549  // len is garbage if EOF but the loop below will quit anyway
550  while ( len-- != 0 && rdbuf()->get_istream().get() != EOF ) {}
551  }
552  if ( ( flags & detail::gz_orig_name ) != 0 ) {
553  // skip the original file name (zero-terminated)
554  while ( ( c = rdbuf()->get_istream().get() ) != 0 && c != EOF ) {}
555  }
556  if ( ( flags & detail::gz_comment ) != 0 ) {
557  // skip the .gz file comment (zero-terminated)
558  while ( ( c = rdbuf()->get_istream().get() ) != 0 && c != EOF ) {}
559  }
560  if ( ( flags & detail::gz_head_crc ) != 0 ) { // skip the header crc
561  for ( len = 0; len < 2; ++len ) rdbuf()->get_istream().get();
562  }
563  err = ( rdbuf()->get_istream().eof() ? Z_DATA_ERROR : Z_OK ); // hitting eof inside the header means it was truncated
564 
565  return err;
566 }
567 
/// @brief Read the gzip footer from the underlying istream.
/// @details Only acts when m_is_gzip is set: reads two 32-bit values with read_uint32,
/// first into m_gzip_crc and then into m_gzip_data_size.
/// @note (The signature line is missing from this listing.)
568 template<
569 typename Elem,
570 typename Tr,
571 typename ElemA,
572 typename ByteT,
573 typename ByteAT
574 >
576 {
577  if ( m_is_gzip ) {
578  read_uint32( rdbuf()->get_istream(), m_gzip_crc );
579  read_uint32( rdbuf()->get_istream(), m_gzip_data_size );
580  }
581 }
582 
/// @brief Write the low 32 bits of x_ to out_ as four bytes, least significant byte first.
/// @param out_ destination stream (this parameter's line is missing from this listing)
/// @param x_ value to write; bits above the low 32 are discarded
/// @note (The signature line is missing from this listing.)
583 template<
584 typename Elem,
585 typename Tr,
586 typename ElemA,
587 typename ByteT,
588 typename ByteAT
589 >
592  unsigned long x_
593 )
594 {
595  // yab: 20090414, modified to conform to gzip standard where
596  // trailer crc and length must both be 32-bit, otherwise there
597  // is breakage in systems where 'unsigned long' is not 32-bit
598  // and external archiving programs end up complaining.
599  char b1, b2, b3, b4; // assuming char is 8 bits
600  b1 = 0xFF & x_; // bits 0..7
601  b2 = 0xFF & ( x_ >> 8 ); // bits 8..15
602  b3 = 0xFF & ( x_ >> 16 ); // bits 16..23
603  b4 = 0xFF & ( x_ >> 24 ); // bits 24..31
604 
605  out_.write( &b1, 1 );
606  out_.write( &b2, 1 );
607  out_.write( &b3, 1 );
608  out_.write( &b4, 1 );
609 }
610 
/// @brief Read four bytes from in_ and assemble them, least significant byte first, into x_.
/// @param in_ source stream
/// @param x_ receives the 32-bit value, zero-extended to unsigned long
/// @note Fix: each byte is widened to unsigned long before shifting. The previous code
/// shifted in signed int, so a top byte >= 0x80 overflowed int and was sign-extended into
/// the upper half of a 64-bit unsigned long.
/// @note Fix: the bytes are zero-initialised, so a failed read no longer feeds indeterminate
/// values into x_.
/// @note (The signature line is missing from this listing.)
611 template<
612 typename Elem,
613 typename Tr,
614 typename ElemA,
615 typename ByteT,
616 typename ByteAT
617 >
619  istream_reference in_,
620  unsigned long & x_
621 )
622 {
623  // yab: 20090414, modified to conform to gzip standard where
624  // trailer crc and length must both be 32-bit, otherwise there
625  // is breakage in systems where 'unsigned long' is not 32-bit
626  // and external archiving programs end up complaining.
627  unsigned char b1 = 0, b2 = 0, b3 = 0, b4 = 0; // assuming char is 8 bits
628 
629  in_.read( reinterpret_cast< char * >( &b1 ), 1 );
630  in_.read( reinterpret_cast< char * >( &b2 ), 1 );
631  in_.read( reinterpret_cast< char * >( &b3 ), 1 );
632  in_.read( reinterpret_cast< char * >( &b4 ), 1 );
633 
634  x_ = static_cast< unsigned long >( b1 ) | ( static_cast< unsigned long >( b2 ) << 8 ) | ( static_cast< unsigned long >( b3 ) << 16 ) | ( static_cast< unsigned long >( b4 ) << 24 );
635 }
636 
/// @brief Write a minimal 10-byte gzip header to the underlying ostream
/// (presumably basic_zip_ostream::add_header; the signature line is missing from this listing).
/// @details Emits the two magic bytes, the Z_DEFLATED method byte, a zero flags byte,
/// four zero time bytes, a zero xflags byte and OS_CODE.
637 template<
638 typename Elem,
639 typename Tr,
640 typename ElemA,
641 typename ByteT,
642 typename ByteAT
643 >
645 {
646  char_type zero = 0;
647 
648  rdbuf()->get_ostream()
649  .put( static_cast< char_type >( detail::gz_magic[0] ) )
650  .put( static_cast< char_type >( detail::gz_magic[1] ) )
651  .put( static_cast< char_type >( Z_DEFLATED ) )
652  .put( zero ) // flags: none set, so no optional header fields follow
653  .put( zero ).put( zero ).put( zero ).put( zero ) // time
654  .put( zero ) // xflags
655  .put( static_cast< char_type >( OS_CODE ) );
656 }
657 
/// @brief Write the gzip footer to the underlying ostream
/// (presumably basic_zip_ostream::add_footer; the signature line is missing from this listing).
/// @details Emits the stream buffer's CRC followed by its input size, each as a 32-bit
/// value via put_long_as_uint32.
658 template<
659 typename Elem,
660 typename Tr,
661 typename ElemA,
662 typename ByteT,
663 typename ByteAT
664 >
666 {
667  put_long_as_uint32( rdbuf()->get_ostream(), rdbuf()->get_crc() );
668  put_long_as_uint32( rdbuf()->get_ostream(), rdbuf()->get_in_size() );
669 }
670 
671 
672 } // namespace zlib_stream
673 
674 
675 #endif // INCLUDED_utility_io_zipstream_IPP
static T min(T x, T y)
Definition: Svm.cc:16
basic_zip_ostream & write(char const *str, std::streamsize const count)
write a string
Definition: zipstream.hpp:506
Altered zipstream library header.
std::basic_ostream< Elem, Tr > & ostream_reference
Definition: zipstream.hpp:401
std::streamsize flush_finalize()
flushes the zip buffer and output buffer and finalize the zip stream
Definition: zipstream.ipp:314
cmplx w(cmplx z, double relerr)
Definition: functions.cc:470
std::streamsize unzip_from_stream(char_type *, std::streamsize)
Definition: zipstream.ipp:451
T remainder(T const &x, T const &y)
Remainder of x with respect to division by y that is of smallest magnitude.
const int gz_extra_field
Definition: zipstream.ipp:57
std::basic_istream< Elem, Tr > & istream_reference
Definition: zipstream.hpp:180
static void read_uint32(istream_reference in_, unsigned long &x_)
Definition: zipstream.ipp:618
basic_unzip_streambuf(istream_reference istream_, size_t window_size_, size_t read_buffer_size_, size_t input_buffer_size_)
Construct an unzip stream.
Definition: zipstream.ipp:348
static void put_long_as_uint32(ostream_reference out_, unsigned long x_)
Definition: zipstream.ipp:590
list flags
bool zip_to_stream(char_type *, std::streamsize)
Definition: zipstream.ipp:170
const int gz_head_crc
Definition: zipstream.ipp:56
std::streamsize flush()
flushes the zip buffer and output buffer
Definition: zipstream.ipp:300
Fstring::size_type len(Fstring const &s)
Length.
Definition: Fstring.hh:2207
std::basic_ostream< Elem, Tr > & ostream_reference
Definition: zipstream.hpp:88
const int gz_ascii_flag
Definition: zipstream.ipp:55
basic_zip_streambuf(ostream_reference ostream_, size_t level_, EStrategy strategy_, size_t window_size_, size_t memory_level_, size_t buffer_size_)
Construct a zip stream.
Definition: zipstream.ipp:72
void reset_state()
resets the zip stream and zeros the crc
Definition: zipstream.ipp:328
std::basic_istream< Elem, Tr > & istream_reference
Definition: zipstream.hpp:319
const int gz_orig_name
Definition: zipstream.ipp:58
const int gz_magic[2]
Definition: zipstream.ipp:52
const int gz_reserved
Definition: zipstream.ipp:60
EStrategy
Compression strategy, see zlib doc.
Definition: zipstream.hpp:64
void read_footer()
reads the gzip footer
Definition: zipstream.ipp:575
const int gz_comment
Definition: zipstream.ipp:59
int_type overflow(int_type c)
Definition: zipstream.ipp:144