kaitaistream.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689
#include <kaitai/kaitaistream.h>

#if defined(__APPLE__)
#include <machine/endian.h>
#include <libkern/OSByteOrder.h>
#define bswap_16(x) OSSwapInt16(x)
#define bswap_32(x) OSSwapInt32(x)
#define bswap_64(x) OSSwapInt64(x)
#define __BYTE_ORDER BYTE_ORDER
#define __BIG_ENDIAN BIG_ENDIAN
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#elif defined(_MSC_VER) // !__APPLE__
#include <stdlib.h>
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN 4321
#define __BYTE_ORDER __LITTLE_ENDIAN
#define bswap_16(x) _byteswap_ushort(x)
#define bswap_32(x) _byteswap_ulong(x)
#define bswap_64(x) _byteswap_uint64(x)
#else // !__APPLE__ or !_MSC_VER
#include <endian.h>
#include <byteswap.h>
#endif

#include <cstring>
#include <iostream>
#include <stdexcept>
#include <vector>
  26. kaitai::kstream::kstream(std::istream* io) {
  27. m_io = io;
  28. init();
  29. }
  30. kaitai::kstream::kstream(std::string& data): m_io_str(data) {
  31. m_io = &m_io_str;
  32. init();
  33. }
  34. void kaitai::kstream::init() {
  35. exceptions_enable();
  36. align_to_byte();
  37. }
  38. void kaitai::kstream::close() {
  39. // m_io->close();
  40. }
  41. void kaitai::kstream::exceptions_enable() const {
  42. m_io->exceptions(
  43. std::istream::eofbit |
  44. std::istream::failbit |
  45. std::istream::badbit
  46. );
  47. }
  48. // ========================================================================
  49. // Stream positioning
  50. // ========================================================================
  51. bool kaitai::kstream::is_eof() const {
  52. if (m_bits_left > 0) {
  53. return false;
  54. }
  55. char t;
  56. m_io->exceptions(
  57. std::istream::badbit
  58. );
  59. m_io->get(t);
  60. if (m_io->eof()) {
  61. m_io->clear();
  62. exceptions_enable();
  63. return true;
  64. } else {
  65. m_io->unget();
  66. exceptions_enable();
  67. return false;
  68. }
  69. }
  70. void kaitai::kstream::seek(uint64_t pos) {
  71. m_io->seekg(pos);
  72. }
  73. uint64_t kaitai::kstream::pos() {
  74. return m_io->tellg();
  75. }
  76. uint64_t kaitai::kstream::size() {
  77. std::iostream::pos_type cur_pos = m_io->tellg();
  78. m_io->seekg(0, std::ios::end);
  79. std::iostream::pos_type len = m_io->tellg();
  80. m_io->seekg(cur_pos);
  81. return len;
  82. }
  83. // ========================================================================
  84. // Integer numbers
  85. // ========================================================================
  86. // ------------------------------------------------------------------------
  87. // Signed
  88. // ------------------------------------------------------------------------
  89. int8_t kaitai::kstream::read_s1() {
  90. char t;
  91. m_io->get(t);
  92. return t;
  93. }
  94. // ........................................................................
  95. // Big-endian
  96. // ........................................................................
  97. int16_t kaitai::kstream::read_s2be() {
  98. int16_t t;
  99. m_io->read(reinterpret_cast<char *>(&t), 2);
  100. #if __BYTE_ORDER == __LITTLE_ENDIAN
  101. t = bswap_16(t);
  102. #endif
  103. return t;
  104. }
  105. int32_t kaitai::kstream::read_s4be() {
  106. int32_t t;
  107. m_io->read(reinterpret_cast<char *>(&t), 4);
  108. #if __BYTE_ORDER == __LITTLE_ENDIAN
  109. t = bswap_32(t);
  110. #endif
  111. return t;
  112. }
  113. int64_t kaitai::kstream::read_s8be() {
  114. int64_t t;
  115. m_io->read(reinterpret_cast<char *>(&t), 8);
  116. #if __BYTE_ORDER == __LITTLE_ENDIAN
  117. t = bswap_64(t);
  118. #endif
  119. return t;
  120. }
  121. // ........................................................................
  122. // Little-endian
  123. // ........................................................................
  124. int16_t kaitai::kstream::read_s2le() {
  125. int16_t t;
  126. m_io->read(reinterpret_cast<char *>(&t), 2);
  127. #if __BYTE_ORDER == __BIG_ENDIAN
  128. t = bswap_16(t);
  129. #endif
  130. return t;
  131. }
  132. int32_t kaitai::kstream::read_s4le() {
  133. int32_t t;
  134. m_io->read(reinterpret_cast<char *>(&t), 4);
  135. #if __BYTE_ORDER == __BIG_ENDIAN
  136. t = bswap_32(t);
  137. #endif
  138. return t;
  139. }
  140. int64_t kaitai::kstream::read_s8le() {
  141. int64_t t;
  142. m_io->read(reinterpret_cast<char *>(&t), 8);
  143. #if __BYTE_ORDER == __BIG_ENDIAN
  144. t = bswap_64(t);
  145. #endif
  146. return t;
  147. }
  148. // ------------------------------------------------------------------------
  149. // Unsigned
  150. // ------------------------------------------------------------------------
  151. uint8_t kaitai::kstream::read_u1() {
  152. char t;
  153. m_io->get(t);
  154. return t;
  155. }
  156. // ........................................................................
  157. // Big-endian
  158. // ........................................................................
  159. uint16_t kaitai::kstream::read_u2be() {
  160. uint16_t t;
  161. m_io->read(reinterpret_cast<char *>(&t), 2);
  162. #if __BYTE_ORDER == __LITTLE_ENDIAN
  163. t = bswap_16(t);
  164. #endif
  165. return t;
  166. }
  167. uint32_t kaitai::kstream::read_u4be() {
  168. uint32_t t;
  169. m_io->read(reinterpret_cast<char *>(&t), 4);
  170. #if __BYTE_ORDER == __LITTLE_ENDIAN
  171. t = bswap_32(t);
  172. #endif
  173. return t;
  174. }
  175. uint64_t kaitai::kstream::read_u8be() {
  176. uint64_t t;
  177. m_io->read(reinterpret_cast<char *>(&t), 8);
  178. #if __BYTE_ORDER == __LITTLE_ENDIAN
  179. t = bswap_64(t);
  180. #endif
  181. return t;
  182. }
  183. // ........................................................................
  184. // Little-endian
  185. // ........................................................................
  186. uint16_t kaitai::kstream::read_u2le() {
  187. uint16_t t;
  188. m_io->read(reinterpret_cast<char *>(&t), 2);
  189. #if __BYTE_ORDER == __BIG_ENDIAN
  190. t = bswap_16(t);
  191. #endif
  192. return t;
  193. }
  194. uint32_t kaitai::kstream::read_u4le() {
  195. uint32_t t;
  196. m_io->read(reinterpret_cast<char *>(&t), 4);
  197. #if __BYTE_ORDER == __BIG_ENDIAN
  198. t = bswap_32(t);
  199. #endif
  200. return t;
  201. }
  202. uint64_t kaitai::kstream::read_u8le() {
  203. uint64_t t;
  204. m_io->read(reinterpret_cast<char *>(&t), 8);
  205. #if __BYTE_ORDER == __BIG_ENDIAN
  206. t = bswap_64(t);
  207. #endif
  208. return t;
  209. }
  210. // ========================================================================
  211. // Floating point numbers
  212. // ========================================================================
  213. // ........................................................................
  214. // Big-endian
  215. // ........................................................................
  216. float kaitai::kstream::read_f4be() {
  217. uint32_t t;
  218. m_io->read(reinterpret_cast<char *>(&t), 4);
  219. #if __BYTE_ORDER == __LITTLE_ENDIAN
  220. t = bswap_32(t);
  221. #endif
  222. return reinterpret_cast<float&>(t);
  223. }
  224. double kaitai::kstream::read_f8be() {
  225. uint64_t t;
  226. m_io->read(reinterpret_cast<char *>(&t), 8);
  227. #if __BYTE_ORDER == __LITTLE_ENDIAN
  228. t = bswap_64(t);
  229. #endif
  230. return reinterpret_cast<double&>(t);
  231. }
  232. // ........................................................................
  233. // Little-endian
  234. // ........................................................................
  235. float kaitai::kstream::read_f4le() {
  236. uint32_t t;
  237. m_io->read(reinterpret_cast<char *>(&t), 4);
  238. #if __BYTE_ORDER == __BIG_ENDIAN
  239. t = bswap_32(t);
  240. #endif
  241. return reinterpret_cast<float&>(t);
  242. }
  243. double kaitai::kstream::read_f8le() {
  244. uint64_t t;
  245. m_io->read(reinterpret_cast<char *>(&t), 8);
  246. #if __BYTE_ORDER == __BIG_ENDIAN
  247. t = bswap_64(t);
  248. #endif
  249. return reinterpret_cast<double&>(t);
  250. }
  251. // ========================================================================
  252. // Unaligned bit values
  253. // ========================================================================
  254. void kaitai::kstream::align_to_byte() {
  255. m_bits_left = 0;
  256. m_bits = 0;
  257. }
  258. uint64_t kaitai::kstream::read_bits_int_be(int n) {
  259. int bits_needed = n - m_bits_left;
  260. if (bits_needed > 0) {
  261. // 1 bit => 1 byte
  262. // 8 bits => 1 byte
  263. // 9 bits => 2 bytes
  264. int bytes_needed = ((bits_needed - 1) / 8) + 1;
  265. if (bytes_needed > 8)
  266. throw std::runtime_error("read_bits_int: more than 8 bytes requested");
  267. char buf[8];
  268. m_io->read(buf, bytes_needed);
  269. for (int i = 0; i < bytes_needed; i++) {
  270. uint8_t b = buf[i];
  271. m_bits <<= 8;
  272. m_bits |= b;
  273. m_bits_left += 8;
  274. }
  275. }
  276. // raw mask with required number of 1s, starting from lowest bit
  277. uint64_t mask = get_mask_ones(n);
  278. // shift mask to align with highest bits available in @bits
  279. int shift_bits = m_bits_left - n;
  280. mask <<= shift_bits;
  281. // derive reading result
  282. uint64_t res = (m_bits & mask) >> shift_bits;
  283. // clear top bits that we've just read => AND with 1s
  284. m_bits_left -= n;
  285. mask = get_mask_ones(m_bits_left);
  286. m_bits &= mask;
  287. return res;
  288. }
  289. // Deprecated, use read_bits_int_be() instead.
  290. uint64_t kaitai::kstream::read_bits_int(int n) {
  291. return read_bits_int_be(n);
  292. }
  293. uint64_t kaitai::kstream::read_bits_int_le(int n) {
  294. int bits_needed = n - m_bits_left;
  295. if (bits_needed > 0) {
  296. // 1 bit => 1 byte
  297. // 8 bits => 1 byte
  298. // 9 bits => 2 bytes
  299. int bytes_needed = ((bits_needed - 1) / 8) + 1;
  300. if (bytes_needed > 8)
  301. throw std::runtime_error("read_bits_int_le: more than 8 bytes requested");
  302. char buf[8];
  303. m_io->read(buf, bytes_needed);
  304. for (int i = 0; i < bytes_needed; i++) {
  305. uint8_t b = buf[i];
  306. m_bits |= (static_cast<uint64_t>(b) << m_bits_left);
  307. m_bits_left += 8;
  308. }
  309. }
  310. // raw mask with required number of 1s, starting from lowest bit
  311. uint64_t mask = get_mask_ones(n);
  312. // derive reading result
  313. uint64_t res = m_bits & mask;
  314. // remove bottom bits that we've just read by shifting
  315. m_bits >>= n;
  316. m_bits_left -= n;
  317. return res;
  318. }
  319. uint64_t kaitai::kstream::get_mask_ones(int n) {
  320. if (n == 64) {
  321. return 0xFFFFFFFFFFFFFFFF;
  322. } else {
  323. return ((uint64_t) 1 << n) - 1;
  324. }
  325. }
  326. // ========================================================================
  327. // Byte arrays
  328. // ========================================================================
  329. std::string kaitai::kstream::read_bytes(std::streamsize len) {
  330. std::vector<char> result(len);
  331. // NOTE: streamsize type is signed, negative values are only *supposed* to not be used.
  332. // http://en.cppreference.com/w/cpp/io/streamsize
  333. if (len < 0) {
  334. throw std::runtime_error("read_bytes: requested a negative amount");
  335. }
  336. if (len > 0) {
  337. m_io->read(&result[0], len);
  338. }
  339. return std::string(result.begin(), result.end());
  340. }
  341. std::string kaitai::kstream::read_bytes_full() {
  342. std::iostream::pos_type p1 = m_io->tellg();
  343. m_io->seekg(0, std::ios::end);
  344. std::iostream::pos_type p2 = m_io->tellg();
  345. size_t len = p2 - p1;
  346. // Note: this requires a std::string to be backed with a
  347. // contiguous buffer. Officially, it's a only requirement since
  348. // C++11 (C++98 and C++03 didn't have this requirement), but all
  349. // major implementations had contiguous buffers anyway.
  350. std::string result(len, ' ');
  351. m_io->seekg(p1);
  352. m_io->read(&result[0], len);
  353. return result;
  354. }
  355. std::string kaitai::kstream::read_bytes_term(char term, bool include, bool consume, bool eos_error) {
  356. std::string result;
  357. std::getline(*m_io, result, term);
  358. if (m_io->eof()) {
  359. // encountered EOF
  360. if (eos_error) {
  361. throw std::runtime_error("read_bytes_term: encountered EOF");
  362. }
  363. } else {
  364. // encountered terminator
  365. if (include)
  366. result.push_back(term);
  367. if (!consume)
  368. m_io->unget();
  369. }
  370. return result;
  371. }
  372. std::string kaitai::kstream::ensure_fixed_contents(std::string expected) {
  373. std::string actual = read_bytes(expected.length());
  374. if (actual != expected) {
  375. // NOTE: I think printing it outright is not best idea, it could contain non-ascii charactes like backspace and beeps and whatnot. It would be better to print hexlified version, and also to redirect it to stderr.
  376. throw std::runtime_error("ensure_fixed_contents: actual data does not match expected data");
  377. }
  378. return actual;
  379. }
  380. std::string kaitai::kstream::bytes_strip_right(std::string src, char pad_byte) {
  381. std::size_t new_len = src.length();
  382. while (new_len > 0 && src[new_len - 1] == pad_byte)
  383. new_len--;
  384. return src.substr(0, new_len);
  385. }
  386. std::string kaitai::kstream::bytes_terminate(std::string src, char term, bool include) {
  387. std::size_t new_len = 0;
  388. std::size_t max_len = src.length();
  389. while (new_len < max_len && src[new_len] != term)
  390. new_len++;
  391. if (include && new_len < max_len)
  392. new_len++;
  393. return src.substr(0, new_len);
  394. }
  395. // ========================================================================
  396. // Byte array processing
  397. // ========================================================================
  398. std::string kaitai::kstream::process_xor_one(std::string data, uint8_t key) {
  399. size_t len = data.length();
  400. std::string result(len, ' ');
  401. for (size_t i = 0; i < len; i++)
  402. result[i] = data[i] ^ key;
  403. return result;
  404. }
  405. std::string kaitai::kstream::process_xor_many(std::string data, std::string key) {
  406. size_t len = data.length();
  407. size_t kl = key.length();
  408. std::string result(len, ' ');
  409. size_t ki = 0;
  410. for (size_t i = 0; i < len; i++) {
  411. result[i] = data[i] ^ key[ki];
  412. ki++;
  413. if (ki >= kl)
  414. ki = 0;
  415. }
  416. return result;
  417. }
  418. std::string kaitai::kstream::process_rotate_left(std::string data, int amount) {
  419. size_t len = data.length();
  420. std::string result(len, ' ');
  421. for (size_t i = 0; i < len; i++) {
  422. uint8_t bits = data[i];
  423. result[i] = (bits << amount) | (bits >> (8 - amount));
  424. }
  425. return result;
  426. }
  427. #ifdef KS_ZLIB
  428. #include <zlib.h>
  429. std::string kaitai::kstream::process_zlib(std::string data) {
  430. int ret;
  431. unsigned char *src_ptr = reinterpret_cast<unsigned char*>(&data[0]);
  432. std::stringstream dst_strm;
  433. z_stream strm;
  434. strm.zalloc = Z_NULL;
  435. strm.zfree = Z_NULL;
  436. strm.opaque = Z_NULL;
  437. ret = inflateInit(&strm);
  438. if (ret != Z_OK)
  439. throw std::runtime_error("process_zlib: inflateInit error");
  440. strm.next_in = src_ptr;
  441. strm.avail_in = data.length();
  442. unsigned char outbuffer[ZLIB_BUF_SIZE];
  443. std::string outstring;
  444. // get the decompressed bytes blockwise using repeated calls to inflate
  445. do {
  446. strm.next_out = reinterpret_cast<Bytef*>(outbuffer);
  447. strm.avail_out = sizeof(outbuffer);
  448. ret = inflate(&strm, 0);
  449. if (outstring.size() < strm.total_out)
  450. outstring.append(reinterpret_cast<char*>(outbuffer), strm.total_out - outstring.size());
  451. } while (ret == Z_OK);
  452. if (ret != Z_STREAM_END) { // an error occurred that was not EOF
  453. std::ostringstream exc_msg;
  454. exc_msg << "process_zlib: error #" << ret << "): " << strm.msg;
  455. throw std::runtime_error(exc_msg.str());
  456. }
  457. if (inflateEnd(&strm) != Z_OK)
  458. throw std::runtime_error("process_zlib: inflateEnd error");
  459. return outstring;
  460. }
  461. #endif
  462. // ========================================================================
  463. // Misc utility methods
  464. // ========================================================================
  465. int kaitai::kstream::mod(int a, int b) {
  466. if (b <= 0)
  467. throw std::invalid_argument("mod: divisor b <= 0");
  468. int r = a % b;
  469. if (r < 0)
  470. r += b;
  471. return r;
  472. }
  473. #include <stdio.h>
  474. std::string kaitai::kstream::to_string(int val) {
  475. // if int is 32 bits, "-2147483648" is the longest string representation
  476. // => 11 chars + zero => 12 chars
  477. // if int is 64 bits, "-9223372036854775808" is the longest
  478. // => 20 chars + zero => 21 chars
  479. char buf[25];
  480. int got_len = snprintf(buf, sizeof(buf), "%d", val);
  481. // should never happen, but check nonetheless
  482. if (got_len > sizeof(buf))
  483. throw std::invalid_argument("to_string: integer is longer than string buffer");
  484. return std::string(buf);
  485. }
  486. #include <algorithm>
  487. std::string kaitai::kstream::reverse(std::string val) {
  488. std::reverse(val.begin(), val.end());
  489. return val;
  490. }
  491. uint8_t kaitai::kstream::byte_array_min(const std::string val) {
  492. uint8_t min = 0xff; // UINT8_MAX
  493. std::string::const_iterator end = val.end();
  494. for (std::string::const_iterator it = val.begin(); it != end; ++it) {
  495. uint8_t cur = static_cast<uint8_t>(*it);
  496. if (cur < min) {
  497. min = cur;
  498. }
  499. }
  500. return min;
  501. }
  502. uint8_t kaitai::kstream::byte_array_max(const std::string val) {
  503. uint8_t max = 0; // UINT8_MIN
  504. std::string::const_iterator end = val.end();
  505. for (std::string::const_iterator it = val.begin(); it != end; ++it) {
  506. uint8_t cur = static_cast<uint8_t>(*it);
  507. if (cur > max) {
  508. max = cur;
  509. }
  510. }
  511. return max;
  512. }
  513. // ========================================================================
  514. // Other internal methods
  515. // ========================================================================
  516. #ifndef KS_STR_DEFAULT_ENCODING
  517. #define KS_STR_DEFAULT_ENCODING "UTF-8"
  518. #endif
  519. #ifdef KS_STR_ENCODING_ICONV
  520. #include <iconv.h>
  521. #include <cerrno>
  522. #include <stdexcept>
  523. std::string kaitai::kstream::bytes_to_str(std::string src, std::string src_enc) {
  524. iconv_t cd = iconv_open(KS_STR_DEFAULT_ENCODING, src_enc.c_str());
  525. if (cd == (iconv_t) -1) {
  526. if (errno == EINVAL) {
  527. throw std::runtime_error("bytes_to_str: invalid encoding pair conversion requested");
  528. } else {
  529. throw std::runtime_error("bytes_to_str: error opening iconv");
  530. }
  531. }
  532. size_t src_len = src.length();
  533. size_t src_left = src_len;
  534. // Start with a buffer length of double the source length.
  535. size_t dst_len = src_len * 2;
  536. std::string dst(dst_len, ' ');
  537. size_t dst_left = dst_len;
  538. char *src_ptr = &src[0];
  539. char *dst_ptr = &dst[0];
  540. while (true) {
  541. size_t res = iconv(cd, &src_ptr, &src_left, &dst_ptr, &dst_left);
  542. if (res == (size_t) -1) {
  543. if (errno == E2BIG) {
  544. // dst buffer is not enough to accomodate whole string
  545. // enlarge the buffer and try again
  546. size_t dst_used = dst_len - dst_left;
  547. dst_left += dst_len;
  548. dst_len += dst_len;
  549. dst.resize(dst_len);
  550. // dst.resize might have allocated destination buffer in another area
  551. // of memory, thus our previous pointer "dst" will be invalid; re-point
  552. // it using "dst_used".
  553. dst_ptr = &dst[dst_used];
  554. } else {
  555. throw std::runtime_error("bytes_to_str: iconv error");
  556. }
  557. } else {
  558. // conversion successful
  559. dst.resize(dst_len - dst_left);
  560. break;
  561. }
  562. }
  563. if (iconv_close(cd) != 0) {
  564. throw std::runtime_error("bytes_to_str: iconv close error");
  565. }
  566. return dst;
  567. }
  568. #elif defined(KS_STR_ENCODING_NONE)
  569. std::string kaitai::kstream::bytes_to_str(std::string src, std::string src_enc) {
  570. return src;
  571. }
  572. #else
  573. #error Need to decide how to handle strings: please define one of: KS_STR_ENCODING_ICONV, KS_STR_ENCODING_NONE
  574. #endif