tcpdi_parser.php 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398
  1. <?php
  2. //============================================================+
  3. // File name : tcpdi_parser.php
  4. // Version : 1.0
  5. // Begin : 2013-09-25
  6. // Last Update : 2013-09-25
  7. // Author : Paul Nicholls - https://github.com/pauln
  8. // License : GNU-LGPL v3 (https://www.gnu.org/copyleft/lesser.html)
  9. //
  10. // Based on : tcpdf_parser.php
  11. // Version : 1.0.003
  12. // Begin : 2011-05-23
  13. // Last Update : 2013-03-17
  14. // Author : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - info@tecnick.com
  15. // License : GNU-LGPL v3 (https://www.gnu.org/copyleft/lesser.html)
  16. // -------------------------------------------------------------------
  17. // Copyright (C) 2011-2013 Nicola Asuni - Tecnick.com LTD
  18. //
  19. // This file is for use with the TCPDF software library.
  20. //
  21. // tcpdi_parser is free software: you can redistribute it and/or modify it
  22. // under the terms of the GNU Lesser General Public License as
  23. // published by the Free Software Foundation, either version 3 of the
  24. // License, or (at your option) any later version.
  25. //
  26. // tcpdi_parser is distributed in the hope that it will be useful, but
  27. // WITHOUT ANY WARRANTY; without even the implied warranty of
  28. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  29. // See the GNU Lesser General Public License for more details.
  30. //
  31. // You should have received a copy of the License
  32. // along with tcpdi_parser. If not, see
  33. // <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
  34. //
  35. // See LICENSE file for more information.
  36. // -------------------------------------------------------------------
  37. //
  38. // Description : This is a PHP class for parsing PDF documents.
  39. //
  40. //============================================================+
  41. /**
  42. * @file
  43. * This is a PHP class for parsing PDF documents.<br>
  44. * @author Paul Nicholls
  45. * @author Nicola Asuni
  46. * @version 1.0
  47. */
  // Load the TCPDF stream-filter decoders: from TCPDF_PATH when the host
  // application defines that constant, otherwise from the tecnickcom/tcpdf
  // directory located one level above this file's directory.
  if (defined('TCPDF_PATH')) {
      require_once(constant('TCPDF_PATH').'/include/tcpdf_filters.php');
  } else {
      require_once(dirname(__FILE__).'/../tecnickcom/tcpdf/include/tcpdf_filters.php');
  }
  // Numeric tags identifying the kind of a parsed PDF value. Each constant is
  // only defined when it does not exist yet, so an earlier definition wins.
  if (!defined('PDF_TYPE_NULL')) {
      define('PDF_TYPE_NULL', 0);
  }
  if (!defined('PDF_TYPE_NUMERIC')) {
      define('PDF_TYPE_NUMERIC', 1);
  }
  if (!defined('PDF_TYPE_TOKEN')) {
      define('PDF_TYPE_TOKEN', 2);
  }
  if (!defined('PDF_TYPE_HEX')) {
      define('PDF_TYPE_HEX', 3);
  }
  if (!defined('PDF_TYPE_STRING')) {
      define('PDF_TYPE_STRING', 4);
  }
  if (!defined('PDF_TYPE_DICTIONARY')) {
      define('PDF_TYPE_DICTIONARY', 5);
  }
  if (!defined('PDF_TYPE_ARRAY')) {
      define('PDF_TYPE_ARRAY', 6);
  }
  if (!defined('PDF_TYPE_OBJDEC')) {
      define('PDF_TYPE_OBJDEC', 7);
  }
  if (!defined('PDF_TYPE_OBJREF')) {
      define('PDF_TYPE_OBJREF', 8);
  }
  if (!defined('PDF_TYPE_OBJECT')) {
      define('PDF_TYPE_OBJECT', 9);
  }
  if (!defined('PDF_TYPE_STREAM')) {
      define('PDF_TYPE_STREAM', 10);
  }
  if (!defined('PDF_TYPE_BOOLEAN')) {
      define('PDF_TYPE_BOOLEAN', 11);
  }
  if (!defined('PDF_TYPE_REAL')) {
      define('PDF_TYPE_REAL', 12);
  }
  77. /**
  78. * @class tcpdi_parser
  79. * This is a PHP class for parsing PDF documents.<br>
  80. * Based on TCPDF_PARSER, part of the TCPDF project by Nicola Asuni.
  81. * @brief This is a PHP class for parsing PDF documents..
  82. * @version 1.0
  83. * @author Paul Nicholls - github.com/pauln
  84. * @author Nicola Asuni - info@tecnick.com
  85. */
  86. class tcpdi_parser {
  87. /**
  88. * Unique parser ID
  89. * @public
  90. */
  91. public $uniqueid = '';
  92. /**
  93. * Raw content of the PDF document.
  94. * @private
  95. */
  96. private $pdfdata = '';
  97. /**
  98. * XREF data.
  99. * @protected
  100. */
  101. protected $xref = array();
  102. /**
  103. * Object streams.
  104. * @protected
  105. */
  106. protected $objstreams = array();
  107. /**
  108. * Objects in objstreams.
  109. * @protected
  110. */
  111. protected $objstreamobjs = array();
  112. /**
  113. * List of seen XREF data locations.
  114. * @protected
  115. */
  116. protected $xref_seen_offsets = array();
  117. /**
  118. * Array of PDF objects.
  119. * @protected
  120. */
  121. protected $objects = array();
  122. /**
  123. * Array of object offsets.
  124. * @private
  125. */
  126. private $objoffsets = array();
  127. /**
  128. * Class object for decoding filters.
  129. * @private
  130. */
  131. private $FilterDecoders;
  132. /**
  133. * Pages
  134. *
  135. * @private array
  136. */
  137. private $pages;
  138. /**
  139. * Page count
  140. * @private integer
  141. */
  142. private $page_count;
  143. /**
  144. * actual page number
  145. * @private integer
  146. */
  147. private $pageno;
  148. /**
  149. * PDF version of the loaded document
  150. * @private string
  151. */
  152. private $pdfVersion;
  153. /**
  154. * Available BoxTypes
  155. *
  156. * @public array
  157. */
  158. public $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox');
  159. // -----------------------------------------------------------------------------
  /**
   * Parse a PDF document held in memory.
   *
   * Stores the raw data, creates the filter decoder, reads the xref/trailer
   * data, calls findObjectOffsets(), reads the PDF version and collects the
   * page objects. Individual objects are not parsed here; $this->objects
   * starts out empty.
   *
   * @param $data (string) PDF data to parse. Empty data is reported through Error().
   * @param $uniqueid (mixed) Value stored unchanged in the public $uniqueid property.
   * @public
   * @since 1.0.000 (2011-05-24)
   */
  public function __construct($data, $uniqueid) {
      if (empty($data)) {
          $this->Error('Empty PDF data.');
      }
      $this->uniqueid = $uniqueid;
      $this->pdfdata = $data;
      // initialize class for decoding filters
      $this->FilterDecoders = new TCPDF_FILTERS();
      // get xref and trailer data
      $this->xref = $this->getXrefData();
      $this->findObjectOffsets();
      // no object is decoded up front
      $this->objects = array();
      $this->getPDFVersion();
      $this->readPages();
  }
  /**
   * Free the memory held by the parser.
   *
   * Each data property is unset and then re-created with an empty value
   * (empty string for the raw PDF data, empty array for everything else).
   */
  public function cleanUp() {
      // property name => empty value, in the order they are reset
      $blank = array(
          'pdfdata' => '',
          'objstreams' => array(),
          'objects' => array(),
          'objstreamobjs' => array(),
          'xref' => array(),
          'objoffsets' => array(),
          'pages' => array(),
      );
      foreach ($blank as $property => $emptyValue) {
          unset($this->$property);
          $this->$property = $emptyValue;
      }
  }
  /**
   * Return the data gathered by the parser.
   * @return (array) Three-element array holding, in this order, the xref data, the objects array and the pages array.
   * @public
   * @since 1.0.000 (2011-06-26)
   */
  public function getParsedData() {
      $parsed = array();
      $parsed[] = $this->xref;
      $parsed[] = $this->objects;
      $parsed[] = $this->pages;
      return $parsed;
  }
  /**
   * Get the PDF version of the loaded document.
   *
   * Looks for the first "digit.digit" sequence within the first 16 bytes of
   * the document and, when found, stores it in $this->pdfVersion.
   *
   * @return The stored version string; the previously stored value when no version is found.
   * @public
   */
  public function getPDFVersion() {
      $header = substr($this->pdfdata, 0, 16);
      if (preg_match('/\d\.\d/', $header, $found) === 1) {
          $this->pdfVersion = $found[0];
      }
      return $this->pdfVersion;
  }
  /**
   * Read all /Page(s) objects of the document into $this->pages.
   *
   * Resolves the trailer's /Root object, follows its /Pages reference and
   * passes every entry of the /Kids array to readPage(). $this->pages and
   * $this->page_count are always left in a defined state: when the page tree
   * cannot be located they are an empty array and 0.
   */
  public function readPages() {
      // start from a defined, empty state so that the early returns below do
      // not leave the page list and the page count uninitialised
      $this->pages = array();
      $this->page_count = 0;
      $params = $this->getObjectVal($this->xref['trailer'][1]['/Root']);
      if (!isset($params[1][1]['/Pages'])) {
          // the catalog has no /Pages entry
          return;
      }
      $objref = $params[1][1]['/Pages'];
      if ($objref[0] !== PDF_TYPE_OBJREF) {
          // /Pages is not an indirect reference
          return;
      }
      $dict = $this->getObjectVal($objref);
      if ($dict[0] == PDF_TYPE_OBJECT && $dict[1][0] == PDF_TYPE_DICTIONARY) {
          // Dict wrapped in an object
          $dict = $dict[1];
      }
      if ($dict[0] !== PDF_TYPE_DICTIONARY) {
          return;
      }
      if (isset($dict[1]['/Kids'])) {
          $v = $dict[1]['/Kids'];
          if ($v[0] == PDF_TYPE_ARRAY) {
              foreach ($v[1] as $ref) {
                  $page = $this->getObjectVal($ref);
                  $this->readPage($page);
              }
          }
      }
      $this->page_count = count($this->pages);
  }
  /**
   * Read a single page-tree node.
   *
   * A node without a /Kids entry is appended to $this->pages; a node with
   * /Kids is an intermediate node whose children are resolved with
   * getObjectVal() and processed recursively.
   *
   * @param $page (array) Page-tree node as returned by getObjectVal().
   */
  private function readPage($page) {
      if (!isset($page[1][1]['/Kids'])) {
          // leaf node: an actual page
          $this->pages[] = $page;
          return;
      }
      // Nested pages!
      foreach ($page[1][1]['/Kids'][1] as $kidref) {
          $kid = $this->getObjectVal($kidref);
          $this->readPage($kid);
      }
  }
  /**
   * Get the number of pages stored in $this->page_count.
   *
   * @return int
   */
  public function getPageCount() {
      $count = $this->page_count;
      return $count;
  }
  /**
   * Get Cross-Reference (xref) table and trailer data from PDF document data.
   *
   * With $offset 0 the last "startxref ... %%EOF" section of the document
   * gives the xref position. Otherwise $offset itself is used when an
   * "N G obj" header is found at or after it, else the next startxref
   * section after $offset is used. The data at the resulting position is
   * decoded with decodeXref() when it starts with the "xref" keyword and with
   * decodeXrefStream() otherwise.
   *
   * @param $offset (int) xref offset (if known).
   * @param $xref (array) previous xref array (if any).
   * @return Array containing xref and trailer data.
   * @protected
   * @since 1.0.000 (2011-05-24)
   */
  protected function getXrefData($offset=0, $xref=array()) {
      if ($offset == 0) {
          // find last startxref: collect every startxref section and keep the
          // last one, instead of a greedy ".*" prefix that has to run over
          // (and backtrack through) the whole document
          if (!preg_match_all('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_SET_ORDER)) {
              $this->Error('Unable to find startxref');
          }
          $last = end($matches);
          $startxref = $last[1];
      } else {
          if (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
              // Cross-Reference Stream object
              $startxref = $offset;
          } elseif (preg_match('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
              // startxref found
              $startxref = $matches[1][0];
          } else {
              $this->Error('Unable to find startxref');
          }
      }
      unset($matches);
      $startxref = intval($startxref);
      if ($startxref >= strlen($this->pdfdata)) {
          // the offset points outside the document
          $this->Error('Unable to find xref');
      }
      // DOMPDF gets the startxref wrong, giving us the linebreak before the xref starts.
      $startxref += strspn($this->pdfdata, "\r\n", $startxref);
      // check xref position (strict: strpos() returns false when not found)
      if (strpos($this->pdfdata, 'xref', $startxref) === $startxref) {
          // Cross-Reference
          $xref = $this->decodeXref($startxref, $xref);
      } else {
          // Cross-Reference Stream
          $xref = $this->decodeXrefStream($startxref, $xref);
      }
      if (empty($xref)) {
          $this->Error('Unable to find xref');
      }
      return $xref;
  }
  /**
   * Decode the Cross-Reference section (classic "xref" table) and its trailer.
   *
   * Entries are read contiguously, starting right after the "xref" keyword:
   * a line without an n/f flag is a subsection header giving the next object
   * number, "n" entries record the object's offset under
   * $xref['xref'][object number][generation number] (an existing entry is
   * never overwritten) and "f" entries only advance the object number.
   * Reading stops at the first token that is not an entry, so data that
   * follows the table is never mistaken for xref entries.
   *
   * The trailer's /Size, /Root, /Encrypt, /Info and /ID are copied only when
   * $xref has no trailer yet; a /Prev offset not seen before is followed
   * through getXrefData().
   *
   * @param $startxref (int) Offset at which the xref section starts.
   * @param $xref (array) Previous xref array (if any).
   * @return Array containing xref and trailer data.
   * @protected
   * @since 1.0.000 (2011-06-20)
   */
  protected function decodeXref($startxref, $xref=array()) {
      $this->xref_seen_offsets[] = $startxref;
      if (!isset($xref['xref_location'])) {
          $xref['xref_location'] = $startxref;
          $xref['max_object'] = 0;
      }
      // extract xref data (object indexes and offsets);
      // +5 skips the "xref" keyword and the first end-of-line character
      $xoffset = $startxref + 5;
      $whitespace = "\x00\x09\x0a\x0c\x0d\x20";
      // initialize object number
      $obj_num = 0;
      $offset = $xoffset + strspn($this->pdfdata, $whitespace, $xoffset);
      // \G anchors the match at $offset: the loop ends at the first token that
      // is not an xref entry (normally the "trailer" keyword)
      while (preg_match('/\G([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
          $offset = (strlen($matches[0][0]) + $matches[0][1]);
          if ($matches[3][0] == 'n') {
              // in-use entry: offset and generation number
              $gen_num = intval($matches[2][0]);
              // check if object already exist
              if (!isset($xref['xref'][$obj_num][$gen_num])) {
                  // store object offset position
                  $xref['xref'][$obj_num][$gen_num] = intval($matches[1][0]);
              }
              ++$obj_num;
          } elseif ($matches[3][0] == 'f') {
              // free entry
              ++$obj_num;
          } else {
              // subsection header: object number (index)
              $obj_num = intval($matches[1][0]);
          }
          // move to the next entry whatever end-of-line convention is used
          $offset += strspn($this->pdfdata, $whitespace, $offset);
      }
      unset($matches);
      $xref['max_object'] = max($xref['max_object'], $obj_num);
      // get trailer data
      if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $xoffset) > 0) {
          $trailer_data = $matches[1][0];
          if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
              // get only the last updated version
              $xref['trailer'] = array();
              $xref['trailer'][0] = PDF_TYPE_DICTIONARY;
              $xref['trailer'][1] = array();
              // parse trailer_data
              if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
                  $xref['trailer'][1]['/Size'] = array(PDF_TYPE_NUMERIC, intval($matches[1]));
              }
              if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
                  $xref['trailer'][1]['/Root'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
              }
              if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
                  $xref['trailer'][1]['/Encrypt'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
              }
              if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
                  $xref['trailer'][1]['/Info'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
              }
              if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
                  $xref['trailer'][1]['/ID'] = array(PDF_TYPE_ARRAY, array());
                  $xref['trailer'][1]['/ID'][1][0] = array(PDF_TYPE_HEX, $matches[1]);
                  $xref['trailer'][1]['/ID'][1][1] = array(PDF_TYPE_HEX, $matches[2]);
              }
          }
          if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
              // get previous xref, unless that location was already visited
              $prevoffset = intval($matches[1]);
              if (!in_array($prevoffset, $this->xref_seen_offsets)) {
                  $this->xref_seen_offsets[] = $prevoffset;
                  $xref = $this->getXrefData($prevoffset, $xref);
              }
          }
          unset($matches);
      } else {
          $this->Error('Unable to find trailer');
      }
      return $xref;
  }
  /**
   * Decode the Cross-Reference Stream section.
   *
   * The object at $startxref is read with getRawObject() and
   * getIndirectObject(); element [0][1] of the result is read as the stream
   * dictionary and element [1][3][0] as the decoded stream data.
   *
   * Stream data handling:
   * - /Predictor 1 (the default): the data is a plain sequence of entries.
   * - /Predictor >= 10: PNG prediction; every row is /Columns bytes preceded
   *   by a filter-type byte that selects None/Sub/Up/Average/Paeth for that
   *   row. Sub, Average and Paeth use the already decoded left neighbour.
   * - any other predictor is reported through Error().
   * Each entry is then split into three big-endian fields whose byte widths
   * come from /W (a zero-width type field defaults to 1).
   *
   * Object numbers follow the (first, count) pairs of /Index, or start at 0
   * when /Index is absent; every entry, whatever its type, consumes one
   * object number. Only type 1 entries (uncompressed objects) are stored, as
   * $xref['xref'][object number][generation number] = offset, and existing
   * entries are never overwritten.
   *
   * /Size, /Root, /Info and /ID fill the trailer only when $xref has no
   * trailer yet. A /Prev offset is followed through getXrefData() unless
   * that location was already visited.
   *
   * @param $startxref (int) Offset at which the xref section starts.
   * @param $xref (array) Previous xref array (if any).
   * @return Array containing xref and trailer data.
   * @protected
   * @since 1.0.003 (2013-03-16)
   */
  protected function decodeXrefStream($startxref, $xref=array()) {
      // remember this location so that a cyclic /Prev chain cannot recurse forever
      $this->xref_seen_offsets[] = $startxref;
      // try to read Cross-Reference Stream
      list($xrefobj, $unused) = $this->getRawObject($startxref);
      $xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
      if (!isset($xref['xref_location'])) {
          $xref['xref_location'] = $startxref;
          $xref['max_object'] = 0;
      }
      if (!isset($xref['xref'])) {
          $xref['xref'] = array();
      }
      if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
          // get only the last updated version
          $xref['trailer'] = array();
          $xref['trailer'][0] = PDF_TYPE_DICTIONARY;
          $xref['trailer'][1] = array();
          $filltrailer = true;
      } else {
          $filltrailer = false;
      }
      $valid_crs = false;
      $sarr = $xrefcrs[0][1];
      $columns = 1; // Default as per PDF 32000-1:2008.
      $predictor = 1; // Default as per PDF 32000-1:2008.
      $subsections = array(); // (first object number, number of entries) pairs from /Index
      foreach ($sarr as $key => $v) {
          if (($key == '/Type') AND ($v[0] == PDF_TYPE_TOKEN AND ($v[1] == 'XRef'))) {
              $valid_crs = true;
          } elseif (($key == '/Index') AND ($v[0] == PDF_TYPE_ARRAY AND count($v[1]) >= 2)) {
              // one (first object number, number of entries) pair per subsection
              for ($p = 0; ($p + 1) < count($v[1]); $p += 2) {
                  $subsections[] = array(intval($v[1][$p][1]), intval($v[1][($p + 1)][1]));
              }
          } elseif (($key == '/Prev') AND ($v[0] == PDF_TYPE_NUMERIC)) {
              // get previous xref offset
              $prevxref = intval($v[1]);
          } elseif (($key == '/W') AND ($v[0] == PDF_TYPE_ARRAY) AND (count($v[1]) >= 3)) {
              // number of bytes (in the decoded stream) of the corresponding field
              $wb = array();
              $wb[0] = intval($v[1][0][1]);
              $wb[1] = intval($v[1][1][1]);
              $wb[2] = intval($v[1][2][1]);
          } elseif (($key == '/DecodeParms') AND ($v[0] == PDF_TYPE_DICTIONARY)) {
              $decpar = $v[1];
              foreach ($decpar as $kdc => $vdc) {
                  if (($kdc == '/Columns') AND ($vdc[0] == PDF_TYPE_NUMERIC)) {
                      $columns = intval($vdc[1]);
                  } elseif (($kdc == '/Predictor') AND ($vdc[0] == PDF_TYPE_NUMERIC)) {
                      $predictor = intval($vdc[1]);
                  }
              }
          } elseif ($filltrailer) {
              switch($key) {
                  case '/Size':
                  case '/Root':
                  case '/Info':
                  case '/ID':
                      $xref['trailer'][1][$key] = $v;
                      break;
                  default:
                      break;
              }
          }
      }
      // decode data (only possible when the field widths are known)
      $obj_num = 0;
      if ($valid_crs AND isset($wb) AND (array_sum($wb) > 0) AND isset($xrefcrs[1][3][0])) {
          // number of bytes in one xref entry
          $entrylen = array_sum($wb);
          // convert the stream into a zero-based array of integers
          $bytes = unpack('C*', $xrefcrs[1][3][0]);
          $bytes = is_array($bytes) ? array_values($bytes) : array();
          if ($predictor == 1) {
              // no prediction: the stream already is the plain entry data
              $plain = $bytes;
          } elseif (($predictor >= 10) OR empty($predictor)) {
              // PNG prediction: one filter-type byte followed by $columns data bytes per row
              $rowlen = ($columns + 1);
              $plain = array();
              // the row above the first row is all zeros
              $prev_row = array_fill(0, $columns, 0);
              foreach (array_chunk($bytes, $rowlen) as $row) {
                  if (count($row) < $rowlen) {
                      // truncated final row
                      break;
                  }
                  // the filter-type byte of the row selects the algorithm
                  $rowpredictor = (10 + $row[0]);
                  $cur_row = array();
                  for ($j = 0; $j < $columns; ++$j) {
                      $raw = $row[($j + 1)];
                      $row_up = $prev_row[$j];
                      if ($j == 0) {
                          $row_left = 0;
                          $row_upleft = 0;
                      } else {
                          // neighbours are decoded values, not raw stream bytes
                          $row_left = $cur_row[($j - 1)];
                          $row_upleft = $prev_row[($j - 1)];
                      }
                      switch ($rowpredictor) {
                          case 10: { // PNG None
                              $cur_row[$j] = $raw;
                              break;
                          }
                          case 11: { // PNG Sub
                              $cur_row[$j] = (($raw + $row_left) & 0xff);
                              break;
                          }
                          case 12: { // PNG Up
                              $cur_row[$j] = (($raw + $row_up) & 0xff);
                              break;
                          }
                          case 13: { // PNG Average (integer floor of the mean)
                              $cur_row[$j] = (($raw + (($row_left + $row_up) >> 1)) & 0xff);
                              break;
                          }
                          case 14: { // PNG Paeth
                              // initial estimate
                              $p = ($row_left + $row_up - $row_upleft);
                              // distances
                              $pa = abs($p - $row_left);
                              $pb = abs($p - $row_up);
                              $pc = abs($p - $row_upleft);
                              // nearest neighbour, ties resolved in the order left, up, upper-left
                              if (($pa <= $pb) AND ($pa <= $pc)) {
                                  $nearest = $row_left;
                              } elseif ($pb <= $pc) {
                                  $nearest = $row_up;
                              } else {
                                  $nearest = $row_upleft;
                              }
                              $cur_row[$j] = (($raw + $nearest) & 0xff);
                              break;
                          }
                          default: {
                              $this->Error("Unknown PNG predictor $rowpredictor");
                              $cur_row[$j] = $raw;
                              break;
                          }
                      }
                      $plain[] = $cur_row[$j];
                  }
                  $prev_row = $cur_row;
              } // end for each row
          } else {
              // e.g. TIFF predictor 2: not supported
              $this->Error("Unknown PNG predictor $predictor");
              $plain = array();
          }
          unset($bytes);
          // complete decoding: split the plain data into three-field entries
          $entries = array();
          foreach (array_chunk($plain, $entrylen) as $fieldbytes) {
              if (count($fieldbytes) < $entrylen) {
                  // truncated final entry
                  break;
              }
              $entry = array(0, 0, 0);
              if ($wb[0] == 0) {
                  // default type field
                  $entry[0] = 1;
              }
              $i = 0; // count bytes on the entry
              // for every field
              for ($c = 0; $c < 3; ++$c) {
                  // for every byte of the field (most significant first)
                  for ($b = 0; $b < $wb[$c]; ++$b) {
                      $entry[$c] += ($fieldbytes[$i] << (($wb[$c] - 1 - $b) * 8));
                      ++$i;
                  }
              }
              $entries[] = $entry;
          }
          unset($plain);
          // fill xref
          $total = count($entries);
          if (empty($subsections)) {
              // no /Index: a single subsection starting at object 0
              $subsections[] = array(0, $total);
          }
          $k = 0; // position in $entries
          $highest = 0; // highest object number reached by any subsection
          foreach ($subsections as $subsection) {
              $obj_num = $subsection[0];
              for ($n = 0; ($n < $subsection[1]) AND ($k < $total); ++$n) {
                  $row = $entries[$k];
                  ++$k;
                  if ($row[0] == 1) {
                      // (n) object in use and not compressed: $row[1] = offset, $row[2] = generation number
                      if (!isset($xref['xref'][$obj_num][$row[2]])) {
                          // store object offset position
                          $xref['xref'][$obj_num][$row[2]] = $row[1];
                      }
                  }
                  // type 0 (free), type 2 (compressed; $row[1] = object stream number,
                  // $row[2] = index within it) and unknown types are not stored,
                  // but every entry occupies one object number
                  ++$obj_num;
              }
              $highest = max($highest, $obj_num);
          }
          $obj_num = $highest;
      } // end decoding data
      $xref['max_object'] = max($xref['max_object'], $obj_num);
      if (isset($prevxref) AND !in_array($prevxref, $this->xref_seen_offsets)) {
          // get previous xref
          $this->xref_seen_offsets[] = $prevxref;
          $xref = $this->getXrefData($prevxref, $xref);
      }
      return $xref;
  }
  /**
   * Get raw stream data.
   *
   * Skips leading white space, the "stream" keyword and exactly one
   * end-of-line marker (CR LF, LF, or a lone CR), then reads $length bytes.
   * Further CR/LF bytes are part of the stream data and are not skipped.
   *
   * @param $offset (int) Offset of the "stream" keyword (leading white space allowed).
   * @param $length (int) Stream length.
   * @return array Two elements: array(PDF_TYPE_STREAM, stream content) and the offset just after the content.
   * @protected
   */
  protected function getRawStream($offset, $length) {
      $offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
      $offset += 6; // "stream"
      // skip the single end-of-line marker that ends the keyword line
      if (substr($this->pdfdata, $offset, 2) === "\r\n") {
          $offset += 2;
      } elseif (isset($this->pdfdata[$offset]) AND (($this->pdfdata[$offset] === "\n") OR ($this->pdfdata[$offset] === "\r"))) {
          $offset += 1;
      }
      $obj = array();
      $obj[] = PDF_TYPE_STREAM;
      $obj[] = substr($this->pdfdata, $offset, $length);
      return array($obj, $offset+$length);
  }
  /**
   * Get object type, raw value and offset to next object.
   *
   * Reads one token starting at $offset (leading white space and comments
   * are skipped) and returns array($obj, $nextoffset), where $obj is
   * array(type, value), or array(PDF_TYPE_OBJREF, object number, generation
   * number) for an indirect reference. The type is a PDF_TYPE_* constant,
   * one of the marker strings ']', '>>', 'endobj', 'endstream', or '' when
   * no token could be read (end of data or unrecognised input).
   *
   * @param $offset (int) Object offset.
   * @param $data (string) Data to read from; the document data is used when empty.
   * @return array containing object type, raw value and offset to next object
   * @protected
   * @since 1.0.000 (2011-06-20)
   */
  protected function getRawObject($offset=0, $data=null) {
      if ($data == null) {
          $data =& $this->pdfdata;
      }
      $objtype = ''; // object type to be returned
      $objval = ''; // object value to be returned
      // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
      $offset += strspn($data, "\x00\x09\x0a\x0c\x0d\x20", $offset);
      if (!isset($data[$offset])) {
          // end of data: nothing left to read
          return array(array($objtype, $objval), $offset);
      }
      // get first char
      $char = $data[$offset];
      // get object type
      switch ($char) {
          case '%': { // \x25 PERCENT SIGN
              // skip comment and return the next token, with its offset
              $next = strcspn($data, "\r\n", $offset);
              if ($next > 0) {
                  $offset += $next;
                  return $this->getRawObject($offset, $data);
              }
              break;
          }
          case '/': { // \x2F SOLIDUS
              // name object
              $objtype = PDF_TYPE_TOKEN;
              ++$offset;
              $length = strcspn($data, "\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25", $offset);
              $objval = substr($data, $offset, $length);
              $offset += $length;
              break;
          }
          case '(': // \x28 LEFT PARENTHESIS
          case ')': { // \x29 RIGHT PARENTHESIS
              // literal string object
              $objtype = PDF_TYPE_STRING;
              ++$offset;
              $strpos = $offset;
              if ($char == '(') {
                  $open_bracket = 1;
                  while ($open_bracket > 0) {
                      if (!isset($data[$strpos])) {
                          break;
                      }
                      $ch = $data[$strpos];
                      switch ($ch) {
                          case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
                              // skip next character
                              ++$strpos;
                              break;
                          }
                          case '(': { // LEFT PARENTHESIS (28h)
                              ++$open_bracket;
                              break;
                          }
                          case ')': { // RIGHT PARENTHESIS (29h)
                              --$open_bracket;
                              break;
                          }
                      }
                      ++$strpos;
                  }
                  $objval = substr($data, $offset, ($strpos - $offset - 1));
                  $offset = $strpos;
              }
              break;
          }
          case '[': // \x5B LEFT SQUARE BRACKET
          case ']': { // \x5D RIGHT SQUARE BRACKET
              // array object
              $objtype = PDF_TYPE_ARRAY;
              ++$offset;
              if ($char == '[') {
                  // get array content
                  $objval = array();
                  while (true) {
                      // get element
                      list($element, $offset) = $this->getRawObject($offset, $data);
                      if (($element[0] === ']') OR ($element[0] === '')) {
                          // closing delimiter reached, or nothing readable is left
                          // (truncated or malformed array): stop instead of looping forever
                          break;
                      }
                      $objval[] = $element;
                  }
              } else {
                  $objtype = ']';
              }
              break;
          }
          case '<': // \x3C LESS-THAN SIGN
          case '>': { // \x3E GREATER-THAN SIGN
              if (isset($data[($offset + 1)]) AND ($data[($offset + 1)] == $char)) {
                  // dictionary object
                  $objtype = PDF_TYPE_DICTIONARY;
                  if ($char == '<') {
                      list ($objval, $offset) = $this->getDictValue($offset, $data);
                  } else {
                      $objtype = '>>';
                      $offset += 2;
                  }
              } else {
                  // hexadecimal string object
                  $objtype = PDF_TYPE_HEX;
                  ++$offset;
                  // Hex digits may be separated by white space (e.g. the "Panose" entry in the
                  // FontDescriptor Style dict) and the string may be empty. \G anchors the match
                  // at $offset so the remaining data does not have to be copied.
                  if (($char == '<') AND (preg_match('/\G([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]*)>/', $data, $matches, 0, $offset) == 1)) {
                      $objval = $matches[1];
                      $offset += strlen($matches[0]);
                      unset($matches);
                  }
              }
              break;
          }
          default: {
              // first four characters of the token (fewer at the end of the data)
              $frag = (string) substr($data, $offset, 4);
              switch ($frag) {
                  case 'endo':
                      // indirect object
                      $objtype = 'endobj';
                      $offset += 6;
                      break;
                  case 'stre':
                      // Streams should always be indirect objects, and thus processed by getRawStream().
                      // If we get here, treat it as a null object as something has gone wrong.
                  case 'null':
                      // null object
                      $objtype = PDF_TYPE_NULL;
                      $offset += 4;
                      $objval = 'null';
                      break;
                  case 'true':
                      // boolean true object
                      $objtype = PDF_TYPE_BOOLEAN;
                      $offset += 4;
                      $objval = true;
                      break;
                  case 'fals':
                      // boolean false object
                      $objtype = PDF_TYPE_BOOLEAN;
                      $offset += 5;
                      $objval = false;
                      break;
                  case 'ends':
                      // end stream object
                      $objtype = 'endstream';
                      $offset += 9;
                      break;
                  default:
                      $isref = (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+([Robj]{1,3})/i', substr($data, $offset, 33), $matches) == 1);
                      if ($isref AND ($matches[3] == 'R')) {
                          // indirect object reference
                          $objtype = PDF_TYPE_OBJREF;
                          $offset += strlen($matches[0]);
                          $objval = array(intval($matches[1]), intval($matches[2]));
                      } elseif ($isref AND ($matches[3] == 'obj')) {
                          // object start
                          $objtype = PDF_TYPE_OBJECT;
                          $objval = intval($matches[1]).'_'.intval($matches[2]);
                          $offset += strlen ($matches[0]);
                      } elseif (($numlen = strspn($data, '+-.0123456789', $offset)) > 0) {
                          // numeric object (also reached when the pattern above matched
                          // something that is neither "R" nor "obj")
                          $objval = substr($data, $offset, $numlen);
                          $objtype = (intval($objval) != $objval) ? PDF_TYPE_REAL : PDF_TYPE_NUMERIC;
                          $offset += $numlen;
                      }
                      unset($matches);
                      break;
              }
              break;
          }
      }
      $obj = array();
      $obj[] = $objtype;
      if ($objtype == PDF_TYPE_OBJREF && is_array($objval)) {
          // references are flattened: (type, object number, generation number)
          foreach ($objval as $val) {
              $obj[] = $val;
          }
      } else {
          $obj[] = $objval;
      }
      return array($obj, $offset);
  }
  /**
   * Extract a dictionary ("<< ... >>") from the data and parse its entries.
   * @param $offset (int) Offset of the opening "<<" inside $data.
   * @param $data (string) Data to read from (taken by reference; it is only read here).
   * @return array containing the parsed entries (keyed by '/Name') and the offset following the dictionary.
   * @private
   */
  private function getDictValue($offset, &$data) {
      $objval = array();
      $datalen = strlen($data);
      // Skip the opening "<<" and walk forward until the matching ">>" is found,
      // counting nested dictionaries on the way.
      $offset += 2;
      $start = $offset;
      $depth = 1;
      // The length check stops the scan on truncated data; without it the scan
      // would never terminate when the closing ">>" is missing.
      while (($depth > 0) AND ($offset < $datalen)) {
          $char = $data[$offset];
          $next = isset($data[($offset + 1)]) ? $data[($offset + 1)] : '';
          if (($char == '>') AND ($next == '>')) {
              --$depth;
              $offset += 2;
          } elseif (($char == '<') AND ($next == '<')) {
              ++$depth;
              $offset += 2;
          } else {
              ++$offset;
          }
      }
      // The dictionary body is exactly the span that was scanned.
      $dict = (string) substr($data, $start, ($offset - $start));
      if ($depth > 0) {
          // Truncated input: close the levels still open so the parser below sees a terminator.
          $dict .= str_repeat('>>', $depth);
      }
      // Now that we have just the dict, parse it as a sequence of key/value pairs.
      $dictlen = strlen($dict);
      $dictoffset = 0;
      while ($dictoffset < $dictlen) {
          list($key, $eloffset) = $this->getRawObject($dictoffset, $dict);
          if ($key[0] == '>>') {
              // end of dictionary
              break;
          }
          list($element, $nextoffset) = $this->getRawObject($eloffset, $dict);
          if ($nextoffset <= $dictoffset) {
              // Nothing was consumed: the content cannot be parsed any further,
              // and repeating the same call would loop forever.
              break;
          }
          $objval['/'.$key[1]] = $element;
          $dictoffset = $nextoffset;
      }
      return array($objval, $offset);
  }
  /**
   * Get content of indirect object.
   * @param $obj_ref (string) Object number and generation number separated by underscore character.
   * @param $offset (int) Object offset.
   * @param $decoding (boolean) If true decode streams.
   * @return array containing the raw elements of the object, or array('null', 'null', $offset) when the object header is not found at $offset.
   * @protected
   * @since 1.0.000 (2011-05-24)
   */
  protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
      $obj = explode('_', $obj_ref);
      if (!is_array($obj) OR (count($obj) != 2)) {
          // Report the reference string that was passed in (not the exploded array).
          $this->Error('Invalid object reference: '.$obj_ref);
          return;
      }
      $objref = $obj[0].' '.$obj[1].' obj';
      // strpos() returns false when there is no match; test for it explicitly,
      // because a loose comparison treats false as equal to offset 0.
      $found = strpos($this->pdfdata, $objref, $offset);
      if (($found === false) OR ($found != $offset)) {
          // an indirect reference to an undefined object shall be considered a reference to the null object
          return array('null', 'null', $offset);
      }
      // starting position of object content
      $offset += strlen($objref);
      // get array of object content
      $objdata = array();
      $i = 0; // object main index
      $closed = true; // false when the loop is abandoned before reaching 'endobj'
      do {
          $prev = ($i > 0) ? $objdata[($i - 1)] : null;
          $prev_is_dict = (isset($prev[0]) AND ($prev[0] == PDF_TYPE_DICTIONARY));
          if ($prev_is_dict AND array_key_exists('/Length', $prev[1])) {
              // Stream - get using /Length in stream's dict
              $lengthobj = $prev[1]['/Length'];
              if ($lengthobj[0] === PDF_TYPE_OBJREF) {
                  $lengthobj = $this->getObjectVal($lengthobj);
                  if ($lengthobj[0] === PDF_TYPE_OBJECT) {
                      $lengthobj = $lengthobj[1];
                  }
              }
              $streamlength = $lengthobj[1];
              list($element, $offset) = $this->getRawStream($offset, $streamlength);
          } else {
              // get element
              $startoffset = $offset;
              list($element, $offset) = $this->getRawObject($offset);
              if (($offset <= $startoffset) AND ($element[0] != 'endobj')) {
                  // Nothing was consumed (truncated or unparsable data): the same call
                  // would repeat forever, so stop with what has been read so far.
                  $closed = false;
                  break;
              }
          }
          // decode stream using stream's dictionary information
          if ($decoding AND ($element[0] == PDF_TYPE_STREAM) AND $prev_is_dict) {
              $element[3] = $this->decodeStream($prev[1], $element[1]);
          }
          $objdata[$i] = $element;
          ++$i;
      } while ($element[0] != 'endobj');
      if ($closed) {
          // remove closing delimiter
          array_pop($objdata);
      }
      // return raw object content
      return $objdata;
  }
  /**
   * Get the content of object, resolving indirect object reference if necessary.
   * @param $obj (array) Object value; element 0 is the object type.
   * @return array containing object data. The input is returned unchanged when it is not an object reference or when the reference cannot be resolved.
   * @public
   * @since 1.0.000 (2011-06-26)
   */
  public function getObjectVal($obj) {
      if ($obj[0] != PDF_TYPE_OBJREF) {
          return $obj;
      }
      // The reference is given either as "num_gen" in element 1 or as two separate elements.
      if (strpos($obj[1], '_') !== false) {
          $key = explode('_', $obj[1]);
      } else {
          $key = array($obj[1], $obj[2]);
      }
      $ret = array(0=>PDF_TYPE_OBJECT, 'obj'=>$key[0], 'gen'=>$key[1]);
      // reference to indirect object
      $object = null;
      if (isset($this->objects[$key[0]][$key[1]])) {
          // this object has been already parsed
          $object = $this->objects[$key[0]][$key[1]];
      } elseif (($offset = $this->findObjectOffset($key)) !== false) {
          // parse new object
          $this->objects[$key[0]][$key[1]] = $this->getIndirectObject($key[0].'_'.$key[1], $offset, false);
          $object = $this->objects[$key[0]][$key[1]];
      } elseif (($key[1] == 0) && isset($this->objstreamobjs[$key[0]])) {
          // Object is in an object stream
          $streaminfo = $this->objstreamobjs[$key[0]];
          $objs = $streaminfo[0];
          if (!isset($this->objstreams[$objs[0]][$objs[1]])) {
              // Fetch and decode object stream
              $objstream = $this->getObjectVal(array(PDF_TYPE_OBJREF, $objs[0], $objs[1]));
              if (!isset($objstream[1][1], $objstream[2][1])) {
                  // The container did not resolve to a stream (dictionary + data),
                  // so the requested object cannot be read from it.
                  return $obj;
              }
              $decoded = $this->decodeStream($objstream[1][1], $objstream[2][1]);
              $this->objstreams[$objs[0]][$objs[1]] = $decoded[0]; // Store just the data, in case we need more from this objstream
              // Free memory
              unset($objstream);
              unset($decoded);
          }
          $this->objects[$key[0]][$key[1]] = $this->getRawObject($streaminfo[1], $this->objstreams[$objs[0]][$objs[1]]);
          $object = $this->objects[$key[0]][$key[1]];
      }
      if (is_null($object)) {
          // unresolved reference: hand back the input
          return $obj;
      }
      $ret[1] = $object[0];
      if (isset($object[1][0]) && $object[1][0] == PDF_TYPE_STREAM) {
          // second parsed element is a stream: expose it next to its dictionary
          $ret[0] = PDF_TYPE_STREAM;
          $ret[2] = $object[1];
      }
      return $ret;
  }
  /**
   * Extract object stream to find out what it contains.
   * Reads the header of the object stream (the part before /First) and records, for each
   * object number listed there, the containing stream and the object's offset inside the decoded data.
   * @param $key (array) Key of the object stream (obj, gen).
   */
  public function extractObjectStream($key) {
      $objref = array(PDF_TYPE_OBJREF, $key[0], $key[1]);
      $obj = $this->getObjectVal($objref);
      if ($obj[0] !== PDF_TYPE_STREAM || !isset($obj[1][1]['/First'][1])) {
          // Not a valid object stream dictionary - skip it.
          return;
      }
      $stream = $this->decodeStream($obj[1][1], $obj[2][1]); // Decode object stream, as we need the first bit
      if (!empty($stream[1])) {
          // Some filters could not be applied, so the data is still encoded and the header is unreadable.
          return;
      }
      $first = intval($obj[1][1]['/First'][1]);
      $header = (string) substr($stream[0], 0, $first);
      // The header is a list of "object-number offset" pairs. The integers may be separated by
      // any white space (spaces, newlines, ...), so match the pairs instead of splitting on single spaces.
      if (preg_match_all('/([0-9]+)[\s]+([0-9]+)/', $header, $pairs, PREG_SET_ORDER) >= 1) {
          foreach ($pairs as $pair) {
              // offsets in the header are relative to /First
              $this->objstreamobjs[intval($pair[1])] = array($key, intval($pair[2]) + $first);
          }
      }
      // Free memory - we may not need this at all.
      unset($obj);
      unset($stream);
  }
  /**
   * Scan the whole document once and remember where every "N G obj" header starts.
   * Headers followed by a dictionary mentioning /ObjStm are additionally handed to extractObjectStream().
   * @private
   */
  private function findObjectOffsets() {
      $this->objoffsets = array();
      $hits = preg_match_all('/(*ANYCRLF)^[\s]*([0-9]+)[\s]+([0-9]+)[\s]+obj/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE);
      if ($hits >= 1) {
          foreach ($matches[0] as $idx => $hit) {
              $text = $hit[0];
              $position = $hit[1];
              // the match may start with white space; the stored offset points at the object number
              $this->objoffsets[trim($text)] = $position + strspn($text, "\x00\x09\x0a\x0c\x0d\x20");
              // peek at what follows the header to spot object streams
              $following = substr($this->pdfdata, $position + strlen($text), 256);
              if (preg_match('|^\s+<<[^>]+/ObjStm|', $following) == 1) {
                  $this->extractObjectStream(array($matches[1][$idx][0], $matches[2][$idx][0]));
              }
          }
      }
      unset($matches);
  }
  /**
   * Look up the offset of an object: the xref entry is used when the object header really
   * is at that position, otherwise the offsets collected by scanning the file are consulted.
   * @param $key (array) Object key to find (obj, gen).
   * @return int Offset of the object in $this->pdfdata, or false when the object is unknown.
   * @private
   */
  private function findObjectOffset($key) {
      $header = $key[0].' '.$key[1].' obj';
      if (isset($this->xref['xref'][$key[0]][$key[1]])) {
          $candidate = $this->xref['xref'][$key[0]][$key[1]];
          // trust the xref entry only if the header is found exactly there
          if (strpos($this->pdfdata, $header, $candidate) === $candidate) {
              return $candidate;
          }
      }
      // fall back to the table built from the file scan
      return array_key_exists($header, $this->objoffsets) ? $this->objoffsets[$header] : false;
  }
  /**
   * Decode the specified stream.
   * @param $sdic (array) Stream's dictionary array (entries keyed by '/Name').
   * @param $stream (string) Stream to decode.
   * @return array containing decoded stream data and remaining filters (filters that are not available in the decoder).
   * @protected
   * @since 1.0.000 (2011-06-22)
   */
  protected function decodeStream($sdic, $stream) {
      // get stream length and filters
      $slength = strlen($stream);
      if ($slength <= 0) {
          return array('', array());
      }
      $filters = array();
      // Each entry is inspected by key. The type of the value is checked per key: /Length is
      // numeric and /Filter is either a single name or an array of names, so a common
      // "value must be a name" pre-check would make those cases unreachable.
      foreach ($sdic as $k => $v) {
          if ($k === '/Length') {
              if ($v[0] == PDF_TYPE_NUMERIC) {
                  // get declared stream length
                  $declength = intval($v[1]);
                  if (($declength >= 0) AND ($declength < $slength)) {
                      $stream = substr($stream, 0, $declength);
                      $slength = $declength;
                  }
              }
          } elseif ($k === '/Filter') {
              if ($v[0] == PDF_TYPE_OBJREF) {
                  // filter given as indirect reference: resolve it first
                  $resolved = $this->getObjectVal($v);
                  if (isset($resolved[1]) AND is_array($resolved[1])) {
                      $v = $resolved[1];
                  }
              }
              if ($v[0] == PDF_TYPE_TOKEN) {
                  // single filter
                  $filters[] = $v[1];
              } elseif ($v[0] == PDF_TYPE_ARRAY) {
                  // array of filters
                  foreach ($v[1] as $flt) {
                      if ($flt[0] == PDF_TYPE_TOKEN) {
                          $filters[] = $flt[1];
                      }
                  }
              }
          }
      }
      // decode the stream
      $remaining_filters = array();
      if (!empty($filters)) {
          $available = $this->FilterDecoders->getAvailableFilters();
          foreach ($filters as $filter) {
              if (in_array($filter, $available)) {
                  $stream = $this->FilterDecoders->decodeFilter($filter, $stream);
              } else {
                  // add missing filter to array
                  $remaining_filters[] = $filter;
              }
          }
      }
      return array($stream, $remaining_filters);
  }
  /**
   * Set pageno
   *
   * The page number is given one-based and stored zero-based in $this->pageno.
   * Error() is called when the number is outside the range reported by getPageCount().
   *
   * @param int $pageno Pagenumber to use
   */
  public function setPageno($pageno) {
      $requested = (int) $pageno;
      $index = $requested - 1;
      $pagecount = $this->getPageCount();
      if ($index < 0 || $index >= $pagecount) {
          // report the number the caller asked for, not the internal zero-based index
          $this->Error("Pagenumber is wrong! (Requested $requested, max ".$pagecount.")");
          // do not store an invalid index (relevant only if Error() returns)
          return;
      }
      $this->pageno = $index;
  }
  /**
   * Get the resources that apply to the currently selected page.
   *
   * @return array
   */
  public function getPageResources() {
      $current_page = $this->pages[$this->pageno];
      return $this->_getPageResources($current_page);
  }
  /**
   * Get page-resources from /Page
   *
   * If the object has a /Resources entry it is used; otherwise the lookup
   * continues with the object's /Parent.
   *
   * @param array $obj Array of pdf-data
   * @return mixed The resources entry, or false when neither the object nor any parent has one
   */
  private function _getPageResources ($obj) { // $obj = /Page
      $obj = $this->getObjectVal($obj);
      if (isset($obj[1][1]['/Resources'])) {
          $res = $obj[1][1]['/Resources'];
      } elseif (isset($obj[1][1]['/Parent'])) {
          // may yield false when no ancestor provides resources
          $res = $this->_getPageResources($obj[1][1]['/Parent']);
      } else {
          return false;
      }
      // Only index into the result when it really is an array with a type element;
      // a false result from the parent lookup is passed through untouched.
      if (is_array($res) && isset($res[0]) && $res[0] == PDF_TYPE_OBJECT) {
          return $res[1];
      }
      return $res;
  }
  /**
   * Get content of current page
   *
   * If /Contents resolves to several streams, they are concatenated;
   * every stream is followed by a single space.
   *
   * @return string
   */
  public function getContent() {
      if (!isset($this->pages[$this->pageno][1][1]['/Contents'])) {
          // page without content
          return '';
      }
      $joined = '';
      $parts = $this->_getPageContent($this->pages[$this->pageno][1][1]['/Contents']);
      foreach ($parts as $part) {
          $joined .= $this->_rebuildContentStream($part);
          $joined .= ' ';
      }
      return $joined;
  }
  /**
   * Resolve all content-objects
   *
   * A reference is resolved and, if it points to an array, followed recursively;
   * an array is walked element by element. Anything else yields an empty list.
   *
   * @param array $content_ref
   * @return array
   */
  private function _getPageContent($content_ref) {
      if ($content_ref[0] == PDF_TYPE_OBJREF) {
          $resolved = $this->getObjectVal($content_ref);
          if ($resolved[1][0] == PDF_TYPE_ARRAY) {
              // the reference points to an array of further content entries
              return $this->_getPageContent($resolved[1]);
          }
          return array($resolved);
      }
      $collected = array();
      if ($content_ref[0] == PDF_TYPE_ARRAY) {
          foreach ($content_ref[1] as $entry) {
              $collected = array_merge($collected, $this->_getPageContent($entry));
          }
      }
      return $collected;
  }
  /**
   * Rebuild content-streams
   *
   * Applies the filters named in the stream dictionary's /Filter entry
   * (a single name, an array of names, or a reference to either) to the stream data.
   *
   * @param array $obj
   * @return string
   */
  private function _rebuildContentStream($obj) {
      $chain = array();
      if (isset($obj[1][1]['/Filter'])) {
          $spec = $obj[1][1]['/Filter'];
          if ($spec[0] == PDF_TYPE_OBJREF) {
              // filter stored as an indirect object
              $target = $this->getObjectVal($spec);
              $spec = $target[1];
          }
          if ($spec[0] == PDF_TYPE_TOKEN) {
              $chain = array($spec);
          } elseif ($spec[0] == PDF_TYPE_ARRAY) {
              $chain = $spec[1];
          }
      }
      $data = $obj[2][1];
      foreach ($chain as $entry) {
          $data = $this->FilterDecoders->decodeFilter($entry[1], $data);
      }
      return $data;
  }
  /**
   * Get a Box from a page
   * Arrayformat is same as used by fpdf_tpl
   *
   * When the page has no such box, the lookup continues with the page's /Parent.
   *
   * @param array $page a /Page
   * @param string $box_index Type of Box @see $availableBoxes
   * @param float $k Scale factor from user space units to points
   * @return array The box data, or false when neither the page nor any parent defines the box
   */
  public function getPageBox($page, $box_index, $k) {
      $page = $this->getObjectVal($page);
      $box = null;
      if (isset($page[1][1][$box_index])) {
          $box =& $page[1][1][$box_index];
      }
      if (!is_null($box) && $box[0] == PDF_TYPE_OBJREF) {
          // box stored as an indirect object
          $resolved = $this->getObjectVal($box);
          $box = $resolved[1];
      }
      if (!is_null($box) && $box[0] == PDF_TYPE_ARRAY) {
          // the four array elements are the two corner coordinates
          $xa = $box[1][0][1];
          $ya = $box[1][1][1];
          $xb = $box[1][2][1];
          $yb = $box[1][3][1];
          return array(
              'x' => $xa / $k,
              'y' => $ya / $k,
              'w' => abs($xa - $xb) / $k,
              'h' => abs($ya - $yb) / $k,
              'llx' => min($xa, $xb) / $k,
              'lly' => min($ya, $yb) / $k,
              'urx' => max($xa, $xb) / $k,
              'ury' => max($ya, $yb) / $k,
          );
      }
      if (!isset($page[1][1]['/Parent'])) {
          return false;
      }
      // not defined here: ask the parent node
      return $this->getPageBox($this->getObjectVal($page[1][1]['/Parent']), $box_index, $k);
  }
  /**
   * Get all page boxes by page no
   *
   * @param int $pageno The page number (one-based)
   * @param float $k Scale factor from user space units to points
   * @return array
   */
  public function getPageBoxes($pageno, $k) {
      $page = $this->pages[$pageno - 1];
      return $this->_getPageBoxes($page, $k);
  }
  /**
   * Get all boxes from /Page
   *
   * Every box type listed in $this->availableBoxes is looked up with getPageBox();
   * box types without a result are left out.
   *
   * @param array $page a /Page
   * @param float $k Scale factor from user space units to points
   * @return array
   */
  private function _getPageBoxes($page, $k) {
      $found = array();
      foreach ($this->availableBoxes as $name) {
          $dimensions = $this->getPageBox($page, $name, $k);
          if ($dimensions) {
              $found[$name] = $dimensions;
          }
      }
      return $found;
  }
  /**
   * Get the page rotation by pageno
   *
   * @param integer $pageno The page number (one-based)
   * @return array
   */
  public function getPageRotation($pageno) {
      $page = $this->pages[$pageno - 1];
      return $this->_getPageRotation($page);
  }
  /**
   * Get the /Rotate entry of a page, falling back to the page's /Parent when the page has none.
   *
   * @param array $obj a /Page
   * @return mixed The rotation entry, or false when neither the object nor any parent has one
   */
  private function _getPageRotation($obj) { // $obj = /Page
      $obj = $this->getObjectVal($obj);
      if (isset($obj[1][1]['/Rotate'])) {
          $res = $this->getObjectVal($obj[1][1]['/Rotate']);
      } elseif (isset($obj[1][1]['/Parent'])) {
          $res = $this->_getPageRotation($obj[1][1]['/Parent']);
      } else {
          return false;
      }
      // unwrap a resolved object, pass everything else through
      return (isset($res[0]) && $res[0] == PDF_TYPE_OBJECT) ? $res[1] : $res;
  }
  /**
   * Fatal error handler: prints the message and stops the script.
   * @param $msg (string) The error message
   * @public
   * @since 1.0.000 (2011-05-23)
   */
  public function Error($msg) {
      // print the error and terminate execution
      exit('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
  }
  1307. } // END OF TCPDF_PARSER CLASS
  1308. //============================================================+
  1309. // END OF FILE
  1310. //============================================================+