Gnumeric.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. <?php
  2. namespace PhpOffice\PhpSpreadsheet\Reader;
  3. use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
  4. use PhpOffice\PhpSpreadsheet\Cell\DataType;
  5. use PhpOffice\PhpSpreadsheet\DefinedName;
  6. use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\PageSetup;
  7. use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\Properties;
  8. use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\Styles;
  9. use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
  10. use PhpOffice\PhpSpreadsheet\ReferenceHelper;
  11. use PhpOffice\PhpSpreadsheet\RichText\RichText;
  12. use PhpOffice\PhpSpreadsheet\Settings;
  13. use PhpOffice\PhpSpreadsheet\Shared\File;
  14. use PhpOffice\PhpSpreadsheet\Spreadsheet;
  15. use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
  16. use SimpleXMLElement;
  17. use XMLReader;
  18. class Gnumeric extends BaseReader
  19. {
  20. const NAMESPACE_GNM = 'http://www.gnumeric.org/v10.dtd'; // gmr in old sheets
  21. const NAMESPACE_XSI = 'http://www.w3.org/2001/XMLSchema-instance';
  22. const NAMESPACE_OFFICE = 'urn:oasis:names:tc:opendocument:xmlns:office:1.0';
  23. const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
  24. const NAMESPACE_DC = 'http://purl.org/dc/elements/1.1/';
  25. const NAMESPACE_META = 'urn:oasis:names:tc:opendocument:xmlns:meta:1.0';
  26. const NAMESPACE_OOO = 'http://openoffice.org/2004/office';
  27. /**
  28. * Shared Expressions.
  29. *
  30. * @var array
  31. */
  32. private $expressions = [];
  33. /**
  34. * Spreadsheet shared across all functions.
  35. *
  36. * @var Spreadsheet
  37. */
  38. private $spreadsheet;
  39. /** @var ReferenceHelper */
  40. private $referenceHelper;
  41. /** @var array */
  42. public static $mappings = [
  43. 'dataType' => [
  44. '10' => DataType::TYPE_NULL,
  45. '20' => DataType::TYPE_BOOL,
  46. '30' => DataType::TYPE_NUMERIC, // Integer doesn't exist in Excel
  47. '40' => DataType::TYPE_NUMERIC, // Float
  48. '50' => DataType::TYPE_ERROR,
  49. '60' => DataType::TYPE_STRING,
  50. //'70': // Cell Range
  51. //'80': // Array
  52. ],
  53. ];
  54. /**
  55. * Create a new Gnumeric.
  56. */
  57. public function __construct()
  58. {
  59. parent::__construct();
  60. $this->referenceHelper = ReferenceHelper::getInstance();
  61. $this->securityScanner = XmlScanner::getInstance($this);
  62. }
  63. /**
  64. * Can the current IReader read the file?
  65. */
  66. public function canRead(string $filename): bool
  67. {
  68. // Check if gzlib functions are available
  69. if (File::testFileNoThrow($filename) && function_exists('gzread')) {
  70. // Read signature data (first 3 bytes)
  71. $fh = fopen($filename, 'rb');
  72. if ($fh !== false) {
  73. $data = fread($fh, 2);
  74. fclose($fh);
  75. }
  76. }
  77. return isset($data) && $data === chr(0x1F) . chr(0x8B);
  78. }
  79. private static function matchXml(XMLReader $xml, string $expectedLocalName): bool
  80. {
  81. return $xml->namespaceURI === self::NAMESPACE_GNM
  82. && $xml->localName === $expectedLocalName
  83. && $xml->nodeType === XMLReader::ELEMENT;
  84. }
  85. /**
  86. * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object.
  87. *
  88. * @param string $pFilename
  89. *
  90. * @return array
  91. */
  92. public function listWorksheetNames($pFilename)
  93. {
  94. File::assertFile($pFilename);
  95. $xml = new XMLReader();
  96. $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
  97. $xml->setParserProperty(2, true);
  98. $worksheetNames = [];
  99. while ($xml->read()) {
  100. if (self::matchXml($xml, 'SheetName')) {
  101. $xml->read(); // Move onto the value node
  102. $worksheetNames[] = (string) $xml->value;
  103. } elseif (self::matchXml($xml, 'Sheets')) {
  104. // break out of the loop once we've got our sheet names rather than parse the entire file
  105. break;
  106. }
  107. }
  108. return $worksheetNames;
  109. }
  110. /**
  111. * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
  112. *
  113. * @param string $pFilename
  114. *
  115. * @return array
  116. */
  117. public function listWorksheetInfo($pFilename)
  118. {
  119. File::assertFile($pFilename);
  120. $xml = new XMLReader();
  121. $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
  122. $xml->setParserProperty(2, true);
  123. $worksheetInfo = [];
  124. while ($xml->read()) {
  125. if (self::matchXml($xml, 'Sheet')) {
  126. $tmpInfo = [
  127. 'worksheetName' => '',
  128. 'lastColumnLetter' => 'A',
  129. 'lastColumnIndex' => 0,
  130. 'totalRows' => 0,
  131. 'totalColumns' => 0,
  132. ];
  133. while ($xml->read()) {
  134. if (self::matchXml($xml, 'Name')) {
  135. $xml->read(); // Move onto the value node
  136. $tmpInfo['worksheetName'] = (string) $xml->value;
  137. } elseif (self::matchXml($xml, 'MaxCol')) {
  138. $xml->read(); // Move onto the value node
  139. $tmpInfo['lastColumnIndex'] = (int) $xml->value;
  140. $tmpInfo['totalColumns'] = (int) $xml->value + 1;
  141. } elseif (self::matchXml($xml, 'MaxRow')) {
  142. $xml->read(); // Move onto the value node
  143. $tmpInfo['totalRows'] = (int) $xml->value + 1;
  144. break;
  145. }
  146. }
  147. $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1);
  148. $worksheetInfo[] = $tmpInfo;
  149. }
  150. }
  151. return $worksheetInfo;
  152. }
  153. /**
  154. * @param string $filename
  155. *
  156. * @return string
  157. */
  158. private function gzfileGetContents($filename)
  159. {
  160. $file = @gzopen($filename, 'rb');
  161. $data = '';
  162. if ($file !== false) {
  163. while (!gzeof($file)) {
  164. $data .= gzread($file, 1024);
  165. }
  166. gzclose($file);
  167. }
  168. return $data;
  169. }
  170. public static function gnumericMappings(): array
  171. {
  172. return array_merge(self::$mappings, Styles::$mappings);
  173. }
  174. private function processComments(SimpleXMLElement $sheet): void
  175. {
  176. if ((!$this->readDataOnly) && (isset($sheet->Objects))) {
  177. foreach ($sheet->Objects->children(self::NAMESPACE_GNM) as $key => $comment) {
  178. $commentAttributes = $comment->attributes();
  179. // Only comment objects are handled at the moment
  180. if ($commentAttributes && $commentAttributes->Text) {
  181. $this->spreadsheet->getActiveSheet()->getComment((string) $commentAttributes->ObjectBound)
  182. ->setAuthor((string) $commentAttributes->Author)
  183. ->setText($this->parseRichText((string) $commentAttributes->Text));
  184. }
  185. }
  186. }
  187. }
  188. /**
  189. * @param mixed $value
  190. */
  191. private static function testSimpleXml($value): SimpleXMLElement
  192. {
  193. return ($value instanceof SimpleXMLElement) ? $value : new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><root></root>');
  194. }
  195. /**
  196. * Loads Spreadsheet from file.
  197. *
  198. * @return Spreadsheet
  199. */
  200. public function load(string $filename, int $flags = 0)
  201. {
  202. $this->processFlags($flags);
  203. // Create new Spreadsheet
  204. $spreadsheet = new Spreadsheet();
  205. $spreadsheet->removeSheetByIndex(0);
  206. // Load into this instance
  207. return $this->loadIntoExisting($filename, $spreadsheet);
  208. }
  209. /**
  210. * Loads from file into Spreadsheet instance.
  211. */
  212. public function loadIntoExisting(string $pFilename, Spreadsheet $spreadsheet): Spreadsheet
  213. {
  214. $this->spreadsheet = $spreadsheet;
  215. File::assertFile($pFilename);
  216. $gFileData = $this->gzfileGetContents($pFilename);
  217. $xml2 = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
  218. $xml = self::testSimpleXml($xml2);
  219. $gnmXML = $xml->children(self::NAMESPACE_GNM);
  220. (new Properties($this->spreadsheet))->readProperties($xml, $gnmXML);
  221. $worksheetID = 0;
  222. foreach ($gnmXML->Sheets->Sheet as $sheetOrNull) {
  223. $sheet = self::testSimpleXml($sheetOrNull);
  224. $worksheetName = (string) $sheet->Name;
  225. if (is_array($this->loadSheetsOnly) && !in_array($worksheetName, $this->loadSheetsOnly, true)) {
  226. continue;
  227. }
  228. $maxRow = $maxCol = 0;
  229. // Create new Worksheet
  230. $this->spreadsheet->createSheet();
  231. $this->spreadsheet->setActiveSheetIndex($worksheetID);
  232. // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in formula
  233. // cells... during the load, all formulae should be correct, and we're simply bringing the worksheet
  234. // name in line with the formula, not the reverse
  235. $this->spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false);
  236. if (!$this->readDataOnly) {
  237. (new PageSetup($this->spreadsheet))
  238. ->printInformation($sheet)
  239. ->sheetMargins($sheet);
  240. }
  241. foreach ($sheet->Cells->Cell as $cellOrNull) {
  242. $cell = self::testSimpleXml($cellOrNull);
  243. $cellAttributes = self::testSimpleXml($cell->attributes());
  244. $row = (int) $cellAttributes->Row + 1;
  245. $column = (int) $cellAttributes->Col;
  246. if ($row > $maxRow) {
  247. $maxRow = $row;
  248. }
  249. if ($column > $maxCol) {
  250. $maxCol = $column;
  251. }
  252. $column = Coordinate::stringFromColumnIndex($column + 1);
  253. // Read cell?
  254. if ($this->getReadFilter() !== null) {
  255. if (!$this->getReadFilter()->readCell($column, $row, $worksheetName)) {
  256. continue;
  257. }
  258. }
  259. $ValueType = $cellAttributes->ValueType;
  260. $ExprID = (string) $cellAttributes->ExprID;
  261. $type = DataType::TYPE_FORMULA;
  262. if ($ExprID > '') {
  263. if (((string) $cell) > '') {
  264. $this->expressions[$ExprID] = [
  265. 'column' => $cellAttributes->Col,
  266. 'row' => $cellAttributes->Row,
  267. 'formula' => (string) $cell,
  268. ];
  269. } else {
  270. $expression = $this->expressions[$ExprID];
  271. $cell = $this->referenceHelper->updateFormulaReferences(
  272. $expression['formula'],
  273. 'A1',
  274. $cellAttributes->Col - $expression['column'],
  275. $cellAttributes->Row - $expression['row'],
  276. $worksheetName
  277. );
  278. }
  279. $type = DataType::TYPE_FORMULA;
  280. } else {
  281. $vtype = (string) $ValueType;
  282. if (array_key_exists($vtype, self::$mappings['dataType'])) {
  283. $type = self::$mappings['dataType'][$vtype];
  284. }
  285. if ($vtype === '20') { // Boolean
  286. $cell = $cell == 'TRUE';
  287. }
  288. }
  289. $this->spreadsheet->getActiveSheet()->getCell($column . $row)->setValueExplicit((string) $cell, $type);
  290. }
  291. if ($sheet->Styles !== null) {
  292. (new Styles($this->spreadsheet, $this->readDataOnly))->read($sheet, $maxRow, $maxCol);
  293. }
  294. $this->processComments($sheet);
  295. $this->processColumnWidths($sheet, $maxCol);
  296. $this->processRowHeights($sheet, $maxRow);
  297. $this->processMergedCells($sheet);
  298. $this->processAutofilter($sheet);
  299. ++$worksheetID;
  300. }
  301. $this->processDefinedNames($gnmXML);
  302. // Return
  303. return $this->spreadsheet;
  304. }
  305. private function processMergedCells(?SimpleXMLElement $sheet): void
  306. {
  307. // Handle Merged Cells in this worksheet
  308. if ($sheet !== null && isset($sheet->MergedRegions)) {
  309. foreach ($sheet->MergedRegions->Merge as $mergeCells) {
  310. if (strpos((string) $mergeCells, ':') !== false) {
  311. $this->spreadsheet->getActiveSheet()->mergeCells($mergeCells);
  312. }
  313. }
  314. }
  315. }
  316. private function processAutofilter(?SimpleXMLElement $sheet): void
  317. {
  318. if ($sheet !== null && isset($sheet->Filters)) {
  319. foreach ($sheet->Filters->Filter as $autofilter) {
  320. if ($autofilter !== null) {
  321. $attributes = $autofilter->attributes();
  322. if (isset($attributes['Area'])) {
  323. $this->spreadsheet->getActiveSheet()->setAutoFilter((string) $attributes['Area']);
  324. }
  325. }
  326. }
  327. }
  328. }
  329. private function setColumnWidth(int $whichColumn, float $defaultWidth): void
  330. {
  331. $columnDimension = $this->spreadsheet->getActiveSheet()->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1));
  332. if ($columnDimension !== null) {
  333. $columnDimension->setWidth($defaultWidth);
  334. }
  335. }
  336. private function setColumnInvisible(int $whichColumn): void
  337. {
  338. $columnDimension = $this->spreadsheet->getActiveSheet()->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1));
  339. if ($columnDimension !== null) {
  340. $columnDimension->setVisible(false);
  341. }
  342. }
  343. private function processColumnLoop(int $whichColumn, int $maxCol, ?SimpleXMLElement $columnOverride, float $defaultWidth): int
  344. {
  345. $columnOverride = self::testSimpleXml($columnOverride);
  346. $columnAttributes = self::testSimpleXml($columnOverride->attributes());
  347. $column = $columnAttributes['No'];
  348. $columnWidth = ((float) $columnAttributes['Unit']) / 5.4;
  349. $hidden = (isset($columnAttributes['Hidden'])) && ((string) $columnAttributes['Hidden'] == '1');
  350. $columnCount = (int) ($columnAttributes['Count'] ?? 1);
  351. while ($whichColumn < $column) {
  352. $this->setColumnWidth($whichColumn, $defaultWidth);
  353. ++$whichColumn;
  354. }
  355. while (($whichColumn < ($column + $columnCount)) && ($whichColumn <= $maxCol)) {
  356. $this->setColumnWidth($whichColumn, $columnWidth);
  357. if ($hidden) {
  358. $this->setColumnInvisible($whichColumn);
  359. }
  360. ++$whichColumn;
  361. }
  362. return $whichColumn;
  363. }
  364. private function processColumnWidths(?SimpleXMLElement $sheet, int $maxCol): void
  365. {
  366. if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Cols))) {
  367. // Column Widths
  368. $defaultWidth = 0;
  369. $columnAttributes = $sheet->Cols->attributes();
  370. if ($columnAttributes !== null) {
  371. $defaultWidth = $columnAttributes['DefaultSizePts'] / 5.4;
  372. }
  373. $whichColumn = 0;
  374. foreach ($sheet->Cols->ColInfo as $columnOverride) {
  375. $whichColumn = $this->processColumnLoop($whichColumn, $maxCol, $columnOverride, $defaultWidth);
  376. }
  377. while ($whichColumn <= $maxCol) {
  378. $this->setColumnWidth($whichColumn, $defaultWidth);
  379. ++$whichColumn;
  380. }
  381. }
  382. }
  383. private function setRowHeight(int $whichRow, float $defaultHeight): void
  384. {
  385. $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow);
  386. if ($rowDimension !== null) {
  387. $rowDimension->setRowHeight($defaultHeight);
  388. }
  389. }
  390. private function setRowInvisible(int $whichRow): void
  391. {
  392. $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow);
  393. if ($rowDimension !== null) {
  394. $rowDimension->setVisible(false);
  395. }
  396. }
  397. private function processRowLoop(int $whichRow, int $maxRow, ?SimpleXMLElement $rowOverride, float $defaultHeight): int
  398. {
  399. $rowOverride = self::testSimpleXml($rowOverride);
  400. $rowAttributes = self::testSimpleXml($rowOverride->attributes());
  401. $row = $rowAttributes['No'];
  402. $rowHeight = (float) $rowAttributes['Unit'];
  403. $hidden = (isset($rowAttributes['Hidden'])) && ((string) $rowAttributes['Hidden'] == '1');
  404. $rowCount = (int) ($rowAttributes['Count'] ?? 1);
  405. while ($whichRow < $row) {
  406. ++$whichRow;
  407. $this->setRowHeight($whichRow, $defaultHeight);
  408. }
  409. while (($whichRow < ($row + $rowCount)) && ($whichRow < $maxRow)) {
  410. ++$whichRow;
  411. $this->setRowHeight($whichRow, $rowHeight);
  412. if ($hidden) {
  413. $this->setRowInvisible($whichRow);
  414. }
  415. }
  416. return $whichRow;
  417. }
  418. private function processRowHeights(?SimpleXMLElement $sheet, int $maxRow): void
  419. {
  420. if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Rows))) {
  421. // Row Heights
  422. $defaultHeight = 0;
  423. $rowAttributes = $sheet->Rows->attributes();
  424. if ($rowAttributes !== null) {
  425. $defaultHeight = (float) $rowAttributes['DefaultSizePts'];
  426. }
  427. $whichRow = 0;
  428. foreach ($sheet->Rows->RowInfo as $rowOverride) {
  429. $whichRow = $this->processRowLoop($whichRow, $maxRow, $rowOverride, $defaultHeight);
  430. }
  431. // never executed, I can't figure out any circumstances
  432. // under which it would be executed, and, even if
  433. // such exist, I'm not convinced this is needed.
  434. //while ($whichRow < $maxRow) {
  435. // ++$whichRow;
  436. // $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow)->setRowHeight($defaultHeight);
  437. //}
  438. }
  439. }
  440. private function processDefinedNames(?SimpleXMLElement $gnmXML): void
  441. {
  442. // Loop through definedNames (global named ranges)
  443. if ($gnmXML !== null && isset($gnmXML->Names)) {
  444. foreach ($gnmXML->Names->Name as $definedName) {
  445. $name = (string) $definedName->name;
  446. $value = (string) $definedName->value;
  447. if (stripos($value, '#REF!') !== false) {
  448. continue;
  449. }
  450. [$worksheetName] = Worksheet::extractSheetTitle($value, true);
  451. $worksheetName = trim($worksheetName, "'");
  452. $worksheet = $this->spreadsheet->getSheetByName($worksheetName);
  453. // Worksheet might still be null if we're only loading selected sheets rather than the full spreadsheet
  454. if ($worksheet !== null) {
  455. $this->spreadsheet->addDefinedName(DefinedName::createInstance($name, $worksheet, $value));
  456. }
  457. }
  458. }
  459. }
  460. private function parseRichText(string $is): RichText
  461. {
  462. $value = new RichText();
  463. $value->createText($is);
  464. return $value;
  465. }
  466. }