Highlighter.php 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031
  1. <?php
  2. /* Copyright (c)
  3. * - 2006-2013, Ivan Sagalaev (maniac@softwaremaniacs.org), highlight.js
  4. * (original author)
  5. * - 2013-2019, Geert Bergman (geert@scrivo.nl), highlight.php
  6. * - 2014 Daniel Lynge, highlight.php (contributor)
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions are met:
  10. *
  11. * 1. Redistributions of source code must retain the above copyright notice,
  12. * this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright notice,
  14. * this list of conditions and the following disclaimer in the documentation
  15. * and/or other materials provided with the distribution.
  16. * 3. Neither the name of "highlight.js", "highlight.php", nor the names of its
  17. * contributors may be used to endorse or promote products derived from this
  18. * software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  21. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  24. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30. * POSSIBILITY OF SUCH DAMAGE.
  31. */
  32. namespace Highlight;
  33. /**
  34. * @api
  35. *
  36. * @since 7.5.0.0
  37. */
  38. class Highlighter
  39. {
  /**
   * Closing tag appended for every span opened by this class.
   *
   * @since 9.12.0.4
   */
  const SPAN_END_TAG = "</span>";

  /** @var bool Disable warnings thrown on PHP installations without multibyte functions available. */
  public static $DISABLE_MULTIBYTE_WARNING = false;

  /**
   * @var bool Forwarded to the language's `compile()` call; while true, `highlight()` returns an
   *           escaped fallback result (with `errorRaised` set) instead of rethrowing unexpected exceptions.
   */
  private $safeMode = true;

  // @TODO In v10.x, this value should be static to match highlight.js behavior

  /** @var array<string, mixed> Per-instance settings: `classPrefix`, `tabReplace`, `useBR` and `languages` (the auto-detection set). */
  private $options;

  /** @var string Text collected for the current mode that has not been rendered into `$result` yet */
  private $modeBuffer = "";

  /** @var string The HTML produced so far by the current `highlight()` call */
  private $result = "";

  /** @var Mode|null The innermost open mode; enclosing modes are reachable through its `parent` chain */
  private $top = null;

  /** @var Language|null The language definition used by the current `highlight()` call */
  private $language = null;

  /** @var int Relevance score accumulated during the current `highlight()` call */
  private $relevance = 0;

  /** @var bool When false, an "illegal" match aborts highlighting with an exception */
  private $ignoreIllegals = false;

  /** @var array<string, Mode> Last `top` mode returned for each explicitly named sub-language, keyed by language name */
  private $continuations = array();

  /** @var RegExMatch The most recent match handled by `processLexeme()` */
  private $lastMatch;

  /** @var string The current code we are highlighting */
  private $codeToHighlight;

  /** @var bool True when the code being highlighted contains bytes outside the ASCII range */
  private $needsMultibyteSupport = false;

  /** @var bool|null Whether `mb_strtolower()` exists; null until it has been checked */
  private static $hasMultiByteSupport = null;

  /** @var bool Set once the missing-mbstring condition has been handled, so the warning is raised at most once */
  private static $hasThrownMultiByteWarning = false;

  /** @var string[] A list of all the bundled languages (filled lazily by `listBundledLanguages()`) */
  private static $bundledLanguages = array();

  /** @var array<string, Language> A mapping of a language ID to a Language definition */
  private static $classMap = array();

  /** @var string[] A list of registered language IDs */
  private static $languages = array();

  /** @var array<string, string> A mapping from alias (key) to main language ID (value) */
  private static $aliases = array();
  /**
   * Set up the initial match state and the default option set.
   *
   * @param bool $loadAllLanguages If true, will automatically register all languages distributed with this library.
   *                               If false, user must explicitly register languages by calling `registerLanguage()`.
   *
   * @since 9.18.1.4 added `$loadAllLanguages` parameter
   * @see Highlighter::registerLanguage()
   */
  public function __construct($loadAllLanguages = true)
  {
      // Start from an empty, untyped match so the first processed lexeme has something to compare against.
      $initialMatch = new RegExMatch(array());
      $initialMatch->type = "";
      $initialMatch->rule = null;
      $this->lastMatch = $initialMatch;

      // @TODO In v10.x, remove the default value for the `languages` value to follow highlight.js behavior
      $defaultAutodetectSet = array(
          "xml", "json", "javascript", "css", "php", "http",
      );
      $this->options = array(
          'classPrefix' => 'hljs-',
          'tabReplace' => null,
          'useBR' => false,
          'languages' => $defaultAutodetectSet,
      );

      if (!$loadAllLanguages) {
          return;
      }

      self::registerAllLanguages();
  }
  /**
   * Return a list of all available languages bundled with this library.
   *
   * The `languages/` directory next to this file is scanned once for readable
   * `*.json` files; the resulting list is cached for subsequent calls.
   *
   * @since 9.18.1.4
   *
   * @throws \RuntimeException if the language definition directory cannot be opened
   *
   * @return string[] An array of language names
   */
  public static function listBundledLanguages()
  {
      // Serve the cached list when an earlier call already produced one.
      if (!empty(self::$bundledLanguages)) {
          return self::$bundledLanguages;
      }

      $directory = __DIR__ . '/languages/';
      $handle = @dir($directory);

      if (!$handle) {
          throw new \RuntimeException('Could not read bundled language definition directory.');
      }

      // Languages that take precedence in the classMap array; they are listed first.
      // The array keys act as a set, so rediscovering them on disk adds no duplicates.
      $found = array_fill_keys(array("xml", "django", "javascript", "matlab", "cpp"), true);

      // @TODO In 10.x, rewrite this as a generator yielding results
      while (false !== ($fileName = $handle->read())) {
          if (substr($fileName, -5) !== ".json") {
              continue;
          }

          if (!is_readable($directory . $fileName)) {
              continue;
          }

          $found[substr($fileName, 0, -5)] = true;
      }

      $handle->close();

      self::$bundledLanguages = array_keys($found);

      return self::$bundledLanguages;
  }
  /**
   * Return a list of all the registered languages. Using this list in
   * setAutodetectLanguages will turn on auto-detection for all supported
   * languages.
   *
   * @since 9.18.1.4
   *
   * @param bool $includeAliases Specify whether language aliases should be
   *                             included as well (only the literal `true` enables this)
   *
   * @return string[] An array of language names
   */
  public static function listRegisteredLanguages($includeAliases = false)
  {
      if ($includeAliases !== true) {
          return self::$languages;
      }

      $aliasNames = array_keys(self::$aliases);

      return array_merge(self::$languages, $aliasNames);
  }
  /**
   * Register all 185+ languages that are bundled in this library.
   *
   * A fixed set of languages is registered first, then every other readable
   * `*.json` definition found in the `languages` directory. If that directory
   * cannot be opened, the scan is skipped without raising an error.
   *
   * To register languages individually, use `registerLanguage`.
   *
   * @since 9.18.1.4 Method is now public
   * @since 8.3.0.0
   * @see Highlighter::registerLanguage
   *
   * @return void
   */
  public static function registerAllLanguages()
  {
      $directory = __DIR__ . DIRECTORY_SEPARATOR . "languages" . DIRECTORY_SEPARATOR;

      // Languages that take precedence in the classMap array.
      $priorityIds = array("xml", "django", "javascript", "matlab", "cpp");
      foreach ($priorityIds as $id) {
          $definitionFile = $directory . $id . ".json";

          if (is_readable($definitionFile)) {
              self::registerLanguage($id, $definitionFile);
          }
      }

      // @TODO In 10.x, call `listBundledLanguages()` instead when it's a generator
      $handle = @dir($directory);
      if (!$handle) {
          return;
      }

      while (false !== ($fileName = $handle->read())) {
          if (substr($fileName, -5) !== ".json") {
              continue;
          }

          $definitionFile = $directory . $fileName;
          if (!is_readable($definitionFile)) {
              continue;
          }

          self::registerLanguage(substr($fileName, 0, -5), $definitionFile);
      }

      $handle->close();
  }
  /**
   * Register a language definition with the Highlighter's internal language
   * storage. Languages are stored in a static variable, so they'll be available
   * across all instances. You only need to register a language once.
   *
   * When the language is already registered and `$overwrite` is false, the
   * existing definition is returned and `$filePath` is not read.
   *
   * @param string $languageId The unique name of a language
   * @param string $filePath   The file path to the language definition
   * @param bool   $overwrite  Overwrite language if it already exists
   *
   * @return Language The object containing the definition for a language's markup
   */
  public static function registerLanguage($languageId, $filePath, $overwrite = false)
  {
      if ($overwrite || !isset(self::$classMap[$languageId])) {
          $lang = new Language($languageId, $filePath);
          self::$classMap[$languageId] = $lang;

          // Append the ID only when it is new. This keeps the list free of duplicates
          // and sequentially indexed without de-duplicating the whole list on every call.
          if (!in_array($languageId, self::$languages, true)) {
              self::$languages[] = $languageId;
          }

          if ($lang->aliases) {
              foreach ($lang->aliases as $alias) {
                  self::$aliases[$alias] = $languageId;
              }
          }
      }

      return self::$classMap[$languageId];
  }
  /**
   * Clear all registered languages.
   *
   * Empties the language map, the list of registered IDs and the alias map.
   * The cached list of bundled languages is not touched.
   *
   * @since 9.18.1.4
   *
   * @return void
   */
  public static function clearAllLanguages()
  {
      self::$aliases = array();
      self::$languages = array();
      self::$classMap = array();
  }
  /**
   * Check whether a regular expression matches at the very start of a string.
   *
   * The expression's `lastIndex` is saved before the call to `exec()` and
   * written back afterwards, so the caller's scanning position is preserved.
   *
   * @param RegEx|null $re
   * @param string     $lexeme
   *
   * @return bool True only when `$re` is set and its match starts at offset 0
   */
  private function testRe($re, $lexeme)
  {
      if ($re) {
          $savedIndex = $re->lastIndex;
          $found = $re->exec($lexeme);
          $re->lastIndex = $savedIndex;

          if ($found) {
              return $found->index === 0;
          }
      }

      return false;
  }
  /**
   * Build a regular expression that matches the given text literally.
   *
   * @param string $value The literal text to match
   *
   * @return RegEx
   */
  private function escapeRe($value)
  {
      // The pattern is wrapped in `/` delimiters, so the delimiter has to be quoted as well;
      // otherwise a value containing `/` would terminate the pattern early.
      return new RegEx(sprintf('/%s/um', preg_quote($value, '/')));
  }
  /**
   * Find the mode that is closed by the text at the current position.
   *
   * Starting at `$mode`, the end expression is tested against `$lexeme`. On a
   * hit, the result is the mode itself, or the nearest ancestor reached by
   * following `parent` while `endsParent` is set. Without a hit, the search
   * continues in the parent as long as `endsWithParent` is set.
   *
   * @param Mode   $mode
   * @param string $lexeme
   *
   * @return Mode|null The mode that ends here, or null when none does
   */
  private function endOfMode($mode, $lexeme)
  {
      while (true) {
          if ($this->testRe($mode->endRe, $lexeme)) {
              while ($mode->endsParent && $mode->parent) {
                  $mode = $mode->parent;
              }

              return $mode;
          }

          if (!$mode->endsWithParent) {
              return null;
          }

          $mode = $mode->parent;
      }
  }
  /**
   * Look up the matched text in a mode's keyword table.
   *
   * For case-insensitive languages the text is lower-cased before the lookup.
   *
   * @param Mode       $mode
   * @param RegExMatch $match
   *
   * @return mixed|null The keyword entry, or null when the text is not a keyword
   */
  private function keywordMatch($mode, $match)
  {
      $candidate = $match[0];

      if ($this->language->case_insensitive) {
          $candidate = $this->strToLower($candidate);
      }

      if (!isset($mode->keywords[$candidate])) {
          return null;
      }

      return $mode->keywords[$candidate];
  }
  /**
   * Wrap a piece of already escaped markup in a `<span>` carrying a CSS class.
   *
   * @param string $className  CSS class; when empty, `$insideSpan` is returned unwrapped
   * @param string $insideSpan Markup to place inside the span
   * @param bool   $leaveOpen  Omit the closing tag
   * @param bool   $noPrefix   Do not prepend the configured class prefix
   *
   * @return string The span markup, or an empty string when there is no content and the span is to be closed
   */
  private function buildSpan($className, $insideSpan, $leaveOpen = false, $noPrefix = false)
  {
      // A closed span without content carries no information.
      if ($insideSpan === '' && !$leaveOpen) {
          return '';
      }

      if (!$className) {
          return $insideSpan;
      }

      $prefix = $noPrefix ? "" : $this->options['classPrefix'];
      $markup = "<span class=\"" . $prefix . $className . "\">" . $insideSpan;

      if (!$leaveOpen) {
          $markup .= self::SPAN_END_TAG;
      }

      return $markup;
  }
  /**
   * Escape the HTML special characters of a string, leaving quotes untouched.
   *
   * @param string $value
   *
   * @return string
   */
  private function escape($value)
  {
      $escaped = htmlspecialchars($value, ENT_NOQUOTES);

      return $escaped;
  }
  /**
   * Render the current mode buffer, wrapping every keyword of the current mode in a span.
   *
   * The buffer is split with the mode's lexeme expression. Text between lexemes
   * and lexemes that are not keywords are only escaped. Each keyword hit adds
   * its second entry to the relevance score and uses its first entry as the class name.
   *
   * @return string The escaped (and possibly span-wrapped) buffer contents
   */
  private function processKeywords()
  {
      $mode = $this->top;
      $buffer = $this->modeBuffer;

      if (!$mode->keywords) {
          return $this->escape($buffer);
      }

      $lexer = $mode->lexemesRe;
      $lexer->lastIndex = 0;

      $markup = "";
      $cursor = 0;

      for ($hit = $lexer->exec($buffer); $hit; $hit = $lexer->exec($buffer)) {
          // Text between the previous lexeme and this one.
          $markup .= $this->escape(substr($buffer, $cursor, $hit->index - $cursor));

          $text = $this->escape($hit[0]);
          $keyword = $this->keywordMatch($mode, $hit);

          if ($keyword) {
              $this->relevance += $keyword[1];
              $text = $this->buildSpan($keyword[0], $text);
          }

          $markup .= $text;
          $cursor = $lexer->lastIndex;
      }

      return $markup . $this->escape(substr($buffer, $cursor));
  }
  /**
   * Render the current mode buffer with the sub-language of the current mode.
   *
   * A string `subLanguage` names one language explicitly; its highlighting
   * state is carried over between buffers through `$continuations`. Any other
   * value triggers auto-detection, restricted to the given list when it is a
   * non-empty array. The markup is wrapped in a span whose class is the
   * resulting language name without the class prefix.
   *
   * If an explicitly named language is not registered, or if highlighting
   * throws, the buffer is returned escaped and without markup.
   *
   * @return string
   */
  private function processSubLanguage()
  {
      try {
          // Registered languages live in static storage shared by all instances, so the
          // nested highlighter must neither scan the language directory again nor register
          // languages the caller chose not to load.
          $hl = new Highlighter(false);

          // @TODO in v10.x, this should no longer be necessary once `$options` is made static
          $hl->setAutodetectLanguages($this->options['languages']);
          $hl->setClassPrefix($this->options['classPrefix']);
          $hl->setTabReplace($this->options['tabReplace']);

          if (!$this->safeMode) {
              $hl->disableSafeMode();
          }

          $subLanguage = $this->top->subLanguage;
          $explicit = is_string($subLanguage);

          if ($explicit && !in_array($subLanguage, self::$languages, true)) {
              return $this->escape($this->modeBuffer);
          }

          if ($explicit) {
              $continuation = isset($this->continuations[$subLanguage]) ? $this->continuations[$subLanguage] : null;
              $res = $hl->highlight($subLanguage, $this->modeBuffer, true, $continuation);
          } else {
              // Only a non-empty array is a usable subset; `count()` must not see a non-countable value.
              $subset = (is_array($subLanguage) && count($subLanguage) > 0) ? $subLanguage : null;
              $res = $hl->highlightAuto($this->modeBuffer, $subset);
          }

          // Counting embedded language score towards the host language may be disabled
          // with zeroing the containing mode relevance. Use case in point is Markdown that
          // allows XML everywhere and makes every XML snippet to have a much larger Markdown
          // score.
          if ($this->top->relevance > 0) {
              $this->relevance += $res->relevance;
          }

          if ($explicit) {
              $this->continuations[$subLanguage] = $res->top;
          }

          return $this->buildSpan($res->language, $res->value, false, true);
      } catch (\Exception $e) {
          return $this->escape($this->modeBuffer);
      }
  }
  /**
   * Render the pending mode buffer into the result and empty the buffer.
   *
   * Modes with a sub-language are rendered by `processSubLanguage()`, all
   * others by `processKeywords()`.
   *
   * @return void
   */
  private function processBuffer()
  {
      $hasSubLanguage = is_object($this->top) && $this->top->subLanguage;

      if ($hasSubLanguage) {
          $rendered = $this->processSubLanguage();
      } else {
          $rendered = $this->processKeywords();
      }

      $this->result .= $rendered;
      $this->modeBuffer = '';
  }
  /**
   * Enter a mode: open its span (if it has a class name) and push a copy of it onto the mode stack.
   *
   * The mode is cloned so that setting `parent` does not modify the definition passed in.
   *
   * @param Mode $mode
   *
   * @return void
   */
  private function startNewMode($mode)
  {
      if ($mode->className) {
          $this->result .= $this->buildSpan($mode->className, "", true);
      }

      $frame = clone $mode;
      $frame->parent = $this->top;

      $this->top = $frame;
  }
  /**
   * Handle a match that starts a new mode.
   *
   * Depending on the rule's `skip`, `excludeBegin` and `returnBegin` flags the
   * matched text is added to the buffer of the enclosing mode, becomes the
   * start of the new mode's buffer, or is left to be scanned again.
   *
   * @param RegExMatch $match
   *
   * @return int Number of bytes consumed: 0 for `returnBegin` rules, the lexeme length otherwise
   */
  private function doBeginMatch($match)
  {
      $rule = $match->rule;
      $text = $match[0];

      // The closing delimiter of such a rule is exactly the text that opened it.
      if ($rule && $rule->endSameAsBegin) {
          $rule->endRe = $this->escapeRe($text);
      }

      // Skipped and begin-excluding rules leave the lexeme in the enclosing mode's buffer.
      if ($rule->skip || $rule->excludeBegin) {
          $this->modeBuffer .= $text;
      }

      if (!$rule->skip) {
          $this->processBuffer();

          if (!($rule->returnBegin || $rule->excludeBegin)) {
              $this->modeBuffer = $text;
          }
      }

      $this->startNewMode($rule);

      if ($rule->returnBegin) {
          return 0;
      }

      return strlen($text);
  }
  /**
   * Handle a match that may end the current mode.
   *
   * The end rule is re-applied to the code starting at the match position. If
   * no mode ends there, null is returned and nothing is changed. Otherwise the
   * buffer is flushed, every mode up to and including the ending one is
   * closed, and the mode named by `starts` (if any) is entered.
   *
   * @param RegExMatch $match
   *
   * @return int|null Number of bytes consumed (0 for `returnEnd` modes), or null when no mode ends here
   */
  private function doEndMatch($match)
  {
      $text = $match[0];
      $remainder = substr($this->codeToHighlight, $match->index);

      $endMode = $this->endOfMode($this->top, $remainder);
      if (!$endMode) {
          return null;
      }

      $origin = $this->top;

      // The lexeme stays inside the ending mode unless that mode returns or excludes its end.
      if ($origin->skip || (!$origin->returnEnd && !$origin->excludeEnd)) {
          $this->modeBuffer .= $text;
      }

      if (!$origin->skip) {
          $this->processBuffer();

          if ($origin->excludeEnd) {
              $this->modeBuffer = $text;
          }
      }

      // Unwind the stack until the ending mode itself has been popped.
      $stopAt = $endMode->parent;
      do {
          $closing = $this->top;

          if ($closing->className) {
              $this->result .= self::SPAN_END_TAG;
          }

          if (!$closing->skip && !$closing->subLanguage) {
              $this->relevance += $closing->relevance;
          }

          $this->top = $closing->parent;
      } while ($this->top !== $stopAt);

      if ($endMode->starts) {
          if ($endMode->endSameAsBegin) {
              $endMode->starts->endRe = $endMode->endRe;
          }

          $this->startNewMode($endMode->starts);
      }

      if ($origin->returnEnd) {
          return 0;
      }

      return strlen($text);
  }
  /**
   * Process one scanner match together with the unmatched text that precedes it.
   *
   * Called without a match, it flushes the remaining text and returns 0.
   *
   * @param string          $textBeforeMatch
   * @param RegExMatch|null $match
   *
   * @throws \UnexpectedValueException on an illegal match while illegals are not ignored
   *
   * @return int Number of bytes of the lexeme that were consumed
   */
  private function processLexeme($textBeforeMatch, $match = null)
  {
      $lexeme = null;
      if ($match) {
          $lexeme = $match[0];
      }

      // add non-matched text to the current mode buffer
      $this->modeBuffer .= $textBeforeMatch;

      if ($lexeme === null) {
          $this->processBuffer();

          return 0;
      }

      // We've found a zero-width match and we're stuck, so we need to advance. This happens
      // with badly behaved rules whose matchers are optional to the degree that they can end up
      // matching nothing at all.
      // Ref: https://github.com/highlightjs/highlight.js/issues/2140
      $isStuck = $lexeme === ""
          && $match->type === "end"
          && $this->lastMatch->type === "begin"
          && $this->lastMatch->index === $match->index;

      if ($isStuck) {
          // put the "skipped" character that our regex choked on back into the output sequence
          $this->modeBuffer .= substr($this->codeToHighlight, $match->index, 1);

          return 1;
      }

      $this->lastMatch = $match;
      $type = $match->type;

      if ($type === "begin") {
          return $this->doBeginMatch($match);
      }

      if ($type === "illegal" && !$this->ignoreIllegals) {
          // illegal match, we do not continue processing
          $modeName = isset($this->top->className) ? $this->top->className : "<unnamed>";

          throw new \UnexpectedValueException(sprintf('Illegal lexeme "%s" for mode "%s"', $lexeme, $modeName));
      }

      if ($type === "end") {
          $consumed = $this->doEndMatch($match);

          if ($consumed !== null) {
              return $consumed;
          }
      }

      // Why might we find ourselves here? Only one occasion now: an end match that was
      // triggered but could not be completed. This happens when an `endSameAsBegin` rule sets
      // the end rule to a specific match. The overall mode termination rule used to scan the
      // text is not recompiled, so any match that LOOKS like the end (but is not, because it is
      // not an exact match to the beginning) ends up here. `doEndMatch` re-applies the end rule,
      // fails to match, and the end is silently ignored.
      //
      // This causes no real harm other than stopping a few times too many.
      $this->modeBuffer .= $lexeme;

      return strlen($lexeme);
  }
  /**
   * Replace every tab character with the configured `tabReplace` string.
   *
   * @param string $code
   *
   * @return string The code unchanged when no replacement is configured (null)
   */
  private function replaceTabs($code)
  {
      $replacement = $this->options['tabReplace'];

      if ($replacement === null) {
          return $code;
      }

      return str_replace("\t", $replacement, $code);
  }
  /**
   * Determine whether the code to highlight needs multibyte-aware string handling.
   *
   * Sets `$needsMultibyteSupport` when the code contains a byte outside the
   * ASCII range. In that case the availability of `mb_strtolower()` is looked
   * up once, and a warning is raised at most once when it is missing (unless
   * `$DISABLE_MULTIBYTE_WARNING` is set).
   *
   * @return void
   */
  private function checkMultibyteNecessity()
  {
      $hasNonAscii = preg_match('/[^\x00-\x7F]/', $this->codeToHighlight) === 1;
      $this->needsMultibyteSupport = $hasNonAscii;

      // If we aren't working with Unicode strings, then we default to `strtolower` since it's significantly faster
      // https://github.com/scrivo/highlight.php/pull/92#pullrequestreview-782213861
      if (!$hasNonAscii) {
          return;
      }

      if (self::$hasMultiByteSupport === null) {
          self::$hasMultiByteSupport = function_exists('mb_strtolower');
      }

      if (self::$hasMultiByteSupport || self::$hasThrownMultiByteWarning) {
          return;
      }

      if (!self::$DISABLE_MULTIBYTE_WARNING) {
          trigger_error('Your code snippet has unicode characters but your PHP version does not have multibyte string support. You should install the `mbstring` PHP package or `symfony/polyfill-mbstring` composer package if you use unicode characters.', E_USER_WARNING);
      }

      self::$hasThrownMultiByteWarning = true;
  }
  /**
   * Lower-case a string, using `mb_strtolower()` only when the current code
   * needs multibyte handling and that function is available.
   *
   * Allows for graceful failure if the mb_strtolower function doesn't exist.
   *
   * @param string $str
   *
   * @return string
   */
  private function strToLower($str)
  {
      if (!$this->needsMultibyteSupport || !self::$hasMultiByteSupport) {
          return strtolower($str);
      }

      return mb_strtolower($str);
  }
  /**
   * Set the languages that will be used for auto-detection. When using
   * auto-detection the code to highlight will be probed for every language in
   * this set. Limiting this set to only the languages you want to use will
   * greatly improve highlighting speed.
   *
   * @param string[] $set An array of language names to use for auto-detection.
   *                      Duplicates are removed. This defaults to a typical
   *                      set of Web development languages.
   *
   * @return void
   */
  public function setAutodetectLanguages(array $set)
  {
      $withoutDuplicates = array_unique($set);

      $this->options['languages'] = $withoutDuplicates;
  }
  /**
   * Get the tab replacement string.
   *
   * @return string|null The tab replacement string, or null when tabs are left untouched
   */
  public function getTabReplace()
  {
      $replacement = $this->options['tabReplace'];

      return $replacement;
  }
  /**
   * Set the string every tab character is replaced with. The default is
   * null, which leaves tabs as they are.
   *
   * @param string|null $tabReplace The tab replacement string
   *
   * @return void
   */
  public function setTabReplace($tabReplace)
  {
      $optionName = 'tabReplace';

      $this->options[$optionName] = $tabReplace;
  }
  /**
   * Get the prefix that is put in front of every generated CSS class name.
   *
   * @return string The class prefix string
   */
  public function getClassPrefix()
  {
      $prefix = $this->options['classPrefix'];

      return $prefix;
  }
  /**
   * Set the prefix that is put in front of every generated CSS class name.
   *
   * @param string $classPrefix The class prefix string
   *
   * @return void
   */
  public function setClassPrefix($classPrefix)
  {
      $optionName = 'classPrefix';

      $this->options[$optionName] = $classPrefix;
  }
  /**
   * Turn safe mode on (the default). In safe mode `highlight()` returns the
   * escaped code with `errorRaised` set instead of rethrowing an unexpected exception.
   *
   * @since 9.17.1.0
   *
   * @return void
   */
  public function enableSafeMode()
  {
      $this->safeMode = true;
  }
  /**
   * Turn safe mode off, so that unexpected exceptions raised while
   * highlighting are rethrown by `highlight()`.
   *
   * @since 9.17.1.0
   *
   * @return void
   */
  public function disableSafeMode()
  {
      $this->safeMode = false;
  }
  /**
   * Resolve a language ID or an alias to its registered definition.
   *
   * A registered ID takes precedence over an alias with the same name.
   *
   * @param string $name
   *
   * @return Language|null The definition, or null when the name is unknown
   */
  private function getLanguage($name)
  {
      $languageId = null;

      if (isset(self::$classMap[$name])) {
          $languageId = $name;
      } elseif (isset(self::$aliases[$name])) {
          $languageId = self::$aliases[$name];
      }

      if ($languageId === null || !isset(self::$classMap[$languageId])) {
          return null;
      }

      return self::$classMap[$languageId];
  }
  /**
   * Determine whether or not a language definition supports auto detection.
   *
   * @param string $name Language name
   *
   * @return bool False when the language is unknown or has `disableAutodetect` set
   */
  private function autoDetection($name)
  {
      $definition = $this->getLanguage($name);

      if (!$definition) {
          return false;
      }

      return !$definition->disableAutodetect;
  }
  /**
   * Core highlighting function. Accepts a language name, or an alias, and a
   * string with the code to highlight. Returns an object with the following
   * properties:
   * - relevance (int)
   * - value (an HTML string with highlighting markup)
   * - language (the name of the language that was used)
   * - top (the innermost mode still open at the end of the code)
   * - illegal (bool, true when an illegal lexeme aborted highlighting)
   * - errorRaised (the exception swallowed in safe mode, otherwise null).
   *
   * When highlighting is aborted, `value` holds the escaped, unhighlighted
   * code and `relevance` is 0.
   *
   * @todo In v10.x, change the return type from \stdClass to HighlightResult
   *
   * @param string    $languageName
   * @param string    $code
   * @param bool      $ignoreIllegals
   * @param Mode|null $continuation   Mode to resume from, as returned in `top` by an earlier call
   *
   * @throws \DomainException if the requested language was not in this
   *                          Highlighter's language set
   * @throws \Exception       if an invalid regex was given in a language file
   *
   * @return HighlightResult|\stdClass
   */
  public function highlight($languageName, $code, $ignoreIllegals = true, $continuation = null)
  {
      $this->codeToHighlight = $code;
      $this->language = $this->getLanguage($languageName);

      if ($this->language === null) {
          throw new \DomainException("Unknown language: \"$languageName\"");
      }

      $this->checkMultibyteNecessity();
      $this->language->compile($this->safeMode);
      $this->top = $continuation ? $continuation : $this->language;
      $this->continuations = array();
      $this->result = "";

      // Re-open the spans of every mode that is still open in the continuation, outermost first.
      for ($current = $this->top; $current !== $this->language; $current = $current->parent) {
          if ($current->className) {
              $this->result = $this->buildSpan($current->className, '', true) . $this->result;
          }
      }

      $this->modeBuffer = "";
      $this->relevance = 0;
      $this->ignoreIllegals = $ignoreIllegals;

      /** @var HighlightResult $res */
      $res = new \stdClass();
      $res->relevance = 0;
      $res->value = "";
      $res->language = "";
      $res->top = null;
      // Initialised here so that every return path, including the safe-mode fallback, defines it.
      $res->illegal = false;
      $res->errorRaised = null;

      try {
          $match = null;
          $count = 0;
          $index = 0;

          while ($this->top) {
              $this->top->terminators->lastIndex = $index;
              $match = $this->top->terminators->exec($this->codeToHighlight);

              if (!$match) {
                  break;
              }

              $count = $this->processLexeme(substr($this->codeToHighlight, $index, $match->index - $index), $match);
              $index = $match->index + $count;
          }

          // Flush whatever follows the last match.
          $this->processLexeme(substr($this->codeToHighlight, $index));

          // Close the spans of all modes that are still open.
          for ($current = $this->top; isset($current->parent); $current = $current->parent) {
              if ($current->className) {
                  $this->result .= self::SPAN_END_TAG;
              }
          }

          $res->relevance = $this->relevance;
          $res->value = $this->replaceTabs($this->result);
          $res->illegal = false;
          $res->language = $this->language->name;
          $res->top = $this->top;

          return $res;
      } catch (\Exception $e) {
          // Illegal-lexeme failures are recognised by their message text.
          if (strpos($e->getMessage(), "Illegal") !== false) {
              $res->illegal = true;
              $res->relevance = 0;
              $res->value = $this->escape($this->codeToHighlight);

              return $res;
          } elseif ($this->safeMode) {
              $res->relevance = 0;
              $res->value = $this->escape($this->codeToHighlight);
              $res->language = $languageName;
              $res->top = $this->top;
              $res->errorRaised = $e;

              return $res;
          }

          throw $e;
      }
  }
  /**
   * Highlight the given code with each candidate language and return the
   * result with the highest relevance. When another language also matched,
   * its result is attached as `secondBest`.
   *
   * Unknown languages and languages that disable auto-detection are skipped.
   * If no candidate scores above 0, the result holds the escaped code with an
   * empty language name.
   *
   * @param string        $code
   * @param string[]|null $languageSubset When set to null, the languages configured through
   *                                      `setAutodetectLanguages()` are tried, or every registered
   *                                      language when that set is empty. Set this to an array of
   *                                      languages of your choice to limit the amount of languages to try.
   *
   * @throws \Exception       if an invalid regex was given in a language file
   * @throws \DomainException if the attempted language to check does not exist
   *
   * @return HighlightResult|\stdClass
   */
  public function highlightAuto($code, $languageSubset = null)
  {
      /** @var HighlightResult $best */
      $best = new \stdClass();
      $best->relevance = 0;
      $best->value = $this->escape($code);
      $best->language = "";

      $runnerUp = clone $best;

      if ($languageSubset === null) {
          $configured = $this->options['languages'];
          $hasConfiguredSet = is_array($configured) && count($configured) > 0;

          $languageSubset = $hasConfiguredSet ? $configured : self::$languages;
      }

      foreach ($languageSubset as $name) {
          // Also false for names that do not resolve to a registered language.
          if (!$this->autoDetection($name)) {
              continue;
          }

          $candidate = $this->highlight($name, $code, false);

          if ($candidate->relevance > $best->relevance) {
              $runnerUp = $best;
              $best = $candidate;
          } elseif ($candidate->relevance > $runnerUp->relevance) {
              $runnerUp = $candidate;
          }
      }

      if ($runnerUp->language) {
          $best->secondBest = $runnerUp;
      }

      return $best;
  }
  /**
   * Return a list of all supported languages. Using this list in
   * setAutodetectLanguages will turn on autodetection for all supported
   * languages.
   *
   * Falls back to the bundled languages, with a warning, when no language is registered.
   *
   * @deprecated use `Highlighter::listRegisteredLanguages()` or `Highlighter::listBundledLanguages()` instead
   *
   * @param bool $include_aliases specify whether language aliases
   *                              should be included as well
   *
   * @since 9.18.1.4 Deprecated in favor of `Highlighter::listRegisteredLanguages()`
   *                 and `Highlighter::listBundledLanguages()`.
   * @since 9.12.0.3 The `$include_aliases` parameter was added
   * @since 8.3.0.0
   *
   * @return string[] An array of language names
   */
  public function listLanguages($include_aliases = false)
  {
      @trigger_error('This method is deprecated in favor `Highlighter::listRegisteredLanguages()` or `Highlighter::listBundledLanguages()`. This function will be removed in highlight.php 10.', E_USER_DEPRECATED);

      if (empty(self::$languages)) {
          trigger_error('No languages are registered, returning all bundled languages instead. You probably did not want this.', E_USER_WARNING);

          return self::listBundledLanguages();
      }

      return self::listRegisteredLanguages($include_aliases);
  }
  /**
   * Returns list of all available aliases for given language name.
   *
   * @param string $name name or alias of language to look-up
   *
   * @throws \DomainException if the requested language was not in this
   *                          Highlighter's language set
   *
   * @since 9.12.0.3
   *
   * @return string[] An array with the language's main name followed by all
   *                  of its aliases
   */
  public function getAliasesForLanguage($name)
  {
      $language = $this->getLanguage($name);

      if ($language === null) {
          // Report the name that was asked for; `$language` is null at this point.
          throw new \DomainException("Unknown language: \"$name\"");
      }

      if ($language->aliases === null) {
          return array($language->name);
      }

      return array_merge(array($language->name), $language->aliases);
  }
  877. }