<?php

/* Copyright (c)
 * - 2006-2013, Ivan Sagalaev (maniac@softwaremaniacs.org), highlight.js
 *   (original author)
 * - 2013-2019, Geert Bergman (geert@scrivo.nl), highlight.php
 * - 2014 Daniel Lynge, highlight.php (contributor)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of "highlight.js", "highlight.php", nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
  32. namespace Highlight;
  33. /**
  34. * @todo In highlight.php 10.x, replace the @final attribute with the `final` keyword.
  35. *
  36. * @final
  37. *
  38. * @internal
  39. *
  40. * // Backward compatibility properties
  41. *
  42. * @property Mode $mode (DEPRECATED) All properties traditionally inside of $mode are now available directly from this class.
  43. * @property bool $caseInsensitive (DEPRECATED) Due to compatibility requirements with highlight.js, use `case_insensitive` instead.
  44. */
  45. class Language extends Mode
  46. {
  47. /** @var string[] */
  48. private static $COMMON_KEYWORDS = array('of', 'and', 'for', 'in', 'not', 'or', 'if', 'then');
  49. /** @var string */
  50. public $name;
  51. /** @var Mode|null */
  52. private $mode = null;
  53. /**
  54. * @param string $lang
  55. * @param string $filePath
  56. *
  57. * @throws \InvalidArgumentException when the given $filePath is inaccessible
  58. */
  59. public function __construct($lang, $filePath)
  60. {
  61. $this->name = $lang;
  62. // We're loading the JSON definition file as an \stdClass object instead of an associative array. This is being
  63. // done to take advantage of objects being pass by reference automatically in PHP whereas arrays are pass by
  64. // value.
  65. $json = file_get_contents($filePath);
  66. if ($json === false) {
  67. throw new \InvalidArgumentException("Language file inaccessible: $filePath");
  68. }
  69. $this->mode = json_decode($json);
  70. }
  71. /**
  72. * @param string $name
  73. *
  74. * @return bool|Mode|null
  75. */
  76. public function __get($name)
  77. {
  78. if ($name === 'mode') {
  79. @trigger_error('The "mode" property will be removed in highlight.php 10.x', E_USER_DEPRECATED);
  80. return $this->mode;
  81. }
  82. if ($name === 'caseInsensitive') {
  83. @trigger_error('Due to compatibility requirements with highlight.js, use "case_insensitive" instead.', E_USER_DEPRECATED);
  84. if (isset($this->mode->case_insensitive)) {
  85. return $this->mode->case_insensitive;
  86. }
  87. return false;
  88. }
  89. if (isset($this->mode->{$name})) {
  90. return $this->mode->{$name};
  91. }
  92. return null;
  93. }
  94. /**
  95. * @param string $value
  96. * @param bool $global
  97. *
  98. * @return RegEx
  99. */
  100. private function langRe($value, $global = false)
  101. {
  102. return RegExUtils::langRe($value, $global, $this->case_insensitive);
  103. }
  104. /**
  105. * Performs a shallow merge of multiple objects into one.
  106. *
  107. * @param Mode $params the objects to merge
  108. * @param array<string, mixed> ...$_
  109. *
  110. * @return Mode
  111. */
  112. private function inherit($params, $_ = array())
  113. {
  114. /** @var Mode $result */
  115. $result = new \stdClass();
  116. $objects = func_get_args();
  117. $parent = array_shift($objects);
  118. foreach ($parent as $key => $value) {
  119. $result->{$key} = $value;
  120. }
  121. foreach ($objects as $object) {
  122. foreach ($object as $key => $value) {
  123. $result->{$key} = $value;
  124. }
  125. }
  126. return $result;
  127. }
  128. /**
  129. * @param Mode|null $mode
  130. *
  131. * @return bool
  132. */
  133. private function dependencyOnParent($mode)
  134. {
  135. if (!$mode) {
  136. return false;
  137. }
  138. if (isset($mode->endsWithParent) && $mode->endsWithParent) {
  139. return $mode->endsWithParent;
  140. }
  141. return $this->dependencyOnParent(isset($mode->starts) ? $mode->starts : null);
  142. }
  143. /**
  144. * @param Mode $mode
  145. *
  146. * @return array<int, \stdClass|Mode>
  147. */
  148. private function expandOrCloneMode($mode)
  149. {
  150. if ($mode->variants && !$mode->cachedVariants) {
  151. $mode->cachedVariants = array();
  152. foreach ($mode->variants as $variant) {
  153. $mode->cachedVariants[] = $this->inherit($mode, array('variants' => null), $variant);
  154. }
  155. }
  156. // EXPAND
  157. // if we have variants then essentially "replace" the mode with the variants
  158. // this happens in compileMode, where this function is called from
  159. if ($mode->cachedVariants) {
  160. return $mode->cachedVariants;
  161. }
  162. // CLONE
  163. // if we have dependencies on parents then we need a unique
  164. // instance of ourselves, so we can be reused with many
  165. // different parents without issue
  166. if ($this->dependencyOnParent($mode)) {
  167. return array($this->inherit($mode, array(
  168. 'starts' => $mode->starts ? $this->inherit($mode->starts) : null,
  169. )));
  170. }
  171. // highlight.php does not have a concept freezing our Modes
  172. // no special dependency issues, just return ourselves
  173. return array($mode);
  174. }
  175. /**
  176. * @param Mode $mode
  177. * @param Mode|null $parent
  178. *
  179. * @return void
  180. */
  181. private function compileMode($mode, $parent = null)
  182. {
  183. Mode::_normalize($mode);
  184. if ($mode->compiled) {
  185. return;
  186. }
  187. $mode->compiled = true;
  188. $mode->keywords = $mode->keywords ? $mode->keywords : $mode->beginKeywords;
  189. if ($mode->keywords) {
  190. $mode->keywords = $this->compileKeywords($mode->keywords, (bool) $this->case_insensitive);
  191. }
  192. $mode->lexemesRe = $this->langRe($mode->lexemes ? $mode->lexemes : "\w+", true);
  193. if ($parent) {
  194. if ($mode->beginKeywords) {
  195. $mode->begin = "\\b(" . implode("|", explode(" ", $mode->beginKeywords)) . ")\\b";
  196. }
  197. if (!$mode->begin) {
  198. $mode->begin = "\B|\b";
  199. }
  200. $mode->beginRe = $this->langRe($mode->begin);
  201. if ($mode->endSameAsBegin) {
  202. $mode->end = $mode->begin;
  203. }
  204. if (!$mode->end && !$mode->endsWithParent) {
  205. $mode->end = "\B|\b";
  206. }
  207. if ($mode->end) {
  208. $mode->endRe = $this->langRe($mode->end);
  209. }
  210. $mode->terminator_end = $mode->end;
  211. if ($mode->endsWithParent && $parent->terminator_end) {
  212. $mode->terminator_end .= ($mode->end ? "|" : "") . $parent->terminator_end;
  213. }
  214. }
  215. if ($mode->illegal) {
  216. $mode->illegalRe = $this->langRe($mode->illegal);
  217. }
  218. if ($mode->relevance === null) {
  219. $mode->relevance = 1;
  220. }
  221. if (!$mode->contains) {
  222. $mode->contains = array();
  223. }
  224. /** @var Mode[] $expandedContains */
  225. $expandedContains = array();
  226. foreach ($mode->contains as &$c) {
  227. if ($c instanceof \stdClass) {
  228. Mode::_normalize($c);
  229. }
  230. $expandedContains = array_merge($expandedContains, $this->expandOrCloneMode(
  231. $c === 'self' ? $mode : $c
  232. ));
  233. }
  234. $mode->contains = $expandedContains;
  235. /** @var Mode $contain */
  236. foreach ($mode->contains as $contain) {
  237. $this->compileMode($contain, $mode);
  238. }
  239. if ($mode->starts) {
  240. $this->compileMode($mode->starts, $parent);
  241. }
  242. $terminators = new Terminators($this->case_insensitive);
  243. $mode->terminators = $terminators->_buildModeRegex($mode);
  244. Mode::_handleDeprecations($mode);
  245. }
  246. /**
  247. * @param array<string, string>|string $rawKeywords
  248. * @param bool $caseSensitive
  249. *
  250. * @return array<string, array<int, string|int>>
  251. */
  252. private function compileKeywords($rawKeywords, $caseSensitive)
  253. {
  254. /** @var array<string, array<int, string|int>> $compiledKeywords */
  255. $compiledKeywords = array();
  256. if (is_string($rawKeywords)) {
  257. $this->splitAndCompile("keyword", $rawKeywords, $compiledKeywords, $caseSensitive);
  258. } else {
  259. foreach ($rawKeywords as $className => $rawKeyword) {
  260. $this->splitAndCompile($className, $rawKeyword, $compiledKeywords, $caseSensitive);
  261. }
  262. }
  263. return $compiledKeywords;
  264. }
  265. /**
  266. * @param string $className
  267. * @param string $str
  268. * @param array<string, array<int, string|int>> $compiledKeywords
  269. * @param bool $caseSensitive
  270. *
  271. * @return void
  272. */
  273. private function splitAndCompile($className, $str, array &$compiledKeywords, $caseSensitive)
  274. {
  275. if ($caseSensitive) {
  276. $str = strtolower($str);
  277. }
  278. $keywords = explode(' ', $str);
  279. foreach ($keywords as $keyword) {
  280. $pair = explode('|', $keyword);
  281. $providedScore = isset($pair[1]) ? $pair[1] : null;
  282. $compiledKeywords[$pair[0]] = array($className, $this->scoreForKeyword($pair[0], $providedScore));
  283. }
  284. }
  285. /**
  286. * @param string $keyword
  287. * @param string $providedScore
  288. *
  289. * @return int
  290. */
  291. private function scoreForKeyword($keyword, $providedScore)
  292. {
  293. if ($providedScore) {
  294. return (int) $providedScore;
  295. }
  296. return $this->commonKeyword($keyword) ? 0 : 1;
  297. }
  298. /**
  299. * @param string $word
  300. *
  301. * @return bool
  302. */
  303. private function commonKeyword($word)
  304. {
  305. return in_array(strtolower($word), self::$COMMON_KEYWORDS);
  306. }
  307. /**
  308. * Compile the Language definition.
  309. *
  310. * @param bool $safeMode
  311. *
  312. * @since 9.17.1.0 The 'safeMode' parameter was added.
  313. *
  314. * @return void
  315. */
  316. public function compile($safeMode)
  317. {
  318. if ($this->compiled) {
  319. return;
  320. }
  321. $jr = new JsonRef();
  322. $jr->decodeRef($this->mode);
  323. // self is not valid at the top-level
  324. if (isset($this->mode->contains) && !in_array("self", $this->mode->contains)) {
  325. if (!$safeMode) {
  326. throw new \LogicException("`self` is not supported at the top-level of a language.");
  327. }
  328. $this->mode->contains = array_filter($this->mode->contains, function ($mode) {
  329. return $mode !== "self";
  330. });
  331. }
  332. $this->compileMode($this->mode);
  333. }
  334. /**
  335. * @todo Remove in highlight.php 10.x
  336. *
  337. * @deprecated 9.16.0 This method should never have been exposed publicly as part of the API.
  338. *
  339. * @param \stdClass|null $e
  340. *
  341. * @return void
  342. */
  343. public function complete(&$e)
  344. {
  345. Mode::_normalize($e);
  346. }
  347. }