<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com> and Trevor Rowbotham <trevor.rowbotham@pm.me>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Polyfill\Intl\Idn;

use Exception;
use Normalizer;
use Symfony\Polyfill\Intl\Idn\Resources\unidata\DisallowedRanges;
use Symfony\Polyfill\Intl\Idn\Resources\unidata\Regex;

/**
 * Userland implementation of idn_to_ascii() and idn_to_utf8() following UTS #46.
 *
 * Errors are accumulated as a bit mask (the ERROR_* constants) on an Info object while a domain
 * is processed; the public entry points return false whenever that mask is non-zero.
 *
 * @see https://www.unicode.org/reports/tr46/
 *
 * @internal
 */
final class Idn
{
    // Error bit flags, OR'ed together into Info::$errors.
    const ERROR_EMPTY_LABEL = 1;
    const ERROR_LABEL_TOO_LONG = 2;
    const ERROR_DOMAIN_NAME_TOO_LONG = 4;
    const ERROR_LEADING_HYPHEN = 8;
    const ERROR_TRAILING_HYPHEN = 0x10;
    const ERROR_HYPHEN_3_4 = 0x20;
    const ERROR_LEADING_COMBINING_MARK = 0x40;
    const ERROR_DISALLOWED = 0x80;
    const ERROR_PUNYCODE = 0x100;
    const ERROR_LABEL_HAS_DOT = 0x200;
    const ERROR_INVALID_ACE_LABEL = 0x400;
    const ERROR_BIDI = 0x800;
    const ERROR_CONTEXTJ = 0x1000;
    const ERROR_CONTEXTO_PUNCTUATION = 0x2000;
    const ERROR_CONTEXTO_DIGITS = 0x4000;

    const INTL_IDNA_VARIANT_2003 = 0;
    const INTL_IDNA_VARIANT_UTS46 = 1;

    // Length limits, in bytes, enforced by validateDomainAndLabelLength().
    const MAX_DOMAIN_SIZE = 253;
    const MAX_LABEL_SIZE = 63;

    // Parameters used by the Punycode encoder/decoder and by adaptBias() below.
    const BASE = 36;
    const TMIN = 1;
    const TMAX = 26;
    const SKEW = 38;
    const DAMP = 700;
    const INITIAL_BIAS = 72;
    const INITIAL_N = 128;
    const DELIMITER = '-';
    const MAX_INT = 2147483647;

    /**
     * Contains the numeric value of a basic code point (for use in representing integers) in the
     * range 0 to BASE-1, or -1 if the code point does not represent a value.
     *
     * The table is indexed by byte value (0-255): 'A'-'Z' and 'a'-'z' map to 0-25, '0'-'9' map
     * to 26-35.
     *
     * @var array<int, int>
     */
    private static $basicToDigit = array(
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    );

    /**
     * Lazily loaded from Resources/unidata/virama.php; looked up by code point.
     *
     * @var array<int, int>
     */
    private static $virama;

    /**
     * @var array<int, string>
     */
    private static $mapped;

    /**
     * @var array<int, bool>
     */
    private static $ignored;

    /**
     * @var array<int, string>
     */
    private static $deviation;

    /**
     * @var array<int, bool>
     */
    private static $disallowed;

    /**
     * @var array<int, string>
     */
    private static $disallowed_STD3_mapped;

    /**
     * @var array<int, bool>
     */
    private static $disallowed_STD3_valid;

    /**
     * Whether the mapping tables above have been loaded by lookupCodePointStatus().
     *
     * @var bool
     */
    private static $mappingTableLoaded = false;

    /**
     * Converts a domain name to its ASCII form, Punycode-encoding every label that still
     * contains non-ASCII code points after processing.
     *
     * @see https://www.unicode.org/reports/tr46/#ToASCII
     *
     * @param string $domainName Domain to convert (cast to string before processing)
     * @param int    $options    Bit mask of IDNA_* option flags
     * @param int    $variant    One of the INTL_IDNA_VARIANT_* constants
     * @param array  $idna_info  Filled with the keys 'result', 'isTransitionalDifferent' and 'errors'
     *
     * @return string|false The converted domain, or false if any error was recorded
     */
    public static function idn_to_ascii($domainName, $options = IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = array())
    {
        if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
            @trigger_error('idn_to_ascii(): INTL_IDNA_VARIANT_2003 is deprecated', E_USER_DEPRECATED);
        }

        $options = array(
            'CheckHyphens' => true,
            'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & \IDNA_CHECK_BIDI),
            'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & \IDNA_CHECK_CONTEXTJ),
            'UseSTD3ASCIIRules' => 0 !== ($options & \IDNA_USE_STD3_RULES),
            'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & \IDNA_NONTRANSITIONAL_TO_ASCII),
            'VerifyDnsLength' => true,
        );
        $info = new Info();
        $labels = self::process((string) $domainName, $options, $info);

        foreach ($labels as $i => $label) {
            // Only convert labels to punycode that contain non-ASCII code points
            if (1 === preg_match('/[^\x00-\x7F]/', $label)) {
                try {
                    $label = 'xn--'.self::punycodeEncode($label);
                } catch (Exception $e) {
                    // The label is left unencoded; the error flag makes the call return false.
                    $info->errors |= self::ERROR_PUNYCODE;
                }

                $labels[$i] = $label;
            }
        }

        if ($options['VerifyDnsLength']) {
            self::validateDomainAndLabelLength($labels, $info);
        }

        $idna_info = array(
            'result' => implode('.', $labels),
            'isTransitionalDifferent' => $info->transitionalDifferent,
            'errors' => $info->errors,
        );

        return 0 === $info->errors ? $idna_info['result'] : false;
    }

    /**
     * Converts a domain name to its Unicode (UTF-8) form, decoding "xn--" labels.
     *
     * Unlike idn_to_ascii(), no 'VerifyDnsLength' option is passed to process() and no length
     * validation is performed.
     *
     * @see https://www.unicode.org/reports/tr46/#ToUnicode
     *
     * @param string $domainName Domain to convert (cast to string before processing)
     * @param int    $options    Bit mask of IDNA_* option flags
     * @param int    $variant    One of the INTL_IDNA_VARIANT_* constants
     * @param array  $idna_info  Filled with the keys 'result', 'isTransitionalDifferent' and 'errors'
     *
     * @return string|false The converted domain, or false if any error was recorded
     */
    public static function idn_to_utf8($domainName, $options = IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = array())
    {
        if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
            @trigger_error('idn_to_utf8(): INTL_IDNA_VARIANT_2003 is deprecated', E_USER_DEPRECATED);
        }

        $info = new Info();
        $labels = self::process((string) $domainName, array(
            'CheckHyphens' => true,
            'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & \IDNA_CHECK_BIDI),
            'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & \IDNA_CHECK_CONTEXTJ),
            'UseSTD3ASCIIRules' => 0 !== ($options & \IDNA_USE_STD3_RULES),
            'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & \IDNA_NONTRANSITIONAL_TO_UNICODE),
        ), $info);
        $idna_info = array(
            'result' => implode('.', $labels),
            'isTransitionalDifferent' => $info->transitionalDifferent,
            'errors' => $info->errors,
        );

        return 0 === $info->errors ? $idna_info['result'] : false;
    }

    /**
     * Checks every U+200C (ZWNJ) and U+200D (ZWJ) in a label against the ContextJ rules.
     *
     * @param array<int, int> $codePoints Code points of $label, as returned by utf8Decode()
     * @param string          $label      The same label as a UTF-8 string
     *
     * @return bool False as soon as one joiner fails both the virama and the regular expression rule
     */
    private static function isValidContextJ(array $codePoints, $label)
    {
        if (!isset(self::$virama)) {
            self::$virama = require __DIR__.\DIRECTORY_SEPARATOR.'Resources'.\DIRECTORY_SEPARATOR.'unidata'.\DIRECTORY_SEPARATOR.'virama.php';
        }

        // Byte offset in $label from which the next ZWNJ regular expression search starts.
        $offset = 0;

        foreach ($codePoints as $i => $codePoint) {
            if (0x200C !== $codePoint && 0x200D !== $codePoint) {
                continue;
            }

            // A joiner cannot be the first code point of the label.
            if (!isset($codePoints[$i - 1])) {
                return false;
            }

            // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
            if (isset(self::$virama[$codePoints[$i - 1]])) {
                continue;
            }

            // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})) Then
            // True;
            // Generated RegExp = ([Joining_Type:{L,D}][Joining_Type:T]*\u200C[Joining_Type:T]*)[Joining_Type:{R,D}]
            if (0x200C === $codePoint && 1 === preg_match(Regex::ZWNJ, $label, $matches, PREG_OFFSET_CAPTURE, $offset)) {
                // With PREG_OFFSET_CAPTURE, $matches[1][1] is the absolute byte position of the
                // captured group in $label. Resume right after that group; merely adding the
                // group length to the previous offset is wrong whenever the match did not start
                // exactly at $offset (it could re-match the same ZWNJ or land mid-character).
                $offset = $matches[1][1] + \strlen($matches[1][0]);

                continue;
            }

            return false;
        }

        return true;
    }

    /**
     * Applies the UTS #46 mapping step to every code point of the input.
     *
     * Disallowed code points are kept in the output but flag ERROR_DISALLOWED; ignored code
     * points are dropped; deviation code points set Info::$transitionalDifferent and are only
     * replaced by their mapping when transitional processing is enabled.
     *
     * @see https://www.unicode.org/reports/tr46/#ProcessingStepMap
     *
     * @param string              $input
     * @param array<string, bool> $options
     *
     * @return string
     */
    private static function mapCodePoints($input, array $options, Info $info)
    {
        $str = '';
        $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];
        $transitional = $options['Transitional_Processing'];

        foreach (self::utf8Decode($input) as $codePoint) {
            $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);

            switch ($data['status']) {
                case 'disallowed':
                    $info->errors |= self::ERROR_DISALLOWED;

                    // no break.

                case 'valid':
                    $str .= mb_chr($codePoint, 'utf-8');

                    break;

                case 'ignored':
                    // Do nothing.
                    break;

                case 'mapped':
                    $str .= $data['mapping'];

                    break;

                case 'deviation':
                    $info->transitionalDifferent = true;
                    $str .= ($transitional ? $data['mapping'] : mb_chr($codePoint, 'utf-8'));

                    break;
            }
        }

        return $str;
    }

    /**
     * Runs the UTS #46 processing steps (map, normalize, split, convert/validate) on a domain.
     *
     * @see https://www.unicode.org/reports/tr46/#Processing
     *
     * @param string              $domain
     * @param array<string, bool> $options
     *
     * @return array<int, string> The processed labels; errors are recorded on $info
     */
    private static function process($domain, array $options, Info $info)
    {
        // If VerifyDnsLength is not set, we are doing ToUnicode otherwise we are doing ToASCII and
        // we need to respect the VerifyDnsLength option.
        $checkForEmptyLabels = !isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'];

        if ($checkForEmptyLabels && '' === $domain) {
            $info->errors |= self::ERROR_EMPTY_LABEL;

            return array($domain);
        }

        // Step 1. Map each code point in the domain name string
        $domain = self::mapCodePoints($domain, $options, $info);

        // Step 2. Normalize the domain name string to Unicode Normalization Form C.
        if (!Normalizer::isNormalized($domain, Normalizer::FORM_C)) {
            $domain = Normalizer::normalize($domain, Normalizer::FORM_C);
        }

        // Step 3. Break the string into labels at U+002E (.) FULL STOP.
        $labels = explode('.', $domain);
        $lastLabelIndex = \count($labels) - 1;

        // Step 4. Convert and validate each label in the domain name string.
        foreach ($labels as $i => $label) {
            $validationOptions = $options;

            if ('xn--' === substr($label, 0, 4)) {
                try {
                    $label = self::punycodeDecode(substr($label, 4));
                } catch (Exception $e) {
                    // The label is left as-is and is not validated any further.
                    $info->errors |= self::ERROR_PUNYCODE;

                    continue;
                }

                // Decoded labels are always validated with non-transitional processing.
                $validationOptions['Transitional_Processing'] = false;
                $labels[$i] = $label;
            }

            // Only a trailing label that is not also the first one may be empty.
            self::validateLabel($label, $info, $validationOptions, $i > 0 && $i === $lastLabelIndex);
        }

        if ($info->bidiDomain && !$info->validBidiDomain) {
            $info->errors |= self::ERROR_BIDI;
        }

        // Any input domain name string that does not record an error has been successfully
        // processed according to this specification. Conversely, if an input domain_name string
        // causes an error, then the processing of the input domain_name string fails. Determining
        // what to do with error input is up to the caller, and not in the scope of this document.
        return $labels;
    }

    /**
     * Applies the Bidi rule to a label, updating Info::$bidiDomain and Info::$validBidiDomain.
     *
     * No error bit is set here; process() raises ERROR_BIDI after all labels have been checked.
     *
     * @see https://tools.ietf.org/html/rfc5893#section-2
     *
     * @param string $label
     */
    private static function validateBidiLabel($label, Info $info)
    {
        if (1 === preg_match(Regex::RTL_LABEL, $label)) {
            $info->bidiDomain = true;

            // Step 1. The first character must be a character with Bidi property L, R, or AL.
            // If it has the R or AL property, it is an RTL label
            if (1 !== preg_match(Regex::BIDI_STEP_1_RTL, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            // Step 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES,
            // CS, ET, ON, BN, or NSM are allowed.
            if (1 === preg_match(Regex::BIDI_STEP_2, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            // Step 3. In an RTL label, the end of the label must be a character with Bidi property
            // R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM.
            if (1 !== preg_match(Regex::BIDI_STEP_3, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            // Step 4. In an RTL label, if an EN is present, no AN may be present, and vice versa.
            if (1 === preg_match(Regex::BIDI_STEP_4_AN, $label) && 1 === preg_match(Regex::BIDI_STEP_4_EN, $label)) {
                $info->validBidiDomain = false;

                return;
            }

            return;
        }

        // We are a LTR label
        // Step 1. The first character must be a character with Bidi property L, R, or AL.
        // If it has the L property, it is an LTR label.
        if (1 !== preg_match(Regex::BIDI_STEP_1_LTR, $label)) {
            $info->validBidiDomain = false;

            return;
        }

        // Step 5. In an LTR label, only characters with the Bidi properties L, EN,
        // ES, CS, ET, ON, BN, or NSM are allowed.
        if (1 === preg_match(Regex::BIDI_STEP_5, $label)) {
            $info->validBidiDomain = false;

            return;
        }

        // Step 6. In an LTR label, the end of the label must be a character with Bidi property L or
        // EN, followed by zero or more characters with Bidi property NSM.
        if (1 !== preg_match(Regex::BIDI_STEP_6, $label)) {
            $info->validBidiDomain = false;

            return;
        }
    }

    /**
     * Flags ERROR_LABEL_TOO_LONG / ERROR_DOMAIN_NAME_TOO_LONG based on byte lengths.
     *
     * @param array<int, string> $labels
     */
    private static function validateDomainAndLabelLength(array $labels, Info $info)
    {
        $maxDomainSize = self::MAX_DOMAIN_SIZE;
        $length = \count($labels);

        // Number of "." delimiters.
        $domainLength = $length - 1;

        // If the last label is empty and it is not the first label, then it is the root label.
        // Increase the max size by 1, making it 254, to account for the root label's "."
        // delimiter. This also means we don't need to check the last label's length for being too
        // long.
        if ($length > 1 && '' === $labels[$length - 1]) {
            ++$maxDomainSize;
            --$length;
        }

        for ($i = 0; $i < $length; ++$i) {
            $bytes = \strlen($labels[$i]);
            $domainLength += $bytes;

            if ($bytes > self::MAX_LABEL_SIZE) {
                $info->errors |= self::ERROR_LABEL_TOO_LONG;
            }
        }

        if ($domainLength > $maxDomainSize) {
            $info->errors |= self::ERROR_DOMAIN_NAME_TOO_LONG;
        }
    }

    /**
     * Checks a single label against the UTS #46 validity criteria, recording errors on $info.
     *
     * @see https://www.unicode.org/reports/tr46/#Validity_Criteria
     *
     * @param string              $label
     * @param array<string, bool> $options
     * @param bool                $canBeEmpty Whether an empty label is acceptable at this position
     */
    private static function validateLabel($label, Info $info, array $options, $canBeEmpty)
    {
        if ('' === $label) {
            if (!$canBeEmpty && (!isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'])) {
                $info->errors |= self::ERROR_EMPTY_LABEL;
            }

            return;
        }

        // Step 1. The label must be in Unicode Normalization Form C.
        if (!Normalizer::isNormalized($label, Normalizer::FORM_C)) {
            $info->errors |= self::ERROR_INVALID_ACE_LABEL;
        }

        $codePoints = self::utf8Decode($label);

        if ($options['CheckHyphens']) {
            // Step 2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character
            // in both the third and fourth positions.
            if (isset($codePoints[2], $codePoints[3]) && 0x002D === $codePoints[2] && 0x002D === $codePoints[3]) {
                $info->errors |= self::ERROR_HYPHEN_3_4;
            }

            // Step 3. If CheckHyphens, the label must neither begin nor end with a U+002D
            // HYPHEN-MINUS character.
            if ('-' === substr($label, 0, 1)) {
                $info->errors |= self::ERROR_LEADING_HYPHEN;
            }

            if ('-' === substr($label, -1, 1)) {
                $info->errors |= self::ERROR_TRAILING_HYPHEN;
            }
        }

        // Step 4. The label must not contain a U+002E (.) FULL STOP.
        if (false !== strpos($label, '.')) {
            $info->errors |= self::ERROR_LABEL_HAS_DOT;
        }

        // Step 5. The label must not begin with a combining mark, that is: General_Category=Mark.
        if (1 === preg_match(Regex::COMBINING_MARK, $label)) {
            $info->errors |= self::ERROR_LEADING_COMBINING_MARK;
        }

        // Step 6. Each code point in the label must only have certain status values according to
        // Section 5, IDNA Mapping Table:
        $transitional = $options['Transitional_Processing'];
        $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];

        foreach ($codePoints as $codePoint) {
            $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);
            $status = $data['status'];

            if ('valid' === $status || (!$transitional && 'deviation' === $status)) {
                continue;
            }

            // One offending code point is enough; stop scanning.
            $info->errors |= self::ERROR_DISALLOWED;

            break;
        }

        // Step 7. If CheckJoiners, the label must satisfy the ContextJ rules from Appendix A, in
        // The Unicode Code Points and Internationalized Domain Names for Applications (IDNA)
        // [IDNA2008].
        if ($options['CheckJoiners'] && !self::isValidContextJ($codePoints, $label)) {
            $info->errors |= self::ERROR_CONTEXTJ;
        }

        // Step 8. If CheckBidi, and if the domain name is a Bidi domain name, then the label must
        // satisfy all six of the numbered conditions in [IDNA2008] RFC 5893, Section 2.
        // The check is skipped once the domain is already known to be an invalid Bidi domain.
        if ($options['CheckBidi'] && (!$info->bidiDomain || $info->validBidiDomain)) {
            self::validateBidiLabel($label, $info);
        }
    }

    /**
     * Decodes a Punycode string (without the "xn--" prefix) into a UTF-8 string.
     *
     * @see https://tools.ietf.org/html/rfc3492#section-6.2
     *
     * @param string $input
     *
     * @return string
     *
     * @throws Exception If the input is malformed, an integer overflows, or a decoded value is
     *                   not a Unicode scalar value
     */
    private static function punycodeDecode($input)
    {
        $n = self::INITIAL_N;
        $out = 0;
        $i = 0;
        $bias = self::INITIAL_BIAS;
        $lastDelimIndex = strrpos($input, self::DELIMITER);
        // Number of basic (literal) characters that precede the last delimiter.
        $b = false === $lastDelimIndex ? 0 : $lastDelimIndex;
        $inputLength = \strlen($input);
        $output = array();
        $bytes = array_map('ord', str_split($input));

        // Copy the basic code points verbatim; they must all be ASCII.
        for ($j = 0; $j < $b; ++$j) {
            if ($bytes[$j] > 0x7F) {
                throw new Exception('Invalid input');
            }

            $output[$out++] = $input[$j];
        }

        // Skip the delimiter itself.
        if ($b > 0) {
            ++$b;
        }

        for ($in = $b; $in < $inputLength; ++$out) {
            $oldi = $i;
            $w = 1;

            // Decode one variable-length integer into $i.
            for ($k = self::BASE; /* no condition */; $k += self::BASE) {
                if ($in >= $inputLength) {
                    throw new Exception('Invalid input');
                }

                $digit = self::$basicToDigit[$bytes[$in++] & 0xFF];

                if ($digit < 0) {
                    throw new Exception('Invalid input');
                }

                if ($digit > intdiv(self::MAX_INT - $i, $w)) {
                    throw new Exception('Integer overflow');
                }

                $i += $digit * $w;

                if ($k <= $bias) {
                    $t = self::TMIN;
                } elseif ($k >= $bias + self::TMAX) {
                    $t = self::TMAX;
                } else {
                    $t = $k - $bias;
                }

                if ($digit < $t) {
                    break;
                }

                $baseMinusT = self::BASE - $t;

                if ($w > intdiv(self::MAX_INT, $baseMinusT)) {
                    throw new Exception('Integer overflow');
                }

                $w *= $baseMinusT;
            }

            $outPlusOne = $out + 1;
            $bias = self::adaptBias($i - $oldi, $outPlusOne, 0 === $oldi);

            if (intdiv($i, $outPlusOne) > self::MAX_INT - $n) {
                throw new Exception('Integer overflow');
            }

            $n += intdiv($i, $outPlusOne);

            // Reject values that are not Unicode scalar values (beyond U+10FFFF or surrogates).
            // Without this guard mb_chr() may return false, which would be spliced into the
            // output and silently dropped by implode().
            if ($n > 0x10FFFF || ($n >= 0xD800 && $n <= 0xDFFF)) {
                throw new Exception('Invalid input');
            }

            $i %= $outPlusOne;
            // Insert the decoded character at position $i, then advance $i past it.
            array_splice($output, $i++, 0, array(mb_chr($n, 'utf-8')));
        }

        return implode('', $output);
    }

    /**
     * Encodes a UTF-8 string as Punycode (without the "xn--" prefix).
     *
     * @see https://tools.ietf.org/html/rfc3492#section-6.3
     *
     * @param string $input
     *
     * @return string
     *
     * @throws Exception If an integer overflow is detected
     */
    private static function punycodeEncode($input)
    {
        $n = self::INITIAL_N;
        $delta = 0;
        $out = 0;
        $bias = self::INITIAL_BIAS;
        $inputLength = 0;
        $output = '';
        $iter = self::utf8Decode($input);

        // Copy the basic (ASCII) code points to the output and count all code points.
        foreach ($iter as $codePoint) {
            ++$inputLength;

            if ($codePoint < 0x80) {
                $output .= \chr($codePoint);
                ++$out;
            }
        }

        // $h: number of code points handled so far; $b: number of basic code points.
        $h = $out;
        $b = $out;

        if ($b > 0) {
            $output .= self::DELIMITER;
            ++$out;
        }

        while ($h < $inputLength) {
            // Find the smallest code point that is >= $n.
            $m = self::MAX_INT;

            foreach ($iter as $codePoint) {
                if ($codePoint >= $n && $codePoint < $m) {
                    $m = $codePoint;
                }
            }

            if ($m - $n > intdiv(self::MAX_INT - $delta, $h + 1)) {
                throw new Exception('Integer overflow');
            }

            $delta += ($m - $n) * ($h + 1);
            $n = $m;

            foreach ($iter as $codePoint) {
                if ($codePoint < $n && 0 === ++$delta) {
                    throw new Exception('Integer overflow');
                } elseif ($codePoint === $n) {
                    // Emit $delta as a variable-length integer.
                    $q = $delta;

                    for ($k = self::BASE; /* no condition */; $k += self::BASE) {
                        if ($k <= $bias) {
                            $t = self::TMIN;
                        } elseif ($k >= $bias + self::TMAX) {
                            $t = self::TMAX;
                        } else {
                            $t = $k - $bias;
                        }

                        if ($q < $t) {
                            break;
                        }

                        $qMinusT = $q - $t;
                        $baseMinusT = self::BASE - $t;
                        $output .= self::encodeDigit($t + ($qMinusT) % ($baseMinusT), false);
                        ++$out;
                        $q = intdiv($qMinusT, $baseMinusT);
                    }

                    $output .= self::encodeDigit($q, false);
                    ++$out;
                    $bias = self::adaptBias($delta, $h + 1, $h === $b);
                    $delta = 0;
                    ++$h;
                }
            }

            ++$delta;
            ++$n;
        }

        return $output;
    }

    /**
     * Computes the next bias value for the Punycode encoder/decoder.
     *
     * @see https://tools.ietf.org/html/rfc3492#section-6.1
     *
     * @param int  $delta
     * @param int  $numPoints
     * @param bool $firstTime Whether this is the first adaptation (delta is then divided by DAMP
     *                        instead of 2)
     *
     * @return int
     */
    private static function adaptBias($delta, $numPoints, $firstTime)
    {
        // xxx >> 1 is a faster way of doing intdiv(xxx, 2)
        $delta = $firstTime ? intdiv($delta, self::DAMP) : $delta >> 1;
        $delta += intdiv($delta, $numPoints);
        $k = 0;

        while ($delta > ((self::BASE - self::TMIN) * self::TMAX) >> 1) {
            $delta = intdiv($delta, self::BASE - self::TMIN);
            $k += self::BASE;
        }

        return $k + intdiv((self::BASE - self::TMIN + 1) * $delta, $delta + self::SKEW);
    }

    /**
     * Returns the character for a digit value: 0-25 map to 'a'-'z' and 26-35 map to '0'-'9'.
     *
     * @param int  $d    Digit value
     * @param bool $flag When true, 32 is subtracted from the resulting character code
     *
     * @return string
     */
    private static function encodeDigit($d, $flag)
    {
        return \chr($d + 22 + 75 * ($d < 26 ? 1 : 0) - (($flag ? 1 : 0) << 5));
    }

    /**
     * Takes a UTF-8 encoded string and converts it into a series of integer code points. Any
     * invalid byte sequences will be replaced by a U+FFFD replacement code point.
     *
     * @see https://encoding.spec.whatwg.org/#utf-8-decoder
     *
     * @param string $input
     *
     * @return array<int, int>
     */
    private static function utf8Decode($input)
    {
        $bytesSeen = 0;
        $bytesNeeded = 0;
        $lowerBoundary = 0x80;
        $upperBoundary = 0xBF;
        $codePoint = 0;
        $codePoints = array();
        $length = \strlen($input);

        for ($i = 0; $i < $length; ++$i) {
            $byte = \ord($input[$i]);

            if (0 === $bytesNeeded) {
                if ($byte >= 0x00 && $byte <= 0x7F) {
                    $codePoints[] = $byte;

                    continue;
                }

                if ($byte >= 0xC2 && $byte <= 0xDF) {
                    $bytesNeeded = 1;
                    $codePoint = $byte & 0x1F;
                } elseif ($byte >= 0xE0 && $byte <= 0xEF) {
                    // Narrow the allowed range of the next byte for these lead bytes.
                    if (0xE0 === $byte) {
                        $lowerBoundary = 0xA0;
                    } elseif (0xED === $byte) {
                        $upperBoundary = 0x9F;
                    }

                    $bytesNeeded = 2;
                    $codePoint = $byte & 0xF;
                } elseif ($byte >= 0xF0 && $byte <= 0xF4) {
                    if (0xF0 === $byte) {
                        $lowerBoundary = 0x90;
                    } elseif (0xF4 === $byte) {
                        $upperBoundary = 0x8F;
                    }

                    $bytesNeeded = 3;
                    $codePoint = $byte & 0x7;
                } else {
                    $codePoints[] = 0xFFFD;
                }

                continue;
            }

            if ($byte < $lowerBoundary || $byte > $upperBoundary) {
                $codePoint = 0;
                $bytesNeeded = 0;
                $bytesSeen = 0;
                $lowerBoundary = 0x80;
                $upperBoundary = 0xBF;
                // Re-process the offending byte as the start of a new sequence.
                --$i;
                $codePoints[] = 0xFFFD;

                continue;
            }

            $lowerBoundary = 0x80;
            $upperBoundary = 0xBF;
            $codePoint = ($codePoint << 6) | ($byte & 0x3F);

            if (++$bytesSeen !== $bytesNeeded) {
                continue;
            }

            $codePoints[] = $codePoint;
            $codePoint = 0;
            $bytesNeeded = 0;
            $bytesSeen = 0;
        }

        // String unexpectedly ended, so append a U+FFFD code point.
        if (0 !== $bytesNeeded) {
            $codePoints[] = 0xFFFD;
        }

        return $codePoints;
    }

    /**
     * Looks up the IDNA mapping table status of a code point.
     *
     * The data tables are loaded from Resources/unidata on first use. The tables are consulted
     * in this order: mapped, ignored, deviation, disallowed (table or range), then the two
     * disallowed_STD3_* tables; anything not found is reported as 'valid'.
     *
     * @param int  $codePoint
     * @param bool $useSTD3ASCIIRules When false, disallowed_STD3_mapped / disallowed_STD3_valid
     *                                entries are reported as 'mapped' / 'valid' respectively
     *
     * @return array{status: string, mapping?: string}
     */
    private static function lookupCodePointStatus($codePoint, $useSTD3ASCIIRules)
    {
        if (!self::$mappingTableLoaded) {
            self::$mappingTableLoaded = true;
            self::$mapped = require __DIR__.'/Resources/unidata/mapped.php';
            self::$ignored = require __DIR__.'/Resources/unidata/ignored.php';
            self::$deviation = require __DIR__.'/Resources/unidata/deviation.php';
            self::$disallowed = require __DIR__.'/Resources/unidata/disallowed.php';
            self::$disallowed_STD3_mapped = require __DIR__.'/Resources/unidata/disallowed_STD3_mapped.php';
            self::$disallowed_STD3_valid = require __DIR__.'/Resources/unidata/disallowed_STD3_valid.php';
        }

        if (isset(self::$mapped[$codePoint])) {
            return array('status' => 'mapped', 'mapping' => self::$mapped[$codePoint]);
        }

        if (isset(self::$ignored[$codePoint])) {
            return array('status' => 'ignored');
        }

        if (isset(self::$deviation[$codePoint])) {
            return array('status' => 'deviation', 'mapping' => self::$deviation[$codePoint]);
        }

        if (isset(self::$disallowed[$codePoint]) || DisallowedRanges::inRange($codePoint)) {
            return array('status' => 'disallowed');
        }

        $isDisallowedMapped = isset(self::$disallowed_STD3_mapped[$codePoint]);

        if ($isDisallowedMapped || isset(self::$disallowed_STD3_valid[$codePoint])) {
            $status = 'disallowed';

            if (!$useSTD3ASCIIRules) {
                $status = $isDisallowedMapped ? 'mapped' : 'valid';
            }

            if ($isDisallowedMapped) {
                return array('status' => $status, 'mapping' => self::$disallowed_STD3_mapped[$codePoint]);
            }

            return array('status' => $status);
        }

        return array('status' => 'valid');
    }
}