<?php

namespace addons\cms\library;

/**
 * Dictionary-driven word segmenter backed by a nested-array trie.
 *
 * The dictionary is loaded once from the file named by the
 * _VIC_WORD_DICT_PATH_ constant. Each trie node is an array keyed by one
 * UTF-8 character; a node that terminates a word additionally carries the
 * end marker key ($end) and the part-of-speech key ($x).
 *
 * Every segmentation method returns a list of tokens shaped as
 * [text, position, partOfSpeech|null, found], where found is 1 for
 * dictionary words and 0 for text that matched no dictionary entry.
 *
 * NOTE(review): positions are byte based. Tokens emitted in the middle of a
 * scan carry the index of their last byte, whereas tokens emitted once the
 * input is exhausted carry the string length - confirm which convention
 * callers expect before relying on position values.
 *
 * Created by PhpStorm.
 * User: tanszhe
 * Date: 2017/12/21
 * Time: 8:11 PM
 */
class VicWord
{
    /** @var array Root node of the dictionary trie. */
    private $dict = [];

    /** @var string Key that marks a trie node as the end of a word. */
    private $end = '\\';

    /** @var bool Whether unknown fragments are re-segmented (auto mode). */
    private $auto = false;

    /** @var int Number of re-segmentation attempts performed by reGet(). */
    private $count = 0;

    /**
     * @var string Key under which a word node stores its part of speech.
     */
    private $x = '\\x';

    /**
     * Loads the dictionary trie from _VIC_WORD_DICT_PATH_.
     *
     * When the file is missing, or its content does not decode to an array,
     * the dictionary stays empty and every input is reported as unknown text.
     *
     * @param string $type 'igb' to decode with igbinary, anything else for JSON.
     */
    public function __construct($type = 'igb')
    {
        if (!file_exists(_VIC_WORD_DICT_PATH_)) {
            return;
        }

        $raw = file_get_contents(_VIC_WORD_DICT_PATH_);
        if ($type == 'igb') {
            $dict = igbinary_unserialize($raw);
        } else {
            $dict = json_decode($raw, true);
        }

        // A failed decode yields null/false; keep the empty-array default then.
        if (is_array($dict)) {
            $this->dict = $dict;
        }
    }

    /**
     * Segments text preferring the longest dictionary match at each position.
     *
     * @param string $str Text to segment.
     * @return array List of [text, position, partOfSpeech|null, found] tokens.
     */
    public function getWord($str)
    {
        $this->auto = false;

        return $this->find($this->filter($str));
    }

    /**
     * Segments text emitting a word as soon as a dictionary entry is complete.
     *
     * @param string $str Text to segment.
     * @return array List of [text, position, partOfSpeech|null, found] tokens.
     */
    public function getShortWord($str)
    {
        $this->auto = false;

        return $this->shortFind($this->filter($str));
    }

    /**
     * Segments text with longest matching and retries fragments that contain
     * unknown text using the opposite strategy, keeping the better result.
     *
     * @param string $str Text to segment.
     * @return array List of [text, position, partOfSpeech|null, found] tokens.
     */
    public function getAutoWord($str)
    {
        $this->auto = true;

        return $this->autoFind($this->filter($str), ['long' => 1]);
    }

    /**
     * Normalises input before segmentation: trims it and lower-cases it.
     *
     * @param string $str
     * @return string
     */
    private function filter($str)
    {
        return strtolower(trim($str));
    }

    /**
     * Reads the UTF-8 character that starts at byte offset $i.
     *
     * The sequence length is derived from the high nibble of the lead byte:
     * 0xC/0xD => 2 bytes, 0xE => 3 bytes, 0xF => 4 bytes. A truncated
     * sequence at the end of the string yields whatever bytes remain.
     * A byte that cannot start a character terminates the script.
     *
     * @param string $str Text being scanned (taken by reference, not modified).
     * @param int    $i   Byte offset of the character's first byte.
     * @return array [character, byte offset of the character's last byte]
     */
    private function getD(&$str, $i)
    {
        $lead = ord($str[$i]);
        if ($lead < 128) {
            return [$str[$i], $i];
        }

        $nibble = $lead >> 4;
        if ($nibble == 12 || $nibble == 13) {
            // Two-byte lead bytes span 0xC0-0xDF, i.e. both nibbles 0xC and 0xD.
            $width = 2;
        } elseif ($nibble == 14) {
            $width = 3;
        } elseif ($nibble == 15) {
            $width = 4;
        } else {
            exit('我不认识的编码');
        }

        // substr() tolerates a sequence cut short by the end of the string.
        return [substr($str, $i, $width), $i + $width - 1];
    }

    /**
     * Dispatches to the long or short strategy according to $auto_info['long'].
     *
     * @param string $str
     * @param array  $auto_info Re-segmentation state; 'long' selects find().
     * @return array
     */
    private function autoFind($str, $auto_info = [])
    {
        if (!empty($auto_info['long'])) {
            return $this->find($str, $auto_info);
        }

        return $this->shortFind($str, $auto_info);
    }

    /**
     * Re-segments the tail of $r that ends with a freshly added unknown token.
     *
     * Tokens are popped from the end of $r up to and including the nearest
     * dictionary word, their text is concatenated and segmented again with
     * the opposite strategy, and whichever version leaves fewer unknown
     * bytes is appended back to $r.
     *
     * @param array $r         Result list, modified in place.
     * @param array $auto_info Re-segmentation state ('long', 'pl', 'c').
     * @return false|null false when the fragment has the same length as the
     *                    one already being retried, otherwise no value.
     */
    private function reGet(&$r, $auto_info)
    {
        // NOTE(review): this counter was written as `$c = $c++`, which stores
        // the pre-increment value back, so it never grows past 1 and the
        // `< 3` guard below never trips. Behaviour is kept as is; recursion
        // is actually bounded by the 'pl' length check.
        if (!isset($auto_info['c'])) {
            $auto_info['c'] = 1;
        }

        $popped = [];
        $str = '';
        for ($i = count($r) - 1; $i >= 0; $i--) {
            $token = $r[$i];
            $str = $token[0] . $str;
            array_unshift($popped, $token);
            unset($r[$i]);
            if ($token[3] == 1) {
                break;
            }
        }
        $this->count++;

        $l = strlen($str);
        // Position of the token preceding the fragment, used to shift the
        // positions produced by the nested segmentation.
        $w = isset($r[$i - 1]) ? $r[$i - 1][1] : 0;

        if (isset($auto_info['pl']) && $l == $auto_info['pl']) {
            $r = $popped;

            return false;
        }

        if ($str && $auto_info['c'] < 3) {
            $auto_info['pl'] = $l;
            $auto_info['long'] = !$auto_info['long'];
            $sr = $this->autoFind($str, $auto_info);
            $sr = array_map(function ($v) use ($w) {
                $v[1] += $w;

                return $v;
            }, $sr);
            $r = array_merge($r, $this->getGoodWord($popped, $sr));
        }
    }

    /**
     * Picks the segmentation that leaves fewer unknown bytes; ties and an
     * empty $new keep $old.
     *
     * @param array $old
     * @param array $new
     * @return array
     */
    private function getGoodWord($old, $new)
    {
        if (!$new) {
            return $old;
        }

        return $this->getUnknowCount($old) > $this->getUnknowCount($new) ? $new : $old;
    }

    /**
     * Sums the byte length of all tokens flagged as not found.
     *
     * @param array $ar Token list.
     * @return int
     */
    private function getUnknowCount($ar)
    {
        $bytes = 0;
        foreach ($ar as $token) {
            if ($token[3] == 0) {
                $bytes += strlen($token[0]);
            }
        }

        return $bytes;
    }

    /**
     * Longest-match scan: a word is emitted only when the next character can
     * no longer extend the current trie path.
     *
     * @param string $str
     * @param array  $auto_info Re-segmentation state forwarded to addNotFind().
     * @return array Token list.
     */
    private function find($str, $auto_info = [])
    {
        $len = strlen($str);
        $matched = '';  // text along the current trie path
        $pending = '';  // everything read since the last emitted word
        $prevEnd = 0;   // last byte index of the previously read character
        $result = [];
        $node = [];     // current trie node; empty so the first char starts at the root

        for ($i = 0; $i < $len; $i++) {
            list($char, $i) = $this->getD($str, $i);
            if (isset($node[$char])) {
                $matched .= $char;
                $node = $node[$char];
            } else {
                if (isset($node[$this->end])) {
                    $this->addNotFind($result, $pending, $matched, $prevEnd, $auto_info);
                    $this->addResult($result, $matched, $prevEnd, $node[$this->x]);
                    $pending = '';
                }
                // Restart from the root with the current character.
                $node = $this->dict;
                if (isset($node[$char])) {
                    $matched = $char;
                    $node = $node[$char];
                } else {
                    $matched = '';
                }
            }
            $pending .= $char;
            $prevEnd = $i;
        }

        if (isset($node[$this->end])) {
            $this->addNotFind($result, $pending, $matched, $i, $auto_info);
            $this->addResult($result, $matched, $i, $node[$this->x]);
        } else {
            $this->addNotFind($result, $pending, '', $i, $auto_info);
        }

        return $result;
    }

    /**
     * Records the unknown text that precedes the currently matched word.
     *
     * $s is the trailing part of $n that matched the dictionary; only that
     * trailing occurrence is stripped, so identical text inside the unknown
     * part is preserved. Nothing is recorded when $n consists solely of $s.
     * In auto mode the new unknown token triggers a re-segmentation attempt.
     *
     * @param array  $r         Result list, modified in place.
     * @param string $n         Text read since the last emitted word.
     * @param string $s         Matched word at the end of $n ('' if none).
     * @param int    $i         Position associated with the end of $n.
     * @param array  $auto_info Re-segmentation state forwarded to reGet().
     */
    private function addNotFind(&$r, $n, $s, $i, $auto_info = [])
    {
        if ($n === $s) {
            return;
        }

        $suffixLen = strlen($s);
        if ($suffixLen > 0) {
            if (substr($n, -$suffixLen) === $s) {
                $n = substr($n, 0, -$suffixLen);
            } else {
                // Not expected from find()/shortFind(); keep the legacy behaviour.
                $n = str_replace($s, '', $n);
            }
        }

        $this->addResult($r, $n, $i - $suffixLen, null, 0);
        if ($this->auto) {
            $this->reGet($r, $auto_info);
        }
    }

    /**
     * Shortest-match scan: a word is emitted as soon as the trie path reaches
     * a node carrying the end marker, then matching restarts from the root.
     *
     * @param string $str
     * @param array  $auto_info Re-segmentation state forwarded to addNotFind().
     * @return array Token list.
     */
    private function shortFind($str, $auto_info = [])
    {
        $len = strlen($str);
        $matched = '';  // text along the current trie path
        $pending = '';  // everything read since the last emitted word
        $result = [];
        $node = [];     // current trie node; empty so the first char starts at the root

        for ($i = 0; $i < $len; $i++) {
            $charStart = $i;
            list($char, $i) = $this->getD($str, $i);
            if (isset($node[$char])) {
                $matched .= $char;
                $node = $node[$char];
            } else {
                if (isset($node[$this->end])) {
                    $this->addNotFind($result, $pending, $matched, $charStart, $auto_info);
                    $this->addResult($result, $matched, $charStart, $node[$this->x]);
                    $pending = '';
                }
                // Restart from the root with the current character.
                $node = $this->dict;
                if (isset($node[$char])) {
                    $matched = $char;
                    $node = $node[$char];
                } else {
                    $matched = '';
                }
            }
            $pending .= $char;

            if (isset($node[$this->end])) {
                $this->addNotFind($result, $pending, $matched, $i, $auto_info);
                $this->addResult($result, $matched, $i, $node[$this->x]);
                $node = $this->dict;
                $matched = '';
                $pending = '';
            }
        }

        if (isset($node[$this->end])) {
            $this->addNotFind($result, $pending, $matched, $i, $auto_info);
            $this->addResult($result, $matched, $i, $node[$this->x]);
        } else {
            $this->addNotFind($result, $pending, '', $i, $auto_info);
        }

        return $result;
    }

    /**
     * Appends one token to the result list.
     *
     * @param array       $r    Result list, modified in place.
     * @param string      $k    Token text.
     * @param int         $i    Token position.
     * @param string|null $x    Part of speech, null for unknown text.
     * @param int         $find 1 for a dictionary word, 0 for unknown text.
     */
    private function addResult(&$r, $k, $i, $x, $find = 1)
    {
        $r[] = [$k, $i, $x, $find];
    }
}