AipNlp.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. <?php
  2. namespace addons\cms\library\aip;
  3. /*
  4. * Copyright (c) 2017 Baidu.com, Inc. All Rights Reserved
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  7. * use this file except in compliance with the License. You may obtain a copy of
  8. * the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  14. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  15. * License for the specific language governing permissions and limitations under
  16. * the License.
  17. */
  18. use addons\cms\library\aip\lib\AipBase;
  /**
   * Client for the Baidu AIP natural-language-processing REST endpoints.
   *
   * Every public method builds a small associative payload, lets the caller
   * extend or override it through $options, serialises it to JSON and hands it
   * to the inherited request() method together with the endpoint URL.
   *
   * Encoding contract: string arguments must be valid UTF-8, because they are
   * passed to json_encode() before the result is converted from UTF-8 to GBK.
   * The byte limits quoted on the individual methods come from the upstream
   * SDK notes and are not enforced here.
   */
  class AipNlp extends AipBase
  {
      /**
       * Lexical analysis (lexer) API URL.
       * @var string
       */
      private $lexerUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer';

      /**
       * Lexical analysis, custom edition (lexer_custom) API URL.
       * @var string
       */
      private $lexerCustomUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer_custom';

      /**
       * Dependency parsing (dep_parser) API URL.
       * @var string
       */
      private $depParserUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/depparser';

      /**
       * Word vector representation (word_embedding) API URL.
       * @var string
       */
      private $wordEmbeddingUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_vec';

      /**
       * DNN language model (dnnlm_cn) API URL.
       * @var string
       */
      private $dnnlmCnUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/dnnlm_cn';

      /**
       * Word semantic similarity (word_sim_embedding) API URL.
       * @var string
       */
      private $wordSimEmbeddingUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_sim';

      /**
       * Short-text similarity (simnet) API URL.
       * @var string
       */
      private $simnetUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/simnet';

      /**
       * Comment opinion extraction (comment_tag) API URL.
       * @var string
       */
      private $commentTagUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag';

      /**
       * Sentiment classification (sentiment_classify) API URL.
       * @var string
       */
      private $sentimentClassifyUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify';

      /**
       * Article tagging (keyword) API URL.
       * @var string
       */
      private $keywordUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/keyword';

      /**
       * Article classification (topic) API URL.
       * @var string
       */
      private $topicUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/topic';

      /**
       * Text error correction (ecnet) API URL.
       * @var string
       */
      private $ecnetUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/ecnet';

      /**
       * Conversational emotion recognition (emotion) API URL.
       * @var string
       */
      private $emotionUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/emotion';

      /**
       * News summarisation (news_summary) API URL.
       * @var string
       */
      private $newsSummaryUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/news_summary';

      /**
       * Address recognition (address) API URL.
       * @var string
       */
      private $addressUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/address';

      /**
       * Decode a raw response body.
       *
       * The body is treated as GBK, converted to UTF-8 and parsed as JSON into
       * associative arrays; integers too large for a PHP int are kept as strings.
       * The method name (including its spelling) is kept as-is; it is presumably
       * a hook invoked by AipBase - confirm against the parent class.
       *
       * @param string $content raw response body
       * @return mixed decoded value, or null when the body is not valid JSON
       */
      protected function proccessResult($content)
      {
          $utf8 = mb_convert_encoding($content, 'UTF8', 'GBK');

          return json_decode($utf8, true, 512, JSON_BIGINT_AS_STRING);
      }

      /**
       * Shared request path used by every public endpoint method.
       *
       * Merges $options over $payload (for string keys the value in $options
       * wins), JSON-encodes the result, converts the JSON text from UTF-8 to GBK
       * and passes it to request(). With json_encode()'s default flags non-ASCII
       * characters are emitted as \uXXXX escapes, so the conversion normally
       * leaves the text unchanged; it is kept so the bytes sent stay identical.
       *
       * NOTE: json_encode() returns false for input it cannot encode (for
       * example invalid UTF-8); that result is forwarded without a check,
       * exactly as the individual methods did before this helper existed.
       *
       * @param string $url     endpoint URL
       * @param array  $payload mandatory fields for the endpoint
       * @param array  $options caller-supplied extra fields
       * @return array whatever request() returns for this call
       */
      private function postAsGbkJson($url, array $payload, $options)
      {
          $merged = array_merge($payload, $options);
          $body = mb_convert_encoding(json_encode($merged), 'GBK', 'UTF8');

          return $this->request($url, $body);
      }

      /**
       * Lexical analysis.
       *
       * @param string $text    text to analyse (UTF-8); upstream limit 65536 bytes
       * @param array  $options optional extra request fields (string keys and values)
       * @return array
       */
      public function lexer($text, $options = array())
      {
          return $this->postAsGbkJson($this->lexerUrl, array('text' => $text), $options);
      }

      /**
       * Lexical analysis (custom edition).
       *
       * @param string $text    text to analyse (UTF-8); upstream limit 65536 bytes
       * @param array  $options optional extra request fields (string keys and values)
       * @return array
       */
      public function lexerCustom($text, $options = array())
      {
          return $this->postAsGbkJson($this->lexerCustomUrl, array('text' => $text), $options);
      }

      /**
       * Dependency parsing.
       *
       * @param string $text    text to analyse (UTF-8); upstream limit 256 bytes
       * @param array  $options optional extra request fields (string keys and values):
       *                        - mode: model selection, default 0; 0 = web model,
       *                          1 = query model
       * @return array
       */
      public function depParser($text, $options = array())
      {
          return $this->postAsGbkJson($this->depParserUrl, array('text' => $text), $options);
      }

      /**
       * Word vector representation.
       *
       * @param string $word    word to look up (UTF-8); upstream limit 64 bytes
       * @param array  $options optional extra request fields (string keys and values)
       * @return array
       */
      public function wordEmbedding($word, $options = array())
      {
          return $this->postAsGbkJson($this->wordEmbeddingUrl, array('word' => $word), $options);
      }

      /**
       * DNN language model.
       *
       * @param string $text    text content (UTF-8), no word segmentation needed;
       *                        upstream limit 512 bytes
       * @param array  $options optional extra request fields (string keys and values)
       * @return array
       */
      public function dnnlm($text, $options = array())
      {
          return $this->postAsGbkJson($this->dnnlmCnUrl, array('text' => $text), $options);
      }

      /**
       * Word semantic similarity.
       *
       * @param string $word1   first word (UTF-8); upstream limit 64 bytes
       * @param string $word2   second word (UTF-8); upstream limit 64 bytes
       * @param array  $options optional extra request fields (string keys and values):
       *                        - mode: reserved for choosing a similarity model,
       *                          default 0; only 0 is currently supported
       * @return array
       */
      public function wordSimEmbedding($word1, $word2, $options = array())
      {
          $payload = array(
              'word_1' => $word1,
              'word_2' => $word2,
          );

          return $this->postAsGbkJson($this->wordSimEmbeddingUrl, $payload, $options);
      }

      /**
       * Short-text similarity.
       *
       * @param string $text1   first text to compare (UTF-8); upstream limit 512 bytes
       * @param string $text2   second text to compare (UTF-8); upstream limit 512 bytes
       * @param array  $options optional extra request fields (string keys and values):
       *                        - model: "BOW" (default), "CNN" or "GRNN"
       * @return array
       */
      public function simnet($text1, $text2, $options = array())
      {
          $payload = array(
              'text_1' => $text1,
              'text_2' => $text2,
          );

          return $this->postAsGbkJson($this->simnetUrl, $payload, $options);
      }

      /**
       * Comment opinion extraction.
       *
       * @param string $text    review text (UTF-8); upstream limit 10240 bytes
       * @param array  $options optional extra request fields (string keys and values):
       *                        - type: review industry type, default 4 (food and dining)
       * @return array
       */
      public function commentTag($text, $options = array())
      {
          return $this->postAsGbkJson($this->commentTagUrl, array('text' => $text), $options);
      }

      /**
       * Sentiment classification.
       *
       * @param string $text    text content (UTF-8); upstream limit 102400 bytes
       * @param array  $options optional extra request fields (string keys and values)
       * @return array
       */
      public function sentimentClassify($text, $options = array())
      {
          return $this->postAsGbkJson($this->sentimentClassifyUrl, array('text' => $text), $options);
      }

      /**
       * Article tagging.
       *
       * @param string $title   article title (UTF-8); upstream limit 80 bytes
       * @param string $content article body (UTF-8); upstream limit 65535 bytes
       * @param array  $options optional extra request fields (string keys and values)
       * @return array
       */
      public function keyword($title, $content, $options = array())
      {
          $payload = array(
              'title' => $title,
              'content' => $content,
          );

          return $this->postAsGbkJson($this->keywordUrl, $payload, $options);
      }

      /**
       * Article classification.
       *
       * @param string $title   article title (UTF-8); upstream limit 80 bytes
       * @param string $content article body (UTF-8); upstream limit 65535 bytes
       * @param array  $options optional extra request fields (string keys and values)
       * @return array
       */
      public function topic($title, $content, $options = array())
      {
          $payload = array(
              'title' => $title,
              'content' => $content,
          );

          return $this->postAsGbkJson($this->topicUrl, $payload, $options);
      }

      /**
       * Text error correction.
       *
       * @param string $text    text to correct (UTF-8); upstream limit 511 bytes
       * @param array  $options optional extra request fields (string keys and values)
       * @return array
       */
      public function ecnet($text, $options = array())
      {
          return $this->postAsGbkJson($this->ecnetUrl, array('text' => $text), $options);
      }

      /**
       * Conversational emotion recognition.
       *
       * @param string $text    text whose emotion is to be recognised (UTF-8);
       *                        upstream limit 512 bytes
       * @param array  $options optional extra request fields (string keys and values):
       *                        - scene: "default" (no scene distinction), "talk"
       *                          (casual chat), "task" (task-oriented dialogue such
       *                          as navigation) or "customer_service" (for example
       *                          telecom or bank support)
       * @return array
       */
      public function emotion($text, $options = array())
      {
          return $this->postAsGbkJson($this->emotionUrl, array('text' => $text), $options);
      }

      /**
       * News summarisation.
       *
       * @param string  $content       article body (UTF-8); upstream limit is fewer
       *                               than 3000 characters, over-long input is
       *                               rejected by the service. Separate paragraphs
       *                               with "\n"; the upstream notes say paragraph
       *                               structure matters to the algorithm.
       * @param integer $maxSummaryLen maximum length of the returned summary; the
       *                               upstream notes recommend 200-500 characters
       * @param array   $options       optional extra request fields (string keys and values):
       *                               - title: article title, fewer than 200
       *                                 characters; may be left empty when the
       *                                 article has no title
       * @return array
       */
      public function newsSummary($content, $maxSummaryLen, $options = array())
      {
          $payload = array(
              'content' => $content,
              'max_summary_len' => $maxSummaryLen,
          );

          return $this->postAsGbkJson($this->newsSummaryUrl, $payload, $options);
      }

      /**
       * Address recognition.
       *
       * @param string $text    text to recognise (UTF-8); upstream limit 1000 bytes
       * @param array  $options optional extra request fields (string keys and values)
       * @return array
       */
      public function address($text, $options = array())
      {
          return $this->postAsGbkJson($this->addressUrl, array('text' => $text), $options);
      }
  }