first commit

This commit is contained in:
Mr.Qin
2022-08-19 19:48:37 +08:00
commit afdd648b65
3275 changed files with 631084 additions and 0 deletions

View File

@@ -0,0 +1,179 @@
<?php
declare (strict_types = 1);
/**
* php构建哈希表类.
* User: Lustre
* Date: 17/3/9
* Time: 上午9:10
**/
namespace SensitiveHelper;
/**
 * Minimal insertion-ordered key/value map backed by a native PHP array.
 *
 * Keys follow native PHP array-key rules (e.g. the string '1' and the
 * integer 1 address the same entry). Values may be anything, including
 * falsy values such as false, 0, '' or null.
 */
class HashMap
{
    /**
     * Backing storage for the map; always an array, never null.
     *
     * @var array
     */
    protected $hashTable = [];

    public function __construct()
    {
    }

    /**
     * Store a value under a key, replacing any existing entry.
     *
     * @param int|string $key
     * @param mixed $value
     * @return mixed|null the value previously stored under $key, or null if the key was absent
     */
    public function put($key, $value)
    {
        $previous = array_key_exists($key, $this->hashTable) ? $this->hashTable[$key] : null;
        $this->hashTable[$key] = $value;

        return $previous;
    }

    /**
     * Fetch the value stored under a key.
     *
     * @param int|string $key
     * @return mixed|null the stored value, or null when the key is absent
     */
    public function get($key)
    {
        if (array_key_exists($key, $this->hashTable)) {
            return $this->hashTable[$key];
        }

        return null;
    }

    /**
     * Remove the entry stored under a key.
     *
     * Uses unset() instead of rebuilding the table through the array's
     * internal pointer: a current()/next() loop terminates at the first
     * falsy value and does not start from the first element unless the
     * pointer is reset, which silently dropped unrelated entries.
     *
     * @param int|string $key
     * @return mixed|null the removed value, or null when the key was absent
     */
    public function remove($key)
    {
        if (!array_key_exists($key, $this->hashTable)) {
            return null;
        }

        $removed = $this->hashTable[$key];
        unset($this->hashTable[$key]);

        return $removed;
    }

    /**
     * List every key in insertion order.
     *
     * @return array
     */
    public function keys()
    {
        return array_keys($this->hashTable);
    }

    /**
     * List every value in insertion order.
     *
     * @return array
     */
    public function values()
    {
        return array_values($this->hashTable);
    }

    /**
     * Copy every entry of another map into this one, overwriting entries
     * that share a key.
     *
     * @param HashMap $map
     * @return void
     */
    public function putAll($map)
    {
        // An empty source map yields no keys, so no separate emptiness check is needed.
        foreach ($map->keys() as $key) {
            $this->put($key, $map->get($key));
        }
    }

    /**
     * Remove every entry.
     *
     * The table is reset to an empty array (not null) so that the other
     * methods, which pass it to array functions, keep working afterwards.
     *
     * @return bool always true
     */
    public function removeAll()
    {
        $this->hashTable = [];

        return true;
    }

    /**
     * Check whether any stored value equals the given value.
     *
     * Comparison is loose (==), matching the historical behaviour of this
     * method; every entry is inspected, including falsy ones.
     *
     * @param mixed $value
     * @return bool
     */
    public function containsValue($value)
    {
        return in_array($value, $this->hashTable);
    }

    /**
     * Check whether a key is present (even if its value is null).
     *
     * @param int|string $key
     * @return bool
     */
    public function containsKey($key)
    {
        return array_key_exists($key, $this->hashTable);
    }

    /**
     * Count the stored entries.
     *
     * @return int
     */
    public function size()
    {
        return count($this->hashTable);
    }

    /**
     * Tell whether the map holds no entries.
     *
     * @return bool
     */
    public function isEmpty()
    {
        return 0 === count($this->hashTable);
    }
}

View File

@@ -0,0 +1,336 @@
<?php
declare (strict_types = 1);
/**
* 敏感词类库.
* User: Lustre
* Date: 17/3/9
* Time: 上午9:11
*/
namespace SensitiveHelper;
use app\common\controller\common\model\system\Tags;
/**
 * Sensitive-word detector backed by a character trie.
 *
 * Every trie node is a HashMap whose keys are single characters mapping to
 * child nodes, plus the reserved key 'ending' (bool) that marks whether a
 * dictionary word terminates at that node.
 */
class SensitiveHelper
{
    /**
     * Character length of the content most recently scanned.
     *
     * @var int
     */
    protected $contentLength = 0;

    /**
     * Shared singleton instance.
     *
     * @var self|null
     */
    private static $_instance = null;

    /**
     * Root node of the sensitive-word trie; null until a tree is built.
     *
     * @var HashMap|null
     */
    protected $wordTree = null;

    /**
     * Optional pre-computed list of detected words. When non-empty,
     * replace() and mark() use it instead of scanning the content.
     *
     * @var array|null
     */
    protected static $badWordList = null;

    /**
     * Return the shared instance, creating it on first use.
     *
     * @return self
     */
    public static function instance()
    {
        if (!self::$_instance instanceof self) {
            self::$_instance = new self();
        }

        return self::$_instance;
    }

    /**
     * Add the words of a dictionary file (one word per line) to the trie.
     *
     * An existing trie is extended rather than replaced. Lines are trimmed
     * and blank lines are ignored.
     *
     * @param string|null $filepath path of the dictionary file
     * @return $this
     * @throws \Exception when the path is missing or cannot be opened
     */
    public function setTreeByFile($filepath = null)
    {
        // The is_string() guard keeps the default null (or any non-string) on the
        // documented exception path instead of a TypeError under strict_types.
        if (!is_string($filepath) || !file_exists($filepath)) {
            throw new \Exception('没有词库');
        }

        $this->wordTree = $this->wordTree ?: new HashMap();
        foreach ($this->yieldToReadFile($filepath) as $word) {
            $this->buildWordToTree(trim($word));
        }

        return $this;
    }

    /**
     * Rebuild the trie from a list of words.
     *
     * When no words are supplied, the names of Tags rows matching the given
     * type and having a true status are used instead. Any existing trie is
     * discarded.
     *
     * @param array|null $sensitiveWords words to load, or empty to query Tags
     * @param bool $type value matched against the Tags 'type' column
     * @return $this
     */
    public function setTree($sensitiveWords = null, bool $type = true)
    {
        if (empty($sensitiveWords)) {
            $sensitiveWords = Tags::where([
                'type' => $type,
                'status' => true,
            ])->column('name');
        }

        $this->wordTree = new HashMap();
        foreach ($sensitiveWords as $word) {
            $this->buildWordToTree($word);
        }

        return $this;
    }

    /**
     * Collect the sensitive words that occur in a text, in order of appearance.
     *
     * Scanning resumes after the end of each detected word, so detected
     * words never overlap.
     *
     * @param string $content text to scan
     * @param int $matchType 1 (default) stops at the shortest dictionary word found at
     *                       a position; any other value keeps going and takes the longest
     * @param int $wordNum maximum number of words to return; 0 returns all
     * @return array list of detected words (may contain duplicates)
     */
    public function getBadWord($content, $matchType = 1, $wordNum = 0)
    {
        $contentLength = mb_strlen($content, 'utf-8');
        $this->contentLength = $contentLength;
        $shortestOnly = (1 === $matchType);
        $badWordList = [];

        for ($start = 0; $start < $contentLength; $start++) {
            $matchedLength = $this->matchLengthAt($content, $start, $contentLength, $shortestOnly);
            if ($matchedLength <= 0) {
                continue;
            }

            $badWordList[] = mb_substr($content, $start, $matchedLength, 'utf-8');
            if ($wordNum > 0 && count($badWordList) == $wordNum) {
                return $badWordList;
            }

            // Resume right after the detected word (the loop adds the final +1).
            $start += $matchedLength - 1;
        }

        return $badWordList;
    }

    /**
     * Replace every detected sensitive word in a text.
     *
     * All replacements happen in one pass with the longest word tried first,
     * so a word that is contained in another detected word cannot corrupt it
     * and replacement output is never re-scanned.
     *
     * @param string $content text to clean
     * @param string $replaceChar replacement text
     * @param bool $repeat true repeats $replaceChar once per character of the word;
     *                     false substitutes a single $replaceChar for the whole word
     * @param int $matchType see getBadWord()
     * @return string the text with sensitive words replaced
     * @throws \Exception when $content is empty
     */
    public function replace($content, $replaceChar = '', $repeat = false, $matchType = 1)
    {
        // empty() also treats the string "0" as empty, but "0" is real content.
        if (empty($content) && '0' !== $content) {
            throw new \Exception('请填写检测的内容');
        }

        $badWordList = self::$badWordList ? self::$badWordList : $this->getBadWord($content, $matchType);
        if (empty($badWordList)) {
            return $content;
        }

        $replacePairs = [];
        foreach ($badWordList as $badWord) {
            if ('' === (string) $badWord) {
                continue;
            }
            $replacePairs[$badWord] = $repeat
                ? $this->dfaBadWordConversChars($badWord, $replaceChar)
                : $replaceChar;
        }

        return strtr($content, $replacePairs);
    }

    /**
     * Wrap every detected sensitive word in a text with a start and end tag.
     *
     * All wrapping happens in one pass, so a word that occurs several times
     * is wrapped exactly once per occurrence and already inserted tags are
     * never re-scanned.
     *
     * @param string $content text to annotate
     * @param string $sTag opening tag, e.g. <mark>
     * @param string $eTag closing tag, e.g. </mark>
     * @param int $matchType see getBadWord()
     * @return string the annotated text
     * @throws \Exception when $content is empty
     */
    public function mark($content, $sTag, $eTag, $matchType = 1)
    {
        // empty() also treats the string "0" as empty, but "0" is real content.
        if (empty($content) && '0' !== $content) {
            throw new \Exception('请填写检测的内容');
        }

        $badWordList = self::$badWordList ? self::$badWordList : $this->getBadWord($content, $matchType);
        if (empty($badWordList)) {
            return $content;
        }

        $replacePairs = [];
        foreach ($badWordList as $badWord) {
            if ('' === (string) $badWord) {
                continue;
            }
            $replacePairs[$badWord] = $sTag . $badWord . $eTag;
        }

        return strtr($content, $replacePairs);
    }

    /**
     * Tell whether a text contains at least one sensitive word.
     *
     * NOTE: despite the method name, the return value is true when a
     * sensitive word IS found and false when the text is clean.
     *
     * Every character position is tried as a possible word start; a prefix
     * that fails to complete a word does not cause later positions to be
     * skipped.
     *
     * @param string $content text to scan
     * @return bool true if a sensitive word occurs in $content
     */
    public function islegal($content)
    {
        $contentLength = mb_strlen($content, 'utf-8');
        $this->contentLength = $contentLength;

        for ($start = 0; $start < $contentLength; $start++) {
            if ($this->matchLengthAt($content, $start, $contentLength, true) > 0) {
                return true;
            }
        }

        return false;
    }

    /**
     * Measure the dictionary word that starts at a given character offset.
     *
     * Walks the trie from the root, one character at a time, and records the
     * length each time a node flagged as a word ending is reached.
     *
     * @param string $content text being scanned
     * @param int $start character offset at which the word must begin
     * @param int $contentLength character length of $content
     * @param bool $shortestOnly true returns at the first word ending; false keeps
     *                           walking and returns the last word ending reached
     * @return int length in characters of the matched word, or 0 when none starts here
     */
    protected function matchLengthAt($content, $start, $contentLength, $shortestOnly)
    {
        $node = $this->wordTree;
        $matchedLength = 0;

        for ($i = $start; $i < $contentLength; $i++) {
            $child = $node->get(mb_substr($content, $i, 1, 'utf-8'));
            // No child for this character: the walk cannot continue.
            if (empty($child)) {
                break;
            }
            $node = $child;

            if (false === $node->get('ending')) {
                continue;
            }

            // Only lengths that stop on a word ending count as a match.
            $matchedLength = $i - $start + 1;
            if ($shortestOnly) {
                break;
            }
        }

        return $matchedLength;
    }

    /**
     * Lazily yield the lines of a file, closing the handle when iteration
     * finishes or is abandoned.
     *
     * @param string $filepath
     * @return \Generator yields each line as returned by fgets()
     * @throws \Exception when the file cannot be opened
     */
    protected function yieldToReadFile($filepath)
    {
        $fp = fopen($filepath, 'r');
        if (false === $fp) {
            throw new \Exception('没有词库');
        }

        try {
            // Loop on the fgets() result rather than feof(): fgets() returns false
            // at end of file, and that false must not be yielded to callers.
            while (false !== ($line = fgets($fp))) {
                yield $line;
            }
        } finally {
            fclose($fp);
        }
    }

    /**
     * Insert a single word into the trie; an empty string is ignored.
     *
     * @param string $word
     * @return void
     */
    protected function buildWordToTree($word = '')
    {
        if ('' === $word) {
            return;
        }

        $tree = $this->wordTree;
        $wordLength = mb_strlen($word, 'utf-8');
        $lastIndex = $wordLength - 1;

        for ($i = 0; $i < $wordLength; $i++) {
            $keyChar = mb_substr($word, $i, 1, 'utf-8');
            $childTree = $tree->get($keyChar);

            if (!$childTree) {
                // New nodes start out as "not a word ending".
                $childTree = new HashMap();
                $childTree->put('ending', false);
                $tree->put($keyChar, $childTree);
            }
            $tree = $childTree;

            // The node of the final character marks a complete word.
            if ($i == $lastIndex) {
                $tree->put('ending', true);
            }
        }
    }

    /**
     * Build a mask made of $char repeated once per character of $word.
     *
     * @param string $word
     * @param string $char
     * @return string
     */
    protected function dfaBadWordConversChars($word, $char)
    {
        return str_repeat((string) $char, mb_strlen($word, 'utf-8'));
    }
}