fix:修复BUG/升级1.1.6版本

This commit is contained in:
Ying
2023-04-25 20:11:49 +08:00
parent 445e5f9662
commit 6a6866bbaf
2357 changed files with 456920 additions and 140567 deletions

View File

@@ -1,9 +1 @@
# These are supported funding model platforms
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: overtrue
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
custom: # Replace with a single custom sponsorship URL
github: [overtrue]

View File

@@ -0,0 +1,22 @@
name: Test
on: [push, pull_request]
jobs:
phpunit:
name: PHP-${{ matrix.php_version }}-${{ matrix.perfer }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
php_version:
- 8.0
- 8.1
perfer:
- stable
- lowest
steps:
- uses: actions/checkout@master
- name: Install Dependencies
run: composer update --prefer-dist --no-interaction --no-suggest --prefer-${{ matrix.perfer }}
- name: Run PHPUnit
run: ./vendor/bin/phpunit

View File

@@ -0,0 +1,49 @@
<?php
// PHP-CS-Fixer configuration for this package: the @PSR12 rule set plus the
// individual fixers enabled below, applied to the files selected by the finder.
return (new PhpCsFixer\Config())
->setRules([
// Baseline rule set.
'@PSR12' => true,
// Additional fixers switched on individually.
'binary_operator_spaces' => true,
'blank_line_after_opening_tag' => true,
'compact_nullable_typehint' => true,
'declare_equal_normalize' => true,
'lowercase_cast' => true,
'lowercase_static_reference' => true,
'new_with_braces' => true,
'no_blank_lines_after_class_opening' => true,
'no_leading_import_slash' => true,
'no_whitespace_in_blank_line' => true,
'no_unused_imports' => true,
// Only the position of `use Trait;` statements is listed in the order.
'ordered_class_elements' => [
'order' => [
'use_trait',
],
],
// Imports are grouped by kind in this order: classes, functions, constants.
// NOTE(review): 'none' presumably means no sorting inside each group — confirm against the fixer docs.
'ordered_imports' => [
'imports_order' => [
'class',
'function',
'const',
],
'sort_algorithm' => 'none',
],
'return_type_declaration' => true,
'short_scalar_cast' => true,
'single_blank_line_before_namespace' => true,
'single_trait_insert_per_statement' => true,
'ternary_operator_spaces' => true,
'unary_operator_spaces' => true,
// Visibility rule configured for constants, methods and properties.
'visibility_required' => [
'elements' => [
'const',
'method',
'property',
],
],
])
// Files to process: src/ and tests/ next to this config file, excluding 'vendor'.
->setFinder(
PhpCsFixer\Finder::create()
->exclude('vendor')
->in([__DIR__.'/src/', __DIR__.'/tests/'])
)
;

View File

@@ -1,124 +1,212 @@
<h1 align="center">Pinyin</h1>
# Pinyin
<p align="center">
[![Build Status](https://travis-ci.org/overtrue/pinyin.svg?branch=master)](https://travis-ci.org/overtrue/pinyin)
[![Test](https://github.com/overtrue/pinyin/actions/workflows/test.yml/badge.svg)](https://github.com/overtrue/pinyin/actions/workflows/test.yml)
[![Latest Stable Version](https://poser.pugx.org/overtrue/pinyin/v/stable.svg)](https://packagist.org/packages/overtrue/pinyin) [![Total Downloads](https://poser.pugx.org/overtrue/pinyin/downloads.svg)](https://packagist.org/packages/overtrue/pinyin) [![Latest Unstable Version](https://poser.pugx.org/overtrue/pinyin/v/unstable.svg)](https://packagist.org/packages/overtrue/pinyin) [![License](https://poser.pugx.org/overtrue/pinyin/license.svg)](https://packagist.org/packages/overtrue/pinyin)
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/overtrue/pinyin/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/overtrue/pinyin/?branch=master)
[![Code Coverage](https://scrutinizer-ci.com/g/overtrue/pinyin/badges/coverage.png?b=master)](https://scrutinizer-ci.com/g/overtrue/pinyin/?branch=master)
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fovertrue%2Fpinyin.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2Fovertrue%2Fpinyin?ref=badge_shield)
</p>
:cn: 基于 [CC-CEDICT](http://cc-cedict.org/wiki/) 词典的中文转拼音工具,更准确的支持多音字的汉字转拼音解决方案。
:cn: 基于 [mozillazg/pinyin-data](https://github.com/mozillazg/pinyin-data) 词典的中文转拼音工具,更准确的支持多音字的汉字转拼音解决方案。
[喜欢我的项目?点击这里支持我](https://github.com/sponsors/overtrue)
## 安装
使用 Composer 安装:
```
$ composer require "overtrue/pinyin:~4.0"
``` bash
composer require overtrue/pinyin:^5.0
```
## 使用
可选转换方案:
### 拼音风格
- 内存型,适用于服务器内存空间较富余,优点:转换快
- 小内存型(默认),适用于内存比较紧张的环境,优点:占用内存小,转换不如内存型快
- I/O型适用于虚拟机内存限制比较严格环境。优点非常微小内存消耗。缺点转换慢不如内存型转换快,php >= 5.5
除了获取首字母的方法外,所有方法都支持第二个参数,用于指定拼音的格式,可选值为:
## 可用选项:
- `symbol` (默认)声调符号,例如 `pīn yīn`
- `none` 不输出声调,例如 `pin yin`
- `number` 末尾数字模式的拼音,例如 `pin1 yin1`
| 选项 | 描述 |
| ------------- | ---------------------------------------------------|
| `PINYIN_TONE` | UNICODE 式音调:`měi hǎo` |
| `PINYIN_ASCII_TONE` | 带数字式音调: `mei3 hao3` |
| `PINYIN_NO_TONE` | 无音调:`mei hao` |
| `PINYIN_KEEP_NUMBER` | 保留数字 |
| `PINYIN_KEEP_ENGLISH` | 保留英文 |
| `PINYIN_KEEP_PUNCTUATION` | 保留标点 |
| `PINYIN_UMLAUT_V` | 使用 `v` 代替 `yu`, 例如:吕 `lyu` 将会转为 `lv` |
### 返回值
### 拼音数组
除了 `permalink` 返回字符串外,其它方法都返回集合类型 [`Overtrue\Pinyin\Collection`](https://github.com/overtrue/pinyin/blob/master/src/Collection.php)
```php
use Overtrue\Pinyin\Pinyin;
// 小内存型
$pinyin = new Pinyin(); // 默认
// 内存型
// $pinyin = new Pinyin('\\Overtrue\\Pinyin\\MemoryFileDictLoader');
// I/O型
// $pinyin = new Pinyin('\\Overtrue\\Pinyin\\GeneratorFileDictLoader');
$pinyin->convert('带着希望去旅行,比到达终点更美好');
// ["dai", "zhe", "xi", "wang", "qu", "lyu", "xing", "bi", "dao", "da", "zhong", "dian", "geng", "mei", "hao"]
$pinyin->convert('带着希望去旅行,比到达终点更美好', PINYIN_TONE);
// ["dài","zhe","xī","wàng","qù","lǚ","xíng","bǐ","dào","dá","zhōng","diǎn","gèng","měi","hǎo"]
$pinyin->convert('带着希望去旅行,比到达终点更美好', PINYIN_ASCII_TONE);
//["dai4","zhe","xi1","wang4","qu4","lyu3","xing2","bi3","dao4","da2","zhong1","dian3","geng4","mei3","hao3"]
$pinyin = Pinyin::sentence('你好,世界');
```
- 小内存型: 将字典分片载入内存
- 内存型: 将所有字典预先载入内存
- I/O型: 不载入内存将字典使用文件流打开逐行遍历并运用php5.5生成器(yield)特性分配单行内存
你可以通过以下方式访问集合内容:
```php
echo $pinyin; // nǐ hǎo shì jiè
// 直接将对象转成字符串
$string = (string) $pinyin; // nǐ hǎo shì jiè
$pinyin->toArray(); // ['nǐ', 'hǎo', 'shì', 'jiè']
// 直接使用索引访问
$pinyin[0]; // 'nǐ'
// 使用函数遍历
$pinyin->map('ucfirst'); // ['Nǐ', 'Hǎo', 'Shì', 'Jiè']
// 拼接为字符串
$pinyin->join(' '); // 'nǐ hǎo shì jiè'
$pinyin->join('-'); // 'nǐ-hǎo-shì-jiè'
// 转成 json
$pinyin->toJson(); // '["nǐ","hǎo","shì","jiè"]'
json_encode($pinyin); // '["nǐ","hǎo","shì","jiè"]'
```
### 文字段落转拼音
```php
use Overtrue\Pinyin\Pinyin;
echo Pinyin::sentence('带着希望去旅行,比到达终点更美好');
// dài zhe xī wàng qù lyu xíng bǐ dào dá zhōng diǎn gèng měi hǎo
// 去除声调
echo Pinyin::sentence('带着希望去旅行,比到达终点更美好', 'none');
// dai zhe xi wang qu lyu xing bi dao da zhong dian geng mei hao
```
### 生成用于链接的拼音字符串
通常用于文章链接等,可以使用 `permalink` 方法获取拼音字符串:
```php
$pinyin->permalink('带着希望去旅行'); // dai-zhe-xi-wang-qu-lyu-xing
$pinyin->permalink('带着希望去旅行', '.'); // dai.zhe.xi.wang.qu.lyu.xing
echo Pinyin::permalink('带着希望去旅行'); // dai-zhe-xi-wang-qu-lyu-xing
echo Pinyin::permalink('带着希望去旅行', '.'); // dai.zhe.xi.wang.qu.lyu.xing
```
### 获取首字符字符串
```php
$pinyin->abbr('带着希望去旅行'); // dzxwqlx
$pinyin->abbr('带着希望去旅行', '-'); // d-z-x-w-q-l-x
$pinyin->abbr('你好2018', PINYIN_KEEP_NUMBER); // nh2018
$pinyin->abbr('Happy New Year! 2018', PINYIN_KEEP_ENGLISH); // HNY2018
```
### 翻译整段文字为拼音
将会保留中文字符:`,。 “ ” ` 并替换为对应的英文符号。
通常用于创建搜索用的索引,可以使用 `abbr` 方法转换:
```php
$pinyin->sentence('带着希望去旅行,比到达终点更美好!');
// dai zhe xi wang qu lyu xing, bi dao da zhong dian geng mei hao!
echo Pinyin::abbr('带着希望去旅行'); // d z x w q l x
echo Pinyin::abbr('带着希望去旅行')->join('-'); // d-z-x-w-q-l-x
$pinyin->sentence('带着希望去旅行,比到达终点更美好!', PINYIN_TONE);
// dài zhe xī wàng qù lǚ xíng, bǐ dào dá zhōng diǎn gèng měi hǎo!
echo Pinyin::abbr('你好2018')->join(''); // nh2018
echo Pinyin::abbr('Happy New Year! 2018')->join(''); // HNY2018
```
### 翻译姓名
**姓名首字母**
将首字作为姓氏转换,其余作为普通词语转换:
```php
echo Pinyin::nameAbbr('欧阳'); // o y
echo Pinyin::nameAbbr('单单单')->join('-'); // s-d-d
```
### 姓名转换
姓名的姓的读音有些与普通字不一样,比如 ‘单’ 常见的音为 `dan`,而作为姓的时候读 `shan`。
```php
$pinyin->name('单某某'); // ['shan', 'mou', 'mou']
$pinyin->name('单某某', PINYIN_TONE); // ["shàn","mǒu","mǒu"]
echo Pinyin::name('单某某'); // shàn mǒu mǒu
echo Pinyin::name('单某某', 'none'); // shan mou mou
echo Pinyin::name('单某某', 'none')->join('-'); // shan-mou-mou
```
更多使用请参考 [测试用例](https://github.com/overtrue/pinyin/blob/master/tests/AbstractDictLoaderTestCase.php)。
### 多音字
多音字的返回值为关联数组的集合:
```php
$pinyin = Pinyin::polyphones('重庆');
echo $pinyin['重']; // ["zhòng", "chóng", "tóng"]
echo $pinyin['庆']; // ["qìng"]
$pinyin->toArray();
// [
// "重": ["zhòng", "chóng", "tóng"],
// "庆": ["qìng"]
// ]
```
### 单字转拼音
和多音字类似,单字的返回值为字符串,多音字将根据该字字频调整得到常用音:
```php
$pinyin = Pinyin::chars('重庆');
echo $pinyin['重']; // "zhòng"
echo $pinyin['庆']; // "qìng"
$pinyin->toArray();
// [
// "重": "zhòng",
// "庆": "qìng"
// ]
```
> **Warning**
>
> 当单字处理时由于多音字来自词频表中取得常用音,所以在词语环境下可能出现不正确的情况,建议使用多音字处理。
更多使用请参考 [测试用例](https://github.com/overtrue/pinyin/blob/master/tests/PinyinTest.php)。
## 命令行工具
你可以使用命令行来实现拼音的转换:
```bash
php ./bin/pinyin 带着希望去旅行
# dài zhe xī wàng qù lyu xíng
```
更多使用方法,可以查看帮助文档:
```bash
php ./bin/pinyin --help
# Usage:
# ./pinyin [chinese] [method] [options]
# Options:
# -j, --json 输出 JSON 格式.
# -c, --compact 不格式化输出 JSON.
# -m, --method=[method] 转换方式可选name/phrase/permalink/polyphones/chars/nameAbbr/abbr/sentence.
# --no-tone 不使用音调.
# --tone-style=[style] 音调风格可选值default/none/number.
# -h, --help 显示帮助.
```
## 在 Laravel 中使用
独立的包在这里:[overtrue/laravel-pinyin](https://github.com/overtrue/laravel-pinyin)
## Contribution
欢迎提意见及完善补充词库 [`overtrue/pinyin-dictionary-maker`](https://github.com/overtrue/pinyin-dictionary-maker/tree/master/patches) :kiss:
欢迎提意见及完善补充词库:
- 单字拼音错误请添加到:[sources/pathes/chars.txt](https://github.com/overtrue/pinyin/blob/master/sources/pathes/chars.txt)
- 词语错误或补齐,请添加到:[sources/pathes/words.txt](https://github.com/overtrue/pinyin/blob/master/sources/pathes/words.txt)
## 参考
- [mozillazg/pinyin-data](https://github.com/mozillazg/pinyin-data)
- [详细参考资料](https://github.com/overtrue/pinyin-resources)
## :heart: Sponsor me
[![Sponsor me](https://github.com/overtrue/overtrue/blob/master/sponsor-me.svg?raw=true)](https://github.com/sponsors/overtrue)
如果你喜欢我的项目并想支持它,[点击这里 :heart:](https://github.com/sponsors/overtrue)
## Project supported by JetBrains
Many thanks to JetBrains for kindly providing a license for me to work on this and other open-source projects.
[![](https://resources.jetbrains.com/storage/products/company/brand/logos/jb_beam.svg)](https://www.jetbrains.com/?from=https://github.com/overtrue)
## PHP 扩展包开发
> 想知道如何从零开始构建 PHP 扩展包?
@@ -127,4 +215,4 @@ $pinyin->name('单某某', PINYIN_TONE); // ["shàn","mǒu","mǒu"]
# License
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fovertrue%2Fpinyin.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fovertrue%2Fpinyin?ref=badge_large)
MIT

96
vendor/overtrue/pinyin/bin/build vendored Normal file
View File

@@ -0,0 +1,96 @@
#!/usr/bin/env php
<?php
require __DIR__ . '/utils.php';
// Comma-separated list of polyphonic characters.
$polyphones = explode(',', file_get_contents(__DIR__ . '/../sources/polyphones.txt'));

// Raw source files and their hand-maintained patch files.
$charsSouce = __DIR__ . '/../sources/chars.txt';
$charsPathes = __DIR__.'/../sources/pathes/chars.txt';
$wordsSouce = __DIR__ . '/../sources/words.txt';
$wordsPathes = __DIR__ . '/../sources/pathes/words.txt';
$surnamesSource = file(__DIR__.'/../sources/surnames.txt');

// Missing raw sources are fetched once from the upstream data repositories.
$remoteSources = [
    $charsSouce => 'https://raw.githubusercontent.com/mozillazg/pinyin-data/master/pinyin.txt',
    $wordsSouce => 'https://raw.githubusercontent.com/mozillazg/phrase-pinyin-data/master/large_pinyin.txt',
];

foreach ($remoteSources as $target => $url) {
    if (file_exists($target)) {
        continue;
    }

    $content = file_get_contents($url);

    // Never persist a failed download: an empty file would satisfy the
    // file_exists() check above on every later run and hide the failure.
    if ($content === false || $content === '') {
        throw new Exception("Unable to download {$url}");
    }

    if (file_put_contents($target, $content) === false) {
        throw new Exception("Unable to write {$target}");
    }
}
// ------------------------------------------------
// Surname => pinyin syllables joined by tabs, with a leading and a trailing tab.
$surnames = [];

foreach ($surnamesSource as $line) {
    [$surname, $pinyin] = explode(',', trim($line));
    $syllables = preg_split('/\s+/', trim($pinyin));
    $surnames[trim($surname)] = "\t" . implode("\t", $syllables) . "\t";
}

// ------------------------------------------------

// Single characters with all of their readings.
$charWithPolyphones = [];

// Single characters reduced to their first reading, wrapped in tabs.
$chars = [];

foreach (parse_chars($charsSouce) as $char => $pinyin) {
    $chars[$char] = "\t" . $pinyin[0] . "\t";
    $charWithPolyphones[$char] = $pinyin;
}

// Patch entries replace the first-reading value of the same character.
$wrapFirstReading = fn ($p) => "\t{$p[0]}\t";

foreach (parse_chars($charsPathes, $wrapFirstReading) as $char => $pinyin) {
    $chars[$char] = $pinyin;
}
// ------------------------------------------------
// Word => tab-delimited pinyin for every word that has to be kept in the dictionary.
$words = [];

foreach (parse_words($wordsSouce) as $word => $pinyin) {
    $wordChars = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);

    // Exactly one syllable is required per character. Report the offending
    // entry itself (the previous message interpolated a stale $line variable
    // left over from the surname loop).
    if (count($wordChars) !== count($pinyin)) {
        throw new Exception("行解析错误:{$word}: " . implode(' ', $pinyin));
    }

    // NOTE(review): array_intersect_key() compares array keys. Both arrays are
    // lists, so this keeps the characters whose *position* also exists in
    // $polyphones, not the characters listed in it. array_intersect() may have
    // been intended — confirm before changing, as it alters the generated data.
    $polyphoneChars = array_intersect_key($wordChars, $polyphones);

    foreach ($polyphoneChars as $index => $char) {
        // Keep the word when any checked character is read differently from its
        // first (most common) reading. The comparison is positional so that a
        // character repeated inside one word is checked against its own syllable.
        if (isset($charWithPolyphones[$char]) && $pinyin[$index] !== $charWithPolyphones[$char][0]) {
            $words[$word] = join("\t", ["", ...$pinyin, ""]);
            break;
        }
    }
}

// Patched words are always included and override generated entries.
foreach (parse_words($wordsPathes) as $word => $pinyin) {
    $words[$word] = join("\t", ["", ...$pinyin, ""]);
}
// Clean up previously generated files without going through a shell: the old
// `rm -rf <path>/*` command was built from an unquoted path and would act on
// the wrong targets when the directory name contains spaces.
$dataDir = __DIR__ . '/../data';

if (!is_dir($dataDir) && !mkdir($dataDir, 0755, true) && !is_dir($dataDir)) {
    throw new Exception("Unable to create directory: {$dataDir}");
}

foreach (scandir($dataDir) ?: [] as $entry) {
    // Like the shell `*` glob, leave dot-files (and `.` / `..`) untouched.
    if ($entry[0] === '.') {
        continue;
    }

    $entryPath = $dataDir . '/' . $entry;

    // Only regular files are removed; sub-directories are left in place.
    if (is_file($entryPath)) {
        unlink($entryPath);
    }
}

// Surnames.
file_put_contents(__DIR__ . '/../data/surnames.php', "<?php\nreturn ".var_export($surnames, true).";\n");
echo count($surnames)." surnames saved.\n";

// Single characters with all readings; report the size of the array that is actually written.
file_put_contents(__DIR__ . '/../data/chars.php', "<?php\nreturn ".var_export($charWithPolyphones, true).";\n");
echo count($charWithPolyphones)." chars saved.\n";

// Words followed by single characters, ordered from the longest key (in bytes)
// to the shortest and written in chunks of 8000 entries.
$words = array_merge($words, $chars);
uksort($words, fn ($a, $b) => strlen($b) <=> strlen($a));

foreach (array_chunk($words, 8000, true) as $index => $group) {
    file_put_contents(__DIR__ . "/../data/words-{$index}.php", "<?php\nreturn ".var_export($group, true).";\n");
    echo count($group)." words saved in ".__DIR__ . "/../data/words-{$index}.php \n";
}

84
vendor/overtrue/pinyin/bin/pinyin vendored Normal file
View File

@@ -0,0 +1,84 @@
#!/usr/bin/env php
<?php
require __DIR__ . '/../vendor/autoload.php';
use Overtrue\Pinyin\Pinyin;
// First CLI argument is the text to convert.
$input = $argv[1] ?? null;

// Conversion methods that may be named on the command line.
$methods = explode(',', 'name,phrase,permalink,polyphones,chars,nameAbbr,abbr,sentence');

// Method used when none is named.
$method = 'sentence';

// Parsed options: option name (including leading dashes) => value or null.
$inputOptions = [];

$help = <<<"HELP"
Usage:
./pinyin [chinese] [method] [options]
Options:
-j, --json 输出 JSON 格式.
-c, --compact 不格式化输出 JSON.
-m, --method=[method] 转换方式可选name/phrase/permalink/polyphones/chars/nameAbbr/abbr/sentence.
--no-tone 不使用音调.
--tone-style=[style] 音调风格可选值default/none/number.
-h, --help 显示帮助.
HELP;

foreach ($argv as $i => $arg) {
    // Index 0 is the script name.
    if ($i === 0) {
        continue;
    }

    // A bare method name selects the conversion method.
    if (in_array($arg, $methods)) {
        $method = $arg;
        continue;
    }

    // Everything else is only of interest when it looks like an option.
    if (!str_starts_with($arg, '-')) {
        continue;
    }

    // Split `--key=value`; options without `=` get a null value.
    $pair = array_map('trim', explode('=', $arg, 2));
    $key = $pair[0];
    $value = $pair[1] ?? null;
    $inputOptions[$key] = $value;
}
/**
 * Tell whether an option (or its alias, when one is given) was passed on the
 * command line, regardless of whether it carries a value.
 *
 * Reads the global $inputOptions map.
 */
function has_option($option, $alias = null): bool
{
    global $inputOptions;

    if (array_key_exists($option, $inputOptions)) {
        return true;
    }

    return $alias ? array_key_exists($alias, $inputOptions) : false;
}
/**
 * Return the value of an option from the global $inputOptions map.
 *
 * The option name is tried first, then the alias (when given); the first
 * non-null value wins, otherwise $default is returned.
 */
function get_option($option, $default = null, $alias = null): ?string
{
    global $inputOptions;

    $candidates = $alias ? [$option, $alias] : [$option];

    foreach ($candidates as $name) {
        if (isset($inputOptions[$name])) {
            return $inputOptions[$name];
        }
    }

    return $default;
}
// Print the usage text and stop when no text was given or help was requested.
if (empty($input) || has_option('--help', '-h')) {
    echo $help;
    exit(0);
}

// Read the method from either spelling. Previously only `--method` was read,
// so `-m=<name>` replaced the method with null.
if (has_option('--method', '-m')) {
    $method = get_option('--method', $method, '-m');
}

// Only dispatch to known conversion methods; the name is used for a dynamic
// static call below.
if (!in_array($method, $methods, true)) {
    fwrite(STDERR, "Unsupported method: {$method}\n");
    exit(1);
}

// The tone styles are 'symbol', 'number' and 'none'. 'default' (the spelling
// shown in the help text) is kept as an alias of 'symbol'.
$toneStyle = has_option('--no-tone') ? 'none' : get_option('--tone-style', 'symbol');

if ($toneStyle === 'default') {
    $toneStyle = 'symbol';
}

// permalink takes a delimiter as its second argument instead of a tone style.
$result = Pinyin::$method($input, $method === 'permalink' ? '-' : $toneStyle);

// polyphones results are always printed as JSON.
$toJson = has_option('--json', '-j') || $method === 'polyphones';

if ($toJson) {
    // Compact output only drops the pretty-printing; characters stay unescaped.
    $options = JSON_UNESCAPED_UNICODE;

    if (!has_option('--compact', '-c')) {
        $options |= JSON_PRETTY_PRINT;
    }

    $result = json_encode($result, $options);
}

echo $result, "\n";

44
vendor/overtrue/pinyin/bin/utils.php vendored Normal file
View File

@@ -0,0 +1,44 @@
<?php
/**
 * Lazily parse a single-character pinyin source file.
 *
 * Every data line must look like this (readings are comma-separated):
 * <pre>
 * U+4E2D: zhōng,zhòng  # 中
 * </pre>
 * Blank lines and lines starting with `#` are skipped; any other line that
 * does not match is reported as an error.
 *
 * @param string        $path file to read
 * @param callable|null $fn   optional transformer applied to the list of readings
 *                            before it is yielded; defaults to the identity
 *
 * @return Generator yields character => (transformed) list of readings
 *
 * @throws Exception when the file cannot be read or a line cannot be parsed
 */
function parse_chars(string $path, ?callable $fn = null): Generator
{
    $fn ??= fn ($p) => $p;

    $lines = file($path);

    if ($lines === false) {
        throw new Exception("Unable to read file: $path");
    }

    foreach ($lines as $line) {
        // Tolerate empty lines (e.g. a trailing newline) instead of failing on them.
        if (trim($line) === '') {
            continue;
        }

        preg_match('/^U\+(?<code>[0-9A-Z]+):\s+(?<pinyin>\S+)\s+#\s*(?<char>\S+)/', $line, $matched);

        if ($matched && !empty($matched['pinyin'])) {
            yield $matched['char'] => $fn(explode(',', $matched['pinyin']));
        } elseif (!str_starts_with($line, '#')) {
            throw new Exception("行解析错误:$line");
        }
    }
}
/**
 * Lazily parse a word pinyin source file.
 *
 * Every data line must look like this (syllables are space-separated, the
 * trailing comment is optional):
 * <pre>
 * 㞎㞎: bǎ ba # comment
 * </pre>
 * Lines that do not match are skipped.
 *
 * @param string        $path file to read
 * @param callable|null $fn   optional transformer applied to the list of syllables
 *                            before it is yielded; defaults to the identity
 *
 * @return Generator yields word => (transformed) list of syllables
 *
 * @throws Exception when the file cannot be read
 */
function parse_words(string $path, ?callable $fn = null): Generator
{
    $fn ??= fn ($p) => $p;

    $lines = file($path);

    if ($lines === false) {
        throw new Exception("Unable to read file: $path");
    }

    foreach ($lines as $line) {
        preg_match('/^(?<word>[^#\s]+):\s+(?<pinyin>[\p{L} ]+)#?/u', $line, $matched);

        if (!$matched || empty($matched['pinyin'])) {
            continue;
        }

        // Split on runs of spaces so that repeated spaces never produce empty syllables.
        $syllables = preg_split('/ +/', trim($matched['pinyin']), -1, PREG_SPLIT_NO_EMPTY);

        if ($syllables) {
            yield $matched['word'] => $fn($syllables);
        }
    }
}

View File

@@ -1,68 +1,69 @@
{
"name": "overtrue/pinyin",
"description": "Chinese to pinyin translator.",
"keywords": [
"chinese",
"pinyin",
"cn2pinyin"
],
"homepage": "https://github.com/overtrue/pinyin",
"license": "MIT",
"authors": [
{
"name": "overtrue",
"homepage": "http://github.com/overtrue",
"email": "anzhengchao@gmail.com"
}
],
"autoload": {
"psr-4": {
"Overtrue\\Pinyin\\": "src/"
},
"files": ["src/const.php"]
},
"autoload-dev": {
"psr-4": {
"Overtrue\\Pinyin\\Test\\": "tests/"
}
},
"require": {
"php":">=7.1"
},
"require-dev": {
"phpunit/phpunit": "~8.0",
"brainmaestro/composer-git-hooks": "^2.7",
"friendsofphp/php-cs-fixer": "^2.16"
},
"extra": {
"hooks": {
"pre-commit": [
"composer test",
"composer fix-style"
],
"pre-push": [
"composer test",
"composer check-style"
]
}
},
"scripts": {
"post-update-cmd": [
"cghooks update"
],
"post-merge": "composer install",
"post-install-cmd": [
"cghooks add --ignore-lock",
"cghooks update"
],
"cghooks": "vendor/bin/cghooks",
"check-style": "php-cs-fixer fix --using-cache=no --diff --config=.php_cs --dry-run --ansi",
"fix-style": "php-cs-fixer fix --using-cache=no --config=.php_cs --ansi",
"test": "vendor/bin/phpunit --colors=always"
},
"scripts-descriptions": {
"test": "Run all tests.",
"check-style": "Run style checks (only dry run - no fixing!).",
"fix-style": "Run style checks and fix violations."
"name": "overtrue/pinyin",
"description": "Chinese to pinyin translator.",
"keywords": [
"chinese",
"pinyin",
"cn2pinyin"
],
"homepage": "https://github.com/overtrue/pinyin",
"license": "MIT",
"authors": [
{
"name": "overtrue",
"homepage": "http://github.com/overtrue",
"email": "anzhengchao@gmail.com"
}
],
"autoload": {
"psr-4": {
"Overtrue\\Pinyin\\": "src/"
}
},
"autoload-dev": {
"psr-4": {
"Overtrue\\Pinyin\\Tests\\": "tests/"
}
},
"require": {
"php": ">=8.0.2"
},
"require-dev": {
"phpunit/phpunit": "^9.5",
"brainmaestro/composer-git-hooks": "^2.7",
"friendsofphp/php-cs-fixer": "^3.2",
"nunomaduro/termwind": "^1.13"
},
"extra": {
"hooks": {
"pre-commit": [
"composer test",
"composer fix-style"
],
"pre-push": [
"composer test",
"composer check-style"
]
}
},
"scripts": {
"post-update-cmd": [
"cghooks update"
],
"post-merge": "composer install",
"post-install-cmd": [
"cghooks add --ignore-lock",
"cghooks update"
],
"cghooks": "vendor/bin/cghooks",
"check-style": "php-cs-fixer fix --using-cache=no --diff --dry-run --ansi",
"fix-style": "php-cs-fixer fix --using-cache=no --ansi",
"test": "vendor/bin/phpunit --colors=always",
"build": "php ./bin/build"
},
"scripts-descriptions": {
"test": "Run all tests.",
"check-style": "Run style checks (only dry run - no fixing!).",
"fix-style": "Run style checks and fix violations."
}
}

177507
vendor/overtrue/pinyin/data/chars.php vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,86 +0,0 @@
<?php
return array (
'万俟' => ' mò qí',
'尉迟' => ' yù chí',
'单于' => ' chán yú',
'不' => ' fǒu',
'沈' => ' shěn',
'称' => ' chēng',
'车' => ' chē',
'万' => ' wàn',
'汤' => ' tāng',
'阿' => ' ā',
'丁' => ' dīng',
'强' => ' qiáng',
'仇' => ' qiú',
'叶' => ' yè',
'阚' => ' kàn',
'乐' => ' yuè',
'乜' => ' niè',
'陆' => ' lù',
'殷' => ' yīn',
'牟' => ' móu',
'区' => ' ōu',
'宿' => ' sù',
'俞' => ' yú',
'余' => ' yú',
'齐' => ' qí',
'许' => ' xǔ',
'信' => ' xìn',
'无' => ' wú',
'浣' => ' wǎn',
'艾' => ' ài',
'浅' => ' qiǎn',
'烟' => ' yān',
'蓝' => ' lán',
'於' => ' yú',
'寻' => ' xún',
'殳' => ' shū',
'思' => ' sī',
'鸟' => ' niǎo',
'卜' => ' bǔ',
'单' => ' shàn',
'南' => ' nán',
'柏' => ' bǎi',
'朴' => ' piáo',
'繁' => ' pó',
'曾' => ' zēng',
'瞿' => ' qú',
'缪' => ' miào',
'石' => ' shí',
'冯' => ' féng',
'覃' => ' qín',
'幺' => ' yāo',
'种' => ' chóng',
'折' => ' shè',
'燕' => ' yān',
'纪' => ' jǐ',
'过' => ' guō',
'华' => ' huà',
'冼' => ' xiǎn',
'秘' => ' bì',
'重' => ' chóng',
'解' => ' xiè',
'那' => ' nā',
'和' => ' hé',
'贾' => ' jiǎ',
'塔' => ' tǎ',
'盛' => ' shèng',
'查' => ' zhā',
'盖' => ' gě',
'居' => ' jū',
'哈' => ' hǎ',
'的' => ' dē',
'薄' => ' bó',
'佴' => ' nài',
'六' => ' lù',
'都' => ' dū',
'翟' => ' zhái',
'扎' => ' zā',
'藏' => ' zàng',
'粘' => ' niàn',
'难' => ' nàn',
'若' => ' ruò',
'貟' => ' yùn',
'贠' => ' yùn',
);

View File

@@ -0,0 +1,86 @@
<?php
return array (
'万俟' => ' mò qí ',
'尉迟' => ' yù chí ',
'单于' => ' chán yú ',
'重' => ' chóng ',
'秘' => ' bì ',
'冼' => ' xiǎn ',
'华' => ' huà ',
'过' => ' guō ',
'纪' => ' jǐ ',
'燕' => ' yān ',
'种' => ' chóng ',
'繁' => ' pó ',
'幺' => ' yāo ',
'覃' => ' qín ',
'冯' => ' féng ',
'石' => ' shí ',
'缪' => ' miào ',
'瞿' => ' qú ',
'曾' => ' zēng ',
'解' => ' xiè ',
'折' => ' shè ',
'那' => ' nā ',
'佴' => ' nài ',
'难' => ' nàn ',
'粘' => ' niàn ',
'藏' => ' zàng ',
'扎' => ' zā ',
'翟' => ' zhái ',
'都' => ' dū ',
'六' => ' lù ',
'薄' => ' bó ',
'贾' => ' jiǎ ',
'的' => ' dē ',
'哈' => ' hǎ ',
'居' => ' jū ',
'盖' => ' gě ',
'查' => ' zhā ',
'盛' => ' shèng ',
'塔' => ' tǎ ',
'和' => ' hé ',
'柏' => ' bǎi ',
'朴' => ' piáo ',
'蓝' => ' lán ',
'牟' => ' móu ',
'殷' => ' yīn ',
'陆' => ' lù ',
'乜' => ' niè ',
'乐' => ' yuè ',
'阚' => ' kàn ',
'叶' => ' yè ',
'强' => ' qiáng ',
'不' => ' fǒu ',
'丁' => ' dīng ',
'阿' => ' ā ',
'汤' => ' tāng ',
'万' => ' wàn ',
'车' => ' chē ',
'称' => ' chēng ',
'沈' => ' shěn ',
'区' => ' ōu ',
'仇' => ' qiú ',
'宿' => ' sù ',
'南' => ' nán ',
'单' => ' shàn ',
'卜' => ' bǔ ',
'鸟' => ' niǎo ',
'思' => ' sī ',
'殳' => ' shū ',
'寻' => ' xún ',
'於' => ' yú ',
'烟' => ' yān ',
'余' => ' yú ',
'浅' => ' qiǎn ',
'艾' => ' ài ',
'浣' => ' wǎn ',
'无' => ' wú ',
'信' => ' xìn ',
'许' => ' xǔ ',
'齐' => ' qí ',
'俞' => ' yú ',
'若' => ' ruò ',
'贠' => ' yùn ',
'貟' => ' yùn ',
);

8003
vendor/overtrue/pinyin/data/words-0.php vendored Normal file

File diff suppressed because it is too large Load Diff

8003
vendor/overtrue/pinyin/data/words-1.php vendored Normal file

File diff suppressed because it is too large Load Diff

8003
vendor/overtrue/pinyin/data/words-2.php vendored Normal file

File diff suppressed because it is too large Load Diff

8003
vendor/overtrue/pinyin/data/words-3.php vendored Normal file

File diff suppressed because it is too large Load Diff

8003
vendor/overtrue/pinyin/data/words-4.php vendored Normal file

File diff suppressed because it is too large Load Diff

8003
vendor/overtrue/pinyin/data/words-5.php vendored Normal file

File diff suppressed because it is too large Load Diff

8003
vendor/overtrue/pinyin/data/words-6.php vendored Normal file

File diff suppressed because it is too large Load Diff

8003
vendor/overtrue/pinyin/data/words-7.php vendored Normal file

File diff suppressed because it is too large Load Diff

8003
vendor/overtrue/pinyin/data/words-8.php vendored Normal file

File diff suppressed because it is too large Load Diff

1503
vendor/overtrue/pinyin/data/words-9.php vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

6
vendor/overtrue/pinyin/phpstan.neon vendored Normal file
View File

@@ -0,0 +1,6 @@
parameters:
level: 9
paths:
- src
inferPrivatePropertyTypeFromConstructor: true
checkMissingIterableValueType: false

View File

@@ -0,0 +1,75 @@
<?php
namespace Overtrue\Pinyin;
use ArrayAccess;
use JsonSerializable;
use Stringable;
/**
 * Thin wrapper around an array of conversion results that can be accessed
 * like an array, cast to a string and serialised to JSON.
 */
class Collection implements ArrayAccess, JsonSerializable, Stringable
{
    public function __construct(protected $items = [])
    {
    }

    /**
     * Concatenate all items with the given separator.
     *
     * Items that are arrays themselves are rendered as "[a, b, c]".
     */
    public function join(string $separator = ' '): string
    {
        $parts = [];

        foreach ($this->items as $key => $item) {
            $parts[$key] = \is_array($item) ? '['.\implode(', ', $item).']' : $item;
        }

        return \implode($separator, $parts);
    }

    /**
     * Build a new collection from the result of applying the callback to every item.
     */
    public function map(callable $callback): Collection
    {
        $mapped = \array_map($callback, $this->all());

        return new static($mapped);
    }

    /**
     * Return the underlying items.
     */
    public function all(): array
    {
        return $this->items;
    }

    /**
     * Alias of all().
     */
    public function toArray(): array
    {
        return $this->all();
    }

    /**
     * Encode the items as JSON using the given json_encode() flags.
     */
    public function toJson(int $options = 0): string
    {
        $items = $this->all();

        return \json_encode($items, $options);
    }

    /**
     * String form: the items joined with the default separator.
     */
    public function __toString()
    {
        return $this->join();
    }

    public function offsetExists(mixed $offset): bool
    {
        return isset($this->items[$offset]);
    }

    /**
     * Return the item at the offset, or null when it is not set.
     */
    public function offsetGet(mixed $offset): mixed
    {
        return isset($this->items[$offset]) ? $this->items[$offset] : null;
    }

    /**
     * Store a value; a null offset appends (the `$collection[] = $value` form).
     */
    public function offsetSet(mixed $offset, mixed $value): void
    {
        if (null !== $offset) {
            $this->items[$offset] = $value;

            return;
        }

        $this->items[] = $value;
    }

    public function offsetUnset(mixed $offset): void
    {
        unset($this->items[$offset]);
    }

    public function jsonSerialize(): mixed
    {
        return $this->items;
    }
}

252
vendor/overtrue/pinyin/src/Converter.php vendored Normal file
View File

@@ -0,0 +1,252 @@
<?php
namespace Overtrue\Pinyin;
/**
 * Fluent converter that turns Chinese text into pinyin using the PHP
 * dictionary files under ../data (word segments, single characters, surnames).
 */
class Converter
{
    // Number of words-<n>.php dictionary segments that convert() loads.
    private const SEGMENTS_COUNT = 10;

    private const WORDS_PATH = __DIR__.'/../data/words-%s.php';

    private const CHARS_PATH = __DIR__.'/../data/chars.php';

    private const SURNAMES_PATH = __DIR__.'/../data/surnames.php';

    public const TONE_STYLE_SYMBOL = 'symbol';

    public const TONE_STYLE_NUMBER = 'number';

    public const TONE_STYLE_NONE = 'none';

    // Return every reading of each character instead of a single one.
    protected bool $polyphonic = false;

    // Try the surname dictionary on the start of the input first.
    protected bool $asSurname = false;

    // Convert character by character, without the word dictionary.
    protected bool $noWords = false;

    // Replacement used for the 'yu' (ü) vowel: 'yu', 'v' or 'u'.
    protected string $yuTo = 'yu';

    protected string $toneStyle = self::TONE_STYLE_SYMBOL;

    // Character-class fragments of everything that is kept in the input;
    // merged with self::REGEXPS in the constructor.
    protected array $regexps = [
        'separator' => '\p{Z}',
        'mark' => '\p{M}',
        'tab' => "\t"
    ];

    public const REGEXPS = [
        'number' => '0-9',
        'alphabet' => 'a-zA-Z',
        // Chinese characters, without punctuation
        'hans' => '\x{3007}\x{2E80}-\x{2FFF}\x{3100}-\x{312F}\x{31A0}-\x{31EF}\x{3400}-\x{4DBF}\x{4E00}-\x{9FFF}\x{F900}-\x{FAFF}',
        // Punctuation: !"#$%&'()*+,-./:;<=>?@[\]^_{|}~`
        'punctuation' => '\p{P}',
    ];

    public function __construct()
    {
        $this->regexps = \array_merge($this->regexps, self::REGEXPS);
    }

    public static function make(): static
    {
        return new static();
    }

    public function polyphonic(): static
    {
        $this->polyphonic = true;

        return $this;
    }

    public function surname(): static
    {
        $this->asSurname = true;

        return $this;
    }

    public function noWords(): static
    {
        $this->noWords = true;

        return $this;
    }

    /**
     * Keep Chinese characters only: letters, digits and punctuation are dropped.
     */
    public function onlyHans(): static
    {
        // Chinese characters, without punctuation
        $this->regexps['hans'] = self::REGEXPS['hans'];

        return $this->noAlpha()->noNumber()->noPunctuation();
    }

    public function noAlpha(): static
    {
        unset($this->regexps['alphabet']);

        return $this;
    }

    public function noNumber(): static
    {
        unset($this->regexps['number']);

        return $this;
    }

    public function noPunctuation(): static
    {
        unset($this->regexps['punctuation']);

        return $this;
    }

    public function withToneStyle(string $toneStyle): static
    {
        $this->toneStyle = $toneStyle;

        return $this;
    }

    public function noTone(): static
    {
        $this->toneStyle = self::TONE_STYLE_NONE;

        return $this;
    }

    public function useNumberTone(): static
    {
        $this->toneStyle = self::TONE_STYLE_NUMBER;

        return $this;
    }

    public function yuToV(): static
    {
        $this->yuTo = 'v';

        return $this;
    }

    public function yuToU(): static
    {
        $this->yuTo = 'u';

        return $this;
    }

    /**
     * Invoke the callback with this converter when the condition is true.
     */
    public function when(bool $condition, callable $callback): static
    {
        if ($condition) {
            $callback($this);
        }

        return $this;
    }

    /**
     * Convert a string to pinyin according to the configured options.
     *
     * @param string        $string      text to convert
     * @param callable|null $beforeSplit optional hook that receives the translated
     *                                   string and returns the string to split;
     *                                   not used in polyphonic / noWords mode
     */
    public function convert(string $string, ?callable $beforeSplit = null): Collection
    {
        // Put a tab in front of every ASCII letter/digit run so it stays a separate
        // token and is not mistaken for a tone number after conversion.
        $string = preg_replace_callback('~[a-z0-9_-]+~i', function ($matches) {
            return "\t" . $matches[0];
        }, $string);

        // Drop every character that is not in one of the kept classes.
        $string = \preg_replace(\sprintf('~[^%s]~u', \implode($this->regexps)), '', $string);

        // Polyphonic mode: all readings per character.
        if ($this->polyphonic) {
            return $this->convertAsChars($string, true);
        }

        if ($this->noWords) {
            return $this->convertAsChars($string);
        }

        // Replace a leading surname.
        if ($this->asSurname) {
            $string = $this->convertSurname($string);
        }

        // Apply the dictionary segments in order.
        for ($i = 0; $i < self::SEGMENTS_COUNT; $i++) {
            $string = strtr($string, require sprintf(self::WORDS_PATH, $i));
        }

        return $this->split($beforeSplit ? $beforeSplit($string) : $string);
    }

    /**
     * Convert character by character using the single-character dictionary.
     *
     * The result is keyed by character. Characters without a dictionary entry
     * are omitted.
     *
     * @param bool $polyphonic when true each value is the list of all readings,
     *                         otherwise only the first reading
     */
    public function convertAsChars(string $string, bool $polyphonic = false): Collection
    {
        $map = require self::CHARS_PATH;

        // split string as chinese chars
        $chars = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);

        $items = [];

        foreach ($chars as $char) {
            if (isset($map[$char])) {
                if ($polyphonic) {
                    $items[$char] = \array_map(fn ($pinyin) => $this->formatTone($pinyin, $this->toneStyle), $map[$char]);
                } else {
                    $items[$char] = $this->formatTone($map[$char][0], $this->toneStyle);
                }
            }
        }

        return new Collection($items);
    }

    /**
     * Replace the first surname that the name starts with by its dictionary pinyin.
     */
    protected function convertSurname(string $name): string
    {
        static $surnames = null;

        // Loaded once per request.
        $surnames ??= require self::SURNAMES_PATH;

        foreach ($surnames as $surname => $pinyin) {
            if (\str_starts_with($name, $surname)) {
                return $pinyin . \mb_substr($name, \mb_strlen($surname));
            }
        }

        return $name;
    }

    /**
     * Split a translated string on whitespace and apply the tone style to every token.
     */
    protected function split(string $item): Collection
    {
        // PREG_SPLIT_NO_EMPTY removes empty tokens only. array_filter() without a
        // callback also removed the token "0", silently dropping a lone zero.
        $items = \preg_split('/\s+/', $item, -1, PREG_SPLIT_NO_EMPTY) ?: [];

        foreach ($items as $index => $token) {
            $items[$index] = $this->formatTone($token, $this->toneStyle);
        }

        return new Collection($items);
    }

    /**
     * Rewrite the tone-marked vowels of one pinyin syllable for the given style.
     *
     * - symbol: tone marks are kept, except that the ü vowels are still replaced
     *   by the configured 'yu' replacement
     * - none:   tone marks are replaced by the plain vowel
     * - number: like none, with the tone number appended to the syllable
     */
    protected function formatTone(string $pinyin, string $style): string
    {
        $replacements = [
            'üē' => ['ue', 1], 'üé' => ['ue', 2], 'üě' => ['ue', 3], 'üè' => ['ue', 4],
            'ā' => ['a', 1], 'ē' => ['e', 1], 'ī' => ['i', 1], 'ō' => ['o', 1], 'ū' => ['u', 1], 'ǖ' => ['yu', 1],
            'á' => ['a', 2], 'é' => ['e', 2], 'í' => ['i', 2], 'ó' => ['o', 2], 'ú' => ['u', 2], 'ǘ' => ['yu', 2],
            'ǎ' => ['a', 3], 'ě' => ['e', 3], 'ǐ' => ['i', 3], 'ǒ' => ['o', 3], 'ǔ' => ['u', 3], 'ǚ' => ['yu', 3],
            'à' => ['a', 4], 'è' => ['e', 4], 'ì' => ['i', 4], 'ò' => ['o', 4], 'ù' => ['u', 4], 'ǜ' => ['yu', 4],
        ];

        foreach ($replacements as $unicode => $replacement) {
            if (!\str_contains($pinyin, $unicode)) {
                continue;
            }

            $umlaut = $replacement[0];

            if ($umlaut !== 'yu' && $style === self::TONE_STYLE_SYMBOL) {
                continue;
            }

            // https://zh.wikipedia.org/wiki/%C3%9C
            // The configured replacement applies to the 'yu' vowel only; applying it
            // to every vowel turned e.g. "dài" into "dvi" after yuToV().
            if ($umlaut === 'yu' && $this->yuTo !== 'yu') {
                $umlaut = $this->yuTo;
            }

            $pinyin = \str_replace($unicode, $umlaut, $pinyin);

            // Honour the style that was passed in, consistent with the check above.
            if ($style === self::TONE_STYLE_NUMBER) {
                $pinyin .= $replacement[1];
            }
        }

        return $pinyin;
    }
}

View File

@@ -1,42 +0,0 @@
<?php
/*
* This file is part of the overtrue/pinyin.
*
* (c) overtrue <i@overtrue.me>
*
* This source file is subject to the MIT license that is bundled
* with this source code in the file LICENSE.
*/
namespace Overtrue\Pinyin;
use Closure;
/**
 * Contract for dictionary loaders: a loader hands dictionary data to a
 * caller-supplied callback instead of returning it.
 */
interface DictLoaderInterface
{
/**
 * Load the word dictionary and pass it to the callback.
 *
 * Example of the dictionary shape (word => tab-prefixed pinyin syllables):
 * <pre>
 * [
 *     '响应时间' => "[\t]xiǎng[\t]yìng[\t]shí[\t]jiān",
 *     '长篇连载' => "[\t]cháng[\t]piān[\t]lián[\t]zǎi",
 *     //...
 * ]
 * </pre>
 *
 * NOTE(review): how many times the callback is invoked (once, per segment or
 * per entry) is presumably up to the implementation — confirm against the loaders.
 *
 * @param Closure $callback receives the dictionary data as an array
 */
public function map(Closure $callback);
/**
 * Load the surname dictionary and pass it to the callback.
 *
 * @param Closure $callback receives the surname dictionary data as an array
 */
public function mapSurname(Closure $callback);
}

View File

@@ -1,73 +0,0 @@
<?php
/*
* This file is part of the overtrue/pinyin.
*
* (c) overtrue <i@overtrue.me>
*
* This source file is subject to the MIT license that is bundled
* with this source code in the file LICENSE.
*/
namespace Overtrue\Pinyin;
use Closure;
/**
 * Dictionary loader that includes the PHP dictionary files found in a
 * directory and hands each one to the callback as an array.
 */
class FileDictLoader implements DictLoaderInterface
{
    /**
     * sprintf() pattern of the word segment file names.
     *
     * @var string
     */
    protected $segmentName = 'words_%s';

    /**
     * Directory that contains the dictionary files.
     *
     * @var string
     */
    protected $path;

    /**
     * @param string $path dictionary directory
     */
    public function __construct($path)
    {
        $this->path = $path;
    }

    /**
     * Include every existing word segment (indexes 0 to 99) and pass its
     * content to the callback, one call per segment.
     *
     * @param Closure $callback
     */
    public function map(Closure $callback)
    {
        foreach (range(0, 99) as $index) {
            $segment = $this->path . '/' . sprintf($this->segmentName, $index);

            if (!file_exists($segment)) {
                continue;
            }

            $dictionary = (array) include $segment;
            $callback($dictionary);
        }
    }

    /**
     * Include the surname dictionary, when it exists, and pass it to the callback.
     *
     * @param Closure $callback
     */
    public function mapSurname(Closure $callback)
    {
        $surnames = $this->path . '/surnames';

        if (!file_exists($surnames)) {
            return;
        }

        $dictionary = (array) include $surnames;
        $callback($dictionary);
    }
}

View File

@@ -1,142 +0,0 @@
<?php
/*
* This file is part of the overtrue/pinyin.
*
* (c) overtrue <i@overtrue.me>
*
* This source file is subject to the MIT license that is bundled
* with this source code in the file LICENSE.
*/
namespace Overtrue\Pinyin;
use Closure;
use SplFileObject;
use Generator;
/**
 * Dictionary loader that streams the data files line by line.
 *
 * Segment files are opened as SplFileObject handles and parsed lazily through
 * a generator, so a whole dictionary is never held in memory at once. The
 * handles live in static properties and are therefore shared by all instances.
 */
class GeneratorFileDictLoader implements DictLoaderInterface
{
    /**
     * Data directory.
     *
     * @var string
     */
    protected $path;

    /**
     * Words segment name (sprintf pattern, %s is the segment index).
     *
     * @var string
     */
    protected $segmentName = 'words_%s';

    /**
     * Open segment files, keyed by resolved file path.
     *
     * @var SplFileObject[]
     */
    protected static $handles = [];

    /**
     * Handle of the surnames file, opened on first use by mapSurname().
     *
     * @var SplFileObject
     */
    protected static $surnamesHandle;

    /**
     * Constructor.
     *
     * Opens every existing segment file (indexes 0..99) under $path.
     *
     * @param string $path
     */
    public function __construct($path)
    {
        $this->path = $path;

        for ($i = 0; $i < 100; ++$i) {
            $segment = $this->path . '/' . sprintf($this->segmentName, $i);
            if (!is_file($segment)) {
                continue;
            }

            // The handle list is static: blindly appending here made every
            // additional instance register the same files again, so map()
            // replayed each segment once per instance. Keying by the resolved
            // path registers each file exactly once.
            $key = realpath($segment) ?: $segment;
            if (!isset(static::$handles[$key])) {
                static::$handles[$key] = $this->openFile($segment);
            }
        }
    }

    /**
     * Construct a new file object.
     *
     * @param string $filename file path
     * @param string $mode     file open mode
     *
     * @return SplFileObject
     */
    protected function openFile($filename, $mode = 'r')
    {
        return new SplFileObject($filename, $mode);
    }

    /**
     * Lazily parse the given files into word => pinyin pairs.
     *
     * Each handle is rewound and read line by line. Quotes, spaces, commas and
     * line breaks are stripped, lines without "=>" are skipped, and the rest
     * is split into key and value.
     *
     * @param array $handles SplFileObjects
     *
     * @return Generator
     */
    protected function getGenerator(array $handles)
    {
        foreach ($handles as $handle) {
            $handle->seek(0);
            while (false === $handle->eof()) {
                // Strip "\r" and "\n" individually instead of PHP_EOL so a file
                // written with the other platform's line endings does not
                // leave a stray line break inside the pinyin value.
                $line = str_replace(['\'', ' ', "\r", "\n", ','], '', $handle->fgets());
                if (false === strpos($line, '=>')) {
                    continue;
                }

                // Limit of 2 keeps a value intact even if it contains "=>".
                list($string, $pinyin) = explode('=>', $line, 2);
                yield $string => $pinyin;
            }
        }
    }

    /**
     * Traverse the stream, invoking the callback once per parsed entry with a
     * single-element [word => pinyin] array.
     *
     * @param Generator $generator
     * @param Closure   $callback
     *
     * @author Seven Du <shiweidu@outlook.com>
     */
    protected function traversing(Generator $generator, Closure $callback)
    {
        foreach ($generator as $string => $pinyin) {
            $callback([$string => $pinyin]);
        }
    }

    /**
     * Load dict.
     *
     * @param Closure $callback
     */
    public function map(Closure $callback)
    {
        $this->traversing($this->getGenerator(static::$handles), $callback);
    }

    /**
     * Load surname dict.
     *
     * The surnames file is opened on the first call and the handle is reused
     * afterwards.
     *
     * @param Closure $callback
     */
    public function mapSurname(Closure $callback)
    {
        if (!static::$surnamesHandle instanceof SplFileObject) {
            static::$surnamesHandle = $this->openFile($this->path . '/surnames');
        }

        $this->traversing($this->getGenerator([static::$surnamesHandle]), $callback);
    }
}

View File

@@ -1,93 +0,0 @@
<?php
/*
* This file is part of the overtrue/pinyin.
*
* (c) overtrue <i@overtrue.me>
*
* This source file is subject to the MIT license that is bundled
* with this source code in the file LICENSE.
*/
namespace Overtrue\Pinyin;
use Closure;
/**
 * Dictionary loader that keeps the dictionaries in memory.
 *
 * All word segments are included once in the constructor; the surname
 * dictionary is included on the first mapSurname() call that finds it.
 */
class MemoryFileDictLoader implements DictLoaderInterface
{
    /**
     * Data directory.
     *
     * @var string
     */
    protected $path;

    /**
     * Words segment name (sprintf pattern, %s is the segment index).
     *
     * @var string
     */
    protected $segmentName = 'words_%s';

    /**
     * Loaded segment dictionaries, one array per segment file.
     *
     * @var array
     */
    protected $segments = [];

    /**
     * Surname cache.
     *
     * @var array
     */
    protected $surnames = [];

    /**
     * Constructor.
     *
     * Includes every existing segment file (indexes 0..99) under $path.
     *
     * @param string $path
     */
    public function __construct($path)
    {
        $this->path = $path;

        for ($i = 0; $i < 100; ++$i) {
            $segment = $path . '/' . sprintf($this->segmentName, $i);

            // is_file() rather than file_exists(): a directory with a segment
            // name cannot be included and would otherwise be cached as [false].
            if (is_file($segment)) {
                $this->segments[] = (array) include $segment;
            }
        }
    }

    /**
     * Load dict.
     *
     * Invokes the callback once per cached segment.
     *
     * @param Closure $callback
     */
    public function map(Closure $callback)
    {
        foreach ($this->segments as $dictionary) {
            $callback($dictionary);
        }
    }

    /**
     * Load surname dict.
     *
     * The callback is always invoked; it receives an empty array when no
     * surnames file could be loaded.
     *
     * @param Closure $callback
     */
    public function mapSurname(Closure $callback)
    {
        if (empty($this->surnames)) {
            $surnames = $this->path . '/surnames';

            // Same regular-file guard as in the constructor.
            if (is_file($surnames)) {
                $this->surnames = (array) include $surnames;
            }
        }

        $callback($this->surnames);
    }
}

View File

@@ -1,341 +1,85 @@
<?php
/*
* This file is part of the overtrue/pinyin.
*
* (c) overtrue <i@overtrue.me>
*
* This source file is subject to the MIT license that is bundled
* with this source code in the file LICENSE.
*/
namespace Overtrue\Pinyin;
use InvalidArgumentException;
/**
* @method static Converter polyphonic()
* @method static Converter surname()
* @method static Converter noWords()
* @method static Converter onlyHans()
* @method static Converter noAlpha()
* @method static Converter noNumber()
* @method static Converter noPunctuation()
* @method static Converter noTone()
* @method static Converter useNumberTone()
* @method static Converter yuToV()
* @method static Converter yuToU()
* @method static Converter withToneStyle(string $toneStyle = 'symbol')
* @method static Collection convert(string $string, callable $beforeSplit = null)
*/
class Pinyin
{
/**
* Dict loader.
*
* @var \Overtrue\Pinyin\DictLoaderInterface
*/
protected $loader;
/**
 * Punctuations map.
 *
 * Full-width and typographic punctuation mapped to the ASCII character it is
 * replaced with when punctuation is kept (see prepare()). Every key must be a
 * distinct, non-empty string: empty keys would collapse into a single entry.
 *
 * @var array
 */
protected $punctuations = [
    '，' => ',',
    '。' => '.',
    '！' => '!',
    '？' => '?',
    '：' => ':',
    '“' => '"',
    '”' => '"',
    '‘' => "'",
    '’' => "'",
    '_' => '_',
];
/**
* Constructor.
*
* @param string $loaderName
*/
public function __construct($loaderName = null)
public static function name(string $name, string $toneStyle = Converter::TONE_STYLE_SYMBOL): Collection
{
$this->loader = $loaderName ?: 'Overtrue\\Pinyin\\FileDictLoader';
return self::surname()->withToneStyle($toneStyle)->convert($name);
}
/**
* Convert string to pinyin.
*
* @param string $string
* @param int $option
*
* @return array
*/
public function convert($string, $option = PINYIN_DEFAULT)
public static function phrase(string $string, string $toneStyle = Converter::TONE_STYLE_SYMBOL): Collection
{
$pinyin = $this->romanize($string, $option);
return $this->splitWords($pinyin, $option);
return self::noPunctuation()->withToneStyle($toneStyle)->convert($string);
}
/**
* Convert string (person name) to pinyin.
*
* @param string $stringName
* @param int $option
*
* @return array
*/
public function name($stringName, $option = PINYIN_NAME)
public static function sentence(string $string, string $toneStyle = Converter::TONE_STYLE_SYMBOL): Collection
{
$option = $option | PINYIN_NAME;
$pinyin = $this->romanize($stringName, $option);
return $this->splitWords($pinyin, $option);
return self::withToneStyle($toneStyle)->convert($string);
}
/**
* Return a pinyin permalink from string.
*
* @param string $string
* @param string $delimiter
* @param int $option
*
* @return string
*/
public function permalink($string, $delimiter = '-', $option = PINYIN_DEFAULT)
public static function polyphones(string $string, string $toneStyle = Converter::TONE_STYLE_SYMBOL): Collection
{
if (\is_int($delimiter)) {
list($option, $delimiter) = [$delimiter, '-'];
}
return self::polyphonic()->withToneStyle($toneStyle)->convert($string);
}
/**
 * Convert a string with the "onlyHans" and "noWords" converter switches on.
 *
 * NOTE(review): presumably this yields one pinyin per Han character and drops
 * everything else; the switches are implemented in Converter, confirm there.
 *
 * @param string $string    text to convert
 * @param string $toneStyle tone style forwarded to withToneStyle()
 */
public static function chars(string $string, string $toneStyle = Converter::TONE_STYLE_SYMBOL): Collection
{
    $converter = self::onlyHans()
        ->noWords()
        ->withToneStyle($toneStyle);

    return $converter->convert($string);
}
public static function permalink(string $string, string $delimiter = '-'): string
{
if (!in_array($delimiter, ['_', '-', '.', ''], true)) {
throw new InvalidArgumentException("Delimiter must be one of: '_', '-', '', '.'.");
}
return implode($delimiter, $this->convert($string, $option | \PINYIN_KEEP_NUMBER | \PINYIN_KEEP_ENGLISH));
return self::noPunctuation()->noTone()->convert($string)->join($delimiter);
}
/**
* Return first letters.
*
* @param string $string
* @param string $delimiter
* @param int $option
*
* @return string
*/
public function abbr($string, $delimiter = '', $option = PINYIN_DEFAULT)
public static function nameAbbr(string $string): Collection
{
if (\is_int($delimiter)) {
list($option, $delimiter) = [$delimiter, ''];
return self::abbr($string, true);
}
/**
 * Build an abbreviation: the first letter of every converted token.
 *
 * Tokens that are numeric, or that contain two or more consecutive digits,
 * are kept whole. This is commonly used when indexing film titles, e.g.
 * 《晚娘2012》 -> WN2012.
 *
 * @param string $string text to abbreviate
 * @param bool   $asName when true, the converter's surname() switch is applied
 */
public static function abbr(string $string, bool $asName = false): Collection
{
    $initialOrNumber = function ($pinyin) {
        if (\is_numeric($pinyin) || preg_match('/\d{2,}/', $pinyin)) {
            return $pinyin;
        }

        return \mb_substr($pinyin, 0, 1);
    };

    $converter = self::noTone()
        ->noPunctuation()
        ->when($asName, fn ($c) => $c->surname());

    return $converter->convert($string)->map($initialOrNumber);
}
public static function __callStatic(string $name, array $arguments)
{
$converter = Converter::make();
if (\method_exists($converter, $name)) {
return $converter->$name(...$arguments);
}
return implode($delimiter, array_map(function ($pinyin) {
return \is_numeric($pinyin) || preg_match('/\d+/', $pinyin) ? $pinyin : mb_substr($pinyin, 0, 1);
}, $this->convert($string, $option | PINYIN_NO_TONE)));
}
/**
 * Convert a Chinese phrase to a single pinyin string.
 *
 * The converted words are joined with $delimiter. An integer passed in the
 * delimiter position is treated as the option, and the delimiter falls back
 * to a single space.
 *
 * @param string     $string
 * @param string|int $delimiter
 * @param int        $option
 *
 * @return string
 */
public function phrase($string, $delimiter = ' ', $option = PINYIN_DEFAULT)
{
    // Support the two-argument call form phrase($string, $option).
    if (\is_int($delimiter)) {
        $option = $delimiter;
        $delimiter = ' ';
    }

    $words = $this->convert($string, $option);

    return implode($delimiter, $words);
}
/**
 * Convert a Chinese sentence to a pinyin string.
 *
 * Punctuation, English letters and numbers are always kept in addition to
 * whatever $option requests. An integer passed in the delimiter position is
 * treated as the option, and the delimiter falls back to a single space.
 *
 * @param string     $string
 * @param string|int $delimiter
 * @param int        $option
 *
 * @return string
 */
public function sentence($string, $delimiter = ' ', $option = \PINYIN_NO_TONE)
{
    // Support the two-argument call form sentence($string, $option).
    if (\is_int($delimiter)) {
        $option = $delimiter;
        $delimiter = ' ';
    }

    $flags = $option | \PINYIN_KEEP_PUNCTUATION | \PINYIN_KEEP_ENGLISH | \PINYIN_KEEP_NUMBER;
    $words = $this->convert($string, $flags);

    return implode($delimiter, $words);
}
/**
 * Replace the dictionary loader used by this instance.
 *
 * @param \Overtrue\Pinyin\DictLoaderInterface $loader loader instance to use from now on
 *
 * @return $this for call chaining
 */
public function setLoader(DictLoaderInterface $loader)
{
    $this->loader = $loader;

    return $this;
}
/**
 * Return the dictionary loader, instantiating it on first use.
 *
 * While the loader property does not hold a DictLoaderInterface instance, its
 * value is used as a class name and constructed with the package's "data"
 * directory; the instance is then stored and returned on later calls.
 *
 * @return \Overtrue\Pinyin\DictLoaderInterface
 */
public function getLoader()
{
    if ($this->loader instanceof DictLoaderInterface) {
        return $this->loader;
    }

    $loaderClass = $this->loader;
    $this->loader = new $loaderClass(dirname(__DIR__) . '/data/');

    return $this->loader;
}
/**
 * Replace the Chinese words in a string with their pinyin.
 *
 * The string is first cleaned by prepare(). When PINYIN_NAME is set, the
 * leading surname is converted before the general dictionary is applied.
 * Every dictionary supplied by the loader is then applied with strtr().
 *
 * @param string $string
 * @param int    $option
 *
 * @return string
 */
protected function romanize($string, $option = \PINYIN_DEFAULT)
{
    $text = $this->prepare($string, $option);
    $loader = $this->getLoader();

    if ($this->hasOption($option, \PINYIN_NAME)) {
        $text = $this->convertSurname($text, $loader);
    }

    // The loader may call back several times; each call translates further.
    $loader->map(function ($dictionary) use (&$text) {
        $text = strtr($text, $dictionary);
    });

    return $text;
}
/**
 * Replace a leading Chinese surname with its surname-specific pinyin.
 *
 * Within each dictionary supplied by the loader, the first surname the string
 * starts with wins; the rest of the string is left untouched.
 *
 * @param string                               $string
 * @param \Overtrue\Pinyin\DictLoaderInterface $dictLoader
 *
 * @return string
 */
protected function convertSurname($string, $dictLoader)
{
    $dictLoader->mapSurname(function ($dictionary) use (&$string) {
        foreach ($dictionary as $surname => $pinyin) {
            // Only a match at the very start of the string counts.
            if (0 !== strpos($string, $surname)) {
                continue;
            }

            $offset = mb_strlen($surname, 'UTF-8');
            $length = mb_strlen($string, 'UTF-8') - 1;
            $string = $pinyin . mb_substr($string, $offset, $length, 'UTF-8');

            break;
        }
    });

    return $string;
}
/**
 * Split a romanized string into a list of words.
 *
 * The string is split on runs of whitespace and empty pieces are discarded.
 * Unless PINYIN_TONE is set, every word is passed through formatTone().
 *
 * @param string $pinyin
 * @param int    $option
 *
 * @return array
 */
protected function splitWords($pinyin, $option)
{
    // PREG_SPLIT_NO_EMPTY drops only empty pieces. The previous
    // array_filter() call also discarded the token "0", because the string
    // "0" is falsy, so a kept number 0 vanished from the result.
    $words = preg_split('/\s+/', $pinyin, -1, PREG_SPLIT_NO_EMPTY);

    if (!$this->hasOption($option, PINYIN_TONE)) {
        foreach ($words as $index => $word) {
            $words[$index] = $this->formatTone($word, $option);
        }
    }

    return array_values($words);
}
/**
 * Tell whether every bit of $check is set in $option.
 *
 * @param int $option option bitmask to inspect
 * @param int $check  flag (or combination of flags) to look for
 *
 * @return bool
 */
public function hasOption($option, $check)
{
    $matched = $option & $check;

    return $check === $matched;
}
/**
* Pre-process the input before dictionary lookup.
*
* Prefixes every run of ASCII letters, digits, "_" and "-" with a tab, then
* removes every character that is not on a whitelist. Han characters,
* Unicode separators, Unicode marks and the tab are always allowed; digits,
* ASCII letters and punctuation are allowed only when the matching
* PINYIN_KEEP_* flag is set.
*
* @param string $string
* @param int $option
*
* @return string
*/
protected function prepare($string, $option = \PINYIN_DEFAULT)
{
// Put a tab in front of each alphanumeric run so it stays separated from
// adjacent text.
$string = preg_replace_callback('~[a-z0-9_-]+~i', function ($matches) {
return "\t" . $matches[0];
}, $string);
// Fragments of the character class of allowed characters.
$regex = ['\p{Han}', '\p{Z}', '\p{M}', "\t"];
if ($this->hasOption($option, \PINYIN_KEEP_NUMBER)) {
\array_push($regex, '0-9');
}
if ($this->hasOption($option, \PINYIN_KEEP_ENGLISH)) {
\array_push($regex, 'a-zA-Z');
}
if ($this->hasOption($option, \PINYIN_KEEP_PUNCTUATION)) {
// Replace mapped punctuation with its ASCII form and turn the inserted
// tabs into spaces.
// NOTE(review): ' ' => ' ' maps a single space to itself as written;
// possibly whitespace inside the key was collapsed - confirm.
$punctuations = array_merge($this->punctuations, ["\t" => ' ', ' ' => ' ']);
$string = trim(str_replace(array_keys($punctuations), $punctuations, $string));
// Whitelist both the original punctuation and its replacements, escaped
// for use inside the "~"-delimited pattern.
\array_push($regex, preg_quote(implode(array_merge(array_keys($this->punctuations), $this->punctuations)), '~'));
}
// Drop every character outside the whitelist (UTF-8 aware).
return preg_replace(\sprintf('~[^%s]~u', implode($regex)), '', $string);
}
/**
* Format.
*
* @param string $pinyin
* @param int $option
*
* @return string
*/
protected function formatTone($pinyin, $option = \PINYIN_NO_TONE)
{
$replacements = [
'üē' => ['ue', 1], 'üé' => ['ue', 2], 'üě' => ['ue', 3], 'üè' => ['ue', 4],
'ā' => ['a', 1], 'ē' => ['e', 1], 'ī' => ['i', 1], 'ō' => ['o', 1], 'ū' => ['u', 1], 'ǖ' => ['yu', 1],
'á' => ['a', 2], 'é' => ['e', 2], 'í' => ['i', 2], 'ó' => ['o', 2], 'ú' => ['u', 2], 'ǘ' => ['yu', 2],
'ǎ' => ['a', 3], 'ě' => ['e', 3], 'ǐ' => ['i', 3], 'ǒ' => ['o', 3], 'ǔ' => ['u', 3], 'ǚ' => ['yu', 3],
'à' => ['a', 4], 'è' => ['e', 4], 'ì' => ['i', 4], 'ò' => ['o', 4], 'ù' => ['u', 4], 'ǜ' => ['yu', 4],
];
foreach ($replacements as $unicode => $replacement) {
if (false !== strpos($pinyin, $unicode)) {
$umlaut = $replacement[0];
// https://zh.wikipedia.org/wiki/%C3%9C
if ($this->hasOption($option, \PINYIN_UMLAUT_V) && 'yu' == $umlaut) {
$umlaut = 'v';
}
$pinyin = str_replace($unicode, $umlaut, $pinyin) . ($this->hasOption($option, PINYIN_ASCII_TONE) ? $replacement[1] : '');
}
}
return $pinyin;
throw new InvalidArgumentException("Method {$name} does not exist.");
}
}

View File

@@ -1,20 +0,0 @@
<?php
/*
* This file is part of the overtrue/pinyin.
*
* (c) overtrue <i@overtrue.me>
*
* This source file is subject to the MIT license that is bundled
* with this source code in the file LICENSE.
*/
// Option flags for the Pinyin class. Each value is a distinct bit, so flags
// can be combined with "|" and tested with Pinyin::hasOption().

// Default option of convert(), permalink(), abbr() and phrase().
define('PINYIN_DEFAULT', 4096);
// Keep tone marks: splitWords() skips formatTone() when this is set.
define('PINYIN_TONE', 2);
// Default option of sentence() and formatTone(); abbr() always adds it.
define('PINYIN_NO_TONE', 4);
// Append the tone number to each syllable (see formatTone()).
define('PINYIN_ASCII_TONE', 8);
// Treat the input as a person name: the leading surname is converted first.
define('PINYIN_NAME', 16);
// Keep the digits 0-9 in the input (see prepare()).
define('PINYIN_KEEP_NUMBER', 32);
// Keep the ASCII letters a-z / A-Z in the input (see prepare()).
define('PINYIN_KEEP_ENGLISH', 64);
// Write "v" instead of "yu" for the umlaut u (see formatTone()).
define('PINYIN_UMLAUT_V', 128);
// Keep punctuation, converting mapped marks to ASCII (see prepare()).
define('PINYIN_KEEP_PUNCTUATION', 256);