fix:修复BUG/升级1.1.6版本

This commit is contained in:
Ying
2023-04-25 20:11:49 +08:00
parent 445e5f9662
commit 6a6866bbaf
2357 changed files with 456920 additions and 140567 deletions

96
vendor/overtrue/pinyin/bin/build vendored Normal file
View File

@@ -0,0 +1,96 @@
#!/usr/bin/env php
<?php
/*
 * Dictionary build script.
 *
 * Reads the raw listings under ../sources (downloading the two upstream
 * listings when they are missing), then regenerates the PHP data files under
 * ../data: surnames.php, chars.php and the words-N.php chunks.
 */
require __DIR__ . '/utils.php';

$polyphones = explode(',', file_get_contents(__DIR__ . '/../sources/polyphones.txt'));
$charsSouce = __DIR__ . '/../sources/chars.txt';
$charsPathes = __DIR__ . '/../sources/pathes/chars.txt';
$wordsSouce = __DIR__ . '/../sources/words.txt';
$wordsPathes = __DIR__ . '/../sources/pathes/words.txt';
$surnamesSource = file(__DIR__ . '/../sources/surnames.txt');

// Fetch missing source listings. A failed download aborts the build instead
// of silently leaving an empty source file behind.
$downloads = [
    $charsSouce => 'https://raw.githubusercontent.com/mozillazg/pinyin-data/master/pinyin.txt',
    $wordsSouce => 'https://raw.githubusercontent.com/mozillazg/phrase-pinyin-data/master/large_pinyin.txt',
];
foreach ($downloads as $target => $url) {
    if (file_exists($target)) {
        continue;
    }
    $contents = file_get_contents($url);
    if ($contents === false) {
        throw new Exception("Unable to download {$url}");
    }
    file_put_contents($target, $contents);
}

// ------------------------------------------------
// Surnames: each line is "<surname>,<pinyin syllables separated by whitespace>".
// Values are stored as tab-delimited syllables with a leading and trailing tab.
$surnames = [];
foreach ($surnamesSource as $line) {
    $line = trim($line);
    // Blank lines and lines without a separator carry no surname/pinyin pair.
    if ($line === '' || !str_contains($line, ',')) {
        continue;
    }
    [$surname, $pinyin] = explode(',', $line);
    $surnames[trim($surname)] = join("\t", ["", ...preg_split('/\s+/', trim($pinyin)), ""]);
}

// ------------------------------------------------
// Single characters, with every listed reading.
$charWithPolyphones = [];
// Single characters, first listed reading only.
$chars = [];
foreach (parse_chars($charsSouce) as $char => $pinyin) {
    $charWithPolyphones[$char] = $pinyin;
    $chars[$char] = "\t{$pinyin[0]}\t";
}

// Patch entries override the first reading taken from the main listing.
foreach (parse_chars($charsPathes, fn ($p) => "\t{$p[0]}\t") as $char => $pinyin) {
    $chars[$char] = $pinyin;
}

// ------------------------------------------------
$words = [];
foreach (parse_words($wordsSouce) as $word => $pinyin) {
    $wordChars = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);

    try {
        // Maps each character of the word to its syllable; a repeated
        // character keeps only its last syllable because keys collide.
        $pinyinSegments = array_combine($wordChars, $pinyin);
    } catch (Throwable $e) {
        // Report the entry that actually failed (character/syllable count mismatch).
        throw new Exception("行解析错误:{$word}: " . implode(' ', $pinyin), 0, $e);
    }

    // Polyphone handling.
    // NOTE(review): array_intersect_key() compares array KEYS. Both arrays are
    // integer-indexed lists, so this keeps the characters whose position is
    // also an index of $polyphones; it does not filter by character value.
    // Presumably array_intersect() was intended — confirm before changing,
    // because it would alter the generated dictionaries.
    $polyphoneChars = array_intersect_key($wordChars, $polyphones);
    foreach ($polyphoneChars as $char) {
        // Keep the word only when one of the inspected characters is read
        // differently inside the word than its first listed reading;
        // otherwise the word is dropped.
        if (isset($charWithPolyphones[$char]) && $pinyinSegments[$char] != $charWithPolyphones[$char][0]) {
            $words[$word] = join("\t", ["", ...$pinyin, ""]);
            break;
        }
    }
}

// Patch words are always included.
foreach (parse_words($wordsPathes) as $word => $pinyin) {
    $words[$word] = join("\t", ["", ...$pinyin, ""]);
}

// Clean up previously generated data. The directory is shell-escaped so a
// checkout path containing spaces or shell metacharacters cannot break the
// command; the glob stays outside the quotes so the shell still expands it.
exec('rm -rf ' . escapeshellarg(__DIR__ . '/../data') . '/*');

// Surnames.
file_put_contents(__DIR__ . '/../data/surnames.php', "<?php\nreturn " . var_export($surnames, true) . ";\n");
echo count($surnames) . " surnames saved.\n";

// Single characters with all readings.
file_put_contents(__DIR__ . '/../data/chars.php', "<?php\nreturn " . var_export($charWithPolyphones, true) . ";\n");
// Report the size of the array that was actually written.
echo count($charWithPolyphones) . " chars saved.\n";

// Words ordered from longest to shortest key (byte length), followed by the
// single characters, written in chunks of 8000 entries.
$words = array_merge($words, $chars);
uksort($words, fn ($a, $b) => strlen($b) <=> strlen($a));
foreach (array_chunk($words, 8000, true) as $index => $group) {
    file_put_contents(__DIR__ . "/../data/words-{$index}.php", "<?php\nreturn " . var_export($group, true) . ";\n");
    echo count($group) . " words saved in " . __DIR__ . "/../data/words-{$index}.php \n";
}

84
vendor/overtrue/pinyin/bin/pinyin vendored Normal file
View File

@@ -0,0 +1,84 @@
#!/usr/bin/env php
<?php
// Command-line entry point: loads the autoloader, declares the defaults and
// help text, then scans the arguments for a method name and dash options.
require __DIR__ . '/../vendor/autoload.php';

use Overtrue\Pinyin\Pinyin;

// Options collected from the command line, keyed by the flag as typed.
$inputOptions = [];
// Conversion used when no method is named on the command line.
$method = 'sentence';
$methods = ['name', 'phrase', 'permalink', 'polyphones', 'chars', 'nameAbbr', 'abbr', 'sentence'];
// The first argument is taken as the text to convert.
$input = $argv[1] ?? null;
$help = <<<"HELP"
Usage:
./pinyin [chinese] [method] [options]
Options:
-j, --json 输出 JSON 格式.
-c, --compact 不格式化输出 JSON.
-m, --method=[method] 转换方式可选name/phrase/permalink/polyphones/chars/nameAbbr/abbr/sentence.
--no-tone 不使用音调.
--tone-style=[style] 音调风格可选值default/none/number.
-h, --help 显示帮助.
HELP;

// Skip the script name. A bare method name selects the conversion; anything
// starting with "-" is stored as an option, and other arguments are ignored.
foreach (array_slice($argv, 1) as $argument) {
    if (in_array($argument, $methods)) {
        $method = $argument;
        continue;
    }

    if (!str_starts_with($argument, '-')) {
        continue;
    }

    // "--flag=value" becomes flag => value; a flag without "=" maps to null.
    $pair = array_map('trim', explode('=', $argument, 2));
    $inputOptions[$pair[0]] = $pair[1] ?? null;
}
/**
 * Tells whether an option was collected into the global $inputOptions array.
 *
 * @param string      $option the option key to look for
 * @param string|null $alias  alternative key, checked only when truthy
 *
 * @return bool true when either key is present, even if its value is null
 */
function has_option($option, $alias = null): bool
{
    global $inputOptions;

    $found = array_key_exists($option, $inputOptions);
    if (!$found && $alias) {
        $found = array_key_exists($alias, $inputOptions);
    }

    return $found;
}
/**
 * Reads an option value from the global $inputOptions array.
 *
 * The primary key wins; the alias (when truthy) is consulted only if the
 * primary key is missing or null, and $default is returned when neither
 * yields a non-null value.
 *
 * @param string      $option  the option key to read
 * @param string|null $default value returned when nothing is set
 * @param string|null $alias   alternative key
 */
function get_option($option, $default = null, $alias = null): ?string
{
    global $inputOptions;

    $value = $inputOptions[$option] ?? null;
    if ($value === null && $alias) {
        $value = $inputOptions[$alias] ?? null;
    }

    return $value ?? $default;
}
// Print the usage text when no input text was given or help was requested.
// Compared explicitly instead of with empty() so the text "0" is still converted.
if ($input === null || $input === '' || has_option('--help', '-h')) {
    echo $help;
    exit(0);
}

// An explicit --method / -m option overrides a method named positionally.
// The alias is passed through so "-m=abbr" is honoured, and the current
// method is the fallback so a value-less flag cannot null it out.
$method = get_option('--method', $method, '-m');

// --no-tone takes precedence over --tone-style.
$toneStyle = has_option('--no-tone') ? 'none' : get_option('--tone-style', 'default');

// For "permalink" the second argument passed is '-' instead of the tone style.
$result = Pinyin::$method($input, $method === 'permalink' ? '-' : $toneStyle);

// The "polyphones" method is always printed as JSON; other methods only on request.
$toJson = has_option('--json', '-j') || $method === 'polyphones';
if ($toJson) {
    $options = JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT;
    // Compact mode encodes with no flags at all.
    if (has_option('--compact', '-c')) {
        $options = 0;
    }
    $result = json_encode($result, $options);
}

echo $result, "\n";

44
vendor/overtrue/pinyin/bin/utils.php vendored Normal file
View File

@@ -0,0 +1,44 @@
<?php
/**
 * Lazily parses a character listing and yields one entry per matching line,
 * keyed by the character, with the comma-separated readings passed through $fn.
 *
 * Lines that do not match the expected format are tolerated only when they
 * start with "#".
 *
 * @example
 * <pre>
 * // U+4E2D: zhōng,zhòng # 中
 * </pre>
 *
 * @param string        $path file to read
 * @param callable|null $fn   receives the list of readings for a line and
 *                            returns the value to yield; defaults to identity
 *
 * @throws Exception when the file cannot be read or a non-comment line does
 *                   not match the expected format
 */
function parse_chars(string $path, ?callable $fn = null): Generator
{
    // Fail loudly instead of emitting warnings and yielding nothing.
    $lines = is_readable($path) ? file($path) : false;
    if ($lines === false) {
        throw new Exception("Unable to read file: {$path}");
    }

    $fn ??= static fn ($p) => $p;

    foreach ($lines as $line) {
        preg_match('/^U\+(?<code>[0-9A-Z]+):\s+(?<pinyin>\S+)\s+#\s*(?<char>\S+)/', $line, $matched);

        if ($matched && !empty($matched['pinyin'])) {
            yield $matched['char'] => $fn(explode(',', $matched['pinyin']));
        } elseif (!str_starts_with($line, '#')) {
            throw new Exception("行解析错误:$line");
        }
    }
}
/**
 * Lazily parses a word listing and yields one entry per matching line, keyed
 * by the word, with the list of syllables passed through $fn.
 *
 * Lines that do not match the expected format are skipped silently.
 *
 * @example
 * <pre>
 * // 㞎㞎: bǎ ba # 注释
 * </pre>
 *
 * @param string        $path file to read
 * @param callable|null $fn   receives the list of syllables for a line and
 *                            returns the value to yield; defaults to identity
 *
 * @throws Exception when the file cannot be read
 */
function parse_words(string $path, ?callable $fn = null): Generator
{
    // Fail loudly instead of emitting warnings and yielding nothing.
    $lines = is_readable($path) ? file($path) : false;
    if ($lines === false) {
        throw new Exception("Unable to read file: {$path}");
    }

    $fn ??= static fn ($p) => $p;

    foreach ($lines as $line) {
        preg_match('/^(?<word>[^#\s]+):\s+(?<pinyin>[\p{L} ]+)#?/u', $line, $matched);

        if ($matched && !empty($matched['pinyin'])) {
            // Split on runs of spaces so repeated separators do not produce
            // empty syllables.
            $syllables = preg_split('/ +/', trim($matched['pinyin']), -1, PREG_SPLIT_NO_EMPTY);

            yield $matched['word'] => $fn($syllables);
        }
    }
}