#!/usr/bin/env php $pinyin) { $charWithPolyphones[$char] = $pinyin; $chars[$char] = "\t{$pinyin[0]}\t"; } // 补丁部分 foreach (parse_chars($charsPathes, fn ($p) => "\t{$p[0]}\t") as $char => $pinyin) { $chars[$char] = $pinyin; } // ------------------------------------------------ $words = []; foreach (parse_words($wordsSouce) as $word => $pinyin) { $wordChars = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY); try { $pinyinSegments = array_combine($wordChars, $pinyin); } catch (Throwable $e) { throw new Exception("行解析错误:$line"); } // 多音字处理 $polyphoneChars = array_intersect_key($wordChars, $polyphones); foreach ($polyphoneChars as $char) { // 如果词里的任何一个多音字在词里的读音和常用读音不一致,则需要加入词典,否则抛弃该词 if (isset($charWithPolyphones[$char]) && $pinyinSegments[$char] != $charWithPolyphones[$char][0]) { $words[$word] = join("\t", ["", ...$pinyin, ""]); break; } } } foreach (parse_words($wordsPathes) as $word => $pinyin) { $words[$word] = join("\t", ["", ...$pinyin, ""]); } // 清理 exec('rm -rf ' . __DIR__ . '/../data/*'); // 姓氏 file_put_contents(__DIR__ . '/../data/surnames.php', " strlen($b) <=> strlen($a)); foreach (array_chunk($words, 8000, true) as $index => $group) { file_put_contents(__DIR__ . "/../data/words-{$index}.php", "