package tdlchr; # ###################################################################### ### ### ### CGI辞書システム T-Dictionary Ver.0.90 ### [5/8] 文字関連ライブラリ (tdllchr.pl) ### (c) 1996-2002 Takahiro Nishida ### http://www.mytools.net/ ### ### ###################################################################### # ### 変数設定部 (詳細は上記ページをご覧下さい) ###################### ### 変数設定部 (ここまで)########################################### # 補正値 $SJIS_STARTNO = 200; $ENG_STARTNO = 120; $NUM_STARTNO = 100; $SJIS_OFFSET = 159 - $SJIS_STARTNO; $ENG_OFFSET = 65 - $ENG_STARTNO; $NUM_OFFSET = 48 - $NUM_STARTNO; ########## よみがなを10進化 sub RubyToDecs{ local($ruby, $dmflag, $skflag) = @_; local(@bins, $dec, $decs, $bin1, $bin2, $type, $result); # デフォは濁点考慮、促音無視 &jcode'convert(*ruby, "sjis"); @bins = split(//, $ruby); while(@bins){ # 英語、数字、日本語の判定 $bin1 = shift(@bins); (($type = &CheckCharType($bin1)) == -1) && (return -1); if($type eq 'K'){ $bin2 = shift(@bins); (($dec = &SJISToDec($bin1 . $bin2)) > 0) || (return -1); # 濁点無視フラグがあったら清音化 ($dmflag) && ($dec = &RubyToIndex($dec)); } elsif($type eq 'N'){ $dec = &NumToDec($bin1); } elsif($type eq 'E'){ $dec = &EngToDec($bin1); } elsif($type eq 'L'){ $bin2 = shift(@bins); # 促音考慮フラグがあったら999で評価、なければ無視 ($skflag) ? ($dec = "999") : next; } $result .= sprintf("%03d", $dec); } return $result; } ########## 10進をよみがな化(「ー」は復元されないので注意) sub DecsToRuby{ local($decs) = @_; local($dec, $bin, $type, $result); while($decs){ $decs =~ s/^(.{3})(.*)/$2/; $dec = $1; (($type = &CheckDecType($dec)) == -1) && (return -1); if($type eq 'K'){ $bin = &DecToSJIS($dec); } elsif($type eq 'N'){ $bin = &DecToNum($dec); } elsif($type eq 'E'){ $bin = &DecToEng($dec); } $result .= $bin; } return $result; } ########## 1バイト目を見て、かな(K)、数字(N)、英語(E)を判定 sub CheckCharType{ local($bin1) = @_; # 1バイト目が 82 は「かな」(ホントは2バイト目の判定も必要) ($bin1 =~ /\x82/) && (return 'K'); # 1バイト目が 81 は「ー」(ホントは2バイト目が「61」じゃなきゃダメ) ($bin1 =~ /\x81/) && (return 'L'); # 0x30-0x39 は「数字」 ($bin1 =~ /[\x30-\x39]/) && (return 'N'); # 0x41-0x5A は「半角英語」 ($bin1 =~ /[\x41-\x5a]/) && (return 'E'); # それ以外はエラー return -1; } ########## 10進を見て、かな(K)、数字(N)、英語(E)を判定 sub CheckDecType{ local($dec) = @_; # 大きすぎたらエラー(SJIS:84, ENG:26, NUM:10) ($dec > $SJIS_STARTNO + 84) && (return -1); ($dec >= $SJIS_STARTNO) && (return 'K'); ($dec >= $ENG_STARTNO) && (return 'E'); ($dec >= $NUM_STARTNO) && (return 'N'); # 小さすぎたらエラー return -1; } ########## ひらがな→2バイト目十進変換(SJIS専用) sub SJISToDec{ local($kana) = @_; local($bin1, $bin2); ($bin1, $bin2) = split(//, $kana); # 1バイト目が 82 じゃなかったらエラー ($bin1 =~ /\x82/) || (return -1); # 2バイト目が 9f-f1 の外だったらエラー ($bin2 =~ /[\x9f-\xf1]/) || (return -1); return unpack("C", $bin2) - $SJIS_OFFSET; } ########## 2バイト目十進→ひらがな変換(SJIS専用) sub DecToSJIS{ local($dec2) = @_; local($bin2); $dec2 += $SJIS_OFFSET; $bin2 = pack("C", $dec2); # 2バイト目が 9f-f1 の外だったらエラー ($bin2 =~ /[\x9f-\xf1]/) || (return -1); return pack("C", hex('82')) . $bin2; } ########## 数字→十進変換 sub NumToDec{ local($num) = @_; # 0x30-0x39 の外ならエラー ($num =~ /[\x30-\x39]/) || (return -1); return unpack("C", $num) -$NUM_OFFSET; } ########## 十進→数字変換 sub DecToNum{ local($dec) = @_; $dec += $NUM_OFFSET; $bin = pack("C", $dec); # 0x30-0x39 の外ならエラー ($bin =~ /[\x30-\x39]/) || (return -1); return $bin; } ########## 英語→十進変換 sub EngToDec{ local($eng) = @_; # 0x41-0x5a の外ならエラー ($eng =~ /[\x41-\x5a]/) || (return -1); return unpack("C", $eng) -$ENG_OFFSET; } ########## 英語→十進変換 sub DecToEng{ local($dec) = @_; $dec += $ENG_OFFSET; $bin = pack("C", $dec); # 0x41-0x5a の外ならエラー ($bin =~ /[\x41-\x5a]/) || (return -1); return $bin; } ########## 2つの十進表記の長さをあわせる sub SetDecLength{ local($dec1, $dec2) = @_; local($length1, $length2); $length1 = length($dec1); $length2 = length($dec2); $length_diff = abs($length1 - $length2); ($length1 > $length2) ? ($dec2 .= "0" x $length_diff) : ($dec1 .= "0" x $length_diff); return ($dec1, $dec2); } ########## 頭2文字10進化 sub DecsToH2Dec{ local($decs) = @_; (length($decs) % 3 == 0) || &tdllib'error("LC-0501", 1, "致命的エラー", "文字数が3の倍数になっていない"); ($dec1, $dec2) = $decs =~ m|(\d{3})(\d{3})?|; $dec1 = &RubyToIndex($dec1); $dec2 = ($dec2) ? &RubyToIndex($dec2) : "000"; return $dec1 . $dec2; } ########## 頭1文字Index化 sub DecsToIdxDec{ local($decs) = @_; (length($decs) % 3 == 0) || &tdllib'error("LC-0502", 0, "致命的エラー", "文字数が3の倍数になっていない"); $hdec = substr($decs, 0, 3); ($UNHEADDING_CHAR =~ /,$hdec,/) && (return -1); return &RubyToIndex($hdec); } ########## 辞書見出し用変換マッピング sub RubyToIndex{ local($dec) = @_; return $CH[$dec]; } ### 頭にきたらおかしい文字番号一覧 $UNHEADDING_CHAR = ',200,202,204,206,208,234,266,268,270,277,'; ### RubyToIndexで使うマッピング規則 $CH[200] = 201; # ぁ $CH[201] = 201; # あ $CH[202] = 203; # ぃ $CH[203] = 203; # い $CH[204] = 205; # ぅ $CH[205] = 205; # う $CH[206] = 207; # ぇ $CH[207] = 207; # え $CH[208] = 209; # ぉ $CH[209] = 209; # お $CH[210] = 210; # か $CH[211] = 210; # が $CH[212] = 212; # き $CH[213] = 212; # ぎ $CH[214] = 214; # く $CH[215] = 214; # ぐ $CH[216] = 216; # け $CH[217] = 216; # げ $CH[218] = 218; # こ $CH[219] = 218; # ご $CH[220] = 220; # さ $CH[221] = 220; # ざ $CH[222] = 222; # し $CH[223] = 222; # じ $CH[224] = 224; # す $CH[225] = 224; # ず $CH[226] = 226; # せ $CH[227] = 226; # ぜ $CH[228] = 228; # そ $CH[229] = 228; # ぞ $CH[230] = 230; # た $CH[231] = 230; # だ $CH[232] = 232; # ち $CH[233] = 232; # ぢ $CH[234] = 235; # っ $CH[235] = 235; # つ $CH[236] = 235; # づ $CH[237] = 237; # て $CH[238] = 237; # で $CH[239] = 239; # と $CH[240] = 239; # ど $CH[241] = 241; # な $CH[242] = 242; # に $CH[243] = 243; # ぬ $CH[244] = 244; # ね $CH[245] = 245; # の $CH[246] = 246; # は $CH[247] = 246; # ば $CH[248] = 246; # ぱ $CH[249] = 249; # ひ $CH[250] = 249; # び $CH[251] = 249; # ぴ $CH[252] = 252; # ふ $CH[253] = 252; # ぶ $CH[254] = 252; # ぷ $CH[255] = 255; # へ $CH[256] = 255; # べ $CH[257] = 255; # ぺ $CH[258] = 258; # ほ $CH[259] = 258; # ぼ $CH[260] = 258; # ぽ $CH[261] = 261; # ま $CH[262] = 262; # み $CH[263] = 263; # む $CH[264] = 264; # め $CH[265] = 265; # も $CH[266] = 267; # ゃ $CH[267] = 267; # や $CH[268] = 269; # ゅ $CH[269] = 269; # ゆ $CH[270] = 271; # ょ $CH[271] = 271; # よ $CH[272] = 272; # ら $CH[273] = 273; # り $CH[274] = 274; # る $CH[275] = 275; # れ $CH[276] = 276; # ろ $CH[277] = 278; # ゎ $CH[278] = 278; # わ $CH[279] = 279; # ゐ $CH[280] = 280; # ゑ $CH[281] = 281; # を $CH[282] = 282; # ん $CH[100] = 100; # 0 $CH[101] = 101; # 1 $CH[102] = 102; # 2 $CH[103] = 103; # 3 $CH[104] = 104; # 4 $CH[105] = 105; # 5 $CH[106] = 106; # 6 $CH[107] = 107; # 7 $CH[108] = 108; # 8 $CH[109] = 109; # 9 $CH[120] = 120; # A $CH[121] = 121; # B $CH[122] = 122; # C $CH[123] = 123; # D $CH[124] = 124; # E $CH[125] = 125; # F $CH[126] = 126; # G $CH[127] = 127; # H $CH[128] = 128; # I $CH[129] = 129; # J $CH[130] = 130; # K $CH[131] = 131; # L $CH[132] = 132; # M $CH[133] = 133; # N $CH[134] = 134; # O $CH[135] = 135; # P $CH[136] = 136; # Q $CH[137] = 137; # R $CH[138] = 138; # S $CH[139] = 139; # T $CH[140] = 140; # U $CH[141] = 141; # V $CH[142] = 142; # W $CH[143] = 143; # X $CH[144] = 144; # Y $CH[145] = 145; # Z 1;