Sophie

Sophie

distrib > Mandriva > 2010.0 > i586 > media > contrib-release > by-pkgid > 1dcef8f7b86dc3b3c7b89dd968fc4c12 > files > 14

mecab-0.96-1mdv2008.1.i586.rpm

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html lang="ja">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=EUC-JP">
<title>MeCab ¤Î¼­½ñ¹½Â¤¤ÈÈÆÍѥƥ­¥¹¥ÈÊÑ´¹¥Ä¡¼¥ë¤È¤·¤Æ¤ÎÍøÍÑ</title>

<link type="text/css" rel="stylesheet" href="mecab.css">
</head>
<body>
<h1>MeCab ¤Î¼­½ñ¹½Â¤¤ÈÈÆÍѥƥ­¥¹¥ÈÊÑ´¹¥Ä¡¼¥ë¤È¤·¤Æ¤ÎÍøÍÑ</h1>

<p>$Id: dic-detail.html 65 2007-01-30 00:52:53Z taku-ku $;</p>

<h2>³µÍ×</h2>
<p>ñ¸ì¼­½ñ¤Î¹½Â¤¤òÍý²ò¤¹¤ë¤³¤È¤Ç, MeCab ¤òÈÆÍÑŪ¤Ê¥Æ¥­¥¹¥ÈÊÑ´¹¥Ä¡¼¥ë¤È¤·¤ÆÍøÍѤ¹¤ë¤³¤È¤¬¤Ç¤­¤Þ¤¹. Î㤨¤Ð, 
¤Ò¤é¤¬¤Ê to ¥«¥¿¥«¥ÊÊÑ´¹, ¥í¡¼¥Þ»ú to ¤Ò¤é¤¬¤ÊÊÑ´¹, Auto LinkÅù¤ò MeCab ¤À¤±¤Ç¼Â¹Ô¤Ç¤­¤Þ¤¹
</p>

<h2>¥Õ¥¡¥¤¥ë</h2>
<p>
ñ¸ì¼­½ñ¤ò¹½ÃÛ¤¹¤ë¤Ë¤Ï, ºÇÄã°Ê²¼¤Î¥Õ¥¡¥¤¥ë¤òºîÀ®¤¹¤ëɬÍפ¬¤¢¤ê¤Þ¤¹. 
</p>

<ul>
<li>*.csv ¥Õ¥¡¥¤¥ë (ñ¸ì¼­½ñ)
<li>matrix.def (Ï¢ÀÜɽ)
<li>unk.def (̤ÃθìÍÑÉÊ»ìÄêµÁ)
<li>char.def (̤Ãθì¤Îʸ»úÄêµÁ)
<li>dicrc (ÀßÄê¥Õ¥¡¥¤¥ë)
</ul>

<h3>*.csv ¥Õ¥¡¥¤¥ë</h3>
<p>ñ¸ì¼­½ñ¤Ç¤¹</p>

<p>¥¨¥ó¥È¥ê¤Ï, °Ê²¼¤Î¤è¤¦¤Ê CSV ¤ÇÄɲä·¤Þ¤¹.</p>

<pre>
test,1223,1223,6058,foo,bar,baz
</pre>

<p>ºÇ½é¤Î4¤Ä¤Ïɬ¿Ü¥¨¥ó¥È¥ê¤Ç, ¤½¤ì¤¾¤ì</p>
<ul>
<li>ɽÁØ·Á
<li>º¸Ê¸Ì®ID (ñ¸ì¤òº¸¤«¤é¸«¤¿¤È¤­¤Îʸ̮ ID)
<li>±¦Ê¸Ì®ID (ñ¸ì¤ò±¦¤«¤é¸«¤¿¤È¤­¤Îʸ̮ ID)
<li>ñ¸ì¥³¥¹¥È (¾®¤µ¤¤¤Û¤É½Ð¸½¤·¤ä¤¹¤¤)
</ul>
<p>¤È¤Ê¤Ã¤Æ¤¤¤Þ¤¹. ¥³¥¹¥ÈÃÍ¤Ï short int (16bit À°¿ô)
¤ÎÈϰϤˤª¤µ¤á¤ëɬÍפ¬¤¢¤ê¤Þ¤¹. </p>

<p>
5¥«¥é¥àÌܰʹߤÏ, ¥æ¡¼¥¶ÄêµÁ¤Î CSV ¥Õ¥£¡¼¥ë¥É¤Ç¤¹. ´ðËÜŪ¤Ë
¤É¤ó¤ÊÆâÍƤǤâ CSV ¤Îµö¤¹¸Â¤êÄɲ乤뤳¤È¤¬¤Ç¤­¤Þ¤¹. 
</p>

<h3>matrix.def</h3>
<p>ºÇ½é¤Î¹Ô¤ËÏ¢ÀÜɽ¤Î¥µ¥¤¥º(Á°·ï¥µ¥¤¥º, ¸å·ï¥µ¥¤¥º)¤ò½ñ¤­¤Þ¤¹. ¤½¤Î¸å¤Ï, 
Ï¢ÀÜɽ¤ÎÁ°·ï¤Îʸ̮ ID, ¸å·ï¤Îʸ̮ID¤È, ¤½¤ì¤ËÂбþ¤¹¤ë¥³¥¹¥È¤ò½ñ¤­¤Þ¤¹. 
</p>
<p>¤¢¤ëñ¸ì A, B ¤¬Ï¢Àܤò¤Ê¤¹¤È¤­, </p>
<ul>
<li>Á°·ïʸ̮ID = ñ¸ìA¤Î±¦Ê¸Ì®ID 
<li>¸å·ïʸ̮ID = ñ¸ìB¤Îº¸Ê¸Ì®ID
</ul>

<p>¤È¤Ê¤ê¤Þ¤¹. ¤Ä¤Þ¤ê, ñ¸ì¼­½ñ¤ËÅÐÏ¿¤·¤¿ ID ¤¬Ï¢ÀÜɽ¤ò»²¾È¤¹¤ëºÝ¤Î
¥­¡¼¤È¤Ê¤ê¤Þ¤¹. ¥³¥¹¥ÈÃÍ¤Ï short int (16bit À°¿ô)
¤ÎÈϰϤˤª¤µ¤á¤ëɬÍפ¬¤¢¤ê¤Þ¤¹. </p>

<pre>
100 120
0 0 1
0 1 10
0 2 5
</pre>

<p>¾åµ­¤ÎÎã¤Ç¤Ï, Á°·ï¤Îʸ̮¤Î¥µ¥¤¥º¤¬100, ¸å·ï¤Îʸ̮¤Î¥µ¥¤¥º¤¬ 120 ¤È¤Ê¤Ã¤Æ
¤¤¤Þ¤¹. ¤Þ¤¿, Á°·ïʸ̮ 0 ¤«¤é ¸å·ïʸ̮ 1 ¤Ø¤ÎÁ«°Ü¥³¥¹¥È¤¬ 10 ¤È¤Ê¤Ã¤Æ¤¤¤Þ¤¹. 
</p>

<h3>char.def</h3>
<p>̤Ãθì½èÍý¤Î¥ë¡¼¥ë¤Ç¤¹. <a href="learn.html">¤³¤Á¤é</a>¤ò¸æÍ÷¤¯¤À¤µ¤¤.
</p>

<p>°Ê²¼¤¬ºÇÄã¸Â¤ÎÀßÄê (DEFAULT ¤È SPACE) ¤Ç¤¹</p>
<pre>
DEFAULT 1 0 0
SPACE   0 1 0
0x0020 SPACE
</pre>

<h3>unk.def</h3>
<p>̤Ãθì¤ËÂФ¹¤ëÉÊ»ìÎó¤Î¥Æ¡¼¥Ö¥ë¤Ç¤¹. <a href="learn.html">¤³¤Á¤é</a>¤ò
¸æÍ÷¤¯¤À¤µ¤¤. </p>

<p>°Ê²¼¤¬ºÇÄã¸Â¤ÎÀßÄê (DEFAULT ¤È SPACE) ¤Ç¤¹</p>
<pre>
DEFAULT,0,0,0,*
SPACE,0,0,0,*
</pre>


<h2>¼­½ñ¤Î¥³¥ó¥Ñ¥¤¥ë</h2>
<p>¼¡¤Î¥³¥Þ¥ó¥É¤ò¼Â¹Ô¤¹¤ë¤³¤È¤Ç, ²òÀÏÍѤΥХ¤¥Ê¥ê¼­½ñ¤òºîÀ®¤·¤Þ¤¹. </p>
<pre>
% /usr/local/libexec/mecab/mecab-dict-index 
</pre>

<h2>¥±¡¼¥¹¥¹¥¿¥Ç¥£</h2>
<p>example ¥Ç¥£¥ì¥¯¥È¥ê¤Ë¤¤¤¯¤Ä¤«¤Î±þÍÑÎ㤬¤¢¤ê¤Þ¤¹.</p>

<h3>Auto Link</h3>
<p>Hatena Keyword ¤Î¤è¤¦¤Ê Auto Link ¤ò¼ÂÁõ¤·¤Æ¤ß¤Þ¤¹</p>
<ul>
<li>url.csv
<p>ñ¸ì¤È¤·¤Æ¥­¡¼¥ï¡¼¥É, ÉÊ»ì¤È¤·¤Æ¥­¡¼¥ï¡¼¥É¤ËÂбþ¤¹¤ë URL ¤ò½ñ¤­¤Þ¤¹.
Ï¢ÀܤξõÂÖ¤Ï1¾õÂ֤ǽ½Ê¬¤Ê¤Î¤Ç, º¸Ê¸Ì®/±¦Ê¸Ì®ID¤È¤â¤Ë 0 ¤È¤·¤Þ¤¹. 
¥³¥¹¥ÈÃͤÏŤ¤¥­¡¼¥ï¡¼¥É¤¬Í¥À褵¤ì¤ë¤è¤¦ÀßÄꤷ¤Þ¤¹. Î㤨¤Ð°Ê²¼¤Î¤è¤¦¤Ê´Ø¿ô¤ò»È¤¦¤È¤è¤¤¤Ç¤·¤ç¤¦.</p>
<pre>cost = (int)max(-36000, -400 * (length^1.5)) </pre>

url.csv
<pre>
Google,0,0,-5878,http://www.google.com/
Yahoo,0,0,-4472,http://www.yahoo.com/
ChaSen,0,0,-5878,http://chasen.org/
µþÅÔ,0,0,-3200,http://www.city.kyoto.jp/
...
</pre>

<li>matrix.def
<p>1 ¾õÂ֤ʤΤÇ, Ï¢ÀÜɽ¤Î¥µ¥¤¥º¤Ï 1 x 1 ¤È¤Ê¤ê¤Þ¤¹. 
¸å·ï 0 ¤«¤éÁ°·ï 0 ¤ÎÏ¢ÀÜ¥³¥¹¥È¤Ï 0 ¤È¤·¤Þ¤¹.</p>
<pre>
1 1
0 0 0
</pre>

<li>char.def
<p>ºÇÄã¸Â¤ÎÀßÄê (DEFAULT ¤È SPACE) ¤Ç¤¹</p>
<pre>
DEFAULT 1 0 0
SPACE   0 1 0
0x0020 SPACE
</pre>

<li>unk.def
<p>ºÇÄã¸Â¤ÎÀßÄê (DEFAULT ¤È SPACE) ¤Ç¤¹</p>
<pre>
DEFAULT,0,0,0,*
SPACE,0,0,0,*
</pre>

<li>dicrc
<p>autolink ¤È¤¤¤¦¥Õ¥©¡¼¥Þ¥Ã¥È¤òºîÀ®¤·, ¤½¤ì¤¬¥Ç¥Õ¥©¥ë¥È¤Î½ÐÎϤˤʤë¤è¤¦¤Ë¤·¤Þ¤¹</p>
<pre>
cost-factor = 800
bos-feature = BOS/EOS
output-format-type=autolink

node-format-autolink = &lt;a href="%H"&gt;%M&lt;/a&gt;
unk-format-autolink = %M
eos-format-autolink = \n
</pre>

<li>¥³¥ó¥Ñ¥¤¥ë + ¥Æ¥¹¥È
<pre>
% /usr/local/libexec/mecab/mecab-dict-index -f euc-jp -c euc-jp
reading ./unk.def ..  2
emitting double-array: 100% |###########################################| 
reading ./url.csv ..  4
emitting double-array: 100% |###########################################| 
emitting matrix      : 100% |###########################################
done!

% mecab -d .
µþÅԤ˹Ԥä¿. 
&lt;a href="http://www.city.kyoto.jp/"&gt;µþÅÔ&lt;/a&gt;¤Ë¹Ô¤Ã¤¿.
Yahoo¤ÈGoogle
&lt;a href="http://www.yahoo.com/"&gt;Yahoo&lt;/a&gt;¤È&lt;a href="http://www.google.com/"&gt;Google&lt;/a&gt;
</pre>
</ul>

<h3>¤Ò¤é¤¬¤Ê to ¥«¥¿¥«¥ÊÊÑ´¹¥Ä¡¼¥ë</h3>
<ul>
<li>dic.csv
<p>ñ¸ì¤È¤·¤Æ¤Ò¤é¤¬¤Ê1ʸ»ú, ÉÊ»ì¤È¤·¤Æ³Æ¤Ò¤é¤¬¤ÊÂбþ¤¹¤ë¥«¥¿¥«¥Ê1ʸ»ú¤ò½ñ¤­¤Þ¤¹.
Ï¢ÀܤξõÂÖ¤Ï1¾õÂ֤ǽ½Ê¬¤Ê¤Î¤Ç, º¸Ê¸Ì®/±¦Ê¸Ì®ID¤È¤â¤Ë 0 ¤È¤·¤Þ¤¹. 
Û£ËæÀ­¤¬¤Ê¤¤¤¿¤á ¥³¥¹¥ÈÃÍ¤Ï 0 ¤È¤·¤Þ¤¹</p>
<pre>
¤¦¡«,0,0,0,¥ô
¤¢,0,0,0,¥¢
¤¤,0,0,0,¥¤
¤¦,0,0,0,¥¦
¤¨,0,0,0,¥¨
¤ª,0,0,0,¥ª
¤¡,0,0,0,¥¡
¤£,0,0,0,¥£
¤¥,0,0,0,¥¥
¤§,0,0,0,¥§
¤©,0,0,0,¥©
¤«,0,0,0,¥«
¤­,0,0,0,¥­
¤¯,0,0,0,¥¯
¤±,0,0,0,¥±
¤³,0,0,0,¥³
¤¬,0,0,0,¥¬
¤®,0,0,0,¥®
¤°,0,0,0,¥°
¤²,0,0,0,¥²
¤´,0,0,0,¥´
¤µ,0,0,0,¥µ
¤·,0,0,0,¥·
¤¹,0,0,0,¥¹
¤»,0,0,0,¥»
¤½,0,0,0,¥½
¤¶,0,0,0,¥¶
¤¸,0,0,0,¥¸
¤º,0,0,0,¥º
¤¼,0,0,0,¥¼
¤¾,0,0,0,¥¾
¤¿,0,0,0,¥¿
¤Á,0,0,0,¥Á
¤Ä,0,0,0,¥Ä
¤Æ,0,0,0,¥Æ
¤È,0,0,0,¥È
¤À,0,0,0,¥À
¤Â,0,0,0,¥Â
¤Å,0,0,0,¥Å
¤Ç,0,0,0,¥Ç
¤É,0,0,0,¥É
¤Ã,0,0,0,¥Ã
¤Ê,0,0,0,¥Ê
¤Ë,0,0,0,¥Ë
¤Ì,0,0,0,¥Ì
¤Í,0,0,0,¥Í
¤Î,0,0,0,¥Î
¤Ï,0,0,0,¥Ï
¤Ò,0,0,0,¥Ò
¤Õ,0,0,0,¥Õ
¤Ø,0,0,0,¥Ø
¤Û,0,0,0,¥Û
¤Ð,0,0,0,¥Ð
¤Ó,0,0,0,¥Ó
¤Ö,0,0,0,¥Ö
¤Ù,0,0,0,¥Ù
¤Ü,0,0,0,¥Ü
¤Ñ,0,0,0,¥Ñ
¤Ô,0,0,0,¥Ô
¤×,0,0,0,¥×
¤Ú,0,0,0,¥Ú
¤Ý,0,0,0,¥Ý
¤Þ,0,0,0,¥Þ
¤ß,0,0,0,¥ß
¤à,0,0,0,¥à
¤á,0,0,0,¥á
¤â,0,0,0,¥â
¤ã,0,0,0,¥ã
¤ä,0,0,0,¥ä
¤å,0,0,0,¥å
¤æ,0,0,0,¥æ
¤ç,0,0,0,¥ç
¤è,0,0,0,¥è
¤é,0,0,0,¥é
¤ê,0,0,0,¥ê
¤ë,0,0,0,¥ë
¤ì,0,0,0,¥ì
¤í,0,0,0,¥í
¤î,0,0,0,¥î
¤ï,0,0,0,¥ï
¤ð,0,0,0,¥ð
¤ñ,0,0,0,¥ñ
¤ò,0,0,0,¥ò
¤ó,0,0,0,¥ó
</pre>

<li>matrix.def
<p>1 ¾õÂ֤ʤΤÇ, Ï¢ÀÜɽ¤Î¥µ¥¤¥º¤Ï 1 x 1 ¤È¤Ê¤ê¤Þ¤¹. 
¸å·ï 0 ¤«¤éÁ°·ï 0 ¤ÎÏ¢ÀÜ¥³¥¹¥È¤Ï 0 ¤È¤·¤Þ¤¹.</p>
<pre>
1 1
0 0 0
</pre>

<li>char.def
<p>ºÇÄã¸Â¤ÎÀßÄê (DEFAULT ¤È SPACE) ¤Ç¤¹</p>
<pre>
DEFAULT 1 0 0
SPACE   0 1 0
0x0020 SPACE
</pre>

<li>unk.def
<p>ºÇÄã¸Â¤ÎÀßÄê (DEFAULT ¤È SPACE) ¤Ç¤¹</p>
<pre>
DEFAULT,0,0,0,*
SPACE,0,0,0,*
</pre>

<li>dicrc
<p>katakana ¤È¤¤¤¦¥Õ¥©¡¼¥Þ¥Ã¥È¤òºîÀ®¤·, ¤½¤ì¤¬¥Ç¥Õ¥©¥ë¥È¤Î½ÐÎϤˤʤë¤è¤¦
    ¤Ë¤·¤Þ¤¹</p>
<pre>
dictionary-charset = euc-jp
cost-factor = 800
bos-feature = BOS/EOS
output-format-type=katakana

node-format-katakana = %H
unk-format-katakana = %M
eos-format-katakana  = \n
</pre>

<li>¥³¥ó¥Ñ¥¤¥ë + ¥Æ¥¹¥È
<pre>
% /usr/local/libexec/mecab/mecab-dict-index -f euc-jp -c euc-jp
reading ./unk.def ..  2
emitting double-array: 100% |###########################################| 
reading ./dic.csv .. 84
emitting double-array: 100% |###########################################| 
emitting matrix      : 100% |###########################################
done!
% mecab -d .
¤³¤ì¤Ï¤Æ¤¹¤È¤Ç¤¹
¥³¥ì¥Ï¥Æ¥¹¥È¥Ç¥¹
</pre>
</ul>

<hr>
<p>$Id: dic-detail.html 65 2007-01-30 00:52:53Z taku-ku $;</p>
</body>
</html>