<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> <link rel="stylesheet" href="style.css" type="text/css"> <meta content="text/html; charset=iso-8859-1" http-equiv="Content-Type"> <link rel="Start" href="index.html"> <link rel="previous" href="UTF16.html"> <link rel="next" href="UText.html"> <link rel="Up" href="index.html"> <link title="Index of types" rel=Appendix href="index_types.html"> <link title="Index of exceptions" rel=Appendix href="index_exceptions.html"> <link title="Index of values" rel=Appendix href="index_values.html"> <link title="Index of class methods" rel=Appendix href="index_methods.html"> <link title="Index of classes" rel=Appendix href="index_classes.html"> <link title="Index of class types" rel=Appendix href="index_class_types.html"> <link title="Index of modules" rel=Appendix href="index_modules.html"> <link title="Index of module types" rel=Appendix href="index_module_types.html"> <link title="CaseMap" rel="Chapter" href="CaseMap.html"> <link title="CharEncoding" rel="Chapter" href="CharEncoding.html"> <link title="Default" rel="Chapter" href="Default.html"> <link title="IMap" rel="Chapter" href="IMap.html"> <link title="ISet" rel="Chapter" href="ISet.html"> <link title="Locale" rel="Chapter" href="Locale.html"> <link title="Main" rel="Chapter" href="Main.html"> <link title="OOChannel" rel="Chapter" href="OOChannel.html"> <link title="SubText" rel="Chapter" href="SubText.html"> <link title="UCS4" rel="Chapter" href="UCS4.html"> <link title="UChar" rel="Chapter" href="UChar.html"> <link title="UCharInfo" rel="Chapter" href="UCharInfo.html"> <link title="UCharTbl" rel="Chapter" href="UCharTbl.html"> <link title="UCol" rel="Chapter" href="UCol.html"> <link title="ULine" rel="Chapter" href="ULine.html"> <link title="UMap" rel="Chapter" href="UMap.html"> <link title="UNF" rel="Chapter" href="UNF.html"> <link title="UPervasives" rel="Chapter" href="UPervasives.html"> <link title="URe" rel="Chapter" href="URe.html"> 
<link title="UReStr" rel="Chapter" href="UReStr.html"> <link title="USet" rel="Chapter" href="USet.html"> <link title="UTF16" rel="Chapter" href="UTF16.html"> <link title="UTF8" rel="Chapter" href="UTF8.html"> <link title="UText" rel="Chapter" href="UText.html"> <link title="UnicodeString" rel="Chapter" href="UnicodeString.html"> <link title="XArray" rel="Chapter" href="XArray.html"> <link title="XString" rel="Chapter" href="XString.html"><title>UTF8</title> </head> <body> <div class="navbar"><a href="UTF16.html">Previous</a> <a href="index.html">Up</a> <a href="UText.html">Next</a> </div> <center><h1>Module <a href="type_UTF8.html">UTF8</a></h1></center> <br> <pre><span class="keyword">module</span> UTF8: <code class="code">sig</code> <a href="UTF8.html">..</a> <code class="code">end</code></pre>UTF-8 encoded Unicode strings. <p> The module for UTF-8 encoded Unicode strings.<br> <hr width="100%"> <pre><span class="keyword">type</span> <a name="TYPEt"></a><code class="type"></code>t = <code class="type">string</code> </pre> <div class="info"> UTF-8 encoded Unicode strings. The type is a normal string.<br> </div> <pre><span class="keyword">exception</span> <a name="EXCEPTIONMalformed_code"></a>Malformed_code</pre> <pre><span class="keyword">val</span> <a name="VALvalidate"></a>validate : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> unit</code></pre><div class="info"> <code class="code">validate s</code> succeeds if s is valid UTF-8, otherwise raises Malformed_code. Other functions assume strings are valid UTF-8, so it is prudent to test their validity for strings from untrusted origins.<br> </div> <pre><span class="keyword">val</span> <a name="VALget"></a>get : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> int -> <a href="UChar.html#TYPEt">UChar.t</a></code></pre><div class="info"> <code class="code">get s n</code> returns the <code class="code">n</code>-th Unicode character of <code class="code">s</code>. 
The call requires O(n)-time.<br> </div> <pre><span class="keyword">val</span> <a name="VALinit"></a>init : <code class="type">int -> (int -> <a href="UChar.html#TYPEt">UChar.t</a>) -> <a href="UTF8.html#TYPEt">t</a></code></pre><div class="info"> <code class="code">init len f</code> returns a new string which contains <code class="code">len</code> Unicode characters. The i-th Unicode character is initialized by <code class="code">f i</code><br> </div> <pre><span class="keyword">val</span> <a name="VALlength"></a>length : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> int</code></pre><div class="info"> <code class="code">length s</code> returns the number of Unicode characters contained in s<br> </div> <pre><span class="keyword">type</span> <a name="TYPEindex"></a><code class="type"></code>index = <code class="type">int</code> </pre> <div class="info"> Positions in the string represented by the number of bytes from the head. The location of the first character is <code class="code">0</code><br> </div> <pre><span class="keyword">val</span> <a name="VALnth"></a>nth : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> int -> <a href="UTF8.html#TYPEindex">index</a></code></pre><div class="info"> <code class="code">nth s n</code> returns the position of the <code class="code">n</code>-th Unicode character. 
The call requires O(n)-time.<br> </div> <pre><span class="keyword">val</span> <a name="VALfirst"></a>first : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> <a href="UTF8.html#TYPEindex">index</a></code></pre><div class="info"> The position of the head of the first Unicode character.<br> </div> <pre><span class="keyword">val</span> <a name="VALlast"></a>last : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> <a href="UTF8.html#TYPEindex">index</a></code></pre><div class="info"> The position of the head of the last Unicode character.<br> </div> <pre><span class="keyword">val</span> <a name="VALlook"></a>look : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> <a href="UTF8.html#TYPEindex">index</a> -> <a href="UChar.html#TYPEt">UChar.t</a></code></pre><div class="info"> <code class="code">look s i</code> returns the Unicode character at the location <code class="code">i</code> in the string <code class="code">s</code>.<br> </div> <pre><span class="keyword">val</span> <a name="VALout_of_range"></a>out_of_range : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> <a href="UTF8.html#TYPEindex">index</a> -> bool</code></pre><div class="info"> <code class="code">out_of_range s i</code> tests whether <code class="code">i</code> is a position inside of <code class="code">s</code>.<br> </div> <pre><span class="keyword">val</span> <a name="VALcompare_index"></a>compare_index : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> <a href="UTF8.html#TYPEindex">index</a> -> <a href="UTF8.html#TYPEindex">index</a> -> int</code></pre><div class="info"> <code class="code">compare_index s i1 i2</code> returns a value &lt; 0 if <code class="code">i1</code> is the position located before <code class="code">i2</code>, 0 if <code class="code">i1</code> and <code class="code">i2</code> point to the same location, a value &gt; 0 if <code class="code">i1</code> is the position located after <code class="code">i2</code>.<br> </div> <pre><span class="keyword">val</span> <a 
name="VALnext"></a>next : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> <a href="UTF8.html#TYPEindex">index</a> -> <a href="UTF8.html#TYPEindex">index</a></code></pre><div class="info"> <code class="code">next s i</code> returns the position of the head of the Unicode character located immediately after <code class="code">i</code>. If <code class="code">i</code> is inside of <code class="code">s</code>, the function always succeeds. If <code class="code">i</code> is inside of <code class="code">s</code> and there is no Unicode character after <code class="code">i</code>, the position outside <code class="code">s</code> is returned. If <code class="code">i</code> is not inside of <code class="code">s</code>, the behaviour is unspecified.<br> </div> <pre><span class="keyword">val</span> <a name="VALprev"></a>prev : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> <a href="UTF8.html#TYPEindex">index</a> -> <a href="UTF8.html#TYPEindex">index</a></code></pre><div class="info"> <code class="code">prev s i</code> returns the position of the head of the Unicode character located immediately before <code class="code">i</code>. If <code class="code">i</code> is inside of <code class="code">s</code>, the function always succeeds. If <code class="code">i</code> is inside of <code class="code">s</code> and there is no Unicode character before <code class="code">i</code>, the position outside <code class="code">s</code> is returned. 
If <code class="code">i</code> is not inside of <code class="code">s</code>, the behaviour is unspecified.<br> </div> <pre><span class="keyword">val</span> <a name="VALmove"></a>move : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> <a href="UTF8.html#TYPEindex">index</a> -> int -> <a href="UTF8.html#TYPEindex">index</a></code></pre><div class="info"> <code class="code">move s i n</code> returns the position of the <code class="code">n</code>-th Unicode character after <code class="code">i</code> if n &gt;= 0, the position of the <code class="code">n</code>-th Unicode character before <code class="code">i</code> if n &lt; 0. If there is no such character, the result is unspecified.<br> </div> <pre><span class="keyword">val</span> <a name="VALiter"></a>iter : <code class="type">(<a href="UChar.html#TYPEt">UChar.t</a> -> unit) -> <a href="UTF8.html#TYPEt">t</a> -> unit</code></pre><div class="info"> <code class="code">iter f s</code> applies <code class="code">f</code> to all Unicode characters in <code class="code">s</code>. The order of application is the same as the order of the Unicode characters in <code class="code">s</code>.<br> </div> <pre><span class="keyword">val</span> <a name="VALcompare"></a>compare : <code class="type"><a href="UTF8.html#TYPEt">t</a> -> <a href="UTF8.html#TYPEt">t</a> -> int</code></pre><div class="info"> Code point comparison by the lexicographic order. <code class="code">compare s1 s2</code> returns a positive integer if <code class="code">s1</code> &gt; <code class="code">s2</code>, 0 if <code class="code">s1</code> = <code class="code">s2</code>, a negative integer if <code class="code">s1</code> &lt; <code class="code">s2</code>.<br> </div> <pre><span class="keyword">module</span> <a href="UTF8.Buf.html">Buf</a>: <code class="code">sig</code> <a href="UTF8.Buf.html">..</a> <code class="code">end</code><code class="type"> with type buf = Buffer.t</code></pre><div class="info"> Buffer module for UTF-8 strings </div> </body></html>