Details
gunichar
typedef guint32 gunichar; |
gunichar2
typedef guint16 gunichar2; |
g_get_charset ()
gboolean g_get_charset (char **charset); |
g_unichar_isupper ()
Determines if a character is uppercase.
g_unichar_isxdigit ()
Determines if a character is a hexadecimal digit.
g_unichar_istitle ()
Determines if a character is titlecase. Some characters in
Unicode which are composites, such as the DZ digraph
have three case variants instead of just two. The titlecase
form is used at the beginning of a word where only the
first letter is capitalized. The titlecase form of the DZ
digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
g_unichar_isdefined ()
Determines if a given character is assigned in the Unicode
standard
g_unichar_iswide ()
Determines if a character is typically rendered in a double-width
cell.
g_unichar_toupper ()
Convert a character to uppercase.
g_unichar_tolower ()
Convert a character to lowercase.
g_unichar_totitle ()
Convert a character to titlecase.
g_unichar_xdigit_value ()
Determines the numeric value of a character as a hexadecimal
digit.
enum GUnicodeType
typedef enum
{
G_UNICODE_CONTROL,
G_UNICODE_FORMAT,
G_UNICODE_UNASSIGNED,
G_UNICODE_PRIVATE_USE,
G_UNICODE_SURROGATE,
G_UNICODE_LOWERCASE_LETTER,
G_UNICODE_MODIFIER_LETTER,
G_UNICODE_OTHER_LETTER,
G_UNICODE_TITLECASE_LETTER,
G_UNICODE_UPPERCASE_LETTER,
G_UNICODE_COMBINING_MARK,
G_UNICODE_ENCLOSING_MARK,
G_UNICODE_NON_SPACING_MARK,
G_UNICODE_DECIMAL_NUMBER,
G_UNICODE_LETTER_NUMBER,
G_UNICODE_OTHER_NUMBER,
G_UNICODE_CONNECT_PUNCTUATION,
G_UNICODE_DASH_PUNCTUATION,
G_UNICODE_CLOSE_PUNCTUATION,
G_UNICODE_FINAL_PUNCTUATION,
G_UNICODE_INITIAL_PUNCTUATION,
G_UNICODE_OTHER_PUNCTUATION,
G_UNICODE_OPEN_PUNCTUATION,
G_UNICODE_CURRENCY_SYMBOL,
G_UNICODE_MODIFIER_SYMBOL,
G_UNICODE_MATH_SYMBOL,
G_UNICODE_OTHER_SYMBOL,
G_UNICODE_LINE_SEPARATOR,
G_UNICODE_PARAGRAPH_SEPARATOR,
G_UNICODE_SPACE_SEPARATOR
} GUnicodeType; |
g_unichar_type ()
Classifies a Unicode character by type.
enum GUnicodeBreakType
typedef enum
{
G_UNICODE_BREAK_MANDATORY,
G_UNICODE_BREAK_CARRIAGE_RETURN,
G_UNICODE_BREAK_LINE_FEED,
G_UNICODE_BREAK_COMBINING_MARK,
G_UNICODE_BREAK_SURROGATE,
G_UNICODE_BREAK_ZERO_WIDTH_SPACE,
G_UNICODE_BREAK_INSEPARABLE,
G_UNICODE_BREAK_NON_BREAKING_GLUE,
G_UNICODE_BREAK_CONTINGENT,
G_UNICODE_BREAK_SPACE,
G_UNICODE_BREAK_AFTER,
G_UNICODE_BREAK_BEFORE,
G_UNICODE_BREAK_BEFORE_AND_AFTER,
G_UNICODE_BREAK_HYPHEN,
G_UNICODE_BREAK_NON_STARTER,
G_UNICODE_BREAK_OPEN_PUNCTUATION,
G_UNICODE_BREAK_CLOSE_PUNCTUATION,
G_UNICODE_BREAK_QUOTATION,
G_UNICODE_BREAK_EXCLAMATION,
G_UNICODE_BREAK_IDEOGRAPHIC,
G_UNICODE_BREAK_NUMERIC,
G_UNICODE_BREAK_INFIX_SEPARATOR,
G_UNICODE_BREAK_SYMBOL,
G_UNICODE_BREAK_ALPHABETIC,
G_UNICODE_BREAK_PREFIX,
G_UNICODE_BREAK_POSTFIX,
G_UNICODE_BREAK_COMPLEX_CONTEXT,
G_UNICODE_BREAK_AMBIGUOUS,
G_UNICODE_BREAK_UNKNOWN
} GUnicodeBreakType; |
g_unicode_canonical_ordering ()
void g_unicode_canonical_ordering (gunichar *string,
size_t len); |
g_unicode_canonical_decomposition ()
g_utf8_next_char()
#define g_utf8_next_char(p) |
g_utf8_get_char ()
Convert a sequence of bytes encoded as UTF-8 to a Unicode character.
g_utf8_offset_to_pointer ()
Converts from an integer character offset to a pointer to a position
within the string.
g_utf8_pointer_to_offset ()
Converts from a pointer to a position within a string to an integer
character offset.
g_utf8_prev_char ()
Find the previous UTF-8 character in the string before p.
p does not have to be at the beginning of a UTF-8 character. No check
is made to see if the character found is actually valid other than
it starts with an appropriate byte. If p might be the first
character of the string, you must use g_utf8_find_prev_char instead.
g_utf8_find_next_char ()
Find the start of the next UTF-8 character in the string after p.
p does not have to be at the beginning of a UTF-8 character. No check
is made to see if the character found is actually valid other than
it starts with an appropriate byte.
g_utf8_find_prev_char ()
Given a position p within a UTF-8 encoded string str, find the start
of the previous UTF-8 character starting before p. Returns NULL if no
UTF-8 characters are present in str before p.
p does not have to be at the beginning of a UTF-8 character. No check
is made to see if the character found is actually valid other than
it starts with an appropriate byte.
g_utf8_strchr ()
Find the leftmost occurrence of the given ISO 10646 character
in a UTF-8 string.
g_utf8_strrchr ()
Find the rightmost occurrence of the given ISO 10646 character
in a UTF-8 string.
g_utf8_validate ()
Validates UTF-8 encoded text. str is the text to validate;
if str is nul-terminated, then max_len can be -1, otherwise
max_len should be the number of bytes to validate.
If end is non-NULL, then the end of the valid range
will be stored there (i.e. the address of the first invalid byte
if some bytes were invalid, or the end of the text being validated
otherwise).
Returns TRUE if all of str was valid. Many GLib and GTK+
routines <emphasis>require</emphasis> valid UTF-8 as input;
so data read from a file or the network should be checked
with g_utf8_validate() before doing anything else with it.
g_utf8_to_utf16 ()
Convert a string from UTF-8 to UTF-16. A 0 word will be
added to the result after the converted text.
g_utf8_to_ucs4 ()
Convert a string from UTF-8 to a 32-bit fixed width
representation as UCS-4. A trailing 0 will be added to the
string after the converted text.
g_utf8_to_ucs4_fast ()
Convert a string from UTF-8 to a 32-bit fixed width
representation as UCS-4, assuming valid UTF-8 input.
This function is roughly twice as fast as g_utf8_to_ucs4()
but does no error checking on the input.
g_utf16_to_ucs4 ()
Convert a string from UTF-16 to UCS-4. The result will be
terminated with a 0 character.
g_utf16_to_utf8 ()
Convert a string from UTF-16 to UTF-8. The result will be
terminated with a 0 byte.
g_ucs4_to_utf16 ()
Convert a string from UCS-4 to UTF-16. A 0 word will be
added to the result after the converted text.
g_ucs4_to_utf8 ()
Convert a string from a 32-bit fixed width representation as UCS-4
to UTF-8. The result will be terminated with a 0 byte.
g_unichar_to_utf8 ()
Convert a single character to UTF-8.