boost/spirit/home/support/char_encoding/unicode/query.hpp
/*=============================================================================
Copyright (c) 2001-2011 Joel de Guzman
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Autogenerated by MultiStageTable.py (Unicode multi-stage
table builder) (c) Peter Kankowski, 2008
==============================================================================*/
#if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010)
#define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010
#include <boost/cstdint.hpp>
# include "category_table.hpp"
# include "script_table.hpp"
# include "lowercase_table.hpp"
# include "uppercase_table.hpp"
namespace boost { namespace spirit { namespace ucd
{
// This header provides Basic (Level 1) Unicode Support
// See http://unicode.org/reports/tr18/ for details
// Holder for the enumerations used to interpret the values returned by the
// generated lookup tables (detail::category_lookup / detail::script_lookup).
// It has no data members; it only scopes the enumerator names.
struct properties
{
// Layout of the low bits of a category-table entry:
// bit pattern: xxMMMCCC
// MMM: major_category
// CCC: category
// A `category` enumerator therefore equals (major_category << 3) plus its
// position inside that major category; get_major_category() recovers MMM
// by shifting a category value right by 3.

// Top-level grouping of the general categories. The implicit values 0..6
// are the MMM bits of the matching `category` enumerators below.
enum major_category
{
letter,
mark,
number,
separator,
other,
punctuation,
symbol
};
// General category of a code point. Each group starts at a multiple of 8
// (0, 8, 16, ...) matching the order of major_category above.
enum category
{
uppercase_letter = 0, // [Lu] an uppercase letter
lowercase_letter, // [Ll] a lowercase letter
titlecase_letter, // [Lt] a digraphic character, with first part uppercase
modifier_letter, // [Lm] a modifier letter
other_letter, // [Lo] other letters, including syllables and ideographs
nonspacing_mark = 8, // [Mn] a nonspacing combining mark (zero advance width)
enclosing_mark, // [Me] an enclosing combining mark
spacing_mark, // [Mc] a spacing combining mark (positive advance width)
decimal_number = 16, // [Nd] a decimal digit
letter_number, // [Nl] a letterlike numeric character
other_number, // [No] a numeric character of other type
space_separator = 24, // [Zs] a space character (of various non-zero widths)
line_separator, // [Zl] U+2028 LINE SEPARATOR only
paragraph_separator, // [Zp] U+2029 PARAGRAPH SEPARATOR only
control = 32, // [Cc] a C0 or C1 control code
format, // [Cf] a format control character
private_use, // [Co] a private-use character
surrogate, // [Cs] a surrogate code point
unassigned, // [Cn] a reserved unassigned code point or a noncharacter
dash_punctuation = 40, // [Pd] a dash or hyphen punctuation mark
open_punctuation, // [Ps] an opening punctuation mark (of a pair)
close_punctuation, // [Pe] a closing punctuation mark (of a pair)
connector_punctuation, // [Pc] a connecting punctuation mark, like a tie
other_punctuation, // [Po] a punctuation mark of other type
initial_punctuation, // [Pi] an initial quotation mark
final_punctuation, // [Pf] a final quotation mark
math_symbol = 48, // [Sm] a symbol of primarily mathematical use
currency_symbol, // [Sc] a currency sign
modifier_symbol, // [Sk] a non-letterlike modifier symbol
other_symbol // [So] a symbol of other type
};
// Single-bit flags tested against the same table entry that carries the
// category. The lowest flag (64) is the first bit above the six-bit
// category field that get_category() masks with 0x3F, so flags and
// category can share one value without overlapping.
enum derived_properties
{
alphabetic = 64,
uppercase = 128,
lowercase = 256,
white_space = 512,
hex_digit = 1024,
noncharacter_code_point = 2048,
default_ignorable_code_point = 4096
};
// Script identifiers. get_script() casts the value returned by
// detail::script_lookup() straight to this enum, so the implicit values
// (0, 1, 2, ...) presumably have to stay in step with the generated
// script table -- do not reorder or insert without regenerating it
// (TODO confirm against the table generator).
// NOTE(review): the order looks like it follows the ISO 15924 four-letter
// codes (Adlm, Aghb, Ahom, Arab, ...) rather than the English names --
// confirm before relying on it.
enum script
{
adlam,
caucasian_albanian,
ahom,
arabic,
imperial_aramaic,
armenian,
avestan,
balinese,
bamum,
bassa_vah,
batak,
bengali,
bhaiksuki,
bopomofo,
brahmi,
braille,
buginese,
buhid,
chakma,
canadian_aboriginal,
carian,
cham,
cherokee,
chorasmian,
coptic,
cypro_minoan,
cypriot,
cyrillic,
devanagari,
dives_akuru,
dogra,
deseret,
duployan,
egyptian_hieroglyphs,
elbasan,
elymaic,
ethiopic,
georgian,
glagolitic,
gunjala_gondi,
masaram_gondi,
gothic,
grantha,
greek,
gujarati,
gurmukhi,
hangul,
han,
hanunoo,
hatran,
hebrew,
hiragana,
anatolian_hieroglyphs,
pahawh_hmong,
nyiakeng_puachue_hmong,
katakana_or_hiragana,
old_hungarian,
old_italic,
javanese,
kayah_li,
katakana,
kawi,
kharoshthi,
khmer,
khojki,
khitan_small_script,
kannada,
kaithi,
tai_tham,
lao,
latin,
lepcha,
limbu,
linear_a,
linear_b,
lisu,
lycian,
lydian,
mahajani,
makasar,
mandaic,
manichaean,
marchen,
medefaidrin,
mende_kikakui,
meroitic_cursive,
meroitic_hieroglyphs,
malayalam,
modi,
mongolian,
mro,
meetei_mayek,
multani,
myanmar,
nag_mundari,
nandinagari,
old_north_arabian,
nabataean,
newa,
nko,
nushu,
ogham,
ol_chiki,
old_turkic,
oriya,
osage,
osmanya,
old_uyghur,
palmyrene,
pau_cin_hau,
old_permic,
phags_pa,
inscriptional_pahlavi,
psalter_pahlavi,
phoenician,
miao,
inscriptional_parthian,
rejang,
hanifi_rohingya,
runic,
samaritan,
old_south_arabian,
saurashtra,
signwriting,
shavian,
sharada,
siddham,
khudawadi,
sinhala,
sogdian,
old_sogdian,
sora_sompeng,
soyombo,
sundanese,
syloti_nagri,
syriac,
tagbanwa,
takri,
tai_le,
new_tai_lue,
tamil,
tangut,
tai_viet,
telugu,
tifinagh,
tagalog,
thaana,
thai,
tibetan,
tirhuta,
tangsa,
toto,
ugaritic,
vai,
vithkuqi,
warang_citi,
wancho,
old_persian,
cuneiform,
yezidi,
yi,
zanabazar_square,
inherited,
common,
unknown
};
};
// Returns the general category of ch: the low six bits of its
// category-table entry, reinterpreted as a properties::category.
inline properties::category get_category(::boost::uint32_t ch)
{
    // The category occupies bits 0..5; higher bits carry derived-property flags.
    const int category_bits = 0x3F;
    return static_cast<properties::category>(
        detail::category_lookup(ch) & category_bits);
}
// Returns the major category of ch, obtained by dropping the three
// low bits of its general category.
inline properties::major_category get_major_category(::boost::uint32_t ch)
{
    const properties::category cat = get_category(ch);
    return static_cast<properties::major_category>(cat >> 3);
}
// True when the major category of ch is properties::punctuation.
inline bool is_punctuation(::boost::uint32_t ch)
{
    const properties::major_category major = get_major_category(ch);
    return properties::punctuation == major;
}
// True when the general category of ch is properties::decimal_number.
inline bool is_decimal_number(::boost::uint32_t ch)
{
    const properties::category cat = get_category(ch);
    return properties::decimal_number == cat;
}
// True when the hex_digit flag is set in the category-table entry for ch.
inline bool is_hex_digit(::boost::uint32_t ch)
{
    const bool flag_set =
        (detail::category_lookup(ch) & properties::hex_digit) != 0;
    return flag_set;
}
// True when the general category of ch is properties::control.
inline bool is_control(::boost::uint32_t ch)
{
    const properties::category cat = get_category(ch);
    return properties::control == cat;
}
// True when the alphabetic flag is set in the category-table entry for ch.
inline bool is_alphabetic(::boost::uint32_t ch)
{
    const bool flag_set =
        (detail::category_lookup(ch) & properties::alphabetic) != 0;
    return flag_set;
}
// True when ch is either a decimal number or alphabetic.
inline bool is_alphanumeric(::boost::uint32_t ch)
{
    // Decimal digits are checked first; the alphabetic test runs only if
    // that check fails.
    if (is_decimal_number(ch))
        return true;
    return is_alphabetic(ch);
}
// True when the uppercase flag is set in the category-table entry for ch.
inline bool is_uppercase(::boost::uint32_t ch)
{
    const bool flag_set =
        (detail::category_lookup(ch) & properties::uppercase) != 0;
    return flag_set;
}
// True when the lowercase flag is set in the category-table entry for ch.
inline bool is_lowercase(::boost::uint32_t ch)
{
    const bool flag_set =
        (detail::category_lookup(ch) & properties::lowercase) != 0;
    return flag_set;
}
// True when the white_space flag is set in the category-table entry for ch.
inline bool is_white_space(::boost::uint32_t ch)
{
    const bool flag_set =
        (detail::category_lookup(ch) & properties::white_space) != 0;
    return flag_set;
}
// True when ch is white space that does not break a line: line feed,
// vertical tab, form feed and carriage return are rejected outright, and
// so are code points whose category is line_separator or
// paragraph_separator.
inline bool is_blank(::boost::uint32_t ch)
{
    // The four ASCII line-breaking controls are never blank.
    if (ch == '\n' || ch == '\v' || ch == '\f' || ch == '\r')
        return false;

    if (!is_white_space(ch))
        return false;

    const properties::category cat = get_category(ch);
    return cat != properties::line_separator
        && cat != properties::paragraph_separator;
}
// True unless ch is white space or its general category is control,
// surrogate or unassigned.
inline bool is_graph(::boost::uint32_t ch)
{
    if (is_white_space(ch))
        return false;

    switch (get_category(ch))
    {
        case properties::control:
        case properties::surrogate:
        case properties::unassigned:
            return false;
        default:
            return true;
    }
}
// True when ch is a graph or blank character and is not a control code.
inline bool is_print(::boost::uint32_t ch)
{
    // Neither graphic nor blank: not printable.
    if (!is_graph(ch) && !is_blank(ch))
        return false;
    return !is_control(ch);
}
// True when the noncharacter_code_point flag is set in the category-table
// entry for ch.
inline bool is_noncharacter_code_point(::boost::uint32_t ch)
{
    const bool flag_set =
        (detail::category_lookup(ch)
            & properties::noncharacter_code_point) != 0;
    return flag_set;
}
// True when the default_ignorable_code_point flag is set in the
// category-table entry for ch.
inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
{
    const bool flag_set =
        (detail::category_lookup(ch)
            & properties::default_ignorable_code_point) != 0;
    return flag_set;
}
// Returns the script of ch: the script-table entry for ch reinterpreted
// as a properties::script enumerator.
inline properties::script get_script(::boost::uint32_t ch)
{
    const ::boost::uint32_t script_index = detail::script_lookup(ch);
    return static_cast<properties::script>(script_index);
}
// Returns the lowercase mapping of ch, or ch itself when the lowercase
// table has no mapping for it.
inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
{
    const ::boost::uint32_t mapped = detail::lowercase_lookup(ch);
    if (mapped != 0)
        return mapped;
    // A table entry of 0 signals that this code maps to itself.
    return ch;
}
// Returns the uppercase mapping of ch, or ch itself when the uppercase
// table has no mapping for it.
inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
{
    const ::boost::uint32_t mapped = detail::uppercase_lookup(ch);
    if (mapped == 0)
    {
        // A table entry of 0 signals that this code maps to itself.
        return ch;
    }
    return mapped;
}
}}}
#endif