summaryrefslogtreecommitdiff
path: root/plugins/Pcre16/src/ucp.h
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/Pcre16/src/ucp.h')
-rw-r--r--plugins/Pcre16/src/ucp.h200
1 files changed, 200 insertions, 0 deletions
diff --git a/plugins/Pcre16/src/ucp.h b/plugins/Pcre16/src/ucp.h
new file mode 100644
index 0000000000..d8b34bfcc5
--- /dev/null
+++ b/plugins/Pcre16/src/ucp.h
@@ -0,0 +1,200 @@
+/*************************************************
+* Unicode Property Table handler *
+*************************************************/
+
+#ifndef _UCP_H
+#define _UCP_H
+
+/* This file contains definitions of the property values that are returned by
+the UCD access macros. New values that are added for new releases of Unicode
+should always be at the end of each enum, for backwards compatibility.
+
+IMPORTANT: Note also that the specific numeric values of the enums have to be
+the same as the values that are generated by the maint/MultiStage2.py script,
+where the equivalent property descriptive names are listed in vectors.
+
+ALSO: The specific values of the first two enums are assumed for the table
+called catposstab in pcre_compile.c. */
+
+/* These are the general character categories. */
+
+enum {
+ ucp_C, /* Other */
+ ucp_L, /* Letter */
+ ucp_M, /* Mark */
+ ucp_N, /* Number */
+ ucp_P, /* Punctuation */
+ ucp_S, /* Symbol */
+ ucp_Z /* Separator */
+};
+
+/* These are the particular character categories. */
+
+enum {
+ ucp_Cc, /* Control */
+ ucp_Cf, /* Format */
+ ucp_Cn, /* Unassigned */
+ ucp_Co, /* Private use */
+ ucp_Cs, /* Surrogate */
+ ucp_Ll, /* Lower case letter */
+ ucp_Lm, /* Modifier letter */
+ ucp_Lo, /* Other letter */
+ ucp_Lt, /* Title case letter */
+ ucp_Lu, /* Upper case letter */
+ ucp_Mc, /* Spacing mark */
+ ucp_Me, /* Enclosing mark */
+ ucp_Mn, /* Non-spacing mark */
+ ucp_Nd, /* Decimal number */
+ ucp_Nl, /* Letter number */
+ ucp_No, /* Other number */
+ ucp_Pc, /* Connector punctuation */
+ ucp_Pd, /* Dash punctuation */
+ ucp_Pe, /* Close punctuation */
+ ucp_Pf, /* Final punctuation */
+ ucp_Pi, /* Initial punctuation */
+ ucp_Po, /* Other punctuation */
+ ucp_Ps, /* Open punctuation */
+ ucp_Sc, /* Currency symbol */
+ ucp_Sk, /* Modifier symbol */
+ ucp_Sm, /* Mathematical symbol */
+ ucp_So, /* Other symbol */
+ ucp_Zl, /* Line separator */
+ ucp_Zp, /* Paragraph separator */
+ ucp_Zs /* Space separator */
+};
+
+/* These are grapheme break properties. Note that the code for processing them
+assumes that the values are less than 16. If more values are added that take
+the number to 16 or more, the code will have to be rewritten. */
+
+enum {
+ ucp_gbCR, /* 0 */
+ ucp_gbLF, /* 1 */
+ ucp_gbControl, /* 2 */
+ ucp_gbExtend, /* 3 */
+ ucp_gbPrepend, /* 4 */
+ ucp_gbSpacingMark, /* 5 */
+ ucp_gbL, /* 6 Hangul syllable type L */
+ ucp_gbV, /* 7 Hangul syllable type V */
+ ucp_gbT, /* 8 Hangul syllable type T */
+ ucp_gbLV, /* 9 Hangul syllable type LV */
+ ucp_gbLVT, /* 10 Hangul syllable type LVT */
+ ucp_gbRegionalIndicator, /* 11 */
+ ucp_gbOther /* 12 */
+};
+
+/* These are the script identifications. */
+
+enum {
+ ucp_Arabic,
+ ucp_Armenian,
+ ucp_Bengali,
+ ucp_Bopomofo,
+ ucp_Braille,
+ ucp_Buginese,
+ ucp_Buhid,
+ ucp_Canadian_Aboriginal,
+ ucp_Cherokee,
+ ucp_Common,
+ ucp_Coptic,
+ ucp_Cypriot,
+ ucp_Cyrillic,
+ ucp_Deseret,
+ ucp_Devanagari,
+ ucp_Ethiopic,
+ ucp_Georgian,
+ ucp_Glagolitic,
+ ucp_Gothic,
+ ucp_Greek,
+ ucp_Gujarati,
+ ucp_Gurmukhi,
+ ucp_Han,
+ ucp_Hangul,
+ ucp_Hanunoo,
+ ucp_Hebrew,
+ ucp_Hiragana,
+ ucp_Inherited,
+ ucp_Kannada,
+ ucp_Katakana,
+ ucp_Kharoshthi,
+ ucp_Khmer,
+ ucp_Lao,
+ ucp_Latin,
+ ucp_Limbu,
+ ucp_Linear_B,
+ ucp_Malayalam,
+ ucp_Mongolian,
+ ucp_Myanmar,
+ ucp_New_Tai_Lue,
+ ucp_Ogham,
+ ucp_Old_Italic,
+ ucp_Old_Persian,
+ ucp_Oriya,
+ ucp_Osmanya,
+ ucp_Runic,
+ ucp_Shavian,
+ ucp_Sinhala,
+ ucp_Syloti_Nagri,
+ ucp_Syriac,
+ ucp_Tagalog,
+ ucp_Tagbanwa,
+ ucp_Tai_Le,
+ ucp_Tamil,
+ ucp_Telugu,
+ ucp_Thaana,
+ ucp_Thai,
+ ucp_Tibetan,
+ ucp_Tifinagh,
+ ucp_Ugaritic,
+ ucp_Yi,
+ /* New for Unicode 5.0: */
+ ucp_Balinese,
+ ucp_Cuneiform,
+ ucp_Nko,
+ ucp_Phags_Pa,
+ ucp_Phoenician,
+ /* New for Unicode 5.1: */
+ ucp_Carian,
+ ucp_Cham,
+ ucp_Kayah_Li,
+ ucp_Lepcha,
+ ucp_Lycian,
+ ucp_Lydian,
+ ucp_Ol_Chiki,
+ ucp_Rejang,
+ ucp_Saurashtra,
+ ucp_Sundanese,
+ ucp_Vai,
+ /* New for Unicode 5.2: */
+ ucp_Avestan,
+ ucp_Bamum,
+ ucp_Egyptian_Hieroglyphs,
+ ucp_Imperial_Aramaic,
+ ucp_Inscriptional_Pahlavi,
+ ucp_Inscriptional_Parthian,
+ ucp_Javanese,
+ ucp_Kaithi,
+ ucp_Lisu,
+ ucp_Meetei_Mayek,
+ ucp_Old_South_Arabian,
+ ucp_Old_Turkic,
+ ucp_Samaritan,
+ ucp_Tai_Tham,
+ ucp_Tai_Viet,
+ /* New for Unicode 6.0.0: */
+ ucp_Batak,
+ ucp_Brahmi,
+ ucp_Mandaic,
+ /* New for Unicode 6.1.0: */
+ ucp_Chakma,
+ ucp_Meroitic_Cursive,
+ ucp_Meroitic_Hieroglyphs,
+ ucp_Miao,
+ ucp_Sharada,
+ ucp_Sora_Sompeng,
+ ucp_Takri
+};
+
+#endif
+
+/* End of ucp.h */