# Text file  |  81 lines  |  2.08 KB   (file-viewer banner; not part of the original rule source)

#
#   Copyright (C) 2002-2013, International Business Machines Corporation and others.
#       All Rights Reserved.
#
#   file:  char.txt 
#
#   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
#      See Unicode Standard Annex #29.
#      These rules are based on UAX #29 Revision 20 for Unicode Version 6.2
#

#
#  Character Class Definitions.
#
# Each variable names the set of code points that carry one value of the
# Unicode Grapheme_Cluster_Break property.  The rule sections further down
# refer to these sets by variable name.
$CR          = [\p{Grapheme_Cluster_Break = CR}];
$LF          = [\p{Grapheme_Cluster_Break = LF}];
$Control     = [\p{Grapheme_Cluster_Break = Control}];
# $Prepend is deliberately not defined here; per the TODO, the Prepend set is
# treated as empty for the Unicode version these rules target.
# TODO: Restore if the Prepend set becomes non-empty again: $Prepend     = [\p{Grapheme_Cluster_Break = Prepend}];
$Extend      = [\p{Grapheme_Cluster_Break = Extend}];
$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
$Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];

#
# Korean Syllable Definitions
#
# One set per Hangul-related Grapheme_Cluster_Break value.
# NOTE(review): in UAX #29 terminology L / V / T are the leading-consonant,
# vowel and trailing-consonant jamo -- confirm against the annex.
$L       = [\p{Grapheme_Cluster_Break = L}];
$V       = [\p{Grapheme_Cluster_Break = V}];
$T       = [\p{Grapheme_Cluster_Break = T}];

# NOTE(review): LV and LVT are presumably the precomposed Hangul syllables
# (without / with a trailing consonant) -- confirm against UAX #29.
$LV      = [\p{Grapheme_Cluster_Break = LV}];
$LVT     = [\p{Grapheme_Cluster_Break = LVT}];


## -------------------------------------------------
# Turn on rule chaining for this rule set.  See the ICU break-rule
# documentation for the exact semantics of !!chain.
!!chain;

# Rules used when scanning forward through the text.  Each rule lists a
# sequence of character classes that is kept together: no boundary is
# reported inside a match.
!!forward;

# A CR immediately followed by LF is a single unit.
$CR $LF;

# Hangul syllable sequences:
#   L          may be followed by L, V, LV or LVT
#   LV or V    may be followed by V or T
#   LVT or T   may be followed by T
$L ($L | $V | $LV | $LVT);
($LV | $V) ($V | $T);
($LVT | $T) $T;

# Two adjacent Regional_Indicator characters are not split.
$Regional_Indicator $Regional_Indicator;

# An Extend character stays attached to the character before it, unless that
# character is a Control, CR or LF.
[^$Control $CR $LF] $Extend;

# Likewise for a SpacingMark character.
[^$Control $CR $LF] $SpacingMark;
# Disabled Prepend rule (a Prepend character followed by anything other than
# Control, CR or LF); it needs the commented-out $Prepend definition.
# TODO: Restore if the Prepend set becomes non-empty again: $Prepend [^$Control $CR $LF];


## -------------------------------------------------

# Rules used when scanning backward through the text.  Every rule below is
# the corresponding !!forward rule with its operands written in the opposite
# order, because the text is consumed from the end towards the start.
!!reverse;
# LF then CR: the CR LF pair seen in reverse.
$LF $CR;
# Hangul syllable sequences, mirrored from the forward rules.
($L | $V | $LV | $LVT) $L;
($V | $T) ($LV | $V);
$T ($LVT | $T);

# Symmetric rule, so it reads the same as in the forward section.
$Regional_Indicator $Regional_Indicator;

# Extend / SpacingMark followed (in reverse order) by the character they
# attach to, which must not be a Control, CR or LF.
$Extend      [^$Control $CR $LF];
$SpacingMark [^$Control $CR $LF];
# Disabled mirror of the forward Prepend rule; it needs the commented-out
# $Prepend definition.
# TODO: Restore if the Prepend set becomes non-empty again: [^$Control $CR $LF] $Prepend;


## -------------------------------------------------
#  We don't logically need safe char break rules, but if we don't provide any at all,
#  the engine for preceding() and following() will fall back to the
#  old-style, inefficient algorithm.

# Safe-reverse rule section.  Its only rule is the reversed CR LF pair, the
# same sequence as the first !!reverse rule.
!!safe_reverse;
$LF $CR;

## -------------------------------------------------

# Safe-forward rule section.  Its only rule is the CR LF pair, the same
# sequence as the first !!forward rule.
!!safe_forward;
$CR $LF;