/*
******************************************************************************
*
*   Copyright (C) 1999-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ubidiimp.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999aug06
*   created by: Markus W. Scherer, updated by Matitiahu Allouche
*
*   Implementation-private declarations for the BiDi code: directional
*   property flags and masks, the Run/Point/InsertPoints helper structures,
*   the UBiDi object layout, and macros for argument checking and for
*   (re)allocating the per-object arrays.
*/

#ifndef UBIDIIMP_H
#define UBIDIIMP_H

/* set import/export definitions */
#ifdef U_COMMON_IMPLEMENTATION

/*
 * NOTE(review): UBiDi, UBiDiLevel, UBiDiDirection, UBiDiReorderingMode and
 * UBiDiClassCallback are used below but are not declared by any header
 * included here by name; presumably the includer pulls in the public BiDi
 * header first - confirm before including this file on its own.
 */
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "ubidi_props.h"

/* miscellaneous definitions ---------------------------------------------- */

/* one directional property per text character (see the enum below) */
typedef uint8_t DirProp;
/* bit set of DIRPROP_FLAG() values */
typedef uint32_t Flags;

/*  Comparing the description of the BiDi algorithm with this implementation
    is easier with the same names for the BiDi types in the code as there.
    See UCharDirection in uchar.h .
*/
enum {
    L=  U_LEFT_TO_RIGHT,
    R=  U_RIGHT_TO_LEFT,
    EN= U_EUROPEAN_NUMBER,
    ES= U_EUROPEAN_NUMBER_SEPARATOR,
    ET= U_EUROPEAN_NUMBER_TERMINATOR,
    AN= U_ARABIC_NUMBER,
    CS= U_COMMON_NUMBER_SEPARATOR,
    B=  U_BLOCK_SEPARATOR,
    S=  U_SEGMENT_SEPARATOR,
    WS= U_WHITE_SPACE_NEUTRAL,
    ON= U_OTHER_NEUTRAL,
    LRE=U_LEFT_TO_RIGHT_EMBEDDING,
    LRO=U_LEFT_TO_RIGHT_OVERRIDE,
    AL= U_RIGHT_TO_LEFT_ARABIC,
    RLE=U_RIGHT_TO_LEFT_EMBEDDING,
    RLO=U_RIGHT_TO_LEFT_OVERRIDE,
    PDF=U_POP_DIRECTIONAL_FORMAT,
    NSM=U_DIR_NON_SPACING_MARK,
    BN= U_BOUNDARY_NEUTRAL,
    dirPropCount
};

/*
 * Sometimes, bit values are more appropriate
 * to deal with directionality properties.
 * Abbreviations in these macro names refer to names
 * used in the BiDi algorithm.
 */
#define DIRPROP_FLAG(dir) (1UL<<(dir))

/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)

/* are there any characters that are LTR or RTL? */
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))

/* explicit embedding codes */
#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N (DIRPROP_FLAG(ON)|MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)

/*
 * These types may be changed to "e",
 * the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2)
 */
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)

/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))

#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)

/*
 * The following bit is ORed to the property of characters in paragraphs
 * with contextual RTL direction when paraLevel is contextual.
 */
#define CONTEXT_RTL 0x80
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)
/*
 * The following is a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
 */
#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir)))

/*
 * Paragraph level that applies at text position `index`.
 * If defaultParaLevel is non-zero, the level is taken from bit 7
 * (the CONTEXT_RTL bit) of the character's dirProp; otherwise the
 * object's paraLevel is used.
 */
#define GET_PARALEVEL(ubidi, index) \
            (UBiDiLevel)((ubidi)->defaultParaLevel ? (ubidi)->dirProps[index]>>7 \
                                                   : (ubidi)->paraLevel)

/* Paragraph type for multiple paragraph support ---------------------------- */
typedef int32_t Para;

#define CR  0x000D
#define LF  0x000A

/* Run structure for reordering --------------------------------------------- */

/* flag bits stored in Run.insertRemove when it is >0 */
enum {
    LRM_BEFORE=1,
    LRM_AFTER=2,
    RLM_BEFORE=4,
    RLM_AFTER=8
};

typedef struct Run {
    int32_t logicalStart,   /* first character of the run; b31 indicates even/odd level */
            visualLimit,    /* last visual position of the run +1 */
            insertRemove;   /* if >0, flags for inserting LRM/RLM before/after run,
                               if <0, count of bidi controls within run            */
} Run;

/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * Only bit 0 of `level` (its odd/even parity) is moved into bit 31.
 * The shift is performed on a uint32_t so that no signed value is ever
 * left-shifted into or past the sign bit; the result is converted back
 * to int32_t afterwards.
 */
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|(int32_t)((uint32_t)((level)&1)<<31))
#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=(int32_t)((uint32_t)((level)&1)<<31))
#define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)

#define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x)  ((UBool)(((x)&INDEX_ODD_BIT)!=0))
#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))

U_CFUNC UBool
ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);

/** BiDi control code points */
enum {
    ZWNJ_CHAR=0x200c,
    ZWJ_CHAR,
    LRM_CHAR,
    RLM_CHAR,
    LRE_CHAR=0x202a,
    RLE_CHAR,
    PDF_CHAR,
    LRO_CHAR,
    RLO_CHAR
};

/*
 * True for the four code points starting at ZWNJ_CHAR (0x200c..0x200f) and
 * for the five code points starting at LRE_CHAR (0x202a..0x202e).
 * The unsigned subtraction makes values below LRE_CHAR wrap around and fail
 * the "<5" test. Note that `c` is evaluated twice.
 */
#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5)

/* InsertPoints structure for noting where to put BiDi marks ---------------- */

typedef struct Point {
    int32_t pos;            /* position in text */
    int32_t flag;           /* flag for LRM/RLM, before/after */
} Point;

typedef struct InsertPoints {
    int32_t capacity;       /* number of points allocated */
    int32_t size;           /* number of points used */
    int32_t confirmed;      /* number of points confirmed */
    UErrorCode errorCode;   /* for eventual memory shortage */
    Point *points;          /* pointer to array of points */
} InsertPoints;


/* UBiDi structure ----------------------------------------------------------- */

struct UBiDi {
    /* pointer to parent paragraph object (pointer to self if this object is
     * a paragraph object); set to NULL in a newly opened object; set to a
     * real value after a successful execution of ubidi_setPara or ubidi_setLine
     */
    const UBiDi * pParaBiDi;

    const UBiDiProps *bdp;

    /* alias pointer to the current text */
    const UChar *text;

    /* length of the current text */
    int32_t originalLength;

    /* if the UBIDI_OPTION_STREAMING option is set, this is the length
     * of text actually processed by ubidi_setPara, which may be shorter than
     * the original length.
     * Otherwise, it is identical to the original length.
     */
    int32_t length;

    /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
     * marks are allowed to be inserted in one of the reordering mode, the
     * length of the result string may be different from the processed length.
     */
    int32_t resultLength;

    /* memory sizes in bytes */
    int32_t dirPropsSize, levelsSize, parasSize, runsSize;

    /* allocated memory */
    DirProp *dirPropsMemory;
    UBiDiLevel *levelsMemory;
    Para *parasMemory;
    Run *runsMemory;

    /* indicators for whether memory may be allocated after ubidi_open() */
    UBool mayAllocateText, mayAllocateRuns;

    /* arrays with one value per text-character */
    const DirProp *dirProps;
    UBiDiLevel *levels;

    /* are we performing an approximation of the "inverse BiDi" algorithm? */
    UBool isInverse;

    /* are we using the basic algorithm or its variation? */
    UBiDiReorderingMode reorderingMode;

    /* UBIDI_REORDER_xxx values must be ordered so that all the regular
     * logical to visual modes come first, and all inverse BiDi modes
     * come last.
     */
    #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL    UBIDI_REORDER_NUMBERS_SPECIAL

    /* bitmask for reordering options */
    uint32_t reorderingOptions;

    /* must block separators receive level 0? */
    UBool orderParagraphsLTR;

    /* the paragraph level */
    UBiDiLevel paraLevel;
    /* original paraLevel when contextual */
    /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
    UBiDiLevel defaultParaLevel;

    /* context data */
    const UChar *prologue;
    int32_t proLength;
    const UChar *epilogue;
    int32_t epiLength;

    /* the following is set in ubidi_setPara, used in processPropertySeq */
    const struct ImpTabPair * pImpTabPair;  /* pointer to levels state table pair */

    /* the overall paragraph or line directionality - see UBiDiDirection */
    UBiDiDirection direction;

    /* flags is a bit set for which directional properties are in the text */
    Flags flags;

    /* lastArabicPos is index to the last AL in the text, -1 if none */
    int32_t lastArabicPos;

    /* characters after trailingWSStart are WS and are */
    /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
    int32_t trailingWSStart;

    /* fields for paragraph handling */
    int32_t paraCount;                  /* set in getDirProps() */
    Para *paras;                        /* limits of paragraphs, filled in
                            ResolveExplicitLevels() or CheckExplicitLevels() */

    /* for single paragraph text, we only need a tiny array of paras (no malloc()) */
    Para simpleParas[1];

    /* fields for line reordering */
    int32_t runCount;     /* ==-1: runs not set up yet */
    Run *runs;

    /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
    Run simpleRuns[1];

    /* for inverse Bidi with insertion of directional marks */
    InsertPoints insertPoints;

    /* for option UBIDI_OPTION_REMOVE_CONTROLS */
    int32_t controlCount;

    /* for Bidi class callback */
    UBiDiClassCallback *fnClassCallback;    /* action pointer */
    const void *coClassCallback;            /* context pointer */
};

/* a paragraph object points to itself through pParaBiDi */
#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
/* a line object points to a parent which in turn points to itself */
#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))

/*
 * Argument type of ubidi_getMemory(): the helper macros below cast the
 * address of one of the UBiDi xxxMemory pointer fields to this union.
 */
typedef union {
    DirProp *dirPropsMemory;
    UBiDiLevel *levelsMemory;
    Para *parasMemory;
    Run *runsMemory;
} BidiMemoryForAllocation;

/*
 * Macros for initial checks at function entry.
 *
 * Each macro expands to a bare `if` statement that returns from the
 * enclosing function. Use them only as complete statements (followed by a
 * semicolon), never as the unbraced body of another if/else.
 * The `errcode` argument must be an assignable UErrorCode expression.
 */
#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue)   \
        if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return retvalue
#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue)   \
        if(!IS_VALID_PARA(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return retvalue;                \
        }
#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue)   \
        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return retvalue;                \
        }
/* the valid range is start<=arg<limit */
#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue)   \
        if((arg)<(start) || (arg)>=(limit)) {       \
            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
            return retvalue;                        \
        }

/* same checks for functions returning void */
#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode)   \
        if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return
#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode)   \
        if(!IS_VALID_PARA(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return;                 \
        }
#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode)   \
        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return;                 \
        }
#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode)   \
        if((arg)<(start) || (arg)>=(limit)) {       \
            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
            return;                                 \
        }

/* helper function to (re)allocate memory if allowed */
U_CFUNC UBool
ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);

/*
 * helper macros for each allocated array in UBiDi
 * (the size passed on is `length` for dirProps and levels,
 * and `length` times the element size for runs)
 */
#define getDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getLevelsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getRunsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))

/* additional macros used by ubidi_open() - always allow allocation */
#define getInitialDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        TRUE, (length))

#define getInitialLevelsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        TRUE, (length))

#define getInitialParasMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
                        TRUE, (length)*sizeof(Para))

#define getInitialRunsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        TRUE, (length)*sizeof(Run))

#endif /* U_COMMON_IMPLEMENTATION */

#endif /* UBIDIIMP_H */