// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 2000-2003, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * * File writejava.c * * Modification History: * * Date Name Description * 01/11/02 Ram Creation. ******************************************************************************* */ #include "rle.h" /** * The ESCAPE character is used during run-length encoding. It signals * a run of identical chars. */ static const uint16_t ESCAPE = 0xA5A5; /** * The ESCAPE_BYTE character is used during run-length encoding. It signals * a run of identical bytes. */ static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5; /** * Append a byte to the given StringBuffer, packing two bytes into each * character. The state parameter maintains intermediary data between * calls. * @param state A two-element array, with state[0] == 0 if this is the * first byte of a pair, or state[0] != 0 if this is the second byte * of a pair, in which case state[1] is the first byte. */ static uint16_t* appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) { if(!status || U_FAILURE(*status)){ return NULL; } if (state[0] != 0) { uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF)); if(buffer < buffLimit){ *buffer++ = c; }else{ *status = U_BUFFER_OVERFLOW_ERROR; } state[0] = 0; return buffer; } else { state[0] = 1; state[1] = value; return buffer; } } /** * Encode a run, possibly a degenerate run (of < 4 values). * @param length The length of the run; must be > 0 && <= 0xFF. */ static uint16_t* encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) { if(!status || U_FAILURE(*status)){ return NULL; } if (length < 4) { int32_t j=0; for (; j<length; ++j) { if (value == ESCAPE_BYTE) { buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); } buffer = appendEncodedByte(buffer,bufLimit, value, state, status); } } else { if (length == ESCAPE_BYTE) { if (value == ESCAPE_BYTE){ buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status); } buffer = appendEncodedByte(buffer,bufLimit, value, state, status); --length; } buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status); buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/ } return buffer; } #define APPEND( buffer, bufLimit, value, num, status){ \ if(buffer<bufLimit){ \ *buffer++=(value); \ }else{ \ *status = U_BUFFER_OVERFLOW_ERROR; \ } \ num++; \ } /** * Encode a run, possibly a degenerate run (of < 4 values). * @param length The length of the run; must be > 0 && <= 0xFFFF. */ static uint16_t* encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) { int32_t num=0; if (length < 4) { int j=0; for (; j<length; ++j) { if (value == (int32_t) ESCAPE){ APPEND(buffer,bufLimit,ESCAPE, num, status); } APPEND(buffer,bufLimit,value,num, status); } } else { if (length == (int32_t) ESCAPE) { if (value == (int32_t) ESCAPE){ APPEND(buffer,bufLimit,ESCAPE,num,status); } APPEND(buffer,bufLimit,value,num,status); --length; } APPEND(buffer,bufLimit,ESCAPE,num,status); APPEND(buffer,bufLimit,(uint16_t) length, num,status); APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */ } return buffer; } /** * Construct a string representing a char array. Use run-length encoding. * A character represents itself, unless it is the ESCAPE character. Then * the following notations are possible: * ESCAPE ESCAPE ESCAPE literal * ESCAPE n c n instances of character c * Since an encoded run occupies 3 characters, we only encode runs of 4 or * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. * If we encounter a run where n == ESCAPE, we represent this as: * c ESCAPE n-1 c * The ESCAPE value is chosen so as not to collide with commonly * seen values. */ int32_t usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) { uint16_t* bufLimit = buffer+bufLen; uint16_t* saveBuffer = buffer; if(buffer < bufLimit){ *buffer++ = (uint16_t)(srcLen>>16); if(buffer<bufLimit){ uint16_t runValue = src[0]; int32_t runLength = 1; int i=1; *buffer++ = (uint16_t) srcLen; for (; i<srcLen; ++i) { uint16_t s = src[i]; if (s == runValue && runLength < 0xFFFF){ ++runLength; }else { buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status); runValue = s; runLength = 1; } } buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status); }else{ *status = U_BUFFER_OVERFLOW_ERROR; } }else{ *status = U_BUFFER_OVERFLOW_ERROR; } return (int32_t)(buffer - saveBuffer); } /** * Construct a string representing a byte array. Use run-length encoding. * Two bytes are packed into a single char, with a single extra zero byte at * the end if needed. A byte represents itself, unless it is the * ESCAPE_BYTE. Then the following notations are possible: * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal * ESCAPE_BYTE n b n instances of byte b * Since an encoded run occupies 3 bytes, we only encode runs of 4 or * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. * If we encounter a run where n == ESCAPE_BYTE, we represent this as: * b ESCAPE_BYTE n-1 b * The ESCAPE_BYTE value is chosen so as not to collide with commonly * seen values. */ int32_t byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) { const uint16_t* saveBuf = buffer; uint16_t* bufLimit = buffer+bufLen; if(buffer < bufLimit){ *buffer++ = ((uint16_t) (srcLen >> 16)); if(buffer<bufLimit){ uint8_t runValue = src[0]; int runLength = 1; uint8_t state[2]= {0}; int i=1; *buffer++=((uint16_t) srcLen); for (; i<srcLen; ++i) { uint8_t b = src[i]; if (b == runValue && runLength < 0xFF){ ++runLength; } else { buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status); runValue = b; runLength = 1; } } buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status); /* We must save the final byte, if there is one, by padding * an extra zero. */ if (state[0] != 0) { buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status); } }else{ *status = U_BUFFER_OVERFLOW_ERROR; } }else{ *status = U_BUFFER_OVERFLOW_ERROR; } return (int32_t) (buffer - saveBuf); } /** * Construct an array of shorts from a run-length encoded string. */ int32_t rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) { int32_t length = 0; int32_t ai = 0; int i=2; if(!status || U_FAILURE(*status)){ return 0; } /* the source is null terminated */ if(srcLen == -1){ srcLen = u_strlen(src); } if(srcLen <= 2){ return 2; } length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); if(target == NULL){ return length; } if(tgtLen < length){ *status = U_BUFFER_OVERFLOW_ERROR; return length; } for (; i<srcLen; ++i) { uint16_t c = src[i]; if (c == ESCAPE) { c = src[++i]; if (c == ESCAPE) { target[ai++] = c; } else { int32_t runLength = (int32_t) c; uint16_t runValue = src[++i]; int j=0; for (; j<runLength; ++j) { target[ai++] = runValue; } } } else { target[ai++] = c; } } if (ai != length){ *status = U_INTERNAL_PROGRAM_ERROR; } return length; } /** * Construct an array of bytes from a run-length encoded string. */ int32_t rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) { int32_t length = 0; UBool nextChar = TRUE; uint16_t c = 0; int32_t node = 0; int32_t runLength = 0; int32_t i = 2; int32_t ai=0; if(!status || U_FAILURE(*status)){ return 0; } /* the source is null terminated */ if(srcLen == -1){ srcLen = u_strlen(src); } if(srcLen <= 2){ return 2; } length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); if(target == NULL){ return length; } if(tgtLen < length){ *status = U_BUFFER_OVERFLOW_ERROR; return length; } for (; ai<tgtLen; ) { /* This part of the loop places the next byte into the local * variable 'b' each time through the loop. It keeps the * current character in 'c' and uses the boolean 'nextChar' * to see if we've taken both bytes out of 'c' yet. */ uint8_t b; if (nextChar) { c = src[i++]; b = (uint8_t) (c >> 8); nextChar = FALSE; } else { b = (uint8_t) (c & 0xFF); nextChar = TRUE; } /* This part of the loop is a tiny state machine which handles * the parsing of the run-length encoding. This would be simpler * if we could look ahead, but we can't, so we use 'node' to * move between three nodes in the state machine. */ switch (node) { case 0: /* Normal idle node */ if (b == ESCAPE_BYTE) { node = 1; } else { target[ai++] = b; } break; case 1: /* We have seen one ESCAPE_BYTE; we expect either a second * one, or a run length and value. */ if (b == ESCAPE_BYTE) { target[ai++] = ESCAPE_BYTE; node = 0; } else { runLength = b; node = 2; } break; case 2: { int j=0; /* We have seen an ESCAPE_BYTE and length byte. We interpret * the next byte as the value to be repeated. */ for (; j<runLength; ++j){ if(ai<tgtLen){ target[ai++] = b; }else{ *status = U_BUFFER_OVERFLOW_ERROR; return ai; } } node = 0; break; } } } if (node != 0){ *status = U_INTERNAL_PROGRAM_ERROR; /*("Bad run-length encoded byte array")*/ return 0; } if (i != srcLen){ /*("Excess data in RLE byte array string");*/ *status = U_INTERNAL_PROGRAM_ERROR; return ai; } return ai; }