C++程序  |  172行  |  6.73 KB

/*
 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file picobase.h
 *
 * base functionality
 *
 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 * All rights reserved.
 *
 * History:
 * - 2009-04-20 -- initial version
 *
 */

#ifndef PICOBASE_H_
#define PICOBASE_H_

#include "picoos.h"

#ifdef __cplusplus
extern "C" {
#endif
#if 0
}
#endif

/* maximum number of bytes of an UTF8 character */
#define PICOBASE_UTF8_MAXLEN    4

typedef picoos_uint8  picobase_utf8char[PICOBASE_UTF8_MAXLEN+1];  /* always zero terminated */
typedef picoos_uint8  picobase_utf8;
typedef picoos_uint16 picobase_utf16;
typedef picoos_uint32 picobase_utf32;

/* ***************************************************************/
/* Unicode UTF8 functions */
/* ***************************************************************/

/**
 * Determines the number of UTF8 characters contained in
 *            the UTF8 string 'utf8str' of maximum length maxlen (in bytes)
 * @param    utf8str : a string encoded in UTF8
 * @param    maxlen  : max length (in bytes) accessible in utf8str
 * @return   >=0 : length of the UTF8 string in number of UTF8 characters
 *                     up to the first '\0' or maxlen
 * @return   <0 : not starting with a valid UTF8 character
 * @remarks  strict implementation, not allowing invalid utf8
*/
picoos_int32 picobase_utf8_length(const picoos_uint8 *utf8str,
                                  const picoos_uint16 maxlen);


/**
 * Determines the number of bytes an UTF8 character used based
 *            on the first byte of the UTF8 character
 * @param    firstchar: the first (and maybe only) byte of an UTF8 character
 * @return   positive value in {1,4} : number of bytes of the UTF8 character
 * @return   0 :if not a valid UTF8 character start
 * @remarks strict implementation, not allowing invalid utf8
*/
/* picoos_uint8 picobase_det_utf8_length(const picoos_uint8 firstchar); */

#define picobase_det_utf8_length(x)  (  ((x)<(picoos_uint8)'\200')?1:(((x)>=(picoos_uint8)'\370')?0:(((x)>=(picoos_uint8)'\360')?4:(((x)>=(picoos_uint8)'\340')?3:(((x)>=(picoos_uint8)'\300')?2:0)))) )

/**
 * Converts the content of 'utf8str' to lowercase and stores it on 'lowercase'
 *            on the first byte of the UTF8 character
 * @param    utf8str : utf8 string
 * @param    lowercaseMaxLen : maximal number of bytes available in 'lowercase'
 * @param    lowercase : string converted to lowercase (output)
 * @param    done : flag to report success/failure of the operation (output)
 * @return  TRUE if successful, FALSE otherwise
*/
picoos_int32 picobase_lowercase_utf8_str (picoos_uchar utf8str[], picoos_char lowercase[], picoos_int32 lowercaseMaxLen, picoos_uint8 * done);

/**
 * Converts the content of 'utf8str' to upperrcase and stores it on 'uppercase'
 * @param    utf8str : utf8 string
 * @param    uppercase : string converted to uppercase (output)
 * @param    uppercaseMaxLen : maximal number of bytes available in 'uppercase'
 * @param    done : flag to report success/failure of the operation (output)
 * @return  TRUE if successful, FALSE otherwise
*/
picoos_int32 picobase_uppercase_utf8_str (picoos_uchar utf8str[], picoos_char uppercase[], int uppercaseMaxLen, picoos_uint8 * done);

/**
 * Gets next UTF8 character 'utf8char' from the UTF8 string
 *            'utf8s' starting at position 'pos'
 * @param    utf8s : UTF8 string
 * @param    utf8slenmax : max length accessible in utf8s
 * @param    pos : position from where the UTF8 character is checked and copied
 *            (set also as output to the position directly following the UTF8 char)
 * @param    utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output)
 * @return  TRUE if okay
 * @return  FALSE if there is no valid UTF8 char or no more UTF8 char available within utf8len
*/
picoos_uint8 picobase_get_next_utf8char(const picoos_uint8 *utf8s,
                                        const picoos_uint32 utf8slenmax,
                                        picoos_uint32 *pos,
                                        picobase_utf8char utf8char);

/**
 * Same as picobase_get_next_utf8char
 *            without copying the char to utf8char
*/
picoos_uint8 picobase_get_next_utf8charpos(const picoos_uint8 *utf8s,
                                           const picoos_uint32 utf8slenmax,
                                           picoos_uint32 *pos);

/**
 * Gets previous UTF8 character 'utf8char' from the UTF8 string
 *             'utf8s' starting the backward search at position 'pos-1'
 * @param    utf8s : UTF8 string
 * @param    utf8slenmin : min length accessible in utf8s
 * @param    pos : the search for the prev UTF8 char starts at [pos-1]
 *            (set also as output to the start position of the prev UTF8 character)
 * @param    utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output)
 * @return  TRUE if okay
 * @return  FALSE if there is no valid UTF8 char preceeding pos or no more UTF8 char available within utf8len
*/
picoos_uint8 picobase_get_prev_utf8char(const picoos_uint8 *utf8s,
                                        const picoos_uint32 utf8slenmin,
                                        picoos_uint32 *pos,
                                        picobase_utf8char utf8char);

/**
 * Same as picobase_get_prev_utf8char
 *            without copying the char to utf8char
*/
picoos_uint8 picobase_get_prev_utf8charpos(const picoos_uint8 *utf8s,
                                           const picoos_uint32 utf8slenmin,
                                           picoos_uint32 *pos);


/**
 * returns TRUE if the input string is UTF8 and uppercase
 * @param    str : UTF8 string
 * @param    strmaxlen : max length for the input string
 * @return  TRUE if string is UTF8 and uppercase
 * @return  FALSE otherwise
*/
extern picoos_bool picobase_is_utf8_uppercase (picoos_uchar str[], picoos_int32 strmaxlen);

/**
 * returns TRUE if the input string is UTF8 and lowercase
 * @param    str : UTF8 string
 * @param    strmaxlen : max length for the input string
 * @return  TRUE if string is UTF8 and lowercase
 * @return  FALSE otherwise
*/
extern picoos_bool picobase_is_utf8_lowercase (picoos_uchar str[], picoos_int32 strmaxlen);

#ifdef __cplusplus
}
#endif

#endif /*PICOBASE_H_*/