/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* ---- includes ----------------------------------------------------------- */
#include "b_TensorEm/CompactMat.h"
#include "b_TensorEm/Functions.h"
#include "b_BasicEm/Math.h"
#include "b_BasicEm/Functions.h"
#include "b_BasicEm/Memory.h"
/* ------------------------------------------------------------------------- */
/* ========================================================================= */
/* */
/* ---- \ghd{ auxiliary functions } ---------------------------------------- */
/* */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/** Returns the dot product of inVecA with the indexed (compressed) row of the matrix.
 *
 *  The row starts at cpsArrE.arrPtrE + wordsPerRowE * rowA and begins with a
 *  five-word header that is read in this order:
 *    offset into inVecA, number of values, factor mantissa, factor exponent,
 *    row norm bits.
 *  The packed row values (bitsPerValueE bits each) follow the header.
 *
 *  When inNormBitsA + row norm bits exceeds 31, every partial product is
 *  shifted right (with rounding) by the excess before it is accumulated; the
 *  excess is added back to the result exponent at the end.
 *
 *  The result is a floating point expression:
 *    upper 16 bit: signed value
 *    lower 16 bit: signed exponent
 *
 *  cpA is not used by this function.
 */
int32 bts_CompactMat_fltDotPrdRow( struct bbs_Context* cpA,
                                   struct bts_CompactMat* ptrA,
                                   const int16* inVecA,
                                   uint32 inNormBitsA,
                                   uint32 rowA )
{
    const int16* rowPtrL = ptrA->cpsArrE.arrPtrE + ptrA->wordsPerRowE * rowA;

    /* extract row-header info */
    uint32 offsL = *rowPtrL++;
    uint32 sizeL = *rowPtrL++;
    int32 factorManL = *rowPtrL++;
    int32 factorExpL = *rowPtrL++;
    uint32 rowNormBitsL = *rowPtrL++;

    /* consider possible overflow */
    uint16 overflowBitsL = ( inNormBitsA + rowNormBitsL >= 31 ) ? inNormBitsA + rowNormBitsL - 31 : 0;

    /* the row only covers inVecA[ offsL ] .. inVecA[ offsL + sizeL - 1 ] */
    const int16* inPtrL = inVecA + offsL;

    count_t iL;
    int32 sumL = 0;

    if( overflowBitsL == 0 ) /* raw dot product fits in int32 */
    {
        switch( ptrA->bitsPerValueE )
        {
            case 16:
            {
                for( iL = sizeL; iL > 0; iL-- ) sumL += ( ( int32 )*rowPtrL++ * ( int32 )*inPtrL++ );
            }
            break;

            #ifndef HW_TMS320C5x /* platforms that don't have int8 must use the 'default' implementation */

            case 8:
            {
                /* two signed 8-bit values per 16-bit word, low byte first */
                const uint16* dpL = ( uint16* )rowPtrL;
                for( iL = sizeL; iL >= 8; iL -= 8 )
                {
                    sumL += ( ( int8 ) dpL[ 0 ] * ( int32 )inPtrL[ 0 ] );
                    sumL += ( ( int8 )( dpL[ 0 ] >> 8 ) * ( int32 )inPtrL[ 1 ] );
                    sumL += ( ( int8 ) dpL[ 1 ] * ( int32 )inPtrL[ 2 ] );
                    sumL += ( ( int8 )( dpL[ 1 ] >> 8 ) * ( int32 )inPtrL[ 3 ] );
                    sumL += ( ( int8 ) dpL[ 2 ] * ( int32 )inPtrL[ 4 ] );
                    sumL += ( ( int8 )( dpL[ 2 ] >> 8 ) * ( int32 )inPtrL[ 5 ] );
                    sumL += ( ( int8 ) dpL[ 3 ] * ( int32 )inPtrL[ 6 ] );
                    sumL += ( ( int8 )( dpL[ 3 ] >> 8 ) * ( int32 )inPtrL[ 7 ] );
                    dpL += 4;
                    inPtrL += 8;
                }
                for( ; iL >= 2; iL -= 2 )
                {
                    sumL += ( ( int8 ) *dpL * ( int32 )inPtrL[ 0 ] );
                    sumL += ( ( int8 )( *dpL >> 8 ) * ( int32 )inPtrL[ 1 ] );
                    dpL++;
                    inPtrL += 2;
                }
                if( iL > 0 )
                {
                    sumL += ( ( int8 )*dpL++ * ( int32 )inPtrL[ 0 ] );
                }
            }
            break;

            case 6:
            {
                /* Eight 6-bit values span three 16-bit words. Each value is moved
                 * into the upper 6 bits of a byte (mask 0xFC) so that the int8 cast
                 * sign-extends it; the resulting factor 4 is removed by '>> 2'.
                 */
                const uint16* dpL = ( uint16* )rowPtrL;
                for( iL = sizeL; iL >= 8; iL -= 8 )
                {
                    int32 lSumL = 0;
                    lSumL += ( ( int8 ) ( dpL[ 0 ] << 2 ) * ( int32 )inPtrL[ 0 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 4 ) & 0x00FC ) * ( int32 )inPtrL[ 1 ] );
                    lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 10 ) | ( dpL[ 1 ] << 6 ) ) & 0x00FC ) * ( int32 )inPtrL[ 2 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 1 ] ) & 0x00FC ) * ( int32 )inPtrL[ 3 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00FC ) * ( int32 )inPtrL[ 4 ] );
                    lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 12 ) | ( dpL[ 2 ] << 4 ) ) & 0x00FC ) * ( int32 )inPtrL[ 5 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 2 ) & 0x00FC ) * ( int32 )inPtrL[ 6 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 8 ) & 0x00FC ) * ( int32 )inPtrL[ 7 ] );
                    sumL += ( lSumL >> 2 );
                    dpL += 3;
                    inPtrL += 8;
                }

                /* remaining 0..7 values */
                {
                    int32 lSumL = 0;
                    if( iL > 0 ) lSumL += ( ( int8 ) ( dpL[ 0 ] << 2 ) * ( int32 )inPtrL[ 0 ] );
                    if( iL > 1 ) lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 4 ) & 0x00FC ) * ( int32 )inPtrL[ 1 ] );
                    if( iL > 2 ) lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 10 ) | ( dpL[ 1 ] << 6 ) ) & 0x00FC ) * ( int32 )inPtrL[ 2 ] );
                    if( iL > 3 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] ) & 0x00FC ) * ( int32 )inPtrL[ 3 ] );
                    if( iL > 4 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00FC ) * ( int32 )inPtrL[ 4 ] );
                    if( iL > 5 ) lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 12 ) | ( dpL[ 2 ] << 4 ) ) & 0x00FC ) * ( int32 )inPtrL[ 5 ] );
                    if( iL > 6 ) lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 2 ) & 0x00FC ) * ( int32 )inPtrL[ 6 ] );
                    sumL += ( lSumL >> 2 );
                }
            }
            break;

            case 5:
            {
                /* Sixteen 5-bit values span five 16-bit words. Same scheme as the
                 * 6-bit case: values are left-aligned in a byte (mask 0xF8) and the
                 * factor 8 is removed by '>> 3'.
                 */
                const uint16* dpL = ( uint16* )rowPtrL;
                for( iL = sizeL; iL >= 16; iL -= 16 )
                {
                    int32 lSumL = 0;
                    lSumL += ( ( int8 ) ( dpL[ 0 ] << 3 ) * ( int32 )inPtrL[ 0 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 1 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 7 ) & 0x00F8 ) * ( int32 )inPtrL[ 2 ] );
                    lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 12 ) | ( dpL[ 1 ] << 4 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 3 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 4 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00F8 ) * ( int32 )inPtrL[ 5 ] );
                    lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 11 ) | ( dpL[ 2 ] << 5 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 6 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 2 ] ) & 0x00F8 ) * ( int32 )inPtrL[ 7 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 5 ) & 0x00F8 ) * ( int32 )inPtrL[ 8 ] );
                    lSumL += ( ( int8 ) ( ( ( dpL[ 2 ] >> 10 ) | ( dpL[ 3 ] << 6 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 9 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 3 ] << 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 10 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 3 ] >> 4 ) & 0x00F8 ) * ( int32 )inPtrL[ 11 ] );
                    lSumL += ( ( int8 ) ( ( ( dpL[ 3 ] >> 9 ) | ( dpL[ 4 ] << 7 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 12 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 4 ] << 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 13 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 4 ] >> 3 ) & 0x00F8 ) * ( int32 )inPtrL[ 14 ] );
                    lSumL += ( ( int8 ) ( ( dpL[ 4 ] >> 8 ) & 0x00F8 ) * ( int32 )inPtrL[ 15 ] );
                    sumL += ( lSumL >> 3 );
                    dpL += 5;
                    inPtrL += 16;
                }

                /* remaining 0..15 values */
                {
                    int32 lSumL = 0;
                    if( iL > 0 ) lSumL += ( ( int8 ) ( dpL[ 0 ] << 3 ) * ( int32 )inPtrL[ 0 ] );
                    if( iL > 1 ) lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 1 ] );
                    if( iL > 2 ) lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 7 ) & 0x00F8 ) * ( int32 )inPtrL[ 2 ] );
                    if( iL > 3 ) lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 12 ) | ( dpL[ 1 ] << 4 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 3 ] );
                    if( iL > 4 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 4 ] );
                    if( iL > 5 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00F8 ) * ( int32 )inPtrL[ 5 ] );
                    if( iL > 6 ) lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 11 ) | ( dpL[ 2 ] << 5 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 6 ] );
                    if( iL > 7 ) lSumL += ( ( int8 ) ( ( dpL[ 2 ] ) & 0x00F8 ) * ( int32 )inPtrL[ 7 ] );
                    if( iL > 8 ) lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 5 ) & 0x00F8 ) * ( int32 )inPtrL[ 8 ] );
                    if( iL > 9 ) lSumL += ( ( int8 ) ( ( ( dpL[ 2 ] >> 10 ) | ( dpL[ 3 ] << 6 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 9 ] );
                    if( iL > 10 ) lSumL += ( ( int8 ) ( ( dpL[ 3 ] << 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 10 ] );
                    if( iL > 11 ) lSumL += ( ( int8 ) ( ( dpL[ 3 ] >> 4 ) & 0x00F8 ) * ( int32 )inPtrL[ 11 ] );
                    if( iL > 12 ) lSumL += ( ( int8 ) ( ( ( dpL[ 3 ] >> 9 ) | ( dpL[ 4 ] << 7 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 12 ] );
                    if( iL > 13 ) lSumL += ( ( int8 ) ( ( dpL[ 4 ] << 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 13 ] );
                    if( iL > 14 ) lSumL += ( ( int8 ) ( ( dpL[ 4 ] >> 3 ) & 0x00F8 ) * ( int32 )inPtrL[ 14 ] );
                    sumL += ( lSumL >> 3 );
                }
            }
            break;

            case 4:
            {
                /* four 4-bit values per word, left-aligned in a byte (mask 0xF0),
                 * factor 16 removed by '>> 4'
                 */
                for( iL = sizeL; iL >= 4; iL -= 4 )
                {
                    uint16 v1L = *rowPtrL++;
                    int32 lSumL = 0;
                    lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] );
                    lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
                    lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
                    lSumL += ( ( int8 )( ( v1L >> 8 ) & 0xF0 ) * ( int32 )inPtrL[ 3 ] );
                    inPtrL += 4;
                    sumL += ( lSumL >> 4 );
                }

                /* Remaining 0..3 values. The next word is read even when nothing
                 * remains. The guards compare iL without modifying it: the former
                 * 'iL-- > 0' chain relied on iL becoming negative and would wrap
                 * around (enabling the later guards) if count_t is unsigned.
                 */
                {
                    uint16 v1L = *rowPtrL++;
                    int32 lSumL = 0;
                    if( iL > 0 ) lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] );
                    if( iL > 1 ) lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
                    if( iL > 2 ) lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
                    sumL += ( lSumL >> 4 );
                }
            }
            break;

            #endif /*ifndef HW_TMS320C5x*/

            /* The default case can process all bit sizes including those that are explicitly encoded above
             * Use the default for all bit sizes when the platform cannot handle the int8 data type (e.g. HW_TMS320C5x)
             */
            default:
            {
                /* bfL is a 32-bit window over the packed stream; the current value
                 * is shifted so that it lands in the top bitsL bits of the low
                 * 16-bit half, where mkL selects it and the int16 cast sign-extends
                 * it. '>> adjL' removes the resulting scale factor.
                 */
                uint32 bfL = ( ( uint32 )*rowPtrL++ ) << 16;
                uint32 bitsL = ptrA->bitsPerValueE;
                uint16 adjL = 16 - bitsL;
                uint32 mkL = ( ( 1 << bitsL ) - 1 ) << adjL;
                uint32 srL = bitsL;
                for( iL = 0; iL < sizeL; iL++ )
                {
                    if( srL > 16 )
                    {
                        /* pull the next word into the upper half of the window */
                        bfL = ( ( ( uint32 )*rowPtrL++ ) << 16 ) | ( bfL >> 16 );
                        srL -= 16;
                    }
                    sumL += ( ( int16 )( ( bfL >> srL ) & mkL ) * ( int32 )inPtrL[ iL ] ) >> adjL;
                    srL += bitsL;
                }
            }
        }
    }
    else /* raw dot product does not fit in int32 */
    {
        /* half of the discarded range, added before shifting to round to nearest */
        int32 roundL = 1 << ( overflowBitsL - 1 );
        switch( ptrA->bitsPerValueE )
        {
            case 16:
            {
                for( iL = sizeL; iL > 0; iL-- ) sumL += ( ( ( int32 )*rowPtrL++ * ( int32 )*inPtrL++ ) + roundL ) >> overflowBitsL;
            }
            break;

            case 8:
            {
                for( iL = sizeL; iL >= 2; iL -= 2 )
                {
                    uint16 v1L = *rowPtrL++;
                    int32 lSumL = ( ( int8 ) v1L * ( int32 )inPtrL[ 0 ] )
                                + ( ( int8 )( v1L >> 8 ) * ( int32 )inPtrL[ 1 ] );
                    sumL += ( lSumL + roundL ) >> overflowBitsL;
                    inPtrL += 2;
                }
                if( iL > 0 )
                {
                    sumL += ( ( ( int8 )*rowPtrL++ * ( int32 )inPtrL[ 0 ] ) + roundL ) >> overflowBitsL;
                }
            }
            break;

            case 4:
            {
                for( iL = sizeL; iL >= 4; iL -= 4 )
                {
                    uint16 v1L = *rowPtrL++;
                    int32 lSumL = 0;
                    lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] );
                    lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
                    lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
                    lSumL += ( ( int8 )( ( v1L >> 8 ) & 0xF0 ) * ( int32 )inPtrL[ 3 ] );
                    inPtrL += 4;
                    sumL += ( ( lSumL >> 4 ) + roundL ) >> overflowBitsL;
                }

                /* Remaining 0..3 values; guards do not modify iL (see the
                 * non-overflow 4-bit case for the rationale).
                 */
                {
                    uint16 v1L = *rowPtrL++;
                    int32 lSumL = 0;
                    if( iL > 0 ) lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] );
                    if( iL > 1 ) lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
                    if( iL > 2 ) lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
                    sumL += ( ( lSumL >> 4 ) + roundL ) >> overflowBitsL;
                }
            }
            break;

            default:
            {
                /* same bit-window extraction as in the non-overflow default case;
                 * rounding constant and shift are combined with the adjL scaling
                 */
                uint32 bfL = ( ( uint32 )*rowPtrL++ ) << 16;
                uint32 bitsL = ptrA->bitsPerValueE;
                uint16 adjL = 16 - bitsL;
                uint32 mkL = ( ( 1 << bitsL ) - 1 ) << adjL;
                uint32 srL = bitsL;
                int32 lRoundL = roundL << adjL;
                int32 lAdjL = overflowBitsL + adjL;
                for( iL = 0; iL < sizeL; iL++ )
                {
                    if( srL > 16 )
                    {
                        bfL = ( ( ( uint32 )*rowPtrL++ ) << 16 ) | ( bfL >> 16 );
                        srL -= 16;
                    }
                    sumL += ( ( int16 )( ( bfL >> srL ) & mkL ) * ( int32 )inPtrL[ iL ] + lRoundL ) >> lAdjL;
                    srL += bitsL;
                }
            }
        }
    }

    /* compute result */
    {
        int32 resultManL;
        int32 resultExpL;
        int32 resultLogL;

        /* presumably yields sumL * factorManL as mantissa/exponent pair - confirm against bbs_mulS32 */
        bbs_mulS32( sumL, factorManL, &resultManL, &resultExpL );

        /* apply the row factor exponent and compensate the bits dropped above */
        resultExpL += factorExpL + overflowBitsL;

        /* normalize: assuming bbs_intLog2 returns the index of the highest set
         * bit, this moves the leading bit of the magnitude to bit 30
         */
        resultLogL = bbs_intLog2( resultManL > 0 ? resultManL : -resultManL );
        if( resultLogL < 30 )
        {
            resultManL <<= 30 - resultLogL;
            resultExpL -= 30 - resultLogL;
        }

        /* reduce the mantissa by 16 bits with rounding; exponent grows accordingly */
        resultManL = ( ( resultManL >> 15 ) + 1 ) >> 1;
        resultExpL = resultExpL + 16;

        /* pack: mantissa in the upper, exponent in the lower 16 bits */
        return ( ( resultManL & 0x0000FFFF ) << 16 ) | ( resultExpL & 0x0000FFFF );
    }
}
/* ------------------------------------------------------------------------- */
/* ========================================================================= */
/* */
/* ---- \ghd{ constructor / destructor } ----------------------------------- */
/* */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/** Puts a compact matrix into its initial state: both internal int16 arrays
 *  are initialized and all scalar members are set to 0.
 */
void bts_CompactMat_init( struct bbs_Context* cpA,
                          struct bts_CompactMat* ptrA )
{
    /* embedded arrays: compressed row data, then per-row exponents */
    bbs_Int16Arr_init( cpA, &ptrA->cpsArrE );
    bbs_Int16Arr_init( cpA, &ptrA->expArrE );

    /* geometry */
    ptrA->heightE = 0;
    ptrA->widthE = 0;

    /* row encoding parameters */
    ptrA->maxRowBitsE = 0;
    ptrA->wordsPerRowE = 0;
    ptrA->bitsPerValueE = 0;
}
/* ------------------------------------------------------------------------- */
/** Tears a compact matrix down: both internal int16 arrays are passed to
 *  bbs_Int16Arr_exit and all scalar members are reset to 0.
 */
void bts_CompactMat_exit( struct bbs_Context* cpA,
                          struct bts_CompactMat* ptrA )
{
    /* embedded arrays: compressed row data, then per-row exponents */
    bbs_Int16Arr_exit( cpA, &ptrA->cpsArrE );
    bbs_Int16Arr_exit( cpA, &ptrA->expArrE );

    /* geometry */
    ptrA->heightE = 0;
    ptrA->widthE = 0;

    /* row encoding parameters */
    ptrA->maxRowBitsE = 0;
    ptrA->wordsPerRowE = 0;
    ptrA->bitsPerValueE = 0;
}
/* ------------------------------------------------------------------------- */
/* ========================================================================= */
/* */
/* ---- \ghd{ operators } -------------------------------------------------- */
/* */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* ========================================================================= */
/* */
/* ---- \ghd{ query functions } -------------------------------------------- */
/* */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* ========================================================================= */
/* */
/* ---- \ghd{ modify functions } ------------------------------------------- */
/* */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/** Allocates a zero-filled compact matrix.
 *
 *  widthA, heightA  matrix dimensions
 *  bitsA            bits per stored value; must lie in [2, 16]
 *  maxRowSizeA      largest number of values any row has to hold
 *  mspA             memory segment handed on to the array allocations
 *
 *  Nothing is done when the context already carries an error. An invalid
 *  bitsA is reported through bbs_ERROR0 and leaves ptrA unchanged.
 */
void bts_CompactMat_create( struct bbs_Context* cpA,
                            struct bts_CompactMat* ptrA,
                            uint32 widthA,
                            uint32 heightA,
                            uint32 bitsA,
                            uint32 maxRowSizeA,
                            struct bbs_MemSeg* mspA )
{
    uint32 rowWordsL;

    if( bbs_Context_error( cpA ) )
    {
        return;
    }

    if( !( bitsA >= 2 && bitsA <= 16 ) )
    {
        bbs_ERROR0( "bts_CompactMat_create:\nbitsA must be between 2 and 16" );
        return;
    }

    /* 16-bit words per row: header + 1 spare word (6) plus the packed payload,
     * rounded up to an even number of words
     */
    rowWordsL = 6 + ( ( maxRowSizeA * bitsA ) / ( 8 * sizeof( short ) ) );
    if( ( rowWordsL & 1 ) != 0 )
    {
        rowWordsL++;
    }

    ptrA->widthE = widthA;
    ptrA->heightE = heightA;
    ptrA->bitsPerValueE = bitsA;
    ptrA->maxRowBitsE = 0;
    ptrA->wordsPerRowE = rowWordsL;

    /* compressed row storage */
    bbs_Int16Arr_create( cpA, &ptrA->cpsArrE, heightA * ptrA->wordsPerRowE, mspA );
    bbs_Int16Arr_fill( cpA, &ptrA->cpsArrE, 0 );

    /* one exponent slot per row */
    bbs_Int16Arr_create( cpA, &ptrA->expArrE, ptrA->heightE, mspA );
    bbs_Int16Arr_fill( cpA, &ptrA->expArrE, 0 );
}
/* ------------------------------------------------------------------------- */
/** Copies srcPtrA into ptrA.
 *
 *  The scalar members and the compressed row array are copied. The exponent
 *  array is not copied; it is only sized to the (copied) matrix height.
 */
void bts_CompactMat_copy( struct bbs_Context* cpA,
                          struct bts_CompactMat* ptrA,
                          const struct bts_CompactMat* srcPtrA )
{
    /* geometry */
    ptrA->heightE = srcPtrA->heightE;
    ptrA->widthE = srcPtrA->widthE;

    /* row encoding parameters */
    ptrA->maxRowBitsE = srcPtrA->maxRowBitsE;
    ptrA->wordsPerRowE = srcPtrA->wordsPerRowE;
    ptrA->bitsPerValueE = srcPtrA->bitsPerValueE;

    /* payload */
    bbs_Int16Arr_copy( cpA, &ptrA->cpsArrE, &srcPtrA->cpsArrE );

    /* per-row exponent buffer: sized only, contents are not taken from the source */
    bbs_Int16Arr_size( cpA, &ptrA->expArrE, ptrA->heightE );
}
/* ------------------------------------------------------------------------- */
/* ========================================================================= */
/* */
/* ---- \ghd{ I/O } -------------------------------------------------------- */
/* */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/** Returns the serialized size of the matrix as summed from bbs_SIZEOF16 of
 *  each stored field plus bbs_Int16Arr_memSize of the compressed row array.
 *  The exponent array is not part of the serialized form.
 */
uint32 bts_CompactMat_memSize( struct bbs_Context* cpA,
                               const struct bts_CompactMat *ptrA )
{
    uint32 totalL = 0;

    totalL += bbs_SIZEOF16( uint32 ); /* leading size field */
    totalL += bbs_SIZEOF16( uint32 ); /* version */

    totalL += bbs_SIZEOF16( ptrA->widthE );
    totalL += bbs_SIZEOF16( ptrA->heightE );
    totalL += bbs_SIZEOF16( ptrA->bitsPerValueE );
    totalL += bbs_SIZEOF16( ptrA->wordsPerRowE );
    totalL += bbs_SIZEOF16( ptrA->maxRowBitsE );

    totalL += bbs_Int16Arr_memSize( cpA, &ptrA->cpsArrE );

    return totalL;
}
/* ------------------------------------------------------------------------- */
/** Serializes the matrix to memPtrA and returns bts_CompactMat_memSize.
 *
 *  Layout, in write order: total size, version, width, height, bits per
 *  value, words per row, max row bits, compressed row array.
 */
uint32 bts_CompactMat_memWrite( struct bbs_Context* cpA,
                                const struct bts_CompactMat* ptrA,
                                uint16* memPtrA )
{
    uint32 totalSizeL = bts_CompactMat_memSize( cpA, ptrA );
    uint16* dstL = memPtrA;

    /* header: size and format version */
    dstL += bbs_memWrite32( &totalSizeL, dstL );
    dstL += bbs_memWriteUInt32( bts_COMPACT_MAT_VERSION, dstL );

    /* scalar members */
    dstL += bbs_memWrite32( &ptrA->widthE, dstL );
    dstL += bbs_memWrite32( &ptrA->heightE, dstL );
    dstL += bbs_memWrite32( &ptrA->bitsPerValueE, dstL );
    dstL += bbs_memWrite32( &ptrA->wordsPerRowE, dstL );
    dstL += bbs_memWrite32( &ptrA->maxRowBitsE, dstL );

    /* compressed row data */
    dstL += bbs_Int16Arr_memWrite( cpA, &ptrA->cpsArrE, dstL );

    return totalSizeL;
}
/* ------------------------------------------------------------------------- */
/** Restores the matrix from the serialized form at memPtrA.
 *
 *  Returns 0 without reading anything when the context already carries an
 *  error; otherwise returns the size value stored at the start of the data.
 *  A stored size that differs from bts_CompactMat_memSize of the restored
 *  object is reported as bbs_ERR_CORRUPT_DATA. The exponent array is not part
 *  of the stream; it is created with one entry per row and zero-filled.
 */
uint32 bts_CompactMat_memRead( struct bbs_Context* cpA,
                               struct bts_CompactMat* ptrA,
                               const uint16* memPtrA,
                               struct bbs_MemSeg* mspA )
{
    uint32 memSizeL, versionL;
    const uint16* srcL = memPtrA;

    if( bbs_Context_error( cpA ) )
    {
        return 0;
    }

    /* header: size and format version */
    srcL += bbs_memRead32( &memSizeL, srcL );
    srcL += bbs_memReadVersion32( cpA, &versionL, bts_COMPACT_MAT_VERSION, srcL );

    /* scalar members */
    srcL += bbs_memRead32( &ptrA->widthE, srcL );
    srcL += bbs_memRead32( &ptrA->heightE, srcL );
    srcL += bbs_memRead32( &ptrA->bitsPerValueE, srcL );
    srcL += bbs_memRead32( &ptrA->wordsPerRowE, srcL );
    srcL += bbs_memRead32( &ptrA->maxRowBitsE, srcL );

    /* compressed row data */
    srcL += bbs_Int16Arr_memRead( cpA, &ptrA->cpsArrE, srcL, mspA );

    /* consistency check of the stored size against the restored object */
    if( memSizeL != bts_CompactMat_memSize( cpA, ptrA ) )
    {
        bbs_ERR0( bbs_ERR_CORRUPT_DATA, "uint32 bts_CompactMat_memRead( const struct bts_CompactMat* ptrA, const void* memPtrA ):\n"
                  "size mismatch" );
    }

    /* per-row exponent buffer */
    bbs_Int16Arr_create( cpA, &ptrA->expArrE, ptrA->heightE, mspA );
    bbs_Int16Arr_fill( cpA, &ptrA->expArrE, 0 );

    return memSizeL;
}
/* ------------------------------------------------------------------------- */
/* ========================================================================= */
/* */
/* ---- \ghd{ exec functions } --------------------------------------------- */
/* */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/** Maps inVecA through the matrix: outVecA[ i ] receives the dot product of
 *  inVecA with row i, for all heightE rows, expressed with one exponent
 *  common to the whole output vector.
 *
 *  inVecA      input vector; widthE elements are used for the norm estimate
 *  outVecA     output vector, heightE elements
 *  outExpPtrA  if not NULL, receives the common exponent of outVecA
 *
 *  Each row product is obtained from bts_CompactMat_fltDotPrdRow as a 16-bit
 *  mantissa with its own 16-bit exponent. The largest exponent becomes the
 *  common one and every mantissa with a smaller exponent is shifted right
 *  (with rounding) by the difference.
 *
 *  Note: although ptrA is const-qualified, the per-row exponents are written
 *  to ptrA->expArrE, which serves as scratch storage.
 */
void bts_CompactMat_map( struct bbs_Context* cpA,
                         const struct bts_CompactMat* ptrA,
                         const int16* inVecA,
                         int16* outVecA,
                         int16* outExpPtrA )
{
    uint32 inNormBitsL = bbs_intLog2( bbs_vecNorm16( inVecA, ptrA->widthE ) ) + 1;
    uint32 iL;

    /* constness is cast away: the exponent array is used as a work buffer */
    int16* expArrL = ( ( struct bts_CompactMat* )ptrA )->expArrE.arrPtrE;
    int16 maxExpL = -32767;

    /* pass 1: per-row mantissa and exponent, track the largest exponent */
    for( iL = 0; iL < ptrA->heightE; iL++ )
    {
        int32 fltL = bts_CompactMat_fltDotPrdRow( cpA, ( struct bts_CompactMat* )ptrA, inVecA, inNormBitsL, iL );
        outVecA[ iL ] = fltL >> 16;
        expArrL[ iL ] = fltL & 0x0000FFFF;

        maxExpL = ( expArrL[ iL ] > maxExpL ) ? expArrL[ iL ] : maxExpL;
    }

    if( outExpPtrA != NULL ) *outExpPtrA = maxExpL;

    /* pass 2: bring all mantissas to the common exponent */
    for( iL = 0; iL < ptrA->heightE; iL++ )
    {
        int32 shrL = maxExpL - expArrL[ iL ];
        if( shrL > 16 )
        {
            /* A 16-bit mantissa shifted right by 16 or more bits rounds to 0.
             * Handling this explicitly keeps the shift count below the width
             * of int; shifting by the width or more is undefined behavior.
             */
            outVecA[ iL ] = 0;
        }
        else if( shrL > 0 )
        {
            /* shift by shrL with round-to-nearest */
            outVecA[ iL ] = ( ( outVecA[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1;
        }
    }
}
/* ------------------------------------------------------------------------- */
/* ========================================================================= */