Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 6 additions & 20 deletions src/cores/core_zgessq.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,7 @@
#include <math.h>
#include <lapacke.h>
#include "common.h"

#define COMPLEX

/*
 * UPDATE( nb, value ): fold one magnitude into a scaled sum of squares.
 *
 * Expects two `double *` variables named `scale` and `sumsq` to be in scope
 * at the point of use. After the call, (*scale)^2 * (*sumsq) has grown by
 * nb * value^2, with *scale holding the largest value seen so far.
 *
 *   nb    - weight applied to value^2 (e.g. 2 for an entry counted twice).
 *   value - magnitude to accumulate; a zero value leaves the state untouched.
 *           Assumed non-negative (callers pass an absolute value).
 *
 * Each argument is evaluated exactly once, so expressions such as `a + b`
 * or calls with side effects are safe. The do/while(0) wrapper makes the
 * macro behave as a single statement, including inside if/else.
 */
#define UPDATE( nb, value )                                                   \
    do {                                                                      \
        const double update_nb_  = (nb);                                      \
        const double update_val_ = (value);                                   \
        if ( update_val_ != 0. ) {                                            \
            if ( *scale < update_val_ ) {                                     \
                /* New maximum: rescale the running sum to the new scale. */  \
                const double update_ratio_ = *scale / update_val_;            \
                *sumsq = update_nb_                                           \
                       + (*sumsq) * update_ratio_ * update_ratio_;            \
                *scale = update_val_;                                         \
            } else {                                                          \
                const double update_ratio_ = update_val_ / *scale;            \
                *sumsq = *sumsq                                               \
                       + update_nb_ * update_ratio_ * update_ratio_;          \
            }                                                                 \
        }                                                                     \
    } while (0)
#include "sumsq_update.h"

/*****************************************************************************
*
Expand Down Expand Up @@ -91,19 +80,16 @@ int CORE_zgessq(int M, int N,
double *scale, double *sumsq)
{
int i, j;
double tmp;
double *ptr;

for(j=0; j<N; j++) {
ptr = (double*) ( A + j * LDA );
for(i=0; i<M; i++, ptr++) {
tmp = fabs(*ptr);
UPDATE( 1., tmp );

#ifdef COMPLEX
ptr++;
tmp = fabs(*ptr);
UPDATE( 1., tmp );
sumsq_update( 1, scale, sumsq, ptr );

#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
sumsq_update( 1, scale, sumsq, ptr );
#endif
}
}
Expand Down
60 changes: 20 additions & 40 deletions src/cores/core_zhessq.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,7 @@
#include <math.h>
#include <lapacke.h>
#include "common.h"

#define COMPLEX

/*
 * UPDATE( nb, value ): fold one magnitude into a scaled sum of squares.
 *
 * Expects two `double *` variables named `scale` and `sumsq` to be in scope
 * at the point of use. After the call, (*scale)^2 * (*sumsq) has grown by
 * nb * value^2, with *scale holding the largest value seen so far.
 *
 *   nb    - weight applied to value^2 (e.g. 2 for an entry counted twice).
 *   value - magnitude to accumulate; a zero value leaves the state untouched.
 *           Assumed non-negative (callers pass an absolute value).
 *
 * Each argument is evaluated exactly once, so expressions such as `a + b`
 * or calls with side effects are safe. The do/while(0) wrapper makes the
 * macro behave as a single statement, including inside if/else.
 */
#define UPDATE( nb, value )                                                   \
    do {                                                                      \
        const double update_nb_  = (nb);                                      \
        const double update_val_ = (value);                                   \
        if ( update_val_ != 0. ) {                                            \
            if ( *scale < update_val_ ) {                                     \
                /* New maximum: rescale the running sum to the new scale. */  \
                const double update_ratio_ = *scale / update_val_;            \
                *sumsq = update_nb_                                           \
                       + (*sumsq) * update_ratio_ * update_ratio_;            \
                *scale = update_val_;                                         \
            } else {                                                          \
                const double update_ratio_ = update_val_ / *scale;            \
                *sumsq = *sumsq                                               \
                       + update_nb_ * update_ratio_ * update_ratio_;          \
            }                                                                 \
        }                                                                     \
    } while (0)
#include "sumsq_update.h"

/*****************************************************************************
*
Expand Down Expand Up @@ -97,31 +86,26 @@ int CORE_zhessq(PLASMA_enum uplo, int N,
double *scale, double *sumsq)
{
int i, j;
double tmp;
double *ptr;

if ( uplo == PlasmaUpper ) {
for(j=0; j<N; j++) {
ptr = (double*) ( A + j * LDA );

for(i=0; i<j; i++, ptr++) {

tmp = fabs(*ptr);
UPDATE( 2., tmp );

#ifdef COMPLEX
ptr++;
tmp = fabs(*ptr);
UPDATE( 2., tmp );
sumsq_update( 2, scale, sumsq, ptr );

#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
sumsq_update( 2, scale, sumsq, ptr );
#endif
}

/* Diagonal */
tmp = fabs(*ptr);
UPDATE( 1., tmp );

#ifdef COMPLEX
ptr++;
sumsq_update( 1, scale, sumsq, ptr );

#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
#endif
}
} else {
Expand All @@ -130,23 +114,19 @@ int CORE_zhessq(PLASMA_enum uplo, int N,
ptr = (double*) ( A + j * LDA + j);

/* Diagonal */
tmp = fabs(*ptr);
UPDATE( 1., tmp );
ptr++;

#ifdef COMPLEX
ptr++;
sumsq_update( 1, scale, sumsq, ptr );
ptr++;

#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
#endif

for(i=j+1; i<N; i++, ptr++) {

tmp = fabs(*ptr);
UPDATE( 2., tmp );

#ifdef COMPLEX
ptr++;
tmp = fabs(*ptr);
UPDATE( 2., tmp );
sumsq_update( 2, scale, sumsq, ptr );

#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
sumsq_update( 2, scale, sumsq, ptr );
#endif
}
}
Expand Down
66 changes: 22 additions & 44 deletions src/cores/core_zsyssq.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,7 @@
#include <math.h>
#include <lapacke.h>
#include "common.h"

#define COMPLEX

/*
 * UPDATE( nb, value ): fold one magnitude into a scaled sum of squares.
 *
 * Expects two `double *` variables named `scale` and `sumsq` to be in scope
 * at the point of use. After the call, (*scale)^2 * (*sumsq) has grown by
 * nb * value^2, with *scale holding the largest value seen so far.
 *
 *   nb    - weight applied to value^2 (e.g. 2 for an entry counted twice).
 *   value - magnitude to accumulate; a zero value leaves the state untouched.
 *           Assumed non-negative (callers pass an absolute value).
 *
 * Each argument is evaluated exactly once, so expressions such as `a + b`
 * or calls with side effects are safe. The do/while(0) wrapper makes the
 * macro behave as a single statement, including inside if/else.
 */
#define UPDATE( nb, value )                                                   \
    do {                                                                      \
        const double update_nb_  = (nb);                                      \
        const double update_val_ = (value);                                   \
        if ( update_val_ != 0. ) {                                            \
            if ( *scale < update_val_ ) {                                     \
                /* New maximum: rescale the running sum to the new scale. */  \
                const double update_ratio_ = *scale / update_val_;            \
                *sumsq = update_nb_                                           \
                       + (*sumsq) * update_ratio_ * update_ratio_;            \
                *scale = update_val_;                                         \
            } else {                                                          \
                const double update_ratio_ = update_val_ / *scale;            \
                *sumsq = *sumsq                                               \
                       + update_nb_ * update_ratio_ * update_ratio_;          \
            }                                                                 \
        }                                                                     \
    } while (0)
#include "sumsq_update.h"

/*****************************************************************************
*
Expand Down Expand Up @@ -97,33 +86,27 @@ int CORE_zsyssq(PLASMA_enum uplo, int N,
double *scale, double *sumsq)
{
int i, j;
double tmp;
double *ptr;

if ( uplo == PlasmaUpper ) {
for(j=0; j<N; j++) {
ptr = (double*) ( A + j * LDA );

for(i=0; i<j; i++, ptr++) {

tmp = fabs(*ptr);
UPDATE( 2., tmp );

#ifdef COMPLEX
ptr++;
tmp = fabs(*ptr);
UPDATE( 2., tmp );
sumsq_update( 2, scale, sumsq, ptr );

#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
sumsq_update( 2, scale, sumsq, ptr );
#endif
}

/* Diagonal */
tmp = fabs(*ptr);
UPDATE( 1., tmp );

#ifdef COMPLEX
ptr++;
tmp = fabs(*ptr);
UPDATE( 1., tmp );
sumsq_update( 1, scale, sumsq, ptr );

#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
sumsq_update( 1, scale, sumsq, ptr );
#endif
}
} else {
Expand All @@ -132,25 +115,20 @@ int CORE_zsyssq(PLASMA_enum uplo, int N,
ptr = (double*) ( A + j * LDA + j);

/* Diagonal */
tmp = fabs(*ptr);
UPDATE( 1., tmp );
ptr++;

#ifdef COMPLEX
tmp = fabs(*ptr);
UPDATE( 1., tmp );
ptr++;
sumsq_update( 1, scale, sumsq, ptr );
ptr++;

#if defined(PRECISION_z) || defined(PRECISION_c)
sumsq_update( 1, scale, sumsq, ptr );
ptr++;
#endif

for(i=j+1; i<N; i++, ptr++) {

tmp = fabs(*ptr);
UPDATE( 2., tmp );

#ifdef COMPLEX
ptr++;
tmp = fabs(*ptr);
UPDATE( 2., tmp );
sumsq_update( 2, scale, sumsq, ptr );

#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
sumsq_update( 2, scale, sumsq, ptr );
#endif
}
}
Expand Down
35 changes: 10 additions & 25 deletions src/cores/core_ztrssq.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,7 @@
#include <math.h>
#include <lapacke.h>
#include "common.h"

#define COMPLEX

/*
 * UPDATE( nb, value ): fold one magnitude into a scaled sum of squares.
 *
 * Expects two `double *` variables named `scale` and `sumsq` to be in scope
 * at the point of use. After the call, (*scale)^2 * (*sumsq) has grown by
 * nb * value^2, with *scale holding the largest value seen so far.
 *
 *   nb    - weight applied to value^2 (e.g. 2 for an entry counted twice).
 *   value - magnitude to accumulate; a zero value leaves the state untouched.
 *           Assumed non-negative (callers pass an absolute value).
 *
 * Each argument is evaluated exactly once, so expressions such as `a + b`
 * or calls with side effects are safe. The do/while(0) wrapper makes the
 * macro behave as a single statement, including inside if/else.
 */
#define UPDATE( nb, value )                                                   \
    do {                                                                      \
        const double update_nb_  = (nb);                                      \
        const double update_val_ = (value);                                   \
        if ( update_val_ != 0. ) {                                            \
            if ( *scale < update_val_ ) {                                     \
                /* New maximum: rescale the running sum to the new scale. */  \
                const double update_ratio_ = *scale / update_val_;            \
                *sumsq = update_nb_                                           \
                       + (*sumsq) * update_ratio_ * update_ratio_;            \
                *scale = update_val_;                                         \
            } else {                                                          \
                const double update_ratio_ = update_val_ / *scale;            \
                *sumsq = *sumsq                                               \
                       + update_nb_ * update_ratio_ * update_ratio_;          \
            }                                                                 \
        }                                                                     \
    } while (0)
#include "sumsq_update.h"

/*****************************************************************************
*
Expand Down Expand Up @@ -97,7 +86,7 @@ int CORE_ztrssq(PLASMA_enum uplo, PLASMA_enum diag, int M, int N,

if ( diag == PlasmaUnit ){
tmp = sqrt( min(M, N) );
UPDATE( 1., tmp );
sumsq_update( 1, scale, sumsq, &tmp );
}

if (uplo == PlasmaUpper ) {
Expand All @@ -108,13 +97,11 @@ int CORE_ztrssq(PLASMA_enum uplo, PLASMA_enum diag, int M, int N,
imax = min(j+1-idiag, M);

for(i=0; i<imax; i++, ptr++) {
tmp = fabs(*ptr);
UPDATE( 1., tmp );

#ifdef COMPLEX
sumsq_update( 1, scale, sumsq, ptr );

#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
UPDATE( 1., tmp );
sumsq_update( 1, scale, sumsq, ptr );
#endif
}
}
Expand All @@ -126,13 +113,11 @@ int CORE_ztrssq(PLASMA_enum uplo, PLASMA_enum diag, int M, int N,
ptr = (double*) ( A + j * (LDA+1) + idiag );

for(i=j+idiag; i<M; i++, ptr++) {
tmp = fabs(*ptr);
UPDATE( 1., tmp );

#ifdef COMPLEX
sumsq_update( 1, scale, sumsq, ptr );

#if defined(PRECISION_z) || defined(PRECISION_c)
ptr++;
tmp = fabs(*ptr);
UPDATE( 1., tmp );
sumsq_update( 1, scale, sumsq, ptr );
#endif
}
}
Expand Down
Loading