// Sorry for my poor English.
// C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\x86_amd64\vcvarsx86_amd64.bat
// cl /O2 cl_rdtsc_cpuid_rdtscp.c /Fe: cl_rdtsc_cpuid_rdtscp.exe
#include <stdio.h>
#include <stdlib.h>
#include <intrin.h>
/* Ask the compiler to treat the three timing primitives used by
 * Filltimes() as intrinsics. */
#pragma intrinsic(__rdtsc)
#pragma intrinsic(__cpuid)
#pragma intrinsic(__rdtscp)
/* Number of timing samples taken per row of the times matrix. */
#define SIZE_OF_STAT 10000
/* Number of rows (outer-loop iterations) in the times matrix. */
#define BOUND_OF_LOOP 1000
/* Largest unsigned 64-bit value; var_calc() returns it to signal overflow.
 * Guarded so that a definition already supplied by <stdint.h> (directly or
 * through another header) is not redefined. */
#ifndef UINT64_MAX
#define UINT64_MAX (18446744073709551615ULL)
#endif
/*
 * Fill a BOUND_OF_LOOP x SIZE_OF_STAT matrix with timestamp-counter deltas.
 *
 * Each sample is the difference between an __rdtsc() reading and the
 * __rdtscp() reading taken immediately after it, with a __cpuid() call
 * placed before and after the pair. Nothing is executed between the two
 * readings, so each sample is the cost of the measurement itself.
 *
 * times: array of BOUND_OF_LOOP row pointers, each row holding at least
 *        SIZE_OF_STAT elements; every element is overwritten.
 *
 * If a reading pair goes backwards (end < start) a diagnostic is printed
 * and 0 is stored for that sample.
 */
void __forceinline Filltimes(unsigned __int64 **times)
{
    int cpu_info[4];
    unsigned int aux;
    int i, j;
    unsigned __int64 start, end;
    volatile int variable = 0;

    /* Two untimed passes over the exact measurement sequence; their results
     * are overwritten before use (presumably a warm-up of the code path). */
    __cpuid(cpu_info, 0);
    start = __rdtsc();
    end = __rdtscp(&aux);
    __cpuid(cpu_info, 0);

    __cpuid(cpu_info, 0);
    start = __rdtsc();
    end = __rdtscp(&aux);
    __cpuid(cpu_info, 0);

    for (j = 0; j < BOUND_OF_LOOP; j++)
    {
        for (i = 0; i < SIZE_OF_STAT; i++)
        {
            variable = 0;

            /* Statement order inside the timed region is significant. */
            __cpuid(cpu_info, 0);
            start = __rdtsc();
            end = __rdtscp(&aux);
            __cpuid(cpu_info, 0);

            /* Both readings are unsigned, so "(end - start) < 0" can never
             * be true; compare the readings directly to detect a counter
             * that went backwards. */
            if (end < start)
            {
                printf("\n\n>>>>>>>>>>>>>> CRITICAL ERROR IN TAKING THE TIME!!!!!!\n loop(%d) stat(%d) start = %llu, end = %llu, variable = %u\n", j, i, start, end, variable);
                times[j][i] = 0;
            }
            else
            {
                times[j][i] = end - start;
            }
        }
    }
    return;
}
/*
 * Integer population variance of inputs[0..size-1], computed as
 *
 *     (size * sum(x^2) - sum(x)^2) / size^2
 *
 * with truncating unsigned 64-bit division.
 *
 * inputs: samples to analyse.
 * size:   number of samples.
 *
 * Returns the variance, 0 when there are no samples (inputs is NULL or
 * size <= 0), or UINT64_MAX after printing a diagnostic when any
 * intermediate value would not fit in 64 bits.
 */
unsigned __int64 var_calc(unsigned __int64 *inputs, int size)
{
    unsigned __int64 n, x, sq;
    unsigned __int64 sum = 0, sum_sq = 0;
    int i;

    /* No samples: avoid the division by zero below. */
    if (inputs == NULL || size <= 0)
        return 0;

    n = (unsigned __int64)size;

    for (i = 0; i < size; i++)
    {
        x = inputs[i];

        /* Every check is made BEFORE the operation so a wrapped result is
         * never mistaken for a valid one. */
        if (x > UINT64_MAX - sum)
            goto overflow;
        sum += x;

        if (x != 0 && x > UINT64_MAX / x)
            goto overflow;
        sq = x * x;

        if (sq > UINT64_MAX - sum_sq)
            goto overflow;
        sum_sq += sq;
    }

    if (sum != 0 && sum > UINT64_MAX / sum)
        goto overflow;
    if (sum_sq > UINT64_MAX / n)
        goto overflow;

    /* size * sum(x^2) >= sum(x)^2 always holds, so the subtraction cannot
     * wrap; n * n fits because size is an int. */
    return (sum_sq * n - sum * sum) / (n * n);

overflow:
    printf("\n\n>>>>>>>>>>>>>> CRITICAL OVERFLOW ERROR IN var_calc!!!!!!\n\n");
    return UINT64_MAX;
}
/*
 * Run the benchmark and print its statistics.
 *
 * Allocates a BOUND_OF_LOOP x SIZE_OF_STAT sample matrix plus two
 * BOUND_OF_LOOP-element arrays, fills the matrix with Filltimes(), then for
 * each row prints its variance, max-min spread and minimum, followed by
 * summary figures across all rows.
 *
 * Returns 0 in every case, including allocation failure (a message is
 * printed in that case). All memory is released on every path.
 */
int hello_start(void)
{
    int i = 0, j = 0, spurious = 0;
    unsigned __int64 **times = NULL;
    unsigned __int64 *variances = NULL;
    unsigned __int64 *min_values = NULL;
    unsigned __int64 max_dev = 0, min_time = 0, max_time = 0, prev_min = 0, tot_var = 0, max_dev_all = 0, var_of_vars = 0, var_of_mins = 0;

    printf("Loading hello module...\n");

    /* calloc leaves every row pointer NULL, so the cleanup path can free
     * all rows unconditionally even after a partial allocation. */
    times = calloc(BOUND_OF_LOOP, sizeof *times);
    if (!times)
    {
        printf("unable to allocate memory for times\n");
        goto cleanup;
    }
    for (j = 0; j < BOUND_OF_LOOP; j++)
    {
        times[j] = malloc(SIZE_OF_STAT * sizeof *times[j]);
        if (!times[j])
        {
            printf("unable to allocate memory for times[%d]\n", j);
            goto cleanup;
        }
    }

    variances = malloc(BOUND_OF_LOOP * sizeof *variances);
    if (!variances)
    {
        printf("unable to allocate memory for variances\n");
        goto cleanup;
    }

    min_values = malloc(BOUND_OF_LOOP * sizeof *min_values);
    if (!min_values)
    {
        printf("unable to allocate memory for min_values\n");
        goto cleanup;
    }

    Filltimes(times);

    for (j = 0; j < BOUND_OF_LOOP; j++)
    {
        max_dev = 0;
        min_time = 0;
        max_time = 0;
        for (i = 0; i < SIZE_OF_STAT; i++)
        {
            /* min_time == 0 doubles as "not set yet".
             * NOTE(review): a genuine 0 sample therefore resets the running
             * minimum - confirm whether that is intended. */
            if ((min_time == 0) || (min_time > times[j][i]))
                min_time = times[j][i];
            if (max_time < times[j][i])
                max_time = times[j][i];
        }
        max_dev = max_time - min_time;
        min_values[j] = min_time;

        /* Count rows whose minimum dropped below the previous row's. */
        if ((prev_min != 0) && (prev_min > min_time))
            spurious++;
        if (max_dev > max_dev_all)
            max_dev_all = max_dev;

        variances[j] = var_calc(times[j], SIZE_OF_STAT);
        tot_var += variances[j];
        printf("loop_size:%d >>>> variance(cycles): %llu; max_deviation: %llu ;min time: %llu\n", j, variances[j], max_dev, min_time);
        prev_min = min_time;
    }

    var_of_vars = var_calc(variances, BOUND_OF_LOOP);
    var_of_mins = var_calc(min_values, BOUND_OF_LOOP);

    printf("\n total number of spurious min values = %d", spurious);
    printf("\n total variance = %llu", (tot_var/BOUND_OF_LOOP));
    printf("\n absolute max deviation = %llu", max_dev_all);
    printf("\n variance of variances = %llu", var_of_vars);
    printf("\n variance of minimum values = %llu", var_of_mins);

cleanup:
    /* Single exit path: free(NULL) is a no-op, so whatever was not
     * allocated is skipped safely. */
    if (times)
    {
        for (j = 0; j < BOUND_OF_LOOP; j++)
        {
            free(times[j]);
        }
    }
    free(times);
    free(variances);
    free(min_values);
    return 0;
}
/* Write the closing farewell banner to standard output. */
void hello_end(void)
{
    fputs("\n\nGoodbye Mr.\n", stdout);
}
/* Entry point: run the benchmark, print the farewell, exit with status 0. */
int main()
{
    /* The status returned by hello_start() is not inspected. */
    (void)hello_start();
    hello_end();

    return 0;
}