/* mapfiles.c - We assume we are given files testdata0.txt, testdata1.txt, ...,
 * testdata6.txt of size respectively 512B, 2KB, 8KB, 32KB, 128KB, 512KB, 2MB.
 * We measure the time needed to compute their checksum both by reading the
 * files and by mapping the files to memory, and print the resulting statistics.
 * Compile with
 *   % gcc -o mapfiles mapfiles.c
 * Run with
 *   % mapfiles
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * checksum - 16-bit sum with end-around carry over a buffer of 16-bit words,
 * e.g. the contents of a memory mapped file.
 *
 * buffer: first of the `count` words to add up; it is only read.
 * count:  number of 16-bit words (2*count is the size in bytes). A zero or
 *         negative count yields 0 without touching buffer.
 *
 * Each time the running sum spills over 16 bits, the carry is added back
 * into the low 16 bits. Returns the low 16 bits of the final sum.
 */
static unsigned short checksum(const unsigned short *buffer, int count)
{
  unsigned long int csum = 0;
  int i;

  /* Counting upwards keeps a negative count from running past the buffer. */
  for (i = 0; i < count; ++i) {
    csum += buffer[i];
    if (csum & 0xFFFF0000) { /* There is a carry */
      csum &= 0x0000FFFF;
      csum++;
    }
  }
  return (unsigned short)(csum & 0xFFFF);
}

/*
 * fchecksum - checksum of the first `count` 16-bit words of a file, computed
 * by reading the file with read() (2*count is the size in bytes).
 *
 * filename: path of the file to open read-only.
 * count:    number of 16-bit words to add up. If the file holds fewer complete
 *           words, only the words actually present are summed. A zero or
 *           negative count yields 0.
 *
 * The sum uses the same end-around carry rule as checksum(). On an open or
 * read failure the function reports the error with perror() and terminates
 * the process with exit(1). Returns the low 16 bits of the final sum.
 */
static unsigned short fchecksum(const char *filename, int count)
{
  enum {BUFSIZE = 32*1024};     /* buffer capacity, in 16-bit words */
  unsigned short buffer[BUFSIZE];
  unsigned long int csum = 0;
  size_t avail = 0;             /* complete words currently held in buffer */
  size_t cursor = 0;            /* index of the next word to consume */
  int fd = open(filename, O_RDONLY, 0);

  if (fd < 0) {
    perror("open");
    exit(1);
  }
  while (count > 0) {
    if (cursor == avail) {
      /* Refill the buffer. Never request more than is still needed, and keep
       * reading until that much has arrived or end of file is reached:
       * read() may legally return fewer bytes than were asked for. */
      size_t want = sizeof buffer;
      size_t nbytes = 0;

      if ((size_t)count < BUFSIZE)
        want = (size_t)count * sizeof buffer[0];
      while (nbytes < want) {
        ssize_t n = read(fd, (char *)buffer + nbytes, want - nbytes);

        if (n < 0) {
          perror("read");
          exit(1);
        }
        if (n == 0)             /* end of file */
          break;
        nbytes += (size_t)n;
      }
      /* Only whole words count; a trailing odd byte at end of file is ignored. */
      avail = nbytes / sizeof buffer[0];
      cursor = 0;
      if (avail == 0)
        break;
    }
    csum += buffer[cursor++];
    count--;
    if (csum & 0xFFFF0000) { /* There is a carry */
      csum &= 0x0000FFFF;
      csum++;
    }
  }
  close(fd);
  return (unsigned short)(csum & 0xFFFF);
}


/*
 * timedif - time elapsed from b to a, in whole milliseconds.
 *
 * a: the later time stamp.
 * b: the earlier time stamp.
 *
 * The difference is normalised so that the microsecond part is never
 * negative before it is converted; dividing a negative microsecond
 * difference on its own would truncate toward zero and over-report the
 * interval by up to one millisecond. The result is rounded down.
 */
static int timedif(struct timeval a, struct timeval b)
{
  long sec = (long)(a.tv_sec - b.tv_sec);
  long usec = (long)(a.tv_usec - b.tv_usec);

  if (usec < 0) {               /* borrow one second */
    sec--;
    usec += 1000000L;
  }
  return (int)(sec * 1000 + usec / 1000);
}


/* Smaller / larger of two values. The whole expansion is parenthesised so the
 * macros can be used inside larger expressions. An argument may be evaluated
 * twice, so do not pass expressions with side effects. */
#define min(a,b) (((a)<(b))?(a):(b))
#define max(a,b) (((a)>(b))?(a):(b))


/*
 * Benchmark configuration: NSIZES test files, each processed ITERATIONS
 * times. The file sizes are 512B, 2KB, 8KB, 32KB, 128KB, 512KB and 2MB.
 */
enum {
  NSIZES = 7,
  ITERATIONS = 10
};

/* Size in bytes of each test file; sizes[k] goes with filenames[k]. */
int sizes[NSIZES] = {
  512 << 0,    /* 512B  */
  512 << 2,    /* 2KB   */
  512 << 4,    /* 8KB   */
  512 << 6,    /* 32KB  */
  512 << 8,    /* 128KB */
  512 << 10,   /* 512KB */
  512 << 12    /* 2MB   */
};

/* Names of the test files, in the same order as sizes[]. */
char *filenames[NSIZES] = {
  "testdata0.txt",
  "testdata1.txt",
  "testdata2.txt",
  "testdata3.txt",
  "testdata4.txt",
  "testdata5.txt",
  "testdata6.txt"
};

/* Per-file timings (ms) when the files are processed directly with read().
 * avg_time accumulates the sum; it is divided by ITERATIONS when printed. */
int min_time[NSIZES];
int max_time[NSIZES];
double avg_time[NSIZES];

/* Per-file timings (ms) when the files are mapped to memory.
 * avgt accumulates the sum; it is divided by ITERATIONS when printed. */
int mint[NSIZES];
int maxt[NSIZES];
double avgt[NSIZES];

/*
 * require_file_sizes - check, before any timing starts, that every test file
 * exists and holds at least sizes[k] bytes.
 *
 * The mapped pass reads sizes[k] bytes through the mapping; reading mapped
 * pages that lie wholly past the end of the file raises SIGBUS, so a short
 * file is reported here instead. Terminates the process with exit(1) on
 * failure.
 */
static void require_file_sizes(void)
{
  int k;

  for (k = 0; k < NSIZES; ++k) {
    struct stat st;

    if (stat(filenames[k], &st) < 0) {
      perror(filenames[k]);
      exit(1);
    }
    if (st.st_size < (off_t)sizes[k]) {
      fprintf(stderr, "%s: file is smaller than the expected %d bytes\n",
              filenames[k], sizes[k]);
      exit(1);
    }
  }
}

/*
 * main - for each test file, ITERATIONS times over, time the checksum
 * computed through a memory mapping and the checksum computed with read(),
 * then print the minimum, average and maximum of each in milliseconds.
 *
 * Any failing system call is reported with perror() and ends the program
 * with exit status 1. Returns 0 on success.
 */
int main(void)
{
  int fdin;
  void *src;
  int cs, cs1;      /* the checksum of a file, by mapping and by read() */
  int iterations;   /* it will go from 0 to ITERATIONS-1 */
  int k;            /* it will go from 0 to NSIZES-1 */
  int delta;        /* elapsed time of one measurement, in ms */
  struct timeval starttime, now;

  require_file_sizes();

  for (k = 0; k < NSIZES; ++k) {
    /* The minima are seeded from the first sample, in the loop below. */
    mint[k] = 0;      min_time[k] = 0;
    maxt[k] = 0;      max_time[k] = 0;
    avgt[k] = 0.0;    avg_time[k] = 0.0;
  }
  for (iterations = 0; iterations < ITERATIONS; ++iterations) {
    for (k = 0; k < NSIZES; ++k) {
      /* Mapped pass: open, map, checksum, unmap and close are all timed. */
      gettimeofday(&starttime, NULL);
      if ((fdin = open(filenames[k], O_RDONLY)) < 0) {
        perror("opening file");
        exit(1);
      }
      src = mmap(NULL, sizes[k], PROT_READ, MAP_SHARED, fdin, 0);
      if (src == MAP_FAILED) {
        perror("mmap error");
        exit(1);
      }
      cs = checksum((unsigned short *)src, sizes[k]/2);
      if (munmap(src, sizes[k]) != 0) {
        perror("munmap error");
        exit(1);
      }
      close(fdin);
      gettimeofday(&now, NULL);
      delta = timedif(now, starttime);
      if (iterations == 0 || delta < mint[k])
        mint[k] = delta;
      if (delta > maxt[k])
        maxt[k] = delta;
      avgt[k] += delta;

      /* Direct pass: fchecksum opens, reads and closes the file itself. */
      gettimeofday(&starttime, NULL);
      cs1 = fchecksum(filenames[k], sizes[k]/2);
      gettimeofday(&now, NULL);
      assert(cs == cs1); /* making sure we are computing the same value with the two methods */
      delta = timedif(now, starttime);
      if (iterations == 0 || delta < min_time[k])
        min_time[k] = delta;
      if (delta > max_time[k])
        max_time[k] = delta;
      avg_time[k] += delta;
    }
  }
  printf("                MAP               |                I/O\n");
  printf("==================================|=============================\n");
  printf("   SIZE  MIN        AVG      MAX  |        MIN       AVG     MAX\n");
  for (k = 0; k < NSIZES; ++k) {
    printf("%7d  %3d %10.2f  %7d  |       %3d %10.2f %7d \n", 
           sizes[k], mint[k], avgt[k]/ITERATIONS, maxt[k],
           min_time[k], avg_time[k]/ITERATIONS, max_time[k]);
  }
  return 0;
}



