#include <stdlib.h> 
#include <stdio.h> 
#include <errno.h> 
#include <netinet/in.h>
#include <sys/types.h> 
#include <sys/stat.h> 
#include <sys/mman.h> 
#include <fcntl.h>
#include <string.h>

#include "my_getopt.h"

#include <list>
#include <vector>
#include <set>
#include <string>

#include "page.h"
#include "dict.h"
#include "record.h"

// Command-line option state, filled in by main() while parsing arguments.
// All flags default to 0 (off).

// Set by --ignore-blobs: analyse counts blob fields but does not follow
// their page chains.
int opt_ignore_blobs = 0;
// Selected operating mode (one of the OPT_MODE_* values); 0 means no mode
// was given on the command line.
int opt_mode = 0;
// Set by -v/--verbose: print additional progress and dictionary details.
int opt_verbose = 0;
// Set by --linear: dump walks the file page by page instead of following
// the leaf-page chain of the index.
int opt_linear = 0;
// Set by -q/--quiet: suppress the startup banner.
int opt_quiet = 0;
// Set by --skip-records: analyse does not iterate the records in each page.
int opt_skip_records = 0;

// Operating modes stored in opt_mode. Values start at 1 so that 0 can mean
// "no mode selected".
// NOTE: OPT_MODES is a variable of this unnamed enum type, not a type name.
enum { OPT_MODE_ANALYSE=1, OPT_MODE_DUMP, OPT_MODE_DUMP_DEFS, OPT_MODE_DUMP_DEFS_SHORT } OPT_MODES;

// Values returned by the option parser for long options. They start at 1000
// so they cannot collide with the short option characters handled in main().
// NOTE: OPT_OPTIONS is likewise a variable, not a type name.
enum { OPT_HELP=1000, OPT_ANALYSE, OPT_DUMP, OPT_DUMP_DEFS, OPT_DUMP_DEFS_SHORT, OPT_IGNORE_BLOBS, OPT_VERBOSE, OPT_LINEAR,
OPT_QUIET, OPT_SKIP_RECORDS } OPT_OPTIONS;

// Long option table: { name, has_arg, flag, val }.
// has_arg is 1 for options that take a table name, 0 otherwise; flag is
// always NULL so the parser returns val directly.
struct option long_options[] = {
  { "help", 0, NULL, OPT_HELP },
  { "analyse", 1, NULL, OPT_ANALYSE },
  { "dump", 1, NULL, OPT_DUMP },
  { "dump-table-defs", 0, NULL, OPT_DUMP_DEFS },
  { "dump-table-defs-short", 0, NULL, OPT_DUMP_DEFS_SHORT },
  { "verbose", 0, NULL, OPT_VERBOSE },
  { "linear", 0, NULL, OPT_LINEAR },
  { "quiet", 0, NULL, OPT_QUIET },
  { "ignore-blobs", 0, NULL, OPT_IGNORE_BLOBS },
  { "skip-records", 0, NULL, OPT_SKIP_RECORDS },
  // All-zero sentinel: the option parser scans the table until it finds an
  // entry whose name is NULL, so the table must end with one.
  { NULL, 0, NULL, 0 }
  };

// Program version string, printed in the startup banner.
#define VERSION "0.1"

/**
 * Dump every record stored in the clustered index (indexes[0]) of a table.
 *
 * In the default mode the B-tree is descended from the index root along the
 * leftmost child pointers to the first leaf page, and the leaf pages are then
 * visited in the order given by page_get_next(). With --linear (opt_linear)
 * every page of the file is read in order instead, and only leaf index pages
 * whose index id matches the table's clustered index are dumped.
 *
 * @param fd         Open, readable file descriptor of the data file.
 * @param tablename  Table name as looked up by dict_find_table_by_name().
 *
 * Errors (unknown table, unreadable page, fstat failure) are reported and the
 * function returns without dumping further pages.
 */
void dump_clustered_index(int fd, std::string tablename)
{
  // Value of the FIL_PAGE_TYPE field that marks an index page.
  const int fil_page_type_index = 17855;

  char *pagedata;
  int nextpage = 0;
  off_t filesize = 0;

  INNOTABLE *table = dict_find_table_by_name(tablename);
  if(table == NULL) {
    printf("Table %s not found\n", tablename.c_str());
    return;
  }

  if(opt_linear) {
    // The file size is only needed to know where the linear scan ends.
    struct stat statbuf;
    if(fstat(fd, &statbuf) != 0) {
      fprintf(stderr, "Unable to stat data file: %s\n", strerror(errno));
      return;
    }
    filesize = statbuf.st_size;

    pagedata = page_read(fd, nextpage);
  } else {
    pagedata = page_read(fd, table->indexes[0]->pageno);

    if(opt_verbose)
      printf("Finding first page: %08x ", table->indexes[0]->pageno);

    // Walk down the leftmost branch until a leaf page is reached.
    while(pagedata && !page_is_leaf(pagedata)) {
      int leftmost = page_get_leftmost(table->indexes[0]->nodecolumns, pagedata);
      if(opt_verbose)
        printf("%08x ", leftmost);
      page_free(pagedata);
      pagedata = page_read(fd, leftmost);
    }
    // NOTE(review): this newline is emitted even when not verbose; kept for
    // output compatibility.
    printf("\n");
  }

  if(!pagedata) {
    fprintf(stderr, "Unable to read first page of table %s\n", tablename.c_str());
    return;
  }

  while(pagedata) {
    // Dump the page if not in linear mode, or linear mode and page is an index page
    // which is part of the index we're dumping (and is a leaf page)
    if(!opt_linear ||
       (read_ulshort(pagedata + FIL_PAGE_TYPE) == fil_page_type_index &&
        read_dulint(pagedata + PAGE_HEADER + PAGE_INDEX_ID) == table->indexes[0]->id &&
        read_ulshort(pagedata + PAGE_HEADER + PAGE_LEVEL) == 0)) {
      int comp = page_index_is_comp(pagedata);

      // The infimum is a pseudo-record; user records start after it.
      char *rec = rec_get_infimum(pagedata);
      rec = rec_get_next(comp, pagedata, rec);

      while(rec) {
        INNORECORD *record;
        if(comp) {
          record = rec_read_new(table->indexes[0]->columns, rec, pagedata);
        } else {
          record = rec_read_old(rec, pagedata);
        }

        if(record == NULL)
          break; // record broken, skip page

        dump_record_short(fd, record, table);
        delete record;
        rec = rec_get_next(comp, pagedata, rec);
      }
    }

    if(opt_linear) {
      nextpage++;
      // Widen before shifting: the byte offset does not fit in an int once
      // the file reaches 2 GiB.
      if((static_cast<off_t>(nextpage) << PAGE_SIZE_SHIFT) >= filesize)
        break;
    } else {
      nextpage = page_get_next(pagedata);
      if(nextpage == 0 || nextpage == -1)
        break;
    }

    page_free(pagedata);
    pagedata = page_read(fd, nextpage);
  }

  // Release the page still held when the loop ended via break.
  if(pagedata)
    page_free(pagedata);
}

void analyse_clustered_index(int fd, std::string tablename)
{
  char *rec;
  int nextpage;
  int rootlevel;
  unsigned long long pages = 0;
  unsigned long long records = 0;
  unsigned long long realrecords = 0;
  unsigned long long garbage = 0;
  unsigned long long free = 0;
  unsigned long long sequential = 0;
  unsigned long long proximity = 0;
  unsigned long long blobpages = 0;
  unsigned long long blobsize = 0;
  unsigned long long blobs = 0;
  unsigned long long blobsequential = 0;
  unsigned long long datalen = 0;
  int prevpage = 0;
  int comp;
  
  INNOTABLE *table = dict_find_table_by_name(tablename);
  if(table == NULL) {
    printf("Table %s not found\n", tablename.c_str());
    return;
  }
  
  char *pagedata = page_read(fd, table->indexes[0]->pageno);
  comp = page_index_is_comp(pagedata);
  
  rootlevel = read_ulshort(pagedata + PAGE_HEADER + PAGE_LEVEL);
  
  nextpage = table->indexes[0]->pageno;
  
  printf("Finding first page: %08x ", nextpage);
  while(!page_is_leaf(pagedata)) {
    nextpage = page_get_leftmost(table->indexes[0]->nodecolumns, pagedata);
    printf("%08x ", nextpage);
    page_free(pagedata);
    pagedata = page_read(fd, nextpage);
  }
  printf("\n");
  
  prevpage = nextpage;

  printf("Pages used by %s: ", tablename.c_str());
  while(1) {
    printf("I%08X ", nextpage);
  
    pages++;
    
    records += read_ulshort(pagedata + PAGE_HEADER + PAGE_N_RECS);
    garbage += read_ulshort(pagedata + PAGE_HEADER + PAGE_GARBAGE);
    free += PAGE_SIZE - (read_ulshort(pagedata + PAGE_HEADER + PAGE_HEAP_TOP) & ~0x8000) - PAGE_DIR - (read_ulshort(pagedata + PAGE_HEADER + PAGE_N_DIR_SLOTS)*PAGE_DIR_SLOT_SIZE);
    
    prevpage = nextpage;
    nextpage = page_get_next(pagedata);

    if(!opt_skip_records) {
        // Loop through records searching for blob data
        comp = page_index_is_comp(pagedata);
        rec = rec_get_infimum(pagedata);
        rec = rec_get_next(comp, pagedata, rec);
        
        while(rec) {
          int c = 0;
          INNORECORD *record;
          realrecords++;
          if(comp)
            record = rec_read_new(table->indexes[0]->columns, rec, pagedata);
          else
            record = rec_read_old(rec, pagedata);
            
          std::vector<INNOFIELD>::iterator iterFields;
          
          for(iterFields = record->field.begin(); iterFields != record->field.end(); iterFields++, c++) {
            if(!iterFields->isnull) {
              datalen += iterFields->len;
              if(iterFields->isblob) {
                blobs++;
                char *blobpagedata;
                int bloboffset;
                int blobpagelen;
                int blobnextpage;
                  
                blobnextpage = iterFields->blobpage;
                bloboffset = iterFields->bloboffset;
                  
                if(!opt_ignore_blobs) {
                  while(1) {
                    // Follow blob path
                    blobpages++;
                      
                    printf("B%08X ", blobnextpage);
                    fflush(stdout);
                    
                    blobpagedata = page_read(fd, blobnextpage);
                    blobpagelen = blob_get_data_len(blobpagedata, bloboffset);
                    prevpage = blobnextpage;
                    blobnextpage = blob_get_next_page(blobpagedata, bloboffset);
                      
                    if(blobnextpage == prevpage + 1)
                      blobsequential++;
                      
                    // Next page always starts as beginning of page
                    bloboffset = FIL_PAGE_DATA;
                      
                    blobsize += blobpagelen;
                      
                    page_free(blobpagedata);
                    if(blobnextpage == -1)
                      break;
                  }
                }
              }
            }
          }
          delete record;
          rec = rec_get_next(comp, pagedata, rec);
        }
    }
    if(nextpage == 0 || nextpage == -1)
      break;

    if(prevpage == nextpage -1)
      sequential++;
      
    proximity+= abs(prevpage-nextpage);

    page_free(pagedata);
    pagedata = page_read(fd, nextpage);
  }

  printf("\n\n");
  printf("Total leaf pages: %lld (%lld bytes)\n", pages, (unsigned long long)pages*PAGE_SIZE);
  printf("Sequential pages: %lld (%.2f%% fragmented)\n", sequential+1, (float)100*(pages-sequential-1)/pages);
  printf("Free space in pages: %lld (%.2f%% full avg)\n", free+garbage, (1.0 - ((float)(free+garbage)/(pages*PAGE_FREE_EMPTY)))*100.0);
  printf("Records (Page count): %lld (%d bytes effective per record avg)\n", records, (unsigned int)((unsigned long long)pages*PAGE_SIZE)/records);
  printf("Actual records: %lld (should match page count records)\n", realrecords);
  printf("Garbage: %lld bytes\n", garbage);
  printf("Proximity: %.2f pages avg\n", (float)proximity/pages);
  printf("B-tree levels (including leaf): %d\n", rootlevel+1);
  printf("Blobs: %lld\n", blobs);
  printf("Blob pages: %lld (%lld bytes)\n", blobpages, (unsigned long long)blobpages*PAGE_SIZE);
  if(blobpages) {
    printf("Blob sequential pages: %lld (%.2f%% fragmented)\n", blobsequential, (float)100*(blobpages-blobs-blobsequential)/(blobpages-blobs));
    printf("Blob data size: %lld (%lld bytes per blob avg)\n", blobsize, blobsize / blobs);
    printf("Blob-to-Leaf ratio: %.2f\n", (float)blobpages / pages);  
  }
  printf("Total data size in records: %lld\n", datalen);
  printf("Total data size: %lld pages (%lld bytes)\n", blobpages + pages, (unsigned long long)(blobpages+pages)*PAGE_SIZE);
  printf("Overhead: %.2f%%\n", ((float)((blobpages+pages)*PAGE_SIZE)*100/datalen) -100);
}

// Print the command-line help text (modes, options and the expected table
// name format) to standard output.
void usage() {
  printf("Usage: innoinfo [options] <mode> <ibdatafile>\n");
  printf("\n");
  printf("  -h, --help                      This help page\n");
  printf("Modes:\n");
  printf("  -a, --analyse <tablename>       Analyse table statistics\n");
  printf("  -d, --dump    <tablename>       Dump table data\n");
  printf("      --dump-table-defs           Dump table definitions\n");
  printf("      --dump-table-defs-short     Dump table definitions in short format\n");
  printf("Options:\n");
  printf("      --ignore-blobs              Ignore 'blob' data in tables\n");
  printf("  -v, --verbose                   Output just about everything\n");
  printf("  -q, --quiet                     Quiet mode\n");
  printf("      --linear                    Dump table data using 'linear' mode\n");
  printf("      --skip-records              Don't analyse actual records during 'analyse'\n");
  printf("\n");
  printf("Tablenames must be specified in the format database/table, eg\n");
  printf("mysql/users\n");
  printf("\n");
}


/**
 * Program entry point.
 *
 * Parses the command line, opens the data file named by the first non-option
 * argument, loads the data dictionary (tables, columns, indexes, index
 * fields) starting from the dictionary header page, and then runs the
 * selected mode.
 *
 * Exits with status 1 on usage errors or when the file or the dictionary
 * page cannot be read; returns 0 otherwise.
 */
int main(int argc, char *argv[]) {
  int fd;
  // Only set (and only used) by the analyse and dump modes.
  char *tablename = NULL;

  while(1) {
    int c = my_getopt_long(argc, argv, "ha:d:vq", long_options, NULL);

    if(c == -1)
      break;

    switch(c) {
    case 'h':
    case OPT_HELP:
      usage();
      exit(0);
    case 'a':
    case OPT_ANALYSE:
      opt_mode = OPT_MODE_ANALYSE;
      tablename = my_optarg;
      break;
    case 'd':
    case OPT_DUMP:
      opt_mode = OPT_MODE_DUMP;
      tablename = my_optarg;
      break;
    case OPT_DUMP_DEFS:
      opt_mode = OPT_MODE_DUMP_DEFS;
      break;
    case OPT_DUMP_DEFS_SHORT:
      opt_mode = OPT_MODE_DUMP_DEFS_SHORT;
      break;
    case OPT_IGNORE_BLOBS:
      opt_ignore_blobs = 1;
      break;
    case 'v':
    case OPT_VERBOSE:
      opt_verbose = 1;
      break;
    case OPT_LINEAR:
      opt_linear = 1;
      break;
    case 'q':
    case OPT_QUIET:
      opt_quiet = 1;
      break;
    case OPT_SKIP_RECORDS:
      opt_skip_records = 1;
      break;
    default:
      // Unrecognised option or missing option argument: do not silently
      // carry on with a command line the user did not intend.
      usage();
      exit(1);
    }
  }

  if(!opt_quiet) {
    printf("Starting innoinfo version %s, copyright 2007 Steve Hardy, Zarafa BV, The Netherlands\n", VERSION);
    printf("This program comes with NO WARRANTY and is distributed under the GNU GPL.\n\n");
  }

  if(opt_mode == 0) {
    printf("You must specify a mode.\n");
    usage();
    exit(1);
  }

  if(my_optind >= argc) {
    printf("Not enough parameters, you must specify the ibdata file\n");
    usage();
    exit(1);
  }

  fd = open(argv[my_optind], O_RDONLY);
  if(fd < 0) {
    // Report the path that was actually opened, not argv[1] (which is
    // usually an option).
    fprintf(stderr, "Unable to open %s: %s\n", argv[my_optind], strerror(errno));
    exit(1);
  }

  // Get the dictionary header page
  if(opt_verbose) {
    printf("Reading dictionary page (page %d)\n", DICT_PAGE);
  }

  char *dict_hdr_page = page_read(fd, DICT_PAGE);
  if(dict_hdr_page == NULL) {
    fprintf(stderr, "Unable to read dictionary page (page %d) from %s\n", DICT_PAGE, argv[my_optind]);
    exit(1);
  }

  char *dict_hdr = dict_hdr_page + DICT_HDR;

  if(opt_verbose) {
    printf("Dictionary header:\n");

    printf(" Max row id: %d\n", read_ulint(dict_hdr+DICT_HDR_ROW_ID));
    printf(" Max table id: %d\n", read_ulint(dict_hdr+DICT_HDR_TABLE_ID));
    printf(" Max index id: %d\n", read_ulint(dict_hdr+DICT_HDR_INDEX_ID));
    printf(" Max mix id: %d\n", read_ulint(dict_hdr+DICT_HDR_MIX_ID));
    printf(" Tables index root page: %d\n", read_ulint(dict_hdr+DICT_HDR_TABLES));
    printf(" Table ids index root page: %d\n", read_ulint(dict_hdr+DICT_HDR_TABLE_IDS));
    printf(" Columns index root page: %d\n", read_ulint(dict_hdr+DICT_HDR_COLUMNS));
    printf(" Indexes index root page: %d\n", read_ulint(dict_hdr+DICT_HDR_INDEXES));
    printf(" Fields index root page: %d\n", read_ulint(dict_hdr+DICT_HDR_FIELDS));
    printf("\n");
  }

  // Load the dictionary from the root pages listed in the header.
  load_table_defs(fd, read_ulint(dict_hdr+DICT_HDR_TABLES));
  load_column_defs(fd, read_ulint(dict_hdr+DICT_HDR_COLUMNS));
  load_index_defs(fd, read_ulint(dict_hdr+DICT_HDR_INDEXES));
  load_index_field_defs(fd, read_ulint(dict_hdr+DICT_HDR_FIELDS));
  load_index_columns();

  switch(opt_mode) {
    case OPT_MODE_DUMP:
      dump_clustered_index(fd, tablename);
      break;
    case OPT_MODE_ANALYSE:
      analyse_clustered_index(fd, tablename);
      break;
    case OPT_MODE_DUMP_DEFS:
      dump_table_defs();
      break;
    case OPT_MODE_DUMP_DEFS_SHORT:
      dump_table_defs_short();
      break;
    default:
      printf("Undefined mode?\n");
      break;
  }

  page_free(dict_hdr_page);
  return 0;
}
