From 8819448e9d47bf5fcb0efa5d28b598f3f19c6dd2 Mon Sep 17 00:00:00 2001 From: David Date: Wed, 12 Jul 2017 16:25:19 -0300 Subject: [PATCH 1/6] run in debug by default --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 8f809dd..d80bc82 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,8 @@ # LOCATION=/usr/local -CFLAGS=-Wall -g -I. -O2 -#CFLAGS=-Wall -ggdb -I. -O0 +#CFLAGS=-Wall -g -I. -O2 +CFLAGS=-Wall -ggdb -I. -O0 OBJ=undark OFILES=varint.o From a307bcd7056b2a6520d143fbb3b4bb6c2440b7f2 Mon Sep 17 00:00:00 2001 From: David Date: Wed, 12 Jul 2017 16:37:39 -0300 Subject: [PATCH 2/6] fix indentation+warnings, remove unused code --- undark.c | 625 ++++++++++++++++++++++++------------------------------- 1 file changed, 277 insertions(+), 348 deletions(-) diff --git a/undark.c b/undark.c index 6d32898..9e013fd 100644 --- a/undark.c +++ b/undark.c @@ -1,17 +1,17 @@ /** - * undark - generic data puller from SQLite DBs. - * - * Rather CPU intensive likely, relies on the correlation - * that the SQLite length of payload should be the same - * as the summation of the payload cell sizes. - * - * Written by Paul L Daniels (pldaniels@pldaniels.com) - * - * BSD Revised licence ( see LICENCE ) - * - * Original version released October 6, 2013 - * - */ +* undark - generic data puller from SQLite DBs. +* +* Rather CPU intensive likely, relies on the correlation +* that the SQLite length of payload should be the same +* as the summation of the payload cell sizes. +* +* Written by Paul L Daniels (pldaniels@pldaniels.com) +* +* BSD Revised licence ( see LICENCE ) +* +* Original version released October 6, 2013 +* +*/ #include #include @@ -156,15 +156,15 @@ char help[] = "-i [-d] [-v] [-V|--version] [--cellcount-min=] ; /*-----------------------------------------------------------------\ - Date Code: : 20131023-105927 - Function Name : UNDARK_init - Returns Type : int - ----Parameter List - 1. struct globals *g , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131023-105927 +Function Name : UNDARK_init +Returns Type : int +----Parameter List +1. struct globals *g , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: -------------------------------------------------------------------- @@ -174,8 +174,8 @@ char help[] = "-i [-d] [-v] [-V|--version] [--cellcount-min=] int UNDARK_init( struct globals *g ) { /** - * Initialise our globals - */ + * Initialise our globals + */ g->page_size = 0; g->page_count = 0; g->page_number = 1; @@ -208,17 +208,17 @@ int UNDARK_init( struct globals *g ) { /*-----------------------------------------------------------------\ - Date Code: : 20131023-105933 - Function Name : UNDARK_parse_parameters - Returns Type : int - ----Parameter List - 1. int argc, - 2. char **argv, - 3. struct globals *g , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131023-105933 +Function Name : UNDARK_parse_parameters +Returns Type : int +----Parameter List +1. int argc, +2. char **argv, +3. struct globals *g , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: -------------------------------------------------------------------- @@ -230,9 +230,9 @@ int UNDARK_parse_parameters( int argc, char **argv, struct globals *g ) { int param; /** - * We need at least some additional parameters - * - */ + * We need at least some additional parameters + * + */ if (argc < 2) { fprintf(stderr,"%s", help); exit(1); @@ -240,9 +240,9 @@ int UNDARK_parse_parameters( int argc, char **argv, struct globals *g ) { /** - * Decode the input parameters. - * Yes, I know, I should do this using gnu params etc. - */ + * Decode the input parameters. + * Yes, I know, I should do this using gnu params etc. + */ for (param = 1; param < argc; param++) { char *p = argv[param]; @@ -260,7 +260,7 @@ int UNDARK_parse_parameters( int argc, char **argv, struct globals *g ) { } } else if (strncmp(p,"--", 2) == 0) { - DEBUG fprintf(stderr,"Parameter: '%s' %d\n", p, strlen(PARAM_BLOB_SIZE_LIMIT)); + DEBUG fprintf(stderr,"Parameter: '%s' %lu\n", p, strlen(PARAM_BLOB_SIZE_LIMIT)); // extended parameters if (strncmp(p,PARAM_VERSION, strlen(PARAM_VERSION))==0) { @@ -341,15 +341,15 @@ int UNDARK_parse_parameters( int argc, char **argv, struct globals *g ) { /*-----------------------------------------------------------------\ - Date Code: : 20131006-121932 - Function Name : to_signed_byte - Returns Type : char - ----Parameter List - 1. unsigned char value, - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131006-121932 +Function Name : to_signed_byte +Returns Type : char +----Parameter List +1. unsigned char value, +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: Converts 2's compliment byte to signed integer @@ -383,16 +383,16 @@ long int to_signed_long( unsigned long int value ) { /*-----------------------------------------------------------------\ - Date Code: : 20131002-220244 - Function Name : tdump - Returns Type : int - ----Parameter List - 1. char *p, - 2. uint16_t l , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131002-220244 +Function Name : tdump +Returns Type : int +----Parameter List +1. char *p, +2. uint16_t l , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: -------------------------------------------------------------------- @@ -413,16 +413,16 @@ int tdump( char *p, uint16_t l ) { /*-----------------------------------------------------------------\ - Date Code: : 20131006-122020 - Function Name : sqltdump - Returns Type : int - ----Parameter List - 1. char *p, - 2. uint16_t l , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131006-122020 +Function Name : sqltdump +Returns Type : int +----Parameter List +1. char *p, +2. uint16_t l , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: Dumps text in a SQL friendly format ( doubling of single quotes ) @@ -447,16 +447,16 @@ int sqltdump( char *p, uint16_t l ) { /*-----------------------------------------------------------------\ - Date Code: : 20131007-004650 - Function Name : blob_dump - Returns Type : int - ----Parameter List - 1. unsigned char *p, - 2. uint16_t l , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131007-004650 +Function Name : blob_dump +Returns Type : int +----Parameter List +1. unsigned char *p, +2. uint16_t l , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: -------------------------------------------------------------------- @@ -479,16 +479,16 @@ int blob_dump( unsigned char *p, uint16_t l ) { /*-----------------------------------------------------------------\ - Date Code: : 20131002-220250 - Function Name : hdump - Returns Type : int - ----Parameter List - 1. char *p, - 2. uint16_t l , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131002-220250 +Function Name : hdump +Returns Type : int +----Parameter List +1. char *p, +2. uint16_t l , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: Combo hex + text dump, 16 byte wide rows @@ -551,17 +551,17 @@ int hdump( unsigned char *p, uint16_t length, char *msg ) { /*-----------------------------------------------------------------\ - Date Code: : 20131007-184003 - Function Name : blob_dump_to_file - Returns Type : int - ----Parameter List - 1. struct globals *glob, - 2. char *p, - 3. size_t l , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131007-184003 +Function Name : blob_dump_to_file +Returns Type : int +----Parameter List +1. struct globals *glob, +2. char *p, +3. size_t l , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: -------------------------------------------------------------------- @@ -574,12 +574,12 @@ int blob_dump_to_file( struct globals *g, char *p, size_t l ) { char fn[1024]; snprintf(fn, sizeof(fn), "%d.blob", g->blob_count); - DEBUG fprintf(stdout,"%s:%d:DEBUG: Writing %d bytes to %s\n", FL , l, fn ); + DEBUG fprintf(stdout,"%s:%d:DEBUG: Writing %lu bytes to %s\n", FL , l, fn ); f = open(fn, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR ); if (!f) { fprintf(stderr,"Cannot open %s (%s)\n", fn, strerror(errno)); return 1; } written = write(f, p, l); if ( written != l ) { - fprintf(stderr,"Wrote %d of %d bytes to %s ( %s )\n", written, l, fn, strerror(errno)); + fprintf(stderr,"Wrote %ld of %ld bytes to %s ( %s )\n", written, l, fn, strerror(errno)); close(f); return 1; } @@ -591,17 +591,17 @@ int blob_dump_to_file( struct globals *g, char *p, size_t l ) { /*-----------------------------------------------------------------\ - Date Code: : 20131003-223556 - Function Name : *bstrstr - Returns Type : char - ----Parameter List - 1. char *needle, - 2. char *haystack, - 3. char *limit , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131003-223556 +Function Name : *bstrstr +Returns Type : char +----Parameter List +1. char *needle, +2. char *haystack, +3. char *limit , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: Searches for the needle among a haystack possibly containing \0 delimeted data. @@ -640,16 +640,16 @@ char *bstrstr( char *haystack, char *needle, char *limit ) { /*-----------------------------------------------------------------\ - Date Code: : 20131004-175721 - Function Name : decode_row_meta - Returns Type : int - ----Parameter List - 1. uint8_t *p, - 2. struct sql_payload *payload , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131004-175721 +Function Name : decode_row_meta +Returns Type : int +----Parameter List +1. uint8_t *p, +2. struct sql_payload *payload , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: Decodes the payload header data so that we can then later @@ -771,7 +771,7 @@ int decode_row( struct globals *g, char *p, char *data_endpoint, struct sql_payl plh_ep += payload->header_size; // if we got a sane value, then we can use this for the full decode size ( includes the size of the first varint telling us the size ) - DEBUG { fprintf(stdout,"[L:%lld][id:%lld][PLHz:%lld]", payload->length, payload->rowid, payload->header_size); } + DEBUG { fprintf(stdout,"[L:%lu][id:%lu][PLHz:%lu]", payload->length, payload->rowid, payload->header_size); } t = 0; offset = 0; @@ -807,7 +807,7 @@ int decode_row( struct globals *g, char *p, char *data_endpoint, struct sql_payl offset += payload->cells[t].s; if (offset > payload->length) return 0; - DEBUG { fprintf(stdout,"[%d:%d:%d-%d(%d)]", t, payload->cells[t].t, payload->cells[t].s, payload->cells[t].o, plh_ep -p ); } + DEBUG { fprintf(stdout,"[%d:%d:%d-%d(%ld)]", t, payload->cells[t].t, payload->cells[t].s, payload->cells[t].o, plh_ep -p ); } if (p >= plh_ep) break; t++; @@ -817,12 +817,12 @@ int decode_row( struct globals *g, char *p, char *data_endpoint, struct sql_payl if (p == plh_ep) { DEBUG { - fprintf(stdout,"DEBUG: Payload head size match. (%d =? %d)\n ", p -base,plh_ep -base); + fprintf(stdout,"DEBUG: Payload head size match. (%ld =? %ld)\n ", p -base,plh_ep -base); fprintf(stdout,"DEBUG: Data size by cell meta sum = %d\n ", offset ); } } else { DEBUG { - fprintf(stdout,"DEBUG: Payload scan end point, and predicted end point didn't match, difference %d \n", p -plh_ep ); + fprintf(stdout,"DEBUG: Payload scan end point, and predicted end point didn't match, difference %ld \n", p -plh_ep ); } } @@ -835,8 +835,8 @@ int decode_row( struct globals *g, char *p, char *data_endpoint, struct sql_payl if (mode == DECODE_MODE_FREESPACE) { /** there can often be multiple entries within freespace, so we have to be - * a little looser with our acceptance criterion - */ + * a little looser with our acceptance criterion + */ if (offset <= payload->length) { DEBUG fprintf(stdout,"%s:%d:DEBUG: FREESPACE SUBMATCH FOUND ( %u of %lu used )\n", FL , offset, (long unsigned int) payload->length); return (offset +payload->header_size +4); @@ -853,10 +853,10 @@ int decode_row( struct globals *g, char *p, char *data_endpoint, struct sql_payl /*-----------------------------------------------------------------\ - Date Code: : 20150723-210259 - Function Name : ntonll - Returns Type : uint64_t - ----Parameter List +Date Code: : 20150723-210259 +Function Name : ntonll +Returns Type : uint64_t + ----Parameter List 1. uint64_t value, ------------------ Exit Codes : @@ -871,27 +871,23 @@ Side Effects : uint64_t ntohll(uint64_t value) { // hdump( &value, 8, "FP: "); - if (1==ntohl(1)) { - return value; - } else { - return ((ntohl((value) & 0xFFFFFFFF) << 32) | ntohl((value) >> 32)); - } + return value; } /*-----------------------------------------------------------------\ - Date Code: : 20131008-182215 - Function Name : dump_row - Returns Type : int - ----Parameter List - 1. struct globals *glob, - 2. char *p, - 3. char *data_endpoint, - 4. struct sql_payload *payload, - 5. int decode , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131008-182215 +Function Name : dump_row +Returns Type : int +----Parameter List +1. struct globals *glob, +2. char *p, +3. char *data_endpoint, +4. struct sql_payload *payload, +5. int decode , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: -------------------------------------------------------------------- @@ -969,64 +965,64 @@ int dump_row( struct globals *g, char *base, char *data_endpoint, struct sql_pay case 0: fprintf(stdout,"NULL"); break; case 1: fprintf(stdout,"x%d", to_signed_byte(*(payload->mapped_data +payload->cells[t].o)) ); break; case 2: { - uint16_t n; - memcpy(&n, payload->mapped_data +payload->cells[t].o, 2 ); - fprintf(stdout,"%d" , to_signed_int(ntohs(n))); - } - break; + uint16_t n; + memcpy(&n, payload->mapped_data +payload->cells[t].o, 2 ); + fprintf(stdout,"%d" , to_signed_int(ntohs(n))); + } + break; case 3: { - uint32_t n; - memcpy(&n, payload->mapped_data +payload->cells[t].o, 3 ); - fprintf(stdout,"%ld", to_signed_long(ntohl(n))); - } - break; + uint32_t n; + memcpy(&n, payload->mapped_data +payload->cells[t].o, 3 ); + fprintf(stdout,"%ld", to_signed_long(ntohl(n))); + } + break; case 4: { - uint32_t n; - memcpy(&n, payload->mapped_data +payload->cells[t].o, 4 ); - fprintf(stdout,"%ld", to_signed_long(ntohl(n))); - } - break; + uint32_t n; + memcpy(&n, payload->mapped_data +payload->cells[t].o, 4 ); + fprintf(stdout,"%ld", to_signed_long(ntohl(n))); + } + break; case 5: fprintf(stdout,"%d", ntohl(*(payload->mapped_data +payload->cells[t].o))); break; case 6: fprintf(stdout,"%d", ntohl(*(payload->mapped_data +payload->cells[t].o))); break; case 7: - { - uint64_t n; + { + uint64_t n; memcpy(&n, payload->mapped_data +payload->cells[t].o, 8 ); ldf = (long double)ntohll(n); - fprintf(stdout,"%LF",ldf); - } - break; + fprintf(stdout,"%LF",ldf); + } + break; case 8: fprintf(stdout,"0" ); break; case 9: fprintf(stdout,"1" ); break; case 12: - if ( g->report_blobs) { - if (payload->cells[t].s < g->blob_size_limit) { - DEBUG fprintf(stdout,"%s:%d:DEBUG:Not Dumping data to blob file, keeping in CSV\n", FL ); - blob_dump((unsigned char *) (payload->mapped_data +payload->cells[t].o), payload->cells[t].s ); - } else { - // dump the blob to a file. - DEBUG fprintf(stdout,"%s:%d:DEBUG:Dumping data to %d.blob [%d bytes]\n", FL ,g->blob_count, payload->cells[t].s); - blob_dump_to_file( g, (payload->mapped_data +payload->cells[t].o), payload->cells[t].s ); - DEBUG fprintf(stdout,"\"%d.blob\"", g->blob_count); - } - } - g->blob_count++; - break; + if ( g->report_blobs) { + if (payload->cells[t].s < g->blob_size_limit) { + DEBUG fprintf(stdout,"%s:%d:DEBUG:Not Dumping data to blob file, keeping in CSV\n", FL ); + blob_dump((unsigned char *) (payload->mapped_data +payload->cells[t].o), payload->cells[t].s ); + } else { + // dump the blob to a file. + DEBUG fprintf(stdout,"%s:%d:DEBUG:Dumping data to %d.blob [%d bytes]\n", FL ,g->blob_count, payload->cells[t].s); + blob_dump_to_file( g, (payload->mapped_data +payload->cells[t].o), payload->cells[t].s ); + DEBUG fprintf(stdout,"\"%d.blob\"", g->blob_count); + } + } + g->blob_count++; + break; case 13: - DEBUG fprintf(stdout,"%s:%d:DEBUG: Dumping text-13\n", FL ); - sqltdump( payload->mapped_data +payload->cells[t].o, payload->cells[t].s ); - break; + DEBUG fprintf(stdout,"%s:%d:DEBUG: Dumping text-13\n", FL ); + sqltdump( payload->mapped_data +payload->cells[t].o, payload->cells[t].s ); + break; default: - fprintf(stderr,"Invalid cell type '%d'", payload->cells[t].t); - DEBUG fprintf(stdout,"%s:%d:DEBUG: Invalid cell type '%d'", FL, payload->cells[t].t); - DEBUG hdump( (unsigned char *) base, 128, "Invalid cell type" ); - return 0; - break; + fprintf(stderr,"Invalid cell type '%d'", payload->cells[t].t); + DEBUG fprintf(stdout,"%s:%d:DEBUG: Invalid cell type '%d'", FL, payload->cells[t].t); + DEBUG hdump( (unsigned char *) base, 128, "Invalid cell type" ); + return 0; + break; } // switch cell type } @@ -1047,16 +1043,16 @@ int dump_row( struct globals *g, char *base, char *data_endpoint, struct sql_pay /*-----------------------------------------------------------------\ - Date Code: : 20131004-211659 - Function Name : *find_next_sms - Returns Type : char - ----Parameter List - 1. char *s, - 2. char *end_point , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131004-211659 +Function Name : *find_next_sms +Returns Type : char +----Parameter List +1. char *s, +2. char *end_point , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: Finds rows within a block. @@ -1083,8 +1079,8 @@ char *find_next_row( struct globals *g, char *s, char *end_point, char *global_s fflush(stdout); /** If we're only wanting the removed, no-key-value rows, then - * continue to the next row - */ + * continue to the next row + */ if ((g->removed_only)&&(row >= 0)) { p++; continue; @@ -1126,16 +1122,16 @@ char *find_next_row( struct globals *g, char *s, char *end_point, char *global_s /*-----------------------------------------------------------------\ - Date Code: : 20131002-220317 - Function Name : main - Returns Type : int - ----Parameter List - 1. int argc, - 2. char **argv , - ------------------ - Exit Codes : - Side Effects : - -------------------------------------------------------------------- +Date Code: : 20131002-220317 +Function Name : main +Returns Type : int +----Parameter List +1. int argc, +2. char **argv , +------------------ +Exit Codes : +Side Effects : +-------------------------------------------------------------------- Comments: -------------------------------------------------------------------- @@ -1151,14 +1147,14 @@ int main( int argc, char **argv ) { int stat_result; /** - * Set up our global struct. - * - * We do this as a local var, rather than global so that it forces - * us to pass it through the functions, rather than _assuming_ it's - * available globally, which makes it a lot easier to migrate things - * to other libs/modules later - * - */ + * Set up our global struct. + * + * We do this as a local var, rather than global so that it forces + * us to pass it through the functions, rather than _assuming_ it's + * available globally, which makes it a lot easier to migrate things + * to other libs/modules later + * + */ g = &globo; @@ -1167,9 +1163,9 @@ int main( int argc, char **argv ) { UNDARK_parse_parameters( argc, argv, g ); /** - * Check our input file sanity - * - */ + * Check our input file sanity + * + */ stat_result = stat( g->input_file, &st ); if (stat_result != 0) { fprintf(stderr,"ERROR: Cannot access input file '%s' ( %s )\n", g->input_file, strerror(errno)); @@ -1178,10 +1174,10 @@ int main( int argc, char **argv ) { /** - * Map our input file to memory, makes it a lot easier - * to jump around if we need to and saves us having to - * handle buffer limits - leave it to the OS to manage :) - */ + * Map our input file to memory, makes it a lot easier + * to jump around if we need to and saves us having to + * handle buffer limits - leave it to the OS to manage :) + */ fd = open( g->input_file, O_RDONLY ); g->db_size = st.st_size; g->db_origin = mmap( NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0 ); @@ -1190,25 +1186,25 @@ int main( int argc, char **argv ) { //fprintf(stderr,"DB origin: %p\nDB end: %p\n", g->db_origin, g->db_end ); /** - * Start decoding the database - * - * Though it's not really required for us to care about the - * SQLite page sizes, it can be useful in case we get boundary - * situations and try to follow the data across a page - * - * If the page size is already set via parameter, then skip - * - */ + * Start decoding the database + * + * Though it's not really required for us to care about the + * SQLite page sizes, it can be useful in case we get boundary + * situations and try to follow the data across a page + * + * If the page size is already set via parameter, then skip + * + */ if (g->page_size == 0) { p = g->db_origin +16; g->page_size = (*(p+1)) | ((*p)<<8); } /** - * Get the number of pages that are supposed to be in the database, though - * we can ignore this and simply parse through the whole DB page at a time - * until we reach the end - */ + * Get the number of pages that are supposed to be in the database, though + * we can ignore this and simply parse through the whole DB page at a time + * until we reach the end + */ p = g->db_origin +28; memcpy( &g->page_count, g->db_origin +28, 4 ); // copy the page count from the header g->page_count = ntohl( g->page_count ); // convert to local format @@ -1216,9 +1212,9 @@ int main( int argc, char **argv ) { DEBUG fprintf(stdout,"Pagesize: %u, Pagecount: %u\n", g->page_size, g->page_count); /** - * Get the free list meta data - * - */ + * Get the free list meta data + * + */ memcpy( &g->freelist_first_page, g->db_origin +32, 4 ); // copy the page count from the header g->freelist_first_page = ntohl( g->freelist_first_page ); DEBUG fprintf(stdout,"First page of freelist trunk: %d\n", g->freelist_first_page ); @@ -1229,76 +1225,9 @@ int main( int argc, char **argv ) { /** - * Get the actual free list pages - * - */ - if (0) { - if (g->freelist_page_count) { - g->freelist_pages = malloc( (g->freelist_page_count +1) *sizeof(uint32_t) ); - if (!g->freelist_pages) { - fprintf(stderr,"ERROR: Cannot allocate memory to build page free list\n"); - exit(1); - } else { - uint32_t next_page; - uint32_t pli; - - next_page = g->freelist_first_page; - g->freelist_pages[0] = next_page; - g->freelist_pages[1] = 0; - pli = 1; - if ( pli < g->freelist_page_count ) { - do { - uint32_t tmp_page, leaf_page_count; - char *fp, *current_page_endpoint; - uint32_t jump; - - jump = ((next_page-2) *g->page_size); - fp = g->db_origin +jump; - current_page_endpoint = fp +g->page_size; - fprintf(stdout,"Freelist - current trunk page = %d [ offset: %X ]\n", next_page, jump); - hdump((unsigned char*)fp, g->page_size, "Current trunk page"); - DEBUG fflush(stdout); - - memcpy( &tmp_page, fp, sizeof(uint32_t)); - tmp_page = ntohl(tmp_page); - fp += sizeof(uint32_t); - DEBUG fprintf(stdout,"Next trunk page (if any): %d\n",tmp_page); - DEBUG fflush(stdout); - - memcpy( &leaf_page_count, fp, sizeof(uint32_t)); - leaf_page_count = ntohl(leaf_page_count); - fp += sizeof(uint32_t); - DEBUG fprintf(stdout,"Leaf page count: %d\n",leaf_page_count); - DEBUG fflush(stdout); - - //while ((pli <= g->freelist_page_count)&&( fp < current_page_endpoint )) { - while (( fp < current_page_endpoint )&&( leaf_page_count-- )) { - hdump((unsigned char*)fp, 16, "Next free page possible"); - memcpy( &(g->freelist_pages[pli]), fp, sizeof(uint32_t)); - g->freelist_pages[pli] = ntohl( g->freelist_pages[pli] ); - DEBUG fprintf(stdout, "Next free page[%d]: %d\n", pli, g->freelist_pages[pli]); - if (g->freelist_pages[pli] == 0) { - fprintf(stdout,"End of freelist detected\n"); - fflush(stdout); - break; - } - fflush(stdout); - pli++; - fp+= sizeof(uint32_t); - } - - next_page = tmp_page; - } while (next_page > 0); - fprintf(stdout,"Freepages - END\n"); - fflush(stdout); - } - } // if there were more than one page - } - - } - - - + * Get the actual free list pages + * + */ g->db_cfp = g->db_cpp = g->db_origin; /* @@ -1306,7 +1235,7 @@ int main( int argc, char **argv ) { g->freelist_pages_current_index = 0; data = f+((g->freelist_pages[g->freelist_pages_current_index]-1) *g->page_size); } - */ + */ DEBUG fprintf(stdout,"%s:%d:DEBUG: Commence decoding data\n", FL ); fflush(stdout); @@ -1323,7 +1252,7 @@ int main( int argc, char **argv ) { /* process the block, mostly this is just removing any 0-bytes from the block so our strstr() calls aren't prematurely terminated. - */ + */ DEBUG { char *p; size_t l; @@ -1364,11 +1293,11 @@ int main( int argc, char **argv ) { leaf.page_byte = 13; /** - * Get freeblock offset and determine if we have a free block in this - * page that needs to be inspected. This is one of the more commonly - * needed parts of data for our row recovery - * - */ + * Get freeblock offset and determine if we have a free block in this + * page that needs to be inspected. This is one of the more commonly + * needed parts of data for our row recovery + * + */ memcpy( &(leaf.freeblock_offset), (g->db_cfp +1), 2 ); leaf.freeblock_offset = ntohs( leaf.freeblock_offset ); if (leaf.freeblock_offset > 0) { @@ -1420,18 +1349,18 @@ int main( int argc, char **argv ) { ); /** - * If we're wanting free block sourced data, then simply jump - * to the start of the free block space and commence the searching - * in the next section ( find_next_row ). - * - * After this the g->db_cfp pointer should be sitting on the first - * varint of the payload header which defines the header length - * (inclusive) - * - * Detecting rows in the freeblocks is done differently to the - * normal data, so - * - */ + * If we're wanting free block sourced data, then simply jump + * to the start of the free block space and commence the searching + * in the next section ( find_next_row ). + * + * After this the g->db_cfp pointer should be sitting on the first + * varint of the payload header which defines the header length + * (inclusive) + * + * Detecting rows in the freeblocks is done differently to the + * normal data, so + * + */ if (g->freelist_space_only) { if ((leaf.freeblock_offset > 0) && (leaf.freeblock_size > 0)) { @@ -1480,18 +1409,18 @@ int main( int argc, char **argv ) { } /** - * NOTE: Free #pages# are different to freeblocks within a page - * - * - if ((g->freelist_space_only)&&(g->freelist_page_count)) { - g->freelist_pages_current_index++; - if (g->freelist_pages_current_index > g->freelist_page_count) { - DEBUG fprintf(stdout,"%s:%d:DEBUG:Out of freespace pages, exiting search\n", FL ); - break; - } - data = f+((g->freelist_pages[g->freelist_pages_current_index] -1) *g->page_size); - } else { - */ + * NOTE: Free #pages# are different to freeblocks within a page + * + * + if ((g->freelist_space_only)&&(g->freelist_page_count)) { + g->freelist_pages_current_index++; + if (g->freelist_pages_current_index > g->freelist_page_count) { + DEBUG fprintf(stdout,"%s:%d:DEBUG:Out of freespace pages, exiting search\n", FL ); + break; + } + data = f+((g->freelist_pages[g->freelist_pages_current_index] -1) *g->page_size); + } else { + */ { g->db_cpp += g->page_size; g->page_number++; From 8e8cfd35194fcc07661b040b78e7c48644204569 Mon Sep 17 00:00:00 2001 From: David Date: Sat, 15 Jul 2017 14:59:13 -0300 Subject: [PATCH 3/6] add example invokation --- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d80bc82..52276ad 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ LOCATION=/usr/local #CFLAGS=-Wall -g -I. -O2 -CFLAGS=-Wall -ggdb -I. -O0 +CFLAGS=-Wall -ggdb -I. -Og OBJ=undark OFILES=varint.o @@ -26,3 +26,6 @@ install: ${OBJ} clean: rm -f *.o *core ${OBJ} + +run: + ./undark --cellcount-min=50 --cellcount-max=60 --no-blobs --rowsize-min=420 --rowsize-max=1000 -i ../sms.db >sms-data.csv From 402e74a5c77e7574f134b88b50ecbbf6379cf26e Mon Sep 17 00:00:00 2001 From: David Date: Sat, 15 Jul 2017 14:59:46 -0300 Subject: [PATCH 4/6] check if we've gone over the limit and exit --- undark.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/undark.c b/undark.c index 9e013fd..c8a34e3 100644 --- a/undark.c +++ b/undark.c @@ -1286,7 +1286,7 @@ int main( int argc, char **argv ) { leaf.page_number = g->page_number; /* Decode the page header */ - if (*(g->db_cfp) == 13) { + if (*(g->db_cfp) == 13) { DEBUG fprintf(stdout,"%s:%d:DEBUG: Decoding page header for page %d\n", FL , g->page_number ); fflush(stdout); @@ -1396,7 +1396,7 @@ int main( int argc, char **argv ) { if (row > g->db_cpp_limit) fprintf(stdout,"ERROR: beyond end point\n"); if (row < g->db_cfp) DEBUG fprintf(stdout,"%s:%d:DEBUG: Row location not in g->db_cfp page\n", FL ); if (row == NULL) DEBUG fprintf(stdout,"%s:%d:DEBUG: Row has been returned as NULL\n", FL ); - DEBUG fprintf(stdout,"%s:%d:DEBUG: ROW found at offset: %ld\n", FL, row-g->db_cfp); + DEBUG fprintf(stdout,"%s:%d:DEBUG: ROW found at offset: %lu\n", FL, row-g->db_cfp); } else { break; @@ -1426,6 +1426,9 @@ int main( int argc, char **argv ) { g->page_number++; } + if (g->page_count < g->page_number) + break; + } // while (data < endpoint) close(fd); From e0b7bcbdabd24fce24f619d58bc40dfbb1124de3 Mon Sep 17 00:00:00 2001 From: David Date: Mon, 17 Jul 2017 14:41:05 -0300 Subject: [PATCH 5/6] update to 0.7.1 release. add some debugging to find out why rows with sizes between 4096 and 5300 fail --- undark.c | 1080 +++++++++++++++++++++++++++--------------------------- 1 file changed, 545 insertions(+), 535 deletions(-) diff --git a/undark.c b/undark.c index c8a34e3..03e3585 100644 --- a/undark.c +++ b/undark.c @@ -17,22 +17,17 @@ #include #include #include -//#include +#include #include #include #include #include -//#include +#include +//#include + #include #include #include -#ifndef _WIN32 -#include -#include -#else -#include -#include -#endif #include "varint.h" @@ -133,7 +128,7 @@ struct sqlite_leaf_header { }; -char version[] = "undark version 0.7, by Paul L Daniels ( pldaniels@pldaniels.com )\n"; +char version[] = "undark version 0.7.1, by Paul L Daniels ( pldaniels@pldaniels.com )\n"; char help[] = "-i [-d] [-v] [-V|--version] [--cellcount-min=] [--cellcount-max=] [--rowsize-min=] [--rowsize-max=] [--no-blobs] [--blob-size-limit=] [--page-size=] [--page-start=] [--page-end=] [--freespace] [--freespace-minimum=]\n" "\t-i: input SQLite3 format database\n" "\t-d: enable debugging output (very large dumps)\n" @@ -235,6 +230,7 @@ int UNDARK_parse_parameters( int argc, char **argv, struct globals *g ) { */ if (argc < 2) { fprintf(stderr,"%s", help); + fprintf(stderr,"Sizeof double = %ld, long double = %ld\n", sizeof(double), sizeof(long double)); exit(1); } @@ -260,7 +256,7 @@ int UNDARK_parse_parameters( int argc, char **argv, struct globals *g ) { } } else if (strncmp(p,"--", 2) == 0) { - DEBUG fprintf(stderr,"Parameter: '%s' %lu\n", p, strlen(PARAM_BLOB_SIZE_LIMIT)); + DEBUG fprintf(stderr,"Parameter: '%s' %d\n", p, (int)strlen(PARAM_BLOB_SIZE_LIMIT)); // extended parameters if (strncmp(p,PARAM_VERSION, strlen(PARAM_VERSION))==0) { @@ -574,7 +570,7 @@ int blob_dump_to_file( struct globals *g, char *p, size_t l ) { char fn[1024]; snprintf(fn, sizeof(fn), "%d.blob", g->blob_count); - DEBUG fprintf(stdout,"%s:%d:DEBUG: Writing %lu bytes to %s\n", FL , l, fn ); + DEBUG fprintf(stdout,"%s:%d:DEBUG: Writing %ld bytes to %s\n", FL , l, fn ); f = open(fn, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR ); if (!f) { fprintf(stderr,"Cannot open %s (%s)\n", fn, strerror(errno)); return 1; } written = write(f, p, l); @@ -707,7 +703,10 @@ int decode_row( struct globals *g, char *p, char *data_endpoint, struct sql_payl // If the payload size exceeds the page_size, then we have to do some more checking + // page size: 4096, payload->length: 4121 to 5229 CRASH if (payload->length > (g->page_size -35)) { + //printf("Payload length: %ld, Page size: %u\n", payload->length, g->page_size); + //printf("this payload > page size\n"); uint32_t tmp, ovp; int ovpi = 1; @@ -764,295 +763,325 @@ int decode_row( struct globals *g, char *p, char *data_endpoint, struct sql_payl ovpi++; } } - } // overflow handling - - if (payload->header_size > g->page_size) return 0; // sorry, no can do with the way we're playing this decoding game. - if (payload->header_size < 2) return 0; // need at least 2 bytes + } // overflow handling - plh_ep += payload->header_size; // if we got a sane value, then we can use this for the full decode size ( includes the size of the first varint telling us the size ) + if (payload->header_size > g->page_size) return 0; // sorry, no can do with the way we're playing this decoding game. + if (payload->header_size < 2) return 0; // need at least 2 bytes - DEBUG { fprintf(stdout,"[L:%lu][id:%lu][PLHz:%lu]", payload->length, payload->rowid, payload->header_size); } + plh_ep += payload->header_size; // if we got a sane value, then we can use this for the full decode size ( includes the size of the first varint telling us the size ) - t = 0; - offset = 0; + DEBUG { fprintf(stdout,"[L:%lld][id:%lld][PLHz:%lld]",(long long int) payload->length, (long long int)payload->rowid, (long long int)payload->header_size); } + t = 0; + offset = 0; + + + while (1) { + uint64_t s; + int vil; + + vil = varint_decode( &s, p, &p ); + + if (vil > 8) return 0; // no var int should be bigger than 8 bytes. + + payload->cells[t].t = s; // set the type + switch (s) { + case 0: s = 0; break; + case 1: s = 1; break; + case 2: s = 2; break; + case 3: s = 3; break; + case 4: s = 4; break; + case 5: s = 6; break; + case 6: case 7: s = 8; break; + case 8: case 9: s = 0; break; + case 10: case 11: DEBUG fprintf(stdout,"%s:%d:DEBUG: celltype 10/11 reserved, aborting row.\n",FL); s = 0; return 0; break; + default: + if ((s >= 12)&&((s&0x01)==0)) { payload->cells[t].t = 12; s = (s-12)/2; } + else if ((s >= 13)&&((s&0x01)==1)) { payload->cells[t].t = 13; s = (s-13)/2; } + break; + } - while (1) { - uint64_t s; - int vil; + payload->cells[t].s = s; // set the size/length + payload->cells[t].o = (plh_ep +offset) -base; + offset += payload->cells[t].s; + if (offset > payload->length) return 0; - vil = varint_decode( &s, p, &p ); + DEBUG { fprintf(stdout,"[%d:%d:%d-%d(%ld)]", t, payload->cells[t].t, payload->cells[t].s, payload->cells[t].o, plh_ep -p ); } - if (vil > 8) return 0; // no var int should be bigger than 8 bytes. + if (p >= plh_ep) break; + t++; + payload->cell_count++; + if ( t > g->cc_max ) return 0; + } // while decoding the cells - payload->cells[t].t = s; // set the type - switch (s) { - case 0: s = 0; break; - case 1: s = 1; break; - case 2: s = 2; break; - case 3: s = 3; break; - case 4: s = 4; break; - case 5: s = 6; break; - case 6: case 7: s = 8; break; - case 8: case 9: s = 0; break; - case 10: case 11: DEBUG fprintf(stdout,"%s:%d:DEBUG: celltype 10/11 reserved, aborting row.\n",FL); s = 0; return 0; break; - default: - if ((s >= 12)&&((s&0x01)==0)) { payload->cells[t].t = 12; s = (s-12)/2; } - else if ((s >= 13)&&((s&0x01)==1)) { payload->cells[t].t = 13; s = (s-13)/2; } - break; + if (p == plh_ep) { + DEBUG { + fprintf(stdout,"DEBUG: Payload head size match. (%ld =? %ld)\n ", p -base,plh_ep -base); + fprintf(stdout,"DEBUG: Data size by cell meta sum = %d\n ", offset ); + } + } else { + DEBUG { + fprintf(stdout,"DEBUG: Payload scan end point, and predicted end point didn't match, difference %ld \n", p -plh_ep ); + } } - payload->cells[t].s = s; // set the size/length - payload->cells[t].o = (plh_ep +offset) -base; - offset += payload->cells[t].s; - if (offset > payload->length) return 0; - - DEBUG { fprintf(stdout,"[%d:%d:%d-%d(%ld)]", t, payload->cells[t].t, payload->cells[t].s, payload->cells[t].o, plh_ep -p ); } - - if (p >= plh_ep) break; - t++; - payload->cell_count++; - if ( t > g->cc_max ) return 0; - } // while decoding the cells - - if (p == plh_ep) { - DEBUG { - fprintf(stdout,"DEBUG: Payload head size match. (%ld =? %ld)\n ", p -base,plh_ep -base); - fprintf(stdout,"DEBUG: Data size by cell meta sum = %d\n ", offset ); - } - } else { - DEBUG { - fprintf(stdout,"DEBUG: Payload scan end point, and predicted end point didn't match, difference %ld \n", p -plh_ep ); + if ( t < g->cc_min ) { + DEBUG fprintf(stdout,"%s:%d:DEBUG: cell count under the minimum, so aborting\n", FL ); + return 0; } - } - if ( t < g->cc_min ) { - DEBUG fprintf(stdout,"%s:%d:DEBUG: cell count under the minimum, so aborting\n", FL ); - return 0; - } + DEBUG fprintf(stdout,"Offset [%u] + headersize [%lu] = length check [%lu]... \n", offset, (unsigned long int)payload->header_size, (unsigned long int)payload->length); - DEBUG fprintf(stdout,"Offset [%u] + headersize [%lu] = length check [%lu]... \n", offset, (unsigned long int)payload->header_size, (unsigned long int)payload->length); + if (mode == DECODE_MODE_FREESPACE) { + /** there can often be multiple entries within freespace, so we have to be + * a little looser with our acceptance criterion + */ + if (offset <= payload->length) { + DEBUG fprintf(stdout,"%s:%d:DEBUG: FREESPACE SUBMATCH FOUND ( %u of %lu used )\n", FL , offset, (long unsigned int) payload->length); + return (offset +payload->header_size +4); + } + } - if (mode == DECODE_MODE_FREESPACE) { - /** there can often be multiple entries within freespace, so we have to be - * a little looser with our acceptance criterion - */ - if (offset <= payload->length) { - DEBUG fprintf(stdout,"%s:%d:DEBUG: FREESPACE SUBMATCH FOUND ( %u of %lu used )\n", FL , offset, (long unsigned int) payload->length); - return (offset +payload->header_size +4); + if (offset + payload->header_size == payload->length) { + DEBUG fprintf(stdout,"\nMATCH FOUND!\n"); + return 1; } - } - if (offset + payload->header_size == payload->length) { - DEBUG fprintf(stdout,"\nMATCH FOUND!\n"); - return 1; + return 0; } - return 0; -} - -/*-----------------------------------------------------------------\ -Date Code: : 20150723-210259 -Function Name : ntonll -Returns Type : uint64_t + /*-----------------------------------------------------------------\ + Date Code: : 20150723-210259 + Function Name : ntonll + Returns Type : uint64_t ----Parameter List 1. uint64_t value, ------------------ -Exit Codes : -Side Effects : --------------------------------------------------------------------- + Exit Codes : + Side Effects : + -------------------------------------------------------------------- Comments: -------------------------------------------------------------------- Changes: \------------------------------------------------------------------*/ -uint64_t ntohll(uint64_t value) { -// hdump( &value, 8, "FP: "); - return value; -} + uint64_t swap64(uint64_t x) { + uint8_t i; + uint64_t y ; + uint8_t *px, *py; -/*-----------------------------------------------------------------\ -Date Code: : 20131008-182215 -Function Name : dump_row -Returns Type : int -----Parameter List -1. struct globals *glob, -2. char *p, -3. char *data_endpoint, -4. struct sql_payload *payload, -5. int decode , ------------------- -Exit Codes : -Side Effects : --------------------------------------------------------------------- + px = (uint8_t *)&x; + py = (uint8_t *)&y; + + for (i=0; i<8; i++) { + *(py+i) = *(px +(7-i)); + } + return y; + } + + uint64_t ntohll(uint64_t value) { + uint64_t t; +// hdump( &value, 8, "\nUxx: "); + if (1==ntohl(1)) { + return value; + } else { + t = swap64(value); +// hdump( &t, 8, "\nSWP: "); + return t; + } + } + + /*-----------------------------------------------------------------\ + Date Code: : 20131008-182215 + Function Name : dump_row + Returns Type : int + ----Parameter List + 1. struct globals *glob, + 2. char *p, + 3. char *data_endpoint, + 4. struct sql_payload *payload, + 5. int decode , + ------------------ + Exit Codes : + Side Effects : + -------------------------------------------------------------------- Comments: -------------------------------------------------------------------- Changes: \------------------------------------------------------------------*/ -int dump_row( struct globals *g, char *base, char *data_endpoint, struct sql_payload *payload, int mode ) { - int t = 0; - int ovpi; - void *addr; - + int dump_row( struct globals *g, char *base, char *data_endpoint, struct sql_payload *payload, int mode ) { + int t = 0; + int ovpi; + void *addr; - DEBUG fprintf(stdout,"\n-DUMPING ROW------------------\n"); - DEBUG hdump((unsigned char *)base, 16, "Dump_row starting data"); - if ( payload->length > g->db_size ) { - DEBUG fprintf(stdout,"%s:%d:ERROR: Nonsensical payload length of %ld requested, ignoring.\n", FL, (long int)payload->length); - return -1; - } + DEBUG fprintf(stdout,"\n-DUMPING ROW------------------\n"); + DEBUG hdump((unsigned char *)base, 16, "Dump_row starting data"); - if (payload->overflow_pages[0] == 0) { - payload->mapped_data = base; - payload->mapped_data_endpoint = data_endpoint; - - } else { - payload->mapped_data = malloc( (payload->length +100) *sizeof(char) ); - if ( !payload->mapped_data ) { - fprintf(stderr,"%s:%d:ERROR: Cannot allocate %ld bytes for mapped data\n", FL, (long int)payload->length +100); + if ( payload->length > g->db_size ) { + DEBUG fprintf(stdout,"%s:%d:ERROR: Nonsensical payload length of %ld requested, ignoring.\n", FL, (long int)payload->length); return -1; } - DEBUG fprintf(stdout,"ALLOCATED %d bytes to mapped data\n", (int)(payload->length +100) ); - if (!payload->mapped_data){ fprintf(stderr,"ERROR: Cannot allocate %d bytes for payload\n", (int)(payload->length +1)); return 0; } - memset( payload->mapped_data, 'X', payload->length +1 ); - - // load in the first, default page. - DEBUG fprintf(stdout,"Copying data for initial page\n"); - memcpy(payload->mapped_data, base, data_endpoint -base ); - payload->mapped_data_endpoint = payload->mapped_data +(data_endpoint -base -4); - // DEBUG hdump( (unsigned char *)payload->mapped_data, payload->mapped_data_endpoint -payload->mapped_data +4 ); - - // Load in the overflow pages (if any) - ovpi = 0; - while (payload->overflow_pages[ovpi]) { - DEBUG fprintf(stdout,"Copying data from file to memory for page %d to offset [%d]\n", payload->overflow_pages[ovpi], (int)(payload->mapped_data_endpoint -payload->mapped_data)); - - addr = g->db_origin +((payload->overflow_pages[ovpi]-1) *g->page_size) +4; //PLD:20141221-2240 segfault fix - if (( addr < (void *)g->db_origin) || ( addr > (void *)g->db_end)) { - DEBUG fprintf(stdout,"%s:%d:dump_row:ERROR: page seek request outside of boundaries of file (%p < %p > %p)\n", FL, g->db_origin, addr, g->db_end); + + if (payload->overflow_pages[0] == 0) { + payload->mapped_data = base; + payload->mapped_data_endpoint = data_endpoint; + + } else { + int msize = (payload->length +100)*sizeof(uint8_t); + printf("plength %ld, total: %d\n", payload->length, msize); + //__asm__("int $3"); + //FIXME: This malloc causes a double free / corruption when msize > 4096 + payload->mapped_data = malloc(msize); + if ( !payload->mapped_data ) { + fprintf(stderr,"%s:%d:ERROR: Cannot allocate %ld bytes for mapped data\n", FL, (long int)payload->length +100); return -1; } + DEBUG fprintf(stdout,"ALLOCATED %d bytes to mapped data\n", (int)(payload->length +100) ); + if (!payload->mapped_data){ fprintf(stderr,"ERROR: Cannot allocate %d bytes for payload\n", (int)(payload->length +1)); return 0; } + memset( payload->mapped_data, 'X', payload->length +1 ); - memcpy(payload->mapped_data_endpoint, addr, g->page_size -4); - payload->mapped_data_endpoint += g->page_size -4; + // load in the first, default page. + DEBUG fprintf(stdout,"Copying data for initial page\n"); + memcpy(payload->mapped_data, base, data_endpoint -base ); + payload->mapped_data_endpoint = payload->mapped_data +(data_endpoint -base -4); + // DEBUG hdump( (unsigned char *)payload->mapped_data, payload->mapped_data_endpoint -payload->mapped_data +4 ); - // DEBUG hdump( (unsigned char *)payload->mapped_data, payload->mapped_data_endpoint -payload->mapped_data ); + // Load in the overflow pages (if any) + ovpi = 0; + while (payload->overflow_pages[ovpi]) { + DEBUG fprintf(stdout,"Copying data from file to memory for page %d to offset [%d]\n", payload->overflow_pages[ovpi], (int)(payload->mapped_data_endpoint -payload->mapped_data)); - ovpi++; - } - } + addr = g->db_origin +((payload->overflow_pages[ovpi]-1) *g->page_size) +4; //PLD:20141221-2240 segfault fix + if (( addr < (void *)g->db_origin) || ( addr+4 > (void *)g->db_end)) { + DEBUG fprintf(stdout,"%s:%d:dump_row:ERROR: page seek request outside of boundaries of file (%p < %p > %p)\n", FL, g->db_origin, addr, g->db_end); + return -1; + } - DEBUG hdump((unsigned char *)payload->mapped_data, payload->mapped_data_endpoint -payload->mapped_data, "Payload mapped data" ); + memcpy(payload->mapped_data_endpoint, addr, g->page_size -4); + payload->mapped_data_endpoint += g->page_size -4; - if (mode == DECODE_MODE_FREESPACE) { - t = 0; - fprintf(stdout,"-1"); - - } else t = -1; - - while (t <= payload->cell_count) { - long double ldf; - DEBUG fprintf(stdout,"%s:%d:DEBUG: Cell[%d], Type:%d, size:%d, offset:%d\n", FL , t, payload->cells[t].t, payload->cells[t].s, payload->cells[t].o); - if (t == -1) fprintf(stdout,"%ld", (long unsigned int) payload->rowid); - if (t>=0) { fprintf(stdout,","); - switch (payload->cells[t].t) { - case 0: fprintf(stdout,"NULL"); break; - case 1: fprintf(stdout,"x%d", to_signed_byte(*(payload->mapped_data +payload->cells[t].o)) ); break; - case 2: { - uint16_t n; - memcpy(&n, payload->mapped_data +payload->cells[t].o, 2 ); - fprintf(stdout,"%d" , to_signed_int(ntohs(n))); - } - break; + // DEBUG hdump( (unsigned char *)payload->mapped_data, payload->mapped_data_endpoint -payload->mapped_data ); - case 3: { - uint32_t n; - memcpy(&n, payload->mapped_data +payload->cells[t].o, 3 ); - fprintf(stdout,"%ld", to_signed_long(ntohl(n))); - } - break; + ovpi++; + } + } - case 4: { - uint32_t n; - memcpy(&n, payload->mapped_data +payload->cells[t].o, 4 ); - fprintf(stdout,"%ld", to_signed_long(ntohl(n))); - } - break; + DEBUG hdump((unsigned char *)payload->mapped_data, payload->mapped_data_endpoint -payload->mapped_data, "Payload mapped data" ); + + if (mode == DECODE_MODE_FREESPACE) { + t = 0; + fprintf(stdout,"-1"); + + } else t = -1; + + while (t <= payload->cell_count) { + DEBUG fprintf(stdout,"%s:%d:DEBUG: Cell[%d], Type:%d, size:%d, offset:%d\n", FL , t, payload->cells[t].t, payload->cells[t].s, payload->cells[t].o); + if (t == -1) fprintf(stdout,"%ld", (long unsigned int) payload->rowid); + if (t>=0) { fprintf(stdout,","); + switch (payload->cells[t].t) { + case 0: fprintf(stdout,"NULL"); break; + case 1: fprintf(stdout,"x%d", to_signed_byte(*(payload->mapped_data +payload->cells[t].o)) ); break; + case 2: { + uint16_t n; + memcpy(&n, payload->mapped_data +payload->cells[t].o, 2 ); + fprintf(stdout,"%d" , to_signed_int(ntohs(n))); + } + break; - case 5: fprintf(stdout,"%d", ntohl(*(payload->mapped_data +payload->cells[t].o))); break; - case 6: fprintf(stdout,"%d", ntohl(*(payload->mapped_data +payload->cells[t].o))); break; - case 7: - { - uint64_t n; - memcpy(&n, payload->mapped_data +payload->cells[t].o, 8 ); - ldf = (long double)ntohll(n); - fprintf(stdout,"%LF",ldf); - } - break; + case 3: { + uint32_t n; + memcpy(&n, payload->mapped_data +payload->cells[t].o, 3 ); + fprintf(stdout,"%ld", to_signed_long(ntohl(n))); + } + break; - case 8: fprintf(stdout,"0" ); break; - case 9: fprintf(stdout,"1" ); break; - case 12: - if ( g->report_blobs) { - if (payload->cells[t].s < g->blob_size_limit) { - DEBUG fprintf(stdout,"%s:%d:DEBUG:Not Dumping data to blob file, keeping in CSV\n", FL ); - blob_dump((unsigned char *) (payload->mapped_data +payload->cells[t].o), payload->cells[t].s ); - } else { - // dump the blob to a file. - DEBUG fprintf(stdout,"%s:%d:DEBUG:Dumping data to %d.blob [%d bytes]\n", FL ,g->blob_count, payload->cells[t].s); - blob_dump_to_file( g, (payload->mapped_data +payload->cells[t].o), payload->cells[t].s ); - DEBUG fprintf(stdout,"\"%d.blob\"", g->blob_count); + case 4: { + uint32_t n; + memcpy(&n, payload->mapped_data +payload->cells[t].o, 4 ); + fprintf(stdout,"%ld", to_signed_long(ntohl(n))); } - } - g->blob_count++; - break; + break; + + case 5: fprintf(stdout,"%d", ntohl(*(payload->mapped_data +payload->cells[t].o))); break; + case 6: fprintf(stdout,"%d", ntohl(*(payload->mapped_data +payload->cells[t].o))); break; + case 7: + { + uint64_t n; + uint64_t nn; + double *zz; + memcpy(&n, payload->mapped_data +payload->cells[t].o, 8 ); + nn = (double) ntohll(n); +// hdump( &nn, 8, "\nFPPP: "); + zz = (double *)&nn; + fprintf(stdout,"%f",*zz); + } + break; + + case 8: fprintf(stdout,"0" ); break; + case 9: fprintf(stdout,"1" ); break; + case 12: + if ( g->report_blobs) { + if (payload->cells[t].s < g->blob_size_limit) { + DEBUG fprintf(stdout,"%s:%d:DEBUG:Not Dumping data to blob file, keeping in CSV\n", FL ); + blob_dump((unsigned char *) (payload->mapped_data +payload->cells[t].o), payload->cells[t].s ); + } else { + // dump the blob to a file. + DEBUG fprintf(stdout,"%s:%d:DEBUG:Dumping data to %d.blob [%d bytes]\n", FL ,g->blob_count, payload->cells[t].s); + blob_dump_to_file( g, (payload->mapped_data +payload->cells[t].o), payload->cells[t].s ); + DEBUG fprintf(stdout,"\"%d.blob\"", g->blob_count); + } + } + g->blob_count++; + break; + + case 13: + DEBUG fprintf(stdout,"%s:%d:DEBUG: Dumping text-13\n", FL ); + sqltdump( payload->mapped_data +payload->cells[t].o, payload->cells[t].s ); + break; + default: + fprintf(stderr,"Invalid cell type '%d'", payload->cells[t].t); + DEBUG fprintf(stdout,"%s:%d:DEBUG: Invalid cell type '%d'", FL, payload->cells[t].t); + DEBUG hdump( (unsigned char *) base, 128, "Invalid cell type" ); + return 0; + break; + } // switch cell type + } - case 13: - DEBUG fprintf(stdout,"%s:%d:DEBUG: Dumping text-13\n", FL ); - sqltdump( payload->mapped_data +payload->cells[t].o, payload->cells[t].s ); - break; - default: - fprintf(stderr,"Invalid cell type '%d'", payload->cells[t].t); - DEBUG fprintf(stdout,"%s:%d:DEBUG: Invalid cell type '%d'", FL, payload->cells[t].t); - DEBUG hdump( (unsigned char *) base, 128, "Invalid cell type" ); - return 0; - break; - } // switch cell type - } + t++; - t++; + } // while decoding the cells - } // while decoding the cells + fprintf(stdout,"\n"); + fflush(stdout); + if (payload->overflow_pages[0] != 0) { + //__asm__("int $3"); + free( payload->mapped_data ); + //avoid double free? + } - fprintf(stdout,"\n"); - fflush(stdout); - if (payload->overflow_pages[0] != 0) { - free( payload->mapped_data ); + return 0; } - return 0; -} - -/*-----------------------------------------------------------------\ -Date Code: : 20131004-211659 -Function Name : *find_next_sms -Returns Type : char -----Parameter List -1. char *s, -2. char *end_point , ------------------- -Exit Codes : -Side Effects : --------------------------------------------------------------------- + /*-----------------------------------------------------------------\ + Date Code: : 20131004-211659 + Function Name : *find_next_sms + Returns Type : char + ----Parameter List + 1. char *s, + 2. char *end_point , + ------------------ + Exit Codes : + Side Effects : + -------------------------------------------------------------------- Comments: Finds rows within a block. @@ -1062,379 +1091,360 @@ Finds rows within a block. \------------------------------------------------------------------*/ -char *find_next_row( struct globals *g, char *s, char *end_point, char *global_start, int mode, size_t forced_length ) { + char *find_next_row( struct globals *g, char *s, char *end_point, char *global_start, int mode, size_t forced_length ) { - char *p; - struct sql_payload sql; + char *p; + struct sql_payload sql; - DEBUG fprintf(stdout,"find_next_row: MODE: %d\n", mode ); - if (s == NULL) fprintf(stdout,"ERROR: NULL passed as search-space parameter\n"); - p = s; - do { - int row; + DEBUG fprintf(stdout,"find_next_row: MODE: %d\n", mode ); + if (s == NULL) fprintf(stdout,"ERROR: NULL passed as search-space parameter\n"); + p = s; + do { + int row; - row = decode_row( g, p, end_point, &sql, mode, forced_length ); - if (row) { - DEBUG fprintf(stdout,"ROWID: %ld found [+%ld] record size: %d bytes\n", (unsigned long int)sql.rowid, p -global_start, (unsigned int)( sql.length+sql.prefix_length )); - fflush(stdout); + row = decode_row( g, p, end_point, &sql, mode, forced_length ); + if (row) { + DEBUG fprintf(stdout,"ROWID: %ld found [+%ld] record size: %d bytes\n", (unsigned long int)sql.rowid, p -global_start, (unsigned int)( sql.length+sql.prefix_length )); + fflush(stdout); - /** If we're only wanting the removed, no-key-value rows, then - * continue to the next row - */ - if ((g->removed_only)&&(row >= 0)) { - p++; - continue; - } + /** If we're only wanting the removed, no-key-value rows, then + * continue to the next row + */ + if ((g->removed_only)&&(row >= 0)) { + p++; + continue; + } - if ((mode == DECODE_MODE_NORMAL)&&( g->freelist_space_only == 1)) { - // do nothing - } else { - dump_row( g, p, end_point, &sql, mode ); - } + if ((mode == DECODE_MODE_NORMAL)&&( g->freelist_space_only == 1)) { + // do nothing + } else { + dump_row( g, p, end_point, &sql, mode ); + } - fflush(stdout); - if (mode == DECODE_MODE_NORMAL) { - if (g->fine_search) p++; - else p+= sql.length; - } else { - if (row >= forced_length) { - DEBUG fprintf(stdout,"%s:%d:DEBUG: No more data left in freespace block to examine\n", FL); - p = end_point; - break; + fflush(stdout); + if (mode == DECODE_MODE_NORMAL) { + if (g->fine_search) p++; + else p+= sql.length; } else { - p+=row; forced_length -= row; - DEBUG hdump((unsigned char *)p,64, "After freespace decode"); + if (row >= forced_length) { + DEBUG fprintf(stdout,"%s:%d:DEBUG: No more data left in freespace block to examine\n", FL); + p = end_point; + break; + } else { + p+=row; forced_length -= row; + DEBUG hdump((unsigned char *)p,64, "After freespace decode"); + } } + } else { + p++; } - } else { - p++; - } - } while (p < end_point -PAYLOAD_SIZE_MINIMUM); + } while (p < end_point -PAYLOAD_SIZE_MINIMUM); - return NULL; + return NULL; -} + } -/*-----------------------------------------------------------------\ -Date Code: : 20131002-220317 -Function Name : main -Returns Type : int -----Parameter List -1. int argc, -2. char **argv , ------------------- -Exit Codes : -Side Effects : --------------------------------------------------------------------- + /*-----------------------------------------------------------------\ + Date Code: : 20131002-220317 + Function Name : main + Returns Type : int + ----Parameter List + 1. int argc, + 2. char **argv , + ------------------ + Exit Codes : + Side Effects : + -------------------------------------------------------------------- Comments: -------------------------------------------------------------------- Changes: \------------------------------------------------------------------*/ -int main( int argc, char **argv ) { - - int fd; - struct globals globo, *g; - struct stat st; - char *p; - int stat_result; - - /** - * Set up our global struct. - * - * We do this as a local var, rather than global so that it forces - * us to pass it through the functions, rather than _assuming_ it's - * available globally, which makes it a lot easier to migrate things - * to other libs/modules later - * - */ - g = &globo; + int main( int argc, char **argv ) { + int fd; + struct globals globo, *g; + struct stat st; + char *p; + int stat_result; + /** + * Set up our global struct. + * + * We do this as a local var, rather than global so that it forces + * us to pass it through the functions, rather than _assuming_ it's + * available globally, which makes it a lot easier to migrate things + * to other libs/modules later + * + */ + g = &globo; - UNDARK_init( g ); - UNDARK_parse_parameters( argc, argv, g ); - /** - * Check our input file sanity - * - */ - stat_result = stat( g->input_file, &st ); - if (stat_result != 0) { - fprintf(stderr,"ERROR: Cannot access input file '%s' ( %s )\n", g->input_file, strerror(errno)); - exit(1); - } + UNDARK_init( g ); + UNDARK_parse_parameters( argc, argv, g ); - /** - * Map our input file to memory, makes it a lot easier - * to jump around if we need to and saves us having to - * handle buffer limits - leave it to the OS to manage :) - */ - fd = open( g->input_file, O_RDONLY ); - g->db_size = st.st_size; - g->db_origin = mmap( NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0 ); - g->db_end = g->db_origin +st.st_size -1; - - //fprintf(stderr,"DB origin: %p\nDB end: %p\n", g->db_origin, g->db_end ); + /** + * Check our input file sanity + * + */ + stat_result = stat( g->input_file, &st ); + if (stat_result != 0) { + fprintf(stderr,"ERROR: Cannot access input file '%s' ( %s )\n", g->input_file, strerror(errno)); + exit(1); + } - /** - * Start decoding the database - * - * Though it's not really required for us to care about the - * SQLite page sizes, it can be useful in case we get boundary - * situations and try to follow the data across a page - * - * If the page size is already set via parameter, then skip - * - */ - if (g->page_size == 0) { - p = g->db_origin +16; - g->page_size = (*(p+1)) | ((*p)<<8); - } - /** - * Get the number of pages that are supposed to be in the database, though - * we can ignore this and simply parse through the whole DB page at a time - * until we reach the end - */ - p = g->db_origin +28; - memcpy( &g->page_count, g->db_origin +28, 4 ); // copy the page count from the header - g->page_count = ntohl( g->page_count ); // convert to local format + /** + * Map our input file to memory, makes it a lot easier + * to jump around if we need to and saves us having to + * handle buffer limits - leave it to the OS to manage :) + */ + fd = open( g->input_file, O_RDONLY ); + g->db_size = st.st_size; + g->db_origin = mmap( NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0 ); + g->db_end = g->db_origin +st.st_size -1; - DEBUG fprintf(stdout,"Pagesize: %u, Pagecount: %u\n", g->page_size, g->page_count); + //fprintf(stderr,"DB origin: %p\nDB end: %p\n", g->db_origin, g->db_end ); - /** - * Get the free list meta data - * - */ - memcpy( &g->freelist_first_page, g->db_origin +32, 4 ); // copy the page count from the header - g->freelist_first_page = ntohl( g->freelist_first_page ); - DEBUG fprintf(stdout,"First page of freelist trunk: %d\n", g->freelist_first_page ); - - memcpy( &g->freelist_page_count, g->db_origin +36, 4 ); // copy the page count from the header - g->freelist_page_count = ntohl( g->freelist_page_count ); - DEBUG fprintf(stdout,"Freelist page count: %d\n", g->freelist_page_count ); + /** + * Start decoding the database + * + * Though it's not really required for us to care about the + * SQLite page sizes, it can be useful in case we get boundary + * situations and try to follow the data across a page + * + * If the page size is already set via parameter, then skip + * + */ + if (g->page_size == 0) { + p = g->db_origin +16; + g->page_size = (*(p+1)) | ((*p)<<8); + } + /** + * Get the number of pages that are supposed to be in the database, though + * we can ignore this and simply parse through the whole DB page at a time + * until we reach the end + */ + p = g->db_origin +28; + memcpy( &g->page_count, g->db_origin +28, 4 ); // copy the page count from the header + g->page_count = ntohl( g->page_count ); // convert to local format - /** - * Get the actual free list pages - * - */ - g->db_cfp = g->db_cpp = g->db_origin; + DEBUG fprintf(stdout,"Pagesize: %u, Pagecount: %u\n", g->page_size, g->page_count); - /* - if ((g->freelist_space_only)&&(g->freelist_page_count)) { - g->freelist_pages_current_index = 0; - data = f+((g->freelist_pages[g->freelist_pages_current_index]-1) *g->page_size); - } + /** + * Get the free list meta data + * */ + memcpy( &g->freelist_first_page, g->db_origin +32, 4 ); // copy the page count from the header + g->freelist_first_page = ntohl( g->freelist_first_page ); + DEBUG fprintf(stdout,"First page of freelist trunk: %d\n", g->freelist_first_page ); - DEBUG fprintf(stdout,"%s:%d:DEBUG: Commence decoding data\n", FL ); - fflush(stdout); + memcpy( &g->freelist_page_count, g->db_origin +36, 4 ); // copy the page count from the header + g->freelist_page_count = ntohl( g->freelist_page_count ); + DEBUG fprintf(stdout,"Freelist page count: %d\n", g->freelist_page_count ); - while (g->db_cfp < g->db_end ) { - struct sqlite_leaf_header leaf; - int freeblock_mode = 0; - /* load the next page from the file in to the scratch pad */ - g->db_cfp = g->db_cpp; - g->db_cpp_limit = g->db_cpp +g->page_size ; // was -1 ? + /** + * Get the actual free list pages + * + */ + g->db_cfp = g->db_cpp = g->db_origin; - DEBUG fprintf(stdout,"\n\n%s:%d:-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=START.\n", FL); + DEBUG fprintf(stdout,"%s:%d:DEBUG: Commence decoding data\n", FL ); + fflush(stdout); - /* process the block, mostly this is just removing any 0-bytes - from the block so our strstr() calls aren't prematurely terminated. - */ - DEBUG { - char *p; - size_t l; - int bc = 0; - - fprintf(stdout,"%s:%d:Dumping main block in RAW... [ Page No: %lu, Offset: %lu (0x%X), size : %d ]\n" - , FL - , (long unsigned int)g->page_number - , (long unsigned int)(g->db_cpp -g->db_origin) - , (unsigned int)(g->db_cpp -g->db_origin) - , g->page_size - ); - - p = g->db_cfp; - l = g->page_size; - while (l--) { - { if (isprint(*p)) { fprintf(stdout,"%c", *p); } else fprintf(stdout,"_");} - p++; - bc++; - if (bc%128 == 0) fprintf(stdout,"\n"); - } - fprintf(stdout,"\n"); - fflush(stdout); - } // debug + while (g->db_cfp < g->db_end ) { + struct sqlite_leaf_header leaf; + int freeblock_mode = 0; + /* load the next page from the file in to the scratch pad */ + g->db_cfp = g->db_cpp; + g->db_cpp_limit = g->db_cpp +g->page_size ; // was -1 ? + DEBUG fprintf(stdout,"\n\n%s:%d:-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=START.\n", FL); - leaf.freeblock_offset = 0; - leaf.freeblock_size = 0; - leaf.freeblock_next = 0; - leaf.page_number = g->page_number; + /* process the block, mostly this is just removing any 0-bytes + from the block so our strstr() calls aren't prematurely terminated. + */ + DEBUG { + char *p; + size_t l; + int bc = 0; + + fprintf(stdout,"%s:%d:Dumping main block in RAW... [ Page No: %lu, Offset: %lu (0x%X), size : %d ]\n" + , FL + , (long unsigned int)g->page_number + , (long unsigned int)(g->db_cpp -g->db_origin) + , (unsigned int)(g->db_cpp -g->db_origin) + , g->page_size + ); + + p = g->db_cfp; + l = g->page_size; + while ((l--)&&(p)) { + { if (isprint(*p)) { fprintf(stdout,"%c", *p); } else fprintf(stdout,"_");} + p++; + bc++; + if (bc%128 == 0) fprintf(stdout,"\n"); + } + fprintf(stdout,"\n"); + fflush(stdout); + } // debug - /* Decode the page header */ - if (*(g->db_cfp) == 13) { - DEBUG fprintf(stdout,"%s:%d:DEBUG: Decoding page header for page %d\n", FL , g->page_number ); - fflush(stdout); - leaf.page_byte = 13; - /** - * Get freeblock offset and determine if we have a free block in this - * page that needs to be inspected. This is one of the more commonly - * needed parts of data for our row recovery - * - */ - memcpy( &(leaf.freeblock_offset), (g->db_cfp +1), 2 ); - leaf.freeblock_offset = ntohs( leaf.freeblock_offset ); - if (leaf.freeblock_offset > 0) { - uint16_t next, sz, off; + leaf.freeblock_offset = 0; + leaf.freeblock_size = 0; + leaf.freeblock_next = 0; + leaf.page_number = g->page_number; - freeblock_mode = 1; - off = leaf.freeblock_offset; + /* Decode the page header */ + if (*(g->db_cfp) == 13) { - DEBUG fprintf(stdout,"%s:%d:DEBUG: FREEBLOCK mode ON: header decode [offset=%u]\n", FL , leaf.freeblock_offset); + DEBUG fprintf(stdout,"%s:%d:DEBUG: Decoding page header for page %d\n", FL , g->page_number ); + fflush(stdout); + leaf.page_byte = 13; - do { - DEBUG hdump((unsigned char *)(g->db_cfp +off), 16, "Freeblock header data"); + /** + * Get freeblock offset and determine if we have a free block in this + * page that needs to be inspected. This is one of the more commonly + * needed parts of data for our row recovery + * + */ + memcpy( &(leaf.freeblock_offset), (g->db_cfp +1), 2 ); + leaf.freeblock_offset = ntohs( leaf.freeblock_offset ); + if (leaf.freeblock_offset > 0) { + uint16_t next, sz, off; - memcpy( &next, ( g->db_cfp +off ), 2 ); - next = ntohs( next ); - memcpy( &sz, ( g->db_cfp +off +2 ), 2 ); - sz = ntohs( sz ); + freeblock_mode = 1; + off = leaf.freeblock_offset; - DEBUG fprintf(stdout,"%s:%d:DEBUG: Freeblock size = %u, next position = %u\n", FL, sz, next ); + DEBUG fprintf(stdout,"%s:%d:DEBUG: FREEBLOCK mode ON: header decode [offset=%u]\n", FL , leaf.freeblock_offset); - if (next) off = next; - } while (next); - DEBUG fprintf(stdout,"%s:%d:DEBUG: END OF FREEBLOCK TRACE\n", FL); + do { + DEBUG hdump((unsigned char *)(g->db_cfp +off), 16, "Freeblock header data"); - memcpy( &(leaf.freeblock_next), ( g->db_cfp +leaf.freeblock_offset ), 2 ); - leaf.freeblock_next = ntohs( leaf.freeblock_next ); - memcpy( &(leaf.freeblock_size), ( g->db_cfp +leaf.freeblock_offset +2 ), 2 ); - leaf.freeblock_size = ntohs( leaf.freeblock_size ); - } + memcpy( &next, ( g->db_cfp +off ), 2 ); + next = ntohs( next ); + memcpy( &sz, ( g->db_cfp +off +2 ), 2 ); + sz = ntohs( sz ); - DEBUG fprintf(stdout,"%s:%d:DEBUG: Freeblock offset = %u, size = %u, next block = %u \n", FL , leaf.freeblock_offset, leaf.freeblock_size, leaf.freeblock_next ); - if (leaf.freeblock_size > 0) { - DEBUG fprintf(stdout,"%s:%d:DEBUG: Freeblock data [ %d bytes total [4 bytes for header] ]\n", FL, leaf.freeblock_size ); - DEBUG hdump( (unsigned char *)(g->db_cfp +leaf.freeblock_offset+4), leaf.freeblock_size-4, "Actual data in free block" ); - } - fflush(stdout); - // leaf.freeblock_offset = ntohs( ta ); - leaf.cellcount = ntohs(*(g->db_cfp+3)); - leaf.cell_offset = ntohs(*(g->db_cfp+5)); - leaf.freebytes = (*(g->db_cfp+7)); - - DEBUG fprintf(stdout,"%s:%d:DEBUG: PAGEHEADER:%d pagebyte: %d, freeblock offset: %d, cell count: %d, first cell offset %d, free bytes %d\n", FL - , leaf.page_number - , leaf.page_byte - , leaf.freeblock_offset - , leaf.cellcount - , leaf.cell_offset - , leaf.freebytes - ); - - /** - * If we're wanting free block sourced data, then simply jump - * to the start of the free block space and commence the searching - * in the next section ( find_next_row ). - * - * After this the g->db_cfp pointer should be sitting on the first - * varint of the payload header which defines the header length - * (inclusive) - * - * Detecting rows in the freeblocks is done differently to the - * normal data, so - * - */ - if (g->freelist_space_only) { + DEBUG fprintf(stdout,"%s:%d:DEBUG: Freeblock size = %u, next position = %u\n", FL, sz, next ); - if ((leaf.freeblock_offset > 0) && (leaf.freeblock_size > 0)) { + if (next) off = next; + } while (next); + DEBUG fprintf(stdout,"%s:%d:DEBUG: END OF FREEBLOCK TRACE\n", FL); - DEBUG fprintf(stdout,"%s:%d:DEBUG: Shifting to freespace at %d from page start\n", FL , leaf.freeblock_offset); - g->db_cfp = g->db_cfp + leaf.freeblock_offset +4; + memcpy( &(leaf.freeblock_next), ( g->db_cfp +leaf.freeblock_offset ), 2 ); + leaf.freeblock_next = ntohs( leaf.freeblock_next ); + memcpy( &(leaf.freeblock_size), ( g->db_cfp +leaf.freeblock_offset +2 ), 2 ); + leaf.freeblock_size = ntohs( leaf.freeblock_size ); + } - DEBUG fprintf(stdout,"%s:%d:DEBUG: New position = %p\n", FL , g->db_cfp); - DEBUG hdump((unsigned char *)g->db_cfp -4,32, "Scratch pointer at freespace data start (including 4 byte header)"); - DEBUG fflush(stdout); + DEBUG fprintf(stdout,"%s:%d:DEBUG: Freeblock offset = %u, size = %u, next block = %u \n", FL , leaf.freeblock_offset, leaf.freeblock_size, leaf.freeblock_next ); + if (leaf.freeblock_size > 0) { + DEBUG fprintf(stdout,"%s:%d:DEBUG: Freeblock data [ %d bytes total [4 bytes for header] ]\n", FL, leaf.freeblock_size ); + DEBUG hdump( (unsigned char *)(g->db_cfp +leaf.freeblock_offset+4), leaf.freeblock_size-4, "Actual data in free block" ); + } + fflush(stdout); + // leaf.freeblock_offset = ntohs( ta ); + leaf.cellcount = ntohs(*(g->db_cfp+3)); + leaf.cell_offset = ntohs(*(g->db_cfp+5)); + leaf.freebytes = (*(g->db_cfp+7)); + + DEBUG fprintf(stdout,"%s:%d:DEBUG: PAGEHEADER:%d pagebyte: %d, freeblock offset: %d, cell count: %d, first cell offset %d, free bytes %d\n", FL + , leaf.page_number + , leaf.page_byte + , leaf.freeblock_offset + , leaf.cellcount + , leaf.cell_offset + , leaf.freebytes + ); + + /** + * If we're wanting free block sourced data, then simply jump + * to the start of the free block space and commence the searching + * in the next section ( find_next_row ). + * + * After this the g->db_cfp pointer should be sitting on the first + * varint of the payload header which defines the header length + * (inclusive) + * + * Detecting rows in the freeblocks is done differently to the + * normal data, so + * + */ + if (g->freelist_space_only) { + + if ((leaf.freeblock_offset > 0) && (leaf.freeblock_size > 0)) { + + DEBUG fprintf(stdout,"%s:%d:DEBUG: Shifting to freespace at %d from page start\n", FL , leaf.freeblock_offset); + g->db_cfp = g->db_cfp + leaf.freeblock_offset +4; + + DEBUG fprintf(stdout,"%s:%d:DEBUG: New position = %p\n", FL , g->db_cfp); + DEBUG hdump((unsigned char *)g->db_cfp -4,32, "Scratch pointer at freespace data start (including 4 byte header)"); + DEBUG fflush(stdout); + } } - } - fflush(stdout); - } // if we have a leaf page, which we can decode the header on. + fflush(stdout); + } // if we have a leaf page, which we can decode the header on. - //if ((leaf.page_byte == 13)) { - if (1) { + //if ((leaf.page_byte == 13)) { + if (1) { - char *row; - row = g->db_cfp; - DEBUG fprintf(stdout,"%s:%d:DEBUG: g->db_cfp search at = %p\n", FL , g->db_cfp); - do { + char *row; + row = g->db_cfp; + DEBUG fprintf(stdout,"%s:%d:DEBUG: g->db_cfp search at = %p\n", FL , g->db_cfp); + do { - if ((row > g->db_origin)&&(row < g->db_end)) { + if ((row > g->db_origin)&&(row < g->db_end)) { - row = find_next_row( g, row, g->db_cpp_limit, g->db_cfp, freeblock_mode, leaf.freeblock_size ); + row = find_next_row( g, row, g->db_cpp_limit, g->db_cfp, freeblock_mode, leaf.freeblock_size ); - //if (row > g->db_end) fprintf(stdout,"ERROR: beyond end point\n"); - if (row > g->db_cpp_limit) fprintf(stdout,"ERROR: beyond end point\n"); - if (row < g->db_cfp) DEBUG fprintf(stdout,"%s:%d:DEBUG: Row location not in g->db_cfp page\n", FL ); - if (row == NULL) DEBUG fprintf(stdout,"%s:%d:DEBUG: Row has been returned as NULL\n", FL ); - DEBUG fprintf(stdout,"%s:%d:DEBUG: ROW found at offset: %lu\n", FL, row-g->db_cfp); - } else { + //if (row > g->db_end) fprintf(stdout,"ERROR: beyond end point\n"); + if (row > g->db_cpp_limit) fprintf(stdout,"ERROR: beyond end point\n"); + if (row < g->db_cfp) DEBUG fprintf(stdout,"%s:%d:DEBUG: Row location not in g->db_cfp page\n", FL ); + if (row == NULL) DEBUG fprintf(stdout,"%s:%d:DEBUG: Row has been returned as NULL\n", FL ); + DEBUG fprintf(stdout,"%s:%d:DEBUG: ROW found at offset: %ld\n", FL, row-g->db_cfp); + } else { - break; - } + break; + } - } while (row && (row < g->db_cpp_limit )); - //} while (row && (row < g->db_cpp_limit ) && (row < g->db_end) ); + } while (row && (row < g->db_cpp_limit )); + //} while (row && (row < g->db_cpp_limit ) && (row < g->db_end) ); - DEBUG fprintf(stdout,"%s:%d:DEBUG: Finished searching for rows in DB page %d\n", FL , g->page_number); - } + DEBUG fprintf(stdout,"%s:%d:DEBUG: Finished searching for rows in DB page %d\n", FL , g->page_number); + } - /** - * NOTE: Free #pages# are different to freeblocks within a page - * - * - if ((g->freelist_space_only)&&(g->freelist_page_count)) { - g->freelist_pages_current_index++; - if (g->freelist_pages_current_index > g->freelist_page_count) { - DEBUG fprintf(stdout,"%s:%d:DEBUG:Out of freespace pages, exiting search\n", FL ); - break; - } - data = f+((g->freelist_pages[g->freelist_pages_current_index] -1) *g->page_size); - } else { - */ - { - g->db_cpp += g->page_size; - g->page_number++; - } - if (g->page_count < g->page_number) - break; + { + g->db_cpp += g->page_size; + g->page_number++; + } - } // while (data < endpoint) + if (g->page_count < g->page_number) + break; - close(fd); + } // while (data < endpoint) - return 0; - } + close(fd); + + return 0; + } - /** END **/ + /** END **/ From bc3a411164ecce32c8bdfed300205eefb8856225 Mon Sep 17 00:00:00 2001 From: Mygod Date: Fri, 14 Sep 2018 16:06:48 +0800 Subject: [PATCH 6/6] Fix double free --- undark.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/undark.c b/undark.c index 03e3585..5d6c8d2 100644 --- a/undark.c +++ b/undark.c @@ -933,10 +933,22 @@ int decode_row( struct globals *g, char *p, char *data_endpoint, struct sql_payl payload->mapped_data_endpoint = data_endpoint; } else { - int msize = (payload->length +100)*sizeof(uint8_t); + int msize = (payload->length)*sizeof(uint8_t); + + // count size of overflow pages (if any) + ovpi = 0; + while (payload->overflow_pages[ovpi]) { + addr = g->db_origin +((payload->overflow_pages[ovpi]-1) *g->page_size) +4; //PLD:20141221-2240 segfault fix + if (( addr < (void *)g->db_origin) || ( addr+4 > (void *)g->db_end)) { + DEBUG fprintf(stdout,"%s:%d:dump_row:ERROR: page seek request outside of boundaries of file (%p < %p > %p)\n", FL, g->db_origin, addr, g->db_end); + return -1; + } + msize += g->page_size -4; + ovpi++; + } + printf("plength %ld, total: %d\n", payload->length, msize); //__asm__("int $3"); - //FIXME: This malloc causes a double free / corruption when msize > 4096 payload->mapped_data = malloc(msize); if ( !payload->mapped_data ) { fprintf(stderr,"%s:%d:ERROR: Cannot allocate %ld bytes for mapped data\n", FL, (long int)payload->length +100); @@ -1062,7 +1074,6 @@ int decode_row( struct globals *g, char *p, char *data_endpoint, struct sql_payl if (payload->overflow_pages[0] != 0) { //__asm__("int $3"); free( payload->mapped_data ); - //avoid double free? } return 0;