// char_count 000601 18:30
// Compile: gcc char_count.c -lm -o char_count
// Run:     ./char_count [file]
// Counts the characters in a file and computes the information content
// (entropy).
// 060001 - modified COUNT.C - 1986 for first version
//
// NOTE(review): this file was recovered from a copy in which the header
// names of the #include lines and part of the text of list256() and
// lis_sort() had been stripped.  Only the headers this code needs are
// restored below; the reconstructed parts of list256() and lis_sort() are
// marked "RECONSTRUCTED" and should be checked against an intact copy.

#include <math.h>    /* log() in lis_sort(), "gcc -lm" links math library */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum {
    NUM_CHARS    = 256,   /* number of distinct byte values            */
    NAME_BUF_LEN = 81,    /* buffer for a typed-in file name           */
    HEADER_LINES = 2      /* lines printed before the file is counted  */
};

void list256(char[], unsigned[]);             /* prints title and table of 256 values */
int  lis_sort(char[], unsigned[], char[][3]); /* sorts, prints table and entropy      */
void my_sort(unsigned[], unsigned[], unsigned);

/*
 * Fill sym[] with a two-character printable symbol for every byte value:
 *   0..31    '^' followed by the matching letter ('^@', '^A', ...)
 *   32..126  a blank followed by the character itself
 *   127      "bs"
 *   128..255 the two upper-case hex digits of the value
 * Every entry is NUL-terminated.
 */
static void build_symbols(char sym[][3])
{
    static const char hex[] = "0123456789ABCDEF";
    int i;

    for (i = 0; i < 32; i++) {
        sym[i][0] = '^';
        sym[i][1] = (char)(i + '@');
    }
    for (i = 32; i < 127; i++) {
        sym[i][0] = ' ';
        sym[i][1] = (char)i;
    }
    sym[127][0] = 'b';
    sym[127][1] = 's';
    for (i = 128; i < NUM_CHARS; i++) {
        sym[i][0] = hex[i / 16];
        sym[i][1] = hex[i % 16];
    }
    for (i = 0; i < NUM_CHARS; i++)
        sym[i][2] = '\0';
}

/*
 * Program entry point.
 *
 * argv[1], when present, names the file to examine; otherwise the name is
 * read from standard input.  The file is read byte by byte in binary mode,
 * the per-byte counts are scaled to "occurrences per 10000 bytes", and the
 * two tables plus the share of bytes above 127 are printed.
 *
 * Exits with status 0 on success, or via exit(-1) when no file name can be
 * obtained or the file cannot be opened.
 */
int main(int argc, char **argv)
{
    char          name_buf[NAME_BUF_LEN]; /* file name typed at the prompt   */
    const char   *ip;                     /* input file name                 */
    FILE         *infile;                 /* input file stream               */
    unsigned      ch[NUM_CHARS] = {0};    /* per-byte count, then per-10000  */
    char          sym[NUM_CHARS][3];      /* printable symbol per byte value */
    unsigned long len    = 0;             /* no. of bytes in file            */
    unsigned long n127   = 0;             /* no. of bytes > 127              */
    int           nlines = HEADER_LINES;  /* no. of lines, + header          */
    int           c;

    build_symbols(sym);

    if (argc > 1) {
        ip = argv[1];
    } else {
        printf("\n Type Input File Name to Examine - ");
        fflush(stdout);
        /* Field width keeps the name inside name_buf. */
        if (scanf("%80s", name_buf) != 1) {
            fprintf(stderr, " No input file name given\n");
            exit(-1);
        }
        ip = name_buf;
    }
    printf("\n char_count vers 000601    File is %s\n\n", ip);

    infile = fopen(ip, "rb");
    if (infile == NULL) {
        perror(" Infile Not Opened");
        exit(-1);
    }

    while ((c = fgetc(infile)) != EOF) {  /* READ FILE by char.s */
        ch[c]++;
        if (c == 10)                      /* after a LF, increment nlines */
            nlines++;
        if (c > 127)                      /* count non-ASCII characters   */
            n127++;
        ++len;
    }
    if (ferror(infile))
        perror(" Read error");
    fclose(infile);

    /* --- PRINT SUMMARY --- */
    /* Scale counts to occurrences per 10000 bytes.  An empty file keeps
       all-zero counts instead of dividing by zero. */
    if (len > 0) {
        for (c = 0; c < NUM_CHARS; c++)
            ch[c] = (unsigned)(10000.0 * ch[c] / (double)len);
    }

    printf(" No. Char.s to EOF = %lu,  No. Lines = %d\n",
           len, nlines - HEADER_LINES);
    list256("Occurance of Single Characters", ch);
    (void)lis_sort("Occurance of Single Characters", ch, sym);

    printf("\n No. Char.s > 127 (not ASCII text) = %lu,  %lu %%\n",
           n127, len > 0 ? (100 * n127) / len : 0UL);
    return 0;
}

/*                                                                        */
/* --(line 183)----- \C\COUNT.2 ------------------------ 1-30-86 22:00    */

/*
 * PRINTS TITLE AND TABLE.
 *   p  points to the title
 *   v  array of 256 values
 *
 * The title is centred around column 39.
 *
 * NOTE(review): RECONSTRUCTED.  Only the signature, the centring statement
 * and the start of the padding loop survive in the recovered source; the
 * original comments describe a "16x8" / "8x16" table.  The 16-values-per-row
 * layout below is an assumption -- confirm against an intact copy.
 */
void list256(char p[], unsigned v[])
{
    int pad = 39 - (int)(strlen(p) / 2);  /* center title */
    int m;
    int c;

    printf("\n");
    for (m = 0; m < pad; m++)
        putchar(' ');
    printf("%s\n", p);

    for (c = 0; c < NUM_CHARS; c++) {
        printf("%5u", v[c]);
        if (c % 16 == 15)
            printf("\n");
    }
}

/*
 * Sorts the 256 values in descending order, prints them with their symbols,
 * then prints the entropy of the distribution.
 *   p  points to the title
 *   v  array of 256 values; main() passes occurrences per 10000 bytes
 *   s  two-character symbol for each byte value
 *
 * Returns 100 * (int)f, where f = entropy / 0.08 is the entropy expressed
 * as a percentage of 8 bits per byte.
 *
 * NOTE(review): RECONSTRUCTED.  Only the tail of this function (the
 * "pc > 0.000001" test, the entropy accumulation, f = d/0.08, the final
 * printf and the return) survives in the recovered source.  The sorted-table
 * layout, the derivation of pc from v[] and the value of d3 are assumptions:
 * d3 is presumably 1/ln(2) because the result is reported in bits -- confirm
 * against an intact copy.
 */
int lis_sort(char p[], unsigned v[], char s[][3])
{
    unsigned     cp[NUM_CHARS];          /* indexes into v[], sorted         */
    const double d3 = 1.0 / log(2.0);    /* natural log -> log base 2        */
    double       d = 0.0;                /* entropy, bits per byte           */
    double       pc;                     /* probability of one byte value    */
    float        f;                      /* entropy as % of 8 bits           */
    int          c;
    int          shown = 0;

    for (c = 0; c < NUM_CHARS; c++)
        cp[c] = (unsigned)c;
    my_sort(v, cp, NUM_CHARS);

    printf("\n %s, sorted\n", p);
    for (c = 0; c < NUM_CHARS && v[cp[c]] > 0; c++) {
        printf("  %s%6u", s[cp[c]], v[cp[c]]);
        if (++shown % 8 == 0)
            printf("\n");
    }
    if (shown % 8 != 0)
        printf("\n");

    for (c = 0; c < NUM_CHARS; c++) {
        pc = v[c] / 10000.0;
        if (pc > 0.000001)
            d -= pc * d3 * log(pc);      /* log(pc) < 0 */
    }
    f = (float)(d / 0.08);               /* d = 8 bits -> 100% size */
    printf("\n Entropy is %4.1f bits/byte.  Maximum character-wise compression =%5.1f %%\n",
           d, f);
    return 100 * (int)f;
}

/*
 * Shell sort of an index array.
 *
 * On return the array of indexes cp[] points in order from max to min
 * values of v[], i.e. v[cp[i]] >= v[cp[i+1]].  v[] itself is not modified.
 *   v   array of values
 *   cp  array of indexes to v[]; the caller fills it before the call
 *   n   number of elements in v, cp
 */
void my_sort(unsigned v[], unsigned cp[], unsigned n)
{
    const int count = (int)n;  /* signed copy: j below must be able to go negative */
    unsigned  tmp;
    int       i, j, gap;

    for (gap = count / 2; gap > 0; gap /= 2) {
        for (i = gap; i < count; i++) {
            for (j = i - gap; j >= 0; j -= gap) {
                if (v[cp[j]] >= v[cp[j + gap]])
                    break;
                tmp         = cp[j];
                cp[j]       = cp[j + gap];
                cp[j + gap] = tmp;
            }
        }
    }
}