Byte counter

This simple C program counts the frequency of each byte in standard input or in one or more files.

C code

/* Byte counter

Count the frequency of each byte

Version 1.3 (2016-02-20 revised 2019-03-04) */

/* Standard header file for fclose, feof, fgetc, fopen, fprintf, fputs, printf, puts */

#include <stdio.h>

/* Standard header file for EXIT_FAILURE, EXIT_SUCCESS, free, malloc, NULL */

#include <stdlib.h>

/* Standard header file for isprint */

#include <ctype.h>

/* Standard header file for strcmp */

#include <string.h>

/* Standard header file for errno */

#include <errno.h>


/* Finds the first occurrence of a byte in a NUL-terminated string.

   Returns the zero-based index of the first element that equals byte. When
   the terminating NUL is reached first, the index of that terminator is
   returned instead, so a search for 0 yields the index of the terminator.

   The index is held in an unsigned char and therefore wraps around after 255. */

unsigned char position_of_byte_in_string (char const * const string, unsigned char const byte)
{
    unsigned char index = 0;

    for (;;)
    {
        /* Convert to unsigned char so the comparison does not depend on the signedness of char */
        unsigned char const candidate = string [index];

        if ((candidate == byte) || (candidate == 0))
        {
            return index;
        }

        index ++;
    }
}


/* Sizes and positions within one row of the frequency table used by count_bytes_in_file */

enum
{
    BYTE_VALUES = 256, /* Number of distinct byte values that are counted */
    TOTAL_SLOT = 256,  /* Slot after the last byte value; holds the sum of all frequencies */
    COUNT_SLOTS = 257  /* One slot for each byte value plus the total */
};

/* Rows of the frequency table used by count_bytes_in_file */

enum
{
    THIS_FILE = 0, /* Frequencies of the input that is being processed */
    ALL_FILES = 1, /* Frequencies accumulated over every input processed so far */
    CATEGORIES = 2
};


/* Returns the letter that follows the backslash in the C escape sequence for
   byte (for example 'n' for a line feed), or 0 when byte is not one of
   \a \b \t \n \v \f \r */

static char escape_letter_of_byte (unsigned char const byte)
{
    switch (byte)
    {
        case '\a': return 'a';
        case '\b': return 'b';
        case '\t': return 't';
        case '\n': return 'n';
        case '\v': return 'v';
        case '\f': return 'f';
        case '\r': return 'r';
        default: return 0;
    }
}


/* Prints one report line: the numeric value of byte, a readable form of it in
   parentheses (the character itself, its escape sequence, or a blank) and its frequency */

static void print_byte_frequency (unsigned char const byte, unsigned long int const frequency)
{
    /* %u expects an unsigned int; an unsigned char argument would be promoted to int */
    unsigned int const value = byte;

    if (isprint (byte))
    {
        printf ("%u (%c): %lu\n", value, byte, frequency);
    }
    else
    {
        char const escape_letter = escape_letter_of_byte (byte);

        if (escape_letter != 0)
        {
            printf ("%u (\\%c): %lu\n", value, escape_letter, frequency);
        }
        else
        {
            printf ("%u ( ): %lu\n", value, frequency);
        }
    }
}


/* Fills order with every byte value whose frequency is non-zero and returns how many there are.

   When sort is 0 the byte values are in ascending order. Otherwise they are
   ordered from the highest frequency to the lowest, and byte values with equal
   frequencies stay in ascending order.

   frequencies and order must each have room for BYTE_VALUES elements. */

static unsigned int order_bytes_for_printing (unsigned long int const frequencies [], unsigned char const sort, unsigned char order [])
{
    unsigned int used = 0;

    unsigned int byte;

    for (byte = 0; byte < BYTE_VALUES; byte ++)
    {
        unsigned int slot = used;

        if (frequencies [byte] == 0)
        {
            continue;
        }

        if (sort != 0)
        {
            /* Move strictly lower frequencies down by one slot; equal frequencies stay ahead of the new byte */
            while ((slot > 0) && (frequencies [order [slot - 1]] < frequencies [byte]))
            {
                order [slot] = order [slot - 1];

                slot --;
            }
        }

        order [slot] = (unsigned char) byte;

        used ++;
    }

    return used;
}


/* Calculates and prints the frequency of each single-byte character in a file.

   input_file       Stream to read until end of file or a read error, or NULL to
                    request the summary over every stream counted so far.
   input_file_name  Label printed in front of the total.
   sort             0 prints the bytes in ascending byte value; any other value
                    prints them from the highest frequency to the lowest.

   A stream is always reported. The summary is reported only when more than one
   stream has been counted. When the first stream counted is stdin, its header
   line is labelled "Total" instead of input_file_name. Only bytes with a
   non-zero frequency are listed.

   Returns the number of streams counted so far. The counts are kept in static
   storage between calls. */

unsigned int count_bytes_in_file (FILE * const input_file, char const * const input_file_name, unsigned char const sort)
{
    /* Static storage starts out zeroed, so the ALL_FILES row needs no explicit initialization */
    static unsigned long int frequency_table [CATEGORIES] [COUNT_SLOTS];

    static unsigned int file_count = 0;

    unsigned long int * const all_files_row = frequency_table [ALL_FILES];

    unsigned long int * selected_row;

    unsigned int slot;

    if (input_file == NULL)
    {
        selected_row = all_files_row;

        /* Recalculate the grand total from scratch so that repeated summary calls do not add it up twice */
        selected_row [TOTAL_SLOT] = 0;

        for (slot = 0; slot < BYTE_VALUES; slot ++)
        {
            selected_row [TOTAL_SLOT] += selected_row [slot];
        }
    }
    else
    {
        int character;

        selected_row = frequency_table [THIS_FILE];

        file_count ++;

        /* Discard the frequencies of the previous stream */
        for (slot = 0; slot < COUNT_SLOTS; slot ++)
        {
            selected_row [slot] = 0;
        }

        /* Count the frequency of each byte */
        do
        {
            character = fgetc (input_file);

            /* EOF is negative, so it is never counted */
            if ((character >= 0) && (character < BYTE_VALUES))
            {
                selected_row [character] ++;
            }
        }
        while ((feof (input_file) == 0) && (ferror (input_file) == 0));

        /* Calculate the total for this stream and add the stream to the frequencies across all streams */
        for (slot = 0; slot < BYTE_VALUES; slot ++)
        {
            selected_row [TOTAL_SLOT] += selected_row [slot];

            all_files_row [slot] += selected_row [slot];
        }
    }

    /* A summary would only repeat the report of the single stream (or report nothing at all) */
    if ((input_file == NULL) && (file_count <= 1))
    {
        return file_count;
    }

    if ((file_count == 1) && (input_file == stdin))
    {
        printf ("Total: %lu\n", selected_row [TOTAL_SLOT]);
    }
    else
    {
        printf ("%s: %lu\n", input_file_name, selected_row [TOTAL_SLOT]);
    }

    if (selected_row [TOTAL_SLOT] > 0)
    {
        unsigned char order [BYTE_VALUES];

        unsigned int const used = order_bytes_for_printing (selected_row, sort, order);

        unsigned int rank;

        /* Print the non-zero frequency of each byte */
        for (rank = 0; rank < used; rank ++)
        {
            print_byte_frequency (order [rank], selected_row [order [rank]]);
        }
    }

    return file_count;
}


/* Prints "program_file_name:input_file_name: message" on standard error, where
   message is the system description of error_number */

static void report_file_error (char const * const program_file_name, char const * const input_file_name, int const error_number)
{
    fprintf (stderr, "%s:", program_file_name);

    /* fprintf may have changed errno, so put back the value that was saved right after the failing call */
    errno = error_number;

    perror (input_file_name);
}


/* Prints a message on standard error saying that input_file_name could not be read completely.

   No system description is printed: the report that is printed between the
   failing read and this message may already have changed errno. */

static void report_read_error (char const * const program_file_name, char const * const input_file_name)
{
    fprintf (stderr, "%s:%s: read error\n", program_file_name, input_file_name);
}


/* Entry point.

   Every argument that exactly matches an entry of the option table is an
   option; every other argument, and every argument after "--" or "/-", is a
   file name. Parsing stops at the first help or version option.

   Depending on the options, prints the usage text, prints the version, or
   counts the bytes of each named file and, when no file is named or the "-"
   option is present, of standard input, followed by the summary.

   Returns EXIT_FAILURE when memory cannot be allocated, when a file cannot be
   opened or closed, or when reading an input fails; otherwise EXIT_SUCCESS. */

int main (int const argument_count, char const * const argument_array [])
{
    /* Bit flags that are collected in status */
    enum
    {
        OPTION_NO_MORE_OPTIONS = 1 << 0,
        OPTION_HELP = 1 << 1,
        OPTION_VERSION = 1 << 2,
        OPTION_READ_STANDARD_INPUT = 1 << 3,
        OPTION_SORT = 1 << 4
    };

    /* One spelling of an option together with the flag that it sets. Keeping
       both in one entry makes it impossible for them to get out of step. */
    struct option_entry
    {
        char const * text;

        unsigned int flag;
    };

    static struct option_entry const option_table [] =
    {
        { "/-", OPTION_NO_MORE_OPTIONS },
        { "--", OPTION_NO_MORE_OPTIONS },
        { "?", OPTION_HELP },
        { "/?", OPTION_HELP },
        { "/H", OPTION_HELP },
        { "/h", OPTION_HELP },
        { "-?", OPTION_HELP },
        { "-H", OPTION_HELP },
        { "-h", OPTION_HELP },
        { "-help", OPTION_HELP },
        { "--help", OPTION_HELP },
        { "--version", OPTION_VERSION },
        { "-", OPTION_READ_STANDARD_INPUT },
        { "/S", OPTION_SORT },
        { "/s", OPTION_SORT },
        { "-S", OPTION_SORT },
        { "-s", OPTION_SORT },
        { "-sort", OPTION_SORT },
        { "--sort", OPTION_SORT }
    };

    size_t const option_count = sizeof option_table / sizeof option_table [0];

    char const * const program_file_name = argument_array [0];

    /* Allocate at least one slot: malloc (0) may return NULL without being out of memory */
    size_t const slot_count = (argument_count > 0) ? (size_t) argument_count : 1;

    /* Indexes into argument_array of the arguments that are file names */
    unsigned int * file_name_argument_array = malloc (slot_count * sizeof * file_name_argument_array);

    unsigned int file_name_argument_count = 0;

    unsigned int status = 0;

    int return_value = EXIT_SUCCESS;

    int argument_index;

    if (file_name_argument_array == NULL)
    {
        perror (program_file_name);

        return EXIT_FAILURE;
    }

    /* Set options and collect file names */

    for (argument_index = 1; (argument_index < argument_count) && ((status & (OPTION_HELP | OPTION_VERSION)) == 0); argument_index ++)
    {
        char const * const current_argument = argument_array [argument_index];

        int is_option = 0;

        if ((status & OPTION_NO_MORE_OPTIONS) == 0)
        {
            size_t option_index;

            for (option_index = 0; (option_index < option_count) && (is_option == 0); option_index ++)
            {
                if (strcmp (option_table [option_index].text, current_argument) == 0)
                {
                    status |= option_table [option_index].flag;

                    is_option = 1;
                }
            }
        }

        if (is_option == 0)
        {
            file_name_argument_array [file_name_argument_count] = (unsigned int) argument_index;

            file_name_argument_count ++;
        }
    }

    /* Print information */

    if ((status & OPTION_HELP) != 0)
    {
        printf ("Usage: %s [option] [file name]\n", program_file_name);

        puts ("Count the frequency of each byte in standard input or in one or more files.");

        puts ("Separate multiple file names with spaces.");

        puts ("Use no file name or use the - option to read standard input.");

        puts ("Use the -s or --sort options to print the frequencies sorted from highest to lowest.");

        puts ("Use the -- option to indicate that the arguments that follow it are not options.");
    }
    else if ((status & OPTION_VERSION) != 0)
    {
        puts ("Byte counter 1.3");
    }
    else
    {
        unsigned char const sort = (unsigned char) (status & OPTION_SORT);

        unsigned int file_name_argument_index;

        for (file_name_argument_index = 0; file_name_argument_index < file_name_argument_count; file_name_argument_index ++)
        {
            char const * const input_file_name = argument_array [file_name_argument_array [file_name_argument_index]];

            FILE * const input_file = fopen (input_file_name, "rb");

            if (input_file == NULL)
            {
                /* errno is read here, before any other library call can change it */
                report_file_error (program_file_name, input_file_name, errno);

                return_value = EXIT_FAILURE;

                continue;
            }

            count_bytes_in_file (input_file, input_file_name, sort);

            /* Counting stops at a read error as well as at end of file, so the counts may be incomplete */
            if (ferror (input_file) != 0)
            {
                report_read_error (program_file_name, input_file_name);

                return_value = EXIT_FAILURE;
            }

            if (fclose (input_file) != 0)
            {
                report_file_error (program_file_name, input_file_name, errno);

                return_value = EXIT_FAILURE;
            }
        }

        if ((file_name_argument_count == 0) || ((status & OPTION_READ_STANDARD_INPUT) != 0))
        {
            count_bytes_in_file (stdin, "standard input", sort);

            if (ferror (stdin) != 0)
            {
                report_read_error (program_file_name, "standard input");

                return_value = EXIT_FAILURE;
            }
        }

        /* A NULL stream requests the summary over all inputs */
        count_bytes_in_file (NULL, "Total", sort);
    }

    free (file_name_argument_array);

    file_name_argument_array = NULL;

    return return_value;
}