psxsdk/tools/huff.c

556 lines
11 KiB
C

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define BUFFER_SIZE 1024
struct TreeNode {
unsigned char value;
unsigned long freq;
struct TreeNode *left;
struct TreeNode *right;
};
struct CodeNode {
unsigned char value;
unsigned long code;
unsigned int codeSize;
};
unsigned long table[256];
unsigned char value[256];
unsigned long freq[256];
int tableSize;
struct CodeNode codes[256];
unsigned int codesUsed;
FILE *inFile;
unsigned long inputSize;
FILE *outFile;
struct TreeNode *root;
char inBuffer[BUFFER_SIZE];
char outBuffer[BUFFER_SIZE];
void CreateTable();
void SiftHeap(struct TreeNode**, unsigned int, unsigned int);
void SortTrees(struct TreeNode**, unsigned int);
void DestroyTree(struct TreeNode *);
void CreateTree();
void GenerateCodes(struct TreeNode*, unsigned long, unsigned long);
void SortCodes();
void Compress();
void Decompress();
void DisplayHelp();
int main(int, char**);
//////////////////////////////////////////////////////////////////////////
// Create the character frequency table for the file
//////////////////////////////////////////////////////////////////////////
void CreateTable() {
int x, y;
unsigned long maxCount;
unsigned char maxIndex;
unsigned char ch;
for(x = 0; x < 256; x++) {
value[x] = 0;
freq[x] = 0;
}
rewind(inFile);
tableSize = 0;
for(;;) {
ch = fgetc(inFile);
if(feof(inFile)) {
break;
}
++inputSize;
if(table[ch] == 0) {
++tableSize;
}
++table[ch];
}
y = tableSize;
do {
--y;
maxIndex = 0;
maxCount = 0;
for(x = 0; x < 256 ; x++) {
if(table[x] > maxCount) {
maxIndex = x;
maxCount = table[(unsigned int)maxIndex];
}
}
freq[y] = maxCount;
value[y] = maxIndex;
table[(unsigned int)maxIndex] = 0;
} while(y > 0);
}
//////////////////////////////////////////////////////////////////////////
// Max-heapify (for sorting)
//////////////////////////////////////////////////////////////////////////
void SiftHeap(struct TreeNode **trees, unsigned int x, unsigned int size) {
struct TreeNode *root;
int finished;
unsigned int y;
root = trees[x - 1];
y = x << 1;
finished = (y > size);
while(!finished) {
if(y < size) {
if(trees[y + 1 - 1]->freq > trees[y - 1]->freq) {
++y;
}
}
if(trees[y - 1]->freq > root->freq) {
trees[x - 1] = trees[y - 1];
x = y;
y = x << 1;
finished = (y > size);
} else {
finished = 1;
}
}
trees[x - 1] = root;
}
//////////////////////////////////////////////////////////////////////////
// Sort the trees in ascending order by frequency
//////////////////////////////////////////////////////////////////////////
void SortTrees(struct TreeNode **trees, unsigned int num) {
struct TreeNode *temp;
unsigned int x;
for(x = num >> 1; x > 1; x--) {
SiftHeap(trees, x, num);
}
for(x = num; x > 1; x--) {
SiftHeap(trees, 1, x);
temp = trees[1 - 1];
trees[1 - 1] = trees[x - 1];
trees[x - 1] = temp;
}
}
//////////////////////////////////////////////////////////////////////////
// Release the memory used by the tree
//////////////////////////////////////////////////////////////////////////
void DestroyTree(struct TreeNode *ptr) {
if(ptr->right) {
DestroyTree(ptr->right);
}
if(ptr->left) {
DestroyTree(ptr->left);
}
free(ptr);
}
//////////////////////////////////////////////////////////////////////////
// Create the huffman coding tree
//////////////////////////////////////////////////////////////////////////
void CreateTree() {
struct TreeNode *ptr;
struct TreeNode *trees[257];
int x, y;
for(x = 0; x < tableSize; x++) {
trees[x] = malloc(sizeof(struct TreeNode));
trees[x]->right = 0;
trees[x]->left = 0;
trees[x]->value = value[x];
trees[x]->freq = freq[x];
}
y = tableSize;
while(y > 1) {
// Combine two smallest nodes into a tree
ptr = malloc(sizeof(struct TreeNode));
ptr->right = trees[0];
ptr->left = trees[1];
ptr->freq = trees[0]->freq + trees[1]->freq;
ptr->value = 0;
trees[0] = ptr;
for(x = 1; x < y - 1; x++) {
trees[x] = trees[x + 1];
}
trees[y] = 0;
SortTrees(trees, y - 1); // account for the zero
--y;
}
root = trees[0];
}
//////////////////////////////////////////////////////////////////////////
// Generate code table from the tree
//////////////////////////////////////////////////////////////////////////
void GenerateCodes(struct TreeNode *ptr, unsigned long code,
unsigned long codeSize) {
if(ptr->right) {
GenerateCodes(ptr->right, (code << 1), codeSize + 1);
GenerateCodes(ptr->left, (code << 1) | 1, codeSize + 1);
} else {
codes[codesUsed].value = ptr->value;
codes[codesUsed].code = code;
codes[codesUsed].codeSize = codeSize;
++codesUsed;
}
}
//////////////////////////////////////////////////////////////////////////
// Sort the codes in ascending order by size
// Used for compression
//////////////////////////////////////////////////////////////////////////
void SortCodes() {
int x, y;
struct CodeNode temp;
for(x = 0; x < codesUsed; x++) {
for(y = x; y < codesUsed; y++) {
if(codes[x].codeSize > codes[y].codeSize) {
temp = codes[x];
codes[x] = codes[y];
codes[y] = temp;
}
}
}
}
//////////////////////////////////////////////////////////////////////////
// Compress the file
//////////////////////////////////////////////////////////////////////////
void Compress() {
int x, y;
unsigned char temp;
unsigned char ch;
int offset;
int ib, ob;
int inBufferSize;
CreateTable();
CreateTree();
codesUsed = 0;
GenerateCodes(root, 0, 0);
DestroyTree(root);
SortCodes();
// [nextvolume]: Values are now saved as 32-bit little endian instead of ASCII numbers
fputc(codesUsed & 0xff, outFile);
fputc((codesUsed >> 8) & 0xff, outFile);
fputc((codesUsed >> 16) & 0xff, outFile);
fputc((codesUsed >> 24) & 0xff, outFile);
fputc(inputSize & 0xff, outFile);
fputc((inputSize >> 8) & 0xff, outFile);
fputc((inputSize >> 16) & 0xff, outFile);
fputc((inputSize >> 24) & 0xff, outFile);
for(x = 0; x < codesUsed; x++) {
fputc(codes[x].value, outFile);
fputc(codes[x].codeSize - 1, outFile);
}
offset = 7;
temp = 0;
for(x = 0; x < codesUsed; x++) {
for(y = codes[x].codeSize - 1; y >= 0; y--) {
temp |= ((codes[x].code >> y) & 1) << offset;
--offset;
if(offset < 0) {
fputc(temp, outFile);
offset = 7;
temp = 0;
}
}
}
if(offset != 7) {
fputc(temp, outFile);
}
offset = 7;
temp = 0;
rewind(inFile);
ib = BUFFER_SIZE;
ob = 0;
inBufferSize = 0;
for(;;) {
if(ib >= BUFFER_SIZE) {
inBufferSize = fread(inBuffer, sizeof(char),
BUFFER_SIZE, inFile);
ib = 0;
}
if(ib >= inBufferSize) {
break;
}
ch = inBuffer[ib++];
for(x = 0; x < codesUsed; x++) {
if(ch == codes[x].value) {
break;
}
}
for(y = codes[x].codeSize - 1; y >= 0; y--) {
temp |= ((codes[x].code >> y) & 1) << offset;
--offset;
if(offset < 0) {
outBuffer[ob++] = temp;
if(ob >= BUFFER_SIZE) {
fwrite(outBuffer, sizeof(char),
BUFFER_SIZE, outFile);
ob = 0;
}
temp = 0;
offset = 7;
}
}
}
if(offset != 7) {
outBuffer[ob++] = temp;
}
if(ob) {
fwrite(outBuffer, sizeof(char), ob, outFile);
}
}
//////////////////////////////////////////////////////////////////////////
// Decompress the file
//////////////////////////////////////////////////////////////////////////
void Decompress() {
int x, y;
unsigned int dataSize;
unsigned long mask, maskSize;
unsigned char ch;
int offset;
// int last;
int ib, ob;
ib = fgetc(inFile);
codesUsed = ib;
ib = fgetc(inFile);
codesUsed |= ib << 8;
ib = fgetc(inFile);
codesUsed |= ib << 16;
ib = fgetc(inFile);
codesUsed |= ib << 24;
ib = fgetc(inFile);
dataSize = ib;
ib = fgetc(inFile);
dataSize |= ib << 8;
ib = fgetc(inFile);
dataSize |= ib << 16;
ib = fgetc(inFile);
dataSize |= ib << 24;
for(x = 0; x < codesUsed; x++) {
codes[x].value = fgetc(inFile);
codes[x].codeSize = fgetc(inFile) + 1;
}
offset = 7;
ch = 0;
for(x = 0; x < codesUsed; x++) {
codes[x].code = 0;
for(y = codes[x].codeSize - 1; y >= 0; y--) {
if(offset == 7) {
ch = fgetc(inFile);
}
codes[x].code |= ((ch >> offset) & 1) << y;
offset = (offset - 1) & 7;
}
}
maskSize = 0;
mask = 0;
offset = 7;
// last = 0;
y = 0;
x = 0;
ib = BUFFER_SIZE;
ob = 0;
for(;;) {
if(offset == 7) {
if(ib >= BUFFER_SIZE) {
fread(inBuffer, sizeof(char), BUFFER_SIZE,
inFile);
ib = 0;
}
ch = inBuffer[ib++];
}
mask <<= 1;
mask |= (ch >> offset) & 1;
++maskSize;
offset = (offset - 1) & 7;
while(codes[y].codeSize < maskSize) ++y;
while(codes[y].codeSize == maskSize) {
if(codes[y].code == mask) {
if(ob >= BUFFER_SIZE) {
fwrite(outBuffer, sizeof(char),
BUFFER_SIZE, outFile);
ob = 0;
}
outBuffer[ob++] = codes[y].value;
++x;
if(x >= dataSize) {
fwrite(outBuffer, sizeof(char),
ob, outFile);
return;
}
mask = 0;
maskSize = 0;
y = 0;
break;
}
++y;
}
}
}
//////////////////////////////////////////////////////////////////////////
// Display usage
//////////////////////////////////////////////////////////////////////////
void DisplayHelp() {
printf("Huffman compressor for PSXSDK\n");
printf("Original version by Joe Wingbermuehle\n");
printf("usage: huff [options] file\n");
printf("options:\n");
printf("\t-\t\tUse stdin for input\n");
printf("\t-c\t\tCompress/Uncompress to stdout\n");
printf("\t-k\t\tKeep old file\n");
printf("\t-u\t\tUncompress\n");
}
//////////////////////////////////////////////////////////////////////////
// Main
//////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv) {
char *inName;
char *outName;
int x, y;
int error;
char uncompress;
char keep;
double savings;
if(argc < 2) {
DisplayHelp();
exit(1);
}
uncompress = 0;
keep = 0;
inName = 0;
outName = 0;
outFile = 0;
inFile = 0;
for(x = 1; x < argc; x++) {
if(argv[x][0] == '-') {
switch(argv[x][1]) {
case 'u':
uncompress = 1;
break;
case 'c':
outFile = stdout;
keep = 1;
break;
case 'k':
keep = 1;
break;
case 0:
inFile = stdout;
break;
default:
DisplayHelp();
exit(1);
}
} else if(!inName && !inFile) {
inName = malloc(strlen(argv[x]) + 1);
strcpy(inName, argv[x]);
} else {
printf("unrecognized option: %s\n", argv[x]);
DisplayHelp();
exit(1);
}
}
if(!inFile) {
inFile = fopen(inName, "rb");
if(!inFile) {
printf("error: could not open input\n");
exit(1);
}
}
if(!uncompress) {
outName = malloc(strlen(inName) + 4);
strcpy(outName, inName);
strcat(outName, ".jh");
} else {
error = 0;
if(strlen(inName) < 5) {
error = 1;
} else {
y = strlen(inName) - 3;
for(x = 0; x < 4; x++) {
if(inName[x + y] != ".jh"[x]) {
error = 1;
break;
}
}
}
if(error) {
printf("bad file extension\n");
DisplayHelp();
exit(1);
}
outName = malloc(strlen(inName) + 1);
strcpy(outName, inName);
if(strlen(outName) > 4) {
outName[strlen(outName) - 3] = 0;
}
}
if(!outFile) {
outFile = fopen(outName, "wb");
if(!outFile) {
printf("error: could not open output\n");
exit(1);
}
}
if(uncompress) {
Decompress();
} else {
Compress();
savings = (double)inputSize - (double)ftell(outFile);
savings /= (double)inputSize;
savings *= 100.00;
printf("savings: %.2f%%\n", savings);
}
if(inFile != stdin) {
fclose(inFile);
if(!keep) {
remove(inName);
}
}
if(outFile != stdout) {
fclose(outFile);
}
exit(0);
}