Plan 9 from Bell Labs’s /usr/web/sources/contrib/tristan/oed/decompress.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


#include <u.h>
#include <libc.h>
#include <bio.h>

#define DEBUG(x) 

struct oed_block {
	ulong num;
	ulong unknown;
	ulong brktbl;
	ulong symc;
	uchar *buffer;		/* start of buffer */
	uchar *dictionary;	/* start of dictionary = 0x10 */
	uchar *breaktable;	/* start of break table = dictionary + brktbl */
	uchar *seektable;	/* start of seek table = breaktable+0x80 */
	uchar *data;		/* start of data = breaktable + 0x0180 */
	
	ulong *breakpoints;
	ushort *symbols;

	int zeros;
	uchar *cur;	/* current byte in decompression */
	uchar mask;	/* current bit in decompression */
};

enum {
	block_size = 32 * 1024,
};

Biobuf out;

ulong
clong(uchar *src) {
	ulong r;
	r = src[0] << 24;
	r |= src[1] << 16;
	r |= src[2] << 8;
	r |= src[3];
	return r;
}

void
cleanup(struct oed_block *block) {
	free(block->breakpoints);
	free(block->symbols);
}

int
enumerate_symbols(struct oed_block *block) {
  unsigned char *data = block->dictionary;
  unsigned short *sym, *lastsym = block->symbols + block->symc;

DEBUG(fprint(2,"%ld symbols\n", block->symc);)

  for(sym = block->symbols; sym < lastsym; sym++) {
    *sym = data - block->buffer;
    while(~*data & 0x80) data++;
    *data = *data & 0x7f;
    data++;
/*DEBUG(fprint(2, "symbol %ld, %p-%p\n", sym - block->symbols, block->buffer+*sym, data);)
DEBUG(write(2, block->buffer+*sym, data - (block->buffer+*sym));)
DEBUG(write(2, "\n", 1);)*/
  }

  while(data < block->breaktable && *data != '\0') data++;
  *sym = data - block->buffer; /* the end of the final symbol */
DEBUG(fprint(2,"%ld more bytes\n", block->breaktable - data);)

  return block->symc;
}

int
enumerate_breakpoints(struct oed_block *block) {
	uchar *i;
	ulong this,next;
	ulong c;
	ulong *dst;

	i = (block->breaktable);
	dst = block->breakpoints;
	*dst++ = 0; *dst++ = 0; *dst++ = 0; /* initial null entry */
	next=clong(i+4);

DEBUG(fprint(2, "%ld breakpoints\n", clong(i));)
	for(c=clong(i)-1; c; c--) {
		i+=4;
		this=next;
		next=clong(i+4);
DEBUG(fprint(2, "%ld %ld\n", this, next);)
		*dst=*(dst-2) * 2;
		dst++;
		*dst=*(dst-1) + next;
		dst++;
		*dst=*(dst-3) + this;
		dst++;
	}

	return c;
}

/* oed_prepare
 * prepare block for decompression
 */
int
oed_prepare(struct oed_block *block) {
	block->num = clong(block->buffer);
/*	block->unknown = clong(block->buffer + 4); */
	block->brktbl = clong(block->buffer + 8);
	block->symc = clong(block->buffer + 12);

	/* assignments */
	block->dictionary = block->buffer + 0x10;
	block->breaktable = block->dictionary + block->brktbl;
	block->seektable = block->breaktable + 0x80;
	block->data = block->seektable + 0x0100;
	block->cur = block->data;
	block->mask = 0x80;
	block->zeros = 0;

	if((block->symbols = malloc((block->symc + 1) * sizeof(*block->symbols))) == nil)
		exits("symbol table malloc");
	if(enumerate_symbols(block) < 0)
		exits("failed to enumerate symbols");

	if((block->breakpoints = malloc((clong(block->breaktable)+1)*3*sizeof(*block->breakpoints))) == nil)
		exits("break point malloc");
	enumerate_breakpoints(block);
	return 0;
}

inline void
print_symbol(ushort which, struct oed_block *block) {
/*DEBUG(print("print_symbol(%hd)\n", which);)*/
	if(Bwrite(&out, block->buffer + *(block->symbols + which), *(block->symbols + which + 1) - *(block->symbols + which)) < 0)
		exits("Bwrite");
}

ushort
fetch_symbol(struct oed_block *block) {
	int output;
	ulong *curbrk;

	output=0;
	curbrk = block->breakpoints;

/*DEBUG(print("fetch_symbol %ld, %hhx, %hhx\n", block->cur - block->data, *block->cur, block->mask);)*/
	/* the compressionish algorithm goes like this:
	 * append the current bit to your number, 
	 * if your number is in the correct range for this many bits, you're done.
	 * repeat.
	 */
	while(block->cur < (uchar *)block->breakpoints) {
		for(;block->mask;block->mask >>= 1) {
			output <<= 1;
			if(*block->cur & block->mask)
				output++;

			if(output >= curbrk[0] && output < curbrk[1]) {
				output = output + curbrk[2] - curbrk[0];

				if(output) {
					block->zeros = 0;
					block->mask /= 2;
					return output;
				} else {
					if(block->zeros > 4) { /* six zeros sounds about right */
						block->cur = block->buffer + block_size;
						block->mask = 0x00;
						return 0;
					}
					block->zeros++;
				}
				output = 0;
				curbrk = block->breakpoints;
			} else
				curbrk+=3;

		}
		block->cur++;
		block->mask = 0x80;
	}
	return 0;
}

int 
main(int, char **) {
	struct oed_block *block;
	unsigned short csymbol;

	if((block = malloc(sizeof(*block))) == nil)
		exits("block malloc");
	if((block->buffer = malloc(block_size)) == nil)
		exits("buffer malloc");
	if(Binit(&out, 1, OWRITE))
		exits("output alloc");

	while(readn(0, block->buffer, block_size) > 0) {
		oed_prepare(block);

		while(csymbol = fetch_symbol(block)) {
			print_symbol(csymbol, block);
		}

		cleanup(block);
	}

	exits("");
	return 0;
}


Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].