/* msexceltables.c Steve Simon 5-Jan-2005 */
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
enum {
Tillegal = 0,
Tnumber, // cell types
Tlabel,
Tindex,
Tbool,
Terror,
Ver8 = 0x600, // only BIFF8 and BIFF8x files support unicode
Nwidths = 4096,
};
typedef struct Biff Biff;
typedef struct Col Col;
typedef struct Row Row;
struct Row {
Row *next; // next row
int r; // row number
Col *col; // list of cols in row
};
struct Col {
Col *next; // next col in row
int c; // col number
int f; // index into formating table (Xf)
int type; // type of value for union below
union { // value
int index; // index into string table (Strtab)
int error;
int bool;
char *label;
double number;
};
};
struct Biff {
Biobuf *bp; // input file
int op; // current record type
int len; // length of current record
};
// options
static int Nopad = 0; // disable padding cells to colum width
static int Trunc = 0; // truncate cells to colum width
static int All = 0; // dump all sheet types, Worksheets only by default
static char *Delim = " "; // field delimiter
static char *Sheetrange = nil; // range of sheets wanted
static int Debug = 0;
// file scope
static int Defwidth = 10; // default colum width if non given
static int Biffver; // file vesion
static int Datemode; // date ref: 1899-Dec-31 or 1904-jan-1
static char **Strtab = nil; // label contents heap
static int Nstrtab = 0; // # of above
static int *Xf; // array of extended format indices
static int Nxf = 0; // # of above
static Biobuf *bo; // stdout (sic)
// table scope
static int Width[Nwidths]; // array of colum widths
static int Ncols = -1; // max colums in table used
static int Content = 0; // type code for contents of sheet
static Row *Root = nil; // one worksheet's worth of cells
static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
static char *Errmsgs[] = {
[0x0] "#NULL!", // intersection of two cell ranges is empty
[0x7] "#DIV/0!", // division by zero
[0xf] "#VALUE!", // wrong type of operand
[0x17] "#REF!", // illegal or deleted cell reference
[0x1d] "#NAME?", // wrong function or range name
[0x24] "#NUM!", // value range overflow
[0x2a] "#N/A!", // argument of function not available
};
void
cell(int r, int c, int f, int type, void *val)
{
Row *row, *nrow;
Col *col, *ncol;
if(c > Ncols)
Ncols = c;
if((ncol = malloc(sizeof(Col))) == nil)
sysfatal("no memory\n");
ncol->c = c;
ncol->f = f;
ncol->type = type;
ncol->next = nil;
switch(type){
case Tnumber: ncol->number = *(double *)val; break;
case Tlabel: ncol->label = (char *)val; break;
case Tindex: ncol->index = *(int *)val; break;
case Tbool: ncol->bool = *(int *)val; break;
case Terror: ncol->error = *(int *)val; break;
default: sysfatal("can't happen error\n");
}
if(Root == nil || Root->r > r){
if((nrow = malloc(sizeof(Row))) == nil)
sysfatal("no memory\n");
nrow->col = ncol;
ncol->next = nil;
nrow->r = r;
nrow->next = Root;
Root = nrow;
return;
}
for(row = Root; row; row = row->next){
if(row->r == r){
if(row->col->c > c){
ncol->next = row->col;
row->col = ncol;
return;
}
else{
for(col = row->col; col; col = col->next)
if(col->next == nil || col->next->c > c){
ncol->next = col->next;
col->next = ncol;
return;
}
}
}
if(row->next == nil || row->next->r > r){
if((nrow = malloc(sizeof(Row))) == nil)
sysfatal("no memory\n");
nrow->col = ncol;
nrow->r = r;
nrow->next = row->next;
row->next = nrow;
return;
}
}
sysfatal("cannot happen error\n");
}
struct Tm *
bifftime(double num)
{
long long t = num;
/* Beware - These epochs are wrong, this
* is due to Excel still remaining compatible
* with Lotus-123, which incorrectly believed 1900
* was a leap year
*/
if(Datemode)
t -= 24107; // epoch = 1/1/1904
else
t -= 25569; // epoch = 31/12/1899
t *= 60*60*24;
return localtime((long)t);
}
void
numfmt(int fmt, int min, int max, double num)
{
char buf[1024];
struct Tm *tm;
if(fmt == 9)
snprint(buf, sizeof(buf),"%.0f%%", num);
else
if(fmt == 10)
snprint(buf, sizeof(buf),"%f%%", num);
else
if(fmt == 11 || fmt == 48)
snprint(buf, sizeof(buf),"%e", num);
else
if(fmt >= 14 && fmt <= 17){
tm = bifftime(num);
snprint(buf, sizeof(buf),"%d-%s-%d",
tm->mday, Months[tm->mon], tm->year+1900);
}
else
if((fmt >= 18 && fmt <= 21) || (fmt >= 45 && fmt <= 47)){
tm = bifftime(num);
snprint(buf, sizeof(buf),"%02d:%02d:%02d", tm->hour, tm->min, tm->sec);
}
else
if(fmt == 22){
tm = bifftime(num);
snprint(buf, sizeof(buf),"%02d:%02d:%02d %d-%s-%d",
tm->hour, tm->min, tm->sec,
tm->mday, Months[tm->mon], tm->year+1900);
}else
snprint(buf, sizeof(buf),"%g", num);
Bprint(bo, "%-*.*q", min, max, buf);
}
void
dump(void)
{
Row *r;
Col *c;
int i, min, max;
for(r = Root; r; r = r->next){
for(c = r->col; c; c = c->next){
if(c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0)
min = Defwidth;
if((c->next && c->c == c->next->c) || Nopad)
min = 0;
max = -1;
if(Trunc && min > 2)
max = min -2; // FIXME: -2 because of bug %q format ?
switch(c->type){
case Tnumber:
if(Xf[c->f] == 0)
Bprint(bo, "%-*.*g", min, max, c->number);
else
numfmt(Xf[c->f], min, max, c->number);
break;
case Tlabel:
Bprint(bo, "%-*.*q", min, max, c->label);
break;
case Tbool:
Bprint(bo, "%-*.*s", min, max, (c->bool)? "True": "False");
break;
case Tindex:
if(c->index < 0 || c->index >= Nstrtab)
sysfatal("SST string out of range - corrupt file?\n");
Bprint(bo, "%-*.*q", min, max, Strtab[c->index]);
break;
case Terror:
if(c->error < 0 || c->error >= nelem(Errmsgs) || !Errmsgs[c->error])
Bprint(bo, "#ERR=%d", c->index);
else
Bprint(bo, "%-*.*q", min, max, Errmsgs[c->error]);
break;
default:
sysfatal("cannot happen error\n");
break;
}
if(c->next){
if(c->next->c == c->c) // bar charts
Bprint(bo, "=");
else{
Bprint(bo, "%s", Delim);
for(i = c->c; c->next && i < c->next->c -1; i++)
Bprint(bo, "%-*.*s%s", min, max, "", Delim);
}
}
}
if(r->next)
for(i = r->r; i < r->next->r; i++)
Bprint(bo, "\n");
}
Bprint(bo, "\n");
}
void
release(void)
{
Row *r, *or;
Col *c, *oc;
r = Root;
while(r){
c = r->col;
while(c){
if(c->type == Tlabel)
free(c->label);
oc = c;
c = c->next;
free(oc);
}
or = r;
r = r->next;
free(or);
}
Root = nil;
memset(Width, 0, sizeof(Width));
Ncols = -1;
}
void
skip(Biff *b, int len)
{
assert(len <= b->len);
if(Bseek(b->bp, len, 1) == -1)
sysfatal("seek failed - %r\n");
b->len -= len;
}
void
gmem(Biff *b, void *p, int n)
{
if(b->len < n)
sysfatal("short record %d < %d\n", b->len, n);
if(Bread(b->bp, p, n) != n)
sysfatal("unexpected EOF - %r\n");
b->len -= n;
}
void
xd(Biff *b)
{
uvlong off;
uchar buf[16];
int addr, got, n, i, j;
addr = 0;
off = Boffset(b->bp);
while(addr < b->len){
n = (b->len >= sizeof(buf))? sizeof(buf): b->len;
got = Bread(b->bp, buf, n);
Bprint(bo, " %6d ", addr);
addr += n;
for(i = 0; i < got; i++)
Bprint(bo, "%02x ", buf[i]);
for(j = i; j < 16; j++)
Bprint(bo, " ");
Bprint(bo, " ");
for(i = 0; i < got; i++)
Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.');
Bprint(bo, "\n");
}
Bseek(b->bp, off, 0);
}
static int
getrec(Biff *b)
{
int c;
if((c = Bgetc(b->bp)) == -1)
return -1; // real EOF
b->op = c;
if((c = Bgetc(b->bp)) == -1)
sysfatal("unexpected EOF - %r\n");
b->op |= c << 8;
if((c = Bgetc(b->bp)) == -1)
sysfatal("unexpected EOF - %r\n");
b->len = c;
if((c = Bgetc(b->bp)) == -1)
sysfatal("unexpected EOF - %r\n");
b->len |= c << 8;
if(b->op == 0 && b->len == 0)
return -1;
if(Debug){
Bprint(bo, "op=0x%x len=%d\n", b->op, b->len);
xd(b);
}
return 0;
}
static uvlong
gint(Biff *b, int n)
{
int i, c;
uvlong vl, rc;
if(b->len < n)
return -1;
rc = 0;
for(i = 0; i < n; i++){
if((c = Bgetc(b->bp)) == -1)
sysfatal("unexpected EOF - %r\n");
b->len--;
vl = c;
rc |= vl << (8*i);
}
return rc;
}
double
grk(Biff *b)
{
int f;
uvlong n;
double d;
n = gint(b, 4);
f = n & 3;
n &= ~3LL;
if(f & 2){
d = n / 4.0;
}
else{
n <<= 32;
memcpy(&d, &n, sizeof(d));
}
if(f & 1)
d /= 100.0;
return d;
}
double
gdoub(Biff *b)
{
double d;
uvlong n = gint(b, 8);
memcpy(&d, &n, sizeof(n));
return d;
}
char *
gstr(Biff *b, int len_width)
{
Rune r;
char *buf, *p;
int nch, w, ap, ln, rt, opt;
enum {
Unicode = 1,
Asian_phonetic = 4,
Rich_text = 8,
};
if(b->len < len_width){
if(getrec(b) == -1)
sysfatal("starting STRING expected CONTINUE, got EOF\n");
if(b->op != 0x03c)
sysfatal("starting STRING expected CONTINUE, got op=0x%x\n", b->op);
}
ln = gint(b, len_width);
if(Biffver != Ver8){
if((buf = calloc(ln+1, sizeof(char))) == nil)
sysfatal("no memory\n");
gmem(b, buf, ln);
return buf;
}
if((buf = calloc(ln+1, sizeof(char)*UTFmax)) == nil)
sysfatal("no memory\n");
p = buf;
if(ln == 0)
return buf;
nch = 0;
*buf = 0;
opt = gint(b, 1);
if(opt & Rich_text)
rt = gint(b, 2);
else
rt = 0;
if(opt & Asian_phonetic)
ap = gint(b, 4);
else
ap = 0;
for(;;){
w = (opt & Unicode)? sizeof(Rune): sizeof(char);
while(b->len > 0){
r = gint(b, w);
p += runetochar(p, &r);
if(++nch >= ln){
if(rt)
skip(b, rt*4);
if(ap)
skip(b, ap);
return buf;
}
}
if(getrec(b) == -1)
sysfatal("in STRING expected CONTINUE, got EOF\n");
if(b->op != 0x03c)
sysfatal("in STRING expected CONTINUE, got op=0x%x\n", b->op);
opt = gint(b, 1);
}
}
void
sst(Biff *b)
{
int n;
skip(b, 4); // total # strings
Nstrtab = gint(b, 4); // # unique strings
if((Strtab = calloc(Nstrtab, sizeof(char *))) == nil)
sysfatal("no memory\n");
for(n = 0; n < Nstrtab; n++)
Strtab[n] = gstr(b, 2);
}
void
boolerr(Biff *b)
{
int r = gint(b, 2); // row
int c = gint(b, 2); // col
int f = gint(b, 2); // formatting ref
int v = gint(b, 1); // bool value / err code
int t = gint(b, 1); // type
cell(r, c, f, (t)? Terror: Tbool, &v);
}
void
rk(Biff *b)
{
int r = gint(b, 2); // row
int c = gint(b, 2); // col
int f = gint(b, 2); // formatting ref
double v = grk(b); // value
cell(r, c, f, Tnumber, &v);
}
void
mulrk(Biff *b)
{
int r = gint(b, 2); // row
int c = gint(b, 2); // first col
while(b->len >= 6){
int f = gint(b, 2); // formatting ref
double v = grk(b); // value
cell(r, c++, f, Tnumber, &v);
}
}
void
number(Biff *b)
{
int r = gint(b, 2); // row
int c = gint(b, 2); // col
int f = gint(b, 2); // formatting ref
double v = gdoub(b); // double
cell(r, c, f, Tnumber, &v);
}
void
label(Biff *b)
{
int r = gint(b, 2); // row
int c = gint(b, 2); // col
int f = gint(b, 2); // formatting ref
char *s = gstr(b, 2); // byte string
cell(r, c, f, Tlabel, s);
}
void
labelsst(Biff *b)
{
int r = gint(b, 2); // row
int c = gint(b, 2); // col
int f = gint(b, 2); // formatting ref
int i = gint(b, 2); // sst string ref
cell(r, c, f, Tindex, &i);
}
void
bof(Biff *b)
{
Biffver = gint(b, 2);
Content = gint(b, 2);
}
void
defcolwidth(Biff *b)
{
Defwidth = gint(b, 2);
}
void
datemode(Biff *b)
{
Datemode = gint(b, 2);
}
int
wanted(char *range, int sheet)
{
int i, j;
char *p;
if (! range)
return 1;
p = range;
while(*p){
i = strtol(p, &p, 10);
switch(*p){
case '\0':
case ',':
if (i == sheet)
return 1;
break;
case '-':
j = strtol(p+1, &p, 10);
if(sheet >= i && sheet <= j)
return 1;
break;
default:
sysfatal(" %s malformed range spec\n", range);
break;
}
if (*p == ',')
p++;
}
return 0;
}
void
eof(Biff *b)
{
int i;
struct {
int n;
char *s;
} names[] = {
0x005, "Workbook globals",
0x006, "Visual Basic module",
0x010, "Worksheet",
0x020, "Chart",
0x040, "Macro sheet",
0x100, "Workspace file",
};
static int sheet = 0;
if(! wanted(Sheetrange, ++sheet)){
release();
return;
}
if(Ncols != -1){
if(All){
for(i = 0; i < nelem(names); i++)
if(names[i].n == Content){
Bprint(bo, "\n# contents %s\n", names[i].s);
dump();
}
}
else
if(Content == 0x10) // Worksheet
dump();
}
release();
USED(b);
}
void
colinfo(Biff *b)
{
int c;
int c1 = gint(b, 2);
int c2 = gint(b, 2);
int w = gint(b, 2);
if(c1 < 0)
sysfatal("negative column number (%d)\n", c1);
if(c2 >= Nwidths)
sysfatal("too many columns (%d > %d)\n", c2, Nwidths);
w /= 256;
if(w > 100)
w = 100;
if(w < 0)
w = 0;
for(c = c1; c <= c2; c++)
Width[c] = w;
}
void
xf(Biff *b)
{
int fmt;
static int nalloc = 0;
skip(b, 2);
fmt = gint(b, 2);
if(nalloc >= Nxf){
nalloc += 20;
if((Xf = realloc(Xf, nalloc*sizeof(int))) == nil)
sysfatal("no memory\n");
}
Xf[Nxf++] = fmt;
}
void
writeaccess(Biff *b)
{
Bprint(bo, "# author %s\n", gstr(b, 2));
}
void
codepage(Biff *b)
{
int codepage = gint(b, 2);
if(codepage != 1200) // 1200 == UTF-16
Bprint(bo, "# codepage %d\n", codepage);
}
void
xls2csv(Biobuf *bp)
{
int i;
Biff biff, *b;
struct {
int op;
void (*func)(Biff *);
} dispatch[] = {
0x000a, eof,
0x0022, datemode,
0x0042, codepage,
0x0055, defcolwidth,
0x005c, writeaccess,
0x007d, colinfo,
0x00bd, mulrk,
0x00fc, sst,
0x00fd, labelsst,
0x0203, number,
0x0204, label,
0x0205, boolerr,
0x027e, rk,
0x0809, bof,
0x00e0, xf,
};
b = &biff;
b->bp = bp;
while(getrec(b) != -1){
for(i = 0; i < nelem(dispatch); i++)
if(b->op == dispatch[i].op)
(*dispatch[i].func)(b);
skip(b, b->len);
}
}
void
usage(void)
{
fprint(2, "usage: %s [-Dant] [-d delim] [-w worksheets] /mnt/doc/Workbook\n", argv0);
exits("usage");
}
void
main(int argc, char *argv[])
{
int i;
Biobuf bin, bout, *bp;
ARGBEGIN{
case 'D':
Debug = 1;
break;
case 'a':
All = 1;
break;
case 'd':
Delim = EARGF(usage());
break;
case 'n':
Nopad = 1;
break;
case 't':
Trunc = 1;
break;
case 'w':
Sheetrange = EARGF(usage());
break;
default:
usage();
break;
}ARGEND;
if(argc != 1)
usage();
bo = &bout;
quotefmtinstall();
Binit(bo, OWRITE, 1);
if(argc > 0) {
for(i = 0; i < argc; i++){
if((bp = Bopen(argv[i], OREAD)) == nil)
sysfatal("%s cannot open - %r\n", argv[i]);
xls2csv(bp);
Bterm(bp);
}
} else {
Binit(&bin, 0, OREAD);
xls2csv(&bin);
}
exits(0);
}
|