/* * @(#) lzwc.c - LZW compressor/decompressor. * (c) 1997 Ivan Maidanski http://ivmai.chat.ru * Freeware program source. All rights reserved. ** * Language: ANSI C * Tested with: Watcom C16 v10.0 * Last modified: 1997-08-07 11:50:00 GMT+04:00 */ #include /* NULL, size_t, malloc(), free() */ #include /* FILE, SEEK_SET, SEEK_END */ /* puts(), fopen(), fclose() */ /* fgetc(), fputc(), fseek(), ftell() */ #include /* CHAR_BIT, UCHAR_MAX */ #define MINSTRLEN 3 /* shortest packed string length (1..5) */ #define MAXSTRLEN 66 /* longest packed string length (MINSTRLEN..256) */ #define WINSIZE 4096 /* sliding look-up window size (256..32768) */ #define LWSIZE 64 /* sub-window size (1..WINSIZE) */ #define LITERSCALE 6 /* character frequence model scale (0..16) */ #define LENSCALE 2 /* string length frequence model scale (0..16) */ #define HPOSSCALE 1 /* string high-pos frequence model scale (0..16) */ typedef int tbool; /* 0,1 */ typedef unsigned short tsym; /* 0..UCHAR_MAX+2 */ typedef size_t twinpos; /* 0..WINSIZE */ typedef unsigned char tfreq; /* 0..MAXFREQ */ typedef unsigned short tcum; /* 0..MAXCUMVAL */ typedef unsigned long tdblcum; /* 0..(MAXCUMVAL+1)*(MAXCUMVAL+1)-1 */ /* typedef unsigned long tcum; typedef long double tdblcum; */ #define MAXFREQ ((tfreq)~0) #define MAXCUMVAL ((tcum)~0) #define HALFCUMVAL (MAXCUMVAL/2+MAXCUMVAL%2) typedef struct { tcum low,high; tcum litercumsum,lencumsum,hposcumsum; twinpos winstart; tsym liter; twinpos strpos,strlen; unsigned char *datawin; tsym *litersymtab,*lensymtab,*hpossymtab; tfreq *literfreqtab,*lenfreqtab,*hposfreqtab; twinpos *strptrtab,*prevstrtab; } tpackstatus; void freqmodelstart(tsym symtab[], tfreq freqtab[], tcum *cumsum, tsym cnt) { *cumsum=cnt; while (cnt--) freqtab[symtab[cnt]=cnt]=0; } void freqmodelupdate(tsym symtab[], tfreq freqtab[], tfreq scale, tcum *cumsum, tsym index) { tsym cnt; tcum oldsum; if (freqtab[index]>=MAXFREQ || *cumsum>MAXCUMVAL-scale) { for (cnt=0,oldsum=*cumsum,*cumsum=0;oldsum; oldsum-=(tcum)freqtab[cnt]*scale+1,*cumsum+=(freqtab[cnt++]/=2)); *cumsum=*cumsum*scale+cnt; } for (cnt=index;cnt;cnt--) if (freqtab[cnt-1]!=freqtab[index]) break; oldsum=symtab[cnt]; symtab[cnt]=symtab[index]; symtab[index]=oldsum; freqtab[cnt]++; *cumsum+=scale; } tsym freqmodelgetcum(tcum *cumlower, tcum *cumupper, tsym symtab[], tfreq freqtab[], tfreq scale, tsym symbol) { tsym index; for (index=0,*cumlower=0;symtab[index]!=symbol; *cumlower+=freqtab[index++]); *cumupper=*cumlower+freqtab[index]; *cumlower=*cumlower*scale+index; *cumupper=*cumupper*scale+index+1; return (index); } tsym freqmodelgetindex(tcum *cumlower, tcum *cumupper, tfreq freqtab[], tfreq scale) { tsym index; for (index=0,*cumupper=0;*cumupper<=*cumlower; *cumupper+=(tcum)freqtab[index++]*scale+1); *cumlower=*cumupper-(tcum)freqtab[--index]*scale-1; return (index); } tcum arithgetrange(tcum oldrange, tcum cumfreq, tcum cumsum) { return (((tdblcum)oldrange+1)*cumfreq/cumsum-1); } tcum arithgetcum(tcum value, tcum cumsum, tcum oldrange) { return ((((tdblcum)value+1)*cumsum-1)/((tdblcum)oldrange+1)); } void arithencode(tcum *low, tcum *high, tcum cumlower, tcum cumupper, tcum cumsum) { tcum oldrange=*high-*low; *high=*low+arithgetrange(oldrange,cumupper,cumsum); *low+=arithgetrange(oldrange,cumlower,cumsum)+1; } tcum arithdecode(tcum low, tcum high, tcum value, tcum cumsum) { return (arithgetcum(value-low,cumsum,high-low)); } void arithstart(tcum *low, tcum *high) { *low=0; *high=MAXCUMVAL; } void arithoutput(unsigned char *code, unsigned char *len, tcum *low, tcum *high) { for (;*len && (*low>=HALFCUMVAL || *high=HALFCUMVAL); } void arithoutend(unsigned char *code, unsigned char *len, tcum *low, tcum *high) { unsigned char curlen; if (*low<*high && *len) { for (curlen=0;*low>=HALFCUMVAL/2 && *high*high && *len;(*low)--,(*len)--) *code=(*code<<1)|*high; } void encodesym(tcum *low, tcum *high, tsym symbol, tsym symtab[], tfreq freqtab[], tfreq scale, tcum *cumsum) { tcum cumlower,cumupper; if (symtab!=NULL) { symbol=freqmodelgetcum(&cumlower,&cumupper,symtab,freqtab,scale,symbol); arithencode(low,high,cumlower,cumupper,*cumsum); freqmodelupdate(symtab,freqtab,scale,cumsum,symbol); } else arithencode(low,high,symbol,(tcum)symbol+1,*cumsum); } tsym decodesym(tcum *low, tcum *high, tcum value, tsym symtab[], tfreq freqtab[], tfreq scale, tcum *cumsum) { tsym index; tcum cumlower=arithdecode(*low,*high,value,*cumsum); if (symtab!=NULL) { index=freqmodelgetindex(&cumlower,&value,freqtab,scale); arithencode(low,high,cumlower,value,*cumsum); cumlower=symtab[index]; freqmodelupdate(symtab,freqtab,scale,cumsum,index); } else arithencode(low,high,cumlower,cumlower+1,*cumsum); return (cumlower); } void outsymcode(unsigned char *code, unsigned char *len, tcum *low, tcum *high, tcum nextcumsum) { if (*low<=*high) arithoutput(code,len,low,high); if (*low>*high || *len && *high-*low*high || *high-*lowstrlen? newpos+strlen : strlen-(winsize-newpos))==winstart && !datawinnextstrsearch(datawin,winstart,winsize, &newpos,strlen,prevstrtab)) return (0); while (datawin[winsize-newpos>strlen? newpos+strlen : strlen-(winsize-newpos)]!=ch) if (!datawinnextstrsearch(datawin,winstart,winsize, &newpos,strlen,prevstrtab)) return (0); *strpos=newpos; return (1); } void datawinstrrefind(unsigned char datawin[], twinpos winstart, twinpos winsize, twinpos *strpos, twinpos strlen, twinpos strptrtab[], twinpos prevstrtab[]) { twinpos oldpos=*strpos; if (strlen) { for (*strpos=strptrtab[datawin[oldpos]];(winstart>*strpos? winstart-*strpos : winsize-(*strpos-winstart))=winsize) *winstart=0; } void datawinstrupdate(unsigned char datawin[], twinpos *winstart, twinpos strptrtab[], twinpos prevstrtab[], twinpos winsize, twinpos strpos, twinpos strlen) { while (strlen--) { datawinsearchupdate(strptrtab,prevstrtab,*winstart,datawin[strpos]); datawinupdate(datawin,winstart,winsize,datawin[strpos]); if (++strpos>=winsize) strpos=0; } } tbool encodechar(tsym *liter, twinpos *strpos, twinpos *strlen, unsigned char ch, unsigned char datawin[], twinpos *winstart, twinpos strptrtab[], twinpos prevstrtab[], twinpos winsize, twinpos minlen, twinpos maxlen) { if (*strlen=winsize) *strpos=0; datawinstrrefind(datawin,*winstart,winsize, strpos,--*strlen,strptrtab,prevstrtab); return (0); } } minlen=*strpos; *strpos=(*winstart>minlen? *winstart-minlen : winsize-(minlen-*winstart))-*strlen; datawinstrupdate(datawin,winstart,strptrtab,prevstrtab,winsize, minlen,*strlen); *liter=UCHAR_MAX+1; return (0); } void encodeendch(tsym *liter, twinpos *strpos, twinpos *strlen, unsigned char datawin[], twinpos winstart, twinpos winsize, twinpos minlen) { if (*strlen>=minlen) { *liter=UCHAR_MAX+1; *strpos=(winstart>*strpos? winstart-*strpos : winsize-(*strpos-winstart))-*strlen; } else if (*strlen) { *liter=datawin[*strpos]; if (++*strpos>=winsize) *strpos=0; (*strlen)--; } } unsigned char decodechar(tsym liter, twinpos *strpos, twinpos *strlen, unsigned char datawin[], twinpos *winstart, twinpos winsize) { if (liter>UCHAR_MAX) { liter=datawin[*strpos]; if (++*strpos>=winsize) *strpos=0; (*strlen)--; } else *strlen=0; datawinupdate(datawin,winstart,winsize,liter); return (liter); } void outstrcode(unsigned char *code, unsigned char *len, twinpos strpos, twinpos *strlen, tcum *low, tcum *high, tcum litercumsum, tsym lensymtab[], tfreq lenfreqtab[], tfreq lenscale, tcum *lencumsum, tsym hpossymtab[], tfreq hposfreqtab[], tfreq hposscale, tcum *hposcumsum, tcum lposcumsum, twinpos minlen, twinpos maxlen) { if (*strlen>=minlen) { if (*strlen<=maxlen) { outsymcode(code,len,low,high,*lencumsum); if (!*len) return; encodesym(low,high,*strlen-minlen, lensymtab,lenfreqtab,lenscale,lencumsum); *strlen=maxlen+1; } if (*strlen<=maxlen+1) { outsymcode(code,len,low,high,*hposcumsum); if (!*len) return; encodesym(low,high,strpos/lposcumsum, hpossymtab,hposfreqtab,hposscale,hposcumsum); *strlen=maxlen+2; } outsymcode(code,len,low,high,lposcumsum); if (!*len) return; encodesym(low,high,strpos%lposcumsum,NULL,NULL,0,&lposcumsum); *strlen=0; } outsymcode(code,len,low,high,litercumsum); } void instrcode(tsym *liter, twinpos *strpos, twinpos *strlen, tcum value, unsigned char code, unsigned char *len, tcum *low, tcum *high, tcum litercumsum, tsym lensymtab[], tfreq lenfreqtab[], tfreq lenscale, tcum *lencumsum, tsym hpossymtab[], tfreq hposfreqtab[], tfreq hposscale, tcum *hposcumsum, tcum lposcumsum, twinpos minlen, twinpos maxlen, twinpos winstart, twinpos winsize) { unsigned char auxcode; if (*liter>UCHAR_MAX && *strlen==maxlen+1) { outsymcode(&auxcode,len,low,high,*lencumsum); if (!*len) return; *strlen=maxlen+2; *strpos=decodesym(low,high, (value<<(sizeof(code)*CHAR_BIT-*len))|(code>>*len), lensymtab,lenfreqtab,lenscale,lencumsum)+minlen; } if (*strlen==maxlen+2) { outsymcode(&auxcode,len,low,high,*hposcumsum); if (!*len) return; *liter=(tsym)UCHAR_MAX+2; *strlen=*strpos; *strpos=decodesym(low,high, (value<<(sizeof(code)*CHAR_BIT-*len))|(code>>*len), hpossymtab,hposfreqtab,hposscale,hposcumsum); } if (*liter>(tsym)UCHAR_MAX+1) { outsymcode(&auxcode,len,low,high,lposcumsum); if (!*len) return; *liter=(tsym)UCHAR_MAX+1; *strpos=decodesym(low,high, (value<<(sizeof(code)*CHAR_BIT-*len))|(code>>*len), NULL,NULL,0,&lposcumsum)+*strpos*(tsym)lposcumsum; *strpos+=*strlen; *strpos=winstart>=*strpos? winstart-*strpos : winsize-(*strpos-winstart); } outsymcode(&auxcode,len,low,high,litercumsum); } tbool allocpack(tpackstatus *status) { if ((status->datawin=(unsigned char *)malloc(WINSIZE))!=NULL) { if ((status->litersymtab=(tsym *)malloc((UCHAR_MAX+2)* sizeof(tsym)))!=NULL) { if ((status->literfreqtab=(tfreq *)malloc((UCHAR_MAX+2)* sizeof(tfreq)))!=NULL) { if ((status->lensymtab=(tsym *)malloc((MAXSTRLEN-MINSTRLEN+1)* sizeof(tsym)))!=NULL) { if ((status->lenfreqtab=(tfreq *)malloc((MAXSTRLEN-MINSTRLEN+1)* sizeof(tfreq)))!=NULL) { if ((status->hpossymtab=(tsym *)malloc((WINSIZE+LWSIZE-1)/LWSIZE* sizeof(tsym)))!=NULL) { if ((status->hposfreqtab=(tfreq *)malloc((WINSIZE+LWSIZE-1)/LWSIZE* sizeof(tfreq)))!=NULL) return (0); free((void *)status->hpossymtab); } free((void *)status->lenfreqtab); } free((void *)status->lensymtab); } free((void *)status->literfreqtab); } free((void *)status->litersymtab); } free((void *)status->datawin); } return (1); } tbool allocpackenc(tpackstatus *status) { if ((status->prevstrtab=(twinpos *)malloc(WINSIZE*sizeof(twinpos)))!=NULL) { if ((status->strptrtab=(twinpos *)malloc((UCHAR_MAX+1)* sizeof(twinpos)))!=NULL) return (0); free((void *)status->prevstrtab); } return (1); } void freepack(tpackstatus *status) { free((void *)status->hposfreqtab); free((void *)status->hpossymtab); free((void *)status->lenfreqtab); free((void *)status->lensymtab); free((void *)status->literfreqtab); free((void *)status->litersymtab); free((void *)status->datawin); } void freepackenc(tpackstatus *status) { free((void *)status->strptrtab); free((void *)status->prevstrtab); } void startpack(tpackstatus *status) { datawinstart(status->datawin,&status->winstart,MAXSTRLEN,WINSIZE, &status->strlen); freqmodelstart(status->litersymtab,status->literfreqtab, &status->litercumsum,UCHAR_MAX+2); freqmodelstart(status->lensymtab,status->lenfreqtab, &status->lencumsum,MAXSTRLEN-MINSTRLEN+1); freqmodelstart(status->hpossymtab,status->hposfreqtab, &status->hposcumsum,(WINSIZE+LWSIZE-1)/LWSIZE); arithstart(&status->low,&status->high); } void startpackenc(tpackstatus *status) { datawinsearchinit(status->strptrtab,status->prevstrtab, status->datawin,status->winstart,WINSIZE); } void packchar(unsigned char ch, tpackstatus *status) { tbool catch=encodechar(&status->liter,&status->strpos,&status->strlen, ch,status->datawin,&status->winstart, status->strptrtab,status->prevstrtab, WINSIZE,MINSTRLEN,MAXSTRLEN); if (!catch || !status->strlen) encodesym(&status->low,&status->high,status->liter,status->litersymtab, status->literfreqtab,LITERSCALE,&status->litercumsum); status->liter=catch? (tsym)UCHAR_MAX+1+!status->strlen : ch; } unsigned char unpackchar(tpackstatus *status) { return (decodechar(status->liter,&status->strpos,&status->strlen, status->datawin,&status->winstart,WINSIZE)); } void outpack(unsigned char *code, unsigned char *len, tpackstatus *status) { tbool catch; unsigned char ch; if (status->liter==(tsym)UCHAR_MAX+1) return; for (;;) { outstrcode(code,len,status->strpos,&status->strlen, &status->low,&status->high,status->litercumsum, status->lensymtab,status->lenfreqtab, LENSCALE,&status->lencumsum, status->hpossymtab,status->hposfreqtab, HPOSSCALE,&status->hposcumsum,LWSIZE,MINSTRLEN,MAXSTRLEN); if (!*len || status->liter>UCHAR_MAX) return; catch=encodechar(&status->liter,&status->strpos,&status->strlen, ch=status->liter,status->datawin,&status->winstart, status->strptrtab,status->prevstrtab, WINSIZE,MINSTRLEN,MAXSTRLEN); if (catch && status->strlen) break; encodesym(&status->low,&status->high,status->liter,status->litersymtab, status->literfreqtab,LITERSCALE,&status->litercumsum); status->liter=catch? (tsym)UCHAR_MAX+2 : ch; } status->liter=(tsym)UCHAR_MAX+1; } void outendpack(unsigned char *code, unsigned char *len, tpackstatus *status) { for (;;) { if (status->liter!=(tsym)UCHAR_MAX+1) { outstrcode(code,len,status->strpos,&status->strlen, &status->low,&status->high,status->litercumsum, status->lensymtab,status->lenfreqtab, LENSCALE,&status->lencumsum, status->hpossymtab,status->hposfreqtab, HPOSSCALE,&status->hposcumsum,LWSIZE,MINSTRLEN,MAXSTRLEN); if (!*len) return; } if (!status->strlen) break; encodeendch(&status->liter,&status->strpos,&status->strlen, status->datawin,status->winstart,WINSIZE,MINSTRLEN); encodesym(&status->low,&status->high,status->liter,status->litersymtab, status->literfreqtab,LITERSCALE,&status->litercumsum); status->liter=(tsym)UCHAR_MAX+2; } outtailcode(code,len,&status->low,&status->high); } void inpack(tcum value, unsigned char code, unsigned char *len, tpackstatus *status) { if (!status->strlen) { status->liter=decodesym(&status->low,&status->high, (value<<(sizeof(code)*CHAR_BIT-*len))|(code>>*len), status->litersymtab, status->literfreqtab, LITERSCALE,&status->litercumsum); status->strlen=MAXSTRLEN+1; } instrcode(&status->liter,&status->strpos,&status->strlen,value,code,len, &status->low,&status->high,status->litercumsum, status->lensymtab,status->lenfreqtab, LENSCALE,&status->lencumsum, status->hpossymtab,status->hposfreqtab, HPOSSCALE,&status->hposcumsum,LWSIZE, MINSTRLEN,MAXSTRLEN,status->winstart,WINSIZE); } void packfile(FILE *destfile, FILE *srcfile, unsigned long srclen, tpackstatus *status) { unsigned char outcode,outlen; for (outlen=CHAR_BIT;srclen;srclen--) { packchar((unsigned char)fgetc(srcfile),status); for (outpack(&outcode,&outlen,status);!outlen; outlen=CHAR_BIT,outpack(&outcode,&outlen,status)) fputc(outcode,destfile); } for (outendpack(&outcode,&outlen,status);!outlen; outlen=CHAR_BIT,outendpack(&outcode,&outlen,status)) fputc(outcode,destfile); if (outlen>(cnt*CHAR_BIT)),f); } long fgetlong(FILE *f) { long value=0; unsigned char cnt=sizeof(long); while (cnt--) value=(value< "); return (-1); } argc=packunpackfile(argc,argv[2],argv[3]); switch (argc) { case 1: puts("Error: not enough memory."); break; case 2: puts("Error: cannot open source file."); break; case 3: puts("Error: cannot open destination file."); } return (argc); }