From f4e5e1e7d644b27c9c0c4190ba7691c37b243765 Mon Sep 17 00:00:00 2001 From: Ray Chason Date: Sun, 19 Apr 2026 13:34:08 -0400 Subject: [PATCH] Implement clearing correctly for ZIP Shrink method --- LZW.c | 83 +++++++++++++++++++++++++++++++++++++------- LZW.h | 11 ++++-- XADZipShrinkHandle.m | 2 +- 3 files changed, 79 insertions(+), 17 deletions(-) diff --git a/LZW.c b/LZW.c index 068543dc..3c6739c9 100644 --- a/LZW.c +++ b/LZW.c @@ -38,6 +38,7 @@ LZW *AllocLZW(int maxsymbols,int reservedsymbols) { self->nodes[i].chr=i; self->nodes[i].parent=-1; + self->nodes[i].inuse=1; } ClearLZWTable(self); @@ -56,30 +57,83 @@ void FreeLZW(LZW *self) void ClearLZWTable(LZW *self) { - self->numsymbols=256+self->reservedsymbols; + self->freesymbols=256+self->reservedsymbols; + for (int i=self->freesymbols; i<self->maxsymbols; i++) + { + self->nodes[i].parent=i+1; + self->nodes[i].inuse=0; + } + self->nodes[self->maxsymbols-1].parent=-1; self->prevsymbol=-1; self->symbolsize=9; // TODO: technically this depends on reservedsymbols } -static uint8_t FindFirstByte(LZWTreeNode *nodes,int symbol) +// Partial clearing as used by the ZIP Shrink algorithm +void ClearLZWLeaves(LZW *self) +{ + int firstsymbol=256+self->reservedsymbols; + + // Mark the parents of any nodes currently in use + // self->nodes[x].inuse will be 2 for any such parents; these nodes will + // not be cleared + for (int i=firstsymbol; i<self->maxsymbols; i++) + { + if (self->nodes[i].inuse) + { + int parent=self->nodes[i].parent; + if (parent>=firstsymbol) self->nodes[parent].inuse=2; + } + } + // Mark leaf nodes as free and rebuild the free list + self->freesymbols=-1; + for (int i=self->maxsymbols-1; i>=firstsymbol; i--) + { + if (self->nodes[i].inuse==2) + { + // This node is not to be cleared + self->nodes[i].inuse=1; + } + else + { + // This node is to be cleared, or was already free + self->nodes[i].inuse=0; + self->nodes[i].parent=self->freesymbols; + self->freesymbols=i; + } + } + // self->prevsymbol is left alone +} + 
+static uint8_t FindFirstByte(LZW *self,int symbol) { - while(nodes[symbol].parent>=0) symbol=nodes[symbol].parent; - return nodes[symbol].chr; + while (1) + { + if (!self->nodes[symbol].inuse && symbol!=self->prevsymbol) + // This can happen after ClearLZWLeaves + // Check for symbol!=self->prevsymbol avoids infinite loop + symbol=self->prevsymbol; + else if (self->nodes[symbol].parent>=0) + symbol=self->nodes[symbol].parent; + else + break; + } + return self->nodes[symbol].chr; } int NextLZWSymbol(LZW *self,int symbol) { if(self->prevsymbol<0) { - if(symbol>=self->numsymbols) return LZWInvalidCodeError; + if(symbol>=self->maxsymbols) return LZWInvalidCodeError; + if(!self->nodes[symbol].inuse) return LZWInvalidCodeError; self->prevsymbol=symbol; return LZWNoError; } int postfixbyte; - if(symbol<self->numsymbols) postfixbyte=FindFirstByte(self->nodes,symbol); - else if(symbol==self->numsymbols) postfixbyte=FindFirstByte(self->nodes,self->prevsymbol); + if(symbol<self->maxsymbols && self->nodes[symbol].inuse) postfixbyte=FindFirstByte(self,symbol); + else if(symbol==self->freesymbols) postfixbyte=FindFirstByte(self,self->prevsymbol); else return LZWInvalidCodeError; int parent=self->prevsymbol; @@ -87,12 +141,14 @@ int NextLZWSymbol(LZW *self,int symbol) if(!LZWSymbolListFull(self)) { - self->nodes[self->numsymbols].parent=parent; - self->nodes[self->numsymbols].chr=postfixbyte; - self->numsymbols++; + int nextsymbol=self->nodes[self->freesymbols].parent; + self->nodes[self->freesymbols].parent=parent; + self->nodes[self->freesymbols].chr=postfixbyte; + self->nodes[self->freesymbols].inuse=1; + self->freesymbols=nextsymbol; if(!LZWSymbolListFull(self)) - if((self->numsymbols&self->numsymbols-1)==0) self->symbolsize++; + if((self->freesymbols&(self->freesymbols-1))==0) self->symbolsize++; return LZWNoError; } @@ -104,10 +160,11 @@ int NextLZWSymbol(LZW *self,int symbol) int ReplaceLZWSymbol(LZW *self,int oldsymbol,int symbol) { - if(symbol>=self->numsymbols) return 
LZWInvalidCodeError; + if(symbol>=self->maxsymbols || !self->nodes[symbol].inuse) return LZWInvalidCodeError; self->nodes[oldsymbol].parent=self->prevsymbol; - self->nodes[oldsymbol].chr=FindFirstByte(self->nodes,symbol); + self->nodes[oldsymbol].chr=FindFirstByte(self,symbol); + self->nodes[oldsymbol].inuse=1; self->prevsymbol=symbol; diff --git a/LZW.h b/LZW.h index 81af8730..1256c16b 100644 --- a/LZW.h +++ b/LZW.h @@ -31,12 +31,13 @@ typedef struct LZWTreeNode { uint8_t chr; + uint8_t inuse; int parent; } LZWTreeNode; typedef struct LZW { - int numsymbols,maxsymbols,reservedsymbols; + int freesymbols,maxsymbols,reservedsymbols; int prevsymbol; int symbolsize; @@ -49,6 +50,7 @@ typedef struct LZW LZW *AllocLZW(int maxsymbols,int reservedsymbols); void FreeLZW(LZW *self); void ClearLZWTable(LZW *self); +void ClearLZWLeaves(LZW *self); int NextLZWSymbol(LZW *self,int symbol); int ReplaceLZWSymbol(LZW *self,int oldsymbol,int symbol); @@ -67,14 +69,17 @@ static inline uint8_t *LZWInternalBuffer(LZW *self) return self->buffer; } +// If ClearLZWLeaves has not been called, self->freesymbols is equal to the +// number of symbols in use. This function will not be reliable if +// ClearLZWLeaves has ever been called on this table. static inline int LZWSymbolCount(LZW *self) { - return self->numsymbols; + return self->freesymbols < 0 ? self->maxsymbols : self->freesymbols; } static inline bool LZWSymbolListFull(LZW *self) { - return self->numsymbols==self->maxsymbols; + return self->freesymbols < 0; } static inline LZWTreeNode *LZWSymbols(LZW *self) diff --git a/XADZipShrinkHandle.m b/XADZipShrinkHandle.m index 260b6e3b..fd3431f8 100644 --- a/XADZipShrinkHandle.m +++ b/XADZipShrinkHandle.m @@ -62,7 +62,7 @@ -(uint8_t)produceByteAtOffset:(off_t)pos symbolsize++; if(symbolsize>13) [XADException raiseDecrunchException]; } - else if(next==2) ClearLZWTable(lzw); + else if(next==2) ClearLZWLeaves(lzw); } else break; }