The BWT/LZW Decompressor

The latest source for the latest compression technique.

package uk.co.kring.net;

import java.io.EOFException;
import java.io.Externalizable;
import java.io.FilterInputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.HashMap;

/**
 * Created by user on 06/06/2016.
 */
public class Packer {

    public static class OutputStream extends FilterOutputStream implements Externalizable {

        byte[] buf = new byte[4096 * 16];//64K block max
        int cnt = 0;//pointer to end
        int[] dmax = new int[16];
        HashMap<String, Integer> dict;

        public OutputStream(java.io.OutputStream out) {
            super(out);
        }

        @Override
        public void readExternal(ObjectInput input) throws IOException, ClassNotFoundException {
            out = (java.io.OutputStream)input.readObject();
            input.read(buf);
            cnt = input.readChar();
            dict = (HashMap<String, Integer>)input.readObject();
        }

        @Override
        public void writeExternal(ObjectOutput output) throws IOException {
            output.writeObject(out);
            output.write(buf);
            output.writeChar(cnt);
            output.writeObject(dict);
        }

        @Override
        public void close() throws IOException {
            flush();
            out.close();
        }

        private boolean compare(int great, int less) {
            while(true) {
                if(buf[less] > buf[great]) return false;//immediate wrong
                if(buf[less] < buf[great]) return true;//immediate right
                great++;
                if(great == buf.length) great = 0;
                less++;
                if(less == buf.length) less = 0;
            }
        }

        private byte pair = 0;
        private boolean two = false;

        private void outputCount(int num, boolean small, boolean tiny) throws IOException {
            if(tiny) {
                out.write((byte)num);
                return;
            }
            if(small) {
                out.write((byte)num);
                pair = (byte)(pair << 4 + num >> 8);
                if(two) {
                    two = false;
                    out.write(pair);
                } else {
                    two = true;
                }
                return;
            }
            out.write((byte)(num >> 8));
            out.write((byte)num);
        }

        @Override
        public void flush() throws IOException {
            outputCount(cnt, false, false);//just in case length
            char[] count = new char[256];
            if(dict == null) dict = new HashMap<String, Integer>();
            for(int i = 0; i < 256; i++) {
                dmax[i] = 256;//dictionary max
                count[i] = 0;//letter count
            }
            for(int i = 0; i < cnt; i++) {
                count[buf[i]]++;
            }
            char skip = 0;
            boolean first = true;
            char acc = 0;
            char[] start = new char[256];
            for(int j = 0; j < 2; j++) {
                for (int i = 0; i < 256; i++) {
                    if(j == 0) {
                        acc += count[i];
                        start[i] = acc;
                    }
                    if (count[i] == 0) {
                        skip++;
                        if (first) {
                            outputCount(0, false, true);
                            first = false;
                        }
                    } else {
                        if (skip != 0) {
                            outputCount(skip, false, true);
                            skip = 0;
                            first = true;
                        }
                        outputCount(count[i], false, true);
                        count[i] >>= 8;
                    }
                }
                if(skip != 0) outputCount(skip, false, true);//final skip
            }
            int[] ptr = new int[buf.length];
            byte[] bwt = new byte[buf.length];

            outputCount(Sais.bwtransform(buf, bwt, ptr, cnt), false, false);

            //now an lzw
            String sym = "";
            char context = 0;
            char lastContext = 0;
            int test = 0;
            for(int j = 0; j < cnt; j++) {
                while(j >= start[context]) context++;
                if(lastContext == context) {
                    sym += bwt[j];//add a char
                } else {
                    lastContext = context;
                    outputCount(test, true, false);
                    sym = "" + bwt[j];//new char
                }
                if(sym.length() == 1) {
                    test = (int)sym.charAt(0);
                } else {
                    if(dict.containsKey(context + sym)) {
                        test = dict.get(context + sym);
                        continue;
                    } else {
                        outputCount(test, true, false);
                        if (dmax[context & 0xf] < 0x1000) {//context limit
                            dict.put(context + sym, dmax[context & 0xf]);
                            dmax[context & 0xf]++;
                        }
                        sym = "" + bwt[j];//new symbol
                    }
                }
            }
            outputCount(test, true, false);//last match
            if(!two) outputCount(0, true, false);//aligned data
            out.flush();
            cnt = 0;//fill next buffer
        }

        @Override
        public void write(int oneByte) throws IOException {
            if(cnt == buf.length) flush();
            buf[cnt++] = (byte)oneByte;
        }
    }

    public static class InputStream extends FilterInputStream implements Externalizable {

        @Override
        public void readExternal(ObjectInput input) throws IOException, ClassNotFoundException {
            in = (java.io.InputStream)input.readObject();
            input.read(buf);
            idx = input.readChar();
            cnt = input.readChar();
            dict = (HashMap<Integer, String>)input.readObject();
        }

        @Override
        public void writeExternal(ObjectOutput output) throws IOException {
            output.writeObject(in);
            output.write(buf);
            output.writeChar(idx);
            output.writeChar(cnt);
            output.writeObject(dict);
        }

        //SEE MIT LICENCE OF Sais.java

        private static void unbwt(byte[] T, byte[] U, int[] LF, int n, int pidx) {
            int[] C = new int[256];
            int i, t;
            for(i = 0; i < 256; ++i) { C[i] = 0; }
            for(i = 0; i < n; ++i) { LF[i] = C[(int)(T[i] & 0xff)]++; }
            for(i = 0, t = 0; i < 256; ++i) { t += C[i]; C[i] = t - C[i]; }
            for(i = n - 1, t = 0; 0 <= i; --i) {
                t = LF[t] + C[(int)((U[i] = T[t]) & 0xff)];
                t += (t < pidx) ? 1 : 0;
            }
        }

        byte[] buf = new byte[4096 * 16];//64K block max
        int cnt = 0;//pointer to end
        int idx = 0;
        int[] dmax = new int[16];
        HashMap<Integer, String> dict;

        private boolean two = false;
        private int vala = 0;
        private int valb = 0;

        private int reader() throws IOException {
            int i = in.read();
            if(i == -1) throw new EOFException("End Of Stream");
            return i;
        }

        private char inCount(boolean small, boolean tiny) throws IOException {
            if(tiny) return (char)reader();
            if(small) {
                if(!two) {
                    vala = reader();
                    valb = reader();
                    int valc = reader();
                    vala += (valc << 4) & 0xf00;
                    valb += (valc << 8) & 0xf00;
                    two = true;
                } else {
                    vala = valb;
                    two = false;
                }
                return (char)vala;
            }
            int val = reader() << 8;
            val += reader();
            return (char)val;
        }

        public InputStream(java.io.InputStream in) {
            super(in);
        }

        @Override
        public int available() throws IOException {
            return cnt - idx;
        }

        @Override
        public void close() throws IOException {
            in.close();
        }

        private void doReads() throws IOException {
            if(available() == 0) {
                two = false;//align
                if(dict == null) dict = new HashMap<Integer, String>();
                cnt = inCount(false, false);
                char[] count = new char[256];
                char tmp = 0;
                for(int j = 0; j < 2; j++) {
                    for (int i = 0; i < 256; i++) {
                        count[i] += tmp = (char)(inCount(false, true) << (j == 1?8:0));
                        if (tmp == 0) {
                            int skip = inCount(false, true);
                            for (int k = 1; k < skip; k++) {
                                count[i + k] += 0;
                                i++;//offset
                            }
                        }
                    }
                }
                for(int i = 1; i < 256; i++) {
                    count[i] += count[i - 1];//accumulate
                }
                if(cnt != count[255]) throw new IOException("Bad Input Check (character count)");
                int choose = inCount(false, false);//read index
                if(cnt < choose) throw new IOException("Bad Input Check (selected row)");
                byte[] build;//make this
                //then lzw
                //rosetta code
                int context = 0;
                int lastContext = 0;
                String w = "" + inCount(true, false);
                StringBuffer result = new StringBuffer(w);
                while (result.length() < cnt) {//not yet complete
                    char k = inCount(true, false);
                    String entry;
                    while(result.length() > count[context]) {
                        context++;//do first
                        if (context > 255)
                            throw new IOException("Bad Input Check (character count)");
                    }
                    if(k < 256)
                        entry = "" + k;
                    else if (dict.containsKey(((context & 0xf) << 16) + k))
                        entry = dict.get(((context & 0xf) << 16) + k);
                    else if (k == dmax[context&0xf])
                        entry = w + w.charAt(0);
                    else
                        throw new IOException("Bad Input Check (token: " + k + ")");
                    result.append(entry);
                    // Add w+entry[0] to the dictionary.
                    if(lastContext == context) {
                        if (dmax[context & 0xf] < 0x1000) {
                            dict.put(((context & 0xf) << 16) + (dmax[context & 0xf]++), w + entry.charAt(0));
                        }
                        w = entry;
                    } else {
                        //context change
                        context = lastContext;
                        //and following context should be a <256 ...
                        if(result.length() < cnt) {
                            w = "" + inCount(true, false);
                            result.append(w);
                        }
                    }
                }
                build = result.toString().getBytes();
                //working buffers
                int[] wrk = new int[buf.length];
                unbwt(build, buf, wrk, buf.length, choose);
                idx = 0;//ready for reads
            }
        }

        @Override
        public int read() throws IOException {
            try {
                doReads();
                int x = buf[idx++];
                doReads();//to prevent avail = 0 never access
                return x;
            } catch(EOFException e) {
                return -1;
            }
        }

        @Override
        public long skip(long byteCount) throws IOException {
            long i;
            for(i = 0; i < byteCount; i++)
                if(read() == -1) break;
            return i;
        }

        @Override
        public boolean markSupported() {
            return false;
        }

        @Override
        public synchronized void reset() throws IOException {
            throw new IOException("Mark Not Supported");
        }
    }
}