The latest source for the latest compression technique.
package uk.co.kring.net; import java.io.EOFException; import java.io.Externalizable; import java.io.FilterInputStream; import java.io.FilterOutputStream; import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; import java.util.HashMap; /** * Created by user on 06/06/2016. */ public class Packer { public static class OutputStream extends FilterOutputStream implements Externalizable { byte[] buf = new byte[4096 * 16];//64K block max int cnt = 0;//pointer to end int[] dmax = new int[16]; HashMap<String, Integer> dict; public OutputStream(java.io.OutputStream out) { super(out); } @Override public void readExternal(ObjectInput input) throws IOException, ClassNotFoundException { out = (java.io.OutputStream)input.readObject(); input.read(buf); cnt = input.readChar(); dict = (HashMap<String, Integer>)input.readObject(); } @Override public void writeExternal(ObjectOutput output) throws IOException { output.writeObject(out); output.write(buf); output.writeChar(cnt); output.writeObject(dict); } @Override public void close() throws IOException { flush(); out.close(); } private boolean compare(int great, int less) { while(true) { if(buf[less] > buf[great]) return false;//immediate wrong if(buf[less] < buf[great]) return true;//immediate right great++; if(great == buf.length) great = 0; less++; if(less == buf.length) less = 0; } } private byte pair = 0; private boolean two = false; private void outputCount(int num, boolean small, boolean tiny) throws IOException { if(tiny) { out.write((byte)num); return; } if(small) { out.write((byte)num); pair = (byte)(pair << 4 + num >> 8); if(two) { two = false; out.write(pair); } else { two = true; } return; } out.write((byte)(num >> 8)); out.write((byte)num); } @Override public void flush() throws IOException { outputCount(cnt, false, false);//just in case length char[] count = new char[256]; if(dict == null) dict = new HashMap<String, Integer>(); for(int i = 0; i < 256; i++) { dmax[i] = 256;//dictionary max count[i] = 0;//letter count } for(int i = 0; i < cnt; i++) { count[buf[i]]++; } char skip = 0; boolean first = true; char acc = 0; char[] start = new char[256]; for(int j = 0; j < 2; j++) { for (int i = 0; i < 256; i++) { if(j == 0) { acc += count[i]; start[i] = acc; } if (count[i] == 0) { skip++; if (first) { outputCount(0, false, true); first = false; } } else { if (skip != 0) { outputCount(skip, false, true); skip = 0; first = true; } outputCount(count[i], false, true); count[i] >>= 8; } } if(skip != 0) outputCount(skip, false, true);//final skip } int[] ptr = new int[buf.length]; byte[] bwt = new byte[buf.length]; outputCount(Sais.bwtransform(buf, bwt, ptr, cnt), false, false); //now an lzw String sym = ""; char context = 0; char lastContext = 0; int test = 0; for(int j = 0; j < cnt; j++) { while(j >= start[context]) context++; if(lastContext == context) { sym += bwt[j];//add a char } else { lastContext = context; outputCount(test, true, false); sym = "" + bwt[j];//new char } if(sym.length() == 1) { test = (int)sym.charAt(0); } else { if(dict.containsKey(context + sym)) { test = dict.get(context + sym); continue; } else { outputCount(test, true, false); if (dmax[context & 0xf] < 0x1000) {//context limit dict.put(context + sym, dmax[context & 0xf]); dmax[context & 0xf]++; } sym = "" + bwt[j];//new symbol } } } outputCount(test, true, false);//last match if(!two) outputCount(0, true, false);//aligned data out.flush(); cnt = 0;//fill next buffer } @Override public void write(int oneByte) throws IOException { if(cnt == buf.length) flush(); buf[cnt++] = (byte)oneByte; } } public static class InputStream extends FilterInputStream implements Externalizable { @Override public void readExternal(ObjectInput input) throws IOException, ClassNotFoundException { in = (java.io.InputStream)input.readObject(); input.read(buf); idx = input.readChar(); cnt = input.readChar(); dict = (HashMap<Integer, String>)input.readObject(); } @Override public void writeExternal(ObjectOutput output) throws IOException { output.writeObject(in); output.write(buf); output.writeChar(idx); output.writeChar(cnt); output.writeObject(dict); } //SEE MIT LICENCE OF Sais.java private static void unbwt(byte[] T, byte[] U, int[] LF, int n, int pidx) { int[] C = new int[256]; int i, t; for(i = 0; i < 256; ++i) { C[i] = 0; } for(i = 0; i < n; ++i) { LF[i] = C[(int)(T[i] & 0xff)]++; } for(i = 0, t = 0; i < 256; ++i) { t += C[i]; C[i] = t - C[i]; } for(i = n - 1, t = 0; 0 <= i; --i) { t = LF[t] + C[(int)((U[i] = T[t]) & 0xff)]; t += (t < pidx) ? 1 : 0; } } byte[] buf = new byte[4096 * 16];//64K block max int cnt = 0;//pointer to end int idx = 0; int[] dmax = new int[16]; HashMap<Integer, String> dict; private boolean two = false; private int vala = 0; private int valb = 0; private int reader() throws IOException { int i = in.read(); if(i == -1) throw new EOFException("End Of Stream"); return i; } private char inCount(boolean small, boolean tiny) throws IOException { if(tiny) return (char)reader(); if(small) { if(!two) { vala = reader(); valb = reader(); int valc = reader(); vala += (valc << 4) & 0xf00; valb += (valc << 8) & 0xf00; two = true; } else { vala = valb; two = false; } return (char)vala; } int val = reader() << 8; val += reader(); return (char)val; } public InputStream(java.io.InputStream in) { super(in); } @Override public int available() throws IOException { return cnt - idx; } @Override public void close() throws IOException { in.close(); } private void doReads() throws IOException { if(available() == 0) { two = false;//align if(dict == null) dict = new HashMap<Integer, String>(); cnt = inCount(false, false); char[] count = new char[256]; char tmp = 0; for(int j = 0; j < 2; j++) { for (int i = 0; i < 256; i++) { count[i] += tmp = (char)(inCount(false, true) << (j == 1?8:0)); if (tmp == 0) { int skip = inCount(false, true); for (int k = 1; k < skip; k++) { count[i + k] += 0; i++;//offset } } } } for(int i = 1; i < 256; i++) { count[i] += count[i - 1];//accumulate } if(cnt != count[255]) throw new IOException("Bad Input Check (character count)"); int choose = inCount(false, false);//read index if(cnt < choose) throw new IOException("Bad Input Check (selected row)"); byte[] build;//make this //then lzw //rosetta code int context = 0; int lastContext = 0; String w = "" + inCount(true, false); StringBuffer result = new StringBuffer(w); while (result.length() < cnt) {//not yet complete char k = inCount(true, false); String entry; while(result.length() > count[context]) { context++;//do first if (context > 255) throw new IOException("Bad Input Check (character count)"); } if(k < 256) entry = "" + k; else if (dict.containsKey(((context & 0xf) << 16) + k)) entry = dict.get(((context & 0xf) << 16) + k); else if (k == dmax[context&0xf]) entry = w + w.charAt(0); else throw new IOException("Bad Input Check (token: " + k + ")"); result.append(entry); // Add w+entry[0] to the dictionary. if(lastContext == context) { if (dmax[context & 0xf] < 0x1000) { dict.put(((context & 0xf) << 16) + (dmax[context & 0xf]++), w + entry.charAt(0)); } w = entry; } else { //context change context = lastContext; //and following context should be a <256 ... if(result.length() < cnt) { w = "" + inCount(true, false); result.append(w); } } } build = result.toString().getBytes(); //working buffers int[] wrk = new int[buf.length]; unbwt(build, buf, wrk, buf.length, choose); idx = 0;//ready for reads } } @Override public int read() throws IOException { try { doReads(); int x = buf[idx++]; doReads();//to prevent avail = 0 never access return x; } catch(EOFException e) { return -1; } } @Override public long skip(long byteCount) throws IOException { long i; for(i = 0; i < byteCount; i++) if(read() == -1) break; return i; } @Override public boolean markSupported() { return false; } @Override public synchronized void reset() throws IOException { throw new IOException("Mark Not Supported"); } } }