001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lzw;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.MemoryLimitException;
026import org.apache.commons.compress.compressors.CompressorInputStream;
027import org.apache.commons.compress.utils.BitInputStream;
028import org.apache.commons.compress.utils.InputStreamStatistics;
029
030/**
031 * <p>Generic LZW implementation. It is used internally for
032 * the Z decompressor and the Unshrinking Zip file compression method,
033 * but may be useful for third-party projects in implementing their own LZW variations.</p>
034 *
035 * @NotThreadSafe
036 * @since 1.10
037 */
038public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics {
039    protected static final int DEFAULT_CODE_SIZE = 9;
040    protected static final int UNUSED_PREFIX = -1;
041
042    private final byte[] oneByte = new byte[1];
043
044    protected final BitInputStream in;
045    private int clearCode = -1;
046    private int codeSize = DEFAULT_CODE_SIZE;
047    private byte previousCodeFirstChar;
048    private int previousCode = UNUSED_PREFIX;
049    private int tableSize;
050    private int[] prefixes;
051    private byte[] characters;
052    private byte[] outputStack;
053    private int outputStackLocation;
054
055    protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
056        this.in = new BitInputStream(inputStream, byteOrder);
057    }
058
059    @Override
060    public void close() throws IOException {
061        in.close();
062    }
063
064    @Override
065    public int read() throws IOException {
066        final int ret = read(oneByte);
067        if (ret < 0) {
068            return ret;
069        }
070        return 0xff & oneByte[0];
071    }
072
073    @Override
074    public int read(final byte[] b, final int off, final int len) throws IOException {
075        int bytesRead = readFromStack(b, off, len);
076        while (len - bytesRead > 0) {
077            final int result = decompressNextSymbol();
078            if (result < 0) {
079                if (bytesRead > 0) {
080                    count(bytesRead);
081                    return bytesRead;
082                }
083                return result;
084            }
085            bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
086        }
087        count(bytesRead);
088        return bytesRead;
089    }
090
091    /**
092     * @since 1.17
093     */
094    @Override
095    public long getCompressedCount() {
096        return in.getBytesRead();
097    }
098
099    /**
100     * Read the next code and expand it.
101     * @return the expanded next code, negative on EOF
102     * @throws IOException on error
103     */
104    protected abstract int decompressNextSymbol() throws IOException;
105
106    /**
107     * Add a new entry to the dictionary.
108     * @param previousCode the previous code
109     * @param character the next character to append
110     * @return the new code
111     * @throws IOException on error
112     */
113    protected abstract int addEntry(int previousCode, byte character)
114        throws IOException;
115
116    /**
117     * Sets the clear code based on the code size.
118     * @param codeSize code size
119     */
120    protected void setClearCode(final int codeSize) {
121        clearCode = (1 << (codeSize - 1));
122    }
123
124    /**
125     * Initializes the arrays based on the maximum code size.
126     * First checks that the estimated memory usage is below memoryLimitInKb
127     *
128     * @param maxCodeSize maximum code size
129     * @param memoryLimitInKb maximum allowed estimated memory usage in Kb
130     * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitInKb
131     * @throws IllegalArgumentException if <code>maxCodeSize</code> is not bigger than 0
132     */
133    protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb)
134            throws MemoryLimitException {
135        if (maxCodeSize <= 0) {
136            throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize
137                + ", must be bigger than 0");
138        }
139
140        if (memoryLimitInKb > -1) {
141            final int maxTableSize = 1 << maxCodeSize;
142            //account for potential overflow
143            long memoryUsageInBytes = (long) maxTableSize * 6;//(4 (prefixes) + 1 (characters) +1 (outputStack))
144            long memoryUsageInKb = memoryUsageInBytes >> 10;
145
146            if (memoryUsageInKb > memoryLimitInKb) {
147                throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb);
148            }
149        }
150        initializeTables(maxCodeSize);
151    }
152
153    /**
154     * Initializes the arrays based on the maximum code size.
155     * @param maxCodeSize maximum code size
156     * @throws IllegalArgumentException if <code>maxCodeSize</code> is not bigger than 0
157     */
158    protected void initializeTables(final int maxCodeSize) {
159        if (maxCodeSize <= 0) {
160            throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize
161                + ", must be bigger than 0");
162        }
163        final int maxTableSize = 1 << maxCodeSize;
164        prefixes = new int[maxTableSize];
165        characters = new byte[maxTableSize];
166        outputStack = new byte[maxTableSize];
167        outputStackLocation = maxTableSize;
168        final int max = 1 << 8;
169        for (int i = 0; i < max; i++) {
170            prefixes[i] = -1;
171            characters[i] = (byte) i;
172        }
173    }
174
175    /**
176     * Reads the next code from the stream.
177     * @return the next code
178     * @throws IOException on error
179     */
180    protected int readNextCode() throws IOException {
181        if (codeSize > 31) {
182            throw new IllegalArgumentException("Code size must not be bigger than 31");
183        }
184        return (int) in.readBits(codeSize);
185    }
186
187    /**
188     * Adds a new entry if the maximum table size hasn't been exceeded
189     * and returns the new index.
190     * @param previousCode the previous code
191     * @param character the character to append
192     * @param maxTableSize the maximum table size
193     * @return the new code or -1 if maxTableSize has been reached already
194     */
195    protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
196        if (tableSize < maxTableSize) {
197            prefixes[tableSize] = previousCode;
198            characters[tableSize] = character;
199            return tableSize++;
200        }
201        return -1;
202    }
203
204    /**
205     * Add entry for repeat of previousCode we haven't added, yet.
206     * @return new code for a repeat of the previous code or -1 if
207     * maxTableSize has been reached already
208     * @throws IOException on error
209     */
210    protected int addRepeatOfPreviousCode() throws IOException {
211        if (previousCode == -1) {
212            // can't have a repeat for the very first code
213            throw new IOException("The first code can't be a reference to its preceding code");
214        }
215        return addEntry(previousCode, previousCodeFirstChar);
216    }
217
218    /**
219     * Expands the entry with index code to the output stack and may
220     * create a new entry
221     * @param code the code
222     * @param addedUnfinishedEntry whether unfinished entries have been added
223     * @return the new location of the output stack
224     * @throws IOException on error
225     */
226    protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry)
227        throws IOException {
228        for (int entry = code; entry >= 0; entry = prefixes[entry]) {
229            outputStack[--outputStackLocation] = characters[entry];
230        }
231        if (previousCode != -1 && !addedUnfinishedEntry) {
232            addEntry(previousCode, outputStack[outputStackLocation]);
233        }
234        previousCode = code;
235        previousCodeFirstChar = outputStack[outputStackLocation];
236        return outputStackLocation;
237    }
238
239    private int readFromStack(final byte[] b, final int off, final int len) {
240        final int remainingInStack = outputStack.length - outputStackLocation;
241        if (remainingInStack > 0) {
242            final int maxLength = Math.min(remainingInStack, len);
243            System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
244            outputStackLocation += maxLength;
245            return maxLength;
246        }
247        return 0;
248    }
249
250    protected int getCodeSize() {
251        return codeSize;
252    }
253
254    protected void resetCodeSize() {
255        setCodeSize(DEFAULT_CODE_SIZE);
256    }
257
258    protected void setCodeSize(final int cs) {
259        this.codeSize = cs;
260    }
261
262    protected void incrementCodeSize() {
263        codeSize++;
264    }
265
266    protected void resetPreviousCode() {
267        this.previousCode = -1;
268    }
269
270    protected int getPrefix(final int offset) {
271        return prefixes[offset];
272    }
273
274    protected void setPrefix(final int offset, final int value) {
275        prefixes[offset] = value;
276    }
277
278    protected int getPrefixesLength() {
279        return prefixes.length;
280    }
281
282    protected int getClearCode() {
283        return clearCode;
284    }
285
286    protected int getTableSize() {
287        return tableSize;
288    }
289
290    protected void setTableSize(final int newSize) {
291        tableSize = newSize;
292    }
293
294}