1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.commons.compress.compressors.snappy;
20
21 import java.io.IOException;
22 import java.io.OutputStream;
23
24 import org.apache.commons.compress.compressors.CompressorOutputStream;
25 import org.apache.commons.compress.compressors.lz77support.LZ77Compressor;
26 import org.apache.commons.compress.compressors.lz77support.Parameters;
27 import org.apache.commons.compress.utils.ByteUtils;
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49 public class SnappyCompressorOutputStream extends CompressorOutputStream<OutputStream> {
50
51
52 private static final int MAX_LITERAL_SIZE_WITHOUT_SIZE_BYTES = 60;
53 private static final int MAX_LITERAL_SIZE_WITH_ONE_SIZE_BYTE = 1 << 8;
54 private static final int MAX_LITERAL_SIZE_WITH_TWO_SIZE_BYTES = 1 << 16;
55
56 private static final int MAX_LITERAL_SIZE_WITH_THREE_SIZE_BYTES = 1 << 24;
57
58 private static final int ONE_SIZE_BYTE_MARKER = 60 << 2;
59
60 private static final int TWO_SIZE_BYTE_MARKER = 61 << 2;
61
62 private static final int THREE_SIZE_BYTE_MARKER = 62 << 2;
63
64 private static final int FOUR_SIZE_BYTE_MARKER = 63 << 2;
65
66
67
68 private static final int MIN_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE = 4;
69
70 private static final int MAX_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE = 11;
71
72 private static final int MAX_OFFSET_WITH_ONE_OFFSET_BYTE = 1 << 11 - 1;
73
74 private static final int MAX_OFFSET_WITH_TWO_OFFSET_BYTES = 1 << 16 - 1;
75
76 private static final int ONE_BYTE_COPY_TAG = 1;
77
78 private static final int TWO_BYTE_COPY_TAG = 2;
79 private static final int FOUR_BYTE_COPY_TAG = 3;
80
81
82
83 private static final int MIN_MATCH_LENGTH = 4;
84
85 private static final int MAX_MATCH_LENGTH = 64;
86
87
88
89
90
91
92
93 public static Parameters.Builder createParameterBuilder(final int blockSize) {
94
95
96
97 return Parameters.builder(blockSize).withMinBackReferenceLength(MIN_MATCH_LENGTH).withMaxBackReferenceLength(MAX_MATCH_LENGTH).withMaxOffset(blockSize)
98 .withMaxLiteralLength(blockSize);
99 }
100
101 private final LZ77Compressor compressor;
102 private final ByteUtils.ByteConsumer consumer;
103
104
105 private final byte[] oneByte = new byte[1];
106
107 private boolean finished;
108
109
110
111
112
113
114
115
116 public SnappyCompressorOutputStream(final OutputStream os, final long uncompressedSize) throws IOException {
117 this(os, uncompressedSize, SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE);
118 }
119
120
121
122
123
124
125
126
127
128 public SnappyCompressorOutputStream(final OutputStream os, final long uncompressedSize, final int blockSize) throws IOException {
129 this(os, uncompressedSize, createParameterBuilder(blockSize).build());
130 }
131
132
133
134
135
136
137
138
139
140 public SnappyCompressorOutputStream(final OutputStream out, final long uncompressedSize, final Parameters params) throws IOException {
141 super(out);
142 consumer = new ByteUtils.OutputStreamByteConsumer(out);
143 compressor = new LZ77Compressor(params, block -> {
144 switch (block.getType()) {
145 case LITERAL:
146 writeLiteralBlock((LZ77Compressor.LiteralBlock) block);
147 break;
148 case BACK_REFERENCE:
149 writeBackReference((LZ77Compressor.BackReference) block);
150 break;
151 case EOD:
152 break;
153 }
154 });
155 writeUncompressedSize(uncompressedSize);
156 }
157
158 @Override
159 public void close() throws IOException {
160 try {
161 finish();
162 } finally {
163 out.close();
164 }
165 }
166
167
168
169
170
171
172 public void finish() throws IOException {
173 if (!finished) {
174 compressor.finish();
175 finished = true;
176 }
177 }
178
179 @Override
180 public void write(final byte[] data, final int off, final int len) throws IOException {
181 compressor.compress(data, off, len);
182 }
183
184 @Override
185 public void write(final int b) throws IOException {
186 oneByte[0] = (byte) (b & 0xff);
187 write(oneByte);
188 }
189
190 private void writeBackReference(final LZ77Compressor.BackReference block) throws IOException {
191 final int len = block.getLength();
192 final int offset = block.getOffset();
193 if (len >= MIN_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE && len <= MAX_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE && offset <= MAX_OFFSET_WITH_ONE_OFFSET_BYTE) {
194 writeBackReferenceWithOneOffsetByte(len, offset);
195 } else if (offset < MAX_OFFSET_WITH_TWO_OFFSET_BYTES) {
196 writeBackReferenceWithTwoOffsetBytes(len, offset);
197 } else {
198 writeBackReferenceWithFourOffsetBytes(len, offset);
199 }
200 }
201
202 private void writeBackReferenceWithFourOffsetBytes(final int len, final int offset) throws IOException {
203 writeBackReferenceWithLittleEndianOffset(FOUR_BYTE_COPY_TAG, 4, len, offset);
204 }
205
206 private void writeBackReferenceWithLittleEndianOffset(final int tag, final int offsetBytes, final int len, final int offset) throws IOException {
207 out.write(tag | len - 1 << 2);
208 writeLittleEndian(offsetBytes, offset);
209 }
210
211 private void writeBackReferenceWithOneOffsetByte(final int len, final int offset) throws IOException {
212 out.write(ONE_BYTE_COPY_TAG | len - 4 << 2 | (offset & 0x700) >> 3);
213 out.write(offset & 0xff);
214 }
215
216 private void writeBackReferenceWithTwoOffsetBytes(final int len, final int offset) throws IOException {
217 writeBackReferenceWithLittleEndianOffset(TWO_BYTE_COPY_TAG, 2, len, offset);
218 }
219
220 private void writeLiteralBlock(final LZ77Compressor.LiteralBlock block) throws IOException {
221 final int len = block.getLength();
222 if (len <= MAX_LITERAL_SIZE_WITHOUT_SIZE_BYTES) {
223 writeLiteralBlockNoSizeBytes(block, len);
224 } else if (len <= MAX_LITERAL_SIZE_WITH_ONE_SIZE_BYTE) {
225 writeLiteralBlockOneSizeByte(block, len);
226 } else if (len <= MAX_LITERAL_SIZE_WITH_TWO_SIZE_BYTES) {
227 writeLiteralBlockTwoSizeBytes(block, len);
228 } else if (len <= MAX_LITERAL_SIZE_WITH_THREE_SIZE_BYTES) {
229 writeLiteralBlockThreeSizeBytes(block, len);
230 } else {
231 writeLiteralBlockFourSizeBytes(block, len);
232 }
233 }
234
235 private void writeLiteralBlockFourSizeBytes(final LZ77Compressor.LiteralBlock block, final int len) throws IOException {
236 writeLiteralBlockWithSize(FOUR_SIZE_BYTE_MARKER, 4, len, block);
237 }
238
239 private void writeLiteralBlockNoSizeBytes(final LZ77Compressor.LiteralBlock block, final int len) throws IOException {
240 writeLiteralBlockWithSize(len - 1 << 2, 0, len, block);
241 }
242
243 private void writeLiteralBlockOneSizeByte(final LZ77Compressor.LiteralBlock block, final int len) throws IOException {
244 writeLiteralBlockWithSize(ONE_SIZE_BYTE_MARKER, 1, len, block);
245 }
246
247 private void writeLiteralBlockThreeSizeBytes(final LZ77Compressor.LiteralBlock block, final int len) throws IOException {
248 writeLiteralBlockWithSize(THREE_SIZE_BYTE_MARKER, 3, len, block);
249 }
250
251 private void writeLiteralBlockTwoSizeBytes(final LZ77Compressor.LiteralBlock block, final int len) throws IOException {
252 writeLiteralBlockWithSize(TWO_SIZE_BYTE_MARKER, 2, len, block);
253 }
254
255 private void writeLiteralBlockWithSize(final int tagByte, final int sizeBytes, final int len, final LZ77Compressor.LiteralBlock block) throws IOException {
256 out.write(tagByte);
257 writeLittleEndian(sizeBytes, len - 1);
258 out.write(block.getData(), block.getOffset(), len);
259 }
260
261 private void writeLittleEndian(final int numBytes, final int num) throws IOException {
262 ByteUtils.toLittleEndian(consumer, num, numBytes);
263 }
264
265 private void writeUncompressedSize(long uncompressedSize) throws IOException {
266 boolean more;
267 do {
268 int currentByte = (int) (uncompressedSize & 0x7F);
269 more = uncompressedSize > currentByte;
270 if (more) {
271 currentByte |= 0x80;
272 }
273 out.write(currentByte);
274 uncompressedSize >>= 7;
275 } while (more);
276 }
277 }