1 /// implementation of tar file extraction
2 module dbuild.tar;
3 
/// Print the type flag and the name of every entry of a tar archive to stdout,
/// one entry per line.
///
/// For headers carrying the POSIX ustar magic, a non-empty `prefix` field is
/// joined to the name with a `/`. All-zero blocks (end-of-archive markers and
/// trailing padding) are skipped instead of being listed as entries.
///
/// Params:
///     archive = path of the tar file to read
/// Throws: `Exception` if a header block is shorter than `blockSize`;
///     whatever `File` throws when the archive cannot be opened or read.
void listFiles(in string archive)
{
    import std.exception : enforce;
    import std.stdio : File, writefln;

    auto tarF = File(archive, "rb");

    // number of data blocks still to skip before the next header block
    ulong bypass;

    foreach (chunk; tarF.byChunk(blockSize)) {
        if (bypass) {
            --bypass;
            continue;
        }

        // a zero-filled block is not an entry: do not print it
        if (isNullBlock(chunk)) continue;

        enforce(chunk.length == blockSize);
        TarHeader *hp = cast(TarHeader*)&chunk[0];

        string name = trunc(hp.filename);
        if (hp.magic == posixMagicNum) {
            string prefix = trunc(hp.prefix);
            if (prefix.length) name = prefix ~ "/" ~ name;
        }

        writefln("%s: %s", hp.typeFlag, name);

        // the entry data occupies ceil(size / blockSize) blocks after the header
        bypass = (octalStrToLong(hp.size) + blockSize - 1) / blockSize;
    }
}
33 
/// Check whether all directory entries of the archive share one root directory.
/// Useful to determine if the archive can be extracted "here" or should be
/// extracted within another directory.
///
/// Only entries whose type flag is `TypeFlag.dir` are considered; regular files
/// are ignored. For headers carrying the POSIX ustar magic, a non-empty `prefix`
/// field is joined to the name with a `/` before the root component is taken.
///
/// Params:
///     archive = path of the tar file to read
///     rootDir = receives the first path component of the first directory entry
///               found; stays empty if the archive has no directory entry
/// Returns: `false` as soon as a directory entry with a different first path
///     component is met, `true` otherwise (including when no directory entry
///     exists at all).
/// Throws: `Exception` if a header block is shorter than `blockSize`.
bool isSingleRootDir(in string archive, out string rootDir)
{
    import std.algorithm : findSplit;
    import std.exception : enforce;
    import std.stdio : File;

    auto tarF = File(archive, "rb");

    // number of data blocks still to skip before the next header block
    ulong bypass;

    foreach (chunk; tarF.byChunk(blockSize)) {
        if (bypass) {
            --bypass;
            continue;
        }

        enforce(chunk.length == blockSize);
        TarHeader *hp = cast(TarHeader*)&chunk[0];

        // regular files (TypeFlag.reg) are deliberately not taken into account
        if (hp.typeFlag == TypeFlag.dir) {
            string name = trunc(hp.filename);
            if (hp.magic == posixMagicNum) {
                string prefix = trunc(hp.prefix);
                if (prefix.length) name = prefix ~ "/" ~ name;
            }

            const rd = findSplit(name, "/")[0];
            if (!rootDir.length) {
                rootDir = rd;
            }
            else if (rd != rootDir) {
                return false;
            }
        }

        // the entry data occupies ceil(size / blockSize) blocks after the header
        bypass = (octalStrToLong(hp.size) + blockSize - 1) / blockSize;
    }
    return true;
}
76 
/// Extract a tar file to a directory.
///
/// Reads the archive header by header until two all-zero blocks were seen or
/// the file ends. Directory entries are created (with any missing parents);
/// every other entry is written out as a regular file, with a warning on stderr
/// when its type flag is neither `reg` nor `areg`.
///
/// Params:
///     archive   = path of the tar file to read
///     directory = destination directory, created if it does not exist
/// Throws: `Exception` on an invalid header checksum, on a truncated header or
///     data block, or when an entry name is rooted or contains a `..` component
///     (such a name would be written outside of `directory`).
void extractTo(in string archive, in string directory)
{
    import std.exception : enforce;
    import std.file : mkdirRecurse, setAttributes;
    import std.path : buildPath, dirName, isRooted, pathSplitter;
    import std.stdio : File, stderr;

    mkdirRecurse(directory);

    auto f = File(archive, "rb");

    ubyte[blockSize] block;
    ubyte[blockSize] fileblock;

    int numNullBlocks;
    while (numNullBlocks < 2) {

        auto hb = f.rawRead(block[]);
        if (!hb.length) break;

        if (isNullBlock(hb)) {
            numNullBlocks ++;
            continue;
        }

        enforce(hb.length == blockSize, "Tar truncated header in "~archive);
        TarHeader* th = cast(TarHeader*)(&hb[0]);

        // Check the checksum
        if(!th.confirmChecksum()) {
            throw new Exception("Tar invalid checksum in "~archive);
        }

        string filename = trunc(th.filename);
        if (th.magic == posixMagicNum) {
            // ustar splits long paths in two fields: the full path is prefix/name
            string prefix = trunc(th.prefix);
            if (prefix.length) filename = prefix ~ "/" ~ filename;
        }

        // refuse names that would escape the destination directory
        enforce(!isRooted(filename), "Tar rooted entry name in "~archive~": "~filename);
        foreach (part; pathSplitter(filename)) {
            enforce(part != "..", "Tar entry name escapes destination in "~archive~": "~filename);
        }

        auto sz = cast(size_t)octalStrToLong(th.size);
        const path = buildPath(directory, filename);

        // TODO mode
        if (th.typeFlag == TypeFlag.dir) {
            // tolerate directories that already exist or whose parent has no own entry
            mkdirRecurse(path);
        }
        else {
            if (th.typeFlag != TypeFlag.reg && th.typeFlag != TypeFlag.areg) {
                stderr.writefln(
                    "Unknown tar file type for \"%s\" : %s. Treating as regular file",
                    filename, th.typeFlag
                );
            }

            // archives are not required to hold an entry for every parent directory
            mkdirRecurse(dirName(path));

            {
                // scoped so that the file is closed before its attributes are set
                auto nf = File(path, "wb");
                while(sz > 0) {
                    auto fb = f.rawRead(fileblock[]);
                    const copyLen = sz > blockSize ? blockSize : sz;
                    enforce(fb.length >= copyLen, "Tar truncated data in "~archive);
                    nf.rawWrite(fb[0 .. copyLen]);
                    sz -= copyLen;
                }
            }

            // tar stores Unix permission bits; they are not valid attributes elsewhere
            version (Posix) {
                const mode = octalStrToInt(th.mode);
                setAttributes(path, mode);
            }
        }

    }
}
147 
/// Size in bytes of one tar block: headers and entry data are read in units of this size.
private enum blockSize = 512;
149 
/// Values of the header's one-byte type flag, identifying the kind of entry.
/// Member meanings below follow the ustar type flag convention.
private enum TypeFlag : ubyte
{
    areg    = 0,    // NUL byte; extractTo handles it like `reg`
    reg     = '0',  // regular file
    link    = '1',  // hard link
    sym     = '2',  // symbolic link
    chr     = '3',  // character device
    blk     = '4',  // block device
    dir     = '5',  // directory
    fifo    = '6',  // FIFO
    cont    = '7',  // contiguous file
}
162 
/// Layout of one tar header block.
///
/// The struct is overlaid directly on the raw bytes of a block (see the
/// `static assert` on its size), so field order and sizes must not change.
/// Numeric fields hold their value as octal text.
private struct TarHeader
{
    char[100] filename;         // entry name, NUL terminated unless it fills the field
    char[8] mode;               // permission bits, octal text
    char[8] ownerId;
    char[8] groupId;
    char[12] size;              // size of the entry data in bytes, octal text
    char[12] modificationTime;
    char[8] checksum;           // header checksum, octal text
    TypeFlag typeFlag;
    char[100] linkedFilename;

    char[6] magic;              // compared against posixMagicNum to detect ustar headers
    char[2] tarVersion;
    char[32] owner;
    char[32] group;
    char[8] deviceMajorNumber;
    char[8] deviceMinorNumber;
    char[155] prefix;           // leading part of the path for ustar headers
    char[12] padding;           // fills the header up to blockSize

    /// Compare the checksum stored in the header with the one computed from
    /// its content.
    ///
    /// Returns: true if the stored value matches either the unsigned or the
    ///     signed computation.
    bool confirmChecksum()
    {
        const uint apparentChecksum = octalStrToInt(checksum);

        if (apparentChecksum == calculateUnsignedChecksum()) return true;

        // Handle old tars which use a broken implementation that calculated the
        // checksum incorrectly (using signed chars instead of unsigned).
        return apparentChecksum == calculateSignedChecksum();
    }

    /// Set every byte of the header to zero.
    void nullify()
    {
        filename = 0;
        mode = 0;
        ownerId = 0;
        groupId = 0;
        size = 0;
        modificationTime = 0;
        checksum = 0;
        typeFlag = cast(TypeFlag)0;
        linkedFilename = 0;
        magic = 0;
        tarVersion = 0;
        owner = 0;
        group = 0;
        deviceMajorNumber = 0;
        deviceMinorNumber = 0;
        prefix = 0;
        padding = 0;
    }

    /// Sum of all header bytes taken as unsigned values, the checksum field
    /// itself being counted as eight blanks.
    uint calculateUnsignedChecksum()
    {
        uint sum;
        sum += unsignedSum(filename);
        sum += unsignedSum(mode);
        sum += unsignedSum(ownerId);
        sum += unsignedSum(groupId);
        sum += unsignedSum(size);
        sum += unsignedSum(modificationTime);
        sum += 32 * 8; // checksum is treated as all blanks
        sum += typeFlag;
        sum += unsignedSum(linkedFilename);
        sum += unsignedSum(magic);
        sum += unsignedSum(tarVersion);
        sum += unsignedSum(owner);
        sum += unsignedSum(group);
        sum += unsignedSum(deviceMajorNumber);
        sum += unsignedSum(deviceMinorNumber);
        sum += unsignedSum(prefix);
        sum += unsignedSum(padding); // the checksum covers the whole block
        return sum;
    }

    /// Same as calculateUnsignedChecksum, but every byte is taken as a signed
    /// value. The result wraps modulo 2^32.
    uint calculateSignedChecksum()
    {
        uint sum;
        sum += signedSum(filename);
        sum += signedSum(mode);
        sum += signedSum(ownerId);
        sum += signedSum(groupId);
        sum += signedSum(size);
        sum += signedSum(modificationTime);
        sum += 32 * 8; // checksum is treated as all blanks
        sum += cast(byte)typeFlag;
        sum += signedSum(linkedFilename);
        sum += signedSum(magic);
        sum += signedSum(tarVersion);
        sum += signedSum(owner);
        sum += signedSum(group);
        sum += signedSum(deviceMajorNumber);
        sum += signedSum(deviceMinorNumber);
        sum += signedSum(prefix);
        sum += signedSum(padding); // the checksum covers the whole block
        return sum;
    }

    /// Sum of the bytes of `values`, each taken as an unsigned value.
    private static uint unsignedSum(char[] values)
    {
        uint result;
        foreach(char c ; values)
        {
            result += c;
        }
        return result;
    }

    /// Sum of the bytes of `values`, each taken as a signed value.
    private static uint signedSum(char[] values)
    {
        uint result;
        foreach(byte b ; cast(byte[])values)
        {
            result += b;
        }
        return result;
    }
}
286 
287 static assert (TarHeader.sizeof == blockSize);
288 
/// Content of the `magic` header field identifying a POSIX ustar header.
/// Immutable: it is a constant and must not be reassignable at run time.
private immutable string posixMagicNum = "ustar\0";
290 
/// Tell whether `block` is a full block (exactly `blockSize` bytes) made only
/// of zero bytes.
private bool isNullBlock(const(ubyte)[] block)
{
    if (block.length != blockSize) return false;

    // walk over the leading zero bytes; the block is null if that covers all of it
    size_t zeros = 0;
    while (zeros < block.length && block[zeros] == 0) {
        ++zeros;
    }
    return zeros == block.length;
}
299 
/// Read a header field holding octal text as a `uint`.
///
/// Whitespace around the field is stripped, then the value is read with the
/// `%o` format.
///
/// Params:
///     octal = raw content of the header field
/// Returns: the value read; 0 if `formattedRead` stores nothing.
private uint octalStrToInt(char[] octal)
{
    import std.format : formattedRead;
    import std.string : strip;

    string s = cast(string)(strip(octal));
    uint result;
    formattedRead(s, "%o ", &result);
    return result;
}
310 
/// Read a header field holding octal text as a `ulong`.
///
/// Whitespace around the field is stripped, then the value is read with the
/// `%o` format.
///
/// Params:
///     octal = raw content of the header field
/// Returns: the value read; 0 if `formattedRead` stores nothing.
private ulong octalStrToLong(char[] octal)
{
    import std.format : formattedRead;
    import std.string : strip;

    string s = cast(string)(strip(octal));
    ulong result;
    formattedRead(s, "%o ", &result);
    return result;
}
321 
/// Build a field of exactly `length` chars from `str`.
///
/// Params:
///     str    = text to store in the field
///     length = size of the field
/// Returns: a new array of `length` chars holding the start of `str`; `str` is
///     cut if it is longer than `length`, and the remainder of the field is
///     filled with NUL chars if it is shorter.
private char[] strToBytes(string str, uint length)
{
    import std.algorithm : min;

    char[] result = new char[length];
    // copy no more than what fits: slice copies need equal lengths on both sides
    const copied = min(str.length, length);
    result[0 .. copied] = str[0 .. copied];
    // new char[] is not zero initialised, so the tail is cleared explicitly
    result[copied .. $] = '\0';
    return result;
}
331 
/// Return an immutable copy of `input` cut at its first NUL char, or a copy of
/// the whole of `input` if it holds no NUL char.
private string trunc(char[] input)
{
    size_t end = input.length;
    foreach (idx, ch; input)
    {
        if (ch == '\0')
        {
            end = idx;
            break;
        }
    }
    return input[0 .. end].idup;
}