From 60275bf4734205b5e5c919ce62d3625fbf856120 Mon Sep 17 00:00:00 2001 From: Natalia Portillo Date: Fri, 2 Feb 2018 16:17:22 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9BFix=20DiscImageChef=20format=20dedu?= =?UTF-8?q?plication.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DiscImageChef.DiscImages/DiscImageChef.cs | 38 ++++++++++++++++------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/DiscImageChef.DiscImages/DiscImageChef.cs b/DiscImageChef.DiscImages/DiscImageChef.cs index c5f9983b5..628b363e4 100644 --- a/DiscImageChef.DiscImages/DiscImageChef.cs +++ b/DiscImageChef.DiscImages/DiscImageChef.cs @@ -140,7 +140,7 @@ namespace DiscImageChef.DiscImages MemoryStream decompressedStream; bool deduplicate; /// On-memory deduplication table indexed by checksum. - Dictionary deduplicationTable; + Dictionary deduplicationTable; /// writer. FlakeWriter flakeWriter; /// settings. @@ -165,6 +165,7 @@ namespace DiscImageChef.DiscImages Md5Context md5Provider; /// Cache of media tags. Dictionary mediaTags; + bool nocompress; /// If DDT is on-disk, this is the image stream offset at which it starts. long outMemoryDdtPosition; bool rewinded; @@ -190,7 +191,7 @@ namespace DiscImageChef.DiscImages /// In-memory deduplication table ulong[] userDataDdt; bool writingLong; - bool nocompress; + ulong writtenSectors; public DiscImageChef() { @@ -2554,7 +2555,7 @@ namespace DiscImageChef.DiscImages imageStream.Seek(0, SeekOrigin.End); mediaTags = new Dictionary(); checksumProvider = SHA256.Create(); - deduplicationTable = new Dictionary(); + deduplicationTable = new Dictionary(); trackIsrcs = new Dictionary(); trackFlags = new Dictionary(); @@ -2643,15 +2644,24 @@ namespace DiscImageChef.DiscImages if(sectorAddress == 0) alreadyWrittenZero = true; byte[] hash = null; + writtenSectors++; // Compute hash only if asked to deduplicate, or the sector is empty (those will always be deduplicated) if(deduplicate || ArrayHelpers.ArrayIsNullOrEmpty(data)) hash = checksumProvider.ComputeHash(data); + string hashString = null; - if(hash != null && deduplicationTable.TryGetValue(hash, out ulong pointer)) + if(hash != null) { - SetDdtEntry(sectorAddress, pointer); - ErrorMessage = ""; - return true; + StringBuilder hashSb = new StringBuilder(); + foreach(byte h in hash) hashSb.Append(h.ToString("x2")); + hashString = hashSb.ToString(); + + if(deduplicationTable.TryGetValue(hashString, out ulong pointer)) + { + SetDdtEntry(sectorAddress, pointer); + ErrorMessage = ""; + return true; + } } Track trk = new Track(); @@ -2758,8 +2768,8 @@ namespace DiscImageChef.DiscImages sectorSize = (uint)data.Length }; - if(imageInfo.XmlMediaType == XmlMediaType.OpticalDisc && trk.TrackType == TrackType.Audio && !nocompress) - currentBlockHeader.compression = CompressionType.Flac; + if(imageInfo.XmlMediaType == XmlMediaType.OpticalDisc && trk.TrackType == TrackType.Audio && !nocompress + ) currentBlockHeader.compression = CompressionType.Flac; blockStream = new MemoryStream(); decompressedStream = new MemoryStream(); @@ -2771,7 +2781,7 @@ namespace DiscImageChef.DiscImages } ulong ddtEntry = (ulong)((imageStream.Position << shift) + currentBlockOffset); - if(hash != null) deduplicationTable.Add(hash, ddtEntry); + if(hash != null) deduplicationTable.Add(hashString, ddtEntry); if(currentBlockHeader.compression == CompressionType.Flac) { AudioBuffer audioBuffer = new AudioBuffer(AudioPCMConfig.RedBook, data, SAMPLES_PER_SECTOR); @@ -2780,8 +2790,7 @@ namespace DiscImageChef.DiscImages else { decompressedStream.Write(data, 0, data.Length); - if(currentBlockHeader.compression == CompressionType.Lzma) - lzmaBlockStream.Write(data, 0, data.Length); + if(currentBlockHeader.compression == CompressionType.Lzma) lzmaBlockStream.Write(data, 0, data.Length); } SetDdtEntry(sectorAddress, ddtEntry); @@ -3168,6 +3177,11 @@ namespace DiscImageChef.DiscImages imageStream.Write(blockStream.ToArray(), 0, (int)blockStream.Length); } + if(deduplicate) + DicConsole.DebugWriteLine("DiscImageChef format plugin", + "Of {0} sectors written, {1} are unique ({2:P})", writtenSectors, + deduplicationTable.Count, (double)deduplicationTable.Count / writtenSectors); + IndexEntry idxEntry; // Write media tag blocks