[DatFile, DatItem] Make finding duplicates better

This commit is contained in:
Matt Nadareski
2016-10-06 11:42:55 -07:00
parent 1d48490798
commit 32203a5496
3 changed files with 481 additions and 205 deletions

View File

@@ -33,6 +33,19 @@
Unzip, Unzip,
} }
/// <summary>
/// Identifies the key by which a DAT's files dictionary is currently bucketed
/// </summary>
public enum SortedBy
{
    /// <summary>No particular bucketing has been recorded (zero value)</summary>
    Default = 0,

    /// <summary>Dictionary keys are item sizes</summary>
    Size = 1,

    /// <summary>Dictionary keys are CRC values</summary>
    CRC = 2,

    /// <summary>Dictionary keys are MD5 values</summary>
    MD5 = 3,

    /// <summary>Dictionary keys are SHA1 values</summary>
    SHA1 = 4,

    /// <summary>Dictionary keys are game based</summary>
    Game = 5,
}
#endregion #endregion
#region DatItem related #region DatItem related

View File

@@ -37,6 +37,7 @@ namespace SabreTools.Helper
private bool _excludeOf; private bool _excludeOf;
private bool _mergeRoms; private bool _mergeRoms;
private SortedDictionary<string, List<DatItem>> _files; private SortedDictionary<string, List<DatItem>> _files;
private SortedBy _sortedBy;
// Data specific to the Miss DAT type // Data specific to the Miss DAT type
private bool _useGame; private bool _useGame;
@@ -179,6 +180,11 @@ namespace SabreTools.Helper
_files = value; _files = value;
} }
} }
/// <summary>
/// Recorded bucketing of the files dictionary. Assigning a value only
/// updates the record; it does not reorganize the dictionary itself.
/// </summary>
public SortedBy SortedBy
{
    get
    {
        return _sortedBy;
    }
    set
    {
        _sortedBy = value;
    }
}
// Data specific to the Miss DAT type // Data specific to the Miss DAT type
public bool UseGame public bool UseGame
@@ -271,123 +277,6 @@ namespace SabreTools.Helper
#endregion #endregion
#region Constructors
/// <summary>
/// Create an empty Dat object with every field left at its default value
/// </summary>
public DatFile()
{
    // Intentionally empty: there is nothing to initialize
}
/// <summary>
/// Create a Dat object populated with standard DAT header information
/// </summary>
/// <param name="fileName">Filename to store</param>
/// <param name="name">Name to store</param>
/// <param name="description">Description to store</param>
/// <param name="rootDir">Rootdir to store</param>
/// <param name="category">Category to store</param>
/// <param name="version">Version to store</param>
/// <param name="date">Date to store</param>
/// <param name="author">Author to store</param>
/// <param name="email">Email to store</param>
/// <param name="homepage">Homepage to store</param>
/// <param name="url">URL to store</param>
/// <param name="comment">Comment to store</param>
/// <param name="header">Header to store</param>
/// <param name="type">Type string to store (stored as given)</param>
/// <param name="forceMerging">None, Split, Full</param>
/// <param name="forceNodump">None, Obsolete, Required, Ignore</param>
/// <param name="forcePacking">None, Zip, Unzip</param>
/// <param name="outputFormat">Non-zero flag for output format, zero otherwise for default</param>
/// <param name="mergeRoms">True to dedupe the roms in the DAT, false otherwise (default)</param>
/// <param name="files">SortedDictionary of lists of DatItem objects</param>
public DatFile(string fileName, string name, string description, string rootDir, string category, string version, string date,
    string author, string email, string homepage, string url, string comment, string header, string type, ForceMerging forceMerging,
    ForceNodump forceNodump, ForcePacking forcePacking, OutputFormat outputFormat, bool mergeRoms, SortedDictionary<string, List<DatItem>> files)
{
    // Descriptive header fields
    _fileName = fileName;
    _name = name;
    _description = description;
    _rootDir = rootDir;
    _category = category;
    _version = version;
    _date = date;
    _author = author;
    _email = email;
    _homepage = homepage;
    _url = url;
    _comment = comment;
    _header = header;
    _type = type;

    // Flags controlling merging, packing and output
    _forceMerging = forceMerging;
    _forceNodump = forceNodump;
    _forcePacking = forcePacking;
    _outputFormat = outputFormat;
    _mergeRoms = mergeRoms;

    // The item data itself
    _files = files;

    // Statistics always start from zero
    _romCount = 0;
    _diskCount = 0;
    _totalSize = 0;
    _crcCount = 0;
    _md5Count = 0;
    _sha1Count = 0;
    _nodumpCount = 0;
}
/// <summary>
/// Create a Dat object populated with the information used for missfile output
/// </summary>
/// <param name="fileName">Filename to store</param>
/// <param name="name">Name to store</param>
/// <param name="description">Description to store</param>
/// <param name="outputFormat">Non-zero flag for output format, zero otherwise for default</param>
/// <param name="mergeRoms">True to dedupe the roms in the DAT, false otherwise (default)</param>
/// <param name="files">SortedDictionary of lists of DatItem objects</param>
/// <param name="useGame">True if games are to be used in output, false if roms are</param>
/// <param name="prefix">Generic prefix to be added to each line</param>
/// <param name="postfix">Generic postfix to be added to each line</param>
/// <param name="quotes">Add quotes to each item</param>
/// <param name="repExt">Replace all extensions with another</param>
/// <param name="addExt">Add an extension to all items</param>
/// <param name="remExt">Remove all extensions</param>
/// <param name="gameName">Add the dat name as a directory prefix</param>
/// <param name="romba">Output files in romba format</param>
public DatFile(string fileName, string name, string description, OutputFormat outputFormat, bool mergeRoms,
    SortedDictionary<string, List<DatItem>> files, bool useGame, string prefix, string postfix, bool quotes,
    string repExt, string addExt, bool remExt, bool gameName, bool romba)
{
    // Descriptive header fields
    _fileName = fileName;
    _name = name;
    _description = description;

    // General output behavior and the item data
    _outputFormat = outputFormat;
    _mergeRoms = mergeRoms;
    _files = files;

    // Settings specific to the Miss DAT type
    _useGame = useGame;
    _prefix = prefix;
    _postfix = postfix;
    _quotes = quotes;
    _repExt = repExt;
    _addExt = addExt;
    _remExt = remExt;
    _gameName = gameName;
    _romba = romba;

    // Statistics always start from zero
    _romCount = 0;
    _diskCount = 0;
    _totalSize = 0;
    _crcCount = 0;
    _md5Count = 0;
    _sha1Count = 0;
    _nodumpCount = 0;
}
#endregion
#region Instance Methods #region Instance Methods
#region Bucketing #region Bucketing
@@ -401,7 +290,14 @@ namespace SabreTools.Helper
/// <param name="output">True if the number of hashes counted is to be output (default), false otherwise</param> /// <param name="output">True if the number of hashes counted is to be output (default), false otherwise</param>
public void BucketByGame(bool mergeroms, bool norename, Logger logger, bool output = true) public void BucketByGame(bool mergeroms, bool norename, Logger logger, bool output = true)
{ {
logger.User("Organizing " + (mergeroms ? "and merging " : "") + "roms for output"); // If we already have the right sorting, trust it
if (_sortedBy == SortedBy.Game)
{
return;
}
// Set the sorted type
_sortedBy = SortedBy.Game;
SortedDictionary<string, List<DatItem>> sortable = new SortedDictionary<string, List<DatItem>>(); SortedDictionary<string, List<DatItem>> sortable = new SortedDictionary<string, List<DatItem>>();
long count = 0; long count = 0;
@@ -412,6 +308,8 @@ namespace SabreTools.Helper
Files = sortable; Files = sortable;
} }
logger.User("Organizing " + (mergeroms ? "and merging " : "") + "roms for output");
// Process each all of the roms // Process each all of the roms
List<string> keys = Files.Keys.ToList(); List<string> keys = Files.Keys.ToList();
foreach (string key in keys) foreach (string key in keys)
@@ -468,6 +366,322 @@ namespace SabreTools.Helper
Files = sortable; Files = sortable;
} }
/// <summary>
/// Rebuild the Files dictionary so that every key is an item size
/// </summary>
/// <param name="mergeroms">True to run each existing list through DatItem.Merge before re-bucketing, false otherwise</param>
/// <param name="logger">Logger object for file and console output</param>
/// <param name="output">True if the number of hashes counted is to be output (default), false otherwise</param>
/// <remarks>
/// Returns immediately, without merging or logging, when the dictionary is already
/// recorded as bucketed by size. Roms are keyed by their size converted to a string;
/// every other item type is keyed by "-1".
/// </remarks>
public void BucketBySize(bool mergeroms, Logger logger, bool output = true)
{
    // An existing size bucketing is trusted as-is
    if (_sortedBy == SortedBy.Size)
    {
        return;
    }

    // Record the new bucketing before doing the work
    _sortedBy = SortedBy.Size;

    SortedDictionary<string, List<DatItem>> bySize = new SortedDictionary<string, List<DatItem>>();
    long total = 0;

    // Make sure there is a dictionary to enumerate below
    if (Files == null || Files.Count == 0)
    {
        Files = bySize;
    }

    logger.User("Organizing " + (mergeroms ? "and merging " : "") + "roms for output");

    // Walk a snapshot of the current keys and move every item into its size bucket
    foreach (string oldKey in Files.Keys.ToList())
    {
        List<DatItem> items = Files[oldKey];
        if (mergeroms)
        {
            items = DatItem.Merge(items, logger);
        }

        foreach (DatItem item in items)
        {
            total++;

            string sizeKey;
            if (item.Type == ItemType.Rom)
            {
                sizeKey = ((Rom)item).Size.ToString();
            }
            else
            {
                sizeKey = "-1";
            }

            List<DatItem> bucket;
            if (!bySize.TryGetValue(sizeKey, out bucket))
            {
                bucket = new List<DatItem>();
                bySize.Add(sizeKey, bucket);
            }
            bucket.Add(item);
        }
    }

    // Sort each bucket; the list is stored back because Sort takes it by reference
    foreach (string bucketKey in bySize.Keys.ToList())
    {
        List<DatItem> ordered = bySize[bucketKey];
        DatItem.Sort(ref ordered, false);
        bySize[bucketKey] = ordered;
    }

    if (output)
    {
        logger.User("A total of " + total + " file hashes will be written out to file");
    }

    // Publish the re-bucketed dictionary
    Files = bySize;
}
/// <summary>
/// Rebuild the Files dictionary so that every key is a CRC value
/// </summary>
/// <param name="mergeroms">True to run each existing list through DatItem.Merge before re-bucketing, false otherwise</param>
/// <param name="logger">Logger object for file and console output</param>
/// <param name="output">True if the number of hashes counted is to be output (default), false otherwise</param>
/// <remarks>
/// Returns immediately, without merging or logging, when the dictionary is already
/// recorded as bucketed by CRC. Roms are keyed by their CRC; items that are not roms,
/// and roms whose CRC is null, are keyed by Constants.CRCZero.
/// </remarks>
public void BucketByCRC(bool mergeroms, Logger logger, bool output = true)
{
    // An existing CRC bucketing is trusted as-is
    if (_sortedBy == SortedBy.CRC)
    {
        return;
    }

    // Record the new bucketing before doing the work
    _sortedBy = SortedBy.CRC;

    SortedDictionary<string, List<DatItem>> byCrc = new SortedDictionary<string, List<DatItem>>();
    long total = 0;

    // Make sure there is a dictionary to enumerate below
    if (Files == null || Files.Count == 0)
    {
        Files = byCrc;
    }

    logger.User("Organizing " + (mergeroms ? "and merging " : "") + "roms for output");

    // Walk a snapshot of the current keys and move every item into its CRC bucket
    foreach (string oldKey in Files.Keys.ToList())
    {
        List<DatItem> items = Files[oldKey];
        if (mergeroms)
        {
            items = DatItem.Merge(items, logger);
        }

        foreach (DatItem item in items)
        {
            total++;

            // A null key would make the dictionary lookup throw, so a rom
            // without a CRC falls back to the same key as non-rom items
            string crcKey = (item.Type == ItemType.Rom ? ((Rom)item).CRC : null) ?? Constants.CRCZero;

            List<DatItem> bucket;
            if (!byCrc.TryGetValue(crcKey, out bucket))
            {
                bucket = new List<DatItem>();
                byCrc.Add(crcKey, bucket);
            }
            bucket.Add(item);
        }
    }

    // Sort each bucket; the list is stored back because Sort takes it by reference
    foreach (string bucketKey in byCrc.Keys.ToList())
    {
        List<DatItem> ordered = byCrc[bucketKey];
        DatItem.Sort(ref ordered, false);
        byCrc[bucketKey] = ordered;
    }

    if (output)
    {
        logger.User("A total of " + total + " file hashes will be written out to file");
    }

    // Publish the re-bucketed dictionary
    Files = byCrc;
}
/// <summary>
/// Rebuild the Files dictionary so that every key is an MD5 value
/// </summary>
/// <param name="mergeroms">True to run each existing list through DatItem.Merge before re-bucketing, false otherwise</param>
/// <param name="logger">Logger object for file and console output</param>
/// <param name="output">True if the number of hashes counted is to be output (default), false otherwise</param>
/// <remarks>
/// Returns immediately, without merging or logging, when the dictionary is already
/// recorded as bucketed by MD5. Roms and disks are keyed by their MD5; any other item
/// type, and any rom or disk whose MD5 is null, is keyed by Constants.MD5Zero.
/// </remarks>
public void BucketByMD5(bool mergeroms, Logger logger, bool output = true)
{
    // An existing MD5 bucketing is trusted as-is
    if (_sortedBy == SortedBy.MD5)
    {
        return;
    }

    // Record the new bucketing before doing the work
    _sortedBy = SortedBy.MD5;

    SortedDictionary<string, List<DatItem>> byMd5 = new SortedDictionary<string, List<DatItem>>();
    long total = 0;

    // Make sure there is a dictionary to enumerate below
    if (Files == null || Files.Count == 0)
    {
        Files = byMd5;
    }

    logger.User("Organizing " + (mergeroms ? "and merging " : "") + "roms for output");

    // Walk a snapshot of the current keys and move every item into its MD5 bucket
    foreach (string oldKey in Files.Keys.ToList())
    {
        List<DatItem> items = Files[oldKey];
        if (mergeroms)
        {
            items = DatItem.Merge(items, logger);
        }

        foreach (DatItem item in items)
        {
            total++;

            string md5Key = null;
            if (item.Type == ItemType.Rom)
            {
                md5Key = ((Rom)item).MD5;
            }
            else if (item.Type == ItemType.Disk)
            {
                md5Key = ((Disk)item).MD5;
            }

            // A null key would make the dictionary lookup throw, so items
            // without an MD5 share the key used for hash-less item types
            if (md5Key == null)
            {
                md5Key = Constants.MD5Zero;
            }

            List<DatItem> bucket;
            if (!byMd5.TryGetValue(md5Key, out bucket))
            {
                bucket = new List<DatItem>();
                byMd5.Add(md5Key, bucket);
            }
            bucket.Add(item);
        }
    }

    // Sort each bucket; the list is stored back because Sort takes it by reference
    foreach (string bucketKey in byMd5.Keys.ToList())
    {
        List<DatItem> ordered = byMd5[bucketKey];
        DatItem.Sort(ref ordered, false);
        byMd5[bucketKey] = ordered;
    }

    if (output)
    {
        logger.User("A total of " + total + " file hashes will be written out to file");
    }

    // Publish the re-bucketed dictionary
    Files = byMd5;
}
/// <summary>
/// Rebuild the Files dictionary so that every key is a SHA1 value
/// </summary>
/// <param name="mergeroms">True to run each existing list through DatItem.Merge before re-bucketing, false otherwise</param>
/// <param name="logger">Logger object for file and console output</param>
/// <param name="output">True if the number of hashes counted is to be output (default), false otherwise</param>
/// <remarks>
/// Returns immediately, without merging or logging, when the dictionary is already
/// recorded as bucketed by SHA1. Roms and disks are keyed by their SHA1; any other item
/// type, and any rom or disk whose SHA1 is null, is keyed by Constants.SHA1Zero.
/// </remarks>
public void BucketBySHA1(bool mergeroms, Logger logger, bool output = true)
{
    // An existing SHA1 bucketing is trusted as-is
    if (_sortedBy == SortedBy.SHA1)
    {
        return;
    }

    // Record the new bucketing before doing the work
    _sortedBy = SortedBy.SHA1;

    SortedDictionary<string, List<DatItem>> bySha1 = new SortedDictionary<string, List<DatItem>>();
    long total = 0;

    // Make sure there is a dictionary to enumerate below
    if (Files == null || Files.Count == 0)
    {
        Files = bySha1;
    }

    logger.User("Organizing " + (mergeroms ? "and merging " : "") + "roms for output");

    // Walk a snapshot of the current keys and move every item into its SHA1 bucket
    foreach (string oldKey in Files.Keys.ToList())
    {
        List<DatItem> items = Files[oldKey];
        if (mergeroms)
        {
            items = DatItem.Merge(items, logger);
        }

        foreach (DatItem item in items)
        {
            total++;

            string sha1Key = null;
            if (item.Type == ItemType.Rom)
            {
                sha1Key = ((Rom)item).SHA1;
            }
            else if (item.Type == ItemType.Disk)
            {
                sha1Key = ((Disk)item).SHA1;
            }

            // Fall back to the SHA1 zero value (not the MD5 one) so the key has
            // the same form as every other key in this dictionary; this also
            // prevents a null key, which would make the dictionary lookup throw
            // NOTE(review): assumes Constants.SHA1Zero exists alongside CRCZero/MD5Zero - confirm
            if (sha1Key == null)
            {
                sha1Key = Constants.SHA1Zero;
            }

            List<DatItem> bucket;
            if (!bySha1.TryGetValue(sha1Key, out bucket))
            {
                bucket = new List<DatItem>();
                bySha1.Add(sha1Key, bucket);
            }
            bucket.Add(item);
        }
    }

    // Sort each bucket; the list is stored back because Sort takes it by reference
    foreach (string bucketKey in bySha1.Keys.ToList())
    {
        List<DatItem> ordered = bySha1[bucketKey];
        DatItem.Sort(ref ordered, false);
        bySha1[bucketKey] = ordered;
    }

    if (output)
    {
        logger.User("A total of " + total + " file hashes will be written out to file");
    }

    // Publish the re-bucketed dictionary
    Files = bySha1;
}
#endregion #endregion
#region Cloning Methods #region Cloning Methods
@@ -476,44 +690,45 @@ namespace SabreTools.Helper
{ {
return new DatFile return new DatFile
{ {
FileName = this.FileName, FileName = _fileName,
Name = this.Name, Name = _name,
Description = this.Description, Description = _description,
RootDir = this.RootDir, RootDir = _rootDir,
Category = this.Category, Category = _category,
Version = this.Version, Version = _version,
Date = this.Date, Date = _date,
Author = this.Author, Author = _author,
Email = this.Email, Email = _email,
Homepage = this.Homepage, Homepage = _homepage,
Url = this.Url, Url = _url,
Comment = this.Comment, Comment = _comment,
Header = this.Header, Header = _header,
Type = this.Type, Type = _type,
ForceMerging = this.ForceMerging, ForceMerging = _forceMerging,
ForceNodump = this.ForceNodump, ForceNodump = _forceNodump,
ForcePacking = this.ForcePacking, ForcePacking = _forcePacking,
ExcludeOf = this.ExcludeOf, ExcludeOf = _excludeOf,
OutputFormat = this.OutputFormat, OutputFormat = _outputFormat,
MergeRoms = this.MergeRoms, MergeRoms = _mergeRoms,
Files = this.Files, Files = _files,
UseGame = this.UseGame, SortedBy = _sortedBy,
Prefix = this.Prefix, UseGame = _useGame,
Postfix = this.Postfix, Prefix = _prefix,
Quotes = this.Quotes, Postfix = _postfix,
RepExt = this.RepExt, Quotes = _quotes,
AddExt = this.AddExt, RepExt = _repExt,
RemExt = this.RemExt, AddExt = _addExt,
GameName = this.GameName, RemExt = _remExt,
Romba = this.Romba, GameName = _gameName,
RomCount = this.RomCount, Romba = _romba,
DiskCount = this.DiskCount, RomCount = _romCount,
TotalSize = this.TotalSize, DiskCount = _diskCount,
CRCCount = this.CRCCount, TotalSize = _totalSize,
MD5Count = this.MD5Count, CRCCount = _crcCount,
SHA1Count = this.SHA1Count, MD5Count = _md5Count,
BaddumpCount = this.BaddumpCount, SHA1Count = _sha1Count,
NodumpCount = this.NodumpCount, BaddumpCount = _baddumpCount,
NodumpCount = _nodumpCount,
}; };
} }
@@ -521,36 +736,37 @@ namespace SabreTools.Helper
{ {
return new DatFile return new DatFile
{ {
FileName = this.FileName, FileName = _fileName,
Name = this.Name, Name = _name,
Description = this.Description, Description = _description,
RootDir = this.RootDir, RootDir = _rootDir,
Category = this.Category, Category = _category,
Version = this.Version, Version = _version,
Date = this.Date, Date = _date,
Author = this.Author, Author = _author,
Email = this.Email, Email = _email,
Homepage = this.Homepage, Homepage = _homepage,
Url = this.Url, Url = _url,
Comment = this.Comment, Comment = _comment,
Header = this.Header, Header = _header,
Type = this.Type, Type = _type,
ForceMerging = this.ForceMerging, ForceMerging = _forceMerging,
ForceNodump = this.ForceNodump, ForceNodump = _forceNodump,
ForcePacking = this.ForcePacking, ForcePacking = _forcePacking,
ExcludeOf = this.ExcludeOf, ExcludeOf = _excludeOf,
OutputFormat = this.OutputFormat, OutputFormat = _outputFormat,
MergeRoms = this.MergeRoms, MergeRoms = _mergeRoms,
Files = new SortedDictionary<string, List<DatItem>>(), Files = new SortedDictionary<string, List<DatItem>>(),
UseGame = this.UseGame, SortedBy = SortedBy.Default,
Prefix = this.Prefix, UseGame = _useGame,
Postfix = this.Postfix, Prefix = _prefix,
Quotes = this.Quotes, Postfix = _postfix,
RepExt = this.RepExt, Quotes = _quotes,
AddExt = this.AddExt, RepExt = _repExt,
RemExt = this.RemExt, AddExt = _addExt,
GameName = this.GameName, RemExt = _remExt,
Romba = this.Romba, GameName = _gameName,
Romba = _romba,
}; };
} }
@@ -3195,7 +3411,14 @@ namespace SabreTools.Helper
*/ */
string[] rominfo = line.Split('¬'); string[] rominfo = line.Split('¬');
Rom rom = new Rom(rominfo[5], Int64.Parse(rominfo[7]), rominfo[6], null, null, ItemStatus.None, null, rominfo[3], null, // Try getting the size separately
long size = 0;
if (!Int64.TryParse(rominfo[7], out size))
{
size = 0;
}
Rom rom = new Rom(rominfo[5], size, rominfo[6], null, null, ItemStatus.None, null, rominfo[3], null,
rominfo[4], null, null, rominfo[8], rominfo[1], null, null, false, null, null, sysid, null, srcid, null); rominfo[4], null, null, rominfo[8], rominfo[1], null, null, false, null, null, sysid, null, srcid, null);
// Now process and add the rom // Now process and add the rom

View File

@@ -623,10 +623,51 @@ namespace SabreTools.Helper
return output; return output;
} }
// Try to find duplicates // Get the correct dictionary based on what is available
List<string> keys = datdata.Files.Keys.ToList(); string key = "";
foreach (string key in keys) if (_itemType == ItemType.Rom && ((Rom)this).CRC != null)
{ {
key = ((Rom)this).CRC;
datdata.BucketByCRC(false, logger, false);
}
else if (_itemType == ItemType.Rom && ((Rom)this).MD5 != null)
{
key = ((Rom)this).MD5;
datdata.BucketByMD5(false, logger, false);
}
else if (_itemType == ItemType.Disk && ((Disk)this).MD5 != null)
{
key = ((Disk)this).MD5;
datdata.BucketByMD5(false, logger, false);
}
else if (_itemType == ItemType.Rom && ((Rom)this).SHA1 != null)
{
key = ((Rom)this).SHA1;
datdata.BucketBySHA1(false, logger, false);
}
else if (_itemType == ItemType.Disk && ((Disk)this).SHA1 != null)
{
key = ((Disk)this).SHA1;
datdata.BucketBySHA1(false, logger, false);
}
else if (_itemType == ItemType.Rom)
{
key = ((Rom)this).Size.ToString();
datdata.BucketBySize(false, logger, false);
}
else
{
key = "-1";
datdata.BucketBySize(false, logger, false);
}
// If the key doesn't exist, return the empty list
if (!datdata.Files.ContainsKey(key))
{
return output;
}
// Try to find duplicates
List<DatItem> roms = datdata.Files[key]; List<DatItem> roms = datdata.Files[key];
List<DatItem> left = new List<DatItem>(); List<DatItem> left = new List<DatItem>();
@@ -647,7 +688,6 @@ namespace SabreTools.Helper
{ {
datdata.Files[key] = left; datdata.Files[key] = left;
} }
}
return output; return output;
} }