From 05e7ed47839d802ff667947ca94fd298bb7b3b12 Mon Sep 17 00:00:00 2001 From: Natalia Portillo Date: Mon, 7 Sep 2020 00:03:09 +0100 Subject: [PATCH] Optimize SQL requests for known files. --- RomRepoMgr.Core/Workers/DatImporter.cs | 379 +++++++++++++++++++++---- 1 file changed, 331 insertions(+), 48 deletions(-) diff --git a/RomRepoMgr.Core/Workers/DatImporter.cs b/RomRepoMgr.Core/Workers/DatImporter.cs index 60bb563..f3d8166 100644 --- a/RomRepoMgr.Core/Workers/DatImporter.cs +++ b/RomRepoMgr.Core/Workers/DatImporter.cs @@ -25,12 +25,14 @@ using System; using System.Collections.Generic; +using System.Data.Common; using System.Diagnostics; using System.Globalization; using System.IO; using System.Linq; using Aaru.Checksums; using EFCore.BulkExtensions; +using Microsoft.EntityFrameworkCore; using RomRepoMgr.Core.EventArgs; using RomRepoMgr.Core.Models; using RomRepoMgr.Core.Resources; @@ -196,32 +198,340 @@ namespace RomRepoMgr.Core.Workers List disks = new List(); List medias = new List(); - foreach(List values in datFile.Items.Values) + string tmpRomCrc32Table = Guid.NewGuid().ToString(); + string tmpRomMd5Table = Guid.NewGuid().ToString(); + string tmpRomSha1Table = Guid.NewGuid().ToString(); + string tmpRomSha256Table = Guid.NewGuid().ToString(); + string tmpRomSha384Table = Guid.NewGuid().ToString(); + string tmpRomSha512Table = Guid.NewGuid().ToString(); + string tmpDiskMd5Table = Guid.NewGuid().ToString(); + string tmpDiskSha1Table = Guid.NewGuid().ToString(); + string tmpMediaMd5Table = Guid.NewGuid().ToString(); + string tmpMediaSha1Table = Guid.NewGuid().ToString(); + string tmpMediaSha256Table = Guid.NewGuid().ToString(); + + DbConnection dbConnection = ctx.Database.GetDbConnection(); + dbConnection.Open(); + + position = 0; + + SetProgressBounds?.Invoke(this, new ProgressBoundsEventArgs { - foreach(DatItem item in values) + Minimum = 0, + Maximum = datFile.Items.Values.Count + }); + + using(DbTransaction dbTransaction = dbConnection.BeginTransaction()) + { + DbCommand dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"CREATE TABLE \"{tmpRomCrc32Table}\" (\"Size\" INTEGER NOT NULL, \"Crc32\" TEXT NOT NULL);"; + + dbcc.ExecuteNonQuery(); + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"CREATE TABLE \"{tmpRomMd5Table}\" (\"Size\" INTEGER NOT NULL, \"Md5\" TEXT NOT NULL);"; + + dbcc.ExecuteNonQuery(); + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"CREATE TABLE \"{tmpRomSha1Table}\" (\"Size\" INTEGER NOT NULL, \"Sha1\" TEXT NOT NULL);"; + + dbcc.ExecuteNonQuery(); + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"CREATE TABLE \"{tmpRomSha256Table}\" (\"Size\" INTEGER NOT NULL, \"Sha256\" TEXT NOT NULL);"; + + dbcc.ExecuteNonQuery(); + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"CREATE TABLE \"{tmpRomSha384Table}\" (\"Size\" INTEGER NOT NULL, \"Sha384\" TEXT NOT NULL);"; + + dbcc.ExecuteNonQuery(); + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"CREATE TABLE \"{tmpRomSha512Table}\" (\"Size\" INTEGER NOT NULL, \"Sha512\" TEXT NOT NULL);"; + + dbcc.ExecuteNonQuery(); + dbcc = dbConnection.CreateCommand(); + dbcc.CommandText = $"CREATE TABLE \"{tmpDiskMd5Table}\" (\"Md5\" TEXT NOT NULL);"; + dbcc.ExecuteNonQuery(); + dbcc = dbConnection.CreateCommand(); + dbcc.CommandText = $"CREATE TABLE \"{tmpDiskSha1Table}\" (\"Sha1\" TEXT NOT NULL);"; + dbcc.ExecuteNonQuery(); + dbcc = dbConnection.CreateCommand(); + dbcc.CommandText = $"CREATE TABLE \"{tmpMediaMd5Table}\" (\"Md5\" TEXT NOT NULL);"; + dbcc.ExecuteNonQuery(); + dbcc = dbConnection.CreateCommand(); + dbcc.CommandText = $"CREATE TABLE \"{tmpMediaSha1Table}\" (\"Sha1\" TEXT NOT NULL);"; + dbcc.ExecuteNonQuery(); + dbcc = dbConnection.CreateCommand(); + dbcc.CommandText = $"CREATE TABLE \"{tmpMediaSha256Table}\" (\"Sha256\" TEXT NOT NULL);"; + dbcc.ExecuteNonQuery(); + + foreach(List values in datFile.Items.Values) { - switch(item) + SetProgress?.Invoke(this, new ProgressEventArgs { - case Rom rom: - roms.Add(rom); + Value = position + }); - continue; - case Disk disk: - disks.Add(disk); + foreach(DatItem item in values) + { + switch(item) + { + case Rom rom: + if(rom.CRC != null) + { + dbcc = dbConnection.CreateCommand(); - continue; - case Media media: - medias.Add(media); + dbcc.CommandText = + $"INSERT INTO \"{tmpRomCrc32Table}\" (\"Size\", \"Crc32\") VALUES (\"{(ulong)rom.Size}\", \"{rom.CRC}\");"; - continue; - default: - Console.WriteLine(item); + dbcc.ExecuteNonQuery(); + } - continue; + if(rom.MD5 != null) + { + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"INSERT INTO \"{tmpRomMd5Table}\" (\"Size\", \"Md5\") VALUES (\"{(ulong)rom.Size}\", \"{rom.MD5}\");"; + + dbcc.ExecuteNonQuery(); + } + + if(rom.SHA1 != null) + { + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"INSERT INTO \"{tmpRomSha1Table}\" (\"Size\", \"Sha1\") VALUES (\"{(ulong)rom.Size}\", \"{rom.SHA1}\");"; + + dbcc.ExecuteNonQuery(); + } + + if(rom.SHA256 != null) + { + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"INSERT INTO \"{tmpRomSha256Table}\" (\"Size\", \"Sha256\") VALUES (\"{(ulong)rom.Size}\", \"{rom.SHA256}\");"; + + dbcc.ExecuteNonQuery(); + } + + if(rom.SHA384 != null) + { + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"INSERT INTO \"{tmpRomSha384Table}\" (\"Size\", \"Sha384\") VALUES (\"{(ulong)rom.Size}\", \"{rom.SHA384}\");"; + + dbcc.ExecuteNonQuery(); + } + + if(rom.SHA512 != null) + { + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"INSERT INTO \"{tmpRomSha512Table}\" (\"Size\", \"Sha512\") VALUES (\"{(ulong)rom.Size}\", \"{rom.SHA512}\");"; + + dbcc.ExecuteNonQuery(); + } + + roms.Add(rom); + + continue; + case Disk disk: + if(disk.MD5 != null) + { + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"INSERT INTO \"{tmpDiskMd5Table}\" (\"Md5\") VALUES (\"{disk.MD5}\");"; + + dbcc.ExecuteNonQuery(); + } + + if(disk.SHA1 != null) + { + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"INSERT INTO \"{tmpDiskSha1Table}\" (\"Sha1\") VALUES (\"{disk.SHA1}\");"; + + dbcc.ExecuteNonQuery(); + } + + disks.Add(disk); + + continue; + case Media media: + if(media.MD5 != null) + { + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"INSERT INTO \"{tmpMediaMd5Table}\" (\"Md5\") VALUES (\"{media.MD5}\");"; + + dbcc.ExecuteNonQuery(); + } + + if(media.SHA1 != null) + { + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"INSERT INTO \"{tmpMediaSha1Table}\" (\"Sha1\") VALUES (\"{media.SHA1}\");"; + + dbcc.ExecuteNonQuery(); + } + + if(media.SHA256 != null) + { + dbcc = dbConnection.CreateCommand(); + + dbcc.CommandText = + $"INSERT INTO \"{tmpMediaSha256Table}\" (\"Sha256\") VALUES (\"{media.SHA256}\");"; + + dbcc.ExecuteNonQuery(); + } + + medias.Add(media); + + continue; + default: + Console.WriteLine(item); + + continue; + } } + + position++; } + + SetIndeterminateProgress?.Invoke(this, System.EventArgs.Empty); + + dbTransaction.Commit(); } + List pendingFilesByCrcList = ctx.Files. + FromSqlRaw($"SELECT DISTINCT f.* FROM Files AS f, [{tmpRomCrc32Table}] AS t WHERE f.Size = t.Size AND f.Crc32 = t.Crc32"). + ToList(); + + List pendingFilesByMd5List = ctx.Files. + FromSqlRaw($"SELECT DISTINCT f.* FROM Files AS f, [{tmpRomMd5Table}] AS t WHERE f.Size = t.Size AND f.Md5 = t.Md5"). + ToList(); + + List pendingFilesBySha1List = ctx.Files. + FromSqlRaw($"SELECT DISTINCT f.* FROM Files AS f, [{tmpRomSha1Table}] AS t WHERE f.Size = t.Size AND f.Sha1 = t.Sha1"). + ToList(); + + List pendingFilesBySha256List = ctx.Files. + FromSqlRaw($"SELECT DISTINCT f.* FROM Files AS f, [{tmpRomSha256Table}] AS t WHERE f.Size = t.Size AND f.Sha256 = t.Sha256"). + ToList(); + + List pendingFilesBySha384List = ctx.Files. + FromSqlRaw($"SELECT DISTINCT f.* FROM Files AS f, [{tmpRomSha384Table}] AS t WHERE f.Size = t.Size AND f.Sha384 = t.Sha384"). + ToList(); + + List pendingFilesBySha512List = ctx.Files. + FromSqlRaw($"SELECT DISTINCT f.* FROM Files AS f, [{tmpRomSha512Table}] AS t WHERE f.Size = t.Size AND f.Sha512 = t.Sha512"). + ToList(); + + Dictionary pendingDisksByMd5 = ctx.Disks. + FromSqlRaw($"SELECT DISTINCT f.* FROM Disks AS f, [{tmpDiskMd5Table}] AS t WHERE f.Md5 = t.Md5"). + ToDictionary(f => f.Md5); + + Dictionary pendingDisksBySha1 = ctx.Disks. + FromSqlRaw($"SELECT DISTINCT f.* FROM Disks AS f, [{tmpDiskSha1Table}] AS t WHERE f.Sha1 = t.Sha1"). + ToDictionary(f => f.Sha1); + + Dictionary pendingMediasByMd5 = ctx.Medias. + FromSqlRaw($"SELECT DISTINCT f.* FROM Medias AS f, [{tmpMediaMd5Table}] AS t WHERE f.Md5 = t.Md5"). + ToDictionary(f => f.Md5); + + Dictionary pendingMediasBySha1 = ctx.Medias. + FromSqlRaw($"SELECT DISTINCT f.* FROM Medias AS f, [{tmpMediaSha1Table}] AS t WHERE f.Sha1 = t.Sha1"). + ToDictionary(f => f.Sha1); + + Dictionary pendingMediasBySha256 = ctx.Medias. + FromSqlRaw($"SELECT DISTINCT f.* FROM Medias AS f, [{tmpMediaSha256Table}] AS t WHERE f.Sha256 = t.Sha256"). + ToDictionary(f => f.Sha256); + + Dictionary pendingFilesByCrc = new Dictionary(); + Dictionary pendingFilesByMd5 = new Dictionary(); + Dictionary pendingFilesBySha1 = new Dictionary(); + Dictionary pendingFilesBySha256 = new Dictionary(); + Dictionary pendingFilesBySha384 = new Dictionary(); + Dictionary pendingFilesBySha512 = new Dictionary(); + List pendingFiles = new List(); + + // This is because of hash collisions. + foreach(DbFile item in pendingFilesByCrcList) + if(pendingFilesByCrc.ContainsKey(item.Crc32)) + pendingFiles.Add(item); + else + pendingFilesByCrc[item.Crc32] = item; + + foreach(DbFile item in pendingFilesByMd5List) + if(pendingFilesByMd5.ContainsKey(item.Md5)) + pendingFiles.Add(item); + else + pendingFilesByMd5[item.Md5] = item; + + foreach(DbFile item in pendingFilesBySha1List) + if(pendingFilesBySha1.ContainsKey(item.Sha1)) + pendingFiles.Add(item); + else + pendingFilesBySha1[item.Sha1] = item; + + foreach(DbFile item in pendingFilesBySha256List) + if(pendingFilesBySha256.ContainsKey(item.Sha256)) + pendingFiles.Add(item); + else + pendingFilesBySha256[item.Sha256] = item; + + foreach(DbFile item in pendingFilesBySha384List) + if(pendingFilesBySha384.ContainsKey(item.Sha384)) + pendingFiles.Add(item); + else + pendingFilesBySha384[item.Sha384] = item; + + foreach(DbFile item in pendingFilesBySha512List) + if(pendingFilesBySha512.ContainsKey(item.Sha512)) + pendingFiles.Add(item); + else + pendingFilesBySha512[item.Sha512] = item; + + // Clear some memory + pendingFilesByCrcList.Clear(); + pendingFilesByMd5List.Clear(); + pendingFilesBySha1List.Clear(); + pendingFilesBySha256List.Clear(); + pendingFilesBySha384List.Clear(); + pendingFilesBySha512List.Clear(); + + ctx.Database.ExecuteSqlRaw($"DROP TABLE [{tmpRomCrc32Table}]"); + ctx.Database.ExecuteSqlRaw($"DROP TABLE [{tmpRomMd5Table}]"); + ctx.Database.ExecuteSqlRaw($"DROP TABLE [{tmpRomSha1Table}]"); + ctx.Database.ExecuteSqlRaw($"DROP TABLE [{tmpRomSha256Table}]"); + ctx.Database.ExecuteSqlRaw($"DROP TABLE [{tmpRomSha384Table}]"); + ctx.Database.ExecuteSqlRaw($"DROP TABLE [{tmpRomSha512Table}]"); + ctx.Database.ExecuteSqlRaw($"DROP TABLE [{tmpDiskMd5Table}]"); + ctx.Database.ExecuteSqlRaw($"DROP TABLE [{tmpDiskSha1Table}]"); + ctx.Database.ExecuteSqlRaw($"DROP TABLE [{tmpMediaMd5Table}]"); + ctx.Database.ExecuteSqlRaw($"DROP TABLE [{tmpMediaSha1Table}]"); + ctx.Database.ExecuteSqlRaw($"DROP TABLE [{tmpMediaSha256Table}]"); + SetProgressBounds?.Invoke(this, new ProgressBoundsEventArgs { Minimum = 0, @@ -235,15 +545,8 @@ namespace RomRepoMgr.Core.Workers position = 0; - Dictionary pendingFilesBySha512 = new Dictionary(); - Dictionary pendingFilesBySha384 = new Dictionary(); - Dictionary pendingFilesBySha256 = new Dictionary(); - Dictionary pendingFilesBySha1 = new Dictionary(); - Dictionary pendingFilesByMd5 = new Dictionary(); - Dictionary pendingFilesByCrc = new Dictionary(); - List pendingFiles = new List(); - List newFiles = new List(); - List newFilesByMachine = new List(); + List newFiles = new List(); + List newFilesByMachine = new List(); foreach(Rom rom in roms) { @@ -347,14 +650,6 @@ namespace RomRepoMgr.Core.Workers file = pendingFiles.FirstOrDefault(f => f.Crc32 == rom.CRC && f.Size == uSize); } - file ??= ctx.Files.FirstOrDefault(f => ((rom.SHA512 != null && f.Sha512 == rom.SHA512) || - (rom.SHA384 != null && f.Sha384 == rom.SHA384) || - (rom.SHA256 != null && f.Sha256 == rom.SHA256) || - (rom.SHA1 != null && f.Sha1 == rom.SHA1) || - (rom.MD5 != null && f.Md5 == rom.MD5) || - (rom.CRC != null && f.Crc32 == rom.CRC)) && - f.Size == uSize); - if(file == null) { file = new DbFile @@ -509,10 +804,8 @@ namespace RomRepoMgr.Core.Workers position = 0; - Dictionary pendingDisksBySha1 = new Dictionary(); - Dictionary pendingDisksByMd5 = new Dictionary(); - List newDisks = new List(); - List newDisksByMachine = new List(); + List newDisks = new List(); + List newDisksByMachine = new List(); foreach(Disk disk in disks) { @@ -549,9 +842,6 @@ namespace RomRepoMgr.Core.Workers dbDisk == null) pendingDisksByMd5.TryGetValue(disk.MD5, out dbDisk); - dbDisk ??= ctx.Disks.FirstOrDefault(f => (disk.SHA1 != null && f.Sha1 == disk.SHA1) || - (disk.MD5 != null && f.Md5 == disk.MD5)); - if(dbDisk == null) { dbDisk = new DbDisk @@ -632,11 +922,8 @@ namespace RomRepoMgr.Core.Workers position = 0; - Dictionary pendingMediasBySha256 = new Dictionary(); - Dictionary pendingMediasBySha1 = new Dictionary(); - Dictionary pendingMediasByMd5 = new Dictionary(); - List newMedias = new List(); - List newMediasByMachine = new List(); + List newMedias = new List(); + List newMediasByMachine = new List(); foreach(Media media in medias) { @@ -678,10 +965,6 @@ namespace RomRepoMgr.Core.Workers dbMedia == null) pendingMediasByMd5.TryGetValue(media.MD5, out dbMedia); - dbMedia ??= ctx.Medias.FirstOrDefault(f => (media.SHA256 != null && f.Sha256 == media.SHA256) || - (media.SHA1 != null && f.Sha1 == media.SHA1) || - (media.MD5 != null && f.Md5 == media.MD5)); - // TODO: SpamSum if(dbMedia == null) {