using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace SabreTools.IO
{
    /// <summary>
    /// Line-by-line reader for separated-value text (for example CSV).
    /// Each call to <see cref="ReadNextLine"/> reads one line from the underlying
    /// reader and splits it into trimmed field values.
    /// </summary>
    public class SeparatedValueReader : IDisposable
    {
        /// <summary>
        /// Internal stream reader for inputting
        /// </summary>
        private readonly StreamReader sr;

        /// <summary>
        /// Number of fields expected on every row; -1 until a header row has been read
        /// </summary>
        private int fields = -1;

        /// <summary>
        /// Get if at end of stream
        /// </summary>
        public bool EndOfStream
        {
            get { return sr?.EndOfStream ?? true; }
        }

        /// <summary>
        /// Contents of the current line, unprocessed
        /// </summary>
        public string CurrentLine { get; private set; } = string.Empty;

        /// <summary>
        /// Get the current line number
        /// </summary>
        public long LineNumber { get; private set; } = 0;

        /// <summary>
        /// Assume the first row is a header
        /// </summary>
        public bool Header { get; set; } = true;

        /// <summary>
        /// Header row values; null until the header row has been read
        /// </summary>
        public List<string> HeaderValues { get; set; } = null;

        /// <summary>
        /// Get the current line values; null until a line has been read
        /// </summary>
        public List<string> Line { get; private set; } = null;

        /// <summary>
        /// Assume that values are wrapped in quotes
        /// </summary>
        public bool Quotes { get; set; } = true;

        /// <summary>
        /// Set what character should be used as a separator
        /// </summary>
        public char Separator { get; set; } = ',';

        /// <summary>
        /// Set if field count should be verified from the first row
        /// </summary>
        /// <remarks>
        /// NOTE(review): the expected count is only captured when a header row is read,
        /// so with <see cref="Header"/> set to false no verification takes place.
        /// Confirm whether headerless input should also be verified.
        /// </remarks>
        public bool VerifyFieldCount { get; set; } = true;

        /// <summary>
        /// Constructor for reading from a file
        /// </summary>
        /// <param name="filename">Path of the file to open</param>
        public SeparatedValueReader(string filename)
        {
            sr = new StreamReader(filename);
        }

        /// <summary>
        /// Constructor for reading from a stream
        /// </summary>
        /// <param name="stream">Stream to read from</param>
        /// <param name="encoding">Text encoding of the stream</param>
        public SeparatedValueReader(Stream stream, Encoding encoding)
        {
            sr = new StreamReader(stream, encoding);
        }

        /// <summary>
        /// Read the header line
        /// </summary>
        /// <returns>True if a line was read, false otherwise</returns>
        /// <exception cref="InvalidOperationException">
        /// No header is expected, or a header has already been read
        /// </exception>
        public bool ReadHeader()
        {
            if (!Header)
                throw new InvalidOperationException("No header line expected");

            if (HeaderValues != null)
                throw new InvalidOperationException("No more than 1 header row in a file allowed");

            return ReadNextLine();
        }

        /// <summary>
        /// Read the next line in the separated value file
        /// </summary>
        /// <returns>True if a line was read, false if the stream is unreadable or exhausted</returns>
        /// <exception cref="InvalidDataException">
        /// Field count verification is enabled and the row does not match the header's field count
        /// </exception>
        public bool ReadNextLine()
        {
            if (!(sr.BaseStream?.CanRead ?? false) || sr.EndOfStream)
                return false;

            string fullLine = sr.ReadLine();
            CurrentLine = fullLine;
            LineNumber++;

            // Quoted input needs a quote-aware split so separators inside quotes survive
            if (Quotes)
                Line = SplitQuotedLine(fullLine, Separator);
            else
                Line = fullLine.Split(Separator).Select(f => f.Trim()).ToList();

            // If we don't have a header yet and are expecting one, read this as the header
            if (Header && HeaderValues == null)
            {
                HeaderValues = Line;
                fields = HeaderValues.Count;
            }

            // If we're verifying field counts and the numbers are off, error out
            if (VerifyFieldCount && fields != -1 && Line.Count != fields)
                throw new InvalidDataException($"Invalid row found, cannot continue: {fullLine}");

            return true;
        }

        /// <summary>
        /// Get the value for the current line for the current key
        /// </summary>
        /// <param name="key">Header name of the column to fetch</param>
        /// <returns>The value in that column, or null if the header has no such key</returns>
        /// <exception cref="ArgumentException">
        /// No header is expected, or the current line has no value at the key's position
        /// </exception>
        /// <exception cref="InvalidOperationException">The header or a line has not been read yet</exception>
        public string GetValue(string key)
        {
            // No header means no key-based indexing
            if (!Header)
                throw new ArgumentException("No header expected so no keys can be used");

            if (HeaderValues == null)
                throw new InvalidOperationException("Header has not been read yet");

            // If we don't have the key, return null
            int index = HeaderValues.IndexOf(key);
            if (index < 0)
                return null;

            return GetValue(index);
        }

        /// <summary>
        /// Get the value for the current line for the current index
        /// </summary>
        /// <param name="index">Zero-based position of the field</param>
        /// <returns>The value at that position in the current line</returns>
        /// <exception cref="ArgumentException">The index is outside the current line</exception>
        /// <exception cref="InvalidOperationException">No line has been read yet</exception>
        public string GetValue(int index)
        {
            if (Line == null)
                throw new InvalidOperationException("No line has been read yet");

            // Valid positions are 0 .. Count - 1
            if (index < 0 || index >= Line.Count)
                throw new ArgumentException($"Current line doesn't have index {index}");

            return Line[index];
        }

        /// <summary>
        /// Dispose of the underlying reader
        /// </summary>
        public void Dispose()
        {
            sr?.Dispose();
        }

        /// <summary>
        /// Split a line on the separator, keeping separators that sit inside a quoted field
        /// </summary>
        /// <remarks>
        /// A field is treated as quoted only when its very first character is a double quote
        /// and a closing quote exists; otherwise it is split like plain text. Doubled quotes
        /// inside a quoted field do not end the field and are left in the value as-is.
        /// </remarks>
        private static List<string> SplitQuotedLine(string line, char separator)
        {
            var values = new List<string>();
            int position = 0;

            while (true)
            {
                // Skip past the quoted section (if any) before looking for the separator
                int searchFrom = position;
                if (position < line.Length && line[position] == '"')
                {
                    int closing = FindClosingQuote(line, position);
                    if (closing >= 0)
                        searchFrom = closing + 1;
                }

                int end = line.IndexOf(separator, searchFrom);
                if (end < 0)
                    end = line.Length;

                values.Add(CleanField(line.Substring(position, end - position)));

                // No separator left means this was the last field
                if (end >= line.Length)
                    break;

                position = end + 1;
            }

            return values;
        }

        /// <summary>
        /// Find the quote that closes the quoted field opened at <paramref name="open"/>,
        /// or -1 if the field is never closed
        /// </summary>
        private static int FindClosingQuote(string line, int open)
        {
            int i = open + 1;
            while (i < line.Length)
            {
                if (line[i] == '"')
                {
                    // A doubled quote is an escaped quote, not the end of the field
                    if (i + 1 < line.Length && line[i + 1] == '"')
                    {
                        i += 2;
                        continue;
                    }

                    return i;
                }

                i++;
            }

            return -1;
        }

        /// <summary>
        /// Trim whitespace, surrounding quotes, and inter-quote whitespace from a raw field
        /// </summary>
        private static string CleanField(string raw)
        {
            return raw.Trim().Trim('"').Trim();
        }
    }
}