diff --git a/ChangeLog b/ChangeLog index 83f6af5..45cce7f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2015-02-24 Natalia Portillo + + * plist-cil.sln: + Bump to 1.14 (upstream r114). + 2015-02-20 Natalia Portillo * SvnInfo.txt: diff --git a/plist-cil.sln b/plist-cil.sln index 9fb095d..245b29f 100644 --- a/plist-cil.sln +++ b/plist-cil.sln @@ -233,6 +233,6 @@ Global $37.IncludeDirectoryPaths = True $36.inheritsSet = Mono description = @This library enables .NET applications to work with property lists in various formats.\n\tSupported formats for reading and writing are OS X/iOS binary and XML property lists.\n ASCII property lists are also supported.\n\tThe library also provides access to basic functions of NeXTSTEP/Cocoa classes like\n NSDictionary, NSArray, etc. - version = 1.13 + version = 1.14 EndGlobalSection EndGlobal diff --git a/plist-cil.test/ChangeLog b/plist-cil.test/ChangeLog index 1189d38..84c6034 100644 --- a/plist-cil.test/ChangeLog +++ b/plist-cil.test/ChangeLog @@ -1,3 +1,8 @@ +2015-02-24 Natalia Portillo + + * plist-cil.test.csproj: + Bump to 1.14 (upstream r114). + 2015-02-20 Natalia Portillo * IssueTest.cs: diff --git a/plist-cil.test/plist-cil.test.csproj b/plist-cil.test/plist-cil.test.csproj index 1a88484..7171219 100644 --- a/plist-cil.test/plist-cil.test.csproj +++ b/plist-cil.test/plist-cil.test.csproj @@ -8,7 +8,7 @@ plistcil.test plist-cil.test v4.5 - 1.13 + 1.14 true diff --git a/plist-cil/ASCIIPropertyListParser.cs b/plist-cil/ASCIIPropertyListParser.cs index 063752f..9ce7abd 100644 --- a/plist-cil/ASCIIPropertyListParser.cs +++ b/plist-cil/ASCIIPropertyListParser.cs @@ -437,6 +437,9 @@ namespace Claunia.PropertyList public NSObject Parse() { index = 0; + //Skip Unicode byte order mark (BOM) + if (data.Length >= 3 && (data[0] & 0xFF) == 0xEF && (data[1] & 0xFF) == 0xBB && (data[2] & 0xFF) == 0xBF) + Skip(3); SkipWhitespacesAndComments(); Expect(DICTIONARY_BEGIN_TOKEN, ARRAY_BEGIN_TOKEN, COMMENT_BEGIN_TOKEN); try @@ -752,7 +755,7 @@ namespace Claunia.PropertyList //If the string can be represented in the ASCII codepage // --> use ASCII encoding - if(IsASCIIEncodable(result)) + if (IsASCIIEncodable(result)) return Encoding.ASCII.GetString(Encoding.Convert(Encoding.BigEndianUnicode, Encoding.ASCII, bytArr)); //The string contains characters outside the ASCII codepage // --> use the UTF-8 encoded string diff --git a/plist-cil/BinaryPropertyListParser.cs b/plist-cil/BinaryPropertyListParser.cs index 766b2df..85170e8 100644 --- a/plist-cil/BinaryPropertyListParser.cs +++ b/plist-cil/BinaryPropertyListParser.cs @@ -42,38 +42,26 @@ namespace Claunia.PropertyList /// @author Natalia Portillo public class BinaryPropertyListParser { - int majorVersion, minorVersion; + /// + /// Major version of the property list format + /// + int majorVersion; + + /// + /// Minor version of the property list format + /// + int minorVersion; /// /// Property list in bytes /// byte[] bytes; - /// - /// Length of an offset definition in bytes - /// - int offsetSize; - /// /// Length of an object reference in bytes /// int objectRefSize; - /// - /// Number of objects stored in this property list - /// - int numObjects; - - /// - /// Reference to the top object of the property list - /// - int topObject; - - /// - /// Offset of the offset table from the beginning of the file - /// - int offsetTableOffset; - /// /// The table holding the information at which offset each object is found /// @@ -126,6 +114,7 @@ namespace Claunia.PropertyList if (majorVersion > 0) { + // Version 1.0+ is not even supported by OS X's own parser throw new PropertyListFormatException("Unsupported binary property list format: v" + majorVersion + "." + minorVersion + ". " + "Version 1.0 and later are not yet supported."); } @@ -135,16 +124,11 @@ namespace Claunia.PropertyList */ byte[] trailer = CopyOfRange(bytes, bytes.Length - 32, bytes.Length); //6 null bytes (index 0 to 5) - offsetSize = (int)ParseUnsignedInt(trailer, 6, 7); - //System.Console.WriteLine("offsetSize: "+offsetSize); + int offsetSize = (int)ParseUnsignedInt(trailer, 6, 7); objectRefSize = (int)ParseUnsignedInt(trailer, 7, 8); - //System.Console.WriteLine("objectRefSize: "+objectRefSize); - numObjects = (int)ParseUnsignedInt(trailer, 8, 16); - //System.Console.WriteLine("numObjects: "+numObjects); - topObject = (int)ParseUnsignedInt(trailer, 16, 24); - //System.Console.WriteLine("topObject: "+topObject); - offsetTableOffset = (int)ParseUnsignedInt(trailer, 24, 32); - //System.Console.WriteLine("offsetTableOffset: "+offsetTableOffset); + int numObjects = (int)ParseUnsignedInt(trailer, 8, 16); + int topObject = (int)ParseUnsignedInt(trailer, 16, 24); + int offsetTableOffset = (int)ParseUnsignedInt(trailer, 24, 32); /* * Handle offset table @@ -155,9 +139,6 @@ namespace Claunia.PropertyList { byte[] offsetBytes = CopyOfRange(bytes, offsetTableOffset + i * offsetSize, offsetTableOffset + (i + 1) * offsetSize); offsetTable[i] = (int)ParseUnsignedInt(offsetBytes); - /*System.Console.Write("Offset for Object #"+i+" is "+offsetTable[i]+" ["); - foreach(byte b: in ffsetBytes) System.Console.Write(Convert.ToString((int)b, 16))+" "); - System.Console.WriteLine("]");*/ } return ParseObject(topObject); @@ -185,22 +166,13 @@ namespace Claunia.PropertyList /// When the property list's format could not be parsed. public static NSObject Parse(FileInfo f) { - // While on Java, heap size is limited by the JVM, on .NET the heap size is dynamically allocated using all - // available RAM+swap. There is a function to check if that allocation can succeed, but works in 16MiB pieces, - // far bigger than any known PropertyList. And even then, paging would allow to work with insanely sized PropertyLists. - // Therefor, the checks in .NET (System.Runtime.MemoryFailPoint) are not worth the effort. - // Rest of calls to Java's Runtime.getRuntime().freeMemory() will not be commented but completely removed. - /* - if (f.length() > Runtime.getRuntime().freeMemory()) { - throw new OutOfMemoryError("To little heap space available! Wanted to read " + f.length() + " bytes, but only " + Runtime.getRuntime().freeMemory() + " are available."); - }*/ return Parse(f.OpenRead()); } /// /// Parses an object inside the currently parsed binary property list. /// For the format specification check - /// + /// /// Apple's binary property list parser implementation. /// /// The parsed object. @@ -237,19 +209,19 @@ namespace Claunia.PropertyList case 0xC: { //URL with no base URL (v1.0 and later) - //TODO + //TODO Implement binary URL parsing (not yet even implemented in Core Foundation as of revision 855.17) break; } case 0xD: { //URL with base URL (v1.0 and later) - //TODO + //TODO Implement binary URL parsing (not yet even implemented in Core Foundation as of revision 855.17) break; } case 0xE: { //16-byte UUID (v1.0 and later) - //TODO + //TODO Implement binary UUID parsing (not yet even implemented in Core Foundation as of revision 855.17) break; } case 0xF: @@ -284,51 +256,63 @@ namespace Claunia.PropertyList case 0x4: { //Data - int[] lenAndoffset = ReadLengthAndOffset(objInfo, offset); - int length = lenAndoffset[0]; - int dataoffset = lenAndoffset[1]; + int[] lengthAndOffset = ReadLengthAndOffset(objInfo, offset); + int length = lengthAndOffset[0]; + int dataoffset = lengthAndOffset[1]; return new NSData(CopyOfRange(bytes, offset + dataoffset, offset + dataoffset + length)); } case 0x5: { //ASCII String - int[] lenAndoffset = ReadLengthAndOffset(objInfo, offset); - int length = lenAndoffset[0]; - int stroffset = lenAndoffset[1]; + int[] lengthAndOffset = ReadLengthAndOffset(objInfo, offset); + int length = lengthAndOffset[0]; //Each character is 1 byte + int stroffset = lengthAndOffset[1]; return new NSString(CopyOfRange(bytes, offset + stroffset, offset + stroffset + length), "ASCII"); } case 0x6: { //UTF-16-BE String - int[] lenAndoffset = ReadLengthAndOffset(objInfo, offset); - int length = lenAndoffset[0]; - int stroffset = lenAndoffset[1]; + int[] lengthAndOffset = ReadLengthAndOffset(objInfo, offset); + int length = lengthAndOffset[0]; + int stroffset = lengthAndOffset[1]; - //length is String length -> to get byte length multiply by 2, as 1 character takes 2 bytes in UTF-16 + //UTF-16 characters can have variable length, but the Core Foundation reference implementation + //assumes 2 byte characters, thus only covering the Basic Multilingual Plane length *= 2; return new NSString(CopyOfRange(bytes, offset + stroffset, offset + stroffset + length), "UTF-16BE"); } + case 0x7: + { + //UTF-8 string (v1.0 and later) + int[] lengthAndOffset = ReadLengthAndOffset(objInfo, offset); + int strOffset = lengthAndOffset[1]; + int characters = lengthAndOffset[0]; + //UTF-8 characters can have variable length, so we need to calculate the byte length dynamically + //by reading the UTF-8 characters one by one + int length = CalculateUtf8StringLength(bytes, offset + strOffset, characters); + return new NSString(CopyOfRange(bytes, offset + strOffset, offset + strOffset + length), "UTF-8"); + } case 0x8: { - //UID + //UID (v1.0 and later) int length = objInfo + 1; return new UID(obj.ToString(), CopyOfRange(bytes, offset + 1, offset + 1 + length)); } case 0xA: { //Array - int[] lenAndoffset = ReadLengthAndOffset(objInfo, offset); - int length = lenAndoffset[0]; - int arrayoffset = lenAndoffset[1]; + int[] lengthAndOffset = ReadLengthAndOffset(objInfo, offset); + int length = lengthAndOffset[0]; + int arrayOffset = lengthAndOffset[1]; NSArray array = new NSArray(length); for (int i = 0; i < length; i++) { int objRef = (int)ParseUnsignedInt(CopyOfRange(bytes, - offset + arrayoffset + i * objectRefSize, - offset + arrayoffset + (i + 1) * objectRefSize)); + offset + arrayOffset + i * objectRefSize, + offset + arrayOffset + (i + 1) * objectRefSize)); array.SetValue(i, ParseObject(objRef)); } return array; @@ -336,10 +320,10 @@ namespace Claunia.PropertyList } case 0xB: { - //Ordered set - int[] lenAndoffset = ReadLengthAndOffset(objInfo, offset); - int length = lenAndoffset[0]; - int contentOffset = lenAndoffset[1]; + //Ordered set (v1.0 and later) + int[] lengthAndOffset = ReadLengthAndOffset(objInfo, offset); + int length = lengthAndOffset[0]; + int contentOffset = lengthAndOffset[1]; NSSet set = new NSSet(true); for (int i = 0; i < length; i++) @@ -353,10 +337,10 @@ namespace Claunia.PropertyList } case 0xC: { - //Set - int[] lenAndoffset = ReadLengthAndOffset(objInfo, offset); - int length = lenAndoffset[0]; - int contentOffset = lenAndoffset[1]; + //Set (v1.0 and later) + int[] lengthAndOffset = ReadLengthAndOffset(objInfo, offset); + int length = lengthAndOffset[0]; + int contentOffset = lengthAndOffset[1]; NSSet set = new NSSet(); for (int i = 0; i < length; i++) @@ -371,9 +355,9 @@ namespace Claunia.PropertyList case 0xD: { //Dictionary - int[] lenAndoffset = ReadLengthAndOffset(objInfo, offset); - int length = lenAndoffset[0]; - int contentOffset = lenAndoffset[1]; + int[] lengthAndOffset = ReadLengthAndOffset(objInfo, offset); + int length = lengthAndOffset[0]; + int contentOffset = lengthAndOffset[1]; //System.out.println("Parsing dictionary #"+obj); NSDictionary dict = new NSDictionary(); @@ -408,8 +392,8 @@ namespace Claunia.PropertyList /// Offset in the byte array at which the object is located. int[] ReadLengthAndOffset(int objInfo, int offset) { - int length = objInfo; - int stroffset = 1; + int lengthValue = objInfo; + int offsetValue = 1; if (objInfo == 0xF) { int int_type = bytes[offset + 1]; @@ -420,10 +404,10 @@ namespace Claunia.PropertyList } int intInfo = int_type & 0x0F; int intLength = (int)Math.Pow(2, intInfo); - stroffset = 2 + intLength; + offsetValue = 2 + intLength; if (intLength < 3) { - length = (int)ParseUnsignedInt(CopyOfRange(bytes, offset + 2, offset + 2 + intLength)); + lengthValue = (int)ParseUnsignedInt(CopyOfRange(bytes, offset + 2, offset + 2 + intLength)); } else { @@ -435,10 +419,71 @@ namespace Claunia.PropertyList litEBigInteger[i] = bigEBigInteger[j]; litEBigInteger[litEBigInteger.Length - 1] = (byte)0x00; // Be sure to get unsigned BigInteger - length = (int)new System.Numerics.BigInteger(litEBigInteger); + lengthValue = (int)new System.Numerics.BigInteger(litEBigInteger); } } - return new []{ length, stroffset }; + return new []{ lengthValue, offsetValue }; + } + + /// + /// Calculates the length in bytes of the UTF-8 string. + /// + /// The UTF-8 string length. + /// Array containing the UTF-8 string. + /// Offset in the array where the UTF-8 string resides. + /// How many UTF-8 characters are in the string. + int CalculateUtf8StringLength(byte[] bytes, int offset, int numCharacters) + { + int length = 0; + for (int i = 0; i < numCharacters; i++) + { + int tempOffset = offset + length; + if (bytes.Length <= tempOffset) + { + //WARNING: Invalid UTF-8 string, fall back to length = number of characters + return numCharacters; + } + if (bytes[tempOffset] < 0x80) + { + length++; + } + if (bytes[tempOffset] < 0xC2) + { + //Invalid value (marks continuation byte), fall back to length = number of characters + return numCharacters; + } + else if (bytes[tempOffset] < 0xE0) + { + if ((bytes[tempOffset + 1] & 0xC0) != 0x80) + { + //Invalid continuation byte, fall back to length = number of characters + return numCharacters; + } + length += 2; + } + else if (bytes[tempOffset] < 0xF0) + { + if ((bytes[tempOffset + 1] & 0xC0) != 0x80 + || (bytes[tempOffset + 2] & 0xC0) != 0x80) + { + //Invalid continuation byte, fall back to length = number of characters + return numCharacters; + } + length += 3; + } + else if (bytes[tempOffset] < 0xF5) + { + if ((bytes[tempOffset + 1] & 0xC0) != 0x80 + || (bytes[tempOffset + 2] & 0xC0) != 0x80 + || (bytes[tempOffset + 3] & 0xC0) != 0x80) + { + //Invalid continuation byte, fall back to length = number of characters + return numCharacters; + } + length += 4; + } + } + return length; } /// diff --git a/plist-cil/ChangeLog b/plist-cil/ChangeLog index 970719b..17447fd 100644 --- a/plist-cil/ChangeLog +++ b/plist-cil/ChangeLog @@ -1,3 +1,17 @@ +2015-02-24 Natalia Portillo + + * PropertyListParser.cs: + * ASCIIPropertyListParser.cs: + Sync BOM skipping code with upstream. + + * BinaryPropertyListParser.cs: + Sync code with upstream. + Added UTF-8 string decoding on binary property lists. + + * plist-cil.csproj: + * Properties/AssemblyInfo.cs: + Bump to 1.14 (upstream r114). + 2015-02-20 Natalia Portillo * UID.cs: diff --git a/plist-cil/Properties/AssemblyInfo.cs b/plist-cil/Properties/AssemblyInfo.cs index e1bb17e..73c333f 100644 --- a/plist-cil/Properties/AssemblyInfo.cs +++ b/plist-cil/Properties/AssemblyInfo.cs @@ -17,7 +17,7 @@ using System.Runtime.CompilerServices; // The form "{Major}.{Minor}.*" will automatically update the build and revision, // and "{Major}.{Minor}.{Build}.*" will update just the revision. -[assembly: AssemblyVersion("1.13.0.0")] +[assembly: AssemblyVersion("1.14.0.0")] // The following attributes are used to specify the signing key for the assembly, // if desired. See the Mono documentation for more information about signing. diff --git a/plist-cil/PropertyListParser.cs b/plist-cil/PropertyListParser.cs index 8d76908..a9e2c8b 100644 --- a/plist-cil/PropertyListParser.cs +++ b/plist-cil/PropertyListParser.cs @@ -76,6 +76,11 @@ namespace Claunia.PropertyList { //Skip any possible whitespace at the beginning of the file int offset = 0; + if (bytes.Length >= 3 && (bytes[0] & 0xFF) == 0xEF && (bytes[1] & 0xFF) == 0xBB && (bytes[2] & 0xFF) == 0xBF) + { + //Skip Unicode byte order mark (BOM) + offset += 3; + } while (offset < bytes.Length && bytes[offset] == ' ' || bytes[offset] == '\t' || bytes[offset] == '\r' || bytes[offset] == '\n' || bytes[offset] == '\f') { offset++; @@ -95,20 +100,21 @@ namespace Claunia.PropertyList //Skip any possible whitespace at the beginning of the file byte[] magicBytes = new byte[8]; int b; + long index = -1; + bool bom = false; long mark; do { mark = fs.Position; b = fs.ReadByte(); + index++; + //Check if we are reading the Unicode byte order mark (BOM) and skip it + bom = index < 3 && ((index == 0 && b == 0xEF) || (bom && ((index == 1 && b == 0xBB) || (index == 2 && b == 0xBF)))); } - while(b != -1 && b == ' ' || b == '\t' || b == '\r' || b == '\n' || b == '\f'); + while(b != -1 && b == ' ' || b == '\t' || b == '\r' || b == '\n' || b == '\f' || bom); magicBytes[0] = (byte)b; int read = fs.Read(magicBytes, 1, 7); - // Check for UTF-8 BOM prefixed XMLs first. - if (magicBytes[0] == 0xEF && magicBytes[1] == 0xBB && magicBytes[2] == 0xBF && magicBytes[3] == (byte)'<') - return TYPE_XML; - int type = DetermineType(Encoding.ASCII.GetString(magicBytes, 0, read)); fs.Seek(mark, SeekOrigin.Begin); //if(fs.markSupported()) diff --git a/plist-cil/plist-cil.csproj b/plist-cil/plist-cil.csproj index 8660a00..b1c539f 100644 --- a/plist-cil/plist-cil.csproj +++ b/plist-cil/plist-cil.csproj @@ -7,7 +7,7 @@ Library Claunia.PropertyList plist-cil - 1.13 + 1.14 true