From 579d8831e15fcff60a821d6fee554b7f26bba96f Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Fri, 12 Jun 2020 14:00:01 -0700 Subject: [PATCH] Huge page support for composite images loaded on Linux (#37673) Add support for loading composite R2R images utilizing huge pages on Linux Support is broken into 3 major portions - Changes to the compiler to add a switch which can compile the composite image with higher than normal alignment - Changes to the runtime to make some slight tweaks to PE file loading on Linux to support these images correctly - Documentation on how to tie these various features together to achieve large page loading on Linux --- .../features/Linux-Hugepage-Crossgen2.md | 102 +++++++++++++++ src/coreclr/src/pal/src/map/map.cpp | 40 +++++- .../CodeGen/ReadyToRunObjectWriter.cs | 17 ++- .../Compiler/ReadyToRunCodegenCompilation.cs | 9 +- .../ReadyToRunCodegenCompilationBuilder.cs | 10 +- .../ObjectWriter/R2RPEBuilder.cs | 117 ++++++++++++++++-- .../crossgen2/crossgen2/CommandLineOptions.cs | 6 + .../src/tools/crossgen2/crossgen2/Program.cs | 18 +++ .../crossgen2/Properties/Resources.resx | 6 + 9 files changed, 302 insertions(+), 23 deletions(-) create mode 100644 docs/design/features/Linux-Hugepage-Crossgen2.md diff --git a/docs/design/features/Linux-Hugepage-Crossgen2.md b/docs/design/features/Linux-Hugepage-Crossgen2.md new file mode 100644 index 00000000000000..28f219b592c0ea --- /dev/null +++ b/docs/design/features/Linux-Hugepage-Crossgen2.md @@ -0,0 +1,102 @@ +Configuring Huge Pages for loading composite binaries using CoreCLR on Linux +---- + +Huge pages can provide performance benefits to reduce the cost of TLB cache misses when +executing code. In general, the largest available wins may be achieved by enabling huge +pages for use by the GC, which will dominate the memory use in the process, but in some +circumstances, if the application is sufficiently large, there may be a benefit to using +huge pages to map in code. 
+ +It is expected that consumers who have these needs have very large applications, and are +able to tolerate somewhat complex solutions. CoreCLR supports loading composite R2R +images using the hugetlbfs. Doing so requires several steps. + +1. The composite image must be created with a switch such as `--custom-pe-section-alignment=2097152`. This will align the PE sections in the R2R file on 2MB virtual address boundaries, and align the sections in the PE file itself on the same boundaries. + - This will increase the size of the image by up to 5 * the specified alignment. Typical increases will be closer to 3 * the specified alignment. +2. The composite image must be copied into a hugetlbfs filesystem which is visible to the .NET process instead of the composite image being loaded from the normal path. + - IMPORTANT: The composite image must NOT be located in the normal path next to the application binary, or that file will be used instead of the huge page version. + - The environment variable `COMPlus_NativeImageSearchPaths` must be set to point at the location of the hugetlbfs in use. For instance, `COMPlus_NativeImageSearchPaths` might be set to `/var/lib/hugetlbfs/user/USER/pagesize-2MB` + - As the cp command does not support copying into a hugetlbfs due to lack of support for the write syscall in that file system, a custom copy application must be used. A sample application that may be used to perform this task has a source listing in Appendix A. +3. The machine must be configured to have sufficient huge pages available in the appropriate huge page pool. The memory requirements of huge page PE loading are as follows. + - Sufficient pages to hold the unmodified copy of the composite image in the hugetlbfs. These pages will be used by the initial copy which emplaces the composite image into huge pages. + - By default the runtime will map each page of the composite image using a MAP_PRIVATE mapping. 
This will require that the maximum number of huge pages is large enough to hold a completely separate copy of the image as loaded. + - To reduce that cost, launch the application with the PAL_MAP_READONLY_PE_HUGE_PAGE_AS_SHARED environment variable set to 1. This environment variable will change the way that the composite image R2R files are mapped into the process to create the mappings to read only sections as MAP_SHARED mappings. This will reduce the extra huge pages needed to only be the sections marked as RW in the PE file. On a Windows machine use the link tool (`link /dump /header compositeimage.dll`) to determine the number of pages needed for the `.data` section of the PE file. + - If PAL_MAP_READONLY_PE_HUGE_PAGE_AS_SHARED is set, the number of huge pages needed is `<count of huge pages for the composite file> + <count of processes to run> * <count of huge pages needed for the .data section>` + +Appendix A - Source for a simple copy into hugetlbfs program. + +``` +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> + +int main(int argc, char** argv) +{ + if (argc != 3) + { + printf("Incorrect number of arguments specified. 
Arguments are <source file> <destination file>\n"); + return 1; + } + + void *addrSrc, *addrDest; + int fdSrc, fdDest, ret; + + fdSrc = open(argv[1], O_RDWR); + if (fdSrc < 0) + { + printf("Open src failed\n"); + return 1; + } + + struct stat st; + if (fstat(fdSrc, &st) < 0) + { + printf("fdSrc fstat failed\n"); + return 1; + } + + addrSrc = mmap(0, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fdSrc, 0); + if (addrSrc == MAP_FAILED) + { + printf("fdSrc mmap failed\n"); + return 1; + } + + fdDest = open(argv[2], O_CREAT | O_RDWR, 0755); + if (fdDest < 0) + { + printf("Open dest failed\n"); + return 1; + } + + if (ftruncate(fdDest, st.st_size) < 0) + { + printf("ftruncate failed\n"); + return 1; + } + + addrDest = mmap(0, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fdDest, 0); + if (addrDest == MAP_FAILED) + { + printf("fdDest mmap failed\n"); + return 1; + } + + memcpy(addrDest, addrSrc, st.st_size); + + munmap(addrSrc, st.st_size); + munmap(addrDest, st.st_size); + close(fdSrc); + close(fdDest); + return 0; +} +``` \ No newline at end of file diff --git a/src/coreclr/src/pal/src/map/map.cpp b/src/coreclr/src/pal/src/map/map.cpp index 90936b19fd9165..ee919a8067aca5 100644 --- a/src/coreclr/src/pal/src/map/map.cpp +++ b/src/coreclr/src/pal/src/map/map.cpp @@ -2233,6 +2233,10 @@ void * MAPMapPEFile(HANDLE hFile, off_t offset) bool forceRelocs = false; char* envVar; #endif + SIZE_T reserveSize = 0; + bool forceOveralign = false; + int readWriteFlags = MAP_FILE|MAP_PRIVATE|MAP_FIXED; + int readOnlyFlags = readWriteFlags; ENTRY("MAPMapPEFile (hFile=%p offset=%zx)\n", hFile, offset); @@ -2357,13 +2361,20 @@ void * MAPMapPEFile(HANDLE hFile, off_t offset) // We're going to start adding mappings to the mapping list, so take the critical section InternalEnterCriticalSection(pThread, &mapping_critsec); + reserveSize = virtualSize; + if ((ntHeader.OptionalHeader.SectionAlignment) > GetVirtualPageSize()) + { + reserveSize += ntHeader.OptionalHeader.SectionAlignment; + forceOveralign = true; + } + #ifdef 
HOST_64BIT // First try to reserve virtual memory using ExecutableAllocator. This allows all PE images to be // near each other and close to the coreclr library which also allows the runtime to generate // more efficient code (by avoiding usage of jump stubs). Alignment to a 64 KB granularity should // not be necessary (alignment to page size should be sufficient), but see // ExecutableMemoryAllocator::AllocateMemory() for the reason why it is done. - loadedBase = ReserveMemoryFromExecutableAllocator(pThread, ALIGN_UP(virtualSize, VIRTUAL_64KB)); + loadedBase = ReserveMemoryFromExecutableAllocator(pThread, ALIGN_UP(reserveSize, VIRTUAL_64KB)); #endif // HOST_64BIT if (loadedBase == NULL) @@ -2384,7 +2395,7 @@ void * MAPMapPEFile(HANDLE hFile, off_t offset) mapFlags |= MAP_JIT; } #endif // __APPLE__ - loadedBase = mmap(usedBaseAddr, virtualSize, PROT_NONE, mapFlags, -1, 0); + loadedBase = mmap(usedBaseAddr, reserveSize, PROT_NONE, mapFlags, -1, 0); } if (MAP_FAILED == loadedBase) @@ -2413,15 +2424,28 @@ void * MAPMapPEFile(HANDLE hFile, off_t offset) } #endif // _DEBUG + size_t headerSize; + headerSize = GetVirtualPageSize(); // if there are lots of sections, this could be wrong + + if (forceOveralign) + { + loadedBase = ALIGN_UP(loadedBase, ntHeader.OptionalHeader.SectionAlignment); + headerSize = ntHeader.OptionalHeader.SectionAlignment; + char *mapAsShared = EnvironGetenv("PAL_MAP_READONLY_PE_HUGE_PAGE_AS_SHARED"); + + // If PAL_MAP_READONLY_PE_HUGE_PAGE_AS_SHARED is set to 1, map the readonly sections as shared + // which works well with the behavior of the hugetlbfs + if (mapAsShared != NULL && (strcmp(mapAsShared, "1") == 0)) + readOnlyFlags = MAP_FILE|MAP_SHARED|MAP_FIXED; + } + //we have now reserved memory (potentially we got rebased). Walk the PE sections and map each part //separately. - size_t headerSize; - headerSize = GetVirtualPageSize(); // if there are lots of sections, this could be wrong //first, map the PE header to the first page in the image. 
Get pointers to the section headers palError = MAPmmapAndRecord(pFileObject, loadedBase, - loadedBase, headerSize, PROT_READ, MAP_FILE|MAP_PRIVATE|MAP_FIXED, fd, offset, + loadedBase, headerSize, PROT_READ, readOnlyFlags, fd, offset, (void**)&loadedHeader); if (NO_ERROR != palError) { @@ -2501,18 +2525,22 @@ void * MAPMapPEFile(HANDLE hFile, off_t offset) //Don't discard these sections. We need them to verify PE files //if (currentHeader.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) // continue; + int flags = readOnlyFlags; if (currentHeader.Characteristics & IMAGE_SCN_MEM_EXECUTE) prot |= PROT_EXEC; if (currentHeader.Characteristics & IMAGE_SCN_MEM_READ) prot |= PROT_READ; if (currentHeader.Characteristics & IMAGE_SCN_MEM_WRITE) + { prot |= PROT_WRITE; + flags = readWriteFlags; + } palError = MAPmmapAndRecord(pFileObject, loadedBase, sectionBase, currentHeader.SizeOfRawData, prot, - MAP_FILE|MAP_PRIVATE|MAP_FIXED, + flags, fd, offset + currentHeader.PointerToRawData, &sectionData); diff --git a/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/CodeGen/ReadyToRunObjectWriter.cs b/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/CodeGen/ReadyToRunObjectWriter.cs index 8238d9418f9ced..e032bda1e1646f 100644 --- a/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/CodeGen/ReadyToRunObjectWriter.cs +++ b/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/CodeGen/ReadyToRunObjectWriter.cs @@ -54,6 +54,13 @@ internal class ReadyToRunObjectWriter /// private readonly MapFileBuilder _mapFileBuilder; + /// + /// If non-null, the PE file will be laid out such that it can naturally be mapped with a higher alignment than 4KB + /// This is used to support loading via large pages on Linux + /// + private readonly int? 
_customPESectionAlignment; + + #if DEBUG private struct NodeInfo { @@ -72,12 +79,13 @@ public NodeInfo(ISymbolNode node, int nodeIndex, int symbolIndex) Dictionary _previouslyWrittenNodeNames = new Dictionary(); #endif - public ReadyToRunObjectWriter(string objectFilePath, EcmaModule componentModule, IEnumerable nodes, NodeFactory factory, bool generateMapFile) + public ReadyToRunObjectWriter(string objectFilePath, EcmaModule componentModule, IEnumerable nodes, NodeFactory factory, bool generateMapFile, int? customPESectionAlignment) { _objectFilePath = objectFilePath; _componentModule = componentModule; _nodes = nodes; _nodeFactory = factory; + _customPESectionAlignment = customPESectionAlignment; if (generateMapFile) { @@ -127,7 +135,8 @@ public void EmitPortableExecutable() headerBuilder, r2rHeaderExportSymbol, Path.GetFileName(_objectFilePath), - getRuntimeFunctionsTable); + getRuntimeFunctionsTable, + _customPESectionAlignment); NativeDebugDirectoryEntryNode nativeDebugDirectoryEntryNode = null; @@ -270,10 +279,10 @@ private void EmitObjectData(R2RPEBuilder r2rPeBuilder, ObjectData data, int node r2rPeBuilder.AddObjectData(data, section, name, mapFileBuilder); } - public static void EmitObject(string objectFilePath, EcmaModule componentModule, IEnumerable nodes, NodeFactory factory, bool generateMapFile) + public static void EmitObject(string objectFilePath, EcmaModule componentModule, IEnumerable nodes, NodeFactory factory, bool generateMapFile, int? 
customPESectionAlignment) { Console.WriteLine($@"Emitting R2R PE file: {objectFilePath}"); - ReadyToRunObjectWriter objectWriter = new ReadyToRunObjectWriter(objectFilePath, componentModule, nodes, factory, generateMapFile); + ReadyToRunObjectWriter objectWriter = new ReadyToRunObjectWriter(objectFilePath, componentModule, nodes, factory, generateMapFile, customPESectionAlignment); objectWriter.EmitPortableExecutable(); } } diff --git a/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs b/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs index 9396f872bd3b93..bd192ca1429d6c 100644 --- a/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs +++ b/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs @@ -233,6 +233,7 @@ public sealed class ReadyToRunCodegenCompilation : Compilation public ReadyToRunSymbolNodeFactory SymbolNodeFactory { get; } public ReadyToRunCompilationModuleGroupBase CompilationModuleGroup { get; } + private readonly int? _customPESectionAlignment; internal ReadyToRunCodegenCompilation( DependencyAnalyzerBase dependencyGraph, @@ -248,7 +249,8 @@ internal ReadyToRunCodegenCompilation( int parallelism, ProfileDataManager profileData, ReadyToRunMethodLayoutAlgorithm methodLayoutAlgorithm, - ReadyToRunFileLayoutAlgorithm fileLayoutAlgorithm) + ReadyToRunFileLayoutAlgorithm fileLayoutAlgorithm, + int? 
customPESectionAlignment) : base( dependencyGraph, nodeFactory, @@ -262,6 +264,7 @@ internal ReadyToRunCodegenCompilation( _resilient = resilient; _parallelism = parallelism; _generateMapFile = generateMapFile; + _customPESectionAlignment = customPESectionAlignment; SymbolNodeFactory = new ReadyToRunSymbolNodeFactory(nodeFactory); _corInfoImpls = new ConditionalWeakTable(); _inputFiles = inputFiles; @@ -290,7 +293,7 @@ public override void Compile(string outputFile) using (PerfEventSource.StartStopEvents.EmittingEvents()) { NodeFactory.SetMarkingComplete(); - ReadyToRunObjectWriter.EmitObject(outputFile, componentModule: null, nodes, NodeFactory, _generateMapFile); + ReadyToRunObjectWriter.EmitObject(outputFile, componentModule: null, nodes, NodeFactory, _generateMapFile, _customPESectionAlignment); CompilationModuleGroup moduleGroup = _nodeFactory.CompilationModuleGroup; if (moduleGroup.IsCompositeBuildMode) @@ -339,7 +342,7 @@ private void RewriteComponentFile(string inputFile, string outputFile, string ow } componentGraph.ComputeMarkedNodes(); componentFactory.Header.Add(Internal.Runtime.ReadyToRunSectionType.OwnerCompositeExecutable, ownerExecutableNode, ownerExecutableNode); - ReadyToRunObjectWriter.EmitObject(outputFile, componentModule: inputModule, componentGraph.MarkedNodeList, componentFactory, generateMapFile: false); + ReadyToRunObjectWriter.EmitObject(outputFile, componentModule: inputModule, componentGraph.MarkedNodeList, componentFactory, generateMapFile: false, customPESectionAlignment: null); } public override void WriteDependencyLog(string outputFileName) diff --git a/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilationBuilder.cs b/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilationBuilder.cs index d7ac45c9e37935..7cc5d3daa0fa15 100644 --- a/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilationBuilder.cs +++ 
b/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilationBuilder.cs @@ -28,6 +28,7 @@ public sealed class ReadyToRunCodegenCompilationBuilder : CompilationBuilder private ProfileDataManager _profileData; private ReadyToRunMethodLayoutAlgorithm _r2rMethodLayoutAlgorithm; private ReadyToRunFileLayoutAlgorithm _r2rFileLayoutAlgorithm; + private int? _customPESectionAlignment; private string _jitPath; private string _outputFile; @@ -137,6 +138,12 @@ public ReadyToRunCodegenCompilationBuilder GenerateOutputFile(string outputFile) return this; } + public ReadyToRunCodegenCompilationBuilder UseCustomPESectionAlignment(int? customPESectionAlignment) + { + _customPESectionAlignment = customPESectionAlignment; + return this; + } + public override ICompilation ToCompilation() { // TODO: only copy COR headers for single-assembly build and for composite build with embedded MSIL @@ -223,7 +230,8 @@ public override ICompilation ToCompilation() _parallelism, _profileData, _r2rMethodLayoutAlgorithm, - _r2rFileLayoutAlgorithm); + _r2rFileLayoutAlgorithm, + _customPESectionAlignment); } } } diff --git a/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs b/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs index bb648b9af99253..39624d7868b8a9 100644 --- a/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs +++ b/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs @@ -123,6 +123,14 @@ public SectionRVADelta(int startRVA, int endRVA, int deltaRVA) /// private int[] _sectionRVAs; + /// + /// Pointers to the location of the raw data. Needed to allow physical file alignment + /// beyond 4KB. PEBuilder doesn't support this today, so we + /// must store the RVA's and post-process the produced PE by patching the section + /// headers in the PE header. 
+ /// + private int[] _sectionPointerToRawData; + /// /// Maximum of virtual and physical size for each section. /// @@ -148,6 +156,12 @@ public SectionRVADelta(int startRVA, int endRVA, int deltaRVA) /// private bool _written; + /// + /// If non-null, the PE file will be laid out such that it can naturally be mapped with a higher alignment than 4KB + /// This is used to support loading via large pages on Linux + /// + private readonly int? _customPESectionAlignment; + /// /// Constructor initializes the various control structures and combines the section list. /// @@ -159,7 +173,8 @@ public R2RPEBuilder( PEHeaderBuilder peHeaderBuilder, ISymbolNode r2rHeaderExportSymbol, string outputFileSimpleName, - Func getRuntimeFunctionsTable) + Func getRuntimeFunctionsTable, + int? customPESectionAlignment) : base(peHeaderBuilder, deterministicIdProvider: null) { _target = target; @@ -171,6 +186,8 @@ public R2RPEBuilder( _textSectionIndex = _sectionBuilder.AddSection(TextSectionName, SectionCharacteristics.ContainsCode | SectionCharacteristics.MemExecute | SectionCharacteristics.MemRead, 512); _dataSectionIndex = _sectionBuilder.AddSection(DataSectionName, SectionCharacteristics.ContainsInitializedData | SectionCharacteristics.MemWrite | SectionCharacteristics.MemRead, 512); + _customPESectionAlignment = customPESectionAlignment; + if (r2rHeaderExportSymbol != null) { _sectionBuilder.AddSection(R2RPEBuilder.ExportDataSectionName, SectionCharacteristics.ContainsInitializedData | SectionCharacteristics.MemRead, 512); @@ -199,6 +216,7 @@ public R2RPEBuilder( _sections = sectionListBuilder.ToImmutableArray(); _sectionRVAs = new int[_sections.Length]; + _sectionPointerToRawData = new int[_sections.Length]; _sectionRawSizes = new int[_sections.Length]; } @@ -269,6 +287,10 @@ public void Write(Stream outputStream, int timeDateStamp) _sectionBuilder.RelocateOutputFile(outputPeFile, Header.ImageBase, outputStream); UpdateSectionRVAs(outputStream); + + if 
(_customPESectionAlignment.HasValue) + SetPEHeaderSectionAlignment(outputStream, _customPESectionAlignment.Value); + ApplyMachineOSOverride(outputStream); SetPEHeaderTimeStamp(outputStream, timeDateStamp); @@ -301,7 +323,7 @@ public void AddSections(MapFileBuilder mapFileBuilder) sizeof(short) + // SizeOfOptionalHeader: sizeof(ushort); // Characteristics - const int OffsetOfChecksum = + const int OffsetOfSectionAlign = sizeof(short) + // Magic sizeof(byte) + // MajorLinkerVersion sizeof(byte) + // MinorLinkerVersion @@ -310,8 +332,9 @@ public void AddSections(MapFileBuilder mapFileBuilder) sizeof(int) + // SizeOfUninitializedData sizeof(int) + // AddressOfEntryPoint sizeof(int) + // BaseOfCode - sizeof(long) + // PE32: BaseOfData (int), ImageBase (int) + sizeof(long); // PE32: BaseOfData (int), ImageBase (int) // PE32+: ImageBase (long) + const int OffsetOfChecksum = OffsetOfSectionAlign + sizeof(int) + // SectionAlignment sizeof(int) + // FileAlignment sizeof(short) + // MajorOperatingSystemVersion @@ -327,7 +350,10 @@ public void AddSections(MapFileBuilder mapFileBuilder) const int OffsetOfSizeOfImage = OffsetOfChecksum - 2 * sizeof(int); // SizeOfHeaders, SizeOfImage const int SectionHeaderNameSize = 8; - const int SectionHeaderRVAOffset = SectionHeaderNameSize + sizeof(int); // skip 8 bytes Name + 4 bytes VirtualSize + const int SectionHeaderVirtualSize = SectionHeaderNameSize; // VirtualSize follows + const int SectionHeaderRVAOffset = SectionHeaderVirtualSize + sizeof(int); // RVA Offset follows VirtualSize + 4 bytes VirtualSize + const int SectionHeaderSizeOfRawData = SectionHeaderRVAOffset + sizeof(int); // SizeOfRawData follows RVA + const int SectionHeaderPointerToRawDataOffset = SectionHeaderSizeOfRawData + sizeof(int); // PointerToRawData immediately follows the SizeOfRawData const int SectionHeaderSize = SectionHeaderNameSize + @@ -366,10 +392,39 @@ private void UpdateSectionRVAs(Stream outputStream) int sectionCount = _sectionRVAs.Length; for (int 
sectionIndex = 0; sectionIndex < sectionCount; sectionIndex++) { - outputStream.Seek(sectionHeaderOffset + SectionHeaderSize * sectionIndex + SectionHeaderRVAOffset, SeekOrigin.Begin); - byte[] rvaBytes = BitConverter.GetBytes(_sectionRVAs[sectionIndex]); - Debug.Assert(rvaBytes.Length == sizeof(int)); - outputStream.Write(rvaBytes, 0, rvaBytes.Length); + if (_customPESectionAlignment != null) + { + // When _customPESectionAlignment is set, the physical and virtual sizes are the same + byte[] sizeBytes = BitConverter.GetBytes(_sectionRawSizes[sectionIndex]); + Debug.Assert(sizeBytes.Length == sizeof(int)); + + // Update VirtualSize + { + outputStream.Seek(sectionHeaderOffset + SectionHeaderSize * sectionIndex + SectionHeaderVirtualSize, SeekOrigin.Begin); + outputStream.Write(sizeBytes, 0, sizeBytes.Length); + } + // Update SizeOfRawData + { + outputStream.Seek(sectionHeaderOffset + SectionHeaderSize * sectionIndex + SectionHeaderSizeOfRawData, SeekOrigin.Begin); + outputStream.Write(sizeBytes, 0, sizeBytes.Length); + } + } + + // Update RVAs + { + outputStream.Seek(sectionHeaderOffset + SectionHeaderSize * sectionIndex + SectionHeaderRVAOffset, SeekOrigin.Begin); + byte[] rvaBytes = BitConverter.GetBytes(_sectionRVAs[sectionIndex]); + Debug.Assert(rvaBytes.Length == sizeof(int)); + outputStream.Write(rvaBytes, 0, rvaBytes.Length); + } + + // Update pointer to raw data + { + outputStream.Seek(sectionHeaderOffset + SectionHeaderSize * sectionIndex + SectionHeaderPointerToRawDataOffset, SeekOrigin.Begin); + byte[] rawDataBytesBytes = BitConverter.GetBytes(_sectionPointerToRawData[sectionIndex]); + Debug.Assert(rawDataBytesBytes.Length == sizeof(int)); + outputStream.Write(rawDataBytesBytes, 0, rawDataBytesBytes.Length); + } } // Patch SizeOfImage to point past the end of the last section @@ -380,6 +435,19 @@ private void UpdateSectionRVAs(Stream outputStream) outputStream.Write(sizeOfImageBytes, 0, sizeOfImageBytes.Length); } + /// + /// Set PE header section 
alignment, for alignments not supported by the System.Reflection.Metadata + /// + /// Output stream representing the R2R PE executable + /// Timestamp to set in the R2R PE header + private void SetPEHeaderSectionAlignment(Stream outputStream, int customAlignment) + { + outputStream.Seek(DosHeaderSize + PESignatureSize + COFFHeaderSize + OffsetOfSectionAlign, SeekOrigin.Begin); + byte[] alignBytes = BitConverter.GetBytes(customAlignment); + Debug.Assert(alignBytes.Length == sizeof(int)); + outputStream.Write(alignBytes, 0, alignBytes.Length); + } + /// /// TODO: System.Reflection.Metadata doesn't currently support OS machine overrides. /// We cannot directly pass the xor-ed target machine to PEHeaderBuilder because it @@ -498,6 +566,26 @@ protected override BlobBuilder SerializeSection(string name, SectionLocation loc outputSectionIndex--; } + int injectedPadding = 0; + if (_customPESectionAlignment.HasValue && _customPESectionAlignment.Value != 0) + { + if (outputSectionIndex > 0) + { + sectionStartRva = Math.Max(sectionStartRva, _sectionRVAs[outputSectionIndex - 1] + _sectionRawSizes[outputSectionIndex - 1]); + } + + int newSectionStartRva = AlignmentHelper.AlignUp(sectionStartRva, _customPESectionAlignment.Value); + int newSectionPointerToRawData = AlignmentHelper.AlignUp(location.PointerToRawData, _customPESectionAlignment.Value); + if (newSectionPointerToRawData > location.PointerToRawData) + { + sectionDataBuilder = new BlobBuilder(); + injectedPadding = newSectionPointerToRawData - location.PointerToRawData; + sectionDataBuilder.WriteBytes(1, injectedPadding); + } + sectionStartRva = newSectionStartRva; + location = new SectionLocation(sectionStartRva, newSectionPointerToRawData); + } + if (!_target.IsWindows) { if (outputSectionIndex > 0) @@ -516,6 +604,7 @@ protected override BlobBuilder SerializeSection(string name, SectionLocation loc if (outputSectionIndex >= 0) { _sectionRVAs[outputSectionIndex] = sectionStartRva; + 
_sectionPointerToRawData[outputSectionIndex] = location.PointerToRawData; } BlobBuilder extraData = _sectionBuilder.SerializeSection(name, location); @@ -544,9 +633,19 @@ protected override BlobBuilder SerializeSection(string name, SectionLocation loc sectionDataBuilder.WriteByte(0); } + int sectionRawSize = sectionDataBuilder.Count - injectedPadding; + + if (_customPESectionAlignment.HasValue && _customPESectionAlignment.Value != 0) + { + // Align the end of the section to the padding offset + int count = AlignmentHelper.AlignUp(sectionRawSize, _customPESectionAlignment.Value); + sectionDataBuilder.WriteBytes(0, count - sectionRawSize); + sectionRawSize = count; + } + if (outputSectionIndex >= 0) { - _sectionRawSizes[outputSectionIndex] = sectionDataBuilder.Count; + _sectionRawSizes[outputSectionIndex] = sectionRawSize; } return sectionDataBuilder; diff --git a/src/coreclr/src/tools/crossgen2/crossgen2/CommandLineOptions.cs b/src/coreclr/src/tools/crossgen2/crossgen2/CommandLineOptions.cs index 1e4ae01dbb8429..721ea2e5071265 100644 --- a/src/coreclr/src/tools/crossgen2/crossgen2/CommandLineOptions.cs +++ b/src/coreclr/src/tools/crossgen2/crossgen2/CommandLineOptions.cs @@ -40,6 +40,8 @@ public class CommandLineOptions public int Parallelism { get; set; } public ReadyToRunMethodLayoutAlgorithm MethodLayout { get; set; } public ReadyToRunFileLayoutAlgorithm FileLayout { get; set; } + public int? 
CustomPESectionAlignment { get; set; } + public string SingleMethodTypeName { get; set; } public string SingleMethodName { get; set; } public string[] SingleMethodGenericArgs { get; set; } @@ -174,6 +176,10 @@ public static Command RootCommand() { Argument = new Argument(() => Environment.ProcessorCount) }, + new Option(new[] { "--custom-pe-section-alignment" }, SR.CustomPESectionAlignmentOption) + { + Argument = new Argument() + }, new Option(new[] { "--map" }, SR.MapFileOption) { Argument = new Argument() diff --git a/src/coreclr/src/tools/crossgen2/crossgen2/Program.cs b/src/coreclr/src/tools/crossgen2/crossgen2/Program.cs index 8cf3957ba906a0..e54d804477d73c 100644 --- a/src/coreclr/src/tools/crossgen2/crossgen2/Program.cs +++ b/src/coreclr/src/tools/crossgen2/crossgen2/Program.cs @@ -119,6 +119,23 @@ private int Run() if (_commandLineOptions.OutputFilePath == null) throw new CommandLineException(SR.MissingOutputFile); + if (_commandLineOptions.CustomPESectionAlignment != null) + { + int alignment = _commandLineOptions.CustomPESectionAlignment.Value; + bool invalidArgument = false; + if (alignment <= 4096) + { + invalidArgument = true; + } + if ((alignment & (alignment - 1)) != 0) + { + invalidArgument = true; // Alignment not power of two + } + + if (invalidArgument) + throw new CommandLineException(SR.InvalidCustomPESectionAlignment); + } + // // Set target Architecture and OS // @@ -478,6 +495,7 @@ private int Run() .FileLayoutAlgorithms(_commandLineOptions.MethodLayout, _commandLineOptions.FileLayout) .UseJitPath(_commandLineOptions.JitPath) .UseInstructionSetSupport(instructionSetSupport) + .UseCustomPESectionAlignment(_commandLineOptions.CustomPESectionAlignment) .GenerateOutputFile(_commandLineOptions.OutputFilePath.FullName) .UseILProvider(ilProvider) .UseBackendOptions(_commandLineOptions.CodegenOptions) diff --git a/src/coreclr/src/tools/crossgen2/crossgen2/Properties/Resources.resx b/src/coreclr/src/tools/crossgen2/crossgen2/Properties/Resources.resx 
index bfeec047701e4d..e9504c5e5e60cb 100644 --- a/src/coreclr/src/tools/crossgen2/crossgen2/Properties/Resources.resx +++ b/src/coreclr/src/tools/crossgen2/crossgen2/Properties/Resources.resx @@ -258,6 +258,12 @@ Warning: overriding -Ot with -Os + + Use custom alignment for PE sections in generated PE file + + + Custom PE Section Alignment must be a power of two greater than 4096. + Error: multiple input files are only supported in composite build mode: {0}