Feature: Add scripting interface

2026-02-07 06:44:34 +00:00 · 2024-06-24 17:16:13 +10:00
539 changed files with 77913 additions and 49124 deletions
--- a/.github/workflows/rolling-release.yml
+++ b/.github/workflows/rolling-release.yml
@@ -34,37 +34,35 @@ jobs:
        path: |
          dep/msvc/deps-arm64
          dep/msvc/deps-x64
-        key: deps ${{ hashFiles('scripts/deps/build-dependencies-windows-arm64.bat', 'scripts/deps/build-dependencies-windows-x64.bat') }}
+        key: deps ${{ hashFiles('scripts/build-dependencies-windows-arm64.bat', 'scripts/build-dependencies-windows-x64.bat') }}

    - name: Build X64 Dependencies
      if: steps.cache-deps.outputs.cache-hit != 'true'
      env:
        DEBUG: 0
-      run: scripts/deps/build-dependencies-windows-x64.bat
+      run: scripts/build-dependencies-windows-x64.bat

    - name: Build ARM64 Dependencies
      if: steps.cache-deps.outputs.cache-hit != 'true'
      env:
        DEBUG: 0
-      run: scripts/deps/build-dependencies-windows-arm64.bat
-
-    - name: Initialize build tag
-      shell: cmd
-      run: |
-        echo #pragma once > src/scmversion/tag.h
+      run: scripts/build-dependencies-windows-arm64.bat

    - name: Tag as preview build
      if: github.ref == 'refs/heads/master'
      shell: cmd
      run: |
+        echo #pragma once > src/scmversion/tag.h
        echo #define SCM_RELEASE_ASSET "duckstation-windows-x64-release.zip" >> src/scmversion/tag.h
        echo #define SCM_RELEASE_TAGS {"latest", "preview"} >> src/scmversion/tag.h
        echo #define SCM_RELEASE_TAG "preview" >> src/scmversion/tag.h

+
    - name: Tag as dev build
      if: github.ref == 'refs/heads/dev'
      shell: cmd
      run: |
+        echo #pragma once > src/scmversion/tag.h
        echo #define SCM_RELEASE_ASSET "duckstation-windows-x64-release.zip" >> src/scmversion/tag.h
        echo #define SCM_RELEASE_TAGS {"latest", "preview"} >> src/scmversion/tag.h
        echo #define SCM_RELEASE_TAG "latest" >> src/scmversion/tag.h
@@ -122,29 +120,25 @@ jobs:
        path: |
          dep/msvc/deps-arm64
          dep/msvc/deps-x64
-        key: deps ${{ hashFiles('scripts/deps/build-dependencies-windows-arm64.bat', 'scripts/deps/build-dependencies-windows-x64.bat') }}
+        key: deps ${{ hashFiles('scripts/build-dependencies-windows-arm64.bat', 'scripts/build-dependencies-windows-x64.bat') }}

    - name: Build X64 Dependencies
      if: steps.cache-deps.outputs.cache-hit != 'true'
      env:
        DEBUG: 0
-      run: scripts/deps/build-dependencies-windows-x64.bat
+      run: scripts/build-dependencies-windows-x64.bat

    - name: Build ARM64 Dependencies
      if: steps.cache-deps.outputs.cache-hit != 'true'
      env:
        DEBUG: 0
-      run: scripts/deps/build-dependencies-windows-arm64.bat
-
-    - name: Initialize build tag
-      shell: cmd
-      run: |
-        echo #pragma once > src/scmversion/tag.h
+      run: scripts/build-dependencies-windows-arm64.bat

    - name: Tag as preview build
      if: github.ref == 'refs/heads/master'
      shell: cmd
      run: |
+        echo #pragma once > src/scmversion/tag.h
        echo #define SCM_RELEASE_ASSET "duckstation-windows-arm64-release.zip" >> src/scmversion/tag.h
        echo #define SCM_RELEASE_TAGS {"latest", "preview"} >> src/scmversion/tag.h
        echo #define SCM_RELEASE_TAG "preview" >> src/scmversion/tag.h
@@ -153,6 +147,7 @@ jobs:
      if: github.ref == 'refs/heads/dev'
      shell: cmd
      run: |
+        echo #pragma once > src/scmversion/tag.h
        echo #define SCM_RELEASE_ASSET "duckstation-windows-arm64-release.zip" >> src/scmversion/tag.h
        echo #define SCM_RELEASE_TAGS {"latest", "preview"} >> src/scmversion/tag.h
        echo #define SCM_RELEASE_TAG "latest" >> src/scmversion/tag.h
@@ -198,6 +193,14 @@ jobs:
    runs-on: ubuntu-22.04
    timeout-minutes: 120
    steps:
+    # Work around https://github.com/actions/runner-images/issues/8659
+    - name: Remove GCC 13 from runner image
+      shell: bash
+      run: |
+        sudo rm -f /etc/apt/sources.list.d/ubuntu-toolchain-r-ubuntu-test-jammy.list
+        sudo apt-get update
+        sudo apt-get install -y --allow-downgrades 'libc6=2.35-0ubuntu*' 'libc6-dev=2.35-0ubuntu*' libstdc++6=12.3.0-1ubuntu1~22.04 libgcc-s1=12.3.0-1ubuntu1~22.04
+
    - uses: actions/checkout@v4.1.6
      with:
        fetch-depth: 0
@@ -223,19 +226,16 @@ jobs:
      uses: actions/cache@v4.0.2
      with:
        path: ~/deps
-        key: deps ${{ hashFiles('scripts/deps/build-dependencies-linux.sh') }}
+        key: deps ${{ hashFiles('scripts/build-dependencies-linux.sh') }}

    - name: Build Dependencies
      if: steps.cache-deps.outputs.cache-hit != 'true'
-      run: scripts/deps/build-dependencies-linux.sh "$HOME/deps"
-
-    - name: Initialize build tag
-      run: |
-        echo '#pragma once' > src/scmversion/tag.h
+      run: scripts/build-dependencies-linux.sh "$HOME/deps"

    - name: Tag as preview build
      if: github.ref == 'refs/heads/master'
      run: |
+        echo '#pragma once' > src/scmversion/tag.h
        echo '#define SCM_RELEASE_ASSET "DuckStation-x64.AppImage"' >> src/scmversion/tag.h
        echo '#define SCM_RELEASE_TAGS {"latest", "preview"}' >> src/scmversion/tag.h
        echo '#define SCM_RELEASE_TAG "preview"' >> src/scmversion/tag.h
@@ -243,6 +243,7 @@ jobs:
    - name: Tag as dev build
      if: github.ref == 'refs/heads/dev'
      run: |
+        echo '#pragma once' > src/scmversion/tag.h
        echo '#define SCM_RELEASE_ASSET "DuckStation-x64.AppImage"' >> src/scmversion/tag.h
        echo '#define SCM_RELEASE_TAGS {"latest", "preview"}' >> src/scmversion/tag.h
        echo '#define SCM_RELEASE_TAG "latest"' >> src/scmversion/tag.h
@@ -282,10 +283,6 @@ jobs:
      shell: bash
      run: git config --global --add safe.directory "*"

-    - name: Initialize build tag
-      run: |
-        echo '#pragma once' > src/scmversion/tag.h
-
    - name: Generate AppStream XML
      run: |
        scripts/generate-metainfo.sh scripts/flatpak
@@ -314,7 +311,7 @@ jobs:

    - name: Push to Flathub stable
      if: github.ref == 'refs/heads/dev'
-      uses: flathub-infra/flatpak-github-actions/flat-manager@b6c92176b7f578aedd80cac74cd8f0336f618e89
+      uses: flathub-infra/flatpak-github-actions/flat-manager@23796715b3dfa4c86ddf50cf29c3cc8b3c82dca8
      with:
        flat-manager-url: https://hub.flathub.org/
        repository: stable
@@ -353,19 +350,16 @@ jobs:
      uses: actions/cache@v4.0.2
      with:
        path: ~/deps
-        key: deps-mac ${{ hashFiles('scripts/deps/build-dependencies-mac.sh') }}
+        key: deps-mac ${{ hashFiles('scripts/build-dependencies-mac.sh') }}

    - name: Build Dependencies
      if: steps.cache-deps-mac.outputs.cache-hit != 'true'
-      run: scripts/deps/build-dependencies-mac.sh "$HOME/deps"
-
-    - name: Initialize build tag
-      run: |
-        echo '#pragma once' > src/scmversion/tag.h
+      run: scripts/build-dependencies-mac.sh "$HOME/deps"

    - name: Tag as preview build
      if: github.ref == 'refs/heads/master'
      run: |
+        echo '#pragma once' > src/scmversion/tag.h
        echo '#define SCM_RELEASE_ASSET "duckstation-mac-release.zip"' >> src/scmversion/tag.h
        echo '#define SCM_RELEASE_TAGS {"latest", "preview"}' >> src/scmversion/tag.h
        echo '#define SCM_RELEASE_TAG "preview"' >> src/scmversion/tag.h
@@ -373,6 +367,7 @@ jobs:
    - name: Tag as dev build
      if: github.ref == 'refs/heads/dev'
      run: |
+        echo '#pragma once' > src/scmversion/tag.h
        echo '#define SCM_RELEASE_ASSET "duckstation-mac-release.zip"' >> src/scmversion/tag.h
        echo '#define SCM_RELEASE_TAGS {"latest", "preview"}' >> src/scmversion/tag.h
        echo '#define SCM_RELEASE_TAG "latest"' >> src/scmversion/tag.h
--- a/.github/workflows/upload-caches.yml
+++ b/.github/workflows/upload-caches.yml
@@ -1,36 +0,0 @@
-name: Upload Caches
-
-on:
-  workflow_dispatch:
-
-jobs:
-  upload-windows-cache:
-    runs-on: windows-2022
-    timeout-minutes: 120
-    steps:
-    - uses: actions/checkout@v4.1.6
-      with:
-        fetch-depth: 0
-
-    - name: Cache Dependencies
-      id: cache-deps
-      uses: actions/cache@v4.0.2
-      with:
-        path: |
-          dep/msvc/deps-arm64
-          dep/msvc/deps-x64
-        key: deps ${{ hashFiles('scripts/deps/build-dependencies-windows-arm64.bat', 'scripts/deps/build-dependencies-windows-x64.bat') }}
-
-    - name: Zip Cache Files
-      if: steps.cache-deps.outputs.cache-hit == 'true'
-      shell: cmd
-      run: |
-        "C:\Program Files\7-Zip\7z.exe" a -r deps-x64.zip ./dep/msvc/deps-x64
-        "C:\Program Files\7-Zip\7z.exe" a -r deps-arm64.zip ./dep/msvc/deps-arm64
-
-    - name: Upload Cache Files
-      if: steps.cache-deps.outputs.cache-hit == 'true'
-      uses: actions/upload-artifact@v4.3.3
-      with:
-        name: "windows"
-        path: "deps-*.zip"
--- a/.gitignore
+++ b/.gitignore
@@ -8,7 +8,6 @@

 # dependency build temp files
 deps-build/
-/deps/

 # vs stuff
 .vs
--- a/CMakeModules/DuckStationDependencies.cmake
+++ b/CMakeModules/DuckStationDependencies.cmake
@@ -9,16 +9,13 @@ endif()
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)

-find_package(SDL2 2.30.6 REQUIRED)
+find_package(SDL2 2.30.4 REQUIRED)
 find_package(Zstd 1.5.6 REQUIRED)
 find_package(WebP REQUIRED) # v1.4.0, spews an error on Linux because no pkg-config.
 find_package(ZLIB REQUIRED) # 1.3, but Mac currently doesn't use it.
 find_package(PNG 1.6.40 REQUIRED)
 find_package(JPEG REQUIRED) # No version because flatpak uses libjpeg-turbo.
 find_package(Freetype 2.11.1 REQUIRED)
-find_package(cpuinfo REQUIRED)
-find_package(DiscordRPC 3.4.0 REQUIRED)
-find_package(SoundTouch 2.3.3 REQUIRED)

 if(NOT WIN32)
  find_package(CURL REQUIRED)
--- a/CMakeModules/DuckStationUtils.cmake
+++ b/CMakeModules/DuckStationUtils.cmake
@@ -57,8 +57,6 @@ function(detect_architecture)
    if("x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES)
      message(STATUS "Building x86_64 MacOS binaries.")
      set(CPU_ARCH_X64 TRUE PARENT_SCOPE)
-      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xarch_x86_64 -msse4.1" PARENT_SCOPE)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xarch_x86_64 -msse4.1" PARENT_SCOPE)
    endif()
    if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
      message(STATUS "Building ARM64 MacOS binaries.")
@@ -69,10 +67,6 @@ function(detect_architecture)
         CMAKE_SIZEOF_VOID_P EQUAL 8)
    message(STATUS "Building x86_64 binaries.")
    set(CPU_ARCH_X64 TRUE PARENT_SCOPE)
-    if(NOT MSVC OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1" PARENT_SCOPE)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1" PARENT_SCOPE)
-    endif()
  elseif(("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64") AND
         CMAKE_SIZEOF_VOID_P EQUAL 8) # Might have an A64 kernel, e.g. Raspbian.
    message(STATUS "Building ARM64 binaries.")
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -16,8 +16,7 @@ The following people have contributed to the project in some way, and are credit
 - posix - @Richard-L, blexx - German
 - @phoe-nix, @zkdpower - Chinese (Simplified)
 - Sorer - @MojoJojoDojo - Hebrew
- Hipnosis - @Hipnosis183, MrHomunculus, @falsepopsky - Spanish, Spanish (Latin America)
- @IlDucci - Spanish (Spain)
+- Hipnosis - @Hipnosis183, MrHomunculus, @falsepopsky - Spanish
 - @RaydenX93 - Italian
 - @r57zone - Russian
 - @6lackmag3 - Russian (Android)
--- a/README.md
+++ b/README.md
@@ -50,15 +50,13 @@ Other features include:
 - Automatic loading/applying of PPF patches.

 ## System Requirements
- - A CPU faster than a potato. But it needs to be x86_64 (SSE4.1), AArch32/armv7, AArch64/ARMv8, or RISC-V/RV64.
+ - A CPU faster than a potato. But it needs to be x86_64, AArch32/armv7, AArch64/ARMv8, or RISC-V/RV64.
 - For the hardware renderers, a GPU capable of OpenGL 3.1/OpenGL ES 3.1/Direct3D 11 Feature Level 10.0 (or Vulkan 1.0) and above. So, basically anything made in the last 10 years or so.
 - SDL, XInput or DInput compatible game controller (e.g. XB360/XBOne/XBSeries). DualShock 3 users on Windows will need to install the official DualShock 3 drivers included as part of PlayStation Now.

 ## Downloading and running
 Binaries of DuckStation for Windows x64/ARM64, Linux x86_64 (in AppImage/Flatpak formats), and macOS Universal Binaries are available via GitHub Releases and are automatically built with every commit/push. Binaries or packages distributed through other sources may be out of date and are not supported by the developer, please speak to them for support, not us.

-For x86 machines (most systems), you will need a CPU that supports the SSE4.1 instruction set. This includes all CPUs manufactured after 2007. If you want to use DuckStation with a CPU that is older, [v0.1-6995](https://github.com/stenzek/duckstation/releases/tag/v0.1-6995) is the last version that does not require SSE4.1.
-
 ### Windows

 DuckStation **requires** Windows 10/11, specifically version 1809 or newer. If you are still using Windows 7/8/8.1, DuckStation **will not run** on your operating system. Running these operating systems in 2023 should be considered a security risk, and I would recommend updating to something which receives vendor support.
@@ -166,7 +164,7 @@ alsa-lib-devel brotli-devel clang cmake dbus-devel egl-wayland-devel extra-cmake
 #### Building

 1. Clone the repository: `git clone https://github.com/stenzek/duckstation.git`, `cd duckstation`.
-2. Build dependencies. You can save these outside of the tree if you like. This will take a while. `scripts/deps/build-dependencies-linux.sh deps`.
+2. Build dependencies. You can save these outside of the tree if you like. This will take a while. `scripts/build-dependencies-linux.sh deps`.
 3. Run CMake to configure the build system. Assuming a build subdirectory of `build-release`, run `cmake -B build-release -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_EXE_LINKER_FLAGS_INIT="-fuse-ld=lld" -DCMAKE_MODULE_LINKER_FLAGS_INIT="-fuse-ld=lld" -DCMAKE_SHARED_LINKER_FLAGS_INIT="-fuse-ld=lld" -DCMAKE_PREFIX_PATH="$PWD/deps" -G Ninja`. If you want a release (optimized) build, include `-DCMAKE_BUILD_TYPE=Release -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON`.
 4. Compile the source code. For the example above, run `ninja -C build-release`
 5. Run the binary, located in the build directory under `./build-release/bin/duckstation-qt`.
@@ -179,7 +177,7 @@ Requirements:


 1. Clone the repository: `git clone https://github.com/stenzek/duckstation.git`.
-2. Build the dependencies. This will take a while. `scripts/deps/build-dependencies-mac.sh deps`.
+2. Build the dependencies. This will take a while. `scripts/build-dependencies-mac.sh deps`.
 2. Run CMake to configure the build system: `cmake -Bbuild-release -DCMAKE_BUILD_TYPE=Release -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON -DCMAKE_PREFIX_PATH="$PWD/deps"`. 
 4. Compile the source code: `cmake --build build-release --parallel`.
 5. Run the binary, located in the build directory under `bin/DuckStation.app`.
--- a/data/resources/gamecontrollerdb.txt
+++ b/data/resources/gamecontrollerdb.txt
@@ -3,7 +3,6 @@

 # Windows
 03000000300f00000a01000000000000,3 In 1 Conversion Box,a:b2,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a3,righty:a2,start:b8,x:b3,y:b0,platform:Windows,
-03000000fa190000918d000000000000,3 In 1 Conversion Box,a:b2,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a3,righty:a2,start:b8,x:b3,y:b0,platform:Windows,
 03000000fa2d00000100000000000000,3dRudder Foot Motion Controller,leftx:a0,lefty:a1,rightx:a5,righty:a2,platform:Windows,
 03000000d0160000040d000000000000,4Play Adapter,a:b1,b:b3,back:b4,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,leftshoulder:b6,leftstick:b14,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b15,righttrigger:b9,rightx:a3,righty:a4,start:b5,x:b0,y:b2,platform:Windows,
 03000000d0160000050d000000000000,4Play Adapter,a:b1,b:b3,back:b4,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,leftshoulder:b6,leftstick:b14,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b15,righttrigger:b9,rightx:a3,righty:a4,start:b5,x:b0,y:b2,platform:Windows,
@@ -485,7 +484,7 @@
 03000000f0250000c183000000000000,PlayStation Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
 03000000d9040000160f000000000000,PlayStation Controller Adapter,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a3,righty:a2,start:b9,x:b3,y:b0,platform:Windows,
 030000004c0500003713000000000000,PlayStation Vita,a:b1,b:b2,back:b8,dpdown:b13,dpleft:b15,dpright:b14,dpup:b12,leftshoulder:b4,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a3,righty:a4,start:b9,x:b0,y:b3,platform:Windows,
-03000000d620000011a7000000000000,PowerA Core Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
+03000000d620000011a7000000000000,PowerA Core Plus GameCube Controller,a:b1,b:b0,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Windows,
 03000000dd62000015a7000000000000,PowerA Fusion Nintendo Switch Arcade Stick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
 03000000d620000012a7000000000000,PowerA Fusion Nintendo Switch Fight Pad,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
 03000000dd62000016a7000000000000,PowerA Fusion Pro Nintendo Switch Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
@@ -874,7 +873,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000050b00000045000031000000,ASUS Gamepad,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b7,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b8,righttrigger:a4,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Mac OS X,
 03000000050b00000579000000010000,ASUS ROG Kunai 3,a:b0,b:b1,back:b12,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b14,leftshoulder:b6,leftstick:b15,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b42,paddle1:b9,paddle2:b11,rightshoulder:b7,rightstick:b16,righttrigger:a4,rightx:a2,righty:a3,start:b13,x:b3,y:b4,platform:Mac OS X,
 03000000050b00000679000000010000,ASUS ROG Kunai 3,a:b0,b:b1,back:b12,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b14,leftshoulder:b6,leftstick:b15,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b23,rightshoulder:b7,rightstick:b16,righttrigger:a4,rightx:a2,righty:a3,start:b13,x:b3,y:b4,platform:Mac OS X,
-03000000503200000110000045010000,Atari VCS Classic,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b3,start:b2,platform:Mac OS X,
+03000000503200000110000045010000,Atari VCS Classic,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b3,start:b2,platform:Mac OS X,
 03000000503200000110000047010000,Atari VCS Classic Controller,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b3,start:b2,platform:Mac OS X,
 03000000503200000210000047010000,Atari VCS Modern Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b9,leftshoulder:b4,leftstick:b6,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a4,rightx:a2,righty:a3,start:b8,x:b2,y:b3,platform:Mac OS X,
 030000008a3500000102000000010000,Backbone One,a:b0,b:b1,back:b16,dpdown:b11,dpleft:b13,dpright:b12,dpup:b10,guide:b17,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3~,start:b15,x:b2,y:b3,platform:Mac OS X,
@@ -1082,7 +1081,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000005e040000d102000000000000,Xbox One Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 030000005e040000dd02000000000000,Xbox One Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 030000005e040000e002000000000000,Xbox One Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b8,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Mac OS X,
-030000005e040000e002000003090000,Xbox One Controller,a:b0,b:b1,x:b2,y:b3,back:b6,guide:b10,start:b7,leftstick:b8,rightstick:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:a2,righttrigger:a5,platform:Mac OS X,
+030000005e040000e002000003090000,Xbox One Controller,a:b0,b:b1,back:b16,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000e302000000000000,Xbox One Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 030000005e040000ea02000000000000,Xbox One Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 030000005e040000fd02000003090000,Xbox One Controller,a:b0,b:b1,back:b16,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
@@ -1092,7 +1091,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000005e040000130b000009050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b15,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000130b000013050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000130b000015050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
-030000005e040000130b000007050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000130b000017050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000220b000017050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000172700004431000029010000,XiaoMi Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a6,rightx:a2,righty:a5,start:b11,x:b3,y:b4,platform:Mac OS X,
@@ -1391,7 +1389,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 050000005e040000050b000003090000,Microsoft Xbox One Elite 2,a:b0,b:b1,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a6,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a5,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 050000005e0400008e02000030110000,Microsoft Xbox One Elite 2,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,paddle1:b11,paddle2:b13,paddle3:b12,paddle4:b14,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 030000005e040000120b00000b050000,Microsoft Xbox Series Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
-060000005e040000120b000001050000,Microsoft Xbox Series X Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
+060000005e040000120b000001050000,Microsoft Xbox Series X Controller,a:b0,b:b1,x:b2,y:b3,back:b6,start:b7,guide:b8,leftshoulder:b4,rightshoulder:b5,leftstick:b9,rightstick:b10,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:a2,righttrigger:a5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,platform:Linux,
 03000000030000000300000002000000,Miroof,a:b1,b:b0,back:b6,leftshoulder:b4,leftx:a0,lefty:a1,rightshoulder:b5,start:b7,x:b3,y:b2,platform:Linux,
 03000000790000001c18000010010000,Mobapad Chitu HD,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 050000004d4f435554452d3035335800,Mocute 053X,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
@@ -1469,7 +1467,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000004c0500003713000011010000,PlayStation Vita,a:b1,b:b2,back:b8,dpdown:b13,dpleft:b15,dpright:b14,dpup:b12,leftshoulder:b4,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a3,righty:a4,start:b9,x:b0,y:b3,platform:Linux,
 03000000c62400000053000000010000,PowerA,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 03000000c62400003a54000001010000,PowerA 1428124-01,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
-03000000d620000011a7000011010000,PowerA Core Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
+03000000d620000011a7000011010000,PowerA Core Plus GameCube Controller,a:b1,b:b0,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
 03000000dd62000015a7000011010000,PowerA Fusion Nintendo Switch Arcade Stick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
 03000000d620000012a7000011010000,PowerA Fusion Nintendo Switch Fight Pad,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
 03000000d62000000140000001010000,PowerA Fusion Pro 2 Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
@@ -1525,8 +1523,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000300f00001211000011010000,Qanba Arcade Joystick,a:b2,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b5,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b6,start:b9,x:b1,y:b3,platform:Linux,
 03000000222c00000225000011010000,Qanba Dragon Arcade Joystick (PS3),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
 03000000222c00000025000011010000,Qanba Dragon Arcade Joystick (PS4),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Linux,
-03000000222c00001220000011010000,Qanba Drone 2 Arcade Joystick (PS4),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
-03000000222c00001020000011010000,Qanba Drone 2 Arcade Joystick (PS5),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 03000000222c00000020000011010000,Qanba Drone Arcade PS4 Joystick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,rightshoulder:b5,righttrigger:a4,start:b9,touchpad:b13,x:b0,y:b3,platform:Linux,
 03000000300f00001210000010010000,Qanba Joystick Plus,a:b0,b:b1,back:b8,leftshoulder:b5,lefttrigger:b7,leftx:a0,lefty:a1,rightshoulder:b4,righttrigger:b6,start:b9,x:b2,y:b3,platform:Linux,
 03000000222c00000223000011010000,Qanba Obsidian Arcade Joystick (PS3),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
@@ -1693,7 +1689,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 050000005e040000220b000013050000,Xbox One Elite 2 Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 050000005e040000050b000002090000,Xbox One Elite Series 2,a:b0,b:b1,back:b136,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:a6,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a5,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 030000005e040000ea02000011050000,Xbox One S Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
-050082795e040000e002000003090000,Xbox One S Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 060000005e040000ea0200000b050000,Xbox One S Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 060000005e040000ea0200000d050000,Xbox One S Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 030000005e040000120b000001050000,Xbox Series Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
--- a/data/resources/gamedb.yaml
+++ b/data/resources/gamedb.yaml
@@ -956,10 +956,6 @@ SLES-02089:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForceSoftwareRendererForReadbacks # 250+ readbacks per frame when loading...
-  settings:
-    displayLineStartOffset: 2 # Game doesn't fill the whole framebuffer, stops flicker.
  metadata:
    publisher: "Cryo Interactive"
    developer: "Smart Dog"
@@ -3181,8 +3177,7 @@ SCPS-10126:
    - AnalogController
    - DigitalController
  traits:
-    - ForceAccurateBlending # Requires 16-bit blend precision
-    - DisableTrueColor # to fix screen flicker.
+    - ForceSoftwareRenderer
  metadata:
    publisher: "Sony"
    developer: "Sony"
@@ -3204,9 +3199,6 @@ SLES-04108:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForceAccurateBlending # Requires 16-bit blend precision
-    - DisableTrueColor # to fix screen flicker.
  metadata:
    publisher: "Vivendi Universal Games, Inc"
    developer: "Coktel Vision / Neko Entertaiment"
@@ -3856,8 +3848,6 @@ SLPS-00269:
  name: "Air Management '96 (Japan)"
  controllers:
    - DigitalController
-  traits:
-    - ForceRecompilerICache # Prevents crashes.
  metadata:
    publisher: "Koei"
    developer: "Koei"
@@ -15420,8 +15410,6 @@ SLPS-01222:
      - SLPS-01223
  controllers:
    - DigitalController
-  traits:
-    - DisableWidescreen
  metadata:
    publisher: "Capcom"
    developer: "Capcom Production Studio 4"
@@ -15445,8 +15433,6 @@ SLPS-01223:
      - SLPS-01223
  controllers:
    - DigitalController
-  traits:
-    - DisableWidescreen
  metadata:
    publisher: "Capcom"
    developer: "Capcom Production Studio 4"
@@ -15463,8 +15449,6 @@ SLPS-01223:
    linkCable: false
 SLPS-00999:
  name: "Biohazard 2 (Japan) (Trial Edition)"
-  traits:
-    - DisableWidescreen
 SLPS-01510:
  name: "Biohazard 2 - Dual Shock Ver. (Japan) (Disc 1) (Leon-hen)"
  discSet:
@@ -15475,8 +15459,6 @@ SLPS-01510:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - DisableWidescreen
  metadata:
    publisher: "Capcom"
    developer: "Capcom Production Studio 4"
@@ -15501,8 +15483,6 @@ SLPS-01511:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - DisableWidescreen
  metadata:
    publisher: "Capcom"
    developer: "Capcom Production Studio 4"
@@ -15522,9 +15502,6 @@ SLPS-02300:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
  metadata:
    publisher: "Capcom"
    developer: "Capcom Production Studio 4"
@@ -15541,17 +15518,11 @@ SLPS-02300:
    linkCable: false
 SLPM-80485:
  name: "Biohazard 3 - Last Escape (Japan) (Demo)"
-  traits:
-    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
 SLPM-87224:
  name: "Biohazard 3 - Last Escape (Japan) (Rev 1)"
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
  metadata:
    publisher: "Capcom"
    developer: "Capcom Production Studio 4"
@@ -19005,8 +18976,6 @@ SLES-01304:
  name: "Breath of Fire III (Europe)"
  controllers:
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Fixes jittery sprites.
  metadata:
    publisher: "Infogrames"
    developer: "Capcom"
@@ -19025,8 +18994,6 @@ SLES-01319:
  name: "Breath of Fire III (France)"
  controllers:
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Fixes jittery sprites.
  metadata:
    publisher: "Infogrames"
    developer: "Capcom"
@@ -19045,8 +19012,6 @@ SLES-01320:
  name: "Breath of Fire III (Germany)"
  controllers:
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Fixes jittery sprites.
  metadata:
    publisher: "Infogrames"
    developer: "Capcom"
@@ -19065,8 +19030,6 @@ SLPS-00990:
  name: "Breath of Fire III (Japan)"
  controllers:
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Fixes jittery sprites.
  metadata:
    publisher: "Capcom"
    developer: "Capcom"
@@ -19083,8 +19046,6 @@ SLPS-00990:
    linkCable: false
 SLPM-80115:
  name: "Breath of Fire III (Japan) (Demo)"
-  traits:
-    - ForcePGXPCPUMode # Fixes jittery sprites.
 SLUS-00422:
  name: "Breath of Fire III (USA)"
  compatibility:
@@ -19092,8 +19053,6 @@ SLUS-00422:
    versionTested: "0.1-1072-g840a806"
  controllers:
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Fixes jittery sprites.
  metadata:
    publisher: "Capcom"
    developer: "Capcom"
@@ -19112,8 +19071,6 @@ SLPM-86720:
  name: "Breath of Fire III [PlayStation the Best] (aka Breath of Fire 3 [PlayStation the Best])"
  controllers:
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Fixes jittery sprites.
  metadata:
    publisher: "Capcom"
    developer: "Capcom"
@@ -22797,8 +22754,7 @@ SLES-01182:
    - DigitalController
    - NeGcon
  settings:
-    dmaMaxSliceTicks: 500 # Needs smaller sizes to avoid menu corruption.
-    dmaHaltTicks: 250
+    dmaMaxSliceTicks: 100
  metadata:
    publisher: "THQ"
    developer: "Interactive Entertainment"
@@ -22822,8 +22778,7 @@ SLUS-00882:
    rating: NoIssues
    comments: "Intro logos require the software renderer to display correctly."
  settings:
-    dmaMaxSliceTicks: 500 # Needs smaller sizes to avoid menu corruption.
-    dmaHaltTicks: 250
+    dmaMaxSliceTicks: 100
  controllers:
    - AnalogController
    - DigitalController
@@ -30723,9 +30678,6 @@ SCPS-10003:
  name: "Crime Crackers (Japan)"
  controllers:
    - DigitalController
-  settings:
-    dmaMaxSliceTicks: 100 # Stops DMA from blazing past the deferred CDROM async interrupt.
-    displayActiveEndOffset: -1 # Fixes garbage on edge of screen in cutscenes.
  codes:
    - HASH-111C340E270B10A8
  metadata:
@@ -43231,7 +43183,6 @@ SLES-00132:
    - DigitalController
  traits:
    - DisablePGXP # 2.5D, not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
@@ -43256,7 +43207,6 @@ SLPS-00308:
    - DigitalController
  traits:
    - DisablePGXP # 2.5D, not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
@@ -43282,7 +43232,6 @@ SLUS-00077:
    - DigitalController
  traits:
    - DisablePGXP # 2.5D, not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
@@ -46298,10 +46247,9 @@ SLES-00703:
    - AnalogController
    - DigitalController
  traits:
-    - DisablePGXP # 2.5D, not beneficial, breaks rendering.
-    - DisableWidescreen # No effect.
+    - DisablePGXP # 2.5D, not beneficial.
  settings:
-    gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
+    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
    publisher: "GT Interactive"
    developer: "3D Realms Entertainment"
@@ -46322,10 +46270,9 @@ SLES-00987:
    - AnalogController
    - DigitalController
  traits:
-    - DisablePGXP # 2.5D, not beneficial, breaks rendering.
-    - DisableWidescreen # No effect.
+    - DisablePGXP # 2.5D, not beneficial.
  settings:
-    gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
+    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
    publisher: "GT Interactive"
    developer: "3D Realms Entertainment"
@@ -46343,17 +46290,14 @@ SLES-00987:
 SLED-01027:
  name: "Duke Nukem (France) (Demo)"
  traits:
-    - DisablePGXP # 2.5D, not beneficial, breaks rendering.
-    - DisableWidescreen # No effect.
+    - DisablePGXP # 2.5D, not beneficial.
  settings:
-    gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
+    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
 SLES-03405:
  name: "Duke Nukem - Land of the Babes (Europe) (En,Fr,De,Es,It)"
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Improves wall texture wobble.
  metadata:
    publisher: "GT Interactive Software Corp"
    developer: "N-Space"
@@ -46377,8 +46321,6 @@ SLES-03440:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Improves wall texture wobble.
  metadata:
    publisher: "GT Interactive Software Corp"
    developer: "N-Space"
@@ -46401,8 +46343,6 @@ SLUS-01002:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Improves wall texture wobble.
  metadata:
    publisher: "GT Interactive Software Corp"
    developer: "N-Space"
@@ -46422,8 +46362,6 @@ SLES-01515:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Improves wall texture wobble.
  metadata:
    publisher: "GT Interactive Software Corp"
    developer: "N-Space"
@@ -46443,8 +46381,6 @@ SLES-03517:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Improves wall texture wobble.
  metadata:
    publisher: "GT Interactive Software Corp"
    developer: "N-Space"
@@ -46464,8 +46400,6 @@ SLES-01619:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Improves wall texture wobble.
  metadata:
    publisher: "GT Interactive Software Corp"
    developer: "N-Space"
@@ -46485,8 +46419,6 @@ SLES-03518:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Improves wall texture wobble.
  metadata:
    publisher: "GT Interactive Software Corp"
    developer: "N-Space"
@@ -46509,8 +46441,6 @@ SLUS-00583:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Improves wall texture wobble.
  metadata:
    publisher: "GT Interactive Software Corp"
    developer: "N-Space"
@@ -46527,22 +46457,17 @@ SLUS-00583:
    linkCable: false
 SLUS-80583:
  name: "Duke Nukem - Time to Kill (USA) (Demo 1)"
-  traits:
-    - ForcePGXPCPUMode # Improves wall texture wobble.
 SLUS-90036:
  name: "Duke Nukem - Time to Kill (USA) (Demo 2)"
-  traits:
-    - ForcePGXPCPUMode # Improves wall texture wobble.
 SLPS-01557:
  name: "Duke Nukem - Total Meltdown (Japan)"
  controllers:
    - AnalogController
    - DigitalController
  traits:
-    - DisablePGXP # 2.5D, not beneficial, breaks rendering.
-    - DisableWidescreen # No effect.
+    - DisablePGXP # 2.5D, not beneficial.
  settings:
-    gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
+    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
    publisher: "King Record Co. Ltd"
    developer: "3D Realms Entertainment"
@@ -46566,10 +46491,9 @@ SLUS-00355:
    - AnalogController
    - DigitalController
  traits:
-    - DisablePGXP # 2.5D, not beneficial, breaks rendering.
-    - DisableWidescreen # No effect.
+    - DisablePGXP # 2.5D, not beneficial.
  settings:
-    gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
+    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
    publisher: "GT Interactive"
    developer: "3D Realms Entertainment"
@@ -51042,9 +50966,6 @@ SLES-00501:
  name: "Extreme Snow Break (Europe)"
  controllers:
    - DigitalController
-  settings:
-    dmaMaxSliceTicks: 10 # Very sensitive to DMA timing, otherwise polygon flicker.
-    dmaHaltTicks: 100 # CPU needs to run significantly faster than DMA.
  metadata:
    publisher: "Microids"
    developer: "Virtual Studio"
@@ -51061,9 +50982,6 @@ SLES-00501:
    linkCable: false
 SLED-01193:
  name: "Extreme Snow Break (Europe) (Demo)"
-  settings:
-    dmaMaxSliceTicks: 10 # Very sensitive to DMA timing, otherwise polygon flicker.
-    dmaHaltTicks: 100 # CPU needs to run significantly faster than DMA.
 PCPX-96178:
  name: "e-Jump (Japan) (Disc 1)"
  discSet:
@@ -54982,7 +54900,6 @@ SLES-00487:
    - PlayStationMouse
  traits:
    - DisablePGXP # 2.5D, not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
@@ -55006,7 +54923,6 @@ SLPS-00727:
    - PlayStationMouse
  traits:
    - DisablePGXP # 2.5D, not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
@@ -55033,7 +54949,6 @@ SLUS-00331:
    - PlayStationMouse
  traits:
    - DisablePGXP # 2.5D, not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
@@ -60547,12 +60462,9 @@ SLPM-87331:
  name: "Front Mission 2 (Japan) (Front Mission History)"
  controllers:
    - DigitalController
-  traits:
-    # Pick your poison here. Disabling true colour fixes the sprite backgrounds,
-    # but if you're upscaling, leaves junk around the edges.
-    - ForceSoftwareRendererForReadbacks
  codes:
    - SLPM-87331
+    - SLPM-87397
  metadata:
    publisher: "Squaresoft"
    developer: "Squaresoft"
@@ -60567,14 +60479,10 @@ SLPM-87331:
    vibration: false
    multitap: false
    linkCable: false
-SLPS-01000:
+SCPS-45116:
  name: "Front Mission 2 (Japan, Asia)"
  controllers:
    - DigitalController
-  traits:
-    # Pick your poison here. Disabling true colour fixes the sprite backgrounds,
-    # but if you're upscaling, leaves junk around the edges.
-    - ForceSoftwareRendererForReadbacks
  codes:
    - SCPS-45116
    - SLPS-01000
@@ -66109,11 +66017,6 @@ SLUS-00127:
    linkCable: false
 SLES-00032:
  name: "Grand Theft Auto (Europe) (En,Fr,De,It)"
-  discSet:
-    name: "Grand Theft Auto (Europe) (En,Fr,De,It)"
-    serials:
-      - SLES-00032
-      - SLES-03389
  controllers:
    - AnalogController
    - DigitalController
@@ -66137,11 +66040,6 @@ SLES-00032:
    linkCable: false
 SLUS-00106:
  name: "Grand Theft Auto (USA)"
-  discSet:
-    name: "Grand Theft Auto (USA)"
-    serials:
-      - SLUS-00106
-      - SLUS-00846
  compatibility:
    rating: NoIssues
    versionTested: "0.1-1308-g622e50fa"
@@ -66164,11 +66062,6 @@ SLUS-00106:
    linkCable: false
 SLES-03389:
  name: "Grand Theft Auto - London 1969 (Europe) (En,Fr,De,It)"
-  discSet:
-    name: "Grand Theft Auto (Europe) (En,Fr,De,It)"
-    serials:
-      - SLES-00032
-      - SLES-03389
  controllers:
    - AnalogController
    - DigitalController
@@ -66215,11 +66108,6 @@ SLES-01714:
    linkCable: false
 SLUS-00846:
  name: "Grand Theft Auto - Mission Pack 1 - London 1969 (USA)"
-  discSet:
-    name: "Grand Theft Auto (USA)"
-    serials:
-      - SLUS-00106
-      - SLUS-00846
  controllers:
    - AnalogController
    - DigitalController
@@ -66722,8 +66610,6 @@ SLPS-00719:
  name: "Great Battle VI, The (Japan)"
  controllers:
    - DigitalController
-  codes:
-    - HASH-2A8D6A1D4C539B43
  metadata:
    publisher: "Banpresto"
    developer: "Aspect"
@@ -68062,8 +67948,6 @@ SCPS-10006:
  compatibility:
    rating: NoIssues
    versionTested: "0.1-4525-gdfd67664"
-  traits:
-    - DisableWidescreen # No effect.
  controllers:
    - DigitalController
  codes:
@@ -71043,7 +70927,6 @@ SLES-00555:
    - DigitalController
  traits:
    - DisablePGXP # 2.5D, not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
@@ -71069,7 +70952,6 @@ SLUS-00348:
    - DigitalController
  traits:
    - DisablePGXP # 2.5D, not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
@@ -71092,7 +70974,6 @@ SLPS-00972:
    - DigitalController
  traits:
    - DisablePGXP # 2.5D, not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
  metadata:
@@ -71236,12 +71117,10 @@ SLUS-01244:
  compatibility:
    rating: GraphicalAudioIssues
    versionTested: "0.1-4693-gbbcf1c67"
+    upscalingIssues: "Menus transparency is wrong (Issue #592)"
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForceAccurateBlending # Requires 16-bit blend precision
-    - DisableTrueColor # to fix transparency in menu backgrounds.
  metadata:
    publisher: "The 3DO Company"
    developer: "Team .366"
@@ -94107,8 +93986,6 @@ SLES-00211:
  name: "Magic Carpet (Europe) (En,Fr,De,Es,Sv)"
  controllers:
    - DigitalController
-  settings:
-    displayDeinterlacingMode: Disabled # Isn't actually interlaced, uses 240p buffers, they just left 480i enabled...
  metadata:
    publisher: "Electronic Arts"
    developer: "Bullfrog Productions / Krisalis"
@@ -94131,8 +94008,6 @@ SLPS-00587:
  name: "Magic Carpet (Japan)"
  controllers:
    - DigitalController
-  settings:
-    displayDeinterlacingMode: Disabled # Isn't actually interlaced, uses 240p buffers, they just left 480i enabled...
  metadata:
    publisher: "Electronic Arts"
    developer: "Bullfrog Productions / Krisalis"
@@ -94151,8 +94026,6 @@ SLUS-00029:
  name: "Magic Carpet (USA) (En,Fr,De,Es,Sv)"
  controllers:
    - DigitalController
-  settings:
-    displayDeinterlacingMode: Disabled # Isn't actually interlaced, uses 240p buffers, they just left 480i enabled...
  metadata:
    publisher: "Electronic Arts"
    developer: "Bullfrog Productions / Krisalis"
@@ -101354,7 +101227,8 @@ SLPS-00047:
  controllers:
    - DigitalController
  codes:
-    - HASH-F6005ABBC40728D4
+    - SLPS-00047
+    - SLPS-02104
  metadata:
    publisher: "Altron"
    developer: "Altron"
@@ -112695,7 +112569,6 @@ SLPS-00050:
 SCES-00582:
  name: "Nightmare Creatures (Europe)"
  controllers:
-    - AnalogController
    - DigitalController
  metadata:
    publisher: "Activision"
@@ -112714,7 +112587,6 @@ SCES-00582:
 SCES-00684:
  name: "Nightmare Creatures (Germany)"
  controllers:
-    - AnalogController
    - DigitalController
  metadata:
    publisher: "Activision"
@@ -112733,7 +112605,6 @@ SCES-00684:
 SIPS-60027:
  name: "Nightmare Creatures (Japan)"
  controllers:
-    - AnalogController
    - DigitalController
  metadata:
    publisher: "Sony"
@@ -112757,7 +112628,6 @@ SLUS-00582:
    rating: NoIssues
    versionTested: "0.1-986-gfc911de1"
  controllers:
-    - AnalogController
    - DigitalController
  metadata:
    publisher: "Activision"
@@ -115891,6 +115761,7 @@ SCUS-94449:
 SLPM-86439:
  name: "Omiai Commando - Ba-Couple ni Tsukkomi o (Japan)"
  controllers:
+    - AnalogController
    - DigitalController
  metadata:
    publisher: "Enix"
@@ -116379,8 +116250,6 @@ SLPS-02951:
    - AnalogController
    - DigitalController
    - NeGcon
-  settings:
-    displayDeinterlacingMode: Blend # Only used in menus, MAD flickers with fading.
  metadata:
    publisher: "MTO"
    developer: "MTO"
@@ -130044,8 +129913,6 @@ SLPM-80296:
  name: "Rally de Africa (Japan) (Taikenban)"
 SLPS-02679:
  name: "Rally de Europe (Japan)"
-  traits:
-    - DisableWidescreen # Speedometer breaks with WS rendering.
  controllers:
    - AnalogController
    - DigitalController
@@ -130332,8 +130199,6 @@ SCES-00004:
  compatibility:
    rating: NoIssues
    versionTested: "0.1-1308-g622e50fa"
-  traits:
-    - DisableWidescreen # No effect.
  controllers:
    - DigitalController
  metadata:
@@ -130440,8 +130305,6 @@ SLES-01103:
  controllers:
    - AnalogController
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Fixes affine texture mapping on floor.
  metadata:
    publisher: "Mindscape"
    developer: "Pure Entertainment"
@@ -130471,7 +130334,6 @@ SLUS-00656:
    - DigitalController
  traits:
    - ForceInterlacing
-    - ForcePGXPCPUMode # Fixes affine texture mapping on floor.
  metadata:
    publisher: "Mindscape"
    developer: "Pure Entertainment"
@@ -132999,7 +132861,6 @@ SLES-02529:
    - DigitalController
  traits:
    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
  libcrypt: true
  metadata:
    publisher: "Eidos Interactive"
@@ -133019,7 +132880,6 @@ SLED-02541:
  name: "Resident Evil 3 - Nemesis (Europe) (Demo)"
  traits:
    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
 SLES-02530:
  name: "Resident Evil 3 - Nemesis (France)"
  controllers:
@@ -133027,7 +132887,6 @@ SLES-02530:
    - DigitalController
  traits:
    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
  libcrypt: true
  metadata:
    publisher: "Eidos Interactive"
@@ -133050,7 +132909,6 @@ SLES-02531:
    - DigitalController
  traits:
    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
  libcrypt: true
  metadata:
    publisher: "Eidos Interactive"
@@ -133073,7 +132931,6 @@ SLES-02698:
    - DigitalController
  traits:
    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
  libcrypt: true
  metadata:
    publisher: "Eidos Interactive"
@@ -133096,7 +132953,6 @@ SLES-02533:
    - DigitalController
  traits:
    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
  libcrypt: true
  metadata:
    publisher: "Eidos Interactive"
@@ -133122,7 +132978,6 @@ SLES-02532:
    - DigitalController
  traits:
    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
  libcrypt: true
  metadata:
    publisher: "Proein / Eidos Interactive"
@@ -133148,7 +133003,6 @@ SLUS-00923:
    - DigitalController
  traits:
    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
  metadata:
    publisher: "Capcom"
    developer: "Capcom Production Studio 4"
@@ -133167,7 +133021,6 @@ SLUS-90064:
  name: "Resident Evil 3 - Nemesis (USA) (Demo)"
  traits:
    - DisableWidescreen
-    - ForcePGXPCPUMode # Fixes jitter in character models.
 SLPS-01974:
  name: "Restaurant Dream (Japan)"
  controllers:
@@ -149904,8 +149757,6 @@ SCES-00577:
    - DigitalController
  settings:
    gpuLineDetectMode: BasicTriangles # Fixes upscaled water rendering.
-    dmaMaxSliceTicks: 100 # Tight timing required for DMA modified after start.
-    dmaHaltTicks: 150 # Fixes sprites in menus.
  metadata:
    publisher: "Sony Computer Entertaiment Europe"
    developer: "Namco"
@@ -149929,8 +149780,6 @@ SLUS-00240:
    - DigitalController
  settings:
    gpuLineDetectMode: BasicTriangles # Fixes upscaled water rendering.
-    dmaMaxSliceTicks: 100 # Tight timing required for DMA modified after start.
-    dmaHaltTicks: 150 # Fixes sprites in menus.
  metadata:
    publisher: "Namco"
    developer: "Namco"
@@ -149952,10 +149801,6 @@ SLPS-00555:
    versionTested: "0.1-2202-ga17e15f1"
  controllers:
    - DigitalController
-  settings:
-    gpuLineDetectMode: BasicTriangles # Fixes upscaled water rendering.
-    dmaMaxSliceTicks: 100 # Tight timing required for DMA modified after start.
-    dmaHaltTicks: 150 # Fixes sprites in menus.
  metadata:
    publisher: "Namco"
    developer: "Namco"
@@ -149977,10 +149822,6 @@ SLPS-00545:
    versionTested: "0.1-2202-ga17e15f1"
  controllers:
    - DigitalController
-  settings:
-    gpuLineDetectMode: BasicTriangles # Fixes upscaled water rendering.
-    dmaMaxSliceTicks: 100 # Tight timing required for DMA modified after start.
-    dmaHaltTicks: 150 # Fixes sprites in menus.
  metadata:
    publisher: "Namco"
    developer: "Namco"
@@ -150002,10 +149843,6 @@ SLPS-91168:
  codes:
    - SLPS-91168
    - SLPS-91454
-  settings:
-    gpuLineDetectMode: BasicTriangles # Fixes upscaled water rendering.
-    dmaMaxSliceTicks: 100 # Tight timing required for DMA modified after start.
-    dmaHaltTicks: 150 # Fixes sprites in menus.
  metadata:
    publisher: "Namco"
    developer: "Namco"
@@ -153537,7 +153374,6 @@ SLES-00585:
    - DigitalController
  traits:
    - DisablePGXP # 2.5D, PGXP is not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
  metadata:
@@ -153560,7 +153396,6 @@ SLES-00640:
    - DigitalController
  traits:
    - DisablePGXP # 2.5D, PGXP is not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
  metadata:
@@ -153584,7 +153419,6 @@ SLPS-00685:
    - DigitalController
  traits:
    - DisablePGXP # 2.5D, PGXP is not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
  metadata:
@@ -153607,7 +153441,6 @@ SLES-00646:
    - DigitalController
  traits:
    - DisablePGXP # 2.5D, PGXP is not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
  metadata:
@@ -153634,7 +153467,6 @@ SLUS-00297:
    - DigitalController
  traits:
    - DisablePGXP # 2.5D, PGXP is not beneficial.
-    - DisableWidescreen # No effect.
  settings:
    gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
  metadata:
@@ -155542,6 +155374,7 @@ SLES-02598:
 SLPS-02508:
  name: "Street Fighter EX2 Plus (Japan)"
  controllers:
+    - AnalogController
    - DigitalController
  metadata:
    publisher: "Capcom"
@@ -172170,9 +172003,6 @@ SLPS-00025:
    - DigitalController
  traits:
    - ForceRecompilerICache
-  settings:
-    dmaMaxSliceTicks: 500 # Stops a large GPU transfer breaking CD.
-    dmaHaltTicks: 300
  codes:
    - HASH-A8647D688C39B63F
    - HASH-21D86F0985C11667
@@ -186425,9 +186255,6 @@ SCPS-45170:
  codes:
    - SCPS-45170
    - SCPS-45171
-  traits:
-    - ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
-    - DisablePGXPOn2DPolygons # Fixes misaligned text.
  metadata:
    publisher: "Squaresoft"
    developer: "Squaresoft"
@@ -186449,9 +186276,6 @@ SLPS-02773:
  codes:
    - SLPS-02773
    - SLPS-02774
-  traits:
-    - ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
-    - DisablePGXPOn2DPolygons # Fixes misaligned text.
  metadata:
    publisher: "Squaresoft"
    developer: "Squaresoft"
@@ -186480,9 +186304,6 @@ SLPS-01160:
    - SLPS-02775
    - SLPS-91436
    - SLPS-91437
-  traits:
-    - ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
-    - DisablePGXPOn2DPolygons # Fixes misaligned text.
  metadata:
    publisher: "Squaresoft"
    developer: "Squaresoft"
@@ -186509,9 +186330,6 @@ SLPS-01161:
  codes:
    - SLPS-01161
    - SLPS-02776
-  traits:
-    - ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
-    - DisablePGXPOn2DPolygons # Fixes misaligned text.
  metadata:
    publisher: "Squaresoft"
    developer: "Squaresoft"
@@ -186538,9 +186356,6 @@ SLUS-00664:
    versionTested: "0.1-1308-g622e50fa"
  controllers:
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
-    - DisablePGXPOn2DPolygons # Fixes misaligned text.
  metadata:
    publisher: "Squaresoft"
    developer: "Squaresoft"
@@ -186567,9 +186382,6 @@ SLUS-00669:
    versionTested: "0.1-1308-g622e50fa"
  controllers:
    - DigitalController
-  traits:
-    - ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
-    - DisablePGXPOn2DPolygons # Fixes misaligned text.
  metadata:
    publisher: "Squaresoft"
    developer: "Squaresoft"
--- a/data/resources/shaders/dolphinfx/crt/CRT-CONSUMER.glsl
+++ b/data/resources/shaders/dolphinfx/crt/CRT-CONSUMER.glsl
@@ -0,0 +1,780 @@
+//   Crt-Consumer
+
+//   This program is free software; you can redistribute it and/or
+//   modify it under the terms of the GNU General Public License
+//   as published by the Free Software Foundation; either version 2
+//   of the License, or (at your option) any later version.
+
+//   This program is distributed in the hope that it will be useful,
+//   but WITHOUT ANY WARRANTY; without even the implied warranty of
+//   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//   GNU General Public License for more details.
+
+//   You should have received a copy of the GNU General Public License
+//   along with this program; if not, write to the Free Software
+//   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+
+
+/*
+[configuration]
+
+
+[OptionRangeFloat]
+GUIName = Pre-Scale Sharpening
+OptionName = PRE_SCALE
+MinValue = 1.0
+MaxValue = 4.0
+StepAmount = 0.1
+DefaultValue = 1.5
+
+[OptionRangeFloat]
+GUIName = Convergence X
+OptionName = blurx
+MinValue = -4.0
+MaxValue = 4.0
+StepAmount = 0.05
+DefaultValue = 0.25
+
+[OptionRangeFloat]
+GUIName = Convergence Y
+OptionName = blury
+MinValue = -4.0
+MaxValue = 4.0
+StepAmount = 0.05
+DefaultValue = -0.1
+
+[OptionRangeFloat]
+GUIName = Curvature X
+OptionName = warpx
+MinValue = 0.0
+MaxValue = 0.12
+StepAmount = 0.01
+DefaultValue = 0.03
+
+[OptionRangeFloat]
+GUIName = Curvature Y
+OptionName = warpy
+MinValue = 0.0
+MaxValue = 0.12
+StepAmount = 0.01
+DefaultValue = 0.04
+
+[OptionRangeFloat]
+GUIName = Corner size
+OptionName = corner
+MinValue = 0.0
+MaxValue = 0.10
+StepAmount = 0.01
+DefaultValue = 0.03
+
+[OptionRangeFloat]
+GUIName = Border Smoothness
+OptionName = smoothness
+MinValue = 100.0
+MaxValue = 600.0
+StepAmount = 5.0
+DefaultValue = 400.0
+
+[OptionRangeFloat]
+GUIName = Interlacing Toggle
+OptionName = inter
+MinValue = 0.0
+MaxValue = 1.0
+StepAmount = 1.0
+DefaultValue = 1.0
+
+[OptionRangeFloat]
+GUIName = Interlacing Downscale Scanlines
+OptionName = Downscale
+MinValue = 1.0
+MaxValue = 8.0
+StepAmount = 1.
+DefaultValue = 2.0
+
+[OptionRangeFloat]
+GUIName = Beam low
+OptionName = scanlow
+MinValue = 1.0
+MaxValue = 15.0
+StepAmount = 1.0
+DefaultValue = 6.0
+
+[OptionRangeFloat]
+GUIName = Beam high
+OptionName = scanhigh
+MinValue = 1.0
+MaxValue = 15.0
+StepAmount = 1.0
+DefaultValue = 8.0
+
+[OptionRangeFloat]
+GUIName = Scanlines dark
+OptionName = beamlow
+MinValue = 0.5
+MaxValue = 2.5
+StepAmount = 0.05
+DefaultValue = 1.45
+
+[OptionRangeFloat]
+GUIName = Scanlines bright
+OptionName = beamhigh
+MinValue = 0.5
+MaxValue = 2.5
+StepAmount = 0.05
+DefaultValue = 1.05
+
+[OptionRangeFloat]
+GUIName = Protect White On Masks
+OptionName = preserve
+MinValue = 0.0
+MaxValue = 1.0
+StepAmount = 0.01
+DefaultValue = 0.98
+
+[OptionRangeFloat]
+GUIName = Bright boost dark pixels
+OptionName = brightboost1
+MinValue = 0.0
+MaxValue = 3.0
+StepAmount = 0.05
+DefaultValue = 1.25
+
+[OptionRangeFloat]
+GUIName = Bright boost bright pixels
+OptionName = brightboost2
+MinValue = 0.0
+MaxValue = 3.0
+StepAmount = 0.05
+DefaultValue = 1.0
+
+[OptionRangeFloat]
+GUIName = Glow pixels per axis
+OptionName = glow
+MinValue = 1.0
+MaxValue = 6.0
+StepAmount = 1.0
+DefaultValue = 3.0
+
+[OptionRangeFloat]
+GUIName = Glow quality
+OptionName = quality
+MinValue = 0.25
+MaxValue = 4.0
+StepAmount = 0.05
+DefaultValue = 1.0
+
+[OptionRangeFloat]
+GUIName = Glow intensity
+OptionName = glow_str
+MinValue = 0.0001
+MaxValue = 2.0
+StepAmount = 0.05
+DefaultValue = 0.3
+
+[OptionRangeFloat]
+GUIName = Add Noise
+OptionName = nois
+MinValue = 0.0
+MaxValue = 32.0
+StepAmount = 1.0
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Post Brightness
+OptionName = postbr
+MinValue = 0.0
+MaxValue = 2.5
+StepAmount = 0.02
+DefaultValue = 1.0
+
+[OptionRangeFloat]
+GUIName = Palette Fixes. Sega, PUAE Atari ST dark colors 
+OptionName = palette_fix
+MinValue = 0.0
+MaxValue = 2.0
+StepAmount = 1.0
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Mask Type
+OptionName = Shadowmask
+MinValue = -1.0
+MaxValue = 8.0
+StepAmount = 1.
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Mask Size
+OptionName = masksize
+MinValue = 1.0
+MaxValue = 2.0
+StepAmount = 1.0
+DefaultValue = 1.0
+
+[OptionRangeFloat]
+GUIName = Mask dark
+OptionName = MaskDark
+MinValue = 0.0
+MaxValue = 2.0
+StepAmount = 0.1
+DefaultValue = 0.2
+
+[OptionRangeFloat]
+GUIName = Mask light
+OptionName = MaskLight
+MinValue = 0.0
+MaxValue = 2.0
+StepAmount = 0.1
+DefaultValue = 1.5
+
+[OptionRangeFloat]
+GUIName = Slot Mask Strength
+OptionName = slotmask
+MinValue = 0.0
+MaxValue = 1.0
+StepAmount = 0.05
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Slot Mask Width
+OptionName = slotwidth
+MinValue = 1.0
+MaxValue = 6.0
+StepAmount = 0.5
+DefaultValue = 2.0
+
+[OptionRangeFloat]
+GUIName = Slot Mask Height: 2x1 or 4x1
+OptionName = double_slot
+MinValue = 1.0
+MaxValue = 2.0
+StepAmount = 1.0
+DefaultValue = 1.0
+
+[OptionRangeFloat]
+GUIName = Slot Mask Size
+OptionName = slotms
+MinValue = 1.0
+MaxValue = 2.0
+StepAmount = 1.0
+DefaultValue = 1.0
+
+[OptionRangeFloat]
+GUIName = Gamma Out
+OptionName = GAMMA_OUT
+MinValue = 0.0
+MaxValue = 4.0
+StepAmount = 0.05
+DefaultValue = 2.25
+
+[OptionRangeFloat]
+GUIName = Saturation
+OptionName = sat
+MinValue = 0.0
+MaxValue = 2.0
+StepAmount = 0.05
+DefaultValue = 1.0
+
+[OptionRangeFloat]
+GUIName = Contrast, 1.0:Off
+OptionName = contrast
+MinValue = 0.00
+MaxValue = 2.00
+StepAmount = 0.05
+DefaultValue = 1.0
+
+[OptionRangeFloat]
+GUIName = Color Temperature %
+OptionName = WP
+MinValue = -100.0
+MaxValue = 100.0
+StepAmount = 5.
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Red-Green Tint
+OptionName = rg
+MinValue = -1.0
+MaxValue = 1.0
+StepAmount = 0.005
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Red-Blue Tint
+OptionName = rb
+MinValue = -1.0
+MaxValue = 1.0
+StepAmount = 0.005
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Green-Red Tint
+OptionName = gr
+MinValue = -1.0
+MaxValue = 1.0
+StepAmount = 0.005
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Green-Blue Tint
+OptionName = gb
+MinValue = -1.0
+MaxValue = 1.0
+StepAmount = 0.005
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Blue-Red Tint
+OptionName = br
+MinValue = -1.0
+MaxValue = 1.0
+StepAmount = 0.005
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Blue-Green Tint
+OptionName = bg
+MinValue = -1.0
+MaxValue = 1.0
+StepAmount = 0.005
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Vignette On/Off
+OptionName = vignette
+MinValue = 0.0
+MaxValue = 1.0
+StepAmount = 1.0
+DefaultValue = 0.0
+
+[OptionRangeFloat]
+GUIName = Vignette Power
+OptionName = vpower
+MinValue = 0.0
+MaxValue = 1.0
+StepAmount = 0.01
+DefaultValue = 0.15
+
+[OptionRangeFloat]
+GUIName = Vignette strength
+OptionName = vstr
+MinValue = 0.0
+MaxValue = 50.0
+StepAmount = 1.0
+DefaultValue = 40.0
+
+[OptionRangeFloat]
+GUIName = Switch off shader
+OptionName = alloff
+MinValue = 0.0
+MaxValue = 1.0
+StepAmount = 1.0
+DefaultValue = 0.0
+
+
+[/configuration]
+*/
+// iTime paces the interlace flicker and iTimer seeds noise(); SourceSize is (1/invPixel, invPixel).
+#define iTime  (float(GetTime())/2.0)
+#define iTimer (float(GetTime())/60.0)
+
+#define SourceSize (vec4(1.0/GetInvNativePixelSize(),GetInvNativePixelSize()))
+// Curvature warp of a 0..1 UV: each axis is stretched by the square of the other axis (warpx/warpy).
+vec2 Warp(vec2 pos)
+{
+    vec2 centered = pos * 2.0 - 1.0;  // remap to -1..1 around the screen centre
+    vec2 bulge = vec2(1.0 + (centered.y * centered.y) * warpx, 1.0 + (centered.x * centered.x) * warpy);
+    return (centered * bulge) * 0.5 + 0.5;
+}
+// Scanline weight at distance y from the line centre; l (luma) blends the dark/bright beam settings.
+float sw(float y, float l)
+{
+    float width = mix(beamlow, beamhigh, l);
+    float falloff = mix(scanlow, scanhigh, y);
+    float d = y * width;
+    return exp2(-falloff * d * d);
+}
+// Phosphor-mask tint for output pixel x, selected by Shadowmask (0-8); any other value means no mask.
+vec3 mask(vec2 x, vec3 col, float l)
+{
+    x = floor(x / masksize);  // mask cell index; masksize widens every pattern
+    // col feeds patterns 4 and 7; l (luma) scales the gap rows of patterns 5 and 6.
+    if (Shadowmask == 0.0)
+    {
+        float m = fract(x.x * 0.4999);
+        if (m < 0.4999) return vec3(1.0,      MaskDark, 1.0);
+        else            return vec3(MaskDark, 1.0,      MaskDark);
+    }
+
+    else if (Shadowmask == 1.0)
+    {
+        vec3 Mask = vec3(MaskDark, MaskDark, MaskDark);
+        float line = MaskLight;
+        float odd  = 0.0;
+
+        if (fract(x.x / 6.0) < 0.5) odd = 1.0;
+        if (fract((x.y + odd) / 2.0) < 0.5) line = MaskDark;
+
+        float m = fract(x.x / 3.0);
+        if      (m < 0.333) Mask.b = MaskLight;
+        else if (m < 0.666) Mask.g = MaskLight;
+        else                Mask.r = MaskLight;
+
+        Mask *= line;
+        return Mask;
+    }
+
+    else if (Shadowmask == 2.0)
+    {
+        float m = fract(x.x*0.3333);
+        if (m < 0.3333) return vec3(MaskDark,  MaskDark,  MaskLight);
+        if (m < 0.6666) return vec3(MaskDark,  MaskLight, MaskDark);
+        else            return vec3(MaskLight, MaskDark,  MaskDark);
+    }
+
+    else if (Shadowmask == 3.0)
+    {
+        float m = fract(x.x * 0.5);
+        if (m < 0.5) return vec3(1.0, 1.0, 1.0);
+        else         return vec3(MaskDark, MaskDark, MaskDark);
+    }
+
+    else if (Shadowmask == 4.0)
+    {
+        vec3 Mask = vec3(col.rgb);
+        float line = MaskLight;
+        float odd  = 0.0;
+
+        if (fract(x.x / 4.0) < 0.5) odd = 1.0;
+        if (fract((x.y + odd) / 2.0) < 0.5) line = MaskDark;
+
+        float m = fract(x.x / 2.0);
+        if  (m < 0.5) { Mask.r = 1.0; Mask.b = 1.0; }
+        else  Mask.g = 1.0;
+
+        Mask *= line;
+        return Mask;
+    }
+
+    else if (Shadowmask == 5.0)
+    {
+        vec3 Mask = vec3(1.0, 1.0, 1.0);
+
+        if (fract(x.x / 4.0) < 0.5)
+        {
+            if (fract(x.y / 3.0) < 0.666)
+            {
+                if (fract(x.x / 2.0) < 0.5) Mask = vec3(1.0,      MaskDark, 1.0);
+                else                        Mask = vec3(MaskDark, 1.0,      MaskDark);
+            }
+            else Mask *= l;
+        }
+        else if (fract(x.x / 4.0) >= 0.5)
+        {
+            if (fract(x.y / 3.0) > 0.333)
+            {
+                if (fract(x.x / 2.0) < 0.5) Mask = vec3(1.0,      MaskDark, 1.0);
+                else                        Mask = vec3(MaskDark, 1.0,      MaskDark);
+            }
+            else Mask *= l;
+        }
+
+        return Mask;
+    }
+
+    else if (Shadowmask == 6.0)
+    {
+        vec3 Mask = vec3(MaskDark, MaskDark, MaskDark);
+        if (fract(x.x / 6.0) < 0.5)
+        {
+            if (fract(x.y / 4.0) < 0.75)
+            {
+                if      (fract(x.x / 3.0) < 0.3333) Mask.r = MaskLight;
+                else if (fract(x.x / 3.0) < 0.6666) Mask.g = MaskLight;
+                else                                Mask.b = MaskLight;
+            }
+            else Mask *= l * 0.9;  // gap row scales with luma, like pattern 5 (was a no-op expression)
+        }
+        else if (fract(x.x / 6.0) >= 0.5)
+        {
+            if (fract(x.y / 4.0) >= 0.5 || fract(x.y / 4.0) < 0.25)
+            {
+                if      (fract(x.x / 3.0) < 0.3333) Mask.r = MaskLight;
+                else if (fract(x.x / 3.0) < 0.6666) Mask.g = MaskLight;
+                else                                Mask.b = MaskLight;
+            }
+            else Mask *= l * 0.9;  // gap row scales with luma, like pattern 5 (was a no-op expression)
+        }
+        return Mask;
+    }
+
+    else if (Shadowmask == 7.0)
+    {
+        float m = fract(x.x * 0.3333);
+
+        if (m < 0.3333) return vec3(MaskDark,          MaskLight,         MaskLight * col.b); //Cyan
+        if (m < 0.6666) return vec3(MaskLight * col.r, MaskDark,          MaskLight);         //Magenta
+        else            return vec3(MaskLight,         MaskLight * col.g, MaskDark);          //Yellow
+    }
+
+    else if (Shadowmask == 8.0)
+    {
+        vec3 Mask = vec3(MaskDark, MaskDark, MaskDark);
+
+        float bright = MaskLight;
+        float left   = 0.0;
+        if (fract(x.x / 6.0) < 0.5) left = 1.0;
+
+        float m = fract(x.x / 3.0);
+        if      (m < 0.333) Mask.b = 0.9;
+        else if (m < 0.666) Mask.g = 0.9;
+        else                Mask.r = 0.9;
+
+        if (mod(x.y, 2.0) == 1.0 && left == 1.0 || mod(x.y, 2.0) == 0.0 && left == 0.0)
+            Mask *= bright;
+
+        return Mask;
+    }
+
+    else return vec3(1.0, 1.0, 1.0);
+}
+// Slot-mask gain for one output pixel: staggered half-period cells are darkened, the rest boosted.
+float SlotMask(vec2 pos, vec3 c)
+{
+    if (slotmask == 0.0) return 1.0;
+
+    vec2 cell = floor(pos / slotms);
+    float peak = pow(max(max(c.r, c.g), c.b), 1.33);
+    float period = slotwidth * 2.0;
+    float col_phase = fract(cell.x / period);
+    float row = floor(fract(cell.y / (2.0 * double_slot)) * 2.0 * double_slot);
+
+    // Row 0 of the vertical period darkens the left half, row double_slot darkens the right half.
+    bool left_bar  = (row == 0.0) && (col_phase < 0.5);
+    bool right_bar = (row == double_slot) && (col_phase >= 0.5);
+    if (left_bar || right_bar) return mix(1.0 - slotmask, 1.0 - 0.80 * slotmask, peak);
+
+    return 1.0 + 0.7 * slotmask * (1.0 - peak);
+}
+// Affine contrast: scales RGB by `contrast`; the last column re-centres around 0.5 when w == 1.
+mat4 contrastMatrix(float contrast)
+{
+    float offset = (1.0 - contrast) / 2.0;
+
+    vec4 col_r = vec4(contrast, 0.0, 0.0, 0.0);
+    vec4 col_g = vec4(0.0, contrast, 0.0, 0.0);
+    vec4 col_b = vec4(0.0, 0.0, contrast, 0.0);
+    return mat4(col_r, col_g, col_b, vec4(offset, offset, offset, 1.0));
+}
+// Vignette as a uniform RGB scale matrix; identity when the vignette option is off. `l` is unused.
+mat3 vign(float l)
+{
+    float vig = 1.0;
+    if (vignette != 0.0)
+    {
+        vec2 vpos = GetCoordinates();
+        vpos *= 1.0 - vpos.xy;  // x*(1-x), y*(1-y): zero at the edges, largest at the centre
+        vig = min(pow(vpos.x * vpos.y * vstr, vpower), 1.0);
+    }
+
+    return mat3(vig, 0.0, 0.0,
+                0.0, vig, 0.0,
+                0.0, 0.0, vig);
+}
+// Mixes the colour with its weighted grey by `sat`; dark pixels use squared-then-doubled weights.
+vec3 saturation(vec3 textureColor)
+{
+    // 0.5775 is roughly 1/sqrt(3), so pure white has a brightness of about 1.0.
+    float brightness = length(textureColor.rgb) * 0.5775;
+
+    vec3 weights = vec3(0.4, 0.5, 0.1);
+    if (brightness < 0.5)
+    {
+        vec3 squared = weights * weights;
+        weights = squared + squared;
+    }
+
+    return mix(vec3(dot(textureColor.rgb, weights)), textureColor.rgb, sat);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Glow term: sums squared, pre-scaled samples over a (2*glow+1)^2 grid centred on texcoord.
+// `col` is kept for signature compatibility but is not read.
+vec3 glow0 (vec2 texcoord, vec3 col)
+{
+    // Higher quality shrinks the sample spacing, giving a tighter and less visible glow.
+    vec2 spacing = SourceSize.zw / quality;
+
+    // Every sample gets the same 1/glow scale before squaring; it does not vary
+    // with distance, so it is computed once outside the loops.
+    float factor = 1.0 / glow;
+
+    vec3 sum = vec3(0.0);
+
+    // glow = pixels per axis; the sample count grows quadratically with it.
+    for (float x = -glow; x <= glow; x = x + 1.0)
+    {
+        for (float y = -glow; y <= glow; y = y + 1.0)
+        {
+            vec2 offset = vec2(x, y) * spacing;
+            vec3 tap = SampleLocation(texcoord + offset).rgb * factor;
+
+            sum += tap * tap;
+        }
+    }
+
+    // Normalised by glow*glow rather than by the number of samples taken,
+    // then scaled by the user-facing glow intensity.
+    return (glow_str * sum / (glow * glow));
+}
+    
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+float noise(vec2 co)
+{
+    float phase = iTimer * dot(co.xy, vec2(12.9898, 78.233));  // iTimer re-seeds the hash over time
+    return fract(sin(phase) * 43758.5453);                     // sin-hash; fract keeps it in [0,1)
+}
+// Rounded-corner gain in 0..1: falls off as coord gets within `corner` of a screen corner.
+float corner0(vec2 coord)
+{
+    vec2 uv = (coord - vec2(0.5, 0.5)) * 1.0 + vec2(0.5, 0.5);
+    // Distance to the nearest edge on each axis, with y rescaled by the source aspect ratio.
+    vec2 edge = min(uv, vec2(1.0, 1.0) - uv) * vec2(1.0, SourceSize.y / SourceSize.x);
+
+    vec2 radius = vec2(corner, corner);
+    vec2 inset = radius - min(edge, radius);
+
+    return clamp((radius.x - sqrt(dot(inset, inset))) * smoothness, 0.0, 1.0);
+}
+// RGB<->XYZ matrices used by main() for the colour-temperature (WP) shift; names suggest D65/D50 white points.
+const mat3 D65_to_XYZ = mat3(
+           0.4306190,  0.2220379,  0.0201853,
+           0.3415419,  0.7066384,  0.1295504,
+           0.1783091,  0.0713236,  0.9390944);
+
+const mat3 XYZ_to_D65 = mat3(
+           3.0628971, -0.9692660,  0.0678775,
+          -1.3931791,  1.8760108, -0.2288548,
+          -0.4757517,  0.0415560,  1.0693490);
+
+const mat3 D50_to_XYZ = mat3(
+           0.4552773,  0.2323025,  0.0145457,
+           0.3675500,  0.7077956,  0.1049154,
+           0.1413926,  0.0599019,  0.7057489);
+
+const mat3 XYZ_to_D50 = mat3(
+           2.9603944, -0.9787684,  0.0844874,
+          -1.4678519,  1.9161415, -0.2545973,
+          -0.4685105,  0.0334540,  1.4216174);
+// Fragment entry point: warp, pre-scaled sampling, colour grading, scanlines, masks, glow and output.
+void main()
+{
+    vec2 vTexCoord  = GetCoordinates();
+    vec2 pos = Warp(vTexCoord.xy);
+    vec2 tex_size = 1.0 / GetInvNativePixelSize();
+    vec2 OutputSize = GetWindowSize();
+
+    // pC4 is the warped position nudged by half a native texel; fp is the position inside the texel.
+    vec2 pC4 = (pos + 0.5/tex_size);
+    vec2 fp = fract(pos * tex_size);
+    if (inter < 0.5 && tex_size.y > 400.0){ fp.y = fract(pos.y * tex_size.y*1.0/Downscale);}
+
+    vec4 res = vec4(1.0);
+
+    if (alloff == 1.0)
+        res = SampleLocation(pC4);
+    else
+    {
+        // Pre-scaled bilinear sample position, sharpened by PRE_SCALE.
+        vec2 texel = pos * tex_size;
+        vec2 texel_floored = floor(texel);
+
+        float scale = PRE_SCALE;
+        float region_range = 0.5 - 0.5 / scale;
+
+        // Figure out where in the texel to sample to get correct pre-scaled bilinear.
+        // Uses the hardware bilinear interpolator to avoid having to sample 4 times manually.
+
+        vec2 center_dist = fp - 0.5;
+
+        vec2 fpp = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5;
+
+        vec2 mod_texel = texel_floored + fpp;
+        vec2 coords = mod_texel / SourceSize.xy;
+        // Convergence: red leans towards sample1, blue towards sample3, green blends all three.
+        vec3 sample1 = SampleLocation(vec2(coords.x + blurx*SourceSize.z, coords.y - blury*SourceSize.w)).rgb;
+        vec3 sample2 = SampleLocation(coords).rgb;
+        vec3 sample3 = SampleLocation(vec2(coords.x - blurx*SourceSize.z, coords.y + blury*SourceSize.w )).rgb;
+
+        vec3 color = vec3(sample1.r * 0.5  + sample2.r * 0.5,
+                          sample1.g * 0.25 + sample2.g * 0.5 + sample3.g * 0.25,
+                          sample2.b * 0.5  + sample3.b * 0.5);
+        if (palette_fix != 0.0)
+        {
+            if (palette_fix == 1.0) color = color* 1.0667;
+            else if (palette_fix == 2.0) color = color * 2.0;
+        }
+
+        //COLOR TEMPERATURE FROM GUEST.R-DR.VENOM
+        if (WP != 0.0)
+        {
+            vec3 warmer = D50_to_XYZ * color;
+            warmer = XYZ_to_D65 * warmer;
+
+            vec3 cooler = D65_to_XYZ * color;
+            cooler = XYZ_to_D50 * cooler;
+
+            float m = abs(WP) / 100.0;
+            vec3 comp = (WP < 0.0) ? cooler : warmer;
+            comp = clamp(comp, 0.0, 1.0);
+
+            color = vec3(mix(color, comp, m));
+        }
+
+        mat3 hue = mat3 (1., rg,  rb,                 //red tint
+                         gr,  1., gb,                 //green tint
+                         br,  bg,  1.);               //blue tint
+        // Negative tints can push a channel below zero and pow() is undefined for x < 0, so clamp first.
+        color = max(hue * color, vec3(0.0));
+
+        color = (2.0*pow(color,vec3(2.8))) - pow(color,vec3(3.6));
+
+        float lum = color.r * 0.3 + color.g * 0.6 + color.b * 0.1;
+
+        float f = fract(fp.y -0.5);
+        // Scanlines are skipped for tall (>400 line) sources while the interlacing toggle is on.
+        if (!(inter > 0.5 && tex_size.y > 400.0))
+        {
+            color = color * sw(f,lum) + color * sw (1.0-f,lum);
+        }
+
+        float lum1 = color.r * 0.3 + color.g * 0.6 + color.b * 0.1;
+        // Mask lookups use output-pixel coordinates (vTexCoord * OutputSize).
+        color *= mix(mask((vTexCoord * OutputSize.xy), color,lum1), vec3(1.0), lum1*preserve);
+
+        if (slotmask != 0.0) color *= SlotMask((vTexCoord * OutputSize.xy) * 1.0001, color);
+
+        // Brightness boost is interpolated by the brightest channel.
+        color *= mix(brightboost1, brightboost2, max(max(color.r, color.g), color.b));
+
+        // 2*c^2.8 - c^3.6 turns negative for c above roughly 2.38 (boosted or tinted input);
+        // clamp so the gamma pow() below never sees a negative base.
+        color = pow(max(color, vec3(0.0)), vec3(1.0 / GAMMA_OUT));
+        if (glow_str != 0.0) color += glow0(coords,color);
+
+        if (sat    != 1.0) color  = saturation(color);
+        if (corner != 0.0) color *= corner0(pC4);
+        if (nois   != 0.0) color *= 1.0 + noise(coords * 2.0) / nois;
+
+        color *= mix(1.0, postbr, lum);
+        res = vec4(color, 1.0);
+        if (contrast != 1.0) res = contrastMatrix(contrast) * res;
+        if (inter > 0.5 && tex_size.y > 400.0 && fract(iTime) < 0.5) res = res * 0.95;
+        res.rgb *= vign(lum);
+
+    }
+
+    SetOutput(res);
+}
--- a/data/resources/shaders/reshade/Shaders/CRT-Guest-NTSC.fx
+++ b/data/resources/shaders/reshade/Shaders/CRT-Guest-NTSC.fx
--- a/data/resources/shaders/reshade/Shaders/XY-Pos-free.fx
+++ b/data/resources/shaders/reshade/Shaders/XY-Pos-free.fx
@@ -1,84 +0,0 @@
-#include "ReShade.fxh"
-
-//  CrashGG presents
-
-//  'XY-Pos-free' 
-
-//  A super-simple shader refined from the super-fast crt-cyclon.fx, It only provides
-//  the functions of free pixel stretching and position translation on the XY axis.
-//  Suitable for users who only want to fine-tune the screen zoom and position and do not like the bundled CRT-like effects.
-//  Fixed some bugs in the original version, adjusted the step progress and the range.
-
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or (at your option)
-//  any later version.
-
-
-uniform float zoomx <
-	ui_type = "drag";
-	ui_min = -0.3000;
-	ui_max = 0.3000;
-	ui_step = 0.0005;
-	ui_label = "Zoom Image X";
-> = 0.0000;
-
-uniform float zoomy <
-	ui_type = "drag";
-	ui_min = -0.3000;
-	ui_max = 0.3000;
-	ui_step = 0.0005;
-	ui_label = "Zoom Image Y";
-> = 0.0000;
-
-uniform float centerx <
-	ui_type = "drag";
-	ui_min = -9.99;
-	ui_max = 9.99;
-	ui_step = 0.01;
-	ui_label = "Image Center X";
-> = 0.00;
-
-uniform float centery <
-	ui_type = "drag";
-	ui_min = -9.99;
-	ui_max = 9.99;
-	ui_step = 0.01;
-	ui_label = "Image Center Y";
-> = 0.00;
-
-
-float2 Warp(float2 pos)
-{
-    pos = pos*2.0-1.0;
-    pos *= float2(1.0+pos.y*pos.y*0, 1.0+pos.x*pos.x*0);
-    pos = pos*0.5+0.5;
-
-    return pos;
-}
-
-
-float4 CRT_CYCLON_PS(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
-{
-// zoom in and center screen
-    float2 pos = Warp((vTexCoord*float2(1.0-zoomx,1.0-zoomy)-float2(centerx,centery)/100.0));
-
-// Convergence
-    float3 res = tex2D(ReShade::BackBuffer,pos).rgb;
-
-// Vignette
-    float x = 0.0;
-	
-    return float4(res, 1.0);
-}
-
-
-
-technique CRT_CYCLON
-{
-   pass PS_CRT_CYCLON
-   {
-   	VertexShader = PostProcessVS;
-   	PixelShader  = CRT_CYCLON_PS;
-   }
-}
--- a/data/resources/shaders/reshade/Shaders/anti-aliasing/aa-shader-4.0.fx
+++ b/data/resources/shaders/reshade/Shaders/anti-aliasing/aa-shader-4.0.fx
@@ -1,104 +0,0 @@
-#include "ReShade.fxh"
-
-/*
-   Copyright (C) 2016 guest(r) - guest.r@gmail.com
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License
-   as published by the Free Software Foundation; either version 2
-   of the License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-*/
-
-uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
-
-static const float3 dt = float3(1.0,1.0,1.0);
-
-float3 texture2d(sampler2D tex, float2 coord, float4 yx) {
-
-    float3 s00 = tex2D(tex, coord + yx.zw).xyz; 
-    float3 s20 = tex2D(tex, coord + yx.xw).xyz; 
-    float3 s22 = tex2D(tex, coord + yx.xy).xyz; 
-    float3 s02 = tex2D(tex, coord + yx.zy).xyz; 
-
-    float m1=dot(abs(s00-s22),dt)+0.001;
-    float m2=dot(abs(s02-s20),dt)+0.001;
-
-    return 0.5*(m2*(s00+s22)+m1*(s02+s20))/(m1+m2);
-}
-
-
-
-float4 PS_aa_shader_40(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
-{
-    // Calculating texel coordinates
-    float2 size     = 4.0 / NormalizedNativePixelSize;
-    float2 inv_size = 1.0 / size;
-
-    float4 yx = float4(inv_size, -inv_size);
-    
-    float2 OGL2Pos = vTexCoord * size;
-
-    float2 fp = frac(OGL2Pos);
-    float2 dx = float2(inv_size.x,0.0);
-    float2 dy = float2(0.0, inv_size.y);
-    float2 g1 = float2(inv_size.x,inv_size.y);
-    float2 g2 = float2(-inv_size.x,inv_size.y);
-    
-    float2 pC4 = floor(OGL2Pos) * 1.0001 * inv_size;    
-    
-    // Reading the texels
-    float3 C1 = texture2d(sBackBuffer, pC4 - dy, yx);
-    float3 C0 = texture2d(sBackBuffer, pC4 - g1, yx); 
-    float3 C2 = texture2d(sBackBuffer, pC4 - g2, yx);
-    float3 C3 = texture2d(sBackBuffer, pC4 - dx, yx);
-    float3 C4 = texture2d(sBackBuffer, pC4     , yx);
-    float3 C5 = texture2d(sBackBuffer, pC4 + dx, yx);
-    float3 C6 = texture2d(sBackBuffer, pC4 + g2, yx);
-    float3 C7 = texture2d(sBackBuffer, pC4 + dy, yx);
-    float3 C8 = texture2d(sBackBuffer, pC4 + g1, yx);
-    
-    float3 ul, ur, dl, dr;
-    float m1, m2;
-    
-    m1 = dot(abs(C0-C4),dt)+0.001;
-    m2 = dot(abs(C1-C3),dt)+0.001;
-    ul = (m2*(C0+C4)+m1*(C1+C3))/(m1+m2);  
-    
-    m1 = dot(abs(C1-C5),dt)+0.001;
-    m2 = dot(abs(C2-C4),dt)+0.001;
-    ur = (m2*(C1+C5)+m1*(C2+C4))/(m1+m2);
-    
-    m1 = dot(abs(C3-C7),dt)+0.001;
-    m2 = dot(abs(C6-C4),dt)+0.001;
-    dl = (m2*(C3+C7)+m1*(C6+C4))/(m1+m2);
-    
-    m1 = dot(abs(C4-C8),dt)+0.001;
-    m2 = dot(abs(C5-C7),dt)+0.001;
-    dr = (m2*(C4+C8)+m1*(C5+C7))/(m1+m2);
-    
-    float3 c11 = 0.5*((dr*fp.x+dl*(1-fp.x))*fp.y+(ur*fp.x+ul*(1-fp.x))*(1-fp.y) );
-
-    return float4(c11, 1.0);
-}
-
-
-
-technique aa_shader_40
-{
-   pass
-   {
-       VertexShader = PostProcessVS;
-       PixelShader  = PS_aa_shader_40;
-   }
-}
--- a/data/resources/shaders/reshade/Shaders/anti-aliasing/fxaa.fx
+++ b/data/resources/shaders/reshade/Shaders/anti-aliasing/fxaa.fx
@@ -1,271 +0,0 @@
-#include "ReShade.fxh"
-
-
-/**
- * @license
- * Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
- *
- * TO  THE MAXIMUM  EXTENT PERMITTED  BY APPLICABLE  LAW, THIS SOFTWARE  IS PROVIDED
- * *AS IS*  AND NVIDIA AND  ITS SUPPLIERS DISCLAIM  ALL WARRANTIES,  EITHER  EXPRESS
- * OR IMPLIED, INCLUDING, BUT NOT LIMITED  TO, NONINFRINGEMENT,IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL  NVIDIA 
- * OR ITS SUPPLIERS BE  LIABLE  FOR  ANY  DIRECT, SPECIAL,  INCIDENTAL,  INDIRECT,  OR  
- * CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION,  DAMAGES FOR LOSS 
- * OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR ANY 
- * OTHER PECUNIARY LOSS) ARISING OUT OF THE  USE OF OR INABILITY  TO USE THIS SOFTWARE, 
- * EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
- */
-
-/*
-FXAA_PRESET - Choose compile-in knob preset 0-5.
------------------------------------------------------------------------------
-FXAA_EDGE_THRESHOLD - The minimum amount of local contrast required 
-                      to apply algorithm.
-                      1.0/3.0  - too little
-                      1.0/4.0  - good start
-                      1.0/8.0  - applies to more edges
-                      1.0/16.0 - overkill
------------------------------------------------------------------------------
-FXAA_EDGE_THRESHOLD_MIN - Trims the algorithm from processing darks.
-                          Perf optimization.
-                          1.0/32.0 - visible limit (smaller isn't visible)
-                          1.0/16.0 - good compromise
-                          1.0/12.0 - upper limit (seeing artifacts)
------------------------------------------------------------------------------
-FXAA_SEARCH_STEPS - Maximum number of search steps for end of span.
------------------------------------------------------------------------------
-FXAA_SEARCH_THRESHOLD - Controls when to stop searching.
-                        1.0/4.0 - seems to be the best quality wise
------------------------------------------------------------------------------
-FXAA_SUBPIX_TRIM - Controls sub-pixel aliasing removal.
-                   1.0/2.0 - low removal
-                   1.0/3.0 - medium removal
-                   1.0/4.0 - default removal
-                   1.0/8.0 - high removal
-                   0.0 - complete removal
------------------------------------------------------------------------------
-FXAA_SUBPIX_CAP - Insures fine detail is not completely removed.
-                  This is important for the transition of sub-pixel detail,
-                  like fences and wires.
-                  3.0/4.0 - default (medium amount of filtering)
-                  7.0/8.0 - high amount of filtering
-                  1.0 - no capping of sub-pixel aliasing removal
-*/
-
-
-uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
-uniform float2 ViewportSize < source = "viewportsize"; >;
-
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=LINEAR;MinFilter=LINEAR;};
-
-
-#ifndef FXAA_PRESET
-    #define FXAA_PRESET 6
-#endif
-#if (FXAA_PRESET == 3)
-    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
-    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/16.0)
-    #define FXAA_SEARCH_STEPS        16
-    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
-    #define FXAA_SUBPIX_CAP          (3.0/4.0)
-    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
-#endif
-#if (FXAA_PRESET == 4)
-    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
-    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
-    #define FXAA_SEARCH_STEPS        24
-    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
-    #define FXAA_SUBPIX_CAP          (3.0/4.0)
-    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
-#endif
-#if (FXAA_PRESET == 5)
-    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
-    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
-    #define FXAA_SEARCH_STEPS        32
-    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
-    #define FXAA_SUBPIX_CAP          (3.0/4.0)
-    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
-#endif
-#if (FXAA_PRESET == 6)
-    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
-    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
-    #define FXAA_SEARCH_STEPS        32
-    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
-    #define FXAA_SUBPIX_CAP          (1.0)
-    #define FXAA_SUBPIX_TRIM         (0.0)
-#endif
-
-#define FXAA_SUBPIX_TRIM_SCALE (1.0/(1.0 - FXAA_SUBPIX_TRIM))
-
-// Return the luma, the estimation of luminance from rgb inputs.
-// This approximates luma using one FMA instruction,
-// skipping normalization and tossing out blue.
-// FxaaLuma() will range 0.0 to 2.963210702.
-float FxaaLuma(float3 rgb) {
-    return rgb.y * (0.587/0.299) + rgb.x;
-}
-
-float3 FxaaLerp3(float3 a, float3 b, float amountOfA) {
-    return (-float3(amountOfA, amountOfA, amountOfA) * b) + ((a * float3(amountOfA, amountOfA, amountOfA)) + b);
-}
-
-float4 FxaaTexOff(sampler2D tex, float2 pos, int2 off, float2 rcpFrame) {
-    float x = pos.x + float(off.x) * rcpFrame.x;
-    float y = pos.y + float(off.y) * rcpFrame.y;
-    return tex2D(tex, float2(x, y));
-}
-
-// pos is the output of FxaaVertexShader interpolated across screen.
-// xy -> actual texture position {0.0 to 1.0}
-// rcpFrame should be a uniform equal to  {1.0/frameWidth, 1.0/frameHeight}
-float3 FxaaPixelShader(float2 pos, sampler2D tex, float2 rcpFrame)
-{
-    float3 rgbN = FxaaTexOff(tex, pos.xy, int2( 0,-1), rcpFrame).xyz;
-    float3 rgbW = FxaaTexOff(tex, pos.xy, int2(-1, 0), rcpFrame).xyz;
-    float3 rgbM = FxaaTexOff(tex, pos.xy, int2( 0, 0), rcpFrame).xyz;
-    float3 rgbE = FxaaTexOff(tex, pos.xy, int2( 1, 0), rcpFrame).xyz;
-    float3 rgbS = FxaaTexOff(tex, pos.xy, int2( 0, 1), rcpFrame).xyz;
-    
-    float lumaN = FxaaLuma(rgbN);
-    float lumaW = FxaaLuma(rgbW);
-    float lumaM = FxaaLuma(rgbM);
-    float lumaE = FxaaLuma(rgbE);
-    float lumaS = FxaaLuma(rgbS);
-    float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE)));
-    float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE)));
-    
-    float range = rangeMax - rangeMin;
-    if(range < max(FXAA_EDGE_THRESHOLD_MIN, rangeMax * FXAA_EDGE_THRESHOLD))
-    {
-        return rgbM;
-    }
-    
-    float3 rgbL = rgbN + rgbW + rgbM + rgbE + rgbS;
-    
-    float lumaL = (lumaN + lumaW + lumaE + lumaS) * 0.25;
-    float rangeL = abs(lumaL - lumaM);
-    float blendL = max(0.0, (rangeL / range) - FXAA_SUBPIX_TRIM) * FXAA_SUBPIX_TRIM_SCALE; 
-    blendL = min(FXAA_SUBPIX_CAP, blendL);
-    
-    float3 rgbNW = FxaaTexOff(tex, pos.xy, int2(-1,-1), rcpFrame).xyz;
-    float3 rgbNE = FxaaTexOff(tex, pos.xy, int2( 1,-1), rcpFrame).xyz;
-    float3 rgbSW = FxaaTexOff(tex, pos.xy, int2(-1, 1), rcpFrame).xyz;
-    float3 rgbSE = FxaaTexOff(tex, pos.xy, int2( 1, 1), rcpFrame).xyz;
-    rgbL += (rgbNW + rgbNE + rgbSW + rgbSE);
-    rgbL *= (1.0/float3(9.0, 9.0, 9.0));
-    
-    float lumaNW = FxaaLuma(rgbNW);
-    float lumaNE = FxaaLuma(rgbNE);
-    float lumaSW = FxaaLuma(rgbSW);
-    float lumaSE = FxaaLuma(rgbSE);
-    
-    float edgeVert = 
-        abs((0.25 * lumaNW) + (-0.5 * lumaN) + (0.25 * lumaNE)) +
-        abs((0.50 * lumaW ) + (-1.0 * lumaM) + (0.50 * lumaE )) +
-        abs((0.25 * lumaSW) + (-0.5 * lumaS) + (0.25 * lumaSE));
-    float edgeHorz = 
-        abs((0.25 * lumaNW) + (-0.5 * lumaW) + (0.25 * lumaSW)) +
-        abs((0.50 * lumaN ) + (-1.0 * lumaM) + (0.50 * lumaS )) +
-        abs((0.25 * lumaNE) + (-0.5 * lumaE) + (0.25 * lumaSE));
-        
-    bool horzSpan = edgeHorz >= edgeVert;
-    float lengthSign = horzSpan ? -rcpFrame.y : -rcpFrame.x;
-    
-    if(!horzSpan)
-    {
-        lumaN = lumaW;
-        lumaS = lumaE;
-    }
-    
-    float gradientN = abs(lumaN - lumaM);
-    float gradientS = abs(lumaS - lumaM);
-    lumaN = (lumaN + lumaM) * 0.5;
-    lumaS = (lumaS + lumaM) * 0.5;
-    
-    if (gradientN < gradientS)
-    {
-        lumaN = lumaS;
-        lumaN = lumaS;
-        gradientN = gradientS;
-        lengthSign *= -1.0;
-    }
-    
-    float2 posN;
-    posN.x = pos.x + (horzSpan ? 0.0 : lengthSign * 0.5);
-    posN.y = pos.y + (horzSpan ? lengthSign * 0.5 : 0.0);
-    
-    gradientN *= FXAA_SEARCH_THRESHOLD;
-    
-    float2 posP = posN;
-    float2 offNP = horzSpan ? float2(rcpFrame.x, 0.0) : float2(0.0, rcpFrame.y); 
-    float lumaEndN = lumaN;
-    float lumaEndP = lumaN;
-    bool doneN = false;
-    bool doneP = false;
-    posN += offNP * float2(-1.0, -1.0);
-    posP += offNP * float2( 1.0,  1.0);
-    
-    for(int i = 0; i < FXAA_SEARCH_STEPS; i++) {
-        if(!doneN)
-        {
-            lumaEndN = FxaaLuma(tex2D(tex, posN.xy).xyz);
-        }
-        if(!doneP)
-        {
-            lumaEndP = FxaaLuma(tex2D(tex, posP.xy).xyz);
-        }
-        
-        doneN = doneN || (abs(lumaEndN - lumaN) >= gradientN);
-        doneP = doneP || (abs(lumaEndP - lumaN) >= gradientN);
-        
-        if(doneN && doneP)
-        {
-            break;
-        }
-        if(!doneN)
-        {
-            posN -= offNP;
-        }
-        if(!doneP)
-        {
-            posP += offNP;
-        }
-    }
-    
-    float dstN = horzSpan ? pos.x - posN.x : pos.y - posN.y;
-    float dstP = horzSpan ? posP.x - pos.x : posP.y - pos.y;
-    bool directionN = dstN < dstP;
-    lumaEndN = directionN ? lumaEndN : lumaEndP;
-    
-    if(((lumaM - lumaN) < 0.0) == ((lumaEndN - lumaN) < 0.0))
-    {
-        lengthSign = 0.0;
-    }
- 
-
-    float spanLength = (dstP + dstN);
-    dstN = directionN ? dstN : dstP;
-    float subPixelOffset = (0.5 + (dstN * (-1.0/spanLength))) * lengthSign;
-    float3 rgbF = tex2D(tex, float2(
-        pos.x + (horzSpan ? 0.0 : subPixelOffset),
-        pos.y + (horzSpan ? subPixelOffset : 0.0))).xyz;
-    return FxaaLerp3(rgbL, rgbF, blendL); 
-}
-
-float4 PS_FXAA(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
-{
-    float3 color = FxaaPixelShader(vTexCoord, sBackBuffer, 1.0 / (ViewportSize*BufferToViewportRatio));
-
-    return float4(color, 1.0);
-}
-
-
-
-technique FXAA
-{
-   pass
-   {
-   	VertexShader = PostProcessVS;
-   	PixelShader  = PS_FXAA;
-   }
-}
--- a/data/resources/shaders/reshade/Shaders/blur/g-sharp2-resampler-fast.fx
+++ b/data/resources/shaders/reshade/Shaders/blur/g-sharp2-resampler-fast.fx
@@ -1,163 +0,0 @@
-#include "ReShade.fxh"
-
-
-/*
-   G-sharp resampler 2.0 - dynamic range (upscaler, downsampler)
-   
-   Copyright (C) 2024 guest(r)
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License
-   as published by the Free Software Foundation; either version 2
-   of the License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-   
-*/ 
-
-
-
-uniform float GSHARP0 <
-	ui_type = "drag";
-	ui_min = 0.75;
-	ui_max = 8.0;
-	ui_step = 0.05;
-	ui_label = "Filter Range";
-> = 2.45;
-
-uniform float GBOOST <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 2.5;
-	ui_step = 0.05;
-	ui_label = "Filter Boost (same range, speedup)";
-> = 1.75;
-
-uniform float GMAXSHARP <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 0.25;
-	ui_step = 0.01;
-	ui_label = "Filter Sharpness";
-> = 0.1;
-
-uniform float GPAR <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 1.0;
-	ui_step = 0.10;
-	ui_label = "Anti-Ringing";
-> = 0.50;
-
-
-uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
-uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
-uniform float2 ViewportSize < source = "viewportsize"; >;
-
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
-
-texture2D tGSHARP2_H{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
-sampler2D sGSHARP2_H{Texture=tGSHARP2_H;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
-
-#define GMAXSHARP (0.25*GBOOST*GBOOST*GMAXSHARP)
-
-float smothstep(float x)
-{
-	return exp(-2.33*x*x);
-}
-
-float getw(float x)
-{
-	float z = x/GBOOST;
-	float y = smothstep(z);
-	return max(y*y - GMAXSHARP, lerp(-GMAXSHARP, 0.0, x-1.0));
-}
-
-float3 gsharp2(float2 tex, float2 dx, float f, sampler2D Source)
-{
-	float3 color = 0.0.xxx;
-
-	float w, fp;
-	float wsum = 0.0;
-	float3 pixel;
-	float3 cmax = 0.0.xxx;
-	float3 cmin = 1.0.xxx;
-	float FPR = GSHARP0;
-	float FPR2 = 2.0*FPR;
-	float FPR3 = FPR2*FPR2;
-	float LOOPSIZE = ceil(FPR2);	
-	float x = -LOOPSIZE+1.0;
-
-	do
-	{
-		fp = min(abs(x+f),FPR2);
-		pixel  = tex2D(Source, tex + x*dx).rgb;		
-		fp = fp/FPR;
-		w = getw(fp);
-		if (w > 0.0) { cmin = min(cmin, pixel); cmax = max(cmax, pixel); }
-		color = color + w * pixel;
-		wsum   = wsum + w;
-		
-		x = x + 1.0;
-		
-	} while (x <= LOOPSIZE);
-
-	color = color / wsum;
-
-	return lerp(clamp(color, 0.0, 1.0), clamp(color, cmin, cmax), GPAR);
-}
-
-float4 PS_GSHARP2_H(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
-{
-	float4 SourceSize = float4(1.0 / NormalizedInternalPixelSize, NormalizedInternalPixelSize);
-
-	float2 pos = vTexCoord * SourceSize.xy-0.5;
-	float  f =  -frac(pos.x);
-	float2 tex = (floor(pos) + 0.5)*SourceSize.zw;
-	float3 color;
-	float2 dx  = float2(SourceSize.z, 0.0);
-	
-	color = gsharp2(tex, dx, f, sBackBuffer);
-	
-	return float4(color, 1.0);
-}
-
-float4 PS_GSHARP2_V(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
-{
-	float4 SourceSize = float4((ViewportSize.x*BufferToViewportRatio.x), 1.0/NormalizedInternalPixelSize.y, 1.0/(ViewportSize.x*BufferToViewportRatio.x), NormalizedInternalPixelSize.y);
-
-	float2 pos = vTexCoord * SourceSize.xy-0.5;
-	float  f =  -frac(pos.y);
-	float2 tex = (floor(pos) + 0.5)*SourceSize.zw;
-	float3 color;
-	float2 dy  = float2(0.0, SourceSize.w);
-	
-	color = gsharp2(tex, dy, f, sGSHARP2_H);
-	
-	return float4(color, 1.0);
-}
-
-
-
-technique GSHARP2
-{
-   pass
-   {
-   	VertexShader = PostProcessVS;
-   	PixelShader  = PS_GSHARP2_H;
-	RenderTarget = tGSHARP2_H;
-   }
-   pass
-   {
-   	VertexShader = PostProcessVS;
-   	PixelShader  = PS_GSHARP2_V;
-   }
-}
--- a/data/resources/shaders/reshade/Shaders/blur/g-sharp2-resampler.fx
+++ b/data/resources/shaders/reshade/Shaders/blur/g-sharp2-resampler.fx
@@ -1,145 +0,0 @@
-#include "ReShade.fxh"
-
-
-/*
-   G-sharp resampler 2.0 - dynamic range (upscaler, downsampler)
-   
-   Copyright (C) 2024 guest(r)
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License
-   as published by the Free Software Foundation; either version 2
-   of the License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-   
-*/ 
-
-
-
-uniform float GSHARP0 <
-	ui_type = "drag";
-	ui_min = 0.75;
-	ui_max = 8.0;
-	ui_step = 0.05;
-	ui_label = "Filter Range";
-> = 2.45;
-
-uniform float GBOOST <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 2.5;
-	ui_step = 0.05;
-	ui_label = "Filter Boost (same range, speedup)";
-> = 1.75;
-
-uniform float GMAXSHARP <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 0.25;
-	ui_step = 0.01;
-	ui_label = "Filter Sharpness";
-> = 0.1;
-
-uniform float GPAR <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 1.0;
-	ui_step = 0.10;
-	ui_label = "Anti-Ringing";
-> = 0.50;
-
-
-uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
-
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
-
-#define GMAXSHARP (0.25*GBOOST*GBOOST*GMAXSHARP)
-
-float smothstep(float x)
-{
-	return exp(-2.33*x*x);
-}
-
-float getw(float x)
-{
-	float z = x/GBOOST;
-	float y = smothstep(z);
-	return max(y*y - GMAXSHARP, lerp(-GMAXSHARP, 0.0, x-1.0));
-}
-
-
-float4 PS_GSHARP2(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
-{
-    float2 texCoord = vTexCoord;
-    float4 SourceSize = float4(1.0 / NormalizedInternalPixelSize, NormalizedInternalPixelSize);
-
-	float2 pos = vTexCoord * SourceSize.xy-0.5;
-	float2 f =  -frac(pos);
-	float2 tex = floor(pos)*SourceSize.zw + 0.5*SourceSize.zw;
-	float3 color = 0.0.xxx;
-	float2 dx  = float2(SourceSize.z, 0.0);
-	float2 dy  = float2(0.0, SourceSize.w);
-	
-	float w, fp;
-	float wsum = 0.0;
-	float3 pixel;
-	float3 cmax = 0.0.xxx;
-	float3 cmin = 1.0.xxx;
-	float FPR = GSHARP0;
-	float FPR2 = 2.0*FPR;
-	float FPR3 = FPR2*FPR2;
-	float LOOPSIZE = ceil(FPR2);	
-	float y = -LOOPSIZE+1.0;
-	float x = 0.0;
-	
-	do
-	{
-		x = -LOOPSIZE + 1.0;
-	
-		do
-		{
-			fp = dot(float2(x+f.x,y+f.y),float2(x+f.x,y+f.y));
-			if (fp >= FPR3) w = 0.0;
-			else
-			{
-				pixel  = tex2D(sBackBuffer, tex + x*dx + y*dy).rgb;		
-				fp = sqrt(fp)/FPR;
-				w = getw(fp);				
-				if (w >= 0.0) { cmin = min(cmin, pixel); cmax = max(cmax, pixel); }
-				color = color + w * pixel;
-				wsum   = wsum + w;
-			}
-			x = x + 1.0;
-			
-		} while (x <= LOOPSIZE);
-		
-		y = y + 1.0;
-		
-	} while (y <= LOOPSIZE);
-
-	color = color / wsum;
-
-	color = lerp(clamp(color, 0.0, 1.0), clamp(color, cmin, cmax), GPAR);
-	
-	return float4(color, 1.0);
-}
-
-
-
-technique GSHARP2
-{
-   pass
-   {
-   	VertexShader = PostProcessVS;
-   	PixelShader  = PS_GSHARP2;
-   }
-}
--- a/data/resources/shaders/reshade/Shaders/crt/crt-consumer.fx
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-consumer.fx
@@ -1,797 +0,0 @@
-#include "ReShade.fxh"
-
-
-/*
-   CRT-Consumer
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License
-   as published by the Free Software Foundation; either version 2
-   of the License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-
-*/
-
-
-
-uniform float PRE_SCALE <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 4.0;
-	ui_step = 0.1;
-	ui_label = "Pre-Scale Sharpening";
-> = 1.5;
-
-uniform float blurx <
-	ui_type = "drag";
-	ui_min = -4.0;
-	ui_max = 4.0;
-	ui_step = 0.05;
-	ui_label = "Convergence X";
-> = 0.25;
-
-uniform float blury <
-	ui_type = "drag";
-	ui_min = -4.0;
-	ui_max = 4.0;
-	ui_step = 0.05;
-	ui_label = "Convergence Y";
-> = -0.1;
-
-uniform float warpx <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 0.12;
-	ui_step = 0.01;
-	ui_label = " Curvature X";
-> = 0.03;
-
-uniform float warpy <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 0.12;
-	ui_step = 0.01;
-	ui_label = " Curvature Y";
-> = 0.04;
-
-uniform float corner <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 0.10;
-	ui_step = 0.01;
-	ui_label = " Corner size";
-> = 0.03;
-
-uniform float smoothness <
-	ui_type = "drag";
-	ui_min = 100.0;
-	ui_max = 600.0;
-	ui_step = 5.0;
-	ui_label = " Border Smoothness";
-> = 400.0;
-
-uniform bool inter <
-	ui_type = "radio";
-	ui_label = "Interlacing Toggle";
-> = true;
-
-uniform float Downscale <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 8.0;
-	ui_step = 1.;
-	ui_label = "Interlacing Downscale Scanlines";
-> = 2.0;
-
-uniform float scanlow <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 15.0;
-	ui_step = 1.0;
-	ui_label = "Beam low";
-> = 6.0;
-
-uniform float scanhigh <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 15.0;
-	ui_step = 1.0;
-	ui_label = "Beam high";
-> = 8.0;
-
-uniform float beamlow <
-	ui_type = "drag";
-	ui_min = 0.5;
-	ui_max = 2.5;
-	ui_step = 0.05;
-	ui_label = "Scanlines dark";
-> = 1.45;
-
-uniform float beamhigh <
-	ui_type = "drag";
-	ui_min = 0.5;
-	ui_max = 2.5;
-	ui_step = 0.05;
-	ui_label = "Scanlines bright";
-> = 1.05;
-
-uniform float preserve <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 1.0;
-	ui_step = 0.01;
-	ui_label = "Protect White On Masks";
-> = 0.98;
-
-uniform float brightboost1 <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 3.0;
-	ui_step = 0.05;
-	ui_label = "Bright boost dark pixels";
-> = 1.25;
-
-uniform float brightboost2 <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 3.0;
-	ui_step = 0.05;
-	ui_label = "Bright boost bright pixels";
-> = 1.0;
-
-uniform float glow <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 6.0;
-	ui_step = 1.0;
-	ui_label = "Glow pixels per axis";
-> = 3.0;
-
-uniform float quality <
-	ui_type = "drag";
-	ui_min = 0.25;
-	ui_max = 4.0;
-	ui_step = 0.05;
-	ui_label = "Glow quality";
-> = 1.0;
-
-uniform float glow_str <
-	ui_type = "drag";
-	ui_min = 0.0001;
-	ui_max = 2.0;
-	ui_step = 0.05;
-	ui_label = "Glow intensity";
-> = 0.3;
-
-uniform float nois <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 32.0;
-	ui_step = 1.0;
-	ui_label = "Add Noise";
-> = 0.0;
-
-uniform float postbr <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 2.5;
-	ui_step = 0.02;
-	ui_label = "Post Brightness";
-> = 1.0;
-
-uniform float palette_fix <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 2.0;
-	ui_step = 1.0;
-	ui_label = "Palette Fixes. Sega, PUAE Atari ST dark colors";
-> = 0.0;
-
-uniform float Shadowmask <
-	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 8.0;
-	ui_step = 1.;
-	ui_label = "Mask Type";
-> = 0.0;
-
-uniform float masksize <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 2.0;
-	ui_step = 1.0;
-	ui_label = "Mask Size";
-> = 1.0;
-
-uniform float MaskDark <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 2.0;
-	ui_step = 0.1;
-	ui_label = "Mask dark";
-> = 0.2;
-
-uniform float MaskLight <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 2.0;
-	ui_step = 0.1;
-	ui_label = "Mask light";
-> = 1.5;
-
-uniform float slotmask <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 1.0;
-	ui_step = 0.05;
-	ui_label = "Slot Mask Strength";
-> = 0.0;
-
-uniform float slotwidth <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 6.0;
-	ui_step = 0.5;
-	ui_label = "Slot Mask Width";
-> = 2.0;
-
-uniform float double_slot <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 2.0;
-	ui_step = 1.0;
-	ui_label = "Slot Mask Height: 2x1 or 4x1";
-> = 1.0;
-
-uniform float slotms <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 2.0;
-	ui_step = 1.0;
-	ui_label = "Slot Mask Size";
-> = 1.0;
-
-uniform float GAMMA_OUT <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 4.0;
-	ui_step = 0.05;
-	ui_label = "Gamma Out";
-> = 2.25;
-
-uniform float sat <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 2.0;
-	ui_step = 0.05;
-	ui_label = "Saturation";
-> = 1.0;
-
-uniform float contrast <
-	ui_type = "drag";
-	ui_min = 0.00;
-	ui_max = 2.00;
-	ui_step = 0.05;
-	ui_label = "Contrast, 1.0:Off";
-> = 1.0;
-
-uniform float WP <
-	ui_type = "drag";
-	ui_min = -100.0;
-	ui_max = 100.0;
-	ui_step = 5.;
-	ui_label = "Color Temperature %";
-> = 0.0;
-
-uniform float rg <
-	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
-	ui_step = 0.005;
-	ui_label = "Red-Green Tint";
-> = 0.0;
-
-uniform float rb <
-	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
-	ui_step = 0.005;
-	ui_label = "Red-Blue Tint";
-> = 0.0;
-
-uniform float gr <
-	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
-	ui_step = 0.005;
-	ui_label = "Green-Red Tint";
-> = 0.0;
-
-uniform float gb <
-	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
-	ui_step = 0.005;
-	ui_label = "Green-Blue Tint";
-> = 0.0;
-
-uniform float br <
-	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
-	ui_step = 0.005;
-	ui_label = "Blue-Red Tint";
-> = 0.0;
-
-uniform float bg <
-	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
-	ui_step = 0.005;
-	ui_label = "Blue-Green Tint";
-> = 0.0;
-
-uniform bool vignette <
-	ui_type = "radio";
-	ui_label = "Vignette On/Off";
-> = false;
-
-uniform float vpower <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 1.0;
-	ui_step = 0.01;
-	ui_label = "Vignette Power";
-> = 0.15;
-
-uniform float vstr <
-	ui_type = "drag";
-	ui_min = 0.0;
-	ui_max = 50.0;
-	ui_step = 1.0;
-	ui_label = "Vignette strength";
-> = 40.0;
-
-uniform bool alloff <
-	ui_type = "radio";
-	ui_label = "Switch off shader";
-> = false;
-
-
-uniform float  FrameCount < source = "framecount"; >;
-uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
-uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-uniform float2 ViewportSize < source = "viewportsize"; >;
-uniform float  ViewportX < source = "viewportx"; >;
-uniform float  ViewportY < source = "viewporty"; >;
-uniform float  ViewportWidth < source = "viewportwidth"; >;
-uniform float  ViewportHeight < source = "viewportheight"; >;
-uniform float2 ViewportOffset < source = "viewportoffset"; >;
-uniform float  BufferWidth < source = "bufferwidth"; >;
-uniform float  BufferHeight < source = "bufferheight"; >;
-uniform float  NativeWidth < source = "nativewidth"; >;
-uniform float  NativeHeight < source = "nativeheight"; >;
-uniform float  InternalWidth < source = "internalwidth"; >;
-uniform float  InternalHeight < source = "internalheight"; >;
-
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
-
-#define iTime (float(FrameCount)/2.0)
-#define iTimer (float(FrameCount)/60.0)
-
-#define SourceSize (float4(1.0/NormalizedNativePixelSize,NormalizedNativePixelSize))
-#define OutputSize (ViewportSize*BufferToViewportRatio)
-
-float2 Warp(float2 pos)
-{
-    pos  = pos * 2.0 - 1.0;    
-    pos *= float2(1.0 + (pos.y * pos.y) * warpx, 1.0 + (pos.x * pos.x) * warpy);
-    return pos * 0.5 + 0.5;
-} 
-
-float sw(float y, float l)
-{
-    float beam = lerp(scanlow, scanhigh, y);
-    float scan = lerp(beamlow,  beamhigh, l);
-    float ex = y * scan;
-    return exp2(-beam * ex * ex);
-}
-
-float3 mask(float2 x, float3 col, float l)
-{
-    x = floor(x / masksize);        
-  
-    if (Shadowmask == 0.0)
-    {
-        float m = frac(x.x * 0.4999);
-        if (m < 0.4999) return float3(1.0,             MaskDark, 1.0);
-        else            return float3(MaskDark, 1.0,             MaskDark);
-    }
-   
-    else if (Shadowmask == 1.0)
-    {
-        float3 Mask = float3(MaskDark, MaskDark, MaskDark);
-        float line = MaskLight;
-        float odd  = 0.0;
-
-        if (frac(x.x / 6.0) < 0.5) odd = 1.0;
-        if (frac((x.y + odd) / 2.0) < 0.5) line = MaskDark;
-
-        float m = frac(x.x / 3.0);
-        if      (m < 0.333) Mask.b = MaskLight;
-        else if (m < 0.666) Mask.g = MaskLight;
-        else                Mask.r = MaskLight;
-        
-        Mask *= line; 
-        return Mask; 
-    } 
-    
-    else if (Shadowmask == 2.0)
-    {
-        float m = frac(x.x*0.3333);
-        if (m < 0.3333) return float3(MaskDark,  MaskDark,  MaskLight);
-        if (m < 0.6666) return float3(MaskDark,  MaskLight, MaskDark);
-        else            return float3(MaskLight, MaskDark,  MaskDark);
-    }
-
-    if (Shadowmask == 3.0)
-    {
-        float m = frac(x.x * 0.5);
-        if (m < 0.5) return float3(1.0, 1.0, 1.0);
-        else         return float3(MaskDark, MaskDark, MaskDark);
-    }
-   
-    else if (Shadowmask == 4.0)
-    {   
-        float3 Mask = float3(col.rgb);
-        float line = MaskLight;
-        float odd  = 0.0;
-
-        if (frac(x.x / 4.0) < 0.5) odd = 1.0;
-        if (frac((x.y + odd) / 2.0) < 0.5) line = MaskDark;
-
-        float m = frac(x.x / 2.0);
-        if  (m < 0.5) { Mask.r = 1.0; Mask.b = 1.0; }
-        else  Mask.g = 1.0;   
-
-        Mask *= line;  
-        return Mask;
-    } 
-
-    else if (Shadowmask == 5.0)
-    {
-        float3 Mask = float3(1.0, 1.0, 1.0);
-
-        if (frac(x.x / 4.0) < 0.5)   
-        {
-            if (frac(x.y / 3.0) < 0.666)
-            {
-                if (frac(x.x / 2.0) < 0.5) Mask = float3(1.0,             MaskDark, 1.0);
-                else                        Mask = float3(MaskDark, 1.0,             MaskDark);
-            }
-            else Mask *= l;
-        }
-        else if (frac(x.x / 4.0) >= 0.5)   
-        {
-            if (frac(x.y / 3.0) > 0.333) 
-            {
-                if (frac(x.x / 2.0) < 0.5) Mask = float3(1.0,             MaskDark, 1.0); 
-                else                        Mask = float3(MaskDark, 1.0,             MaskDark);
-            }
-            else Mask *= l;
-        }
-
-        return Mask;
-    }
-
-    else if (Shadowmask == 6.0)
-    {
-        float3 Mask = float3(MaskDark, MaskDark, MaskDark);
-        if (frac(x.x / 6.0) < 0.5)   
-        {
-            if (frac(x.y / 4.0) < 0.75)  
-            {
-                if      (frac(x.x / 3.0) < 0.3333) Mask.r = MaskLight; 
-                else if (frac(x.x / 3.0) < 0.6666) Mask.g = MaskLight; 
-                else                                Mask.b = MaskLight;
-            }
-            else Mask * l * 0.9;
-        }
-        else if (frac(x.x / 6.0) >= 0.5)   
-        {
-            if (frac(x.y / 4.0) >= 0.5 || frac(x.y / 4.0) < 0.25)  
-            {
-                if      (frac(x.x / 3.0) < 0.3333) Mask.r = MaskLight; 
-                else if (frac(x.x / 3.0) < 0.6666) Mask.g = MaskLight;
-                else                                Mask.b = MaskLight;
-            }
-            else Mask * l * 0.9;
-        }
-        return Mask;
-    }
-
-    else if (Shadowmask == 7.0)
-    {
-        float m = frac(x.x * 0.3333);
-
-        if (m < 0.3333) return float3(MaskDark,          MaskLight,         MaskLight * col.b); //Cyan
-        if (m < 0.6666) return float3(MaskLight * col.r, MaskDark,          MaskLight);         //Magenta
-        else            return float3(MaskLight,         MaskLight * col.g, MaskDark);          //Yellow
-    }
-
-    else if (Shadowmask == 8.0)
-    {
-        float3 Mask = float3(MaskDark, MaskDark, MaskDark);
-
-        float bright = MaskLight;
-        float left   = 0.0;
-        if (frac(x.x / 6.0) < 0.5) left = 1.0;
-             
-        float m = frac(x.x / 3.0);
-        if      (m < 0.333) Mask.b = 0.9;
-        else if (m < 0.666) Mask.g = 0.9;
-        else                Mask.r = 0.9;
-        
-        if ((x.y % 2.0) == 1.0 && left == 1.0 || (x.y % 2.0) == 0.0 && left == 0.0) 
-            Mask *= bright; 
-      
-        return Mask; 
-    } 
-    
-    else return float3(1.0, 1.0, 1.0);
-}
-
-float SlotMask(float2 pos, float3 c)
-{
-    if (slotmask == 0.0) return 1.0;
-    
-    pos = floor(pos / slotms);
-    float mx = pow(max(max(c.r, c.g), c.b), 1.33);
-    float mlen = slotwidth * 2.0;
-    float px = frac(pos.x / mlen);
-    float py = floor(frac(pos.y / (2.0 * double_slot)) * 2.0 * double_slot);
-    float slot_dark = lerp(1.0 - slotmask, 1.0 - 0.80 * slotmask, mx);
-    float slot = 1.0 + 0.7 * slotmask * (1.0 - mx);
-    
-    if      (py == 0.0                && px <  0.5) slot = slot_dark; 
-    else if (py == double_slot && px >= 0.5) slot = slot_dark;       
-    
-    return slot;
-}
-
-float4x4 contrastMatrix(float contrast)
-{   
-    float t = (1.0 - contrast) / 2.0;
-    
-    return float4x4(contrast, 0,               0,               0,
-                0,               contrast, 0,               0,
-                0,               0,               contrast, 0,
-                t,               t,               t,               1);
-}
-
-float3x3 vign(float l, float2 tex)
-{
-    float2 vpos = tex;
-    vpos *= 1.0 - vpos.xy;
-    
-    float vig = vpos.x * vpos.y * vstr;
-    vig = min(pow(vig, vpower), 1.0); 
-    if (vignette == false) vig = 1.0;
-   
-    return float3x3(vig, 0,   0,
-                0,   vig, 0,
-                0,   0,   vig);
-}
-
-float3 saturation(float3 textureColor)
-{
-    float luminance = length(textureColor.rgb) * 0.5775;
-
-    float3 luminanceWeighting = float3(0.4, 0.5, 0.1);
-    if (luminance < 0.5) luminanceWeighting.rgb = (luminanceWeighting.rgb * luminanceWeighting.rgb) 
-                                                + (luminanceWeighting.rgb * luminanceWeighting.rgb);
-
-    luminance = dot(textureColor.rgb, luminanceWeighting);
-    float3 greyScaleColor = float3(luminance, luminance, luminance);
-
-    float3 res = float3(lerp(greyScaleColor, textureColor.rgb, sat));
-    return res;
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-float3 glow0 (float2 texcoord, float3 col)
-{
-
-   // the more quality, the smaller the offset and better quality, less visible glow too
-     float2 size = SourceSize.zw/quality;
-     
-     float3 c01;
-     float3 sum = float3(0.0, 0.0, 0.0);
-   
-   // glow = pixels per axis, the more the slower!
-
-    for (float x = -glow; x <= glow; x = x+1.0)
-     {
-
-   // multiply texture, the more far away the less pronounced
-        float factor = 1.0/glow;
-        for (float y = -glow; y <= glow; y = y+1.0)
-        {
-
-        float2 offset = float2(x, y) * size;
-
-         c01 = tex2D(sBackBuffer, texcoord + offset).rgb*factor; c01 = c01*c01;
-          
-                sum += c01;
-        }
-    }
-  
-    return (glow_str * sum / (glow * glow )) ;
-}
-    
-///////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-float noise(float2 co)
-{
-    return frac(sin(iTimer * dot(co.xy ,float2(12.9898,78.233))) * 43758.5453);
-}
-
-float corner0(float2 coord)
-{
-    coord = (coord - float2(0.5, 0.5)) * 1.0 + float2(0.5, 0.5);
-    coord = min(coord, float2(1.0, 1.0) - coord) * float2(1.0, SourceSize.y / SourceSize.x);
-    
-    float2 cdist = float2(corner, corner);
-    coord = (cdist - min(coord, cdist));
-    float dist = sqrt(dot(coord, coord));
-
-    return clamp((cdist.x - dist) * smoothness, 0.0, 1.0);
-}  
-
-static const float3x3 D65_to_XYZ = float3x3(
-           0.4306190,  0.2220379,  0.0201853,
-           0.3415419,  0.7066384,  0.1295504,
-           0.1783091,  0.0713236,  0.9390944);
-
-static const float3x3 XYZ_to_D65 = float3x3(
-           3.0628971, -0.9692660,  0.0678775,
-          -1.3931791,  1.8760108, -0.2288548,
-          -0.4757517,  0.0415560,  1.0693490);
-           
-static const float3x3 D50_to_XYZ = float3x3(
-           0.4552773,  0.2323025,  0.0145457,
-           0.3675500,  0.7077956,  0.1049154,
-           0.1413926,  0.0599019,  0.7057489);
-           
-static const float3x3 XYZ_to_D50 = float3x3(
-           2.9603944, -0.9787684,  0.0844874,
-          -1.4678519,  1.9161415, -0.2545973,
-          -0.4685105,  0.0334540,  1.4216174);         
-
-
-float4 PS_CRT_CONSUMER(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
-{
-    float2 pos = Warp(vTexCoord.xy);
-    float2 tex_size = SourceSize.xy;  
-
-    float2 pC4 = (pos + 0.5/tex_size);
-    float2 fp = frac(pos * tex_size);
-    if (inter == false && tex_size.y > 400.0){ fp.y = frac(pos.y * tex_size.y*1.0/Downscale);} 
-
-    float4 res = float4(1.0, 1.0, 1.0, 1.0);
-    
-    if (alloff == true) 
-        res = tex2D(sBackBuffer, pC4); 
-    else
-    {
-
-   float2 texel = pos * tex_size;
-   float2 texel_floored = floor(texel);
-
-   float scale = PRE_SCALE;
-   float region_range = 0.5 - 0.5 / scale;
-
-   // Figure out where in the texel to sample to get correct pre-scaled bilinear.
-   // Uses the hardware bilinear interpolator to avoid having to sample 4 times manually.
-
-   float2 center_dist = fp - 0.5;
-
-   float2 fpp = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5;
-
-   float2 mod_texel = texel_floored + fpp;
-   float2 coords = mod_texel / SourceSize.xy;
-
-        float3 sample1 = tex2D(sBackBuffer, float2(coords.x + blurx*SourceSize.z, coords.y - blury*SourceSize.w)).rgb;
-        float3 sample2 = tex2D(sBackBuffer, coords).rgb;
-        float3 sample3 = tex2D(sBackBuffer, float2(coords.x - blurx*SourceSize.z, coords.y + blury*SourceSize.w )).rgb;
-        
-        float3 color = float3(sample1.r * 0.5  + sample2.r * 0.5, 
-                          sample1.g * 0.25 + sample2.g * 0.5 + sample3.g * 0.25,
-                          sample2.b * 0.5  + sample3.b * 0.5);
-        if (palette_fix != 0.0) 
-        {
-            if (palette_fix == 1.0) color = color* 1.0667;
-            else if (palette_fix == 2.0) color = color * 2.0;
-        }
-
-        //COLOR TEMPERATURE FROM GUEST.R-DR.VENOM
-        if (WP != 0.0)
-        {
-            float3 warmer = mul(color, D50_to_XYZ);
-            warmer = mul(warmer, XYZ_to_D65); 
-            
-            float3 cooler = mul(color, D65_to_XYZ);
-            cooler = mul(cooler, XYZ_to_D50);
-            
-            float m = abs(WP) / 100.0;
-            float3 comp = (WP < 0.0) ? cooler : warmer;
-            comp = clamp(comp, 0.0, 1.0);   
-            
-            color = float3(lerp(color, comp, m));
-        }
-
-     float3x3 hue = float3x3 (1., rg,  rb,                 //red tint
-                      gr,  1., gb,                  //green tint
-                      br,  bg,  1.);                //blue tint
-
-        color = mul(color, hue);
-
-        color = (2.0*pow(color,float3(2.8, 2.8, 2.8))) - pow(color,float3(3.6, 3.6, 3.6));
-
-        float lum = color.r * 0.3 + color.g * 0.6 + color.b * 0.1;
-
-        float f = frac(fp.y -0.5);
-        
-        if (inter == true && tex_size.y > 400.0) color = color; 
-        else
-        {color = color * sw(f,lum) + color * sw (1.0-f,lum);}
-        
-        float lum1 = color.r * 0.3 + color.g * 0.6 + color.b * 0.1;
-
-        
-        color *= lerp(mask((vTexCoord * OutputSize.xy), color,lum1), float3(1.0, 1.0, 1.0), lum1*preserve);
-        
-
-        if (slotmask != 0.0) color *= SlotMask((vTexCoord * OutputSize.xy) * 1.0001, color);
-        
-        color *= lerp(brightboost1, brightboost2, max(max(color.r, color.g), color.b));    
-
-    
-
-        color = pow(color,float3(1.0 / GAMMA_OUT, 1.0 / GAMMA_OUT, 1.0 / GAMMA_OUT));
-                if (glow_str != 0.0) color += glow0(coords,color);
-
-        if (sat    != 1.0) color  = saturation(color);
-        if (corner != 0.0) color *= corner0(pC4);
-        if (nois   != 0.0) color *= 1.0 + noise(coords * 2.0) / nois;
-
-        color *= lerp(1.0, postbr, lum);
-        res = float4(color, 1.0);
-        if (contrast != 1.0) res = mul(res, contrastMatrix(contrast));
-        if (inter == true && SourceSize.y > 400.0 && frac(iTime) < 0.5) res = res * 0.95;
-        res.rgb = mul(res.rgb, vign(lum, vTexCoord));
-
-    }
-    
-    return res;
-}
-
-
-
-technique CRT_CONSUMER
-{
-   pass
-   {
-   	VertexShader = PostProcessVS;
-   	PixelShader  = PS_CRT_CONSUMER;
-   }
-}
--- a/data/resources/shaders/reshade/Shaders/crt/crt-cyclon.fx
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-cyclon.fx
@@ -32,8 +32,11 @@ uniform float SCANLINE <
 	ui_label = "Scanline Weight";
 > = 0.3;

-uniform bool INTERLACE <
-	ui_type = "radio";
+uniform float INTERLACE <
+	ui_type = "drag";
+	ui_min = 0.0;
+	ui_max = 1.0;
+	ui_step = 1.0;
 	ui_label = "Interlacing On/Off";
 > = 1.0;

@@ -61,8 +64,11 @@ uniform float MSIZE <
 	ui_label = "Mask Size";
 > = 1.0;

-uniform bool SLOT <
-	ui_type = "radio";
+uniform float SLOT <
+	ui_type = "drag";
+	ui_min = 0.0;
+	ui_max = 1.0;
+	ui_step = 1.0;
 	ui_label = "Slot Mask On/Off";
 > = 1.0;

@@ -106,8 +112,11 @@ uniform float bogus_geom <
 	ui_label = " [ GEOMETRY SETTINGS ] ";
 > = 0.0;

-uniform bool bzl <
-	ui_type = "radio";
+uniform float bzl <
+	ui_type = "drag";
+	ui_min = 0.0;
+	ui_max = 1.0;
+	ui_step = 1.0;
 	ui_label = "Bezel On/Off";
 > = 1.0;

@@ -139,7 +148,7 @@ uniform float centerx <
 	ui_type = "drag";
 	ui_min = -5.0;
 	ui_max = 5.0;
-	ui_step = 0.05;
+	ui_step = 0.0;
 	ui_label = "Image Center X";
 > = 0.0;

@@ -167,8 +176,11 @@ uniform float WARPY <
 	ui_label = "Curvature Vertical";
 > = 0.01;

-uniform bool vig <
-	ui_type = "radio";
+uniform float vig <
+	ui_type = "drag";
+	ui_min = 0.0;
+	ui_max = 1.0;
+	ui_step = 1.0;
 	ui_label = "Vignette On/Off";
 > = 1.0;

@@ -224,7 +236,7 @@ uniform float BLACK  <
 	ui_type = "drag";
 	ui_min = -0.20;
 	ui_max = 0.20;
-	ui_step = 0.01;
+	ui_step = 0.0;
 	ui_label = "Black Level";
 > = 0.0;

@@ -238,9 +250,9 @@ uniform float RG <

 uniform float RB <
 	ui_type = "drag";
-	ui_min = -0.25;
-	ui_max = 0.25;
-	ui_step = 0.01;
+	ui_min = -0.25;
+	ui_max = 0.25;
+	ui_step = 0.2;
 	ui_label = "Blue <-to-> Red Hue";
 > = 0.0;

@@ -311,10 +323,9 @@ uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel
 uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
 uniform float  UpscaleMultiplier < source = "upscale_multiplier"; >;
 uniform float2 ViewportSize < source = "viewportsize"; >;
+
+
 uniform int FrameCount < source = "framecount"; >;
-
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;MipFilter=LINEAR;};
-
 texture tBezel < source = "crt-cyclon/bezel.png"; >
 {
 	Width = BUFFER_WIDTH;
@@ -431,9 +442,8 @@ uniform float2 BufferHeight < source = "bufferheight"; >;

 float4 CRT_CYCLON_PS(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
 {
-    float4 SourceSize = float4(1.0 / NormalizedNativePixelSize, NormalizedNativePixelSize);
+    float4 SourceSize = float4(1.0 / (NormalizedInternalPixelSize * UpscaleMultiplier), NormalizedInternalPixelSize * UpscaleMultiplier);
    float2 OutputSize = ViewportSize;
-
    float2 scale = BufferViewportRatio.xy;
    float2 warpcoords = (vTexCoord-float2(0.5,0.5)) * BufferViewportRatio + float2(0.5,0.5);

@@ -448,7 +458,7 @@ float3x3 hue = float3x3(
    float4 bez = float4(0.0,0.0,0.0,0.0);
 //    if (bzl == 1.0) bez = tex2D(sBezel,vTexCoord*SourceSize.xy/OriginalSize.xy*0.97+float2(0.015,0.015));   
 //    if (bzl == 1.0) bez = tex2D(sBezel,vTexCoord*scale*0.97+float2(0.015,0.015));   
-    if (bzl == true) bez = tex2D(sBezel,warpcoords*0.97+float2(0.015,0.015));  // This fix Bezel to adjust to Game's aspect ratio. 
+    if (bzl == 1.0) bez = tex2D(sBezel,warpcoords*0.97+float2(0.015,0.015));  // This fix Bezel to adjust to Game's aspect ratio. 

    bez.rgb = lerp(bez.rgb, float3(ambient,ambient,ambient),0.5);

@@ -463,10 +473,10 @@ float3x3 hue = float3x3(
    pos.x = lerp(pos.x, i.x*ps.x, 0.2);

 // Convergence
-    float3  res0 = tex2D(sBackBuffer,pos).rgb;
-    float resr = tex2D(sBackBuffer,pos + dx*CONV_R).r;
-    float resb = tex2D(sBackBuffer,pos + dx*CONV_B).b;
-    float resg = tex2D(sBackBuffer,pos + dx*CONV_G).g;
+    float3  res0 = tex2D(ReShade::BackBuffer,pos).rgb;
+    float resr = tex2D(ReShade::BackBuffer,pos + dx*CONV_R).r;
+    float resb = tex2D(ReShade::BackBuffer,pos + dx*CONV_B).b;
+    float resg = tex2D(ReShade::BackBuffer,pos + dx*CONV_G).g;

    float3 res = float3(  res0.r*(1.0-C_STR) +  resr*C_STR,
                      res0.g*(1.0-C_STR) +  resg*C_STR,
@@ -474,7 +484,7 @@ float3x3 hue = float3x3(
                   );
 // Vignette
    float x = 0.0;
-    if (vig == true){
+    if (vig == 1.0){
    x = vTexCoord.x*scale.x-0.5;
 //    x = vTexCoord.x-0.5;
    x = x*x;}
@@ -498,7 +508,7 @@ float3x3 hue = float3x3(
    {
        s = frac(bpos.y*SourceSize.y/2.0-0.5);
 //        if (INTERLACE == 1.0) s = mod(float(FrameCount),2.0) < 1.0 ? s: s+0.5;
-        if (INTERLACE == true) s = (float(FrameCount) % 2.0) < 1.0 ? s: s+0.5;
+        if (INTERLACE == 1.0) s = (float(FrameCount) % 2.0) < 1.0 ? s: s+0.5;
    }
 // Calculate CRT-Geom scanlines weight and apply
    float weight  = scanlineWeights(s, res, x);
@@ -511,7 +521,7 @@ float3x3 hue = float3x3(
    float CGWG = lerp(Maskl, Maskh, l);
    res *= Mask(xy, CGWG);
 // Apply slot mask on top of Trinitron-like mask
-    if (SLOT == true) res *= lerp(slot(xy/2.0),float3(1.0,1.0,1.0),CGWG);
+    if (SLOT == 1.0) res *= lerp(slot(xy/2.0),float3(1.0,1.0,1.0),CGWG);
    
    if (POTATO == 0.0) res = inv_gamma(res,pwr);
    else {res = sqrt(res); res *= lerp(1.3,1.1,l);}
@@ -526,7 +536,7 @@ float3x3 hue = float3x3(
    res -= float3(BLACK,BLACK,BLACK);
    res *= blck;
 // Apply bezel code, adapted from New-Pixie
-    if (bzl == true)
+    if (bzl >0.0)
    res.rgb = lerp(res.rgb, lerp(max(res.rgb, 0.0), pow( abs(bez.rgb), float3( 1.4,1.4,1.4 ) ), bez.w * bez.w), float3( 1.0,1.0,1.0 ) );


--- a/data/resources/shaders/reshade/Shaders/crt/crt-geo-zfast.fx
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-geo-zfast.fx
@@ -1,150 +0,0 @@
-#include "ReShade.fxh"
-
-/*
-    zfast_crt_geo - A simple, fast CRT shader.
-
-    Copyright (C) 2017 Greg Hogan (SoltanGris42)
-    Copyright (C) 2023 Jose Linares (Dogway)
-
-    This program is free software; you can redistribute it and/or modify it
-    under the terms of the GNU General Public License as published by the Free
-    Software Foundation; either version 2 of the License, or (at your option)
-    any later version.
-
-
-Notes:  This shader does scaling with a weighted linear filter
-        based on the algorithm by Iñigo Quilez here:
-        https://iquilezles.org/articles/texture/
-        but modified to be somewhat sharper. Then a scanline effect that varies
-        based on pixel brightness is applied along with a monochrome aperture mask.
-        This shader runs at ~60fps on the Chromecast HD (10GFlops) on a 1080p display.
-        (https://forums.libretro.com/t/android-googletv-compatible-shaders-nitpicky)
-
-Dogway: I modified zfast_crt.glsl shader to include screen curvature,
-        vignetting, round corners and phosphor*temperature. Horizontal pixel is left out
-        from the Quilez' algo (read above) to provide a more S-Video like horizontal blur.
-        The scanlines and mask are also now performed in the recommended linear light.
-        For this to run smoothly on GPU deprived platforms like the Chromecast and
-        older consoles, I had to remove several parameters and hardcode them into the shader.
-        Another POV is to run the shader on handhelds like the Switch or SteamDeck so they consume less battery.
-
-*/
-
-
-uniform float SCANLINE_WEIGHT <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 15.0;
-    ui_step = 0.5;
-    ui_label = "Scanline Amount";
-> = 7.0;
-
-uniform float MASK_DARK <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1.0;
-    ui_step = 0.05;
-    ui_label = "Mask Effect Amount";
-> = 0.5;
-
-uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-uniform float BufferWidth < source = "bufferwidth"; >;
-uniform float BufferHeight < source = "bufferheight"; >;
-
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=LINEAR;MinFilter=LINEAR;};
-
-struct ST_VertexOut
-{
-    float2 invDims : TEXCOORD1;
-};
-
-// Vertex shader generating a triangle covering the entire screen
-void VS_CRT_Geo_zFast(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    vVARS.invDims = NormalizedNativePixelSize;
-}
-
-
-#define MSCL (BufferHeight > 1499.0 ? 0.3333 : 0.5)
-// This compensates the scanline+mask embedded gamma from the beam dynamics
-#define pwr ((1.0/((-0.0325*SCANLINE_WEIGHT+1.0)*(-0.311*MASK_DARK+1.0))-1.2).xxx)
-
-
-
-// NTSC-J (D93) -> Rec709 D65 Joint Matrix (with D93 simulation)
-// This is compensated for a linearization hack (RGB*RGB and then sqrt())
-static const float3x3 P22D93 = float3x3(
-     1.00000, 0.00000, -0.06173,
-     0.07111, 0.96887, -0.01136,
-     0.00000, 0.08197,  1.07280);
-
-
-// Returns gamma corrected output, compensated for scanline+mask embedded gamma
-float3 inv_gamma(float3 col, float3 power)
-{
-    float3 cir  = col-1.0;
-         cir *= cir;
-         col  = lerp(sqrt(col),sqrt(1.0-cir),power);
-    return col;
-}
-
-float2 Warp(float2 pos)
-{
-    pos  = pos*2.0-1.0;
-    pos *= float2(1.0 + (pos.y*pos.y)*0.0276, 1.0 + (pos.x*pos.x)*0.0414);
-    return pos*0.5 + 0.5;
-}
-
-
-float4 PS_CRT_Geo_zFast(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0, in ST_VertexOut vVARS) : SV_Target
-{
-    float2 pos   = vTexCoord;
-    float2 xy     = Warp(pos);
-
-    float2 corn   = min(xy,1.0-xy); // This is used to mask the rounded
-           corn.x = 0.0001/corn.x;  // corners later on
-
-          pos *= (1.0 - pos.xy);
-    float vig   = pos.x * pos.y * 46.0;
-          vig   = min(sqrt(vig), 1.0);
-
-
-    // Of all the pixels that are mapped onto the texel we are
-    // currently rendering, which pixel are we currently rendering?
-    float ratio_scale = xy.y / NormalizedNativePixelSize.y - 0.5;
-    // Snap to the center of the underlying texel.
-    float i = floor(ratio_scale) + 0.5;
-
-    // This is just like "Quilez Scaling" but sharper
-    float f = ratio_scale - i;
-    float Y = f*f;
-    float p = (i + 4.0*Y*f)*vVARS.invDims.y;
-
-    float whichmask = floor(vTexCoord.x*BufferWidth)*(-MSCL);
-    float mask = 1.0 + float(frac(whichmask) < MSCL)*(-MASK_DARK);
-    float3 colour = tex2D(sBackBuffer, float2(xy.x,p)).rgb;
-
-    colour = max(mul(P22D93 * vig, colour*colour), 0.0.xxx);
-
-    float scanLineWeight = (1.5 - SCANLINE_WEIGHT*(Y - Y*Y));
-
-    if (corn.y <= corn.x || corn.x < 0.0001 )
-    colour = 0.0.xxx;
-
-    return float4(inv_gamma(colour.rgb*lerp(scanLineWeight*mask, 1.0, colour.r*0.26667+colour.g*0.26667+colour.b*0.26667),pwr),1.0);
-}
-
-
-
-technique CRT_Geo_zFast
-{
-   pass
-   {
-       VertexShader = VS_CRT_Geo_zFast;
-       PixelShader  = PS_CRT_Geo_zFast;
-   }
-}
--- a/data/resources/shaders/reshade/Shaders/crt/crt-geom.fx
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-geom.fx
@@ -52,13 +52,16 @@ uniform bool CURVATURE <
    ui_category = "Curvature";
    ui_type = "radio";
    ui_label = "CRTGeom Curvature Toggle";
-> = true;
+> = 1.0;

-uniform bool invert_aspect <
-    ui_type = "radio";
+uniform float invert_aspect <
+    ui_type = "drag";
    ui_category = "Curvature";
+    ui_min = 0.0;
+    ui_max = 1.0;
+    ui_step = 1.0;
    ui_label = "CRTGeom Curvature Aspect Inversion";
-> = false;
+> = 0.0;

 uniform float R <
    ui_type = "drag";
@@ -90,8 +93,8 @@ uniform float cornersmooth <
 uniform float x_tilt <
    ui_type = "drag";
    ui_category = "Curvature";
-    ui_min = -1.0;
-    ui_max = 1.0;
+    ui_min = -0.5;
+    ui_max = 0.5;
    ui_step = 0.05;
    ui_label = "CRTGeom Horizontal Tilt";
 > = 0.0;
@@ -99,8 +102,8 @@ uniform float x_tilt <
 uniform float y_tilt <
    ui_type = "drag";
    ui_category = "Curvature";
-    ui_min = -1.0;
-    ui_max = 1.0;
+    ui_min = -0.5;
+    ui_max = 0.5;
    ui_step = 0.05;
    ui_label = "CRTGeom Vertical Tilt";
 > = 0.0;
@@ -121,22 +124,6 @@ uniform float overscan_y <
    ui_label = "CRTGeom Vert. Overscan %";
 > = 100.0;

-uniform float centerx <
-    ui_type = "drag";
-    ui_min = -100.0;
-    ui_max = 100.0;
-    ui_step = 0.1;
-    ui_label = "Image Center X";
-> = 0.00;
-
-uniform float centery <
-    ui_type = "drag";
-    ui_min = -100.0;
-    ui_max = 100.0;
-    ui_step = 0.1;
-    ui_label = "Image Center Y";
-> = 0.00;
-
 uniform float DOTMASK <
    ui_type = "drag";
    ui_min = 0.0;
@@ -161,10 +148,13 @@ uniform float scanline_weight <
    ui_label = "CRTGeom Scanline Weight";
 > = 0.3;

-uniform bool vertical_scanlines <
-    ui_type = "radio";
+uniform float vertical_scanlines <
+    ui_type = "drag";
+    ui_min = 0.0;
+    ui_max = 1.0;
+    ui_step = 1.0;
    ui_label = "CRTGeom Vertical Scanlines";
-> = false;
+> = 0.0;

 uniform float lum <
    ui_type = "drag";
@@ -185,17 +175,14 @@ uniform float interlace_detect <


 uniform float  FrameCount < source = "framecount"; >;
-uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
+uniform float2 BufferViewportRatio < source = "buffer_to_viewport_ratio"; >;
 uniform float2 InternalPixelSize < source = "internal_pixel_size"; >;
 uniform float2 NativePixelSize < source = "native_pixel_size"; >;
 uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
 uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
 uniform float  UpscaleMultiplier < source = "upscale_multiplier"; >;
 uniform float2 ViewportSize < source = "viewportsize"; >;
-uniform float  ViewportWidth < source = "viewportwidth"; >;
-uniform float  ViewportHeight < source = "viewportheight"; >;

-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};

 // Comment the next line to disable interpolation in linear gamma (and
 // gain speed).
@@ -212,13 +199,13 @@ sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BO
 #define PI 3.141592653589

 #ifdef LINEAR_PROCESSING
-#       define TEX2D(c) pow(tex2D(sBackBuffer, (c)), float4(CRTgamma,CRTgamma,CRTgamma,CRTgamma))
+#       define TEX2D(c) pow(tex2D(ReShade::BackBuffer, (c)), float4(CRTgamma,CRTgamma,CRTgamma,CRTgamma))
 #else
-#       define TEX2D(c) tex2D(sBackBuffer, (c))
+#       define TEX2D(c) tex2D(ReShade::BackBuffer, (c))
 #endif

 // aspect ratio
-#define aspect     (invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
+#define aspect     (invert_aspect>0.5?float2(0.75,1.0):float2(1.0,0.75))
 #define overscan   (float2(1.01,1.01));


@@ -285,15 +272,6 @@ float3 vs_maxscale(float2 sinangle, float2 cosangle)
    return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y));
 }

-// Code snippet borrowed from crt-cyclon. (credits to DariusG)
-float2 Warp(float2 pos)
-{
-    pos = pos*2.0 - 1.0;
-    pos *= float2(1.0 + pos.y*pos.y*0, 1.0 + pos.x*pos.x*0);
-    pos = pos*0.5 + 0.5;
-
-    return pos;
-}


 // Vertex shader generating a triangle covering the entire screen
@@ -303,11 +281,8 @@ void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, ou
    texcoord.y = (id == 1) ? 2.0 : 0.0;
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

-    // center screen
-    texcoord = Warp(texcoord - float2(centerx,centery)/100.0);
-
    float2 SourceSize = 1.0/NormalizedNativePixelSize;
-    float2 OutputSize = ViewportSize*BufferToViewportRatio;
+    float2 OutputSize = ViewportSize*BufferViewportRatio;

    // Precalculate a bunch of useful values we'll need in the fragment
    // shader.
@@ -315,7 +290,7 @@ void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, ou
    vVARS.cosangle    = cos(float2(x_tilt, y_tilt));
    vVARS.stretch     = vs_maxscale(vVARS.sinangle, vVARS.cosangle);
    
-    if(vertical_scanlines == false)
+    if(vertical_scanlines < 0.5)
    {
       vVARS.TextureSize = float2(SHARPER * SourceSize.x, SourceSize.y);
       
@@ -346,7 +321,7 @@ float intersect(float2 xy, float2 sinangle, float2 cosangle)
    float A = dot(xy,xy) + d*d;
    float B, C;

-    if(vertical_scanlines == false)
+    if(vertical_scanlines < 0.5)
    {
       B = 2.0*(R*(dot(xy,sinangle) - d*cosangle.x*cosangle.y) - d*d);
       C = d*d + 2.0*R*d*cosangle.x*cosangle.y;
@@ -383,7 +358,7 @@ float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
    float x = 1.0 - cos(r/R);
    float D;
    
-    if(vertical_scanlines == false)
+    if(vertical_scanlines < 0.5)
      D = d/R + x*cosangle.x*cosangle.y + dot(uv,sinangle);
    else
      D = d/R + x*cosangle.y*cosangle.x + dot(uv,sinangle);
@@ -393,7 +368,7 @@ float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)

 float3 maxscale(float2 sinangle, float2 cosangle)
 {
-   if(vertical_scanlines == false)
+   if(vertical_scanlines < 0.5)
   {
       float2 c = bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.x*cosangle.y), sinangle, cosangle);
       float2 a = float2(0.5, 0.5)*aspect;
@@ -460,12 +435,13 @@ float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)

 float corner(float2 coord)
 {
+    coord = (coord - float2(0.5, 0.5)) * float2(overscan_x / 100.0, overscan_y / 100.0) + float2(0.5, 0.5);
    coord = min(coord, float2(1.0, 1.0) - coord) * aspect;
    float2 cdist = float2(cornersize, cornersize);
    coord = (cdist - min(coord, cdist));
    float dist = sqrt(dot(coord, coord));
    
-    if(vertical_scanlines == false)
+    if(vertical_scanlines < 0.5)
      return clamp((cdist.x - dist)*cornersmooth, 0.0, 1.0);
    else
      return clamp((cdist.y - dist)*cornersmooth, 0.0, 1.0);
@@ -476,7 +452,6 @@ float fwidth(float value){
 }


-
 float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
 {
    // Here's a helpful diagram to keep in mind while trying to
@@ -502,18 +477,17 @@ float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_

    // Texture coordinates of the texel containing the active pixel.
    float2 xy;
-
-    if (CURVATURE == true)
+    if (CURVATURE > 0.5)
      xy = transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch);
    else
      xy = vTexCoord;

-    float cval = corner((xy-float2(0.5,0.5)) * BufferToViewportRatio + float2(0.5,0.5));
+    float cval = corner(xy);

    // Of all the pixels that are mapped onto the texel we are
    // currently rendering, which pixel are we currently rendering?
   float2 ilvec;
-   if(vertical_scanlines == false)
+   if(vertical_scanlines < 0.5)
      ilvec = float2(0.0, vVARS.ilfac.y * interlace_detect > 1.5 ? (float(FrameCount) % 2.0) : 0.0);
   else
      ilvec = float2(vVARS.ilfac.x * interlace_detect > 1.5 ? (float(FrameCount) % 2.0) : 0.0, 0.0);
@@ -528,7 +502,7 @@ float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_
    // of various neighbour texels in a scanline on the current
    // pixel.
    float4 coeffs;
-    if(vertical_scanlines == false)
+    if(vertical_scanlines < 0.5)
      coeffs = PI * float4(1.0 + uv_ratio.x, uv_ratio.x, 1.0 - uv_ratio.x, 2.0 - uv_ratio.x);
    else
      coeffs = PI * float4(1.0 + uv_ratio.y, uv_ratio.y, 1.0 - uv_ratio.y, 2.0 - uv_ratio.y);
@@ -546,7 +520,7 @@ float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_
    // scanlines at the horizontal location of the current pixel,
    // using the Lanczos coefficients above.
    float4 col, col2;
-    if(vertical_scanlines == false)
+    if(vertical_scanlines < 0.5)
    {
       col = clamp(
           mul(coeffs, float4x4(
@@ -595,7 +569,7 @@ float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_
    // Calculate the influence of the current and next scanlines on
    // the current pixel.
    float4 weights, weights2;
-    if(vertical_scanlines == false)
+    if(vertical_scanlines < 0.5)
    {
       weights  = scanlineWeights(uv_ratio.y, col);
       weights2 = scanlineWeights(1.0 - uv_ratio.y, col2);
--- a/data/resources/shaders/reshade/Shaders/crt/crt-hyllian-sinc.fx
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-hyllian-sinc.fx
@@ -1,375 +0,0 @@
-#include "ReShade.fxh"
-
-/*
-    Hyllian's CRT-sinc Shader
-
-    Copyright (C) 2011-2024 Hyllian
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to deal
-    in the Software without restriction, including without limitation the rights
-    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-    copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be included in
-    all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-    THE SOFTWARE.
-*/
-
-
-
-uniform int HFILTER_PROFILE <
-    ui_type = "combo";
-    ui_items = "Custom\0Composite\0Composite Soft\0";
-    ui_label = "H-FILTER PROFILE";
-> = 0;
-
-uniform float SHP <
-    ui_type = "drag";
-    ui_min = 0.50;
-    ui_max = 1.0;
-    ui_step = 0.01;
-    ui_label = "CUSTOM H-FILTER SHARPNESS";
-> = 1.0;
-
-uniform bool CRT_ANTI_RINGING <
-    ui_type = "radio";
-    ui_label = "ANTI RINGING";
-> = true;
-
-uniform bool SHARPNESS_HACK <
-    ui_type = "radio";
-    ui_label = "SHARPNESS HACK";
-> = false;
-
-uniform float CRT_InputGamma <
-    ui_type = "drag";
-    ui_min = 1.0;
-    ui_max = 5.0;
-    ui_step = 0.1;
-    ui_label = "INPUT GAMMA";
-> = 2.4;
-
-uniform float CRT_OutputGamma <
-    ui_type = "drag";
-    ui_min = 1.0;
-    ui_max = 5.0;
-    ui_step = 0.05;
-    ui_label = "OUTPUT GAMMA";
-> = 2.2;
-
-uniform int MASK_LAYOUT <
-    ui_type = "combo";
-    ui_items = "0-Off\0"
-               "1-Aperture Classic\0""2-Aperture1 RGB 1080p\0""3-Aperture2 RGB 1080p\0""4-Aperture1 RGB 4k\0""5-Aperture2 RGB 4k\0""6-Aperture3 RGB 4k\0"
-               "7-Shadow Classic\0""8-Shadow1 1080p\0""9-Shadow2 1080p\0""10-Shadow1 4k\0"
-               "11-Slot1 1080p\0""12-Slot2 1080p\0""13-Slot1 4k\0""14-Slot1 4k\0""15-Slot1 8k\0";
-    ui_category = "CRT Mask";
-    ui_label = "MASK LAYOUT";
-> = 1;
-
-uniform int MONITOR_SUBPIXELS <
-    ui_type = "combo";
-    ui_items = "RGB\0BGR\0";
-    ui_category = "CRT Mask";
-    ui_label = "MONITOR SUBPIXELS LAYOUT";
-> = 0;
-
-uniform float BRIGHTBOOST <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 3.0;
-    ui_step = 0.05;
-    ui_label = "BRIGHTNESS BOOST";
-> = 1.0;
-
-uniform float BEAM_MIN_WIDTH <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1.0;
-    ui_step = 0.01;
-    ui_label = "MIN BEAM WIDTH";
-> = 0.86;
-
-uniform float BEAM_MAX_WIDTH <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1.0;
-    ui_step = 0.01;
-    ui_label = "MAX BEAM WIDTH";
-> = 1.0;
-
-uniform float SCANLINES_STRENGTH <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1.0;
-    ui_step = 0.01;
-    ui_label = "SCANLINES STRENGTH";
-> = 0.72;
-
-uniform int SCANLINES_SHAPE <
-    ui_type = "combo";
-    ui_items = "Sinc\0Gaussian\0";
-    ui_label = "SCANLINES SHAPE";
-> = 1.0;
-
-uniform float SCANLINES_CUTOFF <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1000.0;
-    ui_step = 1.0;
-    ui_label = "SCANLINES CUTOFF";
-    ui_tooltip = "Max vertical native resolution above which scanlines are disabled.";
-> = 390.0;
-
-uniform bool SCANLINES_HIRES <
-    ui_type = "radio";
-    ui_label = "HIGH RESOLUTION SCANLINES";
-> = false;
-
-uniform float POST_BRIGHTNESS <
-    ui_type = "drag";
-    ui_min = 1.0;
-    ui_max = 3.0;
-    ui_step = 0.05;
-    ui_label = "POST-BRIGHTNESS";
-> = 1.00;
-
-uniform bool VSCANLINES <
-    ui_type = "radio";
-    ui_label = "VERTICAL SCANLINES";
-> = false;
-
-
-uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-uniform float  BufferWidth               < source = "bufferwidth"; >;
-uniform float  BufferHeight              < source = "bufferheight"; >;
-uniform float2 BufferToViewportRatio     < source = "buffer_to_viewport_ratio"; >;
-uniform float2 ViewportSize              < source = "viewportsize"; >;
-uniform float  ViewportWidth             < source = "viewportwidth"; >;
-uniform float  ViewportHeight            < source = "viewportheight"; >;
-uniform float  UpscaleMultiplier         < source = "upscale_multiplier"; >;
-
-
-#include "../misc/include/mask.fxh"
-#include "../misc/include/geom.fxh"
-
-
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
-
-texture2D tBackBufferLinear{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
-sampler2D sBackBufferLinear{Texture=tBackBufferLinear;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
-
-#define GAMMA_IN(color)    pow(color, float3(CRT_InputGamma, CRT_InputGamma, CRT_InputGamma))
-#define GAMMA_OUT(color)   pow(color, float3(1.0 / CRT_OutputGamma, 1.0 / CRT_OutputGamma, 1.0 / CRT_OutputGamma))
-
-#define SCANLINES_STRENGTH (-0.16*SCANLINES_SHAPE+SCANLINES_STRENGTH)
-#define CORNER_SMOOTHNESS (80.0*pow(CORNER_SMOOTHNESS,10.0))
-
-#define pi    3.1415926535897932384626433832795
-
-#define RADIUS  2.0  // No need for more than 2-taps
-
-float2 get_hfilter_profile()
-{
-    float2 hf_profile = float2(SHP, RADIUS);
-
-    if      (HFILTER_PROFILE == 1) hf_profile = float2(0.78, 2.0); // SNES composite
-    else if (HFILTER_PROFILE == 2) hf_profile = float2(0.65, 2.0); // Genesis composite
-
-    return hf_profile;
-}
-
-/* Some window functions for tests. */
-float4 sinc(float4 x)              { return sin(pi*x)*(1.0/(pi*x+0.001.xxxx)); }
-float4 hann_window(float4 x)       { return 0.5 * ( 1.0 - cos( 0.5 * pi * ( x + 2.0 ) ) ); }
-float4 blackman_window(float4 x)   { return 0.42 - 0.5*cos(0.5*pi*(x+2.0)) + 0.08*cos(pi*(x+2.0)); }
-float4 lanczos(float4 x, float a)  { return sinc(x) * sinc(x / a); }
-float4 blackman(float4 x, float a) { return sinc(x) * blackman_window(x); }
-float4 hann(float4 x, float a)     { return sinc(x) * hann_window(x); }
-
-float4 resampler4(float4 x, float2 hfp)
-{
-    return blackman(x * hfp.x, hfp.y);
-}
-
-
-#define wa    (0.5*pi)
-#define wb    (pi)
-
-float3 resampler3(float3 x)
-{
-    float3 res;
-
-    res.x = (x.x<=0.001) ?  1.0  :  sin(x.x*wa)*sin(x.x*wb)/(wa*wb*x.x*x.x);
-    res.y = (x.y<=0.001) ?  1.0  :  sin(x.y*wa)*sin(x.y*wb)/(wa*wb*x.y*x.y);
-    res.z = (x.z<=0.001) ?  1.0  :  sin(x.z*wa)*sin(x.z*wb)/(wa*wb*x.z*x.z);
-
-    return res;
-}
-
-float3 get_scanlines(float3 d0, float3 d1, float3 color0, float3 color1)
-{
-    if (SCANLINES_SHAPE > 0.5) {
-        d0 = exp(-16.0*d0*d0);
-        d1 = exp(-16.0*d1*d1);
-    }
-    else {
-        d0 = clamp(2.0*d0, 0.0, 1.0);
-        d1 = clamp(2.0*d1, 0.0, 1.0);
-        d0 = resampler3(d0);
-        d1 = resampler3(d1);
-    }
-
-    return (BRIGHTBOOST*(color0*d0+color1*d1));
-}
-
-float4 PS_BackBufferLinear(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
-{
-//    float2 tc = (floor(vTexCoord / NormalizedNativePixelSize) + 0.5.xx) * NormalizedNativePixelSize;
-
-    return float4(GAMMA_IN(tex2D(sBackBuffer, vTexCoord).rgb), 1.0);
-}
-
-struct ST_VertexOut
-{
-    float2 sinangle    : TEXCOORD1;
-    float2 cosangle    : TEXCOORD2;
-    float3 stretch     : TEXCOORD3;
-    float2 TextureSize : TEXCOORD4;
-};
-
-
-// Vertex shader generating a triangle covering the entire screen
-void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    // Screen centering
-    texcoord = texcoord - float2(centerx,centery)/100.0;
-
-    float2 SourceSize = 1.0/NormalizedNativePixelSize;
-    float shp_hack = 1.0 + float(SHARPNESS_HACK);
-
- 
-    // Precalculate a bunch of useful values we'll need in the fragment
-    // shader.
-    vVARS.sinangle    = sin(float2(geom_x_tilt, geom_y_tilt));
-    vVARS.cosangle    = cos(float2(geom_x_tilt, geom_y_tilt));
-    vVARS.stretch     = maxscale(vVARS.sinangle, vVARS.cosangle);
-    vVARS.TextureSize = lerp(float2(shp_hack*SourceSize.x, SourceSize.y), float2(SourceSize.x, shp_hack*SourceSize.y), VSCANLINES);
-}
-
-
-float4 PS_CRT_Hyllian(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0, in ST_VertexOut vVARS) : SV_Target
-{
-    float2 OutputSize = float2(BufferWidth, BufferHeight);
-
-    float2 TextureSize = vVARS.TextureSize;
-
-    float2 dx = lerp(float2(1.0/TextureSize.x, 0.0), float2(0.0, 1.0/TextureSize.y), VSCANLINES);
-    float2 dy = lerp(float2(0.0, 1.0/TextureSize.y), float2(1.0/TextureSize.x, 0.0), VSCANLINES);
-
-    // Texture coordinates of the texel containing the active pixel.
-    float2 WarpedTexCoord = (geom_curvature == true) ? transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch) : vTexCoord;
-
-    float cval = corner((WarpedTexCoord-0.5.xx) * BufferToViewportRatio + 0.5.xx);
-
-    float2 pix_coord = WarpedTexCoord*TextureSize - 0.5.xx;
-  
-    float2 tc = ( (SCANLINES_HIRES == true) ? (lerp(float2(floor(pix_coord.x), pix_coord.y), float2(pix_coord.x, floor(pix_coord.y)), VSCANLINES) + float2(0.5, 0.5)) : (floor(pix_coord) + float2(0.5, 0.5)) )/TextureSize;
-
-    float2 fp = lerp(frac(pix_coord), frac(pix_coord.yx), VSCANLINES);
-
-    float3 c00 = tex2D(sBackBufferLinear, tc     - dx).xyz;
-    float3 c01 = tex2D(sBackBufferLinear, tc         ).xyz;
-    float3 c02 = tex2D(sBackBufferLinear, tc     + dx).xyz;
-    float3 c03 = tex2D(sBackBufferLinear, tc + 2.0*dx).xyz;
-
-    float3 c10, c11, c12, c13;
-
-    if (SCANLINES_HIRES == false)
-    {
-        c10 = tex2D(sBackBufferLinear, tc     - dx + dy).xyz;
-        c11 = tex2D(sBackBufferLinear, tc          + dy).xyz;
-        c12 = tex2D(sBackBufferLinear, tc     + dx + dy).xyz;
-        c13 = tex2D(sBackBufferLinear, tc + 2.0*dx + dy).xyz;
-    }
-    else { c10 = c00; c11 = c01; c12 = c02; c13 = c03;}
-
-    float4x3 color_matrix0 = float4x3(c00, c01, c02, c03);
-    float4x3 color_matrix1 = float4x3(c10, c11, c12, c13);
-
-    float2 hfp = get_hfilter_profile();
-
-    float4 weights = resampler4(float4(1.0+fp.x, fp.x, 1.0-fp.x, 2.0-fp.x), hfp);
-
-    float3 color0   = mul(weights, color_matrix0)/dot(weights, 1.0.xxxx);
-    float3 color1   = mul(weights, color_matrix1)/dot(weights, 1.0.xxxx);
-
-    // Get min/max samples
-    float3 min_sample0 = min(c01,c02);
-    float3 max_sample0 = max(c01,c02);
-    float3 min_sample1 = min(c11,c12);
-    float3 max_sample1 = max(c11,c12);
-  
-    // Anti-ringing
-    float3 aux = color0;
-    color0 = clamp(color0, min_sample0, max_sample0);
-    color0 = lerp(aux, color0, CRT_ANTI_RINGING);
-    aux = color1;
-    color1 = clamp(color1, min_sample1, max_sample1);
-    color1 = lerp(aux, color1, CRT_ANTI_RINGING);
-
-    float pos0 = fp.y;
-    float pos1 = 1 - fp.y;
-
-    float3 lum0 = lerp(BEAM_MIN_WIDTH.xxx, BEAM_MAX_WIDTH.xxx, color0);
-    float3 lum1 = lerp(BEAM_MIN_WIDTH.xxx, BEAM_MAX_WIDTH.xxx, color1);
-
-    float3 d0 = SCANLINES_STRENGTH*pos0/(lum0*lum0+0.0000001.xxx);
-    float3 d1 = SCANLINES_STRENGTH*pos1/(lum1*lum1+0.0000001.xxx);
-
-    float3 color  = (vVARS.TextureSize.y <= SCANLINES_CUTOFF) ? get_scanlines(d0, d1, color0, color1) : tex2D(sBackBufferLinear, WarpedTexCoord.xy).xyz;
-
-    color *=  BRIGHTBOOST;
-
-    color  = GAMMA_OUT(color);
-
-    float2 mask_coords =vTexCoord.xy * OutputSize.xy;
-
-    mask_coords = lerp(mask_coords.xy, mask_coords.yx, VSCANLINES);
-
-    color.rgb*=GAMMA_OUT(mask_weights(mask_coords, MASK_LAYOUT, MONITOR_SUBPIXELS, MASK_DARK_STRENGTH, MASK_LIGHT_STRENGTH));
-    
-    float4 res = float4(POST_BRIGHTNESS*color, 1.0);
-
-    res.rgb = res.rgb * cval.xxx;
-
-    return float4(res.rgb, 1.0);
-}
-
-technique CRT_Hyllian
-{
-   pass
-   {
-       VertexShader = PostProcessVS;
-       PixelShader  = PS_BackBufferLinear;
-       RenderTarget = tBackBufferLinear;
-   }
-   pass
-   {
-       VertexShader = VS_CRT_Geom;
-       PixelShader  = PS_CRT_Hyllian;
-   }
-}
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale.fx
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale.fx
@@ -1,521 +0,0 @@
-#include "ReShade.fxh"
-
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-//  Ported to Duckstation (ReShade specs) by Hyllian (2024).
-
-//  Set shader params for all passes here:
-
-uniform float crt_gamma <
-    ui_type = "drag";
-    ui_min = 1.0;
-    ui_max = 5.0;
-    ui_step = 0.025;
-    ui_label = "Simulated CRT Gamma";
-    ui_category = "Display Settings";
-> = 2.5;
-
-uniform float lcd_gamma <
-    ui_type = "drag";
-    ui_min = 1.0;
-    ui_max = 5.0;
-    ui_step = 0.025;
-    ui_label = "Your Display Gamma";
-    ui_category = "Display Settings";
-> = 2.2;
-
-uniform float levels_contrast <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 4.0;
-    ui_step = 0.015625;
-    ui_label = "Contrast";
-    ui_category = "Display Settings";
-> = 1.0;
-
-uniform float halation_weight <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1.0;
-    ui_step = 0.005;
-    ui_label = "Halation Weight";
-    ui_category = "Effects";
-> = 0.0;
-
-uniform float diffusion_weight <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1.0;
-    ui_step = 0.005;
-    ui_label = "Diffusion Weight";
-    ui_category = "Effects";
-> = 0.075;
-
-uniform float bloom_underestimate_levels <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 5.0;
-    ui_step = 0.01;
-    ui_label = "Bloom - Underestimate Levels";
-    ui_category = "Effects";
-> = 0.8;
-
-uniform float bloom_excess <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1.0;
-    ui_step = 0.005;
-    ui_label = "Bloom - Excess";
-    ui_category = "Effects";
-> = 0.0;
-
-uniform float beam_min_sigma <
-    ui_type = "drag";
-    ui_min = 0.005;
-    ui_max = 1.0;
-    ui_step = 0.005;
-    ui_label = "Min Sigma";
-    ui_category = "Beam Dynamics";
-> = 0.02;
-
-uniform float beam_max_sigma <
-    ui_type = "drag";
-    ui_min = 0.005;
-    ui_max = 1.0;
-    ui_step = 0.005;
-    ui_label = "Max Sigma";
-    ui_category = "Beam Dynamics";
-> = 0.3;
-
-uniform float beam_spot_power <
-    ui_type = "drag";
-    ui_min = 0.01;
-    ui_max = 16.0;
-    ui_step = 0.01;
-    ui_label = "Spot Power";
-    ui_category = "Beam Dynamics";
-> = 0.33;
-
-uniform float beam_min_shape <
-    ui_type = "drag";
-    ui_min = 2.0;
-    ui_max = 32.0;
-    ui_step = 0.1;
-    ui_label = "Min Shape";
-    ui_category = "Beam Dynamics";
-> = 2.0;
-
-uniform float beam_max_shape <
-    ui_type = "drag";
-    ui_min = 2.0;
-    ui_max = 32.0;
-    ui_step = 0.1;
-    ui_label = "Max Shape";
-    ui_category = "Beam Dynamics";
-> = 4.0;
-
-uniform float beam_shape_power <
-    ui_type = "drag";
-    ui_min = 0.01;
-    ui_max = 16.0;
-    ui_step = 0.01;
-    ui_label = "Shape Power";
-    ui_category = "Beam Dynamics";
-> = 0.25;
-
-uniform int beam_horiz_filter <
-    ui_type = "combo";
-    ui_items = "Quilez\0Gaussian\0Lanczos\0";
-    ui_label = "Horizontal Filter";
-    ui_category = "Beam Dynamics";
-> = 0;
-
-uniform float beam_horiz_sigma <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 0.67;
-    ui_step = 0.005;
-    ui_label = "Horizontal Sigma";
-    ui_category = "Beam Dynamics";
-> = 0.35;
-
-uniform float beam_horiz_linear_rgb_weight <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1.0;
-    ui_step = 0.01;
-    ui_label = "Horiz Linear RGB Weight";
-    ui_category = "Beam Dynamics";
-> = 1.0;
-
-uniform float convergence_offset_x_r <
-    ui_type = "drag";
-    ui_min = -4.0;
-    ui_max = 4.0;
-    ui_step = 0.05;
-    ui_label = "Offset X Red";
-    ui_category = "Convergence";
-> = 0.0;
-
-uniform float convergence_offset_x_g <
-    ui_type = "drag";
-    ui_min = -4.0;
-    ui_max = 4.0;
-    ui_step = 0.05;
-    ui_label = "Offset X Green";
-    ui_category = "Convergence";
-> = 0.0;
-
-uniform float convergence_offset_x_b <
-    ui_type = "drag";
-    ui_min = -4.0;
-    ui_max = 4.0;
-    ui_step = 0.05;
-    ui_label = "Offset X Blue";
-    ui_category = "Convergence";
-> = 0.0;
-
-uniform float convergence_offset_y_r <
-    ui_type = "drag";
-    ui_min = -2.0;
-    ui_max = 2.0;
-    ui_step = 0.05;
-    ui_label = "Offset Y Red";
-    ui_category = "Convergence";
-> = 0.0;
-
-uniform float convergence_offset_y_g <
-    ui_type = "drag";
-    ui_min = -2.0;
-    ui_max = 2.0;
-    ui_step = 0.05;
-    ui_label = "Offset Y Green";
-    ui_category = "Convergence";
-> = 0.0;
-
-uniform float convergence_offset_y_b <
-    ui_type = "drag";
-    ui_min = -2.0;
-    ui_max = 2.0;
-    ui_step = 0.05;
-    ui_label = "Offset Y Blue";
-    ui_category = "Convergence";
-> = 0.0;
-
-uniform int mask_type <
-    ui_type = "combo";
-    ui_items = "Aperture Grille\0Slot Mask\0Shadow Mask\0";
-    ui_label = "Type";
-    ui_category = "Mask";
-> = 0;
-
-uniform float mask_sample_mode_desired <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 2.0;
-    ui_step = 1.;
-    ui_label = "Sample Mode";
-    ui_category = "Mask";
-> = 0.0;
-
-uniform float mask_specify_num_triads <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1.0;
-    ui_step = 1.0;
-    ui_label = "Specify Number of Triads";
-    ui_category = "Mask";
-> = 0.0;
-
-uniform float mask_triad_size_desired <
-    ui_type = "drag";
-    ui_min = 1.0;
-    ui_max = 18.0;
-    ui_step = 0.125;
-    ui_label = "Triad Size Desired";
-    ui_category = "Mask";
-> = 3.0;
-
-uniform float mask_num_triads_desired <
-    ui_type = "drag";
-    ui_min = 342.0;
-    ui_max = 1920.0;
-    ui_step = 1.0;
-    ui_label = "Number of Triads Desired";
-    ui_category = "Mask";
-> = 480.0;
-
-uniform bool interlace_detect <
-    ui_type = "radio";
-    ui_label = "Enable Interlacing Detection";
-    ui_category = "Interlacing";
-> = true;
-
-uniform bool interlace_bff <
-    ui_type = "radio";
-    ui_label = "Bottom Field First";
-    ui_category = "Interlacing";
-> = false;
-
-uniform bool interlace_1080i <
-    ui_type = "radio";
-    ui_label = "Detect 1080i";
-    ui_category = "Interlacing";
-> = false;
-
-
-uniform float  FrameCount < source = "framecount"; >;
-uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
-uniform float2 InternalPixelSize < source = "internal_pixel_size"; >;
-uniform float2 NativePixelSize < source = "native_pixel_size"; >;
-uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
-uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-uniform float  UpscaleMultiplier < source = "upscale_multiplier"; >;
-uniform float2 ViewportSize < source = "viewportsize"; >;
-uniform float  ViewportWidth < source = "viewportwidth"; >;
-uniform float  ViewportHeight < source = "viewportheight"; >;
-
-#include "../misc/include/geom.fxh"
-
-#define VIEWPORT_SIZE (ViewportSize*BufferToViewportRatio)
-#define TEXTURE_SIZE  (1.0/NormalizedNativePixelSize)
-
-#define ORIG_LINEARIZED_texture_size    TEXTURE_SIZE
-#define VERTICAL_SCANLINES_texture_size TEXTURE_SIZE
-#define BLOOM_APPROX_texture_size       TEXTURE_SIZE
-#define BLUR9FAST_VERTICAL_texture_size TEXTURE_SIZE
-#define HALATION_BLUR_texture_size      TEXTURE_SIZE
-#define MASK_RESIZE_VERT_texture_size   TEXTURE_SIZE
-#define MASK_RESIZE_texture_size        float2(64.0,0.0625*((VIEWPORT_SIZE).y))
-#define MASKED_SCANLINES_texture_size   (0.0625*VIEWPORT_SIZE)
-#define BRIGHTPASS_texture_size         VIEWPORT_SIZE
-#define BLOOM_VERTICAL_texture_size     VIEWPORT_SIZE
-#define BLOOM_HORIZONTAL_texture_size   VIEWPORT_SIZE
-
-#define ORIG_LINEARIZED_video_size      ORIG_LINEARIZED_texture_size
-#define VERTICAL_SCANLINES_video_size   VERTICAL_SCANLINES_texture_size
-#define BLOOM_APPROX_video_size         BLOOM_APPROX_texture_size
-#define BLUR9FAST_VERTICAL_video_size   BLUR9FAST_VERTICAL_texture_size
-#define HALATION_BLUR_video_size        HALATION_BLUR_texture_size
-#define MASK_RESIZE_VERT_video_size     MASK_RESIZE_VERT_texture_size
-#define MASK_RESIZE_video_size          MASK_RESIZE_texture_size
-#define MASKED_SCANLINES_video_size     MASKED_SCANLINES_texture_size
-#define BRIGHTPASS_video_size           BRIGHTPASS_texture_size
-#define BLOOM_VERTICAL_video_size       BLOOM_VERTICAL_texture_size
-#define BLOOM_HORIZONTAL_video_size     BLOOM_HORIZONTAL_texture_size
-
-#define video_size texture_size
-
-
-texture2D tmask_grille_texture_small < source = "crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png"; > {Width=64.0;Height=64.0;MipLevels=0;};
-texture2D tmask_slot_texture_small < source = "crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png"; > {Width=64.0;Height=64.0;MipLevels=0;};
-texture2D tmask_shadow_texture_small < source = "crt-royale/TileableLinearShadowMaskEDPResizeTo64.png"; > {Width=64.0;Height=64.0;MipLevels=0;};
-
-texture2D tmask_grille_texture_large < source = "crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png"; > {Width=512.0;Height=512.0;MipLevels=4;};
-texture2D tmask_slot_texture_large < source = "crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png"; > {Width=512.0;Height=512.0;MipLevels=4;};
-texture2D tmask_shadow_texture_large < source = "crt-royale/TileableLinearShadowMaskEDP.png"; > {Width=512.0;Height=512.0;MipLevels=4;};
-
-sampler2D mask_grille_texture_small { Texture = tmask_grille_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
-sampler2D mask_slot_texture_small { Texture = tmask_slot_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
-sampler2D mask_shadow_texture_small { Texture = tmask_shadow_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
-
-sampler2D mask_grille_texture_large { Texture = tmask_grille_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
-sampler2D mask_slot_texture_large { Texture = tmask_slot_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
-sampler2D mask_shadow_texture_large { Texture = tmask_shadow_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
-
-
-#ifndef DEBUG_PASSES
-    #define DEBUG_PASSES 11
-#endif
-
-
-
-texture2D tORIG_LINEARIZED{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
-sampler2D ORIG_LINEARIZED{Texture=tORIG_LINEARIZED;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
-
-#if (DEBUG_PASSES > 1)
-texture2D tVERTICAL_SCANLINES{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
-sampler2D VERTICAL_SCANLINES{Texture=tVERTICAL_SCANLINES;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
-#endif
-#if (DEBUG_PASSES > 2)
-texture2D tBLOOM_APPROX{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
-sampler2D BLOOM_APPROX{Texture=tBLOOM_APPROX;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
-#endif
-
-#if (DEBUG_PASSES > 3)
-// Need checking if it's really necessary to rendertarget.
-texture2D tBLUR9FAST_VERTICAL{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
-sampler2D BLUR9FAST_VERTICAL{Texture=tBLUR9FAST_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
-#endif
-#if (DEBUG_PASSES > 4)
-
-texture2D tHALATION_BLUR{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
-sampler2D HALATION_BLUR{Texture=tHALATION_BLUR;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
-#endif
-#if (DEBUG_PASSES > 5)
-
-texture2D tMASK_RESIZE_VERTICAL{Width=64.0;Height=BUFFER_HEIGHT*0.0625;Format=RGBA8;};
-sampler2D MASK_RESIZE_VERTICAL{Texture=tMASK_RESIZE_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
-#endif
-#if (DEBUG_PASSES > 6)
-
-texture2D tMASK_RESIZE{Width=BUFFER_WIDTH*0.0625;Height=BUFFER_HEIGHT*0.0625;Format=RGBA8;};
-sampler2D MASK_RESIZE{Texture=tMASK_RESIZE;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
-#endif
-#if (DEBUG_PASSES > 7)
-
-texture2D tMASKED_SCANLINES{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
-sampler2D MASKED_SCANLINES{Texture=tMASKED_SCANLINES;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
-#endif
-#if (DEBUG_PASSES > 8)
-
-texture2D tBRIGHTPASS{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
-sampler2D BRIGHTPASS{Texture=tBRIGHTPASS;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
-#endif
-
-#if (DEBUG_PASSES > 9)
-texture2D tBLOOM_VERTICAL{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
-sampler2D BLOOM_VERTICAL{Texture=tBLOOM_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
-#endif
-
-
-
-#include "crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.fxh"
-
-#if (DEBUG_PASSES > 1)
-#include "crt-royale/src/crt-royale-scanlines-vertical-interlacing.fxh"
-#endif
-#if (DEBUG_PASSES > 2)
-#include "crt-royale/src/crt-royale-bloom-approx.fxh"
-#endif
-#if (DEBUG_PASSES > 3)
-#include "crt-royale/src/blur9fast-vertical.fxh"
-#endif
-#if (DEBUG_PASSES > 4)
-#include "crt-royale/src/blur9fast-horizontal.fxh"
-#endif
-#if (DEBUG_PASSES > 5)
-#include "crt-royale/src/crt-royale-mask-resize-vertical.fxh"
-#endif
-#if (DEBUG_PASSES > 6)
-#include "crt-royale/src/crt-royale-mask-resize-horizontal.fxh"
-#endif
-#if (DEBUG_PASSES > 7)
-#include "crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.fxh"
-#endif
-#if (DEBUG_PASSES > 8)
-#include "crt-royale/src/crt-royale-brightpass.fxh"
-#endif
-#if (DEBUG_PASSES > 9)
-#include "crt-royale/src/crt-royale-bloom-vertical.fxh"
-#endif
-#if (DEBUG_PASSES > 10)
-#include "crt-royale/src/crt-royale-bloom-horizontal-reconstitute.fxh"
-#endif
-
-
-technique CRT_Royale
-{
-   pass
-   {
-       VertexShader = VS_Linearize;
-       PixelShader  = PS_Linearize;
-       RenderTarget = tORIG_LINEARIZED;
-   }
-#if (DEBUG_PASSES > 1)
-   pass
-   {
-       VertexShader = VS_Scanlines_Vertical_Interlacing;
-       PixelShader  = PS_Scanlines_Vertical_Interlacing;
-       RenderTarget = tVERTICAL_SCANLINES;
-   }
-#endif
-#if (DEBUG_PASSES > 2)
-   pass
-   {
-       VertexShader = VS_Bloom_Approx;
-       PixelShader  = PS_Bloom_Approx;
-       RenderTarget = tBLOOM_APPROX;
-   }
-#endif
-#if (DEBUG_PASSES > 3)
-   pass
-   {
-       VertexShader = VS_Blur9Fast_Vertical;
-       PixelShader  = PS_Blur9Fast_Vertical;
-       RenderTarget = tBLUR9FAST_VERTICAL;
-   }
-#endif
-#if (DEBUG_PASSES > 4)
-   pass
-   {
-       VertexShader = VS_Blur9Fast_Horizontal;
-       PixelShader  = PS_Blur9Fast_Horizontal;
-       RenderTarget = tHALATION_BLUR;
-   }
-#endif
-#if (DEBUG_PASSES > 5)
-   pass
-   {
-       VertexShader = VS_Mask_Resize_Vertical;
-       PixelShader  = PS_Mask_Resize_Vertical;
-       RenderTarget = tMASK_RESIZE_VERTICAL;
-   }
-#endif
-#if (DEBUG_PASSES > 6)
-   pass
-   {
-       VertexShader = VS_Mask_Resize_Horizontal;
-       PixelShader  = PS_Mask_Resize_Horizontal;
-       RenderTarget = tMASK_RESIZE;
-   }
-#endif
-#if (DEBUG_PASSES > 7)
-   pass
-   {
-       VertexShader = VS_Scanlines_Horizontal_Apply_Mask;
-       PixelShader  = PS_Scanlines_Horizontal_Apply_Mask;
-       RenderTarget = tMASKED_SCANLINES;
-   }
-#endif
-#if (DEBUG_PASSES > 8)
-   pass
-   {
-       VertexShader = VS_Brightpass;
-       PixelShader  = PS_Brightpass;
-       RenderTarget = tBRIGHTPASS;
-   }
-#endif
-#if (DEBUG_PASSES > 9)
-   pass
-   {
-       VertexShader = VS_Bloom_Vertical;
-       PixelShader  = PS_Bloom_Vertical;
-       RenderTarget = tBLOOM_VERTICAL;
-   }
-#endif
-#if (DEBUG_PASSES > 10)
-   pass
-   {
-       VertexShader = VS_Bloom_Horizontal;
-       PixelShader  = PS_Bloom_Horizontal;
-   }
-#endif
-}
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/LICENSE.TXT
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/LICENSE.TXT
@@ -1,280 +0,0 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The licenses for most software are designed to take away your
-freedom to share and change it.  By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users.  This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it.  (Some other Free Software Foundation software is covered by
-the GNU Lesser General Public License instead.)  You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
-  To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have.  You must make sure that they, too, receive or can get the
-source code.  And you must show them these terms so they know their
-rights.
-
-  We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
-  Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software.  If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
-  Finally, any free program is threatened constantly by software
-patents.  We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary.  To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-                    GNU GENERAL PUBLIC LICENSE
-   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-  0. This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License.  The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language.  (Hereinafter, translation is included without limitation in
-the term "modification".)  Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope.  The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
-  1. You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
-  2. You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
-    a) You must cause the modified files to carry prominent notices
-    stating that you changed the files and the date of any change.
-
-    b) You must cause any work that you distribute or publish, that in
-    whole or in part contains or is derived from the Program or any
-    part thereof, to be licensed as a whole at no charge to all third
-    parties under the terms of this License.
-
-    c) If the modified program normally reads commands interactively
-    when run, you must cause it, when started running for such
-    interactive use in the most ordinary way, to print or display an
-    announcement including an appropriate copyright notice and a
-    notice that there is no warranty (or else, saying that you provide
-    a warranty) and that users may redistribute the program under
-    these conditions, and telling the user how to view a copy of this
-    License.  (Exception: if the Program itself is interactive but
-    does not normally print such an announcement, your work based on
-    the Program is not required to print an announcement.)
-
-These requirements apply to the modified work as a whole.  If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works.  But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-  3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
-    a) Accompany it with the complete corresponding machine-readable
-    source code, which must be distributed under the terms of Sections
-    1 and 2 above on a medium customarily used for software interchange; or,
-
-    b) Accompany it with a written offer, valid for at least three
-    years, to give any third party, for a charge no more than your
-    cost of physically performing source distribution, a complete
-    machine-readable copy of the corresponding source code, to be
-    distributed under the terms of Sections 1 and 2 above on a medium
-    customarily used for software interchange; or,
-
-    c) Accompany it with the information you received as to the offer
-    to distribute corresponding source code.  (This alternative is
-    allowed only for noncommercial distribution and only if you
-    received the program in object code or executable form with such
-    an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it.  For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable.  However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-  4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License.  Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-  5. You are not required to accept this License, since you have not
-signed it.  However, nothing else grants you permission to modify or
-distribute the Program or its derivative works.  These actions are
-prohibited by law if you do not accept this License.  Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
-  6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions.  You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
-  7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all.  For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices.  Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-  8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded.  In such case, this License incorporates
-the limitation as if written in the body of this License.
-
-  9. The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number.  If the Program
-specifies a version number of this License which applies to it and "any
-later version", you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation.  If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
-  10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission.  For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this.  Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
-                            NO WARRANTY
-
-  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
-  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
-                     END OF TERMS AND CONDITIONS
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/bind-shader-params.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/bind-shader-params.fxh
@@ -1,249 +0,0 @@
-#ifndef BIND_SHADER_PARAMS_H
-#define BIND_SHADER_PARAMS_H
-
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
-
-#include "helper-functions-and-macros.fxh"
-#include "user-settings.fxh"
-#include "derived-settings-and-constants.fxh"
-
-//  Override some parameters for gamma-management.h and tex2Dantialias.h:
-#define OVERRIDE_DEVICE_GAMMA
-static const float gba_gamma = 3.5; //  Irrelevant but necessary to define.
-#define ANTIALIAS_OVERRIDE_BASICS
-#define ANTIALIAS_OVERRIDE_PARAMETERS
-
-//  Disable runtime shader params if the user doesn't explicitly want them.
-//  Static constants will be defined in place of uniforms of the same name.
-#ifndef RUNTIME_SHADER_PARAMS_ENABLE
-    #undef PARAMETER_UNIFORM
-#endif
-
-//  Bind option names to shader parameter uniforms or static constants.
-#ifdef PARAMETER_UNIFORM
-    uniform float crt_gamma;
-    uniform float lcd_gamma;
-    uniform float levels_contrast;
-    uniform float halation_weight;
-    uniform float diffusion_weight;
-    uniform float bloom_underestimate_levels;
-    uniform float bloom_excess;
-    uniform float beam_min_sigma;
-    uniform float beam_max_sigma;
-    uniform float beam_spot_power;
-    uniform float beam_min_shape;
-    uniform float beam_max_shape;
-    uniform float beam_shape_power;
-    uniform float beam_horiz_sigma;
-    #ifdef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
-        uniform float beam_horiz_filter;
-        uniform float beam_horiz_linear_rgb_weight;
-    #else
-        static const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0);
-        static const float beam_horiz_linear_rgb_weight = clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
-    #endif
-    uniform float convergence_offset_x_r;
-    uniform float convergence_offset_x_g;
-    uniform float convergence_offset_x_b;
-    uniform float convergence_offset_y_r;
-    uniform float convergence_offset_y_g;
-    uniform float convergence_offset_y_b;
-    #ifdef RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
-        uniform float mask_type;
-    #else
-        static const float mask_type = clamp(mask_type_static, 0.0, 2.0);
-    #endif
-    uniform float mask_sample_mode_desired;
-    uniform float mask_specify_num_triads;
-    uniform float mask_triad_size_desired;
-    uniform float mask_num_triads_desired;
-    uniform float aa_subpixel_r_offset_x_runtime;
-    uniform float aa_subpixel_r_offset_y_runtime;
-    #ifdef RUNTIME_ANTIALIAS_WEIGHTS
-        uniform float aa_cubic_c;
-        uniform float aa_gauss_sigma;
-    #else
-        static const float aa_cubic_c = aa_cubic_c_static;                              //  Clamp to [0, 4]?
-        static const float aa_gauss_sigma = max(FIX_ZERO(0.0), aa_gauss_sigma_static);  //  Clamp to [FIXZERO(0), 1]?
-    #endif
-    uniform float geom_mode_runtime;
-    uniform float geom_radius;
-    uniform float geom_view_dist;
-    uniform float geom_tilt_angle_x;
-    uniform float geom_tilt_angle_y;
-    uniform float geom_aspect_ratio_x;
-    uniform float geom_aspect_ratio_y;
-    uniform float geom_overscan_x;
-    uniform float geom_overscan_y;
-    uniform float border_size;
-    uniform float border_darkness;
-    uniform float border_compress;
-    uniform float interlace_bff;
-    uniform float interlace_1080i;
-#else
-    //  Use constants from user-settings.h, and limit ranges appropriately:
-/*    static const float crt_gamma = macro_max(0.0, crt_gamma_static);
-    static const float lcd_gamma = macro_max(0.0, lcd_gamma_static);
-    static const float levels_contrast = macro_clamp(levels_contrast_static, 0.0, 4.0);
-    static const float halation_weight = macro_clamp(halation_weight_static, 0.0, 1.0);
-    static const float diffusion_weight = macro_clamp(diffusion_weight_static, 0.0, 1.0);
-    static const float bloom_underestimate_levels = macro_max(FIX_ZERO(0.0), bloom_underestimate_levels_static);
-    static const float bloom_excess = macro_clamp(bloom_excess_static, 0.0, 1.0);
-    static const float beam_min_sigma = macro_max(FIX_ZERO(0.0), beam_min_sigma_static);
-    static const float beam_max_sigma = macro_max(beam_min_sigma, beam_max_sigma_static);
-    static const float beam_spot_power = macro_max(beam_spot_power_static, 0.0);
-    static const float beam_min_shape = macro_max(2.0, beam_min_shape_static);
-    static const float beam_max_shape = macro_max(beam_min_shape, beam_max_shape_static);
-    static const float beam_shape_power = macro_max(0.0, beam_shape_power_static);
-    static const float beam_horiz_filter = macro_clamp(beam_horiz_filter_static, 0.0, 2.0);
-    static const float beam_horiz_sigma = macro_max(FIX_ZERO(0.0), beam_horiz_sigma_static);
-    static const float beam_horiz_linear_rgb_weight = macro_clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
-*/    //  Unpack static vector elements to match scalar uniforms:
-/*    static const float convergence_offset_x_r = macro_clamp(convergence_offsets_r_static.x, -4.0, 4.0);
-    static const float convergence_offset_x_g = macro_clamp(convergence_offsets_g_static.x, -4.0, 4.0);
-    static const float convergence_offset_x_b = macro_clamp(convergence_offsets_b_static.x, -4.0, 4.0);
-    static const float convergence_offset_y_r = macro_clamp(convergence_offsets_r_static.y, -4.0, 4.0);
-    static const float convergence_offset_y_g = macro_clamp(convergence_offsets_g_static.y, -4.0, 4.0);
-    static const float convergence_offset_y_b = macro_clamp(convergence_offsets_b_static.y, -4.0, 4.0);
-    static const float mask_type = macro_clamp(mask_type_static, 0.0, 2.0);
-    static const float mask_sample_mode_desired = macro_clamp(mask_sample_mode_static, 0.0, 2.0);
-    static const float mask_specify_num_triads = macro_clamp(mask_specify_num_triads_static, 0.0, 1.0);
-    static const float mask_triad_size_desired = macro_clamp(mask_triad_size_desired_static, 1.0, 18.0);
-    static const float mask_num_triads_desired = macro_clamp(mask_num_triads_desired_static, 342.0, 1920.0);
-    static const float aa_subpixel_r_offset_x_runtime = macro_clamp(aa_subpixel_r_offset_static.x, -0.5, 0.5);
-    static const float aa_subpixel_r_offset_y_runtime = macro_clamp(aa_subpixel_r_offset_static.y, -0.5, 0.5);
-    static const float aa_cubic_c = aa_cubic_c_static;                              //  Clamp to [0, 4]?
-    static const float aa_gauss_sigma = macro_max(FIX_ZERO(0.0), aa_gauss_sigma_static);  //  Clamp to [FIXZERO(0), 1]?
-    static const float geom_mode_runtime = macro_clamp(geom_mode_static, 0.0, 3.0);
-    static const float geom_radius = macro_max(1.0/(2.0*pi), geom_radius_static);         //  Clamp to [1/(2*pi), 1024]?
-    static const float geom_view_dist = macro_max(0.5, geom_view_dist_static);            //  Clamp to [0.5, 1024]?
-    static const float geom_tilt_angle_x = macro_clamp(geom_tilt_angle_static.x, -pi, pi);
-    static const float geom_tilt_angle_y = macro_clamp(geom_tilt_angle_static.y, -pi, pi);
-    static const float geom_aspect_ratio_x = geom_aspect_ratio_static;              //  Force >= 1?
-    static const float geom_aspect_ratio_y = 1.0;
-    static const float geom_overscan_x = macro_max(FIX_ZERO(0.0), geom_overscan_static.x);
-    static const float geom_overscan_y = macro_max(FIX_ZERO(0.0), geom_overscan_static.y);
-    static const float border_size = macro_clamp(border_size_static, 0.0, 0.5);           //  0.5 reaches to image center
-    static const float border_darkness = macro_max(0.0, border_darkness_static);
-    static const float border_compress = macro_max(1.0, border_compress_static);          //  < 1.0 darkens whole image
-    static const float interlace_bff = float(interlace_bff_static);
-    static const float interlace_1080i = float(interlace_1080i_static);
-*/
-#endif
-
-/*
-//  Provide accessors for vector constants that pack scalar uniforms:
-float2 get_aspect_vector(const float geom_aspect_ratio)
-{
-    //  Get an aspect ratio vector.  Enforce geom_max_aspect_ratio, and prevent
-    //  the absolute scale from affecting the uv-mapping for curvature:
-    const float geom_clamped_aspect_ratio =
-        min(geom_aspect_ratio, geom_max_aspect_ratio);
-    const float2 geom_aspect =
-        normalize(float2(geom_clamped_aspect_ratio, 1.0));
-    return geom_aspect;
-}
-
-float2 get_geom_overscan_vector()
-{
-    return float2(geom_overscan_x, geom_overscan_y);
-}
-
-float2 get_geom_tilt_angle_vector()
-{
-    return float2(geom_tilt_angle_x, geom_tilt_angle_y);
-}
-*/
-float3 get_convergence_offsets_x_vector()
-{
-    return float3(convergence_offset_x_r, convergence_offset_x_g,
-        convergence_offset_x_b);
-}
-
-float3 get_convergence_offsets_y_vector()
-{
-    return float3(convergence_offset_y_r, convergence_offset_y_g,
-        convergence_offset_y_b);
-}
-
-float2 get_convergence_offsets_r_vector()
-{
-    return float2(convergence_offset_x_r, convergence_offset_y_r);
-}
-
-float2 get_convergence_offsets_g_vector()
-{
-    return float2(convergence_offset_x_g, convergence_offset_y_g);
-}
-
-float2 get_convergence_offsets_b_vector()
-{
-    return float2(convergence_offset_x_b, convergence_offset_y_b);
-}
-/*
-float2 get_aa_subpixel_r_offset()
-{
-    #ifdef RUNTIME_ANTIALIAS_WEIGHTS
-        #ifdef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
-            //  WARNING: THIS IS EXTREMELY EXPENSIVE.
-            return float2(aa_subpixel_r_offset_x_runtime,
-                aa_subpixel_r_offset_y_runtime);
-        #else
-            return aa_subpixel_r_offset_static;
-        #endif
-    #else
-        return aa_subpixel_r_offset_static;
-    #endif
-}
-*/
-//  Provide accessors settings which still need "cooking:"
-float get_mask_amplify()
-{
-    static const float mask_grille_amplify = 1.0/mask_grille_avg_color;
-    static const float mask_slot_amplify = 1.0/mask_slot_avg_color;
-    static const float mask_shadow_amplify = 1.0/mask_shadow_avg_color;
-    return mask_type < 0.5 ? mask_grille_amplify :
-        mask_type < 1.5 ? mask_slot_amplify :
-        mask_shadow_amplify;
-}
-
-float get_mask_sample_mode()
-{
-    #ifdef RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
-        #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
-            return mask_sample_mode_desired;
-        #else
-            return clamp(mask_sample_mode_desired, 1.0, 2.0);
-        #endif
-    #else
-        #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
-            return mask_sample_mode_static;
-        #else
-            return clamp(mask_sample_mode_static, 1.0, 2.0);
-        #endif
-    #endif
-}
-
-
-#endif  //  BIND_SHADER_PARAMS_H
-
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/bloom-functions.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/bloom-functions.fxh
@@ -1,317 +0,0 @@
-#ifndef BLOOM_FUNCTIONS_H
-#define BLOOM_FUNCTIONS_H
-
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-/////////////////////////////////  DESCRIPTION  ////////////////////////////////
-
-//  These utility functions and constants help several passes determine the
-//  size and center texel weight of the phosphor bloom in a uniform manner.
-
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  We need to calculate the correct blur sigma using some .cgp constants:
-#include "user-settings.fxh"
-#include "derived-settings-and-constants.fxh"
-#include "blur-functions.fxh"
-
-
-///////////////////////////////  BLOOM CONSTANTS  //////////////////////////////
-
-//  Compute constants with manual inlines of the functions below:
-static const float bloom_diff_thresh = 1.0/256.0;
-
-
-
-///////////////////////////////////  HELPERS  //////////////////////////////////
-
-float get_min_sigma_to_blur_triad(const float triad_size,
-    const float thresh)
-{
-    //  Requires:   1.) triad_size is the final phosphor triad size in pixels
-    //              2.) thresh is the max desired pixel difference in the
-    //                  blurred triad (e.g. 1.0/256.0).
-    //  Returns:    Return the minimum sigma that will fully blur a phosphor
-    //              triad on the screen to an even color, within thresh.
-    //              This closed-form function was found by curve-fitting data.
-    //  Estimate: max error = ~0.086036, mean sq. error = ~0.0013387:
-    return -0.05168 + 0.6113*triad_size -
-        1.122*triad_size*sqrt(0.000416 + thresh);
-    //  Estimate: max error = ~0.16486, mean sq. error = ~0.0041041:
-    //return 0.5985*triad_size - triad_size*sqrt(thresh)
-}
-
-float get_absolute_scale_blur_sigma(const float thresh)
-{
-    //  Requires:   1.) min_expected_triads must be a global float.  The number
-    //                  of horizontal phosphor triads in the final image must be
-    //                  >= min_allowed_viewport_triads.x for realistic results.
-    //              2.) bloom_approx_scale_x must be a global float equal to the
-    //                  absolute horizontal scale of BLOOM_APPROX.
-    //              3.) bloom_approx_scale_x/min_allowed_viewport_triads.x
-    //                  should be <= 1.1658025090 to keep the final result <
-    //                  0.62666015625 (the largest sigma ensuring the largest
-    //                  unused texel weight stays < 1.0/256.0 for a 3x3 blur).
-    //              4.) thresh is the max desired pixel difference in the
-    //                  blurred triad (e.g. 1.0/256.0).
-    //  Returns:    Return the minimum Gaussian sigma that will blur the pass
-    //              output as much as it would have taken to blur away
-    //              bloom_approx_scale_x horizontal phosphor triads.
-    //  Description:
-    //  BLOOM_APPROX should look like a downscaled phosphor blur.  Ideally, we'd
-    //  use the same blur sigma as the actual phosphor bloom and scale it down
-    //  to the current resolution with (bloom_approx_scale_x/viewport_size_x), but
-    //  we don't know the viewport size in this pass.  Instead, we'll blur as
-    //  much as it would take to blur away min_allowed_viewport_triads.x.  This
-    //  will blur "more than necessary" if the user actually uses more triads,
-    //  but that's not terrible either, because blurring a constant fraction of
-    //  the viewport may better resemble a true optical bloom anyway (since the
-    //  viewport will generally be about the same fraction of each player's
-    //  field of view, regardless of screen size and resolution).
-    //  Assume an extremely large viewport size for asymptotic results.
-    return bloom_approx_scale_x/max_viewport_size_x *
-        get_min_sigma_to_blur_triad(
-            max_viewport_size_x/min_allowed_viewport_triads.x, thresh);
-}
-
-float get_center_weight(const float sigma)
-{
-    //  Given a Gaussian blur sigma, get the blur weight for the center texel.
-    #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA
-        return get_fast_gaussian_weight_sum_inv(sigma);
-    #else
-        const float denom_inv = 0.5/(sigma*sigma);
-        const float w0 = 1.0;
-        const float w1 = exp(-1.0 * denom_inv);
-        const float w2 = exp(-4.0 * denom_inv);
-        const float w3 = exp(-9.0 * denom_inv);
-        const float w4 = exp(-16.0 * denom_inv);
-        const float w5 = exp(-25.0 * denom_inv);
-        const float w6 = exp(-36.0 * denom_inv);
-        const float w7 = exp(-49.0 * denom_inv);
-        const float w8 = exp(-64.0 * denom_inv);
-        const float w9 = exp(-81.0 * denom_inv);
-        const float w10 = exp(-100.0 * denom_inv);
-        const float w11 = exp(-121.0 * denom_inv);
-        const float w12 = exp(-144.0 * denom_inv);
-        const float w13 = exp(-169.0 * denom_inv);
-        const float w14 = exp(-196.0 * denom_inv);
-        const float w15 = exp(-225.0 * denom_inv);
-        const float w16 = exp(-256.0 * denom_inv);
-        const float w17 = exp(-289.0 * denom_inv);
-        const float w18 = exp(-324.0 * denom_inv);
-        const float w19 = exp(-361.0 * denom_inv);
-        const float w20 = exp(-400.0 * denom_inv);
-        const float w21 = exp(-441.0 * denom_inv);
-        //  Note: If the implementation uses a smaller blur than the max allowed,
-        //  the worst case scenario is that the center weight will be overestimated,
-        //  so we'll put a bit more energy into the brightpass...no huge deal.
-        //  Then again, if the implementation uses a larger blur than the max
-        //  "allowed" because of dynamic branching, the center weight could be
-        //  underestimated, which is more of a problem...consider always using
-        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
-            //  43x blur:
-            const float weight_sum_inv = 1.0 /
-                (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 +
-                w11 + w12 + w13 + w14 + w15 + w16 + w17 + w18 + w19 + w20 + w21));
-        #else
-        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
-            //  31x blur:
-            const float weight_sum_inv = 1.0 /
-                (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 +
-                w8 + w9 + w10 + w11 + w12 + w13 + w14 + w15));
-        #else
-        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
-            //  25x blur:
-            const float weight_sum_inv = 1.0 / (w0 + 2.0 * (
-                w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12));
-        #else
-        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
-            //  17x blur:
-            const float weight_sum_inv = 1.0 / (w0 + 2.0 * (
-                w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8));
-        #else
-            //  9x blur:
-            const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3 + w4));
-        #endif  //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
-        #endif  //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
-        #endif  //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
-        #endif  //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
-        const float center_weight = weight_sum_inv * weight_sum_inv;
-        return center_weight;
-    #endif
-}
-
-float3 tex2DblurNfast(const sampler2D tex, const float2 tex_uv,
-    const float2 dxdy, const float sigma)
-{
-    //  If sigma is static, we can safely branch and use the smallest blur
-    //  that's big enough.  Ignore #define hints, because we'll only use a
-    //  large blur if we actually need it, and the branches cost nothing.
-    #ifndef RUNTIME_PHOSPHOR_BLOOM_SIGMA
-        #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
-    #else
-        //  It's still worth branching if the profile supports dynamic branches:
-        //  It's much faster than using a hugely excessive blur, but each branch
-        //  eats ~1% FPS.
-        #ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES
-            #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
-        #endif
-    #endif
-    //  Failed optimization notes:
-    //  I originally created a same-size mipmapped 5-tap separable blur10 that
-    //  could handle any sigma by reaching into lower mip levels.  It was
-    //  as fast as blur25fast for runtime sigmas and a tad faster than
-    //  blur31fast for static sigmas, but mipmapping two viewport-size passes
-    //  ate 10% of FPS across all codepaths, so it wasn't worth it.
-    #ifdef PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
-        if(sigma <= blur9_std_dev)
-        {
-            return tex2Dblur9fast(tex, tex_uv, dxdy, sigma);
-        }
-        else if(sigma <= blur17_std_dev)
-        {
-            return tex2Dblur17fast(tex, tex_uv, dxdy, sigma);
-        }
-        else if(sigma <= blur25_std_dev)
-        {
-            return tex2Dblur25fast(tex, tex_uv, dxdy, sigma);
-        }
-        else if(sigma <= blur31_std_dev)
-        {
-            return tex2Dblur31fast(tex, tex_uv, dxdy, sigma);
-        }
-        else
-        {
-            return tex2Dblur43fast(tex, tex_uv, dxdy, sigma);
-        }
-    #else
-        //  If we can't afford to branch, we can only guess at what blur
-        //  size we need.  Therefore, use the largest blur allowed.
-        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
-            return tex2Dblur43fast(tex, tex_uv, dxdy, sigma);
-        #else
-        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
-            return tex2Dblur31fast(tex, tex_uv, dxdy, sigma);
-        #else
-        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
-            return tex2Dblur25fast(tex, tex_uv, dxdy, sigma);
-        #else
-        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
-            return tex2Dblur17fast(tex, tex_uv, dxdy, sigma);
-        #else
-            return tex2Dblur9fast(tex, tex_uv, dxdy, sigma);
-        #endif  //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
-        #endif  //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
-        #endif  //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
-        #endif  //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
-    #endif  //  PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
-}
-
-float get_bloom_approx_sigma(const float output_size_x_runtime,
-    const float estimated_viewport_size_x)
-{
-    //  Requires:   1.) output_size_x_runtime == BLOOM_APPROX.output_size.x.
-    //                  This is included for dynamic codepaths just in case the
-    //                  following two globals are incorrect:
-    //              2.) bloom_approx_size_x_for_skip should == the same
-    //                  if PHOSPHOR_BLOOM_FAKE is #defined
-    //              3.) bloom_approx_size_x should == the same otherwise
-    //  Returns:    For gaussian4x4, return a dynamic small bloom sigma that's
-    //              as close to optimal as possible given available information.
-    //              For blur3x3, return the a static small bloom sigma that
-    //              works well for typical cases.  Otherwise, we're using simple
-    //              bilinear filtering, so use static calculations.
-    //  Assume the default static value.  This is a compromise that ensures
-    //  typical triads are blurred, even if unusually large ones aren't.
-    static const float mask_num_triads_static =
-        max(min_allowed_viewport_triads.x, mask_num_triads_desired_static);
-    const float mask_num_triads_from_size =
-        estimated_viewport_size_x/mask_triad_size_desired;
-    const float mask_num_triads_runtime = max(min_allowed_viewport_triads.x,
-        lerp(mask_num_triads_from_size, mask_num_triads_desired,
-            mask_specify_num_triads));
-    //  Assume an extremely large viewport size for asymptotic results:
-    static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
-    if(bloom_approx_filter > 1.5)   //  4x4 true Gaussian resize
-    {
-        //  Use the runtime num triads and output size:
-        const float asymptotic_triad_size =
-            max_viewport_size_x/mask_num_triads_runtime;
-        const float asymptotic_sigma = get_min_sigma_to_blur_triad(
-            asymptotic_triad_size, bloom_diff_thresh);
-        const float bloom_approx_sigma =
-            asymptotic_sigma * output_size_x_runtime/max_viewport_size_x;
-        //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
-        //  account for the Gaussian scanline sigma from the last pass too.
-        //  The bloom will be too wide horizontally but tall enough vertically.
-        return length(float2(bloom_approx_sigma, beam_max_sigma));
-    }
-    else    //  3x3 blur resize (the bilinear resize doesn't need a sigma)
-    {
-        //  We're either using blur3x3 or bilinear filtering.  The biggest
-        //  reason to choose blur3x3 is to avoid dynamic weights, so use a
-        //  static calculation.
-        #ifdef PHOSPHOR_BLOOM_FAKE
-            static const float output_size_x_static =
-                bloom_approx_size_x_for_fake;
-        #else
-            static const float output_size_x_static = bloom_approx_size_x;
-        #endif
-        static const float asymptotic_triad_size =
-            max_viewport_size_x/mask_num_triads_static;
-        const float asymptotic_sigma = get_min_sigma_to_blur_triad(
-            asymptotic_triad_size, bloom_diff_thresh);
-        const float bloom_approx_sigma =
-            asymptotic_sigma * output_size_x_static/max_viewport_size_x;
-        //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
-        //  try accounting for the Gaussian scanline sigma from the last pass
-        //  too; use the static default value:
-        return length(float2(bloom_approx_sigma, beam_max_sigma_static));
-    }
-}
-
-float get_final_bloom_sigma(const float bloom_sigma_runtime)
-{
-    //  Requires:   1.) bloom_sigma_runtime is a precalculated sigma that's
-    //                  optimal for the [known] triad size.
-    //              2.) Call this from a fragment shader (not a vertex shader),
-    //                  or blurring with static sigmas won't be constant-folded.
-    //  Returns:    Return the optimistic static sigma if the triad size is
-    //              known at compile time.  Otherwise return the optimal runtime
-    //              sigma (10% slower) or an implementation-specific compromise
-    //              between an optimistic or pessimistic static sigma.
-    //  Notes:      Call this from the fragment shader, NOT the vertex shader,
-    //              so static sigmas can be constant-folded!
-    const float bloom_sigma_optimistic = get_min_sigma_to_blur_triad(
-        mask_triad_size_desired_static, bloom_diff_thresh);
-    #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA
-        return bloom_sigma_runtime;
-    #else
-        //  Overblurring looks as bad as underblurring, so assume average-size
-        //  triads, not worst-case huge triads:
-        return bloom_sigma_optimistic;
-    #endif
-}
-
-
-#endif  //  BLOOM_FUNCTIONS_H
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/blur-functions.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/blur-functions.fxh
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/derived-settings-and-constants.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/derived-settings-and-constants.fxh
@@ -1,299 +0,0 @@
-#ifndef DERIVED_SETTINGS_AND_CONSTANTS_H
-#define DERIVED_SETTINGS_AND_CONSTANTS_H
-
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-/////////////////////////////////  DESCRIPTION  ////////////////////////////////
-
-//  These macros and constants can be used across the whole codebase.
-//  Unlike the values in user-settings.cgh, end users shouldn't modify these.
-
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "user-settings.fxh"
-#include "user-cgp-constants.fxh"
-
-
-///////////////////////////////  FIXED SETTINGS  ///////////////////////////////
-
-//  Avoid dividing by zero; using a macro overloads for float, float2, etc.:
-//#define FIX_ZERO(c) (max(abs(c), 0.0000152587890625))   //  2^-16
-
-//  Ensure the first pass decodes CRT gamma and the last encodes LCD gamma.
-#ifndef SIMULATE_CRT_ON_LCD
-    #define SIMULATE_CRT_ON_LCD
-#endif
-
-//  Manually tiling a manually resized texture creates texture coord derivative
-//  discontinuities and confuses anisotropic filtering, causing discolored tile
-//  seams in the phosphor mask.  Workarounds:
-//  a.) Using tex2Dlod disables anisotropic filtering for tiled masks.  It's
-//      downgraded to tex2Dbias without DRIVERS_ALLOW_TEX2DLOD #defined and
-//      disabled without DRIVERS_ALLOW_TEX2DBIAS #defined either.
-//  b.) "Tile flat twice" requires drawing two full tiles without border padding
-//      to the resized mask FBO, and it's incompatible with same-pass curvature.
-//      (Same-pass curvature isn't used but could be in the future...maybe.)
-//  c.) "Fix discontinuities" requires derivatives and drawing one tile with
-//      border padding to the resized mask FBO, but it works with same-pass
-//      curvature.  It's disabled without DRIVERS_ALLOW_DERIVATIVES #defined.
-//  Precedence: a, then, b, then c (if multiple strategies are #defined).
-    #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD              //  129.7 FPS, 4x, flat; 101.8 at fullscreen
-    #define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE       //  128.1 FPS, 4x, flat; 101.5 at fullscreen
-    #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES   //  124.4 FPS, 4x, flat; 97.4 at fullscreen
-//  Also, manually resampling the phosphor mask is slightly blurrier with
-//  anisotropic filtering.  (Resampling with mipmapping is even worse: It
-//  creates artifacts, but only with the fully bloomed shader.)  The difference
-//  is subtle with small triads, but you can fix it for a small cost.
-    //#define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
-
-
-//////////////////////////////  DERIVED SETTINGS  //////////////////////////////
-
-//  Intel HD 4000 GPU's can't handle manual mask resizing (for now), setting the
-//  geometry mode at runtime, or a 4x4 true Gaussian resize.  Disable
-//  incompatible settings ASAP.  (INTEGRATED_GRAPHICS_COMPATIBILITY_MODE may be
-//  #defined by either user-settings.h or a wrapper .cg that #includes the
-//  current .cg pass.)
-#ifdef INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
-    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
-        #undef PHOSPHOR_MASK_MANUALLY_RESIZE
-    #endif
-    #ifdef RUNTIME_GEOMETRY_MODE
-        #undef RUNTIME_GEOMETRY_MODE
-    #endif
-    //  Mode 2 (4x4 Gaussian resize) won't work, and mode 1 (3x3 blur) is
-    //  inferior in most cases, so replace 2.0 with 0.0:
-    static const float bloom_approx_filter =
-        bloom_approx_filter_static > 1.5 ? 0.0 : bloom_approx_filter_static;
-#else
-    static const float bloom_approx_filter = bloom_approx_filter_static;
-#endif
-
-//  Disable slow runtime paths if static parameters are used.  Most of these
-//  won't be a problem anyway once the params are disabled, but some will.
-#ifndef RUNTIME_SHADER_PARAMS_ENABLE
-    #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA
-        #undef RUNTIME_PHOSPHOR_BLOOM_SIGMA
-    #endif
-    #ifdef RUNTIME_ANTIALIAS_WEIGHTS
-        #undef RUNTIME_ANTIALIAS_WEIGHTS
-    #endif
-    #ifdef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
-        #undef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
-    #endif
-    #ifdef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
-        #undef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
-    #endif
-    #ifdef RUNTIME_GEOMETRY_TILT
-        #undef RUNTIME_GEOMETRY_TILT
-    #endif
-    #ifdef RUNTIME_GEOMETRY_MODE
-        #undef RUNTIME_GEOMETRY_MODE
-    #endif
-    #ifdef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
-        #undef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
-    #endif
-#endif
-
-//  Make tex2Dbias a backup for tex2Dlod for wider compatibility.
-#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
-    #define ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
-#endif
-#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
-    #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
-#endif
-//  Rule out unavailable anisotropic compatibility strategies:
-#ifndef DRIVERS_ALLOW_DERIVATIVES
-    #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
-        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
-    #endif
-#endif
-#ifndef DRIVERS_ALLOW_TEX2DLOD
-    #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
-        #undef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
-    #endif
-    #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
-        #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
-    #endif
-    #ifdef ANTIALIAS_DISABLE_ANISOTROPIC
-        #undef ANTIALIAS_DISABLE_ANISOTROPIC
-    #endif
-#endif
-#ifndef DRIVERS_ALLOW_TEX2DBIAS
-    #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
-        #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
-    #endif
-    #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
-        #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
-    #endif
-#endif
-//  Prioritize anisotropic tiling compatibility strategies by performance and
-//  disable unused strategies.  This concentrates all the nesting in one place.
-#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
-    #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
-        #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
-    #endif
-    #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
-        #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
-    #endif
-    #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
-        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
-    #endif
-#else
-    #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
-        #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
-            #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
-        #endif
-        #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
-            #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
-        #endif
-    #else
-        //  ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE is only compatible with
-        //  flat texture coords in the same pass, but that's all we use.
-        #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
-            #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
-                #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
-            #endif
-        #endif
-    #endif
-#endif
-//  The tex2Dlod and tex2Dbias strategies share a lot in common, and we can
-//  reduce some #ifdef nesting in the next section by essentially OR'ing them:
-#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
-    #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
-#endif
-#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
-    #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
-#endif
-//  Prioritize anisotropic resampling compatibility strategies the same way:
-#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
-    #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
-        #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
-    #endif
-#endif
-
-
-///////////////////////  DERIVED PHOSPHOR MASK CONSTANTS  //////////////////////
-
-//  If we can use the large mipmapped LUT without mipmapping artifacts, we
-//  should: It gives us more options for using fewer samples.
-#ifdef DRIVERS_ALLOW_TEX2DLOD
-    #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
-        //  TODO: Take advantage of this!
-        #define PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT
-        static const float2 mask_resize_src_lut_size = mask_texture_large_size;
-    #else
-        static const float2 mask_resize_src_lut_size = mask_texture_small_size;
-    #endif
-#else
-    static const float2 mask_resize_src_lut_size = mask_texture_small_size;
-#endif
-
-
-//  tex2D's sampler2D parameter MUST be a uniform global, a uniform input to
-//  main_fragment, or a static alias of one of the above.  This makes it hard
-//  to select the phosphor mask at runtime: We can't even assign to a uniform
-//  global in the vertex shader or select a sampler2D in the vertex shader and
-//  pass it to the fragment shader (even with explicit TEXUNIT# bindings),
-//  because it just gives us the input texture or a black screen.  However, we
-//  can get around these limitations by calling tex2D three times with different
-//  uniform samplers (or resizing the phosphor mask three times altogether).
-//  With dynamic branches, we can process only one of these branches on top of
-//  quickly discarding fragments we don't need (cgc seems able to overcome
-//  limigations around dependent texture fetches inside of branches).  Without
-//  dynamic branches, we have to process every branch for every fragment...which
-//  is slower.  Runtime sampling mode selection is slower without dynamic
-//  branches as well.  Let the user's static #defines decide if it's worth it.
-#ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES
-    #define RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
-#else
-    #ifdef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
-        #define RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
-    #endif
-#endif
-
-//  We need to render some minimum number of tiles in the resize passes.
-//  We need at least 1.0 just to repeat a single tile, and we need extra
-//  padding beyond that for anisotropic filtering, discontinuitity fixing,
-//  antialiasing, same-pass curvature (not currently used), etc.  First
-//  determine how many border texels and tiles we need, based on how the result
-//  will be sampled:
-#ifdef GEOMETRY_EARLY
-        static const float max_subpixel_offset = aa_subpixel_r_offset_static.x;
-        //  Most antialiasing filters have a base radius of 4.0 pixels:
-        static const float max_aa_base_pixel_border = 4.0 +
-            max_subpixel_offset;
-#else
-    static const float max_aa_base_pixel_border = 0.0;
-#endif
-//  Anisotropic filtering adds about 0.5 to the pixel border:
-#ifndef ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
-    static const float max_aniso_pixel_border = max_aa_base_pixel_border + 0.5;
-#else
-    static const float max_aniso_pixel_border = max_aa_base_pixel_border;
-#endif
-//  Fixing discontinuities adds 1.0 more to the pixel border:
-#ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
-    static const float max_tiled_pixel_border = max_aniso_pixel_border + 1.0;
-#else
-    static const float max_tiled_pixel_border = max_aniso_pixel_border;
-#endif
-//  Convert the pixel border to an integer texel border.  Assume same-pass
-//  curvature about triples the texel frequency:
-#ifdef GEOMETRY_EARLY
-    static const float max_mask_texel_border =
-        macro_ceil(max_tiled_pixel_border * 3.0);
-#else
-    static const float max_mask_texel_border = macro_ceil(max_tiled_pixel_border);
-#endif
-//  Convert the texel border to a tile border using worst-case assumptions:
-static const float max_mask_tile_border = max_mask_texel_border/
-    (mask_min_allowed_triad_size * mask_triads_per_tile);
-
-//  Finally, set the number of resized tiles to render to MASK_RESIZE, and set
-//  the starting texel (inside borders) for sampling it.
-#ifndef GEOMETRY_EARLY
-    #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
-        //  Special case: Render two tiles without borders.  Anisotropic
-        //  filtering doesn't seem to be a problem here.
-        static const float mask_resize_num_tiles = 1.0 + 1.0;
-        static const float mask_start_texels = 0.0;
-    #else
-        static const float mask_resize_num_tiles = 1.0 +
-            2.0 * max_mask_tile_border;
-        static const float mask_start_texels = max_mask_texel_border;
-    #endif
-#else
-    static const float mask_resize_num_tiles = 1.0 + 2.0*max_mask_tile_border;
-    static const float mask_start_texels = max_mask_texel_border;
-#endif
-
-//  We have to fit mask_resize_num_tiles into an FBO with a viewport scale of
-//  mask_resize_viewport_scale.  This limits the maximum final triad size.
-//  Estimate the minimum number of triads we can split the screen into in each
-//  dimension (we'll be as correct as mask_resize_viewport_scale is):
-static const float mask_resize_num_triads =
-    mask_resize_num_tiles * mask_triads_per_tile;
-static const float2 min_allowed_viewport_triads =
-    mask_resize_num_triads.xx / mask_resize_viewport_scale;
-
-#endif  //  DERIVED_SETTINGS_AND_CONSTANTS_H
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/gamma-management.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/gamma-management.fxh
@@ -1,545 +0,0 @@
-#ifndef GAMMA_MANAGEMENT_H
-#define GAMMA_MANAGEMENT_H
-
-/////////////////////////////////  MIT LICENSE  ////////////////////////////////
-
-//  Copyright (C) 2014 TroggleMonkey
-//
-//  Permission is hereby granted, free of charge, to any person obtaining a copy
-//  of this software and associated documentation files (the "Software"), to
-//  deal in the Software without restriction, including without limitation the
-//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-//  sell copies of the Software, and to permit persons to whom the Software is
-//  furnished to do so, subject to the following conditions:
-//  
-//  The above copyright notice and this permission notice shall be included in
-//  all copies or substantial portions of the Software.
-//
-//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-//  IN THE SOFTWARE.
-
-/////////////////////////////////  DESCRIPTION  ////////////////////////////////
-
-//  This file provides gamma-aware tex*D*() and encode_output() functions.
-//  Requires:   Before #include-ing this file, the including file must #define
-//              the following macros when applicable and follow their rules:
-//              1.) #define FIRST_PASS if this is the first pass.
-//              2.) #define LAST_PASS if this is the last pass.
-//              3.) If sRGB is available, set srgb_framebufferN = "true" for
-//                  every pass except the last in your .cgp preset.
-//              4.) If sRGB isn't available but you want gamma-correctness with
-//                  no banding, #define GAMMA_ENCODE_EVERY_FBO each pass.
-//              5.) #define SIMULATE_CRT_ON_LCD if desired (precedence over 5-7)
-//              6.) #define SIMULATE_GBA_ON_LCD if desired (precedence over 6-7)
-//              7.) #define SIMULATE_LCD_ON_CRT if desired (precedence over 7)
-//              8.) #define SIMULATE_GBA_ON_CRT if desired (precedence over -)
-//              If an option in [5, 8] is #defined in the first or last pass, it
-//              should be #defined for both.  It shouldn't make a difference
-//              whether it's #defined for intermediate passes or not.
-//  Optional:   The including file (or an earlier included file) may optionally
-//              #define a number of macros indicating it will override certain
-//              macros and associated constants are as follows:
-//              static constants with either static or uniform constants.  The
-//              1.) OVERRIDE_STANDARD_GAMMA: The user must first define:
-//                  static const float ntsc_gamma
-//                  static const float pal_gamma
-//                  static const float crt_reference_gamma_high
-//                  static const float crt_reference_gamma_low
-//                  static const float lcd_reference_gamma
-//                  static const float crt_office_gamma
-//                  static const float lcd_office_gamma
-//              2.) OVERRIDE_DEVICE_GAMMA: The user must first define:
-//                  static const float crt_gamma
-//                  static const float gba_gamma
-//                  static const float lcd_gamma
-//              3.) OVERRIDE_FINAL_GAMMA: The user must first define:
-//                  static const float input_gamma
-//                  static const float intermediate_gamma
-//                  static const float output_gamma
-//                  (intermediate_gamma is for GAMMA_ENCODE_EVERY_FBO.)
-//              4.) OVERRIDE_ALPHA_ASSUMPTIONS: The user must first define:
-//                  static const bool assume_opaque_alpha
-//              The gamma constant overrides must be used in every pass or none,
-//              and OVERRIDE_FINAL_GAMMA bypasses all of the SIMULATE* macros.
-//              OVERRIDE_ALPHA_ASSUMPTIONS may be set on a per-pass basis.
-//  Usage:      After setting macros appropriately, ignore gamma correction and
-//              replace all tex*D*() calls with equivalent gamma-aware
-//              tex*D*_linearize calls, except:
-//              1.) When you read an LUT, use regular tex*D or a gamma-specified
-//                  function, depending on its gamma encoding:
-//                      tex*D*_linearize_gamma (takes a runtime gamma parameter)
-//              2.) If you must read pass0's original input in a later pass, use
-//                  tex2D_linearize_ntsc_gamma.  If you want to read pass0's
-//                  input with gamma-corrected bilinear filtering, consider
-//                  creating a first linearizing pass and reading from the input
-//                  of pass1 later.
-//              Then, return encode_output(color) from every fragment shader.
-//              Finally, use the global gamma_aware_bilinear boolean if you want
-//              to statically branch based on whether bilinear filtering is
-//              gamma-correct or not (e.g. for placing Gaussian blur samples).
-//
-//  Detailed Policy:
-//  tex*D*_linearize() functions enforce a consistent gamma-management policy
-//  based on the FIRST_PASS and GAMMA_ENCODE_EVERY_FBO settings.  They assume
-//  their input texture has the same encoding characteristics as the input for
-//  the current pass (which doesn't apply to the exceptions listed above).
-//  Similarly, encode_output() enforces a policy based on the LAST_PASS and
-//  GAMMA_ENCODE_EVERY_FBO settings.  Together, they result in one of the
-//  following two pipelines.
-//  Typical pipeline with intermediate sRGB framebuffers:
-//      linear_color = pow(pass0_encoded_color, input_gamma);
-//      intermediate_output = linear_color;     //  Automatic sRGB encoding
-//      linear_color = intermediate_output;     //  Automatic sRGB decoding
-//      final_output = pow(intermediate_output, 1.0/output_gamma);
-//  Typical pipeline without intermediate sRGB framebuffers:
-//      linear_color = pow(pass0_encoded_color, input_gamma);
-//      intermediate_output = pow(linear_color, 1.0/intermediate_gamma);
-//      linear_color = pow(intermediate_output, intermediate_gamma);
-//      final_output = pow(intermediate_output, 1.0/output_gamma);
-//  Using GAMMA_ENCODE_EVERY_FBO is much slower, but it's provided as a way to
-//  easily get gamma-correctness without banding on devices where sRGB isn't
-//  supported.
-//
-//  Use This Header to Maximize Code Reuse:
-//  The purpose of this header is to provide a consistent interface for texture
-//  reads and output gamma-encoding that localizes and abstracts away all the
-//  annoying details.  This greatly reduces the amount of code in each shader
-//  pass that depends on the pass number in the .cgp preset or whether sRGB
-//  FBO's are being used: You can trivially change the gamma behavior of your
-//  whole pass by commenting or uncommenting 1-3 #defines.  To reuse the same
-//  code in your first, Nth, and last passes, you can even put it all in another
-//  header file and #include it from skeleton .cg files that #define the
-//  appropriate pass-specific settings.
-//
-//  Rationale for Using Three Macros:
-//  This file uses GAMMA_ENCODE_EVERY_FBO instead of an opposite macro like
-//  SRGB_PIPELINE to ensure sRGB is assumed by default, which hopefully imposes
-//  a lower maintenance burden on each pass.  At first glance it seems we could
-//  accomplish everything with two macros: GAMMA_CORRECT_IN / GAMMA_CORRECT_OUT.
-//  This works for simple use cases where input_gamma == output_gamma, but it
-//  breaks down for more complex scenarios like CRT simulation, where the pass
-//  number determines the gamma encoding of the input and output.
-
-
-///////////////////////////////  BASE CONSTANTS  ///////////////////////////////
-
-//  Set standard gamma constants, but allow users to override them:
-#ifndef OVERRIDE_STANDARD_GAMMA
-    //  Standard encoding gammas:
-    static const float ntsc_gamma = 2.2;    //  Best to use NTSC for PAL too?
-    static const float pal_gamma = 2.8;     //  Never actually 2.8 in practice
-    //  Typical device decoding gammas (only use for emulating devices):
-    //  CRT/LCD reference gammas are higher than NTSC and Rec.709 video standard
-    //  gammas: The standards purposely undercorrected for an analog CRT's
-    //  assumed 2.5 reference display gamma to maintain contrast in assumed
-    //  [dark] viewing conditions: http://www.poynton.com/PDFs/GammaFAQ.pdf
-    //  These unstated assumptions about display gamma and perceptual rendering
-    //  intent caused a lot of confusion, and more modern CRT's seemed to target
-    //  NTSC 2.2 gamma with circuitry.  LCD displays seem to have followed suit
-    //  (they struggle near black with 2.5 gamma anyway), especially PC/laptop
-    //  displays designed to view sRGB in bright environments.  (Standards are
-    //  also in flux again with BT.1886, but it's underspecified for displays.)
-    static const float crt_reference_gamma_high = 2.5;  //  In (2.35, 2.55)
-    static const float crt_reference_gamma_low = 2.35;  //  In (2.35, 2.55)
-    static const float lcd_reference_gamma = 2.5;       //  To match CRT
-    static const float crt_office_gamma = 2.2;  //  Circuitry-adjusted for NTSC
-    static const float lcd_office_gamma = 2.2;  //  Approximates sRGB
-#endif  //  OVERRIDE_STANDARD_GAMMA
-
-//  Assuming alpha == 1.0 might make it easier for users to avoid some bugs,
-//  but only if they're aware of it.
-#ifndef OVERRIDE_ALPHA_ASSUMPTIONS
-    static const bool assume_opaque_alpha = false;
-#endif
-
-
-///////////////////////  DERIVED CONSTANTS AS FUNCTIONS  ///////////////////////
-
-//  gamma-management.h should be compatible with overriding gamma values with
-//  runtime user parameters, but we can only define other global constants in
-//  terms of static constants, not uniform user parameters.  To get around this
-//  limitation, we need to define derived constants using functions.
-
-//  Set device gamma constants, but allow users to override them:
-#ifdef OVERRIDE_DEVICE_GAMMA
-    //  The user promises to globally define the appropriate constants:
-    float get_crt_gamma()    {   return crt_gamma;   }
-    float get_gba_gamma()    {   return gba_gamma;   }
-    float get_lcd_gamma()    {   return lcd_gamma;   }
-#else
-    float get_crt_gamma()    {   return crt_reference_gamma_high;    }
-    float get_gba_gamma()    {   return 3.5; }   //  Game Boy Advance; in (3.0, 4.0)
-    float get_lcd_gamma()    {   return lcd_office_gamma;            }
-#endif  //  OVERRIDE_DEVICE_GAMMA
-
-//  Set decoding/encoding gammas for the first/lass passes, but allow overrides:
-#ifdef OVERRIDE_FINAL_GAMMA
-    //  The user promises to globally define the appropriate constants:
-    float get_intermediate_gamma()   {   return intermediate_gamma;  }
-    float get_input_gamma()          {   return input_gamma;         }
-    float get_output_gamma()         {   return output_gamma;        }
-#else
-    //  If we gamma-correct every pass, always use ntsc_gamma between passes to
-    //  ensure middle passes don't need to care if anything is being simulated:
-    float get_intermediate_gamma()   {   return ntsc_gamma;          }
-    #ifdef SIMULATE_CRT_ON_LCD
-        float get_input_gamma()      {   return get_crt_gamma();     }
-        float get_output_gamma()     {   return get_lcd_gamma();     }
-    #else
-    #ifdef SIMULATE_GBA_ON_LCD
-        float get_input_gamma()      {   return get_gba_gamma();     }
-        float get_output_gamma()     {   return get_lcd_gamma();     }
-    #else
-    #ifdef SIMULATE_LCD_ON_CRT
-        float get_input_gamma()      {   return get_lcd_gamma();     }
-        float get_output_gamma()     {   return get_crt_gamma();     }
-    #else
-    #ifdef SIMULATE_GBA_ON_CRT
-        float get_input_gamma()      {   return get_gba_gamma();     }
-        float get_output_gamma()     {   return get_crt_gamma();     }
-    #else   //  Don't simulate anything:
-        float get_input_gamma()      {   return ntsc_gamma;          }
-        float get_output_gamma()     {   return ntsc_gamma;          }
-    #endif  //  SIMULATE_GBA_ON_CRT
-    #endif  //  SIMULATE_LCD_ON_CRT
-    #endif  //  SIMULATE_GBA_ON_LCD
-    #endif  //  SIMULATE_CRT_ON_LCD
-#endif  //  OVERRIDE_FINAL_GAMMA
-
-//  Set decoding/encoding gammas for the current pass.  Use static constants for
-//  linearize_input and gamma_encode_output, because they aren't derived, and
-//  they let the compiler do dead-code elimination.
-#ifndef GAMMA_ENCODE_EVERY_FBO
-    #ifdef FIRST_PASS
-        static const bool linearize_input = true;
-        float get_pass_input_gamma()     {   return get_input_gamma();   }
-    #else
-        static const bool linearize_input = false;
-        float get_pass_input_gamma()     {   return 1.0;                 }
-    #endif
-    #ifdef LAST_PASS
-        static const bool gamma_encode_output = true;
-        float get_pass_output_gamma()    {   return get_output_gamma();  }
-    #else
-        static const bool gamma_encode_output = false;
-        float get_pass_output_gamma()    {   return 1.0;                 }
-    #endif
-#else
-    static const bool linearize_input = true;
-    static const bool gamma_encode_output = true;
-    #ifdef FIRST_PASS
-        float get_pass_input_gamma()     {   return get_input_gamma();   }
-    #else
-        float get_pass_input_gamma()     {   return get_intermediate_gamma();    }
-    #endif
-    #ifdef LAST_PASS
-        float get_pass_output_gamma()    {   return get_output_gamma();  }
-    #else
-        float get_pass_output_gamma()    {   return get_intermediate_gamma();    }
-    #endif
-#endif
-
-//  Users might want to know if bilinear filtering will be gamma-correct:
-static const bool gamma_aware_bilinear = !linearize_input;
-
-
-//////////////////////  COLOR ENCODING/DECODING FUNCTIONS  /////////////////////
-
-float4 encode_output(const float4 color)
-{
-    if(gamma_encode_output)
-    {
-        if(assume_opaque_alpha)
-        {
-            return float4(pow(color.rgb, 1.0/get_pass_output_gamma()), 1.0);
-        }
-        else
-        {
-            return float4(pow(color.rgb, 1.0/get_pass_output_gamma()), color.a);
-        }
-    }
-    else
-    {
-        return color;
-    }
-}
-
-float4 decode_input(const float4 color)
-{
-        return color;
-}
-
-float4 decode_input_first(const float4 color)
-{
-        if(assume_opaque_alpha)
-        {
-            return float4(pow(color.rgb, get_input_gamma()), 1.0);
-        }
-        else
-        {
-            return float4(pow(color.rgb, get_input_gamma()), color.a);
-        }
-}
-
-
-float4 decode_gamma_input(const float4 color, const float3 gamma)
-{
-    if(assume_opaque_alpha)
-    {
-        return float4(pow(color.rgb, gamma), 1.0);
-    }
-    else
-    {
-        return float4(pow(color.rgb, gamma), color.a);
-    }
-}
-
-
-///////////////////////////  TEXTURE LOOKUP WRAPPERS  //////////////////////////
-
-//  "SMART" LINEARIZING TEXTURE LOOKUP FUNCTIONS:
-//  Provide a wide array of linearizing texture lookup wrapper functions.  The
-//  Cg shader spec Retroarch uses only allows for 2D textures, but 1D and 3D
-//  lookups are provided for completeness in case that changes someday.  Nobody
-//  is likely to use the *fetch and *proj functions, but they're included just
-//  in case.  The only tex*D texture sampling functions omitted are:
-//      - tex*Dcmpbias
-//      - tex*Dcmplod
-//      - tex*DARRAY*
-//      - tex*DMS*
-//      - Variants returning integers
-//  Standard line length restrictions are ignored below for vertical brevity.
-/*
-//  tex1D:
-float4 tex1D_linearize(const sampler1D tex, const float tex_coords)
-{   return decode_input(tex1D(tex, tex_coords));   }
-
-float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords)
-{   return decode_input(tex1D(tex, tex_coords));   }
-
-float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const int texel_off)
-{   return decode_input(tex1D(tex, tex_coords, texel_off));    }
-
-float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const int texel_off)
-{   return decode_input(tex1D(tex, tex_coords, texel_off));    }
-
-float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const float dx, const float dy)
-{   return decode_input(tex1D(tex, tex_coords, dx, dy));   }
-
-float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const float dx, const float dy)
-{   return decode_input(tex1D(tex, tex_coords, dx, dy));   }
-
-float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const float dx, const float dy, const int texel_off)
-{   return decode_input(tex1D(tex, tex_coords, dx, dy, texel_off));    }
-
-float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const float dx, const float dy, const int texel_off)
-{   return decode_input(tex1D(tex, tex_coords, dx, dy, texel_off));    }
-
-//  tex1Dbias:
-float4 tex1Dbias_linearize(const sampler1D tex, const float4 tex_coords)
-{   return decode_input(tex1Dbias(tex, tex_coords));   }
-
-float4 tex1Dbias_linearize(const sampler1D tex, const float4 tex_coords, const int texel_off)
-{   return decode_input(tex1Dbias(tex, tex_coords, texel_off));    }
-
-//  tex1Dfetch:
-float4 tex1Dfetch_linearize(const sampler1D tex, const int4 tex_coords)
-{   return decode_input(tex1Dfetch(tex, tex_coords));  }
-
-float4 tex1Dfetch_linearize(const sampler1D tex, const int4 tex_coords, const int texel_off)
-{   return decode_input(tex1Dfetch(tex, tex_coords, texel_off));   }
-
-//  tex1Dlod:
-float4 tex1Dlod_linearize(const sampler1D tex, const float4 tex_coords)
-{   return decode_input(tex1Dlod(tex, tex_coords));    }
-
-float4 tex1Dlod_linearize(const sampler1D tex, const float4 tex_coords, const int texel_off)
-{   return decode_input(tex1Dlod(tex, tex_coords, texel_off));     }
-
-//  tex1Dproj:
-float4 tex1Dproj_linearize(const sampler1D tex, const float2 tex_coords)
-{   return decode_input(tex1Dproj(tex, tex_coords));   }
-
-float4 tex1Dproj_linearize(const sampler1D tex, const float3 tex_coords)
-{   return decode_input(tex1Dproj(tex, tex_coords));   }
-
-float4 tex1Dproj_linearize(const sampler1D tex, const float2 tex_coords, const int texel_off)
-{   return decode_input(tex1Dproj(tex, tex_coords, texel_off));    }
-
-float4 tex1Dproj_linearize(const sampler1D tex, const float3 tex_coords, const int texel_off)
-{   return decode_input(tex1Dproj(tex, tex_coords, texel_off));    }
-*/
-//  tex2D:
-float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords)
-{   return decode_input(tex2D(tex, tex_coords));   }
-
-float4 tex2D_linearize_first(const sampler2D tex, const float2 tex_coords)
-{   return decode_input_first(tex2D(tex, tex_coords));   }
-
-float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords)
-{   return decode_input(tex2D(tex, tex_coords.xy));   }
-
-//float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const int texel_off)
-//{   return decode_input(tex2D(tex, tex_coords, texel_off));    }
-
-//float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const int texel_off)
-//{   return decode_input(tex2D(tex, tex_coords.xy, texel_off));    }
-/*
-float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy)
-{   return decode_input(tex2D(tex, tex_coords, dx, dy));   }
-
-float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy)
-{   return decode_input(tex2D(tex, tex_coords, dx, dy));   }
-
-float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const int texel_off)
-{   return decode_input(tex2D(tex, tex_coords, dx, dy, texel_off));    }
-
-float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const int texel_off)
-{   return decode_input(tex2D(tex, tex_coords, dx, dy, texel_off));    }
-
-//  tex2Dbias:
-float4 tex2Dbias_linearize(const sampler2D tex, const float4 tex_coords)
-{   return decode_input(tex2Dbias(tex, tex_coords));   }
-
-float4 tex2Dbias_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off)
-{   return decode_input(tex2Dbias(tex, tex_coords, texel_off));    }
-
-//  tex2Dfetch:
-float4 tex2Dfetch_linearize(const sampler2D tex, const int4 tex_coords)
-{   return decode_input(tex2Dfetch(tex, tex_coords));  }
-
-float4 tex2Dfetch_linearize(const sampler2D tex, const int4 tex_coords, const int texel_off)
-{   return decode_input(tex2Dfetch(tex, tex_coords, texel_off));   }
-*/
-//  tex2Dlod:
-float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords)
-{   return decode_input(tex2Dlod(tex, tex_coords));    }
-
-//float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off)
-//{   return decode_input(tex2Dlod(tex, tex_coords, texel_off));     }
-/*
-//  tex2Dproj:
-float4 tex2Dproj_linearize(const sampler2D tex, const float3 tex_coords)
-{   return decode_input(tex2Dproj(tex, tex_coords));   }
-
-float4 tex2Dproj_linearize(const sampler2D tex, const float4 tex_coords)
-{   return decode_input(tex2Dproj(tex, tex_coords));   }
-
-float4 tex2Dproj_linearize(const sampler2D tex, const float3 tex_coords, const int texel_off)
-{   return decode_input(tex2Dproj(tex, tex_coords, texel_off));    }
-
-float4 tex2Dproj_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off)
-{   return decode_input(tex2Dproj(tex, tex_coords, texel_off));    }
-
-//  tex3D:
-float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords)
-{   return decode_input(tex3D(tex, tex_coords));   }
-
-float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const int texel_off)
-{   return decode_input(tex3D(tex, tex_coords, texel_off));    }
-
-float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const float3 dx, const float3 dy)
-{   return decode_input(tex3D(tex, tex_coords, dx, dy));   }
-
-float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const float3 dx, const float3 dy, const int texel_off)
-{   return decode_input(tex3D(tex, tex_coords, dx, dy, texel_off));    }
-
-//  tex3Dbias:
-float4 tex3Dbias_linearize(const sampler3D tex, const float4 tex_coords)
-{   return decode_input(tex3Dbias(tex, tex_coords));   }
-
-float4 tex3Dbias_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off)
-{   return decode_input(tex3Dbias(tex, tex_coords, texel_off));    }
-
-//  tex3Dfetch:
-float4 tex3Dfetch_linearize(const sampler3D tex, const int4 tex_coords)
-{   return decode_input(tex3Dfetch(tex, tex_coords));  }
-
-float4 tex3Dfetch_linearize(const sampler3D tex, const int4 tex_coords, const int texel_off)
-{   return decode_input(tex3Dfetch(tex, tex_coords, texel_off));   }
-
-//  tex3Dlod:
-float4 tex3Dlod_linearize(const sampler3D tex, const float4 tex_coords)
-{   return decode_input(tex3Dlod(tex, tex_coords));    }
-
-float4 tex3Dlod_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off)
-{   return decode_input(tex3Dlod(tex, tex_coords, texel_off));     }
-
-//  tex3Dproj:
-float4 tex3Dproj_linearize(const sampler3D tex, const float4 tex_coords)
-{   return decode_input(tex3Dproj(tex, tex_coords));   }
-
-float4 tex3Dproj_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off)
-{   return decode_input(tex3Dproj(tex, tex_coords, texel_off));    }
-
-
-//  NONSTANDARD "SMART" LINEARIZING TEXTURE LOOKUP FUNCTIONS:
-//  This narrow selection of nonstandard tex2D* functions can be useful:
-
-//  tex2Dlod0: Automatically fill in the tex2D LOD parameter for mip level 0.
-float4 tex2Dlod0_linearize(const sampler2D tex, const float2 tex_coords)
-{   return decode_input(tex2Dlod(tex, float4(tex_coords, 0.0, 0.0)));   }
-
-float4 tex2Dlod0_linearize(const sampler2D tex, const float2 tex_coords, const int texel_off)
-{   return decode_input(tex2Dlod(tex, float4(tex_coords, 0.0, 0.0), texel_off));    }
-
-
-//  MANUALLY LINEARIZING TEXTURE LOOKUP FUNCTIONS:
-//  Provide a narrower selection of tex2D* wrapper functions that decode an
-//  input sample with a specified gamma value.  These are useful for reading
-//  LUT's and for reading the input of pass0 in a later pass.
-
-//  tex2D:
-float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float3 gamma)
-{   return decode_gamma_input(tex2D(tex, tex_coords), gamma);   }
-
-float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float3 gamma)
-{   return decode_gamma_input(tex2D(tex, tex_coords), gamma);   }
-
-float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const int texel_off, const float3 gamma)
-{   return decode_gamma_input(tex2D(tex, tex_coords, texel_off), gamma);    }
-
-float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const int texel_off, const float3 gamma)
-{   return decode_gamma_input(tex2D(tex, tex_coords, texel_off), gamma);    }
-
-float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const float3 gamma)
-{   return decode_gamma_input(tex2D(tex, tex_coords, dx, dy), gamma);   }
-
-float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const float3 gamma)
-{   return decode_gamma_input(tex2D(tex, tex_coords, dx, dy), gamma);   }
-
-float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const int texel_off, const float3 gamma)
-{   return decode_gamma_input(tex2D(tex, tex_coords, dx, dy, texel_off), gamma);    }
-
-float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const int texel_off, const float3 gamma)
-{   return decode_gamma_input(tex2D(tex, tex_coords, dx, dy, texel_off), gamma);    }
-
-//  tex2Dbias:
-float4 tex2Dbias_linearize_gamma(const sampler2D tex, const float4 tex_coords, const float3 gamma)
-{   return decode_gamma_input(tex2Dbias(tex, tex_coords), gamma);   }
-
-float4 tex2Dbias_linearize_gamma(const sampler2D tex, const float4 tex_coords, const int texel_off, const float3 gamma)
-{   return decode_gamma_input(tex2Dbias(tex, tex_coords, texel_off), gamma);    }
-
-//  tex2Dfetch:
-float4 tex2Dfetch_linearize_gamma(const sampler2D tex, const int4 tex_coords, const float3 gamma)
-{   return decode_gamma_input(tex2Dfetch(tex, tex_coords), gamma);  }
-
-float4 tex2Dfetch_linearize_gamma(const sampler2D tex, const int4 tex_coords, const int texel_off, const float3 gamma)
-{   return decode_gamma_input(tex2Dfetch(tex, tex_coords, texel_off), gamma);   }
-*/
-//  tex2Dlod:
-float4 tex2Dlod_linearize_gamma(const sampler2D tex, const float4 tex_coords, const float3 gamma)
-{   return decode_gamma_input(tex2Dlod(tex, tex_coords), gamma);    }
-
-//float4 tex2Dlod_linearize_gamma(const sampler2D tex, const float4 tex_coords, const int texel_off, const float3 gamma)
-//{   return decode_gamma_input(tex2Dlod(tex, tex_coords, texel_off), gamma);     }
-
-
-#endif  //  GAMMA_MANAGEMENT_H
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/helper-functions-and-macros.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/helper-functions-and-macros.fxh
@@ -1,76 +0,0 @@
-#ifndef _HELPER_FUNCTIONS_AND_MACROS_H
-#define _HELPER_FUNCTIONS_AND_MACROS_H
-
-/////////////////////////////////  MIT LICENSE  ////////////////////////////////
-
-//  Copyright (C) 2020 Alex Gunter
-//
-//  Permission is hereby granted, free of charge, to any person obtaining a copy
-//  of this software and associated documentation files (the "Software"), to
-//  deal in the Software without restriction, including without limitation the
-//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-//  sell copies of the Software, and to permit persons to whom the Software is
-//  furnished to do so, subject to the following conditions:
-//  
-//  The above copyright notice and this permission notice shall be included in
-//  all copies or substantial portions of the Software.
-//
-//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-//  IN THE SOFTWARE.
-
-
-float4 tex2D_nograd(sampler2D tex, float2 tex_coords)
-{
-    return tex2Dlod(tex, float4(tex_coords, 0, 0), 0.0);
-}
-
-// ReShade 4 does not permit the use of functions or the ternary operator
-// outside of a function definition. This is a problem for this port
-// because the original crt-royale shader makes heavy use of these
-// constructs at the root level.
-
-// These preprocessor definitions are a workaround for this limitation.
-// Note that they are strictly intended for defining complex global
-// constants. I doubt they're more performant than the built-in
-// equivalents, so I recommend using the built-ins whenever you can.
-
-
-#define macro_sign(c) -((int) ((c) != 0)) * -((int) ((c) > 0))
-#define macro_abs(c) (c) * macro_sign(c)
-
-#define macro_min(c, d) (c) * ((int) ((c) <= (d))) + (d) * ((int) ((c) > (d)))
-#define macro_max(c, d) (c) * ((int) ((c) >= (d))) + (d) * ((int) ((c) < (d)))
-#define macro_clamp(c, l, u) macro_min(macro_max(c, l), u)
-
-#define macro_ceil(c) (float) ((int) (c) + (int) (((int) (c)) < (c)))
-
-#define macro_cond(c, a, b) float(c) * (a) + float(!(c)) * (b)
-
-
-
-////////////////////////  COMMON MATHEMATICAL CONSTANTS  ///////////////////////
-
-static const float pi = 3.141592653589;
-//  We often want to find the location of the previous texel, e.g.:
-//      const float2 curr_texel = uv * texture_size;
-//      const float2 prev_texel = floor(curr_texel - float2(0.5)) + float2(0.5);
-//      const float2 prev_texel_uv = prev_texel / texture_size;
-//  However, many GPU drivers round incorrectly around exact texel locations.
-//  We need to subtract a little less than 0.5 before flooring, and some GPU's
-//  require this value to be farther from 0.5 than others; define it here.
-//      const float2 prev_texel =
-//          floor(curr_texel - float2(under_half)) + float2(0.5);
-static const float under_half = 0.4995;
-
-//  Avoid dividing by zero; using a macro overloads for float, float2, etc.:
-#define FIX_ZERO(c) (macro_max(macro_abs(c), 0.0000152587890625))   //  2^-16
-
-// #define fmod(x, y) ((x) - (y) * floor((x)/(y) + FIX_ZERO(0.0)))
-#define fmod(x, y) (frac((x) / (y)) * (y))
-
-#endif  //  _HELPER_FUNCTIONS_AND_MACROS_H
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/phosphor-mask-resizing.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/phosphor-mask-resizing.fxh
@@ -1,676 +0,0 @@
-#ifndef PHOSPHOR_MASK_RESIZING_H
-#define PHOSPHOR_MASK_RESIZING_H
-
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "user-settings.fxh"
-#include "derived-settings-and-constants.fxh"
-
-/////////////////////////////  CODEPATH SELECTION  /////////////////////////////
-
-//  Choose a looping strategy based on what's allowed:
-//  Dynamic loops not allowed: Use a flat static loop
-//      (USE_SINGLE_STATIC_LOOP).
-//  Dynamic loops accommodated: Coarsely branch around static loops
-//      (BREAK_LOOPS_INTO_PIECES).
-//  Dynamic loops assumed allowed: Use a flat dynamic loop
-//      (neither macro is defined).
-//  Both macros are consumed by the resampling functions further down.
-#ifndef DRIVERS_ALLOW_DYNAMIC_BRANCHES
-    #ifdef ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS
-        #define BREAK_LOOPS_INTO_PIECES
-    #else
-        #define USE_SINGLE_STATIC_LOOP
-    #endif
-#endif  //  No else needed: Dynamic loops assumed.
-
-
-//////////////////////////////////  CONSTANTS  /////////////////////////////////
-
-//  The larger the resized tile, the fewer samples we'll need for downsizing.
-//  See if we can get a static min tile size > mask_min_allowed_tile_size:
-//  Smallest tile width the user settings permit: triad size times triads per
-//  tile, rounded up to a whole number.
-static const float mask_min_allowed_tile_size = macro_ceil(
-    mask_min_allowed_triad_size * mask_triads_per_tile);
-//  Currently identical to the allowed minimum (no tighter static bound).
-static const float mask_min_expected_tile_size = 
-        mask_min_allowed_tile_size;
-//  Limit the number of sinc resize taps by the maximum minification factor:
-//  pi_over_lobes is the argument scale of the Lanczos window term.
-static const float pi_over_lobes = pi/mask_sinc_lobes;
-//  Worst case: 2*lobes taps, scaled by (LUT width / smallest expected tile).
-static const float max_sinc_resize_samples_float = 2.0 * mask_sinc_lobes *
-    mask_resize_src_lut_size.x/mask_min_expected_tile_size;
-//  Vectorized loops sample in multiples of 4.  Round up to be safe:
-static const float max_sinc_resize_samples_m4 = macro_ceil(
-    max_sinc_resize_samples_float * 0.25) * 4.0;
-
-
-/////////////////////////  RESAMPLING FUNCTION HELPERS  ////////////////////////
-
-float get_dynamic_loop_size(const float magnification_scale)
-{
-    //  Purpose:    Decide how many texels the sinc downsizer reads for the
-    //              given magnification_scale.
-    //  Requires:   These global constants must be defined:
-    //              1.) mask_sinc_lobes
-    //              2.) max_sinc_resize_samples_m4
-    //  Returns:    The tap count, rounded up to a multiple of 4 and capped
-    //              at the static maximum for the active codepath.
-    //  The kernel covers 2*lobes OUTPUT pixels (not input texels), so the
-    //  number of input texels under it grows as the scale shrinks:
-    const float taps_exact = 2.0 * mask_sinc_lobes / magnification_scale;
-    //  Samples are consumed four at a time; round up to the next group:
-    const float taps_rounded = ceil(taps_exact * 0.25) * 4.0;
-    #ifndef DRIVERS_ALLOW_DYNAMIC_BRANCHES
-        //  Loops simulated with branches top out at 128 samples.
-        const float taps_cap = min(128.0, max_sinc_resize_samples_m4);
-    #else
-        const float taps_cap = max_sinc_resize_samples_m4;
-    #endif
-    return min(taps_rounded, taps_cap);
-}
-
-float2 get_first_texel_tile_uv_and_dist(const float2 tex_uv,
-    const float2 texture_size, const float dr,
-    const float input_tiles_per_texture_r, const float samples,
-    const bool vertical)
-{
-    //  Purpose:    Locate the first (topmost or leftmost) texel of the
-    //              resampling window and measure its distance from the
-    //              output position.
-    //  Requires:   1.) dr is 1.0/texture_size.x or 1.0/texture_size.y,
-    //                  matching the direction being resampled (a scalar, to
-    //                  save register space).
-    //              2.) input_tiles_per_texture_r is how many input tiles fit
-    //                  in the input texture along that direction.
-    //              3.) vertical selects the y components (true) or the x
-    //                  components (false) of the result.
-    //  Returns:    float2(tile_uv coord of the first sample, its texel
-    //              distance from the destination pixel), for the resized
-    //              dimension only.
-    //  Both axes are computed as though this were a one-pass 2D resize; the
-    //  axis that isn't being resized is simply dropped at the end.
-    const float2 center_texel = tex_uv * texture_size;
-    //  under_half sits slightly below 0.5 to dodge driver rounding errors at
-    //  exact texel locations:
-    const float2 texel_before = floor(center_texel - under_half.xx) + 0.5.xx;
-    const float2 window_start =
-        texel_before - float2(samples.xx/2.0.xx - 1.0.xx);
-    const float2 start_dist_2D = center_texel - window_start;
-    //  Texel coords -> tex_uv coords -> tile_uv coords (still unwrapped):
-    const float2 start_tile_uv_wrap_2D =
-        window_start * dr * input_tiles_per_texture_r;
-    //  Wrap with frac().  Every sample gets wrapped later as well, but the
-    //  first texel is the one that can start out negative, so add 1.0 on any
-    //  axis where it did:
-    const float2 was_negative = float2(start_tile_uv_wrap_2D < 0.0.xx);
-    const float2 start_tile_uv_2D =
-        frac(start_tile_uv_wrap_2D) + was_negative;
-    //  Pack the coord and distance for the requested axis:
-    if(vertical)
-    {
-        return float2(start_tile_uv_2D.y, start_dist_2D.y);
-    }
-    return float2(start_tile_uv_2D.x, start_dist_2D.x);
-}
-
-float4 tex2Dlod0try(const sampler2D tex, const float2 tex_uv)
-{
-    //  Sinc-resampling confuses mipmapping and anisotropic filtering, so the
-    //  compatibility macros let the user force the lowest mip level (slow):
-    //  explicit LOD 0 if ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD is defined,
-    //  else a -16 LOD bias if ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS is
-    //  defined, else an ordinary tex2D fetch.
-    #ifndef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
-        #ifndef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
-            const float4 texel = tex2D(tex, tex_uv);
-        #else
-            const float4 texel = tex2Dbias(tex, float4(tex_uv, 0.0, -16.0));
-        #endif
-    #else
-        const float4 texel = tex2Dlod(tex, float4(tex_uv, 0.0, 0.0));
-    #endif
-    return texel;
-}
-
-
-//////////////////////////////  LOOP BODY MACROS  //////////////////////////////
-
-//  Using functions can exceed the temporary register limit, so we're
-//  stuck with #define macros (I'm TRULY sorry).  They're declared here instead
-//  of above to be closer to the actual invocation sites.  Steps:
-//  1.) Get the exact texel location.
-//  2.) Sample the phosphor mask (already assumed encoded in linear RGB).
-//  3.) Get the distance from the current pixel and sinc weight:
-//          sinc(dist) = sin(pi * dist)/(pi * dist)
-//      We can also use the slower/smoother Lanczos instead:
-//          L(x) = sinc(dist) * sinc(dist / lobes)
-//  4.) Accumulate the weight sum in weights, and accumulate the weighted texels
-//      in pixel_color (we'll normalize outside the loop at the end).
-//  We vectorize the loop to help reduce the Lanczos window's cost.
-//
-//  The macros capture names from the expansion site rather than taking
-//  arguments.  The enclosing scope must provide: i_base, i,
-//  first_texel_tile_uv_rrrr, tile_dr, tile_size_uv_r, magnification_scale,
-//  first_dist_unscaled, tex, tex_uv, pixel_color and weight_sum.  They also
-//  declare const locals (true_i, tile_uv_r, tex_uv_r, dist, pi_dist, weights,
-//  new_sample0..3), so each expansion needs a scope of its own, such as a
-//  loop body.
-
-    //  The r coord is the coord in the dimension we're resizing along (u or v),
-    //  and first_texel_tile_uv_rrrr is a float4 of the first texel's u or v
-    //  tile_uv coord in [0, 1].  tex_uv_r will contain the tile_uv u or v coord
-    //  for four new texel samples.
-    #define CALCULATE_R_COORD_FOR_4_SAMPLES                                    \
-        const float4 true_i = float4(i_base + i,i_base + i,i_base + i,i_base + i) + float4(0.0, 1.0, 2.0, 3.0); \
-        const float4 tile_uv_r = frac(                                         \
-            first_texel_tile_uv_rrrr + true_i * tile_dr);                      \
-        const float4 tex_uv_r = tile_uv_r * tile_size_uv_r;
-
-    //  Four weights at once from pi_dist (and dist for the Lanczos window);
-    //  each weight is capped at 1.0.
-    #ifdef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
-        #define CALCULATE_SINC_RESAMPLE_WEIGHTS                                \
-            const float4 pi_dist_over_lobes = pi_over_lobes * dist;            \
-            const float4 weights = min(sin(pi_dist) * sin(pi_dist_over_lobes) /\
-                (pi_dist*pi_dist_over_lobes), 1.0.xxxx);
-    #else
-        #define CALCULATE_SINC_RESAMPLE_WEIGHTS                                \
-            const float4 weights = min(sin(pi_dist)/pi_dist, 1.0.xxxx);
-    #endif
-
-    //  Scale each sample's texel distance by magnification_scale, weight
-    //  new_sample0..3, and add the results to the running sums.
-    #define UPDATE_COLOR_AND_WEIGHT_SUMS                                       \
-        const float4 dist = magnification_scale *                              \
-            abs(first_dist_unscaled - true_i);                                 \
-        const float4 pi_dist = pi * dist;                                      \
-        CALCULATE_SINC_RESAMPLE_WEIGHTS;                                       \
-        pixel_color += new_sample0 * weights.xxx;                              \
-        pixel_color += new_sample1 * weights.yyy;                              \
-        pixel_color += new_sample2 * weights.zzz;                              \
-        pixel_color += new_sample3 * weights.www;                              \
-        weight_sum += weights;
-
-    //  Vertical pass: u stays at tex_uv.x while v takes the four r coords.
-    #define VERTICAL_SINC_RESAMPLE_LOOP_BODY                                   \
-        CALCULATE_R_COORD_FOR_4_SAMPLES;                                       \
-        const float3 new_sample0 = tex2Dlod0try(tex,                       \
-            float2(tex_uv.x, tex_uv_r.x)).rgb;                                 \
-        const float3 new_sample1 = tex2Dlod0try(tex,                       \
-            float2(tex_uv.x, tex_uv_r.y)).rgb;                                 \
-        const float3 new_sample2 = tex2Dlod0try(tex,                       \
-            float2(tex_uv.x, tex_uv_r.z)).rgb;                                 \
-        const float3 new_sample3 = tex2Dlod0try(tex,                       \
-            float2(tex_uv.x, tex_uv_r.w)).rgb;                                 \
-        UPDATE_COLOR_AND_WEIGHT_SUMS;
-
-    //  Horizontal pass: v stays at tex_uv.y while u takes the four r coords.
-    #define HORIZONTAL_SINC_RESAMPLE_LOOP_BODY                                 \
-        CALCULATE_R_COORD_FOR_4_SAMPLES;                                       \
-        const float3 new_sample0 = tex2Dlod0try(tex,                       \
-            float2(tex_uv_r.x, tex_uv.y)).rgb;                                 \
-        const float3 new_sample1 = tex2Dlod0try(tex,                       \
-            float2(tex_uv_r.y, tex_uv.y)).rgb;                                 \
-        const float3 new_sample2 = tex2Dlod0try(tex,                       \
-            float2(tex_uv_r.z, tex_uv.y)).rgb;                                 \
-        const float3 new_sample3 = tex2Dlod0try(tex,                       \
-            float2(tex_uv_r.w, tex_uv.y)).rgb;                                 \
-        UPDATE_COLOR_AND_WEIGHT_SUMS;
-
-
-////////////////////////////  RESAMPLING FUNCTIONS  ////////////////////////////
-
-float3 downsample_vertical_sinc_tiled(const sampler2D tex,
-    const float2 tex_uv, const float2 texture_size, const float dr,
-    const float magnification_scale, const float tile_size_uv_r)
-{
-    //  Requires:   1.) dr == du == 1.0/texture_size.x or
-    //                  dr == dv == 1.0/texture_size.y
-    //                  (whichever direction we're resampling in).
-    //                  It's a scalar to save register space.
-    //              2.) tile_size_uv_r is the number of texels an input tile
-    //                  takes up in the input texture, in the direction we're
-    //                  resampling this pass.
-    //              3.) magnification_scale must be <= 1.0.
-    //  Returns:    Return a [Lanczos] sinc-resampled pixel of a vertically
-    //              downsized input tile embedded in an input texture.  (The
-    //              vertical version is special-cased though: It assumes the
-    //              tile size equals the [static] texture size, since it's used
-    //              on an LUT texture input containing one tile.  For more
-    //              generic use, eliminate the "static" in the parameters.)
-    //  The "r" in "dr," "tile_size_uv_r," etc. refers to the dimension
-    //  we're resizing along, e.g. "dy" in this case.
-    //  Local names below (i_base, i, tile_dr, weight_sum, pixel_color, ...)
-    //  are read by VERTICAL_SINC_RESAMPLE_LOOP_BODY; do not rename them
-    //  without updating the loop body macros.
-    #ifdef USE_SINGLE_STATIC_LOOP
-        //  A static loop can be faster, but it might blur too much from using
-        //  more samples than it should.
-        static const int samples = int(max_sinc_resize_samples_m4);
-    #else
-        const int samples = int(get_dynamic_loop_size(magnification_scale));
-    #endif
-
-    //  Get the first sample location (scalar tile uv coord along the resized
-    //  dimension) and distance from the output location (in texels):
-    //  NOTE(review): this static local is initialized from a parameter, which
-    //  presumably relies on every call passing the same tile_size_uv_r (see
-    //  the special-casing note above) -- confirm.
-    static const float input_tiles_per_texture_r = 1.0/tile_size_uv_r;
-    //  true = vertical resize:
-    const float2 first_texel_tile_r_and_dist = get_first_texel_tile_uv_and_dist(
-        tex_uv, texture_size, dr, input_tiles_per_texture_r, samples, true);
-    const float4 first_texel_tile_uv_rrrr = first_texel_tile_r_and_dist.xxxx;
-    const float4 first_dist_unscaled = first_texel_tile_r_and_dist.yyyy;
-    //  Get the tile sample offset:
-    static const float tile_dr = dr * input_tiles_per_texture_r;
-
-    //  Sum up each weight and weighted sample color, varying the looping
-    //  strategy based on our expected dynamic loop capabilities.  See the
-    //  loop body macros above.
-    int i_base = 0;
-    float4 weight_sum = 0.0.xxxx;
-    float3 pixel_color = 0.0.xxx;
-    static const int i_step = 4;
-    #ifdef BREAK_LOOPS_INTO_PIECES
-        //  Fixed-length pieces of 64, 32, 16, 8 and 4 samples; each piece runs
-        //  only if at least that many samples remain, and i_base tracks how
-        //  many samples earlier pieces already consumed.
-        if(samples - i_base >= 64)
-        {
-            for(int i = 0; i < 64; i += i_step)
-            {
-                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-            i_base += 64;
-        }
-        if(samples - i_base >= 32)
-        {
-            for(int i = 0; i < 32; i += i_step)
-            {
-                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-            i_base += 32;
-        }
-        if(samples - i_base >= 16)
-        {
-            for(int i = 0; i < 16; i += i_step)
-            {
-                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-            i_base += 16;
-        }
-        if(samples - i_base >= 8)
-        {
-            for(int i = 0; i < 8; i += i_step)
-            {
-                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-            i_base += 8;
-        }
-        if(samples - i_base >= 4)
-        {
-            for(int i = 0; i < 4; i += i_step)
-            {
-                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-            i_base += 4;
-        }
-        //  Do another 4-sample block for a total of 128 max samples.
-        if(samples - i_base > 0)
-        {
-            for(int i = 0; i < 4; i += i_step)
-            {
-                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-        }
-    #else
-        for(int i = 0; i < samples; i += i_step)
-        {
-            VERTICAL_SINC_RESAMPLE_LOOP_BODY;
-        }
-    #endif
-    //  Normalize so the weight_sum == 1.0, and return:
-    //  (The four per-lane weight sums are first reduced to one scalar.)
-    const float2 weight_sum_reduce = weight_sum.xy + weight_sum.zw;
-    const float3 scalar_weight_sum = float3(weight_sum_reduce.xxx + 
-        weight_sum_reduce.yyy);
-    return (pixel_color/scalar_weight_sum);
-}
-
-float3 downsample_horizontal_sinc_tiled(const sampler2D tex,
-    const float2 tex_uv, const float2 texture_size, const float dr,
-    const float magnification_scale, const float tile_size_uv_r)
-{
-    //  Returns:    A [Lanczos] sinc-resampled pixel of a horizontally
-    //              downsized input tile embedded in an input texture.
-    //  Differences from downsample_vertical_sinc_tiled:
-    //  1.) The locals derived from dr and tile_size_uv_r
-    //      (input_tiles_per_texture_r, tile_dr) are not static consts.
-    //  2.) The "vertical" parameter to get_first_texel_tile_uv_and_dist is
-    //      set to false instead of true.
-    //  3.) The horizontal version of the loop body is used.
-    //  TODO: If we can get guaranteed compile-time dead code elimination,
-    //  we can combine the vertical/horizontal downsampling functions by:
-    //  1.) Add an extra static const bool parameter called "vertical."
-    //  2.) Supply it with the result of get_first_texel_tile_uv_and_dist().
-    //  3.) Use a conditional assignment in the loop body macro.  This is the
-    //      tricky part: We DO NOT want to incur the extra conditional
-    //      assignment in the inner loop at runtime!
-    //  The "r" in "dr," "tile_size_uv_r," etc. refers to the dimension
-    //  we're resizing along, e.g. "dx" in this case.
-    //  Local names below (i_base, i, tile_dr, weight_sum, pixel_color, ...)
-    //  are read by HORIZONTAL_SINC_RESAMPLE_LOOP_BODY; do not rename them
-    //  without updating the loop body macros.
-    #ifdef USE_SINGLE_STATIC_LOOP
-        //  If we have to load all samples, we might as well use them.
-        static const int samples = int(max_sinc_resize_samples_m4);
-    #else
-        const int samples = int(get_dynamic_loop_size(magnification_scale));
-    #endif
-
-    //  Get the first sample location (scalar tile uv coord along resized
-    //  dimension) and distance from the output location (in texels):
-    const float input_tiles_per_texture_r = 1.0/tile_size_uv_r;
-    //  false = horizontal resize:
-    const float2 first_texel_tile_r_and_dist = get_first_texel_tile_uv_and_dist(
-        tex_uv, texture_size, dr, input_tiles_per_texture_r, samples, false);
-    const float4 first_texel_tile_uv_rrrr = first_texel_tile_r_and_dist.xxxx;
-    const float4 first_dist_unscaled = first_texel_tile_r_and_dist.yyyy;
-    //  Get the tile sample offset:
-    const float tile_dr = dr * input_tiles_per_texture_r;
-
-    //  Sum up each weight and weighted sample color, varying the looping
-    //  strategy based on our expected dynamic loop capabilities.  See the
-    //  loop body macros above.
-    int i_base = 0;
-    float4 weight_sum = 0.0.xxxx;
-    float3 pixel_color = 0.0.xxx;
-    static const int i_step = 4;
-    #ifdef BREAK_LOOPS_INTO_PIECES
-        //  Fixed-length pieces of 64, 32, 16, 8 and 4 samples; each piece runs
-        //  only if at least that many samples remain, and i_base tracks how
-        //  many samples earlier pieces already consumed.
-        if(samples - i_base >= 64)
-        {
-            for(int i = 0; i < 64; i += i_step)
-            {
-                HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-            i_base += 64;
-        }
-        if(samples - i_base >= 32)
-        {
-            for(int i = 0; i < 32; i += i_step)
-            {
-                HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-            i_base += 32;
-        }
-        if(samples - i_base >= 16)
-        {
-            for(int i = 0; i < 16; i += i_step)
-            {
-                HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-            i_base += 16;
-        }
-        if(samples - i_base >= 8)
-        {
-            for(int i = 0; i < 8; i += i_step)
-            {
-                HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-            i_base += 8;
-        }
-        if(samples - i_base >= 4)
-        {
-            for(int i = 0; i < 4; i += i_step)
-            {
-                HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-            i_base += 4;
-        }
-        //  Do another 4-sample block for a total of 128 max samples.
-        if(samples - i_base > 0)
-        {
-            for(int i = 0; i < 4; i += i_step)
-            {
-                HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
-            }
-        }
-    #else
-        for(int i = 0; i < samples; i += i_step)
-        {
-            HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
-        }
-    #endif
-    //  Normalize so the weight_sum == 1.0, and return:
-    //  (The four per-lane weight sums are first reduced to one scalar.)
-    const float2 weight_sum_reduce = weight_sum.xy + weight_sum.zw;
-    const float3 scalar_weight_sum = float3(weight_sum_reduce.xxx +
-        weight_sum_reduce.yyy);
-    return (pixel_color/scalar_weight_sum);
-}
-
-
-////////////////////////////  TILE SIZE CALCULATION  ///////////////////////////
-
-float2 get_resized_mask_tile_size(const float2 estimated_viewport_size,
-    const float2 estimated_mask_resize_output_size,
-    const bool solemnly_swear_same_inputs_for_every_pass)
-{
-    //  Requires:   The following global constants must be defined according to
-    //              certain constraints:
-    //              1.) mask_resize_num_triads: Must be high enough that our
-    //                  mask sampling method won't have artifacts later
-    //                  (long story; see derived-settings-and-constants.fxh)
-    //              2.) mask_resize_src_lut_size: Texel size of our mask LUT
-    //              3.) mask_triads_per_tile: Num horizontal triads in our LUT
-    //              4.) mask_min_allowed_triad_size: User setting (the more
-    //                  restrictive it is, the faster the resize will go)
-    //              5.) mask_min_allowed_tile_size_x < mask_resize_src_lut_size.x
-    //              6.) mask_triad_size_desired_{runtime, static}
-    //              7.) mask_num_triads_desired_{runtime, static}
-    //              8.) mask_specify_num_triads must be 0.0/1.0 (false/true)
-    //              The function parameters must be defined as follows:
-    //              1.) estimated_viewport_size == (final viewport size);
-    //                  If mask_specify_num_triads is 1.0/true and the viewport
-    //                  estimate is wrong, the number of triads will differ from
-    //                  the user's preference by about the same factor.
-    //              2.) estimated_mask_resize_output_size: Must equal the
-    //                  output size of the MASK_RESIZE pass.
-    //                  Exception: The x component may be estimated garbage if
-    //                  and only if the caller throws away the x result.
-    //              3.) solemnly_swear_same_inputs_for_every_pass: Set to false,
-    //                  unless you can guarantee that every call across every
-    //                  pass will use the same sizes for the other parameters.
-    //              When calling this across multiple passes, always use the
-    //              same y viewport size/scale, and always use the same x
-    //              viewport size/scale when using the x result.
-    //  Returns:    Return the final size of a manually resized mask tile, after
-    //              constraining the desired size to avoid artifacts.  Under
-    //              unusual circumstances, tiles may become stretched vertically
-    //              (see wall of text below).
-    //  Stated tile properties must be correct:
-    static const float tile_aspect_ratio_inv =
-        mask_resize_src_lut_size.y/mask_resize_src_lut_size.x;
-    static const float tile_aspect_ratio = 1.0/tile_aspect_ratio_inv;
-    static const float2 tile_aspect = float2(1.0, tile_aspect_ratio_inv);
-    //  If mask_specify_num_triads is 1.0/true and estimated_viewport_size.x is
-    //  wrong, the user preference will be misinterpreted:
-    //  (The lerp picks the desired triad size at 0.0, or the viewport width
-    //  divided by the desired triad count at 1.0.)
-    const float desired_tile_size_x = mask_triads_per_tile * lerp(
-        mask_triad_size_desired,
-        estimated_viewport_size.x / mask_num_triads_desired,
-        mask_specify_num_triads);
-    if(get_mask_sample_mode() > 0.5)
-    {
-        //  We don't need constraints unless we're sampling MASK_RESIZE.
-        return desired_tile_size_x * tile_aspect;
-    }
-    //  Make sure we're not upsizing:
-    const float temp_tile_size_x =
-        min(desired_tile_size_x, mask_resize_src_lut_size.x);
-    //  Enforce min_tile_size and max_tile_size in both dimensions:
-    const float2 temp_tile_size = temp_tile_size_x * tile_aspect;
-    static const float2 min_tile_size =
-        mask_min_allowed_tile_size * tile_aspect;
-    const float2 max_tile_size =
-        estimated_mask_resize_output_size / mask_resize_num_tiles;
-    const float2 clamped_tile_size =
-        clamp(temp_tile_size, min_tile_size, max_tile_size);
-    //  Try to maintain tile_aspect_ratio.  This is the tricky part:
-    //  If we're currently resizing in the y dimension, the x components
-    //  could be MEANINGLESS.  (If estimated_mask_resize_output_size.x is
-    //  bogus, then so is max_tile_size.x and clamped_tile_size.x.)
-    //  We can't adjust the y size based on clamped_tile_size.x.  If it
-    //  clamps when it shouldn't, it won't clamp again when later passes
-    //  call this function with the correct sizes, and the discrepancy will
-    //  break the sampling coords in MASKED_SCANLINES.  Instead, we'll limit
-    //  the x size based on the y size, but not vice versa, unless the
-    //  caller swears the parameters were the same (correct) in every pass.
-    //  As a result, triads could appear vertically stretched if:
-    //  a.) mask_resize_src_lut_size.x > mask_resize_src_lut_size.y: Wide
-    //      LUT's might clamp x more than y (all provided LUT's are square)
-    //  b.) true_viewport_size.x < true_viewport_size.y: The user is playing
-    //      with a vertically oriented screen (not accounted for anyway)
-    //  c.) mask_resize_viewport_scale.x < mask_resize_viewport_scale.y:
-    //      Viewport scales are equal by default.
-    //  If any of these are the case, you can fix the stretching by setting:
-    //      mask_resize_viewport_scale.x = mask_resize_viewport_scale.y *
-    //          (1.0 / min_expected_aspect_ratio) *
-    //          (mask_resize_src_lut_size.x / mask_resize_src_lut_size.y)
-    const float x_tile_size_from_y =
-        clamped_tile_size.y * tile_aspect_ratio;
-    //  With the flag false, the lerp returns clamped_tile_size.y unchanged, so
-    //  the min() below leaves y alone:
-    const float y_tile_size_from_x = lerp(clamped_tile_size.y,
-        clamped_tile_size.x * tile_aspect_ratio_inv,
-        float(solemnly_swear_same_inputs_for_every_pass));
-    const float2 reclamped_tile_size = float2(
-        min(clamped_tile_size.x, x_tile_size_from_y),
-        min(clamped_tile_size.y, y_tile_size_from_x));
-    //  We need integer tile sizes in both directions for tiled sampling to
-    //  work correctly.  Use floor (to make sure we don't round up), but be
-    //  careful to avoid a rounding bug where floor decreases whole numbers:
-    //  (FIX_ZERO(0.0) contributes the small 2^-16 nudge before flooring.)
-    const float2 final_resized_tile_size =
-        floor(reclamped_tile_size + float2(FIX_ZERO(0.0),FIX_ZERO(0.0)));
-    return final_resized_tile_size;
-}
-
-
-/////////////////////////  FINAL MASK SAMPLING HELPERS  ////////////////////////
-
-float4 get_mask_sampling_parameters(const float2 mask_resize_texture_size,
-    const float2 mask_resize_video_size, const float2 true_viewport_size,
-    out float2 mask_tiles_per_screen)
-{
-    //  Purpose:    Work out where the mask tile lives in its texture and how
-    //              many tiles cover the screen.
-    //  Requires:   1.) Everything get_resized_mask_tile_size() requires,
-    //                  particularly its global constants.
-    //              2.) mask_resize_texture_size == MASK_RESIZE.texture_size
-    //                  when get_mask_sample_mode() is 0 (otherwise anything)
-    //              3.) mask_resize_video_size == MASK_RESIZE.video_size
-    //                  when get_mask_sample_mode() is 0 (otherwise anything)
-    //              4.) true_viewport_size == IN.output_size for a pass set to
-    //                  1.0 viewport scale (i.e. it must be correct)
-    //  Returns:    float4 with
-    //                  xy: tex_uv coords where the mask tile starts
-    //                  zw: tex_uv size of the mask tile
-    //              mask_tiles_per_screen (out) receives the number of mask
-    //              tiles that fit on the screen.
-    //  The resized tile size is requested WITHOUT swearing that every pass
-    //  used identical inputs: a false promise would give inconsistent sizes
-    //  across passes and break the texture coordinates.
-    const float sample_mode = get_mask_sample_mode();
-    const float2 resized_tile_size = get_resized_mask_tile_size(
-        true_viewport_size, mask_resize_video_size, false);
-    if(sample_mode < 0.5)
-    {
-        //  Sampling MASK_RESIZE: the tile occupies a fraction of the texture
-        //  and begins at a nonzero offset that leaves room for border texels.
-        const float2 tile_uv_extent =
-            resized_tile_size / mask_resize_texture_size;
-        const float2 tile_uv_origin =
-            (mask_start_texels/resized_tile_size) * tile_uv_extent;
-        //  The tile count must come from the *true* viewport size:
-        mask_tiles_per_screen = true_viewport_size / resized_tile_size;
-        return float4(tile_uv_origin, tile_uv_extent);
-    }
-    //  Hardware sampling: the "tile" is the entire texture, either the full
-    //  LUT repeated 1:1 or a single unresized tile resampled by hardware.
-    if(sample_mode > 1.5)
-    {
-        //  Full LUT repeated at a 1:1 pixel:texel ratio, no resizing:
-        mask_tiles_per_screen = true_viewport_size/mask_texture_large_size;
-    }
-    else
-    {
-        //  Original LUT, resized by the hardware:
-        mask_tiles_per_screen = true_viewport_size / resized_tile_size;
-    }
-    //  Start at uv (0, 0) with a uv size of (1, 1):
-    return float4(0.0.xx, 1.0.xx);
-}
-
-float2 fix_tiling_discontinuities_normalized(const float2 tile_uv,
-    float2 duv_dx, float2 duv_dy)
-{
-    //  Purpose:    Remove the derivative discontinuity that appears where
-    //              tile-relative coords wrap from 1.0 back to 0.0.
-    //  Requires:   1.) duv_dx == ddx(tile_uv)
-    //              2.) duv_dy == ddy(tile_uv)
-    //              3.) tile_uv holds tile-relative uv coords in [0, 1], with
-    //                  (0.5, 0.5) at the tile center.  ("Tile" may be a
-    //                  texture, the video embedded in it, or any other tile
-    //                  embedded in a texture.)
-    //  Returns:    tile_uv adjusted so a 2x2 pixel quad sees no jump.
-    //  A wrap is assumed wherever the summed absolute differences across the
-    //  quad exceed about half a tile.  On such an axis, a fragment sitting in
-    //  the first half of the tile (the side that wrapped to 0.0) gets one
-    //  whole tile added, landing at a coord > 1.0 in the padding beyond the
-    //  tile.  Derivatives arrive as parameters so the caller can reuse them.
-    //  With high-quality (nVidia-style) derivatives, fold in the cross
-    //  derivative too, so diagonally opposite fragments see each other:
-    const float2 span_x = abs(duv_dx) + abs(ddy(duv_dx));
-    const float2 span_y = abs(duv_dy) + abs(ddx(duv_dy));
-    const float2 has_jump = float2(span_x + span_y > 0.5.xx);
-    const float2 in_first_half = float2(tile_uv < 0.5.xx);
-    return tile_uv + has_jump * in_first_half;
-}
-
-float2 convert_phosphor_tile_uv_wrap_to_tex_uv(const float2 tile_uv_wrap,
-    const float4 mask_tile_start_uv_and_size)
-{
-    //  Purpose:    Turn repeating tile-relative coords into the tex_uv coords
-    //              used to sample the phosphor mask.
-    //  Requires:   1.) tile_uv_wrap holds tile-relative uv coords where one
-    //                  tile spans [0, 1] and (0.5, 0.5) is its center.  Inputs
-    //                  may range over [0, inf]; their fractional parts map to
-    //                  a repeated tile.
-    //              2.) mask_tile_start_uv_and_size.xy: tex_uv coords where the
-    //                  embedded tile starts in the full texture.
-    //              3.) mask_tile_start_uv_and_size.zw: the [fractional] tex_uv
-    //                  size of the embedded tile in the full texture.
-    //  Returns:    tex_uv coords corresponding to tile_uv_wrap.
-    //  Default (hardware tiling): whether the "tile" is the whole texture at
-    //  a 1:1 pixel:texel ratio (mode 2) or a single unresized tile, the
-    //  wrapped coords are already correct and pass straight through.
-    float2 result_uv = tile_uv_wrap;
-    if(get_mask_sample_mode() < 0.5)
-    {
-        //  Manual tiling of the resized mask: take the fractional tile
-        //  coords.  frac/fmod on coords confuses anisotropic filtering, so
-        //  the user's compatibility options pick the workaround
-        //  (derived-settings-and-constants.fxh disables incompatible ones).
-        #ifndef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
-            float2 wrapped_uv = frac(tile_uv_wrap);
-        #else
-            float2 wrapped_uv = frac(tile_uv_wrap * 0.5) * 2.0;
-        #endif
-        #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
-            wrapped_uv = fix_tiling_discontinuities_normalized(wrapped_uv,
-                ddx(wrapped_uv), ddy(wrapped_uv));
-        #endif
-        //  The tile sits inside a padded FBO and may begin at a nonzero
-        //  offset when border texels are used to avoid artifacts:
-        result_uv = mask_tile_start_uv_and_size.xy +
-            wrapped_uv * mask_tile_start_uv_and_size.zw;
-    }
-    return result_uv;
-}
-
-
-#endif  //  PHOSPHOR_MASK_RESIZING_H
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/quad-pixel-communication.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/quad-pixel-communication.fxh
@@ -1,243 +0,0 @@
-#ifndef QUAD_PIXEL_COMMUNICATION_H
-#define QUAD_PIXEL_COMMUNICATION_H
-
-/////////////////////////////////  MIT LICENSE  ////////////////////////////////
-
-//  Copyright (C) 2014 TroggleMonkey*
-//
-//  Permission is hereby granted, free of charge, to any person obtaining a copy
-//  of this software and associated documentation files (the "Software"), to
-//  deal in the Software without restriction, including without limitation the
-//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-//  sell copies of the Software, and to permit persons to whom the Software is
-//  furnished to do so, subject to the following conditions:
-//
-//  The above copyright notice and this permission notice shall be included in
-//  all copies or substantial portions of the Software.
-//
-//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-//  IN THE SOFTWARE.
-
-/////////////////////////////////  DISCLAIMER  /////////////////////////////////
-
-//  *This code was inspired by "Shader Amortization using Pixel Quad Message
-//  Passing" by Eric Penner, published in GPU Pro 2, Chapter VI.2.  My intent
-//  is not to plagiarize his fundamentally similar code and assert my own
-//  copyright, but the algorithmic helper functions require so little code that
-//  implementations can't vary by much except bugfixes and conventions.  I just
-//  wanted to license my own particular code here to avoid ambiguity and make it
-//  clear that as far as I'm concerned, people can do as they please with it.
-
-/////////////////////////////////  DESCRIPTION  ////////////////////////////////
-
-//  Given screen pixel numbers, derive a "quad vector" describing a fragment's
-//  position in its 2x2 pixel quad.  Given that vector, obtain the values of any
-//  variable at neighboring fragments.
-//  Requires:   Using this file in general requires:
-//              1.) ddx() and ddy() are present in the current Cg profile.
-//              2.) The GPU driver is using fine/high-quality derivatives.
-//                  Functions will give incorrect results if this is not true,
-//                  so a test function is included.
-
-
-/////////////////////  QUAD-PIXEL COMMUNICATION PRIMITIVES  ////////////////////
-
-float4 get_quad_vector_naive(const float4 output_pixel_num_wrt_uvxy)
-{
-    //  Requires:   Two measures of the current fragment's output pixel number
-    //              in the range ([0, IN.output_size.x), [0, IN.output_size.y)):
-    //              1.) output_pixel_num_wrt_uvxy.xy increase with uv coords.
-    //              2.) output_pixel_num_wrt_uvxy.zw increase with screen xy.
-    //  Returns:    Two measures of the fragment's position in its 2x2 quad:
-    //              1.) The .xy components are its 2x2 placement with respect to
-    //                  uv direction (the origin (0, 0) is at the top-left):
-    //                  top-left     = (-1.0, -1.0) top-right    = ( 1.0, -1.0)
-    //                  bottom-left  = (-1.0,  1.0) bottom-right = ( 1.0,  1.0)
-    //                  You need this to arrange/weight shared texture samples.
-    //              2.) The .zw components are its 2x2 placement with respect to
-    //                  screen xy direction (IN.position); the origin varies.
-    //                  quad_gather needs this measure to work correctly.
-    //              Note: quad_vector.zw = quad_vector.xy * float2(
-    //                      ddx(output_pixel_num_wrt_uvxy.x),
-    //                      ddy(output_pixel_num_wrt_uvxy.y));
-    //  Caveats:    This function assumes the GPU driver always starts 2x2 pixel
-    //              quads at even pixel numbers.  This assumption can be wrong
-    //              for odd output resolutions (nondeterministically so).
-    const float4 pixel_odd = frac(output_pixel_num_wrt_uvxy * 0.5) * 2.0;
-    const float4 quad_vector = pixel_odd * 2.0 - 1.0.xxxx;
-    return quad_vector;
-}
-
-float4 get_quad_vector(const float4 output_pixel_num_wrt_uvxy)
-{
-    //  Requires:   Same as get_quad_vector_naive() (see that first).
-    //  Returns:    Same as get_quad_vector_naive() (see that first), but it's
-    //              correct even if the 2x2 pixel quad starts at an odd pixel,
-    //              which can occur at odd resolutions.
-    const float4 quad_vector_guess =
-        get_quad_vector_naive(output_pixel_num_wrt_uvxy);
-    //  If quad_vector_guess.zw doesn't increase with screen xy, we know
-    //  the 2x2 pixel quad starts at an odd pixel:
-    const float2 odd_start_mirror = 0.5 * float2(ddx(quad_vector_guess.z),
-                                                ddy(quad_vector_guess.w));
-    return quad_vector_guess * odd_start_mirror.xyxy;
-}
-
-float4 get_quad_vector(const float2 output_pixel_num_wrt_uv)
-{
-    //  Requires:   1.) ddx() and ddy() are present in the current Cg profile.
-    //              2.) output_pixel_num_wrt_uv must increase with uv coords and
-    //                  measure the current fragment's output pixel number in:
-    //                      ([0, IN.output_size.x), [0, IN.output_size.y))
-    //  Returns:    Same as get_quad_vector_naive() (see that first), but it's
-    //              correct even if the 2x2 pixel quad starts at an odd pixel,
-    //              which can occur at odd resolutions.
-    //  Caveats:    This function requires less information than the version
-    //              taking a float4, but it's potentially slower.
-    //  Do screen coords increase with or against uv?  Get the direction
-    //  with respect to (uv.x, uv.y) for (screen.x, screen.y) in {-1, 1}.
-    const float2 screen_uv_mirror = float2(ddx(output_pixel_num_wrt_uv.x),
-                                        ddy(output_pixel_num_wrt_uv.y));
-    const float2 pixel_odd_wrt_uv = frac(output_pixel_num_wrt_uv * 0.5) * 2.0;
-    const float2 quad_vector_uv_guess = (pixel_odd_wrt_uv - 0.5.xx) * 2.0;
-    const float2 quad_vector_screen_guess = quad_vector_uv_guess * screen_uv_mirror;
-    //  If quad_vector_screen_guess doesn't increase with screen xy, we know
-    //  the 2x2 pixel quad starts at an odd pixel:
-    const float2 odd_start_mirror = 0.5 * float2(ddx(quad_vector_screen_guess.x),
-                                                ddy(quad_vector_screen_guess.y));
-    const float4 quad_vector_guess = float4(
-        quad_vector_uv_guess, quad_vector_screen_guess);
-    return quad_vector_guess * odd_start_mirror.xyxy;
-}
-
-void quad_gather(const float4 quad_vector, const float4 curr,
-    out float4 adjx, out float4 adjy, out float4 diag)
-{
-    //  Requires:   1.) ddx() and ddy() are present in the current Cg profile.
-    //              2.) The GPU driver is using fine/high-quality derivatives.
-    //              3.) quad_vector describes the current fragment's location in
-    //                  its 2x2 pixel quad using get_quad_vector()'s conventions.
-    //              4.) curr is any vector you wish to get neighboring values of.
-    //  Returns:    Values of an input vector (curr) at neighboring fragments
-    //              adjacent x, adjacent y, and diagonal (via out parameters).
-    adjx = curr - ddx(curr) * quad_vector.z;
-    adjy = curr - ddy(curr) * quad_vector.w;
-    diag = adjx - ddy(adjx) * quad_vector.w;
-}
-
-void quad_gather(const float4 quad_vector, const float3 curr,
-    out float3 adjx, out float3 adjy, out float3 diag)
-{
-    //  Float3 version
-    adjx = curr - ddx(curr) * quad_vector.z;
-    adjy = curr - ddy(curr) * quad_vector.w;
-    diag = adjx - ddy(adjx) * quad_vector.w;
-}
-
-void quad_gather(const float4 quad_vector, const float2 curr,
-    out float2 adjx, out float2 adjy, out float2 diag)
-{
-    //  Float2 version
-    adjx = curr - ddx(curr) * quad_vector.z;
-    adjy = curr - ddy(curr) * quad_vector.w;
-    diag = adjx - ddy(adjx) * quad_vector.w;
-}
-
-float4 quad_gather(const float4 quad_vector, const float curr)
-{
-    //  Float version:
-    //  Returns:    return.x == current
-    //              return.y == adjacent x
-    //              return.z == adjacent y
-    //              return.w == diagonal
-    float4 all = curr.xxxx;
-    all.y = all.x - ddx(all.x) * quad_vector.z;
-    all.zw = all.xy - ddy(all.xy) * quad_vector.w;
-    return all;
-}
-
-float4 quad_gather_sum(const float4 quad_vector, const float4 curr)
-{
-    //  Requires:   Same as quad_gather()
-    //  Returns:    Sum of an input vector (curr) at all fragments in a quad.
-    float4 adjx, adjy, diag;
-    quad_gather(quad_vector, curr, adjx, adjy, diag);
-    return (curr + adjx + adjy + diag);
-}
-
-float3 quad_gather_sum(const float4 quad_vector, const float3 curr)
-{
-    //  Float3 version:
-    float3 adjx, adjy, diag;
-    quad_gather(quad_vector, curr, adjx, adjy, diag);
-    return (curr + adjx + adjy + diag);
-}
-
-float2 quad_gather_sum(const float4 quad_vector, const float2 curr)
-{
-    //  Float2 version:
-    float2 adjx, adjy, diag;
-    quad_gather(quad_vector, curr, adjx, adjy, diag);
-    return (curr + adjx + adjy + diag);
-}
-
-float quad_gather_sum(const float4 quad_vector, const float curr)
-{
-    //  Float version:
-    const float4 all_values = quad_gather(quad_vector, curr);
-    return (all_values.x + all_values.y + all_values.z + all_values.w);
-}
-
-bool fine_derivatives_working(const float4 quad_vector, float4 curr)
-{
-    //  Requires:   1.) ddx() and ddy() are present in the current Cg profile.
-    //              2.) quad_vector describes the current fragment's location in
-    //                  its 2x2 pixel quad using get_quad_vector()'s conventions.
-    //              3.) curr must be a test vector with non-constant derivatives
-    //                  (its value should change nonlinearly across fragments).
-    //  Returns:    true if fine/hybrid/high-quality derivatives are used, or
-    //              false if coarse derivatives are used or inconclusive
-    //  Usage:      Test whether quad-pixel communication is working!
-    //  Method:     We can confirm fine derivatives are used if the following
-    //              holds (ever, for any value at any fragment):
-    //                  (ddy(curr) != ddy(adjx)) or (ddx(curr) != ddx(adjy))
-    //              The more values we test (e.g. test a float4 two ways), the
-    //              easier it is to demonstrate fine derivatives are working.
-    //  TODO: Check for floating point exact comparison issues!
-    float4 ddx_curr = ddx(curr);
-    float4 ddy_curr = ddy(curr);
-    float4 adjx = curr - ddx_curr * quad_vector.z;
-    float4 adjy = curr - ddy_curr * quad_vector.w;
-    bool ddy_different = any(ddy_curr != ddy(adjx));
-    bool ddx_different = any(ddx_curr != ddx(adjy));
-    return any(bool2(ddy_different, ddx_different));
-}
-
-bool fine_derivatives_working_fast(const float4 quad_vector, float curr)
-{
-    //  Requires:   Same as fine_derivatives_working()
-    //  Returns:    Same as fine_derivatives_working()
-    //  Usage:      This is faster than fine_derivatives_working() but more
-    //              likely to return false negatives, so it's less useful for
-    //              offline testing/debugging.  It's also useless as the basis
-    //              for dynamic runtime branching as of May 2014: Derivatives
-    //              (and quad-pixel communication) are currently disallowed in
-    //              branches.  However, future GPU's may allow you to use them
-    //              in dynamic branches if you promise the branch condition
-    //              evaluates the same for every fragment in the quad (and/or if
-    //              the driver enforces that promise by making a single fragment
-    //              control branch decisions).  If that ever happens, this
-    //              version may become a more economical choice.
-    float ddx_curr = ddx(curr);
-    float ddy_curr = ddy(curr);
-    float adjx = curr - ddx_curr * quad_vector.z;
-    return (ddy_curr != ddy(adjx));
-}
-
-#endif  //  QUAD_PIXEL_COMMUNICATION_H
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/scanline-functions.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/scanline-functions.fxh
@@ -1,569 +0,0 @@
-#ifndef SCANLINE_FUNCTIONS_H
-#define SCANLINE_FUNCTIONS_H
-
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "user-settings.fxh"
-#include "derived-settings-and-constants.fxh"
-#include "special-functions.fxh"
-#include "gamma-management.fxh"
-
-
-/////////////////////////////  SCANLINE FUNCTIONS  /////////////////////////////
-
-float3 get_gaussian_sigma(const float3 color, const float sigma_range)
-{
-    //  Requires:   Globals:
-    //              1.) beam_min_sigma and beam_max_sigma are global floats
-    //                  containing the desired minimum and maximum beam standard
-    //                  deviations, for dim and bright colors respectively.
-    //              2.) beam_max_sigma must be > 0.0
-    //              3.) beam_min_sigma must be in (0.0, beam_max_sigma]
-    //              4.) beam_spot_power must be defined as a global float.
-    //              Parameters:
-    //              1.) color is the underlying source color along a scanline
-    //              2.) sigma_range = beam_max_sigma - beam_min_sigma; we take
-    //                  sigma_range as a parameter to avoid repeated computation
-    //                  when beam_{min, max}_sigma are runtime shader parameters
-    //  Optional:   Users may set beam_spot_shape_function to 1 to define the
-    //              inner f(color) subfunction (see below) as:
-    //                  f(color) = sqrt(1.0 - (color - 1.0)*(color - 1.0))
-    //              Otherwise (technically, if beam_spot_shape_function < 0.5):
-    //                  f(color) = pow(color, beam_spot_power)
-    //  Returns:    The standard deviation of the Gaussian beam for "color:"
-    //                  sigma = beam_min_sigma + sigma_range * f(color)
-    //  Details/Discussion:
-    //  The beam's spot shape vaguely resembles an aspect-corrected f() in the
-    //  range [0, 1] (not quite, but it's related).  f(color) = color makes
-    //  spots look like diamonds, and a spherical function or cube balances
-    //  between variable width and a soft/realistic shape.   A beam_spot_power
-    //  > 1.0 can produce an ugly spot shape and more initial clipping, but the
-    //  final shape also differs based on the horizontal resampling filter and
-    //  the phosphor bloom.  For instance, resampling horizontally in nonlinear
-    //  light and/or with a sharp (e.g. Lanczos) filter will sharpen the spot
-    //  shape, but a sixth root is still quite soft.  A power function (default
-    //  1.0/3.0 beam_spot_power) is most flexible, but a fixed spherical curve
-    //  has the highest variability without an awful spot shape.
-    //
-    //  beam_min_sigma affects scanline sharpness/aliasing in dim areas, and its
-    //  difference from beam_max_sigma affects beam width variability.  It only
-    //  affects clipping [for pure Gaussians] if beam_spot_power > 1.0 (which is
-    //  a conservative estimate for a more complex constraint).
-    //
-    //  beam_max_sigma affects clipping and increasing scanline width/softness
-    //  as color increases.  The wider this is, the more scanlines need to be
-    //  evaluated to avoid distortion.  For a pure Gaussian, the max_beam_sigma
-    //  at which the first unused scanline always has a weight < 1.0/255.0 is:
-    //      num scanlines = 2, max_beam_sigma = 0.2089; distortions begin ~0.34
-    //      num scanlines = 3, max_beam_sigma = 0.3879; distortions begin ~0.52
-    //      num scanlines = 4, max_beam_sigma = 0.5723; distortions begin ~0.70
-    //      num scanlines = 5, max_beam_sigma = 0.7591; distortions begin ~0.89
-    //      num scanlines = 6, max_beam_sigma = 0.9483; distortions begin ~1.08
-    //  Generalized Gaussians permit more leeway here as steepness increases.
-    if(beam_spot_shape_function < 0.5)
-    {
-        //  Use a power function:
-        return beam_min_sigma.xxx + sigma_range *
-            pow(color, beam_spot_power);
-    }
-    else
-    {
-        //  Use a spherical function:
-        const float3 color_minus_1 = color - 1.0.xxx;
-        return beam_min_sigma.xxx + sigma_range *
-            sqrt(1.0.xxx - color_minus_1*color_minus_1);
-    }
-}
-
-float3 get_generalized_gaussian_beta(const float3 color,
-    const float shape_range)
-{
-    //  Requires:   Globals:
-    //              1.) beam_min_shape and beam_max_shape are global floats
-    //                  containing the desired min/max generalized Gaussian
-    //                  beta parameters, for dim and bright colors respectively.
-    //              2.) beam_max_shape must be >= 2.0
-    //              3.) beam_min_shape must be in [2.0, beam_max_shape]
-    //              4.) beam_shape_power must be defined as a global float.
-    //              Parameters:
-    //              1.) color is the underlying source color along a scanline
-    //              2.) shape_range = beam_max_shape - beam_min_shape; we take
-    //                  shape_range as a parameter to avoid repeated computation
-    //                  when beam_{min, max}_shape are runtime shader parameters
-    //  Returns:    The type-I generalized Gaussian "shape" parameter beta for
-    //              the given color.
-    //  Details/Discussion:
-    //  Beta affects the scanline distribution as follows:
-    //  a.) beta < 2.0 narrows the peak to a spike with a discontinuous slope
-    //  b.) beta == 2.0 just degenerates to a Gaussian
-    //  c.) beta > 2.0 flattens and widens the peak, then drops off more steeply
-    //      than a Gaussian.  Whereas high sigmas widen and soften peaks, high
-    //      beta widen and sharpen peaks at the risk of aliasing.
-    //  Unlike high beam_spot_powers, high beam_shape_powers actually soften shape
-    //  transitions, whereas lower ones sharpen them (at the risk of aliasing).
-    return beam_min_shape + shape_range * pow(color, beam_shape_power);
-}
-
-float3 scanline_gaussian_integral_contrib(const float3 dist,
-    const float3 color, const float pixel_height, const float sigma_range)
-{
-    //  Requires:   1.) dist is the distance of the [potentially separate R/G/B]
-    //                  point(s) from a scanline in units of scanlines, where
-    //                  1.0 means the sample point straddles the next scanline.
-    //              2.) color is the underlying source color along a scanline.
-    //              3.) pixel_height is the output pixel height in scanlines.
-    //              4.) Requirements of get_gaussian_sigma() must be met.
-    //  Returns:    Return a scanline's light output over a given pixel.
-    //  Details:
-    //  The CRT beam profile follows a roughly Gaussian distribution which is
-    //  wider for bright colors than dark ones.  The integral over the full
-    //  range of a Gaussian function is always 1.0, so we can vary the beam
-    //  with a standard deviation without affecting brightness.  'x' = distance:
-    //      gaussian sample = 1/(sigma*sqrt(2*pi)) * e**(-(x**2)/(2*sigma**2))
-    //      gaussian integral = 0.5 (1.0 + erf(x/(sigma * sqrt(2))))
-    //  Use a numerical approximation of the "error function" (the Gaussian
-    //  indefinite integral) to find the definite integral of the scanline's
-    //  average brightness over a given pixel area.  Even if curved coords were
-    //  used in this pass, a flat scalar pixel height works almost as well as a
-    //  pixel height computed from a full pixel-space to scanline-space matrix.
-    const float3 sigma = get_gaussian_sigma(color, sigma_range);
-    const float3 ph_offset = (pixel_height.xxx) * 0.5;
-    const float3 denom_inv = 1.0/(sigma*sqrt(2.0));
-    const float3 integral_high = erf((dist + ph_offset)*denom_inv);
-    const float3 integral_low = erf((dist - ph_offset)*denom_inv);
-    return color * 0.5*(integral_high - integral_low)/pixel_height;
-}
-
-float3 scanline_generalized_gaussian_integral_contrib(const float3 dist,
-    const float3 color, const float pixel_height, const float sigma_range,
-    const float shape_range)
-{
-    //  Requires:   1.) Requirements of scanline_gaussian_integral_contrib()
-    //                  must be met.
-    //              2.) Requirements of get_gaussian_sigma() must be met.
-    //              3.) Requirements of get_generalized_gaussian_beta() must be
-    //                  met.
-    //  Returns:    Return a scanline's light output over a given pixel.
-    //  A generalized Gaussian distribution allows the shape (beta) to vary
-    //  as well as the width (alpha).  "gamma" refers to the gamma function:
-    //      generalized sample =
-    //          beta/(2*alpha*gamma(1/beta)) * e**(-(|x|/alpha)**beta)
-    //  ligamma(s, z) is the lower incomplete gamma function, for which we only
-    //  implement two of four branches (because we keep 1/beta <= 0.5):
-    //      generalized integral = 0.5 + 0.5* sign(x) *
-    //          ligamma(1/beta, (|x|/alpha)**beta)/gamma(1/beta)
-    //  See get_generalized_gaussian_beta() for a discussion of beta.
-    //  We base alpha on the intended Gaussian sigma, but it only strictly
-    //  models models standard deviation at beta == 2, because the standard
-    //  deviation depends on both alpha and beta (keeping alpha independent is
-    //  faster and preserves intuitive behavior and a full spectrum of results).
-    const float3 alpha = sqrt(2.0) * get_gaussian_sigma(color, sigma_range);
-    const float3 beta = get_generalized_gaussian_beta(color, shape_range);
-    const float3 alpha_inv = 1.0.xxx/alpha;
-    const float3 s = 1.0.xxx/beta;
-    const float3 ph_offset = (pixel_height.xxx) * 0.5;
-    //  Pass beta to gamma_impl to avoid repeated divides.  Similarly pass
-    //  beta (i.e. 1/s) and 1/gamma(s) to normalized_ligamma_impl.
-    const float3 gamma_s_inv = 1.0.xxx/gamma_impl(s, beta);
-    const float3 dist1 = dist + ph_offset;
-    const float3 dist0 = dist - ph_offset;
-    const float3 integral_high = sign(dist1) * normalized_ligamma_impl(
-        s, pow(abs(dist1)*alpha_inv, beta), beta, gamma_s_inv);
-    const float3 integral_low = sign(dist0) * normalized_ligamma_impl(
-        s, pow(abs(dist0)*alpha_inv, beta), beta, gamma_s_inv);
-    return color * 0.5*(integral_high - integral_low)/pixel_height;
-}
-
-float3 scanline_gaussian_sampled_contrib(const float3 dist, const float3 color,
-    const float pixel_height, const float sigma_range)
-{
-    //  See scanline_gaussian integral_contrib() for detailed comments!
-    //  gaussian sample = 1/(sigma*sqrt(2*pi)) * e**(-(x**2)/(2*sigma**2))
-    const float3 sigma = get_gaussian_sigma(color, sigma_range);
-    //  Avoid repeated divides:
-    const float3 sigma_inv = 1.0.xxx/sigma;
-    const float3 inner_denom_inv = 0.5 * sigma_inv * sigma_inv;
-    const float3 outer_denom_inv = sigma_inv/sqrt(2.0*pi);
-    if(beam_antialias_level > 0.5)
-    {
-        //  Sample 1/3 pixel away in each direction as well:
-        const float3 sample_offset = pixel_height.xxx/3.0;
-        const float3 dist2 = dist + sample_offset;
-        const float3 dist3 = abs(dist - sample_offset);
-        //  Average three pure Gaussian samples:
-        const float3 scale = color/3.0 * outer_denom_inv;
-        const float3 weight1 = exp(-(dist*dist)*inner_denom_inv);
-        const float3 weight2 = exp(-(dist2*dist2)*inner_denom_inv);
-        const float3 weight3 = exp(-(dist3*dist3)*inner_denom_inv);
-        return scale * (weight1 + weight2 + weight3);
-    }
-    else
-    {
-        return color*exp(-(dist*dist)*inner_denom_inv)*outer_denom_inv;
-    }
-}
-
-float3 scanline_generalized_gaussian_sampled_contrib(const float3 dist,
-    const float3 color, const float pixel_height, const float sigma_range,
-    const float shape_range)
-{
-    //  See scanline_generalized_gaussian_integral_contrib() for details!
-    //  generalized sample =
-    //      beta/(2*alpha*gamma(1/beta)) * e**(-(|x|/alpha)**beta)
-    const float3 alpha = sqrt(2.0) * get_gaussian_sigma(color, sigma_range);
-    const float3 beta = get_generalized_gaussian_beta(color, shape_range);
-    //  Avoid repeated divides:
-    const float3 alpha_inv = 1.0.xxx/alpha;
-    const float3 beta_inv = 1.0.xxx/beta;
-    const float3 scale = color * beta * 0.5 * alpha_inv /
-        gamma_impl(beta_inv, beta);
-    if(beam_antialias_level > 0.5)
-    {
-        //  Sample 1/3 pixel closer to and farther from the scanline too.
-        const float3 sample_offset = pixel_height.xxx/3.0;
-        const float3 dist2 = dist + sample_offset;
-        const float3 dist3 = abs(dist - sample_offset);
-        //  Average three generalized Gaussian samples:
-        const float3 weight1 = exp(-pow(abs(dist*alpha_inv), beta));
-        const float3 weight2 = exp(-pow(abs(dist2*alpha_inv), beta));
-        const float3 weight3 = exp(-pow(abs(dist3*alpha_inv), beta));
-        return scale/3.0 * (weight1 + weight2 + weight3);
-    }
-    else
-    {
-        return scale * exp(-pow(abs(dist*alpha_inv), beta));
-    }
-}
-
-float3 scanline_contrib(float3 dist, float3 color,
-    float pixel_height, const float sigma_range, const float shape_range)
-{
-    //  Requires:   1.) Requirements of scanline_gaussian_integral_contrib()
-    //                  must be met.
-    //              2.) Requirements of get_gaussian_sigma() must be met.
-    //              3.) Requirements of get_generalized_gaussian_beta() must be
-    //                  met.
-    //  Returns:    Return a scanline's light output over a given pixel, using
-    //              a generalized or pure Gaussian distribution and sampling or
-    //              integrals as desired by user codepath choices.
-    if(beam_generalized_gaussian)
-    {
-        if(beam_antialias_level > 1.5)
-        {
-            return scanline_generalized_gaussian_integral_contrib(
-                dist, color, pixel_height, sigma_range, shape_range);
-        }
-        else
-        {
-            return scanline_generalized_gaussian_sampled_contrib(
-                dist, color, pixel_height, sigma_range, shape_range);
-        }
-    }
-    else
-    {
-        if(beam_antialias_level > 1.5)
-        {
-            return scanline_gaussian_integral_contrib(
-                dist, color, pixel_height, sigma_range);
-        }
-        else
-        {
-            return scanline_gaussian_sampled_contrib(
-                dist, color, pixel_height, sigma_range);
-        }
-    }
-}
-
-float3 get_raw_interpolated_color(const float3 color0,
-    const float3 color1, const float3 color2, const float3 color3,
-    const float4 weights)
-{
-    //  Use max to avoid bizarre artifacts from negative colors:
-    return max(mul(weights, float4x3(color0, color1, color2, color3)), 0.0);
-}
-
-float3 get_interpolated_linear_color(const float3 color0, const float3 color1,
-    const float3 color2, const float3 color3, const float4 weights)
-{
-    //  Requires:   1.) Requirements of include/gamma-management.h must be met:
-    //                  intermediate_gamma must be globally defined, and input
-    //                  colors are interpreted as linear RGB unless you #define
-    //                  GAMMA_ENCODE_EVERY_FBO (in which case they are
-    //                  interpreted as gamma-encoded with intermediate_gamma).
-    //              2.) color0-3 are colors sampled from a texture with tex2D().
-    //                  They are interpreted as defined in requirement 1.
-    //              3.) weights contains weights for each color, summing to 1.0.
-    //              4.) beam_horiz_linear_rgb_weight must be defined as a global
-    //                  float in [0.0, 1.0] describing how much blending should
-    //                  be done in linear RGB (rest is gamma-corrected RGB).
-    //              5.) RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE must be #defined
-    //                  if beam_horiz_linear_rgb_weight is anything other than a
-    //                  static constant, or we may try branching at runtime
-    //                  without dynamic branches allowed (slow).
-    //  Returns:    Return an interpolated color lookup between the four input
-    //              colors based on the weights in weights.  The final color will
-    //              be a linear RGB value, but the blending will be done as
-    //              indicated above.
-    const float intermediate_gamma = get_intermediate_gamma();
-    //  Branch if beam_horiz_linear_rgb_weight is static (for free) or if the
-    //  profile allows dynamic branches (faster than computing extra pows):
-    #ifndef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
-        #define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
-    #else
-        #ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES
-            #define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
-        #endif
-    #endif
-    #ifdef SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
-        //  beam_horiz_linear_rgb_weight is static, so we can branch:
-        #ifdef GAMMA_ENCODE_EVERY_FBO
-            const float3 gamma_mixed_color = pow(get_raw_interpolated_color(
-                color0, color1, color2, color3, weights), intermediate_gamma);
-            if(beam_horiz_linear_rgb_weight > 0.0)
-            {
-                const float3 linear_mixed_color = get_raw_interpolated_color(
-                    pow(color0, intermediate_gamma),
-                    pow(color1, intermediate_gamma),
-                    pow(color2, intermediate_gamma),
-                    pow(color3, intermediate_gamma),
-                    weights);
-                return lerp(gamma_mixed_color, linear_mixed_color,
-                    beam_horiz_linear_rgb_weight);
-            }
-            else
-            {
-                return gamma_mixed_color;
-            }
-        #else
-            const float3 linear_mixed_color = get_raw_interpolated_color(
-                color0, color1, color2, color3, weights);
-            if(beam_horiz_linear_rgb_weight < 1.0)
-            {
-                const float3 gamma_mixed_color = get_raw_interpolated_color(
-                    pow(color0, 1.0/intermediate_gamma),
-                    pow(color1, 1.0/intermediate_gamma),
-                    pow(color2, 1.0/intermediate_gamma),
-                    pow(color3, 1.0/intermediate_gamma),
-                    weights);
-                return lerp(gamma_mixed_color, linear_mixed_color,
-                    beam_horiz_linear_rgb_weight);
-            }
-            else
-            {
-                return linear_mixed_color;
-            }
-        #endif  //  GAMMA_ENCODE_EVERY_FBO
-    #else
-        #ifdef GAMMA_ENCODE_EVERY_FBO
-            //  Inputs: color0-3 are colors in gamma-encoded RGB.
-            const float3 gamma_mixed_color = pow(get_raw_interpolated_color(
-                color0, color1, color2, color3, weights), intermediate_gamma);
-            const float3 linear_mixed_color = get_raw_interpolated_color(
-                pow(color0, intermediate_gamma),
-                pow(color1, intermediate_gamma),
-                pow(color2, intermediate_gamma),
-                pow(color3, intermediate_gamma),
-                weights);
-            return lerp(gamma_mixed_color, linear_mixed_color,
-                beam_horiz_linear_rgb_weight);
-        #else
-            //  Inputs: color0-3 are colors in linear RGB.
-            const float3 linear_mixed_color = get_raw_interpolated_color(
-                color0, color1, color2, color3, weights);
-            const float3 gamma_mixed_color = get_raw_interpolated_color(
-                    pow(color0, 1.0/intermediate_gamma),
-                    pow(color1, 1.0/intermediate_gamma),
-                    pow(color2, 1.0/intermediate_gamma),
-                    pow(color3, 1.0/intermediate_gamma),
-                    weights);
-            return lerp(gamma_mixed_color, linear_mixed_color,
-                beam_horiz_linear_rgb_weight);
-        #endif  //  GAMMA_ENCODE_EVERY_FBO
-    #endif  //  SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
-}
-
-float3 get_scanline_color(const sampler2D Source, const float2 scanline_uv,
-    const float2 uv_step_x, const float4 weights)
-{
-    //  Requires:   1.) scanline_uv must be vertically snapped to the caller's
-    //                  desired line or scanline and horizontally snapped to the
-    //                  texel just left of the output pixel (color1)
-    //              2.) uv_step_x must contain the horizontal uv distance
-    //                  between texels.
-    //              3.) weights must contain interpolation filter weights for
-    //                  color0, color1, color2, and color3, where color1 is just
-    //                  left of the output pixel.
-    //  Returns:    Return a horizontally interpolated texture lookup using 2-4
-    //              nearby texels, according to weights and the conventions of
-    //              get_interpolated_linear_color().
-    //  We can ignore the outside texture lookups for Quilez resampling.
-    const float3 color1 = tex2D(Source, scanline_uv).rgb;
-    const float3 color2 = tex2D(Source, scanline_uv + uv_step_x).rgb;
-    float3 color0 = 0.0.xxx;
-    float3 color3 = 0.0.xxx;
-    if(beam_horiz_filter > 0.5)
-    {
-        color0 = tex2D(Source, scanline_uv - uv_step_x).rgb;
-        color3 = tex2D(Source, scanline_uv + 2.0 * uv_step_x).rgb;
-    }
-    //  Sample the texture as-is, whether it's linear or gamma-encoded:
-    //  get_interpolated_linear_color() will handle the difference.
-    return get_interpolated_linear_color(color0, color1, color2, color3, weights);
-}
-
-float3 sample_single_scanline_horizontal(const sampler2D Source,
-    const float2 tex_uv, const float2 texture_size,
-    const float2 texture_size_inv)
-{
-    //  TODO: Add function requirements.
-    //  Snap to the previous texel and get sample dists from 2/4 nearby texels:
-    const float2 curr_texel = tex_uv * texture_size;
-    //  Use under_half to fix a rounding bug right around exact texel locations.
-    const float2 prev_texel =
-        floor(curr_texel - under_half.xx) + 0.5.xx;
-    const float2 prev_texel_hor = float2(prev_texel.x, curr_texel.y);
-    const float2 prev_texel_hor_uv = prev_texel_hor * texture_size_inv;
-    const float prev_dist = curr_texel.x - prev_texel_hor.x;
-    const float4 sample_dists = float4(1.0 + prev_dist, prev_dist,
-        1.0 - prev_dist, 2.0 - prev_dist);
-    //  Get Quilez, Lanczos2, or Gaussian resize weights for 2/4 nearby texels:
-    float4 weights;
-    if(beam_horiz_filter < 0.5)
-    {
-        //  Quilez:
-        const float x = sample_dists.y;
-        const float w2 = x*x*x*(x*(x*6.0 - 15.0) + 10.0);
-        weights = float4(0.0, 1.0 - w2, w2, 0.0);
-    }
-    else if(beam_horiz_filter < 1.5)
-    {
-        //  Gaussian:
-        float inner_denom_inv = 1.0/(2.0*beam_horiz_sigma*beam_horiz_sigma);
-        weights = exp(-(sample_dists*sample_dists)*inner_denom_inv);
-    }
-    else
-    {
-        //  Lanczos2:
-        const float4 pi_dists = FIX_ZERO(sample_dists * pi);
-        weights = 2.0 * sin(pi_dists) * sin(pi_dists * 0.5) /
-            (pi_dists * pi_dists);
-    }
-    //  Ensure the weight sum == 1.0:
-    const float4 final_weights = weights/dot(weights, 1.0.xxxx);
-    //  Get the interpolated horizontal scanline color:
-    const float2 uv_step_x = float2(texture_size_inv.x, 0.0);
-    return get_scanline_color(
-        Source, prev_texel_hor_uv, uv_step_x, final_weights);
-}
-
-float3 sample_rgb_scanline_horizontal(const sampler2D Source,
-    const float2 tex_uv, const float2 texture_size,
-    const float2 texture_size_inv)
-{
-    //  TODO: Add function requirements.
-    //  Rely on a helper to make convergence easier.
-    if(beam_misconvergence)
-    {
-        const float3 convergence_offsets_rgb =
-            get_convergence_offsets_x_vector();
-        const float3 offset_u_rgb =
-            convergence_offsets_rgb * texture_size_inv.xxx;
-        const float2 scanline_uv_r = tex_uv - float2(offset_u_rgb.r, 0.0);
-        const float2 scanline_uv_g = tex_uv - float2(offset_u_rgb.g, 0.0);
-        const float2 scanline_uv_b = tex_uv - float2(offset_u_rgb.b, 0.0);
-        const float3 sample_r = sample_single_scanline_horizontal(
-            Source, scanline_uv_r, texture_size, texture_size_inv);
-        const float3 sample_g = sample_single_scanline_horizontal(
-            Source, scanline_uv_g, texture_size, texture_size_inv);
-        const float3 sample_b = sample_single_scanline_horizontal(
-            Source, scanline_uv_b, texture_size, texture_size_inv);
-        return float3(sample_r.r, sample_g.g, sample_b.b);
-    }
-    else
-    {
-        return sample_single_scanline_horizontal(Source, tex_uv, texture_size,
-            texture_size_inv);
-    }
-}
-
-float2 get_last_scanline_uv(const float2 tex_uv, const float2 texture_size,
-    const float2 texture_size_inv, const float2 il_step_multiple,
-    const float frame_count, out float dist)
-{
-    //  Compute texture coords for the last/upper scanline, accounting for
-    //  interlacing: With interlacing, only consider even/odd scanlines every
-    //  other frame.  Top-field first (TFF) order puts even scanlines on even
-    //  frames, and BFF order puts them on odd frames.  Texels are centered at:
-    //      frac(tex_uv * texture_size) == x.5
-    //  Caution: If these coordinates ever seem incorrect, first make sure it's
-    //  not because anisotropic filtering is blurring across field boundaries.
-    //  Note: TFF/BFF won't matter for sources that double-weave or similar.
-    const float field_offset = floor(il_step_multiple.y * 0.75) *
-        fmod(frame_count + float(interlace_bff), 2.0);
-    const float2 curr_texel = tex_uv * texture_size;
-    //  Use under_half to fix a rounding bug right around exact texel locations.
-    //  This causes an insane bug on duckstation, so it's disabled here. (Hyllian, 2024)
-//    const float2 prev_texel_num = floor(curr_texel - under_half.xx);
-    const float2 prev_texel_num = curr_texel;
-    const float wrong_field = fmod(
-        prev_texel_num.y + field_offset, il_step_multiple.y);
-    const float2 scanline_texel_num = prev_texel_num - float2(0.0, wrong_field);
-    //  Snap to the center of the previous scanline in the current field:
-    const float2 scanline_texel = scanline_texel_num + 0.5.xx;
-    const float2 scanline_uv = scanline_texel * texture_size_inv;
-    //  Save the sample's distance from the scanline, in units of scanlines:
-    dist = (curr_texel.y - scanline_texel.y)/il_step_multiple.y;
-    return scanline_uv;
-}
-
-bool is_interlaced(float num_lines)
-{
-    //  Detect interlacing based on the number of lines in the source.
-    if(interlace_detect)
-    {
-        //  NTSC: 525 lines, 262.5/field; 486 active (2 half-lines), 243/field
-        //  NTSC Emulators: Typically 224 or 240 lines
-        //  PAL: 625 lines, 312.5/field; 576 active (typical), 288/field
-        //  PAL Emulators: ?
-        //  ATSC: 720p, 1080i, 1080p
-        //  Where do we place our cutoffs?  Assumptions:
-        //  1.) We only need to care about active lines.
-        //  2.) Anything > 288 and <= 576 lines is probably interlaced.
-        //  3.) Anything > 576 lines is probably not interlaced...
-        //  4.) ...except 1080 lines, which is a crapshoot (user decision).
-        //  5.) Just in case the main program uses calculated video sizes,
-        //      we should nudge the float thresholds a bit.
-        const bool sd_interlace = ((num_lines > 288.5) && (num_lines < 576.5));
-        const bool hd_interlace = interlace_1080i ?
-            ((num_lines > 1079.5) && (num_lines < 1080.5)) :
-            false;
-        return (sd_interlace || hd_interlace);
-    }
-    else
-    {
-        return false;
-    }
-}
-
-
-#endif  //  SCANLINE_FUNCTIONS_H
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/special-functions.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/special-functions.fxh
@@ -1,498 +0,0 @@
-#ifndef SPECIAL_FUNCTIONS_H
-#define SPECIAL_FUNCTIONS_H
-
-/////////////////////////////////  MIT LICENSE  ////////////////////////////////
-
-//  Copyright (C) 2014 TroggleMonkey
-//
-//  Permission is hereby granted, free of charge, to any person obtaining a copy
-//  of this software and associated documentation files (the "Software"), to
-//  deal in the Software without restriction, including without limitation the
-//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-//  sell copies of the Software, and to permit persons to whom the Software is
-//  furnished to do so, subject to the following conditions:
-//
-//  The above copyright notice and this permission notice shall be included in
-//  all copies or substantial portions of the Software.
-//
-//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-//  IN THE SOFTWARE.
-
-
-/////////////////////////////////  DESCRIPTION  ////////////////////////////////
-
-//  This file implements the following mathematical special functions:
-//  1.) erf() = 2/sqrt(pi) * indefinite_integral(e**(-x**2))
-//  2.) gamma(s), a real-numbered extension of the integer factorial function
-//  It also implements normalized_ligamma(s, z), a normalized lower incomplete
-//  gamma function for s < 0.5 only.  Both gamma() and normalized_ligamma() can
-//  be called with an _impl suffix to use an implementation version with a few
-//  extra precomputed parameters (which may be useful for the caller to reuse).
-//  See below for details.
-//
-//  Design Rationale:
-//  Pretty much every line of code in this file is duplicated four times for
-//  different input types (float4/float3/float2/float).  This is unfortunate,
-//  but Cg doesn't allow function templates.  Macros would be far less verbose,
-//  but they would make the code harder to document and read.  I don't expect
-//  these functions will require a whole lot of maintenance changes unless
-//  someone ever has need for more robust incomplete gamma functions, so code
-//  duplication seems to be the lesser evil in this case.
-
-
-///////////////////////////  GAUSSIAN ERROR FUNCTION  //////////////////////////
-
-float4 erf6(float4 x)
-{
-    //  Requires:   x is the standard parameter to erf().
-    //  Returns:    Return an Abramowitz/Stegun approximation of erf(), where:
-    //                  erf(x) = 2/sqrt(pi) * integral(e**(-x**2))
-    //              This approximation has a max absolute error of 2.5*10**-5
-    //              with solid numerical robustness and efficiency.  See:
-	//                  https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions
-	static const float4 one = 1.0.xxxx;
-	const float4 sign_x = sign(x);
-	const float4 t = one/(one + 0.47047*abs(x));
-	const float4 result = one - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))*
-		exp(-(x*x));
-	return result * sign_x;
-}
-
-float3 erf6(const float3 x)
-{
-    //  Float3 version:
-	static const float3 one = 1.0.xxx;
-	const float3 sign_x = sign(x);
-	const float3 t = one/(one + 0.47047*abs(x));
-	const float3 result = one - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))*
-		exp(-(x*x));
-	return result * sign_x;
-}
-
-float2 erf6(const float2 x)
-{
-    //  Float2 version:
-	static const float2 one = 1.0.xx;
-	const float2 sign_x = sign(x);
-	const float2 t = one/(one + 0.47047*abs(x));
-	const float2 result = one - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))*
-		exp(-(x*x));
-	return result * sign_x;
-}
-
-float erf6(const float x)
-{
-    //  Float version:
-	const float sign_x = sign(x);
-	const float t = 1.0/(1.0 + 0.47047*abs(x));
-	const float result = 1.0 - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))*
-		exp(-(x*x));
-	return result * sign_x;
-}
-
-float4 erft(const float4 x)
-{
-    //  Requires:   x is the standard parameter to erf().
-    //  Returns:    Approximate erf() with the hyperbolic tangent.  The error is
-    //              visually noticeable, but it's blazing fast and perceptually
-    //              close...at least on ATI hardware.  See:
-    //                  http://www.maplesoft.com/applications/view.aspx?SID=5525&view=html
-    //  Warning:    Only use this if your hardware drivers correctly implement
-    //              tanh(): My nVidia 8800GTS returns garbage output.
-	return tanh(1.202760580 * x);
-}
-
-float3 erft(const float3 x)
-{
-    //  Float3 version:
-	return tanh(1.202760580 * x);
-}
-
-float2 erft(const float2 x)
-{
-    //  Float2 version:
-	return tanh(1.202760580 * x);
-}
-
-float erft(const float x)
-{
-    //  Float version:
-	return tanh(1.202760580 * x);
-}
-
-float4 erf(const float4 x)
-{
-    //  Requires:   x is the standard parameter to erf().
-    //  Returns:    Some approximation of erf(x), depending on user settings.
-	#ifdef ERF_FAST_APPROXIMATION
-		return erft(x);
-	#else
-		return erf6(x);
-	#endif
-}
-
-float3 erf(const float3 x)
-{
-    //  Float3 version:
-	#ifdef ERF_FAST_APPROXIMATION
-		return erft(x);
-	#else
-		return erf6(x);
-	#endif
-}
-
-float2 erf(const float2 x)
-{
-    //  Float2 version:
-	#ifdef ERF_FAST_APPROXIMATION
-		return erft(x);
-	#else
-		return erf6(x);
-	#endif
-}
-
-float erf(const float x)
-{
-    //  Float version:
-	#ifdef ERF_FAST_APPROXIMATION
-		return erft(x);
-	#else
-		return erf6(x);
-	#endif
-}
-
-
-///////////////////////////  COMPLETE GAMMA FUNCTION  //////////////////////////
-
-float4 gamma_impl(const float4 s, const float4 s_inv)
-{
-    //  Requires:   1.) s is the standard parameter to the gamma function, and
-    //                  it should lie in the [0, 36] range.
-    //              2.) s_inv = 1.0/s.  This implementation function requires
-    //                  the caller to precompute this value, giving users the
-    //                  opportunity to reuse it.
-    //  Returns:    Return approximate gamma function (real-numbered factorial)
-    //              output using the Lanczos approximation with two coefficients
-    //              calculated using Paul Godfrey's method here:
-    //                  http://my.fit.edu/~gabdo/gamma.txt
-    //              An optimal g value for s in [0, 36] is ~1.12906830989, with
-    //              a maximum relative error of 0.000463 for 2**16 equally
-    //              evals.  We could use three coeffs (0.0000346 error) without
-    //              hurting latency, but this allows more parallelism with
-    //              outside instructions.
-	static const float4 g = 1.12906830989.xxxx;
-	static const float4 c0 = 0.8109119309638332633713423362694399653724431.xxxx;
-	static const float4 c1 = 0.4808354605142681877121661197951496120000040.xxxx;
-	static const float4 e = 2.71828182845904523536028747135266249775724709.xxxx;
-	const float4 sph = s + 0.5.xxxx;
-	const float4 lanczos_sum = c0 + c1/(s + 1.0.xxxx);
-	const float4 base = (sph + g)/e;  //  or (s + g + float4(0.5))/e
-	//  gamma(s + 1) = base**sph * lanczos_sum; divide by s for gamma(s).
-	//  This has less error for small s's than (s -= 1.0) at the beginning.
-	return (pow(base, sph) * lanczos_sum) * s_inv;
-}
-
-float3 gamma_impl(const float3 s, const float3 s_inv)
-{
-    //  Float3 version:
-	static const float3 g = 1.12906830989.xxx;
-	static const float3 c0 = 0.8109119309638332633713423362694399653724431.xxx;
-	static const float3 c1 = 0.4808354605142681877121661197951496120000040.xxx;
-	static const float3 e = 2.71828182845904523536028747135266249775724709.xxx;
-	const float3 sph = s + 0.5.xxx;
-	const float3 lanczos_sum = c0 + c1/(s + 1.0.xxx);
-	const float3 base = (sph + g)/e;
-	return (pow(base, sph) * lanczos_sum) * s_inv;
-}
-
-float2 gamma_impl(const float2 s, const float2 s_inv)
-{
-    //  Float2 version:
-	static const float2 g = 1.12906830989.xx;
-	static const float2 c0 = 0.8109119309638332633713423362694399653724431.xx;
-	static const float2 c1 = 0.4808354605142681877121661197951496120000040.xx;
-	static const float2 e = 2.71828182845904523536028747135266249775724709.xx;
-	const float2 sph = s + 0.5.xx;
-	const float2 lanczos_sum = c0 + c1/(s + 1.0.xx);
-	const float2 base = (sph + g)/e;
-	return (pow(base, sph) * lanczos_sum) * s_inv;
-}
-
-float gamma_impl(const float s, const float s_inv)
-{
-    //  Float version:
-	static const float g = 1.12906830989;
-	static const float c0 = 0.8109119309638332633713423362694399653724431;
-	static const float c1 = 0.4808354605142681877121661197951496120000040;
-	static const float e = 2.71828182845904523536028747135266249775724709;
-	const float sph = s + 0.5;
-	const float lanczos_sum = c0 + c1/(s + 1.0);
-	const float base = (sph + g)/e;
-	return (pow(base, sph) * lanczos_sum) * s_inv;
-}
-
-float4 gamma(const float4 s)
-{
-    //  Requires:   s is the standard parameter to the gamma function, and it
-    //              should lie in the [0, 36] range.
-    //  Returns:    Return approximate gamma function output with a maximum
-    //              relative error of 0.000463.  See gamma_impl for details.
-	return gamma_impl(s, 1.0.xxxx/s);
-}
-
-float3 gamma(const float3 s)
-{
-    //  Float3 version:
-	return gamma_impl(s, 1.0.xxx/s);
-}
-
-float2 gamma(const float2 s)
-{
-    //  Float2 version:
-	return gamma_impl(s, 1.0.xx/s);
-}
-
-float gamma(const float s)
-{
-    //  Float version:
-	return gamma_impl(s, 1.0/s);
-}
-
-
-////////////////  INCOMPLETE GAMMA FUNCTIONS (RESTRICTED INPUT)  ///////////////
-
-//  Lower incomplete gamma function for small s and z (implementation):
-float4 ligamma_small_z_impl(const float4 s, const float4 z, const float4 s_inv)
-{
-    //  Requires:   1.) s < ~0.5
-    //              2.) z <= ~0.775075
-    //              3.) s_inv = 1.0/s (precomputed for outside reuse)
-    //  Returns:    A series representation for the lower incomplete gamma
-    //              function for small s and small z (4 terms).
-    //  The actual "rolled up" summation looks like:
-	//      last_sign = 1.0; last_pow = 1.0; last_factorial = 1.0;
-	//      sum = last_sign * last_pow / ((s + k) * last_factorial)
-	//      for(int i = 0; i < 4; ++i)
-	//      {
-	//          last_sign *= -1.0; last_pow *= z; last_factorial *= i;
-	//          sum += last_sign * last_pow / ((s + k) * last_factorial);
-	//      }
-	//  Unrolled, constant-unfolded and arranged for madds and parallelism:
-	const float4 scale = pow(z, s);
-	float4 sum = s_inv;  //  Summation iteration 0 result
-	//  Summation iterations 1, 2, and 3:
-	const float4 z_sq = z*z;
-	const float4 denom1 = s + 1.0.xxxx;
-	const float4 denom2 = 2.0*s + 4.0.xxxx;
-	const float4 denom3 = 6.0*s + 18.0.xxxx;
-	//float4 denom4 = 24.0*s + float4(96.0);
-	sum -= z/denom1;
-	sum += z_sq/denom2;
-	sum -= z * z_sq/denom3;
-	//sum += z_sq * z_sq / denom4;
-	//  Scale and return:
-	return scale * sum;
-}
-
-float3 ligamma_small_z_impl(const float3 s, const float3 z, const float3 s_inv)
-{
-    //  Float3 version:
-	const float3 scale = pow(z, s);
-	float3 sum = s_inv;
-	const float3 z_sq = z*z;
-	const float3 denom1 = s + 1.0.xxx;
-	const float3 denom2 = 2.0*s + 4.0.xxx;
-	const float3 denom3 = 6.0*s + 18.0.xxx;
-	sum -= z/denom1;
-	sum += z_sq/denom2;
-	sum -= z * z_sq/denom3;
-	return scale * sum;
-}
-
-float2 ligamma_small_z_impl(const float2 s, const float2 z, const float2 s_inv)
-{
-    //  Float2 version:
-	const float2 scale = pow(z, s);
-	float2 sum = s_inv;
-	const float2 z_sq = z*z;
-	const float2 denom1 = s + 1.0.xx;
-	const float2 denom2 = 2.0*s + 4.0.xx;
-	const float2 denom3 = 6.0*s + 18.0.xx;
-	sum -= z/denom1;
-	sum += z_sq/denom2;
-	sum -= z * z_sq/denom3;
-	return scale * sum;
-}
-
-float ligamma_small_z_impl(const float s, const float z, const float s_inv)
-{
-    //  Float version:
-	const float scale = pow(z, s);
-	float sum = s_inv;
-	const float z_sq = z*z;
-	const float denom1 = s + 1.0;
-	const float denom2 = 2.0*s + 4.0;
-	const float denom3 = 6.0*s + 18.0;
-	sum -= z/denom1;
-	sum += z_sq/denom2;
-	sum -= z * z_sq/denom3;
-	return scale * sum;
-}
-
-//  Upper incomplete gamma function for small s and large z (implementation):
-float4 uigamma_large_z_impl(const float4 s, const float4 z)
-{
-    //  Requires:   1.) s < ~0.5
-    //              2.) z > ~0.775075
-    //  Returns:    Gauss's continued fraction representation for the upper
-    //              incomplete gamma function (4 terms).
-	//  The "rolled up" continued fraction looks like this.  The denominator
-    //  is truncated, and it's calculated "from the bottom up:"
-	//      denom = float4('inf');
-	//      float4 one = float4(1.0);
-	//      for(int i = 4; i > 0; --i)
-	//      {
-	//          denom = ((i * 2.0) - one) + z - s + (i * (s - i))/denom;
-	//      }
-	//  Unrolled and constant-unfolded for madds and parallelism:
-	const float4 numerator = pow(z, s) * exp(-z);
-	float4 denom = 7.0.xxxx + z - s;
-	denom = 5.0.xxxx + z - s + (3.0*s - 9.0.xxxx)/denom;
-	denom = 3.0.xxxx + z - s + (2.0*s - 4.0.xxxx)/denom;
-	denom = 1.0.xxxx + z - s + (s - 1.0.xxxx)/denom;
-	return numerator / denom;
-}
-
-float3 uigamma_large_z_impl(const float3 s, const float3 z)
-{
-    //  Float3 version:
-	const float3 numerator = pow(z, s) * exp(-z);
-	float3 denom = 7.0.xxx + z - s;
-	denom = 5.0.xxx + z - s + (3.0*s - 9.0.xxx)/denom;
-	denom = 3.0.xxx + z - s + (2.0*s - 4.0.xxx)/denom;
-	denom = 1.0.xxx + z - s + (s - 1.0.xxx)/denom;
-	return numerator / denom;
-}
-
-float2 uigamma_large_z_impl(const float2 s, const float2 z)
-{
-    //  Float2 version:
-	const float2 numerator = pow(z, s) * exp(-z);
-	float2 denom = 7.0.xx + z - s;
-	denom = 5.0.xx + z - s + (3.0*s - 9.0.xx)/denom;
-	denom = 3.0.xx + z - s + (2.0*s - 4.0.xx)/denom;
-	denom = 1.0.xx + z - s + (s - 1.0.xx)/denom;
-	return numerator / denom;
-}
-
-float uigamma_large_z_impl(const float s, const float z)
-{
-    //  Float version:
-	const float numerator = pow(z, s) * exp(-z);
-	float denom = 7.0 + z - s;
-	denom = 5.0 + z - s + (3.0*s - 9.0)/denom;
-	denom = 3.0 + z - s + (2.0*s - 4.0)/denom;
-	denom = 1.0 + z - s + (s - 1.0)/denom;
-	return numerator / denom;
-}
-
-//  Normalized lower incomplete gamma function for small s (implementation):
-float4 normalized_ligamma_impl(const float4 s, const float4 z,
-    const float4 s_inv, const float4 gamma_s_inv)
-{
-    //  Requires:   1.) s < ~0.5
-    //              2.) s_inv = 1/s (precomputed for outside reuse)
-    //              3.) gamma_s_inv = 1/gamma(s) (precomputed for outside reuse)
-    //  Returns:    Approximate the normalized lower incomplete gamma function
-    //              for s < 0.5.  Since we only care about s < 0.5, we only need
-    //              to evaluate two branches (not four) based on z.  Each branch
-    //              uses four terms, with a max relative error of ~0.00182.  The
-    //              branch threshold and specifics were adapted for fewer terms
-    //              from Gil/Segura/Temme's paper here:
-    //                  http://oai.cwi.nl/oai/asset/20433/20433B.pdf
-	//  Evaluate both branches: Real branches test slower even when available.
-	static const float4 thresh = 0.775075.xxxx;
-	const bool4 z_is_large = z > thresh;
-	const float4 large_z = 1.0.xxxx - uigamma_large_z_impl(s, z) * gamma_s_inv;
-	const float4 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
-	//  Combine the results from both branches:
-	return large_z * float4(z_is_large.xxxx) + small_z * float4(!z_is_large.xxxx);
-}
-
-float3 normalized_ligamma_impl(const float3 s, const float3 z,
-    const float3 s_inv, const float3 gamma_s_inv)
-{
-    //  Float3 version:
-	static const float3 thresh = 0.775075.xxx;
-	const bool3 z_is_large = z > thresh;
-	const float3 large_z = 1.0.xxx - uigamma_large_z_impl(s, z) * gamma_s_inv;
-	const float3 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
-	return large_z * float3(z_is_large.xxx) + small_z * float3(!z_is_large.xxx);
-}
-
-float2 normalized_ligamma_impl(const float2 s, const float2 z,
-    const float2 s_inv, const float2 gamma_s_inv)
-{
-    //  Float2 version:
-	static const float2 thresh = 0.775075.xx;
-	const bool2 z_is_large = z > thresh;
-	const float2 large_z = 1.0.xx - uigamma_large_z_impl(s, z) * gamma_s_inv;
-	const float2 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
-	return large_z * float2(z_is_large.xx) + small_z * float2(!z_is_large.xx);
-}
-
-float normalized_ligamma_impl(const float s, const float z,
-    const float s_inv, const float gamma_s_inv)
-{
-    //  Float version:
-	static const float thresh = 0.775075;
-	const bool z_is_large = z > thresh;
-	const float large_z = 1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv;
-	const float small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
-	return large_z * float(z_is_large) + small_z * float(!z_is_large);
-}
-
-//  Normalized lower incomplete gamma function for small s:
-float4 normalized_ligamma(const float4 s, const float4 z)
-{
-    //  Requires:   s < ~0.5
-    //  Returns:    Approximate the normalized lower incomplete gamma function
-    //              for s < 0.5.  See normalized_ligamma_impl() for details.
-	const float4 s_inv = 1.0.xxxx/s;
-	const float4 gamma_s_inv = 1.0.xxxx/gamma_impl(s, s_inv);
-	return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv);
-}
-
-float3 normalized_ligamma(const float3 s, const float3 z)
-{
-    //  Float3 version:
-	const float3 s_inv = 1.0.xxx/s;
-	const float3 gamma_s_inv = 1.0.xxx/gamma_impl(s, s_inv);
-	return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv);
-}
-
-float2 normalized_ligamma(const float2 s, const float2 z)
-{
-    //  Float2 version:
-	const float2 s_inv = 1.0.xx/s;
-	const float2 gamma_s_inv = 1.0.xx/gamma_impl(s, s_inv);
-	return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv);
-}
-
-float normalized_ligamma(const float s, const float z)
-{
-    //  Float version:
-	const float s_inv = 1.0/s;
-	const float gamma_s_inv = 1.0/gamma_impl(s, s_inv);
-	return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv);
-}
-
-
-#endif  //  SPECIAL_FUNCTIONS_H
-
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/user-cgp-constants.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/user-cgp-constants.fxh
@@ -1,58 +0,0 @@
-#ifndef USER_CGP_CONSTANTS_H
-#define USER_CGP_CONSTANTS_H
-
-//  IMPORTANT:
-//  These constants MUST be set appropriately for the settings in crt-royale.cgp
-//  (or whatever related .cgp file you're using).  If they aren't, you're likely
-//  to get artifacts, the wrong phosphor mask size, etc.  I wish these could be
-//  set directly in the .cgp file to make things easier, but...they can't.
-
-//  PASS SCALES AND RELATED CONSTANTS:
-//  Copy the absolute scale_x for BLOOM_APPROX.  There are two major versions of
-//  this shader: One does a viewport-scale bloom, and the other skips it.  The
-//  latter benefits from a higher bloom_approx_scale_x, so save both separately:
-static const float bloom_approx_size_x = 320.0;
-static const float bloom_approx_size_x_for_fake = 400.0;
-//  Copy the viewport-relative scales of the phosphor mask resize passes
-//  (MASK_RESIZE and the pass immediately preceding it):
-static const float2 mask_resize_viewport_scale = float2(0.0625, 0.0625);
-//  Copy the geom_max_aspect_ratio used to calculate the MASK_RESIZE scales, etc.:
-static const float geom_max_aspect_ratio = 4.0/3.0;
-
-//  PHOSPHOR MASK TEXTURE CONSTANTS:
-//  Set the following constants to reflect the properties of the phosphor mask
-//  texture named in crt-royale.cgp.  The shader optionally resizes a mask tile
-//  based on user settings, then repeats a single tile until filling the screen.
-//  The shader must know the input texture size (default 64x64), and to manually
-//  resize, it must also know the horizontal triads per tile (default 8).
-static const float2 mask_texture_small_size = 64.0.xx;
-static const float2 mask_texture_large_size = 512.0.xx;
-static const float mask_triads_per_tile = 8.0;
-//  We need the average brightness of the phosphor mask to compensate for the
-//  dimming it causes.  The following four values are roughly correct for the
-//  masks included with the shader.  Update the value for any LUT texture you
-//  change.  [Un]comment "#define PHOSPHOR_MASK_GRILLE14" depending on whether
-//  the loaded aperture grille uses 14-pixel or 15-pixel stripes (default 15).
-//#define PHOSPHOR_MASK_GRILLE14
-static const float mask_grille14_avg_color = 50.6666666/255.0;
-    //  TileableLinearApertureGrille14Wide7d33Spacing*.png
-    //  TileableLinearApertureGrille14Wide10And6Spacing*.png
-static const float mask_grille15_avg_color = 53.0/255.0;
-    //  TileableLinearApertureGrille15Wide6d33Spacing*.png
-    //  TileableLinearApertureGrille15Wide8And5d5Spacing*.png
-static const float mask_slot_avg_color = 46.0/255.0;
-    //  TileableLinearSlotMask15Wide9And4d5Horizontal8VerticalSpacing*.png
-    //  TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing*.png
-static const float mask_shadow_avg_color = 41.0/255.0;
-    //  TileableLinearShadowMask*.png
-    //  TileableLinearShadowMaskEDP*.png
-
-#ifdef PHOSPHOR_MASK_GRILLE14
-    static const float mask_grille_avg_color = mask_grille14_avg_color;
-#else
-    static const float mask_grille_avg_color = mask_grille15_avg_color;
-#endif
-
-
-#endif  //  USER_CGP_CONSTANTS_H
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/user-settings.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/user-settings.fxh
@@ -1,359 +0,0 @@
-#ifndef USER_SETTINGS_H
-#define USER_SETTINGS_H
-
-/////////////////////////////  DRIVER CAPABILITIES  ////////////////////////////
-
-//  The Cg compiler uses different "profiles" with different capabilities.
-//  This shader requires a Cg compilation profile >= arbfp1, but a few options
-//  require higher profiles like fp30 or fp40.  The shader can't detect profile
-//  or driver capabilities, so instead you must comment or uncomment the lines
-//  below with "//" before "#define."  Disable an option if you get compilation
-//  errors resembling those listed.  Generally speaking, all of these options
-//  will run on nVidia cards, but only DRIVERS_ALLOW_TEX2DBIAS (if that) is
-//  likely to run on ATI/AMD, due to the Cg compiler's profile limitations.
-
-//  Derivatives: Unsupported on fp20, ps_1_1, ps_1_2, ps_1_3, and arbfp1.
-//  Among other things, derivatives help us fix anisotropic filtering artifacts
-//  with curved manually tiled phosphor mask coords.  Related errors:
-//  error C3004: function "float2 ddx(float2);" not supported in this profile
-//  error C3004: function "float2 ddy(float2);" not supported in this profile
-    //#define DRIVERS_ALLOW_DERIVATIVES
-
-//  Fine derivatives: Unsupported on older ATI cards.
-//  Fine derivatives enable 2x2 fragment block communication, letting us perform
-//  fast single-pass blur operations.  If your card uses coarse derivatives and
-//  these are enabled, blurs could look broken.  Derivatives are a prerequisite.
-    #ifdef DRIVERS_ALLOW_DERIVATIVES
-        #define DRIVERS_ALLOW_FINE_DERIVATIVES
-    #endif
-
-//  Dynamic looping: Requires an fp30 or newer profile.
-//  This makes phosphor mask resampling faster in some cases.  Related errors:
-//  error C5013: profile does not support "for" statements and "for" could not
-//  be unrolled
-    //#define DRIVERS_ALLOW_DYNAMIC_BRANCHES
-
-//  Without DRIVERS_ALLOW_DYNAMIC_BRANCHES, we need to use unrollable loops.
-//  Using one static loop avoids overhead if the user is right, but if the user
-//  is wrong (loops are allowed), breaking a loop into if-blocked pieces with a
-//  binary search can potentially save some iterations.  However, it may fail:
-//  error C6001: Temporary register limit of 32 exceeded; 35 registers
-//  needed to compile program
-    //#define ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS
-
-//  tex2Dlod: Requires an fp40 or newer profile.  This can be used to disable
-//  anisotropic filtering, thereby fixing related artifacts.  Related errors:
-//  error C3004: function "float4 tex2Dlod(sampler2D, float4);" not supported in
-//  this profile
-    //#define DRIVERS_ALLOW_TEX2DLOD
-
-//  tex2Dbias: Requires an fp30 or newer profile.  This can be used to alleviate
-//  artifacts from anisotropic filtering and mipmapping.  Related errors:
-//  error C3004: function "float4 tex2Dbias(sampler2D, float4);" not supported
-//  in this profile
-    //#define DRIVERS_ALLOW_TEX2DBIAS
-
-//  Integrated graphics compatibility: Integrated graphics like Intel HD 4000
-//  impose stricter limitations on register counts and instructions.  Enable
-//  INTEGRATED_GRAPHICS_COMPATIBILITY_MODE if you still see error C6001 or:
-//  error C6002: Instruction limit of 1024 exceeded: 1523 instructions needed
-//  to compile program.
-//  Enabling integrated graphics compatibility mode will automatically disable:
-//  1.) PHOSPHOR_MASK_MANUALLY_RESIZE: The phosphor mask will be softer.
-//      (This may be reenabled in a later release.)
-//  2.) RUNTIME_GEOMETRY_MODE
-//  3.) The high-quality 4x4 Gaussian resize for the bloom approximation
-    //#define INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
-
-
-////////////////////////////  USER CODEPATH OPTIONS  ///////////////////////////
-
-//  To disable a #define option, turn its line into a comment with "//."
-
-//  RUNTIME VS. COMPILE-TIME OPTIONS (Major Performance Implications):
-//  Enable runtime shader parameters in the Retroarch (etc.) GUI?  They override
-//  many of the options in this file and allow real-time tuning, but many of
-//  them are slower.  Disabling them and using this text file will boost FPS.
-#define RUNTIME_SHADER_PARAMS_ENABLE
-//  Specify the phosphor bloom sigma at runtime?  This option is 10% slower, but
-//  it's the only way to do a wide-enough full bloom with a runtime dot pitch.
-#define RUNTIME_PHOSPHOR_BLOOM_SIGMA
-//  Specify antialiasing weight parameters at runtime?  (Costs ~20% with cubics)
-#define RUNTIME_ANTIALIAS_WEIGHTS
-//  Specify subpixel offsets at runtime? (WARNING: EXTREMELY EXPENSIVE!)
-//#define RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
-//  Make beam_horiz_filter and beam_horiz_linear_rgb_weight into runtime shader
-//  parameters?  This will require more math or dynamic branching.
-#define RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
-//  Specify the tilt at runtime?  This makes things about 3% slower.
-#define RUNTIME_GEOMETRY_TILT
-//  Specify the geometry mode at runtime?
-#define RUNTIME_GEOMETRY_MODE
-//  Specify the phosphor mask type (aperture grille, slot mask, shadow mask) and
-//  mode (Lanczos-resize, hardware resize, or tile 1:1) at runtime, even without
-//  dynamic branches?  This is cheap if mask_resize_viewport_scale is small.
-#define FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
-
-//  PHOSPHOR MASK:
-//  Manually resize the phosphor mask for best results (slower)?  Disabling this
-//  removes the option to do so, but it may be faster without dynamic branches.
-    #define PHOSPHOR_MASK_MANUALLY_RESIZE
-//  If we sinc-resize the mask, should we Lanczos-window it (slower but better)?
-    #define PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
-//  Larger blurs are expensive, but we need them to blur larger triads.  We can
-//  detect the right blur if the triad size is static or our profile allows
-//  dynamic branches, but otherwise we use the largest blur the user indicates
-//  they might need:
-    #define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
-    //#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
-    //#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
-    //#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
-    //  Here's a helpful chart:
-    //  MaxTriadSize    BlurSize    MinTriadCountsByResolution
-    //  3.0             9.0         480/640/960/1920 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
-    //  6.0             17.0        240/320/480/960 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
-    //  9.0             25.0        160/213/320/640 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
-    //  12.0            31.0        120/160/240/480 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
-    //  18.0            43.0        80/107/160/320 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
-
-
-///////////////////////////////  USER PARAMETERS  //////////////////////////////
-
-//  Note: Many of these static parameters are overridden by runtime shader
-//  parameters when those are enabled.  However, many others are static codepath
-//  options that were cleaner or more convenient to code as static constants.
-
-//  GAMMA:
-    static const float crt_gamma_static = 2.5;                  //  range [1, 5]
-    static const float lcd_gamma_static = 2.2;                  //  range [1, 5]
-
-//  LEVELS MANAGEMENT:
-    //  Control the final multiplicative image contrast:
-    static const float levels_contrast_static = 1.0;            //  range [0, 4)
-    //  We auto-dim to avoid clipping between passes and restore brightness
-    //  later.  Control the dim factor here: Lower values clip less but crush
-    //  blacks more (static only for now).
-    static const float levels_autodim_temp = 0.5;               //  range (0, 1]
-
-//  HALATION/DIFFUSION/BLOOM:
-    //  Halation weight: How much energy should be lost to electrons bouncing
-    //  around under the CRT glass and exciting random phosphors?
-    static const float halation_weight_static = 0.0;            //  range [0, 1]
-    //  Refractive diffusion weight: How much light should spread/diffuse from
-    //  refracting through the CRT glass?
-    static const float diffusion_weight_static = 0.075;         //  range [0, 1]
-    //  Underestimate brightness: Bright areas bloom more, but we can base the
-    //  bloom brightpass on a lower brightness to sharpen phosphors, or a higher
-    //  brightness to soften them.  Low values clip, but >= 0.8 looks okay.
-    static const float bloom_underestimate_levels_static = 0.8; //  range [0, 5]
-    //  Blur all colors more than necessary for a softer phosphor bloom?
-    static const float bloom_excess_static = 0.0;               //  range [0, 1]
-    //  The BLOOM_APPROX pass approximates a phosphor blur early on with a small
-    //  blurred resize of the input (convergence offsets are applied as well).
-    //  There are three filter options (static option only for now):
-    //  0.) Bilinear resize: A fast, close approximation to a 4x4 resize
-    //      if min_allowed_viewport_triads and the BLOOM_APPROX resolution are sane
-    //      and beam_max_sigma is low.
-    //  1.) 3x3 resize blur: Medium speed, soft/smeared from bilinear blurring,
-    //      always uses a static sigma regardless of beam_max_sigma or
-    //      mask_num_triads_desired.
-    //  2.) True 4x4 Gaussian resize: Slowest, technically correct.
-    //  These options are more pronounced for the fast, unbloomed shader version.
-    static const float bloom_approx_filter_static = 2.0;
-
-//  ELECTRON BEAM SCANLINE DISTRIBUTION:
-    //  How many scanlines should contribute light to each pixel?  Using more
-    //  scanlines is slower (especially for a generalized Gaussian) but less
-    //  distorted with larger beam sigmas (especially for a pure Gaussian).  The
-    //  max_beam_sigma at which the closest unused weight is guaranteed <
-    //  1.0/255.0 (for a 3x antialiased pure Gaussian) is:
-    //      2 scanlines: max_beam_sigma = 0.2089; distortions begin ~0.34; 141.7 FPS pure, 131.9 FPS generalized
-    //      3 scanlines, max_beam_sigma = 0.3879; distortions begin ~0.52; 137.5 FPS pure; 123.8 FPS generalized
-    //      4 scanlines, max_beam_sigma = 0.5723; distortions begin ~0.70; 134.7 FPS pure; 117.2 FPS generalized
-    //      5 scanlines, max_beam_sigma = 0.7591; distortions begin ~0.89; 131.6 FPS pure; 112.1 FPS generalized
-    //      6 scanlines, max_beam_sigma = 0.9483; distortions begin ~1.08; 127.9 FPS pure; 105.6 FPS generalized
-    static const float beam_num_scanlines = 3.0;                //  range [2, 6]
-    //  A generalized Gaussian beam varies shape with color too, not just width.
-    //  It's slower but more flexible (static option only for now).
-    static const bool beam_generalized_gaussian = true;
-    //  What kind of scanline antialiasing do you want?
-    //  0: Sample weights at 1x; 1: Sample weights at 3x; 2: Compute an integral
-    //  Integrals are slow (especially for generalized Gaussians) and rarely any
-    //  better than 3x antialiasing (static option only for now).
-    static const float beam_antialias_level = 1.0;              //  range [0, 2]
-    //  Min/max standard deviations for scanline beams: Higher values widen and
-    //  soften scanlines.  Depending on other options, low min sigmas can alias.
-    static const float beam_min_sigma_static = 0.02;            //  range (0, 1]
-    static const float beam_max_sigma_static = 0.3;             //  range (0, 1]
-    //  Beam width varies as a function of color: A power function (0) is more
-    //  configurable, but a spherical function (1) gives the widest beam
-    //  variability without aliasing (static option only for now).
-    static const float beam_spot_shape_function = 0.0;
-    //  Spot shape power: Powers <= 1 give smoother spot shapes but lower
-    //  sharpness.  Powers >= 1.0 are awful unless min/max sigmas are close.
-    static const float beam_spot_power_static = 1.0/3.0;    //  range (0, 16]
-    //  Generalized Gaussian max shape parameters: Higher values give flatter
-    //  scanline plateaus and steeper dropoffs, simultaneously widening and
-    //  sharpening scanlines at the cost of aliasing.  2.0 is pure Gaussian, and
-    //  values > ~40.0 cause artifacts with integrals.
-    static const float beam_min_shape_static = 2.0;         //  range [2, 32]
-    static const float beam_max_shape_static = 4.0;         //  range [2, 32]
-    //  Generalized Gaussian shape power: Affects how quickly the distribution
-    //  changes shape from Gaussian to steep/plateaued as color increases from 0
-    //  to 1.0.  Higher powers appear softer for most colors, and lower powers
-    //  appear sharper for most colors.
-    static const float beam_shape_power_static = 1.0/4.0;   //  range (0, 16]
-    //  What filter should be used to sample scanlines horizontally?
-    //  0: Quilez (fast), 1: Gaussian (configurable), 2: Lanczos2 (sharp)
-    static const float beam_horiz_filter_static = 0.0;
-    //  Standard deviation for horizontal Gaussian resampling:
-    static const float beam_horiz_sigma_static = 0.35;      //  range (0, 2/3]
-    //  Do horizontal scanline sampling in linear RGB (correct light mixing),
-    //  gamma-encoded RGB (darker, hard spot shape, may better match bandwidth-
-    //  limiting circuitry in some CRT's), or a weighted avg.?
-    static const float beam_horiz_linear_rgb_weight_static = 1.0;   //  range [0, 1]
-    //  Simulate scanline misconvergence?  This needs 3x horizontal texture
-    //  samples and 3x texture samples of BLOOM_APPROX and HALATION_BLUR in
-    //  later passes (static option only for now).
-    static const bool beam_misconvergence = true;
-    //  Convergence offsets in x/y directions for R/G/B scanline beams in units
-    //  of scanlines.  Positive offsets go right/down; ranges [-2, 2]
-    static const float2 convergence_offsets_r_static = float2(0.1, 0.2);
-    static const float2 convergence_offsets_g_static = float2(0.3, 0.4);
-    static const float2 convergence_offsets_b_static = float2(0.5, 0.6);
-    //  Detect interlacing (static option only for now)?
-    static const bool interlace_detect_static = true;
-    //  Assume 1080-line sources are interlaced?
-    static const bool interlace_1080i_static = false;
-    //  For interlaced sources, assume TFF (top-field first) or BFF order?
-    //  (Whether this matters depends on the nature of the interlaced input.)
-    static const bool interlace_bff_static = false;
-
-//  ANTIALIASING:
-    //  What AA level do you want for curvature/overscan/subpixels?  Options:
-    //  0x (none), 1x (sample subpixels), 4x, 5x, 6x, 7x, 8x, 12x, 16x, 20x, 24x
-    //  (Static option only for now)
-    static const float aa_level = 12.0;                     //  range [0, 24]
-    //  What antialiasing filter do you want (static option only)?  Options:
-    //  0: Box (separable), 1: Box (cylindrical),
-    //  2: Tent (separable), 3: Tent (cylindrical),
-    //  4: Gaussian (separable), 5: Gaussian (cylindrical),
-    //  6: Cubic* (separable), 7: Cubic* (cylindrical, poor)
-    //  8: Lanczos Sinc (separable), 9: Lanczos Jinc (cylindrical, poor)
-    //      * = Especially slow with RUNTIME_ANTIALIAS_WEIGHTS
-    static const float aa_filter = 6.0;                     //  range [0, 9]
-    //  Flip the sample grid on odd/even frames (static option only for now)?
-    static const bool aa_temporal = false;
-    //  Use RGB subpixel offsets for antialiasing?  The pixel is at green, and
-    //  the blue offset is the negative r offset; range [0, 0.5]
-    static const float2 aa_subpixel_r_offset_static = float2(-1.0/3.0, 0.0);//float2(0.0);
-    //  Cubics: See http://www.imagemagick.org/Usage/filter/#mitchell
-    //  1.) "Keys cubics" with B = 1 - 2C are considered the highest quality.
-    //  2.) C = 0.5 (default) is Catmull-Rom; higher C's apply sharpening.
-    //  3.) C = 1.0/3.0 is the Mitchell-Netravali filter.
-    //  4.) C = 0.0 is a soft spline filter.
-    static const float aa_cubic_c_static = 0.5;             //  range [0, 4]
-    //  Standard deviation for Gaussian antialiasing: Try 0.5/aa_pixel_diameter.
-    static const float aa_gauss_sigma_static = 0.5;     //  range [0.0625, 1.0]
-
-//  PHOSPHOR MASK:
-    //  Mask type: 0 = aperture grille, 1 = slot mask, 2 = EDP shadow mask
-    static const float mask_type_static = 1.0;                  //  range [0, 2]
-    //  We can sample the mask three ways.  Pick 2/3 from: Pretty/Fast/Flexible.
-    //  0.) Sinc-resize to the desired dot pitch manually (pretty/slow/flexible).
-    //      This requires PHOSPHOR_MASK_MANUALLY_RESIZE to be #defined.
-    //  1.) Hardware-resize to the desired dot pitch (ugly/fast/flexible).  This
-    //      is halfway decent with LUT mipmapping but atrocious without it.
-    //  2.) Tile it without resizing at a 1:1 texel:pixel ratio for flat coords
-    //      (pretty/fast/inflexible).  Each input LUT has a fixed dot pitch.
-    //      This mode reuses the same masks, so triads will be enormous unless
-    //      you change the mask LUT filenames in your .cgp file.
-    static const float mask_sample_mode_static = 0.0;           //  range [0, 2]
-    //  Prefer setting the triad size (0.0) or number on the screen (1.0)?
-    //  If RUNTIME_PHOSPHOR_BLOOM_SIGMA isn't #defined, the specified triad size
-    //  will always be used to calculate the full bloom sigma statically.
-    static const float mask_specify_num_triads_static = 0.0;    //  range [0, 1]
-    //  Specify the phosphor triad size, in pixels.  Each tile (usually with 8
-    //  triads) will be rounded to the nearest integer tile size and clamped to
-    //  obey minimum size constraints (imposed to reduce downsize taps) and
-    //  maximum size constraints (imposed to have a sane MASK_RESIZE FBO size).
-    //  To increase the size limit, double the viewport-relative scales for the
-    //  two MASK_RESIZE passes in crt-royale.cgp and user-cgp-constants.h.
-    //      range [1, mask_texture_small_size/mask_triads_per_tile]
-    static const float mask_triad_size_desired_static = 24.0 / 8.0;
-    //  If mask_specify_num_triads is 1.0/true, we'll go by this instead (the
-    //  final size will be rounded and constrained as above); default 480.0
-    static const float mask_num_triads_desired_static = 480.0;
-    //  How many lobes should the sinc/Lanczos resizer use?  More lobes require
-    //  more samples and avoid moire a bit better, but some is unavoidable
-    //  depending on the destination size (static option for now).
-    static const float mask_sinc_lobes = 3.0;                   //  range [2, 4]
-    //  The mask is resized using a variable number of taps in each dimension,
-    //  but some Cg profiles always fetch a constant number of taps no matter
-    //  what (no dynamic branching).  We can limit the maximum number of taps if
-    //  we statically limit the minimum phosphor triad size.  Larger values are
-    //  faster, but the limit IS enforced (static option only, forever);
-    //      range [1, mask_texture_small_size/mask_triads_per_tile]
-    //  TODO: Make this 1.0 and compensate with smarter sampling!
-    static const float mask_min_allowed_triad_size = 2.0;
-
-//  GEOMETRY:
-    //  Geometry mode:
-    //  0: Off (default), 1: Spherical mapping (like cgwg's),
-    //  2: Alt. spherical mapping (more bulbous), 3: Cylindrical/Trinitron
-    static const float geom_mode_static = 0.0;      //  range [0, 3]
-    //  Radius of curvature: Measured in units of your viewport's diagonal size.
-    static const float geom_radius_static = 2.0;    //  range [1/(2*pi), 1024]
-    //  View dist is the distance from the player to their physical screen, in
-    //  units of the viewport's diagonal size.  It controls the field of view.
-    static const float geom_view_dist_static = 2.0; //  range [0.5, 1024]
-    //  Tilt angle in radians (clockwise around up and right vectors):
-    static const float2 geom_tilt_angle_static = float2(0.0, 0.0);  //  range [-pi, pi]
-    //  Aspect ratio: When the true viewport size is unknown, this value is used
-    //  to help convert between the phosphor triad size and count, along with
-    //  the mask_resize_viewport_scale constant from user-cgp-constants.h.  Set
-    //  this equal to Retroarch's display aspect ratio (DAR) for best results;
-    //  range [1, geom_max_aspect_ratio from user-cgp-constants.h];
-    //  default (256/224)*(54/47) = 1.313069909 (see below)
-    static const float geom_aspect_ratio_static = 1.313069909;
-    //  Before getting into overscan, here's some general aspect ratio info:
-    //  - DAR = display aspect ratio = SAR * PAR; as in your Retroarch setting
-    //  - SAR = storage aspect ratio = DAR / PAR; square pixel emulator frame AR
-    //  - PAR = pixel aspect ratio   = DAR / SAR; holds regardless of cropping
-    //  Geometry processing has to "undo" the screen-space 2D DAR to calculate
-    //  3D view vectors, then reapplies the aspect ratio to the simulated CRT in
-    //  uv-space.  To ensure the source SAR is intended for a ~4:3 DAR, either:
-    //  a.) Enable Retroarch's "Crop Overscan"
-    //  b.) Readd horizontal padding: Set overscan to e.g. N*(1.0, 240.0/224.0)
-    //  Real consoles use horizontal black padding in the signal, but emulators
-    //  often crop this without cropping the vertical padding; a 256x224 [S]NES
-    //  frame (8:7 SAR) is intended for a ~4:3 DAR, but a 256x240 frame is not.
-    //  The correct [S]NES PAR is 54:47, found by blargg and NewRisingSun:
-    //      http://board.zsnes.com/phpBB3/viewtopic.php?f=22&t=11928&start=50
-    //      http://forums.nesdev.com/viewtopic.php?p=24815#p24815
-    //  For flat output, it's okay to set DAR = [existing] SAR * [correct] PAR
-    //  without doing a. or b., but horizontal image borders will be tighter
-    //  than vertical ones, messing up curvature and overscan.  Fixing the
-    //  padding first corrects this.
-    //  Overscan: Amount to "zoom in" before cropping.  You can zoom uniformly
-    //  or adjust x/y independently to e.g. readd horizontal padding, as noted
-    //  above: Values < 1.0 zoom out; range (0, inf)
-    static const float2 geom_overscan_static = float2(1.0, 1.0);// * 1.005 * (1.0, 240/224.0)
-    //  Compute a proper pixel-space to texture-space matrix even without ddx()/
-    //  ddy()?  This is ~8.5% slower but improves antialiasing/subpixel filtering
-    //  with strong curvature (static option only for now).
-    static const bool geom_force_correct_tangent_matrix = true;
-
-//  BORDERS:
-    //  Rounded border size in texture uv coords:
-    static const float border_size_static = 0.015;           //  range [0, 0.5]
-    //  Border darkness: Moderate values darken the border smoothly, and high
-    //  values make the image very dark just inside the border:
-    static const float border_darkness_static = 2.0;        //  range [0, inf)
-    //  Border compression: High numbers compress border transitions, narrowing
-    //  the dark border area.
-    static const float border_compress_static = 2.5;        //  range [1, inf)
-
-
-#endif  //  USER_SETTINGS_H
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-horizontal.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-horizontal.fxh
@@ -1,97 +0,0 @@
-/////////////////////////////////  MIT LICENSE  ////////////////////////////////
-
-//  Copyright (C) 2014 TroggleMonkey
-//
-//  Permission is hereby granted, free of charge, to any person obtaining a copy
-//  of this software and associated documentation files (the "Software"), to
-//  deal in the Software without restriction, including without limitation the
-//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-//  sell copies of the Software, and to permit persons to whom the Software is
-//  furnished to do so, subject to the following conditions:
-//  
-//  The above copyright notice and this permission notice shall be included in
-//  all copies or substantial portions of the Software.
-//
-//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-//  IN THE SOFTWARE.
-
-
-/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
-
-//  PASS SETTINGS:
-//  gamma-management.h needs to know what kind of pipeline we're using and
-//  what pass this is in that pipeline.  This will become obsolete if/when we
-//  can #define things like this in the .cgp preset file.
-//#define GAMMA_ENCODE_EVERY_FBO
-//#define FIRST_PASS
-//#define LAST_PASS
-//#define SIMULATE_CRT_ON_LCD
-//#define SIMULATE_GBA_ON_LCD
-//#define SIMULATE_LCD_ON_CRT
-//#define SIMULATE_GBA_ON_CRT
-
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.fxh"
-#include "../include/blur-functions.fxh"
-
-/////////////////////////////////  STRUCTURES  /////////////////////////////////
-
-struct out_vertex_p4
-{
-    float2 blur_dxdy        : TEXCOORD1;
-};
-
-
-////////////////////////////////  VERTEX SHADER  ///////////////////////////////
-
-
-// Vertex shader generating a triangle covering the entire screen
-void VS_Blur9Fast_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p4 OUT)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-/*    float2 texture_size = 1.0/NormalizedNativePixelSize;
-    float2 output_size  = (ViewportSize*BufferToViewportRatio);
-    float2 video_size   = 1.0/NormalizedNativePixelSize;
-*/
-//    float2 texture_size = float2(320.0, 240.0);
-    float2 texture_size = HALATION_BLUR_texture_size;
-    float2 output_size  = VIEWPORT_SIZE;
-//    float2 output_size  = VIEWPORT_SIZE*NormalizedNativePixelSize/float2(320.0, 240.0);
-  //  float2 output_size  = float2(320.0, 240.0);
-//    float2 output_size  = 1.0/NormalizedNativePixelSize;
-
-	//  Get the uv sample distance between output pixels.  Blurs are not generic
-    //  Gaussian resizers, and correct blurs require:
-    //  1.) IN.output_size == IN.video_size * 2^m, where m is an integer <= 0.
-    //  2.) mipmap_inputN = "true" for this pass in .cgp preset if m != 0
-    //  3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs
-    //  Gaussian resizers would upsize using the distance between input texels
-    //  (not output pixels), but we avoid this and consistently blur at the
-    //  destination size.  Otherwise, combining statically calculated weights
-    //  with bilinear sample exploitation would result in terrible artifacts.
-    const float2 dxdy_scale = video_size/output_size;
-	const float2 dxdy = dxdy_scale/texture_size;
-    //  This blur is horizontal-only, so zero out the vertical offset:
-	OUT.blur_dxdy = float2(dxdy.x, 0.0);
-}
-
-///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
-
-float4 PS_Blur9Fast_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p4 VAR) : SV_Target
-{
-	float3 color = tex2Dblur9fast(BLUR9FAST_VERTICAL, vTexCoord, VAR.blur_dxdy);
-    //  Encode and output the blurred image:
-    return encode_output(float4(color, 1.0));
-}
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-vertical.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-vertical.fxh
@@ -1,95 +0,0 @@
-/////////////////////////////////  MIT LICENSE  ////////////////////////////////
-
-//  Copyright (C) 2014 TroggleMonkey
-//
-//  Permission is hereby granted, free of charge, to any person obtaining a copy
-//  of this software and associated documentation files (the "Software"), to
-//  deal in the Software without restriction, including without limitation the
-//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-//  sell copies of the Software, and to permit persons to whom the Software is
-//  furnished to do so, subject to the following conditions:
-//  
-//  The above copyright notice and this permission notice shall be included in
-//  all copies or substantial portions of the Software.
-//
-//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-//  IN THE SOFTWARE.
-
-
-/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
-
-//  PASS SETTINGS:
-//  gamma-management.h needs to know what kind of pipeline we're using and
-//  what pass this is in that pipeline.  This will become obsolete if/when we
-//  can #define things like this in the .cgp preset file.
-//#define GAMMA_ENCODE_EVERY_FBO
-//#define FIRST_PASS
-//#define LAST_PASS
-//#define SIMULATE_CRT_ON_LCD
-//#define SIMULATE_GBA_ON_LCD
-//#define SIMULATE_LCD_ON_CRT
-//#define SIMULATE_GBA_ON_CRT
-
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "../include/gamma-management.fxh"
-#include "../include/blur-functions.fxh"
-
-/////////////////////////////////  STRUCTURES  /////////////////////////////////
-
-struct out_vertex_p3
-{
-    float2 blur_dxdy        : TEXCOORD1;
-};
-
-
-////////////////////////////////  VERTEX SHADER  ///////////////////////////////
-
-
-// Vertex shader generating a triangle covering the entire screen
-void VS_Blur9Fast_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p3 OUT)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-/*
-    float2 texture_size = 1.0/NormalizedNativePixelSize;
-    float2 output_size  = (ViewportSize*BufferToViewportRatio);
-    float2 video_size   = 1.0/NormalizedNativePixelSize;
-*/
-//    float2 texture_size = float2(320.0, 240.0);
-    float2 texture_size = BLUR9FAST_VERTICAL_texture_size;
-    float2 output_size  = VIEWPORT_SIZE;
-   // float2 output_size  = VIEWPORT_SIZE/4.0;
-//    float2 output_size  = VIEWPORT_SIZE*NormalizedNativePixelSize/float2(320.0, 240.0);
-//    float2 output_size  = 1.0/NormalizedNativePixelSize;
-
-	//  Get the uv sample distance between output pixels.  Blurs are not generic
-    //  Gaussian resizers, and correct blurs require:
-    //  1.) IN.output_size == IN.video_size * 2^m, where m is an integer <= 0.
-    //  2.) mipmap_inputN = "true" for this pass in .cgp preset if m != 0
-    //  3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs
-    //  Gaussian resizers would upsize using the distance between input texels
-    //  (not output pixels), but we avoid this and consistently blur at the
-    //  destination size.  Otherwise, combining statically calculated weights
-    //  with bilinear sample exploitation would result in terrible artifacts.
-    const float2 dxdy_scale = video_size/output_size;
-	const float2 dxdy = dxdy_scale/texture_size;
-    //  This blur is vertical-only, so zero out the horizontal offset:
-	OUT.blur_dxdy = float2(0.0, dxdy.y);
-}
-
-///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
-
-float4 PS_Blur9Fast_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p3 VAR) : SV_Target
-{
-	float3 color = tex2Dblur9fast(BLOOM_APPROX, vTexCoord, VAR.blur_dxdy);
-    //  Encode and output the blurred image:
-    return encode_output(float4(color, 1.0));
-}
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-approx.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-approx.fxh
@@ -1,363 +0,0 @@
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#define ORIG_LINEARIZEDvideo_size   VERTICAL_SCANLINES_texture_size
-#define ORIG_LINEARIZEDtexture_size VERTICAL_SCANLINES_video_size
-//  NOTE(review): above, *video_size* maps to *_texture_size and *texture_size* to *_video_size -- confirm the cross-mapping is intended.
-#define bloom_approx_scale_x (4.0/3.0)
-static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);  //  not read by this file's own functions; presumably consumed by the headers included below -- verify
-
-#include "../include/user-settings.fxh"
-#include "../include/derived-settings-and-constants.fxh"
-#include "../include/bind-shader-params.fxh"
-#include "../include/gamma-management.fxh"
-#include "../include/blur-functions.fxh"
-#include "../include/scanline-functions.fxh"
-#include "../include/bloom-functions.fxh"
-
-///////////////////////////////////  HELPERS  //////////////////////////////////
-
-float3 tex2Dresize_gaussian4x4(const sampler2D tex, const float2 tex_uv,
-    const float2 dxdy, const float2 texture_size, const float2 texture_size_inv,
-    const float2 tex_uv_to_pixel_scale, const float sigma)
-{
-    //  Requires:   1.) All requirements of gamma-management.h must be satisfied!
-    //              2.) filter_linearN must == "true" in your .cgp preset.
-    //              3.) mipmap_inputN must == "true" in your .cgp preset if
-    //                  IN.output_size << SRC.video_size.
-    //              4.) dxdy should contain the uv pixel spacing:
-    //                      dxdy = max(float2(1.0),
-    //                          SRC.video_size/IN.output_size)/SRC.texture_size;
-    //              5.) texture_size == SRC.texture_size
-    //              6.) texture_size_inv == float2(1.0)/SRC.texture_size
-    //              7.) tex_uv_to_pixel_scale == IN.output_size *
-    //                      SRC.texture_size / SRC.video_size;
-    //              8.) sigma is the desired Gaussian standard deviation, in
-    //                  terms of output pixels.  It should be < ~0.66171875 to
-    //                  ensure the first unused sample (outside the 4x4 box) has
-    //                  a weight < 1.0/256.0.
-    //  Returns:    A true 4x4 Gaussian resize of the input.
-    //  Description:
-    //  Given correct inputs, this Gaussian resizer samples 4 pixel locations
-    //  along each downsized dimension and/or 4 texel locations along each
-    //  upsized dimension.  It computes dynamic weights based on the pixel-space
-    //  distance of each sample from the destination pixel.  It is arbitrarily
-    //  resizable and higher quality than tex2Dblur3x3_resize, but it's slower.
-    //  TODO: Move this to a more suitable file once there are others like it.
-    const float denom_inv = 0.5/(sigma*sigma);  //  1/(2*sigma^2): scale applied to squared distances in the exp() weights below
-    //  We're taking 4x4 samples, and we're snapping to texels for upsizing.
-    //  Samples are numbered row-major (0-15) on the 4x4 grid.  Find texture
-    //  coords for sample 5 (second row, second column):
-    const float2 curr_texel = tex_uv * texture_size;
-    const float2 prev_texel =
-        floor(curr_texel - under_half.xx) + 0.5.xx;  //  under_half is defined outside this function
-    const float2 prev_texel_uv = prev_texel * texture_size_inv;
-    const float2 snap = float2(dxdy <= texture_size_inv);  //  per axis: 1.0 when the step is at most one texel, else 0.0
-    const float2 sample5_downsize_uv = tex_uv - 0.5 * dxdy;
-    const float2 sample5_uv = lerp(sample5_downsize_uv, prev_texel_uv, snap);  //  snap == 1.0 selects the texel-snapped coord
-    //  Compute texture coords for other samples.  The diagonal (0, 5, 10, 15)
-    //  is stepped by dxdy; the rest are reached by horizontal dx steps:
-    const float2 dx = float2(dxdy.x, 0.0);
-    const float2 sample0_uv = sample5_uv - dxdy;
-    const float2 sample10_uv = sample5_uv + dxdy;
-    const float2 sample15_uv = sample5_uv + 2.0 * dxdy;
-    const float2 sample1_uv = sample0_uv + dx;
-    const float2 sample2_uv = sample0_uv + 2.0 * dx;
-    const float2 sample3_uv = sample0_uv + 3.0 * dx;
-    const float2 sample4_uv = sample5_uv - dx;
-    const float2 sample6_uv = sample5_uv + dx;
-    const float2 sample7_uv = sample5_uv + 2.0 * dx;
-    const float2 sample8_uv = sample10_uv - 2.0 * dx;
-    const float2 sample9_uv = sample10_uv - dx;
-    const float2 sample11_uv = sample10_uv + dx;
-    const float2 sample12_uv = sample15_uv - 3.0 * dx;
-    const float2 sample13_uv = sample15_uv - 2.0 * dx;
-    const float2 sample14_uv = sample15_uv - dx;
-    //  Load each sample (rgb only, through tex2D_linearize):
-    const float3 sample0 = tex2D_linearize(tex, sample0_uv).rgb;
-    const float3 sample1 = tex2D_linearize(tex, sample1_uv).rgb;
-    const float3 sample2 = tex2D_linearize(tex, sample2_uv).rgb;
-    const float3 sample3 = tex2D_linearize(tex, sample3_uv).rgb;
-    const float3 sample4 = tex2D_linearize(tex, sample4_uv).rgb;
-    const float3 sample5 = tex2D_linearize(tex, sample5_uv).rgb;
-    const float3 sample6 = tex2D_linearize(tex, sample6_uv).rgb;
-    const float3 sample7 = tex2D_linearize(tex, sample7_uv).rgb;
-    const float3 sample8 = tex2D_linearize(tex, sample8_uv).rgb;
-    const float3 sample9 = tex2D_linearize(tex, sample9_uv).rgb;
-    const float3 sample10 = tex2D_linearize(tex, sample10_uv).rgb;
-    const float3 sample11 = tex2D_linearize(tex, sample11_uv).rgb;
-    const float3 sample12 = tex2D_linearize(tex, sample12_uv).rgb;
-    const float3 sample13 = tex2D_linearize(tex, sample13_uv).rgb;
-    const float3 sample14 = tex2D_linearize(tex, sample14_uv).rgb;
-    const float3 sample15 = tex2D_linearize(tex, sample15_uv).rgb;
-    //  Compute destination pixel offsets for each sample (sample position
-    //  minus destination position, both scaled by tex_uv_to_pixel_scale):
-    const float2 dest_pixel = tex_uv * tex_uv_to_pixel_scale;
-    const float2 sample0_offset = sample0_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample1_offset = sample1_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample2_offset = sample2_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample3_offset = sample3_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample4_offset = sample4_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample5_offset = sample5_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample6_offset = sample6_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample7_offset = sample7_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample8_offset = sample8_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample9_offset = sample9_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample10_offset = sample10_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample11_offset = sample11_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample12_offset = sample12_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample13_offset = sample13_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample14_offset = sample14_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const float2 sample15_offset = sample15_uv * tex_uv_to_pixel_scale - dest_pixel;
-    //  Compute Gaussian sample weights (LENGTH_SQ is defined outside this function):
-    const float w0 = exp(-LENGTH_SQ(sample0_offset) * denom_inv);
-    const float w1 = exp(-LENGTH_SQ(sample1_offset) * denom_inv);
-    const float w2 = exp(-LENGTH_SQ(sample2_offset) * denom_inv);
-    const float w3 = exp(-LENGTH_SQ(sample3_offset) * denom_inv);
-    const float w4 = exp(-LENGTH_SQ(sample4_offset) * denom_inv);
-    const float w5 = exp(-LENGTH_SQ(sample5_offset) * denom_inv);
-    const float w6 = exp(-LENGTH_SQ(sample6_offset) * denom_inv);
-    const float w7 = exp(-LENGTH_SQ(sample7_offset) * denom_inv);
-    const float w8 = exp(-LENGTH_SQ(sample8_offset) * denom_inv);
-    const float w9 = exp(-LENGTH_SQ(sample9_offset) * denom_inv);
-    const float w10 = exp(-LENGTH_SQ(sample10_offset) * denom_inv);
-    const float w11 = exp(-LENGTH_SQ(sample11_offset) * denom_inv);
-    const float w12 = exp(-LENGTH_SQ(sample12_offset) * denom_inv);
-    const float w13 = exp(-LENGTH_SQ(sample13_offset) * denom_inv);
-    const float w14 = exp(-LENGTH_SQ(sample14_offset) * denom_inv);
-    const float w15 = exp(-LENGTH_SQ(sample15_offset) * denom_inv);
-    const float weight_sum_inv = 1.0/(
-        w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 +
-        w8 +w9 + w10 + w11 + w12 + w13 + w14 + w15);
-    //  Weight and sum the samples, then normalize by the total weight:
-    const float3 sum = w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 +
-        w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 +
-        w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 +
-        w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15;
-    return sum * weight_sum_inv;
-}
-
-/////////////////////////////////  STRUCTURES  /////////////////////////////////
-
-struct out_vertex_p2  //  interpolants written by VS_Bloom_Approx and read by PS_Bloom_Approx
-{
-    float2 tex_uv                       : TEXCOORD1;  //  video_uv rescaled by ORIG_LINEARIZED video_size/texture_size
-    float2 blur_dxdy                    : TEXCOORD2;  //  uv step between blur samples
-    float2 uv_scanline_step             : TEXCOORD3;  //  uv distance between (texels, same-field scanlines)
-    float estimated_viewport_size_x     : TEXCOORD4;  //  video_size.y * texture_size.x/texture_size.y
-    float2 texture_size_inv             : TEXCOORD5;  //  1.0/ORIG_LINEARIZEDtexture_size
-    float2 tex_uv_to_pixel_scale        : TEXCOORD6;  //  output_size * ORIG_LINEARIZED texture_size/video_size
-    float2 output_size                  : TEXCOORD7;  //  copy of VIEWPORT_SIZE
-};
-
-
-
-////////////////////////////////  VERTEX SHADER  ///////////////////////////////
-
-// Vertex shader generating a triangle covering the entire screen; it also fills out_vertex_p2 for PS_Bloom_Approx.
-void VS_Bloom_Approx(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p2 OUT)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;  //  ids 0/1/2 -> uv (0,0), (0,2), (2,0)
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);  //  -> clip xy (-1,1), (-1,-3), (3,1)
-
-    float2 texture_size  = BLOOM_APPROX_texture_size;
-    float2 output_size   = VIEWPORT_SIZE;
-
-    OUT.output_size = output_size;
-
-    //  This vertex shader copies blurs/vertex-shader-blur-one-pass-resize.h,
-    //  except we're using a different source image.
-    const float2 video_uv = texcoord * texture_size/video_size;  //  video_size is not declared in this function; it comes from outside
-    OUT.tex_uv = video_uv * ORIG_LINEARIZEDvideo_size /
-        ORIG_LINEARIZEDtexture_size;
-    //  The last pass (vertical scanlines) had a viewport y scale, so we can
-    //  use it to calculate a better runtime sigma:
-//    OUT.estimated_viewport_size_x = video_size.y * geom_aspect_ratio_x/geom_aspect_ratio_y;
-    OUT.estimated_viewport_size_x = video_size.y * texture_size.x/texture_size.y;
-
-    //  Get the uv sample distance between output pixels.  We're using a resize
-    //  blur, so arbitrary upsizing will be acceptable if filter_linearN =
-    //  "true," and arbitrary downsizing will be acceptable if mipmap_inputN =
-    //  "true" too.  The blur will be much more accurate if a true 4x4 Gaussian
-    //  resize is used instead of tex2Dblur3x3_resize (which samples between
-    //  texels even for upsizing).
-    const float2 dxdy_min_scale = ORIG_LINEARIZEDvideo_size/output_size;
-    const float2 texture_size_inv = 1.0.xx/ORIG_LINEARIZEDtexture_size;
-    if(bloom_approx_filter > 1.5)   //  4x4 true Gaussian resize
-    {
-        //  For upsizing, we'll snap to texels and sample the nearest 4.
-        const float2 dxdy_scale = max(dxdy_min_scale, 1.0.xx);  //  never step by less than one texel
-        OUT.blur_dxdy = dxdy_scale * texture_size_inv;
-    }
-    else    //  other filter modes: use the unclamped scale
-    {
-        const float2 dxdy_scale = dxdy_min_scale;
-        OUT.blur_dxdy = dxdy_scale * texture_size_inv;
-    }
-    //  tex2Dresize_gaussian4x4 needs to know a bit more than the other filters:
-    OUT.tex_uv_to_pixel_scale = output_size *
-        ORIG_LINEARIZEDtexture_size / ORIG_LINEARIZEDvideo_size;
-    OUT.texture_size_inv = texture_size_inv;
-
-    //  Detecting interlacing again here lets us apply convergence offsets in
-    //  this pass.  il_step_multiple contains the (texel, scanline) step
-    //  multiple: 1 for progressive, 2 for interlaced.
-    const float2 orig_video_size = ORIG_LINEARIZEDvideo_size;
-    const float y_step = 1.0 + float(is_interlaced(orig_video_size.y));
-    const float2 il_step_multiple = float2(1.0, y_step);
-    //  Get the uv distance between (texels, same-field scanlines):
-    OUT.uv_scanline_step = il_step_multiple / ORIG_LINEARIZEDtexture_size;
-}
-
-
-///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
-
-float4 PS_Bloom_Approx(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p2 VAR) : SV_Target
-{   //  Resizes/blurs ORIG_LINEARIZED with the filter chosen by bloom_approx_filter; vpos and vTexCoord are not read.
-    //  Would a viewport-relative size work better for this pass?  (No.)
-    //  PROS:
-    //  1.) Instead of writing an absolute size to user-cgp-constants.h, we'd
-    //      write a viewport scale.  That number could be used to directly scale
-    //      the viewport-resolution bloom sigma and/or triad size to a smaller
-    //      scale.  This way, we could calculate an optimal dynamic sigma no
-    //      matter how the dot pitch is specified.
-    //  CONS:
-    //  1.) Texel smearing would be much worse at small viewport sizes, but
-    //      performance would be much worse at large viewport sizes, so there
-    //      would be no easy way to calculate a decent scale.
-    //  2.) Worse, we could no longer get away with using a constant-size blur!
-    //      Instead, we'd have to face all the same difficulties as the real
-    //      phosphor bloom, which requires static #ifdefs to decide the blur
-    //      size based on the expected triad size...a dynamic value.
-    //  3.) Like the phosphor bloom, we'd have less control over making the blur
-    //      size correct for an optical blur.  That said, we likely overblur (to
-    //      maintain brightness) more than the eye would do by itself: 20/20
-    //      human vision distinguishes ~1 arc minute, or 1/60 of a degree.  The
-    //      highest viewing angle recommendation I know of is THX's 40.04 degree
-    //      recommendation, at which 20/20 vision can distinguish about 2402.4
-    //      lines.  Assuming the "TV lines" definition, that means 1201.2
-    //      distinct light lines and 1201.2 distinct dark lines can be told
-    //      apart, i.e. 1201.2 pairs of lines.  This would correspond to 1201.2
-    //      pairs of alternating lit/unlit phosphors, so 2402.4 phosphors total
-    //      (if they're alternately lit).  That's a max of 800.8 triads.  Using
-    //      a more popular 30 degree viewing angle recommendation, 20/20 vision
-    //      can distinguish 1800 lines, or 600 triads of alternately lit
-    //      phosphors.  In contrast, we currently blur phosphors all the way
-    //      down to 341.3 triads to ensure full brightness.
-    //  4.) Realistically speaking, we're usually just going to use bilinear
-    //      filtering in this pass anyway, but it only works well to limit
-    //      bandwidth if it's done at a small constant scale.
-
-    //  Get the constants we need to sample:
-    float2 output_size  = VAR.output_size;
-     //const sampler2D Source = ORIG_LINEARIZED;
-    const float2 tex_uv = VAR.tex_uv;
-    const float2 blur_dxdy = VAR.blur_dxdy;
-    const float2 texture_size = ORIG_LINEARIZEDtexture_size;
-    const float2 texture_size_inv = VAR.texture_size_inv;
-    const float2 tex_uv_to_pixel_scale = VAR.tex_uv_to_pixel_scale;
-    float2 tex_uv_r, tex_uv_g, tex_uv_b;    //  only written and read when beam_misconvergence is set
-    if(beam_misconvergence)
-    {
-        const float2 uv_scanline_step = VAR.uv_scanline_step;
-        const float2 convergence_offsets_r = get_convergence_offsets_r_vector();
-        const float2 convergence_offsets_g = get_convergence_offsets_g_vector();
-        const float2 convergence_offsets_b = get_convergence_offsets_b_vector();
-        tex_uv_r = tex_uv - convergence_offsets_r * uv_scanline_step;
-        tex_uv_g = tex_uv - convergence_offsets_g * uv_scanline_step;
-        tex_uv_b = tex_uv - convergence_offsets_b * uv_scanline_step;
-    }
-    //  Get the blur sigma:
-    const float bloom_approx_sigma = get_bloom_approx_sigma(output_size.x,
-        VAR.estimated_viewport_size_x);
-
-    //  Sample the resized and blurred texture, and apply convergence offsets if
-    //  necessary.  Applying convergence offsets here triples our samples from
-    //  16/9/1 to 48/27/3, but faster and easier than sampling BLOOM_APPROX and
-    //  HALATION_BLUR 3 times at full resolution every time they're used.
-    float3 color_r, color_g, color_b, color;
-    if(bloom_approx_filter > 1.5)
-    {
-        //  Use a 4x4 Gaussian resize.  This is slower but technically correct.
-        if(beam_misconvergence)
-        {
-            color_r = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_r,
-                blur_dxdy, texture_size, texture_size_inv,
-                tex_uv_to_pixel_scale, bloom_approx_sigma);
-            color_g = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_g,
-                blur_dxdy, texture_size, texture_size_inv,
-                tex_uv_to_pixel_scale, bloom_approx_sigma);
-            color_b = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_b,
-                blur_dxdy, texture_size, texture_size_inv,
-                tex_uv_to_pixel_scale, bloom_approx_sigma);
-        }
-        else
-        {
-            color = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv,
-                blur_dxdy, texture_size, texture_size_inv,
-                tex_uv_to_pixel_scale, bloom_approx_sigma);
-        }
-    }
-    else if(bloom_approx_filter > 0.5)
-    {
-        //  Use a 3x3 resize blur.  This is the softest option, because we're
-        //  blurring already blurry bilinear samples.  It doesn't play quite as
-        //  nicely with convergence offsets, but it has its charms.  Both branches pass bloom_approx_sigma so the blur strength does not depend on beam_misconvergence.
-        if(beam_misconvergence)
-        {
-            color_r = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_r,
-                blur_dxdy, bloom_approx_sigma);
-            color_g = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_g,
-                blur_dxdy, bloom_approx_sigma);
-            color_b = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_b,
-                blur_dxdy, bloom_approx_sigma);
-        }
-        else
-        {
-            color = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv, blur_dxdy, bloom_approx_sigma);
-        }
-    }
-    else
-    {
-        //  Use bilinear sampling.  This approximates a 4x4 Gaussian resize MUCH
-        //  better than tex2Dblur3x3_resize for the very small sigmas we're
-        //  likely to use at small output resolutions.  (This estimate becomes
-        //  too sharp above ~400x300, but the blurs break down above that
-        //  resolution too, unless min_allowed_viewport_triads is high enough to
-        //  keep bloom_approx_scale_x/min_allowed_viewport_triads < ~1.1658025.)
-        if(beam_misconvergence)
-        {
-            color_r = tex2D_linearize(ORIG_LINEARIZED, tex_uv_r).rgb;
-            color_g = tex2D_linearize(ORIG_LINEARIZED, tex_uv_g).rgb;
-            color_b = tex2D_linearize(ORIG_LINEARIZED, tex_uv_b).rgb;
-        }
-        else
-        {
-            color = tex2D_linearize(ORIG_LINEARIZED, tex_uv).rgb;
-        }
-    }
-    //  Pack the colors from the red/green/blue beams into a single vector
-    //  (red from the r-offset sample, green from g, blue from b):
-    if(beam_misconvergence)
-    {
-        color = float3(color_r.r, color_g.g, color_b.b);
-    }
-    //  Encode and output the blurred image:
-    return encode_output(float4(color, 1.0));
-}
-
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.fxh
@@ -1,129 +0,0 @@
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
-
-#include "../include/user-settings.fxh"
-#include "../include/derived-settings-and-constants.fxh"
-#include "../include/bind-shader-params.fxh"
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "../include/gamma-management.fxh"
-#include "../include/bloom-functions.fxh"
-#include "../include/phosphor-mask-resizing.fxh"
-#include "../include/scanline-functions.fxh"
-
-/////////////////////////////////  STRUCTURES  /////////////////////////////////
-
-struct out_vertex_p10  //  interpolants written by VS_Bloom_Horizontal and read by PS_Bloom_Horizontal
-{
-    float2 video_uv            : TEXCOORD1;  //  centered texcoord * texture_size/video_size
-    float2 bloom_dxdy          : TEXCOORD2;  //  (1.0/texture_size.x, 0.0): horizontal-only uv step
-    float bloom_sigma_runtime  : TEXCOORD3;  //  result of get_min_sigma_to_blur_triad()
-    float2 sinangle            : TEXCOORD4;  //  sin of (geom_x_tilt, geom_y_tilt)
-    float2 cosangle            : TEXCOORD5;  //  cos of (geom_x_tilt, geom_y_tilt)
-    float3 stretch             : TEXCOORD6;  //  maxscale(sinangle, cosangle)
-};
-
-
-////////////////////////////////  VERTEX SHADER  ///////////////////////////////
-
-// Vertex shader generating a triangle covering the entire screen; it also fills out_vertex_p10 for PS_Bloom_Horizontal.
-void VS_Bloom_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p10 OUT)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;  //  ids 0/1/2 -> uv (0,0), (0,2), (2,0)
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    float2 texture_size = BLOOM_HORIZONTAL_texture_size;
-    float2 output_size  = VIEWPORT_SIZE;
-
-    // Screen centering: applied after position is computed, so only the uv is shifted (by centerx/centery scaled by 1/100).
-    texcoord = texcoord - float2(centerx,centery)/100.0;
-
-    float2 tex_uv = texcoord;
-
-    //  Our various input textures use different coords:
-    const float2 video_uv = tex_uv * texture_size/video_size;  //  video_size is not declared in this function; it comes from outside
-    OUT.video_uv = video_uv;
-
-    //  We're horizontally blurring the bloom input (vertically blurred
-    //  brightpass).  Get the uv distance between output pixels / input texels
-    //  in the horizontal direction (this pass must NOT resize):
-    OUT.bloom_dxdy = float2(1.0/texture_size.x, 0.0);
-
-    //  Calculate a runtime bloom_sigma in case it's needed:
-    const float mask_tile_size_x = get_resized_mask_tile_size(
-        output_size, output_size * mask_resize_viewport_scale, false).x;
-    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
-        mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh);
-
-    // Precalculate a bunch of useful values we'll need in the fragment
-    // shader (they are passed to transform() there when geom_curvature is on).
-    OUT.sinangle    = sin(float2(geom_x_tilt, geom_y_tilt));
-    OUT.cosangle    = cos(float2(geom_x_tilt, geom_y_tilt));
-    OUT.stretch     = maxscale(OUT.sinangle, OUT.cosangle);
-}
-
-
-///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
-
-float4 PS_Bloom_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p10 VAR) : SV_Target
-{   //  Horizontal bloom blur plus recombination with the dimpass and halation; vpos and vTexCoord are not read.
-    VAR.video_uv = (geom_curvature == true) ? transform(VAR.video_uv, VAR.sinangle, VAR.cosangle, VAR.stretch) : VAR.video_uv;  //  remap the uv only when curvature is enabled
-
-    float cval = corner((VAR.video_uv-0.5.xx) * BufferToViewportRatio + 0.5.xx);  //  multiplied into the result below when curvature is enabled
-
-    //  Blur the vertically blurred brightpass horizontally by 9/17/25/43x:
-    const float bloom_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
-    const float3 blurred_brightpass = tex2DblurNfast(BLOOM_VERTICAL,
-        VAR.video_uv, VAR.bloom_dxdy, bloom_sigma);
-
-    //  Sample the masked scanlines (rgb only).  The auto-dim factor is taken from levels_autodim_temp, not from the sample's alpha:
-    const float3 intensity_dim =
-        tex2D_linearize(MASKED_SCANLINES, VAR.video_uv).rgb;
-    const float auto_dim_factor = levels_autodim_temp;
-    const float undim_factor = 1.0/auto_dim_factor;
-
-    //  Calculate the mask dimpass, add it to the blurred brightpass, and
-    //  undim (from scanline auto-dim) and amplify (from mask dim) the result:
-    const float mask_amplify = get_mask_amplify();
-    const float3 brightpass = tex2D_linearize(BRIGHTPASS,
-        VAR.video_uv).rgb;
-    const float3 dimpass = intensity_dim - brightpass;
-    const float3 phosphor_bloom = (dimpass + blurred_brightpass) *
-        mask_amplify * undim_factor * levels_contrast;
-
-    //  Sample the halation texture, and let some light bleed into refractive
-    //  diffusion.  Conceptually this occurs before the phosphor bloom, but
-    //  adding it in earlier passes causes black crush in the diffusion colors.
-    const float3 diffusion_color = levels_contrast * tex2D_linearize(
-        HALATION_BLUR, VAR.video_uv).rgb;
-    float3 final_bloom = lerp(phosphor_bloom,
-        diffusion_color, diffusion_weight);
-
-    final_bloom = (geom_curvature == true) ? final_bloom * cval.xxx : final_bloom;  //  apply the corner() factor only with curvature on
-
-    final_bloom = pow(final_bloom.rgb, 1.0/get_output_gamma());  //  raise to 1/output-gamma before encoding
-
-    //  Encode and output the bloomed image:
-    return encode_output(float4(final_bloom, 1.0));
-}
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-vertical.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-vertical.fxh
@@ -1,83 +0,0 @@
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
-
-#include "../include/user-settings.fxh"
-#include "../include/derived-settings-and-constants.fxh"
-#include "../include/bind-shader-params.fxh"
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "../include/gamma-management.fxh"
-#include "../include/bloom-functions.fxh"
-#include "../include/phosphor-mask-resizing.fxh"
-
-
-/////////////////////////////////  STRUCTURES  /////////////////////////////////
-
-struct out_vertex_p9  //  interpolants written by VS_Bloom_Vertical and read by PS_Bloom_Vertical
-{
-    float2 tex_uv               : TEXCOORD1;  //  copy of the full-screen texcoord
-    float2 bloom_dxdy           : TEXCOORD2;  //  (0.0, dxdy.y): vertical-only uv step
-    float bloom_sigma_runtime   : TEXCOORD3;  //  result of get_min_sigma_to_blur_triad()
-};
-
-
-////////////////////////////////  VERTEX SHADER  ///////////////////////////////
-
-// Vertex shader generating a triangle covering the entire screen; it also fills out_vertex_p9 for PS_Bloom_Vertical.
-void VS_Bloom_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p9 OUT)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;  //  ids 0/1/2 -> uv (0,0), (0,2), (2,0)
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    float2 texture_size = BLOOM_VERTICAL_texture_size;
-    float2 output_size  = VIEWPORT_SIZE;
-
-    OUT.tex_uv = texcoord;
-
-    //  Get the uv sample distance between output pixels.  Calculate dxdy like
-    //  blurs/vertex-shader-blur-fast-vertical.h.
-    const float2 dxdy_scale = video_size/output_size;  //  video_size is not declared in this function; it comes from outside
-    const float2 dxdy = dxdy_scale/texture_size;
-    //  This blur is vertical-only, so zero out the horizontal offset:
-    OUT.bloom_dxdy = float2(0.0, dxdy.y);
-
-    //  Calculate a runtime bloom_sigma in case it's needed:
-    const float mask_tile_size_x = get_resized_mask_tile_size(
-        output_size, output_size * mask_resize_viewport_scale, false).x;
-    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
-        mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh);
-}
-
-
-///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
-
-float4 PS_Bloom_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p9 VAR) : SV_Target
-{   //  Pixel shader of the vertical bloom pass; vpos and vTexCoord are not read.
-    //  Blur the brightpass vertically (bloom_dxdy has a zero x step) with a 9/17/25/43x blur:
-    const float bloom_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
-    const float3 color = tex2DblurNfast(BRIGHTPASS, VAR.tex_uv,
-        VAR.bloom_dxdy, bloom_sigma);
-    //  Encode and output the blurred image (alpha is forced to 1.0):
-    return encode_output(float4(color, 1.0));
-}
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-brightpass.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-brightpass.fxh
@@ -1,130 +0,0 @@
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
-
-#include "../include/user-settings.fxh"
-#include "../include/derived-settings-and-constants.fxh"
-#include "../include/bind-shader-params.fxh"
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "../include/gamma-management.fxh"
-#include "../include/blur-functions.fxh"
-#include "../include/phosphor-mask-resizing.fxh"
-#include "../include/scanline-functions.fxh"
-#include "../include/bloom-functions.fxh"
-
-/////////////////////////////////  STRUCTURES  /////////////////////////////////
-
-struct out_vertex_p8
-{
-    float2 video_uv                     : TEXCOORD1;
-    float2 scanline_tex_uv              : TEXCOORD2;
-    float2 blur3x3_tex_uv               : TEXCOORD3;
-    float bloom_sigma_runtime           : TEXCOORD4;
-};
-
-
-////////////////////////////////  VERTEX SHADER  ///////////////////////////////
-
-// Vertex shader generating a triangle covering the entire screen
-void VS_Brightpass(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p8 OUT)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    float2 tex_uv = texcoord;
-
-    float2 texture_size = BRIGHTPASS_texture_size;
-    float2 output_size  = VIEWPORT_SIZE;
-
-    //  Our various input textures use different coords:
-    const float2 video_uv = tex_uv * texture_size/video_size;
-    OUT.video_uv = video_uv;
-    OUT.scanline_tex_uv = video_uv * MASKED_SCANLINES_video_size /
-        MASKED_SCANLINES_texture_size;
-    OUT.blur3x3_tex_uv = video_uv * BLOOM_APPROX_video_size / BLOOM_APPROX_texture_size;
-
-    //  Calculate a runtime bloom_sigma in case it's needed:
-    const float mask_tile_size_x = get_resized_mask_tile_size(
-        output_size, output_size * mask_resize_viewport_scale, false).x;
-    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
-        mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh);
-}
-
-
-///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
-
-float4 PS_Brightpass(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p8 VAR) : SV_Target
-{
-    //  Sample the masked scanlines:
-    const float3 intensity_dim =
-        tex2D_linearize(MASKED_SCANLINES, VAR.scanline_tex_uv).rgb;
-    //  Get the full intensity, including auto-undimming, and mask compensation:
-    const float auto_dim_factor = levels_autodim_temp;
-    const float undim_factor = 1.0/auto_dim_factor;
-    const float mask_amplify = get_mask_amplify();
-    const float3 intensity = intensity_dim * undim_factor * mask_amplify *
-        levels_contrast;
-
-    //  Sample BLOOM_APPROX to estimate what a straight blur of masked scanlines
-    //  would look like, so we can estimate how much energy we'll receive from
-    //  blooming neighbors:
-    const float3 phosphor_blur_approx = levels_contrast * tex2D_linearize(
-        BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb;
-
-    //  Compute the blur weight for the center texel and the maximum energy we
-    //  expect to receive from neighbors:
-    const float bloom_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
-    const float center_weight = get_center_weight(bloom_sigma);
-    const float3 max_area_contribution_approx =
-        max(0.0.xxx, phosphor_blur_approx - center_weight * intensity);
-    //  Assume neighbors will blur 100% of their intensity (blur_ratio = 1.0),
-    //  because it actually gets better results (on top of being very simple),
-    //  but adjust all intensities for the user's desired underestimate factor:
-    const float3 area_contrib_underestimate =
-        bloom_underestimate_levels * max_area_contribution_approx;
-    const float3 intensity_underestimate =
-        bloom_underestimate_levels * intensity;
-    //  Calculate the blur_ratio, the ratio of intensity we want to blur:
-    #ifdef BRIGHTPASS_AREA_BASED
-        //  This area-based version changes blur_ratio more smoothly and blurs
-        //  more, clipping less but offering less phosphor differentiation:
-        const float3 phosphor_blur_underestimate = bloom_underestimate_levels *
-            phosphor_blur_approx;
-        const float3 soft_intensity = max(intensity_underestimate,
-            phosphor_blur_underestimate * mask_amplify);
-        const float3 blur_ratio_temp =
-            ((1.0.xxx - area_contrib_underestimate) /
-            soft_intensity - 1.0.xxx) / (center_weight - 1.0);
-    #else
-        const float3 blur_ratio_temp =
-            ((1.0.xxx - area_contrib_underestimate) /
-            intensity_underestimate - 1.0.xxx) / (center_weight - 1.0);
-    #endif
-    const float3 blur_ratio = clamp(blur_ratio_temp, 0.0, 1.0);
-    //  Calculate the brightpass based on the auto-dimmed, unamplified, masked
-    //  scanlines, encode if necessary, and return!
-    const float3 brightpass = intensity_dim *
-        lerp(blur_ratio, 1.0.xxx, bloom_excess);
-    return encode_output(float4(brightpass, 1.0));
-}
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.fxh
@@ -1,109 +0,0 @@
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
-
-//  PASS SETTINGS:
-//  gamma-management.h needs to know what kind of pipeline we're using and
-//  what pass this is in that pipeline.  This will become obsolete if/when we
-//  can #define things like this in the .cgp preset file.
-#define FIRST_PASS
-#define SIMULATE_CRT_ON_LCD
-
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "../include/user-settings.fxh"
-#include "../include/bind-shader-params.fxh"
-#include "../include/gamma-management.fxh"
-#include "../include/scanline-functions.fxh"
-
-
-/////////////////////////////////  STRUCTURES  /////////////////////////////////
-
-struct out_vertex
-{
-    float2 tex_uv           : TEXCOORD1;
-    float2 uv_step          : TEXCOORD2;
-    float interlaced        : TEXCOORD3;
-};
-
-////////////////////////////////  VERTEX SHADER  ///////////////////////////////
-
-// Vertex shader generating a triangle covering the entire screen
-void VS_Linearize(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex OUT)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    OUT.tex_uv = texcoord;
-//    OUT.tex_uv = (floor(texcoord / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize;
-    //  Save the uv distance between texels:
-    OUT.uv_step = NormalizedNativePixelSize;
-
-    //  Detect interlacing: 1.0 = true, 0.0 = false.
-    OUT.interlaced = is_interlaced(1.0/NormalizedNativePixelSize.y);
-}
-
-
-///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
-
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
-
-#define input_texture sBackBuffer
-
-float4 PS_Linearize(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex VAR) : SV_Target
-{
-    //  Linearize the input based on CRT gamma and bob interlaced fields.
-    //  Bobbing ensures we can immediately blur without getting artifacts.
-    //  Note: TFF/BFF won't matter for sources that double-weave or similar.
-   // VAR.tex_uv = (floor(VAR.tex_uv / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize;
-
-    if(interlace_detect)
-    {
-        //  Sample the current line and an average of the previous/next line;
-        //  tex2D_linearize will decode CRT gamma.  Don't bother branching:
-        const float2 tex_uv = VAR.tex_uv;
-        const float2 v_step = float2(0.0, VAR.uv_step.y);
-        const float3 curr_line = tex2D_linearize_first(
-            input_texture, tex_uv).rgb;
-        const float3 last_line = tex2D_linearize_first(
-            input_texture, tex_uv - v_step).rgb;
-        const float3 next_line = tex2D_linearize_first(
-            input_texture, tex_uv + v_step).rgb;
-        const float3 interpolated_line = 0.5 * (last_line + next_line);
-        //  If we're interlacing, determine which field curr_line is in:
-        const float modulus = VAR.interlaced + 1.0;
-        const float field_offset =
-            fmod(FrameCount + float(interlace_bff), modulus);
-        const float curr_line_texel = tex_uv.y / NormalizedNativePixelSize.y;
-        //  Use under_half to fix a rounding bug around exact texel locations.
-        const float line_num_last = floor(curr_line_texel - under_half);
-        const float wrong_field = fmod(line_num_last + field_offset, modulus);
-        //  Select the correct color, and output the result:
-        const float3 color = lerp(curr_line, interpolated_line, wrong_field);
-        return encode_output(float4(color, 1.0));
-    }
-    else
-    {
-        return encode_output(tex2D_linearize_first(input_texture, VAR.tex_uv));
-    }
-}
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-horizontal.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-horizontal.fxh
@@ -1,130 +0,0 @@
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
-
-#include "../include/user-settings.fxh"
-#include "../include/derived-settings-and-constants.fxh"
-#include "../include/bind-shader-params.fxh"
-
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "../include/phosphor-mask-resizing.fxh"
-
-
-/////////////////////////////////  STRUCTURES  /////////////////////////////////
-
-struct out_vertex_p6
-{
-    float2 src_tex_uv_wrap              : TEXCOORD1;
-    float2 tile_uv_wrap                 : TEXCOORD2;
-    float2 resize_magnification_scale   : TEXCOORD3;
-    float2 src_dxdy                     : TEXCOORD4;
-    float2 tile_size_uv                 : TEXCOORD5;
-    float2 input_tiles_per_texture      : TEXCOORD6;
-};
-
-
-////////////////////////////////  VERTEX SHADER  ///////////////////////////////
-
-// Vertex shader generating a triangle covering the entire screen
-void VS_Mask_Resize_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p6 OUT)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    float2 tex_uv = texcoord;
-
-    float2 texture_size = MASK_RESIZE_texture_size;
-    float2 output_size  = 0.0625*(VIEWPORT_SIZE);
-
-    //  First estimate the viewport size (the user will get the wrong number of
-    //  triads if it's wrong and mask_specify_num_triads is 1.0/true).
-    const float2 estimated_viewport_size =
-        output_size / mask_resize_viewport_scale;
-    //  Find the final size of our resized phosphor mask tiles.  We probably
-    //  estimated the viewport size and MASK_RESIZE output size differently last
-    //  pass, so do not swear they were the same. ;)
-    const float2 mask_resize_tile_size = get_resized_mask_tile_size(
-        estimated_viewport_size, output_size, false);
-
-    //  We'll render resized tiles until filling the output FBO or meeting a
-    //  limit, so compute [wrapped] tile uv coords based on the output uv coords
-    //  and the number of tiles that will fit in the FBO.
-    const float2 output_tiles_this_pass = output_size / mask_resize_tile_size;
-    const float2 output_video_uv = tex_uv * texture_size / video_size;
-    const float2 tile_uv_wrap = output_video_uv * output_tiles_this_pass;
-
-    //  Get the texel size of an input tile and related values:
-    const float2 input_tile_size = float2(min(
-        mask_resize_src_lut_size.x, video_size.x), mask_resize_tile_size.y);
-    const float2 tile_size_uv = input_tile_size / texture_size;
-    const float2 input_tiles_per_texture = texture_size / input_tile_size;
-
-    //  Derive [wrapped] texture uv coords from [wrapped] tile uv coords and
-    //  the tile size in uv coords, and save frac() for the fragment shader.
-    const float2 src_tex_uv_wrap = tile_uv_wrap * tile_size_uv;
-
-    //  Output the values we need, including the magnification scale and step:
-    OUT.tile_uv_wrap = tile_uv_wrap;
-    OUT.src_tex_uv_wrap = src_tex_uv_wrap;
-    OUT.resize_magnification_scale = mask_resize_tile_size / input_tile_size;
-    OUT.src_dxdy = float2(1.0/texture_size.x, 0.0);
-    OUT.tile_size_uv = tile_size_uv;
-    OUT.input_tiles_per_texture = input_tiles_per_texture;
-}
-
-
-///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
-
-float4 PS_Mask_Resize_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p6 VAR) : SV_Target
-{
-    //  The input contains one mask tile horizontally and a number vertically.
-    //  Resize the tile horizontally to its final screen size and repeat it
-    //  until drawing at least mask_resize_num_tiles, leaving it unchanged
-    //  vertically.  Lanczos-resizing the phosphor mask achieves much sharper
-    //  results than mipmapping, outputting >= mask_resize_num_tiles makes for
-    //  easier tiled sampling later.
-    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
-        //  Discard unneeded fragments in case our profile allows real branches.
-        float2 texture_size = MASK_RESIZE_texture_size;
-        const float2 tile_uv_wrap = VAR.tile_uv_wrap;
-        if(get_mask_sample_mode() < 0.5 &&
-            max(tile_uv_wrap.x, tile_uv_wrap.y) <= mask_resize_num_tiles)
-        {
-            const float src_dx = VAR.src_dxdy.x;
-            const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap);
-            const float3 pixel_color = downsample_horizontal_sinc_tiled(MASK_RESIZE_VERTICAL,
-                src_tex_uv, texture_size, VAR.src_dxdy.x,
-                VAR.resize_magnification_scale.x, VAR.tile_size_uv.x);
-            //  The input LUT was linear RGB, and so is our output:
-            return float4(pixel_color, 1.0);
-        }
-        else
-        {
-            discard;
-        }
-    #else
-        discard;
-        return 1.0.xxxx;
-    #endif
-}
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-vertical.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-vertical.fxh
@@ -1,164 +0,0 @@
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
-
-#include "../include/user-settings.fxh"
-#include "../include/derived-settings-and-constants.fxh"
-#include "../include/bind-shader-params.fxh"
-
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "../include/phosphor-mask-resizing.fxh"
-
-
-/////////////////////////////////  STRUCTURES  /////////////////////////////////
-
-struct out_vertex_p5
-{
-    float2 src_tex_uv_wrap              : TEXCOORD1;
-    float2 resize_magnification_scale   : TEXCOORD2;
-};
-
-
-////////////////////////////////  VERTEX SHADER  ///////////////////////////////
-
-// Vertex shader generating a triangle covering the entire screen
-void VS_Mask_Resize_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p5 OUT)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    float2 tex_uv = texcoord;
-
-    float2 texture_size = MASK_RESIZE_VERT_texture_size;
-    float2 output_size  = float2(64.0, 0.0625*((VIEWPORT_SIZE).y));
-
-    //  First estimate the viewport size (the user will get the wrong number of
-    //  triads if it's wrong and mask_specify_num_triads is 1.0/true).
-    const float viewport_y = output_size.y / mask_resize_viewport_scale.y;
-//  Now get aspect_ratio from texture_size. 
-//    const float aspect_ratio = geom_aspect_ratio_x / geom_aspect_ratio_y;
-    const float aspect_ratio = texture_size.x / texture_size.y;
-    const float2 estimated_viewport_size =
-        float2(viewport_y * aspect_ratio, viewport_y);
-    //  Estimate the output size of MASK_RESIZE (the next pass).  The estimated
-    //  x component shouldn't matter, because we're not using the x result, and
-    //  we're not swearing it's correct (if we did, the x result would influence
-    //  the y result to maintain the tile aspect ratio).
-    const float2 estimated_mask_resize_output_size =
-        float2(output_size.y * aspect_ratio, output_size.y);
-    //  Find the final intended [y] size of our resized phosphor mask tiles,
-    //  then the tile size for the current pass (resize y only):
-    const float2 mask_resize_tile_size = get_resized_mask_tile_size(
-        estimated_viewport_size, estimated_mask_resize_output_size, false);
-    const float2 pass_output_tile_size = float2(min(
-        mask_resize_src_lut_size.x, output_size.x), mask_resize_tile_size.y);
-
-    //  We'll render resized tiles until filling the output FBO or meeting a
-    //  limit, so compute [wrapped] tile uv coords based on the output uv coords
-    //  and the number of tiles that will fit in the FBO.
-    const float2 output_tiles_this_pass = output_size / pass_output_tile_size;
-    const float2 output_video_uv = tex_uv * texture_size / video_size;
-    const float2 tile_uv_wrap = output_video_uv * output_tiles_this_pass;
-
-    //  The input LUT is just a single mask tile, so texture uv coords are the
-    //  same as tile uv coords (save frac() for the fragment shader).  The
-    //  magnification scale is also straightforward:
-    OUT.src_tex_uv_wrap = tile_uv_wrap;
-    OUT.resize_magnification_scale =
-        pass_output_tile_size / mask_resize_src_lut_size;
-}
-
-
-///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
-
-float4 PS_Mask_Resize_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p5 VAR) : SV_Target
-{
-    //  Resize the input phosphor mask tile to the final vertical size it will
-    //  appear on screen.  Keep 1x horizontal size if possible (IN.output_size
-    //  >= mask_resize_src_lut_size), and otherwise linearly sample horizontally
-    //  to fit exactly one tile.  Lanczos-resizing the phosphor mask achieves
-    //  much sharper results than mipmapping, and vertically resizing first
-    //  minimizes the total number of taps required.  We output a number of
-    //  resized tiles >= mask_resize_num_tiles for easier tiled sampling later.
-    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
-        //  Discard unneeded fragments in case our profile allows real branches.
-        const float2 tile_uv_wrap = VAR.src_tex_uv_wrap;
-        if(get_mask_sample_mode() < 0.5 &&
-            tile_uv_wrap.y <= mask_resize_num_tiles)
-        {
-            static const float src_dy = 1.0/mask_resize_src_lut_size.y;
-            const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap);
-            float3 pixel_color;
-            //  If mask_type is static, this branch will be resolved statically.
-			#ifdef PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT
-				if(mask_type < 0.5)
-				{
-					pixel_color = downsample_vertical_sinc_tiled(
-						mask_grille_texture_large, src_tex_uv, mask_resize_src_lut_size,
-						src_dy, VAR.resize_magnification_scale.y, 1.0);
-				}
-				else if(mask_type < 1.5)
-				{
-					pixel_color = downsample_vertical_sinc_tiled(
-						mask_slot_texture_large, src_tex_uv, mask_resize_src_lut_size,
-						src_dy, VAR.resize_magnification_scale.y, 1.0);
-				}
-				else
-				{
-					pixel_color = downsample_vertical_sinc_tiled(
-						mask_shadow_texture_large, src_tex_uv, mask_resize_src_lut_size,
-						src_dy, VAR.resize_magnification_scale.y, 1.0);
-				}
-			#else
-				if(mask_type < 0.5)
-				{
-					pixel_color = downsample_vertical_sinc_tiled(
-						mask_grille_texture_small, src_tex_uv, mask_resize_src_lut_size,
-						src_dy, VAR.resize_magnification_scale.y, 1.0);
-				}
-				else if(mask_type < 1.5)
-				{
-					pixel_color = downsample_vertical_sinc_tiled(
-						mask_slot_texture_small, src_tex_uv, mask_resize_src_lut_size,
-						src_dy, VAR.resize_magnification_scale.y, 1.0);
-				}
-				else
-				{
-					pixel_color = downsample_vertical_sinc_tiled(
-						mask_shadow_texture_small, src_tex_uv, mask_resize_src_lut_size,
-						src_dy, VAR.resize_magnification_scale.y, 1.0);
-				}
-			#endif
-            //  The input LUT was linear RGB, and so is our output:
-            return float4(pixel_color, 1.0);
-        }
-        else
-        {
-            discard;
-        }
-    #else
-        discard;
-        return 1.0.xxxx;
-    #endif
-}
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.fxh
@@ -1,283 +0,0 @@
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
-
-#include "../include/user-settings.fxh"
-#include "../include/derived-settings-and-constants.fxh"
-#include "../include/bind-shader-params.fxh"
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "../include/scanline-functions.fxh"
-#include "../include/phosphor-mask-resizing.fxh"
-#include "../include/bloom-functions.fxh"
-#include "../include/gamma-management.fxh"
-
-
-///////////////////////////////////  HELPERS  //////////////////////////////////
-
-float4 tex2Dtiled_mask_linearize(const sampler2D tex,
-    const float2 tex_uv)
-{
-    //  If we're manually tiling a texture, anisotropic filtering can get
-    //  confused.  One workaround is to just select the lowest mip level:
-    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
-        #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
-            //  TODO: Use tex2Dlod_linearize with a calculated mip level.
-            return tex2Dlod_linearize(tex, float4(tex_uv, 0.0, 0.0));
-        #else
-            #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
-                return tex2Dbias_linearize(tex, float4(tex_uv, 0.0, -16.0));
-            #else
-                return tex2D_linearize(tex, tex_uv);
-            #endif
-        #endif
-    #else
-        return tex2D_linearize(tex, tex_uv);
-    #endif
-}
-
-
-/////////////////////////////////  STRUCTURES  /////////////////////////////////
-
-
-struct out_vertex_p7
-{
-    //  Use explicit semantics so COLORx doesn't clamp values outside [0, 1].
-    float2 video_uv                     : TEXCOORD1;
-    float2 scanline_tex_uv              : TEXCOORD2;
-    float2 blur3x3_tex_uv               : TEXCOORD3;
-    float2 halation_tex_uv              : TEXCOORD4;
-    float2 scanline_texture_size_inv    : TEXCOORD5;
-    float4 mask_tile_start_uv_and_size  : TEXCOORD6;
-    float2 mask_tiles_per_screen        : TEXCOORD7;
-};
-
-
-////////////////////////////////  VERTEX SHADER  ///////////////////////////////
-
-
-// Vertex shader generating a triangle covering the entire screen
-void VS_Scanlines_Horizontal_Apply_Mask(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p7 OUT)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    float2 tex_uv = texcoord;
-
-    float2 texture_size = MASKED_SCANLINES_texture_size;
-    float2 output_size  = VIEWPORT_SIZE;
-
-    //  Our various input textures use different coords.
-    const float2 video_uv = tex_uv * texture_size/video_size;
-    const float2 scanline_texture_size_inv =
-        1.0.xx/VERTICAL_SCANLINES_texture_size;
-    OUT.video_uv = video_uv;
-    OUT.scanline_tex_uv = video_uv * VERTICAL_SCANLINES_video_size *
-        scanline_texture_size_inv;
-    OUT.blur3x3_tex_uv = video_uv * BLOOM_APPROX_video_size /
-        BLOOM_APPROX_texture_size;
-    OUT.halation_tex_uv = video_uv * HALATION_BLUR_video_size /
-        HALATION_BLUR_texture_size;
-    OUT.scanline_texture_size_inv = scanline_texture_size_inv;
-
-    //  Get a consistent name for the final mask texture size.  Sample mode 0
-    //  uses the manually resized mask, but ignore it if we never resized.
-    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
-        const float mask_sample_mode = get_mask_sample_mode();
-        const float2 mask_resize_texture_size = mask_sample_mode < 0.5 ?
-            MASKED_SCANLINES_texture_size : mask_texture_large_size;
-        const float2 mask_resize_video_size = mask_sample_mode < 0.5 ?
-            MASKED_SCANLINES_video_size : mask_texture_large_size;
-    #else
-        const float2 mask_resize_texture_size = mask_texture_large_size;
-        const float2 mask_resize_video_size = mask_texture_large_size;
-    #endif
-    //  Compute mask tile dimensions, starting points, etc.:
-    float2 mask_tiles_per_screen;
-    OUT.mask_tile_start_uv_and_size = get_mask_sampling_parameters(
-        mask_resize_texture_size, mask_resize_video_size, output_size,
-        mask_tiles_per_screen);
-    OUT.mask_tiles_per_screen = mask_tiles_per_screen;
-}
-
-
-///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
-
-float4 PS_Scanlines_Horizontal_Apply_Mask(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p7 VAR) : SV_Target
-{
-    //  This pass: Sample (misconverged?) scanlines to the final horizontal
-    //  resolution, apply halation (bouncing electrons), and apply the phosphor
-    //  mask.  Fake a bloom if requested.  Unless we fake a bloom, the output
-    //  will be dim from the scanline auto-dim, mask dimming, and low gamma.
-
-    //  Horizontally sample the current row (a vertically interpolated scanline)
-    //  and account for horizontal convergence offsets, given in units of texels.
-  //  float2 VERTICAL_SCANLINES_texture_size = float2(1.0/NormalizedNativePixelSize.x, ViewportSize.y*BufferToViewportRatio.y);
-
-    float2 output_size  = VIEWPORT_SIZE;
-
-    const float3 scanline_color_dim = sample_rgb_scanline_horizontal(
-        VERTICAL_SCANLINES, VAR.scanline_tex_uv,
-        VERTICAL_SCANLINES_texture_size, VAR.scanline_texture_size_inv);
-    const float auto_dim_factor = levels_autodim_temp;
-
-    //  Sample the phosphor mask:
-    const float2 tile_uv_wrap = VAR.video_uv * VAR.mask_tiles_per_screen;
-    const float2 mask_tex_uv = convert_phosphor_tile_uv_wrap_to_tex_uv(
-        tile_uv_wrap, VAR.mask_tile_start_uv_and_size);
-    float3 phosphor_mask_sample;
-    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
-        const bool sample_orig_luts = get_mask_sample_mode() > 0.5;
-    #else
-        static const bool sample_orig_luts = true;
-    #endif
-    if(sample_orig_luts)
-    {
-        //  If mask_type is static, this branch will be resolved statically.
-        if(mask_type < 0.5)
-        {
-            phosphor_mask_sample = tex2D_linearize(
-                mask_grille_texture_large, mask_tex_uv).rgb;
-        }
-        else if(mask_type < 1.5)
-        {
-            phosphor_mask_sample = tex2D_linearize(
-                mask_slot_texture_large, mask_tex_uv).rgb;
-        }
-        else
-        {
-            phosphor_mask_sample = tex2D_linearize(
-                mask_shadow_texture_large, mask_tex_uv).rgb;
-        }
-    }
-    else
-    {
-        //  Sample the resized mask, and avoid tiling artifacts:
-        phosphor_mask_sample = tex2Dtiled_mask_linearize(
-            MASK_RESIZE, mask_tex_uv).rgb;
-    }
-
-    //  Sample the halation texture (auto-dim to match the scanlines), and
-    //  account for both horizontal and vertical convergence offsets, given
-    //  in units of texels horizontally and same-field scanlines vertically:
-    const float3 halation_color = tex2D_linearize(
-        HALATION_BLUR, VAR.halation_tex_uv).rgb;
-
-    //  Apply halation: Halation models electrons flying around under the glass
-    //  and hitting the wrong phosphors (of any color).  It desaturates, so
-    //  average the halation electrons to a scalar.  Reduce the local scanline
-    //  intensity accordingly to conserve energy.
-    const float3 halation_intensity_dim =
-        dot(halation_color, auto_dim_factor.xxx/3.0).xxx;
-    const float3 electron_intensity_dim = lerp(scanline_color_dim,
-        halation_intensity_dim, halation_weight);
-
-    //  Apply the phosphor mask:
-    const float3 phosphor_emission_dim = electron_intensity_dim *
-        phosphor_mask_sample;
-
-    #ifdef PHOSPHOR_BLOOM_FAKE
-        //  The BLOOM_APPROX pass approximates a blurred version of a masked
-        //  and scanlined image.  It's usually used to compute the brightpass,
-        //  but we can also use it to fake the bloom stage entirely.  Caveats:
-        //  1.) A fake bloom is conceptually different, since we're mixing in a
-        //      fully blurred low-res image, and the biggest implication are:
-        //  2.) If mask_amplify is incorrect, results deteriorate more quickly.
-        //  3.) The inaccurate blurring hurts quality in high-contrast areas.
-        //  4.) The bloom_underestimate_levels parameter seems less sensitive.
-        //  Reverse the auto-dimming and amplify to compensate for mask dimming:
-        #define PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
-        #ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
-            static const float blur_contrast = 1.05;
-        #else
-            static const float blur_contrast = 1.0;
-        #endif
-        const float mask_amplify = get_mask_amplify();
-        const float undim_factor = 1.0/auto_dim_factor;
-        const float3 phosphor_emission =
-            phosphor_emission_dim * undim_factor * mask_amplify;
-        //  Get a phosphor blur estimate, accounting for convergence offsets:
-        const float3 electron_intensity = electron_intensity_dim * undim_factor;
-        const float3 phosphor_blur_approx_soft = tex2D_linearize(
-            BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb;
-        const float3 phosphor_blur_approx = lerp(phosphor_blur_approx_soft,
-            electron_intensity, 0.1) * blur_contrast;
-        //  We could blend between phosphor_emission and phosphor_blur_approx,
-        //  solving for the minimum blend_ratio that avoids clipping past 1.0:
-        //      1.0 >= total_intensity
-        //      1.0 >= phosphor_emission * (1.0 - blend_ratio) +
-        //              phosphor_blur_approx * blend_ratio
-        //      blend_ratio = (phosphor_emission - 1.0)/
-        //          (phosphor_emission - phosphor_blur_approx);
-        //  However, this blurs far more than necessary, because it aims for
-        //  full brightness, not minimal blurring.  To fix it, base blend_ratio
-        //  on a max area intensity only so it varies more smoothly:
-        const float3 phosphor_blur_underestimate =
-            phosphor_blur_approx * bloom_underestimate_levels;
-        const float3 area_max_underestimate =
-            phosphor_blur_underestimate * mask_amplify;
-        #ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
-            const float3 blend_ratio_temp =
-                (area_max_underestimate - 1.0.xxx) /
-                (area_max_underestimate - phosphor_blur_underestimate);
-        #else
-            //  Try doing it like an area-based brightpass.  This is nearly
-            //  identical, but it's worth toying with the code in case I ever
-            //  find a way to make it look more like a real bloom.  (I've had
-            //  some promising textures from combining an area-based blend ratio
-            //  for the phosphor blur and a more brightpass-like blend-ratio for
-            //  the phosphor emission, but I haven't found a way to make the
-            //  brightness correct across the whole color range, especially with
-            //  different bloom_underestimate_levels values.)
-            const float desired_triad_size = lerp(mask_triad_size_desired,
-                output_size.x/mask_num_triads_desired,
-                mask_specify_num_triads);
-            const float bloom_sigma = get_min_sigma_to_blur_triad(
-                desired_triad_size, bloom_diff_thresh);
-            const float center_weight = get_center_weight(bloom_sigma);
-            const float3 max_area_contribution_approx =
-                max(0.0.xxx, phosphor_blur_approx -
-                center_weight * phosphor_emission);
-            const float3 area_contrib_underestimate =
-                bloom_underestimate_levels * max_area_contribution_approx;
-            const float3 blend_ratio_temp =
-                ((1.0.xxx - area_contrib_underestimate) /
-                area_max_underestimate - 1.0.xxx) / (center_weight - 1.0);
-        #endif
-        //  Clamp blend_ratio in case it's out-of-range, but be SUPER careful:
-        //  min/max/clamp are BIZARRELY broken with lerp (optimization bug?),
-        //  and this redundant sequence avoids bugs, at least on nVidia cards:
-        const float3 blend_ratio_clamped = max(clamp(blend_ratio_temp, 0.0, 1.0), 0.0);
-        const float3 blend_ratio = lerp(blend_ratio_clamped, 1.0.xxx, bloom_excess);
-        //  Blend the blurred and unblurred images:
-        const float3 phosphor_emission_unclipped =
-            lerp(phosphor_emission, phosphor_blur_approx, blend_ratio);
-        //  Simulate refractive diffusion by reusing the halation sample.
-        const float3 pixel_color = lerp(phosphor_emission_unclipped,
-            halation_color, diffusion_weight);
-    #else
-        const float3 pixel_color = phosphor_emission_dim;
-    #endif
-    //  Encode if necessary, and output.
-    return encode_output(float4(pixel_color, 1.0));
-}
-
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-vertical-interlacing.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-vertical-interlacing.fxh
@@ -1,241 +0,0 @@
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-#undef FIRST_PASS
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//#include "../include/user-settings.fxh"
-//#include "../include/derived-settings-and-constants.fxh"
-#include "../include/bind-shader-params.fxh"
-#include "../include/scanline-functions.fxh"
-//#include "../include/gamma-management.fxh"
-
-/////////////////////////////////  STRUCTURES  /////////////////////////////////
-
-struct out_vertex_p1
-{
-    //  Use explicit semantics so COLORx doesn't clamp values outside [0, 1].
-    float2 tex_uv                   : TEXCOORD1;
-    float2 uv_step                  : TEXCOORD2;    //  uv size of a texel (x) and scanline (y)
-    float2 il_step_multiple         : TEXCOORD3;    //  (1, 1) = progressive, (1, 2) = interlaced
-    float pixel_height_in_scanlines : TEXCOORD4;    //  Height of an output pixel in scanlines
-};
-
-
-////////////////////////////////  VERTEX SHADER  ///////////////////////////////
-
-// Vertex shader generating a triangle covering the entire screen
-void VS_Scanlines_Vertical_Interlacing(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p1 OUT)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    OUT.tex_uv = texcoord;
-
-    float2 texture_size = VERTICAL_SCANLINES_texture_size;
-    float2 output_size  = float2(TEXTURE_SIZE.x, VIEWPORT_SIZE.y);
-
-    //  Detect interlacing: il_step_multiple indicates the step multiple between
-    //  lines: 1 is for progressive sources, and 2 is for interlaced sources.
-//    const float2 video_size = 1.0/NormalizedNativePixelSize;
-    const float y_step = 1.0 + float(is_interlaced(video_size.y));
-    OUT.il_step_multiple = float2(1.0, y_step);
-    //  Get the uv tex coords step between one texel (x) and scanline (y):
-    OUT.uv_step = OUT.il_step_multiple / texture_size;
-
-    //  If shader parameters are used, {min, max}_{sigma, shape} are runtime
-    //  values.  Compute {sigma, shape}_range outside of scanline_contrib() so
-    //  they aren't computed once per scanline (6 times per fragment and up to
-    //  18 times per vertex):
-/*    const float sigma_range = max(beam_max_sigma, beam_min_sigma) -
-        beam_min_sigma;
-    const float shape_range = max(beam_max_shape, beam_min_shape) -
-        beam_min_shape;
-*/
-    //  We need the pixel height in scanlines for antialiased/integral sampling:
-    const float ph = (video_size.y / output_size.y) / 
-        OUT.il_step_multiple.y;
-    OUT.pixel_height_in_scanlines = ph;
-
-}
-
-
-///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
-
-float4 PS_Scanlines_Vertical_Interlacing(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p1 VAR) : SV_Target
-{
-    //  This pass: Sample multiple (misconverged?) scanlines to the final
-    //  vertical resolution.  Temporarily auto-dim the output to avoid clipping.
-
-    //  Read some attributes into local variables:
-    const float2 texture_size = VERTICAL_SCANLINES_texture_size;
-    const float2 texture_size_inv = 1.0/texture_size;
-    const float2 uv_step = VAR.uv_step;
-    const float2 il_step_multiple = VAR.il_step_multiple;
-    const float frame_count = FrameCount;
-    const float ph = VAR.pixel_height_in_scanlines;
-
-    //  Get the uv coords of the previous scanline (in this field), and the
-    //  scanline's distance from this sample, in scanlines.
-    float dist;
-    const float2 scanline_uv = get_last_scanline_uv(VAR.tex_uv, texture_size,
-        texture_size_inv, il_step_multiple, frame_count, dist);
-
-    //  Consider 2, 3, 4, or 6 scanlines numbered 0-5: The previous and next
-    //  scanlines are numbered 2 and 3.  Get scanline colors colors (ignore
-    //  horizontal sampling, since since IN.output_size.x = video_size.x).
-    //  NOTE: Anisotropic filtering creates interlacing artifacts, which is why
-    //  ORIG_LINEARIZED bobbed any interlaced input before this pass.
-    const float2 v_step = float2(0.0, uv_step.y);
-    const float3 scanline2_color = tex2D_linearize(ORIG_LINEARIZED, scanline_uv).rgb;
-    const float3 scanline3_color =
-        tex2D_linearize(ORIG_LINEARIZED, scanline_uv + v_step).rgb;
-    float3 scanline0_color, scanline1_color, scanline4_color, scanline5_color,
-        scanline_outside_color;
-    float dist_round;
-    //  Use scanlines 0, 1, 4, and 5 for a total of 6 scanlines:
-    if(beam_num_scanlines > 5.5)
-    {
-        scanline1_color =
-            tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
-        scanline4_color =
-            tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
-        scanline0_color =
-            tex2D_linearize(ORIG_LINEARIZED, scanline_uv - 2.0 * v_step).rgb;
-        scanline5_color =
-            tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 3.0 * v_step).rgb;
-    }
-    //  Use scanlines 1, 4, and either 0 or 5 for a total of 5 scanlines:
-    else if(beam_num_scanlines > 4.5)
-    {
-        scanline1_color =
-            tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
-        scanline4_color =
-            tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
-        //  dist is in [0, 1]
-        dist_round = round(dist);
-        const float2 sample_0_or_5_uv_off =
-            lerp(-2.0 * v_step, 3.0 * v_step, dist_round);
-        //  Call this "scanline_outside_color" to cope with the conditional
-        //  scanline number:
-        scanline_outside_color = tex2D_linearize(
-            ORIG_LINEARIZED, scanline_uv + sample_0_or_5_uv_off).rgb;
-    }
-    //  Use scanlines 1 and 4 for a total of 4 scanlines:
-    else if(beam_num_scanlines > 3.5)
-    {
-        scanline1_color =
-            tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
-        scanline4_color =
-            tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
-    }
-    //  Use scanline 1 or 4 for a total of 3 scanlines:
-    else if(beam_num_scanlines > 2.5)
-    {
-        //  dist is in [0, 1]
-        dist_round = round(dist);
-        const float2 sample_1or4_uv_off =
-            lerp(-v_step, 2.0 * v_step, dist_round);
-        scanline_outside_color = tex2D_linearize(
-            ORIG_LINEARIZED, scanline_uv + sample_1or4_uv_off).rgb;
-    }
-    
-    //  Compute scanline contributions, accounting for vertical convergence.
-    //  Vertical convergence offsets are in units of current-field scanlines.
-    //  dist2 means "positive sample distance from scanline 2, in scanlines:"
-    float3 dist2 = dist.xxx;
-    if(beam_misconvergence)
-    {
-        const float3 convergence_offsets_vert_rgb =
-            get_convergence_offsets_y_vector();
-        dist2 = dist.xxx - convergence_offsets_vert_rgb;
-    }
-    //  Calculate {sigma, shape}_range outside of scanline_contrib so it's only
-    //  done once per pixel (not 6 times) with runtime params.  Don't reuse the
-    //  vertex shader calculations, so static versions can be constant-folded.
-    const float sigma_range = max(beam_max_sigma, beam_min_sigma) -
-        beam_min_sigma;
-    const float shape_range = max(beam_max_shape, beam_min_shape) -
-        beam_min_shape;
-    //  Calculate and sum final scanline contributions, starting with lines 2/3.
-    //  There is no normalization step, because we're not interpolating a
-    //  continuous signal.  Instead, each scanline is an additive light source.
-    const float3 scanline2_contrib = scanline_contrib(dist2,
-        scanline2_color, ph, sigma_range, shape_range);
-    const float3 scanline3_contrib = scanline_contrib(abs(1.0.xxx - dist2),
-        scanline3_color, ph, sigma_range, shape_range);
-    float3 scanline_intensity = scanline2_contrib + scanline3_contrib;
-    if(beam_num_scanlines > 5.5)
-    {
-        const float3 scanline0_contrib =
-            scanline_contrib(dist2 + 2.0.xxx, scanline0_color,
-                ph, sigma_range, shape_range);
-        const float3 scanline1_contrib =
-            scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
-                ph, sigma_range, shape_range);
-        const float3 scanline4_contrib =
-            scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
-                ph, sigma_range, shape_range);
-        const float3 scanline5_contrib =
-            scanline_contrib(abs(3.0.xxx - dist2), scanline5_color,
-                ph, sigma_range, shape_range);
-        scanline_intensity += scanline0_contrib + scanline1_contrib +
-            scanline4_contrib + scanline5_contrib;
-    }
-    else if(beam_num_scanlines > 4.5)
-    {
-        const float3 scanline1_contrib =
-            scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
-                ph, sigma_range, shape_range);
-        const float3 scanline4_contrib =
-            scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
-                ph, sigma_range, shape_range);
-        const float3 dist0or5 = lerp(
-            dist2 + 2.0.xxx, 3.0.xxx - dist2, dist_round);
-        const float3 scanline0or5_contrib = scanline_contrib(
-            dist0or5, scanline_outside_color, ph, sigma_range, shape_range);
-        scanline_intensity += scanline1_contrib + scanline4_contrib +
-            scanline0or5_contrib;
-    }
-    else if(beam_num_scanlines > 3.5)
-    {
-        const float3 scanline1_contrib =
-            scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
-                ph, sigma_range, shape_range);
-        const float3 scanline4_contrib =
-            scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
-                ph, sigma_range, shape_range);
-        scanline_intensity += scanline1_contrib + scanline4_contrib;
-    }
-    else if(beam_num_scanlines > 2.5)
-    {
-        const float3 dist1or4 = lerp(
-            dist2 + 1.0.xxx, 2.0.xxx - dist2, dist_round);
-        const float3 scanline1or4_contrib = scanline_contrib(
-            dist1or4, scanline_outside_color, ph, sigma_range, shape_range);
-        scanline_intensity += scanline1or4_contrib;
-    }
-
-    //  Auto-dim the image to avoid clipping, encode if necessary, and output.
-    //  My original idea was to compute a minimal auto-dim factor and put it in
-    //  the alpha channel, but it wasn't working, at least not reliably.  This
-    //  is faster anyway, levels_autodim_temp = 0.5 isn't causing banding.
-    return encode_output(float4(scanline_intensity * levels_autodim_temp, 1.0));
-}
-
--- a/data/resources/shaders/reshade/Shaders/denoisers/bilateral.fx
+++ b/data/resources/shaders/reshade/Shaders/denoisers/bilateral.fx
@@ -1,166 +0,0 @@
-#include "ReShade.fxh"
-
-/*
-   Bilateral - Smart
-   
-   Copyright (C) 2024 guest(r)
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License
-   as published by the Free Software Foundation; either version 2
-   of the License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-   
-*/ 
-
-
-uniform float FRANGE <
-    ui_type = "drag";
-    ui_min = 1.0;
-    ui_max = 10.0;
-    ui_step = 1.0;
-    ui_label = "Filter Range";
-> = 5.0;
-
-uniform float FBSMOOTH <
-    ui_type = "drag";
-    ui_min = 0.05;
-    ui_max = 1.0;
-    ui_step = 0.025;
-    ui_label = "Filter Base Smoothing";
-> = 0.3;
-
-uniform float FSIGMA <
-    ui_type = "drag";
-    ui_min = 0.15;
-    ui_max = 1.5;
-    ui_step = 0.05;
-    ui_label = "Filter Strength";
-> = 1.0;
-
-uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
-uniform float2 ViewportSize < source = "viewportsize"; >;
-
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
-
-texture2D tBilateral_P0{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
-sampler2D sBilateral_P0{Texture=tBilateral_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
-
-#define FSIGMA1 (1.0/FSIGMA)
-
-#define COMPAT_TEXTURE(c,d) tex2D(c,d)
-
-float wt(float3 A, float3 B)
-{    
-    return clamp(FBSMOOTH - 2.33*dot(abs(A-B),1.0.xxx)/(dot(A+B,1.0.xxx)+1.0), 0.0, 0.25);
-}
-
-
-float getw(float x, float3 c, float3 p)
-{
-    float y = pow(max(1.0-x,0.0), FSIGMA1);
-    float d = wt(c,p);
-    return y*d;
-}
-
-
-
-float4 PS_Bilateral_X(float4 position: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
-{
-    float4 SourceSize   = float4((ViewportSize*BufferToViewportRatio), 1.0/(ViewportSize*BufferToViewportRatio));
-//    float4 SourceSize   = float4(1.0/NormalizedNativePixelSize, NormalizedNativePixelSize);
-    float2 pos = vTexCoord * SourceSize.xy;
-    float f =  0.5-frac(pos.x);
-    float2 tex = floor(pos)*SourceSize.zw + 0.5*SourceSize.zw;
-    float2 dx  = float2(SourceSize.z, 0.0);
-    
-    float w, fp;
-    float wsum = 0.0;
-    float3 pixel;
-    float FPR = FRANGE;
-    float FPR1 = 1.0/FPR;
-    float LOOPSIZE = FPR;
-    float x = -FPR;
-
-    float3 comp = COMPAT_TEXTURE(sBackBuffer, tex).rgb;
-    float3 color = 0.0.xxx;
-    
-    do
-    {
-        pixel  = COMPAT_TEXTURE(sBackBuffer, tex + x*dx).rgb;        
-        fp = min(abs(x+f),FPR)*FPR1;
-        w = getw(fp,comp,pixel);            
-        color = color + w * pixel;
-        wsum   = wsum + w;
-
-        x = x + 1.0;
-        
-    } while (x <= LOOPSIZE);
-
-    color = color / wsum;
-
-    return float4(color, 1.0);
-}
-
-
-float4 PS_Bilateral_Y(float4 position: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
-{
-    float4 SourceSize   = float4((ViewportSize*BufferToViewportRatio), 1.0/(ViewportSize*BufferToViewportRatio));
-    float2 pos = vTexCoord * SourceSize.xy;
-    float f =  0.5-frac(pos.y);
-    float2 tex = floor(pos)*SourceSize.zw + 0.5*SourceSize.zw;
-    float2 dy  = float2(0.0, SourceSize.w);
-    
-    float w, fp;
-    float wsum = 0.0;
-    float3 pixel;
-    float FPR = FRANGE;
-    float FPR1 = 1.0/FPR;
-    float LOOPSIZE = FPR;
-    float y = -FPR;
-
-    float3 comp = COMPAT_TEXTURE(sBilateral_P0, tex).rgb;
-    float3 color = 0.0.xxx;
-    
-    do
-    {
-        pixel  = COMPAT_TEXTURE(sBilateral_P0, tex + y*dy).rgb;        
-        fp = min(abs(y+f),FPR)*FPR1;
-        w = getw(fp,comp,pixel);            
-        color = color + w * pixel;
-        wsum   = wsum + w;
-
-        y = y + 1.0;
-        
-    } while (y <= LOOPSIZE);
-
-    color = color / wsum;
-
-    return float4(color, 1.0);
-}
-
-technique Bilateral
-{
-
-    pass
-    {
-        VertexShader = PostProcessVS;
-        PixelShader  = PS_Bilateral_X;
-        RenderTarget = tBilateral_P0;
-    }
-    pass
-    {
-        VertexShader = PostProcessVS;
-        PixelShader  = PS_Bilateral_Y;
-    }
-
-}
--- a/data/resources/shaders/reshade/Shaders/edge-smoothing/super-xbr.fx
+++ b/data/resources/shaders/reshade/Shaders/edge-smoothing/super-xbr.fx
@@ -32,7 +32,7 @@ uniform float XBR_EDGE_STR_P0 <
    ui_min = 0.0;
    ui_max = 5.0;
    ui_step = 0.5;
-    ui_label = "Xbr - Edge Strength";
+    ui_label = "Xbr - Edge Strength p0";
 > = 5.0;

 uniform float XBR_WEIGHT <
@@ -76,7 +76,7 @@ uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
 uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;

 texture2D tBackBufferY{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
-sampler2D sBackBufferY{Texture=tBackBufferY;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
+sampler2D sBackBufferY{Texture=tBackBufferY;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};

 texture2D tSuper_xBR_P0 < pooled = true; > {Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
 sampler2D sSuper_xBR_P0{Texture=tSuper_xBR_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
@@ -87,11 +87,8 @@ sampler2D sSuper_xBR_P1{Texture=tSuper_xBR_P1;AddressU=CLAMP;AddressV=CLAMP;Addr
 texture2D tSuper_xBR_P2 < pooled = true; > {Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
 sampler2D sSuper_xBR_P2{Texture=tSuper_xBR_P2;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};

-#define weight1 (XBR_WEIGHT*1.29633/10.0)
-#define weight2 (XBR_WEIGHT*1.75068/10.0/2.0)
-#define limits  (XBR_EDGE_STR_P0+0.000001)
+#define Y float3(.2126,.7152,.0722)

-static const float3 Y = float3(.2126,.7152,.0722);
 static const float wp0[6] = {2.0, 1.0, -1.0, 4.0, -1.0, 1.0};
 static const float wp1[6] = {1.0, 0.0,  0.0, 0.0,  0.0, 0.0};
 static const float wp2[6] = {0.0, 0.0,  0.0, 1.0,  0.0, 0.0};
@@ -156,11 +153,15 @@ float3 super_xbr(float wp[6], float4 P0, float4  B, float4  C, float4 P1, float4
    /* Calc edgeness in horizontal/vertical directions. */
    float hv_edge = (hv_wd(wp, f, i, e, h, c, i5, b, h5) - hv_wd(wp, e, f, h, i, d, f4, g, i4));

+    float limits = XBR_EDGE_STR_P0 + 0.000001;
    float edge_strength = smoothstep(0.0, limits, abs(d_edge));
    
    float4 w1, w2;
    float3 c3, c4;

+    float weight1 = (XBR_WEIGHT*1.29633/10.0);
+    float weight2 = (XBR_WEIGHT*1.75068/10.0/2.0);
+
    /* Filter weights. Two taps only. */
    w1 = float4(-weight1, weight1+0.50, weight1+0.50, -weight1);
    w2 = float4(-weight2, weight2+0.25, weight2+0.25, -weight2);
@@ -182,17 +183,15 @@ float3 super_xbr(float wp[6], float4 P0, float4  B, float4  C, float4 P1, float4
    return color;
 }

-float4 PS_BackBufferY(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
+float4 BackBufferY(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
 {
-    float2 tc = (floor(vTexCoord / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize;
-
-    float3 color = tex2D(ReShade::BackBuffer, tc).rgb;
+    float3 color = tex2D(ReShade::BackBuffer, vTexCoord.xy).rgb;

    return float4(color, luma(color));
 }


-float4 PS_Super_xBR_P0(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
+float4 Super_xBR_P0(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
 {
    float2 ps = NormalizedNativePixelSize;

@@ -224,7 +223,7 @@ float4 PS_Super_xBR_P0(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : S



-float4 PS_Super_xBR_P1(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
+float4 Super_xBR_P1(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
 {
    float2 ps = NormalizedNativePixelSize;

@@ -267,7 +266,7 @@ float4 PS_Super_xBR_P1(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : S
 }


-float4 PS_Super_xBR_P2(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
+float4 Super_xBR_P2(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
 {
    float2 ps = 0.5*NormalizedNativePixelSize;

@@ -326,7 +325,7 @@ float4 resampler(float4 x)
 }


-float4 PS_Jinc2(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
+float4 Jinc2(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
 {
    float2 ps = 0.5*NormalizedNativePixelSize;

@@ -392,33 +391,33 @@ float4 PS_Jinc2(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Targe

 technique Super_xBR
 {
-    pass
+    pass PS_BackBufferY
    {
        VertexShader = PostProcessVS;
-        PixelShader  = PS_BackBufferY;
+        PixelShader  = BackBufferY;
        RenderTarget = tBackBufferY;
    }
-    pass
+    pass PS_Super_xBR_P0
    {
        VertexShader = PostProcessVS;
-        PixelShader  = PS_Super_xBR_P0;
+        PixelShader  = Super_xBR_P0;
        RenderTarget = tSuper_xBR_P0;
    }
-    pass
+    pass PS_Super_xBR_P1
    {
        VertexShader = PostProcessVS;
-        PixelShader  = PS_Super_xBR_P1;
+        PixelShader  = Super_xBR_P1;
        RenderTarget = tSuper_xBR_P1;
    }
-    pass
+    pass PS_Super_xBR_P2
    {
        VertexShader = PostProcessVS;
-        PixelShader  = PS_Super_xBR_P2;
+        PixelShader  = Super_xBR_P2;
        RenderTarget = tSuper_xBR_P2;
    }
-    pass
+    pass PS_Jinc2
    {
        VertexShader = PostProcessVS;
-        PixelShader  = PS_Jinc2;
+        PixelShader  = Jinc2;
    }
 }
--- a/data/resources/shaders/reshade/Shaders/interpolation/bicubic.fx
+++ b/data/resources/shaders/reshade/Shaders/interpolation/bicubic.fx
@@ -1,4 +1,4 @@
-#include "ReShade.fxh"
+#include "../ReShade.fxh"

 /*
   Bicubic multipass Shader
@@ -32,21 +32,15 @@ uniform int BICUBIC_FILTER <
 	ui_tooltip = "Bicubic: balanced. Catmull-Rom: sharp. B-Spline: blurred. Hermite: soft pixelized.";
 > = 0;

-uniform float B_PRESCALE <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 8.0;
-	ui_step = 1.0;
-	ui_label = "Prescale factor";
-> = 1.0;

 uniform bool B_ANTI_RINGING <
 	ui_type = "radio";
-	ui_label = "Anti-Ringing";
+	ui_label = "Bicubic Anti-Ringing";
 > = false;

 uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-uniform float  BufferWidth < source = "bufferwidth"; >;
+uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
+uniform float2 ViewportSize < source = "viewportsize"; >;

 texture2D tBicubic_P0{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
 sampler2D sBicubic_P0{Texture=tBicubic_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
@@ -92,18 +86,19 @@ float3 bicubic_ar(float fp, float3 C0, float3 C1, float3 C2, float3 C3)
 }


-float4 PS_Bicubic_X(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
+float4 Bicubic_X(float4 pos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
 {
    // Both dimensions are unfiltered, so it looks for lores pixels.
-    float2 ps  = NormalizedNativePixelSize/B_PRESCALE;
-    float2 pos = uv_tx.xy/ps - float2(0.5, 0.0);
-    float2 tc  = (floor(pos) + 0.5.xx) * ps;
-    float2 fp  = frac(pos);
+    float2 ps   = NormalizedNativePixelSize;
+    float2 posi = uv_tx.xy + ps * float2(0.5, 0.0);
+    float2 fp   = frac(posi / ps);

-    float3 C0 = tex2D(ReShade::BackBuffer, tc + ps*float2(-1.0, 0.0)).rgb;
-    float3 C1 = tex2D(ReShade::BackBuffer, tc + ps*float2( 0.0, 0.0)).rgb;
-    float3 C2 = tex2D(ReShade::BackBuffer, tc + ps*float2( 1.0, 0.0)).rgb;
-    float3 C3 = tex2D(ReShade::BackBuffer, tc + ps*float2( 2.0, 0.0)).rgb;
+    float2 tc  = posi - (fp + 0.5) * ps;
+
+    float3 C0 = tex2D(ReShade::BackBuffer, tc + ps*float2(-1.0, 1.0)).rgb;
+    float3 C1 = tex2D(ReShade::BackBuffer, tc + ps*float2( 0.0, 1.0)).rgb;
+    float3 C2 = tex2D(ReShade::BackBuffer, tc + ps*float2( 1.0, 1.0)).rgb;
+    float3 C3 = tex2D(ReShade::BackBuffer, tc + ps*float2( 2.0, 1.0)).rgb;

    float3 color = bicubic_ar(fp.x, C0, C1, C2, C3);

@@ -111,36 +106,38 @@ float4 PS_Bicubic_X(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Targ
 }


-float4 PS_Bicubic_Y(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
+float4 Bicubic_Y(float4 pos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
 {
    // One must be careful here. Horizontal dimension is already filtered, so it looks for x in hires.
-    float2 ps  = float2(1.0/BufferWidth, NormalizedNativePixelSize.y/B_PRESCALE);
-    float2 pos = uv_tx.xy/ps - float2(0.0, 0.5);
-    float2 tc  = (floor(pos) + 0.5.xx) * ps;
-    float2 fp  = frac(pos);
+    float2 ps   = float2(1.0/(ViewportSize.x*BufferToViewportRatio.x), NormalizedNativePixelSize.y);
+    float2 posi = uv_tx.xy + ps * float2(0.5, 0.5);
+    float2 fp   = frac(posi / ps);

-    float3 C0 = tex2D(sBicubic_P0, tc + ps*float2(0.0, -1.0)).rgb;
-    float3 C1 = tex2D(sBicubic_P0, tc + ps*float2(0.0,  0.0)).rgb;
-    float3 C2 = tex2D(sBicubic_P0, tc + ps*float2(0.0,  1.0)).rgb;
-    float3 C3 = tex2D(sBicubic_P0, tc + ps*float2(0.0,  2.0)).rgb;
+    float2 tc  = posi - (fp + 0.5) * ps;
+
+    float3 C0 = tex2D(sBicubic_P0, tc + ps*float2(1.0, -1.0)).rgb;
+    float3 C1 = tex2D(sBicubic_P0, tc + ps*float2(1.0,  0.0)).rgb;
+    float3 C2 = tex2D(sBicubic_P0, tc + ps*float2(1.0,  1.0)).rgb;
+    float3 C3 = tex2D(sBicubic_P0, tc + ps*float2(1.0,  2.0)).rgb;

    float3 color = bicubic_ar(fp.y, C0, C1, C2, C3);

    return float4(color, 1.0);
 }

-
 technique Bicubic
 {
-	pass
+
+	pass PS_Bicubic_X
 	{
 		VertexShader = PostProcessVS;
-		PixelShader  = PS_Bicubic_X;
+		PixelShader  = Bicubic_X;
 		RenderTarget = tBicubic_P0;
 	}
-	pass
+	pass PS_Bicubic_Y
 	{
 		VertexShader = PostProcessVS;
-		PixelShader  = PS_Bicubic_Y;
+		PixelShader  = Bicubic_Y;
 	}
+
 }
--- a/data/resources/shaders/reshade/Shaders/interpolation/lanczos3.fx
+++ b/data/resources/shaders/reshade/Shaders/interpolation/lanczos3.fx
@@ -28,14 +28,6 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 http://www.gnu.org/copyleft/gpl.html
 */

-uniform float L3_PRESCALE <
-	ui_type = "drag";
-	ui_min = 1.0;
-	ui_max = 8.0;
-	ui_step = 1.0;
-	ui_label = "Prescale factor";
-> = 1.0;
-

 uniform bool LANCZOS3_ANTI_RINGING <
 	ui_type = "radio";
@@ -43,12 +35,12 @@ uniform bool LANCZOS3_ANTI_RINGING <
 > = true;

 uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-uniform float  BufferWidth < source = "bufferwidth"; >;
+uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
+uniform float2 ViewportSize < source = "viewportsize"; >;

 texture2D tLanczos3_P0{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
 sampler2D sLanczos3_P0{Texture=tLanczos3_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};

-
 #define AR_STRENGTH 1.0
 #define FIX(c) (max(abs(c),1e-5))
 #define PI     3.1415926535897932384626433832795
@@ -56,10 +48,10 @@ sampler2D sLanczos3_P0{Texture=tLanczos3_P0;AddressU=CLAMP;AddressV=CLAMP;Addres

 float3 weight3(float x)
 {
-   float3 Sampling = FIX(2.0 * PI * float3(x - 1.5, x - 0.5, x + 0.5));
+   float3 Sample = FIX(2.0 * PI * float3(x - 1.5, x - 0.5, x + 0.5));

   // Lanczos3. Note: we normalize outside this function, so no point in multiplying by radius.
-   return sin(Sampling) * sin(Sampling / radius) / (Sampling * Sampling);
+   return sin(Sample) * sin(Sample / radius) / (Sample * Sample);
 }

 float3 lanczos3ar(float fp, float3 C0, float3 C1, float3 C2, float3 C3, float3 C4, float3 C5)
@@ -67,7 +59,7 @@ float3 lanczos3ar(float fp, float3 C0, float3 C1, float3 C2, float3 C3, float3 C
    float3 w1 = weight3(0.5 - fp * 0.5);
    float3 w2 = weight3(1.0 - fp * 0.5);

-    float sum   = dot(w1, 1.0.xxx) + dot(w2, 1.0.xxx);
+    float sum   = dot(  w1, float3(1.,1.,1.)) + dot(  w2, float3(1.,1.,1.));
    w1   /= sum;
    w2   /= sum;

@@ -87,21 +79,23 @@ float3 lanczos3ar(float fp, float3 C0, float3 C1, float3 C2, float3 C3, float3 C
 }


-
-float4 PS_Lanczos3_X(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
+float4 Lanczos3_X(float4 pos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
 {
    // Both dimensions are unfiltered, so it looks for lores pixels.
-    float2 ps  = NormalizedNativePixelSize/L3_PRESCALE;
-    float2 pos = uv_tx.xy/ps - float2(0.5, 0.0);
-    float2 tc  = (floor(pos) + 0.5.xx) * ps;
-    float2 fp  = frac(pos);
+    float2 ps = NormalizedNativePixelSize;
+    float2 posi = uv_tx.xy + ps * float2(0.5, 0.0);
+    float2 fp = frac(posi / ps);

-    float3 C0 = tex2D(ReShade::BackBuffer, tc + ps*float2(-2.0, 0.0)).rgb;
-    float3 C1 = tex2D(ReShade::BackBuffer, tc + ps*float2(-1.0, 0.0)).rgb;
-    float3 C2 = tex2D(ReShade::BackBuffer, tc + ps*float2( 0.0, 0.0)).rgb;
-    float3 C3 = tex2D(ReShade::BackBuffer, tc + ps*float2( 1.0, 0.0)).rgb;
-    float3 C4 = tex2D(ReShade::BackBuffer, tc + ps*float2( 2.0, 0.0)).rgb;
-    float3 C5 = tex2D(ReShade::BackBuffer, tc + ps*float2( 3.0, 0.0)).rgb;
+    float2 xystart = posi - (fp + 0.5) * ps;
+
+    float ypos = xystart.y + ps.y;
+
+    float3 C0 = tex2D(ReShade::BackBuffer, float2(xystart.x - ps.x * 2.0, ypos)).rgb;
+    float3 C1 = tex2D(ReShade::BackBuffer, float2(xystart.x - ps.x * 1.0, ypos)).rgb;
+    float3 C2 = tex2D(ReShade::BackBuffer, float2(xystart.x             , ypos)).rgb;
+    float3 C3 = tex2D(ReShade::BackBuffer, float2(xystart.x + ps.x * 1.0, ypos)).rgb;
+    float3 C4 = tex2D(ReShade::BackBuffer, float2(xystart.x + ps.x * 2.0, ypos)).rgb;
+    float3 C5 = tex2D(ReShade::BackBuffer, float2(xystart.x + ps.x * 3.0, ypos)).rgb; 

    float3 color = lanczos3ar(fp.x, C0, C1, C2, C3, C4, C5);

@@ -109,38 +103,42 @@ float4 PS_Lanczos3_X(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Tar
 }


-float4 PS_Lanczos3_Y(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
+float4 Lanczos3_Y(float4 pos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
 {
    // One must be careful here. Horizontal dimension is already filtered, so it looks for x in hires.
-    float2 ps  = float2(1.0/BufferWidth, NormalizedNativePixelSize.y/L3_PRESCALE);
-    float2 pos = uv_tx.xy/ps - float2(0.0, 0.5);
-    float2 tc  = (floor(pos) + 0.5.xx) * ps;
-    float2 fp  = frac(pos);
+    float2 ps = float2(1.0/(ViewportSize.x*BufferToViewportRatio.x), NormalizedNativePixelSize.y);
+    float2 posi = uv_tx.xy + ps * float2(0.5, 0.5);
+    float2 fp = frac(posi / ps);

-    float3 C0 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, -2.0)).rgb;
-    float3 C1 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, -1.0)).rgb;
-    float3 C2 = tex2D(sLanczos3_P0, tc + ps*float2(0.0,  0.0)).rgb;
-    float3 C3 = tex2D(sLanczos3_P0, tc + ps*float2(0.0,  1.0)).rgb;
-    float3 C4 = tex2D(sLanczos3_P0, tc + ps*float2(0.0,  2.0)).rgb;
-    float3 C5 = tex2D(sLanczos3_P0, tc + ps*float2(0.0,  3.0)).rgb;
+    float2 xystart = posi - (fp + 0.5) * ps;
+
+    float xpos = xystart.x  + ps.x;
+
+    float3 C0 = tex2D(sLanczos3_P0, float2(xpos, xystart.y - ps.y * 2.0)).rgb;
+    float3 C1 = tex2D(sLanczos3_P0, float2(xpos, xystart.y - ps.y * 1.0)).rgb;
+    float3 C2 = tex2D(sLanczos3_P0, float2(xpos, xystart.y             )).rgb;
+    float3 C3 = tex2D(sLanczos3_P0, float2(xpos, xystart.y + ps.y * 1.0)).rgb;
+    float3 C4 = tex2D(sLanczos3_P0, float2(xpos, xystart.y + ps.y * 2.0)).rgb;
+    float3 C5 = tex2D(sLanczos3_P0, float2(xpos, xystart.y + ps.y * 3.0)).rgb; 

    float3 color = lanczos3ar(fp.y, C0, C1, C2, C3, C4, C5);

    return float4(color, 1.0);
 }

-
 technique Lanczos3
 {
-	pass
+
+	pass PS_Lanczos3_X
 	{
 		VertexShader = PostProcessVS;
-		PixelShader  = PS_Lanczos3_X;
+		PixelShader  = Lanczos3_X;
 		RenderTarget = tLanczos3_P0;
 	}
-	pass
+	pass PS_Lanczos3_Y
 	{
 		VertexShader = PostProcessVS;
-		PixelShader  = PS_Lanczos3_Y;
+		PixelShader  = Lanczos3_Y;
 	}
+
 }
--- a/data/resources/shaders/reshade/Shaders/misc/geom.fx
+++ b/data/resources/shaders/reshade/Shaders/misc/geom.fx
@@ -32,8 +32,11 @@



-uniform bool geom_curvature <
-	ui_type = "radio";
+uniform float geom_curvature <
+	ui_type = "drag";
+	ui_min = 0.0;
+	ui_max = 1.0;
+	ui_step = 1.0;
 	ui_label = "Geom Curvature Toggle";
 > = 1.0;

@@ -53,8 +56,11 @@ uniform float geom_d <
 	ui_label = "Geom Distance";
 > = 1.5;

-uniform bool geom_invert_aspect <
-	ui_type = "radio";
+uniform float geom_invert_aspect <
+	ui_type = "drag";
+	ui_min = 0.0;
+	ui_max = 1.0;
+	ui_step = 1.0;
 	ui_label = "Geom Curvature Aspect Inversion";
 > = 0.0;

@@ -76,16 +82,16 @@ uniform float geom_cornersmooth <

 uniform float geom_x_tilt <
 	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
+	ui_min = -0.5;
+	ui_max = 0.5;
 	ui_step = 0.05;
 	ui_label = "Geom Horizontal Tilt";
 > = 0.0;

 uniform float geom_y_tilt <
 	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
+	ui_min = -0.5;
+	ui_max = 0.5;
 	ui_step = 0.05;
 	ui_label = "Geom Vertical Tilt";
 > = 0.0;
@@ -106,22 +112,6 @@ uniform float geom_overscan_y <
 	ui_label = "Geom Vert. Overscan %";
 > = 100.0;

-uniform float centerx <
-	ui_type = "drag";
-	ui_min = -100.0;
-	ui_max = 100.0;
-	ui_step = 0.1;
-	ui_label = "Image Center X";
-> = 0.00;
-
-uniform float centery <
-	ui_type = "drag";
-	ui_min = -100.0;
-	ui_max = 100.0;
-	ui_step = 0.1;
-	ui_label = "Image Center Y";
-> = 0.00;
-
 uniform float geom_lum <
 	ui_type = "drag";
 	ui_min = 0.5;
@@ -147,13 +137,9 @@ uniform float geom_monitor_gamma <
 > = 2.2;


-uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
+uniform float2 BufferViewportRatio < source = "buffer_to_viewport_ratio"; >;
 uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
 uniform float2 ViewportSize < source = "viewportsize"; >;
-uniform float  ViewportWidth < source = "viewportwidth"; >;
-uniform float  ViewportHeight < source = "viewportheight"; >;
-
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};

 // Comment the next line to disable interpolation in linear gamma (and
 // gain speed).
@@ -170,14 +156,14 @@ sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BO
 #define PI 3.141592653589

 #ifdef LINEAR_PROCESSING
-#       define TEX2D(c) pow(tex2D(sBackBuffer, (c)), geom_target_gamma.xxxx)
+#       define TEX2D(c) pow(tex2D(ReShade::BackBuffer, (c)), float4(geom_target_gamma,geom_target_gamma,geom_target_gamma,geom_target_gamma))
 #else
-#       define TEX2D(c) tex2D(sBackBuffer, (c))
+#       define TEX2D(c) tex2D(ReShade::BackBuffer, (c))
 #endif

 // aspect ratio
-#define aspect     (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
-#define overscan   (1.01.xx);
+#define aspect     (geom_invert_aspect>0.5?float2(0.75,1.0):float2(1.0,0.75))
+#define overscan   (float2(1.01,1.01));


 struct ST_VertexOut
@@ -189,21 +175,94 @@ struct ST_VertexOut
 };


+float vs_intersect(float2 xy, float2 sinangle, float2 cosangle)
+{
+    float A = dot(xy,xy) + geom_d*geom_d;
+    float B = 2.0*(geom_R*(dot(xy,sinangle)-geom_d*cosangle.x*cosangle.y)-geom_d*geom_d);
+    float C = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y;
+    
+    return (-B-sqrt(B*B-4.0*A*C))/(2.0*A);
+}
+
+float2 vs_bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
+{
+    float c     = vs_intersect(xy, sinangle, cosangle);
+    float2 point  = (float2(c, c)*xy - float2(-geom_R, -geom_R)*sinangle) / float2(geom_R, geom_R);
+    float2 poc    = point/cosangle;
+    
+    float2 tang   = sinangle/cosangle;
+    float A     = dot(tang, tang) + 1.0;
+    float B     = -2.0*dot(poc, tang);
+    float C     = dot(poc, poc) - 1.0;
+    
+    float a     = (-B + sqrt(B*B - 4.0*A*C))/(2.0*A);
+    float2 uv     = (point - a*sinangle)/cosangle;
+    float r     = FIX(geom_R*acos(a));
+    
+    return uv*r/sin(r/geom_R);
+}
+
+float2 vs_fwtrans(float2 uv, float2 sinangle, float2 cosangle)
+{
+    float r = FIX(sqrt(dot(uv,uv)));
+    uv *= sin(r/geom_R)/r;
+    float x = 1.0-cos(r/geom_R);
+    float D = geom_d/geom_R + x*cosangle.x*cosangle.y+dot(uv,sinangle);
+    
+    return geom_d*(uv*cosangle-x*sinangle)/D;
+}
+
+float3 vs_maxscale(float2 sinangle, float2 cosangle)
+{
+    float2 c  = vs_bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle);
+    float2 a  = float2(0.5,0.5)*aspect;
+    
+    float2 lo = float2(vs_fwtrans(float2(-a.x,  c.y), sinangle, cosangle).x,
+                   vs_fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect;
+
+    float2 hi = float2(vs_fwtrans(float2(+a.x,  c.y), sinangle, cosangle).x,
+                   vs_fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect;
+    
+    return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y));
+}
+
+
+
+// Vertex shader generating a triangle covering the entire screen
+void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
+{
+    texcoord.x = (id == 2) ? 2.0 : 0.0;
+    texcoord.y = (id == 1) ? 2.0 : 0.0;
+    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+
+ //   float2 SourceSize = 1.0/NormalizedNativePixelSize;
+    float2 SourceSize = ViewportSize*BufferViewportRatio;
+
+    // Precalculate a bunch of useful values we'll need in the fragment
+    // shader.
+    vVARS.sinangle    = sin(float2(geom_x_tilt, geom_y_tilt));
+    vVARS.cosangle    = cos(float2(geom_x_tilt, geom_y_tilt));
+    vVARS.stretch     = vs_maxscale(vVARS.sinangle, vVARS.cosangle);
+    vVARS.TextureSize = float2(SourceSize.x, SourceSize.y);
+}
+
+
+
 float intersect(float2 xy, float2 sinangle, float2 cosangle)
 {
    float A = dot(xy,xy) + geom_d*geom_d;
    float B, C;

-    B = 2.0*(geom_R*(dot(xy,sinangle) - geom_d*cosangle.x*cosangle.y) - geom_d*geom_d);
-    C = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y;
+       B = 2.0*(geom_R*(dot(xy,sinangle) - geom_d*cosangle.x*cosangle.y) - geom_d*geom_d);
+       C = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y;

    return (-B-sqrt(B*B - 4.0*A*C))/(2.0*A);
 }

 float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
 {
-    float  c      = intersect(xy, sinangle, cosangle);
-    float2 point  = (c.xx*xy + geom_R.xx*sinangle) / geom_R.xx;
+    float c     = intersect(xy, sinangle, cosangle);
+    float2 point  = (float2(c, c)*xy - float2(-geom_R, -geom_R)*sinangle) / float2(geom_R, geom_R);
    float2 poc    = point/cosangle;
    float2 tang   = sinangle/cosangle;

@@ -212,7 +271,7 @@ float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
    float C     = dot(poc, poc) - 1.0;

    float a     = (-B + sqrt(B*B - 4.0*A*C)) / (2.0*A);
-    float2 uv   = (point - a*sinangle) / cosangle;
+    float2 uv     = (point - a*sinangle) / cosangle;
    float r     = FIX(geom_R*acos(a));
    
    return uv*r/sin(r/geom_R);
@@ -225,91 +284,74 @@ float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
    float x = 1.0 - cos(r/geom_R);
    float D;
    
-    D = geom_d/geom_R + x*cosangle.x*cosangle.y + dot(uv,sinangle);
+      D = geom_d/geom_R + x*cosangle.x*cosangle.y + dot(uv,sinangle);

    return geom_d*(uv*cosangle - x*sinangle)/D;
 }

 float3 maxscale(float2 sinangle, float2 cosangle)
 {
-    float2 c = bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle);
-    float2 a = 0.5.xx*aspect;
+       float2 c = bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle);
+       float2 a = float2(0.5, 0.5)*aspect;

-    float2 lo = float2(fwtrans(float2(-a.x,  c.y), sinangle, cosangle).x,
-                       fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect;
-    float2 hi = float2(fwtrans(float2(+a.x,  c.y), sinangle, cosangle).x,
-                       fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect;
+       float2 lo = float2(fwtrans(float2(-a.x,  c.y), sinangle, cosangle).x,
+                      fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect;
+       float2 hi = float2(fwtrans(float2(+a.x,  c.y), sinangle, cosangle).x,
+                      fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect;

-    return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x, hi.y-lo.y));
+       return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x, hi.y-lo.y));
 }

 float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)
 {
-    coord = (coord - 0.5.xx)*aspect*stretch.z + stretch.xy;
+    coord = (coord - float2(0.5, 0.5))*aspect*stretch.z + stretch.xy;
    
    return (bkwtrans(coord, sinangle, cosangle) /
-        float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0)/aspect + 0.5.xx);
+        float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0)/aspect + float2(0.5, 0.5));
 }

-
-// Vertex shader generating a triangle covering the entire screen
-void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
-{
-    texcoord.x = (id == 2) ? 2.0 : 0.0;
-    texcoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    // Screen centering
-    texcoord = texcoord - float2(centerx,centery)/100.0;
-
-    float2 SourceSize = 1.0/NormalizedNativePixelSize;
-
-    // Precalculate a bunch of useful values we'll need in the fragment
-    // shader.
-    vVARS.sinangle    = sin(float2(geom_x_tilt, geom_y_tilt));
-    vVARS.cosangle    = cos(float2(geom_x_tilt, geom_y_tilt));
-    vVARS.stretch     = maxscale(vVARS.sinangle, vVARS.cosangle);
-    vVARS.TextureSize = float2(SourceSize.x, SourceSize.y);
-}
-
-
 float corner(float2 coord)
 {
-           coord = min(coord, 1.0.xx - coord) * aspect;
-    float2 cdist = geom_cornersize.xx;
-           coord = (cdist - min(coord, cdist));
-    float   dist = sqrt(dot(coord, coord));
+    coord = (coord - float2(0.5, 0.5)) * float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0) + float2(0.5, 0.5);
+    coord = min(coord, float2(1.0, 1.0) - coord) * aspect;
+    float2 cdist = float2(geom_cornersize, geom_cornersize);
+    coord = (cdist - min(coord, cdist));
+    float dist = sqrt(dot(coord, coord));
    
-    return clamp((cdist.x - dist)*geom_cornersmooth, 0.0, 1.0);
+      return clamp((cdist.x - dist)*geom_cornersmooth, 0.0, 1.0);
 }

-float fwidth(float value)
-{
-    return abs(ddx(value)) + abs(ddy(value));
+float fwidth(float value){
+  return abs(ddx(value)) + abs(ddy(value));
 }


 float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
 {
    // Texture coordinates of the texel containing the active pixel.
-    float2 xy = (geom_curvature == true) ? transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch) : vTexCoord;
+    float2 xy;

-    float cval = corner((xy-0.5.xx) * BufferToViewportRatio + 0.5.xx);
+    if (geom_curvature > 0.5)
+      xy = transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch);
+    else
+      xy = vTexCoord;

-    float2 uv_ratio = frac((xy * vVARS.TextureSize - 0.5.xx) / vVARS.TextureSize);
+    float cval = corner(xy);
+
+    float2 uv_ratio = frac((xy * vVARS.TextureSize - float2(0.5, 0.5)) / vVARS.TextureSize);

    float4 col = TEX2D(xy);

 #ifndef LINEAR_PROCESSING
-    col  = pow(col, geom_target_gamma.xxxx);
+    col  = pow(col , float4(geom_target_gamma, geom_target_gamma, geom_target_gamma, geom_target_gamma));
 #endif

    col.rgb *= (geom_lum * step(0.0, uv_ratio.y));

-    float3 mul_res = col.rgb * cval.xxx;
+    float3 mul_res = col.rgb * float3(cval, cval, cval);

    // Convert the image gamma for display on our output device.
-    mul_res = pow(mul_res, 1.0 / geom_monitor_gamma.xxx);
+    mul_res = pow(mul_res, float3(1.0 / geom_monitor_gamma, 1.0 / geom_monitor_gamma, 1.0 / geom_monitor_gamma));

    return float4(mul_res, 1.0);
 }
--- a/data/resources/shaders/reshade/Shaders/misc/include/geom.fxh
+++ b/data/resources/shaders/reshade/Shaders/misc/include/geom.fxh
@@ -1,224 +0,0 @@
-#ifndef GEOM_PARAMS_H
-#define GEOM_PARAMS_H
-
-/*
-    Geom Shader - a modified CRT-Geom without CRT features made to be appended/integrated
-    into any other shaders and provide curvature/warping/oversampling features.
-
-    Adapted by Hyllian (2024).
-*/
-
-
-/*
-    CRT-interlaced
-
-    Copyright (C) 2010-2012 cgwg, Themaister and DOLLS
-
-    This program is free software; you can redistribute it and/or modify it
-    under the terms of the GNU General Public License as published by the Free
-    Software Foundation; either version 2 of the License, or (at your option)
-    any later version.
-
-    (cgwg gave their consent to have the original version of this shader
-    distributed under the GPL in this message:
-
-    http://board.byuu.org/viewtopic.php?p=26075#p26075
-
-    "Feel free to distribute my shaders under the GPL. After all, the
-    barrel distortion code was taken from the Curvature shader, which is
-    under the GPL."
-    )
-    This shader variant is pre-configured with screen curvature
-*/
-
-
-uniform bool geom_curvature <
-	ui_type = "radio";
-	ui_category = "Geom Curvature";
-	ui_label = "Geom Curvature Toggle";
-> = 0.0;
-
-uniform float geom_R <
-	ui_type = "drag";
-	ui_min = 0.1;
-	ui_max = 10.0;
-	ui_step = 0.1;
-	ui_category = "Geom Curvature";
-	ui_label = "Geom Curvature Radius";
-> = 2.0;
-
-uniform float geom_d <
-	ui_type = "drag";
-	ui_min = 0.1;
-	ui_max = 3.0;
-	ui_step = 0.1;
-	ui_category = "Geom Curvature";
-	ui_label = "Geom Distance";
-> = 1.5;
-
-uniform bool geom_invert_aspect <
-	ui_type = "radio";
-	ui_category = "Geom Curvature";
-	ui_label = "Geom Curvature Aspect Inversion";
-> = 0.0;
-
-uniform float geom_cornersize <
-	ui_type = "drag";
-	ui_min = 0.001;
-	ui_max = 1.0;
-	ui_step = 0.005;
-	ui_category = "Geom Curvature";
-	ui_label = "Geom Corner Size";
-> = 0.03;
-
-uniform float geom_cornersmooth <
-	ui_type = "drag";
-	ui_min = 80.0;
-	ui_max = 2000.0;
-	ui_step = 100.0;
-	ui_category = "Geom Curvature";
-	ui_label = "Geom Corner Smoothness";
-> = 1000.0;
-
-uniform float geom_x_tilt <
-	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
-	ui_step = 0.05;
-	ui_category = "Geom Curvature";
-	ui_label = "Geom Horizontal Tilt";
-> = 0.0;
-
-uniform float geom_y_tilt <
-	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
-	ui_step = 0.05;
-	ui_category = "Geom Curvature";
-	ui_label = "Geom Vertical Tilt";
-> = 0.0;
-
-uniform float geom_overscan_x <
-	ui_type = "drag";
-	ui_min = -125.0;
-	ui_max = 125.0;
-	ui_step = 0.5;
-	ui_category = "Geom Curvature";
-	ui_label = "Geom Horiz. Overscan %";
-> = 100.0;
-
-uniform float geom_overscan_y <
-	ui_type = "drag";
-	ui_min = -125.0;
-	ui_max = 125.0;
-	ui_step = 0.5;
-	ui_category = "Geom Curvature";
-	ui_label = "Geom Vert. Overscan %";
-> = 100.0;
-
-uniform float centerx <
-	ui_type = "drag";
-	ui_min = -100.0;
-	ui_max = 100.0;
-	ui_step = 0.1;
-	ui_category = "Geom Curvature";
-	ui_label = "Image Center X";
-> = 0.00;
-
-uniform float centery <
-	ui_type = "drag";
-	ui_min = -100.0;
-	ui_max = 100.0;
-	ui_step = 0.1;
-	ui_category = "Geom Curvature";
-	ui_label = "Image Center Y";
-> = 0.00;
-
-
-
-// Macros.
-#define FIX(c) max(abs(c), 1e-5);
-
-// aspect ratio
-#define aspect     (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
-
-
-float intersect(float2 xy, float2 sinangle, float2 cosangle)
-{
-    float A = dot(xy,xy) + geom_d*geom_d;
-    float B, C;
-
-    B = 2.0*(geom_R*(dot(xy,sinangle) - geom_d*cosangle.x*cosangle.y) - geom_d*geom_d);
-    C = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y;
-
-    return (-B-sqrt(B*B - 4.0*A*C))/(2.0*A);
-}
-
-float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
-{
-    float  c      = intersect(xy, sinangle, cosangle);
-    float2 point  = (c.xx*xy + geom_R.xx*sinangle) / geom_R.xx;
-    float2 poc    = point/cosangle;
-    float2 tang   = sinangle/cosangle;
-
-    float A     = dot(tang, tang) + 1.0;
-    float B     = -2.0*dot(poc, tang);
-    float C     = dot(poc, poc) - 1.0;
-
-    float a     = (-B + sqrt(B*B - 4.0*A*C)) / (2.0*A);
-    float2 uv   = (point - a*sinangle) / cosangle;
-    float r     = FIX(geom_R*acos(a));
-    
-    return uv*r/sin(r/geom_R);
-}
-
-float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
-{
-    float r = FIX(sqrt(dot(uv, uv)));
-    uv *= sin(r/geom_R)/r;
-    float x = 1.0 - cos(r/geom_R);
-    float D;
-    
-    D = geom_d/geom_R + x*cosangle.x*cosangle.y + dot(uv,sinangle);
-
-    return geom_d*(uv*cosangle - x*sinangle)/D;
-}
-
-float3 maxscale(float2 sinangle, float2 cosangle)
-{
-    float2 c = bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle);
-    float2 a = 0.5.xx*aspect;
-
-    float2 lo = float2(fwtrans(float2(-a.x,  c.y), sinangle, cosangle).x,
-                       fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect;
-    float2 hi = float2(fwtrans(float2(+a.x,  c.y), sinangle, cosangle).x,
-                       fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect;
-
-    return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x, hi.y-lo.y));
-}
-
-float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)
-{
-    coord = (coord - 0.5.xx)*aspect*stretch.z + stretch.xy;
-    
-    return (bkwtrans(coord, sinangle, cosangle) /
-        float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0)/aspect + 0.5.xx);
-}
-
-
-float corner(float2 coord)
-{
-           coord = min(coord, 1.0.xx - coord) * aspect;
-    float2 cdist = geom_cornersize.xx;
-           coord = (cdist - min(coord, cdist));
-    float   dist = sqrt(dot(coord, coord));
-    
-    return clamp((cdist.x - dist)*geom_cornersmooth, 0.0, 1.0);
-}
-
-float fwidth(float value)
-{
-    return abs(ddx(value)) + abs(ddy(value));
-}
-
-#endif  //  GEOM_PARAMS_H
--- a/data/resources/shaders/reshade/Shaders/misc/include/mask.fxh
+++ b/data/resources/shaders/reshade/Shaders/misc/include/mask.fxh
@@ -1,242 +0,0 @@
-#ifndef MASK_PARAMS_H
-#define MASK_PARAMS_H
-
-uniform float MASK_DARK_STRENGTH <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1.0;
-    ui_step = 0.01;
-    ui_category = "CRT Mask";
-    ui_label = "MASK DARK SUBPIXEL STRENGTH";
-> = 0.5;
-
-uniform float MASK_LIGHT_STRENGTH <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 6.0;
-    ui_step = 0.01;
-    ui_category = "CRT Mask";
-    ui_label = "MASK LIGHT SUBPIXEL STRENGTH";
-> = 0.5;
-
-/* Mask code pasted from subpixel_masks.h. Masks 3 and 4 added. */
-float3 mask_weights(float2 coord, int phosphor_layout, float monitor_subpixels, float mask_light_str, float mask_dark_str){
-   float3 weights = float3(1.,1.,1.);
-   float on = 1.+mask_light_str;
-//   float on = 1.;
-   float off = 1.-mask_dark_str;
-   float3 red     = monitor_subpixels==1.0 ? float3(on,  off, off) : float3(off, off, on );
-   float3 green   = float3(off, on,  off);
-   float3 blue    = monitor_subpixels==1.0 ? float3(off, off, on ) : float3(on,  off, off);
-   float3 magenta = float3(on,  off, on );
-   float3 yellow  = monitor_subpixels==1.0 ? float3(on,  on,  off) : float3(off, on,  on );
-   float3 cyan    = monitor_subpixels==1.0 ? float3(off, on,  on ) : float3(on,  on,  off);
-   float3 black   = float3(off, off, off);
-   float3 white   = float3(on,  on,  on );
-   int w, z = 0;
-   
-   // This pattern is used by a few layouts, so we'll define it here
-   float3 aperture_weights = lerp(magenta, green, floor(coord.x % 2.0));
-   
-   if(phosphor_layout == 0) return weights;
-
-   else if(phosphor_layout == 1){
-      // classic aperture for RGB panels; good for 1080p, too small for 4K+
-      // aka aperture_1_2_bgr
-      weights  = aperture_weights;
-      return weights;
-   }
-
-   else if(phosphor_layout == 2){
-      // Classic RGB layout; good for 1080p and lower
-      float3 bw3[3] = {red, green, blue};
-//      float3 bw3[3] = float3[](black, yellow, blue);
-      
-      z = int(floor(coord.x % 3.0));
-      
-      weights = bw3[z];
-      return weights;
-   }
-
-   else if(phosphor_layout == 3){
-      // black and white aperture; good for weird subpixel layouts and low brightness; good for 1080p and lower
-      float3 bw3[3] = {black, white, black};
-      
-      z = int(floor(coord.x % 3.0));
-      
-      weights = bw3[z];
-      return weights;
-   }
-
-   else if(phosphor_layout == 4){
-      // reduced TVL aperture for RGB panels. Good for 4k.
-      // aperture_2_4_rgb
-      
-      float3 big_ap_rgb[4] = {red, yellow, cyan, blue};
-      
-      w = int(floor(coord.x % 4.0));
-      
-      weights = big_ap_rgb[w];
-      return weights;
-   }
-   
-   else if(phosphor_layout == 5){
-      // black and white aperture; good for weird subpixel layouts and low brightness; good for 4k 
-      float3 bw4[4] = {black, black, white, white};
-      
-      z = int(floor(coord.x % 4.0));
-      
-      weights = bw4[z];
-      return weights;
-   }
-
-   else if(phosphor_layout == 6){
-      // aperture_1_4_rgb; good for simulating lower 
-      float3 ap4[4] = {red, green, blue, black};
-      
-      z = int(floor(coord.x % 4.0));
-      
-      weights = ap4[z];
-      return weights;
-   }
-
-   else if(phosphor_layout == 7){
-      // 2x2 shadow mask for RGB panels; good for 1080p, too small for 4K+
-      // aka delta_1_2x1_bgr
-      float3 inverse_aperture = lerp(green, magenta, floor(coord.x % 2.0));
-      weights               = lerp(aperture_weights, inverse_aperture, floor(coord.y % 2.0));
-      return weights;
-   }
-
-   else if(phosphor_layout == 8){
-      // delta_2_4x1_rgb
-      float3 delta[8] = {
-         red, yellow, cyan, blue,
-         cyan, blue, red, yellow
-      };
-      
-      w = int(floor(coord.y % 2.0));
-      z = int(floor(coord.x % 4.0));
-      
-      weights = delta[4*w+z];
-      return weights;
-   }
-
-   else if(phosphor_layout == 9){
-      // delta_1_4x1_rgb; dunno why this is called 4x1 when it's obviously 4x2 /shrug
-      float3 delta1[8] = {
-         red,  green, blue, black,
-         blue, black, red,  green
-      };
-      
-      w = int(floor(coord.y % 2.0));
-      z = int(floor(coord.x % 4.0));
-      
-      weights = delta1[4*w+z];
-      return weights;
-   }
-   
-   else if(phosphor_layout == 10){
-      // delta_2_4x2_rgb
-      float3 delta[16] = {
-         red,  yellow, cyan, blue,
-         red,  yellow, cyan, blue,
-         cyan, blue,   red,  yellow,
-         cyan, blue,   red,  yellow
-      };
-      
-      w = int(floor(coord.y % 4.0));
-      z = int(floor(coord.x % 4.0));
-      
-      weights = delta[4*w+z];
-      return weights;
-   }
-
-   else if(phosphor_layout == 11){
-      // slot mask for RGB panels; looks okay at 1080p, looks better at 4K
-      float3 slotmask[24] = {
-         red, green, blue,    red, green, blue,
-         red, green, blue,  black, black, black,
-         red, green, blue,    red, green, blue,
-         black, black, black, red, green, blue,
-      };
-      
-      w = int(floor(coord.y % 4.0));
-      z = int(floor(coord.x % 6.0));
-
-      // use the indexes to find which color to apply to the current pixel
-      weights = slotmask[6*w+z];
-      return weights;
-   }
-
-   else if(phosphor_layout == 12){
-      // slot mask for RGB panels; looks okay at 1080p, looks better at 4K
-      float3 slotmask[24] = {
-         black,  white, black,   black,  white, black,
-         black,  white, black,  black, black, black,
-         black,  white, black,  black,  white, black,
-         black, black, black,  black,  white, black
-      };
-      
-      w = int(floor(coord.y % 4.0));
-      z = int(floor(coord.x % 6.0));
-
-      // use the indexes to find which color to apply to the current pixel
-      weights = slotmask[6*w+z];
-      return weights;
-   }
-
-   else if(phosphor_layout == 13){
-      // based on MajorPainInTheCactus' HDR slot mask
-      float3 slot[32] = {
-         red,   green, blue,  black, red,   green, blue,  black,
-         red,   green, blue,  black, black, black, black, black,
-         red,   green, blue,  black, red,   green, blue,  black,
-         black, black, black, black, red,   green, blue,  black
-      };
-      
-      w = int(floor(coord.y % 4.0));
-      z = int(floor(coord.x % 8.0));
-      
-      weights = slot[8*w+z];
-      return weights;
-   }
-
-   else if(phosphor_layout == 14){
-      // same as above but for RGB panels
-      float3 slot2[40] = {
-         red,   yellow, green, blue,  blue,  red,   yellow, green, blue,  blue ,
-         black, green,  green, blue,  blue,  red,   red,    black, black, black,
-         red,   yellow, green, blue,  blue,  red,   yellow, green, blue,  blue ,
-         red,   red,    black, black, black, black, green,  green, blue,  blue 
-      };
-   
-      w = int(floor(coord.y % 4.0));
-      z = int(floor(coord.x % 10.0));
-      
-      weights = slot2[10*w+z];
-      return weights;
-   }
-   
-   else if(phosphor_layout == 15){
-      // slot_3_7x6_rgb
-      float3 slot[84] = {
-         red,   red,   yellow, green, cyan,  blue,  blue,  red,   red,   yellow, green,  cyan,  blue,  blue,
-         red,   red,   yellow, green, cyan,  blue,  blue,  red,   red,   yellow, green,  cyan,  blue,  blue,
-         red,   red,   yellow, green, cyan,  blue,  blue,  black, black, black,  black,  black, black, black,
-         red,   red,   yellow, green, cyan,  blue,  blue,  red,   red,   yellow, green,  cyan,  blue,  blue,
-         red,   red,   yellow, green, cyan,  blue,  blue,  red,   red,   yellow, green,  cyan,  blue,  blue,
-         black, black, black,  black, black, black, black, black, red,   red,    yellow, green, cyan,  blue
-      };
-      
-      w = int(floor(coord.y % 6.0));
-      z = int(floor(coord.x % 14.0));
-      
-      weights = slot[14*w+z];
-      return weights;
-   }
-
-   else return weights;
-}
-
-#endif  //  MASK_PARAMS_H
--- a/data/resources/shaders/reshade/Shaders/ntsc/ntsc-adaptive-lite.fx
+++ b/data/resources/shaders/reshade/Shaders/ntsc/ntsc-adaptive-lite.fx
@@ -1,437 +0,0 @@
-#include "ReShade.fxh"
-
-// NTSC-Adaptive-Lite  -  Faster for 2-Phase games (only 15 taps!)
-// based on Themaister's NTSC shader
-
-
-// Preset selector (0 = Custom, 1 = Svideo, 2 = Composite, 3 = RF).
-// Pass 0 derives its fringing/artifacting levels from (quality - 1); the
-// Custom entry uses cust_fringing / cust_artifacting instead.
-uniform int quality <
-    ui_type = "combo";
-    ui_items = "Custom\0Svideo\0Composite\0RF\0";
-    ui_label = "NTSC Preset";
-> = 2;
-
-// Field-merge toggle; pass 0 only consults it when the Custom preset is active.
-uniform bool ntsc_fields <
-    ui_type = "radio";
-    ui_label = "NTSC Merge Fields";
-> = false;
-
-// Phase mode (0 = Auto, 1 = 2-Phase, 2 = 3-Phase). Auto chooses from the
-// native horizontal resolution.
-uniform int ntsc_phase <
-    ui_type = "combo";
-    ui_items = "Auto\0(2-Phase)\0(3-Phase)\0";
-    ui_label = "NTSC Phase";
-> = 0;
-
-// Resolution scale ("res"): feeds the pixel index in pass 0 and the tap
-// spacing in pass 1.
-uniform float ntsc_scale <
-    ui_type = "drag";
-    ui_min = 0.20;
-    ui_max = 3.0;
-    ui_step = 0.05;
-    ui_label = "NTSC Resolution Scaling";
-> = 1.0;
-
-// Chroma gain; enters mix_mat as 2.0 * SATURATION.
-uniform float ntsc_sat <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 2.0;
-    ui_step = 0.01;
-    ui_label = "NTSC Color Saturation";
-> = 1.0;
-
-// Luma gain; first entry of mix_mat.
-uniform float ntsc_bright <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 1.5;
-    ui_step = 0.01;
-    ui_label = "NTSC Brightness";
-> = 1.0;
-
-// Fringing level used when the Custom preset is selected.
-uniform float cust_fringing <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 5.0;
-    ui_step = 0.1;
-    ui_label = "NTSC Custom Fringing Value";
-> = 0.0;
-
-// Artifacting level used when the Custom preset is selected.
-uniform float cust_artifacting <
-    ui_type = "drag";
-    ui_min = 0.0;
-    ui_max = 5.0;
-    ui_step = 0.1;
-    ui_label = "NTSC Custom Artifacting Value";
-> = 0.0;
-
-// Chroma scaling; pass 1 divides the chroma tap spacing by a value derived
-// from it.
-uniform float chroma_scale <
-    ui_type = "drag";
-    ui_min = 0.2;
-    ui_max = 4.0;
-    ui_step = 0.1;
-    ui_label = "NTSC Chroma Scaling";
-> = 1.0;
-
-// Scales the edge-dependent phase perturbation ("blend") computed in pass 0.
-uniform float ntsc_artifacting_rainbow <
-    ui_type = "drag";
-    ui_min = -1.0;
-    ui_max = 1.0;
-    ui_step = 0.1;
-    ui_label = "NTSC Artifacting Rainbow Effect";
-> = 0.0;
-
-// When true, pass 1 raises its output to the power 2.2 before returning it.
-uniform bool linearize <
-    ui_type = "radio";
-    ui_label = "NTSC Linearize Output Gamma";
-> = false;
-
-
-// Values bound through "source" annotations rather than UI widgets.
-uniform float  FrameCount < source = "framecount"; >;
-uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-uniform float  BufferWidth < source = "bufferwidth"; >;
-uniform float  BufferHeight < source = "bufferheight"; >;
-
-
-// Intermediate target: pass 0 renders YIQ into tNTSC_P0 and pass 1 reads it
-// back through sNTSC_P0 with clamped, linearly filtered sampling.
-// RGB16f is the same as float_framebuffer.
-texture2D tNTSC_P0 < pooled = false; > {Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
-sampler2D sNTSC_P0{Texture=tNTSC_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=LINEAR;MinFilter=LINEAR;};
-
-#define PI 3.14159265
-// Output size in pixels, assembled from the two buffer-size uniforms above.
-#define OutputSize float2(BufferWidth,BufferHeight)
-
-// Per-vertex values written by VS_NTSC_ADAPTIVE_P0 and read by
-// PS_NTSC_ADAPTIVE_P0 (several of them through the mix_mat macro).
-struct ST_VertexOut
-{
-    float2 pix_no          : TEXCOORD1; // pixel index used for the modulation phase
-    float  phase           : TEXCOORD2; // 2.0 or 3.0 (phase mode)
-    float  BRIGHTNESS      : TEXCOORD3; // copy of ntsc_bright
-    float  SATURATION      : TEXCOORD4; // copy of ntsc_sat
-    float  FRINGING        : TEXCOORD5; // preset-derived or cust_fringing
-    float  ARTIFACTING     : TEXCOORD6; // preset-derived or cust_artifacting
-    float  CHROMA_MOD_FREQ : TEXCOORD7; // phase advance per pixel_no.x step
-    float  MERGE           : TEXCOORD8; // 1.0 when two fields are averaged, else 0.0
-};
-
-
-// Pass 0 vertex shader: builds the full-screen triangle and precomputes the
-// NTSC parameters that PS_NTSC_ADAPTIVE_P0 reads from vVARS.
-//   id       - vertex index; ids 1 and 2 push one coordinate out to 2.0
-//   position - clip-space position of the vertex
-//   TexCoord - texture coordinate matching that position
-//   vVARS    - pixel index, phase mode and mix_mat coefficients
-void VS_NTSC_ADAPTIVE_P0(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 TexCoord : TEXCOORD, out ST_VertexOut vVARS)
-{
-    TexCoord.x = (id == 2) ? 2.0 : 0.0;
-    TexCoord.y = (id == 1) ? 2.0 : 0.0;
-    position = float4(TexCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    float res = ntsc_scale;
-    float OriginalSize = 1.0/NormalizedNativePixelSize.x;
-    float2 SourceSize  = 1.0/NormalizedNativePixelSize;
-
-    // Pixel index in output pixels; scales below 1.0 shrink it proportionally.
-    if (res < 1.0) vVARS.pix_no = TexCoord * SourceSize.xy * (res * OutputSize.xy / SourceSize.xy); else
-                   vVARS.pix_no = TexCoord * SourceSize.xy * (      OutputSize.xy / SourceSize.xy);
-
-    // ntsc_phase is a 0-based combo (0 = Auto, 1 = 2-Phase, 2 = 3-Phase).
-    // Auto picks 2-phase for native widths above 300 pixels, 3-phase otherwise.
-    // The explicit 3-Phase entry is index 2, so it has to be tested with
-    // "> 1" -- the same test PS_NTSC_ADAPTIVE_P1 uses. Testing "> 2" could
-    // never be true and left the two passes in different phase modes.
-    vVARS.phase = (ntsc_phase < 1) ? ((OriginalSize > 300.0) ? 2.0 : 3.0) : ((ntsc_phase > 1) ? 3.0 : 2.0);
-
-    // quality is 0 = Custom .. 3 = RF, so Quality is -1 for Custom and 0..2
-    // for the three presets.
-    float Quality = float(quality-1);
-
-    res = max(res, 1.0);
-    vVARS.CHROMA_MOD_FREQ = (vVARS.phase < 2.5) ? (4.0 * PI / 15.0) : (PI / 3.0);
-    // Presets scale with Quality; Custom takes the user-supplied values.
-    vVARS.ARTIFACTING = (Quality > -0.5) ? Quality * 0.5*(res+1.0) : cust_artifacting;
-    vVARS.FRINGING = (Quality > -0.5) ? Quality : cust_fringing;
-    vVARS.SATURATION = ntsc_sat;
-    vVARS.BRIGHTNESS = ntsc_bright;
-    vVARS.pix_no.x = vVARS.pix_no.x * res;
-
-    // Fields are merged for 3-phase Svideo/Composite; Custom follows ntsc_fields.
-    vVARS.MERGE = (Quality == 2.0 || vVARS.phase < 2.5) ? 0.0 : 1.0;
-    vVARS.MERGE = (Quality == -1.0) ? float(ntsc_fields == true) : vVARS.MERGE;
-}
-
-// Cross-talk matrix applied to a YIQ triple with mul(mix_mat, yiq). It expands
-// textually, so a variable named vVARS must be in scope at the use site.
-// Row 0 scales luma by BRIGHTNESS and adds FRINGING-weighted chroma; rows 1-2
-// add ARTIFACTING-weighted luma to each chroma channel scaled by 2*SATURATION.
-#define mix_mat float3x3(vVARS.BRIGHTNESS, vVARS.FRINGING, vVARS.FRINGING, vVARS.ARTIFACTING, 2.0 * vVARS.SATURATION, 0.0, vVARS.ARTIFACTING, 0.0, 2.0 * vVARS.SATURATION)
-
-// YIQ -> RGB coefficients; one row per output channel (R, G, B), columns
-// weight Y, I and Q. Consumed by yiq2rgb().
-static const float3x3 yiq2rgb_mat = float3x3(
-   1.0, 0.956, 0.6210,
-   1.0, -0.2720, -0.6474,
-   1.0, -1.1060, 1.7046);
-
-// Converts a YIQ triple to RGB by multiplying it with yiq2rgb_mat.
-float3 yiq2rgb(float3 yiq)
-{
-   float3 rgb = mul(yiq2rgb_mat, yiq);
-
-   return rgb;
-}
-
-// RGB -> YIQ coefficients; one row per output channel (Y, I, Q), columns
-// weight R, G and B. Consumed by rgb2yiq().
-static const float3x3 yiq_mat = float3x3(
-      0.2989, 0.5870, 0.1140,
-      0.5959, -0.2744, -0.3216,
-      0.2115, -0.5229, 0.3114
-);
-
-// Converts an RGB colour to YIQ by multiplying it with yiq_mat.
-float3 rgb2yiq(float3 col)
-{
-   float3 yiq = mul(yiq_mat, col);
-
-   return yiq;
-}
-
-// Per-channel weights used by df3() to collapse an RGB response to a scalar.
-static const float3 Y = float3( 0.299,  0.587,  0.114);
-
-// Scalar measure of how strongly the middle sample b departs from both of
-// its neighbours a and c. The per-channel product (b - a) * (b - c) is only
-// positive where b lies above or below both neighbours.
-float df3(float3 a, float3 b, float3 c)
-{
-    float3 departure = 3.0*(b - a) * (b - c);
-    float3 response  = smoothstep(0.0, 0.56, departure);
-
-    return dot(response, Y);
-}
-
-
-// Pass 0 pixel shader: converts the source colour to YIQ, then modulates the
-// chroma, applies the mix_mat cross-talk and demodulates again. The result is
-// returned as YIQ (the technique renders it into tNTSC_P0 for pass 1).
-//   vTexCoord - texture coordinate of the pixel
-//   vVARS     - parameters precomputed by VS_NTSC_ADAPTIVE_P0
-float4 PS_NTSC_ADAPTIVE_P0(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
-{
-   float4 SourceSize  = float4(1.0/NormalizedNativePixelSize, NormalizedNativePixelSize);
-
-   float mod1 = 2.0;
-   float mod2 = 3.0;
-
-   // One native texel in each direction.
-   float2 dx = float2(1.0, 0.0)*SourceSize.zw;
-   float2 dy = float2(0.0, 1.0)*SourceSize.zw;
-
-   // 3x3 neighbourhood around the current pixel.
-   float3 C = tex2D(ReShade::BackBuffer, vTexCoord    ).xyz;
-   float3 L = tex2D(ReShade::BackBuffer, vTexCoord -dx).xyz;
-   float3 R = tex2D(ReShade::BackBuffer, vTexCoord +dx).xyz;
-   float3 U = tex2D(ReShade::BackBuffer, vTexCoord -dy).xyz;
-   float3 D = tex2D(ReShade::BackBuffer, vTexCoord +dy).xyz;
-   float3 UL = tex2D(ReShade::BackBuffer, vTexCoord -dx -dy).xyz;
-   float3 UR = tex2D(ReShade::BackBuffer, vTexCoord +dx -dy).xyz;
-   float3 DL = tex2D(ReShade::BackBuffer, vTexCoord -dx +dy).xyz;
-   float3 DR = tex2D(ReShade::BackBuffer, vTexCoord +dx +dy).xyz;
-
-   // The centre sample is also the colour being encoded, so it is reused
-   // here instead of being fetched a second time.
-   float3 yiq  = rgb2yiq(C);
-   float3 yiq2 = yiq;
-
-   // hori: all three rows respond along x; vert: the columns do not respond
-   // along y. Their product gates the rainbow perturbation of the phase.
-   float hori = step(0.01,(df3(L, C, R) * df3(UL, U, UR) * df3(DL, D, DR)));
-   float vert = 1.0 - step(0.01,(df3(U, C, D) * df3(UL, L, DL) * df3(UR, R, DR)));
-
-   float blend = hori * vert * ntsc_artifacting_rainbow;
-
-   if (vVARS.MERGE > 0.5)
-   {
-      // Second field: same pixel encoded with the phase of the next frame.
-      float chroma_phase2 = (vVARS.phase < 2.5) ? PI * ((vVARS.pix_no.y % mod1) + ((FrameCount+1.) % 2.)) : 0.6667 * PI * ((vVARS.pix_no.y % mod2) + ((FrameCount+1.) % 2.));
-      float mod_phase2 = (blend + 1.0) * chroma_phase2 + vVARS.pix_no.x * vVARS.CHROMA_MOD_FREQ;
-      float i_mod2 = cos(mod_phase2);
-      float q_mod2 = sin(mod_phase2);
-      yiq2.yz *= float2(i_mod2, q_mod2); // Modulate.
-      yiq2 = mul(mix_mat, yiq2); // Cross-talk.
-      yiq2.yz *= float2(i_mod2, q_mod2); // Demodulate.
-   }
-
-   // Primary field: phase of the current frame. It must differ from the
-   // second field by one frame; with both using FrameCount+1 the merge below
-   // averaged two identical values and had no effect.
-   float chroma_phase = (vVARS.phase < 2.5) ? PI * ((vVARS.pix_no.y % mod1) + (FrameCount % 2.)) : 0.6667 * PI * ((vVARS.pix_no.y % mod2) + (FrameCount % 2.));
-   float mod_phase = (blend + 1.0) * chroma_phase + vVARS.pix_no.x * vVARS.CHROMA_MOD_FREQ;
-
-   float i_mod = cos(mod_phase);
-   float q_mod = sin(mod_phase);
-
-   yiq.yz *= float2(i_mod, q_mod); // Modulate.
-   yiq = mul(mix_mat, yiq); // Cross-talk.
-   yiq.yz *= float2(i_mod, q_mod); // Demodulate.
-
-   // Average the two fields only when merging is enabled.
-   yiq = (vVARS.MERGE < 0.5) ? yiq : 0.5*(yiq+yiq2);
-
-   return float4(yiq, 1.0);
-}
-
-
-// Plain full-screen-triangle vertex shader: vertex 1 extends to y = 2 and
-// vertex 2 to x = 2, so the triangle covers the whole screen.
-// Note: the NTSC_ADAPTIVE technique in this file binds PostProcessVS for its
-// second pass, so it does not reference this function.
-void VS_NTSC_ADAPTIVE_P1(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 TexCoord : TEXCOORD)
-{
-    TexCoord = float2((id == 2) ? 2.0 : 0.0,
-                      (id == 1) ? 2.0 : 0.0);
-
-    float2 clip_xy = TexCoord * float2(2.0, -2.0) + float2(-1.0, 1.0);
-    position = float4(clip_xy, 0.0, 1.0);
-}
-
-
-// Fetches one filter tap with independent horizontal steps for luma and
-// chroma (chroma scaling, after an idea by guest.r).
-//   Source - sampler to read from
-//   tex    - base texture coordinate
-//   offset - tap offset in steps (may be negative)
-//   one_x  - .x is the luma step, .y the chroma step
-// Returns luma from the .x channel and chroma from the .yz channels.
-float3 fetch_offset(sampler2D Source, float2 tex, float offset, float2 one_x)
-{
-   float2 luma_uv   = tex + float2((offset) * (one_x.x), 0.0);
-   float2 chroma_uv = tex + float2((offset) * (one_x.y), 0.0);
-
-   float3 tap;
-   tap.x  = tex2D(Source, luma_uv).x;
-   tap.yz = tex2D(Source, chroma_uv).yz;
-
-   return tap;
-}
-
-/* These are accurate and normalized coeffs. */
-// TAPS_3_phase is the number of symmetric tap pairs of the 3-phase filter.
-// Each table holds TAPS_3_phase + 1 weights: pass 1 applies entry i to the
-// pair of samples at offsets -/+(TAPS_3_phase - i) and the last entry to the
-// centre sample.
-static const int TAPS_3_phase = 24;
-static const float luma_filter_3_phase[25] = {
-0.0000120203033684164,
-0.0000221465589348544,
-0.0000131553320142694,
-0.0000120203033684164,
-0.0000499802614018372,
-0.000113942875690297,
-0.000122153082899506,
-5.61214E-06,
-0.000170520303591422,
-0.000237204986579451,
-0.000169644281482376,
-0.000285695210375719,
-0.000984598849305758,
-0.0020187339488074,
-0.00200232553469184,
-0.000909904964181485,
-0.00704925890919635,
-0.0132231937269633,
-0.0126072491817548,
-0.00246092210875218,
-0.0358691302651096,
-0.0840185734607569,
-0.135566921437963,
-0.175265691355518,
-0.190181351796957};
-
-/* These are accurate and normalized coeffs. */
-// Chroma weights of the 3-phase filter; indexed exactly like
-// luma_filter_3_phase (last entry is the centre tap).
-static const float chroma_filter_3_phase[25] = {
-0.000135741056915795,
-0.000568115749081878,
-0.00130605691082327,
-0.00231369942971182,
-0.00350569685928248,
-0.00474731062446688,
-0.00585980203774502,
-0.00663114046295865,
-0.00683148404964774,
-0.00623234997205773,
-0.00462792764511295,
-0.00185665431957684,
-0.00217899013894782,
-0.00749647783836479,
-0.0140227874371299,
-0.021590863169257,
-0.0299437436530477,
-0.0387464461271303,
-0.0476049759842373,
-0.0560911497485196,
-0.0637713405314321,
-0.0702368383153846,
-0.0751333078160781,
-0.0781868487834974,
-0.0792244191487085};
-
-
-/* These are accurate and normalized coeffs, though they don't produce ideally smooth transparency for vertical lines. */
-// TAPS_2_phase is the number of symmetric tap pairs of the 2-phase filter.
-// Each table holds TAPS_2_phase + 1 weights: pass 1 applies entry i to the
-// pair of samples at offsets -/+(TAPS_2_phase - i) and the last entry to the
-// centre sample.
-static const int TAPS_2_phase = 15;
-static const float luma_filter_2_phase[16] = {
-0.00134372867555492,
-0.00294231678339247,
-0.00399617683765551,
-0.00303632635732925,
-0.00110556727614119,
-0.00839970341605087,
-0.0169515379999301,
-0.0229874881474188,
-0.0217113019865528,
-0.00889151239892142,
-0.0173269874254282,
-0.0550969075027442,
-0.098655909675851,
-0.139487291941771,
-0.168591277052964,
-0.17914037794465};
-
-
-/* These are accurate and normalized coeffs. */
-// Chroma weights of the 2-phase filter; indexed exactly like
-// luma_filter_2_phase (last entry is the centre tap).
-static const float chroma_filter_2_phase[16] = {
-0.00406084767413046,
-0.00578573638571078,
-0.00804447474387669,
-0.0109152541019797,
-0.0144533032717188,
-0.0186765858322351,
-0.0235518468184291,
-0.0289834149989225,
-0.034807373222651,
-0.0407934139180355,
-0.0466558344725586,
-0.0520737649339226,
-0.0567190701585739,
-0.0602887575746322,
-0.0625375226221969,
-0.0633055985408521};
-
-
-
-// Pass 1 pixel shader: low-pass filters the YIQ image produced by pass 0 with
-// the symmetric 2-phase or 3-phase tap tables, converts the result to RGB and
-// optionally raises it to the power 2.2 (linearize).
-//   vTexCoord - texture coordinate of the pixel
-float4 PS_NTSC_ADAPTIVE_P1(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
-{
-   float4 SourceSize  = float4(BufferWidth, 1.0/NormalizedNativePixelSize.y, 1.0/BufferWidth, NormalizedNativePixelSize.y);
-
-   float res = ntsc_scale;
-   float OriginalSize = 1.0/NormalizedNativePixelSize.x;
-   float3 signal = float3(0.0, 0.0, 0.0);
-   // 0 = Auto (2-phase above 300 native pixels wide), 1 = 2-Phase, 2 = 3-Phase.
-   float phase = (ntsc_phase < 1) ? ((OriginalSize > 300.0) ? 2.0 : 3.0) : ((ntsc_phase > 1) ? 3.0 : 2.0);
-
-   // Effective chroma scale. This used to be a local that was also called
-   // chroma_scale and read the uniform of the same name in its own
-   // initializer; a distinct name avoids depending on how the compiler
-   // resolves that self-reference.
-   float chroma_res = phase > 2.5 ? min(chroma_scale, 2.2) : chroma_scale/2.0;
-   // .x = luma tap step, .y = chroma tap step (both in texture coordinates).
-   float2 one_x = (SourceSize.z / res) * float2(1.0, 1.0 / chroma_res);
-
-   float2 tex = vTexCoord;
-
-   if(phase < 2.5)
-   {
-      // 2-phase filter, written out tap pair by tap pair (offsets -/+15 .. -/+1).
-      float3 sums = fetch_offset(sNTSC_P0, tex, 0.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 0.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[0], chroma_filter_2_phase[0], chroma_filter_2_phase[0]);
-      sums = fetch_offset(sNTSC_P0, tex, 1.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 1.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[1], chroma_filter_2_phase[1], chroma_filter_2_phase[1]);
-      sums = fetch_offset(sNTSC_P0, tex, 2.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 2.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[2], chroma_filter_2_phase[2], chroma_filter_2_phase[2]);
-      sums = fetch_offset(sNTSC_P0, tex, 3.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 3.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[3], chroma_filter_2_phase[3], chroma_filter_2_phase[3]);
-      sums = fetch_offset(sNTSC_P0, tex, 4.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 4.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[4], chroma_filter_2_phase[4], chroma_filter_2_phase[4]);
-      sums = fetch_offset(sNTSC_P0, tex, 5.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 5.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[5], chroma_filter_2_phase[5], chroma_filter_2_phase[5]);
-      sums = fetch_offset(sNTSC_P0, tex, 6.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 6.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[6], chroma_filter_2_phase[6], chroma_filter_2_phase[6]);
-      sums = fetch_offset(sNTSC_P0, tex, 7.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 7.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[7], chroma_filter_2_phase[7], chroma_filter_2_phase[7]);
-      sums = fetch_offset(sNTSC_P0, tex, 8.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 8.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[8], chroma_filter_2_phase[8], chroma_filter_2_phase[8]);
-      sums = fetch_offset(sNTSC_P0, tex, 9.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 9.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[9], chroma_filter_2_phase[9], chroma_filter_2_phase[9]);
-      sums = fetch_offset(sNTSC_P0, tex, 10.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 10.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[10], chroma_filter_2_phase[10], chroma_filter_2_phase[10]);
-      sums = fetch_offset(sNTSC_P0, tex, 11.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 11.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[11], chroma_filter_2_phase[11], chroma_filter_2_phase[11]);
-      sums = fetch_offset(sNTSC_P0, tex, 12.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 12.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[12], chroma_filter_2_phase[12], chroma_filter_2_phase[12]);
-      sums = fetch_offset(sNTSC_P0, tex, 13.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 13.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[13], chroma_filter_2_phase[13], chroma_filter_2_phase[13]);
-      sums = fetch_offset(sNTSC_P0, tex, 14.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 14.0, one_x);
-      signal += sums * float3(luma_filter_2_phase[14], chroma_filter_2_phase[14], chroma_filter_2_phase[14]);
-
-      // Centre tap (last table entry), sampled without any offset.
-      signal += tex2D(sNTSC_P0, vTexCoord).xyz *
-         float3(luma_filter_2_phase[TAPS_2_phase], chroma_filter_2_phase[TAPS_2_phase], chroma_filter_2_phase[TAPS_2_phase]);
-   }
-   else if(phase > 2.5)
-   {
-      // 3-phase filter: the same symmetric accumulation, expressed as a loop.
-      for (int i = 0; i < TAPS_3_phase; i++)
-      {
-         float offset = float(i);
-
-         float3 sums = fetch_offset(sNTSC_P0, tex, offset - float(TAPS_3_phase), one_x) +
-            fetch_offset(sNTSC_P0, tex, float(TAPS_3_phase) - offset, one_x);
-         signal += sums * float3(luma_filter_3_phase[i], chroma_filter_3_phase[i], chroma_filter_3_phase[i]);
-      }
-      signal += tex2D(sNTSC_P0, vTexCoord).xyz *
-         float3(luma_filter_3_phase[TAPS_3_phase], chroma_filter_3_phase[TAPS_3_phase], chroma_filter_3_phase[TAPS_3_phase]);
-   }
-
-   float3 rgb = yiq2rgb(signal);
-
-   if(linearize == false) return float4(rgb, 1.0);
-   else return pow(float4(rgb, 1.0), float4(2.2, 2.2, 2.2, 2.2));
-}
-
-// Two-pass technique: pass 0 encodes the image to YIQ into tNTSC_P0, pass 1
-// filters that texture and writes the final colour.
-technique NTSC_ADAPTIVE
-{
-    // Pass 0: modulation / cross-talk, rendered to the intermediate target.
-    pass
-    {
-        VertexShader = VS_NTSC_ADAPTIVE_P0;
-        PixelShader  = PS_NTSC_ADAPTIVE_P0;
-        RenderTarget = tNTSC_P0;
-    }
-    // Pass 1: filtering and YIQ -> RGB; uses PostProcessVS rather than
-    // VS_NTSC_ADAPTIVE_P1 and sets no RenderTarget.
-    pass
-    {
-        VertexShader = PostProcessVS;
-        PixelShader  = PS_NTSC_ADAPTIVE_P1;
-    }
-}
--- a/data/resources/shaders/reshade/Shaders/overlay/geom-overlay.fx
+++ b/data/resources/shaders/reshade/Shaders/overlay/geom-overlay.fx
@@ -1,415 +0,0 @@
-#include "ReShade.fxh"
-
-/*
-    Geom Shader - a modified CRT-Geom without CRT features made to be appended/integrated
-    into any other shaders and provide curvature/warping/oversampling features.
-
-    Adapted by Hyllian (2024).
-*/
-
-
-/*
-    CRT-interlaced
-
-    Copyright (C) 2010-2012 cgwg, Themaister and DOLLS
-
-    This program is free software; you can redistribute it and/or modify it
-    under the terms of the GNU General Public License as published by the Free
-    Software Foundation; either version 2 of the License, or (at your option)
-    any later version.
-
-    (cgwg gave their consent to have the original version of this shader
-    distributed under the GPL in this message:
-
-    http://board.byuu.org/viewtopic.php?p=26075#p26075
-
-    "Feel free to distribute my shaders under the GPL. After all, the
-    barrel distortion code was taken from the Curvature shader, which is
-    under the GPL."
-    )
-    This shader variant is pre-configured with screen curvature
-*/
-
-
-
-// Master switch for the curvature transform; when false PS_CRT_Geom starts
-// from the unmodified texture coordinate.
-uniform bool geom_curvature <
-	ui_type = "radio";
-	ui_label = "Geom Curvature Toggle";
-	ui_category = "Curvature";
-	ui_tooltip = "This shader only works with Aspect Ratio: Stretch to Fill.";
-> = true;
-
-// Radius term (geom_R) of the curvature equations.
-uniform float geom_R <
-	ui_type = "drag";
-	ui_min = 0.1;
-	ui_max = 10.0;
-	ui_step = 0.1;
-	ui_label = "Geom Curvature Radius";
-> = 10.0;
-
-// Distance term (geom_d) of the curvature equations.
-uniform float geom_d <
-	ui_type = "drag";
-	ui_min = 0.1;
-	ui_max = 10.0;
-	ui_step = 0.1;
-	ui_label = "Geom Distance";
-> = 10.0;
-
-// Selects which axis of the "aspect" macro carries the height/width ratio.
-// Initialised with a boolean literal, like the other bool uniforms in this
-// file, instead of the float literal 0.0.
-uniform bool geom_invert_aspect <
-	ui_type = "radio";
-	ui_label = "Geom Curvature Aspect Inversion";
-> = false;
-
-// Corner radius used by corner().
-uniform float geom_cornersize <
-	ui_type = "drag";
-	ui_min = 0.001;
-	ui_max = 1.0;
-	ui_step = 0.005;
-	ui_label = "Geom Corner Size";
-> = 0.006;
-
-// Slope of the corner mask falloff in corner().
-uniform float geom_cornersmooth <
-	ui_type = "drag";
-	ui_min = 80.0;
-	ui_max = 2000.0;
-	ui_step = 100.0;
-	ui_label = "Geom Corner Smoothness";
-> = 200.0;
-
-// Tilt angles; the vertex shader takes their sine and cosine.
-uniform float geom_x_tilt <
-	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
-	ui_step = 0.05;
-	ui_label = "Geom Horizontal Tilt";
-> = 0.0;
-
-uniform float geom_y_tilt <
-	ui_type = "drag";
-	ui_min = -1.0;
-	ui_max = 1.0;
-	ui_step = 0.05;
-	ui_label = "Geom Vertical Tilt";
-> = 0.0;
-
-// Overscan in percent; transform() divides by (value / 100).
-uniform float geom_overscan_x <
-	ui_type = "drag";
-	ui_min = -125.0;
-	ui_max = 125.0;
-	ui_step = 0.5;
-	ui_label = "Geom Horiz. Overscan %";
-> = 48.5;
-
-uniform float geom_overscan_y <
-	ui_type = "drag";
-	ui_min = -125.0;
-	ui_max = 125.0;
-	ui_step = 0.5;
-	ui_label = "Geom Vert. Overscan %";
-> = 64.5;
-
-// Image centre offset in percent; PS_CRT_Geom subtracts (value / 100) from
-// the lookup coordinate.
-uniform float centerx <
-	ui_type = "drag";
-	ui_min = -100.0;
-	ui_max = 100.0;
-	ui_step = 0.1;
-	ui_label = "Image Center X";
-> = 0.0;
-
-uniform float centery <
-	ui_type = "drag";
-	ui_min = -100.0;
-	ui_max = 100.0;
-	ui_step = 0.1;
-	ui_label = "Image Center Y";
-> = -8.8;
-
-// Gain applied to the sampled colour.
-uniform float geom_lum <
-	ui_type = "drag";
-	ui_min = 0.5;
-	ui_max = 2.0;
-	ui_step = 0.01;
-	ui_label = "Geom Luminance";
-> = 1.0;
-
-// Exponent applied to the sampled colour (see the TEX2D macro).
-uniform float geom_target_gamma <
-	ui_type = "drag";
-	ui_min = 0.1;
-	ui_max = 5.0;
-	ui_step = 0.1;
-	ui_label = "Geom Target Gamma";
-> = 2.4;
-
-// The final colour is raised to 1 / geom_monitor_gamma.
-uniform float geom_monitor_gamma <
-	ui_type = "drag";
-	ui_min = 0.1;
-	ui_max = 5.0;
-	ui_step = 0.1;
-	ui_label = "Geom Monitor Gamma";
-> = 2.2;
-
-
-// Values bound through "source" annotations rather than UI widgets.
-uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
-uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
-uniform float2 ViewportSize < source = "viewportsize"; >;
-uniform float  ViewportX < source = "viewportx"; >;
-uniform float  ViewportY < source = "viewporty"; >;
-uniform float  ViewportWidth < source = "viewportwidth"; >;
-uniform float  ViewportHeight < source = "viewportheight"; >;
-uniform float2 ViewportOffset < source = "viewportoffset"; >;
-
-// Back buffer sampler with BORDER addressing and linear filtering.
-sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
-
-// Overlay image loaded from "overlay/psx.jpg"; PS_CRT_Geom shows it outside
-// the viewport rectangle.
-texture tOverlay < source = "overlay/psx.jpg"; >
-{
-	Width = BUFFER_WIDTH;
-	Height = BUFFER_HEIGHT;
-	MipLevels = 1;
-};
-
-sampler sOverlay { Texture = tOverlay; AddressU = BORDER; AddressV = BORDER; MinFilter = LINEAR; MagFilter = LINEAR;};
-
-// Comment the next line to disable interpolation in linear gamma (and
-// gain speed).
-#define LINEAR_PROCESSING
-
-// Enable 3x oversampling of the beam profile; improves moire effect caused by scanlines+curvature
-#define OVERSAMPLE
-
-// Use the older, purely gaussian beam profile; uncomment for speed
-//#define USEGAUSSIAN
-
-// Macros.
-// FIX keeps a value away from zero so it is safe to divide by. The macro body
-// carries no trailing semicolon, so it can also be used inside an expression;
-// every existing use already ends its statement with its own ';'.
-#define FIX(c) max(abs(c), 1e-5)
-#define PI 3.141592653589
-
-// TEX2D samples the back buffer; with LINEAR_PROCESSING it also raises the
-// sample to geom_target_gamma.
-#ifdef LINEAR_PROCESSING
-#       define TEX2D(c) pow(tex2D(sBackBuffer, (c)), float4(geom_target_gamma,geom_target_gamma,geom_target_gamma,geom_target_gamma))
-#else
-#       define TEX2D(c) tex2D(sBackBuffer, (c))
-#endif
-
-// aspect ratio
-#define aspect     (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
-// No trailing semicolon here either, so the macro stays usable in expressions.
-#define overscan   (float2(1.01,1.01))
-
-
-// Values precomputed once per vertex by VS_CRT_Geom for PS_CRT_Geom.
-struct ST_VertexOut
-{
-    float2 sinangle    : TEXCOORD1; // sin of (geom_x_tilt, geom_y_tilt)
-    float2 cosangle    : TEXCOORD2; // cos of (geom_x_tilt, geom_y_tilt)
-    float3 stretch     : TEXCOORD3; // result of vs_maxscale()
-    float2 TextureSize : TEXCOORD4; // 1 / NormalizedNativePixelSize
-};
-
-
-// Returns the (-sqrt) root of the quadratic qa*t^2 + qb*t + qc = 0 whose
-// coefficients are built from xy, the tilt terms, geom_R and geom_d.
-float vs_intersect(float2 xy, float2 sinangle, float2 cosangle)
-{
-    float qa = dot(xy,xy) + geom_d*geom_d;
-    float qb = 2.0*(geom_R*(dot(xy,sinangle)-geom_d*cosangle.x*cosangle.y)-geom_d*geom_d);
-    float qc = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y;
-
-    float root = sqrt(qb*qb-4.0*qa*qc);
-
-    return (-qb-root)/(2.0*qa);
-}
-
-// Backward curvature mapping of a 2D coordinate (presumably flat screen ->
-// curved surface, as in CRT-Geom; confirm against the original shader).
-float2 vs_bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
-{
-    float t       = vs_intersect(xy, sinangle, cosangle);
-    float2 hit    = (float2(t, t)*xy - float2(-geom_R, -geom_R)*sinangle) / float2(geom_R, geom_R);
-    float2 hit_c  = hit/cosangle;
-    float2 tilt_t = sinangle/cosangle;
-
-    // Coefficients of a second quadratic; its (+sqrt) root is used below.
-    float qa = dot(tilt_t, tilt_t) + 1.0;
-    float qb = -2.0*dot(hit_c, tilt_t);
-    float qc = dot(hit_c, hit_c) - 1.0;
-
-    float cos_arc = (-qb + sqrt(qb*qb - 4.0*qa*qc))/(2.0*qa);
-    float2 uv     = (hit - cos_arc*sinangle)/cosangle;
-    // FIX keeps the arc length away from zero before it is used as a divisor.
-    float arc     = FIX(geom_R*acos(cos_arc));
-
-    return uv*arc/sin(arc/geom_R);
-}
-
-// Forward curvature mapping of a 2D coordinate (presumably the counterpart
-// of vs_bkwtrans; confirm against the original shader).
-float2 vs_fwtrans(float2 uv, float2 sinangle, float2 cosangle)
-{
-    // FIX keeps the radius away from zero before it is used as a divisor.
-    float radius = FIX(sqrt(dot(uv,uv)));
-    uv *= sin(radius/geom_R)/radius;
-
-    float sag   = 1.0-cos(radius/geom_R);
-    float denom = geom_d/geom_R + sag*cosangle.x*cosangle.y+dot(uv,sinangle);
-
-    return geom_d*(uv*cosangle-sag*sinangle)/denom;
-}
-
-// Computes the stretch vector stored in ST_VertexOut: .xy is the scaled
-// midpoint of the forward-mapped extents, .z the larger of their two sizes.
-float3 vs_maxscale(float2 sinangle, float2 cosangle)
-{
-    float2 centre = vs_bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle);
-    float2 half_e = float2(0.5,0.5)*aspect;
-
-    // Forward-map the low and high edges through the centre point.
-    float lo_x = vs_fwtrans(float2(-half_e.x,  centre.y), sinangle, cosangle).x;
-    float lo_y = vs_fwtrans(float2( centre.x, -half_e.y), sinangle, cosangle).y;
-    float2 lo  = float2(lo_x, lo_y)/aspect;
-
-    float hi_x = vs_fwtrans(float2(+half_e.x,  centre.y), sinangle, cosangle).x;
-    float hi_y = vs_fwtrans(float2( centre.x, +half_e.y), sinangle, cosangle).y;
-    float2 hi  = float2(hi_x, hi_y)/aspect;
-
-    return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y));
-}
-
-
-
-
-// Vertex shader: emits the full-screen triangle and precomputes the tilt
-// sines/cosines, the stretch vector and the source texture size for
-// PS_CRT_Geom.
-void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
-{
-    texcoord = float2((id == 2) ? 2.0 : 0.0,
-                      (id == 1) ? 2.0 : 0.0);
-    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
-
-    float2 tilt = float2(geom_x_tilt, geom_y_tilt);
-
-    // Values the pixel shader would otherwise recompute for every pixel.
-    vVARS.sinangle    = sin(tilt);
-    vVARS.cosangle    = cos(tilt);
-    vVARS.stretch     = vs_maxscale(vVARS.sinangle, vVARS.cosangle);
-    vVARS.TextureSize = 1.0/NormalizedNativePixelSize;
-}
-
-
-
-// Pixel-stage name for the quadratic solved by vs_intersect. The arithmetic
-// was a verbatim copy of that function, so this delegates to it instead of
-// keeping a second implementation to maintain.
-float intersect(float2 xy, float2 sinangle, float2 cosangle)
-{
-    return vs_intersect(xy, sinangle, cosangle);
-}
-
-// Pixel-stage name for the backward curvature mapping. The body was a
-// verbatim copy of vs_bkwtrans (which in turn solves the same quadratic as
-// intersect), so this delegates to it instead of duplicating the math.
-float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
-{
-    return vs_bkwtrans(xy, sinangle, cosangle);
-}
-
-// Pixel-stage name for the forward curvature mapping. The body was a
-// verbatim copy of vs_fwtrans, so this delegates to it instead of
-// duplicating the math.
-float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
-{
-    return vs_fwtrans(uv, sinangle, cosangle);
-}
-
-// Pixel-stage name for the stretch computation. The body was a verbatim copy
-// of vs_maxscale (built on identical copies of the forward/backward
-// mappings), so this delegates to it instead of duplicating the math.
-float3 maxscale(float2 sinangle, float2 cosangle)
-{
-    return vs_maxscale(sinangle, cosangle);
-}
-
-// Maps a texture coordinate through the curvature: recentre and scale by the
-// stretch vector, apply bkwtrans, then undo the overscan and aspect scaling
-// and move the origin back to (0.5, 0.5).
-float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)
-{
-    float2 centred    = (coord - float2(0.5, 0.5))*aspect*stretch.z + stretch.xy;
-    float2 curved     = bkwtrans(centred, sinangle, cosangle);
-    // The overscan uniforms are percentages.
-    float2 scan_scale = float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0);
-
-    return (curved / scan_scale/aspect + float2(0.5, 0.5));
-}
-
-// Corner mask in [0, 1]: based on the distance of coord into a corner region
-// of size geom_cornersize, with geom_cornersmooth controlling the falloff.
-float corner(float2 coord)
-{
-    float2 radius = float2(geom_cornersize, geom_cornersize);
-
-    // Distance to the nearest edge on each axis, aspect-corrected.
-    float2 edge_dist = min(coord, float2(1.0, 1.0) - coord) * aspect;
-    float2 inset     = (radius - min(edge_dist, radius));
-    float  dist      = sqrt(dot(inset, inset));
-
-    return clamp((radius.x - dist)*geom_cornersmooth, 0.0, 1.0);
-}
-
-// Sum of the absolute screen-space derivatives of value along x and y.
-float fwidth(float value)
-{
-  float grad_x = abs(ddx(value));
-  float grad_y = abs(ddy(value));
-
-  return grad_x + grad_y;
-}
-
-
-// Code snippet borrowed from crt-cyclon. (credits to DariusG)
-// Remaps pos to [-1, 1], applies a per-axis scale and maps back. Both warp
-// terms are multiplied by 0, so the scale evaluates to 1 on each axis.
-float2 Warp(float2 pos)
-{
-    float2 centred = pos*2.0 - 1.0;
-    float2 scale   = float2(1.0 + centred.y*centred.y*0, 1.0 + centred.x*centred.x*0);
-
-    centred *= scale;
-
-    return centred*0.5 + 0.5;
-}
-
-// Pixel shader: samples the image through the optional curvature transform,
-// applies luminance, corner mask and output gamma, and shows the overlay
-// texture wherever the lookup coordinate falls outside the viewport rectangle.
-float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
-{
-    // Lookup coordinate: straight by default, curved when enabled.
-    float2 xy = vTexCoord;
-
-    if (geom_curvature == true)
-      xy = transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch);
-
-    // Apply the image centre offset (given in percent).
-    xy = Warp(xy - float2(centerx,centery)/100.0);
-
-    float corner_mask = corner((xy-float2(0.5,0.5)) * BufferToViewportRatio + float2(0.5,0.5));
-
-    float2 uv_ratio = frac((xy * vVARS.TextureSize - float2(0.5, 0.5)) / vVARS.TextureSize);
-
-    float4 texel = TEX2D(xy);
-
-#ifndef LINEAR_PROCESSING
-    texel = pow(texel, float4(geom_target_gamma, geom_target_gamma, geom_target_gamma, geom_target_gamma));
-#endif
-
-    texel.rgb *= (geom_lum * step(0.0, uv_ratio.y));
-
-    float3 shaded = texel.rgb * float3(corner_mask, corner_mask, corner_mask);
-
-    // Convert the image gamma for display on the output device.
-    float inv_gamma = 1.0 / geom_monitor_gamma;
-    shaded = pow(shaded, float3(inv_gamma, inv_gamma, inv_gamma));
-
-    float4 bezel = tex2D(sOverlay, vTexCoord);
-
-    // Viewport rectangle expressed in the same units as xy.
-    float2 top_left     = (float2(ViewportX, ViewportY) - ViewportOffset)/ViewportSize;
-    float2 bottom_right = (float2(ViewportX + ViewportWidth, ViewportY + ViewportHeight) - ViewportOffset)/ViewportSize;
-
-    bool outside = xy.x < top_left.x || xy.x > bottom_right.x || xy.y < top_left.y || xy.y > bottom_right.y;
-
-    if (outside)
-        shaded = bezel.rgb;
-
-    return float4(shaded, 1.0);
-}
-
-
-// Single-pass technique: VS_CRT_Geom feeds PS_CRT_Geom; no RenderTarget is
-// set for the pass.
-technique CRT_Geom
-{
-    pass
-    {
-        VertexShader = VS_CRT_Geom;
-        PixelShader  = PS_CRT_Geom;
-    }
-}
--- a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png
+++ b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png
--- a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png
+++ b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png
--- a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMask.png
+++ b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMask.png
--- a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskEDP.png
+++ b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskEDP.png
--- a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png
+++ b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png
--- a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskResizeTo64.png
+++ b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskResizeTo64.png
--- a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png
+++ b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png
--- a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png
+++ b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png
--- a/data/resources/shaders/reshade/Textures/overlay/OVERLAY_CREDITS_AND_LICENSE.md
+++ b/data/resources/shaders/reshade/Textures/overlay/OVERLAY_CREDITS_AND_LICENSE.md
@@ -1,15 +0,0 @@
-# To Use
-
-Choose Aspect Ratio: Stretch to Fill.
-
-# Psx.jpg Credits
-
-To the Author: SOQUEROEU.
-
-The "psx.jpg" background was edited from the one obtained from "Soqueroeu TV Backgrounds 2.0" repository: https://github.com/soqueroeu/Soqueroeu-TV-Backgrounds_V2.0/tree/main.
-
-The material is free to use according to the agreement below:
-
-## AGREEMENT
-
-This pack is free. You should not pay for anything related to this graphics pack and shader preset. You may distribute and reproduce part from this content, as long as you give credit to the authors involved. You may not profit from the sale of products that contain material in this package without the author's prior permission.
--- a/data/resources/shaders/reshade/Textures/overlay/psx.jpg
+++ b/data/resources/shaders/reshade/Textures/overlay/psx.jpg
--- a/dep/CMakeLists.txt
+++ b/dep/CMakeLists.txt
@@ -9,12 +9,17 @@ add_subdirectory(libchdr EXCLUDE_FROM_ALL)
 disable_compiler_warnings_for_target(libchdr)
 add_subdirectory(xxhash EXCLUDE_FROM_ALL)
 disable_compiler_warnings_for_target(xxhash)
+add_subdirectory(rapidjson EXCLUDE_FROM_ALL)
 add_subdirectory(imgui EXCLUDE_FROM_ALL)
 disable_compiler_warnings_for_target(imgui)
 add_subdirectory(simpleini EXCLUDE_FROM_ALL)
 disable_compiler_warnings_for_target(simpleini)
 add_subdirectory(vulkan EXCLUDE_FROM_ALL)
+add_subdirectory(soundtouch EXCLUDE_FROM_ALL)
+disable_compiler_warnings_for_target(soundtouch)
 add_subdirectory(googletest EXCLUDE_FROM_ALL)
+add_subdirectory(cpuinfo EXCLUDE_FROM_ALL)
+disable_compiler_warnings_for_target(cpuinfo)
 add_subdirectory(fast_float EXCLUDE_FROM_ALL)
 add_subdirectory(reshadefx EXCLUDE_FROM_ALL)
 disable_compiler_warnings_for_target(reshadefx)
@@ -25,6 +30,8 @@ disable_compiler_warnings_for_target(rapidyaml)
 add_subdirectory(cubeb EXCLUDE_FROM_ALL)
 disable_compiler_warnings_for_target(cubeb)
 disable_compiler_warnings_for_target(speex)
+add_subdirectory(discord-rpc EXCLUDE_FROM_ALL)
+disable_compiler_warnings_for_target(discord-rpc)
 add_subdirectory(kissfft EXCLUDE_FROM_ALL)
 disable_compiler_warnings_for_target(kissfft)
 add_subdirectory(freesurround EXCLUDE_FROM_ALL)
--- a/dep/cpuinfo/.gitignore
+++ b/dep/cpuinfo/.gitignore
@@ -0,0 +1,21 @@
+# Ninja files
+build.ninja
+
+# Build objects and artifacts
+deps/
+build/
+bin/
+lib/
+libs/
+obj/
+*.pyc
+*.pyo
+
+# System files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
--- a/dep/cpuinfo/CMakeLists.txt
+++ b/dep/cpuinfo/CMakeLists.txt
@@ -0,0 +1,223 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)
+
+# ---[ Setup project
+PROJECT(
+    cpuinfo
+    LANGUAGES C CXX
+    )
+
+# ---[ Options.
+SET(CPUINFO_LIBRARY_TYPE "default" CACHE STRING "Type of cpuinfo library (shared, static, or default) to build")
+SET_PROPERTY(CACHE CPUINFO_LIBRARY_TYPE PROPERTY STRINGS default static shared)
+SET(CPUINFO_RUNTIME_TYPE "default" CACHE STRING "Type of runtime library (shared, static, or default) to use")
+SET_PROPERTY(CACHE CPUINFO_RUNTIME_TYPE PROPERTY STRINGS default static shared)
+SET(CPUINFO_LOG_LEVEL "default" CACHE STRING "Minimum logging level (info with lower severity will be ignored)")
+SET_PROPERTY(CACHE CPUINFO_LOG_LEVEL PROPERTY STRINGS default debug info warning error fatal none)
+
+MACRO(CPUINFO_TARGET_ENABLE_C99 target)  # Build <target>'s C sources as C99
+  SET_TARGET_PROPERTIES(${target} PROPERTIES
+    C_STANDARD 99
+    C_EXTENSIONS NO)  # request the ISO dialect, without compiler-specific extensions
+ENDMACRO()
+
+MACRO(CPUINFO_TARGET_ENABLE_CXX11 target)  # Build <target>'s C++ sources as C++11; not invoked in this file
+  SET_TARGET_PROPERTIES(${target} PROPERTIES
+    CXX_STANDARD 11
+    CXX_EXTENSIONS NO)  # request the ISO dialect, without compiler-specific extensions
+ENDMACRO()
+
+MACRO(CPUINFO_TARGET_RUNTIME_LIBRARY target)  # Apply CPUINFO_RUNTIME_TYPE to <target>; MSVC only
+  IF(MSVC AND NOT CPUINFO_RUNTIME_TYPE STREQUAL "default")  # "default" adds no runtime flag
+    IF(CPUINFO_RUNTIME_TYPE STREQUAL "shared")
+      TARGET_COMPILE_OPTIONS(${target} PRIVATE
+        "/MD$<$<CONFIG:Debug>:d>")  # /MD, or /MDd in the Debug configuration
+    ELSEIF(CPUINFO_RUNTIME_TYPE STREQUAL "static")
+      TARGET_COMPILE_OPTIONS(${target} PRIVATE
+        "/MT$<$<CONFIG:Debug>:d>")  # /MT, or /MTd in the Debug configuration
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+
+# ---[ Determine target processor (CMAKE_SYSTEM_PROCESSOR unless overridden below)
+SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}")
+IF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$")
+  SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}")  # a single x86_64/arm64 macOS arch takes precedence
+ENDIF()
+
+# ---[ Build flags: decide whether the target processor and OS are supported
+SET(CPUINFO_SUPPORTED_PLATFORM TRUE)
+IF(NOT CMAKE_SYSTEM_PROCESSOR)
+  IF(NOT IOS)  # iOS builds skip this check
+    MESSAGE(WARNING
+      "Target processor architecture is not specified. "
+      "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
+    SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
+  ENDIF()
+ELSEIF(NOT CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64)$")
+  MESSAGE(WARNING
+    "Target processor architecture \"${CPUINFO_TARGET_PROCESSOR}\" is not supported in cpuinfo. "
+    "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
+  SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
+ENDIF()
+
+IF(NOT CMAKE_SYSTEM_NAME)
+    MESSAGE(WARNING
+      "Target operating system is not specified. "
+      "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
+  SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
+ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android)$")
+  IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
+    MESSAGE(WARNING
+      "Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. "
+      "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
+    SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
+  ENDIF()  # with CMake older than 3.14, or on iOS, the flag is left TRUE
+ENDIF()
+
+# ---[ cpuinfo library: sources compiled for every target; platform files are appended below
+SET(CPUINFO_SRCS
+  src/init.c
+  src/api.c
+  src/cache.c)
+
+IF(CPUINFO_SUPPORTED_PLATFORM)  # platform-specific sources are added only for supported targets
+  IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$"))
+    LIST(APPEND CPUINFO_SRCS  # x86 / x86-64 sources
+      src/x86/init.c
+      src/x86/info.c
+      src/x86/vendor.c
+      src/x86/uarch.c
+      src/x86/name.c
+      src/x86/topology.c
+      src/x86/isa.c
+      src/x86/cache/init.c
+      src/x86/cache/descriptor.c
+      src/x86/cache/deterministic.c)
+    IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+      LIST(APPEND CPUINFO_SRCS
+        src/x86/linux/init.c
+        src/x86/linux/cpuinfo.c)
+    ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
+      LIST(APPEND CPUINFO_SRCS src/x86/mach/init.c)
+    ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
+      LIST(APPEND CPUINFO_SRCS src/x86/windows/init.c)
+    ENDIF()
+  ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(armv[5-8].*|aarch64|arm64)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$")
+    LIST(APPEND CPUINFO_SRCS  # ARM / ARM64 sources
+      src/arm/uarch.c
+      src/arm/cache.c)
+    IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+      LIST(APPEND CPUINFO_SRCS
+        src/arm/linux/init.c
+        src/arm/linux/cpuinfo.c
+        src/arm/linux/clusters.c
+        src/arm/linux/chipset.c
+        src/arm/linux/midr.c
+        src/arm/linux/hwcap.c)
+      IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]")  # 32-bit ARM ISA detection
+        LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch32-isa.c)
+        IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND ANDROID_ABI STREQUAL "armeabi")
+          SET_SOURCE_FILES_PROPERTIES(src/arm/linux/aarch32-isa.c PROPERTIES COMPILE_FLAGS -marm)  # armeabi only: build this one file with -marm
+        ENDIF()
+      ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$")  # 64-bit ARM ISA detection
+        LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch64-isa.c)
+      ENDIF()
+    ELSEIF(IOS OR (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CPUINFO_TARGET_PROCESSOR STREQUAL "arm64"))
+      LIST(APPEND CPUINFO_SRCS src/arm/mach/init.c)
+    ENDIF()
+    IF(CMAKE_SYSTEM_NAME STREQUAL "Android")
+      LIST(APPEND CPUINFO_SRCS
+        src/arm/android/properties.c)
+    ENDIF()
+  ENDIF()
+
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    LIST(APPEND CPUINFO_SRCS
+      src/emscripten/init.c)
+  ENDIF()
+
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+    LIST(APPEND CPUINFO_SRCS  # added for Linux/Android regardless of processor
+      src/linux/smallfile.c
+      src/linux/multiline.c
+      src/linux/cpulist.c
+      src/linux/processors.c)
+  ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
+    LIST(APPEND CPUINFO_SRCS src/mach/topology.c)
+  ENDIF()
+
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+    SET(CMAKE_THREAD_PREFER_PTHREAD TRUE)
+    SET(THREADS_PREFER_PTHREAD_FLAG TRUE)
+    FIND_PACKAGE(Threads REQUIRED)  # sets CMAKE_THREAD_LIBS_INIT, which the library targets link against
+  ENDIF()
+ENDIF()
+
+IF(CPUINFO_LIBRARY_TYPE STREQUAL "default")  # "default": no explicit STATIC/SHARED keyword is passed
+  ADD_LIBRARY(cpuinfo ${CPUINFO_SRCS})
+ELSEIF(CPUINFO_LIBRARY_TYPE STREQUAL "shared")
+  ADD_LIBRARY(cpuinfo SHARED ${CPUINFO_SRCS})
+ELSEIF(CPUINFO_LIBRARY_TYPE STREQUAL "static")
+  ADD_LIBRARY(cpuinfo STATIC ${CPUINFO_SRCS})
+ELSE()
+  MESSAGE(FATAL_ERROR "Unsupported library type ${CPUINFO_LIBRARY_TYPE}")
+ENDIF()
+ADD_LIBRARY(cpuinfo_internals STATIC ${CPUINFO_SRCS})  # always-static build of the same sources
+CPUINFO_TARGET_ENABLE_C99(cpuinfo)
+CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals)
+CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo)  # NOTE(review): not applied to cpuinfo_internals - confirm this is intended
+IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
+  # Target Windows 7+ API
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _WIN32_WINNT=0x0601)
+  TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _WIN32_WINNT=0x0601)
+ENDIF()
+SET_TARGET_PROPERTIES(cpuinfo PROPERTIES PUBLIC_HEADER include/cpuinfo.h)
+TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC include)
+TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PRIVATE src)  # src/ headers stay private to cpuinfo
+TARGET_INCLUDE_DIRECTORIES(cpuinfo_internals BEFORE PUBLIC include src)  # internals also exposes src/ to its consumers
+IF(CPUINFO_LOG_LEVEL STREQUAL "default")  # map the option string to a numeric CPUINFO_LOG_LEVEL definition
+  # default logging level: error (subject to change)
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2)
+ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "debug")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=5)
+ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "info")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=4)
+ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "warning")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=3)
+ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "error")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2)
+ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "fatal")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=1)
+ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "none")
+  TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=0)
+ELSE()
+  MESSAGE(FATAL_ERROR "Unsupported logging level ${CPUINFO_LOG_LEVEL}")
+ENDIF()
+TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE CPUINFO_LOG_LEVEL=0)  # fixed at 0, the value used for "none" above
+
+IF(CPUINFO_SUPPORTED_PLATFORM)  # publish the support flag to consumers and add Linux/Android link settings
+  TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=1)  # INTERFACE: applies to targets linking cpuinfo
+  IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
+    TARGET_LINK_LIBRARIES(cpuinfo PUBLIC ${CMAKE_THREAD_LIBS_INIT})
+    TARGET_LINK_LIBRARIES(cpuinfo_internals PUBLIC ${CMAKE_THREAD_LIBS_INIT})
+    TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _GNU_SOURCE=1)
+    TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _GNU_SOURCE=1)
+  ENDIF()
+ELSE()
+  TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=0)
+ENDIF()
+
+# ---[ cpuinfo dependencies: clog (logging library, bundled under deps/clog)
+IF(NOT DEFINED CLOG_SOURCE_DIR)  # honor a pre-set CLOG_SOURCE_DIR, otherwise use the bundled copy
+  SET(CLOG_SOURCE_DIR "${PROJECT_SOURCE_DIR}/deps/clog")
+ENDIF()
+IF(NOT TARGET clog)  # skip when a clog target already exists in the build
+  SET(CLOG_BUILD_TESTS OFF CACHE BOOL "")
+  SET(CLOG_RUNTIME_TYPE "${CPUINFO_RUNTIME_TYPE}" CACHE STRING "")
+  ADD_SUBDIRECTORY(
+    "${CLOG_SOURCE_DIR}")
+  # We build static version of clog but a dynamic library may indirectly depend on it
+  SET_PROPERTY(TARGET clog PROPERTY POSITION_INDEPENDENT_CODE ON)
+ENDIF()
+TARGET_LINK_LIBRARIES(cpuinfo PRIVATE clog)
+TARGET_LINK_LIBRARIES(cpuinfo_internals PRIVATE clog)
--- a/dep/cpuinfo/LICENSE
+++ b/dep/cpuinfo/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2019 Google LLC
+Copyright (c) 2017-2018 Facebook Inc.
+Copyright (C) 2012-2017 Georgia Institute of Technology
+Copyright (C) 2010-2012 Marat Dukhan
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/dep/cpuinfo/README.md
+++ b/dep/cpuinfo/README.md
@@ -0,0 +1,283 @@
+# CPU INFOrmation library
+
+[![BSD (2 clause) License](https://img.shields.io/badge/License-BSD%202--Clause%20%22Simplified%22%20License-blue.svg)](https://github.com/pytorch/cpuinfo/blob/master/LICENSE)
+[![Linux/Mac build status](https://img.shields.io/travis/pytorch/cpuinfo.svg)](https://travis-ci.org/pytorch/cpuinfo)
+[![Windows build status](https://ci.appveyor.com/api/projects/status/g5khy9nr0xm458t7/branch/master?svg=true)](https://ci.appveyor.com/project/MaratDukhan/cpuinfo/branch/master)
+
+cpuinfo is a library for detecting information about the host CPU that is essential for performance optimization.
+
+## Features
+
+- **Cross-platform** availability:
+  - Linux, Windows, macOS, Android, and iOS operating systems
+  - x86, x86-64, ARM, and ARM64 architectures
+- Modern **C/C++ interface**
+  - Thread-safe
+  - No memory allocation after initialization
+  - No exceptions thrown
+- Detection of **supported instruction sets**, up to AVX512 (x86) and ARMv8.3 extensions
+- Detection of SoC and core information:
+  - **Processor (SoC) name**
+  - Vendor and **microarchitecture** for each CPU core
+  - ID (**MIDR** on ARM, **CPUID** leaf 1 EAX value on x86) for each CPU core
+- Detection of **cache information**:
+  - Cache type (instruction/data/unified), size and line size
+  - Cache associativity
+  - Cores and logical processors (hyper-threads) sharing the cache
+- Detection of **topology information** (relative between logical processors, cores, and processor packages)
+- Well-tested **production-quality** code:
+  - 60+ mock tests based on data from real devices
+  - Includes work-arounds for common bugs in hardware and OS kernels
+  - Supports systems with heterogeneous cores, such as **big.LITTLE** and Max.Med.Min
+- Permissive **open-source** license (Simplified BSD)
+
+## Examples
+
+Log processor name:
+
+```c
+cpuinfo_initialize();
+printf("Running on %s CPU\n", cpuinfo_get_package(0)->name);
+```
+
+Detect if target is a 32-bit or 64-bit ARM system:
+
+```c
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+    /* ARM-specific code here (32-bit or 64-bit) */
+#endif
+```
+
+Check if the host CPU supports ARM NEON
+
+```c
+cpuinfo_initialize();
+if (cpuinfo_has_arm_neon()) {
+    neon_implementation(arguments);
+}
+```
+
+Check if the host CPU supports x86 AVX
+
+```c
+cpuinfo_initialize();
+if (cpuinfo_has_x86_avx()) {
+    avx_implementation(arguments);
+}
+```
+
+Check if the thread runs on a Cortex-A53 core
+
+```c
+cpuinfo_initialize();
+switch (cpuinfo_get_current_core()->uarch) {
+    case cpuinfo_uarch_cortex_a53:
+        cortex_a53_implementation(arguments);
+        break;
+    default:
+        generic_implementation(arguments);
+        break;
+}
+```
+
+Get the size of level 1 data cache on the fastest core in the processor (e.g. big core in big.LITTLE ARM systems):
+
+```c
+cpuinfo_initialize();
+const size_t l1_size = cpuinfo_get_processor(0)->cache.l1d->size;
+```
+
+Pin thread to cores sharing L2 cache with the current core (Linux or Android)
+
+```c
+cpuinfo_initialize();
+cpu_set_t cpu_set;
+CPU_ZERO(&cpu_set);
+const struct cpuinfo_cache* current_l2 = cpuinfo_get_current_processor()->cache.l2;
+for (uint32_t i = 0; i < current_l2->processor_count; i++) {
+    CPU_SET(cpuinfo_get_processor(current_l2->processor_start + i)->linux_id, &cpu_set);
+}
+pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_set);
+```
+
+## Use via pkg-config
+
+If you would like to provide your project's build environment with the necessary compiler and linker flags in a portable manner, the library enables `CPUINFO_BUILD_PKG_CONFIG` by default and, when built, generates a [pkg-config](https://www.freedesktop.org/wiki/Software/pkg-config/) manifest (_libcpuinfo.pc_). Here are several examples of how to use it:
+
+### Command Line
+
+If you used your distro's package manager to install the library, you can verify that it is available to your build environment like so:
+
+```console
+$ pkg-config --cflags --libs libcpuinfo
+-I/usr/include/x86_64-linux-gnu/ -L/lib/x86_64-linux-gnu/ -lcpuinfo
+```
+
+If you have installed the library from source into a non-standard prefix, pkg-config may need help finding it:
+
+```console
+$ PKG_CONFIG_PATH="/home/me/projects/cpuinfo/prefix/lib/pkgconfig/:$PKG_CONFIG_PATH" pkg-config --cflags --libs libcpuinfo
+-I/home/me/projects/cpuinfo/prefix/include -L/home/me/projects/cpuinfo/prefix/lib -lcpuinfo
+```
+
+### GNU Autotools
+
+To [use](https://autotools.io/pkgconfig/pkg_check_modules.html) with the GNU Autotools include the following snippet in your project's `configure.ac`:
+
+```makefile
+# CPU INFOrmation library...
+PKG_CHECK_MODULES(
+    [libcpuinfo], [libcpuinfo], [],
+    [AC_MSG_ERROR([libcpuinfo missing...])])
+YOURPROJECT_CXXFLAGS="$YOURPROJECT_CXXFLAGS $libcpuinfo_CFLAGS"
+YOURPROJECT_LIBS="$YOURPROJECT_LIBS $libcpuinfo_LIBS"
+```
+
+### Meson
+
+To use with Meson you just need to add `dependency('libcpuinfo')` as a dependency for your executable.
+
+```meson
+project(
+    'MyCpuInfoProject',
+    'cpp',
+    meson_version: '>=0.55.0'
+)
+
+executable(
+    'MyCpuInfoExecutable',
+    sources: 'main.cpp',
+    dependencies: dependency('libcpuinfo')
+)
+```
+
+### CMake
+
+To use with CMake use the [FindPkgConfig](https://cmake.org/cmake/help/latest/module/FindPkgConfig.html) module. Here is an example:
+
+```cmake
+cmake_minimum_required(VERSION 3.6)
+project("MyCpuInfoProject")
+
+find_package(PkgConfig)
+pkg_check_modules(CpuInfo REQUIRED IMPORTED_TARGET libcpuinfo)
+
+add_executable(${PROJECT_NAME} main.cpp)
+target_link_libraries(${PROJECT_NAME} PkgConfig::CpuInfo)
+```
+
+### Makefile
+
+To use within a vanilla makefile, you can call pkg-config directly to supply compiler and linker flags using shell substitution.
+
+```makefile
+CFLAGS=-g3 -Wall -Wextra -Werror ...
+LDFLAGS=-lfoo ...
+...
+CFLAGS+= $(shell pkg-config --cflags libcpuinfo)
+LDFLAGS+= $(shell pkg-config --libs libcpuinfo)
+```
+
+## Exposed information
+- [x] Processor (SoC) name
+- [x] Microarchitecture
+- [x] Usable instruction sets
+- [ ] CPU frequency
+- [x] Cache
+  - [x] Size
+  - [x] Associativity
+  - [x] Line size
+  - [x] Number of partitions
+  - [x] Flags (unified, inclusive, complex hash function)
+  - [x] Topology (logical processors that share this cache level)
+- [ ] TLB
+  - [ ] Number of entries
+  - [ ] Associativity
+  - [ ] Covered page types (instruction, data)
+  - [ ] Covered page sizes
+- [x] Topology information
+  - [x] Logical processors
+  - [x] Cores
+  - [x] Packages (sockets)
+
+## Supported environments:
+- [x] Android
+  - [x] x86 ABI
+  - [x] x86_64 ABI
+  - [x] armeabi ABI
+  - [x] armeabiv7-a ABI
+  - [x] arm64-v8a ABI
+  - [ ] ~~mips ABI~~
+  - [ ] ~~mips64 ABI~~
+- [x] Linux
+  - [x] x86
+  - [x] x86-64
+  - [x] 32-bit ARM (ARMv5T and later)
+  - [x] ARM64
+  - [ ] PowerPC64
+- [x] iOS
+  - [x] x86 (iPhone simulator)
+  - [x] x86-64 (iPhone simulator)
+  - [x] ARMv7
+  - [x] ARM64
+- [x] OS X
+  - [x] x86
+  - [x] x86-64
+- [x] Windows
+  - [x] x86
+  - [x] x86-64
+
+## Methods
+
+- Processor (SoC) name detection
+  - [x] Using CPUID leaves 0x80000002–0x80000004 on x86/x86-64
+  - [x] Using `/proc/cpuinfo` on ARM
+  - [x] Using `ro.chipname`, `ro.board.platform`, `ro.product.board`, `ro.mediatek.platform`, `ro.arch` properties (Android)
+  - [ ] Using kernel log (`dmesg`) on ARM Linux
+- Vendor and microarchitecture detection
+  - [x] Intel-designed x86/x86-64 cores (up to Sunny Cove, Goldmont Plus, and Knights Mill)
+  - [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen 2)
+  - [ ] VIA-designed x86/x86-64 cores
+  - [ ] Other x86 cores (DM&P, RDC, Transmeta, Cyrix, Rise)
+  - [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/N1)
+  - [x] Qualcomm-designed ARM cores (Scorpion, Krait, and Kryo)
+  - [x] Nvidia-designed ARM cores (Denver and Carmel)
+  - [x] Samsung-designed ARM cores (Exynos)
+  - [x] Intel-designed ARM cores (XScale up to 3rd-gen)
+  - [x] Apple-designed ARM cores (up to Lightning and Thunder)
+  - [x] Cavium-designed ARM cores (ThunderX)
+  - [x] AppliedMicro-designed ARM cores (X-Gene)
+- Instruction set detection
+  - [x] Using CPUID (x86/x86-64)
+  - [x] Using `/proc/cpuinfo` on 32-bit ARM EABI (Linux)
+  - [x] Using microarchitecture heuristics (32-bit ARM)
+  - [x] Using `FPSID` and `WCID` registers (32-bit ARM)
+  - [x] Using `getauxval` (Linux/ARM)
+  - [x] Using `/proc/self/auxv` (Android/ARM)
+  - [ ] Using instruction probing on ARM (Linux)
+  - [ ] Using CPUID registers on ARM64 (Linux)
+- Cache detection
+  - [x] Using CPUID leaf 0x00000002 (x86/x86-64)
+  - [x] Using CPUID leaf 0x00000004 (non-AMD x86/x86-64)
+  - [ ] Using CPUID leaves 0x80000005-0x80000006 (AMD x86/x86-64)
+  - [x] Using CPUID leaf 0x8000001D (AMD x86/x86-64)
+  - [x] Using `/proc/cpuinfo` (Linux/pre-ARMv7)
+  - [x] Using microarchitecture heuristics (ARM)
+  - [x] Using chipset name (ARM)
+  - [x] Using `sysctlbyname` (Mach)
+  - [x] Using sysfs `topology` directories (ARM/Linux)
+  - [ ] Using sysfs `cache` directories (Linux)
+- TLB detection
+  - [x] Using CPUID leaf 0x00000002 (x86/x86-64)
+  - [ ] Using CPUID leaves 0x80000005-0x80000006 and 0x80000019 (AMD x86/x86-64)
+  - [x] Using microarchitecture heuristics (ARM)
+- Topology detection
+  - [x] Using CPUID leaf 0x00000001 on x86/x86-64 (legacy APIC ID)
+  - [x] Using CPUID leaf 0x0000000B on x86/x86-64 (Intel APIC ID)
+  - [ ] Using CPUID leaf 0x8000001E on x86/x86-64 (AMD APIC ID)
+  - [x] Using `/proc/cpuinfo` (Linux)
+  - [x] Using `host_info` (Mach)
+  - [x] Using `GetLogicalProcessorInformationEx` (Windows)
+  - [x] Using sysfs (Linux)
+  - [x] Using chipset name (ARM/Linux)
+
--- a/dep/cpuinfo/cpuinfo.vcxproj
+++ b/dep/cpuinfo/cpuinfo.vcxproj
@@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="..\msvc\vsprops\Configurations.props" />
+  <ItemGroup>
+    <ClCompile Include="deps\clog\src\clog.c" />
+    <ClCompile Include="src\api.c" />
+    <ClCompile Include="src\arm\cache.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\arm\uarch.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\arm\windows\init.c">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\cache.c" />
+    <ClCompile Include="src\init.c" />
+    <ClCompile Include="src\x86\cache\descriptor.c">
+      <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\cache\deterministic.c">
+      <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\cache\init.c">
+      <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\info.c">
+      <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\init.c">
+      <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\isa.c">
+      <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\name.c">
+      <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\topology.c">
+      <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\uarch.c">
+      <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\vendor.c">
+      <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+    <ClCompile Include="src\x86\windows\init.c">
+      <ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="deps\clog\include\clog.h" />
+    <ClInclude Include="include\cpuinfo.h" />
+    <ClInclude Include="src\arm\api.h" />
+    <ClInclude Include="src\arm\midr.h" />
+    <ClInclude Include="src\arm\windows\api.h" />
+    <ClInclude Include="src\cpuinfo\common.h" />
+    <ClInclude Include="src\cpuinfo\internal-api.h" />
+    <ClInclude Include="src\cpuinfo\log.h" />
+    <ClInclude Include="src\cpuinfo\utils.h" />
+    <ClInclude Include="src\x86\api.h" />
+    <ClInclude Include="src\x86\cpuid.h" />
+    <ClInclude Include="src\x86\windows\api.h" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{EE55AA65-EA6B-4861-810B-78354B53A807}</ProjectGuid>
+  </PropertyGroup>
+  <Import Project="..\msvc\vsprops\StaticLibrary.props" />
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <WarningLevel>TurnOffAllWarnings</WarningLevel>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;$(ProjectDir)src;$(ProjectDir)deps\clog\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <LanguageStandard>stdcpp14</LanguageStandard>
+      <ObjectFileName>$(IntDir)%(RelativeDir)</ObjectFileName>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <Import Project="..\msvc\vsprops\Targets.props" />
+</Project>
--- a/dep/cpuinfo/cpuinfo.vcxproj.filters
+++ b/dep/cpuinfo/cpuinfo.vcxproj.filters
@@ -0,0 +1,112 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="x86">
+      <UniqueIdentifier>{8fc9f543-ff04-48fb-ae1a-7c575a8aed13}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="x86\windows">
+      <UniqueIdentifier>{0b540baa-aafb-4e51-8cbf-b7e7c00d9a4d}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="x86\descriptor">
+      <UniqueIdentifier>{53ef3c40-8e03-46d1-aeb3-6446c40469da}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="cpuinfo">
+      <UniqueIdentifier>{26002d26-399a-41bb-93cb-42fb9be21c1f}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="clog">
+      <UniqueIdentifier>{7f0aba4c-ca06-4a7b-aed1-4f1e6976e839}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="arm">
+      <UniqueIdentifier>{f8cee8f2-6ab7-47cf-a5fb-3ae5e444000c}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="arm\windows">
+      <UniqueIdentifier>{cca5126a-b401-4925-b163-d2e64b010c7b}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="src\x86\isa.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\name.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\topology.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\uarch.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\vendor.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\info.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\init.c">
+      <Filter>x86</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\windows\init.c">
+      <Filter>x86\windows</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\cache\deterministic.c">
+      <Filter>x86\descriptor</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\cache\init.c">
+      <Filter>x86\descriptor</Filter>
+    </ClCompile>
+    <ClCompile Include="src\x86\cache\descriptor.c">
+      <Filter>x86\descriptor</Filter>
+    </ClCompile>
+    <ClCompile Include="src\api.c" />
+    <ClCompile Include="src\cache.c" />
+    <ClCompile Include="src\init.c" />
+    <ClCompile Include="deps\clog\src\clog.c">
+      <Filter>clog</Filter>
+    </ClCompile>
+    <ClCompile Include="src\arm\cache.c">
+      <Filter>arm</Filter>
+    </ClCompile>
+    <ClCompile Include="src\arm\uarch.c">
+      <Filter>arm</Filter>
+    </ClCompile>
+    <ClCompile Include="src\arm\windows\init.c">
+      <Filter>arm\windows</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="src\x86\api.h">
+      <Filter>x86</Filter>
+    </ClInclude>
+    <ClInclude Include="src\x86\cpuid.h">
+      <Filter>x86</Filter>
+    </ClInclude>
+    <ClInclude Include="src\x86\windows\api.h">
+      <Filter>x86\windows</Filter>
+    </ClInclude>
+    <ClInclude Include="src\cpuinfo\internal-api.h">
+      <Filter>cpuinfo</Filter>
+    </ClInclude>
+    <ClInclude Include="src\cpuinfo\log.h">
+      <Filter>cpuinfo</Filter>
+    </ClInclude>
+    <ClInclude Include="src\cpuinfo\utils.h">
+      <Filter>cpuinfo</Filter>
+    </ClInclude>
+    <ClInclude Include="src\cpuinfo\common.h">
+      <Filter>cpuinfo</Filter>
+    </ClInclude>
+    <ClInclude Include="include\cpuinfo.h" />
+    <ClInclude Include="deps\clog\include\clog.h">
+      <Filter>clog</Filter>
+    </ClInclude>
+    <ClInclude Include="src\arm\api.h">
+      <Filter>arm</Filter>
+    </ClInclude>
+    <ClInclude Include="src\arm\midr.h">
+      <Filter>arm</Filter>
+    </ClInclude>
+    <ClInclude Include="src\arm\windows\api.h">
+      <Filter>arm\windows</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
--- a/dep/cpuinfo/deps/clog/.gitignore
+++ b/dep/cpuinfo/deps/clog/.gitignore
@@ -0,0 +1,19 @@
+# Ninja files
+build.ninja
+
+# Build objects and artifacts
+deps/
+build/
+bin/
+lib/
+*.pyc
+*.pyo
+
+# System files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
--- a/dep/cpuinfo/deps/clog/CMakeLists.txt
+++ b/dep/cpuinfo/deps/clog/CMakeLists.txt
@@ -0,0 +1,42 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)
+
+# ---[ Project and semantic versioning.
+PROJECT(clog C CXX)
+
+# ---[ Options.
+SET(CLOG_RUNTIME_TYPE "default" CACHE STRING "Type of runtime library (shared, static, or default) to use")
+SET_PROPERTY(CACHE CLOG_RUNTIME_TYPE PROPERTY STRINGS default static shared)
+IF(ANDROID)
+  OPTION(CLOG_LOG_TO_STDIO "Log errors, warnings, and information to stdout/stderr" OFF)
+ELSE()
+  OPTION(CLOG_LOG_TO_STDIO "Log errors, warnings, and information to stdout/stderr" ON)
+ENDIF()
+
+MACRO(CLOG_TARGET_RUNTIME_LIBRARY target)  # Apply CLOG_RUNTIME_TYPE to <target>; MSVC only
+  IF(MSVC AND NOT CLOG_RUNTIME_TYPE STREQUAL "default")  # "default" adds no runtime flag
+    IF(CLOG_RUNTIME_TYPE STREQUAL "shared")
+      TARGET_COMPILE_OPTIONS(${target} PRIVATE
+        "/MD$<$<CONFIG:Debug>:d>")  # /MD, or /MDd in the Debug configuration
+    ELSEIF(CLOG_RUNTIME_TYPE STREQUAL "static")
+      TARGET_COMPILE_OPTIONS(${target} PRIVATE
+        "/MT$<$<CONFIG:Debug>:d>")  # /MT, or /MTd in the Debug configuration
+    ENDIF()
+  ENDIF()
+ENDMACRO()
+
+# ---[ clog library (always built as a static library)
+ADD_LIBRARY(clog STATIC src/clog.c)
+SET_TARGET_PROPERTIES(clog PROPERTIES
+  C_STANDARD 99
+  C_EXTENSIONS NO)
+CLOG_TARGET_RUNTIME_LIBRARY(clog)
+SET_TARGET_PROPERTIES(clog PROPERTIES PUBLIC_HEADER include/clog.h)
+TARGET_INCLUDE_DIRECTORIES(clog BEFORE PUBLIC include)
+IF(CLOG_LOG_TO_STDIO)  # pass the option to the source as CLOG_LOG_TO_STDIO=1/0
+  TARGET_COMPILE_DEFINITIONS(clog PRIVATE CLOG_LOG_TO_STDIO=1)
+ELSE()
+  TARGET_COMPILE_DEFINITIONS(clog PRIVATE CLOG_LOG_TO_STDIO=0)
+ENDIF()
+IF(ANDROID AND NOT CLOG_LOG_TO_STDIO)
+  TARGET_LINK_LIBRARIES(clog PRIVATE log)  # presumably Android's liblog, for logcat output - confirm
+ENDIF()
--- a/dep/cpuinfo/deps/clog/LICENSE
+++ b/dep/cpuinfo/deps/clog/LICENSE
@@ -0,0 +1,26 @@
+Copyright (C) 2018 Marat Dukhan
+Copyright (c) 2017-2018 Facebook Inc.
+Copyright (c) 2017 Georgia Institute of Technology
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/dep/cpuinfo/deps/clog/README.md
+++ b/dep/cpuinfo/deps/clog/README.md
@@ -0,0 +1,57 @@
+# clog: C-style (a-la printf) logging library
+
+[![BSD (2 clause) License](https://img.shields.io/badge/License-BSD%202--Clause%20%22Simplified%22%20License-blue.svg)](https://github.com/pytorch/cpuinfo/blob/master/deps/clog/LICENSE)
+
+C-style library for logging errors, warnings, information notes, and debug information.
+
+## Features
+
+- printf-style interface for formatting variadic parameters.
+- Separate functions for logging errors, warnings, information notes, and debug information.
+- Independent logging settings for different modules.
+- Logging to logcat on Android and stderr/stdout on other platforms.
+- Compatible with C99 and C++.
+- Covered with unit tests.
+
+## Example
+
+```c
+#include <clog.h>
+
+#ifndef MYMODULE_LOG_LEVEL
+    #define MYMODULE_LOG_LEVEL CLOG_DEBUG
+#endif
+
+CLOG_DEFINE_LOG_DEBUG(mymodule_, "My Module", MYMODULE_LOG_LEVEL);
+CLOG_DEFINE_LOG_INFO(mymodule_, "My Module", MYMODULE_LOG_LEVEL);
+CLOG_DEFINE_LOG_WARNING(mymodule_, "My Module", MYMODULE_LOG_LEVEL);
+CLOG_DEFINE_LOG_ERROR(mymodule_, "My Module", MYMODULE_LOG_LEVEL);
+
+...
+
+void some_function(...) {
+    int status = ...
+    if (status != 0) {
+        mymodule_log_error(
+            "something really bad happened: "
+            "operation failed with status %d", status);
+    }
+
+    uint32_t expected_zero = ...
+    if (expected_zero != 0) {
+        mymodule_log_warning(
+            "something suspicious happened (var = %"PRIu32"), "
+            "fall back to generic implementation", expected_zero);
+    }
+
+    void* usually_non_null = ...
+    if (usually_non_null == NULL) {
+        mymodule_log_info(
+            "something unusual, but common, happened: "
+            "enabling work-around");
+    }
+
+    float a = ...
+    mymodule_log_debug("computed a = %.7f", a);
+}
+```
--- a/dep/cpuinfo/deps/clog/include/clog.h
+++ b/dep/cpuinfo/deps/clog/include/clog.h
@@ -0,0 +1,100 @@
+#pragma once
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#define CLOG_NONE 0
+#define CLOG_FATAL 1
+#define CLOG_ERROR 2
+#define CLOG_WARNING 3
+#define CLOG_INFO 4
+#define CLOG_DEBUG 5
+
+#ifndef CLOG_VISIBILITY
+	#if defined(__ELF__)
+		#define CLOG_VISIBILITY __attribute__((__visibility__("internal")))
+	#elif defined(__MACH__)
+		#define CLOG_VISIBILITY __attribute__((__visibility__("hidden")))
+	#else
+		#define CLOG_VISIBILITY
+	#endif
+#endif
+
+#ifndef CLOG_ARGUMENTS_FORMAT
+	#if defined(__GNUC__)
+		#define CLOG_ARGUMENTS_FORMAT __attribute__((__format__(__printf__, 1, 2)))
+	#else
+		#define CLOG_ARGUMENTS_FORMAT
+	#endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Low-level logging entry points, one per severity.
+ * Each takes a module name, a printf-style format string, and the matching va_list.
+ */
+CLOG_VISIBILITY void clog_vlog_debug(const char* module, const char* format, va_list args);
+CLOG_VISIBILITY void clog_vlog_info(const char* module, const char* format, va_list args);
+CLOG_VISIBILITY void clog_vlog_warning(const char* module, const char* format, va_list args);
+CLOG_VISIBILITY void clog_vlog_error(const char* module, const char* format, va_list args);
+CLOG_VISIBILITY void clog_vlog_fatal(const char* module, const char* format, va_list args);
+
+/*
+ * Defines a static inline printf-style function named log_debug_function_name.
+ * The function forwards to clog_vlog_debug with the given module when level >= CLOG_DEBUG,
+ * and does nothing otherwise.
+ */
+#define CLOG_DEFINE_LOG_DEBUG(log_debug_function_name, module, level) \
+	CLOG_ARGUMENTS_FORMAT \
+	inline static void log_debug_function_name(const char* format, ...) { \
+		if (level >= CLOG_DEBUG) { \
+			va_list args; \
+			va_start(args, format); \
+			clog_vlog_debug(module, format, args); \
+			va_end(args); \
+		} \
+	}
+
+/*
+ * Defines a static inline printf-style function named log_info_function_name.
+ * The function forwards to clog_vlog_info with the given module when level >= CLOG_INFO,
+ * and does nothing otherwise.
+ */
+#define CLOG_DEFINE_LOG_INFO(log_info_function_name, module, level) \
+	CLOG_ARGUMENTS_FORMAT \
+	inline static void log_info_function_name(const char* format, ...) { \
+		if (level >= CLOG_INFO) { \
+			va_list args; \
+			va_start(args, format); \
+			clog_vlog_info(module, format, args); \
+			va_end(args); \
+		} \
+	}
+
+/*
+ * Defines a static inline printf-style function named log_warning_function_name.
+ * The function forwards to clog_vlog_warning with the given module when level >= CLOG_WARNING,
+ * and does nothing otherwise.
+ */
+#define CLOG_DEFINE_LOG_WARNING(log_warning_function_name, module, level) \
+	CLOG_ARGUMENTS_FORMAT \
+	inline static void log_warning_function_name(const char* format, ...) { \
+		if (level >= CLOG_WARNING) { \
+			va_list args; \
+			va_start(args, format); \
+			clog_vlog_warning(module, format, args); \
+			va_end(args); \
+		} \
+	}
+
+/*
+ * Defines a static inline printf-style function named log_error_function_name.
+ * The function forwards to clog_vlog_error with the given module when level >= CLOG_ERROR,
+ * and does nothing otherwise.
+ */
+#define CLOG_DEFINE_LOG_ERROR(log_error_function_name, module, level) \
+	CLOG_ARGUMENTS_FORMAT \
+	inline static void log_error_function_name(const char* format, ...) { \
+		if (level >= CLOG_ERROR) { \
+			va_list args; \
+			va_start(args, format); \
+			clog_vlog_error(module, format, args); \
+			va_end(args); \
+		} \
+	}
+
+/*
+ * Defines a static inline printf-style function named log_fatal_function_name.
+ * The function forwards to clog_vlog_fatal with the given module when level >= CLOG_FATAL.
+ * Unlike the other severities, it always calls abort() afterwards, whether or not
+ * the message was logged.
+ */
+#define CLOG_DEFINE_LOG_FATAL(log_fatal_function_name, module, level) \
+	CLOG_ARGUMENTS_FORMAT \
+	inline static void log_fatal_function_name(const char* format, ...) { \
+		if (level >= CLOG_FATAL) { \
+			va_list args; \
+			va_start(args, format); \
+			clog_vlog_fatal(module, format, args); \
+			va_end(args); \
+		} \
+		abort(); \
+	}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
--- a/dep/cpuinfo/deps/clog/src/clog.c
+++ b/dep/cpuinfo/deps/clog/src/clog.c
@@ -0,0 +1,423 @@
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#ifdef _WIN32
+	#include <windows.h>
+#else
+	#include <unistd.h>
+#endif
+#ifdef __ANDROID__
+	#include <android/log.h>
+#endif
+
+#ifndef CLOG_LOG_TO_STDIO
+	#ifdef __ANDROID__
+		#define CLOG_LOG_TO_STDIO 0
+	#else
+		#define CLOG_LOG_TO_STDIO 1
+	#endif
+#endif
+
+#include <clog.h>
+
+
+/* Messages up to this size are formatted entirely on-stack, and don't allocate heap memory */
+#define CLOG_STACK_BUFFER_SIZE 1024
+
+#define CLOG_FATAL_PREFIX "Fatal error: "
+#define CLOG_FATAL_PREFIX_LENGTH 13
+#define CLOG_FATAL_PREFIX_FORMAT "Fatal error in %s: "
+#define CLOG_ERROR_PREFIX "Error: "
+#define CLOG_ERROR_PREFIX_LENGTH 7
+#define CLOG_ERROR_PREFIX_FORMAT "Error in %s: "
+#define CLOG_WARNING_PREFIX "Warning: "
+#define CLOG_WARNING_PREFIX_LENGTH 9
+#define CLOG_WARNING_PREFIX_FORMAT "Warning in %s: "
+#define CLOG_INFO_PREFIX "Note: "
+#define CLOG_INFO_PREFIX_LENGTH 6
+#define CLOG_INFO_PREFIX_FORMAT "Note (%s): "
+#define CLOG_DEBUG_PREFIX "Debug: "
+#define CLOG_DEBUG_PREFIX_LENGTH 7
+#define CLOG_DEBUG_PREFIX_FORMAT "Debug (%s): "
+#define CLOG_SUFFIX_LENGTH 1
+
+#if !defined(__ANDROID__) || CLOG_LOG_TO_STDIO
+/* Standard stream a formatted message is written to. */
+enum clog_stream {
+	clog_stream_stdout,
+	clog_stream_stderr,
+};
+
+/*
+ * Shared stdio backend for all clog_vlog_* functions.
+ *
+ * Builds "<prefix><formatted message>\n" and writes it to the selected standard stream
+ * with a single write()/WriteFile() call. Messages that fit into CLOG_STACK_BUFFER_SIZE
+ * bytes are formatted entirely on-stack; longer ones use a temporary heap buffer.
+ *
+ * stream         - destination stream (stdout or stderr).
+ * prefix         - fixed prefix used when module is NULL (not required to be NUL-terminated
+ *                  within prefix_length).
+ * prefix_length  - number of characters in prefix.
+ * prefix_format  - printf-style format with exactly one %s, used when module is non-NULL.
+ * module         - module name, or NULL to use the fixed prefix.
+ * format, args   - printf-style message format and its arguments.
+ *
+ * Format errors and allocation failures are handled by silently dropping the message.
+ */
+static void clog_vlog_stdio(
+	enum clog_stream stream,
+	const char* prefix,
+	int prefix_length,
+	const char* prefix_format,
+	const char* module,
+	const char* format,
+	va_list args)
+{
+	char stack_buffer[CLOG_STACK_BUFFER_SIZE];
+	char* heap_buffer = NULL;
+	char* out_buffer = &stack_buffer[0];
+
+	/* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */
+	va_list args_copy;
+	va_copy(args_copy, args);
+
+	int prefix_chars = prefix_length;
+	if (module == NULL) {
+		memcpy(stack_buffer, prefix, prefix_length);
+	} else {
+		prefix_chars = snprintf(stack_buffer, CLOG_STACK_BUFFER_SIZE, prefix_format, module);
+		if (prefix_chars < 0) {
+			/* Format error in prefix (possible if prefix is modified): skip prefix and continue as if nothing happened. */
+			prefix_chars = 0;
+		}
+	}
+
+	int format_chars;
+	if (prefix_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) {
+		/*
+		 * Prefix + suffix alone would overflow the on-stack buffer, thus need to use on-heap buffer.
+		 * Do not even try to format the string into on-stack buffer, only compute the message length.
+		 */
+		format_chars = vsnprintf(NULL, 0, format, args);
+	} else {
+		/*
+		 * Give vsnprintf all the remaining space: its '\0'-terminator lands in the slot
+		 * reserved for the '\n' suffix and is overwritten below. Passing one byte less
+		 * would truncate the last character of a message that exactly fills the buffer.
+		 */
+		format_chars =
+			vsnprintf(
+				&stack_buffer[prefix_chars],
+				CLOG_STACK_BUFFER_SIZE - prefix_chars,
+				format,
+				args);
+	}
+	if (format_chars < 0) {
+		/* Format error in the message: silently ignore this particular message. */
+		goto cleanup;
+	}
+	if (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH > CLOG_STACK_BUFFER_SIZE) {
+		/* Allocate a buffer on heap, and vsnprintf to this buffer */
+		heap_buffer = malloc(prefix_chars + format_chars + CLOG_SUFFIX_LENGTH);
+		if (heap_buffer == NULL) {
+			goto cleanup;
+		}
+
+		if (prefix_chars >= CLOG_STACK_BUFFER_SIZE) {
+			/*
+			 * Prefix didn't fit into on-stack buffer (snprintf keeps at most
+			 * CLOG_STACK_BUFFER_SIZE - 1 characters), re-format it again to on-heap buffer.
+			 */
+			snprintf(heap_buffer, prefix_chars + 1 /* for '\0'-terminator */, prefix_format, module);
+		} else {
+			/* Copy pre-formatted prefix from on-stack buffer to on-heap buffer */
+			memcpy(heap_buffer, stack_buffer, prefix_chars);
+		}
+		vsnprintf(heap_buffer + prefix_chars, format_chars + CLOG_SUFFIX_LENGTH, format, args_copy);
+		out_buffer = heap_buffer;
+	}
+	/* Replace the '\0'-terminator with the newline suffix. */
+	out_buffer[prefix_chars + format_chars] = '\n';
+	#ifdef _WIN32
+		DWORD bytes_written;
+		WriteFile(
+			GetStdHandle(stream == clog_stream_stderr ? STD_ERROR_HANDLE : STD_OUTPUT_HANDLE),
+			out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH,
+			&bytes_written, NULL);
+	#else
+		write(
+			stream == clog_stream_stderr ? STDERR_FILENO : STDOUT_FILENO,
+			out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH);
+	#endif
+
+cleanup:
+	free(heap_buffer);
+	va_end(args_copy);
+}
+#endif /* !defined(__ANDROID__) || CLOG_LOG_TO_STDIO */
+
+/* Logs a fatal-level message: Android log on Android (unless CLOG_LOG_TO_STDIO), stderr otherwise. */
+void clog_vlog_fatal(const char* module, const char* format, va_list args) {
+	#if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO
+		__android_log_vprint(ANDROID_LOG_FATAL, module, format, args);
+	#else
+		clog_vlog_stdio(
+			clog_stream_stderr,
+			CLOG_FATAL_PREFIX, CLOG_FATAL_PREFIX_LENGTH, CLOG_FATAL_PREFIX_FORMAT,
+			module, format, args);
+	#endif
+}
+
+/* Logs an error-level message: Android log on Android (unless CLOG_LOG_TO_STDIO), stderr otherwise. */
+void clog_vlog_error(const char* module, const char* format, va_list args) {
+	#if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO
+		__android_log_vprint(ANDROID_LOG_ERROR, module, format, args);
+	#else
+		clog_vlog_stdio(
+			clog_stream_stderr,
+			CLOG_ERROR_PREFIX, CLOG_ERROR_PREFIX_LENGTH, CLOG_ERROR_PREFIX_FORMAT,
+			module, format, args);
+	#endif
+}
+
+/* Logs a warning-level message: Android log on Android (unless CLOG_LOG_TO_STDIO), stderr otherwise. */
+void clog_vlog_warning(const char* module, const char* format, va_list args) {
+	#if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO
+		__android_log_vprint(ANDROID_LOG_WARN, module, format, args);
+	#else
+		clog_vlog_stdio(
+			clog_stream_stderr,
+			CLOG_WARNING_PREFIX, CLOG_WARNING_PREFIX_LENGTH, CLOG_WARNING_PREFIX_FORMAT,
+			module, format, args);
+	#endif
+}
+
+/* Logs an info-level message: Android log on Android (unless CLOG_LOG_TO_STDIO), stdout otherwise. */
+void clog_vlog_info(const char* module, const char* format, va_list args) {
+	#if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO
+		__android_log_vprint(ANDROID_LOG_INFO, module, format, args);
+	#else
+		clog_vlog_stdio(
+			clog_stream_stdout,
+			CLOG_INFO_PREFIX, CLOG_INFO_PREFIX_LENGTH, CLOG_INFO_PREFIX_FORMAT,
+			module, format, args);
+	#endif
+}
+
+/* Logs a debug-level message: Android log on Android (unless CLOG_LOG_TO_STDIO), stdout otherwise. */
+void clog_vlog_debug(const char* module, const char* format, va_list args) {
+	#if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO
+		__android_log_vprint(ANDROID_LOG_DEBUG, module, format, args);
+	#else
+		clog_vlog_stdio(
+			clog_stream_stdout,
+			CLOG_DEBUG_PREFIX, CLOG_DEBUG_PREFIX_LENGTH, CLOG_DEBUG_PREFIX_FORMAT,
+			module, format, args);
+	#endif
+}
--- a/dep/cpuinfo/include/cpuinfo-mock.h
+++ b/dep/cpuinfo/include/cpuinfo-mock.h
@@ -0,0 +1,78 @@
+#pragma once
+#ifndef CPUINFO_MOCK_H
+#define CPUINFO_MOCK_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#if defined(__linux__)
+	#include <sys/types.h>
+#endif
+
+#if !defined(CPUINFO_MOCK) || !(CPUINFO_MOCK)
+	#error This header is intended only for test use
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#if CPUINFO_ARCH_ARM
+	void CPUINFO_ABI cpuinfo_set_fpsid(uint32_t fpsid);
+	void CPUINFO_ABI cpuinfo_set_wcid(uint32_t wcid);
+#endif /* CPUINFO_ARCH_ARM */
+
+#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+	/*
+	 * One mocked CPUID record.
+	 * NOTE(review): input_eax/input_ecx look like the CPUID leaf/subleaf inputs and
+	 * eax/ebx/ecx/edx the register values returned for them - confirm against the mock implementation.
+	 */
+	struct cpuinfo_mock_cpuid {
+		uint32_t input_eax;
+		uint32_t input_ecx;
+		uint32_t eax;
+		uint32_t ebx;
+		uint32_t ecx;
+		uint32_t edx;
+	};
+
+	/* Mock CPUID hooks; `dump` is an array of `entries` records (presumably the table later lookups are served from - TODO confirm). */
+	void CPUINFO_ABI cpuinfo_mock_set_cpuid(struct cpuinfo_mock_cpuid* dump, size_t entries);
+	void CPUINFO_ABI cpuinfo_mock_get_cpuid(uint32_t eax, uint32_t regs[4]);
+	void CPUINFO_ABI cpuinfo_mock_get_cpuidex(uint32_t eax, uint32_t ecx, uint32_t regs[4]);
+#endif /* CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 */
+
+struct cpuinfo_mock_file {
+	const char* path;
+	size_t size;
+	const char* content;
+	size_t offset;
+};
+
+struct cpuinfo_mock_property {
+	const char* key;
+	const char* value;
+};
+
+#if defined(__linux__)
+	void CPUINFO_ABI cpuinfo_mock_filesystem(struct cpuinfo_mock_file* files);
+	int CPUINFO_ABI cpuinfo_mock_open(const char* path, int oflag);
+	int CPUINFO_ABI cpuinfo_mock_close(int fd);
+	ssize_t CPUINFO_ABI cpuinfo_mock_read(int fd, void* buffer, size_t capacity);
+
+	#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+		void CPUINFO_ABI cpuinfo_set_hwcap(uint32_t hwcap);
+	#endif
+	#if CPUINFO_ARCH_ARM
+		void CPUINFO_ABI cpuinfo_set_hwcap2(uint32_t hwcap2);
+	#endif
+#endif
+
+#if defined(__ANDROID__)
+	void CPUINFO_ABI cpuinfo_mock_android_properties(struct cpuinfo_mock_property* properties);
+	void CPUINFO_ABI cpuinfo_mock_gl_renderer(const char* renderer);
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* CPUINFO_MOCK_H */
--- a/dep/cpuinfo/include/cpuinfo.h
+++ b/dep/cpuinfo/include/cpuinfo.h
--- a/dep/cpuinfo/src/api.c
+++ b/dep/cpuinfo/src/api.c
@@ -0,0 +1,410 @@
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#ifdef __linux__
+	#include <linux/api.h>
+
+	#include <unistd.h>
+	#include <sys/syscall.h>
+	#if !defined(__NR_getcpu)
+		#include <asm-generic/unistd.h>
+	#endif
+#endif
+
+/* Guard flag checked by every public getter in this file before it touches the tables below. */
+bool cpuinfo_is_initialized = false;
+
+/* Global topology tables returned by the getters below; NULL until populated (population is not done in this file's visible code). */
+struct cpuinfo_processor* cpuinfo_processors = NULL;
+struct cpuinfo_core* cpuinfo_cores = NULL;
+struct cpuinfo_cluster* cpuinfo_clusters = NULL;
+struct cpuinfo_package* cpuinfo_packages = NULL;
+/* Per-cache-level tables, indexed by cpuinfo_cache_level_* values. */
+struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max] = { NULL };
+
+/* Element counts for the tables above; used for bounds checks in the indexed getters. */
+uint32_t cpuinfo_processors_count = 0;
+uint32_t cpuinfo_cores_count = 0;
+uint32_t cpuinfo_clusters_count = 0;
+uint32_t cpuinfo_packages_count = 0;
+uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 };
+uint32_t cpuinfo_max_cache_size = 0;
+
+/* ARM/ARM64 builds keep an array of microarchitectures; other builds keep a single global entry. */
+#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+	struct cpuinfo_uarch_info* cpuinfo_uarchs = NULL;
+	uint32_t cpuinfo_uarchs_count = 0;
+#else
+	struct cpuinfo_uarch_info cpuinfo_global_uarch = { cpuinfo_uarch_unknown };
+#endif
+
+#ifdef __linux__
+	/* Lookup tables indexed by the CPU number reported by getcpu; cpuinfo_linux_cpu_max bounds valid indices. */
+	uint32_t cpuinfo_linux_cpu_max = 0;
+	const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL;
+	const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL;
+	#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+		const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map = NULL;
+	#endif
+#endif
+
+
+/*
+ * Returns the global processors table (cpuinfo_processors).
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_processor* cpuinfo_get_processors(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors");
+	}
+	return cpuinfo_processors;
+}
+
+/*
+ * Returns the global cores table (cpuinfo_cores).
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_core* cpuinfo_get_cores(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		/* The suffix must spell this function's name: cpuinfo_get_cores, not cpuinfo_get_core. */
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cores");
+	}
+	return cpuinfo_cores;
+}
+
+/*
+ * Returns the global clusters table (cpuinfo_clusters).
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cluster* cpuinfo_get_clusters(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters");
+	}
+	return cpuinfo_clusters;
+}
+
+/*
+ * Returns the global packages table (cpuinfo_packages).
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_package* cpuinfo_get_packages(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages");
+	}
+	return cpuinfo_packages;
+}
+
+/*
+ * Returns the microarchitecture table: the cpuinfo_uarchs array on ARM/ARM64,
+ * or a pointer to the single global entry on other architectures.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_uarch_info* cpuinfo_get_uarchs(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs");
+	}
+	#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+		return cpuinfo_uarchs;
+	#else
+		return &cpuinfo_global_uarch;
+	#endif
+}
+
+/*
+ * Returns a pointer to the processor at `index`, or NULL if index is out of range.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processor");
+	}
+	if CPUINFO_UNLIKELY(index >= cpuinfo_processors_count) {
+		return NULL;
+	}
+	return &cpuinfo_processors[index];
+}
+
+/*
+ * Returns a pointer to the core at `index`, or NULL if index is out of range.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_core* cpuinfo_get_core(uint32_t index) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core");
+	}
+	if CPUINFO_UNLIKELY(index >= cpuinfo_cores_count) {
+		return NULL;
+	}
+	return &cpuinfo_cores[index];
+}
+
+/*
+ * Returns a pointer to the cluster at `index`, or NULL if index is out of range.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cluster* cpuinfo_get_cluster(uint32_t index) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cluster");
+	}
+	if CPUINFO_UNLIKELY(index >= cpuinfo_clusters_count) {
+		return NULL;
+	}
+	return &cpuinfo_clusters[index];
+}
+
+/*
+ * Returns a pointer to the package at `index`, or NULL if index is out of range.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_package* cpuinfo_get_package(uint32_t index) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "package");
+	}
+	if CPUINFO_UNLIKELY(index >= cpuinfo_packages_count) {
+		return NULL;
+	}
+	return &cpuinfo_packages[index];
+}
+
+/*
+ * Returns a pointer to the microarchitecture entry at `index`, or NULL if index is out of range.
+ * On ARM/ARM64 the valid range is [0, cpuinfo_uarchs_count); on other architectures
+ * only index 0 (the single global entry) is valid.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_uarch_info* cpuinfo_get_uarch(uint32_t index) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarch");
+	}
+	#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+		if CPUINFO_UNLIKELY(index >= cpuinfo_uarchs_count) {
+			return NULL;
+		}
+		return &cpuinfo_uarchs[index];
+	#else
+		if CPUINFO_UNLIKELY(index != 0) {
+			return NULL;
+		}
+		return &cpuinfo_global_uarch;
+	#endif
+}
+
+/*
+ * Returns the number of entries in the processors table.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+uint32_t cpuinfo_get_processors_count(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors_count");
+	}
+	return cpuinfo_processors_count;
+}
+
+/*
+ * Returns the number of entries in the cores table.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+uint32_t cpuinfo_get_cores_count(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cores_count");
+	}
+	return cpuinfo_cores_count;
+}
+
+/*
+ * Returns the number of entries in the clusters table.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+uint32_t cpuinfo_get_clusters_count(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters_count");
+	}
+	return cpuinfo_clusters_count;
+}
+
+/*
+ * Returns the number of entries in the packages table.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+uint32_t cpuinfo_get_packages_count(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages_count");
+	}
+	return cpuinfo_packages_count;
+}
+
+/*
+ * Returns the number of microarchitecture entries: cpuinfo_uarchs_count on ARM/ARM64,
+ * and always 1 (the single global entry) on other architectures.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+uint32_t cpuinfo_get_uarchs_count(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs_count");
+	}
+	#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+		return cpuinfo_uarchs_count;
+	#else
+		return 1;
+	#endif
+}
+
+/*
+ * Returns the table of L1 instruction caches.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches");
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_1i];
+}
+
+/*
+ * Returns the table of L1 data caches.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches");
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_1d];
+}
+
+/*
+ * Returns the table of L2 caches.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches");
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_2];
+}
+
+/*
+ * Returns the table of L3 caches.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_caches(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches");
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_3];
+}
+
+/*
+ * Returns the table of L4 caches.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_caches(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches");
+	}
+	return cpuinfo_cache[cpuinfo_cache_level_4];
+}
+
+/*
+ * Returns a pointer to the L1 instruction cache at `index`, or NULL if index is out of range.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_cache");
+	}
+	if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1i]) {
+		return NULL;
+	}
+	return &cpuinfo_cache[cpuinfo_cache_level_1i][index];
+}
+
+/*
+ * Returns a pointer to the L1 data cache at `index`, or NULL if index is out of range.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_cache");
+	}
+	if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1d]) {
+		return NULL;
+	}
+	return &cpuinfo_cache[cpuinfo_cache_level_1d][index];
+}
+
+/*
+ * Returns a pointer to the L2 cache at `index`, or NULL if index is out of range.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_cache");
+	}
+	if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_2]) {
+		return NULL;
+	}
+	return &cpuinfo_cache[cpuinfo_cache_level_2][index];
+}
+
+/*
+ * Returns a pointer to the L3 cache at `index`, or NULL if index is out of range.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_cache");
+	}
+	if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_3]) {
+		return NULL;
+	}
+	return &cpuinfo_cache[cpuinfo_cache_level_3][index];
+}
+
+/*
+ * Returns a pointer to the L4 cache at `index`, or NULL if index is out of range.
+ * Reports a fatal error via cpuinfo_log_fatal when called before cpuinfo is initialized.
+ */
+const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_cache");
+	}
+	if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_4]) {
+		return NULL;
+	}
+	return &cpuinfo_cache[cpuinfo_cache_level_4][index];
+}
+
+uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches_count");
+	}
+	return cpuinfo_cache_count[cpuinfo_cache_level_1i];
+}
+
+uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches_count");
+	}
+	return cpuinfo_cache_count[cpuinfo_cache_level_1d];
+}
+
+uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches_count");
+	}
+	return cpuinfo_cache_count[cpuinfo_cache_level_2];
+}
+
+/* Report the entry of cpuinfo_cache_count for the L3 level; fatal log if cpuinfo is not initialized. */
+uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches_count");
+	}
+	const uint32_t l3_count = cpuinfo_cache_count[cpuinfo_cache_level_3];
+	return l3_count;
+}
+
+/* Report the entry of cpuinfo_cache_count for the L4 level; fatal log if cpuinfo is not initialized. */
+uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches_count");
+	}
+	const uint32_t l4_count = cpuinfo_cache_count[cpuinfo_cache_level_4];
+	return l4_count;
+}
+
+/* Report the stored cpuinfo_max_cache_size value; fatal log if cpuinfo is not initialized. */
+uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "max_cache_size");
+	}
+	const uint32_t max_size = cpuinfo_max_cache_size;
+	return max_size;
+}
+
+/*
+ * Resolve the processor entry for the CPU the calling thread is running on.
+ * On Linux the CPU number is obtained with the getcpu syscall and mapped through
+ * cpuinfo_linux_cpu_to_processor_map; NULL is returned when the syscall fails or
+ * the CPU number is not below cpuinfo_linux_cpu_max.
+ * On every other platform the result is always NULL.
+ */
+const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor");
+	}
+	#ifdef __linux__
+		/* Zero-initialized up front: this silences a MemorySanitizer error. */
+		unsigned current_cpu = 0;
+		const long getcpu_status = syscall(__NR_getcpu, &current_cpu, NULL, NULL);
+		if CPUINFO_UNLIKELY(getcpu_status != 0) {
+			return NULL;
+		}
+		if CPUINFO_UNLIKELY((uint32_t) current_cpu >= cpuinfo_linux_cpu_max) {
+			return NULL;
+		}
+		return cpuinfo_linux_cpu_to_processor_map[current_cpu];
+	#else
+		return NULL;
+	#endif
+}
+
+/*
+ * Resolve the core entry for the CPU the calling thread is running on.
+ * On Linux the CPU number is obtained with the getcpu syscall and mapped through
+ * cpuinfo_linux_cpu_to_core_map; NULL is returned when the syscall fails or
+ * the CPU number is not below cpuinfo_linux_cpu_max.
+ * On every other platform the result is always NULL.
+ */
+const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core");
+	}
+	#ifdef __linux__
+		/* Zero-initialized up front: this silences a MemorySanitizer error. */
+		unsigned current_cpu = 0;
+		const long getcpu_status = syscall(__NR_getcpu, &current_cpu, NULL, NULL);
+		if CPUINFO_UNLIKELY(getcpu_status != 0) {
+			return NULL;
+		}
+		if CPUINFO_UNLIKELY((uint32_t) current_cpu >= cpuinfo_linux_cpu_max) {
+			return NULL;
+		}
+		return cpuinfo_linux_cpu_to_core_map[current_cpu];
+	#else
+		return NULL;
+	#endif
+}
+
+/*
+ * Report the microarchitecture index of the core running the calling thread.
+ * Only ARM/ARM64 builds on Linux perform a lookup (getcpu syscall, then
+ * cpuinfo_linux_cpu_to_uarch_index_map); every other configuration, and every
+ * failure path, yields 0.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index");
+	}
+	#if (CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64) && defined(__linux__)
+		if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) {
+			/* No map: systems with only a single type of cores need no syscall. */
+			return 0;
+		}
+
+		/* Zero-initialized up front: this silences a MemorySanitizer error. */
+		unsigned current_cpu = 0;
+		const long getcpu_status = syscall(__NR_getcpu, &current_cpu, NULL, NULL);
+		if CPUINFO_UNLIKELY(getcpu_status != 0) {
+			return 0;
+		}
+		if CPUINFO_UNLIKELY((uint32_t) current_cpu >= cpuinfo_linux_cpu_max) {
+			return 0;
+		}
+		return cpuinfo_linux_cpu_to_uarch_index_map[current_cpu];
+	#else
+		/*
+		 * ARM/ARM64 without Linux: pretend to be on the big core.
+		 * Other architectures: only ARM/ARM64 processors may include cores of
+		 * different types in the same package.
+		 */
+		return 0;
+	#endif
+}
+
+/*
+ * Variant of cpuinfo_get_current_uarch_index() with a caller-chosen fallback.
+ * default_uarch_index is returned when the current CPU cannot be determined on
+ * ARM/ARM64 (getcpu fails, the CPU number is out of range, or the build is not
+ * for Linux). When no uarch map exists, and on non-ARM architectures, the
+ * result is 0 regardless of default_uarch_index.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index) {
+	if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
+		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index_with_default");
+	}
+	#if (CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64) && defined(__linux__)
+		if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) {
+			/* No map: systems with only a single type of cores need no syscall. */
+			return 0;
+		}
+
+		/* Zero-initialized up front: this silences a MemorySanitizer error. */
+		unsigned current_cpu = 0;
+		const long getcpu_status = syscall(__NR_getcpu, &current_cpu, NULL, NULL);
+		if CPUINFO_UNLIKELY(getcpu_status != 0) {
+			return default_uarch_index;
+		}
+		if CPUINFO_UNLIKELY((uint32_t) current_cpu >= cpuinfo_linux_cpu_max) {
+			return default_uarch_index;
+		}
+		return cpuinfo_linux_cpu_to_uarch_index_map[current_cpu];
+	#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+		/* No API to query the current core here: use the default uarch index. */
+		return default_uarch_index;
+	#else
+		/* Only ARM/ARM64 processors may include cores of different types in the same package. */
+		return 0;
+	#endif
+}
--- a/dep/cpuinfo/src/arm/android/api.h
+++ b/dep/cpuinfo/src/arm/android/api.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+#include <arm/api.h>
+#include <arm/linux/api.h>
+
+/*
+ * Identifiers for the text sources a chipset name can come from.
+ * Values start at 0 and are consecutive, so cpuinfo_android_chipset_property_max
+ * equals the number of real entries and can be used as an array size.
+ * NOTE(review): the names appear to mirror the fields of
+ * struct cpuinfo_android_properties (/proc/cpuinfo "Hardware" plus ro.* system
+ * properties) - confirm against the users of this enum.
+ */
+enum cpuinfo_android_chipset_property {
+	cpuinfo_android_chipset_property_proc_cpuinfo_hardware = 0,
+	cpuinfo_android_chipset_property_ro_product_board,
+	cpuinfo_android_chipset_property_ro_board_platform,
+	cpuinfo_android_chipset_property_ro_mediatek_platform,
+	cpuinfo_android_chipset_property_ro_arch,
+	cpuinfo_android_chipset_property_ro_chipname,
+	cpuinfo_android_chipset_property_ro_hardware_chipname,
+	/* Sentinel: one past the last real entry. */
+	cpuinfo_android_chipset_property_max,
+};
+
+/*
+ * Reads Android system properties into the ro_* buffers of `properties`.
+ * `properties` must point to at least one valid structure
+ * (that is what the [restrict static 1] declarator states).
+ */
+CPUINFO_INTERNAL void cpuinfo_arm_android_parse_properties(
+	struct cpuinfo_android_properties properties[restrict static 1]);
--- a/dep/cpuinfo/src/arm/android/properties.c
+++ b/dep/cpuinfo/src/arm/android/properties.c
@@ -0,0 +1,67 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+
+#include <sys/system_properties.h>
+
+#include <linux/api.h>
+#include <arm/android/api.h>
+#include <arm/linux/api.h>
+#include <cpuinfo/log.h>
+
+#if CPUINFO_MOCK
+	#include <cpuinfo-mock.h>
+
+	/* Property table installed by cpuinfo_mock_android_properties(); NULL until then. */
+	static struct cpuinfo_mock_property* cpuinfo_mock_properties = NULL;
+
+	/*
+	 * Install a table of mocked Android properties.
+	 * The pointer is stored as-is (the table is not copied). Lookups scan the
+	 * table until an entry whose key is NULL, so the table must end with one.
+	 */
+	void CPUINFO_ABI cpuinfo_mock_android_properties(struct cpuinfo_mock_property* properties) {
+		cpuinfo_log_info("Android properties mocking enabled");
+		cpuinfo_mock_properties = properties;
+	}
+
+	/*
+	 * Mock counterpart of __system_property_get().
+	 * Copies the value of the first table entry whose key matches `key` into
+	 * `value` and returns the stored length. When no table is installed or no
+	 * entry matches, `value` becomes the empty string and 0 is returned.
+	 * `value` must have room for CPUINFO_BUILD_PROP_VALUE_MAX bytes.
+	 */
+	static int cpuinfo_android_property_get(const char* key, char* value) {
+		if (cpuinfo_mock_properties != NULL) {
+			for (const struct cpuinfo_mock_property* prop = cpuinfo_mock_properties; prop->key != NULL; prop++) {
+				if (strncmp(key, prop->key, CPUINFO_BUILD_PROP_NAME_MAX) == 0) {
+					/*
+					 * strncpy() does not NUL-terminate when the source fills the
+					 * whole buffer, so copy at most MAX - 1 bytes and terminate
+					 * explicitly; over-long mock values are truncated.
+					 */
+					strncpy(value, prop->value, CPUINFO_BUILD_PROP_VALUE_MAX - 1);
+					value[CPUINFO_BUILD_PROP_VALUE_MAX - 1] = '\0';
+					/* Report the length actually stored, not the untruncated one. */
+					return (int) strnlen(value, CPUINFO_BUILD_PROP_VALUE_MAX);
+				}
+			}
+		}
+		*value = '\0';
+		return 0;
+	}
+#else
+	/* Production build: forward directly to the Android system property API. */
+	static inline int cpuinfo_android_property_get(const char* key, char* value) {
+		return __system_property_get(key, value);
+	}
+#endif
+
+/*
+ * Fill the ro_* buffers of `properties` from the Android property store.
+ * Each property is fetched with cpuinfo_android_property_get() and the value is
+ * echoed to the debug log, limited to the length returned by the getter.
+ * The proc_cpuinfo_hardware field is not touched here.
+ */
+void cpuinfo_arm_android_parse_properties(struct cpuinfo_android_properties properties[restrict static 1]) {
+	const int board_len = cpuinfo_android_property_get("ro.product.board", properties->ro_product_board);
+	cpuinfo_log_debug("read ro.product.board = \"%.*s\"", board_len, properties->ro_product_board);
+
+	const int platform_len = cpuinfo_android_property_get("ro.board.platform", properties->ro_board_platform);
+	cpuinfo_log_debug("read ro.board.platform = \"%.*s\"", platform_len, properties->ro_board_platform);
+
+	const int mediatek_len = cpuinfo_android_property_get("ro.mediatek.platform", properties->ro_mediatek_platform);
+	cpuinfo_log_debug("read ro.mediatek.platform = \"%.*s\"", mediatek_len, properties->ro_mediatek_platform);
+
+	const int arch_len = cpuinfo_android_property_get("ro.arch", properties->ro_arch);
+	cpuinfo_log_debug("read ro.arch = \"%.*s\"", arch_len, properties->ro_arch);
+
+	const int chipname_len = cpuinfo_android_property_get("ro.chipname", properties->ro_chipname);
+	cpuinfo_log_debug("read ro.chipname = \"%.*s\"", chipname_len, properties->ro_chipname);
+
+	const int hw_chipname_len = cpuinfo_android_property_get("ro.hardware.chipname", properties->ro_hardware_chipname);
+	cpuinfo_log_debug("read ro.hardware.chipname = \"%.*s\"", hw_chipname_len, properties->ro_hardware_chipname);
+}
--- a/dep/cpuinfo/src/arm/api.h
+++ b/dep/cpuinfo/src/arm/api.h
@@ -0,0 +1,154 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+
+/*
+ * Chipset (SoC) vendor identifiers.
+ * "unknown" is explicitly 0, so a zero-initialized struct cpuinfo_arm_chipset
+ * reports an unknown vendor. The remaining values are consecutive, and
+ * cpuinfo_arm_chipset_vendor_max is one past the last vendor.
+ */
+enum cpuinfo_arm_chipset_vendor {
+	cpuinfo_arm_chipset_vendor_unknown = 0,
+	cpuinfo_arm_chipset_vendor_qualcomm,
+	cpuinfo_arm_chipset_vendor_mediatek,
+	cpuinfo_arm_chipset_vendor_samsung,
+	cpuinfo_arm_chipset_vendor_hisilicon,
+	cpuinfo_arm_chipset_vendor_actions,
+	cpuinfo_arm_chipset_vendor_allwinner,
+	cpuinfo_arm_chipset_vendor_amlogic,
+	cpuinfo_arm_chipset_vendor_broadcom,
+	cpuinfo_arm_chipset_vendor_lg,
+	cpuinfo_arm_chipset_vendor_leadcore,
+	cpuinfo_arm_chipset_vendor_marvell,
+	cpuinfo_arm_chipset_vendor_mstar,
+	cpuinfo_arm_chipset_vendor_novathor,
+	cpuinfo_arm_chipset_vendor_nvidia,
+	cpuinfo_arm_chipset_vendor_pinecone,
+	cpuinfo_arm_chipset_vendor_renesas,
+	cpuinfo_arm_chipset_vendor_rockchip,
+	cpuinfo_arm_chipset_vendor_spreadtrum,
+	cpuinfo_arm_chipset_vendor_telechips,
+	cpuinfo_arm_chipset_vendor_texas_instruments,
+	cpuinfo_arm_chipset_vendor_wondermedia,
+	/* Sentinel: number of entries above, including "unknown". */
+	cpuinfo_arm_chipset_vendor_max,
+};
+
+/*
+ * Chipset series identifiers. Each enumerator name embeds a vendor name from
+ * enum cpuinfo_arm_chipset_vendor followed by the series name.
+ * "unknown" is explicitly 0; values are consecutive and
+ * cpuinfo_arm_chipset_series_max is one past the last series.
+ */
+enum cpuinfo_arm_chipset_series {
+	cpuinfo_arm_chipset_series_unknown = 0,
+	cpuinfo_arm_chipset_series_qualcomm_qsd,
+	cpuinfo_arm_chipset_series_qualcomm_msm,
+	cpuinfo_arm_chipset_series_qualcomm_apq,
+	cpuinfo_arm_chipset_series_qualcomm_snapdragon,
+	cpuinfo_arm_chipset_series_mediatek_mt,
+	cpuinfo_arm_chipset_series_samsung_exynos,
+	cpuinfo_arm_chipset_series_hisilicon_k3v,
+	cpuinfo_arm_chipset_series_hisilicon_hi,
+	cpuinfo_arm_chipset_series_hisilicon_kirin,
+	cpuinfo_arm_chipset_series_actions_atm,
+	cpuinfo_arm_chipset_series_allwinner_a,
+	cpuinfo_arm_chipset_series_amlogic_aml,
+	cpuinfo_arm_chipset_series_amlogic_s,
+	cpuinfo_arm_chipset_series_broadcom_bcm,
+	cpuinfo_arm_chipset_series_lg_nuclun,
+	cpuinfo_arm_chipset_series_leadcore_lc,
+	cpuinfo_arm_chipset_series_marvell_pxa,
+	cpuinfo_arm_chipset_series_mstar_6a,
+	cpuinfo_arm_chipset_series_novathor_u,
+	cpuinfo_arm_chipset_series_nvidia_tegra_t,
+	cpuinfo_arm_chipset_series_nvidia_tegra_ap,
+	cpuinfo_arm_chipset_series_nvidia_tegra_sl,
+	cpuinfo_arm_chipset_series_pinecone_surge_s,
+	cpuinfo_arm_chipset_series_renesas_mp,
+	cpuinfo_arm_chipset_series_rockchip_rk,
+	cpuinfo_arm_chipset_series_spreadtrum_sc,
+	cpuinfo_arm_chipset_series_telechips_tcc,
+	cpuinfo_arm_chipset_series_texas_instruments_omap,
+	cpuinfo_arm_chipset_series_wondermedia_wm,
+	/* Sentinel: number of entries above, including "unknown". */
+	cpuinfo_arm_chipset_series_max,
+};
+
+/* Size in bytes of the suffix buffer in struct cpuinfo_arm_chipset. */
+#define CPUINFO_ARM_CHIPSET_SUFFIX_MAX 8
+
+/*
+ * Decoded chipset identity: vendor, series, numeric model and a short suffix.
+ * NOTE(review): whether `suffix` is NUL-terminated when all
+ * CPUINFO_ARM_CHIPSET_SUFFIX_MAX bytes are used is not visible here - confirm
+ * against the code that fills it.
+ */
+struct cpuinfo_arm_chipset {
+	enum cpuinfo_arm_chipset_vendor vendor;
+	enum cpuinfo_arm_chipset_series series;
+	/* Numeric model; e.g. compared against 9810 for the Exynos 9810 special case. */
+	uint32_t model;
+	char suffix[CPUINFO_ARM_CHIPSET_SUFFIX_MAX];
+};
+
+#define CPUINFO_ARM_CHIPSET_NAME_MAX CPUINFO_PACKAGE_NAME_MAX
+
+/*
+ * Internal ARM helper prototypes, declared three ways:
+ *  - C, non-MSVC: array parameters use the C99 "[restrict static N]" form
+ *    (non-aliased pointer to at least N elements).
+ *  - C, MSVC: the same prototypes with plain "[N]" array parameters.
+ *  - C++: only cpuinfo_arm_decode_cache is declared, with plain "[N]" parameters.
+ */
+#ifndef __cplusplus
+#ifndef _MSC_VER
+	/* Writes a name for `chipset` into `name` (buffer of CPUINFO_ARM_CHIPSET_NAME_MAX chars). */
+	CPUINFO_INTERNAL void cpuinfo_arm_chipset_to_string(
+		const struct cpuinfo_arm_chipset chipset[restrict static 1],
+		char name[restrict static CPUINFO_ARM_CHIPSET_NAME_MAX]);
+
+	/* Adjusts `chipset` in place using the core count and maximum CPU frequency. */
+	CPUINFO_INTERNAL void cpuinfo_arm_fixup_chipset(
+		struct cpuinfo_arm_chipset chipset[restrict static 1], uint32_t cores, uint32_t max_cpu_freq_max);
+
+	/* Decodes `midr` into *vendor and *uarch; 32-bit ARM builds take an extra has_vfpv4 flag. */
+	CPUINFO_INTERNAL void cpuinfo_arm_decode_vendor_uarch(
+		uint32_t midr,
+	#if CPUINFO_ARCH_ARM
+		bool has_vfpv4,
+	#endif
+		enum cpuinfo_vendor vendor[restrict static 1],
+		enum cpuinfo_uarch uarch[restrict static 1]);
+
+	/* Fills the l1i/l1d/l2/l3 cache descriptors from the given core and chipset description. */
+	CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
+		enum cpuinfo_uarch uarch,
+		uint32_t cluster_cores,
+		uint32_t midr,
+		const struct cpuinfo_arm_chipset chipset[restrict static 1],
+		uint32_t cluster_id,
+		uint32_t arch_version,
+		struct cpuinfo_cache l1i[restrict static 1],
+		struct cpuinfo_cache l1d[restrict static 1],
+		struct cpuinfo_cache l2[restrict static 1],
+		struct cpuinfo_cache l3[restrict static 1]);
+
+	CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size(
+		const struct cpuinfo_processor processor[1]);
+#else
+	/* MSVC C build: same functions as above, without "restrict static". */
+	CPUINFO_INTERNAL void cpuinfo_arm_chipset_to_string(
+		const struct cpuinfo_arm_chipset chipset[1],
+		char name[CPUINFO_ARM_CHIPSET_NAME_MAX]);
+
+	CPUINFO_INTERNAL void cpuinfo_arm_fixup_chipset(
+		struct cpuinfo_arm_chipset chipset[1], uint32_t cores, uint32_t max_cpu_freq_max);
+
+	CPUINFO_INTERNAL void cpuinfo_arm_decode_vendor_uarch(
+		uint32_t midr,
+	#if CPUINFO_ARCH_ARM
+		bool has_vfpv4,
+	#endif
+		enum cpuinfo_vendor vendor[1],
+		enum cpuinfo_uarch uarch[1]);
+
+	CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
+		enum cpuinfo_uarch uarch,
+		uint32_t cluster_cores,
+		uint32_t midr,
+		const struct cpuinfo_arm_chipset chipset[1],
+		uint32_t cluster_id,
+		uint32_t arch_version,
+		struct cpuinfo_cache l1i[1],
+		struct cpuinfo_cache l1d[1],
+		struct cpuinfo_cache l2[1],
+		struct cpuinfo_cache l3[1]);
+
+	CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size(
+		const struct cpuinfo_processor processor[1]);
+#endif
+#else /* defined(__cplusplus) */
+	/* C++ translation units only see the cache decoder. */
+	CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
+		enum cpuinfo_uarch uarch,
+		uint32_t cluster_cores,
+		uint32_t midr,
+		const struct cpuinfo_arm_chipset chipset[1],
+		uint32_t cluster_id,
+		uint32_t arch_version,
+		struct cpuinfo_cache l1i[1],
+		struct cpuinfo_cache l1d[1],
+		struct cpuinfo_cache l2[1],
+		struct cpuinfo_cache l3[1]);
+#endif
--- a/dep/cpuinfo/src/arm/cache.c
+++ b/dep/cpuinfo/src/arm/cache.c
--- a/dep/cpuinfo/src/arm/linux/aarch32-isa.c
+++ b/dep/cpuinfo/src/arm/linux/aarch32-isa.c
@@ -0,0 +1,267 @@
+#include <stdint.h>
+
+#if CPUINFO_MOCK
+	#include <cpuinfo-mock.h>
+#endif
+#include <arm/linux/api.h>
+#include <arm/linux/cp.h>
+#include <arm/midr.h>
+#include <cpuinfo/log.h>
+
+
+/*
+ * Mock builds only: globals holding mocked register values, plus setters for
+ * two of them. cpuinfo_arm_mvfr0 has no setter in this block.
+ * NOTE(review): presumably read_fpsid()/read_wcid() return these values when
+ * CPUINFO_MOCK is enabled - confirm in arm/linux/cp.h.
+ */
+#if CPUINFO_MOCK
+	uint32_t cpuinfo_arm_fpsid = 0;
+	uint32_t cpuinfo_arm_mvfr0 = 0;
+	uint32_t cpuinfo_arm_wcid = 0;
+
+	/* Store the mocked FPSID value. */
+	void cpuinfo_set_fpsid(uint32_t fpsid) {
+		cpuinfo_arm_fpsid = fpsid;
+	}
+
+	/* Store the mocked WCID value. */
+	void cpuinfo_set_wcid(uint32_t wcid) {
+		cpuinfo_arm_wcid = wcid;
+	}
+#endif
+
+
+/*
+ * Derive the instruction-set flags in `isa` for 32-bit ARM code.
+ *
+ *   features, features2  - CPUINFO_ARM_LINUX_FEATURE_* / CPUINFO_ARM_LINUX_FEATURE2_* bit masks.
+ *   midr                 - MIDR value; drives the per-core whitelists below.
+ *   architecture_version - reported ARM architecture version; adjusted locally for
+ *                          ARM11 and for pre-v7 reports that carry v7-only features.
+ *   architecture_flags   - CPUINFO_ARM_LINUX_ARCH_* bits (T, E, J).
+ *   chipset              - only series and model are read (Exynos 9810 special case).
+ *   isa                  - output. Apart from `dot` and `armv7mp`, which are assigned
+ *                          computed values, fields are only ever set to true, so the
+ *                          caller is expected to pass a cleared structure.
+ *
+ * Version >= 8 takes the "ARMv7 code on ARMv8" path, where the baseline features
+ * are assumed and FP16/RDM/DOT come from MIDR whitelists. Lower versions are
+ * decoded from the feature bits. The crypto/CRC32 bits of features2 are applied
+ * in both cases.
+ */
+void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
+	uint32_t features,
+	uint32_t features2,
+	uint32_t midr,
+	uint32_t architecture_version,
+	uint32_t architecture_flags,
+	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	struct cpuinfo_arm_isa isa[restrict static 1])
+{
+	if (architecture_version >= 8) {
+		/*
+		 * ARMv7 code running on ARMv8: IDIV, VFP, NEON are always supported,
+		 * but may be not reported in /proc/cpuinfo features.
+		 */
+		isa->armv5e  = true;
+		isa->armv6   = true;
+		isa->armv6k  = true;
+		isa->armv7   = true;
+		isa->armv7mp = true;
+		isa->armv8   = true;
+		isa->thumb  = true;
+		isa->thumb2 = true;
+		isa->idiv = true;
+		isa->vfpv3 = true;
+		isa->d32 = true;
+		isa->fp16 = true;
+		isa->fma = true;
+		isa->neon = true;
+
+		/*
+		 * NEON FP16 compute extension and VQRDMLAH/VQRDMLSH instructions are not indicated in /proc/cpuinfo.
+		 * Use a MIDR-based heuristic to whitelist processors known to support it:
+		 * - Processors with Cortex-A55 cores
+		 * - Processors with Cortex-A65 cores
+		 * - Processors with Cortex-A75 cores
+		 * - Processors with Cortex-A76 cores
+		 * - Processors with Cortex-A77 cores
+		 * - Processors with Exynos M4 cores
+		 * - Processors with Exynos M5 cores
+		 * - Neoverse N1 cores
+		 */
+		/*
+		 * NOTE(review): the list above names Cortex-A75, but the switch below has no
+		 * case for 0x4100D0A0 (the value labelled Cortex A75 in the VDOT switch further
+		 * down); only the Kryo 385 Gold derivative is matched. There is no feature-flag
+		 * fallback on this path - confirm whether the omission is intentional.
+		 */
+		if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
+			/* Only little cores of Exynos 9810 support FP16 & RDM */
+			cpuinfo_log_warning("FP16 arithmetics and RDM disabled: only little cores in Exynos 9810 support these extensions");
+		} else {
+			switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+				case UINT32_C(0x4100D050): /* Cortex-A55 */
+				case UINT32_C(0x4100D060): /* Cortex-A65 */
+				case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+				case UINT32_C(0x4100D0C0): /* Neoverse N1 */
+				case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+				case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+				case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+				case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
+				case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
+				case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
+				case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
+				case UINT32_C(0x53000030): /* Exynos M4 */
+				case UINT32_C(0x53000040): /* Exynos M5 */
+					isa->fp16arith = true;
+					isa->rdm = true;
+					break;
+			}
+		}
+
+		/*
+		 * NEON VDOT instructions are not indicated in /proc/cpuinfo.
+		 * Use a MIDR-based heuristic to whitelist processors known to support it.
+		 */
+		switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+			case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+			case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+			case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+			case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+			case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
+			case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
+			case UINT32_C(0x53000030): /* Exynos-M4 */
+			case UINT32_C(0x53000040): /* Exynos-M5 */
+				isa->dot = true;
+				break;
+			case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */
+				isa->dot = !!(midr_get_variant(midr) >= 1);
+				break;
+			case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */
+				isa->dot = !!(midr_get_variant(midr) >= 2);
+				break;
+		}
+	} else {
+		/* ARMv7 or lower: use feature flags to detect optional features */
+
+		/*
+		 * ARM11 (ARM 1136/1156/1176/11 MPCore) processors can report v7 architecture
+		 * even though they support only ARMv6 instruction set.
+		 */
+		if (architecture_version == 7 && midr_is_arm11(midr)) {
+			cpuinfo_log_warning("kernel-reported architecture ARMv7 ignored due to mismatch with processor microarchitecture (ARM11)");
+			architecture_version = 6;
+		}
+
+		/* A pre-v7 report that carries any v7-only feature bit is promoted to v7. */
+		if (architecture_version < 7) {
+			const uint32_t armv7_features_mask = CPUINFO_ARM_LINUX_FEATURE_VFPV3 | CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | CPUINFO_ARM_LINUX_FEATURE_VFPD32 |
+				CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON | CPUINFO_ARM_LINUX_FEATURE_IDIVT | CPUINFO_ARM_LINUX_FEATURE_IDIVA;
+			if (features & armv7_features_mask) {
+				architecture_version = 7;
+			}
+		}
+		if ((architecture_version >= 6) || (features & CPUINFO_ARM_LINUX_FEATURE_EDSP) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_E)) {
+			isa->armv5e = true;
+		}
+		if (architecture_version >= 6) {
+			isa->armv6 = true;
+		}
+		if (architecture_version >= 7) {
+			isa->armv6k = true;
+			isa->armv7 = true;
+
+			/*
+			 * ARMv7 MP extension (PLDW instruction) is not indicated in /proc/cpuinfo.
+			 * Use heuristic list of supporting processors:
+			 * - Processors supporting UDIV/SDIV instructions ("idiva" + "idivt" features in /proc/cpuinfo)
+			 * - Cortex-A5
+			 * - Cortex-A9
+			 * - Dual-Core Scorpion
+			 * - Krait (supports UDIV/SDIV, but kernels may not report it in /proc/cpuinfo)
+			 *
+			 * TODO: check single-core Qualcomm Scorpion.
+			 */
+			switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+				case UINT32_C(0x4100C050): /* Cortex-A5 */
+				case UINT32_C(0x4100C090): /* Cortex-A9 */
+				case UINT32_C(0x510002D0): /* Scorpion (dual-core) */
+				case UINT32_C(0x510004D0): /* Krait (dual-core) */
+				case UINT32_C(0x510006F0): /* Krait (quad-core) */
+					isa->armv7mp = true;
+					break;
+				default:
+					/* In practice IDIV instruction implies ARMv7+MP ISA */
+					isa->armv7mp = (features & CPUINFO_ARM_LINUX_FEATURE_IDIV) == CPUINFO_ARM_LINUX_FEATURE_IDIV;
+					break;
+			}
+		}
+
+		/* The iwmmxt feature bit is cross-checked against the coprocessor type field of WCID (bits 8-15). */
+		if (features & CPUINFO_ARM_LINUX_FEATURE_IWMMXT) {
+			const uint32_t wcid = read_wcid();
+			cpuinfo_log_debug("WCID = 0x%08"PRIx32, wcid);
+			const uint32_t coprocessor_type = (wcid >> 8) & UINT32_C(0xFF);
+			if (coprocessor_type >= 0x10) {
+				isa->wmmx = true;
+				if (coprocessor_type >= 0x20) {
+					isa->wmmx2 = true;
+				}
+			} else {
+				cpuinfo_log_warning("WMMX ISA disabled: OS reported iwmmxt feature, "
+					"but WCID coprocessor type 0x%"PRIx32" indicates no WMMX support",
+					coprocessor_type);
+			}
+		}
+
+		if ((features & CPUINFO_ARM_LINUX_FEATURE_THUMB) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_T)) {
+			isa->thumb = true;
+
+			/*
+			 * There is no separate feature flag for Thumb 2.
+			 * All ARMv7 processors and ARM 1156 support Thumb 2.
+			 */
+			if (architecture_version >= 7 || midr_is_arm1156(midr)) {
+				isa->thumb2 = true;
+			}
+		}
+		if (features & CPUINFO_ARM_LINUX_FEATURE_THUMBEE) {
+			isa->thumbee = true;
+		}
+		if ((features & CPUINFO_ARM_LINUX_FEATURE_JAVA) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_J)) {
+			isa->jazelle = true;
+		}
+
+		/* Qualcomm Krait may have buggy kernel configuration that doesn't report IDIV */
+		if ((features & CPUINFO_ARM_LINUX_FEATURE_IDIV) == CPUINFO_ARM_LINUX_FEATURE_IDIV || midr_is_krait(midr)) {
+			isa->idiv = true;
+		}
+
+		const uint32_t vfp_mask = \
+			CPUINFO_ARM_LINUX_FEATURE_VFP | CPUINFO_ARM_LINUX_FEATURE_VFPV3 | CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | \
+			CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON;
+		if (features & vfp_mask) {
+			const uint32_t vfpv3_mask = CPUINFO_ARM_LINUX_FEATURE_VFPV3 | CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | \
+				CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON;
+			if ((architecture_version >= 7) || (features & vfpv3_mask)) {
+				isa->vfpv3 = true;
+
+				const uint32_t d32_mask = CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON;
+				if (features & d32_mask) {
+					isa->d32 = true;
+				}
+			} else {
+				/* Only plain VFP was reported on a pre-v7 core: decide at build time or from FPSID. */
+				#if defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
+					isa->vfpv3 = true;
+				#else
+					const uint32_t fpsid = read_fpsid();
+					cpuinfo_log_debug("FPSID = 0x%08"PRIx32, fpsid);
+					const uint32_t subarchitecture = (fpsid >> 16) & UINT32_C(0x7F);
+					if (subarchitecture >= 0x01) {
+						isa->vfpv2 = true;
+					}
+				#endif
+			}
+		}
+		if (features & CPUINFO_ARM_LINUX_FEATURE_NEON) {
+			isa->neon = true;
+		}
+
+		/*
+		 * There is no separate feature flag for FP16 support.
+		 * VFPv4 implies VFPv3-FP16 support (and in practice, NEON-HP as well).
+		 * Additionally, ARM Cortex-A9 and Qualcomm Scorpion support FP16.
+		 */
+		if ((features & CPUINFO_ARM_LINUX_FEATURE_VFPV4) || midr_is_cortex_a9(midr) || midr_is_scorpion(midr)) {
+			isa->fp16 = true;
+		}
+
+		if (features & CPUINFO_ARM_LINUX_FEATURE_VFPV4) {
+			isa->fma = true;
+		}
+	}
+
+	/* Crypto and CRC32 bits come from the second feature word on every architecture version. */
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_AES) {
+		isa->aes = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_PMULL) {
+		isa->pmull = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SHA1) {
+		isa->sha1 = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SHA2) {
+		isa->sha2 = true;
+	}
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_CRC32) {
+		isa->crc32 = true;
+	}
+}
--- a/dep/cpuinfo/src/arm/linux/aarch64-isa.c
+++ b/dep/cpuinfo/src/arm/linux/aarch64-isa.c
@@ -0,0 +1,127 @@
+#include <stdint.h>
+
+#include <arm/linux/api.h>
+#include <cpuinfo/log.h>
+
+
+/*
+ * Derive the instruction-set flags in `isa` for AArch64.
+ *
+ *   features, features2 - CPUINFO_ARM_LINUX_FEATURE_* / CPUINFO_ARM_LINUX_FEATURE2_* bit masks.
+ *   midr                - MIDR value; drives the per-core whitelists below.
+ *   chipset             - only series and model are read (Exynos 9810 special case).
+ *   isa                 - output. Apart from `dot`, which is assigned a computed value
+ *                         for Cortex-A55/A75, fields are only ever set to true, so the
+ *                         caller is expected to pass a cleared structure.
+ *
+ * For FP16 arithmetic, RDM and DOT, cores on the MIDR whitelists get the
+ * feature regardless of the kernel-reported bits; all other cores fall back to
+ * the feature bits.
+ */
+void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
+	uint32_t features,
+	uint32_t features2,
+	uint32_t midr,
+	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	struct cpuinfo_arm_isa isa[restrict static 1])
+{
+	if (features & CPUINFO_ARM_LINUX_FEATURE_AES) {
+		isa->aes = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_PMULL) {
+		isa->pmull = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_SHA1) {
+		isa->sha1 = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_SHA2) {
+		isa->sha2 = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_CRC32) {
+		isa->crc32 = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_ATOMICS) {
+		isa->atomics = true;
+	}
+
+	/*
+	 * Some phones ship with an old kernel configuration that doesn't report NEON FP16 compute extension and SQRDMLAH/SQRDMLSH/UQRDMLAH/UQRDMLSH instructions.
+	 * Use a MIDR-based heuristic to whitelist processors known to support it:
+	 * - Processors with Cortex-A55 cores
+	 * - Processors with Cortex-A65 cores
+	 * - Processors with Cortex-A75 cores
+	 * - Processors with Cortex-A76 cores
+	 * - Processors with Cortex-A77 cores
+	 * - Processors with Exynos M4 cores
+	 * - Processors with Exynos M5 cores
+	 * - Neoverse N1 cores
+	 */
+	if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
+		/* Exynos 9810 reports that it supports FP16 compute, but in fact only little cores do */
+		cpuinfo_log_warning("FP16 arithmetics and RDM disabled: only little cores in Exynos 9810 support these extensions");
+	} else {
+		/* FP16 arithmetic is only reported when both the scalar (FPHP) and SIMD (ASIMDHP) bits are set. */
+		const uint32_t fp16arith_mask = CPUINFO_ARM_LINUX_FEATURE_FPHP | CPUINFO_ARM_LINUX_FEATURE_ASIMDHP;
+		switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+			case UINT32_C(0x4100D050): /* Cortex-A55 */
+			case UINT32_C(0x4100D060): /* Cortex-A65 */
+			case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+			case UINT32_C(0x4100D0C0): /* Neoverse N1 */
+			case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+			case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+			case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+			case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
+			case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
+			case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
+			case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
+			case UINT32_C(0x53000030): /* Exynos M4 */
+			case UINT32_C(0x53000040): /* Exynos M5 */
+				isa->fp16arith = true;
+				isa->rdm = true;
+				break;
+			default:
+				if ((features & fp16arith_mask) == fp16arith_mask) {
+					isa->fp16arith = true;
+				} else if (features & CPUINFO_ARM_LINUX_FEATURE_FPHP) {
+					cpuinfo_log_warning("FP16 arithmetics disabled: detected support only for scalar operations");
+				} else if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDHP) {
+					cpuinfo_log_warning("FP16 arithmetics disabled: detected support only for SIMD operations");
+				}
+				if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM) {
+					isa->rdm = true;
+				}
+				break;
+		}
+	}
+
+	/*
+	 * Many phones ship with an old kernel configuration that doesn't report UDOT/SDOT instructions.
+	 * Use a MIDR-based heuristic to whitelist processors known to support it.
+	 */
+	switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+		case UINT32_C(0x4100D060): /* Cortex-A65 */
+		case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+		case UINT32_C(0x4100D0C0): /* Neoverse N1 */
+		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+		case UINT32_C(0x4100D4A0): /* Neoverse E1 */
+		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+		case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
+		case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
+		case UINT32_C(0x53000030): /* Exynos-M4 */
+		case UINT32_C(0x53000040): /* Exynos-M5 */
+			isa->dot = true;
+			break;
+		case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */
+			isa->dot = !!(midr_get_variant(midr) >= 1);
+			break;
+		case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */
+			isa->dot = !!(midr_get_variant(midr) >= 2);
+			break;
+		default:
+			if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDDP) {
+				isa->dot = true;
+			}
+			break;
+	}
+	/* The JSCVT check used to appear twice in a row; one copy is sufficient. */
+	if (features & CPUINFO_ARM_LINUX_FEATURE_JSCVT) {
+		isa->jscvt = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_FCMA) {
+		isa->fcma = true;
+	}
+	if (features & CPUINFO_ARM_LINUX_FEATURE_SVE) {
+		isa->sve = true;
+	}
+	/* SVE2 is the only flag here that is read from the second feature word. */
+	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) {
+		isa->sve2 = true;
+	}
+}
--- a/dep/cpuinfo/src/arm/linux/api.h
+++ b/dep/cpuinfo/src/arm/linux/api.h
@@ -0,0 +1,384 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/common.h>
+#include <arm/midr.h>
+#include <arm/api.h>
+#include <linux/api.h>
+
+/* No hard limit in the kernel, maximum length observed on non-rogue kernels is 64 */
+#define CPUINFO_HARDWARE_VALUE_MAX 64
+/* No hard limit in the kernel, maximum length on Raspberry Pi is 8. Add 1 symbol to detect overly large revision strings */
+#define CPUINFO_REVISION_VALUE_MAX 9
+
+#ifdef __ANDROID__
+	/* As per include/sys/system_properties.h in Android NDK */
+	#define CPUINFO_BUILD_PROP_NAME_MAX  32
+	#define CPUINFO_BUILD_PROP_VALUE_MAX 92
+
+	struct cpuinfo_android_properties {
+		char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX];
+		char ro_product_board[CPUINFO_BUILD_PROP_VALUE_MAX];
+		char ro_board_platform[CPUINFO_BUILD_PROP_VALUE_MAX];
+		char ro_mediatek_platform[CPUINFO_BUILD_PROP_VALUE_MAX];
+		char ro_arch[CPUINFO_BUILD_PROP_VALUE_MAX];
+		char ro_chipname[CPUINFO_BUILD_PROP_VALUE_MAX];
+		char ro_hardware_chipname[CPUINFO_BUILD_PROP_VALUE_MAX];
+	};
+#endif
+
+/*
+ * Single-bit architecture flags.
+ * NOTE(review): presumably these mirror the T/E/J suffix letters of the reported architecture and are stored in
+ * cpuinfo_arm_linux_processor.architecture_flags - confirm against the /proc/cpuinfo parser.
+ */
+#define CPUINFO_ARM_LINUX_ARCH_T   UINT32_C(0x00000001)
+#define CPUINFO_ARM_LINUX_ARCH_E   UINT32_C(0x00000002)
+#define CPUINFO_ARM_LINUX_ARCH_J   UINT32_C(0x00000004)
+
+/* Combined masks: TE == T | E, TEJ == T | E | J */
+#define CPUINFO_ARM_LINUX_ARCH_TE  UINT32_C(0x00000003)
+#define CPUINFO_ARM_LINUX_ARCH_TEJ UINT32_C(0x00000007)
+
+/*
+ * Cache parameters in two parallel groups of four fields (size, associativity, line length, number of sets).
+ * NOTE(review): presumably i_* describes the instruction cache and d_* the data cache as parsed from
+ * /proc/cpuinfo; units are not visible in this header - confirm against the parser.
+ */
+struct cpuinfo_arm_linux_proc_cpuinfo_cache {
+	uint32_t i_size;
+	uint32_t i_assoc;
+	uint32_t i_line_length;
+	uint32_t i_sets;
+	uint32_t d_size;
+	uint32_t d_assoc;
+	uint32_t d_line_length;
+	uint32_t d_sets;
+};
+
+#if CPUINFO_ARCH_ARM
+	/* arch/arm/include/uapi/asm/hwcap.h */
+
+	#define CPUINFO_ARM_LINUX_FEATURE_SWP      UINT32_C(0x00000001)
+	#define CPUINFO_ARM_LINUX_FEATURE_HALF     UINT32_C(0x00000002)
+	#define CPUINFO_ARM_LINUX_FEATURE_THUMB    UINT32_C(0x00000004)
+	#define CPUINFO_ARM_LINUX_FEATURE_26BIT    UINT32_C(0x00000008)
+	#define CPUINFO_ARM_LINUX_FEATURE_FASTMULT UINT32_C(0x00000010)
+	#define CPUINFO_ARM_LINUX_FEATURE_FPA      UINT32_C(0x00000020)
+	#define CPUINFO_ARM_LINUX_FEATURE_VFP      UINT32_C(0x00000040)
+	#define CPUINFO_ARM_LINUX_FEATURE_EDSP     UINT32_C(0x00000080)
+	#define CPUINFO_ARM_LINUX_FEATURE_JAVA     UINT32_C(0x00000100)
+	#define CPUINFO_ARM_LINUX_FEATURE_IWMMXT   UINT32_C(0x00000200)
+	#define CPUINFO_ARM_LINUX_FEATURE_CRUNCH   UINT32_C(0x00000400)
+	#define CPUINFO_ARM_LINUX_FEATURE_THUMBEE  UINT32_C(0x00000800)
+	#define CPUINFO_ARM_LINUX_FEATURE_NEON     UINT32_C(0x00001000)
+	#define CPUINFO_ARM_LINUX_FEATURE_VFPV3    UINT32_C(0x00002000)
+	#define CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 UINT32_C(0x00004000) /* Also set for VFPv4 with 16 double-precision registers */
+	#define CPUINFO_ARM_LINUX_FEATURE_TLS      UINT32_C(0x00008000)
+	#define CPUINFO_ARM_LINUX_FEATURE_VFPV4    UINT32_C(0x00010000)
+	#define CPUINFO_ARM_LINUX_FEATURE_IDIVA    UINT32_C(0x00020000)
+	#define CPUINFO_ARM_LINUX_FEATURE_IDIVT    UINT32_C(0x00040000)
+	#define CPUINFO_ARM_LINUX_FEATURE_IDIV     UINT32_C(0x00060000)
+	#define CPUINFO_ARM_LINUX_FEATURE_VFPD32   UINT32_C(0x00080000)
+	#define CPUINFO_ARM_LINUX_FEATURE_LPAE     UINT32_C(0x00100000)
+	#define CPUINFO_ARM_LINUX_FEATURE_EVTSTRM  UINT32_C(0x00200000)
+
+	#define CPUINFO_ARM_LINUX_FEATURE2_AES   UINT32_C(0x00000001)
+	#define CPUINFO_ARM_LINUX_FEATURE2_PMULL UINT32_C(0x00000002)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SHA1  UINT32_C(0x00000004)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SHA2  UINT32_C(0x00000008)
+	#define CPUINFO_ARM_LINUX_FEATURE2_CRC32 UINT32_C(0x00000010)
+#elif CPUINFO_ARCH_ARM64
+	/* arch/arm64/include/uapi/asm/hwcap.h */
+	#define CPUINFO_ARM_LINUX_FEATURE_FP       UINT32_C(0x00000001)
+	#define CPUINFO_ARM_LINUX_FEATURE_ASIMD    UINT32_C(0x00000002)
+	#define CPUINFO_ARM_LINUX_FEATURE_EVTSTRM  UINT32_C(0x00000004)
+	#define CPUINFO_ARM_LINUX_FEATURE_AES      UINT32_C(0x00000008)
+	#define CPUINFO_ARM_LINUX_FEATURE_PMULL    UINT32_C(0x00000010)
+	#define CPUINFO_ARM_LINUX_FEATURE_SHA1     UINT32_C(0x00000020)
+	#define CPUINFO_ARM_LINUX_FEATURE_SHA2     UINT32_C(0x00000040)
+	#define CPUINFO_ARM_LINUX_FEATURE_CRC32    UINT32_C(0x00000080)
+	#define CPUINFO_ARM_LINUX_FEATURE_ATOMICS  UINT32_C(0x00000100)
+	#define CPUINFO_ARM_LINUX_FEATURE_FPHP     UINT32_C(0x00000200)
+	#define CPUINFO_ARM_LINUX_FEATURE_ASIMDHP  UINT32_C(0x00000400)
+	#define CPUINFO_ARM_LINUX_FEATURE_CPUID    UINT32_C(0x00000800)
+	#define CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM UINT32_C(0x00001000)
+	#define CPUINFO_ARM_LINUX_FEATURE_JSCVT    UINT32_C(0x00002000)
+	#define CPUINFO_ARM_LINUX_FEATURE_FCMA     UINT32_C(0x00004000)
+	#define CPUINFO_ARM_LINUX_FEATURE_LRCPC    UINT32_C(0x00008000)
+	#define CPUINFO_ARM_LINUX_FEATURE_DCPOP    UINT32_C(0x00010000)
+	#define CPUINFO_ARM_LINUX_FEATURE_SHA3     UINT32_C(0x00020000)
+	#define CPUINFO_ARM_LINUX_FEATURE_SM3      UINT32_C(0x00040000)
+	#define CPUINFO_ARM_LINUX_FEATURE_SM4      UINT32_C(0x00080000)
+	#define CPUINFO_ARM_LINUX_FEATURE_ASIMDDP  UINT32_C(0x00100000)
+	#define CPUINFO_ARM_LINUX_FEATURE_SHA512   UINT32_C(0x00200000)
+	#define CPUINFO_ARM_LINUX_FEATURE_SVE      UINT32_C(0x00400000)
+	#define CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM UINT32_C(0x00800000)
+	#define CPUINFO_ARM_LINUX_FEATURE_DIT      UINT32_C(0x01000000)
+	#define CPUINFO_ARM_LINUX_FEATURE_USCAT    UINT32_C(0x02000000)
+	#define CPUINFO_ARM_LINUX_FEATURE_ILRCPC   UINT32_C(0x04000000)
+	#define CPUINFO_ARM_LINUX_FEATURE_FLAGM    UINT32_C(0x08000000)
+	#define CPUINFO_ARM_LINUX_FEATURE_SSBS     UINT32_C(0x10000000)
+	#define CPUINFO_ARM_LINUX_FEATURE_SB       UINT32_C(0x20000000)
+	#define CPUINFO_ARM_LINUX_FEATURE_PACA     UINT32_C(0x40000000)
+	#define CPUINFO_ARM_LINUX_FEATURE_PACG     UINT32_C(0x80000000)
+
+	#define CPUINFO_ARM_LINUX_FEATURE2_DCPODP     UINT32_C(0x00000001)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SVE2       UINT32_C(0x00000002)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SVEAES     UINT32_C(0x00000004)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SVEPMULL   UINT32_C(0x00000008)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SVEBITPERM UINT32_C(0x00000010)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SVESHA3    UINT32_C(0x00000020)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SVESM4     UINT32_C(0x00000040)
+	#define CPUINFO_ARM_LINUX_FEATURE2_FLAGM2     UINT32_C(0x00000080)
+	#define CPUINFO_ARM_LINUX_FEATURE2_FRINT      UINT32_C(0x00000100)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SVEI8MM    UINT32_C(0x00000200)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SVEF32MM   UINT32_C(0x00000400)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SVEF64MM   UINT32_C(0x00000800)
+	#define CPUINFO_ARM_LINUX_FEATURE2_SVEBF16    UINT32_C(0x00001000)
+	#define CPUINFO_ARM_LINUX_FEATURE2_I8MM       UINT32_C(0x00002000)
+	#define CPUINFO_ARM_LINUX_FEATURE2_BF16       UINT32_C(0x00004000)
+	#define CPUINFO_ARM_LINUX_FEATURE2_DGH        UINT32_C(0x00008000)
+	#define CPUINFO_ARM_LINUX_FEATURE2_RNG        UINT32_C(0x00010000)
+	#define CPUINFO_ARM_LINUX_FEATURE2_BTI        UINT32_C(0x00020000)
+#endif
+
+/*
+ * Per-detail validity bits tested against cpuinfo_arm_linux_processor.flags:
+ * a set bit marks the corresponding detail of the processor as known.
+ */
+#define CPUINFO_ARM_LINUX_VALID_ARCHITECTURE UINT32_C(0x00010000)
+#define CPUINFO_ARM_LINUX_VALID_IMPLEMENTER  UINT32_C(0x00020000)
+#define CPUINFO_ARM_LINUX_VALID_VARIANT      UINT32_C(0x00040000)
+#define CPUINFO_ARM_LINUX_VALID_PART         UINT32_C(0x00080000)
+#define CPUINFO_ARM_LINUX_VALID_REVISION     UINT32_C(0x00100000)
+#define CPUINFO_ARM_LINUX_VALID_PROCESSOR    UINT32_C(0x00200000)
+#define CPUINFO_ARM_LINUX_VALID_FEATURES     UINT32_C(0x00400000)
+#if CPUINFO_ARCH_ARM
+	/* Validity bits for the individual cache parameters (32-bit ARM builds only) */
+	#define CPUINFO_ARM_LINUX_VALID_ICACHE_SIZE UINT32_C(0x01000000)
+	#define CPUINFO_ARM_LINUX_VALID_ICACHE_SETS UINT32_C(0x02000000)
+	#define CPUINFO_ARM_LINUX_VALID_ICACHE_WAYS UINT32_C(0x04000000)
+	#define CPUINFO_ARM_LINUX_VALID_ICACHE_LINE UINT32_C(0x08000000)
+	#define CPUINFO_ARM_LINUX_VALID_DCACHE_SIZE UINT32_C(0x10000000)
+	#define CPUINFO_ARM_LINUX_VALID_DCACHE_SETS UINT32_C(0x20000000)
+	#define CPUINFO_ARM_LINUX_VALID_DCACHE_WAYS UINT32_C(0x40000000)
+	#define CPUINFO_ARM_LINUX_VALID_DCACHE_LINE UINT32_C(0x80000000)
+#endif
+
+/*
+ * Combined masks:
+ * - VALID_INFO is the OR of all seven bits from VALID_ARCHITECTURE through VALID_FEATURES.
+ * - VALID_MIDR is the OR of the six bits from VALID_ARCHITECTURE through VALID_PROCESSOR (VALID_FEATURES excluded).
+ */
+#define CPUINFO_ARM_LINUX_VALID_INFO          UINT32_C(0x007F0000)
+#define CPUINFO_ARM_LINUX_VALID_MIDR          UINT32_C(0x003F0000)
+#if CPUINFO_ARCH_ARM
+	/*
+	 * VALID_ICACHE / VALID_DCACHE are the OR of the four ICACHE_* / DCACHE_* bits;
+	 * VALID_CACHE_LINE is VALID_ICACHE_LINE | VALID_DCACHE_LINE.
+	 */
+	#define CPUINFO_ARM_LINUX_VALID_ICACHE     UINT32_C(0x0F000000)
+	#define CPUINFO_ARM_LINUX_VALID_DCACHE     UINT32_C(0xF0000000)
+	#define CPUINFO_ARM_LINUX_VALID_CACHE_LINE UINT32_C(0x88000000)
+#endif
+
+/*
+ * Per-logical-processor details collected on ARM Linux.
+ * Which details are actually known is tracked in the flags member (CPUINFO_LINUX_FLAG_* and
+ * CPUINFO_ARM_LINUX_VALID_* bits), so a field must be trusted only when its flag is set.
+ */
+struct cpuinfo_arm_linux_processor {
+	/* NOTE(review): presumably the ARM architecture version number reported by the kernel - confirm against the parser */
+	uint32_t architecture_version;
+#if CPUINFO_ARCH_ARM
+	/* NOTE(review): presumably a combination of CPUINFO_ARM_LINUX_ARCH_* bits - confirm against the parser */
+	uint32_t architecture_flags;
+	struct cpuinfo_arm_linux_proc_cpuinfo_cache proc_cpuinfo_cache;
+#endif
+	/* NOTE(review): presumably bitmasks of CPUINFO_ARM_LINUX_FEATURE_* and CPUINFO_ARM_LINUX_FEATURE2_* bits - confirm */
+	uint32_t features;
+	uint32_t features2;
+	/**
+	 * Main ID Register value.
+	 */
+	uint32_t midr;
+	enum cpuinfo_vendor vendor;
+	enum cpuinfo_uarch uarch;
+	uint32_t uarch_index;
+	/**
+	 * ID of the physical package which includes this logical processor.
+	 * The value is parsed from /sys/devices/system/cpu/cpu<N>/topology/physical_package_id
+	 */
+	uint32_t package_id;
+	/**
+	 * Minimum processor ID on the package which includes this logical processor.
+	 * This value can serve as an ID for the cluster of logical processors: it is the
+	 * same for all logical processors on the same package.
+	 */
+	uint32_t package_leader_id;
+	/**
+	 * Number of logical processors in the package.
+	 */
+	uint32_t package_processor_count;
+	/**
+	 * Maximum frequency, in kHz.
+	 * The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_max_freq
+	 * If failed to read or parse the file, the value is 0.
+	 */
+	uint32_t max_frequency;
+	/**
+	 * Minimum frequency, in kHz.
+	 * The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_min_freq
+	 * If failed to read or parse the file, the value is 0.
+	 */
+	uint32_t min_frequency;
+	/** Linux processor ID */
+	uint32_t system_processor_id;
+	/** Bitmask of CPUINFO_LINUX_FLAG_* and CPUINFO_ARM_LINUX_VALID_* bits describing which details are known */
+	uint32_t flags;
+};
+
+/*
+ * Range of processor IDs forming one cluster.
+ * NOTE(review): whether processor_id_max is inclusive is not visible in this header - confirm at the use sites.
+ */
+struct cpuinfo_arm_linux_cluster {
+	uint32_t processor_id_min;
+	uint32_t processor_id_max;
+};
+
+/*
+ * Returns true if the two processors do belong to the same cluster.
+ *
+ * Only details flagged as known in both descriptors are compared:
+ * - A known and different maximum or minimum frequency means the processors are not in the same cluster.
+ * - A completely known and identical MIDR means the same cluster, except for Cortex-A53 cores, which
+ *   additionally need both frequencies to be known and equal.
+ *   NOTE(review): presumably because some SoCs combine several Cortex-A53 clusters - confirm.
+ * - Otherwise the processors match only if both frequencies are known and equal.
+ */
+static inline bool cpuinfo_arm_linux_processor_equals(
+	struct cpuinfo_arm_linux_processor processor_i[restrict static 1],
+	struct cpuinfo_arm_linux_processor processor_j[restrict static 1])
+{
+	const uint32_t shared_flags = processor_i->flags & processor_j->flags;
+
+	const bool max_frequency_known = (shared_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) != 0;
+	if (max_frequency_known && processor_i->max_frequency != processor_j->max_frequency) {
+		return false;
+	}
+
+	const bool min_frequency_known = (shared_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) != 0;
+	if (min_frequency_known && processor_i->min_frequency != processor_j->min_frequency) {
+		return false;
+	}
+
+	/* Past the guards above, a frequency known for both processors is necessarily the same */
+	const bool same_frequencies = max_frequency_known && min_frequency_known;
+
+	const bool midr_known = (shared_flags & CPUINFO_ARM_LINUX_VALID_MIDR) == CPUINFO_ARM_LINUX_VALID_MIDR;
+	if (midr_known && processor_i->midr == processor_j->midr && !midr_is_cortex_a53(processor_i->midr)) {
+		/* Identical non-Cortex-A53 MIDR is sufficient evidence on its own */
+		return true;
+	}
+
+	return same_frequencies;
+}
+
+/*
+ * Returns true if the two processors certainly don't belong to the same cluster, i.e. when a detail
+ * known for both of them (maximum frequency, minimum frequency, or the complete MIDR) is different.
+ */
+static inline bool cpuinfo_arm_linux_processor_not_equals(
+	struct cpuinfo_arm_linux_processor processor_i[restrict static 1],
+	struct cpuinfo_arm_linux_processor processor_j[restrict static 1])
+{
+	/* Only details flagged in both descriptors can be compared */
+	const uint32_t shared_flags = processor_i->flags & processor_j->flags;
+
+	const bool max_frequency_differs =
+		(shared_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) != 0 &&
+		processor_i->max_frequency != processor_j->max_frequency;
+
+	const bool min_frequency_differs =
+		(shared_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) != 0 &&
+		processor_i->min_frequency != processor_j->min_frequency;
+
+	const bool midr_differs =
+		(shared_flags & CPUINFO_ARM_LINUX_VALID_MIDR) == CPUINFO_ARM_LINUX_VALID_MIDR &&
+		processor_i->midr != processor_j->midr;
+
+	return max_frequency_differs || min_frequency_differs || midr_differs;
+}
+
+CPUINFO_INTERNAL bool cpuinfo_arm_linux_parse_proc_cpuinfo(
+	char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
+	char revision[restrict static CPUINFO_REVISION_VALUE_MAX],
+	uint32_t max_processors_count,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count]);
+
+#if CPUINFO_ARCH_ARM
+	CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_getauxval(
+		uint32_t hwcap[restrict static 1],
+		uint32_t hwcap2[restrict static 1]);
+	CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_procfs(
+		uint32_t hwcap[restrict static 1],
+		uint32_t hwcap2[restrict static 1]);
+
+	CPUINFO_INTERNAL void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
+		uint32_t features,
+		uint32_t features2,
+		uint32_t midr,
+		uint32_t architecture_version,
+		uint32_t architecture_flags,
+		const struct cpuinfo_arm_chipset chipset[restrict static 1],
+		struct cpuinfo_arm_isa isa[restrict static 1]);
+#elif CPUINFO_ARCH_ARM64
+	CPUINFO_INTERNAL void cpuinfo_arm_linux_hwcap_from_getauxval(
+		uint32_t hwcap[restrict static 1],
+		uint32_t hwcap2[restrict static 1]);
+
+	CPUINFO_INTERNAL void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
+		uint32_t features,
+		uint32_t features2,
+		uint32_t midr,
+		const struct cpuinfo_arm_chipset chipset[restrict static 1],
+		struct cpuinfo_arm_isa isa[restrict static 1]);
+#endif
+
+#ifdef __ANDROID__
+	CPUINFO_INTERNAL struct cpuinfo_arm_chipset
+		cpuinfo_arm_android_decode_chipset(
+			const struct cpuinfo_android_properties properties[restrict static 1],
+			uint32_t cores,
+			uint32_t max_cpu_freq_max);
+#else
+	CPUINFO_INTERNAL struct cpuinfo_arm_chipset
+		cpuinfo_arm_linux_decode_chipset(
+			const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
+			const char revision[restrict static CPUINFO_REVISION_VALUE_MAX],
+			uint32_t cores,
+			uint32_t max_cpu_freq_max);
+#endif
+
+CPUINFO_INTERNAL struct cpuinfo_arm_chipset
+	cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_hardware(
+		const char proc_cpuinfo_hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
+		uint32_t cores, uint32_t max_cpu_freq_max, bool is_tegra);
+
+#ifdef __ANDROID__
+	CPUINFO_INTERNAL struct cpuinfo_arm_chipset
+		cpuinfo_arm_android_decode_chipset_from_ro_product_board(
+			const char ro_product_board[restrict static CPUINFO_BUILD_PROP_VALUE_MAX],
+			uint32_t cores, uint32_t max_cpu_freq_max);
+	CPUINFO_INTERNAL struct cpuinfo_arm_chipset
+		cpuinfo_arm_android_decode_chipset_from_ro_board_platform(
+			const char ro_board_platform[restrict static CPUINFO_BUILD_PROP_VALUE_MAX],
+			uint32_t cores, uint32_t max_cpu_freq_max);
+	CPUINFO_INTERNAL struct cpuinfo_arm_chipset
+		cpuinfo_arm_android_decode_chipset_from_ro_mediatek_platform(
+			const char ro_mediatek_platform[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
+	CPUINFO_INTERNAL struct cpuinfo_arm_chipset
+		cpuinfo_arm_android_decode_chipset_from_ro_arch(
+			const char ro_arch[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
+	CPUINFO_INTERNAL struct cpuinfo_arm_chipset
+		cpuinfo_arm_android_decode_chipset_from_ro_chipname(
+			const char ro_chipname[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
+	CPUINFO_INTERNAL struct cpuinfo_arm_chipset
+		cpuinfo_arm_android_decode_chipset_from_ro_hardware_chipname(
+			const char ro_hardware_chipname[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
+#else
+	CPUINFO_INTERNAL struct cpuinfo_arm_chipset
+		cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_revision(
+			const char proc_cpuinfo_revision[restrict static CPUINFO_REVISION_VALUE_MAX]);
+#endif
+
+CPUINFO_INTERNAL bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
+	uint32_t usable_processors,
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
+
+CPUINFO_INTERNAL void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
+
+CPUINFO_INTERNAL void cpuinfo_arm_linux_count_cluster_processors(
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
+
+CPUINFO_INTERNAL uint32_t cpuinfo_arm_linux_detect_cluster_midr(
+	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	uint32_t max_processors,
+	uint32_t usable_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
+
+extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map;
+extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries;
--- a/dep/cpuinfo/src/arm/linux/chipset.c
+++ b/dep/cpuinfo/src/arm/linux/chipset.c
--- a/dep/cpuinfo/src/arm/linux/clusters.c
+++ b/dep/cpuinfo/src/arm/linux/clusters.c
@@ -0,0 +1,493 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cpuinfo.h>
+#include <arm/linux/api.h>
+#if defined(__ANDROID__)
+	#include <arm/android/api.h>
+#endif
+#include <arm/api.h>
+#include <arm/midr.h>
+#include <linux/api.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+/* Checks that every bit set in @p mask is also set in @p bitfield */
+static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
+	const uint32_t missing_bits = mask & ~bitfield;
+	return missing_bits == 0;
+}
+
+/*
+ * Assigns logical processors to clusters of cores using heuristic based on the typical configuration of clusters for
+ * 5, 6, 8, and 10 cores:
+ * - 5 cores (ARM32 Android only): 2 clusters of 4+1 cores
+ * - 6 cores: 2 clusters of 4+2 cores
+ * - 8 cores: 2 clusters of 4+4 cores
+ * - 10 cores: 3 clusters of 4+4+2 cores
+ *
+ * The function must be called after parsing OS-provided information on core clusters.
+ * Its purpose is to detect clusters of cores when OS-provided information is lacking or incomplete, i.e.
+ * - Linux kernel is not configured to report information in sysfs topology leaf.
+ * - Linux kernel reports topology information only for online cores, and only cores on one cluster are online, e.g.:
+ *   - Exynos 8890 has 8 cores in 4+4 clusters, but only the first cluster of 4 cores is reported, and cluster
+ *     configuration of logical processors 4-7 is not reported (all remaining processors 4-7 form cluster 1)
+ *   - MT6797 has 10 cores in 4+4+2, but only the first cluster of 4 cores is reported, and cluster configuration
+ *     of logical processors 4-9 is not reported (processors 4-7 form cluster 1, and processors 8-9 form cluster 2).
+ *
+ * Heuristic assignment of processors to the above pre-defined clusters fails if such assignment would contradict
+ * information provided by the operating system:
+ * - Any of the OS-reported processor clusters is different than the corresponding heuristic cluster.
+ * - Processors in a heuristic cluster have no OS-provided cluster siblings information, but have known and different
+ *   minimum/maximum frequency.
+ * - Processors in a heuristic cluster have no OS-provided cluster siblings information, but have known and different
+ *   MIDR components.
+ *
+ * If the heuristic assignment of processors to clusters of cores fails, all processors' clusters are unchanged.
+ *
+ * @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags.
+ * @param max_processors - number of elements in the @p processors array.
+ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
+ *                             frequency, MIDR information, and core cluster (package siblings list) information.
+ *
+ * @retval true if the heuristic successfully assigned all processors into clusters of cores.
+ * @retval false if known details about processors contradict the heuristic configuration of core clusters.
+ */
+bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
+	uint32_t usable_processors,
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors])
+{
+	/* Heuristic layout: number of cores in each cluster, in the order of processor IDs */
+	uint32_t cluster_processors[3];
+	/* Number of initialized entries in cluster_processors */
+	uint32_t cluster_count;
+	switch (usable_processors) {
+		case 10:
+			cluster_processors[0] = 4;
+			cluster_processors[1] = 4;
+			cluster_processors[2] = 2;
+			cluster_count = 3;
+			break;
+		case 8:
+			cluster_processors[0] = 4;
+			cluster_processors[1] = 4;
+			cluster_count = 2;
+			break;
+		case 6:
+			cluster_processors[0] = 4;
+			cluster_processors[1] = 2;
+			cluster_count = 2;
+			break;
+#if defined(__ANDROID__) && CPUINFO_ARCH_ARM
+		case 5:
+			/*
+			 * The only processor with 5 cores is Leadcore L1860C (ARMv7, mobile),
+			 * but this configuration is not too unreasonable for a virtualized ARM server.
+			 */
+			cluster_processors[0] = 4;
+			cluster_processors[1] = 1;
+			cluster_count = 2;
+			break;
+#endif
+		default:
+			return false;
+	}
+
+	/*
+	 * Assignment of processors to core clusters is done in two passes:
+	 * 1. Verify that the clusters proposed by heuristic are compatible with known details about processors.
+	 * 2. If verification passed, update core clusters for the processors.
+	 */
+
+	uint32_t cluster = 0;
+	uint32_t expected_cluster_processors = 0;
+	/*
+	 * Details accumulated for the cluster being verified. They are zero-initialized because cluster_midr
+	 * is passed by value to midr_copy_*() before any of its components has been recorded.
+	 */
+	uint32_t cluster_start = 0, cluster_flags = 0, cluster_midr = 0;
+	uint32_t cluster_max_frequency = 0, cluster_min_frequency = 0;
+	bool expected_cluster_exists = false;
+	for (uint32_t i = 0; i < max_processors; i++) {
+		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (expected_cluster_processors == 0) {
+				/* Expect this processor to start a new cluster */
+
+				if (cluster >= cluster_count) {
+					/* More valid processors than the heuristic layout accounts for: never index past the layout */
+					cpuinfo_log_debug(
+						"heuristic detection of core clusters failed: "
+						"processor %"PRIu32" does not fit into the %"PRIu32" expected clusters",
+						i, cluster_count);
+					return false;
+				}
+
+				expected_cluster_exists = !!(processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER);
+				if (expected_cluster_exists) {
+					if (processors[i].package_leader_id != i) {
+						cpuinfo_log_debug(
+							"heuristic detection of core clusters failed: "
+							"processor %"PRIu32" is expected to start a new cluster #%"PRIu32" with %"PRIu32" cores, "
+							"but system siblings lists reported it as a sibling of processor %"PRIu32,
+							i, cluster, cluster_processors[cluster], processors[i].package_leader_id);
+						return false;
+					}
+				} else {
+					/* Forget the details recorded for the previous cluster */
+					cluster_flags = 0;
+				}
+
+				cluster_start = i;
+				expected_cluster_processors = cluster_processors[cluster++];
+			} else {
+				/* Expect this processor to belong to the same cluster as processor cluster_start */
+
+				if (expected_cluster_exists) {
+					/*
+					 * The cluster suggested by the heuristic was already parsed from system siblings lists.
+					 * For all processors we expect in the cluster, check that:
+					 * - They have pre-assigned cluster from siblings lists (CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER flag).
+					 * - They were assigned to the same cluster based on siblings lists
+					 *   (package_leader_id points to the first processor in the cluster).
+					 */
+
+					if ((processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER) == 0) {
+						cpuinfo_log_debug(
+							"heuristic detection of core clusters failed: "
+							"processor %"PRIu32" is expected to belong to the cluster of processor %"PRIu32", "
+							"but system siblings lists did not report it as a sibling of processor %"PRIu32,
+							i, cluster_start, cluster_start);
+						return false;
+					}
+					if (processors[i].package_leader_id != cluster_start) {
+						/* Report the leader actually given by the siblings lists, not the expected one */
+						cpuinfo_log_debug(
+							"heuristic detection of core clusters failed: "
+							"processor %"PRIu32" is expected to belong to the cluster of processor %"PRIu32", "
+							"but system siblings lists reported it to belong to the cluster of processor %"PRIu32,
+							i, cluster_start, processors[i].package_leader_id);
+						return false;
+					}
+				} else {
+					/*
+					 * The cluster suggested by the heuristic was not parsed from system siblings lists.
+					 * For all processors we expect in the cluster, check that:
+					 * - They have no pre-assigned cluster from siblings lists.
+					 * - If their min/max CPU frequency is known, it is the same.
+					 * - If any part of their MIDR (Implementer, Variant, Part, Revision) is known, it is the same.
+					 */
+
+					if (processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER) {
+						cpuinfo_log_debug(
+							"heuristic detection of core clusters failed: "
+							"processor %"PRIu32" is expected to be unassigned to any cluster, "
+							"but system siblings lists reported it to belong to the cluster of processor %"PRIu32,
+							i, processors[i].package_leader_id);
+						return false;
+					}
+
+					if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
+						if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
+							if (cluster_min_frequency != processors[i].min_frequency) {
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of its expected cluster (%"PRIu32" KHz)",
+									i, processors[i].min_frequency, cluster_min_frequency);
+								return false;
+							}
+						} else {
+							cluster_min_frequency = processors[i].min_frequency;
+							cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
+						}
+					}
+
+					if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+						if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+							if (cluster_max_frequency != processors[i].max_frequency) {
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of its expected cluster (%"PRIu32" KHz)",
+									i, processors[i].max_frequency, cluster_max_frequency);
+								return false;
+							}
+						} else {
+							cluster_max_frequency = processors[i].max_frequency;
+							cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
+						}
+					}
+
+					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+							if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) {
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of its expected cluster (0x%02"PRIx32")",
+									i, midr_get_implementer(processors[i].midr), midr_get_implementer(cluster_midr));
+								return false;
+							}
+						} else {
+							cluster_midr = midr_copy_implementer(cluster_midr, processors[i].midr);
+							cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER;
+						}
+					}
+
+					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+							if ((cluster_midr & CPUINFO_ARM_MIDR_VARIANT_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_VARIANT_MASK)) {
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"CPU Variant of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32")",
+									i, midr_get_variant(processors[i].midr), midr_get_variant(cluster_midr));
+								return false;
+							}
+						} else {
+							cluster_midr = midr_copy_variant(cluster_midr, processors[i].midr);
+							cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT;
+						}
+					}
+
+					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) {
+						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_PART) {
+							if ((cluster_midr & CPUINFO_ARM_MIDR_PART_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_PART_MASK)) {
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"CPU Part of processor %"PRIu32" (0x%03"PRIx32") is different than of its expected cluster (0x%03"PRIx32")",
+									i, midr_get_part(processors[i].midr), midr_get_part(cluster_midr));
+								return false;
+							}
+						} else {
+							cluster_midr = midr_copy_part(cluster_midr, processors[i].midr);
+							cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART;
+						}
+					}
+
+					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+							if ((cluster_midr & CPUINFO_ARM_MIDR_REVISION_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_REVISION_MASK)) {
+								/* Argument order follows the message: processor's revision first, cluster's second */
+								cpuinfo_log_debug(
+									"heuristic detection of core clusters failed: "
+									"CPU Revision of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32")",
+									i, midr_get_revision(processors[i].midr), midr_get_revision(cluster_midr));
+								return false;
+							}
+						} else {
+							cluster_midr = midr_copy_revision(cluster_midr, processors[i].midr);
+							cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION;
+						}
+					}
+				}
+			}
+			expected_cluster_processors--;
+		}
+	}
+
+	/* Verification passed, assign all processors to new clusters */
+	cluster = 0;
+	expected_cluster_processors = 0;
+	for (uint32_t i = 0; i < max_processors; i++) {
+		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (expected_cluster_processors == 0) {
+				/* Expect this processor to start a new cluster */
+
+				cluster_start = i;
+				expected_cluster_processors = cluster_processors[cluster++];
+			} else {
+				/* Expect this processor to belong to the same cluster as processor cluster_start */
+
+				if (!(processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) {
+					cpuinfo_log_debug("assigned processor %"PRIu32" to cluster of processor %"PRIu32" based on heuristic",
+						i, cluster_start);
+				}
+
+				processors[i].package_leader_id = cluster_start;
+				processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
+			}
+			expected_cluster_processors--;
+		}
+	}
+	return true;
+}
+
+/*
+ * Assigns logical processors to clusters of cores in sequential manner:
+ * - Clusters detected from OS-provided information are unchanged:
+ *   - Processors assigned to these clusters stay assigned to the same clusters
+ *   - No new processors are added to these clusters
+ * - Processors without pre-assigned cluster are clustered in one sequential scan:
+ *   - If known details (min/max frequency, MIDR components) of a processor are compatible with a preceding
+ *     processor, without pre-assigned cluster, the processor is assigned to the cluster of the preceding processor.
+ *   - If known details (min/max frequency, MIDR components) of a processor are not compatible with a preceding
+ *     processor, the processor is assigned to a newly created cluster.
+ *
+ * The function must be called after parsing OS-provided information on core clusters, and usually is called only
+ * if heuristic assignment of processors to clusters (cpuinfo_arm_linux_detect_core_clusters_by_heuristic) failed.
+ *
+ * Its purpose is to detect clusters of cores when OS-provided information is lacking or incomplete, i.e.
+ * - Linux kernel is not configured to report information in sysfs topology leaf.
+ * - Linux kernel reports topology information only for online cores, and all cores on some of the clusters are offline.
+ *
+ * Sequential assignment of processors to clusters always succeeds, and upon exit, all usable processors in the
+ * @p processors array have cluster information.
+ *
+ * @param max_processors - number of elements in the @p processors array.
+ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
+ *                             frequency, MIDR information, and core cluster (package siblings list) information.
+ *
+ * The function does not return a value: unlike the heuristic assignment, the sequential scan cannot fail,
+ * and its results are stored in the @p processors array.
+ */
+void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors])
+{
+	uint32_t cluster_flags = 0; /* marks which cluster_* properties below hold known values */
+	uint32_t cluster_processors = 0; /* processors in the current cluster; 0 means no cluster was started yet */
+	uint32_t cluster_start = 0, cluster_midr = 0, cluster_max_frequency = 0, cluster_min_frequency = 0;
+	for (uint32_t i = 0; i < max_processors; i++) {
+		if ((processors[i].flags & (CPUINFO_LINUX_FLAG_VALID | CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) == CPUINFO_LINUX_FLAG_VALID) { /* valid, no cluster yet */
+			if (cluster_processors == 0) {
+				goto new_cluster;
+			}
+			/* Minimum frequency: a mismatch with the cluster starts a new cluster; if the cluster has none, record it */
+			if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
+				if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
+					if (cluster_min_frequency != processors[i].min_frequency) {
+						cpuinfo_log_info(
+							"minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); "
+							"processor %"PRIu32" starts to a new cluster",
+							i, processors[i].min_frequency, cluster_min_frequency, i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_min_frequency = processors[i].min_frequency;
+					cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
+				}
+			}
+			/* Maximum frequency: same rule as for the minimum frequency */
+			if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+				if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+					if (cluster_max_frequency != processors[i].max_frequency) {
+						cpuinfo_log_debug(
+							"maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); "
+							"processor %"PRIu32" starts a new cluster",
+							i, processors[i].max_frequency, cluster_max_frequency, i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_max_frequency = processors[i].max_frequency;
+					cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
+				}
+			}
+			/* MIDR implementer field: compared only when known for both the processor and the cluster */
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+					if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) {
+						cpuinfo_log_debug(
+							"CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of preceeding cluster (0x%02"PRIx32"); "
+							"processor %"PRIu32" starts to a new cluster",
+							i, midr_get_implementer(processors[i].midr), midr_get_implementer(cluster_midr), i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_midr = midr_copy_implementer(cluster_midr, processors[i].midr);
+					cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER;
+				}
+			}
+			/* MIDR variant field */
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+					if ((cluster_midr & CPUINFO_ARM_MIDR_VARIANT_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_VARIANT_MASK)) {
+						cpuinfo_log_debug(
+							"CPU Variant of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32"); "
+							"processor %"PRIu32" starts to a new cluster",
+							i, midr_get_variant(processors[i].midr), midr_get_variant(cluster_midr), i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_midr = midr_copy_variant(cluster_midr, processors[i].midr);
+					cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT;
+				}
+			}
+			/* MIDR part field */
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) {
+				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_PART) {
+					if ((cluster_midr & CPUINFO_ARM_MIDR_PART_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_PART_MASK)) {
+						cpuinfo_log_debug(
+							"CPU Part of processor %"PRIu32" (0x%03"PRIx32") is different than of its expected cluster (0x%03"PRIx32"); "
+							"processor %"PRIu32" starts to a new cluster",
+							i, midr_get_part(processors[i].midr), midr_get_part(cluster_midr), i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_midr = midr_copy_part(cluster_midr, processors[i].midr);
+					cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART;
+				}
+			}
+			/* MIDR revision field */
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+					if ((cluster_midr & CPUINFO_ARM_MIDR_REVISION_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_REVISION_MASK)) {
+						cpuinfo_log_debug(
+							"CPU Revision of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32"); "
+							"processor %"PRIu32" starts to a new cluster",
+							i, midr_get_revision(processors[i].midr), midr_get_revision(cluster_midr), i);
+						goto new_cluster;
+					}
+				} else {
+					cluster_midr = midr_copy_revision(cluster_midr, processors[i].midr);
+					cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION;
+				}
+			}
+
+			/* All checks passed, attach processor to the preceeding cluster */
+			cluster_processors++;
+			processors[i].package_leader_id = cluster_start;
+			processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
+			cpuinfo_log_debug("assigned processor %"PRIu32" to preceeding cluster of processor %"PRIu32, i, cluster_start);
+			continue;
+
+new_cluster:
+			/* Create a new cluster starting with processor i */
+			cluster_start = i;
+			processors[i].package_leader_id = i;
+			processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
+			cluster_processors = 1;
+
+			/* Copy known information from processor to cluster, and set the flags accordingly */
+			cluster_flags = 0;
+			if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
+				cluster_min_frequency = processors[i].min_frequency;
+				cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
+			}
+			if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+				cluster_max_frequency = processors[i].max_frequency;
+				cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
+			}
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+				cluster_midr = midr_copy_implementer(cluster_midr, processors[i].midr);
+				cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER;
+			}
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+				cluster_midr = midr_copy_variant(cluster_midr, processors[i].midr);
+				cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT;
+			}
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) {
+				cluster_midr = midr_copy_part(cluster_midr, processors[i].midr);
+				cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART;
+			}
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+				cluster_midr = midr_copy_revision(cluster_midr, processors[i].midr);
+				cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION;
+			}
+		}
+	}
+}
+
+/*
+ * Computes, for every valid processor, how many valid logical processors share its core cluster.
+ * Call this only once every processor has been assigned to a core cluster.
+ *
+ * @param max_processors - length of the @p processors array.
+ * @param[in,out] processors - processor descriptors whose flags and core cluster leader
+ *                             (package_leader_id) have already been filled in.
+ *                             On entry, processors[i].package_processor_count is expected to be zero.
+ *                             On exit, processors[i].package_processor_count of each valid processor
+ *                             holds the number of valid logical processors in its core cluster.
+ */
+void cpuinfo_arm_linux_count_cluster_processors(
+	uint32_t max_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors])
+{
+	/* Pass 1: tally each valid processor on the descriptor of its cluster leader */
+	for (uint32_t cpu = 0; cpu < max_processors; cpu++) {
+		if (!bitmask_all(processors[cpu].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+		processors[processors[cpu].package_leader_id].package_processor_count++;
+	}
+	/* Pass 2: hand the leader's total back to every valid member of the cluster */
+	for (uint32_t cpu = 0; cpu < max_processors; cpu++) {
+		if (!bitmask_all(processors[cpu].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+		processors[cpu].package_processor_count = processors[processors[cpu].package_leader_id].package_processor_count;
+	}
+}
--- a/dep/cpuinfo/src/arm/linux/cp.h
+++ b/dep/cpuinfo/src/arm/linux/cp.h
@@ -0,0 +1,44 @@
+#include <stdint.h>
+
+
+#if CPUINFO_MOCK /* mock build: register values come from variables defined elsewhere */
+	extern uint32_t cpuinfo_arm_fpsid;
+	extern uint32_t cpuinfo_arm_mvfr0;
+	extern uint32_t cpuinfo_arm_wcid;
+	/* Returns the mocked value stored in cpuinfo_arm_fpsid */
+	static inline uint32_t read_fpsid(void) {
+		return cpuinfo_arm_fpsid;
+	}
+	/* Returns the mocked value stored in cpuinfo_arm_mvfr0 */
+	static inline uint32_t read_mvfr0(void) {
+		return cpuinfo_arm_mvfr0;
+	}
+	/* Returns the mocked value stored in cpuinfo_arm_wcid */
+	static inline uint32_t read_wcid(void) {
+		return cpuinfo_arm_wcid;
+	}
+#else /* !CPUINFO_MOCK: the values are read with MRC coprocessor instructions */
+	#if !defined(__ARM_ARCH_7A__) && !defined(__ARM_ARCH_8A__) && !(defined(__ARM_ARCH) && (__ARM_ARCH >= 7))
+		/*
+		 * read_fpsid/read_mvfr0 are compiled only for pre-ARMv7 targets: CoProcessor 10 is inaccessible from
+		 * user mode since ARMv7, and clang refuses to compile inline assembly when targeting ARMv7+
+		 */
+		static inline uint32_t read_fpsid(void) {
+			uint32_t fpsid;
+			__asm__ __volatile__("MRC p10, 0x7, %[fpsid], cr0, cr0, 0" : [fpsid] "=r" (fpsid));
+			return fpsid;
+		}
+		/* Same access as read_fpsid, but with cr7 as the first coprocessor register operand */
+		static inline uint32_t read_mvfr0(void) {
+			uint32_t mvfr0;
+			__asm__ __volatile__("MRC p10, 0x7, %[mvfr0], cr7, cr0, 0" : [mvfr0] "=r" (mvfr0));
+			return mvfr0;
+		}
+	#endif /* pre-ARMv7 targets only */
+	/* Reads a register of coprocessor p1 (MRC p1, 0, c0, c0); defined for every non-mock target */
+	static inline uint32_t read_wcid(void) {
+		uint32_t wcid;
+		__asm__ __volatile__("MRC p1, 0, %[wcid], c0, c0" : [wcid] "=r" (wcid));
+		return wcid;
+	}
+#endif /* CPUINFO_MOCK */
--- a/dep/cpuinfo/src/arm/linux/cpuinfo.c
+++ b/dep/cpuinfo/src/arm/linux/cpuinfo.c
@@ -0,0 +1,908 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+
+#include <linux/api.h>
+#include <arm/linux/api.h>
+#include <arm/midr.h>
+#include <cpuinfo/log.h>
+
+/*
+ * Size, in chars, of the on-stack buffer used for parsing lines of /proc/cpuinfo.
+ * This is also the limit on the length of a single line.
+ */
+#define BUFFER_SIZE 1024
+
+
+/*
+ * Decodes the decimal number in [processor_start, processor_end); stops at the first non-digit, returns 0 if empty.
+ */
+static uint32_t parse_processor_number(
+	const char* processor_start,
+	const char* processor_end)
+{
+	const size_t processor_length = (size_t) (processor_end - processor_start);
+	if (processor_length == 0) {
+		cpuinfo_log_warning("Processor number in /proc/cpuinfo is ignored: string is empty");
+		return 0;
+	}
+
+	uint32_t processor_number = 0;
+	for (const char* digit_ptr = processor_start; digit_ptr != processor_end; digit_ptr++) {
+		const uint32_t digit = (uint32_t) (*digit_ptr - '0');
+		if (digit >= 10) { /* unsigned compare: characters below '0' wrap around and are rejected as well */
+			cpuinfo_log_warning("non-decimal suffix %.*s in /proc/cpuinfo processor number is ignored",
+				(int) (processor_end - digit_ptr), digit_ptr);
+			break;
+		}
+		processor_number = processor_number * 10 + digit;
+	}
+	return processor_number;
+}
+
+/*
+ *	Full list of ARM features reported in /proc/cpuinfo:
+ *
+ *	* swp - support for SWP instruction (deprecated in ARMv7, can be removed in future)
+ *	* half - support for half-word loads and stores. These instructions are part of ARMv4,
+ *	         so no need to check it on supported CPUs.
+ *	* thumb - support for 16-bit Thumb instruction set. Note that BX instruction is detected
+ *	          by ARMv4T architecture, not by this flag.
+ *	* 26bit - old CPUs merged 26-bit PC and program status register (flags) into 32-bit PC
+ *	          and had special instructions for working with packed PC. Now it is all deprecated.
+ *	* fastmult - most old ARM CPUs could only compute 2 bits of multiplication result per clock
+ *	             cycle, but CPUs with M suffix (e.g. ARM7TDMI) could compute 4 bits per cycle.
+ *	             Of course, now it makes no sense.
+ *	* fpa - floating point accelerator available. On original ARM ABI all floating-point operations
+ *	        generated FPA instructions. If FPA was not available, these instructions generated
+ *	        "illegal operation" interrupts, and the OS processed them by emulating the FPA instructions.
+ *	        Debian used this ABI before it switched to EABI. Now FPA is deprecated.
+ *	* vfp - vector floating point instructions. Available on most modern CPUs (as part of VFPv3).
+ *	        Required by Android ARMv7A ABI and by Ubuntu on ARM.
+ *              Note: there is no flag for VFPv2.
+ *	* edsp - V5E instructions: saturating add/sub and 16-bit x 16-bit -> 32/64-bit multiplications.
+ *	         Required on Android, supported by all CPUs in production.
+ *	* java - Jazelle extension. Supported on most CPUs.
+ *	* iwmmxt - Intel/Marvell Wireless MMX instructions. 64-bit integer SIMD.
+ *	           Supported on XScale (Since PXA270) and Sheeva (PJ1, PJ4) architectures.
+ *	           Note that there is no flag for WMMX2 instructions.
+ *	* crunch - Maverick Crunch instructions. Junk.
+ *	* thumbee - ThumbEE instructions. Almost no documentation is available.
+ *	* neon - NEON instructions (aka Advanced SIMD). MVFR1 register gives more
+ *	         fine-grained information on particular supported features, but
+ *	         the Linux kernel exports only a single flag for all of them.
+ *	         According to ARMv7A docs it also implies the availability of VFPv3
+ *	         (with 32 double-precision registers d0-d31).
+ *	* vfpv3 - VFPv3 instructions. Available on most modern CPUs. Augment VFPv2 by
+ *	          conversion to/from integers and load constant instructions.
+ *	          Required by Android ARMv7A ABI and by Ubuntu on ARM.
+ *	* vfpv3d16 - VFPv3 instructions with only 16 double-precision registers (d0-d15).
+ *	* tls - software thread ID registers.
+ *	        Used by kernel (and likely libc) for efficient implementation of TLS.
+ *	* vfpv4 - fused multiply-add instructions.
+ *	* idiva - DIV instructions available in ARM mode.
+ *	* idivt - DIV instructions available in Thumb mode.
+ *  * vfpd32 - VFP (of any version) with 32 double-precision registers d0-d31.
+ *  * lpae - Large Physical Address Extension (physical address up to 40 bits).
+ *  * evtstrm - generation of Event Stream by timer.
+ *  * aes - AES instructions.
+ *  * pmull - Polynomial Multiplication instructions.
+ *  * sha1 - SHA1 instructions.
+ *  * sha2 - SHA2 instructions.
+ *  * crc32 - CRC32 instructions.
+ *
+ *	/proc/cpuinfo on ARM is populated in file arch/arm/kernel/setup.c in Linux kernel
+ *	Note that some devices may use patched Linux kernels with different feature names.
+ *	However, the names above were checked on a large number of /proc/cpuinfo listings.
+ */
+static void parse_features(
+	const char* features_start,
+	const char* features_end,
+	struct cpuinfo_arm_linux_processor processor[restrict static 1])
+{
+	const char* feature_start = features_start;
+	const char* feature_end;
+	/* Decodes the space-separated feature names in [features_start, features_end) into processor->features (and features2 on 32-bit ARM) */
+	/* Mark the features as valid */
+	processor->flags |= CPUINFO_ARM_LINUX_VALID_FEATURES | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
+	/* NOTE(review): an empty range or trailing spaces would make the scan below start past features_end - confirm the caller excludes both */
+	do {
+		feature_end = feature_start + 1;
+		for (; feature_end != features_end; feature_end++) {
+			if (*feature_end == ' ') {
+				break;
+			}
+		}
+		const size_t feature_length = (size_t) (feature_end - feature_start);
+		/* Dispatch on the name length first, then memcmp; names without a flag on this architecture are accepted silently */
+		switch (feature_length) {
+			case 2:
+				if (memcmp(feature_start, "fp", feature_length) == 0) {
+#if CPUINFO_ARCH_ARM64
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_FP;
+#endif
+#if CPUINFO_ARCH_ARM
+				} else if (memcmp(feature_start, "wp", feature_length) == 0) {
+					/*
+					 * Some AArch64 kernels, including the one on Nexus 5X,
+					 * erroneously report "swp" as "wp" to AArch32 programs
+					 */
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_SWP;
+#endif
+				} else {
+					goto unexpected;
+				}
+				break;
+			case 3:
+				if (memcmp(feature_start, "aes", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM
+						processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_AES;
+					#elif CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_AES;
+					#endif
+#if CPUINFO_ARCH_ARM
+				} else if (memcmp(feature_start, "swp", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_SWP;
+				} else if (memcmp(feature_start, "fpa", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_FPA;
+				} else if (memcmp(feature_start, "vfp", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFP;
+				} else if (memcmp(feature_start, "tls", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_TLS;
+#endif /* CPUINFO_ARCH_ARM */
+				} else {
+					goto unexpected;
+				}
+				break;
+			case 4:
+				if (memcmp(feature_start, "sha1", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM
+						processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_SHA1;
+					#elif CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_SHA1;
+					#endif
+				} else if (memcmp(feature_start, "sha2", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM
+						processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_SHA2;
+					#elif CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_SHA2;
+					#endif
+				} else if (memcmp(feature_start, "fphp", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_FPHP;
+					#endif
+				} else if (memcmp(feature_start, "fcma", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_FCMA;
+					#endif
+#if CPUINFO_ARCH_ARM
+				} else if (memcmp(feature_start, "half", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_HALF;
+				} else if (memcmp(feature_start, "edsp", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_EDSP;
+				} else if (memcmp(feature_start, "java", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_JAVA;
+				} else if (memcmp(feature_start, "neon", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_NEON;
+				} else if (memcmp(feature_start, "lpae", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_LPAE;
+				} else if (memcmp(feature_start, "tlsi", feature_length) == 0) {
+					/*
+					 * Some AArch64 kernels, including the one on Nexus 5X,
+					 * erroneously report "tls" as "tlsi" to AArch32 programs
+					 */
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_TLS;
+#endif /* CPUINFO_ARCH_ARM */
+				} else {
+					goto unexpected;
+				}
+				break;
+			case 5:
+				if (memcmp(feature_start, "pmull", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM
+						processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_PMULL;
+					#elif CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_PMULL;
+					#endif
+				} else if (memcmp(feature_start, "crc32", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM
+						processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_CRC32;
+					#elif CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_CRC32;
+					#endif
+				} else if (memcmp(feature_start, "asimd", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMD;
+					#endif
+				} else if (memcmp(feature_start, "cpuid", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_CPUID;
+					#endif
+				} else if (memcmp(feature_start, "jscvt", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_JSCVT;
+					#endif
+				} else if (memcmp(feature_start, "lrcpc", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_LRCPC;
+					#endif
+#if CPUINFO_ARCH_ARM
+				} else if (memcmp(feature_start, "thumb", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_THUMB;
+				} else if (memcmp(feature_start, "26bit", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_26BIT;
+				} else if (memcmp(feature_start, "vfpv3", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV3;
+				} else if (memcmp(feature_start, "vfpv4", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV4;
+				} else if (memcmp(feature_start, "idiva", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_IDIVA;
+				} else if (memcmp(feature_start, "idivt", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_IDIVT;
+#endif /* CPUINFO_ARCH_ARM */
+				} else {
+					goto unexpected;
+				}
+ 				break;
+#if CPUINFO_ARCH_ARM
+			case 6:
+				if (memcmp(feature_start, "iwmmxt", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_IWMMXT;
+				} else if (memcmp(feature_start, "crunch", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_CRUNCH;
+				} else if (memcmp(feature_start, "vfpd32", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPD32;
+				} else {
+					goto unexpected;
+				}
+				break;
+#endif /* CPUINFO_ARCH_ARM */
+			case 7:
+				if (memcmp(feature_start, "evtstrm", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_EVTSTRM;
+				} else if (memcmp(feature_start, "atomics", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_ATOMICS;
+					#endif
+				} else if (memcmp(feature_start, "asimdhp", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDHP;
+					#endif
+#if CPUINFO_ARCH_ARM
+				} else if (memcmp(feature_start, "thumbee", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_THUMBEE;
+#endif /* CPUINFO_ARCH_ARM */
+				} else {
+					goto unexpected;
+				}
+				break;
+			case 8:
+				if (memcmp(feature_start, "asimdrdm", feature_length) == 0) {
+					#if CPUINFO_ARCH_ARM64
+						processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM;
+					#endif
+#if CPUINFO_ARCH_ARM
+				} else if (memcmp(feature_start, "fastmult", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_FASTMULT;
+				} else if (memcmp(feature_start, "vfpv3d16", feature_length) == 0) {
+					processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV3D16;
+#endif /* CPUINFO_ARCH_ARM */
+				} else {
+					goto unexpected;
+				}
+				break;
+			default:
+			unexpected:
+				cpuinfo_log_warning("unexpected /proc/cpuinfo feature \"%.*s\" is ignored",
+					(int) feature_length, feature_start);
+				break;
+		}
+		feature_start = feature_end; /* continue after the name just handled, skipping the spaces that follow it */
+		for (; feature_start != features_end; feature_start++) {
+			if (*feature_start != ' ') {
+				break;
+			}
+		}
+	} while (feature_start != feature_end); /* no space was skipped: the name just handled ended at features_end */
+}
+
+/*
+ * Parses the "CPU architecture" value in [cpu_architecture_start, cpu_architecture_end) into the architecture version, architecture flags, and MIDR architecture field of @p processor.
+ */
+static void parse_cpu_architecture(
+	const char* cpu_architecture_start,
+	const char* cpu_architecture_end,
+	struct cpuinfo_arm_linux_processor processor[restrict static 1])
+{
+	const size_t cpu_architecture_length = (size_t) (cpu_architecture_end - cpu_architecture_start);
+	/* Early AArch64 kernels report "CPU architecture: AArch64" instead of a numeric value 8 */
+	if (cpu_architecture_length == 7) {
+		if (memcmp(cpu_architecture_start, "AArch64", cpu_architecture_length) == 0) {
+			processor->midr = midr_set_architecture(processor->midr, UINT32_C(0xF));
+			processor->architecture_version = 8;
+			processor->flags |= CPUINFO_ARM_LINUX_VALID_ARCHITECTURE | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
+			return;
+		}
+	}
+
+	uint32_t architecture = 0;
+	const char* cpu_architecture_ptr = cpu_architecture_start;
+	for (; cpu_architecture_ptr != cpu_architecture_end; cpu_architecture_ptr++) {
+		const uint32_t digit = (*cpu_architecture_ptr) - '0';
+		/* Verify that CPU architecture is a decimal number */
+		if (digit >= 10) {
+			break;
+		}
+		architecture = architecture * 10 + digit;
+	}
+	/* cpu_architecture_ptr now points just past the last decimal digit consumed */
+	if (cpu_architecture_ptr == cpu_architecture_start) {
+		cpuinfo_log_warning("CPU architecture %.*s in /proc/cpuinfo is ignored due to non-digit at the beginning of the string",
+			(int) cpu_architecture_length, cpu_architecture_start);
+	} else {
+		if (architecture != 0) {
+			processor->architecture_version = architecture;
+			processor->flags |= CPUINFO_ARM_LINUX_VALID_ARCHITECTURE | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
+			/* Characters following the version number are single-letter architecture suffixes */
+			for (; cpu_architecture_ptr != cpu_architecture_end; cpu_architecture_ptr++) {
+				const char feature = *cpu_architecture_ptr;
+				switch (feature) {
+#if CPUINFO_ARCH_ARM
+					case 'T':
+						processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_T;
+						break;
+					case 'E':
+						processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_E;
+						break;
+					case 'J':
+						processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_J;
+						break;
+#endif /* CPUINFO_ARCH_ARM */
+					case ' ':
+					case '\t':
+						/* Ignore whitespace at the end */
+						break;
+					default:
+						cpuinfo_log_warning("skipped unknown architectural feature '%c' for ARMv%"PRIu32,
+							feature, architecture);
+						break;
+				}
+			}
+		} else {
+			cpuinfo_log_warning("CPU architecture %.*s in /proc/cpuinfo is ignored due to invalid value (0)",
+				(int) cpu_architecture_length, cpu_architecture_start);
+		}
+	}
+	/* MIDR architecture field: 0xF by default; 32-bit ARM builds map architecture_version 5 and 6 to dedicated codes */
+	uint32_t midr_architecture = UINT32_C(0xF);
+#if CPUINFO_ARCH_ARM
+	switch (processor->architecture_version) {
+		case 6:
+			midr_architecture = UINT32_C(0x7); /* ARMv6 */
+			break;
+		case 5:
+			if ((processor->architecture_flags & CPUINFO_ARM_LINUX_ARCH_TEJ) == CPUINFO_ARM_LINUX_ARCH_TEJ) {
+				midr_architecture = UINT32_C(0x6); /* ARMv5TEJ */
+			} else if ((processor->architecture_flags & CPUINFO_ARM_LINUX_ARCH_TE) == CPUINFO_ARM_LINUX_ARCH_TE) {
+				midr_architecture = UINT32_C(0x5); /* ARMv5TE */
+			} else {
+				midr_architecture = UINT32_C(0x4); /* ARMv5T */
+			}
+			break;
+	}
+#endif
+	processor->midr = midr_set_architecture(processor->midr, midr_architecture);
+}
+
+/*
+ * Decodes the "CPU part" value in [cpu_part_start, cpu_part_end) and stores it in the part field of processor->midr.
+ * Accepted form: "0x" followed by one to three hexadecimal digits, since MIDR assigns only a 12-bit value for CPU part.
+ * Anything else is reported with a warning and leaves @p processor untouched.
+ */
+static void parse_cpu_part(
+	const char* cpu_part_start,
+	const char* cpu_part_end,
+	struct cpuinfo_arm_linux_processor processor[restrict static 1])
+{
+	const size_t length = (size_t) (cpu_part_end - cpu_part_start);
+
+	/* "0x" plus one to three digits gives a total length between 3 and 5 characters */
+	if (!(length >= 3 && length <= 5)) {
+		cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)",
+			(int) length, cpu_part_start, length);
+		return;
+	}
+
+	/* The value must begin with the hexadecimal prefix */
+	if (memcmp(cpu_part_start, "0x", 2) != 0) {
+		cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix",
+			(int) length, cpu_part_start);
+		return;
+	}
+
+	/* Accumulate the hexadecimal digits that follow the prefix, most significant digit first */
+	uint32_t part = 0;
+	for (size_t offset = 2; offset < length; offset++) {
+		const char c = cpu_part_start[offset];
+		uint32_t nibble;
+		if (c >= '0' && c <= '9') {
+			nibble = (uint32_t) (c - '0');
+		} else if (c >= 'A' && c <= 'F') {
+			nibble = 10 + (uint32_t) (c - 'A');
+		} else if (c >= 'a' && c <= 'f') {
+			nibble = 10 + (uint32_t) (c - 'a');
+		} else {
+			cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character %c at offset %zu",
+				(int) length, cpu_part_start, c, offset);
+			return;
+		}
+		part = (part << 4) | nibble;
+	}
+
+	/* Commit only after the whole value has been validated */
+	processor->midr = midr_set_part(processor->midr, part);
+	processor->flags |= CPUINFO_ARM_LINUX_VALID_PART | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
+}
+
+/*
+ * Decodes the "CPU implementer" value in [cpu_implementer_start, cpu_implementer_end) and stores it in the
+ * implementer field of processor->midr.
+ * Accepted form: "0x" followed by one or two hexadecimal digits, because Main ID Register (MIDR) assigns
+ * only an 8-bit value for CPU implementer. A single digit has not been observed so far, but is tolerated.
+ * Anything else is reported with a warning and leaves @p processor untouched.
+ */
+static void parse_cpu_implementer(
+	const char* cpu_implementer_start,
+	const char* cpu_implementer_end,
+	struct cpuinfo_arm_linux_processor processor[restrict static 1])
+{
+	const size_t length = cpu_implementer_end - cpu_implementer_start;
+
+	/* "0x" plus one or two digits gives a total length of 3 or 4 characters */
+	if (length != 3 && length != 4) {
+		cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)",
+			(int) length, cpu_implementer_start, length);
+		return;
+	}
+
+	/* The value must begin with the hexadecimal prefix */
+	if (memcmp(cpu_implementer_start, "0x", 2) != 0) {
+		cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix",
+			(int) length, cpu_implementer_start);
+		return;
+	}
+
+	/* Accumulate the hexadecimal digits that follow the prefix, most significant digit first */
+	uint32_t implementer = 0;
+	for (size_t offset = 2; offset < length; offset++) {
+		const char c = cpu_implementer_start[offset];
+		uint32_t nibble;
+
+		if (c >= '0' && c <= '9') {
+			nibble = (uint32_t) (c - '0');
+		} else if (c >= 'A' && c <= 'F') {
+			nibble = 10 + (uint32_t) (c - 'A');
+		} else if (c >= 'a' && c <= 'f') {
+			nibble = 10 + (uint32_t) (c - 'a');
+		} else {
+			cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c' at offset %zu",
+				(int) length, cpu_implementer_start, c, offset);
+			return;
+		}
+
+		implementer = (implementer << 4) | nibble;
+	}
+
+	/* Commit only after the whole value has been validated */
+	processor->midr = midr_set_implementer(processor->midr, implementer);
+	processor->flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
+}
+
+/*
+ * Decodes the "CPU variant" value in [cpu_variant_start, cpu_variant_end) into the variant field of processor->midr.
+ * Accepted form: "0x" followed by exactly one hexadecimal digit (MIDR assigns only a 4-bit value for CPU variant).
+ */
+static void parse_cpu_variant(
+	const char* cpu_variant_start,
+	const char* cpu_variant_end,
+	struct cpuinfo_arm_linux_processor processor[restrict static 1])
+{
+	const size_t length = cpu_variant_end - cpu_variant_start;
+
+	/* "0x" plus a single digit is always three characters long */
+	if (length != 3) {
+		cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)",
+			(int) length, cpu_variant_start, length);
+		return;
+	}
+
+	/* The value must begin with the hexadecimal prefix */
+	if (memcmp(cpu_variant_start, "0x", 2) != 0) {
+		cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix",
+			(int) length, cpu_variant_start);
+		return;
+	}
+
+	/* Decode the single hexadecimal digit that follows the prefix */
+	const char c = cpu_variant_start[2];
+	uint32_t variant;
+	if (c >= '0' && c <= '9') {
+		variant = (uint32_t) (c - '0');
+	} else if (c >= 'A' && c <= 'F') {
+		variant = 10 + (uint32_t) (c - 'A');
+	} else if (c >= 'a' && c <= 'f') {
+		variant = 10 + (uint32_t) (c - 'a');
+	} else {
+		cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c'",
+			(int) length, cpu_variant_start, c);
+		return;
+	}
+
+	processor->midr = midr_set_variant(processor->midr, variant);
+	processor->flags |= CPUINFO_ARM_LINUX_VALID_VARIANT | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
+}
+
+/*
+ * Decodes the decimal "CPU revision" value in [cpu_revision_start, cpu_revision_end) into the revision field
+ * of processor->midr. A non-digit character anywhere in the value discards it with a warning.
+ */
+static void parse_cpu_revision(
+	const char* cpu_revision_start,
+	const char* cpu_revision_end,
+	struct cpuinfo_arm_linux_processor processor[restrict static 1])
+{
+	uint32_t revision = 0;
+	for (const char* cursor = cpu_revision_start; cursor != cpu_revision_end; cursor++) {
+		const char c = *cursor;
+		if (c < '0' || c > '9') {
+			cpuinfo_log_warning("CPU revision %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu",
+				(int) (cpu_revision_end - cpu_revision_start), cpu_revision_start,
+				c, (size_t) (cursor - cpu_revision_start));
+			return;
+		}
+		revision = revision * 10 + (uint32_t) (c - '0');
+	}
+	processor->midr = midr_set_revision(processor->midr, revision);
+	processor->flags |= CPUINFO_ARM_LINUX_VALID_REVISION | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
+}
+
+#if CPUINFO_ARCH_ARM
+/*
+ * Decode one of the cache-related numbers reported by Linux kernel
+ * for pre-ARMv7 architecture.
+ * An example cache-related information in /proc/cpuinfo:
+ *
+ *      I size          : 32768
+ *      I assoc         : 4
+ *      I line length   : 32
+ *      I sets          : 256
+ *      D size          : 16384
+ *      D assoc         : 4
+ *      D line length   : 32
+ *      D sets          : 128
+ *
+ * Parameters:
+ *   number_start, number_end - half-open character range of the value text,
+ *     parsed as an unsigned decimal number (no overflow check: the accumulator
+ *     is a uint32_t and wraps).
+ *   number_name - human-readable name of the quantity, used only in log messages.
+ *   number_ptr  - receives the decoded value on success.
+ *   flags       - on success, number_mask and CPUINFO_ARM_LINUX_VALID_PROCESSOR are OR-ed in.
+ *   number_mask - validity flag of the quantity being parsed; if it overlaps
+ *     CPUINFO_ARM_LINUX_VALID_CACHE_LINE the value must be 16, 32, 64, or 128.
+ *
+ * A value that contains a non-digit character, is zero, or fails the cache line
+ * check is reported with a warning and ignored: neither *number_ptr nor *flags
+ * is modified.
+ */
+static void parse_cache_number(
+	const char* number_start,
+	const char* number_end,
+	const char* number_name,
+	uint32_t number_ptr[restrict static 1],
+	uint32_t flags[restrict static 1],
+	uint32_t number_mask)
+{
+	uint32_t number = 0;
+	for (const char* digit_ptr = number_start; digit_ptr != number_end; digit_ptr++) {
+		const uint32_t digit = *digit_ptr - '0';
+		if (digit >= 10) {
+			cpuinfo_log_warning("%s %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu",
+				number_name, (int) (number_end - number_start), number_start,
+				*digit_ptr, (size_t) (digit_ptr - number_start));
+			return;
+		}
+
+		number = number * 10 + digit;
+	}
+
+	if (number == 0) {
+		cpuinfo_log_warning("%s %.*s in /proc/cpuinfo is ignored due to invalid value of zero reported by the kernel",
+			number_name, (int) (number_end - number_start), number_start);
+		/* Actually ignore the value, as the warning states: do not store it or mark it valid */
+		return;
+	}
+
+	/* If the number specifies a cache line size, verify that is a reasonable power of 2 */
+	if (number_mask & CPUINFO_ARM_LINUX_VALID_CACHE_LINE) {
+		switch (number) {
+			case 16:
+			case 32:
+			case 64:
+			case 128:
+				break;
+			default:
+				cpuinfo_log_warning("invalid %s %.*s is ignored: a value of 16, 32, 64, or 128 expected",
+					number_name, (int) (number_end - number_start), number_start);
+				/* Actually ignore the value, as the warning states */
+				return;
+		}
+	}
+
+	*number_ptr = number;
+	*flags |= number_mask | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
+}
+#endif /* CPUINFO_ARCH_ARM */
+
+/*
+ * State shared between successive parse_line invocations while /proc/cpuinfo is scanned.
+ */
+struct proc_cpuinfo_parser_state {
+	/* Destination buffer for the value of the "Hardware" line (at most CPUINFO_HARDWARE_VALUE_MAX characters are copied) */
+	char* hardware;
+	/* Destination buffer for the value of the "Revision" line (at most CPUINFO_REVISION_VALUE_MAX characters are copied) */
+	char* revision;
+	/* Index taken from the most recently parsed "processor" line; starts at 0 */
+	uint32_t processor_index;
+	/* Upper bound (exclusive) on indices that may be used with the processors array */
+	uint32_t max_processors_count;
+	/* Per-processor records that parsed values are written to */
+	struct cpuinfo_arm_linux_processor* processors;
+	/* Scratch record that receives parsed values when processor_index is not below max_processors_count */
+	struct cpuinfo_arm_linux_processor dummy_processor;
+};
+
+/*
+ *	Decode a single line of /proc/cpuinfo information.
+ *	Lines have format <words-with-spaces>[ ]*:[ ]<space-separated words>
+ *	An example of /proc/cpuinfo (from Pandaboard-ES):
+ *
+ *		Processor       : ARMv7 Processor rev 10 (v7l)
+ *		processor       : 0
+ *		BogoMIPS        : 1392.74
+ *
+ *		processor       : 1
+ *		BogoMIPS        : 1363.33
+ *
+ *		Features        : swp half thumb fastmult vfp edsp thumbee neon vfpv3
+ *		CPU implementer : 0x41
+ *		CPU architecture: 7
+ *		CPU variant     : 0x2
+ *		CPU part        : 0xc09
+ *		CPU revision    : 10
+ *
+ *		Hardware        : OMAP4 Panda board
+ *		Revision        : 0020
+ *		Serial          : 0000000000000000
+ *
+ *	Parameters:
+ *		line_start, line_end - half-open character range [line_start, line_end) of the line.
+ *		state - parser state. Per-processor values are written to
+ *			state->processors[state->processor_index], or to state->dummy_processor
+ *			when that index is not below state->max_processors_count.
+ *		line_number - not used by this function.
+ *
+ *	Hardware and Revision values are copied into state->hardware / state->revision.
+ *	A NUL terminator is written only when the value is shorter than the respective
+ *	CPUINFO_*_VALUE_MAX limit; longer values are truncated to the limit.
+ *
+ *	Always returns true, including for malformed or unknown lines, which are only logged.
+ */
+static bool parse_line(
+	const char* line_start,
+	const char* line_end,
+	struct proc_cpuinfo_parser_state state[restrict static 1],
+	uint64_t line_number)
+{
+	/* Empty line. Skip. */
+	if (line_start == line_end) {
+		return true;
+	}
+
+	/* Search for ':' on the line. */
+	const char* separator = line_start;
+	for (; separator != line_end; separator++) {
+		if (*separator == ':') {
+			break;
+		}
+	}
+	/* Skip line if no ':' separator was found. */
+	if (separator == line_end) {
+		cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key/value separator ':' not found",
+			(int) (line_end - line_start), line_start);
+		return true;
+	}
+
+	/* Skip trailing spaces in key part. */
+	const char* key_end = separator;
+	for (; key_end != line_start; key_end--) {
+		if (key_end[-1] != ' ' && key_end[-1] != '\t') {
+			break;
+		}
+	}
+	/* Skip line if key contains nothing but spaces. */
+	if (key_end == line_start) {
+		cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key contains only spaces",
+			(int) (line_end - line_start), line_start);
+		return true;
+	}
+
+	/* Skip leading spaces in value part. */
+	const char* value_start = separator + 1;
+	for (; value_start != line_end; value_start++) {
+		if (*value_start != ' ') {
+			break;
+		}
+	}
+	/* Value part contains nothing but spaces. Skip line. */
+	if (value_start == line_end) {
+		cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: value contains only spaces",
+			(int) (line_end - line_start), line_start);
+		return true;
+	}
+
+	/* Skip trailing spaces in value part (if any) */
+	const char* value_end = line_end;
+	for (; value_end != value_start; value_end--) {
+		if (value_end[-1] != ' ') {
+			break;
+		}
+	}
+
+	/* Select the record to fill: the current processor, or the dummy one if the index is out of range */
+	const uint32_t processor_index      = state->processor_index;
+	const uint32_t max_processors_count = state->max_processors_count;
+	struct cpuinfo_arm_linux_processor* processors = state->processors;
+	struct cpuinfo_arm_linux_processor* processor  = &state->dummy_processor;
+	if (processor_index < max_processors_count) {
+		processor = &processors[processor_index];
+	}
+
+	/* Dispatch on key length first, so that each memcmp compares strings of equal length */
+	const size_t key_length = key_end - line_start;
+	switch (key_length) {
+		case 6:
+			if (memcmp(line_start, "Serial", key_length) == 0) {
+				/* Usually contains just zeros, useless */
+#if CPUINFO_ARCH_ARM
+			} else if (memcmp(line_start, "I size", key_length) == 0) {
+				parse_cache_number(value_start, value_end,
+					"instruction cache size", &processor->proc_cpuinfo_cache.i_size,
+					&processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_SIZE);
+			} else if (memcmp(line_start, "I sets", key_length) == 0) {
+				parse_cache_number(value_start, value_end,
+					"instruction cache sets", &processor->proc_cpuinfo_cache.i_sets,
+					&processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_SETS);
+			} else if (memcmp(line_start, "D size", key_length) == 0) {
+				parse_cache_number(value_start, value_end,
+					"data cache size", &processor->proc_cpuinfo_cache.d_size,
+					&processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_SIZE);
+			} else if (memcmp(line_start, "D sets", key_length) == 0) {
+				parse_cache_number(value_start, value_end,
+					"data cache sets", &processor->proc_cpuinfo_cache.d_sets,
+					&processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_SETS);
+#endif /* CPUINFO_ARCH_ARM */
+			} else {
+				goto unknown;
+			}
+			break;
+#if CPUINFO_ARCH_ARM
+		case 7:
+			if (memcmp(line_start, "I assoc", key_length) == 0) {
+				parse_cache_number(value_start, value_end,
+					"instruction cache associativity", &processor->proc_cpuinfo_cache.i_assoc,
+					&processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_WAYS);
+			} else if (memcmp(line_start, "D assoc", key_length) == 0) {
+				parse_cache_number(value_start, value_end,
+					"data cache associativity", &processor->proc_cpuinfo_cache.d_assoc,
+					&processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_WAYS);
+			} else {
+				goto unknown;
+			}
+			break;
+#endif /* CPUINFO_ARCH_ARM */
+		case 8:
+			if (memcmp(line_start, "CPU part", key_length) == 0) {
+				parse_cpu_part(value_start, value_end, processor);
+			} else if (memcmp(line_start, "Features", key_length) == 0) {
+				parse_features(value_start, value_end, processor);
+			} else if (memcmp(line_start, "BogoMIPS", key_length) == 0) {
+				/* BogoMIPS is useless, don't parse */
+			} else if (memcmp(line_start, "Hardware", key_length) == 0) {
+				size_t value_length = value_end - value_start;
+				if (value_length > CPUINFO_HARDWARE_VALUE_MAX) {
+					cpuinfo_log_info(
+						"length of Hardware value \"%.*s\" in /proc/cpuinfo exceeds limit (%d): truncating to the limit",
+						(int) value_length, value_start, CPUINFO_HARDWARE_VALUE_MAX);
+					value_length = CPUINFO_HARDWARE_VALUE_MAX;
+				} else if (value_length < CPUINFO_HARDWARE_VALUE_MAX) {
+					/*
+					 * Terminate only when the terminator fits: the buffer is guaranteed to hold just
+					 * CPUINFO_HARDWARE_VALUE_MAX characters, so writing at index
+					 * CPUINFO_HARDWARE_VALUE_MAX would be one past its end.
+					 */
+					state->hardware[value_length] = '\0';
+				}
+				memcpy(state->hardware, value_start, value_length);
+				cpuinfo_log_debug("parsed /proc/cpuinfo Hardware = \"%.*s\"", (int) value_length, value_start);
+			} else if (memcmp(line_start, "Revision", key_length) == 0) {
+				size_t value_length = value_end - value_start;
+				if (value_length > CPUINFO_REVISION_VALUE_MAX) {
+					cpuinfo_log_info(
+						"length of Revision value \"%.*s\" in /proc/cpuinfo exceeds limit (%d): truncating to the limit",
+						(int) value_length, value_start, CPUINFO_REVISION_VALUE_MAX);
+					value_length = CPUINFO_REVISION_VALUE_MAX;
+				} else if (value_length < CPUINFO_REVISION_VALUE_MAX) {
+					/* Same bound as for Hardware: index CPUINFO_REVISION_VALUE_MAX is outside the buffer */
+					state->revision[value_length] = '\0';
+				}
+				memcpy(state->revision, value_start, value_length);
+				cpuinfo_log_debug("parsed /proc/cpuinfo Revision = \"%.*s\"", (int) value_length, value_start);
+			} else {
+				goto unknown;
+			}
+			break;
+		case 9:
+			if (memcmp(line_start, "processor", key_length) == 0) {
+				const uint32_t new_processor_index = parse_processor_number(value_start, value_end);
+				if (new_processor_index < processor_index) {
+					/* Strange: decreasing processor number */
+					cpuinfo_log_warning(
+						"unexpectedly low processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo",
+						new_processor_index, processor_index);
+				} else if (new_processor_index > processor_index + 1) {
+					/* Strange, but common: skipped processor $(processor_index + 1) */
+					cpuinfo_log_info(
+						"unexpectedly high processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo",
+						new_processor_index, processor_index);
+				}
+				if (new_processor_index < max_processors_count) {
+					/* Record that the processor was mentioned in /proc/cpuinfo */
+					processors[new_processor_index].flags |= CPUINFO_ARM_LINUX_VALID_PROCESSOR;
+				} else {
+					/* Log and ignore processor */
+					cpuinfo_log_warning("processor %"PRIu32" in /proc/cpuinfo is ignored: index exceeds system limit %"PRIu32,
+						new_processor_index, max_processors_count - 1);
+				}
+				/* Subsequent lines are attributed to this processor (or to the dummy record if out of range) */
+				state->processor_index = new_processor_index;
+				return true;
+			} else if (memcmp(line_start, "Processor", key_length) == 0) {
+				/* TODO: parse to fix misreported architecture, similar to Android's cpufeatures */
+			} else {
+				goto unknown;
+			}
+			break;
+		case 11:
+			if (memcmp(line_start, "CPU variant", key_length) == 0) {
+				parse_cpu_variant(value_start, value_end, processor);
+			} else {
+				goto unknown;
+			}
+			break;
+		case 12:
+			if (memcmp(line_start, "CPU revision", key_length) == 0) {
+				parse_cpu_revision(value_start, value_end, processor);
+			} else {
+				goto unknown;
+			}
+			break;
+#if CPUINFO_ARCH_ARM
+		case 13:
+			if (memcmp(line_start, "I line length", key_length) == 0) {
+				parse_cache_number(value_start, value_end,
+					"instruction cache line size", &processor->proc_cpuinfo_cache.i_line_length,
+					&processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_LINE);
+			} else if (memcmp(line_start, "D line length", key_length) == 0) {
+				parse_cache_number(value_start, value_end,
+					"data cache line size", &processor->proc_cpuinfo_cache.d_line_length,
+					&processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_LINE);
+			} else {
+				goto unknown;
+			}
+			break;
+#endif /* CPUINFO_ARCH_ARM */
+		case 15:
+			if (memcmp(line_start, "CPU implementer", key_length) == 0) {
+				parse_cpu_implementer(value_start, value_end, processor);
+			} else if (memcmp(line_start, "CPU implementor", key_length) == 0) {
+				/* Alternative spelling of the key is accepted as well */
+				parse_cpu_implementer(value_start, value_end, processor);
+			} else {
+				goto unknown;
+			}
+			break;
+		case 16:
+			if (memcmp(line_start, "CPU architecture", key_length) == 0) {
+				parse_cpu_architecture(value_start, value_end, processor);
+			} else {
+				goto unknown;
+			}
+			break;
+		default:
+		unknown:
+			cpuinfo_log_debug("unknown /proc/cpuinfo key: %.*s", (int) key_length, line_start);
+
+	}
+	return true;
+}
+
+/*
+ * Scan /proc/cpuinfo, feeding every line to parse_line.
+ *
+ * hardware and revision are the buffers that receive the "Hardware" and
+ * "Revision" values; processors is the array of max_processors_count records
+ * that receive per-processor information.
+ *
+ * Returns the result of cpuinfo_linux_parse_multiline_file.
+ */
+bool cpuinfo_arm_linux_parse_proc_cpuinfo(
+	char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
+	char revision[restrict static CPUINFO_REVISION_VALUE_MAX],
+	uint32_t max_processors_count,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count])
+{
+	/* Members not named in the initializer (dummy_processor) are zero-initialized */
+	struct proc_cpuinfo_parser_state parser_state = {
+		.processors = processors,
+		.max_processors_count = max_processors_count,
+		.processor_index = 0,
+		.revision = revision,
+		.hardware = hardware,
+	};
+	const cpuinfo_line_callback line_callback = (cpuinfo_line_callback) parse_line;
+
+	return cpuinfo_linux_parse_multiline_file("/proc/cpuinfo", BUFFER_SIZE, line_callback, &parser_state);
+}
--- a/dep/cpuinfo/src/arm/linux/hwcap.c
+++ b/dep/cpuinfo/src/arm/linux/hwcap.c
@@ -0,0 +1,159 @@
+#include <string.h>
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <dlfcn.h>
+#include <elf.h>
+
+#if CPUINFO_MOCK
+	#include <cpuinfo-mock.h>
+#endif
+#include <cpuinfo.h>
+#include <arm/linux/api.h>
+#include <cpuinfo/log.h>
+
+#if CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_ARM && !defined(__ANDROID__)
+	#include <sys/auxv.h>
+#else
+	#define AT_HWCAP 16
+	#define AT_HWCAP2 26
+#endif
+
+
+#if CPUINFO_MOCK
+	/* Value reported as AT_HWCAP by the hwcap query functions in mock builds */
+	static uint32_t mock_hwcap = 0;
+	/* Set the mocked AT_HWCAP value */
+	void cpuinfo_set_hwcap(uint32_t hwcap) {
+		mock_hwcap = hwcap;
+	}
+
+	/* Value reported as AT_HWCAP2 by the hwcap query functions in mock builds */
+	static uint32_t mock_hwcap2 = 0;
+	/* Set the mocked AT_HWCAP2 value */
+	void cpuinfo_set_hwcap2(uint32_t hwcap2) {
+		mock_hwcap2 = hwcap2;
+	}
+#endif
+
+
+#if CPUINFO_ARCH_ARM
+	typedef unsigned long (*getauxval_function_t)(unsigned long);
+
+	/*
+	 * Read AT_HWCAP and AT_HWCAP2 through getauxval (32-bit ARM variant).
+	 *
+	 * - Mock builds report the values set through cpuinfo_set_hwcap/cpuinfo_set_hwcap2.
+	 * - On Android, getauxval is looked up in libc.so at run time; if libc.so can not
+	 *   be loaded or does not export getauxval, the outputs are left untouched and
+	 *   false is returned.
+	 * - Otherwise getauxval is called directly.
+	 *
+	 * Returns true when *hwcap and *hwcap2 were written.
+	 */
+	bool cpuinfo_arm_linux_hwcap_from_getauxval(
+		uint32_t hwcap[restrict static 1],
+		uint32_t hwcap2[restrict static 1])
+	{
+		#if CPUINFO_MOCK
+			*hwcap  = mock_hwcap;
+			*hwcap2 = mock_hwcap2;
+			return true;
+		#elif defined(__ANDROID__)
+			/* Android: dynamically check if getauxval is supported */
+
+			/* Clear any stale error state before the dl* calls below */
+			dlerror();
+			void* libc_handle = dlopen("libc.so", RTLD_LAZY);
+			if (libc_handle == NULL) {
+				cpuinfo_log_warning("failed to load libc.so: %s", dlerror());
+				return false;
+			}
+
+			const getauxval_function_t getauxval_fn = (getauxval_function_t) dlsym(libc_handle, "getauxval");
+			const bool getauxval_found = getauxval_fn != NULL;
+			if (getauxval_found) {
+				*hwcap  = getauxval_fn(AT_HWCAP);
+				*hwcap2 = getauxval_fn(AT_HWCAP2);
+			} else {
+				cpuinfo_log_info("failed to locate getauxval in libc.so: %s", dlerror());
+			}
+
+			dlclose(libc_handle);
+			return getauxval_found;
+		#else
+			/* GNU/Linux: getauxval is always supported */
+			const unsigned long auxv_hwcap  = getauxval(AT_HWCAP);
+			const unsigned long auxv_hwcap2 = getauxval(AT_HWCAP2);
+			*hwcap  = auxv_hwcap;
+			*hwcap2 = auxv_hwcap2;
+			return true;
+		#endif
+	}
+
+	#ifdef __ANDROID__
+		/*
+		 * Read AT_HWCAP and AT_HWCAP2 by scanning the Elf32_auxv_t records in /proc/self/auxv.
+		 *
+		 * Mock builds report the values set through cpuinfo_set_hwcap/cpuinfo_set_hwcap2.
+		 * Otherwise the file is read one record at a time until end of file; an entry
+		 * type that never appears is reported as 0. If the file can not be opened, a
+		 * read fails, or a read returns a partial record, a warning is logged, the
+		 * outputs are left untouched and false is returned.
+		 */
+		bool cpuinfo_arm_linux_hwcap_from_procfs(
+			uint32_t hwcap[restrict static 1],
+			uint32_t hwcap2[restrict static 1])
+		{
+			#if CPUINFO_MOCK
+				*hwcap  = mock_hwcap;
+				*hwcap2 = mock_hwcap2;
+				return true;
+			#else
+				const int auxv_fd = open("/proc/self/auxv", O_RDONLY);
+				if (auxv_fd == -1) {
+					cpuinfo_log_warning("failed to open /proc/self/auxv: %s", strerror(errno));
+					return false;
+				}
+
+				/* Values are staged locally and committed only after the whole file was read */
+				uint32_t found_hwcap = 0;
+				uint32_t found_hwcap2 = 0;
+				for (;;) {
+					Elf32_auxv_t entry;
+					const ssize_t bytes_read = read(auxv_fd, &entry, sizeof(entry));
+					if (bytes_read == 0) {
+						/* End of file: all records were consumed */
+						break;
+					}
+					if (bytes_read < 0) {
+						cpuinfo_log_warning("failed to read /proc/self/auxv: %s", strerror(errno));
+						close(auxv_fd);
+						return false;
+					}
+					if ((size_t) bytes_read != sizeof(entry)) {
+						cpuinfo_log_warning(
+							"failed to read %zu bytes from /proc/self/auxv: %zu bytes available",
+							sizeof(entry), (size_t) bytes_read);
+						close(auxv_fd);
+						return false;
+					}
+
+					if (entry.a_type == AT_HWCAP) {
+						found_hwcap = (uint32_t) entry.a_un.a_val;
+					} else if (entry.a_type == AT_HWCAP2) {
+						found_hwcap2 = (uint32_t) entry.a_un.a_val;
+					}
+				}
+
+				/* Success, commit results */
+				*hwcap  = found_hwcap;
+				*hwcap2 = found_hwcap2;
+				close(auxv_fd);
+				return true;
+			#endif
+		}
+	#endif /* __ANDROID__ */
+#elif CPUINFO_ARCH_ARM64
+	/*
+	 * Read AT_HWCAP and AT_HWCAP2 through getauxval (ARM64 variant).
+	 *
+	 * Mock builds report the values set through cpuinfo_set_hwcap/cpuinfo_set_hwcap2.
+	 * The getauxval results are converted to uint32_t before being stored.
+	 */
+	void cpuinfo_arm_linux_hwcap_from_getauxval(
+		uint32_t hwcap[restrict static 1],
+		uint32_t hwcap2[restrict static 1])
+	{
+		#if CPUINFO_MOCK
+			*hwcap  = mock_hwcap;
+			*hwcap2 = mock_hwcap2;
+		#else
+			const uint32_t auxv_hwcap  = (uint32_t) getauxval(AT_HWCAP);
+			const uint32_t auxv_hwcap2 = (uint32_t) getauxval(AT_HWCAP2);
+			*hwcap  = auxv_hwcap;
+			*hwcap2 = auxv_hwcap2;
+		#endif
+	}
+#endif
--- a/dep/cpuinfo/src/arm/linux/init.c
+++ b/dep/cpuinfo/src/arm/linux/init.c
@@ -0,0 +1,765 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cpuinfo.h>
+#include <arm/linux/api.h>
+#if defined(__ANDROID__)
+	#include <arm/android/api.h>
+#endif
+#include <arm/api.h>
+#include <arm/midr.h>
+#include <linux/api.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+
+struct cpuinfo_arm_isa cpuinfo_isa = { 0 };
+
+static struct cpuinfo_package package = { { 0 } };
+
+/* Check that every bit set in mask is also set in bitfield */
+static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
+	const uint32_t missing_bits = mask & ~bitfield;
+	return missing_bits == 0;
+}
+
+/* Return the smaller of two unsigned 32-bit values */
+static inline uint32_t min(uint32_t a, uint32_t b) {
+	if (b < a) {
+		return b;
+	}
+	return a;
+}
+
+/* Three-way comparison: negative if a < b, zero if a == b, positive if a > b */
+static inline int cmp(uint32_t a, uint32_t b) {
+	if (a < b) {
+		return -1;
+	}
+	if (a > b) {
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Siblings-list callback: merge processor and its siblings into one cluster.
+ *
+ * Marks the processor and every valid sibling in [siblings_start, siblings_end)
+ * with CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER, and propagates the lowest package
+ * leader id seen so far to each visited sibling and, at the end, to the processor itself.
+ * Siblings without CPUINFO_LINUX_FLAG_VALID are logged and skipped.
+ *
+ * Always returns true.
+ */
+static bool cluster_siblings_parser(
+	uint32_t processor, uint32_t siblings_start, uint32_t siblings_end,
+	struct cpuinfo_arm_linux_processor* processors)
+{
+	struct cpuinfo_arm_linux_processor* const current = &processors[processor];
+	current->flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
+
+	/* Running minimum of the leader ids of the processor and the siblings visited so far */
+	uint32_t lowest_leader_id = current->package_leader_id;
+	for (uint32_t index = siblings_start; index < siblings_end; index++) {
+		struct cpuinfo_arm_linux_processor* const sibling = &processors[index];
+		if (!bitmask_all(sibling->flags, CPUINFO_LINUX_FLAG_VALID)) {
+			cpuinfo_log_info("invalid processor %"PRIu32" reported as a sibling for processor %"PRIu32,
+				index, processor);
+			continue;
+		}
+
+		lowest_leader_id = min(lowest_leader_id, sibling->package_leader_id);
+
+		sibling->flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
+		sibling->package_leader_id = lowest_leader_id;
+	}
+
+	current->package_leader_id = lowest_leader_id;
+
+	return true;
+}
+
+/*
+ * qsort comparator for struct cpuinfo_arm_linux_processor.
+ *
+ * Ordering criteria, in decreasing priority:
+ * 1. Processors with CPUINFO_LINUX_FLAG_VALID come before those without.
+ * 2. Higher midr_score_core first (only consulted when the MIDR values differ).
+ * 3. Higher max_frequency first.
+ * 4. Higher package_leader_id first.
+ * 5. Lower system_processor_id first.
+ */
+static int cmp_arm_linux_processor(const void* ptr_a, const void* ptr_b) {
+	const struct cpuinfo_arm_linux_processor* lhs = ptr_a;
+	const struct cpuinfo_arm_linux_processor* rhs = ptr_b;
+
+	/* Move usable processors towards the start of the array */
+	const bool lhs_usable = bitmask_all(lhs->flags, CPUINFO_LINUX_FLAG_VALID);
+	const bool rhs_usable = bitmask_all(rhs->flags, CPUINFO_LINUX_FLAG_VALID);
+	if (lhs_usable != rhs_usable) {
+		return lhs_usable ? -1 : 1;
+	}
+
+	/* Compare based on core type (e.g. Cortex-A57 < Cortex-A53) */
+	if (lhs->midr != rhs->midr) {
+		const uint32_t lhs_score = midr_score_core(lhs->midr);
+		const uint32_t rhs_score = midr_score_core(rhs->midr);
+		/* Operands are swapped to sort in descending order */
+		const int by_score = cmp(rhs_score, lhs_score);
+		if (by_score != 0) {
+			return by_score;
+		}
+	}
+
+	/* Compare based on core frequency (e.g. 2.0 GHz < 1.2 GHz) */
+	const int by_frequency = cmp(rhs->max_frequency, lhs->max_frequency);
+	if (by_frequency != 0) {
+		return by_frequency;
+	}
+
+	/* Compare based on cluster leader id (i.e. cluster 1 < cluster 0) */
+	const int by_cluster = cmp(rhs->package_leader_id, lhs->package_leader_id);
+	if (by_cluster != 0) {
+		return by_cluster;
+	}
+
+	/* Compare based on system processor id (i.e. processor 0 < processor 1) */
+	return cmp(lhs->system_processor_id, rhs->system_processor_id);
+}
+
+void cpuinfo_arm_linux_init(void) {
+	struct cpuinfo_arm_linux_processor* arm_linux_processors = NULL;
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_uarch_info* uarchs = NULL;
+	struct cpuinfo_cache* l1i = NULL;
+	struct cpuinfo_cache* l1d = NULL;
+	struct cpuinfo_cache* l2 = NULL;
+	struct cpuinfo_cache* l3 = NULL;
+	const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL;
+	const struct cpuinfo_core** linux_cpu_to_core_map = NULL;
+	uint32_t* linux_cpu_to_uarch_index_map = NULL;
+
+	const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count();
+	cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count);
+
+	const uint32_t max_possible_processors_count = 1 +
+		cpuinfo_linux_get_max_possible_processor(max_processors_count);
+	cpuinfo_log_debug("maximum possible processors count: %"PRIu32, max_possible_processors_count);
+	const uint32_t max_present_processors_count = 1 +
+		cpuinfo_linux_get_max_present_processor(max_processors_count);
+	cpuinfo_log_debug("maximum present processors count: %"PRIu32, max_present_processors_count);
+
+	uint32_t valid_processor_mask = 0;
+	uint32_t arm_linux_processors_count = max_processors_count;
+	if (max_present_processors_count != 0) {
+		arm_linux_processors_count = min(arm_linux_processors_count, max_present_processors_count);
+		valid_processor_mask = CPUINFO_LINUX_FLAG_PRESENT;
+	}
+	if (max_possible_processors_count != 0) {
+		arm_linux_processors_count = min(arm_linux_processors_count, max_possible_processors_count);
+		valid_processor_mask |= CPUINFO_LINUX_FLAG_POSSIBLE;
+	}
+	if ((max_present_processors_count | max_possible_processors_count) == 0) {
+		cpuinfo_log_error("failed to parse both lists of possible and present processors");
+		return;
+	}
+
+	arm_linux_processors = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_arm_linux_processor));
+	if (arm_linux_processors == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %"PRIu32" ARM logical processors",
+			arm_linux_processors_count * sizeof(struct cpuinfo_arm_linux_processor),
+			arm_linux_processors_count);
+		return;
+	}
+
+	if (max_possible_processors_count) {
+		cpuinfo_linux_detect_possible_processors(
+			arm_linux_processors_count, &arm_linux_processors->flags,
+			sizeof(struct cpuinfo_arm_linux_processor),
+			CPUINFO_LINUX_FLAG_POSSIBLE);
+	}
+
+	if (max_present_processors_count) {
+		cpuinfo_linux_detect_present_processors(
+			arm_linux_processors_count, &arm_linux_processors->flags,
+			sizeof(struct cpuinfo_arm_linux_processor),
+			CPUINFO_LINUX_FLAG_PRESENT);
+	}
+
+#if defined(__ANDROID__)
+	struct cpuinfo_android_properties android_properties;
+	cpuinfo_arm_android_parse_properties(&android_properties);
+#else
+	char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX];
+#endif
+	char proc_cpuinfo_revision[CPUINFO_REVISION_VALUE_MAX];
+
+	if (!cpuinfo_arm_linux_parse_proc_cpuinfo(
+#if defined(__ANDROID__)
+			android_properties.proc_cpuinfo_hardware,
+#else
+			proc_cpuinfo_hardware,
+#endif
+			proc_cpuinfo_revision,
+			arm_linux_processors_count,
+			arm_linux_processors)) {
+		cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo");
+		return;
+	}
+
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, valid_processor_mask)) {
+			arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID;
+			cpuinfo_log_debug("parsed processor %"PRIu32" MIDR 0x%08"PRIx32,
+				i, arm_linux_processors[i].midr);
+		}
+	}
+
+	uint32_t valid_processors = 0, last_midr = 0;
+	#if CPUINFO_ARCH_ARM
+	uint32_t last_architecture_version = 0, last_architecture_flags = 0;
+	#endif
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		arm_linux_processors[i].system_processor_id = i;
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			valid_processors += 1;
+
+			if (!(arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR)) {
+				/*
+				 * Processor is in possible and present lists, but not reported in /proc/cpuinfo.
+				 * This is fairly common: high-index processors can be not reported if they are offline.
+				 */
+				cpuinfo_log_info("processor %"PRIu32" is not listed in /proc/cpuinfo", i);
+			}
+
+			if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
+				last_midr = arm_linux_processors[i].midr;
+			}
+			#if CPUINFO_ARCH_ARM
+				if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_ARCHITECTURE)) {
+					last_architecture_version = arm_linux_processors[i].architecture_version;
+					last_architecture_flags   = arm_linux_processors[i].architecture_flags;
+				}
+			#endif
+		} else {
+			/* Processor reported in /proc/cpuinfo, but not in possible and/or present lists: log and ignore */
+			if (!(arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR)) {
+				cpuinfo_log_warning("invalid processor %"PRIu32" reported in /proc/cpuinfo", i);
+			}
+		}
+	}
+
+#if defined(__ANDROID__)
+	const struct cpuinfo_arm_chipset chipset =
+		cpuinfo_arm_android_decode_chipset(&android_properties, valid_processors, 0);
+#else
+	const struct cpuinfo_arm_chipset chipset =
+		cpuinfo_arm_linux_decode_chipset(proc_cpuinfo_hardware, proc_cpuinfo_revision, valid_processors, 0);
+#endif
+
+	#if CPUINFO_ARCH_ARM
+		uint32_t isa_features = 0, isa_features2 = 0;
+		#ifdef __ANDROID__
+			/*
+			 * On Android before API 20, libc.so does not provide getauxval function.
+			 * Thus, we try to dynamically find it, or use two fallback mechanisms:
+			 * 1. dlopen libc.so, and try to find getauxval
+			 * 2. Parse /proc/self/auxv procfs file
+			 * 3. Use features reported in /proc/cpuinfo
+			 */
+			if (!cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2)) {
+				/* getauxval can't be used, fall back to parsing /proc/self/auxv */
+				if (!cpuinfo_arm_linux_hwcap_from_procfs(&isa_features, &isa_features2)) {
+					/*
+					 * Reading /proc/self/auxv failed, probably due to file permissions.
+					 * Use information from /proc/cpuinfo to detect ISA.
+					 *
+					 * If different processors report different ISA features, take the intersection.
+					 */
+					uint32_t processors_with_features = 0;
+					for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+						if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_ARM_LINUX_VALID_FEATURES)) {
+							if (processors_with_features == 0) {
+								isa_features = arm_linux_processors[i].features;
+								isa_features2 = arm_linux_processors[i].features2;
+							} else {
+								isa_features &= arm_linux_processors[i].features;
+								isa_features2 &= arm_linux_processors[i].features2;
+							}
+							processors_with_features += 1;
+						}
+					}
+				}
+			}
+		#else
+			/* On GNU/Linux getauxval is always available */
+			cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2);
+		#endif
+		cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
+			isa_features, isa_features2,
+			last_midr, last_architecture_version, last_architecture_flags,
+			&chipset, &cpuinfo_isa);
+	#elif CPUINFO_ARCH_ARM64
+		uint32_t isa_features = 0, isa_features2 = 0;
+		/* getauxval is always available on ARM64 Android */
+		cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2);
+		cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
+			isa_features, isa_features2, last_midr, &chipset, &cpuinfo_isa);
+	#endif
+
+	/* Detect min/max frequency and package ID */
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			const uint32_t max_frequency = cpuinfo_linux_get_processor_max_frequency(i);
+			if (max_frequency != 0) {
+				arm_linux_processors[i].max_frequency = max_frequency;
+				arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
+			}
+
+			const uint32_t min_frequency = cpuinfo_linux_get_processor_min_frequency(i);
+			if (min_frequency != 0) {
+				arm_linux_processors[i].min_frequency = min_frequency;
+				arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
+			}
+
+			if (cpuinfo_linux_get_processor_package_id(i, &arm_linux_processors[i].package_id)) {
+				arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_ID;
+			}
+		}
+	}
+
+	/* Initialize topology group IDs */
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		arm_linux_processors[i].package_leader_id = i;
+	}
+
+	/* Propagate topology group IDs among siblings */
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (!bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			continue;
+		}
+
+		if (arm_linux_processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_ID) {
+			cpuinfo_linux_detect_core_siblings(
+				arm_linux_processors_count, i,
+				(cpuinfo_siblings_callback) cluster_siblings_parser,
+				arm_linux_processors);
+		}
+	}
+
+	/* Propagate all cluster IDs */
+	uint32_t clustered_processors = 0;
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) {
+			clustered_processors += 1;
+
+			const uint32_t package_leader_id = arm_linux_processors[i].package_leader_id;
+			if (package_leader_id < i) {
+				arm_linux_processors[i].package_leader_id = arm_linux_processors[package_leader_id].package_leader_id;
+			}
+
+			cpuinfo_log_debug("processor %"PRIu32" clustered with processor %"PRIu32" as inferred from system siblings lists",
+				i, arm_linux_processors[i].package_leader_id);
+		}
+	}
+
+	if (clustered_processors != valid_processors) {
+		/*
+		 * Topology information about some or all logical processors may be unavailable, for the following reasons:
+		 * - Linux kernel is too old, or configured without support for topology information in sysfs.
+		 * - Core is offline, and Linux kernel is configured to not report topology for offline cores.
+		 *
+		 * In this case, we assign processors to clusters using two methods:
+		 * - Try heuristic cluster configurations (e.g. 6-core SoC usually has 4+2 big.LITTLE configuration).
+		 * - If heuristic failed, assign processors to core clusters in a sequential scan.
+		 */
+		if (!cpuinfo_arm_linux_detect_core_clusters_by_heuristic(valid_processors, arm_linux_processors_count, arm_linux_processors)) {
+			cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(arm_linux_processors_count, arm_linux_processors);
+		}
+	}
+
+	cpuinfo_arm_linux_count_cluster_processors(arm_linux_processors_count, arm_linux_processors);
+
+	const uint32_t cluster_count = cpuinfo_arm_linux_detect_cluster_midr(
+		&chipset,
+		arm_linux_processors_count, valid_processors, arm_linux_processors);
+
+	/* Initialize core vendor, uarch, MIDR, and frequency for every logical processor */
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			const uint32_t cluster_leader = arm_linux_processors[i].package_leader_id;
+			if (cluster_leader == i) {
+				/* Cluster leader: decode core vendor and uarch */
+				cpuinfo_arm_decode_vendor_uarch(
+				arm_linux_processors[cluster_leader].midr,
+#if CPUINFO_ARCH_ARM
+				!!(arm_linux_processors[cluster_leader].features & CPUINFO_ARM_LINUX_FEATURE_VFPV4),
+#endif
+				&arm_linux_processors[cluster_leader].vendor,
+				&arm_linux_processors[cluster_leader].uarch);
+			} else {
+				/* Cluster non-leader: copy vendor, uarch, MIDR, and frequency from cluster leader */
+				arm_linux_processors[i].flags |= arm_linux_processors[cluster_leader].flags &
+					(CPUINFO_ARM_LINUX_VALID_MIDR | CPUINFO_LINUX_FLAG_MAX_FREQUENCY);
+				arm_linux_processors[i].midr = arm_linux_processors[cluster_leader].midr;
+				arm_linux_processors[i].vendor = arm_linux_processors[cluster_leader].vendor;
+				arm_linux_processors[i].uarch = arm_linux_processors[cluster_leader].uarch;
+				arm_linux_processors[i].max_frequency = arm_linux_processors[cluster_leader].max_frequency;
+			}
+		}
+	}
+
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			cpuinfo_log_debug("post-analysis processor %"PRIu32": MIDR %08"PRIx32" frequency %"PRIu32,
+				i, arm_linux_processors[i].midr, arm_linux_processors[i].max_frequency);
+		}
+	}
+
+	qsort(arm_linux_processors, arm_linux_processors_count,
+		sizeof(struct cpuinfo_arm_linux_processor), cmp_arm_linux_processor);
+
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			cpuinfo_log_debug("post-sort processor %"PRIu32": system id %"PRIu32" MIDR %08"PRIx32" frequency %"PRIu32,
+				i, arm_linux_processors[i].system_processor_id, arm_linux_processors[i].midr, arm_linux_processors[i].max_frequency);
+		}
+	}
+
+	uint32_t uarchs_count = 0;
+	enum cpuinfo_uarch last_uarch;
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (uarchs_count == 0 || arm_linux_processors[i].uarch != last_uarch) {
+				last_uarch = arm_linux_processors[i].uarch;
+				uarchs_count += 1;
+			}
+			arm_linux_processors[i].uarch_index = uarchs_count - 1;
+		}
+	}
+
+	/*
+	 * Assumptions:
+	 * - No SMP (i.e. each core supports only one hardware thread).
+	 * - Level 1 instruction and data caches are private to the core clusters.
+	 * - Level 2 and level 3 cache is shared between cores in the same cluster.
+	 */
+	cpuinfo_arm_chipset_to_string(&chipset, package.name);
+	package.processor_count = valid_processors;
+	package.core_count = valid_processors;
+	package.cluster_count = cluster_count;
+
+	processors = calloc(valid_processors, sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
+			valid_processors * sizeof(struct cpuinfo_processor), valid_processors);
+		goto cleanup;
+	}
+
+	cores = calloc(valid_processors, sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
+			valid_processors * sizeof(struct cpuinfo_core), valid_processors);
+		goto cleanup;
+	}
+
+	clusters = calloc(cluster_count, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
+			cluster_count * sizeof(struct cpuinfo_cluster), cluster_count);
+		goto cleanup;
+	}
+
+	uarchs = calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info));
+	if (uarchs == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" microarchitectures",
+			uarchs_count * sizeof(struct cpuinfo_uarch_info), uarchs_count);
+		goto cleanup;
+	}
+
+	linux_cpu_to_processor_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_processor*));
+	if (linux_cpu_to_processor_map == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" logical processor mapping entries",
+			arm_linux_processors_count * sizeof(struct cpuinfo_processor*), arm_linux_processors_count);
+		goto cleanup;
+	}
+
+	linux_cpu_to_core_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_core*));
+	if (linux_cpu_to_core_map == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" core mapping entries",
+			arm_linux_processors_count * sizeof(struct cpuinfo_core*), arm_linux_processors_count);
+		goto cleanup;
+	}
+
+	if (uarchs_count > 1) {
+		linux_cpu_to_uarch_index_map = calloc(arm_linux_processors_count, sizeof(uint32_t));
+		if (linux_cpu_to_uarch_index_map == NULL) {
+			cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" uarch index mapping entries",
+				arm_linux_processors_count * sizeof(uint32_t), arm_linux_processors_count);
+			goto cleanup;
+		}
+	}
+
+	l1i = calloc(valid_processors, sizeof(struct cpuinfo_cache));
+	if (l1i == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
+			valid_processors * sizeof(struct cpuinfo_cache), valid_processors);
+		goto cleanup;
+	}
+
+	l1d = calloc(valid_processors, sizeof(struct cpuinfo_cache));
+	if (l1d == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
+			valid_processors * sizeof(struct cpuinfo_cache), valid_processors);
+		goto cleanup;
+	}
+
+	uint32_t uarchs_index = 0;
+	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
+		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (uarchs_index == 0 || arm_linux_processors[i].uarch != last_uarch) {
+				last_uarch = arm_linux_processors[i].uarch;
+				uarchs[uarchs_index] = (struct cpuinfo_uarch_info) {
+					.uarch = arm_linux_processors[i].uarch,
+					.midr = arm_linux_processors[i].midr,
+				};
+				uarchs_index += 1;
+			}
+			uarchs[uarchs_index - 1].processor_count += 1;
+			uarchs[uarchs_index - 1].core_count += 1;
+		}
+	}
+
+	uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX;
+	/* Indication whether L3 (if it exists) is shared between all cores */
+	bool shared_l3 = true;
+	/* Populate cache infromation structures in l1i, l1d */
+	for (uint32_t i = 0; i < valid_processors; i++) {
+		if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
+			cluster_id += 1;
+			clusters[cluster_id] = (struct cpuinfo_cluster) {
+				.processor_start = i,
+				.processor_count = arm_linux_processors[i].package_processor_count,
+				.core_start = i,
+				.core_count = arm_linux_processors[i].package_processor_count,
+				.cluster_id = cluster_id,
+				.package = &package,
+				.vendor = arm_linux_processors[i].vendor,
+				.uarch = arm_linux_processors[i].uarch,
+				.midr = arm_linux_processors[i].midr,
+			};
+		}
+
+		processors[i].smt_id = 0;
+		processors[i].core = cores + i;
+		processors[i].cluster = clusters + cluster_id;
+		processors[i].package = &package;
+		processors[i].linux_id = (int) arm_linux_processors[i].system_processor_id;
+		processors[i].cache.l1i = l1i + i;
+		processors[i].cache.l1d = l1d + i;
+		linux_cpu_to_processor_map[arm_linux_processors[i].system_processor_id] = &processors[i];
+
+		cores[i].processor_start = i;
+		cores[i].processor_count = 1;
+		cores[i].core_id = i;
+		cores[i].cluster = clusters + cluster_id;
+		cores[i].package = &package;
+		cores[i].vendor = arm_linux_processors[i].vendor;
+		cores[i].uarch = arm_linux_processors[i].uarch;
+		cores[i].midr = arm_linux_processors[i].midr;
+		linux_cpu_to_core_map[arm_linux_processors[i].system_processor_id] = &cores[i];
+
+		if (linux_cpu_to_uarch_index_map != NULL) {
+			linux_cpu_to_uarch_index_map[arm_linux_processors[i].system_processor_id] =
+				arm_linux_processors[i].uarch_index;
+		}
+
+		struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 };
+		cpuinfo_arm_decode_cache(
+			arm_linux_processors[i].uarch,
+			arm_linux_processors[i].package_processor_count,
+			arm_linux_processors[i].midr,
+			&chipset,
+			cluster_id,
+			arm_linux_processors[i].architecture_version,
+			&l1i[i], &l1d[i], &temp_l2, &temp_l3);
+		l1i[i].processor_start = l1d[i].processor_start = i;
+		l1i[i].processor_count = l1d[i].processor_count = 1;
+		#if CPUINFO_ARCH_ARM
+			/* L1I reported in /proc/cpuinfo overrides defaults */
+			if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_ICACHE)) {
+				l1i[i] = (struct cpuinfo_cache) {
+					.size = arm_linux_processors[i].proc_cpuinfo_cache.i_size,
+					.associativity = arm_linux_processors[i].proc_cpuinfo_cache.i_assoc,
+					.sets = arm_linux_processors[i].proc_cpuinfo_cache.i_sets,
+					.partitions = 1,
+					.line_size = arm_linux_processors[i].proc_cpuinfo_cache.i_line_length
+				};
+			}
+			/* L1D reported in /proc/cpuinfo overrides defaults */
+			if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_DCACHE)) {
+				l1d[i] = (struct cpuinfo_cache) {
+					.size = arm_linux_processors[i].proc_cpuinfo_cache.d_size,
+					.associativity = arm_linux_processors[i].proc_cpuinfo_cache.d_assoc,
+					.sets = arm_linux_processors[i].proc_cpuinfo_cache.d_sets,
+					.partitions = 1,
+					.line_size = arm_linux_processors[i].proc_cpuinfo_cache.d_line_length
+				};
+			}
+		#endif
+
+		if (temp_l3.size != 0) {
+			/*
+			 * Assumptions:
+			 * - L2 is private to each core
+			 * - L3 is shared by cores in the same cluster
+			 * - If cores in different clusters report the same L3, it is shared between all cores.
+			 */
+			l2_count += 1;
+			if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
+				if (cluster_id == 0) {
+					big_l3_size = temp_l3.size;
+					l3_count = 1;
+				} else if (temp_l3.size != big_l3_size) {
+					/* If some cores have different L3 size, L3 is not shared between all cores */
+					shared_l3 = false;
+					l3_count += 1;
+				}
+			}
+		} else {
+			/* If some cores don't have L3 cache, L3 is not shared between all cores */
+			shared_l3 = false;
+			if (temp_l2.size != 0) {
+				/* Assume L2 is shared by cores in the same cluster */
+				if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
+					l2_count += 1;
+				}
+			}
+		}
+	}
+
+	if (l2_count != 0) {
+		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
+		if (l2 == NULL) {
+			cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
+				l2_count * sizeof(struct cpuinfo_cache), l2_count);
+			goto cleanup;
+		}
+
+		if (l3_count != 0) {
+			l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
+			if (l3 == NULL) {
+				cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches",
+					l3_count * sizeof(struct cpuinfo_cache), l3_count);
+				goto cleanup;
+			}
+		}
+	}
+
+	cluster_id = UINT32_MAX;
+	uint32_t l2_index = UINT32_MAX, l3_index = UINT32_MAX;
+	for (uint32_t i = 0; i < valid_processors; i++) {
+		if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
+			cluster_id++;
+		}
+
+		struct cpuinfo_cache dummy_l1i, dummy_l1d, temp_l2 = { 0 }, temp_l3 = { 0 };
+		cpuinfo_arm_decode_cache(
+			arm_linux_processors[i].uarch,
+			arm_linux_processors[i].package_processor_count,
+			arm_linux_processors[i].midr,
+			&chipset,
+			cluster_id,
+			arm_linux_processors[i].architecture_version,
+			&dummy_l1i, &dummy_l1d, &temp_l2, &temp_l3);
+
+		if (temp_l3.size != 0) {
+			/*
+			 * Assumptions:
+			 * - L2 is private to each core
+			 * - L3 is shared by cores in the same cluster
+			 * - If cores in different clusters report the same L3, it is shared between all cores.
+			 */
+			l2_index += 1;
+			l2[l2_index] = (struct cpuinfo_cache) {
+				.size            = temp_l2.size,
+				.associativity   = temp_l2.associativity,
+				.sets            = temp_l2.sets,
+				.partitions      = 1,
+				.line_size       = temp_l2.line_size,
+				.flags           = temp_l2.flags,
+				.processor_start = i,
+				.processor_count = 1,
+			};
+			processors[i].cache.l2 = l2 + l2_index;
+			if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
+				l3_index += 1;
+				if (l3_index < l3_count) {
+					l3[l3_index] = (struct cpuinfo_cache) {
+						.size            = temp_l3.size,
+						.associativity   = temp_l3.associativity,
+						.sets            = temp_l3.sets,
+						.partitions      = 1,
+						.line_size       = temp_l3.line_size,
+						.flags           = temp_l3.flags,
+						.processor_start = i,
+						.processor_count =
+							shared_l3 ? valid_processors : arm_linux_processors[i].package_processor_count,
+					};
+				}
+			}
+			if (shared_l3) {
+				processors[i].cache.l3 = l3;
+			} else if (l3_index < l3_count) {
+				processors[i].cache.l3 = l3 + l3_index;
+			}
+		} else if (temp_l2.size != 0) {
+			/* Assume L2 is shared by cores in the same cluster */
+			if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
+				l2_index += 1;
+				l2[l2_index] = (struct cpuinfo_cache) {
+					.size            = temp_l2.size,
+					.associativity   = temp_l2.associativity,
+					.sets            = temp_l2.sets,
+					.partitions      = 1,
+					.line_size       = temp_l2.line_size,
+					.flags           = temp_l2.flags,
+					.processor_start = i,
+					.processor_count = arm_linux_processors[i].package_processor_count,
+				};
+			}
+			processors[i].cache.l2 = l2 + l2_index;
+		}
+	}
+
+	/* Commit */
+	cpuinfo_processors = processors;
+	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
+	cpuinfo_packages = &package;
+	cpuinfo_uarchs = uarchs;
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2]  = l2;
+	cpuinfo_cache[cpuinfo_cache_level_3]  = l3;
+
+	cpuinfo_processors_count = valid_processors;
+	cpuinfo_cores_count = valid_processors;
+	cpuinfo_clusters_count = cluster_count;
+	cpuinfo_packages_count = 1;
+	cpuinfo_uarchs_count = uarchs_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors;
+	cpuinfo_cache_count[cpuinfo_cache_level_2]  = l2_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_3]  = l3_count;
+	cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]);
+
+	cpuinfo_linux_cpu_max = arm_linux_processors_count;
+	cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
+	cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
+	cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map;
+
+	__sync_synchronize();
+
+	cpuinfo_is_initialized = true;
+
+	processors = NULL;
+	cores = NULL;
+	clusters = NULL;
+	uarchs = NULL;
+	l1i = l1d = l2 = l3 = NULL;
+	linux_cpu_to_processor_map = NULL;
+	linux_cpu_to_core_map = NULL;
+	linux_cpu_to_uarch_index_map = NULL;
+
+cleanup:
+	free(arm_linux_processors);
+	free(processors);
+	free(cores);
+	free(clusters);
+	free(uarchs);
+	free(l1i);
+	free(l1d);
+	free(l2);
+	free(l3);
+	free(linux_cpu_to_processor_map);
+	free(linux_cpu_to_core_map);
+	free(linux_cpu_to_uarch_index_map);
+}
--- a/dep/cpuinfo/src/arm/linux/midr.c
+++ b/dep/cpuinfo/src/arm/linux/midr.c
@@ -0,0 +1,863 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cpuinfo.h>
+#include <arm/linux/api.h>
+#if defined(__ANDROID__)
+	#include <arm/android/api.h>
+#endif
+#include <arm/api.h>
+#include <arm/midr.h>
+#include <linux/api.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+#include <cpuinfo/common.h>
+
+
+#define CLUSTERS_MAX 3
+
+static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
+	return (mask & ~bitfield) == 0; /* true iff no bit requested in mask is missing from bitfield */
+}
+
+/* Tabulated core cluster configuration of one chipset (identified by series and model number) */
+struct cluster_config {
+	/* Total number of cores (logical processors) across all clusters */
+	uint8_t cores;
+	/* ARM chipset series (see cpuinfo_arm_chipset_series enum) */
+	uint8_t series;
+	/* Chipset model number (see cpuinfo_arm_chipset struct) */
+	uint16_t model;
+	/* Number of core clusters in the CPU package (at most CLUSTERS_MAX) */
+	uint8_t clusters;
+	/*
+	 * Number of cores in each cluster:
+	 * - Symmetric configurations: [0] = # cores
+	 * - big.LITTLE configurations: [0] = # LITTLE cores, [1] = # big cores
+	 * - Max.Med.Min configurations: [0] = # Min cores, [1] = # Med cores, [2] = # Max cores
+	 */
+	uint8_t cluster_cores[CLUSTERS_MAX];
+	/*
+	 * MIDR of cores in each cluster (same indexing as cluster_cores):
+	 * - Symmetric configurations: [0] = core MIDR
+	 * - big.LITTLE configurations: [0] = LITTLE core MIDR, [1] = big core MIDR
+	 * - Max.Med.Min configurations: [0] = Min core MIDR, [1] = Med core MIDR, [2] = Max core MIDR
+	 */
+	uint32_t cluster_midr[CLUSTERS_MAX];
+};
+
+/*
+ * The list of chipsets where MIDR may not be unambiguously decoded at least on some devices.
+ * The typical reasons MIDRs cannot be decoded are buggy kernels, which either do not report all MIDR
+ * information (e.g. on ATM7029 kernel doesn't report CPU Part), or chipsets have more than one type of cores
+ * (i.e. 4x Cortex-A53 + 4x Cortex-A53 is out) and buggy kernels report MIDR information only about some cores
+ * in /proc/cpuinfo (either only online cores, or only the core that reads /proc/cpuinfo). On these kernels/chipsets,
+ * it is not possible to detect all core types by just parsing /proc/cpuinfo, so we use chipset name and this table to
+ * find their MIDR (and thus microarchitecture, cache, etc).
+ *
+ * Note: not all chipsets with heterogeneous multiprocessing need an entry in this table. The following HMP
+ * chipsets always list information about all cores in /proc/cpuinfo:
+ *
+ * - Snapdragon 660
+ * - Snapdragon 820 (MSM8996)
+ * - Snapdragon 821 (MSM8996PRO)
+ * - Snapdragon 835 (MSM8998)
+ * - Exynos 8895
+ * - Kirin 960
+ *
+ * As these are all new processors, there is hope that this table won't uncontrollably grow over time.
+ */
+static const struct cluster_config cluster_configs[] = {
+#if CPUINFO_ARCH_ARM
+	{
+		/*
+		 * MSM8916 (Snapdragon 410): 4x Cortex-A53
+		 * Some AArch32 phones use non-standard /proc/cpuinfo format.
+		 */
+		.cores = 4,
+		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
+		.model = UINT16_C(8916),
+		.clusters = 1,
+		.cluster_cores = {
+			[0] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD030),
+		},
+	},
+	{
+		/*
+		 * MSM8939 (Snapdragon 615): 4x Cortex-A53 + 4x Cortex-A53
+		 * Some AArch32 phones use non-standard /proc/cpuinfo format.
+		 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
+		.model = UINT16_C(8939),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD034),
+			[1] = UINT32_C(0x410FD034),
+		},
+	},
+#endif
+	{
+		/* MSM8956 (Snapdragon 650): 2x Cortex-A72 + 4x Cortex-A53 */
+		.cores = 6,
+		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
+		.model = UINT16_C(8956),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 2,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD034),
+			[1] = UINT32_C(0x410FD080),
+		},
+	},
+	{
+		/* MSM8976/MSM8976PRO (Snapdragon 652/653): 4x Cortex-A72 + 4x Cortex-A53 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
+		.model = UINT16_C(8976),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD034),
+			[1] = UINT32_C(0x410FD080),
+		},
+	},
+	{
+		/* MSM8992 (Snapdragon 808): 2x Cortex-A57 + 4x Cortex-A53 */
+		.cores = 6,
+		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
+		.model = UINT16_C(8992),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 2,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD033),
+			[1] = UINT32_C(0x411FD072),
+		},
+	},
+	{
+		/* MSM8994/MSM8994V (Snapdragon 810): 4x Cortex-A57 + 4x Cortex-A53 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
+		.model = UINT16_C(8994),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD032),
+			[1] = UINT32_C(0x411FD071),
+		},
+	},
+#if CPUINFO_ARCH_ARM
+	{
+		/* Exynos 5422: 4x Cortex-A15 + 4x Cortex-A7 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_samsung_exynos,
+		.model = UINT16_C(5422),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FC073),
+			[1] = UINT32_C(0x412FC0F3),
+		},
+	},
+	{
+		/* Exynos 5430: 4x Cortex-A15 + 4x Cortex-A7 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_samsung_exynos,
+		.model = UINT16_C(5430),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FC074),
+			[1] = UINT32_C(0x413FC0F3),
+		},
+	},
+#endif /* CPUINFO_ARCH_ARM */
+	{
+		/* Exynos 5433: 4x Cortex-A57 + 4x Cortex-A53 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_samsung_exynos,
+		.model = UINT16_C(5433),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD031),
+			[1] = UINT32_C(0x411FD070),
+		},
+	},
+	{
+		/* Exynos 7420: 4x Cortex-A57 + 4x Cortex-A53 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_samsung_exynos,
+		.model = UINT16_C(7420),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD032),
+			[1] = UINT32_C(0x411FD070),
+		},
+	},
+	{
+		/* Exynos 8890: 4x Exynos M1 + 4x Cortex-A53 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_samsung_exynos,
+		.model = UINT16_C(8890),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD034),
+			[1] = UINT32_C(0x531F0011),
+		},
+	},
+#if CPUINFO_ARCH_ARM
+	{
+		/* Kirin 920: 4x Cortex-A15 + 4x Cortex-A7 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_hisilicon_kirin,
+		.model = UINT16_C(920),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FC075),
+			[1] = UINT32_C(0x413FC0F3),
+		},
+	},
+	{
+		/* Kirin 925: 4x Cortex-A15 + 4x Cortex-A7 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_hisilicon_kirin,
+		.model = UINT16_C(925),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FC075),
+			[1] = UINT32_C(0x413FC0F3),
+		},
+	},
+	{
+		/* Kirin 928: 4x Cortex-A15 + 4x Cortex-A7 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_hisilicon_kirin,
+		.model = UINT16_C(928),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FC075),
+			[1] = UINT32_C(0x413FC0F3),
+		},
+	},
+#endif /* CPUINFO_ARCH_ARM */
+	{
+		/* Kirin 950: 4x Cortex-A72 + 4x Cortex-A53 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_hisilicon_kirin,
+		.model = UINT16_C(950),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD034),
+			[1] = UINT32_C(0x410FD080),
+		},
+	},
+	{
+		/* Kirin 955: 4x Cortex-A72 + 4x Cortex-A53 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_hisilicon_kirin,
+		.model = UINT16_C(955),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD034),
+			[1] = UINT32_C(0x410FD080),
+		},
+	},
+#if CPUINFO_ARCH_ARM
+	{
+		/* MediaTek MT8135: 2x Cortex-A7 + 2x Cortex-A15 */
+		.cores = 4,
+		.series = cpuinfo_arm_chipset_series_mediatek_mt,
+		.model = UINT16_C(8135),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 2,
+			[1] = 2,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FC073),
+			[1] = UINT32_C(0x413FC0F2),
+		},
+	},
+#endif
+	{
+		/* MediaTek MT8173: 2x Cortex-A72 + 2x Cortex-A53 */
+		.cores = 4,
+		.series = cpuinfo_arm_chipset_series_mediatek_mt,
+		.model = UINT16_C(8173),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 2,
+			[1] = 2,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD032),
+			[1] = UINT32_C(0x410FD080),
+		},
+	},
+	{
+		/* MediaTek MT8176: 2x Cortex-A72 + 4x Cortex-A53 */
+		.cores = 6,
+		.series = cpuinfo_arm_chipset_series_mediatek_mt,
+		.model = UINT16_C(8176),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 2,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD032),
+			[1] = UINT32_C(0x410FD080),
+		},
+	},
+#if CPUINFO_ARCH_ARM64
+	{
+		/*
+		 * MediaTek MT8735: 4x Cortex-A53
+		 * Some AArch64 phones use non-standard /proc/cpuinfo format.
+		 */
+		.cores = 4,
+		.series = cpuinfo_arm_chipset_series_mediatek_mt,
+		.model = UINT16_C(8735),
+		.clusters = 1,
+		.cluster_cores = {
+			[0] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD034),
+		},
+	},
+#endif
+#if CPUINFO_ARCH_ARM
+	{
+		/*
+		 * MediaTek MT6592: 4x Cortex-A7 + 4x Cortex-A7
+		 * Some phones use non-standard /proc/cpuinfo format.
+		 */
+		.cores = 8, /* 4 + 4: must equal the sum of cluster_cores, otherwise the entry can never match */
+		.series = cpuinfo_arm_chipset_series_mediatek_mt,
+		.model = UINT16_C(6592),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FC074),
+			[1] = UINT32_C(0x410FC074),
+		},
+	},
+	{
+		/* MediaTek MT6595: 4x Cortex-A17 + 4x Cortex-A7 */
+		.cores = 8,
+		.series = cpuinfo_arm_chipset_series_mediatek_mt,
+		.model = UINT16_C(6595),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FC075),
+			[1] = UINT32_C(0x410FC0E0),
+		},
+	},
+#endif
+	{
+		/* MediaTek MT6797: 2x Cortex-A72 + 4x Cortex-A53 + 4x Cortex-A53 */
+		.cores = 10,
+		.series = cpuinfo_arm_chipset_series_mediatek_mt,
+		.model = UINT16_C(6797),
+		.clusters = 3,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+			[2] = 2,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD034),
+			[1] = UINT32_C(0x410FD034),
+			[2] = UINT32_C(0x410FD081),
+		},
+	},
+	{
+		/* MediaTek MT6799: 2x Cortex-A73 + 4x Cortex-A53 + 4x Cortex-A35 */
+		.cores = 10,
+		.series = cpuinfo_arm_chipset_series_mediatek_mt,
+		.model = UINT16_C(6799),
+		.clusters = 3,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 4,
+			[2] = 2,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD041),
+			[1] = UINT32_C(0x410FD034),
+			[2] = UINT32_C(0x410FD092),
+		},
+	},
+	{
+		/* Rockchip RK3399: 2x Cortex-A72 + 4x Cortex-A53 */
+		.cores = 6,
+		.series = cpuinfo_arm_chipset_series_rockchip_rk,
+		.model = UINT16_C(3399),
+		.clusters = 2,
+		.cluster_cores = {
+			[0] = 4,
+			[1] = 2,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD034),
+			[1] = UINT32_C(0x410FD082),
+		},
+	},
+#if CPUINFO_ARCH_ARM
+	{
+		/* Actions ATM7029: 4x Cortex-A5
+		 * Most devices use non-standard /proc/cpuinfo format.
+		 */
+		.cores = 4,
+		.series = cpuinfo_arm_chipset_series_actions_atm,
+		.model = UINT16_C(7029),
+		.clusters = 1,
+		.cluster_cores = {
+			[0] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FC051),
+		},
+	},
+#endif
+};
+
+/*
+ * Searches chipset name in mapping of chipset name to cores' MIDR values. If match is successful, initializes MIDR
+ * for all clusters' leaders with tabulated values.
+ *
+ * @param[in] chipset - chipset (SoC) name information.
+ * @param clusters_count - number of CPU core clusters detected in the SoC.
+ * @param cluster_leaders - indices of core clusters' leaders in the @p processors array.
+ * @param processors_count - number of usable logical processors in the system.
+ * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
+ *                             and decoded core cluster (package_leader_id) information.
+ *                             Upon successful return, processors[i].midr for all clusters' leaders contains the
+ *                             tabulated MIDR values.
+ * @param verify_midr - indicates whether the function should check that the MIDR values to be assigned to leaders of
+ *                      core clusters are consistent with known parts of their parsed values.
+ *                      Set it to false if the only MIDR value parsed from /proc/cpuinfo is for the last processor
+ *                      reported in /proc/cpuinfo and thus can't be unambiguously attributed to that processor.
+ *
+ * @retval true if the chipset was found in the mapping and core clusters' leaders initialized with MIDR values.
+ * @retval false if the chipset was not found in the mapping, or any consistency check failed.
+ */
+static bool cpuinfo_arm_linux_detect_cluster_midr_by_chipset(
+	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	uint32_t clusters_count,
+	const uint32_t cluster_leaders[restrict static CLUSTERS_MAX],
+	uint32_t processors_count,
+	struct cpuinfo_arm_linux_processor processors[restrict static processors_count],
+	bool verify_midr)
+{
+	if (clusters_count <= CLUSTERS_MAX) {
+		for (uint32_t c = 0; c < CPUINFO_COUNT_OF(cluster_configs); c++) {
+			if (cluster_configs[c].model == chipset->model && cluster_configs[c].series == chipset->series) {
+				/* Verify that the total number of cores and clusters of cores matches expectation */
+				if (cluster_configs[c].cores != processors_count || cluster_configs[c].clusters != clusters_count) {
+					return false;
+				}
+
+				/* Verify that core cluster configuration matches expectation */
+				for (uint32_t cluster = 0; cluster < clusters_count; cluster++) {
+					const uint32_t cluster_leader = cluster_leaders[cluster];
+					if (cluster_configs[c].cluster_cores[cluster] != processors[cluster_leader].package_processor_count) {
+						return false;
+					}
+				}
+
+				if (verify_midr) {
+					/* Verify known parts of MIDR: only fields flagged as valid take part in the comparison */
+					for (uint32_t cluster = 0; cluster < clusters_count; cluster++) {
+						const uint32_t cluster_leader = cluster_leaders[cluster];
+
+						/* Create a mask of known midr bits */
+						uint32_t midr_mask = 0;
+						if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+							midr_mask |= CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+						}
+						if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+							midr_mask |= CPUINFO_ARM_MIDR_VARIANT_MASK;
+						}
+						if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_PART) {
+							midr_mask |= CPUINFO_ARM_MIDR_PART_MASK;
+						}
+						if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+							midr_mask |= CPUINFO_ARM_MIDR_REVISION_MASK;
+						}
+
+						/* Verify the bits under the mask; MIDRs are logged in hex so they can be compared with the table */
+						if ((processors[cluster_leader].midr ^ cluster_configs[c].cluster_midr[cluster]) & midr_mask) {
+							cpuinfo_log_debug("parsed MIDR of cluster %08"PRIx32" does not match tabulated value %08"PRIx32,
+								processors[cluster_leader].midr, cluster_configs[c].cluster_midr[cluster]);
+							return false;
+						}
+					}
+				}
+
+				/* Assign MIDRs according to tabulated configurations */
+				for (uint32_t cluster = 0; cluster < clusters_count; cluster++) {
+					const uint32_t cluster_leader = cluster_leaders[cluster];
+					processors[cluster_leader].midr = cluster_configs[c].cluster_midr[cluster];
+					processors[cluster_leader].flags |= CPUINFO_ARM_LINUX_VALID_MIDR;
+					cpuinfo_log_debug("cluster %"PRIu32" MIDR = 0x%08"PRIx32, cluster, cluster_configs[c].cluster_midr[cluster]);
+				}
+				return true;
+			}
+		}
+	}
+	return false;
+}
+
+/*
+ * Initializes MIDR for leaders of core clusters using a heuristic for big.LITTLE systems:
+ * - If the only known MIDR is for the big core cluster, guess the matching MIDR for the LITTLE cluster.
+ * - Estimate which of the clusters is big using maximum frequency, if known, otherwise using system processor ID.
+ * - Initialize the MIDR for big and LITTLE core clusters using the guesstimated values.
+ *
+ * @param clusters_count - number of CPU core clusters detected in the SoC.
+ * @param cluster_with_midr_count - number of CPU core clusters in the SoC with known MIDR values.
+ * @param last_processor_with_midr - index of the last logical processor with known MIDR in the @p processors array.
+ * @param cluster_leaders - indices of core clusters' leaders in the @p processors array.
+ * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
+ *                             and decoded core cluster (package_leader_id) information.
+ *                             Upon successful return, processors[i].midr for all core clusters' leaders contains
+ *                             the heuristically detected MIDR value.
+ * @param verify_midr - indicates whether the function should check that the MIDR values to be assigned to leaders of
+ *                      core clusters are consistent with known parts of their parsed values.
+ *                      Set it to false if the only MIDR value parsed from /proc/cpuinfo is for the last processor
+ *                      reported in /proc/cpuinfo and thus can't be unambiguously attributed to that processor.
+ *
+ * @retval true if this is a big.LITTLE system with only one known MIDR and the CPU core clusters' leaders were
+ *              initialized with MIDR values.
+ * @retval false if this is not a big.LITTLE system.
+ */
+static bool cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic(
+	uint32_t clusters_count,
+	uint32_t cluster_with_midr_count,
+	uint32_t last_processor_with_midr,
+	const uint32_t cluster_leaders[restrict static CLUSTERS_MAX],
+	struct cpuinfo_arm_linux_processor processors[restrict static last_processor_with_midr],
+	bool verify_midr)
+{
+	if (clusters_count != 2 || cluster_with_midr_count != 1) {
+		/* Not a big.LITTLE system, or MIDR is known for both/neither clusters */
+		return false;
+	}
+
+	const uint32_t known_leader = processors[last_processor_with_midr].package_leader_id;
+	const uint32_t midr_flags = processors[known_leader].flags & CPUINFO_ARM_LINUX_VALID_MIDR;
+	const uint32_t big_midr = processors[known_leader].midr; /* the single known MIDR is treated as the big core's */
+	const uint32_t little_midr = midr_little_core_for_big(big_midr);
+
+	/* Default assumption: the first reported cluster is LITTLE cluster (this holds on most Linux kernels) */
+	uint32_t little_cluster_leader = cluster_leaders[0];
+	const uint32_t other_cluster_leader = cluster_leaders[1];
+	/* If maximum frequency is known for both clusters, assume LITTLE cluster is the one with lower frequency */
+	if (processors[little_cluster_leader].flags & processors[other_cluster_leader].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+		if (processors[little_cluster_leader].max_frequency > processors[other_cluster_leader].max_frequency) {
+			little_cluster_leader = other_cluster_leader;
+		}
+	}
+
+	if (verify_midr) {
+		/* Verify known parts of MIDR */
+		for (uint32_t cluster = 0; cluster < clusters_count; cluster++) {
+			const uint32_t cluster_leader = cluster_leaders[cluster];
+
+			/* Create a mask of known midr bits */
+			uint32_t midr_mask = 0;
+			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+				midr_mask |= CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+			}
+			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+				midr_mask |= CPUINFO_ARM_MIDR_VARIANT_MASK;
+			}
+			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_PART) {
+				midr_mask |= CPUINFO_ARM_MIDR_PART_MASK;
+			}
+			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+				midr_mask |= CPUINFO_ARM_MIDR_REVISION_MASK;
+			}
+
+			/* Verify the bits under the mask; MIDR values are logged in hex like the other MIDR messages */
+			const uint32_t midr = (cluster_leader == little_cluster_leader) ? little_midr : big_midr;
+			if ((processors[cluster_leader].midr ^ midr) & midr_mask) {
+				cpuinfo_log_debug(
+					"parsed MIDR %08"PRIx32" of cluster leader %"PRIu32" is inconsistent with expected value %08"PRIx32,
+					processors[cluster_leader].midr, cluster_leader, midr);
+				return false;
+			}
+		}
+	}
+
+	for (uint32_t c = 0; c < clusters_count; c++) {
+		/* Skip cluster with already assigned MIDR */
+		const uint32_t cluster_leader = cluster_leaders[c];
+		if (bitmask_all(processors[cluster_leader].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
+			continue;
+		}
+
+		const uint32_t midr = (cluster_leader == little_cluster_leader) ? little_midr : big_midr;
+		cpuinfo_log_info("assume processor %"PRIu32" to have MIDR %08"PRIx32, cluster_leader, midr);
+		/* To be consistent, we copy the MIDR entirely, rather than by parts */
+		processors[cluster_leader].midr = midr;
+		processors[cluster_leader].flags |= midr_flags; /* only the validity bits the known leader actually had */
+	}
+	return true;
+}
+
+/*
+ * Initializes MIDR for leaders of core clusters in a single sequential scan:
+ *  - Clusters preceding the first reported MIDR value are assumed to have default MIDR value.
+ *  - Clusters following any reported MIDR value are assumed to have that MIDR value.
+ *
+ * @param default_midr - MIDR value that will be assigned to cluster leaders preceding any reported MIDR value.
+ * @param processors_count - number of logical processor descriptions in the @p processors array.
+ * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
+ *                             and decoded core cluster (package_leader_id) information.
+ *                             Upon successful return, processors[i].midr for all core clusters' leaders contains
+ *                             the assigned MIDR value.
+ */
+static void cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
+	uint32_t default_midr,
+	uint32_t processors_count,
+	struct cpuinfo_arm_linux_processor processors[restrict static processors_count])
+{
+	uint32_t carried_midr = default_midr; /* MIDR handed to cluster leaders that lack one of their own */
+	for (uint32_t index = 0; index < processors_count; index++) {
+		struct cpuinfo_arm_linux_processor* const candidate = &processors[index];
+		/* Only valid processors that lead their own cluster take part in the scan */
+		if (!bitmask_all(candidate->flags, CPUINFO_LINUX_FLAG_VALID) || candidate->package_leader_id != index) {
+			continue;
+		}
+		if (bitmask_all(candidate->flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
+			carried_midr = candidate->midr; /* a reported MIDR replaces the carried one from here on */
+			continue;
+		}
+		cpuinfo_log_info("assume processor %"PRIu32" to have MIDR %08"PRIx32, index, carried_midr);
+		candidate->midr = carried_midr; /* the MIDR is copied whole rather than field by field */
+		candidate->flags |= CPUINFO_ARM_LINUX_VALID_MIDR;
+	}
+}
+
+/*
+ * Detects MIDR of each CPU core cluster's leader.
+ *
+ * @param[in] chipset - chipset (SoC) name information.
+ * @param max_processors - number of processor descriptions in the @p processors array.
+ * @param usable_processors - number of processor descriptions in the @p processors array with both POSSIBLE and
+ *                            PRESENT flags.
+ * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
+ *                             and decoded core cluster (package_leader_id) information.
+ *                             Upon return, processors[i].midr for all clusters' leaders contains the MIDR value.
+ *
+ * @returns The number of core clusters
+ */
+uint32_t cpuinfo_arm_linux_detect_cluster_midr(
+	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	uint32_t max_processors,
+	uint32_t usable_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors])
+{
+	uint32_t clusters_count = 0;
+	uint32_t cluster_leaders[CLUSTERS_MAX];
+	uint32_t last_processor_in_cpuinfo = max_processors; /* max_processors doubles as the "none seen" marker */
+	uint32_t last_processor_with_midr = max_processors; /* same marker; tested again before the fallbacks below */
+	uint32_t processors_with_midr_count = 0;
+	for (uint32_t i = 0; i < max_processors; i++) {
+		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR) {
+				last_processor_in_cpuinfo = i;
+			}
+			if (bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_IMPLEMENTER | CPUINFO_ARM_LINUX_VALID_PART)) {
+				last_processor_with_midr = i; /* "with MIDR" here means implementer and part were both parsed */
+				processors_with_midr_count += 1;
+			}
+			const uint32_t group_leader = processors[i].package_leader_id;
+			if (group_leader == i) {
+				if (clusters_count < CLUSTERS_MAX) { /* leaders beyond CLUSTERS_MAX are counted but not recorded */
+					cluster_leaders[clusters_count] = i;
+				}
+				clusters_count += 1;
+			} else {
+				/* Copy known bits of information to cluster leader */
+
+				if ((processors[i].flags & ~processors[group_leader].flags) & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+					processors[group_leader].max_frequency = processors[i].max_frequency;
+					processors[group_leader].flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
+				}
+				if (!bitmask_all(processors[group_leader].flags, CPUINFO_ARM_LINUX_VALID_MIDR) &&
+					bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR))
+				{
+					processors[group_leader].midr = processors[i].midr;
+					processors[group_leader].flags |= CPUINFO_ARM_LINUX_VALID_MIDR;
+				}
+			}
+		}
+	}
+	cpuinfo_log_debug("detected %"PRIu32" core clusters", clusters_count);
+
+	/*
+	 * Two relations between the information reported in /proc/cpuinfo and the cores are possible:
+	 * - /proc/cpuinfo reports information for all or some of the cores below the corresponding
+	 *   "processor : <number>" lines. Information on offline cores may be missing.
+	 * - /proc/cpuinfo reports information only once, after all "processor : <number>" lines.
+	 *   The reported information may relate to processor #0 or to the processor which
+	 *   executed the system calls to read /proc/cpuinfo. It is also indistinguishable
+	 *   from /proc/cpuinfo reporting information only for the last core (e.g. if all other
+	 *   cores are offline).
+	 *
+	 * We detect the second case by checking if /proc/cpuinfo contains valid MIDR only for one,
+	 * last reported, processor. Note that the last reported core may not be the last
+	 * present & possible processor, as /proc/cpuinfo may not report high-index offline cores.
+	 */
+	if (processors_with_midr_count == 1 && last_processor_in_cpuinfo == last_processor_with_midr && clusters_count > 1) {
+		/*
+		 * There are multiple core clusters, but /proc/cpuinfo reported MIDR only for one
+		 * processor, and we don't even know which logical processor this information refers to.
+		 *
+		 * We make three attempts to detect MIDR for all clusters:
+		 * 1. Search tabulated MIDR values for chipsets which have heterogeneous clusters and ship with Linux
+		 *    kernels which do not always report all cores in /proc/cpuinfo. If found, use the tabulated values.
+		 * 2. For systems with 2 clusters and MIDR known for one cluster, assume big.LITTLE configuration,
+		 *    and estimate MIDR for the other cluster under assumption that MIDR for the big cluster is known.
+		 * 3. Initialize MIDRs for all core clusters to the only parsed MIDR value.
+		 */
+		cpuinfo_log_debug("the only reported MIDR can not be attributed to a particular processor");
+
+		if (cpuinfo_arm_linux_detect_cluster_midr_by_chipset(
+			chipset, clusters_count, cluster_leaders, usable_processors, processors, false))
+		{
+			return clusters_count;
+		}
+
+		/* Try big.LITTLE heuristic */
+		if (cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic(
+			clusters_count, 1, last_processor_with_midr,
+			cluster_leaders, processors, false))
+		{
+			return clusters_count;
+		}
+
+		/* Fall back to sequential initialization of MIDR values for core clusters */
+		cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
+			processors[processors[last_processor_with_midr].package_leader_id].midr,
+			max_processors, processors);
+	} else if (processors_with_midr_count < usable_processors) {
+		/*
+		 * /proc/cpuinfo reported MIDR only for some processors, and probably some core clusters do not have MIDR
+		 * for any of the cores. Check if this is the case.
+		 */
+		uint32_t clusters_with_midr_count = 0;
+		for (uint32_t i = 0; i < max_processors; i++) {
+			if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_ARM_LINUX_VALID_MIDR)) {
+				if (processors[i].package_leader_id == i) {
+					clusters_with_midr_count += 1;
+				}
+			}
+		}
+
+		if (clusters_with_midr_count < clusters_count) {
+			/*
+			 * /proc/cpuinfo reported MIDR only for some clusters, need to reconstruct others.
+			 * We make three attempts to detect MIDR for clusters without it:
+			 * 1. Search tabulated MIDR values for chipsets which have heterogeneous clusters and ship with Linux
+			 *    kernels which do not always report all cores in /proc/cpuinfo. If found, use the tabulated values.
+			 * 2. For systems with 2 clusters and MIDR known for one cluster, assume big.LITTLE configuration,
+			 *    and estimate MIDR for the other cluster under assumption that MIDR for the big cluster is known.
+			 * 3. Initialize MIDRs for core clusters in a single sequential scan:
+			 *    - Clusters preceding the first reported MIDR value are assumed to have the last reported MIDR value.
+			 *    - Clusters following any reported MIDR value are assumed to have that MIDR value.
+			 */
+
+			if (cpuinfo_arm_linux_detect_cluster_midr_by_chipset(
+				chipset, clusters_count, cluster_leaders, usable_processors, processors, true))
+			{
+				return clusters_count;
+			}
+
+			if (last_processor_with_midr != max_processors) { /* at least one processor reported implementer and part */
+				/* Try big.LITTLE heuristic */
+				if (cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic(
+					clusters_count, processors_with_midr_count, last_processor_with_midr, /* NOTE(review): a processor count is passed where the callee's parameter is named cluster_with_midr_count - confirm this is intended */
+					cluster_leaders, processors, true))
+				{
+					return clusters_count;
+				}
+
+				/* Fall back to sequential initialization of MIDR values for core clusters */
+				cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
+					processors[processors[last_processor_with_midr].package_leader_id].midr,
+					max_processors, processors);
+			}
+		}
+	}
+	return clusters_count;
+}
--- a/dep/cpuinfo/src/arm/mach/init.c
+++ b/dep/cpuinfo/src/arm/mach/init.c
@@ -0,0 +1,619 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <alloca.h>
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <mach/machine.h>
+
+#include <cpuinfo.h>
+#include <mach/api.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+/* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */
+#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL
+	#define CPUFAMILY_ARM_MONSOON_MISTRAL   0xE81E7EF6
+#endif
+#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST
+	#define CPUFAMILY_ARM_VORTEX_TEMPEST    0x07D34B9F
+#endif
+#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER
+	#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2
+#endif
+#ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM
+	#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3
+#endif
+
+struct cpuinfo_arm_isa cpuinfo_isa = { /* compile-time defaults; members not listed start out zero-initialized */
+#if CPUINFO_ARCH_ARM /* defaults applied when CPUINFO_ARCH_ARM is set */
+	.thumb = true,
+	.thumb2 = true,
+	.thumbee = false,
+	.jazelle = false,
+	.armv5e = true,
+	.armv6 = true,
+	.armv6k = true,
+	.armv7 = true,
+	.vfpv2 = false,
+	.vfpv3 = true,
+	.d32 = true,
+	.wmmx = false,
+	.wmmx2 = false,
+	.neon = true,
+#endif
+#if CPUINFO_ARCH_ARM64 /* defaults applied when CPUINFO_ARCH_ARM64 is set */
+	.aes = true,
+	.sha1 = true,
+	.sha2 = true,
+	.pmull = true,
+	.crc32 = true,
+#endif
+}; /* cpuinfo_arm_mach_init() switches on further flags at run time based on sysctl results */
+
+static uint32_t get_sys_info(int type_specifier, const char* name) {
+	size_t size = 0;
+	uint32_t result = 0;
+	int mib[2] = { CTL_HW, type_specifier };
+	/* Query the value size first; fetch the value only when it is 32 bits wide. A failure of either call is logged. */
+	if (sysctl(mib, 2, NULL, &size, NULL, 0) != 0 || (size == sizeof(uint32_t) && sysctl(mib, 2, &result, &size, NULL, 0) != 0)) {
+		cpuinfo_log_info("sysctl(\"%s\") failed: %s", name, strerror(errno));
+	} else if (size == sizeof(uint32_t)) {
+		cpuinfo_log_debug("%s: %"PRIu32 ", size = %zu", name, result, size);
+	} else {
+		cpuinfo_log_info("sysctl does not support non-integer lookup for (\"%s\")", name);
+	}
+	return result;
+}
+
+static uint32_t get_sys_info_by_name(const char* type_specifier) {
+	size_t size = 0;
+	uint32_t result = 0;
+	/* Query the value size first; fetch the value only when it is 32 bits wide. A failure of either call is logged. */
+	if (sysctlbyname(type_specifier, NULL, &size, NULL, 0) != 0 || (size == sizeof(uint32_t) && sysctlbyname(type_specifier, &result, &size, NULL, 0) != 0)) {
+		cpuinfo_log_info("sysctlbyname(\"%s\") failed: %s", type_specifier, strerror(errno));
+	} else if (size == sizeof(uint32_t)) {
+		cpuinfo_log_debug("%s: %"PRIu32 ", size = %zu", type_specifier, result, size);
+	} else {
+		cpuinfo_log_info("sysctl does not support non-integer lookup for (\"%s\")", type_specifier);
+	}
+	return result;
+}
+
+static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype, uint32_t core_index, uint32_t core_count) {
+	/* True for every core except the last four; several families below put their second core type there */
+	const bool precedes_last_four_cores = core_index + 4 < core_count;
+	switch (cpu_family) {
+		case CPUFAMILY_ARM_SWIFT:
+			return cpuinfo_uarch_swift;
+		case CPUFAMILY_ARM_CYCLONE:
+			return cpuinfo_uarch_cyclone;
+		case CPUFAMILY_ARM_TYPHOON:
+			return cpuinfo_uarch_typhoon;
+		case CPUFAMILY_ARM_TWISTER:
+			return cpuinfo_uarch_twister;
+		case CPUFAMILY_ARM_HURRICANE:
+			return cpuinfo_uarch_hurricane;
+		case CPUFAMILY_ARM_MONSOON_MISTRAL:
+			/* The first two cores are Monsoon, all remaining cores are Mistral */
+			return core_index < 2 ? cpuinfo_uarch_monsoon : cpuinfo_uarch_mistral;
+		case CPUFAMILY_ARM_VORTEX_TEMPEST:
+			/* Hexa-core: 2x Vortex + 4x Tempest; Octa-core: 4x Vortex + 4x Tempest */
+			return precedes_last_four_cores ? cpuinfo_uarch_vortex : cpuinfo_uarch_tempest;
+		case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+			/* Hexa-core: 2x Lightning + 4x Thunder; Octa-core (presumed): 4x Lightning + 4x Thunder */
+			return precedes_last_four_cores ? cpuinfo_uarch_lightning : cpuinfo_uarch_thunder;
+		case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+			/* Hexa-core: 2x Firestorm + 4x Icestorm; Octa-core: 4x Firestorm + 4x Icestorm */
+			return precedes_last_four_cores ? cpuinfo_uarch_firestorm : cpuinfo_uarch_icestorm;
+		default:
+			break;
+	}
+
+	/* Family not recognized: fall back to hw.cpusubtype, which is decoded only when CPUINFO_ARCH_ARM is set */
+	#if CPUINFO_ARCH_ARM
+		if (cpu_subtype == CPU_SUBTYPE_ARM_V7) {
+			return cpuinfo_uarch_cortex_a8;
+		}
+		if (cpu_subtype == CPU_SUBTYPE_ARM_V7F) {
+			return cpuinfo_uarch_cortex_a9;
+		}
+		if (cpu_subtype == CPU_SUBTYPE_ARM_V7K) {
+			return cpuinfo_uarch_cortex_a7;
+		}
+	#endif
+	return cpuinfo_uarch_unknown;
+}
+
+static void decode_package_name(char* package_name) {
+	/* Writes "Apple A<N>[X]" derived from hw.machine into package_name; leaves it untouched if the device is unknown */
+	size_t size = 0;
+	if (sysctlbyname("hw.machine", NULL, &size, NULL, 0) != 0) {
+		cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno));
+		return;
+	}
+	char *machine_name = alloca(size);
+	if (sysctlbyname("hw.machine", machine_name, &size, NULL, 0) != 0) {
+		cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno));
+		return;
+	}
+	cpuinfo_log_debug("hw.machine: %s", machine_name);
+
+	char name[10];
+	uint32_t major = 0, minor = 0;
+	if (sscanf(machine_name, "%9[^,0123456789]%"SCNu32",%"SCNu32, name, &major, &minor) != 3) {
+		/* A matching failure does not set errno, so report the offending value instead of strerror(errno) */
+		cpuinfo_log_warning("parsing \"hw.machine\" failed: unexpected value \"%s\"", machine_name);
+		return;
+	}
+	uint32_t chip_model = 0;
+	char suffix = '\0';
+	if (strcmp(name, "iPhone") == 0) {
+		/*
+		 * iPhone 4 and up are supported:
+		 *  - iPhone 4       [A4]:  iPhone3,1, iPhone3,2, iPhone3,3
+		 *  - iPhone 4S      [A5]:  iPhone4,1
+		 *  - iPhone 5       [A6]:  iPhone5,1, iPhone5,2
+		 *  - iPhone 5c      [A6]:  iPhone5,3, iPhone5,4
+		 *  - iPhone 5s      [A7]:  iPhone6,1, iPhone6,2
+		 *  - iPhone 6       [A8]:  iPhone7,2
+		 *  - iPhone 6 Plus  [A8]:  iPhone7,1
+		 *  - iPhone 6s      [A9]:  iPhone8,1
+		 *  - iPhone 6s Plus [A9]:  iPhone8,2
+		 *  - iPhone SE      [A9]:  iPhone8,4
+		 *  - iPhone 7       [A10]: iPhone9,1, iPhone9,3
+		 *  - iPhone 7 Plus  [A10]: iPhone9,2, iPhone9,4
+		 *  - iPhone 8       [A11]: iPhone10,1, iPhone10,4
+		 *  - iPhone 8 Plus  [A11]: iPhone10,2, iPhone10,5
+		 *  - iPhone X       [A11]: iPhone10,3, iPhone10,6
+		 *  - iPhone XS      [A12]: iPhone11,2
+		 *  - iPhone XS Max  [A12]: iPhone11,4, iPhone11,6
+		 *  - iPhone XR      [A12]: iPhone11,8
+		 */
+		chip_model = major + 1;
+	} else if (strcmp(name, "iPad") == 0) {
+		switch (major) {
+			/* iPad 2 and up are supported */
+			case 2:
+				/*
+				 * iPad 2    [A5]: iPad2,1, iPad2,2, iPad2,3, iPad2,4
+				 * iPad mini [A5]: iPad2,5, iPad2,6, iPad2,7
+				 */
+				chip_model = major + 3;
+				break;
+			case 3:
+				/*
+				 * iPad 3rd Gen [A5X]: iPad3,1, iPad3,2, iPad3,3
+				 * iPad 4th Gen [A6X]: iPad3,4, iPad3,5, iPad3,6
+				 */
+				chip_model = (minor <= 3) ? 5 : 6;
+				suffix = 'X';
+				break;
+			case 4:
+				/*
+				 * iPad Air         [A7]: iPad4,1, iPad4,2, iPad4,3
+				 * iPad mini Retina [A7]: iPad4,4, iPad4,5, iPad4,6
+				 * iPad mini 3      [A7]: iPad4,7, iPad4,8, iPad4,9
+				 */
+				chip_model = major + 3;
+				break;
+			case 5:
+				/*
+				 * iPad mini 4 [A8]:  iPad5,1, iPad5,2
+				 * iPad Air 2  [A8X]: iPad5,3, iPad5,4
+				 */
+				chip_model = major + 3;
+				suffix = (minor <= 2) ? '\0' : 'X';
+				break;
+			case 6:
+				/*
+				 * iPad Pro 9.7" [A9X]: iPad6,3, iPad6,4
+				 * iPad Pro      [A9X]: iPad6,7, iPad6,8
+				 * iPad 5th Gen  [A9]:  iPad6,11, iPad6,12
+				 */
+				chip_model = major + 3;
+				suffix = minor <= 8 ? 'X' : '\0';
+				break;
+			case 7:
+				/*
+				 * iPad Pro 12.9" [A10X]: iPad7,1, iPad7,2
+				 * iPad Pro 10.5" [A10X]: iPad7,3, iPad7,4
+				 * iPad 6th Gen   [A10]:  iPad7,5, iPad7,6
+				 */
+				chip_model = major + 3;
+				suffix = minor <= 4 ? 'X' : '\0';
+				break;
+			default:
+				cpuinfo_log_info("unknown iPad: %s", machine_name);
+				break;
+		}
+	} else if (strcmp(name, "iPod") == 0) {
+		switch (major) {
+			case 5:
+				/* iPod touch (5th Gen) [A5]: iPod5,1 */
+				chip_model = 5;
+				break;
+			case 7:
+				/* iPod touch (6th Gen, 2015) [A8]: iPod7,1 */
+				chip_model = 8;
+				break;
+			default:
+				cpuinfo_log_info("unknown iPod: %s", machine_name);
+				break;
+		}
+	} else {
+		cpuinfo_log_info("unknown device: %s", machine_name);
+	}
+	if (chip_model != 0) {
+		/* When suffix is '\0' the %c conversion emits a NUL, which simply ends the name after the model number */
+		snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX, "Apple A%"PRIu32"%c", chip_model, suffix);
+	}
+}
+
+void cpuinfo_arm_mach_init(void) {
+	/* Builds the processor/core/cluster/package/uarch/cache tables and publishes them in the cpuinfo_* globals; on allocation failure nothing is published */
+	struct cpuinfo_processor* processors = NULL;
+	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
+	struct cpuinfo_package* packages = NULL;
+	struct cpuinfo_uarch_info* uarchs = NULL;
+	struct cpuinfo_cache* l1i = NULL;
+	struct cpuinfo_cache* l1d = NULL;
+	struct cpuinfo_cache* l2 = NULL;
+	struct cpuinfo_cache* l3 = NULL;
+
+	struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology();
+	processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
+			mach_topology.threads * sizeof(struct cpuinfo_processor), mach_topology.threads);
+		goto cleanup;
+	}
+	cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
+			mach_topology.cores * sizeof(struct cpuinfo_core), mach_topology.cores);
+		goto cleanup;
+	}
+	packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package));
+	if (packages == NULL) {
+		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" packages",
+			mach_topology.packages * sizeof(struct cpuinfo_package), mach_topology.packages);
+		goto cleanup;
+	}
+
+	const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores;
+	const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages;
+	const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages;
+
+	for (uint32_t i = 0; i < mach_topology.packages; i++) {
+		packages[i] = (struct cpuinfo_package) {
+			.processor_start = i * threads_per_package,
+			.processor_count = threads_per_package,
+			.core_start = i * cores_per_package,
+			.core_count = cores_per_package,
+		};
+		decode_package_name(packages[i].name);
+	}
+
+	const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
+	const uint32_t cpu_type = get_sys_info_by_name("hw.cputype");
+	const uint32_t cpu_subtype = get_sys_info_by_name("hw.cpusubtype");
+	switch (cpu_type) {
+		case CPU_TYPE_ARM64:
+			cpuinfo_isa.aes = true;
+			cpuinfo_isa.sha1 = true;
+			cpuinfo_isa.sha2 = true;
+			cpuinfo_isa.pmull = true;
+			cpuinfo_isa.crc32 = true;
+			break;
+#if CPUINFO_ARCH_ARM
+		case CPU_TYPE_ARM:
+			switch (cpu_subtype) {
+				case CPU_SUBTYPE_ARM_V8:
+					cpuinfo_isa.armv8 = true;
+					cpuinfo_isa.aes = true;
+					cpuinfo_isa.sha1 = true;
+					cpuinfo_isa.sha2 = true;
+					cpuinfo_isa.pmull = true;
+					cpuinfo_isa.crc32 = true;
+					/* Fall-through to add ARMv7S features */
+				case CPU_SUBTYPE_ARM_V7S:
+				case CPU_SUBTYPE_ARM_V7K:
+					cpuinfo_isa.fma = true;
+					/* Fall-through to add ARMv7F features */
+				case CPU_SUBTYPE_ARM_V7F:
+					cpuinfo_isa.armv7mp = true;
+					cpuinfo_isa.fp16 = true;
+					/* Fall-through to add ARMv7 features */
+				case CPU_SUBTYPE_ARM_V7:
+					break;
+				default:
+					break;
+			}
+			break;
+#endif
+	}
+	/*
+	 * Support for ARMv8.1 Atomics & FP16 arithmetic instructions is supposed to be detected via
+	 * sysctlbyname calls with "hw.optional.armv8_1_atomics" and "hw.optional.neon_fp16" arguments
+	 * (see https://devstreaming-cdn.apple.com/videos/wwdc/2018/409t8zw7rumablsh/409/409_whats_new_in_llvm.pdf),
+	 * but on new iOS versions these calls just fail with EPERM.
+	 *
+	 * Thus, we whitelist CPUs known to support these instructions.
+	 */
+	switch (cpu_family) {
+		case CPUFAMILY_ARM_MONSOON_MISTRAL:
+		case CPUFAMILY_ARM_VORTEX_TEMPEST:
+		case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+		case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+			#if CPUINFO_ARCH_ARM64
+				cpuinfo_isa.atomics = true;
+			#endif
+			cpuinfo_isa.fp16arith = true;
+	}
+
+	/*
+	 * There does not yet seem to exist an OS mechanism to detect support for
+	 * ARMv8.2 optional dot-product instructions, so we currently whitelist CPUs
+	 * known to support these instructions.
+	 */
+	switch (cpu_family) {
+		case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+		case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+			cpuinfo_isa.dot = true;
+	}
+
+	uint32_t num_clusters = 1;
+	for (uint32_t i = 0; i < mach_topology.cores; i++) {
+		cores[i] = (struct cpuinfo_core) {
+			.processor_start = i * threads_per_core,
+			.processor_count = threads_per_core,
+			.core_id = i % cores_per_package,
+			.package = packages + i / cores_per_package,
+			.vendor = cpuinfo_vendor_apple,
+			.uarch = decode_uarch(cpu_family, cpu_subtype, i, mach_topology.cores),
+		};
+		if (i != 0 && cores[i].uarch != cores[i - 1].uarch) { /* a change of uarch between neighbours starts a new cluster */
+			num_clusters++;
+		}
+	}
+	for (uint32_t i = 0; i < mach_topology.threads; i++) {
+		const uint32_t smt_id = i % threads_per_core;
+		const uint32_t core_id = i / threads_per_core;
+		const uint32_t package_id = i / threads_per_package;
+
+		processors[i].smt_id = smt_id;
+		processors[i].core = &cores[core_id];
+		processors[i].package = &packages[package_id];
+	}
+
+	clusters = calloc(num_clusters, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %"PRIu32" clusters",
+			num_clusters * sizeof(struct cpuinfo_cluster), num_clusters);
+		goto cleanup;
+	}
+	uarchs = calloc(num_clusters, sizeof(struct cpuinfo_uarch_info));
+	if (uarchs == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs",
+			num_clusters * sizeof(struct cpuinfo_uarch_info), num_clusters);
+		goto cleanup;
+	}
+	uint32_t cluster_idx = UINT32_MAX; /* wraps to 0 on the first increment, which always happens for i == 0 */
+	for (uint32_t i = 0; i < mach_topology.cores; i++) {
+		if (i == 0 || cores[i].uarch != cores[i - 1].uarch) {
+			cluster_idx++;
+			uarchs[cluster_idx] = (struct cpuinfo_uarch_info) {
+				.uarch = cores[i].uarch,
+				.processor_count = 1,
+				.core_count = 1,
+			};
+			clusters[cluster_idx] = (struct cpuinfo_cluster) {
+				.processor_start = i * threads_per_core,
+				.processor_count = 1,
+				.core_start = i,
+				.core_count = 1,
+				.cluster_id = cluster_idx,
+				.package = cores[i].package,
+				.vendor = cores[i].vendor,
+				.uarch = cores[i].uarch,
+			};
+		} else {
+			uarchs[cluster_idx].processor_count++;
+			uarchs[cluster_idx].core_count++;
+			clusters[cluster_idx].processor_count++;
+			clusters[cluster_idx].core_count++;
+		}
+		cores[i].cluster = &clusters[cluster_idx];
+	}
+
+	for (uint32_t i = 0; i < mach_topology.threads; i++) {
+		const uint32_t core_id = i / threads_per_core;
+		processors[i].cluster = cores[core_id].cluster;
+	}
+
+	for (uint32_t i = 0; i < mach_topology.packages; i++) {
+		packages[i].cluster_start = 0;
+		packages[i].cluster_count = num_clusters;
+	}
+
+	const uint32_t cacheline_size = get_sys_info(HW_CACHELINE, "HW_CACHELINE"); /* 0 if the lookup fails; guarded where used as a divisor */
+	const uint32_t l1d_cache_size = get_sys_info(HW_L1DCACHESIZE, "HW_L1DCACHESIZE");
+	const uint32_t l1i_cache_size = get_sys_info(HW_L1ICACHESIZE, "HW_L1ICACHESIZE");
+	const uint32_t l2_cache_size = get_sys_info(HW_L2CACHESIZE, "HW_L2CACHESIZE");
+	const uint32_t l3_cache_size = get_sys_info(HW_L3CACHESIZE, "HW_L3CACHESIZE");
+	const uint32_t l1_cache_associativity = 4;
+	const uint32_t l2_cache_associativity = 8;
+	const uint32_t l3_cache_associativity = 16;
+	const uint32_t cache_partitions = 1;
+	const uint32_t cache_flags = 0;
+
+	uint32_t threads_per_l1 = 0, l1_count = 0;
+	if (l1i_cache_size != 0 || l1d_cache_size != 0) {
+		/* Assume L1 caches are private to each core */
+		threads_per_l1 = 1;
+		l1_count = mach_topology.threads / threads_per_l1;
+		cpuinfo_log_debug("detected %"PRIu32" L1 caches", l1_count);
+	}
+
+	uint32_t threads_per_l2 = 0, l2_count = 0;
+	if (l2_cache_size != 0) {
+		/* Assume L2 cache is shared between all cores */
+		threads_per_l2 = mach_topology.cores;
+		l2_count = 1;
+		cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count);
+	}
+
+	uint32_t threads_per_l3 = 0, l3_count = 0;
+	if (l3_cache_size != 0) {
+		/* Assume L3 cache is shared between all cores */
+		threads_per_l3 = mach_topology.cores;
+		l3_count = 1;
+		cpuinfo_log_debug("detected %"PRIu32" L3 caches", l3_count);
+	}
+
+	if (l1i_cache_size != 0) {
+		l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
+		if (l1i == NULL) {
+			cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
+				l1_count * sizeof(struct cpuinfo_cache), l1_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l1_count; c++) {
+			l1i[c] = (struct cpuinfo_cache) {
+				.size            = l1i_cache_size,
+				.associativity   = l1_cache_associativity,
+				.sets            = cacheline_size != 0 ? l1i_cache_size / (l1_cache_associativity * cacheline_size) : 0,
+				.partitions      = cache_partitions,
+				.line_size       = cacheline_size,
+				.flags           = cache_flags,
+				.processor_start = c * threads_per_l1,
+				.processor_count = threads_per_l1,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l1i = &l1i[t / threads_per_l1];
+		}
+	}
+
+	if (l1d_cache_size != 0) {
+		l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
+		if (l1d == NULL) {
+			cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
+				l1_count * sizeof(struct cpuinfo_cache), l1_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l1_count; c++) {
+			l1d[c] = (struct cpuinfo_cache) {
+				.size            = l1d_cache_size,
+				.associativity   = l1_cache_associativity,
+				.sets            = cacheline_size != 0 ? l1d_cache_size / (l1_cache_associativity * cacheline_size) : 0,
+				.partitions      = cache_partitions,
+				.line_size       = cacheline_size,
+				.flags           = cache_flags,
+				.processor_start = c * threads_per_l1,
+				.processor_count = threads_per_l1,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l1d = &l1d[t / threads_per_l1];
+		}
+	}
+
+	if (l2_count != 0) {
+		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
+		if (l2 == NULL) {
+			cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
+				l2_count * sizeof(struct cpuinfo_cache), l2_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l2_count; c++) {
+			l2[c] = (struct cpuinfo_cache) {
+				.size            = l2_cache_size,
+				.associativity   = l2_cache_associativity,
+				.sets            = cacheline_size != 0 ? l2_cache_size / (l2_cache_associativity * cacheline_size) : 0,
+				.partitions      = cache_partitions,
+				.line_size       = cacheline_size,
+				.flags           = cache_flags,
+				.processor_start = c * threads_per_l2,
+				.processor_count = threads_per_l2,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l2 = &l2[0];
+		}
+	}
+
+	if (l3_count != 0) {
+		l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
+		if (l3 == NULL) {
+			cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches",
+				l3_count * sizeof(struct cpuinfo_cache), l3_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l3_count; c++) {
+			l3[c] = (struct cpuinfo_cache) {
+				.size            = l3_cache_size,
+				.associativity   = l3_cache_associativity,
+				.sets            = cacheline_size != 0 ? l3_cache_size / (l3_cache_associativity * cacheline_size) : 0,
+				.partitions      = cache_partitions,
+				.line_size       = cacheline_size,
+				.flags           = cache_flags,
+				.processor_start = c * threads_per_l3,
+				.processor_count = threads_per_l3,
+			};
+		}
+		for (uint32_t t = 0; t < mach_topology.threads; t++) {
+			processors[t].cache.l3 = &l3[0];
+		}
+	}
+
+	/* Commit changes */
+	cpuinfo_processors = processors;
+	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
+	cpuinfo_packages = packages;
+	cpuinfo_uarchs = uarchs;
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2]  = l2;
+	cpuinfo_cache[cpuinfo_cache_level_3]  = l3;
+
+	cpuinfo_processors_count = mach_topology.threads;
+	cpuinfo_cores_count = mach_topology.cores;
+	cpuinfo_clusters_count = num_clusters;
+	cpuinfo_packages_count = mach_topology.packages;
+	cpuinfo_uarchs_count = num_clusters;
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_2]  = l2_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_3]  = l3_count;
+	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+	__sync_synchronize(); /* full barrier placed between filling the tables and setting the initialized flag */
+
+	cpuinfo_is_initialized = true;
+
+	processors = NULL; /* the globals now hold these buffers, so the frees below must see NULL */
+	cores = NULL;
+	clusters = NULL;
+	packages = NULL;
+	uarchs = NULL;
+	l1i = l1d = l2 = l3 = NULL;
+
+cleanup:
+	free(processors);
+	free(cores);
+	free(clusters);
+	free(packages);
+	free(uarchs);
+	free(l1i);
+	free(l1d);
+	free(l2);
+	free(l3);
+}
--- a/dep/cpuinfo/src/arm/midr.h
+++ b/dep/cpuinfo/src/arm/midr.h
@@ -0,0 +1,257 @@
+#pragma once
+#include <stdint.h>
+
+
+/* Bit masks selecting each field of a 32-bit MIDR value. */
+#define CPUINFO_ARM_MIDR_IMPLEMENTER_MASK  UINT32_C(0xFF000000)
+#define CPUINFO_ARM_MIDR_VARIANT_MASK      UINT32_C(0x00F00000)
+#define CPUINFO_ARM_MIDR_ARCHITECTURE_MASK UINT32_C(0x000F0000)
+#define CPUINFO_ARM_MIDR_PART_MASK         UINT32_C(0x0000FFF0)
+#define CPUINFO_ARM_MIDR_REVISION_MASK     UINT32_C(0x0000000F)
+
+/* Position of the lowest bit of each MIDR field; each offset pairs with the mask of the same name. */
+#define CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET  24
+#define CPUINFO_ARM_MIDR_VARIANT_OFFSET      20
+#define CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET 16
+#define CPUINFO_ARM_MIDR_PART_OFFSET          4
+#define CPUINFO_ARM_MIDR_REVISION_OFFSET      0
+
+/*
+ * Reference MIDR values of specific cores.
+ * Most entries have zero variant and revision fields. The KRYO280_* and KRYO385_* entries
+ * have non-zero variant and revision fields, so they must be masked before being compared
+ * with a MIDR value from which those fields were stripped.
+ */
+#define CPUINFO_ARM_MIDR_ARM1156         UINT32_C(0x410FB560)
+#define CPUINFO_ARM_MIDR_CORTEX_A7       UINT32_C(0x410FC070)
+#define CPUINFO_ARM_MIDR_CORTEX_A9       UINT32_C(0x410FC090)
+#define CPUINFO_ARM_MIDR_CORTEX_A15      UINT32_C(0x410FC0F0)
+#define CPUINFO_ARM_MIDR_CORTEX_A17      UINT32_C(0x410FC0E0)
+#define CPUINFO_ARM_MIDR_CORTEX_A35      UINT32_C(0x410FD040)
+#define CPUINFO_ARM_MIDR_CORTEX_A53      UINT32_C(0x410FD030)
+#define CPUINFO_ARM_MIDR_CORTEX_A55      UINT32_C(0x410FD050)
+#define CPUINFO_ARM_MIDR_CORTEX_A57      UINT32_C(0x410FD070)
+#define CPUINFO_ARM_MIDR_CORTEX_A72      UINT32_C(0x410FD080)
+#define CPUINFO_ARM_MIDR_CORTEX_A73      UINT32_C(0x410FD090)
+#define CPUINFO_ARM_MIDR_CORTEX_A75      UINT32_C(0x410FD0A0)
+#define CPUINFO_ARM_MIDR_KRYO280_GOLD    UINT32_C(0x51AF8001)
+#define CPUINFO_ARM_MIDR_KRYO280_SILVER  UINT32_C(0x51AF8014)
+#define CPUINFO_ARM_MIDR_KRYO385_GOLD    UINT32_C(0x518F802D)
+#define CPUINFO_ARM_MIDR_KRYO385_SILVER  UINT32_C(0x518F803C)
+#define CPUINFO_ARM_MIDR_KRYO_SILVER_821 UINT32_C(0x510F2010)
+#define CPUINFO_ARM_MIDR_KRYO_GOLD       UINT32_C(0x510F2050)
+#define CPUINFO_ARM_MIDR_KRYO_SILVER_820 UINT32_C(0x510F2110)
+#define CPUINFO_ARM_MIDR_EXYNOS_M1_M2    UINT32_C(0x530F0010)
+#define CPUINFO_ARM_MIDR_DENVER2         UINT32_C(0x4E0F0030)
+
+/* Returns midr with its implementer field overwritten by implementer; bits that do not fit the field are dropped. */
+inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) {
+	const uint32_t field = (implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+	const uint32_t rest = midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+	return rest | field;
+}
+
+/* Returns midr with its variant field overwritten by variant; bits that do not fit the field are dropped. */
+inline static uint32_t midr_set_variant(uint32_t midr, uint32_t variant) {
+	const uint32_t field = (variant << CPUINFO_ARM_MIDR_VARIANT_OFFSET) & CPUINFO_ARM_MIDR_VARIANT_MASK;
+	const uint32_t rest = midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK;
+	return rest | field;
+}
+
+/* Returns midr with its architecture field overwritten by architecture; bits that do not fit the field are dropped. */
+inline static uint32_t midr_set_architecture(uint32_t midr, uint32_t architecture) {
+	const uint32_t field =
+		(architecture << CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET) & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
+	const uint32_t rest = midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
+	return rest | field;
+}
+
+/* Returns midr with its part field overwritten by part; bits that do not fit the field are dropped. */
+inline static uint32_t midr_set_part(uint32_t midr, uint32_t part) {
+	const uint32_t field = (part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK;
+	const uint32_t rest = midr & ~CPUINFO_ARM_MIDR_PART_MASK;
+	return rest | field;
+}
+
+/* Returns midr with its revision field overwritten by revision; bits that do not fit the field are dropped. */
+inline static uint32_t midr_set_revision(uint32_t midr, uint32_t revision) {
+	const uint32_t field = (revision << CPUINFO_ARM_MIDR_REVISION_OFFSET) & CPUINFO_ARM_MIDR_REVISION_MASK;
+	const uint32_t rest = midr & ~CPUINFO_ARM_MIDR_REVISION_MASK;
+	return rest | field;
+}
+
+/* Extracts the variant field of midr, shifted down so that its lowest bit is bit 0. */
+inline static uint32_t midr_get_variant(uint32_t midr) {
+	const uint32_t field = midr & CPUINFO_ARM_MIDR_VARIANT_MASK;
+	return field >> CPUINFO_ARM_MIDR_VARIANT_OFFSET;
+}
+
+/* Extracts the implementer field of midr, shifted down so that its lowest bit is bit 0. */
+inline static uint32_t midr_get_implementer(uint32_t midr) {
+	const uint32_t field = midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+	return field >> CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET;
+}
+
+/* Extracts the part field of midr, shifted down so that its lowest bit is bit 0. */
+inline static uint32_t midr_get_part(uint32_t midr) {
+	const uint32_t field = midr & CPUINFO_ARM_MIDR_PART_MASK;
+	return field >> CPUINFO_ARM_MIDR_PART_OFFSET;
+}
+
+/* Extracts the revision field of midr, shifted down so that its lowest bit is bit 0. */
+inline static uint32_t midr_get_revision(uint32_t midr) {
+	const uint32_t field = midr & CPUINFO_ARM_MIDR_REVISION_MASK;
+	return field >> CPUINFO_ARM_MIDR_REVISION_OFFSET;
+}
+
+/* Returns midr with its implementer field taken from other_midr; every other bit comes from midr. */
+inline static uint32_t midr_copy_implementer(uint32_t midr, uint32_t other_midr) {
+	const uint32_t donated = other_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+	const uint32_t retained = midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+	return retained | donated;
+}
+
+/* Returns midr with its variant field taken from other_midr; every other bit comes from midr. */
+inline static uint32_t midr_copy_variant(uint32_t midr, uint32_t other_midr) {
+	const uint32_t donated = other_midr & CPUINFO_ARM_MIDR_VARIANT_MASK;
+	const uint32_t retained = midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK;
+	return retained | donated;
+}
+
+/* Returns midr with its architecture field taken from other_midr; every other bit comes from midr. */
+inline static uint32_t midr_copy_architecture(uint32_t midr, uint32_t other_midr) {
+	const uint32_t donated = other_midr & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
+	const uint32_t retained = midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
+	return retained | donated;
+}
+
+/* Returns midr with its part field taken from other_midr; every other bit comes from midr. */
+inline static uint32_t midr_copy_part(uint32_t midr, uint32_t other_midr) {
+	const uint32_t donated = other_midr & CPUINFO_ARM_MIDR_PART_MASK;
+	const uint32_t retained = midr & ~CPUINFO_ARM_MIDR_PART_MASK;
+	return retained | donated;
+}
+
+/* Returns midr with its revision field taken from other_midr; every other bit comes from midr. */
+inline static uint32_t midr_copy_revision(uint32_t midr, uint32_t other_midr) {
+	const uint32_t donated = other_midr & CPUINFO_ARM_MIDR_REVISION_MASK;
+	const uint32_t retained = midr & ~CPUINFO_ARM_MIDR_REVISION_MASK;
+	return retained | donated;
+}
+
+/* True when the implementer and part fields of midr equal those of CPUINFO_ARM_MIDR_ARM1156. */
+inline static bool midr_is_arm1156(uint32_t midr) {
+	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+	/* XOR leaves a set bit wherever the two values disagree */
+	return ((midr ^ CPUINFO_ARM_MIDR_ARM1156) & compared_bits) == 0;
+}
+
+/* True when the implementer field is 0x41 and the top nibble of the part field is 0xB. */
+inline static bool midr_is_arm11(uint32_t midr) {
+	const uint32_t family_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | UINT32_C(0x0000F000);
+	const uint32_t family_bits = midr & family_mask;
+	return family_bits == UINT32_C(0x4100B000);
+}
+
+/* True when the implementer and part fields of midr equal those of CPUINFO_ARM_MIDR_CORTEX_A9. */
+inline static bool midr_is_cortex_a9(uint32_t midr) {
+	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+	/* XOR leaves a set bit wherever the two values disagree */
+	return ((midr ^ CPUINFO_ARM_MIDR_CORTEX_A9) & compared_bits) == 0;
+}
+
+/* True when the implementer/part bits of midr are one of the two values listed below (variant, architecture and revision are ignored). */
+inline static bool midr_is_scorpion(uint32_t midr) {
+	const uint32_t ident = midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK);
+	return ident == UINT32_C(0x510000F0) || ident == UINT32_C(0x510002D0);
+}
+
+/* True when the implementer/part bits of midr are one of the two values listed below (variant, architecture and revision are ignored). */
+inline static bool midr_is_krait(uint32_t midr) {
+	const uint32_t ident = midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK);
+	return ident == UINT32_C(0x510004D0) || ident == UINT32_C(0x510006F0);
+}
+
+/* True when the implementer and part fields of midr equal those of CPUINFO_ARM_MIDR_CORTEX_A53. */
+inline static bool midr_is_cortex_a53(uint32_t midr) {
+	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+	const uint32_t expected = CPUINFO_ARM_MIDR_CORTEX_A53 & compared_bits;
+	const uint32_t actual = midr & compared_bits;
+	return actual == expected;
+}
+
+/* True when the implementer and part fields of midr equal those of CPUINFO_ARM_MIDR_KRYO280_SILVER. */
+inline static bool midr_is_qualcomm_cortex_a53_silver(uint32_t midr) {
+	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+	const uint32_t expected = CPUINFO_ARM_MIDR_KRYO280_SILVER & compared_bits;
+	const uint32_t actual = midr & compared_bits;
+	return actual == expected;
+}
+
+/* True when the implementer and part fields of midr equal those of CPUINFO_ARM_MIDR_KRYO385_SILVER. */
+inline static bool midr_is_qualcomm_cortex_a55_silver(uint32_t midr) {
+	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+	const uint32_t expected = CPUINFO_ARM_MIDR_KRYO385_SILVER & compared_bits;
+	const uint32_t actual = midr & compared_bits;
+	return actual == expected;
+}
+
+/* True when the implementer and part fields of midr equal those of CPUINFO_ARM_MIDR_KRYO280_GOLD. */
+inline static bool midr_is_kryo280_gold(uint32_t midr) {
+	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+	/* XOR leaves a set bit wherever the two values disagree */
+	return ((midr ^ CPUINFO_ARM_MIDR_KRYO280_GOLD) & compared_bits) == 0;
+}
+
+/*
+ * True when midr, with variant and revision bits cleared, equals either
+ * CPUINFO_ARM_MIDR_KRYO_SILVER_820 or CPUINFO_ARM_MIDR_KRYO_SILVER_821.
+ */
+inline static bool midr_is_kryo_silver(uint32_t midr) {
+	const uint32_t ident = midr &
+		(CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK);
+	return ident == CPUINFO_ARM_MIDR_KRYO_SILVER_820 || ident == CPUINFO_ARM_MIDR_KRYO_SILVER_821;
+}
+
+/* True when the implementer and part fields of midr equal those of CPUINFO_ARM_MIDR_KRYO_GOLD. */
+inline static bool midr_is_kryo_gold(uint32_t midr) {
+	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+	/* XOR leaves a set bit wherever the two values disagree */
+	return ((midr ^ CPUINFO_ARM_MIDR_KRYO_GOLD) & compared_bits) == 0;
+}
+
+/*
+ * Ranks a core by the role it is expected to play in a heterogeneous (big.LITTLE-style)
+ * system: the higher the score, the "bigger" the core. Only the implementer and part
+ * fields of midr are examined. Cores that are not listed score 3.
+ */
+inline static uint32_t midr_score_core(uint32_t midr) {
+	const uint32_t ident_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+	uint32_t score;
+	switch (midr & ident_mask) {
+		/* Big role even next to Cortex-A75/-A76/-A77/-A78 */
+		case UINT32_C(0x4100D440): /* Cortex-X1 */
+		case UINT32_C(0x53000040): /* Exynos M5 */
+		case UINT32_C(0x53000030): /* Exynos M4 */
+			score = 6;
+			break;
+
+		/* Always in big role */
+		case UINT32_C(0x4100D080): /* Cortex-A72 */
+		case UINT32_C(0x4100D090): /* Cortex-A73 */
+		case UINT32_C(0x4100D0A0): /* Cortex-A75 */
+		case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+		case UINT32_C(0x4100D410): /* Cortex-A78 */
+		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+		case UINT32_C(0x51002050): /* Kryo Gold */
+		case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
+		case UINT32_C(0x51008020): /* Kryo 385 Gold */
+		case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */
+		case UINT32_C(0x53000020): /* Exynos M3 */
+		case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */
+		case UINT32_C(0x4E000030): /* Denver 2 */
+#if CPUINFO_ARCH_ARM
+		case UINT32_C(0x4100C0C0): /* Cortex-A12 */
+		case UINT32_C(0x4100C0D0): /* Rockchip RK3288 cores */
+		case UINT32_C(0x4100C0E0): /* Cortex-A17 */
+		case UINT32_C(0x4100C0F0): /* Cortex-A15 */
+#endif /* CPUINFO_ARCH_ARM */
+			score = 5;
+			break;
+
+		/* LITTLE next to Denver 2, big next to Cortex-A53 */
+		case UINT32_C(0x4100D070): /* Cortex-A57 */
+			score = 4;
+			break;
+
+		/* Usually LITTLE, but big next to Cortex-A35 */
+		case UINT32_C(0x4100D030): /* Cortex-A53 */
+		case UINT32_C(0x4100D050): /* Cortex-A55 */
+#if CPUINFO_ARCH_ARM64
+		case UINT32_C(0x4100D060): /* Cortex-A65 */
+#endif /* CPUINFO_ARCH_ARM64 */
+			score = 2;
+			break;
+
+		/* Always in LITTLE role */
+		case UINT32_C(0x51002010): /* Kryo Silver (Snapdragon 821) */
+		case UINT32_C(0x51002110): /* Kryo Silver (Snapdragon 820) */
+		case UINT32_C(0x51008010): /* Kryo 260 / 280 Silver */
+		case UINT32_C(0x51008030): /* Kryo 385 Silver */
+		case UINT32_C(0x51008050): /* Kryo 485 Silver */
+#if CPUINFO_ARCH_ARM
+		case UINT32_C(0x4100C070): /* Cortex-A7 */
+#endif /* CPUINFO_ARCH_ARM */
+		case UINT32_C(0x4100D040): /* Cortex-A35 */
+			score = 1;
+			break;
+
+		default:
+			/*
+			 * Unrecognized core, or a core without a big/LITTLE role. Placing it between the
+			 * Cortex-A57/A72/A73/A75 group and the Cortex-A53/A55 group means that a future core
+			 * paired with one of those known cores is still ordered correctly.
+			 */
+			score = 3;
+			break;
+	}
+	return score;
+}
+
+/*
+ * Guesses the MIDR of the LITTLE core that accompanies the given big core.
+ *
+ * The variant and revision fields of midr are ignored for the lookup. For a recognized big
+ * core the CPUINFO_ARM_MIDR_* reference value of its LITTLE partner is returned; for any
+ * other core midr is returned unchanged.
+ */
+inline static uint32_t midr_little_core_for_big(uint32_t midr) {
+	const uint32_t core_mask =
+		CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+	switch (midr & core_mask) {
+		case CPUINFO_ARM_MIDR_CORTEX_A75:
+			return CPUINFO_ARM_MIDR_CORTEX_A55;
+		case CPUINFO_ARM_MIDR_CORTEX_A73:
+		case CPUINFO_ARM_MIDR_CORTEX_A72:
+		case CPUINFO_ARM_MIDR_CORTEX_A57:
+		case CPUINFO_ARM_MIDR_EXYNOS_M1_M2:
+			return CPUINFO_ARM_MIDR_CORTEX_A53;
+		case CPUINFO_ARM_MIDR_CORTEX_A17:
+		case CPUINFO_ARM_MIDR_CORTEX_A15:
+			return CPUINFO_ARM_MIDR_CORTEX_A7;
+		/*
+		 * CPUINFO_ARM_MIDR_KRYO280_GOLD has non-zero variant and revision bits, which the
+		 * switch operand never has. Strip them from the label; with the raw constant this
+		 * case could never be selected.
+		 */
+		case (CPUINFO_ARM_MIDR_KRYO280_GOLD & ~(CPUINFO_ARM_MIDR_VARIANT_MASK | CPUINFO_ARM_MIDR_REVISION_MASK)):
+			return CPUINFO_ARM_MIDR_KRYO280_SILVER;
+		case CPUINFO_ARM_MIDR_KRYO_GOLD:
+			return CPUINFO_ARM_MIDR_KRYO_SILVER_820;
+		case CPUINFO_ARM_MIDR_DENVER2:
+			return CPUINFO_ARM_MIDR_CORTEX_A57;
+		default:
+			return midr;
+	}
+}
--- a/dep/cpuinfo/src/arm/tlb.c
+++ b/dep/cpuinfo/src/arm/tlb.c
@@ -0,0 +1,133 @@
+
+
+/*
+ * TLB reference notes, one case per ARM microarchitecture.
+ * Every case below only quotes the relevant Technical Reference Manual and then breaks;
+ * nothing is assigned or computed here. `uarch` is not declared in this fragment.
+ */
+switch (uarch) {
+	case cpuinfo_uarch_cortex_a5:
+		/*
+		 * Cortex-A5 Technical Reference Manual:
+		 * 6.3.1. Micro TLB
+		 *   The first level of caching for the page table information is a micro TLB of
+		 *   10 entries that is implemented on each of the instruction and data sides.
+		 * 6.3.2. Main TLB
+		 *   Misses from the instruction and data micro TLBs are handled by a unified main TLB.
+		 *   The main TLB is 128-entry two-way set-associative.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a7:
+		/*
+		 * Cortex-A7 MPCore Technical Reference Manual:
+		 * 5.3.1. Micro TLB
+		 *   The first level of caching for the page table information is a micro TLB of
+		 *   10 entries that is implemented on each of the instruction and data sides.
+		 * 5.3.2. Main TLB
+		 *   Misses from the micro TLBs are handled by a unified main TLB. This is a 256-entry 2-way
+		 *   set-associative structure. The main TLB supports all the VMSAv7 page sizes of
+		 *   4KB, 64KB, 1MB and 16MB in addition to the LPAE page sizes of 2MB and 1G.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a8:
+		/*
+		 * Cortex-A8 Technical Reference Manual:
+		 * 6.1. About the MMU
+		 *    The MMU features include the following:
+		 *     - separate, fully-associative, 32-entry data and instruction TLBs
+		 *     - TLB entries that support 4KB, 64KB, 1MB, and 16MB pages
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a9:
+		/*
+		 * ARM Cortex‑A9 Technical Reference Manual:
+		 * 6.2.1 Micro TLB
+		 *    The first level of caching for the page table information is a micro TLB of 32 entries on the data side,
+		 *    and configurable 32 or 64 entries on the instruction side.
+		 * 6.2.2 Main TLB
+		 *    The main TLB is implemented as a combination of:
+		 *     - A fully-associative, lockable array of four elements.
+		 *     - A 2-way associative structure of 2x32, 2x64, 2x128 or 2x256 entries.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a15:
+		/*
+		 * ARM Cortex-A15 MPCore Processor Technical Reference Manual:
+		 * 5.2.1. L1 instruction TLB
+		 *    The L1 instruction TLB is a 32-entry fully-associative structure. This TLB caches entries at the 4KB
+		 *    granularity of Virtual Address (VA) to Physical Address (PA) mapping only. If the page tables map the
+		 *    memory region to a larger granularity than 4K, it only allocates one mapping for the particular 4K region
+		 *    to which the current access corresponds.
+		 * 5.2.2. L1 data TLB
+		 *    There are two separate 32-entry fully-associative TLBs that are used for data loads and stores,
+		 *    respectively. Similar to the L1 instruction TLB, both of these cache entries at the 4KB granularity of
+		 *    VA to PA mappings only. At implementation time, the Cortex-A15 MPCore processor can be configured with
+		 *    the -l1tlb_1m option, to have the L1 data TLB cache entries at both the 4KB and 1MB granularity.
+		 *    With this configuration, any translation that results in a 1MB or larger page is cached in the L1 data
+		 *    TLB as a 1MB entry. Any translation that results in a page smaller than 1MB is cached in the L1 data TLB
+		 *    as a 4KB entry. By default, all translations are cached in the L1 data TLB as a 4KB entry.
+		 * 5.2.3. L2 TLB
+		 *    Misses from the L1 instruction and data TLBs are handled by a unified L2 TLB. This is a 512-entry 4-way
+		 *    set-associative structure. The L2 TLB supports all the VMSAv7 page sizes of 4K, 64K, 1MB and 16MB in
+		 *    addition to the LPAE page sizes of 2MB and 1GB.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a17:
+		/*
+		 * ARM Cortex-A17 MPCore Processor Technical Reference Manual:
+		 * 5.2.1. Instruction micro TLB
+		 *    The instruction micro TLB is implemented as a 32, 48 or 64 entry, fully-associative structure. This TLB
+		 *    caches entries at the 4KB and 1MB granularity of Virtual Address (VA) to Physical Address (PA) mapping
+		 *    only. If the translation tables map the memory region to a larger granularity than 4KB or 1MB, it only
+		 *    allocates one mapping for the particular 4KB region to which the current access corresponds.
+		 * 5.2.2. Data micro TLB
+		 *    The data micro TLB is a 32 entry fully-associative TLB that is used for data loads and stores. The cache
+		 *    entries have a 4KB and 1MB granularity of VA to PA mappings only.
+		 * 5.2.3. Unified main TLB
+		 *    Misses from the instruction and data micro TLBs are handled by a unified main TLB. This is a 1024 entry
+		 *    4-way set-associative structure. The main TLB supports all the VMSAv7 page sizes of 4K, 64K, 1MB and 16MB
+		 *    in addition to the LPAE page sizes of 2MB and 1GB.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a35:
+		/*
+		 * ARM Cortex‑A35 Processor Technical Reference Manual:
+		 * A6.2 TLB Organization
+		 *   Micro TLB
+		 *     The first level of caching for the translation table information is a micro TLB of ten entries that
+		 *     is implemented on each of the instruction and data sides.
+		 *   Main TLB
+		 *     A unified main TLB handles misses from the micro TLBs. It has a 512-entry, 2-way, set-associative
+		 *     structure and supports all VMSAv8 block sizes, except 1GB. If it fetches a 1GB block, the TLB splits
+		 *     it into 512MB blocks and stores the appropriate block for the lookup.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a53:
+		/*
+		 * ARM Cortex-A53 MPCore Processor Technical Reference Manual:
+		 * 5.2.1. Micro TLB
+		 *    The first level of caching for the translation table information is a micro TLB of ten entries that is
+		 *    implemented on each of the instruction and data sides.
+		 * 5.2.2. Main TLB
+		 *    A unified main TLB handles misses from the micro TLBs. This is a 512-entry, 4-way, set-associative
+		 *    structure. The main TLB supports all VMSAv8 block sizes, except 1GB. If a 1GB block is fetched, it is
+		 *    split into 512MB blocks and the appropriate block for the lookup stored.
+		 */
+		break;
+	case cpuinfo_uarch_cortex_a57:
+		/*
+		 * ARM® Cortex-A57 MPCore Processor Technical Reference Manual:
+		 * 5.2.1 L1 instruction TLB
+		 *    The L1 instruction TLB is a 48-entry fully-associative structure. This TLB caches entries of three
+		 *    different page sizes, natively 4KB, 64KB, and 1MB, of VA to PA mappings. If the page tables map the memory
+		 *    region to a larger granularity than 1MB, it only allocates one mapping for the particular 1MB region to
+		 *    which the current access corresponds.
+		 * 5.2.2 L1 data TLB
+		 *    The L1 data TLB is a 32-entry fully-associative TLB that is used for data loads and stores. This TLB
+		 *    caches entries of three different page sizes, natively 4KB, 64KB, and 1MB, of VA to PA mappings.
+		 * 5.2.3 L2 TLB
+		 *    Misses from the L1 instruction and data TLBs are handled by a unified L2 TLB. This is a 1024-entry 4-way
+		 *    set-associative structure. The L2 TLB supports the page sizes of 4K, 64K, 1MB and 16MB. It also supports
+		 *    page sizes of 2MB and 1GB for the long descriptor format translation in AArch32 state and in AArch64 state
+		 *    when using the 4KB translation granule. In addition, the L2 TLB supports the 512MB page map size defined
+		 *    for the AArch64 translations that use a 64KB translation granule.
+		 */
+		break;
+}
+
+
--- a/dep/cpuinfo/src/arm/uarch.c
+++ b/dep/cpuinfo/src/arm/uarch.c
@@ -0,0 +1,367 @@
+#include <stdint.h>
+
+#include <arm/api.h>
+#include <arm/midr.h>
+#include <cpuinfo/log.h>
+
+
+/*
+ * Decodes the CPU vendor and microarchitecture from a MIDR value.
+ *
+ * @param midr - MIDR value to decode; the implementer field selects the vendor and the part
+ *               field (for Samsung: variant + part) selects the microarchitecture.
+ * @param has_vfpv4 - (CPUINFO_ARCH_ARM builds only) used solely to tell Cortex-A5 apart from
+ *                    Scorpion for Qualcomm part 0x00F.
+ * @param[out] vendor - written for every recognized implementer; a few parts override the
+ *                      implementer's vendor (e.g. Kryo 2xx/3xx/4xx cores are reported as ARM).
+ * @param[out] uarch - written only when the part is recognized.
+ *
+ * When the part is not recognized a warning is logged and *uarch is left untouched; when the
+ * implementer is not recognized a warning is logged and neither output is written.
+ */
+void cpuinfo_arm_decode_vendor_uarch(
+	uint32_t midr,
+#if CPUINFO_ARCH_ARM
+	bool has_vfpv4,
+#endif /* CPUINFO_ARCH_ARM */
+#ifndef _MSC_VER
+	enum cpuinfo_vendor vendor[restrict static 1],
+	enum cpuinfo_uarch uarch[restrict static 1])
+#else
+	enum cpuinfo_vendor vendor[1],
+	enum cpuinfo_uarch uarch[1])
+#endif
+{
+	/* The implementer field is compared against ASCII character codes */
+	switch (midr_get_implementer(midr)) {
+		case 'A':
+			*vendor = cpuinfo_vendor_arm;
+			switch (midr_get_part(midr)) {
+#if CPUINFO_ARCH_ARM
+				case 0xC05:
+					*uarch = cpuinfo_uarch_cortex_a5;
+					break;
+				case 0xC07:
+					*uarch = cpuinfo_uarch_cortex_a7;
+					break;
+				case 0xC08:
+					*uarch = cpuinfo_uarch_cortex_a8;
+					break;
+				case 0xC09:
+					*uarch = cpuinfo_uarch_cortex_a9;
+					break;
+				case 0xC0C:
+					*uarch = cpuinfo_uarch_cortex_a12;
+					break;
+				case 0xC0E:
+					*uarch = cpuinfo_uarch_cortex_a17;
+					break;
+				case 0xC0D:
+					/*
+					 * Rockchip RK3288 only.
+					 * Core information is ambiguous: some sources specify Cortex-A12, others - Cortex-A17.
+					 * Assume it is Cortex-A12.
+					 */
+					*uarch = cpuinfo_uarch_cortex_a12;
+					break;
+				case 0xC0F:
+					*uarch = cpuinfo_uarch_cortex_a15;
+					break;
+#endif /* CPUINFO_ARCH_ARM */
+				case 0xD01:
+					*uarch = cpuinfo_uarch_cortex_a32;
+					break;
+				case 0xD03:
+					*uarch = cpuinfo_uarch_cortex_a53;
+					break;
+				case 0xD04:
+					*uarch = cpuinfo_uarch_cortex_a35;
+					break;
+				case 0xD05:
+					// Note: use Variant, not Revision, field
+					*uarch = (midr & CPUINFO_ARM_MIDR_VARIANT_MASK) == 0 ?
+						cpuinfo_uarch_cortex_a55r0 : cpuinfo_uarch_cortex_a55;
+					break;
+				case 0xD06:
+					*uarch = cpuinfo_uarch_cortex_a65;
+					break;
+				case 0xD07:
+					*uarch = cpuinfo_uarch_cortex_a57;
+					break;
+				case 0xD08:
+					*uarch = cpuinfo_uarch_cortex_a72;
+					break;
+				case 0xD09:
+					*uarch = cpuinfo_uarch_cortex_a73;
+					break;
+				case 0xD0A:
+					*uarch = cpuinfo_uarch_cortex_a75;
+					break;
+				case 0xD0B:
+					*uarch = cpuinfo_uarch_cortex_a76;
+					break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+				case 0xD0C:
+					*uarch = cpuinfo_uarch_neoverse_n1;
+					break;
+#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
+				case 0xD0D:
+					*uarch = cpuinfo_uarch_cortex_a77;
+					break;
+				case 0xD0E: /* Cortex-A76AE */
+					*uarch = cpuinfo_uarch_cortex_a76;
+					break;
+				case 0xD41: /* Cortex-A78 */
+					*uarch = cpuinfo_uarch_cortex_a78;
+					break;
+				case 0xD44: /* Cortex-X1 */
+					*uarch = cpuinfo_uarch_cortex_x1;
+					break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+				case 0xD4A:
+					*uarch = cpuinfo_uarch_neoverse_e1;
+					break;
+#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
+				default:
+					/* Fall back to classifying by the top nibble of the part number */
+					switch (midr_get_part(midr) >> 8) {
+#if CPUINFO_ARCH_ARM
+						case 7:
+							*uarch = cpuinfo_uarch_arm7;
+							break;
+						case 9:
+							*uarch = cpuinfo_uarch_arm9;
+							break;
+						case 11:
+							*uarch = cpuinfo_uarch_arm11;
+							break;
+#endif /* CPUINFO_ARCH_ARM */
+						default:
+							cpuinfo_log_warning("unknown ARM CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
+					}
+			}
+			break;
+		case 'B':
+			*vendor = cpuinfo_vendor_broadcom;
+			switch (midr_get_part(midr)) {
+				case 0x00F:
+					*uarch = cpuinfo_uarch_brahma_b15;
+					break;
+				case 0x100:
+					*uarch = cpuinfo_uarch_brahma_b53;
+					break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+				case 0x516:
+					/* Broadcom Vulcan was sold to Cavium before it reached the market, so we identify it as Cavium ThunderX2 */
+					*vendor = cpuinfo_vendor_cavium;
+					*uarch = cpuinfo_uarch_thunderx2;
+					break;
+#endif
+				default:
+					cpuinfo_log_warning("unknown Broadcom CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
+			}
+			break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+		case 'C':
+			*vendor = cpuinfo_vendor_cavium;
+			switch (midr_get_part(midr)) {
+				case 0x0A0: /* ThunderX */
+				case 0x0A1: /* ThunderX 88XX */
+				case 0x0A2: /* ThunderX 81XX */
+				case 0x0A3: /* ThunderX 83XX */
+					*uarch = cpuinfo_uarch_thunderx;
+					break;
+				case 0x0AF: /* ThunderX2 99XX */
+					*uarch = cpuinfo_uarch_thunderx2;
+					break;
+				default:
+					cpuinfo_log_warning("unknown Cavium CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
+			}
+			break;
+#endif
+		case 'H':
+			*vendor = cpuinfo_vendor_huawei;
+			switch (midr_get_part(midr)) {
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+				case 0xD01: /* Kunpeng 920 series */
+					*uarch = cpuinfo_uarch_taishan_v110;
+					break;
+#endif
+				case 0xD40: /* Kirin 980 Big/Medium cores -> Cortex-A76 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a76;
+					break;
+				default:
+					cpuinfo_log_warning("unknown Huawei CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
+			}
+			break;
+#if CPUINFO_ARCH_ARM
+		case 'i':
+			*vendor = cpuinfo_vendor_intel;
+			/* Only the top nibble of the part number is examined */
+			switch (midr_get_part(midr) >> 8) {
+				case 2: /* PXA 210/25X/26X */
+				case 4: /* PXA 27X */
+				case 6: /* PXA 3XX */
+					*uarch = cpuinfo_uarch_xscale;
+					break;
+				default:
+					cpuinfo_log_warning("unknown Intel CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
+			}
+			break;
+#endif /* CPUINFO_ARCH_ARM */
+		case 'N':
+			*vendor = cpuinfo_vendor_nvidia;
+			switch (midr_get_part(midr)) {
+				case 0x000:
+					*uarch = cpuinfo_uarch_denver;
+					break;
+				case 0x003:
+					*uarch = cpuinfo_uarch_denver2;
+					break;
+				case 0x004:
+					*uarch = cpuinfo_uarch_carmel;
+					break;
+				default:
+					cpuinfo_log_warning("unknown Nvidia CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
+			}
+			break;
+#if !defined(__ANDROID__)
+		case 'P':
+			*vendor = cpuinfo_vendor_apm;
+			switch (midr_get_part(midr)) {
+				case 0x000:
+					*uarch = cpuinfo_uarch_xgene;
+					break;
+				default:
+					cpuinfo_log_warning("unknown Applied Micro CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
+			}
+			break;
+#endif
+		case 'Q':
+			*vendor = cpuinfo_vendor_qualcomm;
+			switch (midr_get_part(midr)) {
+#if CPUINFO_ARCH_ARM
+				case 0x00F:
+					/* Mostly Scorpions, but some Cortex A5 may report this value as well */
+					if (has_vfpv4) {
+						/* Unlike Scorpion, Cortex-A5 comes with VFPv4 */
+						*vendor = cpuinfo_vendor_arm;
+						*uarch = cpuinfo_uarch_cortex_a5;
+					} else {
+						*uarch = cpuinfo_uarch_scorpion;
+					}
+					break;
+				case 0x02D: /* Dual-core Scorpions */
+					*uarch = cpuinfo_uarch_scorpion;
+					break;
+				case 0x04D:
+					/*
+					 * Dual-core Krait:
+					 * - r1p0 -> Krait 200
+					 * - r1p4 -> Krait 200
+					 * - r2p0 -> Krait 300
+					 */
+					/* Intentional fall-through: both part numbers map to Krait */
+				case 0x06F:
+					/*
+					 * Quad-core Krait:
+					 * - r0p1 -> Krait 200
+					 * - r0p2 -> Krait 200
+					 * - r1p0 -> Krait 300
+					 * - r2p0 -> Krait 400 (Snapdragon 800 MSMxxxx)
+					 * - r2p1 -> Krait 400 (Snapdragon 801 MSMxxxxPRO)
+					 * - r3p1 -> Krait 450
+					 */
+					*uarch = cpuinfo_uarch_krait;
+					break;
+#endif /* CPUINFO_ARCH_ARM */
+				case 0x201: /* Qualcomm Snapdragon 821: Low-power Kryo "Silver" */
+				case 0x205: /* Qualcomm Snapdragon 820 & 821: High-performance Kryo "Gold" */
+				case 0x211: /* Qualcomm Snapdragon 820: Low-power Kryo "Silver" */
+					*uarch = cpuinfo_uarch_kryo;
+					break;
+				case 0x800: /* High-performance Kryo 260 (r10p2) / Kryo 280 (r10p1) "Gold" -> Cortex-A73 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a73;
+					break;
+				case 0x801: /* Low-power Kryo 260 / 280 "Silver" -> Cortex-A53 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a53;
+					break;
+				case 0x802: /* High-performance Kryo 385 "Gold" -> Cortex-A75 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a75;
+					break;
+				case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55r0 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a55r0;
+					break;
+				case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a76;
+					break;
+				case 0x805: /* Low-power Kryo 485 "Silver" -> Cortex-A55 */
+					*vendor = cpuinfo_vendor_arm;
+					*uarch = cpuinfo_uarch_cortex_a55;
+					break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+				case 0xC00:
+					*uarch = cpuinfo_uarch_falkor;
+					break;
+				case 0xC01:
+					*uarch = cpuinfo_uarch_saphira;
+					break;
+#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
+				default:
+					cpuinfo_log_warning("unknown Qualcomm CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
+			}
+			break;
+		case 'S':
+			*vendor = cpuinfo_vendor_samsung;
+			/* Samsung cores are matched on the unshifted variant and part bits together */
+			switch (midr & (CPUINFO_ARM_MIDR_VARIANT_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+				case 0x00100010:
+					/*
+					 * Exynos 8890 MIDR = 0x531F0011, assume Exynos M1 has:
+					 * - CPU variant 0x1
+					 * - CPU part 0x001
+					 */
+					*uarch = cpuinfo_uarch_exynos_m1;
+					break;
+				case 0x00400010:
+					/*
+					 * Exynos 8895 MIDR = 0x534F0010, assume Exynos M2 has:
+					 * - CPU variant 0x4
+					 * - CPU part 0x001
+					 */
+					*uarch = cpuinfo_uarch_exynos_m2;
+					break;
+				case 0x00100020:
+					/*
+					 * Exynos 9810 MIDR = 0x531F0020, assume Exynos M3 has:
+					 * - CPU variant 0x1
+					 * - CPU part 0x002
+					 */
+					*uarch = cpuinfo_uarch_exynos_m3;
+					break;
+				case 0x00100030:
+					/*
+					 * Exynos 9820 MIDR = 0x531F0030, assume Exynos M4 has:
+					 * - CPU variant 0x1
+					 * - CPU part 0x003
+					 */
+					*uarch = cpuinfo_uarch_exynos_m4;
+					break;
+				case 0x00100040:
+					/*
+					 * Exynos 9820 MIDR = 0x531F0040, assume Exynos M5 has:
+					 * - CPU variant 0x1
+					 * - CPU part 0x004
+					 * NOTE(review): the SoC name repeats the Exynos M4 entry and looks copy-pasted;
+					 * Exynos M5 presumably belongs to Exynos 990 - confirm.
+					 */
+					*uarch = cpuinfo_uarch_exynos_m5;
+					break;
+				default:
+					cpuinfo_log_warning("unknown Samsung CPU variant 0x%01"PRIx32" part 0x%03"PRIx32" ignored",
+						midr_get_variant(midr), midr_get_part(midr));
+			}
+			break;
+#if CPUINFO_ARCH_ARM
+		case 'V':
+			*vendor = cpuinfo_vendor_marvell;
+			switch (midr_get_part(midr)) {
+				case 0x581: /* PJ4 / PJ4B */
+				case 0x584: /* PJ4B-MP / PJ4C */
+					*uarch = cpuinfo_uarch_pj4;
+					break;
+				default:
+					cpuinfo_log_warning("unknown Marvell CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
+			}
+			break;
+#endif /* CPUINFO_ARCH_ARM */
+		default:
+			cpuinfo_log_warning("unknown CPU implementer '%c' (0x%02"PRIx32") with CPU part 0x%03"PRIx32" ignored",
+				(char) midr_get_implementer(midr), midr_get_implementer(midr), midr_get_part(midr));
+	}
+}
--- a/dep/cpuinfo/src/arm/windows/api.h
+++ b/dep/cpuinfo/src/arm/windows/api.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <windows.h>
+
+#include <cpuinfo.h>
+#include <x86/api.h>
+
+/**
+ * Per-logical-processor record: package membership, frequency limits, system ID and flags.
+ *
+ * NOTE(review): this header is arm/windows/api.h, yet the struct is named *_linux_* and its
+ * field comments cite Linux sysfs paths - it looks carried over from the Linux backend.
+ * Confirm how (and whether) these fields are populated on Windows.
+ */
+struct cpuinfo_arm_linux_processor {
+	/**
+	 * Minimum processor ID on the package which includes this logical processor.
+	 * This value can serve as an ID for the cluster of logical processors: it is the
+	 * same for all logical processors on the same package.
+	 */
+	uint32_t package_leader_id;
+	/*
+	 * NOTE(review): a doc comment describing a per-core "minimum processor ID" stood here
+	 * with no field declaration following it. The struct has no such member.
+	 */
+	/**
+	 * Number of logical processors in the package.
+	 */
+	uint32_t package_processor_count;
+	/**
+	 * Maximum frequency, in kHz.
+	 * The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_max_freq
+	 * If failed to read or parse the file, the value is 0.
+	 */
+	uint32_t max_frequency;
+	/**
+	 * Minimum frequency, in kHz.
+	 * The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_min_freq
+	 * If failed to read or parse the file, the value is 0.
+	 */
+	uint32_t min_frequency;
+	/** Linux processor ID */
+	uint32_t system_processor_id;
+	/** Flag bits; their meaning is not defined in this header. */
+	uint32_t flags;
+};
--- a/Show More
+++ b/Show More