Compare commits

1 Commits

Author SHA1 Message Date
Stenzek
5f7037f347 Feature: Add scripting interface 2024-06-24 17:16:13 +10:00
539 changed files with 77913 additions and 49124 deletions

View File

@@ -34,37 +34,35 @@ jobs:
path: |
dep/msvc/deps-arm64
dep/msvc/deps-x64
key: deps ${{ hashFiles('scripts/deps/build-dependencies-windows-arm64.bat', 'scripts/deps/build-dependencies-windows-x64.bat') }}
key: deps ${{ hashFiles('scripts/build-dependencies-windows-arm64.bat', 'scripts/build-dependencies-windows-x64.bat') }}
- name: Build X64 Dependencies
if: steps.cache-deps.outputs.cache-hit != 'true'
env:
DEBUG: 0
run: scripts/deps/build-dependencies-windows-x64.bat
run: scripts/build-dependencies-windows-x64.bat
- name: Build ARM64 Dependencies
if: steps.cache-deps.outputs.cache-hit != 'true'
env:
DEBUG: 0
run: scripts/deps/build-dependencies-windows-arm64.bat
- name: Initialize build tag
shell: cmd
run: |
echo #pragma once > src/scmversion/tag.h
run: scripts/build-dependencies-windows-arm64.bat
- name: Tag as preview build
if: github.ref == 'refs/heads/master'
shell: cmd
run: |
echo #pragma once > src/scmversion/tag.h
echo #define SCM_RELEASE_ASSET "duckstation-windows-x64-release.zip" >> src/scmversion/tag.h
echo #define SCM_RELEASE_TAGS {"latest", "preview"} >> src/scmversion/tag.h
echo #define SCM_RELEASE_TAG "preview" >> src/scmversion/tag.h
- name: Tag as dev build
if: github.ref == 'refs/heads/dev'
shell: cmd
run: |
echo #pragma once > src/scmversion/tag.h
echo #define SCM_RELEASE_ASSET "duckstation-windows-x64-release.zip" >> src/scmversion/tag.h
echo #define SCM_RELEASE_TAGS {"latest", "preview"} >> src/scmversion/tag.h
echo #define SCM_RELEASE_TAG "latest" >> src/scmversion/tag.h
@@ -122,29 +120,25 @@ jobs:
path: |
dep/msvc/deps-arm64
dep/msvc/deps-x64
key: deps ${{ hashFiles('scripts/deps/build-dependencies-windows-arm64.bat', 'scripts/deps/build-dependencies-windows-x64.bat') }}
key: deps ${{ hashFiles('scripts/build-dependencies-windows-arm64.bat', 'scripts/build-dependencies-windows-x64.bat') }}
- name: Build X64 Dependencies
if: steps.cache-deps.outputs.cache-hit != 'true'
env:
DEBUG: 0
run: scripts/deps/build-dependencies-windows-x64.bat
run: scripts/build-dependencies-windows-x64.bat
- name: Build ARM64 Dependencies
if: steps.cache-deps.outputs.cache-hit != 'true'
env:
DEBUG: 0
run: scripts/deps/build-dependencies-windows-arm64.bat
- name: Initialize build tag
shell: cmd
run: |
echo #pragma once > src/scmversion/tag.h
run: scripts/build-dependencies-windows-arm64.bat
- name: Tag as preview build
if: github.ref == 'refs/heads/master'
shell: cmd
run: |
echo #pragma once > src/scmversion/tag.h
echo #define SCM_RELEASE_ASSET "duckstation-windows-arm64-release.zip" >> src/scmversion/tag.h
echo #define SCM_RELEASE_TAGS {"latest", "preview"} >> src/scmversion/tag.h
echo #define SCM_RELEASE_TAG "preview" >> src/scmversion/tag.h
@@ -153,6 +147,7 @@ jobs:
if: github.ref == 'refs/heads/dev'
shell: cmd
run: |
echo #pragma once > src/scmversion/tag.h
echo #define SCM_RELEASE_ASSET "duckstation-windows-arm64-release.zip" >> src/scmversion/tag.h
echo #define SCM_RELEASE_TAGS {"latest", "preview"} >> src/scmversion/tag.h
echo #define SCM_RELEASE_TAG "latest" >> src/scmversion/tag.h
@@ -198,6 +193,14 @@ jobs:
runs-on: ubuntu-22.04
timeout-minutes: 120
steps:
# Work around https://github.com/actions/runner-images/issues/8659
- name: Remove GCC 13 from runner image
shell: bash
run: |
sudo rm -f /etc/apt/sources.list.d/ubuntu-toolchain-r-ubuntu-test-jammy.list
sudo apt-get update
sudo apt-get install -y --allow-downgrades 'libc6=2.35-0ubuntu*' 'libc6-dev=2.35-0ubuntu*' libstdc++6=12.3.0-1ubuntu1~22.04 libgcc-s1=12.3.0-1ubuntu1~22.04
- uses: actions/checkout@v4.1.6
with:
fetch-depth: 0
@@ -223,19 +226,16 @@ jobs:
uses: actions/cache@v4.0.2
with:
path: ~/deps
key: deps ${{ hashFiles('scripts/deps/build-dependencies-linux.sh') }}
key: deps ${{ hashFiles('scripts/build-dependencies-linux.sh') }}
- name: Build Dependencies
if: steps.cache-deps.outputs.cache-hit != 'true'
run: scripts/deps/build-dependencies-linux.sh "$HOME/deps"
- name: Initialize build tag
run: |
echo '#pragma once' > src/scmversion/tag.h
run: scripts/build-dependencies-linux.sh "$HOME/deps"
- name: Tag as preview build
if: github.ref == 'refs/heads/master'
run: |
echo '#pragma once' > src/scmversion/tag.h
echo '#define SCM_RELEASE_ASSET "DuckStation-x64.AppImage"' >> src/scmversion/tag.h
echo '#define SCM_RELEASE_TAGS {"latest", "preview"}' >> src/scmversion/tag.h
echo '#define SCM_RELEASE_TAG "preview"' >> src/scmversion/tag.h
@@ -243,6 +243,7 @@ jobs:
- name: Tag as dev build
if: github.ref == 'refs/heads/dev'
run: |
echo '#pragma once' > src/scmversion/tag.h
echo '#define SCM_RELEASE_ASSET "DuckStation-x64.AppImage"' >> src/scmversion/tag.h
echo '#define SCM_RELEASE_TAGS {"latest", "preview"}' >> src/scmversion/tag.h
echo '#define SCM_RELEASE_TAG "latest"' >> src/scmversion/tag.h
@@ -282,10 +283,6 @@ jobs:
shell: bash
run: git config --global --add safe.directory "*"
- name: Initialize build tag
run: |
echo '#pragma once' > src/scmversion/tag.h
- name: Generate AppStream XML
run: |
scripts/generate-metainfo.sh scripts/flatpak
@@ -314,7 +311,7 @@ jobs:
- name: Push to Flathub stable
if: github.ref == 'refs/heads/dev'
uses: flathub-infra/flatpak-github-actions/flat-manager@b6c92176b7f578aedd80cac74cd8f0336f618e89
uses: flathub-infra/flatpak-github-actions/flat-manager@23796715b3dfa4c86ddf50cf29c3cc8b3c82dca8
with:
flat-manager-url: https://hub.flathub.org/
repository: stable
@@ -353,19 +350,16 @@ jobs:
uses: actions/cache@v4.0.2
with:
path: ~/deps
key: deps-mac ${{ hashFiles('scripts/deps/build-dependencies-mac.sh') }}
key: deps-mac ${{ hashFiles('scripts/build-dependencies-mac.sh') }}
- name: Build Dependencies
if: steps.cache-deps-mac.outputs.cache-hit != 'true'
run: scripts/deps/build-dependencies-mac.sh "$HOME/deps"
- name: Initialize build tag
run: |
echo '#pragma once' > src/scmversion/tag.h
run: scripts/build-dependencies-mac.sh "$HOME/deps"
- name: Tag as preview build
if: github.ref == 'refs/heads/master'
run: |
echo '#pragma once' > src/scmversion/tag.h
echo '#define SCM_RELEASE_ASSET "duckstation-mac-release.zip"' >> src/scmversion/tag.h
echo '#define SCM_RELEASE_TAGS {"latest", "preview"}' >> src/scmversion/tag.h
echo '#define SCM_RELEASE_TAG "preview"' >> src/scmversion/tag.h
@@ -373,6 +367,7 @@ jobs:
- name: Tag as dev build
if: github.ref == 'refs/heads/dev'
run: |
echo '#pragma once' > src/scmversion/tag.h
echo '#define SCM_RELEASE_ASSET "duckstation-mac-release.zip"' >> src/scmversion/tag.h
echo '#define SCM_RELEASE_TAGS {"latest", "preview"}' >> src/scmversion/tag.h
echo '#define SCM_RELEASE_TAG "latest"' >> src/scmversion/tag.h

View File

@@ -1,36 +0,0 @@
# Workflow: looks up the cached Windows dependency builds (x64 and ARM64) and,
# when the cache lookup hits, publishes them as a downloadable artifact.
name: Upload Caches
# Started by hand only; no push or pull-request triggers are declared.
on:
workflow_dispatch:
jobs:
upload-windows-cache:
runs-on: windows-2022
timeout-minutes: 120
steps:
- uses: actions/checkout@v4.1.6
with:
fetch-depth: 0
# Cache lookup for both dependency directories. The key is derived from a hash
# of the two Windows dependency build scripts, so it changes whenever either
# script changes.
- name: Cache Dependencies
id: cache-deps
uses: actions/cache@v4.0.2
with:
path: |
dep/msvc/deps-arm64
dep/msvc/deps-x64
key: deps ${{ hashFiles('scripts/deps/build-dependencies-windows-arm64.bat', 'scripts/deps/build-dependencies-windows-x64.bat') }}
# Runs only on a cache hit: packs each dependency directory into its own zip
# using the 7-Zip executable at its fixed install path.
- name: Zip Cache Files
if: steps.cache-deps.outputs.cache-hit == 'true'
shell: cmd
run: |
"C:\Program Files\7-Zip\7z.exe" a -r deps-x64.zip ./dep/msvc/deps-x64
"C:\Program Files\7-Zip\7z.exe" a -r deps-arm64.zip ./dep/msvc/deps-arm64
# Runs only on a cache hit: uploads every deps-*.zip produced above as a
# single artifact named "windows".
- name: Upload Cache Files
if: steps.cache-deps.outputs.cache-hit == 'true'
uses: actions/upload-artifact@v4.3.3
with:
name: "windows"
path: "deps-*.zip"

1
.gitignore vendored
View File

@@ -8,7 +8,6 @@
# dependency build temp files
deps-build/
/deps/
# vs stuff
.vs

View File

@@ -9,16 +9,13 @@ endif()
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
find_package(SDL2 2.30.6 REQUIRED)
find_package(SDL2 2.30.4 REQUIRED)
find_package(Zstd 1.5.6 REQUIRED)
find_package(WebP REQUIRED) # v1.4.0, spews an error on Linux because no pkg-config.
find_package(ZLIB REQUIRED) # 1.3, but Mac currently doesn't use it.
find_package(PNG 1.6.40 REQUIRED)
find_package(JPEG REQUIRED) # No version because flatpak uses libjpeg-turbo.
find_package(Freetype 2.11.1 REQUIRED)
find_package(cpuinfo REQUIRED)
find_package(DiscordRPC 3.4.0 REQUIRED)
find_package(SoundTouch 2.3.3 REQUIRED)
if(NOT WIN32)
find_package(CURL REQUIRED)

View File

@@ -57,8 +57,6 @@ function(detect_architecture)
if("x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES)
message(STATUS "Building x86_64 MacOS binaries.")
set(CPU_ARCH_X64 TRUE PARENT_SCOPE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xarch_x86_64 -msse4.1" PARENT_SCOPE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xarch_x86_64 -msse4.1" PARENT_SCOPE)
endif()
if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
message(STATUS "Building ARM64 MacOS binaries.")
@@ -69,10 +67,6 @@ function(detect_architecture)
CMAKE_SIZEOF_VOID_P EQUAL 8)
message(STATUS "Building x86_64 binaries.")
set(CPU_ARCH_X64 TRUE PARENT_SCOPE)
if(NOT MSVC OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1" PARENT_SCOPE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1" PARENT_SCOPE)
endif()
elseif(("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64") AND
CMAKE_SIZEOF_VOID_P EQUAL 8) # Might have an A64 kernel, e.g. Raspbian.
message(STATUS "Building ARM64 binaries.")

View File

@@ -16,8 +16,7 @@ The following people have contributed to the project in some way, and are credit
- posix - @Richard-L, blexx - German
- @phoe-nix, @zkdpower - Chinese (Simplified)
- Sorer - @MojoJojoDojo - Hebrew
- Hipnosis - @Hipnosis183, MrHomunculus, @falsepopsky - Spanish, Spanish (Latin America)
- @IlDucci - Spanish (Spain)
- Hipnosis - @Hipnosis183, MrHomunculus, @falsepopsky - Spanish
- @RaydenX93 - Italian
- @r57zone - Russian
- @6lackmag3 - Russian (Android)

View File

@@ -50,15 +50,13 @@ Other features include:
- Automatic loading/applying of PPF patches.
## System Requirements
- A CPU faster than a potato. But it needs to be x86_64 (SSE4.1), AArch32/armv7, AArch64/ARMv8, or RISC-V/RV64.
- A CPU faster than a potato. But it needs to be x86_64, AArch32/armv7, AArch64/ARMv8, or RISC-V/RV64.
- For the hardware renderers, a GPU capable of OpenGL 3.1/OpenGL ES 3.1/Direct3D 11 Feature Level 10.0 (or Vulkan 1.0) and above. So, basically anything made in the last 10 years or so.
- SDL, XInput or DInput compatible game controller (e.g. XB360/XBOne/XBSeries). DualShock 3 users on Windows will need to install the official DualShock 3 drivers included as part of PlayStation Now.
## Downloading and running
Binaries of DuckStation for Windows x64/ARM64, Linux x86_64 (in AppImage/Flatpak formats), and macOS Universal Binaries are available via GitHub Releases and are automatically built with every commit/push. Binaries or packages distributed through other sources may be out of date and are not supported by the developer; please speak to whoever distributes them for support, not us.
For x86 machines (most systems), you will need a CPU that supports the SSE4.1 instruction set. This includes all CPUs manufactured after 2007. If you want to use DuckStation with a CPU that is older, [v0.1-6995](https://github.com/stenzek/duckstation/releases/tag/v0.1-6995) is the last version that does not require SSE4.1.
### Windows
DuckStation **requires** Windows 10/11, specifically version 1809 or newer. If you are still using Windows 7/8/8.1, DuckStation **will not run** on your operating system. Running these operating systems in 2023 should be considered a security risk, and I would recommend updating to something which receives vendor support.
@@ -166,7 +164,7 @@ alsa-lib-devel brotli-devel clang cmake dbus-devel egl-wayland-devel extra-cmake
#### Building
1. Clone the repository: `git clone https://github.com/stenzek/duckstation.git`, `cd duckstation`.
2. Build dependencies. You can save these outside of the tree if you like. This will take a while. `scripts/deps/build-dependencies-linux.sh deps`.
2. Build dependencies. You can save these outside of the tree if you like. This will take a while. `scripts/build-dependencies-linux.sh deps`.
3. Run CMake to configure the build system. Assuming a build subdirectory of `build-release`, run `cmake -B build-release -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_EXE_LINKER_FLAGS_INIT="-fuse-ld=lld" -DCMAKE_MODULE_LINKER_FLAGS_INIT="-fuse-ld=lld" -DCMAKE_SHARED_LINKER_FLAGS_INIT="-fuse-ld=lld" -DCMAKE_PREFIX_PATH="$PWD/deps" -G Ninja`. If you want a release (optimized) build, include `-DCMAKE_BUILD_TYPE=Release -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON`.
4. Compile the source code. For the example above, run `ninja -C build-release`
5. Run the binary, located in the build directory under `./build-release/bin/duckstation-qt`.
@@ -179,7 +177,7 @@ Requirements:
1. Clone the repository: `git clone https://github.com/stenzek/duckstation.git`.
2. Build the dependencies. This will take a while. `scripts/deps/build-dependencies-mac.sh deps`.
2. Build the dependencies. This will take a while. `scripts/build-dependencies-mac.sh deps`.
3. Run CMake to configure the build system: `cmake -Bbuild-release -DCMAKE_BUILD_TYPE=Release -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON -DCMAKE_PREFIX_PATH="$PWD/deps"`.
4. Compile the source code: `cmake --build build-release --parallel`.
5. Run the binary, located in the build directory under `bin/DuckStation.app`.

View File

@@ -3,7 +3,6 @@
# Windows
03000000300f00000a01000000000000,3 In 1 Conversion Box,a:b2,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a3,righty:a2,start:b8,x:b3,y:b0,platform:Windows,
03000000fa190000918d000000000000,3 In 1 Conversion Box,a:b2,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a3,righty:a2,start:b8,x:b3,y:b0,platform:Windows,
03000000fa2d00000100000000000000,3dRudder Foot Motion Controller,leftx:a0,lefty:a1,rightx:a5,righty:a2,platform:Windows,
03000000d0160000040d000000000000,4Play Adapter,a:b1,b:b3,back:b4,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,leftshoulder:b6,leftstick:b14,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b15,righttrigger:b9,rightx:a3,righty:a4,start:b5,x:b0,y:b2,platform:Windows,
03000000d0160000050d000000000000,4Play Adapter,a:b1,b:b3,back:b4,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,leftshoulder:b6,leftstick:b14,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b15,righttrigger:b9,rightx:a3,righty:a4,start:b5,x:b0,y:b2,platform:Windows,
@@ -485,7 +484,7 @@
03000000f0250000c183000000000000,PlayStation Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
03000000d9040000160f000000000000,PlayStation Controller Adapter,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a3,righty:a2,start:b9,x:b3,y:b0,platform:Windows,
030000004c0500003713000000000000,PlayStation Vita,a:b1,b:b2,back:b8,dpdown:b13,dpleft:b15,dpright:b14,dpup:b12,leftshoulder:b4,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a3,righty:a4,start:b9,x:b0,y:b3,platform:Windows,
03000000d620000011a7000000000000,PowerA Core Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
03000000d620000011a7000000000000,PowerA Core Plus GameCube Controller,a:b1,b:b0,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Windows,
03000000dd62000015a7000000000000,PowerA Fusion Nintendo Switch Arcade Stick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
03000000d620000012a7000000000000,PowerA Fusion Nintendo Switch Fight Pad,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
03000000dd62000016a7000000000000,PowerA Fusion Pro Nintendo Switch Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
@@ -874,7 +873,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
03000000050b00000045000031000000,ASUS Gamepad,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b7,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b8,righttrigger:a4,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Mac OS X,
03000000050b00000579000000010000,ASUS ROG Kunai 3,a:b0,b:b1,back:b12,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b14,leftshoulder:b6,leftstick:b15,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b42,paddle1:b9,paddle2:b11,rightshoulder:b7,rightstick:b16,righttrigger:a4,rightx:a2,righty:a3,start:b13,x:b3,y:b4,platform:Mac OS X,
03000000050b00000679000000010000,ASUS ROG Kunai 3,a:b0,b:b1,back:b12,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b14,leftshoulder:b6,leftstick:b15,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b23,rightshoulder:b7,rightstick:b16,righttrigger:a4,rightx:a2,righty:a3,start:b13,x:b3,y:b4,platform:Mac OS X,
03000000503200000110000045010000,Atari VCS Classic,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b3,start:b2,platform:Mac OS X,
03000000503200000110000045010000,Atari VCS Classic,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b3,start:b2,platform:MacOSX
03000000503200000110000047010000,Atari VCS Classic Controller,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b3,start:b2,platform:Mac OS X,
03000000503200000210000047010000,Atari VCS Modern Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b9,leftshoulder:b4,leftstick:b6,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a4,rightx:a2,righty:a3,start:b8,x:b2,y:b3,platform:Mac OS X,
030000008a3500000102000000010000,Backbone One,a:b0,b:b1,back:b16,dpdown:b11,dpleft:b13,dpright:b12,dpup:b10,guide:b17,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3~,start:b15,x:b2,y:b3,platform:Mac OS X,
@@ -1082,7 +1081,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
030000005e040000d102000000000000,Xbox One Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
030000005e040000dd02000000000000,Xbox One Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
030000005e040000e002000000000000,Xbox One Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b8,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Mac OS X,
030000005e040000e002000003090000,Xbox One Controller,a:b0,b:b1,x:b2,y:b3,back:b6,guide:b10,start:b7,leftstick:b8,rightstick:b9,leftshoulder:b4,rightshoulder:b5,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:a2,righttrigger:a5,platform:Mac OS X,
030000005e040000e002000003090000,Xbox One Controller,a:b0,b:b1,back:b16,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
030000005e040000e302000000000000,Xbox One Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
030000005e040000ea02000000000000,Xbox One Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
030000005e040000fd02000003090000,Xbox One Controller,a:b0,b:b1,back:b16,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
@@ -1092,7 +1091,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
030000005e040000130b000009050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b15,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
030000005e040000130b000013050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
030000005e040000130b000015050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
030000005e040000130b000007050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
030000005e040000130b000017050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
030000005e040000220b000017050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
03000000172700004431000029010000,XiaoMi Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a6,rightx:a2,righty:a5,start:b11,x:b3,y:b4,platform:Mac OS X,
@@ -1391,7 +1389,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
050000005e040000050b000003090000,Microsoft Xbox One Elite 2,a:b0,b:b1,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a6,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a5,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
050000005e0400008e02000030110000,Microsoft Xbox One Elite 2,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,paddle1:b11,paddle2:b13,paddle3:b12,paddle4:b14,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
030000005e040000120b00000b050000,Microsoft Xbox Series Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
060000005e040000120b000001050000,Microsoft Xbox Series X Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
060000005e040000120b000001050000,Microsoft Xbox Series X Controller,a:b0,b:b1,x:b2,y:b3,back:b6,start:b7,guide:b8,leftshoulder:b4,rightshoulder:b5,leftstick:b9,rightstick:b10,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:a2,righttrigger:a5,dpup:h0.1,dpleft:h0.8,dpdown:h0.4,dpright:h0.2,platform:Linux,
03000000030000000300000002000000,Miroof,a:b1,b:b0,back:b6,leftshoulder:b4,leftx:a0,lefty:a1,rightshoulder:b5,start:b7,x:b3,y:b2,platform:Linux,
03000000790000001c18000010010000,Mobapad Chitu HD,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
050000004d4f435554452d3035335800,Mocute 053X,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
@@ -1469,7 +1467,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
030000004c0500003713000011010000,PlayStation Vita,a:b1,b:b2,back:b8,dpdown:b13,dpleft:b15,dpright:b14,dpup:b12,leftshoulder:b4,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a3,righty:a4,start:b9,x:b0,y:b3,platform:Linux,
03000000c62400000053000000010000,PowerA,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
03000000c62400003a54000001010000,PowerA 1428124-01,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
03000000d620000011a7000011010000,PowerA Core Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
03000000d620000011a7000011010000,PowerA Core Plus Gamecube Controller,a:b1,b:b0,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
03000000dd62000015a7000011010000,PowerA Fusion Nintendo Switch Arcade Stick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
03000000d620000012a7000011010000,PowerA Fusion Nintendo Switch Fight Pad,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
03000000d62000000140000001010000,PowerA Fusion Pro 2 Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
@@ -1525,8 +1523,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
03000000300f00001211000011010000,Qanba Arcade Joystick,a:b2,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b5,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b6,start:b9,x:b1,y:b3,platform:Linux,
03000000222c00000225000011010000,Qanba Dragon Arcade Joystick (PS3),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
03000000222c00000025000011010000,Qanba Dragon Arcade Joystick (PS4),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Linux,
03000000222c00001220000011010000,Qanba Drone 2 Arcade Joystick (PS4),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
03000000222c00001020000011010000,Qanba Drone 2 Arcade Joystick (PS5),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
03000000222c00000020000011010000,Qanba Drone Arcade PS4 Joystick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,rightshoulder:b5,righttrigger:a4,start:b9,touchpad:b13,x:b0,y:b3,platform:Linux,
03000000300f00001210000010010000,Qanba Joystick Plus,a:b0,b:b1,back:b8,leftshoulder:b5,lefttrigger:b7,leftx:a0,lefty:a1,rightshoulder:b4,righttrigger:b6,start:b9,x:b2,y:b3,platform:Linux,
03000000222c00000223000011010000,Qanba Obsidian Arcade Joystick (PS3),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
@@ -1693,7 +1689,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
050000005e040000220b000013050000,Xbox One Elite 2 Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
050000005e040000050b000002090000,Xbox One Elite Series 2,a:b0,b:b1,back:b136,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:a6,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a5,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
030000005e040000ea02000011050000,Xbox One S Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
050082795e040000e002000003090000,Xbox One S Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
060000005e040000ea0200000b050000,Xbox One S Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
060000005e040000ea0200000d050000,Xbox One S Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
030000005e040000120b000001050000,Xbox Series Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,

View File

@@ -956,10 +956,6 @@ SLES-02089:
controllers:
- AnalogController
- DigitalController
traits:
- ForceSoftwareRendererForReadbacks # 250+ readbacks per frame when loading...
settings:
displayLineStartOffset: 2 # Game doesn't fill the whole framebuffer, stops flicker.
metadata:
publisher: "Cryo Interactive"
developer: "Smart Dog"
@@ -3181,8 +3177,7 @@ SCPS-10126:
- AnalogController
- DigitalController
traits:
- ForceAccurateBlending # Requires 16-bit blend precision
- DisableTrueColor # to fix screen flicker.
- ForceSoftwareRenderer
metadata:
publisher: "Sony"
developer: "Sony"
@@ -3204,9 +3199,6 @@ SLES-04108:
controllers:
- AnalogController
- DigitalController
traits:
- ForceAccurateBlending # Requires 16-bit blend precision
- DisableTrueColor # to fix screen flicker.
metadata:
publisher: "Vivendi Universal Games, Inc"
developer: "Coktel Vision / Neko Entertaiment"
@@ -3856,8 +3848,6 @@ SLPS-00269:
name: "Air Management '96 (Japan)"
controllers:
- DigitalController
traits:
- ForceRecompilerICache # Prevents crashes.
metadata:
publisher: "Koei"
developer: "Koei"
@@ -15420,8 +15410,6 @@ SLPS-01222:
- SLPS-01223
controllers:
- DigitalController
traits:
- DisableWidescreen
metadata:
publisher: "Capcom"
developer: "Capcom Production Studio 4"
@@ -15445,8 +15433,6 @@ SLPS-01223:
- SLPS-01223
controllers:
- DigitalController
traits:
- DisableWidescreen
metadata:
publisher: "Capcom"
developer: "Capcom Production Studio 4"
@@ -15463,8 +15449,6 @@ SLPS-01223:
linkCable: false
SLPS-00999:
name: "Biohazard 2 (Japan) (Trial Edition)"
traits:
- DisableWidescreen
SLPS-01510:
name: "Biohazard 2 - Dual Shock Ver. (Japan) (Disc 1) (Leon-hen)"
discSet:
@@ -15475,8 +15459,6 @@ SLPS-01510:
controllers:
- AnalogController
- DigitalController
traits:
- DisableWidescreen
metadata:
publisher: "Capcom"
developer: "Capcom Production Studio 4"
@@ -15501,8 +15483,6 @@ SLPS-01511:
controllers:
- AnalogController
- DigitalController
traits:
- DisableWidescreen
metadata:
publisher: "Capcom"
developer: "Capcom Production Studio 4"
@@ -15522,9 +15502,6 @@ SLPS-02300:
controllers:
- AnalogController
- DigitalController
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
metadata:
publisher: "Capcom"
developer: "Capcom Production Studio 4"
@@ -15541,17 +15518,11 @@ SLPS-02300:
linkCable: false
SLPM-80485:
name: "Biohazard 3 - Last Escape (Japan) (Demo)"
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
SLPM-87224:
name: "Biohazard 3 - Last Escape (Japan) (Rev 1)"
controllers:
- AnalogController
- DigitalController
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
metadata:
publisher: "Capcom"
developer: "Capcom Production Studio 4"
@@ -19005,8 +18976,6 @@ SLES-01304:
name: "Breath of Fire III (Europe)"
controllers:
- DigitalController
traits:
- ForcePGXPCPUMode # Fixes jittery sprites.
metadata:
publisher: "Infogrames"
developer: "Capcom"
@@ -19025,8 +18994,6 @@ SLES-01319:
name: "Breath of Fire III (France)"
controllers:
- DigitalController
traits:
- ForcePGXPCPUMode # Fixes jittery sprites.
metadata:
publisher: "Infogrames"
developer: "Capcom"
@@ -19045,8 +19012,6 @@ SLES-01320:
name: "Breath of Fire III (Germany)"
controllers:
- DigitalController
traits:
- ForcePGXPCPUMode # Fixes jittery sprites.
metadata:
publisher: "Infogrames"
developer: "Capcom"
@@ -19065,8 +19030,6 @@ SLPS-00990:
name: "Breath of Fire III (Japan)"
controllers:
- DigitalController
traits:
- ForcePGXPCPUMode # Fixes jittery sprites.
metadata:
publisher: "Capcom"
developer: "Capcom"
@@ -19083,8 +19046,6 @@ SLPS-00990:
linkCable: false
SLPM-80115:
name: "Breath of Fire III (Japan) (Demo)"
traits:
- ForcePGXPCPUMode # Fixes jittery sprites.
SLUS-00422:
name: "Breath of Fire III (USA)"
compatibility:
@@ -19092,8 +19053,6 @@ SLUS-00422:
versionTested: "0.1-1072-g840a806"
controllers:
- DigitalController
traits:
- ForcePGXPCPUMode # Fixes jittery sprites.
metadata:
publisher: "Capcom"
developer: "Capcom"
@@ -19112,8 +19071,6 @@ SLPM-86720:
name: "Breath of Fire III [PlayStation the Best] (aka Breath of Fire 3 [PlayStation the Best])"
controllers:
- DigitalController
traits:
- ForcePGXPCPUMode # Fixes jittery sprites.
metadata:
publisher: "Capcom"
developer: "Capcom"
@@ -22797,8 +22754,7 @@ SLES-01182:
- DigitalController
- NeGcon
settings:
dmaMaxSliceTicks: 500 # Needs smaller sizes to avoid menu corruption.
dmaHaltTicks: 250
dmaMaxSliceTicks: 100
metadata:
publisher: "THQ"
developer: "Interactive Entertainment"
@@ -22822,8 +22778,7 @@ SLUS-00882:
rating: NoIssues
comments: "Intro logos require the software renderer to display correctly."
settings:
dmaMaxSliceTicks: 500 # Needs smaller sizes to avoid menu corruption.
dmaHaltTicks: 250
dmaMaxSliceTicks: 100
controllers:
- AnalogController
- DigitalController
@@ -30723,9 +30678,6 @@ SCPS-10003:
name: "Crime Crackers (Japan)"
controllers:
- DigitalController
settings:
dmaMaxSliceTicks: 100 # Stops DMA from blazing past the deferred CDROM async interrupt.
displayActiveEndOffset: -1 # Fixes garbage on edge of screen in cutscenes.
codes:
- HASH-111C340E270B10A8
metadata:
@@ -43231,7 +43183,6 @@ SLES-00132:
- DigitalController
traits:
- DisablePGXP # 2.5D, not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
@@ -43256,7 +43207,6 @@ SLPS-00308:
- DigitalController
traits:
- DisablePGXP # 2.5D, not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
@@ -43282,7 +43232,6 @@ SLUS-00077:
- DigitalController
traits:
- DisablePGXP # 2.5D, not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
@@ -46298,10 +46247,9 @@ SLES-00703:
- AnalogController
- DigitalController
traits:
- DisablePGXP # 2.5D, not beneficial, breaks rendering.
- DisableWidescreen # No effect.
- DisablePGXP # 2.5D, not beneficial.
settings:
gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
publisher: "GT Interactive"
developer: "3D Realms Entertainment"
@@ -46322,10 +46270,9 @@ SLES-00987:
- AnalogController
- DigitalController
traits:
- DisablePGXP # 2.5D, not beneficial, breaks rendering.
- DisableWidescreen # No effect.
- DisablePGXP # 2.5D, not beneficial.
settings:
gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
publisher: "GT Interactive"
developer: "3D Realms Entertainment"
@@ -46343,17 +46290,14 @@ SLES-00987:
SLED-01027:
name: "Duke Nukem (France) (Demo)"
traits:
- DisablePGXP # 2.5D, not beneficial, breaks rendering.
- DisableWidescreen # No effect.
- DisablePGXP # 2.5D, not beneficial.
settings:
gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
SLES-03405:
name: "Duke Nukem - Land of the Babes (Europe) (En,Fr,De,Es,It)"
controllers:
- AnalogController
- DigitalController
traits:
- ForcePGXPCPUMode # Improves wall texture wobble.
metadata:
publisher: "GT Interactive Software Corp"
developer: "N-Space"
@@ -46377,8 +46321,6 @@ SLES-03440:
controllers:
- AnalogController
- DigitalController
traits:
- ForcePGXPCPUMode # Improves wall texture wobble.
metadata:
publisher: "GT Interactive Software Corp"
developer: "N-Space"
@@ -46401,8 +46343,6 @@ SLUS-01002:
controllers:
- AnalogController
- DigitalController
traits:
- ForcePGXPCPUMode # Improves wall texture wobble.
metadata:
publisher: "GT Interactive Software Corp"
developer: "N-Space"
@@ -46422,8 +46362,6 @@ SLES-01515:
controllers:
- AnalogController
- DigitalController
traits:
- ForcePGXPCPUMode # Improves wall texture wobble.
metadata:
publisher: "GT Interactive Software Corp"
developer: "N-Space"
@@ -46443,8 +46381,6 @@ SLES-03517:
controllers:
- AnalogController
- DigitalController
traits:
- ForcePGXPCPUMode # Improves wall texture wobble.
metadata:
publisher: "GT Interactive Software Corp"
developer: "N-Space"
@@ -46464,8 +46400,6 @@ SLES-01619:
controllers:
- AnalogController
- DigitalController
traits:
- ForcePGXPCPUMode # Improves wall texture wobble.
metadata:
publisher: "GT Interactive Software Corp"
developer: "N-Space"
@@ -46485,8 +46419,6 @@ SLES-03518:
controllers:
- AnalogController
- DigitalController
traits:
- ForcePGXPCPUMode # Improves wall texture wobble.
metadata:
publisher: "GT Interactive Software Corp"
developer: "N-Space"
@@ -46509,8 +46441,6 @@ SLUS-00583:
controllers:
- AnalogController
- DigitalController
traits:
- ForcePGXPCPUMode # Improves wall texture wobble.
metadata:
publisher: "GT Interactive Software Corp"
developer: "N-Space"
@@ -46527,22 +46457,17 @@ SLUS-00583:
linkCable: false
SLUS-80583:
name: "Duke Nukem - Time to Kill (USA) (Demo 1)"
traits:
- ForcePGXPCPUMode # Improves wall texture wobble.
SLUS-90036:
name: "Duke Nukem - Time to Kill (USA) (Demo 2)"
traits:
- ForcePGXPCPUMode # Improves wall texture wobble.
SLPS-01557:
name: "Duke Nukem - Total Meltdown (Japan)"
controllers:
- AnalogController
- DigitalController
traits:
- DisablePGXP # 2.5D, not beneficial, breaks rendering.
- DisableWidescreen # No effect.
- DisablePGXP # 2.5D, not beneficial.
settings:
gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
publisher: "King Record Co. Ltd"
developer: "3D Realms Entertainment"
@@ -46566,10 +46491,9 @@ SLUS-00355:
- AnalogController
- DigitalController
traits:
- DisablePGXP # 2.5D, not beneficial, breaks rendering.
- DisableWidescreen # No effect.
- DisablePGXP # 2.5D, not beneficial.
settings:
gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
publisher: "GT Interactive"
developer: "3D Realms Entertainment"
@@ -51042,9 +50966,6 @@ SLES-00501:
name: "Extreme Snow Break (Europe)"
controllers:
- DigitalController
settings:
dmaMaxSliceTicks: 10 # Very sensitive to DMA timing, otherwise polygon flicker.
dmaHaltTicks: 100 # CPU needs to run significantly faster than DMA.
metadata:
publisher: "Microids"
developer: "Virtual Studio"
@@ -51061,9 +50982,6 @@ SLES-00501:
linkCable: false
SLED-01193:
name: "Extreme Snow Break (Europe) (Demo)"
settings:
dmaMaxSliceTicks: 10 # Very sensitive to DMA timing, otherwise polygon flicker.
dmaHaltTicks: 100 # CPU needs to run significantly faster than DMA.
PCPX-96178:
name: "e-Jump (Japan) (Disc 1)"
discSet:
@@ -54982,7 +54900,6 @@ SLES-00487:
- PlayStationMouse
traits:
- DisablePGXP # 2.5D, not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
@@ -55006,7 +54923,6 @@ SLPS-00727:
- PlayStationMouse
traits:
- DisablePGXP # 2.5D, not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
@@ -55033,7 +54949,6 @@ SLUS-00331:
- PlayStationMouse
traits:
- DisablePGXP # 2.5D, not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
@@ -60547,12 +60462,9 @@ SLPM-87331:
name: "Front Mission 2 (Japan) (Front Mission History)"
controllers:
- DigitalController
traits:
# Pick your poison here. Disabling true colour fixes the sprite backgrounds,
# but if you're upscaling, leaves junk around the edges.
- ForceSoftwareRendererForReadbacks
codes:
- SLPM-87331
- SLPM-87397
metadata:
publisher: "Squaresoft"
developer: "Squaresoft"
@@ -60567,14 +60479,10 @@ SLPM-87331:
vibration: false
multitap: false
linkCable: false
SLPS-01000:
SCPS-45116:
name: "Front Mission 2 (Japan, Asia)"
controllers:
- DigitalController
traits:
# Pick your poison here. Disabling true colour fixes the sprite backgrounds,
# but if you're upscaling, leaves junk around the edges.
- ForceSoftwareRendererForReadbacks
codes:
- SCPS-45116
- SLPS-01000
@@ -66109,11 +66017,6 @@ SLUS-00127:
linkCable: false
SLES-00032:
name: "Grand Theft Auto (Europe) (En,Fr,De,It)"
discSet:
name: "Grand Theft Auto (Europe) (En,Fr,De,It)"
serials:
- SLES-00032
- SLES-03389
controllers:
- AnalogController
- DigitalController
@@ -66137,11 +66040,6 @@ SLES-00032:
linkCable: false
SLUS-00106:
name: "Grand Theft Auto (USA)"
discSet:
name: "Grand Theft Auto (USA)"
serials:
- SLUS-00106
- SLUS-00846
compatibility:
rating: NoIssues
versionTested: "0.1-1308-g622e50fa"
@@ -66164,11 +66062,6 @@ SLUS-00106:
linkCable: false
SLES-03389:
name: "Grand Theft Auto - London 1969 (Europe) (En,Fr,De,It)"
discSet:
name: "Grand Theft Auto (Europe) (En,Fr,De,It)"
serials:
- SLES-00032
- SLES-03389
controllers:
- AnalogController
- DigitalController
@@ -66215,11 +66108,6 @@ SLES-01714:
linkCable: false
SLUS-00846:
name: "Grand Theft Auto - Mission Pack 1 - London 1969 (USA)"
discSet:
name: "Grand Theft Auto (USA)"
serials:
- SLUS-00106
- SLUS-00846
controllers:
- AnalogController
- DigitalController
@@ -66722,8 +66610,6 @@ SLPS-00719:
name: "Great Battle VI, The (Japan)"
controllers:
- DigitalController
codes:
- HASH-2A8D6A1D4C539B43
metadata:
publisher: "Banpresto"
developer: "Aspect"
@@ -68062,8 +67948,6 @@ SCPS-10006:
compatibility:
rating: NoIssues
versionTested: "0.1-4525-gdfd67664"
traits:
- DisableWidescreen # No effect.
controllers:
- DigitalController
codes:
@@ -71043,7 +70927,6 @@ SLES-00555:
- DigitalController
traits:
- DisablePGXP # 2.5D, not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
@@ -71069,7 +70952,6 @@ SLUS-00348:
- DigitalController
traits:
- DisablePGXP # 2.5D, not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
@@ -71092,7 +70974,6 @@ SLPS-00972:
- DigitalController
traits:
- DisablePGXP # 2.5D, not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled rendering.
metadata:
@@ -71236,12 +71117,10 @@ SLUS-01244:
compatibility:
rating: GraphicalAudioIssues
versionTested: "0.1-4693-gbbcf1c67"
upscalingIssues: "Menus transparency is wrong (Issue #592)"
controllers:
- AnalogController
- DigitalController
traits:
- ForceAccurateBlending # Requires 16-bit blend precision
- DisableTrueColor # to fix transparency in menu backgrounds.
metadata:
publisher: "The 3DO Company"
developer: "Team .366"
@@ -94107,8 +93986,6 @@ SLES-00211:
name: "Magic Carpet (Europe) (En,Fr,De,Es,Sv)"
controllers:
- DigitalController
settings:
displayDeinterlacingMode: Disabled # Isn't actually interlaced, uses 240p buffers, they just left 480i enabled...
metadata:
publisher: "Electronic Arts"
developer: "Bullfrog Productions / Krisalis"
@@ -94131,8 +94008,6 @@ SLPS-00587:
name: "Magic Carpet (Japan)"
controllers:
- DigitalController
settings:
displayDeinterlacingMode: Disabled # Isn't actually interlaced, uses 240p buffers, they just left 480i enabled...
metadata:
publisher: "Electronic Arts"
developer: "Bullfrog Productions / Krisalis"
@@ -94151,8 +94026,6 @@ SLUS-00029:
name: "Magic Carpet (USA) (En,Fr,De,Es,Sv)"
controllers:
- DigitalController
settings:
displayDeinterlacingMode: Disabled # Isn't actually interlaced, uses 240p buffers, they just left 480i enabled...
metadata:
publisher: "Electronic Arts"
developer: "Bullfrog Productions / Krisalis"
@@ -101354,7 +101227,8 @@ SLPS-00047:
controllers:
- DigitalController
codes:
- HASH-F6005ABBC40728D4
- SLPS-00047
- SLPS-02104
metadata:
publisher: "Altron"
developer: "Altron"
@@ -112695,7 +112569,6 @@ SLPS-00050:
SCES-00582:
name: "Nightmare Creatures (Europe)"
controllers:
- AnalogController
- DigitalController
metadata:
publisher: "Activision"
@@ -112714,7 +112587,6 @@ SCES-00582:
SCES-00684:
name: "Nightmare Creatures (Germany)"
controllers:
- AnalogController
- DigitalController
metadata:
publisher: "Activision"
@@ -112733,7 +112605,6 @@ SCES-00684:
SIPS-60027:
name: "Nightmare Creatures (Japan)"
controllers:
- AnalogController
- DigitalController
metadata:
publisher: "Sony"
@@ -112757,7 +112628,6 @@ SLUS-00582:
rating: NoIssues
versionTested: "0.1-986-gfc911de1"
controllers:
- AnalogController
- DigitalController
metadata:
publisher: "Activision"
@@ -115891,6 +115761,7 @@ SCUS-94449:
SLPM-86439:
name: "Omiai Commando - Ba-Couple ni Tsukkomi o (Japan)"
controllers:
- AnalogController
- DigitalController
metadata:
publisher: "Enix"
@@ -116379,8 +116250,6 @@ SLPS-02951:
- AnalogController
- DigitalController
- NeGcon
settings:
displayDeinterlacingMode: Blend # Only used in menus, MAD flickers with fading.
metadata:
publisher: "MTO"
developer: "MTO"
@@ -130044,8 +129913,6 @@ SLPM-80296:
name: "Rally de Africa (Japan) (Taikenban)"
SLPS-02679:
name: "Rally de Europe (Japan)"
traits:
- DisableWidescreen # Speedometer breaks with WS rendering.
controllers:
- AnalogController
- DigitalController
@@ -130332,8 +130199,6 @@ SCES-00004:
compatibility:
rating: NoIssues
versionTested: "0.1-1308-g622e50fa"
traits:
- DisableWidescreen # No effect.
controllers:
- DigitalController
metadata:
@@ -130440,8 +130305,6 @@ SLES-01103:
controllers:
- AnalogController
- DigitalController
traits:
- ForcePGXPCPUMode # Fixes affine texture mapping on floor.
metadata:
publisher: "Mindscape"
developer: "Pure Entertainment"
@@ -130471,7 +130334,6 @@ SLUS-00656:
- DigitalController
traits:
- ForceInterlacing
- ForcePGXPCPUMode # Fixes affine texture mapping on floor.
metadata:
publisher: "Mindscape"
developer: "Pure Entertainment"
@@ -132999,7 +132861,6 @@ SLES-02529:
- DigitalController
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
libcrypt: true
metadata:
publisher: "Eidos Interactive"
@@ -133019,7 +132880,6 @@ SLED-02541:
name: "Resident Evil 3 - Nemesis (Europe) (Demo)"
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
SLES-02530:
name: "Resident Evil 3 - Nemesis (France)"
controllers:
@@ -133027,7 +132887,6 @@ SLES-02530:
- DigitalController
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
libcrypt: true
metadata:
publisher: "Eidos Interactive"
@@ -133050,7 +132909,6 @@ SLES-02531:
- DigitalController
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
libcrypt: true
metadata:
publisher: "Eidos Interactive"
@@ -133073,7 +132931,6 @@ SLES-02698:
- DigitalController
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
libcrypt: true
metadata:
publisher: "Eidos Interactive"
@@ -133096,7 +132953,6 @@ SLES-02533:
- DigitalController
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
libcrypt: true
metadata:
publisher: "Eidos Interactive"
@@ -133122,7 +132978,6 @@ SLES-02532:
- DigitalController
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
libcrypt: true
metadata:
publisher: "Proein / Eidos Interactive"
@@ -133148,7 +133003,6 @@ SLUS-00923:
- DigitalController
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
metadata:
publisher: "Capcom"
developer: "Capcom Production Studio 4"
@@ -133167,7 +133021,6 @@ SLUS-90064:
name: "Resident Evil 3 - Nemesis (USA) (Demo)"
traits:
- DisableWidescreen
- ForcePGXPCPUMode # Fixes jitter in character models.
SLPS-01974:
name: "Restaurant Dream (Japan)"
controllers:
@@ -149904,8 +149757,6 @@ SCES-00577:
- DigitalController
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled water rendering.
dmaMaxSliceTicks: 100 # Tight timing required for DMA modified after start.
dmaHaltTicks: 150 # Fixes sprites in menus.
metadata:
publisher: "Sony Computer Entertaiment Europe"
developer: "Namco"
@@ -149929,8 +149780,6 @@ SLUS-00240:
- DigitalController
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled water rendering.
dmaMaxSliceTicks: 100 # Tight timing required for DMA modified after start.
dmaHaltTicks: 150 # Fixes sprites in menus.
metadata:
publisher: "Namco"
developer: "Namco"
@@ -149952,10 +149801,6 @@ SLPS-00555:
versionTested: "0.1-2202-ga17e15f1"
controllers:
- DigitalController
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled water rendering.
dmaMaxSliceTicks: 100 # Tight timing required for DMA modified after start.
dmaHaltTicks: 150 # Fixes sprites in menus.
metadata:
publisher: "Namco"
developer: "Namco"
@@ -149977,10 +149822,6 @@ SLPS-00545:
versionTested: "0.1-2202-ga17e15f1"
controllers:
- DigitalController
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled water rendering.
dmaMaxSliceTicks: 100 # Tight timing required for DMA modified after start.
dmaHaltTicks: 150 # Fixes sprites in menus.
metadata:
publisher: "Namco"
developer: "Namco"
@@ -150002,10 +149843,6 @@ SLPS-91168:
codes:
- SLPS-91168
- SLPS-91454
settings:
gpuLineDetectMode: BasicTriangles # Fixes upscaled water rendering.
dmaMaxSliceTicks: 100 # Tight timing required for DMA modified after start.
dmaHaltTicks: 150 # Fixes sprites in menus.
metadata:
publisher: "Namco"
developer: "Namco"
@@ -153537,7 +153374,6 @@ SLES-00585:
- DigitalController
traits:
- DisablePGXP # 2.5D, PGXP is not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
metadata:
@@ -153560,7 +153396,6 @@ SLES-00640:
- DigitalController
traits:
- DisablePGXP # 2.5D, PGXP is not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
metadata:
@@ -153584,7 +153419,6 @@ SLPS-00685:
- DigitalController
traits:
- DisablePGXP # 2.5D, PGXP is not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
metadata:
@@ -153607,7 +153441,6 @@ SLES-00646:
- DigitalController
traits:
- DisablePGXP # 2.5D, PGXP is not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
metadata:
@@ -153634,7 +153467,6 @@ SLUS-00297:
- DigitalController
traits:
- DisablePGXP # 2.5D, PGXP is not beneficial.
- DisableWidescreen # No effect.
settings:
gpuLineDetectMode: AggressiveTriangles # Fixes upscaled rendering.
metadata:
@@ -155542,6 +155374,7 @@ SLES-02598:
SLPS-02508:
name: "Street Fighter EX2 Plus (Japan)"
controllers:
- AnalogController
- DigitalController
metadata:
publisher: "Capcom"
@@ -172170,9 +172003,6 @@ SLPS-00025:
- DigitalController
traits:
- ForceRecompilerICache
settings:
dmaMaxSliceTicks: 500 # Stops a large GPU transfer breaking CD.
dmaHaltTicks: 300
codes:
- HASH-A8647D688C39B63F
- HASH-21D86F0985C11667
@@ -186425,9 +186255,6 @@ SCPS-45170:
codes:
- SCPS-45170
- SCPS-45171
traits:
- ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
- DisablePGXPOn2DPolygons # Fixes misaligned text.
metadata:
publisher: "Squaresoft"
developer: "Squaresoft"
@@ -186449,9 +186276,6 @@ SLPS-02773:
codes:
- SLPS-02773
- SLPS-02774
traits:
- ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
- DisablePGXPOn2DPolygons # Fixes misaligned text.
metadata:
publisher: "Squaresoft"
developer: "Squaresoft"
@@ -186480,9 +186304,6 @@ SLPS-01160:
- SLPS-02775
- SLPS-91436
- SLPS-91437
traits:
- ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
- DisablePGXPOn2DPolygons # Fixes misaligned text.
metadata:
publisher: "Squaresoft"
developer: "Squaresoft"
@@ -186509,9 +186330,6 @@ SLPS-01161:
codes:
- SLPS-01161
- SLPS-02776
traits:
- ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
- DisablePGXPOn2DPolygons # Fixes misaligned text.
metadata:
publisher: "Squaresoft"
developer: "Squaresoft"
@@ -186538,9 +186356,6 @@ SLUS-00664:
versionTested: "0.1-1308-g622e50fa"
controllers:
- DigitalController
traits:
- ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
- DisablePGXPOn2DPolygons # Fixes misaligned text.
metadata:
publisher: "Squaresoft"
developer: "Squaresoft"
@@ -186567,9 +186382,6 @@ SLUS-00669:
versionTested: "0.1-1308-g622e50fa"
controllers:
- DigitalController
traits:
- ForcePGXPCPUMode # Fixes battle shadows, radar jitter.
- DisablePGXPOn2DPolygons # Fixes misaligned text.
metadata:
publisher: "Squaresoft"
developer: "Squaresoft"

View File

@@ -0,0 +1,780 @@
// Crt-Consumer
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
/*
[configuration]
[OptionRangeFloat]
GUIName = Pre-Scale Sharpening
OptionName = PRE_SCALE
MinValue = 1.0
MaxValue = 4.0
StepAmount = 0.1
DefaultValue = 1.5
[OptionRangeFloat]
GUIName = Convergence X
OptionName = blurx
MinValue = -4.0
MaxValue = 4.0
StepAmount = 0.05
DefaultValue = 0.25
[OptionRangeFloat]
GUIName = Convergence Y
OptionName = blury
MinValue = -4.0
MaxValue = 4.0
StepAmount = 0.05
DefaultValue = -0.1
[OptionRangeFloat]
GUIName = Curvature X
OptionName = warpx
MinValue = 0.0
MaxValue = 0.12
StepAmount = 0.01
DefaultValue = 0.03
[OptionRangeFloat]
GUIName = Curvature Y
OptionName = warpy
MinValue = 0.0
MaxValue = 0.12
StepAmount = 0.01
DefaultValue = 0.04
[OptionRangeFloat]
GUIName = Corner size
OptionName = corner
MinValue = 0.0
MaxValue = 0.10
StepAmount = 0.01
DefaultValue = 0.03
[OptionRangeFloat]
GUIName = Border Smoothness
OptionName = smoothness
MinValue = 100.0
MaxValue = 600.0
StepAmount = 5.0
DefaultValue = 400.0
[OptionRangeFloat]
GUIName = Interlacing Toggle
OptionName = inter
MinValue = 0.0
MaxValue = 1.0
StepAmount = 1.0
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Interlacing Downscale Scanlines
OptionName = Downscale
MinValue = 1.0
MaxValue = 8.0
StepAmount = 1.
DefaultValue = 2.0
[OptionRangeFloat]
GUIName = Beam low
OptionName = scanlow
MinValue = 1.0
MaxValue = 15.0
StepAmount = 1.0
DefaultValue = 6.0
[OptionRangeFloat]
GUIName = Beam high
OptionName = scanhigh
MinValue = 1.0
MaxValue = 15.0
StepAmount = 1.0
DefaultValue = 8.0
[OptionRangeFloat]
GUIName = Scanlines dark
OptionName = beamlow
MinValue = 0.5
MaxValue = 2.5
StepAmount = 0.05
DefaultValue = 1.45
[OptionRangeFloat]
GUIName = Scanlines bright
OptionName = beamhigh
MinValue = 0.5
MaxValue = 2.5
StepAmount = 0.05
DefaultValue = 1.05
[OptionRangeFloat]
GUIName = Protect White On Masks
OptionName = preserve
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.01
DefaultValue = 0.98
[OptionRangeFloat]
GUIName = Bright boost dark pixels
OptionName = brightboost1
MinValue = 0.0
MaxValue = 3.0
StepAmount = 0.05
DefaultValue = 1.25
[OptionRangeFloat]
GUIName = Bright boost bright pixels
OptionName = brightboost2
MinValue = 0.0
MaxValue = 3.0
StepAmount = 0.05
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Glow pixels per axis
OptionName = glow
MinValue = 1.0
MaxValue = 6.0
StepAmount = 1.0
DefaultValue = 3.0
[OptionRangeFloat]
GUIName = Glow quality
OptionName = quality
MinValue = 0.25
MaxValue = 4.0
StepAmount = 0.05
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Glow intensity
OptionName = glow_str
MinValue = 0.0001
MaxValue = 2.0
StepAmount = 0.05
DefaultValue = 0.3
[OptionRangeFloat]
GUIName = Add Noise
OptionName = nois
MinValue = 0.0
MaxValue = 32.0
StepAmount = 1.0
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Post Brightness
OptionName = postbr
MinValue = 0.0
MaxValue = 2.5
StepAmount = 0.02
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Palette Fixes. Sega, PUAE Atari ST dark colors
OptionName = palette_fix
MinValue = 0.0
MaxValue = 2.0
StepAmount = 1.0
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Mask Type
OptionName = Shadowmask
MinValue = -1.0
MaxValue = 8.0
StepAmount = 1.
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Mask Size
OptionName = masksize
MinValue = 1.0
MaxValue = 2.0
StepAmount = 1.0
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Mask dark
OptionName = MaskDark
MinValue = 0.0
MaxValue = 2.0
StepAmount = 0.1
DefaultValue = 0.2
[OptionRangeFloat]
GUIName = Mask light
OptionName = MaskLight
MinValue = 0.0
MaxValue = 2.0
StepAmount = 0.1
DefaultValue = 1.5
[OptionRangeFloat]
GUIName = Slot Mask Strength
OptionName = slotmask
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.05
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Slot Mask Width
OptionName = slotwidth
MinValue = 1.0
MaxValue = 6.0
StepAmount = 0.5
DefaultValue = 2.0
[OptionRangeFloat]
GUIName = Slot Mask Height: 2x1 or 4x1
OptionName = double_slot
MinValue = 1.0
MaxValue = 2.0
StepAmount = 1.0
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Slot Mask Size
OptionName = slotms
MinValue = 1.0
MaxValue = 2.0
StepAmount = 1.0
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Gamma Out
OptionName = GAMMA_OUT
MinValue = 0.0
MaxValue = 4.0
StepAmount = 0.05
DefaultValue = 2.25
[OptionRangeFloat]
GUIName = Saturation
OptionName = sat
MinValue = 0.0
MaxValue = 2.0
StepAmount = 0.05
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Contrast, 1.0:Off
OptionName = contrast
MinValue = 0.00
MaxValue = 2.00
StepAmount = 0.05
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Color Temperature %
OptionName = WP
MinValue = -100.0
MaxValue = 100.0
StepAmount = 5.
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Red-Green Tint
OptionName = rg
MinValue = -1.0
MaxValue = 1.0
StepAmount = 0.005
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Red-Blue Tint
OptionName = rb
MinValue = -1.0
MaxValue = 1.0
StepAmount = 0.005
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Green-Red Tint
OptionName = gr
MinValue = -1.0
MaxValue = 1.0
StepAmount = 0.005
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Green-Blue Tint
OptionName = gb
MinValue = -1.0
MaxValue = 1.0
StepAmount = 0.005
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Blue-Red Tint
OptionName = br
MinValue = -1.0
MaxValue = 1.0
StepAmount = 0.005
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Blue-Green Tint
OptionName = bg
MinValue = -1.0
MaxValue = 1.0
StepAmount = 0.005
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Vignette On/Off
OptionName = vignette
MinValue = 0.0
MaxValue = 1.0
StepAmount = 1.0
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Vignette Power
OptionName = vpower
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.01
DefaultValue = 0.15
[OptionRangeFloat]
GUIName = Vignette strength
OptionName = vstr
MinValue = 0.0
MaxValue = 50.0
StepAmount = 1.0
DefaultValue = 40.0
[OptionRangeFloat]
GUIName = Switch off shader
OptionName = alloff
MinValue = 0.0
MaxValue = 1.0
StepAmount = 1.0
DefaultValue = 0.0
[/configuration]
*/
// Time helpers built on the host-provided GetTime(): iTime is that value halved,
// iTimer is that value divided by 60.
// NOTE(review): the unit returned by GetTime() is not visible here - confirm before tuning rates.
#define iTime (float(GetTime())/2.0)
#define iTimer (float(GetTime())/60.0)
// SourceSize packs the native size in .xy and its reciprocal (GetInvNativePixelSize()) in .zw.
#define SourceSize (vec4(1.0/GetInvNativePixelSize(),GetInvNativePixelSize()))
// Bends a 0..1 texture coordinate to imitate screen curvature.
// The coordinate is remapped to -1..1, each axis is stretched by a factor that
// grows with the square of the other axis (weighted by warpx / warpy), and the
// result is mapped back to 0..1.
vec2 Warp(vec2 pos)
{
    vec2 centered = pos * 2.0 - 1.0;
    vec2 stretch = vec2(1.0 + (centered.y * centered.y) * warpx,
                        1.0 + (centered.x * centered.x) * warpy);
    centered *= stretch;
    return centered * 0.5 + 0.5;
}
// Scanline weight for a sample at vertical distance `y` and luminance `l`.
// The falloff strength is interpolated between scanlow and scanhigh by `y`,
// the distance is scaled by a value interpolated between beamlow and beamhigh
// by `l`, and the result is exp2(-strength * scaled^2).
float sw(float y, float l)
{
    float strength = mix(scanlow, scanhigh, y);
    float scaled = y * mix(beamlow, beamhigh, l);
    return exp2(-strength * scaled * scaled);
}
// Returns the RGB mask multiplier for an output pixel.
//   x   - position in output pixels; it is divided by masksize and floored so
//         that each mask cell spans masksize pixels.
//   col - current pixel colour; read by mask types 4 and 7.
//   l   - luminance of the pixel; scales the darkened rows of mask types 5 and 6.
// Shadowmask selects the pattern (0..8); any other value returns white (no mask).
// MaskDark / MaskLight are the dim and lit levels used by the patterns.
vec3 mask(vec2 x, vec3 col, float l)
{
    x = floor(x / masksize);

    if (Shadowmask == 0.0)
    {
        // Two-column pattern alternating (1, dark, 1) and (dark, 1, dark).
        float m = fract(x.x * 0.4999);
        if (m < 0.4999) return vec3(1.0, MaskDark, 1.0);
        else return vec3(MaskDark, 1.0, MaskDark);
    }
    else if (Shadowmask == 1.0)
    {
        // Three-column B/G/R pattern; every other row is dimmed, with the
        // dimmed row offset by one for each half of a 6-column period.
        vec3 Mask = vec3(MaskDark, MaskDark, MaskDark);
        float line = MaskLight;
        float odd = 0.0;
        if (fract(x.x / 6.0) < 0.5) odd = 1.0;
        if (fract((x.y + odd) / 2.0) < 0.5) line = MaskDark;
        float m = fract(x.x / 3.0);
        if (m < 0.333) Mask.b = MaskLight;
        else if (m < 0.666) Mask.g = MaskLight;
        else Mask.r = MaskLight;
        Mask *= line;
        return Mask;
    }
    else if (Shadowmask == 2.0)
    {
        // Three-column B/G/R stripes without row modulation.
        float m = fract(x.x * 0.3333);
        if (m < 0.3333) return vec3(MaskDark, MaskDark, MaskLight);
        if (m < 0.6666) return vec3(MaskDark, MaskLight, MaskDark);
        else return vec3(MaskLight, MaskDark, MaskDark);
    }
    else if (Shadowmask == 3.0)
    {
        // Two-column pattern alternating full white and dark.
        float m = fract(x.x * 0.5);
        if (m < 0.5) return vec3(1.0, 1.0, 1.0);
        else return vec3(MaskDark, MaskDark, MaskDark);
    }
    else if (Shadowmask == 4.0)
    {
        // Starts from the pixel colour and forces R+B or G to 1.0 on
        // alternating columns, with staggered dimmed rows.
        vec3 Mask = vec3(col.rgb);
        float line = MaskLight;
        float odd = 0.0;
        if (fract(x.x / 4.0) < 0.5) odd = 1.0;
        if (fract((x.y + odd) / 2.0) < 0.5) line = MaskDark;
        float m = fract(x.x / 2.0);
        if (m < 0.5) { Mask.r = 1.0; Mask.b = 1.0; }
        else Mask.g = 1.0;
        Mask *= line;
        return Mask;
    }
    else if (Shadowmask == 5.0)
    {
        // Two-column pattern with a luminance-scaled gap row; the gap row is
        // placed differently in each half of a 4-column period.
        vec3 Mask = vec3(1.0, 1.0, 1.0);
        if (fract(x.x / 4.0) < 0.5)
        {
            if (fract(x.y / 3.0) < 0.666)
            {
                if (fract(x.x / 2.0) < 0.5) Mask = vec3(1.0, MaskDark, 1.0);
                else Mask = vec3(MaskDark, 1.0, MaskDark);
            }
            else Mask *= l;
        }
        else if (fract(x.x / 4.0) >= 0.5)
        {
            if (fract(x.y / 3.0) > 0.333)
            {
                if (fract(x.x / 2.0) < 0.5) Mask = vec3(1.0, MaskDark, 1.0);
                else Mask = vec3(MaskDark, 1.0, MaskDark);
            }
            else Mask *= l;
        }
        return Mask;
    }
    else if (Shadowmask == 6.0)
    {
        // Three-column R/G/B pattern with a luminance-scaled gap row; the gap
        // row is placed differently in each half of a 6-column period.
        vec3 Mask = vec3(MaskDark, MaskDark, MaskDark);
        if (fract(x.x / 6.0) < 0.5)
        {
            if (fract(x.y / 4.0) < 0.75)
            {
                if (fract(x.x / 3.0) < 0.3333) Mask.r = MaskLight;
                else if (fract(x.x / 3.0) < 0.6666) Mask.g = MaskLight;
                else Mask.b = MaskLight;
            }
            // The gap row must actually modify Mask; a bare `Mask * l * 0.9`
            // expression computes a value and throws it away.
            else Mask *= l * 0.9;
        }
        else if (fract(x.x / 6.0) >= 0.5)
        {
            if (fract(x.y / 4.0) >= 0.5 || fract(x.y / 4.0) < 0.25)
            {
                if (fract(x.x / 3.0) < 0.3333) Mask.r = MaskLight;
                else if (fract(x.x / 3.0) < 0.6666) Mask.g = MaskLight;
                else Mask.b = MaskLight;
            }
            else Mask *= l * 0.9;
        }
        return Mask;
    }
    else if (Shadowmask == 7.0)
    {
        // Three-column pattern in which one channel per column follows the
        // pixel colour.
        float m = fract(x.x * 0.3333);
        if (m < 0.3333) return vec3(MaskDark, MaskLight, MaskLight * col.b); //Cyan
        if (m < 0.6666) return vec3(MaskLight * col.r, MaskDark, MaskLight); //Magenta
        else return vec3(MaskLight, MaskLight * col.g, MaskDark); //Yellow
    }
    else if (Shadowmask == 8.0)
    {
        // Three-column B/G/R pattern at a fixed 0.9 level; rows are scaled by
        // MaskLight in a checkerboard over 6-column halves.
        vec3 Mask = vec3(MaskDark, MaskDark, MaskDark);
        float bright = MaskLight;
        float left = 0.0;
        if (fract(x.x / 6.0) < 0.5) left = 1.0;
        float m = fract(x.x / 3.0);
        if (m < 0.333) Mask.b = 0.9;
        else if (m < 0.666) Mask.g = 0.9;
        else Mask.r = 0.9;
        if ((mod(x.y, 2.0) == 1.0 && left == 1.0) || (mod(x.y, 2.0) == 0.0 && left == 0.0))
            Mask *= bright;
        return Mask;
    }
    else return vec3(1.0, 1.0, 1.0);
}
// Slot-mask multiplier for output pixel `pos` given the current colour `c`.
// Returns 1.0 when the slotmask option is 0. Otherwise the pixel grid is
// quantised by slotms, and cells that fall in a slot gap (first row / first
// half of the horizontal period, or row `double_slot` / second half) are
// darkened; all other cells are brightened. Both amounts depend on the
// brightest channel of `c` raised to the power 1.33.
float SlotMask(vec2 pos, vec3 c)
{
    if (slotmask == 0.0)
        return 1.0;

    vec2 cell = floor(pos / slotms);
    float peak = pow(max(max(c.r, c.g), c.b), 1.33);
    float period = slotwidth * 2.0;
    float col_phase = fract(cell.x / period);
    float row = floor(fract(cell.y / (2.0 * double_slot)) * 2.0 * double_slot);

    bool in_gap = (row == 0.0 && col_phase < 0.5) ||
                  (row == double_slot && col_phase >= 0.5);
    if (in_gap)
        return mix(1.0 - slotmask, 1.0 - 0.80 * slotmask, peak);

    return 1.0 + 0.7 * slotmask * (1.0 - peak);
}
// Builds a 4x4 matrix that scales RGB by `contrast` and adds
// (1 - contrast) / 2 to each channel when applied to a vec4 whose w is 1.
// The offset lives in the fourth column (GLSL matrices are column-major).
mat4 contrastMatrix(float contrast)
{
    float offset = (1.0 - contrast) / 2.0;
    return mat4(vec4(contrast, 0.0, 0.0, 0.0),
                vec4(0.0, contrast, 0.0, 0.0),
                vec4(0.0, 0.0, contrast, 0.0),
                vec4(offset, offset, offset, 1.0));
}
// Returns a uniform scaling matrix holding the vignette gain for the current
// fragment. The gain is min(pow(u * (1-u) * v * (1-v) * vstr, vpower), 1.0)
// where (u, v) = GetCoordinates(), or exactly 1.0 when the vignette option is 0.
// The parameter `l` is not used.
mat3 vign(float l)
{
    float gain = 1.0;
    if (vignette != 0.0)
    {
        vec2 uv = GetCoordinates();
        uv *= 1.0 - uv.xy;
        gain = min(pow(uv.x * uv.y * vstr, vpower), 1.0);
    }
    // mat3(scalar) places the scalar on the diagonal and zeros elsewhere.
    return mat3(gain);
}
// Blends the colour with its grey value using the `sat` option
// (sat = 0 gives grey, sat = 1 returns the colour unchanged).
// The grey value is a weighted sum with weights (0.4, 0.5, 0.1); for dark
// colours (vector length * 0.5775 below 0.5) each weight is replaced by twice
// its square.
vec3 saturation(vec3 textureColor)
{
    vec3 weights = vec3(0.4, 0.5, 0.1);
    bool is_dark = length(textureColor.rgb) * 0.5775 < 0.5;
    if (is_dark)
    {
        vec3 squared = weights * weights;
        weights = squared + squared;
    }
    float grey = dot(textureColor.rgb, weights);
    return mix(vec3(grey), textureColor.rgb, sat);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Accumulates a square neighbourhood of samples around `texcoord` to form a glow term.
// The tap spacing is one native texel divided by `quality`, so a higher
// quality setting gives a tighter, less visible glow.
// `glow` is the neighbourhood radius in taps; the cost grows as (2*glow+1)^2.
// Every tap is scaled by 1/glow and squared before being summed; the sum is
// then scaled by glow_str / glow^2. The parameter `col` is not used.
vec3 glow0 (vec2 texcoord, vec3 col)
{
    vec2 tap_step = SourceSize.zw / quality;
    // The same weight is applied to every tap, regardless of distance.
    float weight = 1.0 / glow;
    vec3 total = vec3(0.0);
    for (float dx = -glow; dx <= glow; dx += 1.0)
    {
        for (float dy = -glow; dy <= glow; dy += 1.0)
        {
            vec3 tap = SampleLocation(texcoord + vec2(dx, dy) * tap_step).rgb * weight;
            total += tap * tap;
        }
    }
    return (glow_str * total / (glow * glow));
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Hash-style pseudo-random value in [0, 1) for coordinate `co`.
// The hash input is multiplied by iTimer, so the pattern changes over time.
float noise(vec2 co)
{
    float phase = iTimer * dot(co.xy, vec2(12.9898, 78.233));
    return fract(sin(phase) * 43758.5453);
}
// Rounded-corner mask: returns 1.0 inside the screen and falls to 0.0 across
// the rounded corner edge. `corner` is the corner radius and `smoothness`
// controls how sharp the edge is. The Y distance is scaled by
// SourceSize.y / SourceSize.x before the radius test.
float corner0(vec2 coord)
{
    vec2 p = (coord - vec2(0.5, 0.5)) * 1.0 + vec2(0.5, 0.5);
    vec2 aspect = vec2(1.0, SourceSize.y / SourceSize.x);
    // Distance to the nearest edge on each axis.
    vec2 edge = min(p, vec2(1.0, 1.0) - p) * aspect;
    vec2 radius = vec2(corner, corner);
    // How far the point lies inside the corner square, per axis.
    vec2 inset = radius - min(edge, radius);
    float dist = sqrt(dot(inset, inset));
    return clamp((radius.x - dist) * smoothness, 0.0, 1.0);
}
// Colour matrices used by the colour-temperature adjustment in main().
// GLSL mat3 constructors are column-major: each line of three literals below
// is one column of the matrix.
// NOTE(review): going by the names, these convert RGB under a D65 / D50 white
// point to and from XYZ - confirm against the source of the coefficients.
const mat3 D65_to_XYZ = mat3(
0.4306190, 0.2220379, 0.0201853,
0.3415419, 0.7066384, 0.1295504,
0.1783091, 0.0713236, 0.9390944);
const mat3 XYZ_to_D65 = mat3(
3.0628971, -0.9692660, 0.0678775,
-1.3931791, 1.8760108, -0.2288548,
-0.4757517, 0.0415560, 1.0693490);
const mat3 D50_to_XYZ = mat3(
0.4552773, 0.2323025, 0.0145457,
0.3675500, 0.7077956, 0.1049154,
0.1413926, 0.0599019, 0.7057489);
const mat3 XYZ_to_D50 = mat3(
2.9603944, -0.9787684, 0.0844874,
-1.4678519, 1.9161415, -0.2545973,
-0.4685105, 0.0334540, 1.4216174);
// Fragment entry point. Warps the coordinate, samples the source with a
// pre-scaled bilinear filter and per-channel convergence offsets, then applies
// palette fix, colour temperature, tint, scanlines, masks, brightness boost,
// output gamma, glow, saturation, corners, noise, contrast, interlace flicker
// and vignette, in that order. When `alloff` is 1.0 the source is passed
// through with only the warp applied.
void main()
{
vec2 vTexCoord = GetCoordinates();
vec2 pos = Warp(vTexCoord.xy);
vec2 tex_size = 1.0 / GetInvNativePixelSize();
vec2 OutputSize = GetWindowSize();
// pos shifted by half a native texel.
vec2 pC4 = (pos + 0.5/tex_size);
// Fractional position inside the current native texel.
vec2 fp = fract(pos * tex_size);
// With interlacing off and more than 400 native lines, the scanline phase is
// stretched over `Downscale` native lines.
if (inter < 0.5 && tex_size.y > 400.0){ fp.y = fract(pos.y * tex_size.y*1.0/Downscale);}
vec4 res = vec4(1.0);
if (alloff == 1.0)
res = SampleLocation(pC4);
else
{
vec2 texel = pos * tex_size;
vec2 texel_floored = floor(texel);
float scale = PRE_SCALE;
float region_range = 0.5 - 0.5 / scale;
// Figure out where in the texel to sample to get correct pre-scaled bilinear.
// Uses the hardware bilinear interpolator to avoid having to sample 4 times manually.
vec2 center_dist = fp - 0.5;
vec2 fpp = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5;
vec2 mod_texel = texel_floored + fpp;
vec2 coords = mod_texel / SourceSize.xy;
// Three taps offset by (blurx, blury) texels; red, green and blue are mixed
// from different taps.
vec3 sample1 = SampleLocation(vec2(coords.x + blurx*SourceSize.z, coords.y - blury*SourceSize.w)).rgb;
vec3 sample2 = SampleLocation(coords).rgb;
vec3 sample3 = SampleLocation(vec2(coords.x - blurx*SourceSize.z, coords.y + blury*SourceSize.w )).rgb;
vec3 color = vec3(sample1.r * 0.5 + sample2.r * 0.5,
sample1.g * 0.25 + sample2.g * 0.5 + sample3.g * 0.25,
sample2.b * 0.5 + sample3.b * 0.5);
// Optional fixed gain selected by palette_fix (1.0 -> x1.0667, 2.0 -> x2.0).
if (palette_fix != 0.0)
{
if (palette_fix == 1.0) color = color* 1.0667;
else if (palette_fix == 2.0) color = color * 2.0;
}
//COLOR TEMPERATURE FROM GUEST.R-DR.VENOM
// WP is a percentage in -100..100: its sign picks the cooler or warmer
// variant and its magnitude sets the blend amount.
if (WP != 0.0)
{
vec3 warmer = D50_to_XYZ * color;
warmer = XYZ_to_D65 * warmer;
vec3 cooler = D65_to_XYZ * color;
cooler = XYZ_to_D50 * cooler;
float m = abs(WP) / 100.0;
vec3 comp = (WP < 0.0) ? cooler : warmer;
comp = clamp(comp, 0.0, 1.0);
color = vec3(mix(color, comp, m));
}
// Cross-channel tint matrix (column-major: each line below is one column).
mat3 hue = mat3 (1., rg, rb, //red tint
gr, 1., gb, //green tint
br, bg, 1.); //blue tint
color = hue * color;
// Fixed input curve applied before the scanline and mask stages.
color = (2.0*pow(color,vec3(2.8))) - pow(color,vec3(3.6));
float lum = color.r * 0.3 + color.g * 0.6 + color.b * 0.1;
float f = fract(fp.y -0.5);
// Scanlines are skipped when interlacing is on and there are more than 400
// native lines; the self-assignment below is a deliberate no-op.
if (inter > 0.5 && tex_size.y > 400.0) color = color;
else
{color = color * sw(f,lum) + color * sw (1.0-f,lum);}
float lum1 = color.r * 0.3 + color.g * 0.6 + color.b * 0.1;
// The mask is faded towards white by lum1 * preserve.
color *= mix(mask((vTexCoord * OutputSize.xy), color,lum1), vec3(1.0), lum1*preserve);
if (slotmask != 0.0) color *= SlotMask((vTexCoord * OutputSize.xy) * 1.0001, color);
// Brightness boost interpolated by the brightest channel.
color *= mix(brightboost1, brightboost2, max(max(color.r, color.g), color.b));
color = pow(color,vec3(1.0 / GAMMA_OUT));
if (glow_str != 0.0) color += glow0(coords,color);
if (sat != 1.0) color = saturation(color);
if (corner != 0.0) color *= corner0(pC4);
if (nois != 0.0) color *= 1.0 + noise(coords * 2.0) / nois;
color *= mix(1.0, postbr, lum);
res = vec4(color, 1.0);
if (contrast != 1.0) res = contrastMatrix(contrast) * res;
// Interlace flicker: all four components are scaled by 0.95 during half of each iTime unit.
if (inter > 0.5 && SourceSize.y > 400.0 && fract(iTime) < 0.5) res = res * 0.95;
res.rgb *= vign(lum);
}
SetOutput(res);
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,84 +0,0 @@
#include "ReShade.fxh"
// CrashGG presents
// 'XY-Pos-free'
// A super-simple shader derived from the super-fast crt-cyclon.fx. It only provides
// free pixel stretching and position translation on the X and Y axes.
// Suitable for users who only want to fine-tune the screen zoom and position and do not like the bundled CRT-like effects.
// Fixed some bugs in the original version and adjusted the step size and the range.
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or (at your option)
// any later version.
// Horizontal zoom: the X texture coordinate is multiplied by (1.0 - zoomx).
uniform float zoomx <
ui_type = "drag";
ui_min = -0.3000;
ui_max = 0.3000;
ui_step = 0.0005;
ui_label = "Zoom Image X";
> = 0.0000;
// Vertical zoom: the Y texture coordinate is multiplied by (1.0 - zoomy).
uniform float zoomy <
ui_type = "drag";
ui_min = -0.3000;
ui_max = 0.3000;
ui_step = 0.0005;
ui_label = "Zoom Image Y";
> = 0.0000;
// Horizontal shift: centerx / 100 is subtracted from the X texture coordinate.
uniform float centerx <
ui_type = "drag";
ui_min = -9.99;
ui_max = 9.99;
ui_step = 0.01;
ui_label = "Image Center X";
> = 0.00;
// Vertical shift: centery / 100 is subtracted from the Y texture coordinate.
uniform float centery <
ui_type = "drag";
ui_min = -9.99;
ui_max = 9.99;
ui_step = 0.01;
ui_label = "Image Center Y";
> = 0.00;
// Curvature hook kept from the CRT shader this file derives from.
// Both curvature coefficients are hard-wired to 0, so for finite inputs the
// coordinate comes back unchanged apart from floating-point rounding.
float2 Warp(float2 pos)
{
    float2 centered = pos * 2.0 - 1.0;
    float2 stretch = float2(1.0 + centered.y * centered.y * 0,
                            1.0 + centered.x * centered.x * 0);
    centered *= stretch;
    return centered * 0.5 + 0.5;
}
// Pixel shader: samples the back buffer at a zoomed and shifted coordinate.
//   vpos      - pixel position (unused).
//   vTexCoord - 0..1 texture coordinate of the output pixel.
// Returns the sampled colour with alpha forced to 1.0.
float4 CRT_CYCLON_PS(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
    // Zoom by scaling the coordinate, then shift by center / 100.
    float2 pos = Warp((vTexCoord*float2(1.0-zoomx,1.0-zoomy)-float2(centerx,centery)/100.0));
    float3 res = tex2D(ReShade::BackBuffer,pos).rgb;
    return float4(res, 1.0);
}
// Single-pass technique: runs CRT_CYCLON_PS over the whole frame.
// NOTE(review): PostProcessVS is not defined in this file; presumably it comes from ReShade.fxh.
technique CRT_CYCLON
{
pass PS_CRT_CYCLON
{
VertexShader = PostProcessVS;
PixelShader = CRT_CYCLON_PS;
}
}

View File

@@ -1,104 +0,0 @@
#include "ReShade.fxh"
/*
Copyright (C) 2016 guest(r) - guest.r@gmail.com
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// Supplied by the host through the "normalized_native_pixel_size" source binding.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
// Back buffer sampled with point filtering and clamped addressing.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// Unit weights: dot(abs(a - b), dt) sums the per-channel absolute differences.
static const float3 dt = float3(1.0,1.0,1.0);
// Edge-aware 4-tap fetch around `coord`.
// yx holds (+offset.x, +offset.y, -offset.x, -offset.y). The four diagonal
// neighbours are read and the two diagonal pairs are averaged, each pair
// weighted by the colour difference across the OTHER diagonal, so the pair
// lying along an edge dominates. The 0.001 bias keeps the divisor non-zero.
float3 texture2d(sampler2D tex, float2 coord, float4 yx) {
    float3 nn = tex2D(tex, coord + yx.zw).xyz;
    float3 pn = tex2D(tex, coord + yx.xw).xyz;
    float3 pp = tex2D(tex, coord + yx.xy).xyz;
    float3 np = tex2D(tex, coord + yx.zy).xyz;

    float diff_main = dot(abs(nn - pp), dt) + 0.001;
    float diff_anti = dot(abs(np - pn), dt) + 0.001;

    float3 blended = diff_anti * (nn + pp) + diff_main * (np + pn);
    return 0.5 * blended / (diff_main + diff_anti);
}
// Blends two opposing pairs of samples; each pair is weighted by the colour
// difference across the other pair (plus a 0.001 bias), so the more uniform
// pair contributes more. The result is the sum scale (about twice an average);
// the caller applies the final 0.5.
float3 aa40_pair_blend(float3 a0, float3 a1, float3 b0, float3 b1)
{
    float m1 = dot(abs(a0 - a1), dt) + 0.001;
    float m2 = dot(abs(b0 - b1), dt) + 0.001;
    return (m2 * (a0 + a1) + m1 * (b0 + b1)) / (m1 + m2);
}

// Pixel shader for the anti-aliasing pass.
// Works on a grid four times finer than the native resolution: the 3x3
// neighbourhood of the current grid cell is fetched through texture2d(), four
// edge-aware corner blends are built from it, and the corners are bilinearly
// mixed by the fractional position inside the cell.
float4 PS_aa_shader_40(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
    // Grid dimensions and the size of one grid cell in texture space.
    float2 size = 4.0 / NormalizedNativePixelSize;
    float2 inv_size = 1.0 / size;
    float4 yx = float4(inv_size, -inv_size);

    float2 grid_pos = vTexCoord * size;
    float2 fp = frac(grid_pos);

    // Axis-aligned and diagonal one-cell steps.
    float2 step_x = float2(inv_size.x, 0.0);
    float2 step_y = float2(0.0, inv_size.y);
    float2 step_d1 = float2(inv_size.x, inv_size.y);
    float2 step_d2 = float2(-inv_size.x, inv_size.y);

    float2 base = floor(grid_pos) * 1.0001 * inv_size;

    // 3x3 neighbourhood, numbered row by row from the top-left (C4 = centre).
    float3 C0 = texture2d(sBackBuffer, base - step_d1, yx);
    float3 C1 = texture2d(sBackBuffer, base - step_y, yx);
    float3 C2 = texture2d(sBackBuffer, base - step_d2, yx);
    float3 C3 = texture2d(sBackBuffer, base - step_x, yx);
    float3 C4 = texture2d(sBackBuffer, base, yx);
    float3 C5 = texture2d(sBackBuffer, base + step_x, yx);
    float3 C6 = texture2d(sBackBuffer, base + step_d2, yx);
    float3 C7 = texture2d(sBackBuffer, base + step_y, yx);
    float3 C8 = texture2d(sBackBuffer, base + step_d1, yx);

    // One blend per 2x2 quadrant of the neighbourhood.
    float3 ul = aa40_pair_blend(C0, C4, C1, C3);
    float3 ur = aa40_pair_blend(C1, C5, C2, C4);
    float3 dl = aa40_pair_blend(C3, C7, C6, C4);
    float3 dr = aa40_pair_blend(C4, C8, C5, C7);

    float3 lower = dr * fp.x + dl * (1 - fp.x);
    float3 upper = ur * fp.x + ul * (1 - fp.x);
    float3 c11 = 0.5 * (lower * fp.y + upper * (1 - fp.y));
    return float4(c11, 1.0);
}
// Single-pass technique: runs PS_aa_shader_40 over the whole frame.
// NOTE(review): PostProcessVS is not defined in this file; presumably it comes from ReShade.fxh.
technique aa_shader_40
{
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_aa_shader_40;
}
}

View File

@@ -1,271 +0,0 @@
#include "ReShade.fxh"
/**
* @license
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
*
* TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED
* *AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS
* OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, NONINFRINGEMENT,IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA
* OR ITS SUPPLIERS BE LIABLE FOR ANY DIRECT, SPECIAL, INCIDENTAL, INDIRECT, OR
* CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS
* OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR ANY
* OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE,
* EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
*/
/*
FXAA_PRESET - Choose compile-in knob preset 3-6 (defaults to 6 when not defined).
------------------------------------------------------------------------------
FXAA_EDGE_THRESHOLD - The minimum amount of local contrast required
to apply algorithm.
1.0/3.0 - too little
1.0/4.0 - good start
1.0/8.0 - applies to more edges
1.0/16.0 - overkill
------------------------------------------------------------------------------
FXAA_EDGE_THRESHOLD_MIN - Trims the algorithm from processing darks.
Perf optimization.
1.0/32.0 - visible limit (smaller isn't visible)
1.0/16.0 - good compromise
1.0/12.0 - upper limit (seeing artifacts)
------------------------------------------------------------------------------
FXAA_SEARCH_STEPS - Maximum number of search steps for end of span.
------------------------------------------------------------------------------
FXAA_SEARCH_THRESHOLD - Controls when to stop searching.
1.0/4.0 - seems to be the best quality wise
------------------------------------------------------------------------------
FXAA_SUBPIX_TRIM - Controls sub-pixel aliasing removal.
1.0/2.0 - low removal
1.0/3.0 - medium removal
1.0/4.0 - default removal
1.0/8.0 - high removal
0.0 - complete removal
------------------------------------------------------------------------------
FXAA_SUBPIX_CAP - Insures fine detail is not completely removed.
This is important for the transition of sub-pixel detail,
like fences and wires.
3.0/4.0 - default (medium amount of filtering)
7.0/8.0 - high amount of filtering
1.0 - no capping of sub-pixel aliasing removal
*/
// Host-supplied values; PS_FXAA multiplies them together to obtain the frame
// size whose reciprocal is passed to FxaaPixelShader as rcpFrame.
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
// Back buffer sampled with linear filtering and clamped addressing.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=LINEAR;MinFilter=LINEAR;};
// Quality preset selection. Only presets 3, 4, 5 and 6 define the FXAA_*
// knobs below; any other FXAA_PRESET value leaves them undefined.
// Preset 6 is the default when FXAA_PRESET is not defined beforehand.
#ifndef FXAA_PRESET
#define FXAA_PRESET 6
#endif
#if (FXAA_PRESET == 3)
#define FXAA_EDGE_THRESHOLD (1.0/8.0)
#define FXAA_EDGE_THRESHOLD_MIN (1.0/16.0)
#define FXAA_SEARCH_STEPS 16
#define FXAA_SEARCH_THRESHOLD (1.0/4.0)
#define FXAA_SUBPIX_CAP (3.0/4.0)
#define FXAA_SUBPIX_TRIM (1.0/4.0)
#endif
#if (FXAA_PRESET == 4)
#define FXAA_EDGE_THRESHOLD (1.0/8.0)
#define FXAA_EDGE_THRESHOLD_MIN (1.0/24.0)
#define FXAA_SEARCH_STEPS 24
#define FXAA_SEARCH_THRESHOLD (1.0/4.0)
#define FXAA_SUBPIX_CAP (3.0/4.0)
#define FXAA_SUBPIX_TRIM (1.0/4.0)
#endif
#if (FXAA_PRESET == 5)
#define FXAA_EDGE_THRESHOLD (1.0/8.0)
#define FXAA_EDGE_THRESHOLD_MIN (1.0/24.0)
#define FXAA_SEARCH_STEPS 32
#define FXAA_SEARCH_THRESHOLD (1.0/4.0)
#define FXAA_SUBPIX_CAP (3.0/4.0)
#define FXAA_SUBPIX_TRIM (1.0/4.0)
#endif
// Preset 6 matches preset 5 except that sub-pixel trimming and capping are disabled.
#if (FXAA_PRESET == 6)
#define FXAA_EDGE_THRESHOLD (1.0/8.0)
#define FXAA_EDGE_THRESHOLD_MIN (1.0/24.0)
#define FXAA_SEARCH_STEPS 32
#define FXAA_SEARCH_THRESHOLD (1.0/4.0)
#define FXAA_SUBPIX_CAP (1.0)
#define FXAA_SUBPIX_TRIM (0.0)
#endif
// Rescales the trimmed sub-pixel blend back to the 0..1 range.
#define FXAA_SUBPIX_TRIM_SCALE (1.0/(1.0 - FXAA_SUBPIX_TRIM))
// Cheap luma estimate from an RGB colour: red plus green scaled by
// 0.587/0.299. Blue is ignored and the result is not normalised, so for
// inputs in 0..1 the value ranges from 0.0 to 2.963210702.
float FxaaLuma(float3 rgb) {
    float greenTerm = rgb.y * (0.587/0.299);
    return greenTerm + rgb.x;
}
// Linear blend of two colours: returns b when amountOfA is 0 and a when it
// is 1, written as (-t * b) + (a * t + b).
float3 FxaaLerp3(float3 a, float3 b, float amountOfA) {
    float3 t = float3(amountOfA, amountOfA, amountOfA);
    return (-t * b) + ((a * t) + b);
}
// Samples `tex` at `pos` shifted by a whole-pixel offset.
//   off      - offset in pixels.
//   rcpFrame - size of one pixel in texture coordinates.
float4 FxaaTexOff(sampler2D tex, float2 pos, int2 off, float2 rcpFrame) {
    float2 shifted = float2(pos.x + float(off.x) * rcpFrame.x,
                            pos.y + float(off.y) * rcpFrame.y);
    return tex2D(tex, shifted);
}
// FXAA main routine.
//   pos      - texture position of the pixel, {0.0 to 1.0} on each axis.
//   tex      - source image sampler.
//   rcpFrame - {1.0/frameWidth, 1.0/frameHeight}.
// Returns the anti-aliased colour for the pixel, or the unmodified centre
// sample when local contrast is below the edge threshold.
float3 FxaaPixelShader(float2 pos, sampler2D tex, float2 rcpFrame)
{
    // Centre sample and its four direct neighbours.
    float3 rgbN = FxaaTexOff(tex, pos.xy, int2( 0,-1), rcpFrame).xyz;
    float3 rgbW = FxaaTexOff(tex, pos.xy, int2(-1, 0), rcpFrame).xyz;
    float3 rgbM = FxaaTexOff(tex, pos.xy, int2( 0, 0), rcpFrame).xyz;
    float3 rgbE = FxaaTexOff(tex, pos.xy, int2( 1, 0), rcpFrame).xyz;
    float3 rgbS = FxaaTexOff(tex, pos.xy, int2( 0, 1), rcpFrame).xyz;

    float lumaN = FxaaLuma(rgbN);
    float lumaW = FxaaLuma(rgbW);
    float lumaM = FxaaLuma(rgbM);
    float lumaE = FxaaLuma(rgbE);
    float lumaS = FxaaLuma(rgbS);

    // Early out when the local luma range is too small to be an edge.
    float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE)));
    float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE)));
    float range = rangeMax - rangeMin;
    if(range < max(FXAA_EDGE_THRESHOLD_MIN, rangeMax * FXAA_EDGE_THRESHOLD))
    {
        return rgbM;
    }

    // Sub-pixel blend amount from the contrast between the centre and the
    // average of its direct neighbours.
    float3 rgbL = rgbN + rgbW + rgbM + rgbE + rgbS;
    float lumaL = (lumaN + lumaW + lumaE + lumaS) * 0.25;
    float rangeL = abs(lumaL - lumaM);
    float blendL = max(0.0, (rangeL / range) - FXAA_SUBPIX_TRIM) * FXAA_SUBPIX_TRIM_SCALE;
    blendL = min(FXAA_SUBPIX_CAP, blendL);

    // Diagonal neighbours complete the 3x3 box average kept in rgbL.
    float3 rgbNW = FxaaTexOff(tex, pos.xy, int2(-1,-1), rcpFrame).xyz;
    float3 rgbNE = FxaaTexOff(tex, pos.xy, int2( 1,-1), rcpFrame).xyz;
    float3 rgbSW = FxaaTexOff(tex, pos.xy, int2(-1, 1), rcpFrame).xyz;
    float3 rgbSE = FxaaTexOff(tex, pos.xy, int2( 1, 1), rcpFrame).xyz;
    rgbL += (rgbNW + rgbNE + rgbSW + rgbSE);
    rgbL *= (1.0/float3(9.0, 9.0, 9.0));

    float lumaNW = FxaaLuma(rgbNW);
    float lumaNE = FxaaLuma(rgbNE);
    float lumaSW = FxaaLuma(rgbSW);
    float lumaSE = FxaaLuma(rgbSE);

    // Decide whether the edge runs horizontally or vertically.
    float edgeVert =
        abs((0.25 * lumaNW) + (-0.5 * lumaN) + (0.25 * lumaNE)) +
        abs((0.50 * lumaW ) + (-1.0 * lumaM) + (0.50 * lumaE )) +
        abs((0.25 * lumaSW) + (-0.5 * lumaS) + (0.25 * lumaSE));
    float edgeHorz =
        abs((0.25 * lumaNW) + (-0.5 * lumaW) + (0.25 * lumaSW)) +
        abs((0.50 * lumaN ) + (-1.0 * lumaM) + (0.50 * lumaS )) +
        abs((0.25 * lumaNE) + (-0.5 * lumaE) + (0.25 * lumaSE));
    bool horzSpan = edgeHorz >= edgeVert;

    // From here on "N"/"S" mean the two sides perpendicular to the edge.
    float lengthSign = horzSpan ? -rcpFrame.y : -rcpFrame.x;
    if(!horzSpan)
    {
        lumaN = lumaW;
        lumaS = lumaE;
    }

    // Pick the side with the steeper gradient; lumaN becomes the average luma
    // across that side and lengthSign points towards it.
    float gradientN = abs(lumaN - lumaM);
    float gradientS = abs(lumaS - lumaM);
    lumaN = (lumaN + lumaM) * 0.5;
    lumaS = (lumaS + lumaM) * 0.5;
    if (gradientN < gradientS)
    {
        lumaN = lumaS;
        gradientN = gradientS;
        lengthSign *= -1.0;
    }

    // Start half a pixel towards the chosen side, on the edge itself.
    float2 posN;
    posN.x = pos.x + (horzSpan ? 0.0 : lengthSign * 0.5);
    posN.y = pos.y + (horzSpan ? lengthSign * 0.5 : 0.0);
    gradientN *= FXAA_SEARCH_THRESHOLD;

    // Walk along the edge in both directions until the luma departs from the
    // edge average by the search threshold, or the step budget runs out.
    float2 posP = posN;
    float2 offNP = horzSpan ? float2(rcpFrame.x, 0.0) : float2(0.0, rcpFrame.y);
    float lumaEndN = lumaN;
    float lumaEndP = lumaN;
    bool doneN = false;
    bool doneP = false;
    posN += offNP * float2(-1.0, -1.0);
    posP += offNP * float2( 1.0, 1.0);
    for(int i = 0; i < FXAA_SEARCH_STEPS; i++) {
        if(!doneN)
        {
            lumaEndN = FxaaLuma(tex2D(tex, posN.xy).xyz);
        }
        if(!doneP)
        {
            lumaEndP = FxaaLuma(tex2D(tex, posP.xy).xyz);
        }
        doneN = doneN || (abs(lumaEndN - lumaN) >= gradientN);
        doneP = doneP || (abs(lumaEndP - lumaN) >= gradientN);
        if(doneN && doneP)
        {
            break;
        }
        if(!doneN)
        {
            posN -= offNP;
        }
        if(!doneP)
        {
            posP += offNP;
        }
    }

    // Distance to each end of the span; the nearer end drives the offset.
    float dstN = horzSpan ? pos.x - posN.x : pos.y - posN.y;
    float dstP = horzSpan ? posP.x - pos.x : posP.y - pos.y;
    bool directionN = dstN < dstP;
    lumaEndN = directionN ? lumaEndN : lumaEndP;

    // No offset when the centre and the span end lie on the same side of the
    // edge average.
    if(((lumaM - lumaN) < 0.0) == ((lumaEndN - lumaN) < 0.0))
    {
        lengthSign = 0.0;
    }

    float spanLength = (dstP + dstN);
    dstN = directionN ? dstN : dstP;
    float subPixelOffset = (0.5 + (dstN * (-1.0/spanLength))) * lengthSign;
    float3 rgbF = tex2D(tex, float2(
        pos.x + (horzSpan ? 0.0 : subPixelOffset),
        pos.y + (horzSpan ? subPixelOffset : 0.0))).xyz;

    // Mix the box average with the edge-offset sample by the sub-pixel amount.
    return FxaaLerp3(rgbL, rgbF, blendL);
}
// Pixel shader entry point: runs FXAA on the back buffer and returns the
// result with alpha set to 1.0. The pixel size handed to FxaaPixelShader is
// the reciprocal of ViewportSize * BufferToViewportRatio.
float4 PS_FXAA(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
    float2 rcpFrame = 1.0 / (ViewportSize*BufferToViewportRatio);
    return float4(FxaaPixelShader(vTexCoord, sBackBuffer, rcpFrame), 1.0);
}
// Single-pass technique: runs PS_FXAA over the whole frame.
// NOTE(review): PostProcessVS is not defined in this file; presumably it comes from ReShade.fxh.
technique FXAA
{
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_FXAA;
}
}

View File

@@ -1,163 +0,0 @@
#include "ReShade.fxh"
/*
G-sharp resampler 2.0 - dynamic range (upscaler, downsampler)
Copyright (C) 2024 guest(r)
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// Filter radius parameter: gsharp2() reads it as FPR and visits taps out to
// ceil(2 * GSHARP0) texels on each side.
uniform float GSHARP0 <
ui_type = "drag";
ui_min = 0.75;
ui_max = 8.0;
ui_step = 0.05;
ui_label = "Filter Range";
> = 2.45;
// Divides the normalised distance inside getw() before the Gaussian-style falloff.
uniform float GBOOST <
ui_type = "drag";
ui_min = 1.0;
ui_max = 2.5;
ui_step = 0.05;
ui_label = "Filter Boost (same range, speedup)";
> = 1.75;
// Raw sharpness setting; see the GMAXSHARP macro below for the value getw() actually uses.
uniform float GMAXSHARP <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.25;
ui_step = 0.01;
ui_label = "Filter Sharpness";
> = 0.1;
// Blend between the plain 0..1 clamp (0.0) and the clamp to the local min/max (1.0).
uniform float GPAR <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.10;
ui_label = "Anti-Ringing";
> = 0.50;
// Host-supplied values.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
// Point-sampled back buffer (input of the horizontal pass).
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// Intermediate target written by the horizontal pass and read by the vertical pass.
texture2D tGSHARP2_H{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
sampler2D sGSHARP2_H{Texture=tGSHARP2_H;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// From here on GMAXSHARP expands to the uniform scaled by 0.25 * GBOOST^2.
// NOTE(review): this relies on the preprocessor not re-expanding a macro inside
// its own replacement, so the inner GMAXSHARP names the uniform - confirm for ReShade FX.
#define GMAXSHARP (0.25*GBOOST*GBOOST*GMAXSHARP)
// Gaussian-style falloff: exp(-2.33 * x^2). Equals 1.0 at x = 0.
float smothstep(float x)
{
    float exponent = -2.33*x*x;
    return exp(exponent);
}
// Filter weight for a normalised tap distance `x`.
// The main lobe is the squared falloff of x / GBOOST lowered by GMAXSHARP
// (which makes the weight negative further out). The result never drops below
// a ramp that rises linearly from -GMAXSHARP at x = 1 to 0 at x = 2.
float getw(float x)
{
    float falloff = smothstep(x/GBOOST);
    float lobe = falloff*falloff - GMAXSHARP;
    float ramp = lerp(-GMAXSHARP, 0.0, x-1.0);
    return max(lobe, ramp);
}
// One-dimensional G-sharp resampling filter.
//   tex    - texture coordinate of the centre of the base texel.
//   dx     - step of one source texel along the filter axis.
//   f      - negated fractional offset of the output position inside the base texel.
//   Source - sampler to read from.
// Returns the weighted average of the taps, blended by GPAR between a plain
// 0..1 clamp and a clamp to the min/max of the taps that had positive weight
// (anti-ringing).
float3 gsharp2(float2 tex, float2 dx, float f, sampler2D Source)
{
    float3 color = 0.0.xxx;
    float w, fp;
    float wsum = 0.0;
    float3 pixel;
    float3 cmax = 0.0.xxx;
    float3 cmin = 1.0.xxx;
    float FPR = GSHARP0;
    float FPR2 = 2.0*FPR;
    float LOOPSIZE = ceil(FPR2);
    float x = -LOOPSIZE+1.0;
    do
    {
        // Tap distance, capped at twice the filter radius, then normalised by the radius.
        fp = min(abs(x+f),FPR2);
        pixel = tex2D(Source, tex + x*dx).rgb;
        fp = fp/FPR;
        w = getw(fp);
        // Only positively weighted taps define the anti-ringing range.
        if (w > 0.0) { cmin = min(cmin, pixel); cmax = max(cmax, pixel); }
        color = color + w * pixel;
        wsum = wsum + w;
        x = x + 1.0;
    } while (x <= LOOPSIZE);
    color = color / wsum;
    return lerp(clamp(color, 0.0, 1.0), clamp(color, cmin, cmax), GPAR);
}
// Horizontal pass: filters the back buffer along X on the internal-resolution grid.
float4 PS_GSHARP2_H(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
    float2 texel = NormalizedInternalPixelSize;
    float2 resolution = 1.0 / texel;

    // Position in source texels, measured from texel centres.
    float2 pos = vTexCoord * resolution - 0.5;
    float2 center = (floor(pos) + 0.5) * texel;

    float3 filtered = gsharp2(center, float2(texel.x, 0.0), -frac(pos.x), sBackBuffer);
    return float4(filtered, 1.0);
}
// Vertical pass: filters the output of the horizontal pass along Y.
// The grid uses ViewportSize.x * BufferToViewportRatio.x columns horizontally
// and the internal resolution vertically.
float4 PS_GSHARP2_V(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
    float width = (ViewportSize.x*BufferToViewportRatio.x);
    float2 resolution = float2(width, 1.0/NormalizedInternalPixelSize.y);
    float2 texel = float2(1.0/width, NormalizedInternalPixelSize.y);

    // Position in source texels, measured from texel centres.
    float2 pos = vTexCoord * resolution - 0.5;
    float2 center = (floor(pos) + 0.5) * texel;

    float3 filtered = gsharp2(center, float2(0.0, texel.y), -frac(pos.y), sGSHARP2_H);
    return float4(filtered, 1.0);
}
// Two-pass separable technique: the first pass filters horizontally into
// tGSHARP2_H, the second filters that result vertically to the output.
// NOTE(review): PostProcessVS is not defined in this file; presumably it comes from ReShade.fxh.
technique GSHARP2
{
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_GSHARP2_H;
RenderTarget = tGSHARP2_H;
}
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_GSHARP2_V;
}
}

View File

@@ -1,145 +0,0 @@
#include "ReShade.fxh"
/*
G-sharp resampler 2.0 - dynamic range (upscaler, downsampler)
Copyright (C) 2024 guest(r)
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// Filter radius parameter: PS_GSHARP2 reads it as FPR and visits taps out to
// ceil(2 * GSHARP0) texels on each side, on both axes.
uniform float GSHARP0 <
ui_type = "drag";
ui_min = 0.75;
ui_max = 8.0;
ui_step = 0.05;
ui_label = "Filter Range";
> = 2.45;
// Divides the normalised distance inside getw() before the Gaussian-style falloff.
uniform float GBOOST <
ui_type = "drag";
ui_min = 1.0;
ui_max = 2.5;
ui_step = 0.05;
ui_label = "Filter Boost (same range, speedup)";
> = 1.75;
// Raw sharpness setting; see the GMAXSHARP macro below for the value getw() actually uses.
uniform float GMAXSHARP <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.25;
ui_step = 0.01;
ui_label = "Filter Sharpness";
> = 0.1;
// Blend between the plain 0..1 clamp (0.0) and the clamp to the local min/max (1.0).
uniform float GPAR <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.10;
ui_label = "Anti-Ringing";
> = 0.50;
// Host-supplied values.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
// Point-sampled back buffer with clamped addressing.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// From here on GMAXSHARP expands to the uniform scaled by 0.25 * GBOOST^2.
// NOTE(review): this relies on the preprocessor not re-expanding a macro inside
// its own replacement, so the inner GMAXSHARP names the uniform - confirm for ReShade FX.
#define GMAXSHARP (0.25*GBOOST*GBOOST*GMAXSHARP)
// Gaussian-style falloff: exp(-2.33 * x^2). Equals 1.0 at x = 0.
float smothstep(float x)
{
    float exponent = -2.33*x*x;
    return exp(exponent);
}
// Filter weight for a normalised tap distance `x`.
// The main lobe is the squared falloff of x / GBOOST lowered by GMAXSHARP
// (which makes the weight negative further out). The result never drops below
// a ramp that rises linearly from -GMAXSHARP at x = 1 to 0 at x = 2.
float getw(float x)
{
    float falloff = smothstep(x/GBOOST);
    float lobe = falloff*falloff - GMAXSHARP;
    float ramp = lerp(-GMAXSHARP, 0.0, x-1.0);
    return max(lobe, ramp);
}
// Single-pass two-dimensional G-sharp resampler.
// Visits every source texel in a square window around the output position,
// weights each one by getw() of its radial distance (normalised by GSHARP0),
// and returns the normalised sum. The result is blended by GPAR between a
// plain 0..1 clamp and a clamp to the min/max of the taps whose weight was
// not negative (anti-ringing). Alpha is set to 1.0.
float4 PS_GSHARP2(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
    float4 SourceSize = float4(1.0 / NormalizedInternalPixelSize, NormalizedInternalPixelSize);

    // Position in source texels measured from texel centres, the negated
    // fractional part, and the coordinate of the base texel centre.
    float2 pos = vTexCoord * SourceSize.xy-0.5;
    float2 f = -frac(pos);
    float2 tex = floor(pos)*SourceSize.zw + 0.5*SourceSize.zw;

    float3 color = 0.0.xxx;
    float2 dx = float2(SourceSize.z, 0.0);
    float2 dy = float2(0.0, SourceSize.w);
    float w, fp;
    float wsum = 0.0;
    float3 pixel;
    float3 cmax = 0.0.xxx;
    float3 cmin = 1.0.xxx;
    float FPR = GSHARP0;
    float FPR2 = 2.0*FPR;
    // Squared cut-off radius, compared against squared distances below.
    float FPR3 = FPR2*FPR2;
    float LOOPSIZE = ceil(FPR2);
    float y = -LOOPSIZE+1.0;
    float x = 0.0;
    do
    {
        x = -LOOPSIZE + 1.0;
        do
        {
            fp = dot(float2(x+f.x,y+f.y),float2(x+f.x,y+f.y));
            // Taps outside the circular support are skipped without sampling.
            if (fp >= FPR3) w = 0.0;
            else
            {
                pixel = tex2D(sBackBuffer, tex + x*dx + y*dy).rgb;
                fp = sqrt(fp)/FPR;
                w = getw(fp);
                if (w >= 0.0) { cmin = min(cmin, pixel); cmax = max(cmax, pixel); }
                color = color + w * pixel;
                wsum = wsum + w;
            }
            x = x + 1.0;
        } while (x <= LOOPSIZE);
        y = y + 1.0;
    } while (y <= LOOPSIZE);
    color = color / wsum;
    color = lerp(clamp(color, 0.0, 1.0), clamp(color, cmin, cmax), GPAR);
    return float4(color, 1.0);
}
// Single-pass technique: runs PS_GSHARP2 over the whole frame.
// NOTE(review): PostProcessVS is not defined in this file; presumably it comes from ReShade.fxh.
technique GSHARP2
{
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_GSHARP2;
}
}

View File

@@ -1,797 +0,0 @@
#include "ReShade.fxh"
/*
CRT-Consumer
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
uniform float PRE_SCALE <
ui_type = "drag";
ui_min = 1.0;
ui_max = 4.0;
ui_step = 0.1;
ui_label = "Pre-Scale Sharpening";
> = 1.5;
uniform float blurx <
ui_type = "drag";
ui_min = -4.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Convergence X";
> = 0.25;
uniform float blury <
ui_type = "drag";
ui_min = -4.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Convergence Y";
> = -0.1;
uniform float warpx <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.12;
ui_step = 0.01;
ui_label = " Curvature X";
> = 0.03;
uniform float warpy <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.12;
ui_step = 0.01;
ui_label = " Curvature Y";
> = 0.04;
uniform float corner <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.10;
ui_step = 0.01;
ui_label = " Corner size";
> = 0.03;
uniform float smoothness <
ui_type = "drag";
ui_min = 100.0;
ui_max = 600.0;
ui_step = 5.0;
ui_label = " Border Smoothness";
> = 400.0;
uniform bool inter <
ui_type = "radio";
ui_label = "Interlacing Toggle";
> = true;
uniform float Downscale <
ui_type = "drag";
ui_min = 1.0;
ui_max = 8.0;
ui_step = 1.;
ui_label = "Interlacing Downscale Scanlines";
> = 2.0;
uniform float scanlow <
ui_type = "drag";
ui_min = 1.0;
ui_max = 15.0;
ui_step = 1.0;
ui_label = "Beam low";
> = 6.0;
uniform float scanhigh <
ui_type = "drag";
ui_min = 1.0;
ui_max = 15.0;
ui_step = 1.0;
ui_label = "Beam high";
> = 8.0;
uniform float beamlow <
ui_type = "drag";
ui_min = 0.5;
ui_max = 2.5;
ui_step = 0.05;
ui_label = "Scanlines dark";
> = 1.45;
uniform float beamhigh <
ui_type = "drag";
ui_min = 0.5;
ui_max = 2.5;
ui_step = 0.05;
ui_label = "Scanlines bright";
> = 1.05;
uniform float preserve <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_label = "Protect White On Masks";
> = 0.98;
uniform float brightboost1 <
ui_type = "drag";
ui_min = 0.0;
ui_max = 3.0;
ui_step = 0.05;
ui_label = "Bright boost dark pixels";
> = 1.25;
uniform float brightboost2 <
ui_type = "drag";
ui_min = 0.0;
ui_max = 3.0;
ui_step = 0.05;
ui_label = "Bright boost bright pixels";
> = 1.0;
uniform float glow <
ui_type = "drag";
ui_min = 1.0;
ui_max = 6.0;
ui_step = 1.0;
ui_label = "Glow pixels per axis";
> = 3.0;
uniform float quality <
ui_type = "drag";
ui_min = 0.25;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Glow quality";
> = 1.0;
uniform float glow_str <
ui_type = "drag";
ui_min = 0.0001;
ui_max = 2.0;
ui_step = 0.05;
ui_label = "Glow intensity";
> = 0.3;
uniform float nois <
ui_type = "drag";
ui_min = 0.0;
ui_max = 32.0;
ui_step = 1.0;
ui_label = "Add Noise";
> = 0.0;
uniform float postbr <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.5;
ui_step = 0.02;
ui_label = "Post Brightness";
> = 1.0;
uniform float palette_fix <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 1.0;
ui_label = "Palette Fixes. Sega, PUAE Atari ST dark colors";
> = 0.0;
uniform float Shadowmask <
ui_type = "drag";
ui_min = -1.0;
ui_max = 8.0;
ui_step = 1.;
ui_label = "Mask Type";
> = 0.0;
uniform float masksize <
ui_type = "drag";
ui_min = 1.0;
ui_max = 2.0;
ui_step = 1.0;
ui_label = "Mask Size";
> = 1.0;
uniform float MaskDark <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 0.1;
ui_label = "Mask dark";
> = 0.2;
uniform float MaskLight <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 0.1;
ui_label = "Mask light";
> = 1.5;
uniform float slotmask <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Slot Mask Strength";
> = 0.0;
uniform float slotwidth <
ui_type = "drag";
ui_min = 1.0;
ui_max = 6.0;
ui_step = 0.5;
ui_label = "Slot Mask Width";
> = 2.0;
uniform float double_slot <
ui_type = "drag";
ui_min = 1.0;
ui_max = 2.0;
ui_step = 1.0;
ui_label = "Slot Mask Height: 2x1 or 4x1";
> = 1.0;
uniform float slotms <
ui_type = "drag";
ui_min = 1.0;
ui_max = 2.0;
ui_step = 1.0;
ui_label = "Slot Mask Size";
> = 1.0;
uniform float GAMMA_OUT <
ui_type = "drag";
ui_min = 0.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Gamma Out";
> = 2.25;
uniform float sat <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 0.05;
ui_label = "Saturation";
> = 1.0;
uniform float contrast <
ui_type = "drag";
ui_min = 0.00;
ui_max = 2.00;
ui_step = 0.05;
ui_label = "Contrast, 1.0:Off";
> = 1.0;
uniform float WP <
ui_type = "drag";
ui_min = -100.0;
ui_max = 100.0;
ui_step = 5.;
ui_label = "Color Temperature %";
> = 0.0;
uniform float rg <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Red-Green Tint";
> = 0.0;
uniform float rb <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Red-Blue Tint";
> = 0.0;
uniform float gr <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Green-Red Tint";
> = 0.0;
uniform float gb <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Green-Blue Tint";
> = 0.0;
uniform float br <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Blue-Red Tint";
> = 0.0;
uniform float bg <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Blue-Green Tint";
> = 0.0;
uniform bool vignette <
ui_type = "radio";
ui_label = "Vignette On/Off";
> = false;
uniform float vpower <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_label = "Vignette Power";
> = 0.15;
uniform float vstr <
ui_type = "drag";
ui_min = 0.0;
ui_max = 50.0;
ui_step = 1.0;
ui_label = "Vignette strength";
> = 40.0;
uniform bool alloff <
ui_type = "radio";
ui_label = "Switch off shader";
> = false;
uniform float FrameCount < source = "framecount"; >;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform float ViewportX < source = "viewportx"; >;
uniform float ViewportY < source = "viewporty"; >;
uniform float ViewportWidth < source = "viewportwidth"; >;
uniform float ViewportHeight < source = "viewportheight"; >;
uniform float2 ViewportOffset < source = "viewportoffset"; >;
uniform float BufferWidth < source = "bufferwidth"; >;
uniform float BufferHeight < source = "bufferheight"; >;
uniform float NativeWidth < source = "nativewidth"; >;
uniform float NativeHeight < source = "nativeheight"; >;
uniform float InternalWidth < source = "internalwidth"; >;
uniform float InternalHeight < source = "internalheight"; >;
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// Frame counter divided by 2; frac(iTime) alternates between 0.0 and 0.5 on
// consecutive frames.
#define iTime (float(FrameCount)/2.0)
// Frame counter divided by 60; used as the time input of noise().
#define iTimer (float(FrameCount)/60.0)
// xy = 1 / NormalizedNativePixelSize, zw = NormalizedNativePixelSize.
#define SourceSize (float4(1.0/NormalizedNativePixelSize,NormalizedNativePixelSize))
// Viewport size multiplied by the buffer-to-viewport ratio.
#define OutputSize (ViewportSize*BufferToViewportRatio)
// Applies the curvature distortion to a coordinate.
// The coordinate is remapped from [0, 1] to [-1, 1], each axis is scaled by
// 1 + (other axis squared) * warpx / warpy, and the result is mapped back.
float2 Warp(float2 pos)
{
    float2 centered = pos * 2.0 - 1.0;
    float2 bulge = float2(1.0 + (centered.y * centered.y) * warpx,
                          1.0 + (centered.x * centered.x) * warpy);
    centered *= bulge;
    return centered * 0.5 + 0.5;
}
// Scanline weight for a pixel at vertical distance y from a scanline, given
// luminance l.
//   y - distance term; also interpolates the beam factor scanlow..scanhigh
//   l - luminance; interpolates the scan factor beamlow..beamhigh
// Returns exp2(-beam * (y * scan)^2).
float sw(float y, float l)
{
    float ex = y * lerp(beamlow, beamhigh, l);
    return exp2(-lerp(scanlow, scanhigh, y) * ex * ex);
}
// Returns the RGB mask multiplier for one output pixel.
//   x   - output pixel coordinate; divided by masksize and floored before use
//   col - current pixel colour; only read by mask types 4 and 7
//   l   - pixel luminance; scales the gap rows of mask types 5 and 6
// The Shadowmask uniform selects the pattern (0..8). Any other value
// (including -1) returns (1, 1, 1), i.e. no mask.
float3 mask(float2 x, float3 col, float l)
{
    x = floor(x / masksize);

    if (Shadowmask == 0.0)
    {
        // Two-column pattern: (1, MaskDark, 1) / (MaskDark, 1, MaskDark).
        float m = frac(x.x * 0.4999);
        if (m < 0.4999) return float3(1.0, MaskDark, 1.0);
        else return float3(MaskDark, 1.0, MaskDark);
    }
    else if (Shadowmask == 1.0)
    {
        // Three-column B/G/R pattern; rows alternate between MaskLight and
        // MaskDark, with the row phase shifted every three columns.
        float3 Mask = float3(MaskDark, MaskDark, MaskDark);
        float line = MaskLight;
        float odd = 0.0;
        if (frac(x.x / 6.0) < 0.5) odd = 1.0;
        if (frac((x.y + odd) / 2.0) < 0.5) line = MaskDark;
        float m = frac(x.x / 3.0);
        if (m < 0.333) Mask.b = MaskLight;
        else if (m < 0.666) Mask.g = MaskLight;
        else Mask.r = MaskLight;
        Mask *= line;
        return Mask;
    }
    else if (Shadowmask == 2.0)
    {
        // Three-column B/G/R stripes without any row modulation.
        float m = frac(x.x * 0.3333);
        if (m < 0.3333) return float3(MaskDark, MaskDark, MaskLight);
        if (m < 0.6666) return float3(MaskDark, MaskLight, MaskDark);
        else return float3(MaskLight, MaskDark, MaskDark);
    }
    // Every branch above returns, so this `else` only makes the chain uniform.
    else if (Shadowmask == 3.0)
    {
        // Two-column pattern: full brightness / MaskDark on all channels.
        float m = frac(x.x * 0.5);
        if (m < 0.5) return float3(1.0, 1.0, 1.0);
        else return float3(MaskDark, MaskDark, MaskDark);
    }
    else if (Shadowmask == 4.0)
    {
        // Starts from the pixel colour, forces R+B or G to 1.0 per column and
        // modulates rows like mask type 1 (phase shift every two columns).
        float3 Mask = float3(col.rgb);
        float line = MaskLight;
        float odd = 0.0;
        if (frac(x.x / 4.0) < 0.5) odd = 1.0;
        if (frac((x.y + odd) / 2.0) < 0.5) line = MaskDark;
        float m = frac(x.x / 2.0);
        if (m < 0.5) { Mask.r = 1.0; Mask.b = 1.0; }
        else Mask.g = 1.0;
        Mask *= line;
        return Mask;
    }
    else if (Shadowmask == 5.0)
    {
        // Two-column pattern with gap rows; the gap position differs between
        // the two halves of every four-column group. Gap rows are scaled by l.
        float3 Mask = float3(1.0, 1.0, 1.0);
        if (frac(x.x / 4.0) < 0.5)
        {
            if (frac(x.y / 3.0) < 0.666)
            {
                if (frac(x.x / 2.0) < 0.5) Mask = float3(1.0, MaskDark, 1.0);
                else Mask = float3(MaskDark, 1.0, MaskDark);
            }
            else Mask *= l;
        }
        else if (frac(x.x / 4.0) >= 0.5)
        {
            if (frac(x.y / 3.0) > 0.333)
            {
                if (frac(x.x / 2.0) < 0.5) Mask = float3(1.0, MaskDark, 1.0);
                else Mask = float3(MaskDark, 1.0, MaskDark);
            }
            else Mask *= l;
        }
        return Mask;
    }
    else if (Shadowmask == 6.0)
    {
        // Three-column R/G/B pattern with gap rows; the gap position differs
        // between the two halves of every six-column group.
        float3 Mask = float3(MaskDark, MaskDark, MaskDark);
        if (frac(x.x / 6.0) < 0.5)
        {
            if (frac(x.y / 4.0) < 0.75)
            {
                if (frac(x.x / 3.0) < 0.3333) Mask.r = MaskLight;
                else if (frac(x.x / 3.0) < 0.6666) Mask.g = MaskLight;
                else Mask.b = MaskLight;
            }
            // Was `Mask * l * 0.9;`, an expression whose result was discarded.
            // The gap row is meant to be scaled, as in mask type 5.
            else Mask *= l * 0.9;
        }
        else if (frac(x.x / 6.0) >= 0.5)
        {
            if (frac(x.y / 4.0) >= 0.5 || frac(x.y / 4.0) < 0.25)
            {
                if (frac(x.x / 3.0) < 0.3333) Mask.r = MaskLight;
                else if (frac(x.x / 3.0) < 0.6666) Mask.g = MaskLight;
                else Mask.b = MaskLight;
            }
            // Same fix as above: apply the scale instead of discarding it.
            else Mask *= l * 0.9;
        }
        return Mask;
    }
    else if (Shadowmask == 7.0)
    {
        // Three-column pattern; one channel per column is additionally
        // scaled by the pixel colour.
        float m = frac(x.x * 0.3333);
        if (m < 0.3333) return float3(MaskDark, MaskLight, MaskLight * col.b); //Cyan
        if (m < 0.6666) return float3(MaskLight * col.r, MaskDark, MaskLight); //Magenta
        else return float3(MaskLight, MaskLight * col.g, MaskDark); //Yellow
    }
    else if (Shadowmask == 8.0)
    {
        // Three-column B/G/R pattern at 0.9; MaskLight is applied on odd rows
        // of the first half of each six-column group and even rows of the
        // second half.
        float3 Mask = float3(MaskDark, MaskDark, MaskDark);
        float bright = MaskLight;
        float left = 0.0;
        if (frac(x.x / 6.0) < 0.5) left = 1.0;
        float m = frac(x.x / 3.0);
        if (m < 0.333) Mask.b = 0.9;
        else if (m < 0.666) Mask.g = 0.9;
        else Mask.r = 0.9;
        if ((x.y % 2.0) == 1.0 && left == 1.0 || (x.y % 2.0) == 0.0 && left == 0.0)
            Mask *= bright;
        return Mask;
    }
    else return float3(1.0, 1.0, 1.0);
}
// Slot-mask multiplier for an output pixel.
//   pos - output pixel coordinate; divided by slotms and floored before use
//   c   - pixel colour; its largest channel (raised to 1.33) sets the strength
// Returns 1.0 when slotmask is 0. Otherwise returns a darkening factor inside
// a slot (row 0 in the first half of the column period, row double_slot in
// the second half) and a factor >= 1.0 everywhere else.
float SlotMask(float2 pos, float3 c)
{
    if (slotmask == 0.0)
        return 1.0;

    float2 cell = floor(pos / slotms);
    float peak = pow(max(max(c.r, c.g), c.b), 1.33);
    float period = slotwidth * 2.0;
    float column_phase = frac(cell.x / period);
    float row = floor(frac(cell.y / (2.0 * double_slot)) * 2.0 * double_slot);

    bool upper_slot = (row == 0.0) && (column_phase < 0.5);
    bool lower_slot = (row == double_slot) && (column_phase >= 0.5);
    if (upper_slot || lower_slot)
        return lerp(1.0 - slotmask, 1.0 - 0.80 * slotmask, peak);

    return 1.0 + 0.7 * slotmask * (1.0 - peak);
}
// Builds a 4x4 contrast matrix: `contrast` on the RGB diagonal and
// (1 - contrast) / 2 in the first three entries of the last row.
// Note: the parameter has the same name as the `contrast` uniform and hides
// it inside this function.
float4x4 contrastMatrix(float contrast)
{
    float shift = (1.0 - contrast) / 2.0;
    return float4x4(
        contrast, 0,        0,        0,
        0,        contrast, 0,        0,
        0,        0,        contrast, 0,
        shift,    shift,    shift,    1);
}
// Builds a diagonal 3x3 matrix holding the vignette gain for `tex`.
//   l   - not used by the body
//   tex - texture coordinate
// The gain is min((tex.x*(1-tex.x) * tex.y*(1-tex.y) * vstr) ^ vpower, 1.0),
// or exactly 1.0 when the vignette toggle is off.
float3x3 vign(float l, float2 tex)
{
    float2 edge = tex;
    edge *= 1.0 - edge.xy;

    float gain = 1.0;
    if (vignette != false)
        gain = min(pow(edge.x * edge.y * vstr, vpower), 1.0);

    return float3x3(
        gain, 0,    0,
        0,    gain, 0,
        0,    0,    gain);
}
// Blends a colour with its grey-scale version according to the `sat` uniform
// (0.0 = grey, 1.0 = unchanged).
// The grey level is a weighted sum with weights (0.4, 0.5, 0.1); for dark
// colours (length * 0.5775 below 0.5) each weight is replaced by twice its
// square.
float3 saturation(float3 textureColor)
{
    float3 weights = float3(0.4, 0.5, 0.1);
    float brightness = length(textureColor.rgb) * 0.5775;
    if (brightness < 0.5)
    {
        float3 squared = weights.rgb * weights.rgb;
        weights.rgb = squared + squared;
    }

    float grey = dot(textureColor.rgb, weights);
    return float3(lerp(float3(grey, grey, grey), textureColor.rgb, sat));
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Glow term: sums squared, scaled samples over a (2*glow+1) x (2*glow+1)
// grid around texcoord.
//   texcoord - centre of the sampling grid
//   col      - not used by the body
// Returns glow_str * sum / (glow * glow).
float3 glow0 (float2 texcoord, float3 col)
{
    // A higher `quality` shrinks the step between taps, so the glow covers a
    // smaller area.
    float2 tap_step = SourceSize.zw / quality;
    // Every tap is scaled by the same factor; it does not vary with distance.
    float tap_scale = 1.0 / glow;
    float3 total = float3(0.0, 0.0, 0.0);

    // glow = taps per axis on each side of the centre; cost grows quadratically.
    for (float ix = -glow; ix <= glow; ix = ix + 1.0)
    {
        for (float iy = -glow; iy <= glow; iy = iy + 1.0)
        {
            float2 shift = float2(ix, iy) * tap_step;
            float3 tap = tex2D(sBackBuffer, texcoord + shift).rgb * tap_scale;
            total += tap * tap;
        }
    }

    return (glow_str * total / (glow * glow));
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Hash-style noise in [0, 1) derived from the coordinate and the frame-based
// iTimer value, so the pattern changes from frame to frame.
float noise(float2 co)
{
    float phase = iTimer * dot(co.xy, float2(12.9898, 78.233));
    return frac(sin(phase) * 43758.5453);
}
// Rounded-corner factor in [0, 1] for a texture coordinate: 0 outside the
// rounded corner, 1 well inside, with a ramp whose steepness is `smoothness`.
// The corner radius comes from the `corner` uniform; the y distance is scaled
// by SourceSize.y / SourceSize.x.
float corner0(float2 coord)
{
    float2 mid = float2(0.5, 0.5);
    // The * 1.0 is kept so the arithmetic matches the original exactly.
    float2 uv = (coord - mid) * 1.0 + mid;
    // Distance to the nearest edge on each axis.
    float2 edge = min(uv, float2(1.0, 1.0) - uv) * float2(1.0, SourceSize.y / SourceSize.x);
    float2 radius = float2(corner, corner);
    float2 inset = (radius - min(edge, radius));
    float dist = sqrt(dot(inset, inset));
    return clamp((radius.x - dist) * smoothness, 0.0, 1.0);
}
// Constant 3x3 matrices used by the colour-temperature step of
// PS_CRT_CONSUMER, which chains D50_to_XYZ -> XYZ_to_D65 for the "warmer"
// result and D65_to_XYZ -> XYZ_to_D50 for the "cooler" one.
// NOTE(review): the names suggest RGB <-> XYZ conversions for D65 / D50 white
// points; the coefficients have not been checked against a reference here.
static const float3x3 D65_to_XYZ = float3x3(
0.4306190, 0.2220379, 0.0201853,
0.3415419, 0.7066384, 0.1295504,
0.1783091, 0.0713236, 0.9390944);
static const float3x3 XYZ_to_D65 = float3x3(
3.0628971, -0.9692660, 0.0678775,
-1.3931791, 1.8760108, -0.2288548,
-0.4757517, 0.0415560, 1.0693490);
static const float3x3 D50_to_XYZ = float3x3(
0.4552773, 0.2323025, 0.0145457,
0.3675500, 0.7077956, 0.1049154,
0.1413926, 0.0599019, 0.7057489);
static const float3x3 XYZ_to_D50 = float3x3(
2.9603944, -0.9787684, 0.0844874,
-1.4678519, 1.9161415, -0.2545973,
-0.4685105, 0.0334540, 1.4216174);
// Pixel shader of the CRT_CONSUMER technique.
//   vpos      - SV_Position input (not read in the body)
//   vTexCoord - normalized texture coordinate of the output pixel
// Returns the processed colour, or the unprocessed back-buffer sample at the
// warped position when `alloff` is set.
float4 PS_CRT_CONSUMER(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
// Curvature-distorted coordinate; everything below samples relative to it.
float2 pos = Warp(vTexCoord.xy);
float2 tex_size = SourceSize.xy;
// Warped coordinate shifted by half a source texel.
float2 pC4 = (pos + 0.5/tex_size);
// Fractional position inside the source texel.
float2 fp = frac(pos * tex_size);
// With the interlacing toggle off and more than 400 source lines, the
// vertical phase is computed on a height divided by Downscale.
if (inter == false && tex_size.y > 400.0){ fp.y = frac(pos.y * tex_size.y*1.0/Downscale);}
float4 res = float4(1.0, 1.0, 1.0, 1.0);
// "Switch off shader": only the warp is applied.
if (alloff == true)
res = tex2D(sBackBuffer, pC4);
else
{
float2 texel = pos * tex_size;
float2 texel_floored = floor(texel);
float scale = PRE_SCALE;
float region_range = 0.5 - 0.5 / scale;
// Figure out where in the texel to sample to get correct pre-scaled bilinear.
// Uses the hardware bilinear interpolator to avoid having to sample 4 times manually.
float2 center_dist = fp - 0.5;
float2 fpp = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5;
float2 mod_texel = texel_floored + fpp;
float2 coords = mod_texel / SourceSize.xy;
// Convergence: three taps offset by (+blurx, -blury), (0, 0) and
// (-blurx, +blury) texels. Red uses taps 1+2, green all three, blue taps 2+3.
float3 sample1 = tex2D(sBackBuffer, float2(coords.x + blurx*SourceSize.z, coords.y - blury*SourceSize.w)).rgb;
float3 sample2 = tex2D(sBackBuffer, coords).rgb;
float3 sample3 = tex2D(sBackBuffer, float2(coords.x - blurx*SourceSize.z, coords.y + blury*SourceSize.w )).rgb;
float3 color = float3(sample1.r * 0.5 + sample2.r * 0.5,
sample1.g * 0.25 + sample2.g * 0.5 + sample3.g * 0.25,
sample2.b * 0.5 + sample3.b * 0.5);
// Palette fix: 1.0 scales the colour by 1.0667, 2.0 doubles it.
if (palette_fix != 0.0)
{
if (palette_fix == 1.0) color = color* 1.0667;
else if (palette_fix == 2.0) color = color * 2.0;
}
//COLOR TEMPERATURE FROM GUEST.R-DR.VENOM
// The sign of WP picks the cooler (negative) or warmer (positive) variant;
// |WP| / 100 is the blend amount.
if (WP != 0.0)
{
float3 warmer = mul(color, D50_to_XYZ);
warmer = mul(warmer, XYZ_to_D65);
float3 cooler = mul(color, D65_to_XYZ);
cooler = mul(cooler, XYZ_to_D50);
float m = abs(WP) / 100.0;
float3 comp = (WP < 0.0) ? cooler : warmer;
comp = clamp(comp, 0.0, 1.0);
color = float3(lerp(color, comp, m));
}
// Channel cross-tint matrix built from the six tint uniforms.
float3x3 hue = float3x3 (1., rg, rb, //red tint
gr, 1., gb, //green tint
br, bg, 1.); //blue tint
color = mul(color, hue);
// NOTE(review): presumably an approximate input-gamma curve — confirm.
color = (2.0*pow(color,float3(2.8, 2.8, 2.8))) - pow(color,float3(3.6, 3.6, 3.6));
float lum = color.r * 0.3 + color.g * 0.6 + color.b * 0.1;
float f = frac(fp.y -0.5);
// Scanlines are skipped when the interlacing toggle is on and the source has
// more than 400 lines; otherwise the weights of the two neighbouring
// scanlines are summed.
if (inter == true && tex_size.y > 400.0) color = color;
else
{color = color * sw(f,lum) + color * sw (1.0-f,lum);}
float lum1 = color.r * 0.3 + color.g * 0.6 + color.b * 0.1;
// The mask is faded towards (1, 1, 1) by lum1 * preserve.
color *= lerp(mask((vTexCoord * OutputSize.xy), color,lum1), float3(1.0, 1.0, 1.0), lum1*preserve);
if (slotmask != 0.0) color *= SlotMask((vTexCoord * OutputSize.xy) * 1.0001, color);
// Bright boost interpolated by the largest colour channel.
color *= lerp(brightboost1, brightboost2, max(max(color.r, color.g), color.b));
// Output gamma.
// NOTE(review): GAMMA_OUT has ui_min 0.0, which would divide by zero here.
color = pow(color,float3(1.0 / GAMMA_OUT, 1.0 / GAMMA_OUT, 1.0 / GAMMA_OUT));
if (glow_str != 0.0) color += glow0(coords,color);
if (sat != 1.0) color = saturation(color);
if (corner != 0.0) color *= corner0(pC4);
if (nois != 0.0) color *= 1.0 + noise(coords * 2.0) / nois;
// Post brightness, weighted by the pre-scanline luminance.
color *= lerp(1.0, postbr, lum);
res = float4(color, 1.0);
if (contrast != 1.0) res = mul(res, contrastMatrix(contrast));
// Interlacing flicker: with more than 400 source lines, every other frame
// is dimmed to 95%.
if (inter == true && SourceSize.y > 400.0 && frac(iTime) < 0.5) res = res * 0.95;
res.rgb = mul(res.rgb, vign(lum, vTexCoord));
}
return res;
}
// Single-pass technique: PostProcessVS as the vertex shader and
// PS_CRT_CONSUMER as the pixel shader.
technique CRT_CONSUMER
{
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_CRT_CONSUMER;
}
}

View File

@@ -32,8 +32,11 @@ uniform float SCANLINE <
ui_label = "Scanline Weight";
> = 0.3;
uniform bool INTERLACE <
ui_type = "radio";
uniform float INTERLACE <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "Interlacing On/Off";
> = 1.0;
@@ -61,8 +64,11 @@ uniform float MSIZE <
ui_label = "Mask Size";
> = 1.0;
uniform bool SLOT <
ui_type = "radio";
uniform float SLOT <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "Slot Mask On/Off";
> = 1.0;
@@ -106,8 +112,11 @@ uniform float bogus_geom <
ui_label = " [ GEOMETRY SETTINGS ] ";
> = 0.0;
uniform bool bzl <
ui_type = "radio";
uniform float bzl <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "Bezel On/Off";
> = 1.0;
@@ -139,7 +148,7 @@ uniform float centerx <
ui_type = "drag";
ui_min = -5.0;
ui_max = 5.0;
ui_step = 0.05;
ui_step = 0.0;
ui_label = "Image Center X";
> = 0.0;
@@ -167,8 +176,11 @@ uniform float WARPY <
ui_label = "Curvature Vertical";
> = 0.01;
uniform bool vig <
ui_type = "radio";
uniform float vig <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "Vignette On/Off";
> = 1.0;
@@ -224,7 +236,7 @@ uniform float BLACK <
ui_type = "drag";
ui_min = -0.20;
ui_max = 0.20;
ui_step = 0.01;
ui_step = 0.0;
ui_label = "Black Level";
> = 0.0;
@@ -238,9 +250,9 @@ uniform float RG <
uniform float RB <
ui_type = "drag";
ui_min = -0.25;
ui_max = 0.25;
ui_step = 0.01;
ui_min = 0.0;
ui_max = -0.25;
ui_step = 0.2;
ui_label = "Blue <-to-> Red Hue";
> = 0.0;
@@ -311,10 +323,9 @@ uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float UpscaleMultiplier < source = "upscale_multiplier"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform int FrameCount < source = "framecount"; >;
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;MipFilter=LINEAR;};
texture tBezel < source = "crt-cyclon/bezel.png"; >
{
Width = BUFFER_WIDTH;
@@ -431,9 +442,8 @@ uniform float2 BufferHeight < source = "bufferheight"; >;
float4 CRT_CYCLON_PS(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
float4 SourceSize = float4(1.0 / NormalizedNativePixelSize, NormalizedNativePixelSize);
float4 SourceSize = float4(1.0 / (NormalizedInternalPixelSize * UpscaleMultiplier), NormalizedInternalPixelSize * UpscaleMultiplier);
float2 OutputSize = ViewportSize;
float2 scale = BufferViewportRatio.xy;
float2 warpcoords = (vTexCoord-float2(0.5,0.5)) * BufferViewportRatio + float2(0.5,0.5);
@@ -448,7 +458,7 @@ float3x3 hue = float3x3(
float4 bez = float4(0.0,0.0,0.0,0.0);
// if (bzl == 1.0) bez = tex2D(sBezel,vTexCoord*SourceSize.xy/OriginalSize.xy*0.97+float2(0.015,0.015));
// if (bzl == 1.0) bez = tex2D(sBezel,vTexCoord*scale*0.97+float2(0.015,0.015));
if (bzl == true) bez = tex2D(sBezel,warpcoords*0.97+float2(0.015,0.015)); // This fix Bezel to adjust to Game's aspect ratio.
if (bzl == 1.0) bez = tex2D(sBezel,warpcoords*0.97+float2(0.015,0.015)); // This fix Bezel to adjust to Game's aspect ratio.
bez.rgb = lerp(bez.rgb, float3(ambient,ambient,ambient),0.5);
@@ -463,10 +473,10 @@ float3x3 hue = float3x3(
pos.x = lerp(pos.x, i.x*ps.x, 0.2);
// Convergence
float3 res0 = tex2D(sBackBuffer,pos).rgb;
float resr = tex2D(sBackBuffer,pos + dx*CONV_R).r;
float resb = tex2D(sBackBuffer,pos + dx*CONV_B).b;
float resg = tex2D(sBackBuffer,pos + dx*CONV_G).g;
float3 res0 = tex2D(ReShade::BackBuffer,pos).rgb;
float resr = tex2D(ReShade::BackBuffer,pos + dx*CONV_R).r;
float resb = tex2D(ReShade::BackBuffer,pos + dx*CONV_B).b;
float resg = tex2D(ReShade::BackBuffer,pos + dx*CONV_G).g;
float3 res = float3( res0.r*(1.0-C_STR) + resr*C_STR,
res0.g*(1.0-C_STR) + resg*C_STR,
@@ -474,7 +484,7 @@ float3x3 hue = float3x3(
);
// Vignette
float x = 0.0;
if (vig == true){
if (vig == 1.0){
x = vTexCoord.x*scale.x-0.5;
// x = vTexCoord.x-0.5;
x = x*x;}
@@ -498,7 +508,7 @@ float3x3 hue = float3x3(
{
s = frac(bpos.y*SourceSize.y/2.0-0.5);
// if (INTERLACE == 1.0) s = mod(float(FrameCount),2.0) < 1.0 ? s: s+0.5;
if (INTERLACE == true) s = (float(FrameCount) % 2.0) < 1.0 ? s: s+0.5;
if (INTERLACE == 1.0) s = (float(FrameCount) % 2.0) < 1.0 ? s: s+0.5;
}
// Calculate CRT-Geom scanlines weight and apply
float weight = scanlineWeights(s, res, x);
@@ -511,7 +521,7 @@ float3x3 hue = float3x3(
float CGWG = lerp(Maskl, Maskh, l);
res *= Mask(xy, CGWG);
// Apply slot mask on top of Trinitron-like mask
if (SLOT == true) res *= lerp(slot(xy/2.0),float3(1.0,1.0,1.0),CGWG);
if (SLOT == 1.0) res *= lerp(slot(xy/2.0),float3(1.0,1.0,1.0),CGWG);
if (POTATO == 0.0) res = inv_gamma(res,pwr);
else {res = sqrt(res); res *= lerp(1.3,1.1,l);}
@@ -526,7 +536,7 @@ float3x3 hue = float3x3(
res -= float3(BLACK,BLACK,BLACK);
res *= blck;
// Apply bezel code, adapted from New-Pixie
if (bzl == true)
if (bzl >0.0)
res.rgb = lerp(res.rgb, lerp(max(res.rgb, 0.0), pow( abs(bez.rgb), float3( 1.4,1.4,1.4 ) ), bez.w * bez.w), float3( 1.0,1.0,1.0 ) );

View File

@@ -1,150 +0,0 @@
#include "ReShade.fxh"
/*
zfast_crt_geo - A simple, fast CRT shader.
Copyright (C) 2017 Greg Hogan (SoltanGris42)
Copyright (C) 2023 Jose Linares (Dogway)
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
Notes: This shader does scaling with a weighted linear filter
based on the algorithm by Iñigo Quilez here:
https://iquilezles.org/articles/texture/
but modified to be somewhat sharper. Then a scanline effect that varies
based on pixel brightness is applied along with a monochrome aperture mask.
This shader runs at ~60fps on the Chromecast HD (10GFlops) on a 1080p display.
(https://forums.libretro.com/t/android-googletv-compatible-shaders-nitpicky)
Dogway: I modified zfast_crt.glsl shader to include screen curvature,
vignetting, round corners and phosphor*temperature. Horizontal pixel is left out
from the Quilez' algo (read above) to provide a more S-Video like horizontal blur.
The scanlines and mask are also now performed in the recommended linear light.
For this to run smoothly on GPU deprived platforms like the Chromecast and
older consoles, I had to remove several parameters and hardcode them into the shader.
Another POV is to run the shader on handhelds like the Switch or SteamDeck so they consume less battery.
*/
uniform float SCANLINE_WEIGHT <
ui_type = "drag";
ui_min = 0.0;
ui_max = 15.0;
ui_step = 0.5;
ui_label = "Scanline Amount";
> = 7.0;
uniform float MASK_DARK <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Mask Effect Amount";
> = 0.5;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float BufferWidth < source = "bufferwidth"; >;
uniform float BufferHeight < source = "bufferheight"; >;
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=LINEAR;MinFilter=LINEAR;};
// Extra values passed from VS_CRT_Geo_zFast to PS_CRT_Geo_zFast.
struct ST_VertexOut
{
// Filled with NormalizedNativePixelSize by the vertex shader.
float2 invDims : TEXCOORD1;
};
// Vertex shader: emits one oversized triangle covering the whole screen.
// Vertex ids 0, 1, 2 map to texcoords (0,0), (0,2), (2,0). The native pixel
// size is also forwarded to the pixel shader through vVARS.invDims.
void VS_CRT_Geo_zFast(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
{
    vVARS.invDims = NormalizedNativePixelSize;

    texcoord = float2((id == 2) ? 2.0 : 0.0,
                      (id == 1) ? 2.0 : 0.0);

    // Map texcoord [0, 1] to clip space [-1, 1] with the Y axis flipped.
    float2 clip = texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0);
    position = float4(clip, 0.0, 1.0);
}
// Mask column scale: 0.3333 when the buffer is taller than 1499 pixels,
// otherwise 0.5.
#define MSCL (BufferHeight > 1499.0 ? 0.3333 : 0.5)
// This compensates the scanline+mask embedded gamma from the beam dynamics
// (derived from SCANLINE_WEIGHT and MASK_DARK, replicated into a float3).
#define pwr ((1.0/((-0.0325*SCANLINE_WEIGHT+1.0)*(-0.311*MASK_DARK+1.0))-1.2).xxx)
// NTSC-J (D93) -> Rec709 D65 Joint Matrix (with D93 simulation)
// This is compensated for a linearization hack (RGB*RGB and then sqrt())
static const float3x3 P22D93 = float3x3(
1.00000, 0.00000, -0.06173,
0.07111, 0.96887, -0.01136,
0.00000, 0.08197, 1.07280);
// Returns gamma corrected output, compensated for scanline+mask embedded gamma.
//   col   - colour to encode
//   power - per-channel blend between sqrt(col) (0.0) and the circular curve
//           sqrt(1 - (col - 1)^2) (1.0)
float3 inv_gamma(float3 col, float3 power)
{
    float3 offset = col - 1.0;
    float3 root_curve = sqrt(col);
    float3 circle_curve = sqrt(1.0 - offset * offset);
    return lerp(root_curve, circle_curve, power);
}
// Applies a fixed screen curvature to a coordinate: the coordinate is mapped
// from [0, 1] to [-1, 1], each axis is scaled by 1 + (other axis squared) *
// 0.0276 (x) / 0.0414 (y), and the result is mapped back.
float2 Warp(float2 pos)
{
    float2 centered = pos * 2.0 - 1.0;
    float2 bulge = float2(1.0 + (centered.y * centered.y) * 0.0276,
                          1.0 + (centered.x * centered.x) * 0.0414);
    centered *= bulge;
    return centered * 0.5 + 0.5;
}
// Pixel shader of the CRT_Geo_zFast technique.
//   vpos      - SV_Position input (not read in the body)
//   vTexCoord - normalized texture coordinate of the output pixel
//   vVARS     - values from the vertex shader (invDims = native pixel size)
// Returns the shaded colour with alpha 1.0; pixels in the rounded corners
// are black.
float4 PS_CRT_Geo_zFast(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0, in ST_VertexOut vVARS) : SV_Target
{
    float2 warped = Warp(vTexCoord);

    // Rounded-corner test, evaluated on the warped coordinate.
    float2 corn = min(warped, 1.0 - warped);
    corn.x = 0.0001 / corn.x;
    bool in_corner = (corn.y <= corn.x) || (corn.x < 0.0001);

    // Vignette gain, evaluated on the unwarped coordinate.
    float2 falloff = vTexCoord;
    falloff *= (1.0 - falloff.xy);
    float vig = min(sqrt(falloff.x * falloff.y * 46.0), 1.0);

    // Of all the pixels that are mapped onto the texel we are
    // currently rendering, which pixel are we currently rendering?
    float ratio_scale = warped.y / NormalizedNativePixelSize.y - 0.5;
    // Snap to the center of the underlying texel.
    float row = floor(ratio_scale) + 0.5;
    // This is just like "Quilez Scaling" but sharper
    float f = ratio_scale - row;
    float Y = f * f;
    float sample_y = (row + 4.0 * Y * f) * vVARS.invDims.y;

    // Aperture mask: darkens output columns by MASK_DARK at an interval set
    // by MSCL.
    float whichmask = floor(vTexCoord.x * BufferWidth) * (-MSCL);
    float mask = 1.0 + float(frac(whichmask) < MSCL) * (-MASK_DARK);

    // Sample, square the colour, then apply the vignette-scaled P22D93 matrix.
    float3 colour = tex2D(sBackBuffer, float2(warped.x, sample_y)).rgb;
    colour = max(mul(P22D93 * vig, colour * colour), 0.0.xxx);

    float scanLineWeight = (1.5 - SCANLINE_WEIGHT * (Y - Y * Y));

    if (in_corner)
        colour = 0.0.xxx;

    // Brighter pixels are affected less by the scanline and mask.
    float luma = colour.r * 0.26667 + colour.g * 0.26667 + colour.b * 0.26667;
    return float4(inv_gamma(colour.rgb * lerp(scanLineWeight * mask, 1.0, luma), pwr), 1.0);
}
// Single-pass technique: VS_CRT_Geo_zFast as the vertex shader and
// PS_CRT_Geo_zFast as the pixel shader.
technique CRT_Geo_zFast
{
pass
{
VertexShader = VS_CRT_Geo_zFast;
PixelShader = PS_CRT_Geo_zFast;
}
}

View File

@@ -52,13 +52,16 @@ uniform bool CURVATURE <
ui_category = "Curvature";
ui_type = "radio";
ui_label = "CRTGeom Curvature Toggle";
> = true;
> = 1.0;
uniform bool invert_aspect <
ui_type = "radio";
uniform float invert_aspect <
ui_type = "drag";
ui_category = "Curvature";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "CRTGeom Curvature Aspect Inversion";
> = false;
> = 0.0;
uniform float R <
ui_type = "drag";
@@ -90,8 +93,8 @@ uniform float cornersmooth <
uniform float x_tilt <
ui_type = "drag";
ui_category = "Curvature";
ui_min = -1.0;
ui_max = 1.0;
ui_min = -0.5;
ui_max = 0.5;
ui_step = 0.05;
ui_label = "CRTGeom Horizontal Tilt";
> = 0.0;
@@ -99,8 +102,8 @@ uniform float x_tilt <
uniform float y_tilt <
ui_type = "drag";
ui_category = "Curvature";
ui_min = -1.0;
ui_max = 1.0;
ui_min = -0.5;
ui_max = 0.5;
ui_step = 0.05;
ui_label = "CRTGeom Vertical Tilt";
> = 0.0;
@@ -121,22 +124,6 @@ uniform float overscan_y <
ui_label = "CRTGeom Vert. Overscan %";
> = 100.0;
uniform float centerx <
ui_type = "drag";
ui_min = -100.0;
ui_max = 100.0;
ui_step = 0.1;
ui_label = "Image Center X";
> = 0.00;
uniform float centery <
ui_type = "drag";
ui_min = -100.0;
ui_max = 100.0;
ui_step = 0.1;
ui_label = "Image Center Y";
> = 0.00;
uniform float DOTMASK <
ui_type = "drag";
ui_min = 0.0;
@@ -161,10 +148,13 @@ uniform float scanline_weight <
ui_label = "CRTGeom Scanline Weight";
> = 0.3;
uniform bool vertical_scanlines <
ui_type = "radio";
uniform float vertical_scanlines <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "CRTGeom Vertical Scanlines";
> = false;
> = 0.0;
uniform float lum <
ui_type = "drag";
@@ -185,17 +175,14 @@ uniform float interlace_detect <
uniform float FrameCount < source = "framecount"; >;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 BufferViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 InternalPixelSize < source = "internal_pixel_size"; >;
uniform float2 NativePixelSize < source = "native_pixel_size"; >;
uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float UpscaleMultiplier < source = "upscale_multiplier"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform float ViewportWidth < source = "viewportwidth"; >;
uniform float ViewportHeight < source = "viewportheight"; >;
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
// Comment the next line to disable interpolation in linear gamma (and
// gain speed).
@@ -212,13 +199,13 @@ sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BO
#define PI 3.141592653589
#ifdef LINEAR_PROCESSING
# define TEX2D(c) pow(tex2D(sBackBuffer, (c)), float4(CRTgamma,CRTgamma,CRTgamma,CRTgamma))
# define TEX2D(c) pow(tex2D(ReShade::BackBuffer, (c)), float4(CRTgamma,CRTgamma,CRTgamma,CRTgamma))
#else
# define TEX2D(c) tex2D(sBackBuffer, (c))
# define TEX2D(c) tex2D(ReShade::BackBuffer, (c))
#endif
// aspect ratio
#define aspect (invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
#define aspect (invert_aspect>0.5?float2(0.75,1.0):float2(1.0,0.75))
#define overscan (float2(1.01,1.01));
@@ -285,15 +272,6 @@ float3 vs_maxscale(float2 sinangle, float2 cosangle)
return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y));
}
// Code snippet borrowed from crt-cyclon. (credits to DariusG)
float2 Warp(float2 pos)
{
pos = pos*2.0 - 1.0;
pos *= float2(1.0 + pos.y*pos.y*0, 1.0 + pos.x*pos.x*0);
pos = pos*0.5 + 0.5;
return pos;
}
// Vertex shader generating a triangle covering the entire screen
@@ -303,11 +281,8 @@ void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, ou
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
// center screen
texcoord = Warp(texcoord - float2(centerx,centery)/100.0);
float2 SourceSize = 1.0/NormalizedNativePixelSize;
float2 OutputSize = ViewportSize*BufferToViewportRatio;
float2 OutputSize = ViewportSize*BufferViewportRatio;
// Precalculate a bunch of useful values we'll need in the fragment
// shader.
@@ -315,7 +290,7 @@ void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, ou
vVARS.cosangle = cos(float2(x_tilt, y_tilt));
vVARS.stretch = vs_maxscale(vVARS.sinangle, vVARS.cosangle);
if(vertical_scanlines == false)
if(vertical_scanlines < 0.5)
{
vVARS.TextureSize = float2(SHARPER * SourceSize.x, SourceSize.y);
@@ -346,7 +321,7 @@ float intersect(float2 xy, float2 sinangle, float2 cosangle)
float A = dot(xy,xy) + d*d;
float B, C;
if(vertical_scanlines == false)
if(vertical_scanlines < 0.5)
{
B = 2.0*(R*(dot(xy,sinangle) - d*cosangle.x*cosangle.y) - d*d);
C = d*d + 2.0*R*d*cosangle.x*cosangle.y;
@@ -383,7 +358,7 @@ float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
float x = 1.0 - cos(r/R);
float D;
if(vertical_scanlines == false)
if(vertical_scanlines < 0.5)
D = d/R + x*cosangle.x*cosangle.y + dot(uv,sinangle);
else
D = d/R + x*cosangle.y*cosangle.x + dot(uv,sinangle);
@@ -393,7 +368,7 @@ float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
float3 maxscale(float2 sinangle, float2 cosangle)
{
if(vertical_scanlines == false)
if(vertical_scanlines < 0.5)
{
float2 c = bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.x*cosangle.y), sinangle, cosangle);
float2 a = float2(0.5, 0.5)*aspect;
@@ -460,12 +435,13 @@ float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)
float corner(float2 coord)
{
coord = (coord - float2(0.5, 0.5)) * float2(overscan_x / 100.0, overscan_y / 100.0) + float2(0.5, 0.5);
coord = min(coord, float2(1.0, 1.0) - coord) * aspect;
float2 cdist = float2(cornersize, cornersize);
coord = (cdist - min(coord, cdist));
float dist = sqrt(dot(coord, coord));
if(vertical_scanlines == false)
if(vertical_scanlines < 0.5)
return clamp((cdist.x - dist)*cornersmooth, 0.0, 1.0);
else
return clamp((cdist.y - dist)*cornersmooth, 0.0, 1.0);
@@ -476,7 +452,6 @@ float fwidth(float value){
}
float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
{
// Here's a helpful diagram to keep in mind while trying to
@@ -502,18 +477,17 @@ float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_
// Texture coordinates of the texel containing the active pixel.
float2 xy;
if (CURVATURE == true)
if (CURVATURE > 0.5)
xy = transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch);
else
xy = vTexCoord;
float cval = corner((xy-float2(0.5,0.5)) * BufferToViewportRatio + float2(0.5,0.5));
float cval = corner(xy);
// Of all the pixels that are mapped onto the texel we are
// currently rendering, which pixel are we currently rendering?
float2 ilvec;
if(vertical_scanlines == false)
if(vertical_scanlines < 0.5)
ilvec = float2(0.0, vVARS.ilfac.y * interlace_detect > 1.5 ? (float(FrameCount) % 2.0) : 0.0);
else
ilvec = float2(vVARS.ilfac.x * interlace_detect > 1.5 ? (float(FrameCount) % 2.0) : 0.0, 0.0);
@@ -528,7 +502,7 @@ float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_
// of various neighbour texels in a scanline on the current
// pixel.
float4 coeffs;
if(vertical_scanlines == false)
if(vertical_scanlines < 0.5)
coeffs = PI * float4(1.0 + uv_ratio.x, uv_ratio.x, 1.0 - uv_ratio.x, 2.0 - uv_ratio.x);
else
coeffs = PI * float4(1.0 + uv_ratio.y, uv_ratio.y, 1.0 - uv_ratio.y, 2.0 - uv_ratio.y);
@@ -546,7 +520,7 @@ float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_
// scanlines at the horizontal location of the current pixel,
// using the Lanczos coefficients above.
float4 col, col2;
if(vertical_scanlines == false)
if(vertical_scanlines < 0.5)
{
col = clamp(
mul(coeffs, float4x4(
@@ -595,7 +569,7 @@ float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_
// Calculate the influence of the current and next scanlines on
// the current pixel.
float4 weights, weights2;
if(vertical_scanlines == false)
if(vertical_scanlines < 0.5)
{
weights = scanlineWeights(uv_ratio.y, col);
weights2 = scanlineWeights(1.0 - uv_ratio.y, col2);

View File

@@ -1,375 +0,0 @@
#include "ReShade.fxh"
/*
Hyllian's CRT-sinc Shader
Copyright (C) 2011-2024 Hyllian
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Horizontal filter preset. get_hfilter_profile() maps 1 and 2 to fixed
// sharpness values; any other value (0 = Custom) uses SHP.
uniform int HFILTER_PROFILE <
ui_type = "combo";
ui_items = "Custom\0Composite\0Composite Soft\0";
ui_label = "H-FILTER PROFILE";
> = 0;
// Sharpness for the Custom preset. resampler4() multiplies the tap distances
// by this value before evaluating the window.
uniform float SHP <
ui_type = "drag";
ui_min = 0.50;
ui_max = 1.0;
ui_step = 0.01;
ui_label = "CUSTOM H-FILTER SHARPNESS";
> = 1.0;
// Used as the lerp factor between the raw filtered colour and the colour
// clamped to the min/max of the two central taps in PS_CRT_Hyllian.
uniform bool CRT_ANTI_RINGING <
ui_type = "radio";
ui_label = "ANTI RINGING";
> = true;
// When true, VS_CRT_Geom doubles the source size on one axis
// (shp_hack = 1.0 + float(SHARPNESS_HACK)); the axis depends on VSCANLINES.
uniform bool SHARPNESS_HACK <
ui_type = "radio";
ui_label = "SHARPNESS HACK";
> = false;
// Exponent used by the GAMMA_IN macro (applied in PS_BackBufferLinear).
uniform float CRT_InputGamma <
ui_type = "drag";
ui_min = 1.0;
ui_max = 5.0;
ui_step = 0.1;
ui_label = "INPUT GAMMA";
> = 2.4;
// GAMMA_OUT raises colours to 1.0 / CRT_OutputGamma.
uniform float CRT_OutputGamma <
ui_type = "drag";
ui_min = 1.0;
ui_max = 5.0;
ui_step = 0.05;
ui_label = "OUTPUT GAMMA";
> = 2.2;
// Mask pattern index passed to mask_weights() in PS_CRT_Hyllian; 0 is
// labelled "Off".
// NOTE(review): entries 13 and 14 are both labelled "Slot1 4k" - one of them
// is probably meant to read differently; confirm against mask.fxh.
uniform int MASK_LAYOUT <
ui_type = "combo";
ui_items = "0-Off\0"
"1-Aperture Classic\0""2-Aperture1 RGB 1080p\0""3-Aperture2 RGB 1080p\0""4-Aperture1 RGB 4k\0""5-Aperture2 RGB 4k\0""6-Aperture3 RGB 4k\0"
"7-Shadow Classic\0""8-Shadow1 1080p\0""9-Shadow2 1080p\0""10-Shadow1 4k\0"
"11-Slot1 1080p\0""12-Slot2 1080p\0""13-Slot1 4k\0""14-Slot1 4k\0""15-Slot1 8k\0";
ui_category = "CRT Mask";
ui_label = "MASK LAYOUT";
> = 1;
// Subpixel order of the physical display (0 = RGB, 1 = BGR); forwarded to
// mask_weights() alongside MASK_LAYOUT.
uniform int MONITOR_SUBPIXELS <
ui_type = "combo";
ui_items = "RGB\0BGR\0";
ui_category = "CRT Mask";
ui_label = "MONITOR SUBPIXELS LAYOUT";
> = 0;
// Brightness multiplier. It is applied inside get_scanlines() and again to
// the final colour in PS_CRT_Hyllian.
uniform float BRIGHTBOOST <
ui_type = "drag";
ui_min = 0.0;
ui_max = 3.0;
ui_step = 0.05;
ui_label = "BRIGHTNESS BOOST";
> = 1.0;
// Beam width used where the filtered colour channel is 0.0
// (first endpoint of the lerp that builds lum0/lum1).
uniform float BEAM_MIN_WIDTH <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_label = "MIN BEAM WIDTH";
> = 0.86;
// Beam width used where the filtered colour channel is 1.0
// (second endpoint of the same lerp).
uniform float BEAM_MAX_WIDTH <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_label = "MAX BEAM WIDTH";
> = 1.0;
// Scanline strength. A macro of the same name defined further down this file
// rewrites every later use as (-0.16*SCANLINES_SHAPE+SCANLINES_STRENGTH).
uniform float SCANLINES_STRENGTH <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_label = "SCANLINES STRENGTH";
> = 0.72;
// Scanline profile selector: 0 = Sinc, 1 = Gaussian (the default).
// get_scanlines() takes the Gaussian path when the value is above 0.5.
// The default is written as an integer literal to match the declared type.
uniform int SCANLINES_SHAPE <
ui_type = "combo";
ui_items = "Sinc\0Gaussian\0";
ui_label = "SCANLINES SHAPE";
> = 1;
// PS_CRT_Hyllian compares vVARS.TextureSize.y against this value; above it
// the scanline path is skipped and the linear buffer is sampled directly.
uniform float SCANLINES_CUTOFF <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1000.0;
ui_step = 1.0;
ui_label = "SCANLINES CUTOFF";
ui_tooltip = "Max vertical native resolution above which scanlines are disabled.";
> = 390.0;
// When true, the sample coordinate is floored on one axis only and the
// second row of taps reuses the first row (c1x = c0x) in PS_CRT_Hyllian.
uniform bool SCANLINES_HIRES <
ui_type = "radio";
ui_label = "HIGH RESOLUTION SCANLINES";
> = false;
// Multiplier applied to the colour after gamma encoding and masking.
uniform float POST_BRIGHTNESS <
ui_type = "drag";
ui_min = 1.0;
ui_max = 3.0;
ui_step = 0.05;
ui_label = "POST-BRIGHTNESS";
> = 1.00;
// Used as a lerp factor to swap the x/y roles of the tap offsets, the
// fractional position and the mask coordinates.
uniform bool VSCANLINES <
ui_type = "radio";
ui_label = "VERTICAL SCANLINES";
> = false;
// Values filled in by the host through the "source" annotation.
// VS_CRT_Geom uses 1.0/NormalizedNativePixelSize as the source size.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
// BufferWidth/BufferHeight form OutputSize, which scales the mask coordinates.
uniform float BufferWidth < source = "bufferwidth"; >;
uniform float BufferHeight < source = "bufferheight"; >;
// Scales the centred coordinate passed to corner() in PS_CRT_Hyllian.
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
// The remaining uniforms are not referenced in this file's own code;
// presumably they are consumed by the included .fxh files - TODO confirm.
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform float ViewportWidth < source = "viewportwidth"; >;
uniform float ViewportHeight < source = "viewportheight"; >;
uniform float UpscaleMultiplier < source = "upscale_multiplier"; >;
#include "../misc/include/mask.fxh"
#include "../misc/include/geom.fxh"
// Point-sampled view of the back buffer with BORDER addressing; read by
// PS_BackBufferLinear.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
// Full-size RGBA16f intermediate target written by the first pass.
texture2D tBackBufferLinear{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
// Point-sampled, clamped view of the intermediate target; read by PS_CRT_Hyllian.
sampler2D sBackBufferLinear{Texture=tBackBufferLinear;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// Gamma decode / encode helpers operating on float3 colours.
#define GAMMA_IN(color) pow(color, float3(CRT_InputGamma, CRT_InputGamma, CRT_InputGamma))
#define GAMMA_OUT(color) pow(color, float3(1.0 / CRT_OutputGamma, 1.0 / CRT_OutputGamma, 1.0 / CRT_OutputGamma))
// The next two macros reuse the name of the value they wrap. This relies on
// the preprocessor not re-expanding a macro's own name, so the inner
// occurrence refers to the underlying uniform.
#define SCANLINES_STRENGTH (-0.16*SCANLINES_SHAPE+SCANLINES_STRENGTH)
// NOTE(review): no CORNER_SMOOTHNESS uniform is declared in this file;
// presumably it comes from geom.fxh - confirm.
#define CORNER_SMOOTHNESS (80.0*pow(CORNER_SMOOTHNESS,10.0))
#define pi 3.1415926535897932384626433832795
#define RADIUS 2.0 // No need for more than 2-taps
// Returns the horizontal filter settings for the preset selected by
// HFILTER_PROFILE as float2(sharpness, radius).
float2 get_hfilter_profile()
{
    // Preset 1: SNES composite.
    if (HFILTER_PROFILE == 1)
        return float2(0.78, 2.0);

    // Preset 2: Genesis composite.
    if (HFILTER_PROFILE == 2)
        return float2(0.65, 2.0);

    // Every other value falls back to the user-controlled sharpness.
    return float2(SHP, RADIUS);
}
/* Some window functions for tests. */
// Normalised sinc evaluated per component. The 0.001 added to the
// denominator keeps the division finite at x = 0.
float4 sinc(float4 x)
{
    float4 phase = pi*x;
    return sin(phase)*(1.0/(phase+0.001.xxxx));
}

// Hann-style window term evaluated per component.
float4 hann_window(float4 x)
{
    float4 c = cos( 0.5 * pi * ( x + 2.0 ) );
    return 0.5 * ( 1.0 - c );
}

// Blackman-style window term evaluated per component.
float4 blackman_window(float4 x)
{
    float4 shifted = x + 2.0;
    return 0.42 - 0.5*cos(0.5*pi*shifted) + 0.08*cos(pi*shifted);
}

// sinc windowed by a second sinc stretched by a.
float4 lanczos(float4 x, float a)
{
    float4 core = sinc(x);
    float4 window = sinc(x / a);
    return core * window;
}

// sinc windowed by blackman_window(); the parameter a is accepted but unused.
float4 blackman(float4 x, float a)
{
    float4 core = sinc(x);
    float4 window = blackman_window(x);
    return core * window;
}

// sinc windowed by hann_window(); the parameter a is accepted but unused.
float4 hann(float4 x, float a)
{
    float4 core = sinc(x);
    float4 window = hann_window(x);
    return core * window;
}
// Evaluates the horizontal filter for four tap distances at once.
// hfp is float2(sharpness, radius) as returned by get_hfilter_profile().
float4 resampler4(float4 x, float2 hfp)
{
    float sharpness = hfp.x;
    float radius = hfp.y;

    // The distances are scaled by the sharpness before the windowed sinc.
    return blackman(x * sharpness, radius);
}
#define wa (0.5*pi)
#define wb (pi)

// Single-channel kernel: the product of two sines with angular factors wa
// and wb, normalised by wa*wb*t*t. Inputs at or below 0.001 return 1.0,
// which avoids the division by (almost) zero.
float resampler3_tap(float t)
{
    if (t <= 0.001)
        return 1.0;

    return sin(t*wa)*sin(t*wb)/(wa*wb*t*t);
}

// Applies the kernel above to each component of x independently.
float3 resampler3(float3 x)
{
    return float3(resampler3_tap(x.x), resampler3_tap(x.y), resampler3_tap(x.z));
}
// Blends the two neighbouring rows into one scanline colour.
// d0/d1 are the per-channel beam distances for color0/color1; they are turned
// into weights according to SCANLINES_SHAPE, and the weighted sum is scaled
// by BRIGHTBOOST.
float3 get_scanlines(float3 d0, float3 d1, float3 color0, float3 color1)
{
    float3 w0;
    float3 w1;

    if (SCANLINES_SHAPE > 0.5)
    {
        // Gaussian falloff.
        w0 = exp(-16.0*d0*d0);
        w1 = exp(-16.0*d1*d1);
    }
    else
    {
        // Sinc-style falloff; distances are doubled and clamped to [0, 1]
        // before going through resampler3().
        w0 = resampler3(clamp(2.0*d0, 0.0, 1.0));
        w1 = resampler3(clamp(2.0*d1, 0.0, 1.0));
    }

    return (BRIGHTBOOST*(color0*w0+color1*w1));
}
// First pass: samples the back buffer, applies GAMMA_IN and writes the
// result with alpha 1.0.
float4 PS_BackBufferLinear(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
{
    // Disabled alternative that snaps the lookup to native texel centres:
    // float2 tc = (floor(vTexCoord / NormalizedNativePixelSize) + 0.5.xx) * NormalizedNativePixelSize;
    float3 encoded = tex2D(sBackBuffer, vTexCoord).rgb;
    float3 decoded = GAMMA_IN(encoded);
    return float4(decoded, 1.0);
}
// Per-vertex values computed once in VS_CRT_Geom and interpolated to
// PS_CRT_Hyllian.
struct ST_VertexOut
{
// sin/cos of the two tilt angles (geom_x_tilt, geom_y_tilt).
float2 sinangle : TEXCOORD1;
float2 cosangle : TEXCOORD2;
// Result of maxscale(sinangle, cosangle); passed on to transform().
float3 stretch : TEXCOORD3;
// Source size in texels, with the sharpness-hack factor applied on one axis.
float2 TextureSize : TEXCOORD4;
};
// Vertex shader generating a triangle covering the entire screen
// and precomputing the geometry values stored in ST_VertexOut.
// centerx, centery, geom_x_tilt, geom_y_tilt and maxscale() are not declared
// in this file; presumably they come from geom.fxh - TODO confirm.
void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
{
// Vertex ids 0, 1, 2 map to texcoords (0,0), (0,2), (2,0).
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
// Screen centering
// centerx/centery are divided by 100 before being subtracted.
texcoord = texcoord - float2(centerx,centery)/100.0;
float2 SourceSize = 1.0/NormalizedNativePixelSize;
// 1.0 when the sharpness hack is off, 2.0 when it is on.
float shp_hack = 1.0 + float(SHARPNESS_HACK);
// Precalculate a bunch of useful values we'll need in the fragment
// shader.
vVARS.sinangle = sin(float2(geom_x_tilt, geom_y_tilt));
vVARS.cosangle = cos(float2(geom_x_tilt, geom_y_tilt));
vVARS.stretch = maxscale(vVARS.sinangle, vVARS.cosangle);
// The hack factor scales x normally and y when VSCANLINES is set.
vVARS.TextureSize = lerp(float2(shp_hack*SourceSize.x, SourceSize.y), float2(SourceSize.x, shp_hack*SourceSize.y), VSCANLINES);
}
// Main pass: filters the linearised image along the scanline direction,
// builds scanlines from two neighbouring rows, then applies output gamma,
// the phosphor mask, post brightness and the corner mask.
// geom_curvature, transform(), corner(), mask_weights(), MASK_DARK_STRENGTH
// and MASK_LIGHT_STRENGTH are not declared in this file; presumably they come
// from the included geom.fxh / mask.fxh - TODO confirm.
float4 PS_CRT_Hyllian(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0, in ST_VertexOut vVARS) : SV_Target
{
float2 OutputSize = float2(BufferWidth, BufferHeight);
float2 TextureSize = vVARS.TextureSize;
// dx steps one texel along the scanline, dy one texel across it; VSCANLINES
// swaps which axis each of them uses.
float2 dx = lerp(float2(1.0/TextureSize.x, 0.0), float2(0.0, 1.0/TextureSize.y), VSCANLINES);
float2 dy = lerp(float2(0.0, 1.0/TextureSize.y), float2(1.0/TextureSize.x, 0.0), VSCANLINES);
// Texture coordinates of the texel containing the active pixel.
float2 WarpedTexCoord = (geom_curvature == true) ? transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch) : vTexCoord;
// Corner mask factor, evaluated on the coordinate rescaled about (0.5, 0.5).
float cval = corner((WarpedTexCoord-0.5.xx) * BufferToViewportRatio + 0.5.xx);
float2 pix_coord = WarpedTexCoord*TextureSize - 0.5.xx;
// Sample position snapped to texel centres. In hi-res mode only the axis
// along the scanline is snapped; the other keeps its fractional position.
float2 tc = ( (SCANLINES_HIRES == true) ? (lerp(float2(floor(pix_coord.x), pix_coord.y), float2(pix_coord.x, floor(pix_coord.y)), VSCANLINES) + float2(0.5, 0.5)) : (floor(pix_coord) + float2(0.5, 0.5)) )/TextureSize;
// Fractional position inside the texel; components are swapped for vertical scanlines.
float2 fp = lerp(frac(pix_coord), frac(pix_coord.yx), VSCANLINES);
// Four taps along the current row.
float3 c00 = tex2D(sBackBufferLinear, tc - dx).xyz;
float3 c01 = tex2D(sBackBufferLinear, tc ).xyz;
float3 c02 = tex2D(sBackBufferLinear, tc + dx).xyz;
float3 c03 = tex2D(sBackBufferLinear, tc + 2.0*dx).xyz;
// Four taps along the next row; in hi-res mode the current row is reused.
float3 c10, c11, c12, c13;
if (SCANLINES_HIRES == false)
{
c10 = tex2D(sBackBufferLinear, tc - dx + dy).xyz;
c11 = tex2D(sBackBufferLinear, tc + dy).xyz;
c12 = tex2D(sBackBufferLinear, tc + dx + dy).xyz;
c13 = tex2D(sBackBufferLinear, tc + 2.0*dx + dy).xyz;
}
else { c10 = c00; c11 = c01; c12 = c02; c13 = c03;}
float4x3 color_matrix0 = float4x3(c00, c01, c02, c03);
float4x3 color_matrix1 = float4x3(c10, c11, c12, c13);
float2 hfp = get_hfilter_profile();
// Filter weights for the four taps, normalised by their sum below.
float4 weights = resampler4(float4(1.0+fp.x, fp.x, 1.0-fp.x, 2.0-fp.x), hfp);
float3 color0 = mul(weights, color_matrix0)/dot(weights, 1.0.xxxx);
float3 color1 = mul(weights, color_matrix1)/dot(weights, 1.0.xxxx);
// Get min/max samples
float3 min_sample0 = min(c01,c02);
float3 max_sample0 = max(c01,c02);
float3 min_sample1 = min(c11,c12);
float3 max_sample1 = max(c11,c12);
// Anti-ringing
// The clamped colour is selected when CRT_ANTI_RINGING is true.
float3 aux = color0;
color0 = clamp(color0, min_sample0, max_sample0);
color0 = lerp(aux, color0, CRT_ANTI_RINGING);
aux = color1;
color1 = clamp(color1, min_sample1, max_sample1);
color1 = lerp(aux, color1, CRT_ANTI_RINGING);
// Distance of the pixel from the current row and from the next row.
float pos0 = fp.y;
float pos1 = 1 - fp.y;
// Per-channel beam width interpolated between the min and max width by colour.
float3 lum0 = lerp(BEAM_MIN_WIDTH.xxx, BEAM_MAX_WIDTH.xxx, color0);
float3 lum1 = lerp(BEAM_MIN_WIDTH.xxx, BEAM_MAX_WIDTH.xxx, color1);
// SCANLINES_STRENGTH is the macro here, i.e. (-0.16*SCANLINES_SHAPE+SCANLINES_STRENGTH).
// The small constant keeps the division finite when the beam width is 0.
float3 d0 = SCANLINES_STRENGTH*pos0/(lum0*lum0+0.0000001.xxx);
float3 d1 = SCANLINES_STRENGTH*pos1/(lum1*lum1+0.0000001.xxx);
// Above the cutoff the scanline path is skipped and the image is sampled directly.
float3 color = (vVARS.TextureSize.y <= SCANLINES_CUTOFF) ? get_scanlines(d0, d1, color0, color1) : tex2D(sBackBufferLinear, WarpedTexCoord.xy).xyz;
// NOTE(review): get_scanlines() already multiplies by BRIGHTBOOST, so the
// scanline path is boosted twice - confirm this is intended.
color *= BRIGHTBOOST;
color = GAMMA_OUT(color);
// Mask coordinates in output pixels, swapped for vertical scanlines.
float2 mask_coords =vTexCoord.xy * OutputSize.xy;
mask_coords = lerp(mask_coords.xy, mask_coords.yx, VSCANLINES);
color.rgb*=GAMMA_OUT(mask_weights(mask_coords, MASK_LAYOUT, MONITOR_SUBPIXELS, MASK_DARK_STRENGTH, MASK_LIGHT_STRENGTH));
float4 res = float4(POST_BRIGHTNESS*color, 1.0);
// Apply the corner mask factor computed above.
res.rgb = res.rgb * cval.xxx;
return float4(res.rgb, 1.0);
}
// Two-pass technique: pass 1 writes the gamma-decoded image to
// tBackBufferLinear, pass 2 runs the CRT shader. Pass 2 sets no RenderTarget.
technique CRT_Hyllian
{
pass
{
// PostProcessVS is not defined in this file; presumably it comes from ReShade.fxh.
VertexShader = PostProcessVS;
PixelShader = PS_BackBufferLinear;
RenderTarget = tBackBufferLinear;
}
pass
{
VertexShader = VS_CRT_Geom;
PixelShader = PS_CRT_Hyllian;
}
}

View File

@@ -1,521 +0,0 @@
#include "ReShade.fxh"
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
// Ported to Duckstation (ReShade specs) by Hyllian (2024).
// Set shader params for all passes here:
// "Display Settings" parameters. They are not read in this file's own code;
// presumably the included crt-royale pass files consume them - TODO confirm.
uniform float crt_gamma <
ui_type = "drag";
ui_min = 1.0;
ui_max = 5.0;
ui_step = 0.025;
ui_label = "Simulated CRT Gamma";
ui_category = "Display Settings";
> = 2.5;
uniform float lcd_gamma <
ui_type = "drag";
ui_min = 1.0;
ui_max = 5.0;
ui_step = 0.025;
ui_label = "Your Display Gamma";
ui_category = "Display Settings";
> = 2.2;
// The step 0.015625 is 1/64.
uniform float levels_contrast <
ui_type = "drag";
ui_min = 0.0;
ui_max = 4.0;
ui_step = 0.015625;
ui_label = "Contrast";
ui_category = "Display Settings";
> = 1.0;
// "Effects" parameters (halation, diffusion, bloom). They are not read in
// this file's own code; presumably the included pass files consume them.
// Halation and bloom excess default to 0.0.
uniform float halation_weight <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Halation Weight";
ui_category = "Effects";
> = 0.0;
uniform float diffusion_weight <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Diffusion Weight";
ui_category = "Effects";
> = 0.075;
uniform float bloom_underestimate_levels <
ui_type = "drag";
ui_min = 0.0;
ui_max = 5.0;
ui_step = 0.01;
ui_label = "Bloom - Underestimate Levels";
ui_category = "Effects";
> = 0.8;
uniform float bloom_excess <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Bloom - Excess";
ui_category = "Effects";
> = 0.0;
// "Beam Dynamics" parameters. They are not read in this file's own code;
// presumably the included scanline pass files consume them - TODO confirm.
// The sigma, shape and power controls have non-zero minimums.
uniform float beam_min_sigma <
ui_type = "drag";
ui_min = 0.005;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Min Sigma";
ui_category = "Beam Dynamics";
> = 0.02;
uniform float beam_max_sigma <
ui_type = "drag";
ui_min = 0.005;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Max Sigma";
ui_category = "Beam Dynamics";
> = 0.3;
uniform float beam_spot_power <
ui_type = "drag";
ui_min = 0.01;
ui_max = 16.0;
ui_step = 0.01;
ui_label = "Spot Power";
ui_category = "Beam Dynamics";
> = 0.33;
uniform float beam_min_shape <
ui_type = "drag";
ui_min = 2.0;
ui_max = 32.0;
ui_step = 0.1;
ui_label = "Min Shape";
ui_category = "Beam Dynamics";
> = 2.0;
uniform float beam_max_shape <
ui_type = "drag";
ui_min = 2.0;
ui_max = 32.0;
ui_step = 0.1;
ui_label = "Max Shape";
ui_category = "Beam Dynamics";
> = 4.0;
uniform float beam_shape_power <
ui_type = "drag";
ui_min = 0.01;
ui_max = 16.0;
ui_step = 0.01;
ui_label = "Shape Power";
ui_category = "Beam Dynamics";
> = 0.25;
// Horizontal filter selector: 0 = Quilez, 1 = Gaussian, 2 = Lanczos.
uniform int beam_horiz_filter <
ui_type = "combo";
ui_items = "Quilez\0Gaussian\0Lanczos\0";
ui_label = "Horizontal Filter";
ui_category = "Beam Dynamics";
> = 0;
uniform float beam_horiz_sigma <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.67;
ui_step = 0.005;
ui_label = "Horizontal Sigma";
ui_category = "Beam Dynamics";
> = 0.35;
uniform float beam_horiz_linear_rgb_weight <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_label = "Horiz Linear RGB Weight";
ui_category = "Beam Dynamics";
> = 1.0;
// "Convergence" parameters: one X and one Y offset per colour channel, all
// defaulting to 0.0. The X range is [-4, 4] and the Y range is [-2, 2].
// They are not read in this file's own code; presumably the included pass
// files consume them - TODO confirm (units are not visible here).
uniform float convergence_offset_x_r <
ui_type = "drag";
ui_min = -4.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Offset X Red";
ui_category = "Convergence";
> = 0.0;
uniform float convergence_offset_x_g <
ui_type = "drag";
ui_min = -4.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Offset X Green";
ui_category = "Convergence";
> = 0.0;
uniform float convergence_offset_x_b <
ui_type = "drag";
ui_min = -4.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Offset X Blue";
ui_category = "Convergence";
> = 0.0;
uniform float convergence_offset_y_r <
ui_type = "drag";
ui_min = -2.0;
ui_max = 2.0;
ui_step = 0.05;
ui_label = "Offset Y Red";
ui_category = "Convergence";
> = 0.0;
uniform float convergence_offset_y_g <
ui_type = "drag";
ui_min = -2.0;
ui_max = 2.0;
ui_step = 0.05;
ui_label = "Offset Y Green";
ui_category = "Convergence";
> = 0.0;
uniform float convergence_offset_y_b <
ui_type = "drag";
ui_min = -2.0;
ui_max = 2.0;
ui_step = 0.05;
ui_label = "Offset Y Blue";
ui_category = "Convergence";
> = 0.0;
// "Mask" parameters. They are not read in this file's own code; presumably
// the included mask pass files consume them - TODO confirm.
// Mask selector: 0 = Aperture Grille, 1 = Slot Mask, 2 = Shadow Mask.
uniform int mask_type <
ui_type = "combo";
ui_items = "Aperture Grille\0Slot Mask\0Shadow Mask\0";
ui_label = "Type";
ui_category = "Mask";
> = 0;
// Declared as float but stepped by 1 over [0, 2], i.e. three discrete settings.
uniform float mask_sample_mode_desired <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 1.;
ui_label = "Sample Mode";
ui_category = "Mask";
> = 0.0;
// Declared as float but stepped by 1 over [0, 1], i.e. an on/off switch.
uniform float mask_specify_num_triads <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "Specify Number of Triads";
ui_category = "Mask";
> = 0.0;
uniform float mask_triad_size_desired <
ui_type = "drag";
ui_min = 1.0;
ui_max = 18.0;
ui_step = 0.125;
ui_label = "Triad Size Desired";
ui_category = "Mask";
> = 3.0;
uniform float mask_num_triads_desired <
ui_type = "drag";
ui_min = 342.0;
ui_max = 1920.0;
ui_step = 1.0;
ui_label = "Number of Triads Desired";
ui_category = "Mask";
> = 480.0;
// "Interlacing" switches. They are not read in this file's own code;
// presumably the included pass files consume them - TODO confirm.
// Only interlacing detection is enabled by default.
uniform bool interlace_detect <
ui_type = "radio";
ui_label = "Enable Interlacing Detection";
ui_category = "Interlacing";
> = true;
uniform bool interlace_bff <
ui_type = "radio";
ui_label = "Bottom Field First";
ui_category = "Interlacing";
> = false;
uniform bool interlace_1080i <
ui_type = "radio";
ui_label = "Detect 1080i";
ui_category = "Interlacing";
> = false;
// Values filled in by the host through the "source" annotation.
// FrameCount is declared as a float here.
uniform float FrameCount < source = "framecount"; >;
// BufferToViewportRatio and ViewportSize together form VIEWPORT_SIZE below.
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 InternalPixelSize < source = "internal_pixel_size"; >;
uniform float2 NativePixelSize < source = "native_pixel_size"; >;
uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
// Its reciprocal is TEXTURE_SIZE below.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float UpscaleMultiplier < source = "upscale_multiplier"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform float ViewportWidth < source = "viewportwidth"; >;
uniform float ViewportHeight < source = "viewportheight"; >;
#include "../misc/include/geom.fxh"
// Two base sizes: the viewport size scaled by the buffer-to-viewport ratio,
// and the reciprocal of the normalised native pixel size.
#define VIEWPORT_SIZE (ViewportSize*BufferToViewportRatio)
#define TEXTURE_SIZE (1.0/NormalizedNativePixelSize)
// Per-pass <PASS>_texture_size macros.
// NOTE(review): these do not match the dimensions of the render target with
// the same name (e.g. MASK_RESIZE_VERT_texture_size is TEXTURE_SIZE while
// tMASK_RESIZE_VERTICAL is 64 x BUFFER_HEIGHT*0.0625). It looks like each
// macro describes the texture the pass reads rather than the one it writes -
// confirm against the pass sources before changing any of them.
#define ORIG_LINEARIZED_texture_size TEXTURE_SIZE
#define VERTICAL_SCANLINES_texture_size TEXTURE_SIZE
#define BLOOM_APPROX_texture_size TEXTURE_SIZE
#define BLUR9FAST_VERTICAL_texture_size TEXTURE_SIZE
#define HALATION_BLUR_texture_size TEXTURE_SIZE
#define MASK_RESIZE_VERT_texture_size TEXTURE_SIZE
#define MASK_RESIZE_texture_size float2(64.0,0.0625*((VIEWPORT_SIZE).y))
#define MASKED_SCANLINES_texture_size (0.0625*VIEWPORT_SIZE)
#define BRIGHTPASS_texture_size VIEWPORT_SIZE
#define BLOOM_VERTICAL_texture_size VIEWPORT_SIZE
#define BLOOM_HORIZONTAL_texture_size VIEWPORT_SIZE
// Every <PASS>_video_size is an alias of the matching <PASS>_texture_size.
#define ORIG_LINEARIZED_video_size ORIG_LINEARIZED_texture_size
#define VERTICAL_SCANLINES_video_size VERTICAL_SCANLINES_texture_size
#define BLOOM_APPROX_video_size BLOOM_APPROX_texture_size
#define BLUR9FAST_VERTICAL_video_size BLUR9FAST_VERTICAL_texture_size
#define HALATION_BLUR_video_size HALATION_BLUR_texture_size
#define MASK_RESIZE_VERT_video_size MASK_RESIZE_VERT_texture_size
#define MASK_RESIZE_video_size MASK_RESIZE_texture_size
#define MASKED_SCANLINES_video_size MASKED_SCANLINES_texture_size
#define BRIGHTPASS_video_size BRIGHTPASS_texture_size
#define BLOOM_VERTICAL_video_size BLOOM_VERTICAL_texture_size
#define BLOOM_HORIZONTAL_video_size BLOOM_HORIZONTAL_texture_size
// Any bare use of the identifier video_size is rewritten to texture_size.
#define video_size texture_size
// Phosphor mask images loaded from disk: a 64x64 and a 512x512 variant for
// each of the three mask types (grille, slot, shadow).
// NOTE(review): the small textures set MipLevels=0 while the large ones use
// 4; the ReShade FX reference describes MipLevels as a count that includes
// the base level - confirm that 0 is accepted by the host.
texture2D tmask_grille_texture_small < source = "crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png"; > {Width=64.0;Height=64.0;MipLevels=0;};
texture2D tmask_slot_texture_small < source = "crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png"; > {Width=64.0;Height=64.0;MipLevels=0;};
texture2D tmask_shadow_texture_small < source = "crt-royale/TileableLinearShadowMaskEDPResizeTo64.png"; > {Width=64.0;Height=64.0;MipLevels=0;};
texture2D tmask_grille_texture_large < source = "crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png"; > {Width=512.0;Height=512.0;MipLevels=4;};
texture2D tmask_slot_texture_large < source = "crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png"; > {Width=512.0;Height=512.0;MipLevels=4;};
texture2D tmask_shadow_texture_large < source = "crt-royale/TileableLinearShadowMaskEDP.png"; > {Width=512.0;Height=512.0;MipLevels=4;};
// One sampler per mask texture; all of them tile (REPEAT) and use POINT
// min/mag filtering.
sampler2D mask_grille_texture_small { Texture = tmask_grille_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
sampler2D mask_slot_texture_small { Texture = tmask_slot_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
sampler2D mask_shadow_texture_small { Texture = tmask_shadow_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
sampler2D mask_grille_texture_large { Texture = tmask_grille_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
sampler2D mask_slot_texture_large { Texture = tmask_slot_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
sampler2D mask_shadow_texture_large { Texture = tmask_shadow_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
// Number of passes to build. It defaults to the full chain of 11 and can be
// predefined to a smaller value to stop after an earlier pass.
#ifndef DEBUG_PASSES
#define DEBUG_PASSES 11
#endif
// Intermediate render targets and their samplers, one pair per pass.
// The first pair always exists; each later pair is compiled only when
// DEBUG_PASSES is large enough to include the pass that writes it.
// The last pass (DEBUG_PASSES > 10) has no target here because it sets no
// RenderTarget in the technique.
texture2D tORIG_LINEARIZED{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D ORIG_LINEARIZED{Texture=tORIG_LINEARIZED;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#if (DEBUG_PASSES > 1)
texture2D tVERTICAL_SCANLINES{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D VERTICAL_SCANLINES{Texture=tVERTICAL_SCANLINES;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 2)
texture2D tBLOOM_APPROX{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D BLOOM_APPROX{Texture=tBLOOM_APPROX;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 3)
// Need checking if it's really necessary to rendertarget.
texture2D tBLUR9FAST_VERTICAL{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D BLUR9FAST_VERTICAL{Texture=tBLUR9FAST_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 4)
texture2D tHALATION_BLUR{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D HALATION_BLUR{Texture=tHALATION_BLUR;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 5)
// The two mask-resize targets are the only RGBA8, point-filtered ones:
// 64 texels wide by 1/16 of the buffer height.
texture2D tMASK_RESIZE_VERTICAL{Width=64.0;Height=BUFFER_HEIGHT*0.0625;Format=RGBA8;};
sampler2D MASK_RESIZE_VERTICAL{Texture=tMASK_RESIZE_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
#endif
#if (DEBUG_PASSES > 6)
// 1/16 of the buffer size on both axes.
texture2D tMASK_RESIZE{Width=BUFFER_WIDTH*0.0625;Height=BUFFER_HEIGHT*0.0625;Format=RGBA8;};
sampler2D MASK_RESIZE{Texture=tMASK_RESIZE;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
#endif
#if (DEBUG_PASSES > 7)
texture2D tMASKED_SCANLINES{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D MASKED_SCANLINES{Texture=tMASKED_SCANLINES;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 8)
texture2D tBRIGHTPASS{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D BRIGHTPASS{Texture=tBRIGHTPASS;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 9)
texture2D tBLOOM_VERTICAL{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D BLOOM_VERTICAL{Texture=tBLOOM_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#include "crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.fxh"
#if (DEBUG_PASSES > 1)
#include "crt-royale/src/crt-royale-scanlines-vertical-interlacing.fxh"
#endif
#if (DEBUG_PASSES > 2)
#include "crt-royale/src/crt-royale-bloom-approx.fxh"
#endif
#if (DEBUG_PASSES > 3)
#include "crt-royale/src/blur9fast-vertical.fxh"
#endif
#if (DEBUG_PASSES > 4)
#include "crt-royale/src/blur9fast-horizontal.fxh"
#endif
#if (DEBUG_PASSES > 5)
#include "crt-royale/src/crt-royale-mask-resize-vertical.fxh"
#endif
#if (DEBUG_PASSES > 6)
#include "crt-royale/src/crt-royale-mask-resize-horizontal.fxh"
#endif
#if (DEBUG_PASSES > 7)
#include "crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.fxh"
#endif
#if (DEBUG_PASSES > 8)
#include "crt-royale/src/crt-royale-brightpass.fxh"
#endif
#if (DEBUG_PASSES > 9)
#include "crt-royale/src/crt-royale-bloom-vertical.fxh"
#endif
#if (DEBUG_PASSES > 10)
#include "crt-royale/src/crt-royale-bloom-horizontal-reconstitute.fxh"
#endif
// Pass chain of the shader. The first pass always runs; every later pass is
// compiled only when DEBUG_PASSES exceeds its index, so lowering DEBUG_PASSES
// truncates the chain. Each pass except the last writes to its own render
// target; the last one sets no RenderTarget.
technique CRT_Royale
{
// Pass 1: linearize.
pass
{
VertexShader = VS_Linearize;
PixelShader = PS_Linearize;
RenderTarget = tORIG_LINEARIZED;
}
#if (DEBUG_PASSES > 1)
// Pass 2: vertical scanlines / interlacing.
pass
{
VertexShader = VS_Scanlines_Vertical_Interlacing;
PixelShader = PS_Scanlines_Vertical_Interlacing;
RenderTarget = tVERTICAL_SCANLINES;
}
#endif
#if (DEBUG_PASSES > 2)
// Pass 3: bloom approximation.
pass
{
VertexShader = VS_Bloom_Approx;
PixelShader = PS_Bloom_Approx;
RenderTarget = tBLOOM_APPROX;
}
#endif
#if (DEBUG_PASSES > 3)
// Pass 4: vertical blur.
pass
{
VertexShader = VS_Blur9Fast_Vertical;
PixelShader = PS_Blur9Fast_Vertical;
RenderTarget = tBLUR9FAST_VERTICAL;
}
#endif
#if (DEBUG_PASSES > 4)
// Pass 5: horizontal blur, written to the halation target.
pass
{
VertexShader = VS_Blur9Fast_Horizontal;
PixelShader = PS_Blur9Fast_Horizontal;
RenderTarget = tHALATION_BLUR;
}
#endif
#if (DEBUG_PASSES > 5)
// Pass 6: vertical mask resize.
pass
{
VertexShader = VS_Mask_Resize_Vertical;
PixelShader = PS_Mask_Resize_Vertical;
RenderTarget = tMASK_RESIZE_VERTICAL;
}
#endif
#if (DEBUG_PASSES > 6)
// Pass 7: horizontal mask resize.
pass
{
VertexShader = VS_Mask_Resize_Horizontal;
PixelShader = PS_Mask_Resize_Horizontal;
RenderTarget = tMASK_RESIZE;
}
#endif
#if (DEBUG_PASSES > 7)
// Pass 8: horizontal scanlines with the mask applied.
pass
{
VertexShader = VS_Scanlines_Horizontal_Apply_Mask;
PixelShader = PS_Scanlines_Horizontal_Apply_Mask;
RenderTarget = tMASKED_SCANLINES;
}
#endif
#if (DEBUG_PASSES > 8)
// Pass 9: brightpass.
pass
{
VertexShader = VS_Brightpass;
PixelShader = PS_Brightpass;
RenderTarget = tBRIGHTPASS;
}
#endif
#if (DEBUG_PASSES > 9)
// Pass 10: vertical bloom.
pass
{
VertexShader = VS_Bloom_Vertical;
PixelShader = PS_Bloom_Vertical;
RenderTarget = tBLOOM_VERTICAL;
}
#endif
#if (DEBUG_PASSES > 10)
// Pass 11: horizontal bloom; no RenderTarget is set for this pass.
pass
{
VertexShader = VS_Bloom_Horizontal;
PixelShader = PS_Bloom_Horizontal;
}
#endif
}

View File

@@ -1,280 +0,0 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS

View File

@@ -1,249 +0,0 @@
#ifndef BIND_SHADER_PARAMS_H
#define BIND_SHADER_PARAMS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "helper-functions-and-macros.fxh"
#include "user-settings.fxh"
#include "derived-settings-and-constants.fxh"
// Override some parameters for gamma-management.h and tex2Dantialias.h:
// (These macros and the constant below are only declared here; they are
// presumably consumed by those headers when they are included later -- confirm.)
#define OVERRIDE_DEVICE_GAMMA
static const float gba_gamma = 3.5; // Irrelevant but necessary to define.
#define ANTIALIAS_OVERRIDE_BASICS
#define ANTIALIAS_OVERRIDE_PARAMETERS
// Disable runtime shader params if the user doesn't explicitly want them.
// Static constants will be defined in place of uniforms of the same name.
// NOTE(review): the static-constant fallbacks in this file's #else branch are
// currently commented out, so the replacements must come from another file.
#ifndef RUNTIME_SHADER_PARAMS_ENABLE
#undef PARAMETER_UNIFORM
#endif
// Bind option names to shader parameter uniforms or static constants.
// Runtime path: when PARAMETER_UNIFORM is defined, every option is declared as
// a scalar `uniform float`. Three groups are only uniforms when their own
// RUNTIME_* macro is also defined; otherwise they become clamped static
// constants derived from the matching *_static setting.
#ifdef PARAMETER_UNIFORM
uniform float crt_gamma;
uniform float lcd_gamma;
uniform float levels_contrast;
uniform float halation_weight;
uniform float diffusion_weight;
uniform float bloom_underestimate_levels;
uniform float bloom_excess;
uniform float beam_min_sigma;
uniform float beam_max_sigma;
uniform float beam_spot_power;
uniform float beam_min_shape;
uniform float beam_max_shape;
uniform float beam_shape_power;
uniform float beam_horiz_sigma;
// Horizontal beam filter selection is runtime-adjustable only on request.
#ifdef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
uniform float beam_horiz_filter;
uniform float beam_horiz_linear_rgb_weight;
#else
static const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0);
static const float beam_horiz_linear_rgb_weight = clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
#endif
uniform float convergence_offset_x_r;
uniform float convergence_offset_x_g;
uniform float convergence_offset_x_b;
uniform float convergence_offset_y_r;
uniform float convergence_offset_y_g;
uniform float convergence_offset_y_b;
// Mask type is runtime-adjustable only on request; otherwise clamp the static
// setting to [0, 2].
#ifdef RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
uniform float mask_type;
#else
static const float mask_type = clamp(mask_type_static, 0.0, 2.0);
#endif
uniform float mask_sample_mode_desired;
uniform float mask_specify_num_triads;
uniform float mask_triad_size_desired;
uniform float mask_num_triads_desired;
uniform float aa_subpixel_r_offset_x_runtime;
uniform float aa_subpixel_r_offset_y_runtime;
// Antialiasing filter weights are runtime-adjustable only on request.
#ifdef RUNTIME_ANTIALIAS_WEIGHTS
uniform float aa_cubic_c;
uniform float aa_gauss_sigma;
#else
static const float aa_cubic_c = aa_cubic_c_static; // Clamp to [0, 4]?
static const float aa_gauss_sigma = max(FIX_ZERO(0.0), aa_gauss_sigma_static); // Clamp to [FIXZERO(0), 1]?
#endif
uniform float geom_mode_runtime;
uniform float geom_radius;
uniform float geom_view_dist;
uniform float geom_tilt_angle_x;
uniform float geom_tilt_angle_y;
uniform float geom_aspect_ratio_x;
uniform float geom_aspect_ratio_y;
uniform float geom_overscan_x;
uniform float geom_overscan_y;
uniform float border_size;
uniform float border_darkness;
uniform float border_compress;
uniform float interlace_bff;
uniform float interlace_1080i;
#else
// Static path: every fallback declaration below is commented out, so this
// branch declares nothing. The accessors later in this file still reference
// names such as convergence_offset_x_r and mask_type.
// NOTE(review): presumably those names are declared by an included settings
// header or by the including effect -- confirm before re-enabling anything here.
// Use constants from user-settings.h, and limit ranges appropriately:
/* static const float crt_gamma = macro_max(0.0, crt_gamma_static);
static const float lcd_gamma = macro_max(0.0, lcd_gamma_static);
static const float levels_contrast = macro_clamp(levels_contrast_static, 0.0, 4.0);
static const float halation_weight = macro_clamp(halation_weight_static, 0.0, 1.0);
static const float diffusion_weight = macro_clamp(diffusion_weight_static, 0.0, 1.0);
static const float bloom_underestimate_levels = macro_max(FIX_ZERO(0.0), bloom_underestimate_levels_static);
static const float bloom_excess = macro_clamp(bloom_excess_static, 0.0, 1.0);
static const float beam_min_sigma = macro_max(FIX_ZERO(0.0), beam_min_sigma_static);
static const float beam_max_sigma = macro_max(beam_min_sigma, beam_max_sigma_static);
static const float beam_spot_power = macro_max(beam_spot_power_static, 0.0);
static const float beam_min_shape = macro_max(2.0, beam_min_shape_static);
static const float beam_max_shape = macro_max(beam_min_shape, beam_max_shape_static);
static const float beam_shape_power = macro_max(0.0, beam_shape_power_static);
static const float beam_horiz_filter = macro_clamp(beam_horiz_filter_static, 0.0, 2.0);
static const float beam_horiz_sigma = macro_max(FIX_ZERO(0.0), beam_horiz_sigma_static);
static const float beam_horiz_linear_rgb_weight = macro_clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
*/ // Unpack static vector elements to match scalar uniforms:
/* static const float convergence_offset_x_r = macro_clamp(convergence_offsets_r_static.x, -4.0, 4.0);
static const float convergence_offset_x_g = macro_clamp(convergence_offsets_g_static.x, -4.0, 4.0);
static const float convergence_offset_x_b = macro_clamp(convergence_offsets_b_static.x, -4.0, 4.0);
static const float convergence_offset_y_r = macro_clamp(convergence_offsets_r_static.y, -4.0, 4.0);
static const float convergence_offset_y_g = macro_clamp(convergence_offsets_g_static.y, -4.0, 4.0);
static const float convergence_offset_y_b = macro_clamp(convergence_offsets_b_static.y, -4.0, 4.0);
static const float mask_type = macro_clamp(mask_type_static, 0.0, 2.0);
static const float mask_sample_mode_desired = macro_clamp(mask_sample_mode_static, 0.0, 2.0);
static const float mask_specify_num_triads = macro_clamp(mask_specify_num_triads_static, 0.0, 1.0);
static const float mask_triad_size_desired = macro_clamp(mask_triad_size_desired_static, 1.0, 18.0);
static const float mask_num_triads_desired = macro_clamp(mask_num_triads_desired_static, 342.0, 1920.0);
static const float aa_subpixel_r_offset_x_runtime = macro_clamp(aa_subpixel_r_offset_static.x, -0.5, 0.5);
static const float aa_subpixel_r_offset_y_runtime = macro_clamp(aa_subpixel_r_offset_static.y, -0.5, 0.5);
static const float aa_cubic_c = aa_cubic_c_static; // Clamp to [0, 4]?
static const float aa_gauss_sigma = macro_max(FIX_ZERO(0.0), aa_gauss_sigma_static); // Clamp to [FIXZERO(0), 1]?
static const float geom_mode_runtime = macro_clamp(geom_mode_static, 0.0, 3.0);
static const float geom_radius = macro_max(1.0/(2.0*pi), geom_radius_static); // Clamp to [1/(2*pi), 1024]?
static const float geom_view_dist = macro_max(0.5, geom_view_dist_static); // Clamp to [0.5, 1024]?
static const float geom_tilt_angle_x = macro_clamp(geom_tilt_angle_static.x, -pi, pi);
static const float geom_tilt_angle_y = macro_clamp(geom_tilt_angle_static.y, -pi, pi);
static const float geom_aspect_ratio_x = geom_aspect_ratio_static; // Force >= 1?
static const float geom_aspect_ratio_y = 1.0;
static const float geom_overscan_x = macro_max(FIX_ZERO(0.0), geom_overscan_static.x);
static const float geom_overscan_y = macro_max(FIX_ZERO(0.0), geom_overscan_static.y);
static const float border_size = macro_clamp(border_size_static, 0.0, 0.5); // 0.5 reaches to image center
static const float border_darkness = macro_max(0.0, border_darkness_static);
static const float border_compress = macro_max(1.0, border_compress_static); // < 1.0 darkens whole image
static const float interlace_bff = float(interlace_bff_static);
static const float interlace_1080i = float(interlace_1080i_static);
*/
#endif
/*
// Provide accessors for vector constants that pack scalar uniforms:
float2 get_aspect_vector(const float geom_aspect_ratio)
{
// Get an aspect ratio vector. Enforce geom_max_aspect_ratio, and prevent
// the absolute scale from affecting the uv-mapping for curvature:
const float geom_clamped_aspect_ratio =
min(geom_aspect_ratio, geom_max_aspect_ratio);
const float2 geom_aspect =
normalize(float2(geom_clamped_aspect_ratio, 1.0));
return geom_aspect;
}
float2 get_geom_overscan_vector()
{
return float2(geom_overscan_x, geom_overscan_y);
}
float2 get_geom_tilt_angle_vector()
{
return float2(geom_tilt_angle_x, geom_tilt_angle_y);
}
*/
float3 get_convergence_offsets_x_vector()
{
    // Pack the three scalar x-axis convergence offsets into one vector,
    // ordered (r, g, b).
    const float3 offsets_x = float3(
        convergence_offset_x_r,
        convergence_offset_x_g,
        convergence_offset_x_b);
    return offsets_x;
}
float3 get_convergence_offsets_y_vector()
{
    // Pack the three scalar y-axis convergence offsets into one vector,
    // ordered (r, g, b).
    const float3 offsets_y = float3(
        convergence_offset_y_r,
        convergence_offset_y_g,
        convergence_offset_y_b);
    return offsets_y;
}
float2 get_convergence_offsets_r_vector()
{
    // Pair up the r-channel convergence offsets as (x, y).
    const float2 offsets_r = float2(
        convergence_offset_x_r,
        convergence_offset_y_r);
    return offsets_r;
}
float2 get_convergence_offsets_g_vector()
{
    // Pair up the g-channel convergence offsets as (x, y).
    const float2 offsets_g = float2(
        convergence_offset_x_g,
        convergence_offset_y_g);
    return offsets_g;
}
float2 get_convergence_offsets_b_vector()
{
    // Pair up the b-channel convergence offsets as (x, y).
    const float2 offsets_b = float2(
        convergence_offset_x_b,
        convergence_offset_y_b);
    return offsets_b;
}
/*
float2 get_aa_subpixel_r_offset()
{
#ifdef RUNTIME_ANTIALIAS_WEIGHTS
#ifdef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
// WARNING: THIS IS EXTREMELY EXPENSIVE.
return float2(aa_subpixel_r_offset_x_runtime,
aa_subpixel_r_offset_y_runtime);
#else
return aa_subpixel_r_offset_static;
#endif
#else
return aa_subpixel_r_offset_static;
#endif
}
*/
// Provide accessors for settings which still need "cooking":
float get_mask_amplify()
{
    // Returns the amplification factor for the currently selected mask:
    // the reciprocal of that mask's average color.
    static const float grille_gain = 1.0/mask_grille_avg_color;
    static const float slot_gain = 1.0/mask_slot_avg_color;
    static const float shadow_gain = 1.0/mask_shadow_avg_color;
    // mask_type is a float selector: below 0.5 picks the grille factor,
    // below 1.5 the slot factor, and anything else the shadow factor.
    if(mask_type < 0.5)
    {
        return grille_gain;
    }
    else if(mask_type < 1.5)
    {
        return slot_gain;
    }
    else
    {
        return shadow_gain;
    }
}
float get_mask_sample_mode()
{
    // Step 1: pick where the requested sample mode comes from. The runtime
    // value is used only when mask mode/type selection is enabled at runtime;
    // otherwise the static setting is used.
    #ifdef RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
        const float requested_mode = mask_sample_mode_desired;
    #else
        const float requested_mode = mask_sample_mode_static;
    #endif
    // Step 2: with manual mask resizing enabled the request is returned as-is;
    // without it the result is restricted to the range [1, 2].
    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
        return requested_mode;
    #else
        return clamp(requested_mode, 1.0, 2.0);
    #endif
}
#endif // BIND_SHADER_PARAMS_H

View File

@@ -1,317 +0,0 @@
#ifndef BLOOM_FUNCTIONS_H
#define BLOOM_FUNCTIONS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////////// DESCRIPTION ////////////////////////////////
// These utility functions and constants help several passes determine the
// size and center texel weight of the phosphor bloom in a uniform manner.
////////////////////////////////// INCLUDES //////////////////////////////////
// We need to calculate the correct blur sigma using some .cgp constants:
#include "user-settings.fxh"
#include "derived-settings-and-constants.fxh"
#include "blur-functions.fxh"
/////////////////////////////// BLOOM CONSTANTS //////////////////////////////
// Compute constants with manual inlines of the functions below:
// bloom_diff_thresh equals 1/256, the example value the helpers below give for
// their `thresh` parameter (max desired pixel difference in a blurred triad).
// NOTE(review): presumably this is the value callers pass as `thresh` -- confirm.
static const float bloom_diff_thresh = 1.0/256.0;
/////////////////////////////////// HELPERS //////////////////////////////////
float get_min_sigma_to_blur_triad(const float triad_size,
    const float thresh)
{
    // Inputs:  triad_size -- final phosphor triad size in pixels.
    //          thresh     -- max desired pixel difference in the blurred
    //                        triad (e.g. 1.0/256.0).
    // Output:  the minimum sigma that fully blurs a phosphor triad on the
    //          screen to an even color, within thresh.
    // The closed form below was found by curve-fitting data.
    // Fit quality: max error = ~0.086036, mean sq. error = ~0.0013387.
    const float size_term = 0.6113*triad_size;
    const float thresh_term = 1.122*triad_size*sqrt(0.000416 + thresh);
    return -0.05168 + size_term - thresh_term;
    // A simpler but less accurate fit (max error = ~0.16486,
    // mean sq. error = ~0.0041041) would be:
    //return 0.5985*triad_size - triad_size*sqrt(thresh)
}
float get_absolute_scale_blur_sigma(const float thresh)
{
    // Inputs (globals read by this function):
    //   * min_allowed_viewport_triads.x -- the number of horizontal phosphor
    //     triads in the final image must be >= this value for realistic
    //     results.
    //   * bloom_approx_scale_x -- must equal the absolute horizontal scale
    //     of BLOOM_APPROX.
    //   * bloom_approx_scale_x/min_allowed_viewport_triads.x should be
    //     <= 1.1658025090 to keep the final result < 0.62666015625 (the
    //     largest sigma ensuring the largest unused texel weight stays
    //     < 1.0/256.0 for a 3x3 blur).
    //   * max_viewport_size_x -- the assumed (very large) viewport width.
    // Parameter:
    //   * thresh -- the max desired pixel difference in the blurred triad
    //     (e.g. 1.0/256.0).
    // Output:
    //   The minimum Gaussian sigma that will blur the pass output as much
    //   as it would have taken to blur away bloom_approx_scale_x horizontal
    //   phosphor triads.
    // Rationale:
    //   BLOOM_APPROX should look like a downscaled phosphor blur. Ideally
    //   the actual phosphor bloom sigma would be scaled down to the current
    //   resolution with (bloom_approx_scale_x/viewport_size_x), but the
    //   viewport size is unknown in this pass. Instead, blur as much as it
    //   would take to blur away min_allowed_viewport_triads.x. That blurs
    //   "more than necessary" if the user actually uses more triads, but
    //   that's not terrible either: blurring a constant fraction of the
    //   viewport may better resemble a true optical bloom anyway (since the
    //   viewport will generally be about the same fraction of each player's
    //   field of view, regardless of screen size and resolution).
    // An extremely large viewport size is assumed for asymptotic results.
    const float largest_triad_size =
        max_viewport_size_x/min_allowed_viewport_triads.x;
    const float full_scale_sigma =
        get_min_sigma_to_blur_triad(largest_triad_size, thresh);
    return bloom_approx_scale_x/max_viewport_size_x * full_scale_sigma;
}
float get_center_weight(const float sigma)
{
// Given a Gaussian blur sigma, get the blur weight for the center texel.
// Parameter: sigma -- standard deviation of the Gaussian blur.
// Returns:   with RUNTIME_PHOSPHOR_BLOOM_SIGMA, whatever
//            get_fast_gaussian_weight_sum_inv(sigma) returns; otherwise the
//            square of the inverse weight sum of the largest blur enabled by
//            the PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_*_PIXELS macros.
#ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA
return get_fast_gaussian_weight_sum_inv(sigma);
#else
// Unnormalized Gaussian weights: wN = exp(-N*N / (2*sigma*sigma)), where N is
// the texel distance from the center (w0 is the center tap itself).
const float denom_inv = 0.5/(sigma*sigma);
const float w0 = 1.0;
const float w1 = exp(-1.0 * denom_inv);
const float w2 = exp(-4.0 * denom_inv);
const float w3 = exp(-9.0 * denom_inv);
const float w4 = exp(-16.0 * denom_inv);
const float w5 = exp(-25.0 * denom_inv);
const float w6 = exp(-36.0 * denom_inv);
const float w7 = exp(-49.0 * denom_inv);
const float w8 = exp(-64.0 * denom_inv);
const float w9 = exp(-81.0 * denom_inv);
const float w10 = exp(-100.0 * denom_inv);
const float w11 = exp(-121.0 * denom_inv);
const float w12 = exp(-144.0 * denom_inv);
const float w13 = exp(-169.0 * denom_inv);
const float w14 = exp(-196.0 * denom_inv);
const float w15 = exp(-225.0 * denom_inv);
const float w16 = exp(-256.0 * denom_inv);
const float w17 = exp(-289.0 * denom_inv);
const float w18 = exp(-324.0 * denom_inv);
const float w19 = exp(-361.0 * denom_inv);
const float w20 = exp(-400.0 * denom_inv);
const float w21 = exp(-441.0 * denom_inv);
// Note: If the implementation uses a smaller blur than the max allowed,
// the worst case scenario is that the center weight will be overestimated,
// so we'll put a bit more energy into the brightpass...no huge deal.
// Then again, if the implementation uses a larger blur than the max
// "allowed" because of dynamic branching, the center weight could be
// underestimated, which is more of a problem...consider always using
// NOTE(review): the sentence above is cut off in this file; presumably it
// ended with something like "the largest blur" -- confirm against upstream.
// Each branch sums the center tap plus both symmetric sides (hence 2.0 *)
// of the taps used by that blur size, then inverts the sum.
#ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
// 43x blur:
const float weight_sum_inv = 1.0 /
(w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 +
w11 + w12 + w13 + w14 + w15 + w16 + w17 + w18 + w19 + w20 + w21));
#else
#ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
// 31x blur:
const float weight_sum_inv = 1.0 /
(w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 +
w8 + w9 + w10 + w11 + w12 + w13 + w14 + w15));
#else
#ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
// 25x blur:
const float weight_sum_inv = 1.0 / (w0 + 2.0 * (
w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12));
#else
#ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
// 17x blur:
const float weight_sum_inv = 1.0 / (w0 + 2.0 * (
w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8));
#else
// 9x blur:
const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3 + w4));
#endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
#endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
#endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
#endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
// With w0 == 1.0 the normalized 1D center weight is weight_sum_inv itself;
// it is squared here, presumably because the blur runs as two separable 1D
// passes (TODO confirm against the bloom passes).
const float center_weight = weight_sum_inv * weight_sum_inv;
return center_weight;
#endif
}
// Blur `tex` at `tex_uv` by dispatching to one of the tex2Dblur{9,17,25,31,43}fast
// helpers, forwarding all four arguments unchanged.
// Parameters: tex    -- sampler to blur.
//             tex_uv -- texture coordinates of the sample center.
//             dxdy   -- forwarded to the helper; presumably the uv step between
//                       blur taps (TODO confirm in blur-functions.fxh).
//             sigma  -- Gaussian standard deviation used to pick the blur size.
// Returns:    the float3 result of the selected helper.
float3 tex2DblurNfast(const sampler2D tex, const float2 tex_uv,
const float2 dxdy, const float sigma)
{
// If sigma is static, we can safely branch and use the smallest blur
// that's big enough. Ignore #define hints, because we'll only use a
// large blur if we actually need it, and the branches cost nothing.
// NOTE: the macro defined below is not scoped to this function; it stays
// defined for the rest of the translation unit after this point.
#ifndef RUNTIME_PHOSPHOR_BLOOM_SIGMA
#define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
#else
// It's still worth branching if the profile supports dynamic branches:
// It's much faster than using a hugely excessive blur, but each branch
// eats ~1% FPS.
#ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES
#define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
#endif
#endif
// Failed optimization notes:
// I originally created a same-size mipmapped 5-tap separable blur10 that
// could handle any sigma by reaching into lower mip levels. It was
// as fast as blur25fast for runtime sigmas and a tad faster than
// blur31fast for static sigmas, but mipmapping two viewport-size passes
// ate 10% of FPS across all codepaths, so it wasn't worth it.
#ifdef PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
// Branching path: pick the first blur whose blurN_std_dev limit is >= sigma,
// checking from the smallest blur upward; fall back to the 43-tap blur.
if(sigma <= blur9_std_dev)
{
return tex2Dblur9fast(tex, tex_uv, dxdy, sigma);
}
else if(sigma <= blur17_std_dev)
{
return tex2Dblur17fast(tex, tex_uv, dxdy, sigma);
}
else if(sigma <= blur25_std_dev)
{
return tex2Dblur25fast(tex, tex_uv, dxdy, sigma);
}
else if(sigma <= blur31_std_dev)
{
return tex2Dblur31fast(tex, tex_uv, dxdy, sigma);
}
else
{
return tex2Dblur43fast(tex, tex_uv, dxdy, sigma);
}
#else
// If we can't afford to branch, we can only guess at what blur
// size we need. Therefore, use the largest blur allowed.
// The PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_*_PIXELS macros are tested from the
// largest size down, so the largest enabled hint decides the blur.
#ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
return tex2Dblur43fast(tex, tex_uv, dxdy, sigma);
#else
#ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
return tex2Dblur31fast(tex, tex_uv, dxdy, sigma);
#else
#ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
return tex2Dblur25fast(tex, tex_uv, dxdy, sigma);
#else
#ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
return tex2Dblur17fast(tex, tex_uv, dxdy, sigma);
#else
return tex2Dblur9fast(tex, tex_uv, dxdy, sigma);
#endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
#endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
#endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
#endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
#endif // PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
}
float get_bloom_approx_sigma(const float output_size_x_runtime,
const float estimated_viewport_size_x)
{
// Requires: 1.) output_size_x_runtime == BLOOM_APPROX.output_size.x.
// This is included for dynamic codepaths just in case the
// following two globals are incorrect:
// 2.) bloom_approx_size_x_for_fake should == the same
// if PHOSPHOR_BLOOM_FAKE is #defined
// 3.) bloom_approx_size_x should == the same otherwise
// 4.) estimated_viewport_size_x is divided by
// mask_triad_size_desired to estimate the triad count.
// Returns: For gaussian4x4, return a dynamic small bloom sigma that's
// as close to optimal as possible given available information.
// For blur3x3, return a static small bloom sigma that
// works well for typical cases. Otherwise, we're using simple
// bilinear filtering, so use static calculations.
// Notes: Only the gaussian4x4 branch reads the two parameters; the
// other branch relies solely on static globals.
// Assume the default static value. This is a compromise that ensures
// typical triads are blurred, even if unusually large ones aren't.
static const float mask_num_triads_static =
max(min_allowed_viewport_triads.x, mask_num_triads_desired_static);
// Runtime triad count: blend between the count implied by the desired triad
// size and the explicitly desired count (weighted by mask_specify_num_triads),
// then clamp it to the minimum the resized mask can support.
const float mask_num_triads_from_size =
estimated_viewport_size_x/mask_triad_size_desired;
const float mask_num_triads_runtime = max(min_allowed_viewport_triads.x,
lerp(mask_num_triads_from_size, mask_num_triads_desired,
mask_specify_num_triads));
// Assume an extremely large viewport size for asymptotic results:
static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
if(bloom_approx_filter > 1.5) // 4x4 true Gaussian resize
{
// Use the runtime num triads and output size:
const float asymptotic_triad_size =
max_viewport_size_x/mask_num_triads_runtime;
const float asymptotic_sigma = get_min_sigma_to_blur_triad(
asymptotic_triad_size, bloom_diff_thresh);
// Scale the asymptotic sigma down to the actual BLOOM_APPROX width:
const float bloom_approx_sigma =
asymptotic_sigma * output_size_x_runtime/max_viewport_size_x;
// The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
// account for the Gaussian scanline sigma from the last pass too.
// The bloom will be too wide horizontally but tall enough vertically.
return length(float2(bloom_approx_sigma, beam_max_sigma));
}
else // 3x3 blur resize (the bilinear resize doesn't need a sigma)
{
// We're either using blur3x3 or bilinear filtering. The biggest
// reason to choose blur3x3 is to avoid dynamic weights, so use a
// static calculation.
#ifdef PHOSPHOR_BLOOM_FAKE
static const float output_size_x_static =
bloom_approx_size_x_for_fake;
#else
static const float output_size_x_static = bloom_approx_size_x;
#endif
static const float asymptotic_triad_size =
max_viewport_size_x/mask_num_triads_static;
const float asymptotic_sigma = get_min_sigma_to_blur_triad(
asymptotic_triad_size, bloom_diff_thresh);
// Scale the asymptotic sigma down to the static BLOOM_APPROX width:
const float bloom_approx_sigma =
asymptotic_sigma * output_size_x_static/max_viewport_size_x;
// The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
// try accounting for the Gaussian scanline sigma from the last pass
// too; use the static default value:
return length(float2(bloom_approx_sigma, beam_max_sigma_static));
}
}
float get_final_bloom_sigma(const float bloom_sigma_runtime)
{
    //  Requires:   1.) bloom_sigma_runtime is a precalculated sigma that's
    //                  optimal for the [known] triad size.
    //              2.) Call this from a fragment shader (not a vertex
    //                  shader), or blurring with static sigmas won't be
    //                  constant-folded.
    //  Returns:    With RUNTIME_PHOSPHOR_BLOOM_SIGMA #defined, return the
    //              caller's runtime sigma unchanged (optimal but 10% slower).
    //              Otherwise, return the optimistic static sigma computed
    //              from mask_triad_size_desired_static and bloom_diff_thresh.
    #ifndef RUNTIME_PHOSPHOR_BLOOM_SIGMA
        //  Overblurring looks as bad as underblurring, so assume
        //  average-size triads, not worst-case huge triads:
        return get_min_sigma_to_blur_triad(
            mask_triad_size_desired_static, bloom_diff_thresh);
    #else
        return bloom_sigma_runtime;
    #endif
}
#endif // BLOOM_FUNCTIONS_H

View File

@@ -1,299 +0,0 @@
#ifndef DERIVED_SETTINGS_AND_CONSTANTS_H
#define DERIVED_SETTINGS_AND_CONSTANTS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////////// DESCRIPTION ////////////////////////////////
// These macros and constants can be used across the whole codebase.
// Unlike the values in user-settings.fxh, end users shouldn't modify these.
////////////////////////////////// INCLUDES //////////////////////////////////
#include "user-settings.fxh"
#include "user-cgp-constants.fxh"
/////////////////////////////// FIXED SETTINGS ///////////////////////////////
// Avoid dividing by zero; using a macro overloads for float, float2, etc.:
//#define FIX_ZERO(c) (max(abs(c), 0.0000152587890625)) // 2^-16
// Ensure the first pass decodes CRT gamma and the last encodes LCD gamma.
// (The #ifndef guard avoids redefining the macro if it's already #defined.)
#ifndef SIMULATE_CRT_ON_LCD
#define SIMULATE_CRT_ON_LCD
#endif
// Manually tiling a manually resized texture creates texture coord derivative
// discontinuities and confuses anisotropic filtering, causing discolored tile
// seams in the phosphor mask. Workarounds:
// a.) Using tex2Dlod disables anisotropic filtering for tiled masks. It's
// downgraded to tex2Dbias without DRIVERS_ALLOW_TEX2DLOD #defined and
// disabled without DRIVERS_ALLOW_TEX2DBIAS #defined either.
// b.) "Tile flat twice" requires drawing two full tiles without border padding
// to the resized mask FBO, and it's incompatible with same-pass curvature.
// (Same-pass curvature isn't used but could be in the future...maybe.)
// c.) "Fix discontinuities" requires derivatives and drawing one tile with
// border padding to the resized mask FBO, but it works with same-pass
// curvature. It's disabled without DRIVERS_ALLOW_DERIVATIVES #defined.
// Precedence: a, then b, then c (if multiple strategies are #defined).
// All three strategies are #defined here; the derived settings further down
// #undef whichever ones are unavailable or outranked.
#define ANISOTROPIC_TILING_COMPAT_TEX2DLOD // 129.7 FPS, 4x, flat; 101.8 at fullscreen
#define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE // 128.1 FPS, 4x, flat; 101.5 at fullscreen
#define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES // 124.4 FPS, 4x, flat; 97.4 at fullscreen
// Also, manually resampling the phosphor mask is slightly blurrier with
// anisotropic filtering. (Resampling with mipmapping is even worse: It
// creates artifacts, but only with the fully bloomed shader.) The difference
// is subtle with small triads, but you can fix it for a small cost.
// (Left commented out, i.e. disabled, by default.)
//#define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
////////////////////////////// DERIVED SETTINGS //////////////////////////////
// Intel HD 4000 GPU's can't handle manual mask resizing (for now), setting the
// geometry mode at runtime, or a 4x4 true Gaussian resize. Disable
// incompatible settings ASAP. (INTEGRATED_GRAPHICS_COMPATIBILITY_MODE may be
// #defined by either user-settings.h or a wrapper .cg that #includes the
// current .cg pass.)
// Either way, bloom_approx_filter ends up as a static constant derived from
// bloom_approx_filter_static.
#ifdef INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
#undef PHOSPHOR_MASK_MANUALLY_RESIZE
#endif
#ifdef RUNTIME_GEOMETRY_MODE
#undef RUNTIME_GEOMETRY_MODE
#endif
// Mode 2 (4x4 Gaussian resize) won't work, and mode 1 (3x3 blur) is
// inferior in most cases, so replace 2.0 with 0.0:
// (Any static value above 1.5 is treated as mode 2 and mapped to 0.0.)
static const float bloom_approx_filter =
bloom_approx_filter_static > 1.5 ? 0.0 : bloom_approx_filter_static;
#else
// No compatibility restrictions: Use the static setting as-is.
static const float bloom_approx_filter = bloom_approx_filter_static;
#endif // INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
// Disable slow runtime paths if static parameters are used. Most of these
// won't be a problem anyway once the params are disabled, but some will.
// Each macro is only #undef'ed if it's currently #defined.
#ifndef RUNTIME_SHADER_PARAMS_ENABLE
#ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA
#undef RUNTIME_PHOSPHOR_BLOOM_SIGMA
#endif
#ifdef RUNTIME_ANTIALIAS_WEIGHTS
#undef RUNTIME_ANTIALIAS_WEIGHTS
#endif
#ifdef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
#undef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
#endif
#ifdef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
#undef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
#endif
#ifdef RUNTIME_GEOMETRY_TILT
#undef RUNTIME_GEOMETRY_TILT
#endif
#ifdef RUNTIME_GEOMETRY_MODE
#undef RUNTIME_GEOMETRY_MODE
#endif
// This also stops the forced runtime mask mode/type selection from being
// enabled later in this file when dynamic branches are unavailable:
#ifdef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
#undef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
#endif
#endif // RUNTIME_SHADER_PARAMS_ENABLE
// Make tex2Dbias a backup for tex2Dlod for wider compatibility.
// (Requesting a TEX2DLOD strategy implicitly requests the TEX2DBIAS one too.)
#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
#define ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
#endif
#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
#define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
#endif
// Rule out unavailable anisotropic compatibility strategies:
// Without derivatives, "fix discontinuities" can't be used:
#ifndef DRIVERS_ALLOW_DERIVATIVES
#ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
#undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
#endif
#endif // DRIVERS_ALLOW_DERIVATIVES
// Without tex2Dlod, drop both TEX2DLOD strategies and
// ANTIALIAS_DISABLE_ANISOTROPIC (the TEX2DBIAS backups defined above remain):
#ifndef DRIVERS_ALLOW_TEX2DLOD
#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
#undef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
#endif
#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
#undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
#endif
#ifdef ANTIALIAS_DISABLE_ANISOTROPIC
#undef ANTIALIAS_DISABLE_ANISOTROPIC
#endif
#endif // DRIVERS_ALLOW_TEX2DLOD
// Without tex2Dbias, drop the TEX2DBIAS backups as well:
#ifndef DRIVERS_ALLOW_TEX2DBIAS
#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
#undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
#endif
#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
#undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
#endif
#endif // DRIVERS_ALLOW_TEX2DBIAS
// Prioritize anisotropic tiling compatibility strategies by performance and
// disable unused strategies. This concentrates all the nesting in one place.
// Priority order: TEX2DLOD, then TEX2DBIAS, then TILE_FLAT_TWICE, then
// FIX_DISCONTINUITIES. After this block, at most one of those four
// ANISOTROPIC_TILING_COMPAT_* macros is still #defined.
#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
// tex2Dlod wins: Drop every lower-priority strategy.
#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
#undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
#endif
#ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
#undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
#endif
#ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
#undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
#endif
#else
#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
// tex2Dbias wins: Drop the two remaining strategies.
#ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
#undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
#endif
#ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
#undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
#endif
#else
// ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE is only compatible with
// flat texture coords in the same pass, but that's all we use.
#ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
#ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
#undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
#endif
#endif
#endif // ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
#endif // ANISOTROPIC_TILING_COMPAT_TEX2DLOD
// The tex2Dlod and tex2Dbias strategies share a lot in common, and we can
// reduce some #ifdef nesting in the next section by essentially OR'ing them:
#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
#define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
#endif
#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
#define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
#endif
// Prioritize anisotropic resampling compatibility strategies the same way:
// (tex2Dlod outranks its tex2Dbias backup.)
#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
#undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
#endif
#endif
/////////////////////// DERIVED PHOSPHOR MASK CONSTANTS //////////////////////
// If we can use the large mipmapped LUT without mipmapping artifacts, we
// should: It gives us more options for using fewer samples.
// mask_resize_src_lut_size is the large LUT size only when both
// DRIVERS_ALLOW_TEX2DLOD and ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD are
// #defined; every other combination uses the small LUT size.
#ifdef DRIVERS_ALLOW_TEX2DLOD
#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
// TODO: Take advantage of this!
#define PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT
static const float2 mask_resize_src_lut_size = mask_texture_large_size;
#else
static const float2 mask_resize_src_lut_size = mask_texture_small_size;
#endif
#else
static const float2 mask_resize_src_lut_size = mask_texture_small_size;
#endif // DRIVERS_ALLOW_TEX2DLOD
// tex2D's sampler2D parameter MUST be a uniform global, a uniform input to
// main_fragment, or a static alias of one of the above. This makes it hard
// to select the phosphor mask at runtime: We can't even assign to a uniform
// global in the vertex shader or select a sampler2D in the vertex shader and
// pass it to the fragment shader (even with explicit TEXUNIT# bindings),
// because it just gives us the input texture or a black screen. However, we
// can get around these limitations by calling tex2D three times with different
// uniform samplers (or resizing the phosphor mask three times altogether).
// With dynamic branches, we can process only one of these branches on top of
// quickly discarding fragments we don't need (cgc seems able to overcome
// limitations around dependent texture fetches inside of branches). Without
// dynamic branches, we have to process every branch for every fragment...which
// is slower. Runtime sampling mode selection is slower without dynamic
// branches as well. Let the user's static #defines decide if it's worth it.
// Net effect: Runtime selection is enabled whenever dynamic branches are
// allowed, and otherwise only if the FORCE_* macro is still #defined (it's
// #undef'ed earlier in this file without RUNTIME_SHADER_PARAMS_ENABLE).
#ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES
#define RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
#else
#ifdef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
#define RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
#endif
#endif // DRIVERS_ALLOW_DYNAMIC_BRANCHES
// We need to render some minimum number of tiles in the resize passes.
// We need at least 1.0 just to repeat a single tile, and we need extra
// padding beyond that for anisotropic filtering, discontinuity fixing,
// antialiasing, same-pass curvature (not currently used), etc. First
// determine how many border texels and tiles we need, based on how the result
// will be sampled:
// The border accumulates in steps: antialiasing base -> anisotropic
// filtering -> discontinuity fixing -> texels -> tiles.
#ifdef GEOMETRY_EARLY
static const float max_subpixel_offset = aa_subpixel_r_offset_static.x;
// Most antialiasing filters have a base radius of 4.0 pixels:
static const float max_aa_base_pixel_border = 4.0 +
max_subpixel_offset;
#else
// Without GEOMETRY_EARLY, no antialiasing border is reserved:
static const float max_aa_base_pixel_border = 0.0;
#endif
// Anisotropic filtering adds about 0.5 to the pixel border:
// (Only when neither the tex2Dlod nor the tex2Dbias tiling strategy is used.)
#ifndef ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
static const float max_aniso_pixel_border = max_aa_base_pixel_border + 0.5;
#else
static const float max_aniso_pixel_border = max_aa_base_pixel_border;
#endif
// Fixing discontinuities adds 1.0 more to the pixel border:
#ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
static const float max_tiled_pixel_border = max_aniso_pixel_border + 1.0;
#else
static const float max_tiled_pixel_border = max_aniso_pixel_border;
#endif
// Convert the pixel border to an integer texel border. Assume same-pass
// curvature about triples the texel frequency:
#ifdef GEOMETRY_EARLY
static const float max_mask_texel_border =
macro_ceil(max_tiled_pixel_border * 3.0);
#else
static const float max_mask_texel_border = macro_ceil(max_tiled_pixel_border);
#endif
// Convert the texel border to a tile border using worst-case assumptions:
// (The smallest allowed triad size gives the fewest texels per tile.)
static const float max_mask_tile_border = max_mask_texel_border/
(mask_min_allowed_triad_size * mask_triads_per_tile);
// Finally, set the number of resized tiles to render to MASK_RESIZE, and set
// the starting texel (inside borders) for sampling it.
// Except for the "tile flat twice" special case, we render one tile plus a
// border of max_mask_tile_border tiles on each side, and sampling starts just
// past the texel border.
#ifndef GEOMETRY_EARLY
#ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
// Special case: Render two tiles without borders. Anisotropic
// filtering doesn't seem to be a problem here.
static const float mask_resize_num_tiles = 1.0 + 1.0;
static const float mask_start_texels = 0.0;
#else
static const float mask_resize_num_tiles = 1.0 +
2.0 * max_mask_tile_border;
static const float mask_start_texels = max_mask_texel_border;
#endif
#else
// With GEOMETRY_EARLY, always use the bordered layout:
static const float mask_resize_num_tiles = 1.0 + 2.0*max_mask_tile_border;
static const float mask_start_texels = max_mask_texel_border;
#endif // GEOMETRY_EARLY
// We have to fit mask_resize_num_tiles into an FBO with a viewport scale of
// mask_resize_viewport_scale. This limits the maximum final triad size.
// Estimate the minimum number of triads we can split the screen into in each
// dimension (we'll be as correct as mask_resize_viewport_scale is):
static const float mask_resize_num_triads =
mask_resize_num_tiles * mask_triads_per_tile;
// The scalar triad count is replicated to both components (.xx) before the
// division by mask_resize_viewport_scale:
static const float2 min_allowed_viewport_triads =
mask_resize_num_triads.xx / mask_resize_viewport_scale;
#endif // DERIVED_SETTINGS_AND_CONSTANTS_H

View File

@@ -1,545 +0,0 @@
#ifndef GAMMA_MANAGEMENT_H
#define GAMMA_MANAGEMENT_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////////// DESCRIPTION ////////////////////////////////
// This file provides gamma-aware tex*D*() and encode_output() functions.
// Requires: Before #include-ing this file, the including file must #define
// the following macros when applicable and follow their rules:
// 1.) #define FIRST_PASS if this is the first pass.
// 2.) #define LAST_PASS if this is the last pass.
// 3.) If sRGB is available, set srgb_framebufferN = "true" for
// every pass except the last in your .cgp preset.
// 4.) If sRGB isn't available but you want gamma-correctness with
// no banding, #define GAMMA_ENCODE_EVERY_FBO each pass.
// 5.) #define SIMULATE_CRT_ON_LCD if desired (precedence over 6-8)
// 6.) #define SIMULATE_GBA_ON_LCD if desired (precedence over 7-8)
// 7.) #define SIMULATE_LCD_ON_CRT if desired (precedence over 8)
// 8.) #define SIMULATE_GBA_ON_CRT if desired (precedence over -)
// If an option in [5, 8] is #defined in the first or last pass, it
// should be #defined for both. It shouldn't make a difference
// whether it's #defined for intermediate passes or not.
// Optional: The including file (or an earlier included file) may optionally
// #define a number of macros indicating it will override certain
// static constants with either static or uniform constants. The
// macros and associated constants are as follows:
// 1.) OVERRIDE_STANDARD_GAMMA: The user must first define:
// static const float ntsc_gamma
// static const float pal_gamma
// static const float crt_reference_gamma_high
// static const float crt_reference_gamma_low
// static const float lcd_reference_gamma
// static const float crt_office_gamma
// static const float lcd_office_gamma
// 2.) OVERRIDE_DEVICE_GAMMA: The user must first define:
// static const float crt_gamma
// static const float gba_gamma
// static const float lcd_gamma
// 3.) OVERRIDE_FINAL_GAMMA: The user must first define:
// static const float input_gamma
// static const float intermediate_gamma
// static const float output_gamma
// (intermediate_gamma is for GAMMA_ENCODE_EVERY_FBO.)
// 4.) OVERRIDE_ALPHA_ASSUMPTIONS: The user must first define:
// static const bool assume_opaque_alpha
// The gamma constant overrides must be used in every pass or none,
// and OVERRIDE_FINAL_GAMMA bypasses all of the SIMULATE* macros.
// OVERRIDE_ALPHA_ASSUMPTIONS may be set on a per-pass basis.
// Usage: After setting macros appropriately, ignore gamma correction and
// replace all tex*D*() calls with equivalent gamma-aware
// tex*D*_linearize calls, except:
// 1.) When you read an LUT, use regular tex*D or a gamma-specified
// function, depending on its gamma encoding:
// tex*D*_linearize_gamma (takes a runtime gamma parameter)
// 2.) If you must read pass0's original input in a later pass, use
// tex2D_linearize_ntsc_gamma. If you want to read pass0's
// input with gamma-corrected bilinear filtering, consider
// creating a first linearizing pass and reading from the input
// of pass1 later.
// Then, return encode_output(color) from every fragment shader.
// Finally, use the global gamma_aware_bilinear boolean if you want
// to statically branch based on whether bilinear filtering is
// gamma-correct or not (e.g. for placing Gaussian blur samples).
//
// Detailed Policy:
// tex*D*_linearize() functions enforce a consistent gamma-management policy
// based on the FIRST_PASS and GAMMA_ENCODE_EVERY_FBO settings. They assume
// their input texture has the same encoding characteristics as the input for
// the current pass (which doesn't apply to the exceptions listed above).
// Similarly, encode_output() enforces a policy based on the LAST_PASS and
// GAMMA_ENCODE_EVERY_FBO settings. Together, they result in one of the
// following two pipelines.
// Typical pipeline with intermediate sRGB framebuffers:
// linear_color = pow(pass0_encoded_color, input_gamma);
// intermediate_output = linear_color; // Automatic sRGB encoding
// linear_color = intermediate_output; // Automatic sRGB decoding
// final_output = pow(linear_color, 1.0/output_gamma);
// Typical pipeline without intermediate sRGB framebuffers:
// linear_color = pow(pass0_encoded_color, input_gamma);
// intermediate_output = pow(linear_color, 1.0/intermediate_gamma);
// linear_color = pow(intermediate_output, intermediate_gamma);
// final_output = pow(linear_color, 1.0/output_gamma);
// Using GAMMA_ENCODE_EVERY_FBO is much slower, but it's provided as a way to
// easily get gamma-correctness without banding on devices where sRGB isn't
// supported.
//
// Use This Header to Maximize Code Reuse:
// The purpose of this header is to provide a consistent interface for texture
// reads and output gamma-encoding that localizes and abstracts away all the
// annoying details. This greatly reduces the amount of code in each shader
// pass that depends on the pass number in the .cgp preset or whether sRGB
// FBO's are being used: You can trivially change the gamma behavior of your
// whole pass by commenting or uncommenting 1-3 #defines. To reuse the same
// code in your first, Nth, and last passes, you can even put it all in another
// header file and #include it from skeleton .cg files that #define the
// appropriate pass-specific settings.
//
// Rationale for Using Three Macros:
// This file uses GAMMA_ENCODE_EVERY_FBO instead of an opposite macro like
// SRGB_PIPELINE to ensure sRGB is assumed by default, which hopefully imposes
// a lower maintenance burden on each pass. At first glance it seems we could
// accomplish everything with two macros: GAMMA_CORRECT_IN / GAMMA_CORRECT_OUT.
// This works for simple use cases where input_gamma == output_gamma, but it
// breaks down for more complex scenarios like CRT simulation, where the pass
// number determines the gamma encoding of the input and output.
/////////////////////////////// BASE CONSTANTS ///////////////////////////////
// Set standard gamma constants, but allow users to override them:
// (With OVERRIDE_STANDARD_GAMMA #defined, none of the constants in this
// block are declared here, so the including file must supply all of them.)
#ifndef OVERRIDE_STANDARD_GAMMA
// Standard encoding gammas:
static const float ntsc_gamma = 2.2; // Best to use NTSC for PAL too?
static const float pal_gamma = 2.8; // Never actually 2.8 in practice
// Typical device decoding gammas (only use for emulating devices):
// CRT/LCD reference gammas are higher than NTSC and Rec.709 video standard
// gammas: The standards purposely undercorrected for an analog CRT's
// assumed 2.5 reference display gamma to maintain contrast in assumed
// [dark] viewing conditions: http://www.poynton.com/PDFs/GammaFAQ.pdf
// These unstated assumptions about display gamma and perceptual rendering
// intent caused a lot of confusion, and more modern CRT's seemed to target
// NTSC 2.2 gamma with circuitry. LCD displays seem to have followed suit
// (they struggle near black with 2.5 gamma anyway), especially PC/laptop
// displays designed to view sRGB in bright environments. (Standards are
// also in flux again with BT.1886, but it's underspecified for displays.)
static const float crt_reference_gamma_high = 2.5; // In (2.35, 2.55)
static const float crt_reference_gamma_low = 2.35; // In (2.35, 2.55)
static const float lcd_reference_gamma = 2.5; // To match CRT
static const float crt_office_gamma = 2.2; // Circuitry-adjusted for NTSC
static const float lcd_office_gamma = 2.2; // Approximates sRGB
#endif // OVERRIDE_STANDARD_GAMMA
// Assuming alpha == 1.0 might make it easier for users to avoid some bugs,
// but only if they're aware of it.
// When true, the encode/decode functions in this file output alpha = 1.0
// instead of passing color.a through. The default is false (pass-through).
#ifndef OVERRIDE_ALPHA_ASSUMPTIONS
static const bool assume_opaque_alpha = false;
#endif // OVERRIDE_ALPHA_ASSUMPTIONS
/////////////////////// DERIVED CONSTANTS AS FUNCTIONS ///////////////////////
// gamma-management.h should be compatible with overriding gamma values with
// runtime user parameters, but we can only define other global constants in
// terms of static constants, not uniform user parameters. To get around this
// limitation, we need to define derived constants using functions.
//  Device gamma getters (CRT, Game Boy Advance, LCD).  Use the built-in
//  defaults unless the user #defines OVERRIDE_DEVICE_GAMMA:
#ifndef OVERRIDE_DEVICE_GAMMA
    float get_crt_gamma()
    {
        return crt_reference_gamma_high;
    }
    float get_gba_gamma()
    {
        //  Game Boy Advance; in (3.0, 4.0)
        return 3.5;
    }
    float get_lcd_gamma()
    {
        return lcd_office_gamma;
    }
#else
    //  The user promises to globally define crt_gamma, gba_gamma, and
    //  lcd_gamma:
    float get_crt_gamma()
    {
        return crt_gamma;
    }
    float get_gba_gamma()
    {
        return gba_gamma;
    }
    float get_lcd_gamma()
    {
        return lcd_gamma;
    }
#endif  //  OVERRIDE_DEVICE_GAMMA
// Set decoding/encoding gammas for the first/last passes, but allow overrides:
// Without OVERRIDE_FINAL_GAMMA, the first SIMULATE_* macro #defined in the
// order below decides the (input, output) gamma pair:
//     SIMULATE_CRT_ON_LCD: (crt, lcd)
//     SIMULATE_GBA_ON_LCD: (gba, lcd)
//     SIMULATE_LCD_ON_CRT: (lcd, crt)
//     SIMULATE_GBA_ON_CRT: (gba, crt)
//     none of the above:   (ntsc_gamma, ntsc_gamma)
#ifdef OVERRIDE_FINAL_GAMMA
// The user promises to globally define the appropriate constants:
float get_intermediate_gamma() { return intermediate_gamma; }
float get_input_gamma() { return input_gamma; }
float get_output_gamma() { return output_gamma; }
#else
// If we gamma-correct every pass, always use ntsc_gamma between passes to
// ensure middle passes don't need to care if anything is being simulated:
float get_intermediate_gamma() { return ntsc_gamma; }
#ifdef SIMULATE_CRT_ON_LCD
float get_input_gamma() { return get_crt_gamma(); }
float get_output_gamma() { return get_lcd_gamma(); }
#else
#ifdef SIMULATE_GBA_ON_LCD
float get_input_gamma() { return get_gba_gamma(); }
float get_output_gamma() { return get_lcd_gamma(); }
#else
#ifdef SIMULATE_LCD_ON_CRT
float get_input_gamma() { return get_lcd_gamma(); }
float get_output_gamma() { return get_crt_gamma(); }
#else
#ifdef SIMULATE_GBA_ON_CRT
float get_input_gamma() { return get_gba_gamma(); }
float get_output_gamma() { return get_crt_gamma(); }
#else // Don't simulate anything:
float get_input_gamma() { return ntsc_gamma; }
float get_output_gamma() { return ntsc_gamma; }
#endif // SIMULATE_GBA_ON_CRT
#endif // SIMULATE_LCD_ON_CRT
#endif // SIMULATE_GBA_ON_LCD
#endif // SIMULATE_CRT_ON_LCD
#endif // OVERRIDE_FINAL_GAMMA
// Set decoding/encoding gammas for the current pass. Use static constants for
// linearize_input and gamma_encode_output, because they aren't derived, and
// they let the compiler do dead-code elimination.
// Without GAMMA_ENCODE_EVERY_FBO:
//     linearize_input is true only with FIRST_PASS #defined, and
//     gamma_encode_output is true only with LAST_PASS #defined; the pass
//     gamma getters return 1.0 in the other passes.
// With GAMMA_ENCODE_EVERY_FBO:
//     Both flags are always true, and passes other than the first/last use
//     get_intermediate_gamma() for their input/output gamma respectively.
#ifndef GAMMA_ENCODE_EVERY_FBO
#ifdef FIRST_PASS
static const bool linearize_input = true;
float get_pass_input_gamma() { return get_input_gamma(); }
#else
static const bool linearize_input = false;
float get_pass_input_gamma() { return 1.0; }
#endif
#ifdef LAST_PASS
static const bool gamma_encode_output = true;
float get_pass_output_gamma() { return get_output_gamma(); }
#else
static const bool gamma_encode_output = false;
float get_pass_output_gamma() { return 1.0; }
#endif
#else
static const bool linearize_input = true;
static const bool gamma_encode_output = true;
#ifdef FIRST_PASS
float get_pass_input_gamma() { return get_input_gamma(); }
#else
float get_pass_input_gamma() { return get_intermediate_gamma(); }
#endif
#ifdef LAST_PASS
float get_pass_output_gamma() { return get_output_gamma(); }
#else
float get_pass_output_gamma() { return get_intermediate_gamma(); }
#endif
#endif // GAMMA_ENCODE_EVERY_FBO
// Users might want to know if bilinear filtering will be gamma-correct:
// (It's flagged gamma-correct exactly when this pass's input isn't
// linearized in the shader.)
static const bool gamma_aware_bilinear = !linearize_input;
////////////////////// COLOR ENCODING/DECODING FUNCTIONS /////////////////////
float4 encode_output(const float4 color)
{
    //  Purpose:    Gamma-encode a color for this pass's output.
    //  Returns:    color unchanged when gamma_encode_output is false.
    //              Otherwise, return rgb raised to the power
    //              1.0/get_pass_output_gamma(), with alpha forced to 1.0 if
    //              assume_opaque_alpha is set or copied from color.a if not.
    if(!gamma_encode_output)
    {
        return color;
    }
    const float3 encoded_rgb = pow(color.rgb, 1.0/get_pass_output_gamma());
    const float out_alpha = assume_opaque_alpha ? 1.0 : color.a;
    return float4(encoded_rgb, out_alpha);
}
float4 decode_input(const float4 color)
{
// Returns: color unchanged. This function applies no gamma decoding and
// consults neither linearize_input nor get_pass_input_gamma().
// NOTE(review): The file header describes a per-pass linearization policy;
// confirm that leaving this as a pass-through is intended.
return color;
}
float4 decode_input_first(const float4 color)
{
    //  Purpose:    Decode a gamma-encoded color using get_input_gamma().
    //  Returns:    rgb raised to the power get_input_gamma(), with alpha
    //              forced to 1.0 if assume_opaque_alpha is set or copied
    //              from color.a if not.
    const float3 linear_rgb = pow(color.rgb, get_input_gamma());
    const float out_alpha = assume_opaque_alpha ? 1.0 : color.a;
    return float4(linear_rgb, out_alpha);
}
float4 decode_gamma_input(const float4 color, const float3 gamma)
{
    //  Purpose:    Decode a color with a caller-supplied per-channel gamma.
    //  Returns:    rgb raised componentwise to the power gamma, with alpha
    //              forced to 1.0 if assume_opaque_alpha is set or copied
    //              from color.a if not.
    const float3 linear_rgb = pow(color.rgb, gamma);
    const float out_alpha = assume_opaque_alpha ? 1.0 : color.a;
    return float4(linear_rgb, out_alpha);
}
/////////////////////////// TEXTURE LOOKUP WRAPPERS //////////////////////////
// "SMART" LINEARIZING TEXTURE LOOKUP FUNCTIONS:
// Provide a wide array of linearizing texture lookup wrapper functions. The
// Cg shader spec Retroarch uses only allows for 2D textures, but 1D and 3D
// lookups are provided for completeness in case that changes someday. Nobody
// is likely to use the *fetch and *proj functions, but they're included just
// in case. The only tex*D texture sampling functions omitted are:
// - tex*Dcmpbias
// - tex*Dcmplod
// - tex*DARRAY*
// - tex*DMS*
// - Variants returning integers
// Standard line length restrictions are ignored below for vertical brevity.
/*
// tex1D:
float4 tex1D_linearize(const sampler1D tex, const float tex_coords)
{ return decode_input(tex1D(tex, tex_coords)); }
float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords)
{ return decode_input(tex1D(tex, tex_coords)); }
float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const int texel_off)
{ return decode_input(tex1D(tex, tex_coords, texel_off)); }
float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const int texel_off)
{ return decode_input(tex1D(tex, tex_coords, texel_off)); }
float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const float dx, const float dy)
{ return decode_input(tex1D(tex, tex_coords, dx, dy)); }
float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const float dx, const float dy)
{ return decode_input(tex1D(tex, tex_coords, dx, dy)); }
float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const float dx, const float dy, const int texel_off)
{ return decode_input(tex1D(tex, tex_coords, dx, dy, texel_off)); }
float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const float dx, const float dy, const int texel_off)
{ return decode_input(tex1D(tex, tex_coords, dx, dy, texel_off)); }
// tex1Dbias:
float4 tex1Dbias_linearize(const sampler1D tex, const float4 tex_coords)
{ return decode_input(tex1Dbias(tex, tex_coords)); }
float4 tex1Dbias_linearize(const sampler1D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex1Dbias(tex, tex_coords, texel_off)); }
// tex1Dfetch:
float4 tex1Dfetch_linearize(const sampler1D tex, const int4 tex_coords)
{ return decode_input(tex1Dfetch(tex, tex_coords)); }
float4 tex1Dfetch_linearize(const sampler1D tex, const int4 tex_coords, const int texel_off)
{ return decode_input(tex1Dfetch(tex, tex_coords, texel_off)); }
// tex1Dlod:
float4 tex1Dlod_linearize(const sampler1D tex, const float4 tex_coords)
{ return decode_input(tex1Dlod(tex, tex_coords)); }
float4 tex1Dlod_linearize(const sampler1D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex1Dlod(tex, tex_coords, texel_off)); }
// tex1Dproj:
float4 tex1Dproj_linearize(const sampler1D tex, const float2 tex_coords)
{ return decode_input(tex1Dproj(tex, tex_coords)); }
float4 tex1Dproj_linearize(const sampler1D tex, const float3 tex_coords)
{ return decode_input(tex1Dproj(tex, tex_coords)); }
float4 tex1Dproj_linearize(const sampler1D tex, const float2 tex_coords, const int texel_off)
{ return decode_input(tex1Dproj(tex, tex_coords, texel_off)); }
float4 tex1Dproj_linearize(const sampler1D tex, const float3 tex_coords, const int texel_off)
{ return decode_input(tex1Dproj(tex, tex_coords, texel_off)); }
*/
// tex2D:
// Sample tex at tex_coords and pass the texel through decode_input().
float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords)
{
    const float4 texel = tex2D(tex, tex_coords);
    return decode_input(texel);
}
// Same lookup, but the texel is decoded with decode_input_first() instead.
float4 tex2D_linearize_first(const sampler2D tex, const float2 tex_coords)
{
    const float4 texel = tex2D(tex, tex_coords);
    return decode_input_first(texel);
}
// float3 overload: only the .xy components are used for the lookup.
float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords)
{
    const float4 texel = tex2D(tex, tex_coords.xy);
    return decode_input(texel);
}
//float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const int texel_off)
//{ return decode_input(tex2D(tex, tex_coords, texel_off)); }
//float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const int texel_off)
//{ return decode_input(tex2D(tex, tex_coords.xy, texel_off)); }
/*
float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy)
{ return decode_input(tex2D(tex, tex_coords, dx, dy)); }
float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy)
{ return decode_input(tex2D(tex, tex_coords, dx, dy)); }
float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const int texel_off)
{ return decode_input(tex2D(tex, tex_coords, dx, dy, texel_off)); }
float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const int texel_off)
{ return decode_input(tex2D(tex, tex_coords, dx, dy, texel_off)); }
// tex2Dbias:
float4 tex2Dbias_linearize(const sampler2D tex, const float4 tex_coords)
{ return decode_input(tex2Dbias(tex, tex_coords)); }
float4 tex2Dbias_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex2Dbias(tex, tex_coords, texel_off)); }
// tex2Dfetch:
float4 tex2Dfetch_linearize(const sampler2D tex, const int4 tex_coords)
{ return decode_input(tex2Dfetch(tex, tex_coords)); }
float4 tex2Dfetch_linearize(const sampler2D tex, const int4 tex_coords, const int texel_off)
{ return decode_input(tex2Dfetch(tex, tex_coords, texel_off)); }
*/
// tex2Dlod:
// Explicit-LOD lookup (tex_coords is forwarded to tex2Dlod untouched), with
// the fetched texel passed through decode_input().
float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords)
{
    const float4 texel = tex2Dlod(tex, tex_coords);
    return decode_input(texel);
}
//float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off)
//{ return decode_input(tex2Dlod(tex, tex_coords, texel_off)); }
/*
// tex2Dproj:
float4 tex2Dproj_linearize(const sampler2D tex, const float3 tex_coords)
{ return decode_input(tex2Dproj(tex, tex_coords)); }
float4 tex2Dproj_linearize(const sampler2D tex, const float4 tex_coords)
{ return decode_input(tex2Dproj(tex, tex_coords)); }
float4 tex2Dproj_linearize(const sampler2D tex, const float3 tex_coords, const int texel_off)
{ return decode_input(tex2Dproj(tex, tex_coords, texel_off)); }
float4 tex2Dproj_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex2Dproj(tex, tex_coords, texel_off)); }
// tex3D:
float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords)
{ return decode_input(tex3D(tex, tex_coords)); }
float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const int texel_off)
{ return decode_input(tex3D(tex, tex_coords, texel_off)); }
float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const float3 dx, const float3 dy)
{ return decode_input(tex3D(tex, tex_coords, dx, dy)); }
float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const float3 dx, const float3 dy, const int texel_off)
{ return decode_input(tex3D(tex, tex_coords, dx, dy, texel_off)); }
// tex3Dbias:
float4 tex3Dbias_linearize(const sampler3D tex, const float4 tex_coords)
{ return decode_input(tex3Dbias(tex, tex_coords)); }
float4 tex3Dbias_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex3Dbias(tex, tex_coords, texel_off)); }
// tex3Dfetch:
float4 tex3Dfetch_linearize(const sampler3D tex, const int4 tex_coords)
{ return decode_input(tex3Dfetch(tex, tex_coords)); }
float4 tex3Dfetch_linearize(const sampler3D tex, const int4 tex_coords, const int texel_off)
{ return decode_input(tex3Dfetch(tex, tex_coords, texel_off)); }
// tex3Dlod:
float4 tex3Dlod_linearize(const sampler3D tex, const float4 tex_coords)
{ return decode_input(tex3Dlod(tex, tex_coords)); }
float4 tex3Dlod_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex3Dlod(tex, tex_coords, texel_off)); }
// tex3Dproj:
float4 tex3Dproj_linearize(const sampler3D tex, const float4 tex_coords)
{ return decode_input(tex3Dproj(tex, tex_coords)); }
float4 tex3Dproj_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex3Dproj(tex, tex_coords, texel_off)); }
// NONSTANDARD "SMART" LINEARIZING TEXTURE LOOKUP FUNCTIONS:
// This narrow selection of nonstandard tex2D* functions can be useful:
// tex2Dlod0: Automatically fill in the tex2D LOD parameter for mip level 0.
float4 tex2Dlod0_linearize(const sampler2D tex, const float2 tex_coords)
{ return decode_input(tex2Dlod(tex, float4(tex_coords, 0.0, 0.0))); }
float4 tex2Dlod0_linearize(const sampler2D tex, const float2 tex_coords, const int texel_off)
{ return decode_input(tex2Dlod(tex, float4(tex_coords, 0.0, 0.0), texel_off)); }
// MANUALLY LINEARIZING TEXTURE LOOKUP FUNCTIONS:
// Provide a narrower selection of tex2D* wrapper functions that decode an
// input sample with a specified gamma value. These are useful for reading
// LUT's and for reading the input of pass0 in a later pass.
// tex2D:
float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, texel_off), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, texel_off), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy, texel_off), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy, texel_off), gamma); }
// tex2Dbias:
float4 tex2Dbias_linearize_gamma(const sampler2D tex, const float4 tex_coords, const float3 gamma)
{ return decode_gamma_input(tex2Dbias(tex, tex_coords), gamma); }
float4 tex2Dbias_linearize_gamma(const sampler2D tex, const float4 tex_coords, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2Dbias(tex, tex_coords, texel_off), gamma); }
// tex2Dfetch:
float4 tex2Dfetch_linearize_gamma(const sampler2D tex, const int4 tex_coords, const float3 gamma)
{ return decode_gamma_input(tex2Dfetch(tex, tex_coords), gamma); }
float4 tex2Dfetch_linearize_gamma(const sampler2D tex, const int4 tex_coords, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2Dfetch(tex, tex_coords, texel_off), gamma); }
*/
// tex2Dlod:
// Explicit-LOD lookup (tex_coords is forwarded to tex2Dlod untouched), with
// the fetched texel decoded by decode_gamma_input() using the given gamma.
float4 tex2Dlod_linearize_gamma(const sampler2D tex, const float4 tex_coords, const float3 gamma)
{
    const float4 texel = tex2Dlod(tex, tex_coords);
    return decode_gamma_input(texel, gamma);
}
//float4 tex2Dlod_linearize_gamma(const sampler2D tex, const float4 tex_coords, const int texel_off, const float3 gamma)
//{ return decode_gamma_input(tex2Dlod(tex, tex_coords, texel_off), gamma); }
#endif // GAMMA_MANAGEMENT_H

View File

@@ -1,76 +0,0 @@
#ifndef _HELPER_FUNCTIONS_AND_MACROS_H
#define _HELPER_FUNCTIONS_AND_MACROS_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2020 Alex Gunter
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// Samples tex at tex_coords through tex2Dlod, with the last two components of
// the lookup coordinate set to 0 and a trailing 0.0 argument.
float4 tex2D_nograd(sampler2D tex, float2 tex_coords)
{
    const float4 lod_coords = float4(tex_coords, 0, 0);
    return tex2Dlod(tex, lod_coords, 0.0);
}
// ReShade 4 does not permit the use of functions or the ternary operator
// outside of a function definition. This is a problem for this port
// because the original crt-royale shader makes heavy use of these
// constructs at the root level.
// These preprocessor definitions are a workaround for this limitation.
// Note that they are strictly intended for defining complex global
// constants. I doubt they're more performant than the built-in
// equivalents, so I recommend using the built-ins whenever you can.
// Every macro below is built only from comparisons, casts and arithmetic so
// that it can be used in global constant initializers.  Each expansion is
// wrapped in parentheses so it stays a single operand when it is combined
// with neighbouring operators (e.g. 2 * macro_min(a, b)).
// macro_sign: -1 for negative c, 0 for zero, +1 for positive c.
#define macro_sign(c) (((int) ((c) > 0)) - ((int) ((c) < 0)))
// macro_abs: magnitude of c (c multiplied by its own sign).
#define macro_abs(c) ((c) * macro_sign(c))
// macro_min / macro_max: select c or d by multiplying each with a 0/1 mask;
// exactly one of the two masks is 1 for ordinary (non-NaN) operands.
#define macro_min(c, d) ((c) * ((int) ((c) <= (d))) + (d) * ((int) ((c) > (d))))
#define macro_max(c, d) ((c) * ((int) ((c) >= (d))) + (d) * ((int) ((c) < (d))))
// macro_clamp: limit c to the range [l, u].
#define macro_clamp(c, l, u) macro_min(macro_max(c, l), u)
// macro_ceil: truncate c toward zero, then add 1 if that landed below c.
#define macro_ceil(c) ((float) ((int) (c) + (int) (((int) (c)) < (c))))
// macro_cond: arithmetic stand-in for (c ? a : b).  Both a and b are always
// evaluated and blended with 0/1 weights.
#define macro_cond(c, a, b) (float(c) * (a) + float(!(c)) * (b))
//////////////////////// COMMON MATHEMATICAL CONSTANTS ///////////////////////
// pi, written out to 12 decimal places.
static const float pi = 3.141592653589;
// We often want to find the location of the previous texel, e.g.:
// const float2 curr_texel = uv * texture_size;
// const float2 prev_texel = floor(curr_texel - float2(0.5)) + float2(0.5);
// const float2 prev_texel_uv = prev_texel / texture_size;
// However, many GPU drivers round incorrectly around exact texel locations.
// We need to subtract a little less than 0.5 before flooring, and some GPUs
// require this value to be farther from 0.5 than others; define it here.
// const float2 prev_texel =
// floor(curr_texel - float2(under_half)) + float2(0.5);
static const float under_half = 0.4995;
// Avoid dividing by zero; using a macro overloads for float, float2, etc.
// FIX_ZERO(c) expands to macro_max(macro_abs(c), 2^-16), so the result is
// never smaller than 2^-16 and the sign of c is not preserved.
#define FIX_ZERO(c) (macro_max(macro_abs(c), 0.0000152587890625)) // 2^-16
// Earlier floor-based formulation of fmod, kept for reference:
// #define fmod(x, y) ((x) - (y) * floor((x)/(y) + FIX_ZERO(0.0)))
// Replaces fmod(x, y) with frac(x / y) * y for all code that follows.
// NOTE(review): frac() yields values in [0, 1), so this result takes the sign
// of y rather than the sign of x -- confirm callers expect that for negative x.
#define fmod(x, y) (frac((x) / (y)) * (y))
#endif // _HELPER_FUNCTIONS_AND_MACROS_H

View File

@@ -1,676 +0,0 @@
#ifndef PHOSPHOR_MASK_RESIZING_H
#define PHOSPHOR_MASK_RESIZING_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////// INCLUDES //////////////////////////////////
#include "user-settings.fxh"
#include "derived-settings-and-constants.fxh"
///////////////////////////// CODEPATH SELECTION /////////////////////////////
// Choose a looping strategy based on what's allowed:
// Dynamic loops not allowed: Use a flat static loop.
// Dynamic loops accommodated: Coarsely branch around static loops.
// Dynamic loops assumed allowed: Use a flat dynamic loop.
// Resulting defines:
//   DRIVERS_ALLOW_DYNAMIC_BRANCHES defined     -> neither macro below is set
//   else ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS set -> BREAK_LOOPS_INTO_PIECES
//   else                                       -> USE_SINGLE_STATIC_LOOP
// (ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS is the identifier's actual spelling.)
#ifndef DRIVERS_ALLOW_DYNAMIC_BRANCHES
#ifdef ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS
#define BREAK_LOOPS_INTO_PIECES
#else
#define USE_SINGLE_STATIC_LOOP
#endif
#endif // No else needed: Dynamic loops assumed.
////////////////////////////////// CONSTANTS /////////////////////////////////
// The larger the resized tile, the fewer samples we'll need for downsizing.
// See if we can get a static min tile size > mask_min_allowed_tile_size:
// Smallest allowed tile size: allowed triad size times triads per tile,
// rounded up with macro_ceil.
static const float mask_min_allowed_tile_size = macro_ceil(
mask_min_allowed_triad_size * mask_triads_per_tile);
// Here the expected minimum is simply set to the allowed minimum.
static const float mask_min_expected_tile_size =
mask_min_allowed_tile_size;
// Limit the number of sinc resize taps by the maximum minification factor:
static const float pi_over_lobes = pi/mask_sinc_lobes;
// 2 * lobes taps, scaled by (LUT width / smallest expected tile size):
static const float max_sinc_resize_samples_float = 2.0 * mask_sinc_lobes *
mask_resize_src_lut_size.x/mask_min_expected_tile_size;
// Vectorized loops sample in multiples of 4. Round up to be safe:
static const float max_sinc_resize_samples_m4 = macro_ceil(
max_sinc_resize_samples_float * 0.25) * 4.0;
///////////////////////// RESAMPLING FUNCTION HELPERS ////////////////////////
float get_dynamic_loop_size(const float magnification_scale)
{
    //  Requires:   These global constants must be defined:
    //              1.) mask_sinc_lobes
    //              2.) max_sinc_resize_samples_m4
    //  Returns:    The sample count to use for a downsize at
    //              magnification_scale: the minimum needed count, rounded up
    //              to a multiple of 4 and limited by the codepath's maximum.
    //  Pick the upper limit first.  Without guaranteed dynamic branches the
    //  loop is simulated with branches, which imposes a 128-sample limit.
    #ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES
        const float sample_cap_m4 = max_sinc_resize_samples_m4;
    #else
        const float sample_cap_m4 = min(128.0, max_sinc_resize_samples_m4);
    #endif
    //  The filter spans 2*lobes output pixels (not input texels) because
    //  we're downsizing, so the required input sample count grows as the
    //  magnification scale shrinks.
    const float needed_samples = 2.0 * mask_sinc_lobes / magnification_scale;
    const float needed_samples_m4 = ceil(needed_samples * 0.25) * 4.0;
    return min(needed_samples_m4, sample_cap_m4);
}
float2 get_first_texel_tile_uv_and_dist(const float2 tex_uv,
    const float2 texture_size, const float dr,
    const float input_tiles_per_texture_r, const float samples,
    const bool vertical)
{
    //  Requires:   1.) dr == du == 1.0/texture_size.x or
    //                  dr == dv == 1.0/texture_size.y
    //                  (whichever direction is being resampled); it is a
    //                  scalar to save register space.
    //              2.) input_tiles_per_texture_r is the number of input
    //                  tiles that fit in the input texture along the
    //                  resampled direction.
    //              3.) vertical selects the y components of the result
    //                  (true) or the x components (false).
    //  Returns:    float2(first sample's tile_uv coordinate, its texel
    //              distance from the destination pixel), for the resized
    //              dimension only.
    //  Both dimensions are computed as if this were a one-pass 2D resize;
    //  the component for the other dimension is scaled by the wrong dr and
    //  is discarded by the final selection.
    //  Destination position in texels, and the center of the texel before it:
    const float2 dest_texel = tex_uv * texture_size;
    const float2 texel_before =
        floor(dest_texel - float2(under_half, under_half)) + float2(0.5, 0.5);
    //  Sampling starts with the topmost/leftmost sample and works down/right,
    //  so step back (samples/2 - 1) texels to reach the first one:
    const float2 back_offset = float2(samples.xx/2.0.xx - 1.0.xx);
    const float2 start_texel = texel_before - back_offset;
    const float2 start_dist = dest_texel - start_texel;
    //  Texels -> tex_uv -> tile_uv, so later wrapping can use frac():
    const float2 start_tile_uv_wrapped =
        (start_texel * dr) * input_tiles_per_texture_r;
    //  The first texel is special, since its coordinate might be negative:
    //  add 1.0 to the frac() of every negative component.
    const float2 was_negative =
        float2(start_tile_uv_wrapped < float2(0.0, 0.0));
    const float2 start_tile_uv = frac(start_tile_uv_wrapped) + was_negative;
    //  Pack the coordinate and distance of the dimension being resized:
    if(vertical)
    {
        return float2(start_tile_uv.y, start_dist.y);
    }
    return float2(start_tile_uv.x, start_dist.x);
}
float4 tex2Dlod0try(const sampler2D tex, const float2 tex_uv)
{
    //  Mipmapping and anisotropic filtering get confused by sinc-resampling.
    //  One [slow] workaround is to select the lowest mip level.  The lookup
    //  used depends on which compatibility macro is defined:
    //      ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD:  tex2Dlod with (0.0, 0.0)
    //      ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS: tex2Dbias with (0.0, -16.0)
    //      neither:                                 plain tex2D
    #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
        const float4 texel = tex2Dlod(tex, float4(tex_uv, 0.0, 0.0));
    #else
        #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
            const float4 texel = tex2Dbias(tex, float4(tex_uv, 0.0, -16.0));
        #else
            const float4 texel = tex2D(tex, tex_uv);
        #endif
    #endif
    return texel;
}
////////////////////////////// LOOP BODY MACROS //////////////////////////////
// Using functions can exceed the temporary register limit, so we're
// stuck with #define macros (I'm TRULY sorry). They're declared here instead
// of above to be closer to the actual invocation sites. Steps:
// 1.) Get the exact texel location.
// 2.) Sample the phosphor mask (already assumed encoded in linear RGB).
// 3.) Get the distance from the current pixel and sinc weight:
// sinc(dist) = sin(pi * dist)/(pi * dist)
// We can also use the slower/smoother Lanczos instead:
// L(x) = sinc(dist) * sinc(dist / lobes)
// 4.) Accumulate the weight sum in weights, and accumulate the weighted texels
// in pixel_color (we'll normalize outside the loop at the end).
// We vectorize the loop to help reduce the Lanczos window's cost.
// The r coord is the coord in the dimension we're resizing along (u or v),
// and first_texel_tile_uv_rrrr is a float4 of the first texel's u or v
// tile_uv coord in [0, 1]. tex_uv_r will contain the tile_uv u or v coord
// for four new texel samples.
// Builds the coordinates for the next four samples of a resampling loop.
// Expects i_base, i, first_texel_tile_uv_rrrr, tile_dr and tile_size_uv_r to
// be in scope at the expansion site, and declares:
//   true_i:    sample indices i_base + i + {0, 1, 2, 3}
//   tile_uv_r: first_texel_tile_uv_rrrr + true_i * tile_dr, wrapped by frac()
//   tex_uv_r:  tile_uv_r scaled by tile_size_uv_r, used for the lookups
#define CALCULATE_R_COORD_FOR_4_SAMPLES \
const float4 true_i = float4(i_base + i,i_base + i,i_base + i,i_base + i) + float4(0.0, 1.0, 2.0, 3.0); \
const float4 tile_uv_r = frac( \
first_texel_tile_uv_rrrr + true_i * tile_dr); \
const float4 tex_uv_r = tile_uv_r * tile_size_uv_r;
// CALCULATE_SINC_RESAMPLE_WEIGHTS declares `weights`, the four filter weights
// for the current samples.  It reads pi_dist (and dist in the Lanczos
// variant), which UPDATE_COLOR_AND_WEIGHT_SUMS declares just before expanding
// it.  Each weight is limited to at most 1.0 by min().
#ifdef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
// Lanczos variant: sinc(dist) * sinc(dist / lobes).
#define CALCULATE_SINC_RESAMPLE_WEIGHTS \
const float4 pi_dist_over_lobes = pi_over_lobes * dist; \
const float4 weights = min(sin(pi_dist) * sin(pi_dist_over_lobes) /\
(pi_dist*pi_dist_over_lobes), 1.0.xxxx);
#else
// Plain sinc variant: sin(pi * dist) / (pi * dist).
#define CALCULATE_SINC_RESAMPLE_WEIGHTS \
const float4 weights = min(sin(pi_dist)/pi_dist, 1.0.xxxx);
#endif
// Weighs the four samples new_sample0..3 and accumulates them.
// Expects magnification_scale, first_dist_unscaled, true_i, new_sample0..3,
// pixel_color and weight_sum to be in scope at the expansion site.
// dist is the scaled distance of each sample from the destination pixel;
// pixel_color accumulates the weighted colors and weight_sum accumulates the
// four weights lane by lane (the caller reduces and normalizes afterwards).
#define UPDATE_COLOR_AND_WEIGHT_SUMS \
const float4 dist = magnification_scale * \
abs(first_dist_unscaled - true_i); \
const float4 pi_dist = pi * dist; \
CALCULATE_SINC_RESAMPLE_WEIGHTS; \
pixel_color += new_sample0 * weights.xxx; \
pixel_color += new_sample1 * weights.yyy; \
pixel_color += new_sample2 * weights.zzz; \
pixel_color += new_sample3 * weights.www; \
weight_sum += weights;
// One iteration of the vertical resampling loop: computes four sample
// coordinates, fetches four texels that share tex_uv.x and vary in y
// (tex_uv_r.xyzw), then accumulates them.  Expects tex and tex_uv to be in
// scope, plus everything the two macros it expands require.
#define VERTICAL_SINC_RESAMPLE_LOOP_BODY \
CALCULATE_R_COORD_FOR_4_SAMPLES; \
const float3 new_sample0 = tex2Dlod0try(tex, \
float2(tex_uv.x, tex_uv_r.x)).rgb; \
const float3 new_sample1 = tex2Dlod0try(tex, \
float2(tex_uv.x, tex_uv_r.y)).rgb; \
const float3 new_sample2 = tex2Dlod0try(tex, \
float2(tex_uv.x, tex_uv_r.z)).rgb; \
const float3 new_sample3 = tex2Dlod0try(tex, \
float2(tex_uv.x, tex_uv_r.w)).rgb; \
UPDATE_COLOR_AND_WEIGHT_SUMS;
// Horizontal counterpart: the four texels share tex_uv.y and vary in x
// (tex_uv_r.xyzw).  Same scope requirements as the vertical loop body.
#define HORIZONTAL_SINC_RESAMPLE_LOOP_BODY \
CALCULATE_R_COORD_FOR_4_SAMPLES; \
const float3 new_sample0 = tex2Dlod0try(tex, \
float2(tex_uv_r.x, tex_uv.y)).rgb; \
const float3 new_sample1 = tex2Dlod0try(tex, \
float2(tex_uv_r.y, tex_uv.y)).rgb; \
const float3 new_sample2 = tex2Dlod0try(tex, \
float2(tex_uv_r.z, tex_uv.y)).rgb; \
const float3 new_sample3 = tex2Dlod0try(tex, \
float2(tex_uv_r.w, tex_uv.y)).rgb; \
UPDATE_COLOR_AND_WEIGHT_SUMS;
//////////////////////////// RESAMPLING FUNCTIONS ////////////////////////////
// Vertically downsizes a tiled input texture with a sinc (optionally
// Lanczos-windowed) filter and returns the weight-normalized RGB result for
// the pixel at tex_uv.
float3 downsample_vertical_sinc_tiled(const sampler2D tex,
const float2 tex_uv, const float2 texture_size, const float dr,
const float magnification_scale, const float tile_size_uv_r)
{
// Requires: 1.) dr == du == 1.0/texture_size.x or
// dr == dv == 1.0/texture_size.y
// (whichever direction we're resampling in).
// It's a scalar to save register space.
// 2.) tile_size_uv_r is the number of texels an input tile
// takes up in the input texture, in the direction we're
// resampling this pass.
// 3.) magnification_scale must be <= 1.0.
// Returns: Return a [Lanczos] sinc-resampled pixel of a vertically
// downsized input tile embedded in an input texture. (The
// vertical version is special-cased though: It assumes the
// tile size equals the [static] texture size, since it's used
// on an LUT texture input containing one tile. For more
// generic use, eliminate the "static" in the parameters.)
// The "r" in "dr," "tile_size_uv_r," etc. refers to the dimension
// we're resizing along, e.g. "dy" in this case.
#ifdef USE_SINGLE_STATIC_LOOP
// A static loop can be faster, but it might blur too much from using
// more samples than it should.
static const int samples = int(max_sinc_resize_samples_m4);
#else
const int samples = int(get_dynamic_loop_size(magnification_scale));
#endif
// Get the first sample location (scalar tile uv coord along the resized
// dimension) and distance from the output location (in texels):
// NOTE(review): input_tiles_per_texture_r and tile_dr below are declared
// "static const" although they are computed from function parameters; the
// horizontal variant uses plain "const" for the same values. Confirm the
// static qualifier behaves as intended here.
static const float input_tiles_per_texture_r = 1.0/tile_size_uv_r;
// true = vertical resize:
const float2 first_texel_tile_r_and_dist = get_first_texel_tile_uv_and_dist(
tex_uv, texture_size, dr, input_tiles_per_texture_r, samples, true);
// Broadcast the packed result so the loop body can process 4 samples at once:
const float4 first_texel_tile_uv_rrrr = first_texel_tile_r_and_dist.xxxx;
const float4 first_dist_unscaled = first_texel_tile_r_and_dist.yyyy;
// Get the tile sample offset:
static const float tile_dr = dr * input_tiles_per_texture_r;
// Sum up each weight and weighted sample color, varying the looping
// strategy based on our expected dynamic loop capabilities. See the
// loop body macros above.
// i_base counts the samples consumed by earlier loop pieces; the loop body
// macro adds it to the loop index i to get each sample's true index.
int i_base = 0;
float4 weight_sum = 0.0.xxxx;
float3 pixel_color = 0.0.xxx;
static const int i_step = 4;
#ifdef BREAK_LOOPS_INTO_PIECES
// Fixed-size pieces of 64/32/16/8/4 samples, each guarded by a branch on
// the number of samples still remaining.
if(samples - i_base >= 64)
{
for(int i = 0; i < 64; i += i_step)
{
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 64;
}
if(samples - i_base >= 32)
{
for(int i = 0; i < 32; i += i_step)
{
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 32;
}
if(samples - i_base >= 16)
{
for(int i = 0; i < 16; i += i_step)
{
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 16;
}
if(samples - i_base >= 8)
{
for(int i = 0; i < 8; i += i_step)
{
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 8;
}
if(samples - i_base >= 4)
{
for(int i = 0; i < 4; i += i_step)
{
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 4;
}
// Do another 4-sample block for a total of 128 max samples.
if(samples - i_base > 0)
{
for(int i = 0; i < 4; i += i_step)
{
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
}
}
#else
// Single loop over all samples, four per iteration (i_base stays 0).
for(int i = 0; i < samples; i += i_step)
{
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
}
#endif
// Normalize so the weight_sum == 1.0, and return:
// weight_sum holds one partial sum per vector lane; add all four together.
const float2 weight_sum_reduce = weight_sum.xy + weight_sum.zw;
const float3 scalar_weight_sum = float3(weight_sum_reduce.xxx +
weight_sum_reduce.yyy);
return (pixel_color/scalar_weight_sum);
}
// Horizontally downsizes a tiled input texture with a sinc (optionally
// Lanczos-windowed) filter and returns the weight-normalized RGB result for
// the pixel at tex_uv.
float3 downsample_horizontal_sinc_tiled(const sampler2D tex,
const float2 tex_uv, const float2 texture_size, const float dr,
const float magnification_scale, const float tile_size_uv_r)
{
// Differences from downsample_vertical_sinc_tiled:
// 1.) The dr and tile_size_uv_r parameters are not static consts.
// 2.) The "vertical" parameter to get_first_texel_tile_uv_and_dist is
// set to false instead of true.
// 3.) The horizontal version of the loop body is used.
// TODO: If we can get guaranteed compile-time dead code elimination,
// we can combine the vertical/horizontal downsampling functions by:
// 1.) Add an extra static const bool parameter called "vertical."
// 2.) Supply it with the result of get_first_texel_tile_uv_and_dist().
// 3.) Use a conditional assignment in the loop body macro. This is the
// tricky part: We DO NOT want to incur the extra conditional
// assignment in the inner loop at runtime!
// The "r" in "dr," "tile_size_uv_r," etc. refers to the dimension
// we're resizing along, e.g. "dx" in this case.
#ifdef USE_SINGLE_STATIC_LOOP
// If we have to load all samples, we might as well use them.
static const int samples = int(max_sinc_resize_samples_m4);
#else
const int samples = int(get_dynamic_loop_size(magnification_scale));
#endif
// Get the first sample location (scalar tile uv coord along resized
// dimension) and distance from the output location (in texels):
const float input_tiles_per_texture_r = 1.0/tile_size_uv_r;
// false = horizontal resize:
const float2 first_texel_tile_r_and_dist = get_first_texel_tile_uv_and_dist(
tex_uv, texture_size, dr, input_tiles_per_texture_r, samples, false);
// Broadcast the packed result so the loop body can process 4 samples at once:
const float4 first_texel_tile_uv_rrrr = first_texel_tile_r_and_dist.xxxx;
const float4 first_dist_unscaled = first_texel_tile_r_and_dist.yyyy;
// Get the tile sample offset:
const float tile_dr = dr * input_tiles_per_texture_r;
// Sum up each weight and weighted sample color, varying the looping
// strategy based on our expected dynamic loop capabilities. See the
// loop body macros above.
// i_base counts the samples consumed by earlier loop pieces; the loop body
// macro adds it to the loop index i to get each sample's true index.
int i_base = 0;
float4 weight_sum = 0.0.xxxx;
float3 pixel_color = 0.0.xxx;
static const int i_step = 4;
#ifdef BREAK_LOOPS_INTO_PIECES
// Fixed-size pieces of 64/32/16/8/4 samples, each guarded by a branch on
// the number of samples still remaining.
if(samples - i_base >= 64)
{
for(int i = 0; i < 64; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 64;
}
if(samples - i_base >= 32)
{
for(int i = 0; i < 32; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 32;
}
if(samples - i_base >= 16)
{
for(int i = 0; i < 16; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 16;
}
if(samples - i_base >= 8)
{
for(int i = 0; i < 8; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 8;
}
if(samples - i_base >= 4)
{
for(int i = 0; i < 4; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 4;
}
// Do another 4-sample block for a total of 128 max samples.
if(samples - i_base > 0)
{
for(int i = 0; i < 4; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
}
#else
// Single loop over all samples, four per iteration (i_base stays 0).
for(int i = 0; i < samples; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
#endif
// Normalize so the weight_sum == 1.0, and return:
// weight_sum holds one partial sum per vector lane; add all four together.
const float2 weight_sum_reduce = weight_sum.xy + weight_sum.zw;
const float3 scalar_weight_sum = float3(weight_sum_reduce.xxx +
weight_sum_reduce.yyy);
return (pixel_color/scalar_weight_sum);
}
//////////////////////////// TILE SIZE CALCULATION ///////////////////////////
float2 get_resized_mask_tile_size(const float2 estimated_viewport_size,
    const float2 estimated_mask_resize_output_size,
    const bool solemnly_swear_same_inputs_for_every_pass)
{
    //  Purpose:    Compute the final size of one manually resized phosphor
    //              mask tile, after constraining the user's desired size so
    //              that later mask sampling stays free of artifacts.
    //  Requires:   Global constants, defined under these constraints:
    //              1.) mask_resize_num_triads: Large enough that the mask
    //                  sampling method has no artifacts later (long story;
    //                  see derived-settings-and-constants.h)
    //              2.) mask_resize_src_lut_size: Texel size of the mask LUT
    //              3.) mask_triads_per_tile: Horizontal triad count of the LUT
    //              4.) mask_min_allowed_triad_size: User setting (the more
    //                  restrictive it is, the faster the resize runs)
    //              5.) mask_min_allowed_tile_size_x < mask_resize_src_lut_size.x
    //              6.) mask_triad_size_desired_{runtime, static}
    //              7.) mask_num_triads_desired_{runtime, static}
    //              8.) mask_specify_num_triads is 0.0/1.0 (false/true)
    //              Function parameters:
    //              1.) estimated_viewport_size == (final viewport size).
    //                  When mask_specify_num_triads is 1.0/true, an error in
    //                  this estimate changes the triad count relative to the
    //                  user's preference by roughly the same factor.
    //              2.) estimated_mask_resize_output_size must equal the
    //                  output size of the MASK_RESIZE pass.
    //                  Exception: x may be estimated garbage if and only if
    //                  the caller discards the x result.
    //              3.) solemnly_swear_same_inputs_for_every_pass: Pass false
    //                  unless every call in every pass is guaranteed to use
    //                  identical sizes for the other parameters.
    //              Across multiple passes, always supply the same y viewport
    //              size/scale, and the same x viewport size/scale whenever
    //              the x result is used.
    //  Returns:    The constrained tile size.  Under unusual circumstances
    //              the tile may end up vertically stretched (details below).

    //  Shape of the source LUT tile.  These must describe the real LUT:
    static const float lut_height_over_width =
        mask_resize_src_lut_size.y/mask_resize_src_lut_size.x;
    static const float lut_width_over_height = 1.0/lut_height_over_width;
    static const float2 unit_tile_shape = float2(1.0, lut_height_over_width);
    //  Smallest tile we are allowed to produce, in both dimensions:
    static const float2 smallest_tile =
        mask_min_allowed_tile_size * unit_tile_shape;

    //  Blend between "user chose a triad size" and "user chose a triad
    //  count".  In the latter case a wrong estimated_viewport_size.x means
    //  the user's preference is misinterpreted:
    const float triad_size_from_count =
        estimated_viewport_size.x / mask_num_triads_desired;
    const float requested_tile_width = mask_triads_per_tile * lerp(
        mask_triad_size_desired,
        triad_size_from_count,
        mask_specify_num_triads);
    if(get_mask_sample_mode() > 0.5)
    {
        //  Only the MASK_RESIZE sampling path needs the constraints below.
        return requested_tile_width * unit_tile_shape;
    }

    //  Never upsize beyond the LUT, then clamp both dimensions between the
    //  minimum tile size and the space available per tile in MASK_RESIZE:
    const float2 largest_tile =
        estimated_mask_resize_output_size / mask_resize_num_tiles;
    const float2 unclamped_tile =
        min(requested_tile_width, mask_resize_src_lut_size.x) *
        unit_tile_shape;
    const float2 clamped_tile =
        clamp(unclamped_tile, smallest_tile, largest_tile);

    //  Restoring the LUT aspect ratio is the delicate step:
    //  While a pass resizes in y, the x quantities may be MEANINGLESS (a
    //  bogus estimated_mask_resize_output_size.x poisons largest_tile.x and
    //  clamped_tile.x).  The y size therefore must not be derived from
    //  clamped_tile.x: if x clamps here when it should not, later passes
    //  calling with correct sizes will not clamp the same way, and the
    //  mismatch breaks the sampling coords in MASKED_SCANLINES.  So x is
    //  limited by y, but y is limited by x only when the caller swears the
    //  parameters were identical (and correct) in every pass.
    //  Consequently, triads can look vertically stretched when:
    //  a.) mask_resize_src_lut_size.x > mask_resize_src_lut_size.y: Wide
    //      LUT's may clamp x more than y (all provided LUT's are square)
    //  b.) true_viewport_size.x < true_viewport_size.y: The screen is
    //      vertically oriented (not accounted for anyway)
    //  c.) mask_resize_viewport_scale.x < mask_resize_viewport_scale.y:
    //      The viewport scales are equal by default.
    //  In any of those cases the stretching can be fixed by setting:
    //      mask_resize_viewport_scale.x = mask_resize_viewport_scale.y *
    //          (1.0 / min_expected_aspect_ratio) *
    //          (mask_resize_src_lut_size.x / mask_resize_src_lut_size.y)
    const float width_implied_by_height =
        clamped_tile.y * lut_width_over_height;
    const float height_implied_by_width = lerp(clamped_tile.y,
        clamped_tile.x * lut_height_over_width,
        float(solemnly_swear_same_inputs_for_every_pass));
    const float2 aspect_limited_tile = float2(
        min(clamped_tile.x, width_implied_by_height),
        min(clamped_tile.y, height_implied_by_width));

    //  Tiled sampling needs integer tile sizes in both directions.  Round
    //  down (never up), nudging by a tiny epsilon first so that floor()
    //  cannot decrease values that are already whole numbers:
    const float2 whole_number_guard =
        float2(FIX_ZERO(0.0),FIX_ZERO(0.0));
    return floor(aspect_limited_tile + whole_number_guard);
}
///////////////////////// FINAL MASK SAMPLING HELPERS ////////////////////////
float4 get_mask_sampling_parameters(const float2 mask_resize_texture_size,
    const float2 mask_resize_video_size, const float2 true_viewport_size,
    out float2 mask_tiles_per_screen)
{
    //  Purpose:    Describe where the phosphor mask tile lives in texture
    //              space and how many tiles cover the screen.
    //  Requires:   1.) Everything get_resized_mask_tile_size() requires,
    //                  particularly regarding global constants.
    //              Function parameters:
    //              1.) mask_resize_texture_size == MASK_RESIZE.texture_size
    //                  if get_mask_sample_mode() is 0 (otherwise anything)
    //              2.) mask_resize_video_size == MASK_RESIZE.video_size
    //                  if get_mask_sample_mode() is 0 (otherwise anything)
    //              3.) true_viewport_size == IN.output_size for a pass set
    //                  to 1.0 viewport scale (i.e. it must be correct)
    //  Returns:    A float4 holding:
    //                  xy: tex_uv coords for the start of the mask tile
    //                  zw: tex_uv size of the mask tile from start to end
    //              The out parameter mask_tiles_per_screen receives the
    //              number of mask tiles that fit on the screen.

    //  Fetch the final resized tile size.  The viewport size and the mask
    //  resize viewport scale must be correct, but we pass false rather than
    //  swearing they were correct in both mask resize passes: a true promise
    //  allows a better tile aspect ratio, whereas a false promise yields
    //  inconsistent sizes across passes and broken texture coordinates.
    const float sample_mode = get_mask_sample_mode();
    const float2 resized_tile_size = get_resized_mask_tile_size(
        true_viewport_size, mask_resize_video_size, false);

    if(sample_mode > 1.5)
    {
        //  Tile the full LUT at a 1:1 pixel:texel ratio without resizing.
        //  Here a "tile" means the whole texture (containing many triads):
        mask_tiles_per_screen = true_viewport_size/mask_texture_large_size;
        return float4(0.0, 0.0, 1.0, 1.0);
    }

    //  Both remaining modes count tiles using the resized tile size, and the
    //  count must be based on the *true* viewport size:
    mask_tiles_per_screen = true_viewport_size / resized_tile_size;

    if(sample_mode < 0.5)
    {
        //  Sample MASK_RESIZE: The resized tile covers only a fraction of
        //  the texture and begins at a nonzero offset, leaving room for
        //  border texels:
        const float2 tile_uv_extent =
            resized_tile_size / mask_resize_texture_size;
        const float2 tiles_skipped = mask_start_texels/resized_tile_size;
        const float2 tile_uv_origin = tiles_skipped * tile_uv_extent;
        return float4(tile_uv_origin, tile_uv_extent);
    }

    //  Hardware-resize the original LUT: The texture truly contains a single
    //  unresized phosphor mask tile, so the tile spans all of it.
    return float4(0.0, 0.0, 1.0, 1.0);
}
float2 fix_tiling_discontinuities_normalized(const float2 tile_uv,
    float2 duv_dx, float2 duv_dy)
{
    //  Purpose:    Remove wraparound discontinuities from tile-relative uv
    //              coords within a 2x2 pixel quad.
    //  Requires:   1.) duv_dx == ddx(tile_uv)
    //              2.) duv_dy == ddy(tile_uv)
    //              3.) tile_uv holds tile-relative uv coords in [0, 1], so
    //                  (0.5, 0.5) is the tile center, etc.  ("Tile" can mean
    //                  texture, the video embedded in the texture, or some
    //                  other "tile" embedded in a texture.)
    //  Returns:    New tile_uv coords with no discontinuities across a 2x2
    //              pixel quad.
    //  Description:
    //  A wrap from 1.0 to 0.0 makes the derivatives discontinuous.  We assume
    //  that happened whenever the absolute difference between any fragments
    //  in the 2x2 block exceeds ~half a tile.  If the block has a u or v jump
    //  and this fragment sits in the first half of the tile along that axis
    //  (meaning it is the one that wrapped), one whole tile is added to that
    //  coord so the block becomes continuous; the coord then exceeds 1.0 and
    //  lands in the padding area beyond the tile.  The derivatives arrive as
    //  parameters so the caller can reuse them.

    //  With high-quality (nVidia-style) derivatives, fold in the cross
    //  derivative so diagonally opposite fragments see each other:
    const float2 quad_change_x = abs(duv_dx) + abs(ddy(duv_dx));
    const float2 quad_change_y = abs(duv_dy) + abs(ddx(duv_dy));
    //  1.0 per axis where the quad straddles a wrap, else 0.0:
    const float2 axis_has_jump =
        float2(quad_change_x + quad_change_y > 0.5.xx);
    //  1.0 per axis where this fragment is on the wrapped (low) side:
    const float2 on_wrapped_side = float2(tile_uv < 0.5.xx);
    return tile_uv + axis_has_jump * on_wrapped_side;
}
float2 convert_phosphor_tile_uv_wrap_to_tex_uv(const float2 tile_uv_wrap,
    const float4 mask_tile_start_uv_and_size)
{
    //  Purpose:    Map repeating tile-relative coords to the tex_uv coords
    //              used to sample the phosphor mask texture.
    //  Requires:   1.) tile_uv_wrap holds tile-relative uv coords where the
    //                  tile spans [0, 1] and (0.5, 0.5) is the tile center.
    //                  Inputs may range over [0, inf]; their fractional
    //                  parts map to a repeated tile.  ("Tile" can mean
    //                  texture, the video embedded in the texture, or some
    //                  other "tile" embedded in a texture.)
    //              2.) mask_tile_start_uv_and_size.xy holds the tex_uv
    //                  coords where the embedded tile starts in the full
    //                  texture.
    //              3.) mask_tile_start_uv_and_size.zw holds the [fractional]
    //                  tex_uv size of the embedded tile in the full texture.
    //  Returns:    tex_uv coords (for texture sampling) corresponding to
    //              tile_uv_wrap.
    if(get_mask_sample_mode() < 0.5)
    {
        //  Manual tiling of the resized mask: reduce the coords to a single
        //  tile first.  frac/fmod on coords confuses anisotropic filtering,
        //  so apply whichever workaround the user options select
        //  (derived-settings-and-constants.h disables incompatible options).
        #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
            float2 wrapped_uv = frac(tile_uv_wrap * 0.5) * 2.0;
        #else
            float2 wrapped_uv = frac(tile_uv_wrap);
        #endif
        #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
            wrapped_uv = fix_tiling_discontinuities_normalized(wrapped_uv,
                ddx(wrapped_uv), ddy(wrapped_uv));
        #endif
        //  The tile sits inside a padded FBO and may begin at a nonzero
        //  offset when border texels are used to avoid artifacts:
        const float2 tile_origin_uv = mask_tile_start_uv_and_size.xy;
        const float2 tile_extent_uv = mask_tile_start_uv_and_size.zw;
        return tile_origin_uv + wrapped_uv * tile_extent_uv;
    }
    else
    {
        //  Hardware tiling of the input phosphor mask texture.  In mode 2
        //  (tiling at the original size) the "tile" is the whole texture,
        //  holding many triads at a 1:1 pixel:texel ratio; OTHERWISE the
        //  texture holds one unresized tile.  Either way tile_uv_wrap
        //  already contains the correct coords.
        return tile_uv_wrap;
    }
}
#endif // PHOSPHOR_MASK_RESIZING_H

View File

@@ -1,243 +0,0 @@
#ifndef QUAD_PIXEL_COMMUNICATION_H
#define QUAD_PIXEL_COMMUNICATION_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey*
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////////// DISCLAIMER /////////////////////////////////
// *This code was inspired by "Shader Amortization using Pixel Quad Message
// Passing" by Eric Penner, published in GPU Pro 2, Chapter VI.2. My intent
// is not to plagiarize his fundamentally similar code and assert my own
// copyright, but the algorithmic helper functions require so little code that
// implementations can't vary by much except bugfixes and conventions. I just
// wanted to license my own particular code here to avoid ambiguity and make it
// clear that as far as I'm concerned, people can do as they please with it.
///////////////////////////////// DESCRIPTION ////////////////////////////////
// Given screen pixel numbers, derive a "quad vector" describing a fragment's
// position in its 2x2 pixel quad. Given that vector, obtain the values of any
// variable at neighboring fragments.
// Requires: Using this file in general requires:
// 1.) ddx() and ddy() are present in the current Cg profile.
// 2.) The GPU driver is using fine/high-quality derivatives.
// Functions will give incorrect results if this is not true,
// so a test function is included.
///////////////////// QUAD-PIXEL COMMUNICATION PRIMITIVES ////////////////////
float4 get_quad_vector_naive(const float4 output_pixel_num_wrt_uvxy)
{
    //  Purpose:    Locate the current fragment inside its 2x2 pixel quad,
    //              assuming quads begin on even pixel numbers.
    //  Requires:   Two measures of the fragment's output pixel number in
    //              the range ([0, IN.output_size.x), [0, IN.output_size.y)):
    //              1.) output_pixel_num_wrt_uvxy.xy increase with uv coords.
    //              2.) output_pixel_num_wrt_uvxy.zw increase with screen xy.
    //  Returns:    Two measures of the fragment's position in its quad:
    //              1.) .xy is the 2x2 placement with respect to uv direction
    //                  (the origin (0, 0) is at the top-left):
    //                      top-left     = (-1.0, -1.0)
    //                      top-right    = ( 1.0, -1.0)
    //                      bottom-left  = (-1.0,  1.0)
    //                      bottom-right = ( 1.0,  1.0)
    //                  This is what you need to arrange/weight shared
    //                  texture samples.
    //              2.) .zw is the 2x2 placement with respect to screen xy
    //                  direction (IN.position); the origin varies.
    //                  quad_gather needs this measure to work correctly.
    //              Note: quad_vector.zw = quad_vector.xy * float2(
    //                      ddx(output_pixel_num_wrt_uvxy.x),
    //                      ddy(output_pixel_num_wrt_uvxy.y));
    //  Caveats:    The even-start assumption can be wrong for odd output
    //              resolutions (nondeterministically so).

    //  frac(n/2)*2 extracts n modulo 2; scaling and shifting that value
    //  then spreads it symmetrically around zero:
    const float4 pixel_parity = frac(output_pixel_num_wrt_uvxy * 0.5) * 2.0;
    return pixel_parity * 2.0 - 1.0.xxxx;
}
float4 get_quad_vector(const float4 output_pixel_num_wrt_uvxy)
{
    //  Purpose:    Locate the current fragment inside its 2x2 pixel quad
    //              without assuming where the quad starts.
    //  Requires:   Same as get_quad_vector_naive() (see that first).
    //  Returns:    Same as get_quad_vector_naive() (see that first), except
    //              the result stays correct when the 2x2 pixel quad starts
    //              at an odd pixel, which can occur at odd resolutions.
    const float4 naive_quad_vector =
        get_quad_vector_naive(output_pixel_num_wrt_uvxy);
    //  The .zw guess should increase with screen xy.  Where it does not,
    //  the quad starts at an odd pixel, and half the derivative acts as a
    //  per-axis mirroring factor:
    const float mirror_x = 0.5 * ddx(naive_quad_vector.z);
    const float mirror_y = 0.5 * ddy(naive_quad_vector.w);
    return naive_quad_vector * float4(mirror_x, mirror_y, mirror_x, mirror_y);
}
float4 get_quad_vector(const float2 output_pixel_num_wrt_uv)
{
    //  Purpose:    float2 overload of get_quad_vector(): derive the quad
    //              vector from the uv-ordered pixel number alone.
    //  Requires:   1.) ddx() and ddy() are present in the current Cg profile.
    //              2.) output_pixel_num_wrt_uv must increase with uv coords
    //                  and measure the fragment's output pixel number in:
    //                  ([0, IN.output_size.x), [0, IN.output_size.y))
    //  Returns:    Same as get_quad_vector_naive() (see that first), except
    //              the result stays correct when the 2x2 pixel quad starts
    //              at an odd pixel, which can occur at odd resolutions.
    //  Caveats:    Needs less information than the float4 version, but is
    //              potentially slower.

    //  Quad placement guess with respect to uv direction:
    const float2 parity_wrt_uv = frac(output_pixel_num_wrt_uv * 0.5) * 2.0;
    const float2 uv_placement_guess = (parity_wrt_uv - 0.5.xx) * 2.0;
    //  Do screen coords run with or against uv?  These derivatives give the
    //  direction of (screen.x, screen.y) relative to (uv.x, uv.y) in {-1, 1}:
    const float2 uv_to_screen_sign = float2(
        ddx(output_pixel_num_wrt_uv.x),
        ddy(output_pixel_num_wrt_uv.y));
    const float2 screen_placement_guess =
        uv_placement_guess * uv_to_screen_sign;
    //  Where the screen-space guess fails to increase with screen xy, the
    //  quad starts at an odd pixel and the guess must be mirrored:
    const float2 odd_start_sign = 0.5 * float2(
        ddx(screen_placement_guess.x),
        ddy(screen_placement_guess.y));
    return float4(uv_placement_guess, screen_placement_guess) *
        odd_start_sign.xyxy;
}
void quad_gather(const float4 quad_vector, const float4 curr,
    out float4 adjx, out float4 adjy, out float4 diag)
{
    //  Purpose:    Fetch the values that the other three fragments of the
    //              2x2 pixel quad hold for a float4 variable.
    //  Requires:   1.) ddx() and ddy() are present in the current Cg profile.
    //              2.) The GPU driver is using fine/high-quality derivatives.
    //              3.) quad_vector describes the current fragment's location
    //                  in its 2x2 pixel quad, following get_quad_vector()'s
    //                  conventions.
    //              4.) curr is any vector whose neighboring values you want.
    //  Returns:    Via out parameters, the value of curr at the fragment
    //              adjacent in x (adjx), adjacent in y (adjy), and diagonal
    //              (diag).
    const float toward_x = quad_vector.z;
    const float toward_y = quad_vector.w;
    adjy = curr - ddy(curr) * toward_y;
    adjx = curr - ddx(curr) * toward_x;
    //  The diagonal is the y-neighbor of the x-neighbor:
    diag = adjx - ddy(adjx) * toward_y;
}
void quad_gather(const float4 quad_vector, const float3 curr,
    out float3 adjx, out float3 adjy, out float3 diag)
{
    //  float3 overload: outputs the value of curr at the x-adjacent,
    //  y-adjacent, and diagonal fragments of the 2x2 pixel quad.
    const float toward_x = quad_vector.z;
    const float toward_y = quad_vector.w;
    adjy = curr - ddy(curr) * toward_y;
    adjx = curr - ddx(curr) * toward_x;
    //  The diagonal is the y-neighbor of the x-neighbor:
    diag = adjx - ddy(adjx) * toward_y;
}
void quad_gather(const float4 quad_vector, const float2 curr,
    out float2 adjx, out float2 adjy, out float2 diag)
{
    //  float2 overload: outputs the value of curr at the x-adjacent,
    //  y-adjacent, and diagonal fragments of the 2x2 pixel quad.
    const float toward_x = quad_vector.z;
    const float toward_y = quad_vector.w;
    adjy = curr - ddy(curr) * toward_y;
    adjx = curr - ddx(curr) * toward_x;
    //  The diagonal is the y-neighbor of the x-neighbor:
    diag = adjx - ddy(adjx) * toward_y;
}
float4 quad_gather(const float4 quad_vector, const float curr)
{
    //  Scalar overload: packs all four quad values into one float4.
    //  Returns:    return.x == current
    //              return.y == adjacent x
    //              return.z == adjacent y
    //              return.w == diagonal
    const float neighbor_x = curr - ddx(curr) * quad_vector.z;
    //  Step the (current, adjacent x) pair across y in a single operation
    //  to obtain (adjacent y, diagonal):
    const float2 this_row = float2(curr, neighbor_x);
    const float2 other_row = this_row - ddy(this_row) * quad_vector.w;
    return float4(this_row, other_row);
}
float4 quad_gather_sum(const float4 quad_vector, const float4 curr)
{
    //  Purpose:    Add up a float4 variable over the whole 2x2 pixel quad.
    //  Requires:   Same as quad_gather()
    //  Returns:    Sum of curr at all four fragments in the quad.
    float4 neighbor_x, neighbor_y, neighbor_diag;
    quad_gather(quad_vector, curr, neighbor_x, neighbor_y, neighbor_diag);
    const float4 quad_total = curr + neighbor_x + neighbor_y + neighbor_diag;
    return quad_total;
}
float3 quad_gather_sum(const float4 quad_vector, const float3 curr)
{
    //  float3 overload: sum of curr at all four fragments in the quad.
    float3 neighbor_x, neighbor_y, neighbor_diag;
    quad_gather(quad_vector, curr, neighbor_x, neighbor_y, neighbor_diag);
    const float3 quad_total = curr + neighbor_x + neighbor_y + neighbor_diag;
    return quad_total;
}
float2 quad_gather_sum(const float4 quad_vector, const float2 curr)
{
    //  float2 overload: sum of curr at all four fragments in the quad.
    float2 neighbor_x, neighbor_y, neighbor_diag;
    quad_gather(quad_vector, curr, neighbor_x, neighbor_y, neighbor_diag);
    const float2 quad_total = curr + neighbor_x + neighbor_y + neighbor_diag;
    return quad_total;
}
float quad_gather_sum(const float4 quad_vector, const float curr)
{
    //  Scalar overload: sum of curr at all four fragments in the quad.
    //  The scalar quad_gather() returns (current, adjacent x, adjacent y,
    //  diagonal) packed in one float4.
    const float4 quad_values = quad_gather(quad_vector, curr);
    const float quad_total =
        quad_values.x + quad_values.y + quad_values.z + quad_values.w;
    return quad_total;
}
bool fine_derivatives_working(const float4 quad_vector, float4 curr)
{
    //  Purpose:    Test whether quad-pixel communication is working!
    //  Requires:   1.) ddx() and ddy() are present in the current Cg profile.
    //              2.) quad_vector describes the current fragment's location
    //                  in its 2x2 pixel quad, following get_quad_vector()'s
    //                  conventions.
    //              3.) curr must be a test vector with non-constant
    //                  derivatives (its value should change nonlinearly
    //                  across fragments).
    //  Returns:    true if fine/hybrid/high-quality derivatives are used;
    //              false if coarse derivatives are used or the test is
    //              inconclusive.
    //  Method:     Fine derivatives are confirmed if the following holds
    //              (ever, for any value at any fragment):
    //                  (ddy(curr) != ddy(adjx)) or (ddx(curr) != ddx(adjy))
    //              The more values tested (e.g. a float4 tested two ways),
    //              the easier it is to demonstrate they are working.
    //  TODO:       Check for floating point exact comparison issues!
    const float4 slope_x = ddx(curr);
    const float4 slope_y = ddy(curr);
    //  Reconstruct curr as seen by the x-adjacent and y-adjacent fragments:
    const float4 neighbor_x = curr - slope_x * quad_vector.z;
    const float4 neighbor_y = curr - slope_y * quad_vector.w;
    const bool y_slope_varies = any(slope_y != ddy(neighbor_x));
    const bool x_slope_varies = any(slope_x != ddx(neighbor_y));
    return y_slope_varies || x_slope_varies;
}
bool fine_derivatives_working_fast(const float4 quad_vector, float curr)
{
    //  Purpose:    Cheaper, single-comparison variant of
    //              fine_derivatives_working().
    //  Requires:   Same as fine_derivatives_working()
    //  Returns:    Same as fine_derivatives_working()
    //  Usage:      Faster than fine_derivatives_working() but more prone to
    //              false negatives, so it is less useful for offline
    //              testing/debugging.  It is also useless as the basis for
    //              dynamic runtime branching as of May 2014, because
    //              derivatives (and quad-pixel communication) are currently
    //              disallowed in branches.  Future GPU's may permit them in
    //              dynamic branches if you promise the branch condition
    //              evaluates the same for every fragment in the quad (and/or
    //              if the driver enforces that promise by letting a single
    //              fragment control branch decisions).  If that ever
    //              happens, this version may become the more economical
    //              choice.

    //  Compare this fragment's y derivative against the y derivative of the
    //  value reconstructed for its x-adjacent neighbor:
    const float neighbor_x = curr - ddx(curr) * quad_vector.z;
    return (ddy(curr) != ddy(neighbor_x));
}
#endif // QUAD_PIXEL_COMMUNICATION_H

View File

@@ -1,569 +0,0 @@
#ifndef SCANLINE_FUNCTIONS_H
#define SCANLINE_FUNCTIONS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////// INCLUDES //////////////////////////////////
#include "user-settings.fxh"
#include "derived-settings-and-constants.fxh"
#include "special-functions.fxh"
#include "gamma-management.fxh"
///////////////////////////// SCANLINE FUNCTIONS /////////////////////////////
float3 get_gaussian_sigma(const float3 color, const float sigma_range)
{
    //  Purpose:    Choose the Gaussian beam's standard deviation for a
    //              given source color.
    //  Requires:   Globals:
    //              1.) beam_min_sigma and beam_max_sigma are global floats
    //                  holding the desired minimum and maximum beam standard
    //                  deviations, for dim and bright colors respectively.
    //              2.) beam_max_sigma must be > 0.0
    //              3.) beam_min_sigma must be in (0.0, beam_max_sigma]
    //              4.) beam_spot_power must be defined as a global float.
    //              Parameters:
    //              1.) color is the underlying source color along a scanline
    //              2.) sigma_range = beam_max_sigma - beam_min_sigma; it is
    //                  passed in to avoid recomputing it when
    //                  beam_{min, max}_sigma are runtime shader parameters
    //  Optional:   Setting beam_spot_shape_function to 1 selects this inner
    //              f(color) subfunction:
    //                  f(color) = sqrt(1.0 - (color - 1.0)*(color - 1.0))
    //              Otherwise (technically, if beam_spot_shape_function < 0.5):
    //                  f(color) = pow(color, beam_spot_power)
    //  Returns:    The standard deviation of the Gaussian beam for "color:"
    //                  sigma = beam_min_sigma + sigma_range * f(color)
    //  Details/Discussion:
    //  The beam's spot shape vaguely resembles an aspect-corrected f() in the
    //  range [0, 1] (not quite, but it's related).  f(color) = color yields
    //  diamond-shaped spots, while a spherical function or cube balances
    //  variable width against a soft/realistic shape.  A beam_spot_power
    //  > 1.0 can give an ugly spot shape and more initial clipping, but the
    //  final shape also depends on the horizontal resampling filter and the
    //  phosphor bloom.  For instance, resampling horizontally in nonlinear
    //  light and/or with a sharp (e.g. Lanczos) filter sharpens the spot
    //  shape, yet a sixth root is still quite soft.  A power function
    //  (default 1.0/3.0 beam_spot_power) is the most flexible option, but a
    //  fixed spherical curve has the highest variability without an awful
    //  spot shape.
    //
    //  beam_min_sigma controls scanline sharpness/aliasing in dim areas, and
    //  its distance from beam_max_sigma controls beam width variability.  It
    //  only affects clipping [for pure Gaussians] if beam_spot_power > 1.0
    //  (a conservative estimate for a more complex constraint).
    //
    //  beam_max_sigma controls clipping and how much scanlines widen/soften
    //  as color increases.  The wider it is, the more scanlines must be
    //  evaluated to avoid distortion.  For a pure Gaussian, the
    //  max_beam_sigma at which the first unused scanline always has a
    //  weight < 1.0/255.0 is:
    //      num scanlines = 2, max_beam_sigma = 0.2089; distortions begin ~0.34
    //      num scanlines = 3, max_beam_sigma = 0.3879; distortions begin ~0.52
    //      num scanlines = 4, max_beam_sigma = 0.5723; distortions begin ~0.70
    //      num scanlines = 5, max_beam_sigma = 0.7591; distortions begin ~0.89
    //      num scanlines = 6, max_beam_sigma = 0.9483; distortions begin ~1.08
    //  Generalized Gaussians permit more leeway here as steepness increases.
    float3 spot_shape;
    if(beam_spot_shape_function < 0.5)
    {
        //  Power function:
        spot_shape = pow(color, beam_spot_power);
    }
    else
    {
        //  Spherical function:
        const float3 distance_from_white = color - 1.0.xxx;
        spot_shape =
            sqrt(1.0.xxx - distance_from_white*distance_from_white);
    }
    return beam_min_sigma.xxx + sigma_range * spot_shape;
}
float3 get_generalized_gaussian_beta(const float3 color,
    const float shape_range)
{
    //  Purpose:    Choose the generalized Gaussian shape parameter for a
    //              given source color.
    //  Requires:   Globals:
    //              1.) beam_min_shape and beam_max_shape are global floats
    //                  holding the desired min/max generalized Gaussian
    //                  beta parameters, for dim and bright colors
    //                  respectively.
    //              2.) beam_max_shape must be >= 2.0
    //              3.) beam_min_shape must be in [2.0, beam_max_shape]
    //              4.) beam_shape_power must be defined as a global float.
    //              Parameters:
    //              1.) color is the underlying source color along a scanline
    //              2.) shape_range = beam_max_shape - beam_min_shape; it is
    //                  passed in to avoid recomputing it when
    //                  beam_{min, max}_shape are runtime shader parameters
    //  Returns:    The type-I generalized Gaussian "shape" parameter beta
    //              for the given color.
    //  Details/Discussion:
    //  Beta changes the scanline distribution as follows:
    //  a.) beta < 2.0 narrows the peak to a spike with a discontinuous slope
    //  b.) beta == 2.0 just degenerates to a Gaussian
    //  c.) beta > 2.0 flattens and widens the peak, then drops off more
    //      steeply than a Gaussian.  Whereas high sigmas widen and soften
    //      peaks, high beta widen and sharpen peaks at the risk of aliasing.
    //  Unlike high beam_spot_powers, high beam_shape_powers actually soften
    //  shape transitions, whereas lower ones sharpen them (at the risk of
    //  aliasing).
    const float3 shape_curve = pow(color, beam_shape_power);
    return beam_min_shape + shape_range * shape_curve;
}
float3 scanline_gaussian_integral_contrib(const float3 dist,
    const float3 color, const float pixel_height, const float sigma_range)
{
    //  Purpose:    Integrate a pure Gaussian scanline beam over the height
    //              of one output pixel.
    //  Requires:   1.) dist is the distance of the [potentially separate
    //                  R/G/B] point(s) from a scanline in units of
    //                  scanlines, where 1.0 means the sample point straddles
    //                  the next scanline.
    //              2.) color is the underlying source color along a scanline.
    //              3.) pixel_height is the output pixel height in scanlines.
    //              4.) Requirements of get_gaussian_sigma() must be met.
    //  Returns:    A scanline's light output over a given pixel.
    //  Details:
    //  The CRT beam profile is roughly Gaussian and wider for bright colors
    //  than for dark ones.  A Gaussian always integrates to 1.0 over its
    //  full range, so the standard deviation can vary without affecting
    //  brightness.  With 'x' = distance:
    //      gaussian sample = 1/(sigma*sqrt(2*pi)) * e**(-(x**2)/(2*sigma**2))
    //      gaussian integral = 0.5 (1.0 + erf(x/(sigma * sqrt(2))))
    //  A numerical approximation of the "error function" (the Gaussian
    //  indefinite integral) gives the definite integral of the scanline's
    //  average brightness over a given pixel area.  Even if curved coords
    //  were used in this pass, a flat scalar pixel height works almost as
    //  well as one computed from a full pixel-space to scanline-space matrix.
    const float3 beam_sigma = get_gaussian_sigma(color, sigma_range);
    //  1/(sigma*sqrt(2)) rescales a distance into erf()'s argument:
    const float3 erf_arg_scale = 1.0/(beam_sigma*sqrt(2.0));
    const float3 half_pixel = (pixel_height.xxx) * 0.5;
    const float3 pixel_far_edge = dist + half_pixel;
    const float3 pixel_near_edge = dist - half_pixel;
    const float3 erf_far = erf(pixel_far_edge*erf_arg_scale);
    const float3 erf_near = erf(pixel_near_edge*erf_arg_scale);
    return color * 0.5*(erf_far - erf_near)/pixel_height;
}
float3 scanline_generalized_gaussian_integral_contrib(const float3 dist,
    const float3 color, const float pixel_height, const float sigma_range,
    const float shape_range)
{
    //  Purpose:    Integrate a generalized Gaussian scanline beam over the
    //              height of one output pixel.
    //  Requires:   1.) Requirements of scanline_gaussian_integral_contrib()
    //                  must be met.
    //              2.) Requirements of get_gaussian_sigma() must be met.
    //              3.) Requirements of get_generalized_gaussian_beta() must
    //                  be met.
    //  Returns:    A scanline's light output over a given pixel.
    //  Details:
    //  A generalized Gaussian distribution lets the shape (beta) vary as
    //  well as the width (alpha).  "gamma" refers to the gamma function:
    //      generalized sample =
    //          beta/(2*alpha*gamma(1/beta)) * e**(-(|x|/alpha)**beta)
    //  ligamma(s, z) is the lower incomplete gamma function, of which only
    //  two of four branches are implemented (because 1/beta <= 0.5 is kept):
    //      generalized integral = 0.5 + 0.5* sign(x) *
    //          ligamma(1/beta, (|x|/alpha)**beta)/gamma(1/beta)
    //  See get_generalized_gaussian_beta() for a discussion of beta.
    //  Alpha is based on the intended Gaussian sigma, but it only strictly
    //  models standard deviation at beta == 2, because the standard
    //  deviation depends on both alpha and beta (keeping alpha independent
    //  is faster and preserves intuitive behavior and a full spectrum of
    //  results).
    const float3 alpha = sqrt(2.0) * get_gaussian_sigma(color, sigma_range);
    const float3 beta = get_generalized_gaussian_beta(color, shape_range);
    const float3 inv_alpha = 1.0.xxx/alpha;
    const float3 inv_beta = 1.0.xxx/beta;
    const float3 half_pixel = (pixel_height.xxx) * 0.5;
    //  gamma_impl receives beta alongside 1/beta to avoid repeated divides.
    //  Likewise normalized_ligamma_impl receives beta (i.e. 1/s) and
    //  1/gamma(s).
    const float3 inv_gamma = 1.0.xxx/gamma_impl(inv_beta, beta);
    const float3 pixel_far_edge = dist + half_pixel;
    const float3 pixel_near_edge = dist - half_pixel;
    const float3 far_z = pow(abs(pixel_far_edge)*inv_alpha, beta);
    const float3 near_z = pow(abs(pixel_near_edge)*inv_alpha, beta);
    const float3 integral_far = sign(pixel_far_edge) *
        normalized_ligamma_impl(inv_beta, far_z, beta, inv_gamma);
    const float3 integral_near = sign(pixel_near_edge) *
        normalized_ligamma_impl(inv_beta, near_z, beta, inv_gamma);
    return color * 0.5*(integral_far - integral_near)/pixel_height;
}
float3 scanline_gaussian_sampled_contrib(const float3 dist, const float3 color,
    const float pixel_height, const float sigma_range)
{
    //  Purpose:    Point-sample a pure Gaussian scanline beam (one sample,
    //              or three averaged when beam_antialias_level > 0.5).
    //  See scanline_gaussian_integral_contrib() for detailed comments!
    //  gaussian sample = 1/(sigma*sqrt(2*pi)) * e**(-(x**2)/(2*sigma**2))
    const float3 beam_sigma = get_gaussian_sigma(color, sigma_range);
    //  Precompute reciprocals to avoid repeated divides:
    const float3 inv_sigma = 1.0.xxx/beam_sigma;
    //  1/(2*sigma**2), the factor inside the exponent:
    const float3 exponent_scale = 0.5 * inv_sigma * inv_sigma;
    //  1/(sigma*sqrt(2*pi)), the normalization outside the exponent:
    const float3 normalization = inv_sigma/sqrt(2.0*pi);
    if(beam_antialias_level > 0.5)
    {
        //  Also sample 1/3 pixel away in each direction:
        const float3 third_of_pixel = pixel_height.xxx/3.0;
        const float3 dist_plus = dist + third_of_pixel;
        const float3 dist_minus = abs(dist - third_of_pixel);
        //  Average three pure Gaussian samples:
        const float3 weight_center = exp(-(dist*dist)*exponent_scale);
        const float3 weight_plus = exp(-(dist_plus*dist_plus)*exponent_scale);
        const float3 weight_minus =
            exp(-(dist_minus*dist_minus)*exponent_scale);
        const float3 per_sample_scale = color/3.0 * normalization;
        return per_sample_scale *
            (weight_center + weight_plus + weight_minus);
    }
    else
    {
        //  Single sample at the pixel center:
        return color*exp(-(dist*dist)*exponent_scale)*normalization;
    }
}
float3 scanline_generalized_gaussian_sampled_contrib(const float3 dist,
    const float3 color, const float pixel_height, const float sigma_range,
    const float shape_range)
{
    //  Purpose:    Point-sample a generalized Gaussian scanline beam (one
    //              sample, or three averaged when
    //              beam_antialias_level > 0.5).
    //  See scanline_generalized_gaussian_integral_contrib() for details!
    //  generalized sample =
    //      beta/(2*alpha*gamma(1/beta)) * e**(-(|x|/alpha)**beta)
    const float3 alpha = sqrt(2.0) * get_gaussian_sigma(color, sigma_range);
    const float3 beta = get_generalized_gaussian_beta(color, shape_range);
    //  Precompute reciprocals to avoid repeated divides:
    const float3 inv_alpha = 1.0.xxx/alpha;
    const float3 inv_beta = 1.0.xxx/beta;
    //  color times the normalization term beta/(2*alpha*gamma(1/beta)):
    const float3 amplitude = color * beta * 0.5 * inv_alpha /
        gamma_impl(inv_beta, beta);
    if(beam_antialias_level > 0.5)
    {
        //  Also sample 1/3 pixel closer to and farther from the scanline:
        const float3 third_of_pixel = pixel_height.xxx/3.0;
        const float3 dist_plus = dist + third_of_pixel;
        const float3 dist_minus = abs(dist - third_of_pixel);
        //  Average three generalized Gaussian samples:
        const float3 weight_center = exp(-pow(abs(dist*inv_alpha), beta));
        const float3 weight_plus = exp(-pow(abs(dist_plus*inv_alpha), beta));
        const float3 weight_minus =
            exp(-pow(abs(dist_minus*inv_alpha), beta));
        return amplitude/3.0 * (weight_center + weight_plus + weight_minus);
    }
    else
    {
        //  Single sample at the pixel center:
        return amplitude * exp(-pow(abs(dist*inv_alpha), beta));
    }
}
float3 scanline_contrib(float3 dist, float3 color,
    float pixel_height, const float sigma_range, const float shape_range)
{
    //  Requires:   1.) Requirements of scanline_gaussian_integral_contrib()
    //                  must be met.
    //              2.) Requirements of get_gaussian_sigma() must be met.
    //              3.) Requirements of get_generalized_gaussian_beta() must be
    //                  met.
    //  Returns:    A scanline's light output over a given pixel. The beam
    //              profile is a generalized Gaussian if
    //              beam_generalized_gaussian is set and a pure Gaussian
    //              otherwise. It is integrated over the pixel when
    //              beam_antialias_level > 1.5 and point-sampled otherwise.
    const bool integrate_over_pixel = beam_antialias_level > 1.5;
    if(beam_generalized_gaussian)
    {
        if(integrate_over_pixel)
        {
            return scanline_generalized_gaussian_integral_contrib(
                dist, color, pixel_height, sigma_range, shape_range);
        }
        return scanline_generalized_gaussian_sampled_contrib(
            dist, color, pixel_height, sigma_range, shape_range);
    }
    //  Pure Gaussian beam profile:
    if(integrate_over_pixel)
    {
        return scanline_gaussian_integral_contrib(
            dist, color, pixel_height, sigma_range);
    }
    return scanline_gaussian_sampled_contrib(
        dist, color, pixel_height, sigma_range);
}
float3 get_raw_interpolated_color(const float3 color0,
    const float3 color1, const float3 color2, const float3 color3,
    const float4 weights)
{
    //  Returns:    The weighted sum of the four colors (weights.x applies to
    //              color0, ..., weights.w to color3), clamped below at 0.0.
    //  One matrix row per input color, so mul(weights, rows) is the sum of
    //  each color scaled by its weight:
    const float4x3 color_rows = float4x3(color0, color1, color2, color3);
    const float3 weighted_sum = mul(weights, color_rows);
    //  Clamp with max to avoid bizarre artifacts from negative colors:
    return max(weighted_sum, 0.0);
}
float3 get_interpolated_linear_color(const float3 color0, const float3 color1,
    const float3 color2, const float3 color3, const float4 weights)
{
    //  Requires:   1.) Requirements of include/gamma-management.h must be met:
    //                  intermediate_gamma must be globally defined, and input
    //                  colors are interpreted as linear RGB unless you #define
    //                  GAMMA_ENCODE_EVERY_FBO (in which case they are
    //                  interpreted as gamma-encoded with intermediate_gamma).
    //              2.) color0-3 are colors sampled from a texture with tex2D().
    //                  They are interpreted as defined in requirement 1.
    //              3.) weights contains weights for each color, summing to 1.0.
    //              4.) beam_horiz_linear_rgb_weight must be defined as a global
    //                  float in [0.0, 1.0] describing how much blending should
    //                  be done in linear RGB (rest is gamma-corrected RGB).
    //              5.) RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE must be #defined
    //                  if beam_horiz_linear_rgb_weight is anything other than a
    //                  static constant, or we may try branching at runtime
    //                  without dynamic branches allowed (slow).
    //  Returns:    Return an interpolated color lookup between the four input
    //              colors based on the weights in weights.  The final color
    //              is always a linear RGB value; only the colorspace in which
    //              the blending happens varies, as indicated above.
    //  Both candidate results are expressed in linear RGB before the final
    //  lerp:
    //      linear_mixed_color = mix(decode(c0..c3))
    //      gamma_mixed_color  = decode(mix(encode(c0..c3)))
    //  where encode/decode are pow(x, 1/gamma) and pow(x, gamma), and
    //  whichever step the inputs already satisfy is skipped.
    const float intermediate_gamma = get_intermediate_gamma();
    //  Branch if beam_horiz_linear_rgb_weight is static (for free) or if the
    //  profile allows dynamic branches (faster than computing extra pows):
    #ifndef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
        #define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
    #else
        #ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES
            #define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
        #endif
    #endif
    #ifdef SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
        //  Branching is cheap here, so skip whichever blend has zero weight:
        #ifdef GAMMA_ENCODE_EVERY_FBO
            //  Inputs: color0-3 are colors in gamma-encoded RGB.
            //  Blend the encoded values directly, then decode the result:
            const float3 gamma_mixed_color = pow(get_raw_interpolated_color(
                color0, color1, color2, color3, weights), intermediate_gamma);
            if(beam_horiz_linear_rgb_weight > 0.0)
            {
                //  Decode each input first, then blend in linear RGB:
                const float3 linear_mixed_color = get_raw_interpolated_color(
                    pow(color0, intermediate_gamma),
                    pow(color1, intermediate_gamma),
                    pow(color2, intermediate_gamma),
                    pow(color3, intermediate_gamma),
                    weights);
                return lerp(gamma_mixed_color, linear_mixed_color,
                    beam_horiz_linear_rgb_weight);
            }
            else
            {
                return gamma_mixed_color;
            }
        #else
            //  Inputs: color0-3 are colors in linear RGB.
            const float3 linear_mixed_color = get_raw_interpolated_color(
                color0, color1, color2, color3, weights);
            if(beam_horiz_linear_rgb_weight < 1.0)
            {
                //  Encode each input, blend in gamma space, then decode the
                //  blended result back to linear RGB.  Without the outer
                //  pow(), a gamma-encoded value would be returned (and
                //  lerped against a linear one), violating the contract.
                const float3 gamma_mixed_color = pow(
                    get_raw_interpolated_color(
                        pow(color0, 1.0/intermediate_gamma),
                        pow(color1, 1.0/intermediate_gamma),
                        pow(color2, 1.0/intermediate_gamma),
                        pow(color3, 1.0/intermediate_gamma),
                        weights),
                    intermediate_gamma);
                return lerp(gamma_mixed_color, linear_mixed_color,
                    beam_horiz_linear_rgb_weight);
            }
            else
            {
                return linear_mixed_color;
            }
        #endif  //  GAMMA_ENCODE_EVERY_FBO
    #else
        //  No cheap branching: always compute both blends and lerp.
        #ifdef GAMMA_ENCODE_EVERY_FBO
            //  Inputs: color0-3 are colors in gamma-encoded RGB.
            const float3 gamma_mixed_color = pow(get_raw_interpolated_color(
                color0, color1, color2, color3, weights), intermediate_gamma);
            const float3 linear_mixed_color = get_raw_interpolated_color(
                pow(color0, intermediate_gamma),
                pow(color1, intermediate_gamma),
                pow(color2, intermediate_gamma),
                pow(color3, intermediate_gamma),
                weights);
            return lerp(gamma_mixed_color, linear_mixed_color,
                beam_horiz_linear_rgb_weight);
        #else
            //  Inputs: color0-3 are colors in linear RGB.
            const float3 linear_mixed_color = get_raw_interpolated_color(
                color0, color1, color2, color3, weights);
            //  Decode the gamma-space blend back to linear RGB (see above):
            const float3 gamma_mixed_color = pow(
                get_raw_interpolated_color(
                    pow(color0, 1.0/intermediate_gamma),
                    pow(color1, 1.0/intermediate_gamma),
                    pow(color2, 1.0/intermediate_gamma),
                    pow(color3, 1.0/intermediate_gamma),
                    weights),
                intermediate_gamma);
            return lerp(gamma_mixed_color, linear_mixed_color,
                beam_horiz_linear_rgb_weight);
        #endif  //  GAMMA_ENCODE_EVERY_FBO
    #endif  //  SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
}
float3 get_scanline_color(const sampler2D Source, const float2 scanline_uv,
    const float2 uv_step_x, const float4 weights)
{
    //  Requires:   1.) scanline_uv must be vertically snapped to the caller's
    //                  desired line or scanline and horizontally snapped to the
    //                  texel just left of the output pixel (color1)
    //              2.) uv_step_x must contain the horizontal uv distance
    //                  between texels.
    //              3.) weights must contain interpolation filter weights for
    //                  color0, color1, color2, and color3, where color1 is just
    //                  left of the output pixel.
    //  Returns:    Return a horizontally interpolated texture lookup using 2-4
    //              nearby texels, according to weights and the conventions of
    //              get_interpolated_linear_color().
    //  The two texels straddling the output pixel are always needed:
    const float3 near_left = tex2D(Source, scanline_uv).rgb;
    const float3 near_right = tex2D(Source, scanline_uv + uv_step_x).rgb;
    //  The outer two texels are skipped for Quilez resampling
    //  (beam_horiz_filter <= 0.5) and stay black in that case:
    float3 far_left = 0.0.xxx;
    float3 far_right = 0.0.xxx;
    if(beam_horiz_filter > 0.5)
    {
        far_left = tex2D(Source, scanline_uv - uv_step_x).rgb;
        far_right = tex2D(Source, scanline_uv + 2.0 * uv_step_x).rgb;
    }
    //  Pass the samples through as-is, whether linear or gamma-encoded:
    //  get_interpolated_linear_color() handles the difference.
    return get_interpolated_linear_color(
        far_left, near_left, near_right, far_right, weights);
}
float3 sample_single_scanline_horizontal(const sampler2D Source,
    const float2 tex_uv, const float2 texture_size,
    const float2 texture_size_inv)
{
    //  TODO: Add function requirements.
    //  NOTE(review): texture_size_inv is presumably 1.0/texture_size; it is
    //  used below to convert texel coordinates back to uv.  Confirm at callers.
    //  Returns:    A horizontally filtered color at tex_uv, using Quilez
    //              (beam_horiz_filter < 0.5), Gaussian (< 1.5), or Lanczos2
    //              (otherwise) weights over 2 or 4 nearby texels.
    const float2 texel_pos = tex_uv * texture_size;
    //  Snap to the texel center at or before texel_pos.  under_half fixes a
    //  rounding bug right around exact texel locations.
    const float2 snapped_center =
        floor(texel_pos - under_half.xx) + 0.5.xx;
    //  Only the horizontal coordinate is snapped; y stays where it was:
    const float2 left_texel = float2(snapped_center.x, texel_pos.y);
    const float2 left_texel_uv = left_texel * texture_size_inv;
    //  Horizontal distances (in texels) from the output position to each of
    //  the four taps: far-left, left, right, far-right.
    const float frac_x = texel_pos.x - left_texel.x;
    const float4 tap_dists = float4(1.0 + frac_x, frac_x,
        1.0 - frac_x, 2.0 - frac_x);
    //  Pick Quilez, Gaussian, or Lanczos2 resize weights for the taps:
    float4 raw_weights;
    if(beam_horiz_filter < 0.5)
    {
        //  Quilez: quintic smoothstep between the two inner texels only.
        const float right_weight =
            frac_x*frac_x*frac_x*(frac_x*(frac_x*6.0 - 15.0) + 10.0);
        raw_weights = float4(0.0, 1.0 - right_weight, right_weight, 0.0);
    }
    else if(beam_horiz_filter < 1.5)
    {
        //  Gaussian: e**(-(d**2)/(2*sigma**2)) per tap.
        float inner_denom_inv = 1.0/(2.0*beam_horiz_sigma*beam_horiz_sigma);
        raw_weights = exp(-(tap_dists*tap_dists)*inner_denom_inv);
    }
    else
    {
        //  Lanczos2: 2*sin(pi*d)*sin(pi*d/2)/(pi*d)**2, with FIX_ZERO applied
        //  to the argument that ends up in the denominator.
        const float4 pi_dists = FIX_ZERO(tap_dists * pi);
        raw_weights = 2.0 * sin(pi_dists) * sin(pi_dists * 0.5) /
            (pi_dists * pi_dists);
    }
    //  Normalize so the weights sum to 1.0:
    const float4 unit_weights = raw_weights/dot(raw_weights, 1.0.xxxx);
    //  Fetch and blend the taps along the horizontal axis:
    const float2 uv_step_x = float2(texture_size_inv.x, 0.0);
    return get_scanline_color(
        Source, left_texel_uv, uv_step_x, unit_weights);
}
float3 sample_rgb_scanline_horizontal(const sampler2D Source,
    const float2 tex_uv, const float2 texture_size,
    const float2 texture_size_inv)
{
    //  TODO: Add function requirements.
    //  Returns:    The horizontally filtered color at tex_uv.  With
    //              beam_misconvergence enabled, each of R, G, and B is taken
    //              from its own lookup, shifted horizontally by that
    //              channel's convergence offset.
    if(beam_misconvergence)
    {
        //  Per-channel x offsets, scaled by texture_size_inv.x into uv units:
        const float3 offsets_x_rgb = get_convergence_offsets_x_vector();
        const float3 offsets_u_rgb = offsets_x_rgb * texture_size_inv.xxx;
        //  One full lookup per channel, each at its own shifted coordinate:
        const float3 lookup_r = sample_single_scanline_horizontal(Source,
            tex_uv - float2(offsets_u_rgb.r, 0.0),
            texture_size, texture_size_inv);
        const float3 lookup_g = sample_single_scanline_horizontal(Source,
            tex_uv - float2(offsets_u_rgb.g, 0.0),
            texture_size, texture_size_inv);
        const float3 lookup_b = sample_single_scanline_horizontal(Source,
            tex_uv - float2(offsets_u_rgb.b, 0.0),
            texture_size, texture_size_inv);
        //  Keep only the matching channel of each lookup:
        return float3(lookup_r.r, lookup_g.g, lookup_b.b);
    }
    //  No misconvergence: a single shared lookup serves all three channels.
    return sample_single_scanline_horizontal(Source, tex_uv, texture_size,
        texture_size_inv);
}
float2 get_last_scanline_uv(const float2 tex_uv, const float2 texture_size,
    const float2 texture_size_inv, const float2 il_step_multiple,
    const float frame_count, out float dist)
{
    //  Compute texture coords for the last/upper scanline, accounting for
    //  interlacing: With interlacing, only consider even/odd scanlines every
    //  other frame.  Top-field first (TFF) order puts even scanlines on even
    //  frames, and BFF order puts them on odd frames.  Texels are centered at:
    //      frac(tex_uv * texture_size) == x.5
    //  Caution: If these coordinates ever seem incorrect, first make sure it's
    //  not because anisotropic filtering is blurring across field boundaries.
    //  Note: TFF/BFF won't matter for sources that double-weave or similar.
    //  Returns:    The uv of the selected scanline; dist receives the sample's
    //              vertical distance from that scanline, in scanlines.
    //  Alternates between 0 and 1 from frame to frame (shifted by BFF order):
    const float field_parity = fmod(frame_count + float(interlace_bff), 2.0);
    const float field_offset =
        floor(il_step_multiple.y * 0.75) * field_parity;
    const float2 texel_pos = tex_uv * texture_size;
    //  Upstream snaps to a texel index here, using under_half to fix a
    //  rounding bug right around exact texel locations:
    //      const float2 prev_texel_num = floor(curr_texel - under_half.xx);
    //  This causes an insane bug on duckstation, so it's disabled here.
    //  (Hyllian, 2024)  The unsnapped texel position is used instead, so the
    //  fmod below also strips the fractional part of y, while x is left
    //  unsnapped (the returned uv.x is tex_uv.x plus half a texel).
    const float past_field_line = fmod(
        texel_pos.y + field_offset, il_step_multiple.y);
    const float2 line_texel_num = texel_pos - float2(0.0, past_field_line);
    //  Move to the center of the previous scanline in the current field:
    const float2 line_texel = line_texel_num + 0.5.xx;
    //  Report the sample's distance from the scanline, in units of scanlines:
    dist = (texel_pos.y - line_texel.y)/il_step_multiple.y;
    return line_texel * texture_size_inv;
}
bool is_interlaced(float num_lines)
{
    //  Returns:    True if the source is guessed to be interlaced, based only
    //              on its line count.  Always false unless interlace_detect
    //              is enabled.
    if(interlace_detect)
    {
        //  NTSC: 525 lines, 262.5/field; 486 active (2 half-lines), 243/field
        //  NTSC Emulators: Typically 224 or 240 lines
        //  PAL: 625 lines, 312.5/field; 576 active (typical), 288/field
        //  PAL Emulators: ?
        //  ATSC: 720p, 1080i, 1080p
        //  Where do we place our cutoffs?  Assumptions:
        //  1.) We only need to care about active lines.
        //  2.) Anything > 288 and <= 576 lines is probably interlaced.
        //  3.) Anything > 576 lines is probably not interlaced...
        //  4.) ...except 1080 lines, which is a crapshoot (user decision).
        //  5.) Just in case the main program uses calculated video sizes,
        //      we should nudge the float thresholds a bit.
        const bool looks_sd_interlaced =
            (num_lines > 288.5) && (num_lines < 576.5);
        const bool is_1080_lines =
            (num_lines > 1079.5) && (num_lines < 1080.5);
        //  1080-line sources only count when the user opts in:
        const bool looks_hd_interlaced = interlace_1080i ?
            is_1080_lines :
            false;
        return (looks_sd_interlaced || looks_hd_interlaced);
    }
    //  Detection disabled: never report interlacing.
    return false;
}
#endif // SCANLINE_FUNCTIONS_H

View File

@@ -1,498 +0,0 @@
#ifndef SPECIAL_FUNCTIONS_H
#define SPECIAL_FUNCTIONS_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////////// DESCRIPTION ////////////////////////////////
// This file implements the following mathematical special functions:
// 1.) erf() = 2/sqrt(pi) * indefinite_integral(e**(-x**2))
// 2.) gamma(s), a real-numbered extension of the integer factorial function
// It also implements normalized_ligamma(s, z), a normalized lower incomplete
// gamma function for s < 0.5 only. Both gamma() and normalized_ligamma() can
// be called with an _impl suffix to use an implementation version with a few
// extra precomputed parameters (which may be useful for the caller to reuse).
// See below for details.
//
// Design Rationale:
// Pretty much every line of code in this file is duplicated four times for
// different input types (float4/float3/float2/float). This is unfortunate,
// but Cg doesn't allow function templates. Macros would be far less verbose,
// but they would make the code harder to document and read. I don't expect
// these functions will require a whole lot of maintenance changes unless
// someone ever has need for more robust incomplete gamma functions, so code
// duplication seems to be the lesser evil in this case.
/////////////////////////// GAUSSIAN ERROR FUNCTION //////////////////////////
float4 erf6(float4 x)
{
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    Return an Abramowitz/Stegun approximation of erf(), where:
    //                  erf(x) = 2/sqrt(pi) * integral(e**(-x**2))
    //              This approximation has a max absolute error of 2.5*10**-5
    //              with solid numerical robustness and efficiency.  See:
    //                  https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions
    //  The formula is evaluated for |x|; the sign is restored at the end.
    static const float p = 0.47047;
    static const float a1 = 0.3480242;
    static const float a2 = -0.0958798;
    static const float a3 = 0.7478556;
    const float4 t = 1.0.xxxx/(1.0.xxxx + p*abs(x));
    //  Horner form of a1*t + a2*t**2 + a3*t**3:
    const float4 poly = t*(a1 + t*(a2 + t*a3));
    const float4 erf_abs_x = 1.0.xxxx - poly*exp(-(x*x));
    return erf_abs_x * sign(x);
}
float3 erf6(const float3 x)
{
    //  Float3 version:
    static const float p = 0.47047;
    static const float a1 = 0.3480242;
    static const float a2 = -0.0958798;
    static const float a3 = 0.7478556;
    const float3 t = 1.0.xxx/(1.0.xxx + p*abs(x));
    const float3 poly = t*(a1 + t*(a2 + t*a3));
    const float3 erf_abs_x = 1.0.xxx - poly*exp(-(x*x));
    return erf_abs_x * sign(x);
}
float2 erf6(const float2 x)
{
    //  Float2 version:
    static const float p = 0.47047;
    static const float a1 = 0.3480242;
    static const float a2 = -0.0958798;
    static const float a3 = 0.7478556;
    const float2 t = 1.0.xx/(1.0.xx + p*abs(x));
    const float2 poly = t*(a1 + t*(a2 + t*a3));
    const float2 erf_abs_x = 1.0.xx - poly*exp(-(x*x));
    return erf_abs_x * sign(x);
}
float erf6(const float x)
{
    //  Float version:
    static const float p = 0.47047;
    static const float a1 = 0.3480242;
    static const float a2 = -0.0958798;
    static const float a3 = 0.7478556;
    const float t = 1.0/(1.0 + p*abs(x));
    const float poly = t*(a1 + t*(a2 + t*a3));
    const float erf_abs_x = 1.0 - poly*exp(-(x*x));
    return erf_abs_x * sign(x);
}
float4 erft(const float4 x)
{
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    Approximate erf() with the hyperbolic tangent.  The error is
    //              visually noticeable, but it's blazing fast and perceptually
    //              close...at least on ATI hardware.  See:
    //                  http://www.maplesoft.com/applications/view.aspx?SID=5525&view=html
    //  Warning:    Only use this if your hardware drivers correctly implement
    //              tanh(): My nVidia 8800GTS returns garbage output.
    static const float tanh_input_scale = 1.202760580;
    return tanh(tanh_input_scale * x);
}
float3 erft(const float3 x)
{
    //  Float3 version:
    static const float tanh_input_scale = 1.202760580;
    return tanh(tanh_input_scale * x);
}
float2 erft(const float2 x)
{
    //  Float2 version:
    static const float tanh_input_scale = 1.202760580;
    return tanh(tanh_input_scale * x);
}
float erft(const float x)
{
    //  Float version:
    static const float tanh_input_scale = 1.202760580;
    return tanh(tanh_input_scale * x);
}
float4 erf(const float4 x)
{
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    Some approximation of erf(x), depending on user settings:
    //              erft() (tanh-based) if ERF_FAST_APPROXIMATION is #defined,
    //              and erf6() otherwise.
    #ifndef ERF_FAST_APPROXIMATION
        return erf6(x);
    #else
        return erft(x);
    #endif
}
float3 erf(const float3 x)
{
    //  Float3 version:
    #ifndef ERF_FAST_APPROXIMATION
        return erf6(x);
    #else
        return erft(x);
    #endif
}
float2 erf(const float2 x)
{
    //  Float2 version:
    #ifndef ERF_FAST_APPROXIMATION
        return erf6(x);
    #else
        return erft(x);
    #endif
}
float erf(const float x)
{
    //  Float version:
    #ifndef ERF_FAST_APPROXIMATION
        return erf6(x);
    #else
        return erft(x);
    #endif
}
/////////////////////////// COMPLETE GAMMA FUNCTION //////////////////////////
float4 gamma_impl(const float4 s, const float4 s_inv)
{
    //  Requires:   1.) s is the standard parameter to the gamma function, and
    //                  it should lie in the [0, 36] range.
    //              2.) s_inv = 1.0/s.  This implementation function requires
    //                  the caller to precompute this value, giving users the
    //                  opportunity to reuse it.
    //  Returns:    Return approximate gamma function (real-numbered factorial)
    //              output using the Lanczos approximation with two coefficients
    //              calculated using Paul Godfrey's method here:
    //                  http://my.fit.edu/~gabdo/gamma.txt
    //              An optimal g value for s in [0, 36] is ~1.12906830989, with
    //              a maximum relative error of 0.000463 for 2**16 equally
    //              spaced evals.  We could use three coeffs (0.0000346 error)
    //              without hurting latency, but this allows more parallelism
    //              with outside instructions.
    static const float4 lanczos_g = 1.12906830989.xxxx;
    static const float4 coeff0 = 0.8109119309638332633713423362694399653724431.xxxx;
    static const float4 coeff1 = 0.4808354605142681877121661197951496120000040.xxxx;
    static const float4 euler_e = 2.71828182845904523536028747135266249775724709.xxxx;
    const float4 s_plus_half = s + 0.5.xxxx;
    const float4 series = coeff0 + coeff1/(s + 1.0.xxxx);
    const float4 scaled_base = (s_plus_half + lanczos_g)/euler_e;
    //  gamma(s + 1) = scaled_base**s_plus_half * series.  Dividing by s
    //  afterward gives gamma(s), which has less error for small s than
    //  shifting s down by 1.0 at the beginning.
    const float4 gamma_s_plus_one = pow(scaled_base, s_plus_half) * series;
    return gamma_s_plus_one * s_inv;
}
float3 gamma_impl(const float3 s, const float3 s_inv)
{
    //  Float3 version:
    static const float3 lanczos_g = 1.12906830989.xxx;
    static const float3 coeff0 = 0.8109119309638332633713423362694399653724431.xxx;
    static const float3 coeff1 = 0.4808354605142681877121661197951496120000040.xxx;
    static const float3 euler_e = 2.71828182845904523536028747135266249775724709.xxx;
    const float3 s_plus_half = s + 0.5.xxx;
    const float3 series = coeff0 + coeff1/(s + 1.0.xxx);
    const float3 scaled_base = (s_plus_half + lanczos_g)/euler_e;
    const float3 gamma_s_plus_one = pow(scaled_base, s_plus_half) * series;
    return gamma_s_plus_one * s_inv;
}
float2 gamma_impl(const float2 s, const float2 s_inv)
{
    //  Float2 version:
    static const float2 lanczos_g = 1.12906830989.xx;
    static const float2 coeff0 = 0.8109119309638332633713423362694399653724431.xx;
    static const float2 coeff1 = 0.4808354605142681877121661197951496120000040.xx;
    static const float2 euler_e = 2.71828182845904523536028747135266249775724709.xx;
    const float2 s_plus_half = s + 0.5.xx;
    const float2 series = coeff0 + coeff1/(s + 1.0.xx);
    const float2 scaled_base = (s_plus_half + lanczos_g)/euler_e;
    const float2 gamma_s_plus_one = pow(scaled_base, s_plus_half) * series;
    return gamma_s_plus_one * s_inv;
}
float gamma_impl(const float s, const float s_inv)
{
    //  Float version:
    static const float lanczos_g = 1.12906830989;
    static const float coeff0 = 0.8109119309638332633713423362694399653724431;
    static const float coeff1 = 0.4808354605142681877121661197951496120000040;
    static const float euler_e = 2.71828182845904523536028747135266249775724709;
    const float s_plus_half = s + 0.5;
    const float series = coeff0 + coeff1/(s + 1.0);
    const float scaled_base = (s_plus_half + lanczos_g)/euler_e;
    const float gamma_s_plus_one = pow(scaled_base, s_plus_half) * series;
    return gamma_s_plus_one * s_inv;
}
float4 gamma(const float4 s)
{
    //  Requires:   s is the standard parameter to the gamma function, and it
    //              should lie in the [0, 36] range.
    //  Returns:    Return approximate gamma function output with a maximum
    //              relative error of 0.000463.  See gamma_impl for details.
    //  Convenience wrapper: computes the 1/s that gamma_impl() expects.
    const float4 s_inv = 1.0.xxxx/s;
    return gamma_impl(s, s_inv);
}
float3 gamma(const float3 s)
{
    //  Float3 version:
    const float3 s_inv = 1.0.xxx/s;
    return gamma_impl(s, s_inv);
}
float2 gamma(const float2 s)
{
    //  Float2 version:
    const float2 s_inv = 1.0.xx/s;
    return gamma_impl(s, s_inv);
}
float gamma(const float s)
{
    //  Float version:
    const float s_inv = 1.0/s;
    return gamma_impl(s, s_inv);
}
//////////////// INCOMPLETE GAMMA FUNCTIONS (RESTRICTED INPUT) ///////////////
// Lower incomplete gamma function for small s and z (implementation):
float4 ligamma_small_z_impl(const float4 s, const float4 z, const float4 s_inv)
{
    //  Requires:   1.) s < ~0.5
    //              2.) z <= ~0.775075
    //              3.) s_inv = 1.0/s (precomputed for outside reuse)
    //  Returns:    A series representation for the lower incomplete gamma
    //              function for small s and small z (4 terms).
    //  The "rolled up" summation computed below is:
    //      term_sign = 1.0; z_pow = 1.0; factorial = 1.0;
    //      sum = 1.0/s;                            // k = 0 term
    //      for(int k = 1; k < 4; ++k)
    //      {
    //          term_sign *= -1.0; z_pow *= z; factorial *= k;
    //          sum += term_sign * z_pow / ((s + k) * factorial);
    //      }
    //      result = z**s * sum;
    //  Unrolled with the factorials folded into the denominators:
    const float4 z_sq = z*z;
    const float4 denom_k1 = s + 1.0.xxxx;           //  1! * (s + 1)
    const float4 denom_k2 = 2.0*s + 4.0.xxxx;       //  2! * (s + 2)
    const float4 denom_k3 = 6.0*s + 18.0.xxxx;      //  3! * (s + 3)
    //  A fifth term would use: z_sq * z_sq / (24.0*s + 96.0)
    float4 series = s_inv;
    series -= z/denom_k1;
    series += z_sq/denom_k2;
    series -= z * z_sq/denom_k3;
    //  Apply the z**s prefactor:
    return pow(z, s) * series;
}
float3 ligamma_small_z_impl(const float3 s, const float3 z, const float3 s_inv)
{
    //  Float3 version:
    const float3 z_sq = z*z;
    const float3 denom_k1 = s + 1.0.xxx;
    const float3 denom_k2 = 2.0*s + 4.0.xxx;
    const float3 denom_k3 = 6.0*s + 18.0.xxx;
    float3 series = s_inv;
    series -= z/denom_k1;
    series += z_sq/denom_k2;
    series -= z * z_sq/denom_k3;
    return pow(z, s) * series;
}
float2 ligamma_small_z_impl(const float2 s, const float2 z, const float2 s_inv)
{
    //  Float2 version:
    const float2 z_sq = z*z;
    const float2 denom_k1 = s + 1.0.xx;
    const float2 denom_k2 = 2.0*s + 4.0.xx;
    const float2 denom_k3 = 6.0*s + 18.0.xx;
    float2 series = s_inv;
    series -= z/denom_k1;
    series += z_sq/denom_k2;
    series -= z * z_sq/denom_k3;
    return pow(z, s) * series;
}
float ligamma_small_z_impl(const float s, const float z, const float s_inv)
{
    //  Float version:
    const float z_sq = z*z;
    const float denom_k1 = s + 1.0;
    const float denom_k2 = 2.0*s + 4.0;
    const float denom_k3 = 6.0*s + 18.0;
    float series = s_inv;
    series -= z/denom_k1;
    series += z_sq/denom_k2;
    series -= z * z_sq/denom_k3;
    return pow(z, s) * series;
}
// Upper incomplete gamma function for small s and large z (implementation):
float4 uigamma_large_z_impl(const float4 s, const float4 z)
{
    //  Requires:   1.) s < ~0.5
    //              2.) z > ~0.775075
    //  Returns:    Gauss's continued fraction representation for the upper
    //              incomplete gamma function (4 terms).
    //  The "rolled up" continued fraction looks like this.  The denominator
    //  is truncated, and it's calculated "from the bottom up:"
    //      denom = float4('inf');
    //      float4 one = float4(1.0);
    //      for(int i = 4; i > 0; --i)
    //      {
    //          denom = ((i * 2.0) - one) + z - s + (i * (s - i))/denom;
    //      }
    //  Unrolled below, innermost level first.  Level 4 has no correction
    //  term because its own denominator is treated as infinite.
    const float4 level4 = 7.0.xxxx + z - s;
    const float4 level3 = 5.0.xxxx + z - s + (3.0*s - 9.0.xxxx)/level4;
    const float4 level2 = 3.0.xxxx + z - s + (2.0*s - 4.0.xxxx)/level3;
    const float4 level1 = 1.0.xxxx + z - s + (s - 1.0.xxxx)/level2;
    //  Prefactor z**s * e**(-z) over the full continued fraction:
    const float4 prefactor = pow(z, s) * exp(-z);
    return prefactor / level1;
}
float3 uigamma_large_z_impl(const float3 s, const float3 z)
{
    //  Float3 version:
    const float3 level4 = 7.0.xxx + z - s;
    const float3 level3 = 5.0.xxx + z - s + (3.0*s - 9.0.xxx)/level4;
    const float3 level2 = 3.0.xxx + z - s + (2.0*s - 4.0.xxx)/level3;
    const float3 level1 = 1.0.xxx + z - s + (s - 1.0.xxx)/level2;
    const float3 prefactor = pow(z, s) * exp(-z);
    return prefactor / level1;
}
float2 uigamma_large_z_impl(const float2 s, const float2 z)
{
    //  Float2 version:
    const float2 level4 = 7.0.xx + z - s;
    const float2 level3 = 5.0.xx + z - s + (3.0*s - 9.0.xx)/level4;
    const float2 level2 = 3.0.xx + z - s + (2.0*s - 4.0.xx)/level3;
    const float2 level1 = 1.0.xx + z - s + (s - 1.0.xx)/level2;
    const float2 prefactor = pow(z, s) * exp(-z);
    return prefactor / level1;
}
float uigamma_large_z_impl(const float s, const float z)
{
    //  Float version:
    const float level4 = 7.0 + z - s;
    const float level3 = 5.0 + z - s + (3.0*s - 9.0)/level4;
    const float level2 = 3.0 + z - s + (2.0*s - 4.0)/level3;
    const float level1 = 1.0 + z - s + (s - 1.0)/level2;
    const float prefactor = pow(z, s) * exp(-z);
    return prefactor / level1;
}
// Normalized lower incomplete gamma function for small s (implementation):
float4 normalized_ligamma_impl(const float4 s, const float4 z,
    const float4 s_inv, const float4 gamma_s_inv)
{
    //  Requires:   1.) s < ~0.5
    //              2.) s_inv = 1/s (precomputed for outside reuse)
    //              3.) gamma_s_inv = 1/gamma(s) (precomputed for outside reuse)
    //  Returns:    Approximate the normalized lower incomplete gamma function
    //              for s < 0.5.  Since we only care about s < 0.5, we only need
    //              to evaluate two branches (not four) based on z.  Each branch
    //              uses four terms, with a max relative error of ~0.00182.  The
    //              branch threshold and specifics were adapted for fewer terms
    //              from Gil/Segura/Temme's paper here:
    //                  http://oai.cwi.nl/oai/asset/20433/20433B.pdf
    //  Evaluate both branches: Real branches test slower even when available.
    static const float4 thresh = 0.775075.xxxx;
    const bool4 z_is_large = z > thresh;
    const float4 large_z = 1.0.xxxx - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float4 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    //  Build a 0.0/1.0 selection mask PER COMPONENT.  Each component has its
    //  own z and must pick its own branch; swizzling the bool vector with
    //  .xxxx would instead apply component x's choice to every component.
    const float4 large_mask = float4(float(z_is_large.x),
        float(z_is_large.y), float(z_is_large.z), float(z_is_large.w));
    const float4 small_mask = 1.0.xxxx - large_mask;
    //  Combine the results from both branches:
    return large_z * large_mask + small_z * small_mask;
}
float3 normalized_ligamma_impl(const float3 s, const float3 z,
    const float3 s_inv, const float3 gamma_s_inv)
{
    //  Float3 version (per-component branch selection, as above):
    static const float3 thresh = 0.775075.xxx;
    const bool3 z_is_large = z > thresh;
    const float3 large_z = 1.0.xxx - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float3 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    const float3 large_mask = float3(float(z_is_large.x),
        float(z_is_large.y), float(z_is_large.z));
    const float3 small_mask = 1.0.xxx - large_mask;
    return large_z * large_mask + small_z * small_mask;
}
float2 normalized_ligamma_impl(const float2 s, const float2 z,
    const float2 s_inv, const float2 gamma_s_inv)
{
    //  Float2 version (per-component branch selection, as above):
    static const float2 thresh = 0.775075.xx;
    const bool2 z_is_large = z > thresh;
    const float2 large_z = 1.0.xx - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float2 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    const float2 large_mask = float2(float(z_is_large.x),
        float(z_is_large.y));
    const float2 small_mask = 1.0.xx - large_mask;
    return large_z * large_mask + small_z * small_mask;
}
float normalized_ligamma_impl(const float s, const float z,
    const float s_inv, const float gamma_s_inv)
{
    //  Float version:
    static const float thresh = 0.775075;
    const bool z_is_large = z > thresh;
    const float large_z = 1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    //  0.0/1.0 mask selecting the branch that is valid for this z:
    const float large_mask = float(z_is_large);
    const float small_mask = 1.0 - large_mask;
    return large_z * large_mask + small_z * small_mask;
}
// Normalized lower incomplete gamma function for small s:
float4 normalized_ligamma(const float4 s, const float4 z)
{
    //  Requires:   s < ~0.5
    //  Returns:    Approximate the normalized lower incomplete gamma function
    //              for s < 0.5.  See normalized_ligamma_impl() for details.
    //  Convenience wrapper: derives the two reciprocals that the _impl
    //  version expects its caller to precompute.
    const float4 rcp_s = 1.0.xxxx/s;
    const float4 rcp_gamma_s = 1.0.xxxx/gamma_impl(s, rcp_s);
    return normalized_ligamma_impl(s, z, rcp_s, rcp_gamma_s);
}
float3 normalized_ligamma(const float3 s, const float3 z)
{
    //  Float3 version:
    const float3 rcp_s = 1.0.xxx/s;
    const float3 rcp_gamma_s = 1.0.xxx/gamma_impl(s, rcp_s);
    return normalized_ligamma_impl(s, z, rcp_s, rcp_gamma_s);
}
float2 normalized_ligamma(const float2 s, const float2 z)
{
    //  Float2 version:
    const float2 rcp_s = 1.0.xx/s;
    const float2 rcp_gamma_s = 1.0.xx/gamma_impl(s, rcp_s);
    return normalized_ligamma_impl(s, z, rcp_s, rcp_gamma_s);
}
float normalized_ligamma(const float s, const float z)
{
    //  Float version:
    const float rcp_s = 1.0/s;
    const float rcp_gamma_s = 1.0/gamma_impl(s, rcp_s);
    return normalized_ligamma_impl(s, z, rcp_s, rcp_gamma_s);
}
#endif // SPECIAL_FUNCTIONS_H

View File

@@ -1,58 +0,0 @@
#ifndef USER_CGP_CONSTANTS_H
#define USER_CGP_CONSTANTS_H
// Compile-time constants that mirror values configured in the shader preset.
// IMPORTANT:
// These constants MUST be set appropriately for the settings in crt-royale.cgp
// (or whatever related .cgp file you're using). If they aren't, you're likely
// to get artifacts, the wrong phosphor mask size, etc. I wish these could be
// set directly in the .cgp file to make things easier, but...they can't.
// PASS SCALES AND RELATED CONSTANTS:
// Copy the absolute scale_x for BLOOM_APPROX. There are two major versions of
// this shader: One does a viewport-scale bloom, and the other skips it. The
// latter benefits from a higher bloom_approx_scale_x, so save both separately:
static const float bloom_approx_size_x = 320.0;
static const float bloom_approx_size_x_for_fake = 400.0;
// Copy the viewport-relative scales of the phosphor mask resize passes
// (MASK_RESIZE and the pass immediately preceding it).
// Note: 0.0625 == 1/16 in each dimension.
static const float2 mask_resize_viewport_scale = float2(0.0625, 0.0625);
// Copy the geom_max_aspect_ratio used to calculate the MASK_RESIZE scales, etc.:
static const float geom_max_aspect_ratio = 4.0/3.0;
// PHOSPHOR MASK TEXTURE CONSTANTS:
// Set the following constants to reflect the properties of the phosphor mask
// texture named in crt-royale.cgp. The shader optionally resizes a mask tile
// based on user settings, then repeats a single tile until filling the screen.
// The shader must know the input texture size (default 64x64), and to manually
// resize, it must also know the horizontal triads per tile (default 8).
static const float2 mask_texture_small_size = 64.0.xx;
static const float2 mask_texture_large_size = 512.0.xx;
static const float mask_triads_per_tile = 8.0;
// We need the average brightness of the phosphor mask to compensate for the
// dimming it causes. The following four values are roughly correct for the
// masks included with the shader. Update the value for any LUT texture you
// change. [Un]comment "#define PHOSPHOR_MASK_GRILLE14" depending on whether
// the loaded aperture grille uses 14-pixel or 15-pixel stripes (default 15).
// Each value is written as (average level)/255.0; the file names listed
// after a constant are presumably the mask textures it was measured for
// (TODO confirm against the shipped textures).
//#define PHOSPHOR_MASK_GRILLE14
static const float mask_grille14_avg_color = 50.6666666/255.0;
// TileableLinearApertureGrille14Wide7d33Spacing*.png
// TileableLinearApertureGrille14Wide10And6Spacing*.png
static const float mask_grille15_avg_color = 53.0/255.0;
// TileableLinearApertureGrille15Wide6d33Spacing*.png
// TileableLinearApertureGrille15Wide8And5d5Spacing*.png
static const float mask_slot_avg_color = 46.0/255.0;
// TileableLinearSlotMask15Wide9And4d5Horizontal8VerticalSpacing*.png
// TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing*.png
static const float mask_shadow_avg_color = 41.0/255.0;
// TileableLinearShadowMask*.png
// TileableLinearShadowMaskEDP*.png
// Select the aperture grille average that matches the stripe width chosen
// with PHOSPHOR_MASK_GRILLE14 above:
#ifdef PHOSPHOR_MASK_GRILLE14
static const float mask_grille_avg_color = mask_grille14_avg_color;
#else
static const float mask_grille_avg_color = mask_grille15_avg_color;
#endif
#endif // USER_CGP_CONSTANTS_H

View File

@@ -1,359 +0,0 @@
#ifndef USER_SETTINGS_H
#define USER_SETTINGS_H
///////////////////////////// DRIVER CAPABILITIES ////////////////////////////
// The Cg compiler uses different "profiles" with different capabilities.
// This shader requires a Cg compilation profile >= arbfp1, but a few options
// require higher profiles like fp30 or fp40. The shader can't detect profile
// or driver capabilities, so instead you must comment or uncomment the lines
// below with "//" before "#define." Disable an option if you get compilation
// errors resembling those listed. Generally speaking, all of these options
// will run on nVidia cards, but only DRIVERS_ALLOW_TEX2DBIAS (if that) is
// likely to run on ATI/AMD, due to the Cg compiler's profile limitations.
// Derivatives: Unsupported on fp20, ps_1_1, ps_1_2, ps_1_3, and arbfp1.
// Among other things, derivatives help us fix anisotropic filtering artifacts
// with curved manually tiled phosphor mask coords. Related errors:
// error C3004: function "float2 ddx(float2);" not supported in this profile
// error C3004: function "float2 ddy(float2);" not supported in this profile
//#define DRIVERS_ALLOW_DERIVATIVES
// Fine derivatives: Unsupported on older ATI cards.
// Fine derivatives enable 2x2 fragment block communication, letting us perform
// fast single-pass blur operations. If your card uses coarse derivatives and
// these are enabled, blurs could look broken. Derivatives are a prerequisite.
#ifdef DRIVERS_ALLOW_DERIVATIVES
#define DRIVERS_ALLOW_FINE_DERIVATIVES
#endif
// Dynamic looping: Requires an fp30 or newer profile.
// This makes phosphor mask resampling faster in some cases. Related errors:
// error C5013: profile does not support "for" statements and "for" could not
// be unrolled
//#define DRIVERS_ALLOW_DYNAMIC_BRANCHES
// Without DRIVERS_ALLOW_DYNAMIC_BRANCHES, we need to use unrollable loops.
// Using one static loop avoids overhead if the user is right, but if the user
// is wrong (loops are allowed), breaking a loop into if-blocked pieces with a
// binary search can potentially save some iterations. However, it may fail:
// error C6001: Temporary register limit of 32 exceeded; 35 registers
// needed to compile program
//#define ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS
// tex2Dlod: Requires an fp40 or newer profile. This can be used to disable
// anisotropic filtering, thereby fixing related artifacts. Related errors:
// error C3004: function "float4 tex2Dlod(sampler2D, float4);" not supported in
// this profile
//#define DRIVERS_ALLOW_TEX2DLOD
// tex2Dbias: Requires an fp30 or newer profile. This can be used to alleviate
// artifacts from anisotropic filtering and mipmapping. Related errors:
// error C3004: function "float4 tex2Dbias(sampler2D, float4);" not supported
// in this profile
//#define DRIVERS_ALLOW_TEX2DBIAS
// Integrated graphics compatibility: Integrated graphics like Intel HD 4000
// impose stricter limitations on register counts and instructions. Enable
// INTEGRATED_GRAPHICS_COMPATIBILITY_MODE if you still see error C6001 or:
// error C6002: Instruction limit of 1024 exceeded: 1523 instructions needed
// to compile program.
// Enabling integrated graphics compatibility mode will automatically disable:
// 1.) PHOSPHOR_MASK_MANUALLY_RESIZE: The phosphor mask will be softer.
// (This may be reenabled in a later release.)
// 2.) RUNTIME_GEOMETRY_MODE
// 3.) The high-quality 4x4 Gaussian resize for the bloom approximation
//#define INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
//////////////////////////// USER CODEPATH OPTIONS ///////////////////////////
// To disable a #define option, turn its line into a comment with "//."
// RUNTIME VS. COMPILE-TIME OPTIONS (Major Performance Implications):
// Enable runtime shader parameters in the Retroarch (etc.) GUI? They override
// many of the options in this file and allow real-time tuning, but many of
// them are slower. Disabling them and using this text file will boost FPS.
#define RUNTIME_SHADER_PARAMS_ENABLE
// Specify the phosphor bloom sigma at runtime? This option is 10% slower, but
// it's the only way to do a wide-enough full bloom with a runtime dot pitch.
#define RUNTIME_PHOSPHOR_BLOOM_SIGMA
// Specify antialiasing weight parameters at runtime? (Costs ~20% with cubics)
#define RUNTIME_ANTIALIAS_WEIGHTS
// Specify subpixel offsets at runtime? (WARNING: EXTREMELY EXPENSIVE!)
//#define RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
// Make beam_horiz_filter and beam_horiz_linear_rgb_weight into runtime shader
// parameters? This will require more math or dynamic branching.
#define RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
// Specify the tilt at runtime? This makes things about 3% slower.
#define RUNTIME_GEOMETRY_TILT
// Specify the geometry mode at runtime?
#define RUNTIME_GEOMETRY_MODE
// Specify the phosphor mask type (aperture grille, slot mask, shadow mask) and
// mode (Lanczos-resize, hardware resize, or tile 1:1) at runtime, even without
// dynamic branches? This is cheap if mask_resize_viewport_scale is small.
#define FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
// PHOSPHOR MASK:
// Manually resize the phosphor mask for best results (slower)? Disabling this
// removes the option to do so, but it may be faster without dynamic branches.
#define PHOSPHOR_MASK_MANUALLY_RESIZE
// If we sinc-resize the mask, should we Lanczos-window it (slower but better)?
#define PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
// Larger blurs are expensive, but we need them to blur larger triads. We can
// detect the right blur if the triad size is static or our profile allows
// dynamic branches, but otherwise we use the largest blur the user indicates
// they might need:
#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
//#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
//#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
//#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
// Here's a helpful chart:
// MaxTriadSize BlurSize MinTriadCountsByResolution
// 3.0 9.0 480/640/960/1920 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 6.0 17.0 240/320/480/960 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 9.0 25.0 160/213/320/640 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 12.0 31.0 120/160/240/480 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 18.0 43.0 80/107/160/320 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
/////////////////////////////// USER PARAMETERS //////////////////////////////
// Note: Many of these static parameters are overridden by runtime shader
// parameters when those are enabled. However, many others are static codepath
// options that were cleaner or more convenient to code as static constants.
// GAMMA:
static const float crt_gamma_static = 2.5; // range [1, 5]
static const float lcd_gamma_static = 2.2; // range [1, 5]
// LEVELS MANAGEMENT:
// Control the final multiplicative image contrast:
static const float levels_contrast_static = 1.0; // range [0, 4)
// We auto-dim to avoid clipping between passes and restore brightness
// later. Control the dim factor here: Lower values clip less but crush
// blacks more (static only for now).
static const float levels_autodim_temp = 0.5; // range (0, 1]
// HALATION/DIFFUSION/BLOOM:
// Halation weight: How much energy should be lost to electrons bouncing
// around under the CRT glass and exciting random phosphors?
static const float halation_weight_static = 0.0; // range [0, 1]
// Refractive diffusion weight: How much light should spread/diffuse from
// refracting through the CRT glass?
static const float diffusion_weight_static = 0.075; // range [0, 1]
// Underestimate brightness: Bright areas bloom more, but we can base the
// bloom brightpass on a lower brightness to sharpen phosphors, or a higher
// brightness to soften them. Low values clip, but >= 0.8 looks okay.
static const float bloom_underestimate_levels_static = 0.8; // range [0, 5]
// Blur all colors more than necessary for a softer phosphor bloom?
static const float bloom_excess_static = 0.0; // range [0, 1]
// The BLOOM_APPROX pass approximates a phosphor blur early on with a small
// blurred resize of the input (convergence offsets are applied as well).
// There are three filter options (static option only for now):
// 0.) Bilinear resize: A fast, close approximation to a 4x4 resize
// if min_allowed_viewport_triads and the BLOOM_APPROX resolution are sane
// and beam_max_sigma is low.
// 1.) 3x3 resize blur: Medium speed, soft/smeared from bilinear blurring,
// always uses a static sigma regardless of beam_max_sigma or
// mask_num_triads_desired.
// 2.) True 4x4 Gaussian resize: Slowest, technically correct.
// These options are more pronounced for the fast, unbloomed shader version.
static const float bloom_approx_filter_static = 2.0;
// ELECTRON BEAM SCANLINE DISTRIBUTION:
// How many scanlines should contribute light to each pixel? Using more
// scanlines is slower (especially for a generalized Gaussian) but less
// distorted with larger beam sigmas (especially for a pure Gaussian). The
// max_beam_sigma at which the closest unused weight is guaranteed <
// 1.0/255.0 (for a 3x antialiased pure Gaussian) is:
// 2 scanlines: max_beam_sigma = 0.2089; distortions begin ~0.34; 141.7 FPS pure, 131.9 FPS generalized
// 3 scanlines, max_beam_sigma = 0.3879; distortions begin ~0.52; 137.5 FPS pure; 123.8 FPS generalized
// 4 scanlines, max_beam_sigma = 0.5723; distortions begin ~0.70; 134.7 FPS pure; 117.2 FPS generalized
// 5 scanlines, max_beam_sigma = 0.7591; distortions begin ~0.89; 131.6 FPS pure; 112.1 FPS generalized
// 6 scanlines, max_beam_sigma = 0.9483; distortions begin ~1.08; 127.9 FPS pure; 105.6 FPS generalized
static const float beam_num_scanlines = 3.0; // range [2, 6]
// A generalized Gaussian beam varies shape with color too, not just width.
// It's slower but more flexible (static option only for now).
static const bool beam_generalized_gaussian = true;
// What kind of scanline antialiasing do you want?
// 0: Sample weights at 1x; 1: Sample weights at 3x; 2: Compute an integral
// Integrals are slow (especially for generalized Gaussians) and rarely any
// better than 3x antialiasing (static option only for now).
static const float beam_antialias_level = 1.0; // range [0, 2]
// Min/max standard deviations for scanline beams: Higher values widen and
// soften scanlines. Depending on other options, low min sigmas can alias.
static const float beam_min_sigma_static = 0.02; // range (0, 1]
static const float beam_max_sigma_static = 0.3; // range (0, 1]
// Beam width varies as a function of color: A power function (0) is more
// configurable, but a spherical function (1) gives the widest beam
// variability without aliasing (static option only for now).
static const float beam_spot_shape_function = 0.0;
// Spot shape power: Powers <= 1 give smoother spot shapes but lower
// sharpness. Powers >= 1.0 are awful unless min/max sigmas are close.
static const float beam_spot_power_static = 1.0/3.0; // range (0, 16]
// Generalized Gaussian max shape parameters: Higher values give flatter
// scanline plateaus and steeper dropoffs, simultaneously widening and
// sharpening scanlines at the cost of aliasing. 2.0 is pure Gaussian, and
// values > ~40.0 cause artifacts with integrals.
static const float beam_min_shape_static = 2.0; // range [2, 32]
static const float beam_max_shape_static = 4.0; // range [2, 32]
// Generalized Gaussian shape power: Affects how quickly the distribution
// changes shape from Gaussian to steep/plateaued as color increases from 0
// to 1.0. Higher powers appear softer for most colors, and lower powers
// appear sharper for most colors.
static const float beam_shape_power_static = 1.0/4.0; // range (0, 16]
// What filter should be used to sample scanlines horizontally?
// 0: Quilez (fast), 1: Gaussian (configurable), 2: Lanczos2 (sharp)
static const float beam_horiz_filter_static = 0.0;
// Standard deviation for horizontal Gaussian resampling:
static const float beam_horiz_sigma_static = 0.35; // range (0, 2/3]
// Do horizontal scanline sampling in linear RGB (correct light mixing),
// gamma-encoded RGB (darker, hard spot shape, may better match bandwidth-
// limiting circuitry in some CRT's), or a weighted avg.?
static const float beam_horiz_linear_rgb_weight_static = 1.0; // range [0, 1]
// Simulate scanline misconvergence? This needs 3x horizontal texture
// samples and 3x texture samples of BLOOM_APPROX and HALATION_BLUR in
// later passes (static option only for now).
static const bool beam_misconvergence = true;
// Convergence offsets in x/y directions for R/G/B scanline beams in units
// of scanlines. Positive offsets go right/down; ranges [-2, 2]
static const float2 convergence_offsets_r_static = float2(0.1, 0.2);
static const float2 convergence_offsets_g_static = float2(0.3, 0.4);
static const float2 convergence_offsets_b_static = float2(0.5, 0.6);
// Detect interlacing (static option only for now)?
static const bool interlace_detect_static = true;
// Assume 1080-line sources are interlaced?
static const bool interlace_1080i_static = false;
// For interlaced sources, assume TFF (top-field first) or BFF order?
// (Whether this matters depends on the nature of the interlaced input.)
static const bool interlace_bff_static = false;
// ANTIALIASING:
// What AA level do you want for curvature/overscan/subpixels? Options:
// 0x (none), 1x (sample subpixels), 4x, 5x, 6x, 7x, 8x, 12x, 16x, 20x, 24x
// (Static option only for now)
static const float aa_level = 12.0; // range [0, 24]
// What antialiasing filter do you want (static option only)? Options:
// 0: Box (separable), 1: Box (cylindrical),
// 2: Tent (separable), 3: Tent (cylindrical),
// 4: Gaussian (separable), 5: Gaussian (cylindrical),
// 6: Cubic* (separable), 7: Cubic* (cylindrical, poor)
// 8: Lanczos Sinc (separable), 9: Lanczos Jinc (cylindrical, poor)
// * = Especially slow with RUNTIME_ANTIALIAS_WEIGHTS
static const float aa_filter = 6.0; // range [0, 9]
// Flip the sample grid on odd/even frames (static option only for now)?
static const bool aa_temporal = false;
// Use RGB subpixel offsets for antialiasing? The pixel is at green, and
// the blue offset is the negative r offset; range [0, 0.5]
static const float2 aa_subpixel_r_offset_static = float2(-1.0/3.0, 0.0);//float2(0.0);
// Cubics: See http://www.imagemagick.org/Usage/filter/#mitchell
// 1.) "Keys cubics" with B = 1 - 2C are considered the highest quality.
// 2.) C = 0.5 (default) is Catmull-Rom; higher C's apply sharpening.
// 3.) C = 1.0/3.0 is the Mitchell-Netravali filter.
// 4.) C = 0.0 is a soft spline filter.
static const float aa_cubic_c_static = 0.5; // range [0, 4]
// Standard deviation for Gaussian antialiasing: Try 0.5/aa_pixel_diameter.
static const float aa_gauss_sigma_static = 0.5; // range [0.0625, 1.0]
// PHOSPHOR MASK:
// Mask type: 0 = aperture grille, 1 = slot mask, 2 = EDP shadow mask
static const float mask_type_static = 1.0; // range [0, 2]
// We can sample the mask three ways. Pick 2/3 from: Pretty/Fast/Flexible.
// 0.) Sinc-resize to the desired dot pitch manually (pretty/slow/flexible).
// This requires PHOSPHOR_MASK_MANUALLY_RESIZE to be #defined.
// 1.) Hardware-resize to the desired dot pitch (ugly/fast/flexible). This
// is halfway decent with LUT mipmapping but atrocious without it.
// 2.) Tile it without resizing at a 1:1 texel:pixel ratio for flat coords
// (pretty/fast/inflexible). Each input LUT has a fixed dot pitch.
// This mode reuses the same masks, so triads will be enormous unless
// you change the mask LUT filenames in your .cgp file.
static const float mask_sample_mode_static = 0.0; // range [0, 2]
// Prefer setting the triad size (0.0) or number on the screen (1.0)?
// If RUNTIME_PHOSPHOR_BLOOM_SIGMA isn't #defined, the specified triad size
// will always be used to calculate the full bloom sigma statically.
static const float mask_specify_num_triads_static = 0.0; // range [0, 1]
// Specify the phosphor triad size, in pixels. Each tile (usually with 8
// triads) will be rounded to the nearest integer tile size and clamped to
// obey minimum size constraints (imposed to reduce downsize taps) and
// maximum size constraints (imposed to have a sane MASK_RESIZE FBO size).
// To increase the size limit, double the viewport-relative scales for the
// two MASK_RESIZE passes in crt-royale.cgp and user-cgp-constants.h.
// range [1, mask_texture_small_size/mask_triads_per_tile]
static const float mask_triad_size_desired_static = 24.0 / 8.0;
// If mask_specify_num_triads is 1.0/true, we'll go by this instead (the
// final size will be rounded and constrained as above); default 480.0
static const float mask_num_triads_desired_static = 480.0;
// How many lobes should the sinc/Lanczos resizer use? More lobes require
// more samples and avoid moire a bit better, but some is unavoidable
// depending on the destination size (static option for now).
static const float mask_sinc_lobes = 3.0; // range [2, 4]
// The mask is resized using a variable number of taps in each dimension,
// but some Cg profiles always fetch a constant number of taps no matter
// what (no dynamic branching). We can limit the maximum number of taps if
// we statically limit the minimum phosphor triad size. Larger values are
// faster, but the limit IS enforced (static option only, forever);
// range [1, mask_texture_small_size/mask_triads_per_tile]
// TODO: Make this 1.0 and compensate with smarter sampling!
static const float mask_min_allowed_triad_size = 2.0;
// GEOMETRY:
// Geometry mode:
// 0: Off (default), 1: Spherical mapping (like cgwg's),
// 2: Alt. spherical mapping (more bulbous), 3: Cylindrical/Trinitron
static const float geom_mode_static = 0.0; // range [0, 3]
// Radius of curvature: Measured in units of your viewport's diagonal size.
static const float geom_radius_static = 2.0; // range [1/(2*pi), 1024]
// View dist is the distance from the player to their physical screen, in
// units of the viewport's diagonal size. It controls the field of view.
static const float geom_view_dist_static = 2.0; // range [0.5, 1024]
// Tilt angle in radians (clockwise around up and right vectors):
static const float2 geom_tilt_angle_static = float2(0.0, 0.0); // range [-pi, pi]
// Aspect ratio: When the true viewport size is unknown, this value is used
// to help convert between the phosphor triad size and count, along with
// the mask_resize_viewport_scale constant from user-cgp-constants.h. Set
// this equal to Retroarch's display aspect ratio (DAR) for best results;
// range [1, geom_max_aspect_ratio from user-cgp-constants.h];
// default (256/224)*(54/47) = 1.313069909 (see below)
static const float geom_aspect_ratio_static = 1.313069909;
// Before getting into overscan, here's some general aspect ratio info:
// - DAR = display aspect ratio = SAR * PAR; as in your Retroarch setting
// - SAR = storage aspect ratio = DAR / PAR; square pixel emulator frame AR
// - PAR = pixel aspect ratio = DAR / SAR; holds regardless of cropping
// Geometry processing has to "undo" the screen-space 2D DAR to calculate
// 3D view vectors, then reapplies the aspect ratio to the simulated CRT in
// uv-space. To ensure the source SAR is intended for a ~4:3 DAR, either:
// a.) Enable Retroarch's "Crop Overscan"
// b.) Readd horizontal padding: Set overscan to e.g. N*(1.0, 240.0/224.0)
// Real consoles use horizontal black padding in the signal, but emulators
// often crop this without cropping the vertical padding; a 256x224 [S]NES
// frame (8:7 SAR) is intended for a ~4:3 DAR, but a 256x240 frame is not.
// The correct [S]NES PAR is 54:47, found by blargg and NewRisingSun:
// http://board.zsnes.com/phpBB3/viewtopic.php?f=22&t=11928&start=50
// http://forums.nesdev.com/viewtopic.php?p=24815#p24815
// For flat output, it's okay to set DAR = [existing] SAR * [correct] PAR
// without doing a. or b., but horizontal image borders will be tighter
// than vertical ones, messing up curvature and overscan. Fixing the
// padding first corrects this.
// Overscan: Amount to "zoom in" before cropping. You can zoom uniformly
// or adjust x/y independently to e.g. readd horizontal padding, as noted
// above: Values < 1.0 zoom out; range (0, inf)
static const float2 geom_overscan_static = float2(1.0, 1.0);// * 1.005 * (1.0, 240/224.0)
// Compute a proper pixel-space to texture-space matrix even without ddx()/
// ddy()? This is ~8.5% slower but improves antialiasing/subpixel filtering
// with strong curvature (static option only for now).
static const bool geom_force_correct_tangent_matrix = true;
// BORDERS:
// Rounded border size in texture uv coords:
static const float border_size_static = 0.015; // range [0, 0.5]
// Border darkness: Moderate values darken the border smoothly, and high
// values make the image very dark just inside the border:
static const float border_darkness_static = 2.0; // range [0, inf)
// Border compression: High numbers compress border transitions, narrowing
// the dark border area.
static const float border_compress_static = 2.5; // range [1, inf)
#endif // USER_SETTINGS_H

View File

@@ -1,97 +0,0 @@
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
// PASS SETTINGS:
// gamma-management.h needs to know what kind of pipeline we're using and
// what pass this is in that pipeline. This will become obsolete if/when we
// can #define things like this in the .cgp preset file.
//#define GAMMA_ENCODE_EVERY_FBO
//#define FIRST_PASS
//#define LAST_PASS
//#define SIMULATE_CRT_ON_LCD
//#define SIMULATE_GBA_ON_LCD
//#define SIMULATE_LCD_ON_CRT
//#define SIMULATE_GBA_ON_CRT
////////////////////////////////// INCLUDES //////////////////////////////////
// #included by vertex shader:
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Extra vertex-to-fragment data for the horizontal blur9fast pass.
// VS_Blur9Fast_Horizontal writes it and PS_Blur9Fast_Horizontal reads it.
struct out_vertex_p4
{
// uv-space step between blur taps; the vertex shader zeroes the y component
// so that the blur only moves along x.
float2 blur_dxdy : TEXCOORD1;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// and computing the per-pass blur step for the horizontal blur9fast pass.
//   id       - vertex index (SV_VertexID); ids 0/1/2 select the three corners.
//   position - clip-space position derived from texcoord.
//   texcoord - uv coordinate: (0,0) for id 0, (0,2) for id 1, (2,0) for id 2.
//   OUT      - receives blur_dxdy, the uv step between blur taps (x only).
void VS_Blur9Fast_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p4 OUT)
{
// Build one oversized triangle from the vertex id alone (no vertex buffer):
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
// Map uv [0, 2] to clip space: x -> [-1, 3], y -> [1, -3] (y is flipped).
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
// Disabled alternative size definitions kept from earlier experiments:
/* float2 texture_size = 1.0/NormalizedNativePixelSize;
float2 output_size = (ViewportSize*BufferToViewportRatio);
float2 video_size = 1.0/NormalizedNativePixelSize;
*/
// float2 texture_size = float2(320.0, 240.0);
// NOTE(review): the size used here is HALATION_BLUR_texture_size while the
// fragment shader samples BLUR9FAST_VERTICAL; presumably both buffers have
// the same dimensions - confirm against the pass setup.
float2 texture_size = HALATION_BLUR_texture_size;
float2 output_size = VIEWPORT_SIZE;
// float2 output_size = VIEWPORT_SIZE*NormalizedNativePixelSize/float2(320.0, 240.0);
// float2 output_size = float2(320.0, 240.0);
// float2 output_size = 1.0/NormalizedNativePixelSize;
// Get the uv sample distance between output pixels. Blurs are not generic
// Gaussian resizers, and correct blurs require:
// 1.) IN.output_size == IN.video_size * 2^m, where m is an integer <= 0.
// 2.) mipmap_inputN = "true" for this pass in .cgp preset if m != 0
// 3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs
// Gaussian resizers would upsize using the distance between input texels
// (not output pixels), but we avoid this and consistently blur at the
// destination size. Otherwise, combining statically calculated weights
// with bilinear sample exploitation would result in terrible artifacts.
// NOTE(review): video_size is not declared in this function (its only local
// definition sits in the disabled block above); presumably it is supplied by
// an include or macro outside this file - confirm before touching the locals.
const float2 dxdy_scale = video_size/output_size;
const float2 dxdy = dxdy_scale/texture_size;
// This blur is horizontal-only, so zero out the vertical offset:
OUT.blur_dxdy = float2(dxdy.x, 0.0);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Fragment shader for the horizontal blur9fast pass: samples the
// BLUR9FAST_VERTICAL input with tex2Dblur9fast, stepping by the uv offset
// prepared in the vertex shader, and returns the encoded result with alpha 1.
float4 PS_Blur9Fast_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p4 VAR) : SV_Target
{
    // Blurred RGB at this fragment, using the precomputed tap step:
    const float3 blurred_rgb = tex2Dblur9fast(BLUR9FAST_VERTICAL, vTexCoord, VAR.blur_dxdy);
    // Attach an opaque alpha, then hand the pixel to the output encoder:
    const float4 opaque_pixel = float4(blurred_rgb, 1.0);
    return encode_output(opaque_pixel);
}

View File

@@ -1,95 +0,0 @@
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
// PASS SETTINGS:
// gamma-management.h needs to know what kind of pipeline we're using and
// what pass this is in that pipeline. This will become obsolete if/when we
// can #define things like this in the .cgp preset file.
//#define GAMMA_ENCODE_EVERY_FBO
//#define FIRST_PASS
//#define LAST_PASS
//#define SIMULATE_CRT_ON_LCD
//#define SIMULATE_GBA_ON_LCD
//#define SIMULATE_LCD_ON_CRT
//#define SIMULATE_GBA_ON_CRT
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Extra vertex-to-fragment data for the vertical blur9fast pass.
// VS_Blur9Fast_Vertical writes it and PS_Blur9Fast_Vertical reads it.
struct out_vertex_p3
{
// uv-space step between blur taps; the vertex shader zeroes the x component
// so that the blur only moves along y.
float2 blur_dxdy : TEXCOORD1;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// and computing the per-pass blur step for the vertical blur9fast pass.
//   id       - vertex index (SV_VertexID); ids 0/1/2 select the three corners.
//   position - clip-space position derived from texcoord.
//   texcoord - uv coordinate: (0,0) for id 0, (0,2) for id 1, (2,0) for id 2.
//   OUT      - receives blur_dxdy, the uv step between blur taps (y only).
void VS_Blur9Fast_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p3 OUT)
{
// Build one oversized triangle from the vertex id alone (no vertex buffer):
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
// Map uv [0, 2] to clip space: x -> [-1, 3], y -> [1, -3] (y is flipped).
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
// Disabled alternative size definitions kept from earlier experiments:
/*
float2 texture_size = 1.0/NormalizedNativePixelSize;
float2 output_size = (ViewportSize*BufferToViewportRatio);
float2 video_size = 1.0/NormalizedNativePixelSize;
*/
// float2 texture_size = float2(320.0, 240.0);
// NOTE(review): the size used here is BLUR9FAST_VERTICAL_texture_size while
// the fragment shader samples BLOOM_APPROX; presumably both buffers have the
// same dimensions - confirm against the pass setup.
float2 texture_size = BLUR9FAST_VERTICAL_texture_size;
float2 output_size = VIEWPORT_SIZE;
// float2 output_size = VIEWPORT_SIZE/4.0;
// float2 output_size = VIEWPORT_SIZE*NormalizedNativePixelSize/float2(320.0, 240.0);
// float2 output_size = 1.0/NormalizedNativePixelSize;
// Get the uv sample distance between output pixels. Blurs are not generic
// Gaussian resizers, and correct blurs require:
// 1.) IN.output_size == IN.video_size * 2^m, where m is an integer <= 0.
// 2.) mipmap_inputN = "true" for this pass in .cgp preset if m != 0
// 3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs
// Gaussian resizers would upsize using the distance between input texels
// (not output pixels), but we avoid this and consistently blur at the
// destination size. Otherwise, combining statically calculated weights
// with bilinear sample exploitation would result in terrible artifacts.
// NOTE(review): video_size is not declared in this function (its only local
// definition sits in the disabled block above); presumably it is supplied by
// an include or macro outside this file - confirm before touching the locals.
const float2 dxdy_scale = video_size/output_size;
const float2 dxdy = dxdy_scale/texture_size;
// This blur is vertical-only, so zero out the horizontal offset:
OUT.blur_dxdy = float2(0.0, dxdy.y);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Fragment shader for the vertical blur9fast pass: samples the BLOOM_APPROX
// input with tex2Dblur9fast, stepping by the uv offset prepared in the vertex
// shader, and returns the encoded result with alpha 1.
float4 PS_Blur9Fast_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p3 VAR) : SV_Target
{
    // Blurred RGB at this fragment, using the precomputed tap step:
    const float3 blurred_rgb = tex2Dblur9fast(BLOOM_APPROX, vTexCoord, VAR.blur_dxdy);
    // Attach an opaque alpha, then hand the pixel to the output encoder:
    const float4 opaque_pixel = float4(blurred_rgb, 1.0);
    return encode_output(opaque_pixel);
}

View File

@@ -1,363 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////// INCLUDES //////////////////////////////////
// Alias the ORIG_LINEARIZED size names onto the VERTICAL_SCANLINES sizes.
// NOTE(review): the mapping is crossed (video_size -> *_texture_size and
// texture_size -> *_video_size). This may be harmless if both names resolve
// to the same value elsewhere, but confirm it is intentional.
#define ORIG_LINEARIZEDvideo_size VERTICAL_SCANLINES_texture_size
#define ORIG_LINEARIZEDtexture_size VERTICAL_SCANLINES_video_size
// Horizontal scale factor for the bloom approximation pass (4/3).
#define bloom_approx_scale_x (4.0/3.0)
// Upper bound on viewport width, computed as 1080 * 1024 * (4/3).
// NOTE(review): presumably consumed by the included settings headers - confirm.
static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
#include "../include/scanline-functions.fxh"
#include "../include/bloom-functions.fxh"
/////////////////////////////////// HELPERS //////////////////////////////////
// Resizes and blurs `tex` around `tex_uv` using a 4x4 grid of samples. Each
// sample is weighted by a Gaussian of its offset from tex_uv (after scaling the
// offset by tex_uv_to_pixel_scale), and the weighted rgb sum is divided by the
// sum of the weights before being returned.
float3 tex2Dresize_gaussian4x4(const sampler2D tex, const float2 tex_uv,
const float2 dxdy, const float2 texture_size, const float2 texture_size_inv,
const float2 tex_uv_to_pixel_scale, const float sigma)
{
// Requires: 1.) All requirements of gamma-management.h must be satisfied!
// 2.) filter_linearN must == "true" in your .cgp preset.
// 3.) mipmap_inputN must == "true" in your .cgp preset if
// IN.output_size << SRC.video_size.
// 4.) dxdy should contain the uv pixel spacing:
// dxdy = max(float2(1.0),
// SRC.video_size/IN.output_size)/SRC.texture_size;
// 5.) texture_size == SRC.texture_size
// 6.) texture_size_inv == float2(1.0)/SRC.texture_size
// 7.) tex_uv_to_pixel_scale == IN.output_size *
// SRC.texture_size / SRC.video_size;
// 8.) sigma is the desired Gaussian standard deviation, in
// terms of output pixels. It should be < ~0.66171875 to
// ensure the first unused sample (outside the 4x4 box) has
// a weight < 1.0/256.0.
// Returns: A true 4x4 Gaussian resize of the input.
// Description:
// Given correct inputs, this Gaussian resizer samples 4 pixel locations
// along each downsized dimension and/or 4 texel locations along each
// upsized dimension. It computes dynamic weights based on the pixel-space
// distance of each sample from the destination pixel. It is arbitrarily
// resizable and higher quality than tex2Dblur3x3_resize, but it's slower.
// TODO: Move this to a more suitable file once there are others like it.
// denom_inv == 1/(2*sigma^2): the factor applied to squared distance in exp().
const float denom_inv = 0.5/(sigma*sigma);
// We're taking 4x4 samples, and we're snapping to texels for upsizing.
// Find texture coords for sample 5 (second row, second column):
const float2 curr_texel = tex_uv * texture_size;
// under_half is defined outside this function; presumably a value slightly
// below 0.5 -- TODO confirm.
const float2 prev_texel =
floor(curr_texel - under_half.xx) + 0.5.xx;
const float2 prev_texel_uv = prev_texel * texture_size_inv;
// snap is 1.0 on each axis where dxdy is at most one texel and 0.0 otherwise,
// so the lerp below picks the texel-snapped coordinate on snapped axes and the
// half-step-offset coordinate on the others.
const float2 snap = float2(dxdy <= texture_size_inv);
const float2 sample5_downsize_uv = tex_uv - 0.5 * dxdy;
const float2 sample5_uv = lerp(sample5_downsize_uv, prev_texel_uv, snap);
// Compute texture coords for other samples:
// Samples are numbered row-major: sample(4*row + col) sits at
// sample0_uv + (col * dxdy.x, row * dxdy.y) for row, col in 0..3.
const float2 dx = float2(dxdy.x, 0.0);
const float2 sample0_uv = sample5_uv - dxdy;
const float2 sample10_uv = sample5_uv + dxdy;
const float2 sample15_uv = sample5_uv + 2.0 * dxdy;
const float2 sample1_uv = sample0_uv + dx;
const float2 sample2_uv = sample0_uv + 2.0 * dx;
const float2 sample3_uv = sample0_uv + 3.0 * dx;
const float2 sample4_uv = sample5_uv - dx;
const float2 sample6_uv = sample5_uv + dx;
const float2 sample7_uv = sample5_uv + 2.0 * dx;
const float2 sample8_uv = sample10_uv - 2.0 * dx;
const float2 sample9_uv = sample10_uv - dx;
const float2 sample11_uv = sample10_uv + dx;
const float2 sample12_uv = sample15_uv - 3.0 * dx;
const float2 sample13_uv = sample15_uv - 2.0 * dx;
const float2 sample14_uv = sample15_uv - dx;
// Load each sample:
const float3 sample0 = tex2D_linearize(tex, sample0_uv).rgb;
const float3 sample1 = tex2D_linearize(tex, sample1_uv).rgb;
const float3 sample2 = tex2D_linearize(tex, sample2_uv).rgb;
const float3 sample3 = tex2D_linearize(tex, sample3_uv).rgb;
const float3 sample4 = tex2D_linearize(tex, sample4_uv).rgb;
const float3 sample5 = tex2D_linearize(tex, sample5_uv).rgb;
const float3 sample6 = tex2D_linearize(tex, sample6_uv).rgb;
const float3 sample7 = tex2D_linearize(tex, sample7_uv).rgb;
const float3 sample8 = tex2D_linearize(tex, sample8_uv).rgb;
const float3 sample9 = tex2D_linearize(tex, sample9_uv).rgb;
const float3 sample10 = tex2D_linearize(tex, sample10_uv).rgb;
const float3 sample11 = tex2D_linearize(tex, sample11_uv).rgb;
const float3 sample12 = tex2D_linearize(tex, sample12_uv).rgb;
const float3 sample13 = tex2D_linearize(tex, sample13_uv).rgb;
const float3 sample14 = tex2D_linearize(tex, sample14_uv).rgb;
const float3 sample15 = tex2D_linearize(tex, sample15_uv).rgb;
// Compute destination pixel offsets for each sample:
const float2 dest_pixel = tex_uv * tex_uv_to_pixel_scale;
const float2 sample0_offset = sample0_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample1_offset = sample1_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample2_offset = sample2_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample3_offset = sample3_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample4_offset = sample4_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample5_offset = sample5_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample6_offset = sample6_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample7_offset = sample7_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample8_offset = sample8_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample9_offset = sample9_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample10_offset = sample10_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample11_offset = sample11_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample12_offset = sample12_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample13_offset = sample13_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample14_offset = sample14_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample15_offset = sample15_uv * tex_uv_to_pixel_scale - dest_pixel;
// Compute Gaussian sample weights:
// LENGTH_SQ is defined outside this file chunk; presumably the squared vector
// length -- TODO confirm.
const float w0 = exp(-LENGTH_SQ(sample0_offset) * denom_inv);
const float w1 = exp(-LENGTH_SQ(sample1_offset) * denom_inv);
const float w2 = exp(-LENGTH_SQ(sample2_offset) * denom_inv);
const float w3 = exp(-LENGTH_SQ(sample3_offset) * denom_inv);
const float w4 = exp(-LENGTH_SQ(sample4_offset) * denom_inv);
const float w5 = exp(-LENGTH_SQ(sample5_offset) * denom_inv);
const float w6 = exp(-LENGTH_SQ(sample6_offset) * denom_inv);
const float w7 = exp(-LENGTH_SQ(sample7_offset) * denom_inv);
const float w8 = exp(-LENGTH_SQ(sample8_offset) * denom_inv);
const float w9 = exp(-LENGTH_SQ(sample9_offset) * denom_inv);
const float w10 = exp(-LENGTH_SQ(sample10_offset) * denom_inv);
const float w11 = exp(-LENGTH_SQ(sample11_offset) * denom_inv);
const float w12 = exp(-LENGTH_SQ(sample12_offset) * denom_inv);
const float w13 = exp(-LENGTH_SQ(sample13_offset) * denom_inv);
const float w14 = exp(-LENGTH_SQ(sample14_offset) * denom_inv);
const float w15 = exp(-LENGTH_SQ(sample15_offset) * denom_inv);
// Normalizing by the weight sum keeps the result an average of the samples
// regardless of how much of the Gaussian falls outside the 4x4 grid.
const float weight_sum_inv = 1.0/(
w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 +
w8 +w9 + w10 + w11 + w12 + w13 + w14 + w15);
// Weight and sum the samples:
const float3 sum = w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 +
w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 +
w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 +
w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15;
return sum * weight_sum_inv;
}
///////////////////////////////// STRUCTURES /////////////////////////////////
// Values computed per vertex by VS_Bloom_Approx and read by PS_Bloom_Approx.
struct out_vertex_p2
{
// uv at which PS_Bloom_Approx samples ORIG_LINEARIZED.
float2 tex_uv : TEXCOORD1;
// uv spacing between blur samples (dxdy scale times texture_size_inv).
float2 blur_dxdy : TEXCOORD2;
// (1, 1 or 2) / ORIG_LINEARIZEDtexture_size; the y multiple is 2 when
// is_interlaced() reports true. Scales the convergence offsets.
float2 uv_scanline_step : TEXCOORD3;
// Second argument handed to get_bloom_approx_sigma().
float estimated_viewport_size_x : TEXCOORD4;
// 1.0 / ORIG_LINEARIZEDtexture_size.
float2 texture_size_inv : TEXCOORD5;
// output_size * ORIG_LINEARIZEDtexture_size / ORIG_LINEARIZEDvideo_size.
float2 tex_uv_to_pixel_scale : TEXCOORD6;
// Copy of VIEWPORT_SIZE as seen by the vertex shader.
float2 output_size : TEXCOORD7;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex shader for the bloom-approximation pass. Emits one triangle that
// covers the whole screen and precomputes every sampling constant that
// PS_Bloom_Approx needs (source uv, blur step, pixel scale, scanline step).
void VS_Bloom_Approx(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p2 OUT)
{
    // Vertex ids 0, 1, 2 map to uv (0,0), (0,2), (2,0).
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    float2 texture_size = BLOOM_APPROX_texture_size;
    float2 output_size = VIEWPORT_SIZE;
    OUT.output_size = output_size;

    // Same uv derivation as blurs/vertex-shader-blur-one-pass-resize.h, but
    // retargeted at a different source image (ORIG_LINEARIZED).
    const float2 video_uv = texcoord * texture_size/video_size;
    OUT.tex_uv = video_uv * ORIG_LINEARIZEDvideo_size /
        ORIG_LINEARIZEDtexture_size;

    // The previous pass (vertical scanlines) had a viewport y scale, which
    // lets us derive a better runtime sigma:
    // OUT.estimated_viewport_size_x = video_size.y * geom_aspect_ratio_x/geom_aspect_ratio_y;
    OUT.estimated_viewport_size_x = video_size.y * texture_size.x/texture_size.y;

    // uv distance between output pixels. A resize blur tolerates arbitrary
    // upsizing when filter_linearN = "true" and arbitrary downsizing when
    // mipmap_inputN = "true" as well. The true 4x4 Gaussian resize
    // (bloom_approx_filter > 1.5) snaps to texels when upsizing and samples
    // the nearest 4, so its step is never smaller than one source texel;
    // the other filters use the raw ratio (tex2Dblur3x3_resize samples
    // between texels even for upsizing).
    const float2 source_texel_uv = 1.0.xx/ORIG_LINEARIZEDtexture_size;
    const float2 source_per_output = ORIG_LINEARIZEDvideo_size/output_size;
    const float2 step_scale = (bloom_approx_filter > 1.5) ?
        max(source_per_output, 1.0.xx) : source_per_output;
    OUT.blur_dxdy = step_scale * source_texel_uv;

    // Extra inputs that only tex2Dresize_gaussian4x4 requires:
    OUT.tex_uv_to_pixel_scale = output_size *
        ORIG_LINEARIZEDtexture_size / ORIG_LINEARIZEDvideo_size;
    OUT.texture_size_inv = source_texel_uv;

    // Re-detect interlacing so convergence offsets can be applied in this
    // pass. The (texel, scanline) step multiple is 1 for progressive and 2
    // for interlaced content.
    const float2 orig_video_size = ORIG_LINEARIZEDvideo_size;
    const float scanline_multiple = 1.0 + float(is_interlaced(orig_video_size.y));
    // uv distance between (texels, same-field scanlines):
    OUT.uv_scanline_step = float2(1.0, scanline_multiple) / ORIG_LINEARIZEDtexture_size;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Pixel shader for the bloom-approximation pass. Samples ORIG_LINEARIZED with
// the filter selected by bloom_approx_filter (> 1.5: 4x4 Gaussian resize,
// > 0.5: 3x3 resize blur, otherwise a single bilinear sample). When
// beam_misconvergence is set it samples once per color beam at
// convergence-shifted coordinates and recombines the channels. Returns the
// encoded rgb color with alpha 1.0.
float4 PS_Bloom_Approx(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p2 VAR) : SV_Target
{
    // Would a viewport-relative size work better for this pass? (No.)
    // PROS:
    // 1.) Instead of writing an absolute size to user-cgp-constants.h, we'd
    //     write a viewport scale. That number could be used to directly scale
    //     the viewport-resolution bloom sigma and/or triad size to a smaller
    //     scale. This way, we could calculate an optimal dynamic sigma no
    //     matter how the dot pitch is specified.
    // CONS:
    // 1.) Texel smearing would be much worse at small viewport sizes, but
    //     performance would be much worse at large viewport sizes, so there
    //     would be no easy way to calculate a decent scale.
    // 2.) Worse, we could no longer get away with using a constant-size blur!
    //     Instead, we'd have to face all the same difficulties as the real
    //     phosphor bloom, which requires static #ifdefs to decide the blur
    //     size based on the expected triad size...a dynamic value.
    // 3.) Like the phosphor bloom, we'd have less control over making the blur
    //     size correct for an optical blur. That said, we likely overblur (to
    //     maintain brightness) more than the eye would do by itself: 20/20
    //     human vision distinguishes ~1 arc minute, or 1/60 of a degree. The
    //     highest viewing angle recommendation I know of is THX's 40.04 degree
    //     recommendation, at which 20/20 vision can distinguish about 2402.4
    //     lines. Assuming the "TV lines" definition, that means 1201.2
    //     distinct light lines and 1201.2 distinct dark lines can be told
    //     apart, i.e. 1201.2 pairs of lines. This would correspond to 1201.2
    //     pairs of alternating lit/unlit phosphors, so 2402.4 phosphors total
    //     (if they're alternately lit). That's a max of 800.8 triads. Using
    //     a more popular 30 degree viewing angle recommendation, 20/20 vision
    //     can distinguish 1800 lines, or 600 triads of alternately lit
    //     phosphors. In contrast, we currently blur phosphors all the way
    //     down to 341.3 triads to ensure full brightness.
    // 4.) Realistically speaking, we're usually just going to use bilinear
    //     filtering in this pass anyway, but it only works well to limit
    //     bandwidth if it's done at a small constant scale.

    // Get the constants we need to sample:
    float2 output_size = VAR.output_size;
    //const sampler2D Source = ORIG_LINEARIZED;
    const float2 tex_uv = VAR.tex_uv;
    const float2 blur_dxdy = VAR.blur_dxdy;
    const float2 texture_size = ORIG_LINEARIZEDtexture_size;
    const float2 texture_size_inv = VAR.texture_size_inv;
    const float2 tex_uv_to_pixel_scale = VAR.tex_uv_to_pixel_scale;

    // Per-beam sample coordinates; only written and read when
    // beam_misconvergence is set.
    float2 tex_uv_r, tex_uv_g, tex_uv_b;
    if(beam_misconvergence)
    {
        const float2 uv_scanline_step = VAR.uv_scanline_step;
        const float2 convergence_offsets_r = get_convergence_offsets_r_vector();
        const float2 convergence_offsets_g = get_convergence_offsets_g_vector();
        const float2 convergence_offsets_b = get_convergence_offsets_b_vector();
        tex_uv_r = tex_uv - convergence_offsets_r * uv_scanline_step;
        tex_uv_g = tex_uv - convergence_offsets_g * uv_scanline_step;
        tex_uv_b = tex_uv - convergence_offsets_b * uv_scanline_step;
    }

    // Get the blur sigma:
    const float bloom_approx_sigma = get_bloom_approx_sigma(output_size.x,
        VAR.estimated_viewport_size_x);

    // Sample the resized and blurred texture, and apply convergence offsets if
    // necessary. Applying convergence offsets here triples our samples from
    // 16/9/1 to 48/27/3, but faster and easier than sampling BLOOM_APPROX and
    // HALATION_BLUR 3 times at full resolution every time they're used.
    float3 color_r, color_g, color_b, color;
    if(bloom_approx_filter > 1.5)
    {
        // Use a 4x4 Gaussian resize. This is slower but technically correct.
        if(beam_misconvergence)
        {
            color_r = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_r,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
            color_g = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_g,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
            color_b = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_b,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
        }
        else
        {
            color = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
        }
    }
    else if(bloom_approx_filter > 0.5)
    {
        // Use a 3x3 resize blur. This is the softest option, because we're
        // blurring already blurry bilinear samples. It doesn't play quite as
        // nicely with convergence offsets, but it has its charms.
        if(beam_misconvergence)
        {
            color_r = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_r,
                blur_dxdy, bloom_approx_sigma);
            color_g = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_g,
                blur_dxdy, bloom_approx_sigma);
            color_b = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_b,
                blur_dxdy, bloom_approx_sigma);
        }
        else
        {
            // Pass the runtime sigma here as well, so the blur strength does
            // not change depending on whether beam_misconvergence is enabled.
            color = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv,
                blur_dxdy, bloom_approx_sigma);
        }
    }
    else
    {
        // Use bilinear sampling. This approximates a 4x4 Gaussian resize MUCH
        // better than tex2Dblur3x3_resize for the very small sigmas we're
        // likely to use at small output resolutions. (This estimate becomes
        // too sharp above ~400x300, but the blurs break down above that
        // resolution too, unless min_allowed_viewport_triads is high enough to
        // keep bloom_approx_scale_x/min_allowed_viewport_triads < ~1.1658025.)
        if(beam_misconvergence)
        {
            color_r = tex2D_linearize(ORIG_LINEARIZED, tex_uv_r).rgb;
            color_g = tex2D_linearize(ORIG_LINEARIZED, tex_uv_g).rgb;
            color_b = tex2D_linearize(ORIG_LINEARIZED, tex_uv_b).rgb;
        }
        else
        {
            color = tex2D_linearize(ORIG_LINEARIZED, tex_uv).rgb;
        }
    }

    // Pack the colors from the red/green/blue beams into a single vector:
    if(beam_misconvergence)
    {
        color = float3(color_r.r, color_g.g, color_b.b);
    }

    // Encode and output the blurred image:
    return encode_output(float4(color, 1.0));
}

View File

@@ -1,129 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/bloom-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
#include "../include/scanline-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Values computed per vertex by VS_Bloom_Horizontal and read by
// PS_Bloom_Horizontal.
struct out_vertex_p10
{
// Centered texcoord scaled by texture_size/video_size; used for all texture
// reads in the pixel shader.
float2 video_uv : TEXCOORD1;
// Horizontal-only blur step: (1.0/texture_size.x, 0.0).
float2 bloom_dxdy : TEXCOORD2;
// Result of get_min_sigma_to_blur_triad(); fed to get_final_bloom_sigma().
float bloom_sigma_runtime : TEXCOORD3;
// sin and cos of (geom_x_tilt, geom_y_tilt), passed to transform().
float2 sinangle : TEXCOORD4;
float2 cosangle : TEXCOORD5;
// Result of maxscale(sinangle, cosangle), passed to transform().
float3 stretch : TEXCOORD6;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex shader for the horizontal bloom pass. Emits one screen-covering
// triangle, applies the user's screen-centering offset to the texcoord, and
// precomputes the blur step, runtime bloom sigma and tilt terms for
// PS_Bloom_Horizontal.
void VS_Bloom_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p10 OUT)
{
    // Vertex ids 0, 1, 2 map to uv (0,0), (0,2), (2,0).
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    float2 texture_size = BLOOM_HORIZONTAL_texture_size;
    float2 output_size = VIEWPORT_SIZE;

    // Screen centering (applied after the position was derived, so only the
    // sampling coordinates shift):
    texcoord = texcoord - float2(centerx,centery)/100.0;
    float2 tex_uv = texcoord;

    // Input textures are addressed in video uv space:
    OUT.video_uv = tex_uv * texture_size/video_size;

    // The input is the vertically blurred brightpass; this pass blurs it
    // horizontally and must NOT resize, so the step is exactly one input
    // texel along x and nothing along y.
    OUT.bloom_dxdy = float2(1.0/texture_size.x, 0.0);

    // Runtime bloom sigma, in case the pixel shader needs it:
    const float tile_width = get_resized_mask_tile_size(
        output_size, output_size * mask_resize_viewport_scale, false).x;
    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
        tile_width / mask_triads_per_tile, bloom_diff_thresh);

    // Tilt terms consumed by transform() in the pixel shader:
    const float2 tilt_angles = float2(geom_x_tilt, geom_y_tilt);
    OUT.sinangle = sin(tilt_angles);
    OUT.cosangle = cos(tilt_angles);
    OUT.stretch = maxscale(OUT.sinangle, OUT.cosangle);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Pixel shader for the horizontal bloom pass. Blurs BLOOM_VERTICAL
// horizontally, recombines it with the un-bloomed part of MASKED_SCANLINES,
// blends in HALATION_BLUR by diffusion_weight, optionally applies the
// curvature transform and corner mask, and applies the output gamma before
// encoding.
float4 PS_Bloom_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p10 VAR) : SV_Target
{
    // Sampling coordinate, warped only when curvature is enabled:
    float2 screen_uv = VAR.video_uv;
    if(geom_curvature == true)
    {
        screen_uv = transform(VAR.video_uv, VAR.sinangle, VAR.cosangle, VAR.stretch);
    }
    const float corner_mask = corner((screen_uv-0.5.xx) * BufferToViewportRatio + 0.5.xx);

    // Blur the vertically blurred brightpass horizontally by 9/17/25/43x:
    const float sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
    const float3 blurred_brightpass = tex2DblurNfast(BLOOM_VERTICAL,
        screen_uv, VAR.bloom_dxdy, sigma);

    // Masked scanlines and the brightpass that was extracted from them:
    const float3 intensity_dim =
        tex2D_linearize(MASKED_SCANLINES, screen_uv).rgb;
    const float3 brightpass = tex2D_linearize(BRIGHTPASS,
        screen_uv).rgb;

    // The auto-dim factor comes from levels_autodim_temp; invert it to undim.
    const float auto_dim_factor = levels_autodim_temp;
    const float undim_factor = 1.0/auto_dim_factor;
    const float mask_amplify = get_mask_amplify();

    // Dimpass = scanlines minus brightpass. Add the blurred brightpass back,
    // then undim (scanline auto-dim) and amplify (mask dim) the result:
    const float3 dimpass = intensity_dim - brightpass;
    const float3 phosphor_bloom = (dimpass + blurred_brightpass) *
        mask_amplify * undim_factor * levels_contrast;

    // Let some light bleed into refractive diffusion via the halation
    // texture. Conceptually this precedes the phosphor bloom, but adding it
    // in earlier passes causes black crush in the diffusion colors.
    const float3 diffusion_color = levels_contrast * tex2D_linearize(
        HALATION_BLUR, screen_uv).rgb;
    float3 final_bloom = lerp(phosphor_bloom,
        diffusion_color, diffusion_weight);

    if(geom_curvature == true)
    {
        final_bloom = final_bloom * corner_mask.xxx;
    }
    final_bloom = pow(final_bloom.rgb, 1.0/get_output_gamma());

    // Encode and output the bloomed image:
    return encode_output(float4(final_bloom, 1.0));
}

View File

@@ -1,83 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/bloom-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Values computed per vertex by VS_Bloom_Vertical and read by
// PS_Bloom_Vertical.
struct out_vertex_p9
{
// Unmodified fullscreen-triangle texcoord; used to sample BRIGHTPASS.
float2 tex_uv : TEXCOORD1;
// Vertical-only blur step: (0.0, dxdy.y).
float2 bloom_dxdy : TEXCOORD2;
// Result of get_min_sigma_to_blur_triad(); fed to get_final_bloom_sigma().
float bloom_sigma_runtime : TEXCOORD3;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex shader for the vertical bloom pass. Emits one screen-covering
// triangle and precomputes the vertical blur step and the runtime bloom sigma
// for PS_Bloom_Vertical.
void VS_Bloom_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p9 OUT)
{
    // Vertex ids 0, 1, 2 map to uv (0,0), (0,2), (2,0).
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    float2 texture_size = BLOOM_VERTICAL_texture_size;
    float2 output_size = VIEWPORT_SIZE;
    OUT.tex_uv = texcoord;

    // uv distance between output pixels, computed the same way as
    // blurs/vertex-shader-blur-fast-vertical.h.
    const float2 source_per_output = video_size/output_size;
    const float2 uv_step = source_per_output/texture_size;
    // This blur is vertical-only, so zero out the horizontal offset:
    OUT.bloom_dxdy = float2(0.0, uv_step.y);

    // Runtime bloom sigma, in case the pixel shader needs it:
    const float tile_width = get_resized_mask_tile_size(
        output_size, output_size * mask_resize_viewport_scale, false).x;
    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
        tile_width / mask_triads_per_tile, bloom_diff_thresh);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Pixel shader for the vertical bloom pass: blurs BRIGHTPASS along the step
// supplied by VS_Bloom_Vertical (y only) and returns the encoded result with
// alpha 1.0.
float4 PS_Bloom_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p9 VAR) : SV_Target
{
    // Blur the brightpass vertically with a 9/17/25/43x blur:
    const float sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
    const float3 blurred = tex2DblurNfast(BRIGHTPASS, VAR.tex_uv,
        VAR.bloom_dxdy, sigma);

    // Encode and output the blurred image:
    return encode_output(float4(blurred, 1.0));
}

View File

@@ -1,130 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
#include "../include/scanline-functions.fxh"
#include "../include/bloom-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Values computed per vertex by VS_Brightpass and read by PS_Brightpass.
struct out_vertex_p8
{
// Texcoord scaled by texture_size/video_size.
float2 video_uv : TEXCOORD1;
// uv used to sample MASKED_SCANLINES.
float2 scanline_tex_uv : TEXCOORD2;
// uv used to sample BLOOM_APPROX.
float2 blur3x3_tex_uv : TEXCOORD3;
// Result of get_min_sigma_to_blur_triad(); fed to get_final_bloom_sigma().
float bloom_sigma_runtime : TEXCOORD4;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex shader for the brightpass. Emits one screen-covering triangle and
// precomputes the uv for each input texture plus the runtime bloom sigma for
// PS_Brightpass.
void VS_Brightpass(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p8 OUT)
{
    // Vertex ids 0, 1, 2 map to uv (0,0), (0,2), (2,0).
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    float2 tex_uv = texcoord;
    float2 texture_size = BRIGHTPASS_texture_size;
    float2 output_size = VIEWPORT_SIZE;

    // Each input texture is addressed in its own uv space, all derived from
    // the shared video uv:
    const float2 shared_uv = tex_uv * texture_size/video_size;
    OUT.video_uv = shared_uv;
    OUT.scanline_tex_uv = shared_uv * MASKED_SCANLINES_video_size /
        MASKED_SCANLINES_texture_size;
    OUT.blur3x3_tex_uv = shared_uv * BLOOM_APPROX_video_size / BLOOM_APPROX_texture_size;

    // Runtime bloom sigma, in case the pixel shader needs it:
    const float tile_width = get_resized_mask_tile_size(
        output_size, output_size * mask_resize_viewport_scale, false).x;
    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
        tile_width / mask_triads_per_tile, bloom_diff_thresh);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Pixel shader for the brightpass. Computes, per channel, the fraction of the
// masked-scanline intensity that should be handed to the bloom blur
// (blur_ratio), blends that fraction toward 1.0 by bloom_excess, and returns
// the dimmed scanline color scaled by it (encoded, alpha 1.0).
float4 PS_Brightpass(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p8 VAR) : SV_Target
{
    // Sample the masked scanlines:
    const float3 intensity_dim =
        tex2D_linearize(MASKED_SCANLINES, VAR.scanline_tex_uv).rgb;

    // Full intensity = dimmed intensity with auto-dim undone, the mask
    // compensated for, and contrast applied:
    const float auto_dim_factor = levels_autodim_temp;
    const float undim_factor = 1.0/auto_dim_factor;
    const float mask_amplify = get_mask_amplify();
    const float3 intensity = intensity_dim * undim_factor * mask_amplify *
        levels_contrast;

    // BLOOM_APPROX estimates what a straight blur of the masked scanlines
    // would look like, i.e. how much energy blooming neighbors will add:
    const float3 phosphor_blur_approx = levels_contrast * tex2D_linearize(
        BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb;

    // Blur weight of the center texel, and the most energy we expect to
    // receive from the neighbors:
    const float bloom_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
    const float center_weight = get_center_weight(bloom_sigma);
    const float3 max_area_contribution_approx =
        max(0.0.xxx, phosphor_blur_approx - center_weight * intensity);

    // Assume neighbors blur 100% of their intensity (blur_ratio = 1.0): it is
    // simple and actually gives better results. Then scale every intensity
    // by the user's desired underestimate factor:
    const float3 area_contrib_underestimate =
        bloom_underestimate_levels * max_area_contribution_approx;
    const float3 intensity_underestimate =
        bloom_underestimate_levels * intensity;

    // Pick the intensity that the blur_ratio formula divides by:
    #ifdef BRIGHTPASS_AREA_BASED
        // Area-based: changes blur_ratio more smoothly and blurs more,
        // clipping less but offering less phosphor differentiation.
        const float3 phosphor_blur_underestimate = bloom_underestimate_levels *
            phosphor_blur_approx;
        const float3 ratio_divisor = max(intensity_underestimate,
            phosphor_blur_underestimate * mask_amplify);
    #else
        const float3 ratio_divisor = intensity_underestimate;
    #endif

    // blur_ratio: the ratio of intensity we want to blur, kept in [0, 1].
    const float3 blur_ratio_temp =
        ((1.0.xxx - area_contrib_underestimate) /
        ratio_divisor - 1.0.xxx) / (center_weight - 1.0);
    const float3 blur_ratio = clamp(blur_ratio_temp, 0.0, 1.0);

    // The brightpass is based on the auto-dimmed, unamplified, masked
    // scanlines; encode if necessary, and return!
    const float3 brightpass = intensity_dim *
        lerp(blur_ratio, 1.0.xxx, bloom_excess);
    return encode_output(float4(brightpass, 1.0));
}

View File

@@ -1,109 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
// PASS SETTINGS:
// gamma-management.h needs to know what kind of pipeline we're using and
// what pass this is in that pipeline. This will become obsolete if/when we
// can #define things like this in the .cgp preset file.
#define FIRST_PASS
#define SIMULATE_CRT_ON_LCD
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/bind-shader-params.fxh"
#include "../include/gamma-management.fxh"
#include "../include/scanline-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Values computed per vertex by VS_Linearize and read by PS_Linearize.
struct out_vertex
{
// Unmodified fullscreen-triangle texcoord used to sample the input.
float2 tex_uv : TEXCOORD1;
// uv distance between texels (NormalizedNativePixelSize).
float2 uv_step : TEXCOORD2;
// Result of is_interlaced(): 1.0 = true, 0.0 = false.
float interlaced : TEXCOORD3;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex shader for the linearize (first) pass. Emits one screen-covering
// triangle and forwards the texel step and the interlacing flag to
// PS_Linearize.
void VS_Linearize(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex OUT)
{
    // Vertex ids 0, 1, 2 map to uv (0,0), (0,2), (2,0).
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    OUT.tex_uv = texcoord;
    // Disabled alternative that snaps the uv to texel centers:
    // OUT.tex_uv = (floor(texcoord / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize;

    // uv distance between texels:
    OUT.uv_step = NormalizedNativePixelSize;

    // Interlacing flag derived from the native line count:
    // 1.0 = true, 0.0 = false.
    OUT.interlaced = is_interlaced(1.0/NormalizedNativePixelSize.y);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Point-filtered, border-addressed sampler over the ReShade back buffer; it is
// the input image of the linearize pass.
sampler2D sBackBuffer
{
    Texture = ReShade::BackBufferTex;
    AddressU = BORDER;
    AddressV = BORDER;
    AddressW = BORDER;
    MagFilter = POINT;
    MinFilter = POINT;
};
// Name under which PS_Linearize refers to its input.
#define input_texture sBackBuffer
// Pixel shader for the linearize (first) pass. Decodes the input through
// tex2D_linearize_first and, when interlace_detect is set, bobs interlaced
// fields: lines belonging to the other field are replaced by the average of
// the lines directly above and below. Returns the encoded color.
float4 PS_Linearize(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex VAR) : SV_Target
{
    // Bobbing ensures we can immediately blur without getting artifacts.
    // Note: TFF/BFF won't matter for sources that double-weave or similar.
    // Disabled alternative that snaps the uv to texel centers:
    // VAR.tex_uv = (floor(VAR.tex_uv / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize;
    if(interlace_detect)
    {
        // Fetch the current line and both vertical neighbors up front;
        // tex2D_linearize_first decodes CRT gamma. Don't bother branching.
        const float2 tex_uv = VAR.tex_uv;
        const float2 line_step = float2(0.0, VAR.uv_step.y);
        const float3 line_here = tex2D_linearize_first(
            input_texture, tex_uv).rgb;
        const float3 line_above = tex2D_linearize_first(
            input_texture, tex_uv - line_step).rgb;
        const float3 line_below = tex2D_linearize_first(
            input_texture, tex_uv + line_step).rgb;
        const float3 neighbor_average = 0.5 * (line_above + line_below);

        // modulus is 2.0 for interlaced content and 1.0 otherwise, so
        // progressive content always resolves to the current line.
        const float modulus = VAR.interlaced + 1.0;
        const float field_offset =
            fmod(FrameCount + float(interlace_bff), modulus);
        const float texel_row = tex_uv.y / NormalizedNativePixelSize.y;
        // under_half works around a rounding bug at exact texel locations.
        const float row_index = floor(texel_row - under_half);
        const float wrong_field = fmod(row_index + field_offset, modulus);

        // wrong_field selects the interpolated line when it is 1.0:
        const float3 bobbed = lerp(line_here, neighbor_average, wrong_field);
        return encode_output(float4(bobbed, 1.0));
    }

    // No interlace handling requested: just linearize the input.
    return encode_output(tex2D_linearize_first(input_texture, VAR.tex_uv));
}

View File

@@ -1,130 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/phosphor-mask-resizing.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants handed from VS_Mask_Resize_Horizontal to
// PS_Mask_Resize_Horizontal.
struct out_vertex_p6
{
// Unwrapped source texture uv (tile_uv_wrap * tile_size_uv); the pixel shader
// applies frac() to it.
float2 src_tex_uv_wrap : TEXCOORD1;
// Unwrapped tile uv: the output uv scaled by the number of tiles in this pass.
float2 tile_uv_wrap : TEXCOORD2;
// Resized tile size divided by the input tile size.
float2 resize_magnification_scale : TEXCOORD3;
// Horizontal sampling step: (1.0/texture_size.x, 0.0).
float2 src_dxdy : TEXCOORD4;
// Size of one input tile in uv units (input tile size / texture size).
float2 tile_size_uv : TEXCOORD5;
// Number of input tiles that fit in the texture (texture size / input tile size).
float2 input_tiles_per_texture : TEXCOORD6;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// VS_Mask_Resize_Horizontal: full-screen triangle for the horizontal phosphor
// mask resize pass. Precomputes the tile and texture uv mappings that
// PS_Mask_Resize_Horizontal samples with.
//   id       - vertex index selecting the triangle corner
//   position - clip-space position of that corner
//   texcoord - uv of that corner
//   OUT      - interpolants consumed by the pixel shader
void VS_Mask_Resize_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p6 OUT)
{
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    const float2 pass_input_size = MASK_RESIZE_texture_size;
    const float2 pass_output_size = 0.0625*(VIEWPORT_SIZE);

    // Estimate the viewport size first. If the estimate is wrong and
    // mask_specify_num_triads is 1.0/true, the user gets the wrong number of
    // triads.
    const float2 viewport_estimate =
        pass_output_size / mask_resize_viewport_scale;
    // Final size of the resized mask tiles. The previous pass probably
    // estimated the viewport and output sizes differently, so the sizes are
    // not asserted to match (the trailing "false").
    const float2 resized_tile_size = get_resized_mask_tile_size(
        viewport_estimate, pass_output_size, false);

    // Tiles are rendered until the output FBO is full or a limit is reached,
    // so derive unwrapped tile uv coords from the output uv coords and the
    // number of tiles that fit in the FBO.
    const float2 tiles_in_output = pass_output_size / resized_tile_size;
    const float2 video_uv = texcoord * pass_input_size / video_size;
    const float2 wrapped_tile_uv = video_uv * tiles_in_output;

    // Texel size of one input tile, and the same size expressed in uv units.
    const float2 source_tile_size = float2(min(
        mask_resize_src_lut_size.x, video_size.x), resized_tile_size.y);
    const float2 source_tile_size_uv = source_tile_size / pass_input_size;

    // Hand everything to the pixel shader; frac() is left for it to apply.
    OUT.tile_uv_wrap = wrapped_tile_uv;
    OUT.src_tex_uv_wrap = wrapped_tile_uv * source_tile_size_uv;
    OUT.resize_magnification_scale = resized_tile_size / source_tile_size;
    OUT.src_dxdy = float2(1.0/pass_input_size.x, 0.0);
    OUT.tile_size_uv = source_tile_size_uv;
    OUT.input_tiles_per_texture = pass_input_size / source_tile_size;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// PS_Mask_Resize_Horizontal: horizontally resizes the phosphor mask tile.
//
// The input contains one mask tile horizontally and a number vertically.
// The tile is resized horizontally to its final screen size and repeated
// until at least mask_resize_num_tiles are drawn; it is left unchanged
// vertically. Lanczos-resizing the phosphor mask achieves much sharper results
// than mipmapping, and outputting >= mask_resize_num_tiles makes for easier
// tiled sampling later.
//
//   VAR - interpolants from VS_Mask_Resize_Horizontal
// Returns the resized mask color (linear RGB, alpha 1.0). Fragments that are
// not needed are discarded.
float4 PS_Mask_Resize_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p6 VAR) : SV_Target
{
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
    // Only shade fragments inside the tiles that are actually needed, and only
    // when sample mode 0 (the manually resized mask) is active.
    const float2 tile_uv_wrap = VAR.tile_uv_wrap;
    if(get_mask_sample_mode() < 0.5 &&
        max(tile_uv_wrap.x, tile_uv_wrap.y) <= mask_resize_num_tiles)
    {
        float2 texture_size = MASK_RESIZE_texture_size;
        // Wrap the unwrapped uv into [0, 1) here rather than in the vertex
        // shader.
        const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap);
        const float3 pixel_color = downsample_horizontal_sinc_tiled(MASK_RESIZE_VERTICAL,
            src_tex_uv, texture_size, VAR.src_dxdy.x,
            VAR.resize_magnification_scale.x, VAR.tile_size_uv.x);
        // The input LUT was linear RGB, and so is our output:
        return float4(pixel_color, 1.0);
    }
#endif
    // Unneeded fragment, or manual resizing is compiled out. The return after
    // discard gives every control path an explicit value.
    discard;
    return 1.0.xxxx;
}

View File

@@ -1,164 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/phosphor-mask-resizing.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants handed from VS_Mask_Resize_Vertical to PS_Mask_Resize_Vertical.
struct out_vertex_p5
{
// Unwrapped tile uv. The vertex shader uses it directly as the source texture
// uv; the pixel shader applies frac() to it.
float2 src_tex_uv_wrap : TEXCOORD1;
// This pass's output tile size divided by mask_resize_src_lut_size.
float2 resize_magnification_scale : TEXCOORD2;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// VS_Mask_Resize_Vertical: full-screen triangle for the vertical phosphor mask
// resize pass. Computes the unwrapped tile uv and the magnification scale used
// by PS_Mask_Resize_Vertical.
//   id       - vertex index selecting the triangle corner
//   position - clip-space position of that corner
//   texcoord - uv of that corner
//   OUT      - interpolants consumed by the pixel shader
void VS_Mask_Resize_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p5 OUT)
{
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    const float2 pass_input_size = MASK_RESIZE_VERT_texture_size;
    const float2 pass_output_size = float2(64.0, 0.0625*((VIEWPORT_SIZE).y));

    // Estimate the viewport height first. If the estimate is wrong and
    // mask_specify_num_triads is 1.0/true, the user gets the wrong number of
    // triads. The aspect ratio comes from the pass texture size.
    const float viewport_height = pass_output_size.y / mask_resize_viewport_scale.y;
    const float aspect = pass_input_size.x / pass_input_size.y;
    const float2 viewport_estimate =
        float2(viewport_height * aspect, viewport_height);

    // Estimated output size of MASK_RESIZE (the next pass). The x component
    // should not matter: the x result is unused and the sizes are not asserted
    // to be correct, so x cannot influence y through the tile aspect ratio.
    const float2 next_pass_output_estimate =
        float2(pass_output_size.y * aspect, pass_output_size.y);

    // Final intended [y] size of the resized tiles, then the tile size for
    // this pass, which resizes y only.
    const float2 resized_tile_size = get_resized_mask_tile_size(
        viewport_estimate, next_pass_output_estimate, false);
    const float2 tile_size_this_pass = float2(min(
        mask_resize_src_lut_size.x, pass_output_size.x), resized_tile_size.y);

    // Tiles are rendered until the output FBO is full or a limit is reached,
    // so derive unwrapped tile uv coords from the output uv coords and the
    // number of tiles that fit in the FBO.
    const float2 tiles_in_output = pass_output_size / tile_size_this_pass;
    const float2 video_uv = texcoord * pass_input_size / video_size;

    // The input LUT is a single mask tile, so texture uv equals tile uv;
    // frac() is left for the pixel shader.
    OUT.src_tex_uv_wrap = video_uv * tiles_in_output;
    OUT.resize_magnification_scale =
        tile_size_this_pass / mask_resize_src_lut_size;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// PS_Mask_Resize_Vertical: vertically resizes the phosphor mask tile.
//
// Resizes the input phosphor mask tile to the final vertical size it will
// appear on screen. Horizontal size stays at 1x if possible (output width
// >= mask_resize_src_lut_size.x); otherwise the tile is linearly sampled
// horizontally to fit exactly one tile. Lanczos-resizing the phosphor mask
// achieves much sharper results than mipmapping, and vertically resizing first
// minimizes the total number of taps required. The output holds a number of
// resized tiles >= mask_resize_num_tiles for easier tiled sampling later.
//
//   VAR - interpolants from VS_Mask_Resize_Vertical
// Returns the resized mask color (linear RGB, alpha 1.0). Fragments that are
// not needed are discarded.
float4 PS_Mask_Resize_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p5 VAR) : SV_Target
{
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
    // Only shade fragments inside the tiles that are actually needed, and only
    // when sample mode 0 (the manually resized mask) is active.
    const float2 tile_uv_wrap = VAR.src_tex_uv_wrap;
    if(get_mask_sample_mode() < 0.5 &&
        tile_uv_wrap.y <= mask_resize_num_tiles)
    {
        static const float src_dy = 1.0/mask_resize_src_lut_size.y;
        const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap);
        float3 pixel_color;
        // Pick the source LUT by mask_type (< 0.5 grille, < 1.5 slot,
        // otherwise shadow). If mask_type is static, this branch will be
        // resolved statically.
#ifdef PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT
        if(mask_type < 0.5)
        {
            pixel_color = downsample_vertical_sinc_tiled(
                mask_grille_texture_large, src_tex_uv, mask_resize_src_lut_size,
                src_dy, VAR.resize_magnification_scale.y, 1.0);
        }
        else if(mask_type < 1.5)
        {
            pixel_color = downsample_vertical_sinc_tiled(
                mask_slot_texture_large, src_tex_uv, mask_resize_src_lut_size,
                src_dy, VAR.resize_magnification_scale.y, 1.0);
        }
        else
        {
            pixel_color = downsample_vertical_sinc_tiled(
                mask_shadow_texture_large, src_tex_uv, mask_resize_src_lut_size,
                src_dy, VAR.resize_magnification_scale.y, 1.0);
        }
#else
        if(mask_type < 0.5)
        {
            pixel_color = downsample_vertical_sinc_tiled(
                mask_grille_texture_small, src_tex_uv, mask_resize_src_lut_size,
                src_dy, VAR.resize_magnification_scale.y, 1.0);
        }
        else if(mask_type < 1.5)
        {
            pixel_color = downsample_vertical_sinc_tiled(
                mask_slot_texture_small, src_tex_uv, mask_resize_src_lut_size,
                src_dy, VAR.resize_magnification_scale.y, 1.0);
        }
        else
        {
            pixel_color = downsample_vertical_sinc_tiled(
                mask_shadow_texture_small, src_tex_uv, mask_resize_src_lut_size,
                src_dy, VAR.resize_magnification_scale.y, 1.0);
        }
#endif
        // The input LUT was linear RGB, and so is our output:
        return float4(pixel_color, 1.0);
    }
#endif
    // Unneeded fragment, or manual resizing is compiled out. The return after
    // discard gives every control path an explicit value.
    discard;
    return 1.0.xxxx;
}

View File

@@ -1,283 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/scanline-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
#include "../include/bloom-functions.fxh"
#include "../include/gamma-management.fxh"
/////////////////////////////////// HELPERS //////////////////////////////////
// tex2Dtiled_mask_linearize: samples a manually tiled mask texture and
// linearizes the result.
//   tex    - sampler to read
//   tex_uv - uv to sample at
// Anisotropic filtering can get confused by manual tiling, so when
// PHOSPHOR_MASK_MANUALLY_RESIZE is defined one of the compat macros can
// select a lod- or bias-based fetch. Otherwise a plain tex2D_linearize is used.
float4 tex2Dtiled_mask_linearize(const sampler2D tex,
    const float2 tex_uv)
{
#if defined(PHOSPHOR_MASK_MANUALLY_RESIZE) && defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD)
    // Workaround: force the lowest mip level.
    // TODO: Use tex2Dlod_linearize with a calculated mip level.
    return tex2Dlod_linearize(tex, float4(tex_uv, 0.0, 0.0));
#elif defined(PHOSPHOR_MASK_MANUALLY_RESIZE) && defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
    // Workaround: apply a -16.0 mip bias.
    return tex2Dbias_linearize(tex, float4(tex_uv, 0.0, -16.0));
#else
    return tex2D_linearize(tex, tex_uv);
#endif
}
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants handed from VS_Scanlines_Horizontal_Apply_Mask to
// PS_Scanlines_Horizontal_Apply_Mask.
struct out_vertex_p7
{
// Use explicit semantics so COLORx doesn't clamp values outside [0, 1].
// Output uv rescaled by texture_size/video_size.
float2 video_uv : TEXCOORD1;
// video_uv mapped into the VERTICAL_SCANLINES texture.
float2 scanline_tex_uv : TEXCOORD2;
// video_uv mapped into the BLOOM_APPROX texture.
float2 blur3x3_tex_uv : TEXCOORD3;
// video_uv mapped into the HALATION_BLUR texture.
float2 halation_tex_uv : TEXCOORD4;
// 1.0 / VERTICAL_SCANLINES_texture_size.
float2 scanline_texture_size_inv : TEXCOORD5;
// Return value of get_mask_sampling_parameters().
float4 mask_tile_start_uv_and_size : TEXCOORD6;
// Out-argument of get_mask_sampling_parameters(); the pixel shader multiplies
// video_uv by it to obtain the unwrapped tile uv.
float2 mask_tiles_per_screen : TEXCOORD7;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// VS_Scanlines_Horizontal_Apply_Mask: full-screen triangle for the masked
// scanlines pass. Converts the output uv into the coordinate space of each
// input texture and precomputes the phosphor mask tiling parameters.
//   id       - vertex index selecting the triangle corner
//   position - clip-space position of that corner
//   texcoord - uv of that corner
//   OUT      - interpolants consumed by the pixel shader
void VS_Scanlines_Horizontal_Apply_Mask(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p7 OUT)
{
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    const float2 pass_texture_size = MASKED_SCANLINES_texture_size;
    const float2 viewport_pixels = VIEWPORT_SIZE;

    // Each input texture uses its own coordinate space, so express the output
    // uv as a video uv and then rescale it per texture.
    const float2 video_uv = texcoord * pass_texture_size/video_size;
    const float2 inv_scanline_texture_size =
        1.0.xx/VERTICAL_SCANLINES_texture_size;
    OUT.video_uv = video_uv;
    OUT.scanline_texture_size_inv = inv_scanline_texture_size;
    OUT.scanline_tex_uv = video_uv * VERTICAL_SCANLINES_video_size *
        inv_scanline_texture_size;
    OUT.blur3x3_tex_uv = video_uv * BLOOM_APPROX_video_size /
        BLOOM_APPROX_texture_size;
    OUT.halation_tex_uv = video_uv * HALATION_BLUR_video_size /
        HALATION_BLUR_texture_size;

    // Pick the size of the mask texture that will actually be sampled. Sample
    // mode 0 uses the manually resized mask, which is ignored if no resize
    // was ever done.
    // NOTE(review): in sample mode 0 the pixel shader samples MASK_RESIZE, but
    // the sizes chosen here are MASKED_SCANLINES_*; confirm they are meant to
    // match.
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
    const bool use_resized_mask = get_mask_sample_mode() < 0.5;
    const float2 sampled_mask_texture_size = use_resized_mask ?
        MASKED_SCANLINES_texture_size : mask_texture_large_size;
    const float2 sampled_mask_video_size = use_resized_mask ?
        MASKED_SCANLINES_video_size : mask_texture_large_size;
#else
    const float2 sampled_mask_texture_size = mask_texture_large_size;
    const float2 sampled_mask_video_size = mask_texture_large_size;
#endif

    // Mask tile dimensions, starting points, and tiles per screen.
    float2 tiles_per_screen;
    OUT.mask_tile_start_uv_and_size = get_mask_sampling_parameters(
        sampled_mask_texture_size, sampled_mask_video_size, viewport_pixels,
        tiles_per_screen);
    OUT.mask_tiles_per_screen = tiles_per_screen;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// PS_Scanlines_Horizontal_Apply_Mask: composites the masked scanline image.
// Samples the vertically resolved scanlines horizontally, mixes in halation,
// multiplies by the phosphor mask and, when PHOSPHOR_BLOOM_FAKE is defined,
// blends in the BLOOM_APPROX blur. The result goes through encode_output().
//   VAR - interpolants from VS_Scanlines_Horizontal_Apply_Mask
// Returns the encoded color with alpha 1.0.
float4 PS_Scanlines_Horizontal_Apply_Mask(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p7 VAR) : SV_Target
{
// This pass: Sample (misconverged?) scanlines to the final horizontal
// resolution, apply halation (bouncing electrons), and apply the phosphor
// mask. Fake a bloom if requested. Unless we fake a bloom, the output
// will be dim from the scanline auto-dim, mask dimming, and low gamma.
// Horizontally sample the current row (a vertically interpolated scanline)
// and account for horizontal convergence offsets, given in units of texels.
// float2 VERTICAL_SCANLINES_texture_size = float2(1.0/NormalizedNativePixelSize.x, ViewportSize.y*BufferToViewportRatio.y);
// output_size is only read by the non-SIMPLE_BLEND fake-bloom path below.
float2 output_size = VIEWPORT_SIZE;
const float3 scanline_color_dim = sample_rgb_scanline_horizontal(
VERTICAL_SCANLINES, VAR.scanline_tex_uv,
VERTICAL_SCANLINES_texture_size, VAR.scanline_texture_size_inv);
// Dimming factor applied to the halation sample and undone by the fake bloom.
const float auto_dim_factor = levels_autodim_temp;
// Sample the phosphor mask:
const float2 tile_uv_wrap = VAR.video_uv * VAR.mask_tiles_per_screen;
const float2 mask_tex_uv = convert_phosphor_tile_uv_wrap_to_tex_uv(
tile_uv_wrap, VAR.mask_tile_start_uv_and_size);
float3 phosphor_mask_sample;
// Sample mode > 0.5 reads the original mask LUTs; without manual resizing
// the original LUTs are always used.
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
const bool sample_orig_luts = get_mask_sample_mode() > 0.5;
#else
static const bool sample_orig_luts = true;
#endif
if(sample_orig_luts)
{
// If mask_type is static, this branch will be resolved statically.
// mask_type < 0.5 selects the grille LUT, < 1.5 the slot LUT, else shadow.
if(mask_type < 0.5)
{
phosphor_mask_sample = tex2D_linearize(
mask_grille_texture_large, mask_tex_uv).rgb;
}
else if(mask_type < 1.5)
{
phosphor_mask_sample = tex2D_linearize(
mask_slot_texture_large, mask_tex_uv).rgb;
}
else
{
phosphor_mask_sample = tex2D_linearize(
mask_shadow_texture_large, mask_tex_uv).rgb;
}
}
else
{
// Sample the resized mask, and avoid tiling artifacts:
phosphor_mask_sample = tex2Dtiled_mask_linearize(
MASK_RESIZE, mask_tex_uv).rgb;
}
// Sample the halation texture (auto-dim to match the scanlines), and
// account for both horizontal and vertical convergence offsets, given
// in units of texels horizontally and same-field scanlines vertically:
const float3 halation_color = tex2D_linearize(
HALATION_BLUR, VAR.halation_tex_uv).rgb;
// Apply halation: Halation models electrons flying around under the glass
// and hitting the wrong phosphors (of any color). It desaturates, so
// average the halation electrons to a scalar. Reduce the local scanline
// intensity accordingly to conserve energy.
const float3 halation_intensity_dim =
dot(halation_color, auto_dim_factor.xxx/3.0).xxx;
const float3 electron_intensity_dim = lerp(scanline_color_dim,
halation_intensity_dim, halation_weight);
// Apply the phosphor mask:
const float3 phosphor_emission_dim = electron_intensity_dim *
phosphor_mask_sample;
#ifdef PHOSPHOR_BLOOM_FAKE
// The BLOOM_APPROX pass approximates a blurred version of a masked
// and scanlined image. It's usually used to compute the brightpass,
// but we can also use it to fake the bloom stage entirely. Caveats:
// 1.) A fake bloom is conceptually different, since we're mixing in a
// fully blurred low-res image, and the biggest implications are:
// 2.) If mask_amplify is incorrect, results deteriorate more quickly.
// 3.) The inaccurate blurring hurts quality in high-contrast areas.
// 4.) The bloom_underestimate_levels parameter seems less sensitive.
// Reverse the auto-dimming and amplify to compensate for mask dimming:
// NOTE: this macro is defined unconditionally here and is not #undef'd
// within this function, so the SIMPLE_BLEND branches below are always taken.
#define PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
#ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
static const float blur_contrast = 1.05;
#else
static const float blur_contrast = 1.0;
#endif
const float mask_amplify = get_mask_amplify();
const float undim_factor = 1.0/auto_dim_factor;
const float3 phosphor_emission =
phosphor_emission_dim * undim_factor * mask_amplify;
// Get a phosphor blur estimate, accounting for convergence offsets:
const float3 electron_intensity = electron_intensity_dim * undim_factor;
const float3 phosphor_blur_approx_soft = tex2D_linearize(
BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb;
const float3 phosphor_blur_approx = lerp(phosphor_blur_approx_soft,
electron_intensity, 0.1) * blur_contrast;
// We could blend between phosphor_emission and phosphor_blur_approx,
// solving for the minimum blend_ratio that avoids clipping past 1.0:
// 1.0 >= total_intensity
// 1.0 >= phosphor_emission * (1.0 - blend_ratio) +
// phosphor_blur_approx * blend_ratio
// blend_ratio = (phosphor_emission - 1.0)/
// (phosphor_emission - phosphor_blur_approx);
// However, this blurs far more than necessary, because it aims for
// full brightness, not minimal blurring. To fix it, base blend_ratio
// on a max area intensity only so it varies more smoothly:
const float3 phosphor_blur_underestimate =
phosphor_blur_approx * bloom_underestimate_levels;
const float3 area_max_underestimate =
phosphor_blur_underestimate * mask_amplify;
#ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
const float3 blend_ratio_temp =
(area_max_underestimate - 1.0.xxx) /
(area_max_underestimate - phosphor_blur_underestimate);
#else
// Try doing it like an area-based brightpass. This is nearly
// identical, but it's worth toying with the code in case I ever
// find a way to make it look more like a real bloom. (I've had
// some promising textures from combining an area-based blend ratio
// for the phosphor blur and a more brightpass-like blend-ratio for
// the phosphor emission, but I haven't found a way to make the
// brightness correct across the whole color range, especially with
// different bloom_underestimate_levels values.)
const float desired_triad_size = lerp(mask_triad_size_desired,
output_size.x/mask_num_triads_desired,
mask_specify_num_triads);
const float bloom_sigma = get_min_sigma_to_blur_triad(
desired_triad_size, bloom_diff_thresh);
const float center_weight = get_center_weight(bloom_sigma);
const float3 max_area_contribution_approx =
max(0.0.xxx, phosphor_blur_approx -
center_weight * phosphor_emission);
const float3 area_contrib_underestimate =
bloom_underestimate_levels * max_area_contribution_approx;
const float3 blend_ratio_temp =
((1.0.xxx - area_contrib_underestimate) /
area_max_underestimate - 1.0.xxx) / (center_weight - 1.0);
#endif
// Clamp blend_ratio in case it's out-of-range, but be SUPER careful:
// min/max/clamp are BIZARRELY broken with lerp (optimization bug?),
// and this redundant sequence avoids bugs, at least on nVidia cards:
// (Do not simplify the max(clamp(...)) below; the redundancy is deliberate.)
const float3 blend_ratio_clamped = max(clamp(blend_ratio_temp, 0.0, 1.0), 0.0);
const float3 blend_ratio = lerp(blend_ratio_clamped, 1.0.xxx, bloom_excess);
// Blend the blurred and unblurred images:
const float3 phosphor_emission_unclipped =
lerp(phosphor_emission, phosphor_blur_approx, blend_ratio);
// Simulate refractive diffusion by reusing the halation sample.
const float3 pixel_color = lerp(phosphor_emission_unclipped,
halation_color, diffusion_weight);
#else
// No fake bloom: output the dim masked emission unchanged.
const float3 pixel_color = phosphor_emission_dim;
#endif
// Encode if necessary, and output.
return encode_output(float4(pixel_color, 1.0));
}

View File

@@ -1,241 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
#undef FIRST_PASS
////////////////////////////////// INCLUDES //////////////////////////////////
//#include "../include/user-settings.fxh"
//#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
#include "../include/scanline-functions.fxh"
//#include "../include/gamma-management.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants handed from VS_Scanlines_Vertical_Interlacing to
// PS_Scanlines_Vertical_Interlacing.
struct out_vertex_p1
{
// Use explicit semantics so COLORx doesn't clamp values outside [0, 1].
// Full-screen uv, copied from the vertex shader's texcoord output.
float2 tex_uv : TEXCOORD1;
// il_step_multiple divided by the pass texture size.
float2 uv_step : TEXCOORD2; // uv size of a texel (x) and scanline (y)
float2 il_step_multiple : TEXCOORD3; // (1, 1) = progressive, (1, 2) = interlaced
// (video_size.y / output_size.y) divided by il_step_multiple.y.
float pixel_height_in_scanlines : TEXCOORD4; // Height of an output pixel in scanlines
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// VS_Scanlines_Vertical_Interlacing: full-screen triangle for the vertical
// scanline pass. Derives the interlacing step multiple, the per-texel and
// per-scanline uv step, and the output pixel height in scanlines.
//   id       - vertex index selecting the triangle corner
//   position - clip-space position of that corner
//   texcoord - uv of that corner
//   OUT      - interpolants consumed by the pixel shader
void VS_Scanlines_Vertical_Interlacing(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p1 OUT)
{
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
    OUT.tex_uv = texcoord;

    const float2 pass_input_size = VERTICAL_SCANLINES_texture_size;
    const float2 pass_output_size = float2(TEXTURE_SIZE.x, VIEWPORT_SIZE.y);

    // Step multiple between lines of the same field: 1 for progressive
    // sources, 2 for interlaced sources.
    const float line_stride = 1.0 + float(is_interlaced(video_size.y));
    const float2 step_multiple = float2(1.0, line_stride);
    OUT.il_step_multiple = step_multiple;

    // uv step between one texel (x) and one scanline (y).
    OUT.uv_step = step_multiple / pass_input_size;

    // The {sigma, shape}_range values are intentionally not computed here;
    // the original carried them only as commented-out code.

    // Pixel height in scanlines, needed for antialiased/integral sampling.
    OUT.pixel_height_in_scanlines =
        (video_size.y / pass_output_size.y) / line_stride;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
float4 PS_Scanlines_Vertical_Interlacing(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p1 VAR) : SV_Target
{
// This pass: Sample multiple (misconverged?) scanlines to the final
// vertical resolution. Temporarily auto-dim the output to avoid clipping.
// Read some attributes into local variables:
const float2 texture_size = VERTICAL_SCANLINES_texture_size;
const float2 texture_size_inv = 1.0/texture_size;
const float2 uv_step = VAR.uv_step;
const float2 il_step_multiple = VAR.il_step_multiple;
const float frame_count = FrameCount;
const float ph = VAR.pixel_height_in_scanlines;
// Get the uv coords of the previous scanline (in this field), and the
// scanline's distance from this sample, in scanlines.
float dist;
const float2 scanline_uv = get_last_scanline_uv(VAR.tex_uv, texture_size,
texture_size_inv, il_step_multiple, frame_count, dist);
// Consider 2, 3, 4, or 6 scanlines numbered 0-5: The previous and next
// scanlines are numbered 2 and 3. Get scanline colors colors (ignore
// horizontal sampling, since since IN.output_size.x = video_size.x).
// NOTE: Anisotropic filtering creates interlacing artifacts, which is why
// ORIG_LINEARIZED bobbed any interlaced input before this pass.
const float2 v_step = float2(0.0, uv_step.y);
const float3 scanline2_color = tex2D_linearize(ORIG_LINEARIZED, scanline_uv).rgb;
const float3 scanline3_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + v_step).rgb;
float3 scanline0_color, scanline1_color, scanline4_color, scanline5_color,
scanline_outside_color;
float dist_round;
// Use scanlines 0, 1, 4, and 5 for a total of 6 scanlines:
if(beam_num_scanlines > 5.5)
{
scanline1_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
scanline4_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
scanline0_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - 2.0 * v_step).rgb;
scanline5_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 3.0 * v_step).rgb;
}
// Use scanlines 1, 4, and either 0 or 5 for a total of 5 scanlines:
else if(beam_num_scanlines > 4.5)
{
scanline1_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
scanline4_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
// dist is in [0, 1]
dist_round = round(dist);
const float2 sample_0_or_5_uv_off =
lerp(-2.0 * v_step, 3.0 * v_step, dist_round);
// Call this "scanline_outside_color" to cope with the conditional
// scanline number:
scanline_outside_color = tex2D_linearize(
ORIG_LINEARIZED, scanline_uv + sample_0_or_5_uv_off).rgb;
}
// Use scanlines 1 and 4 for a total of 4 scanlines:
else if(beam_num_scanlines > 3.5)
{
scanline1_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
scanline4_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
}
// Use scanline 1 or 4 for a total of 3 scanlines:
else if(beam_num_scanlines > 2.5)
{
// dist is in [0, 1]
dist_round = round(dist);
const float2 sample_1or4_uv_off =
lerp(-v_step, 2.0 * v_step, dist_round);
scanline_outside_color = tex2D_linearize(
ORIG_LINEARIZED, scanline_uv + sample_1or4_uv_off).rgb;
}
// Compute scanline contributions, accounting for vertical convergence.
// Vertical convergence offsets are in units of current-field scanlines.
// dist2 means "positive sample distance from scanline 2, in scanlines:"
float3 dist2 = dist.xxx;
if(beam_misconvergence)
{
const float3 convergence_offsets_vert_rgb =
get_convergence_offsets_y_vector();
dist2 = dist.xxx - convergence_offsets_vert_rgb;
}
// Calculate {sigma, shape}_range outside of scanline_contrib so it's only
// done once per pixel (not 6 times) with runtime params. Don't reuse the
// vertex shader calculations, so static versions can be constant-folded.
const float sigma_range = max(beam_max_sigma, beam_min_sigma) -
beam_min_sigma;
const float shape_range = max(beam_max_shape, beam_min_shape) -
beam_min_shape;
// Calculate and sum final scanline contributions, starting with lines 2/3.
// There is no normalization step, because we're not interpolating a
// continuous signal. Instead, each scanline is an additive light source.
const float3 scanline2_contrib = scanline_contrib(dist2,
scanline2_color, ph, sigma_range, shape_range);
const float3 scanline3_contrib = scanline_contrib(abs(1.0.xxx - dist2),
scanline3_color, ph, sigma_range, shape_range);
float3 scanline_intensity = scanline2_contrib + scanline3_contrib;
if(beam_num_scanlines > 5.5)
{
const float3 scanline0_contrib =
scanline_contrib(dist2 + 2.0.xxx, scanline0_color,
ph, sigma_range, shape_range);
const float3 scanline1_contrib =
scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
ph, sigma_range, shape_range);
const float3 scanline4_contrib =
scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
ph, sigma_range, shape_range);
const float3 scanline5_contrib =
scanline_contrib(abs(3.0.xxx - dist2), scanline5_color,
ph, sigma_range, shape_range);
scanline_intensity += scanline0_contrib + scanline1_contrib +
scanline4_contrib + scanline5_contrib;
}
else if(beam_num_scanlines > 4.5)
{
const float3 scanline1_contrib =
scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
ph, sigma_range, shape_range);
const float3 scanline4_contrib =
scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
ph, sigma_range, shape_range);
const float3 dist0or5 = lerp(
dist2 + 2.0.xxx, 3.0.xxx - dist2, dist_round);
const float3 scanline0or5_contrib = scanline_contrib(
dist0or5, scanline_outside_color, ph, sigma_range, shape_range);
scanline_intensity += scanline1_contrib + scanline4_contrib +
scanline0or5_contrib;
}
else if(beam_num_scanlines > 3.5)
{
const float3 scanline1_contrib =
scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
ph, sigma_range, shape_range);
const float3 scanline4_contrib =
scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
ph, sigma_range, shape_range);
scanline_intensity += scanline1_contrib + scanline4_contrib;
}
else if(beam_num_scanlines > 2.5)
{
const float3 dist1or4 = lerp(
dist2 + 1.0.xxx, 2.0.xxx - dist2, dist_round);
const float3 scanline1or4_contrib = scanline_contrib(
dist1or4, scanline_outside_color, ph, sigma_range, shape_range);
scanline_intensity += scanline1or4_contrib;
}
// Auto-dim the image to avoid clipping, encode if necessary, and output.
// My original idea was to compute a minimal auto-dim factor and put it in
// the alpha channel, but it wasn't working, at least not reliably. This
// is faster anyway, levels_autodim_temp = 0.5 isn't causing banding.
return encode_output(float4(scanline_intensity * levels_autodim_temp, 1.0));
}

View File

@@ -1,166 +0,0 @@
#include "ReShade.fxh"
/*
Bilateral - Smart
Copyright (C) 2024 guest(r)
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// Half-width of the filter window: each pass walks taps from -FRANGE to
// +FRANGE (inclusive) around the centre texel.
uniform float FRANGE <
ui_type = "drag";
ui_min = 1.0;
ui_max = 10.0;
ui_step = 1.0;
ui_label = "Filter Range";
> = 5.0;
// Starting value of the colour-similarity weight in wt(); the colour
// difference penalty is subtracted from it before clamping.
uniform float FBSMOOTH <
ui_type = "drag";
ui_min = 0.05;
ui_max = 1.0;
ui_step = 0.025;
ui_label = "Filter Base Smoothing";
> = 0.3;
// Its reciprocal (FSIGMA1 below) is the exponent applied to the distance
// falloff in getw().
uniform float FSIGMA <
ui_type = "drag";
ui_min = 0.15;
ui_max = 1.5;
ui_step = 0.05;
ui_label = "Filter Strength";
> = 1.0;
// Runtime-provided uniforms. NormalizedNativePixelSize is only referenced by a
// commented-out alternative in PS_Bilateral_X.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
// Point-sampled, clamped view of the back buffer (input of the X pass).
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// Intermediate target: written by the X pass, sampled by the Y pass.
texture2D tBilateral_P0{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
sampler2D sBilateral_P0{Texture=tBilateral_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// Exponent used by getw(): 1 / FSIGMA.
#define FSIGMA1 (1.0/FSIGMA)
// Thin alias for tex2D(sampler, uv).
#define COMPAT_TEXTURE(c,d) tex2D(c,d)
// Colour-similarity weight between two RGB samples.
// The summed absolute per-channel difference is scaled by 2.33 and divided by
// (sum of all channels of both samples + 1); that penalty is subtracted from
// FBSMOOTH and the result is clamped to [0, 0.25].
float wt(float3 A, float3 B)
{
    float diff_sum = dot(abs(A-B),1.0.xxx);
    float norm     = dot(A+B,1.0.xxx)+1.0;
    return clamp(FBSMOOTH - 2.33*diff_sum/norm, 0.0, 0.25);
}
// Total weight of one filter tap: distance falloff times colour similarity.
//   x : normalised tap distance; the falloff base (1 - x) is clamped at 0
//   c : reference (centre) colour
//   p : colour of the tap being weighted
float getw(float x, float3 c, float3 p)
{
    float falloff = pow(max(1.0-x,0.0), FSIGMA1);
    return falloff*wt(c,p);
}
// Horizontal pass of the separable bilateral filter.
// Reads the back buffer, accumulates taps from -FRANGE to +FRANGE along X
// around the texel that contains the current pixel, and returns the
// weight-normalised colour with alpha 1.
float4 PS_Bilateral_X(float4 position: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
{
    // Source resolution and the UV size of one source texel.
    // (Alternative source size: 1.0/NormalizedNativePixelSize -- disabled.)
    float2 src_size  = ViewportSize*BufferToViewportRatio;
    float2 src_texel = 1.0/src_size;

    // Position in texels; sub_off is the signed distance (in texels) from the
    // sample position to the centre of its texel along X.
    float2 texel_pos = vTexCoord * src_size;
    float  sub_off   = 0.5-frac(texel_pos.x);
    float2 center_uv = floor(texel_pos)*src_texel + 0.5*src_texel;
    float2 step_uv   = float2(src_texel.x, 0.0);

    float  half_width     = FRANGE;
    float  inv_half_width = 1.0/half_width;

    float3 center_rgb = COMPAT_TEXTURE(sBackBuffer, center_uv).rgb;
    float3 accum      = 0.0.xxx;
    float  weight_sum = 0.0;
    float  tap        = -half_width;

    // do/while: the body always runs at least once, whatever FRANGE is.
    do
    {
        float3 tap_rgb  = COMPAT_TEXTURE(sBackBuffer, center_uv + tap*step_uv).rgb;
        // Distance of this tap from the sample position, normalised to [0, 1].
        float  tap_dist = min(abs(tap+sub_off),half_width)*inv_half_width;
        float  tap_w    = getw(tap_dist,center_rgb,tap_rgb);
        accum      = accum + tap_w * tap_rgb;
        weight_sum = weight_sum + tap_w;
        tap        = tap + 1.0;
    } while (tap <= half_width);

    return float4(accum / weight_sum, 1.0);
}
// Vertical pass of the separable bilateral filter.
// Reads the result of the horizontal pass (sBilateral_P0), accumulates taps
// from -FRANGE to +FRANGE along Y, and returns the weight-normalised colour
// with alpha 1.
float4 PS_Bilateral_Y(float4 position: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
{
    // Source resolution and the UV size of one source texel.
    float2 src_size  = ViewportSize*BufferToViewportRatio;
    float2 src_texel = 1.0/src_size;

    // Position in texels; sub_off is the signed distance (in texels) from the
    // sample position to the centre of its texel along Y.
    float2 texel_pos = vTexCoord * src_size;
    float  sub_off   = 0.5-frac(texel_pos.y);
    float2 center_uv = floor(texel_pos)*src_texel + 0.5*src_texel;
    float2 step_uv   = float2(0.0, src_texel.y);

    float  half_width     = FRANGE;
    float  inv_half_width = 1.0/half_width;

    float3 center_rgb = COMPAT_TEXTURE(sBilateral_P0, center_uv).rgb;
    float3 accum      = 0.0.xxx;
    float  weight_sum = 0.0;
    float  tap        = -half_width;

    // do/while: the body always runs at least once, whatever FRANGE is.
    do
    {
        float3 tap_rgb  = COMPAT_TEXTURE(sBilateral_P0, center_uv + tap*step_uv).rgb;
        // Distance of this tap from the sample position, normalised to [0, 1].
        float  tap_dist = min(abs(tap+sub_off),half_width)*inv_half_width;
        float  tap_w    = getw(tap_dist,center_rgb,tap_rgb);
        accum      = accum + tap_w * tap_rgb;
        weight_sum = weight_sum + tap_w;
        tap        = tap + 1.0;
    } while (tap <= half_width);

    return float4(accum / weight_sum, 1.0);
}
// Two-pass separable bilateral filter: horizontal pass first, then vertical.
technique Bilateral
{
// Pass 1: horizontal filtering of the back buffer, rendered into the
// intermediate texture tBilateral_P0.
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_Bilateral_X;
RenderTarget = tBilateral_P0;
}
// Pass 2: vertical filtering of tBilateral_P0. No RenderTarget is set here
// (in ReShade that means the pass writes to the back buffer).
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_Bilateral_Y;
}
}

View File

@@ -32,7 +32,7 @@ uniform float XBR_EDGE_STR_P0 <
ui_min = 0.0;
ui_max = 5.0;
ui_step = 0.5;
ui_label = "Xbr - Edge Strength";
ui_label = "Xbr - Edge Strength p0";
> = 5.0;
uniform float XBR_WEIGHT <
@@ -76,7 +76,7 @@ uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
texture2D tBackBufferY{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
sampler2D sBackBufferY{Texture=tBackBufferY;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
sampler2D sBackBufferY{Texture=tBackBufferY;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
texture2D tSuper_xBR_P0 < pooled = true; > {Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
sampler2D sSuper_xBR_P0{Texture=tSuper_xBR_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
@@ -87,11 +87,8 @@ sampler2D sSuper_xBR_P1{Texture=tSuper_xBR_P1;AddressU=CLAMP;AddressV=CLAMP;Addr
texture2D tSuper_xBR_P2 < pooled = true; > {Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
sampler2D sSuper_xBR_P2{Texture=tSuper_xBR_P2;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
#define weight1 (XBR_WEIGHT*1.29633/10.0)
#define weight2 (XBR_WEIGHT*1.75068/10.0/2.0)
#define limits (XBR_EDGE_STR_P0+0.000001)
#define Y float3(.2126,.7152,.0722)
static const float3 Y = float3(.2126,.7152,.0722);
static const float wp0[6] = {2.0, 1.0, -1.0, 4.0, -1.0, 1.0};
static const float wp1[6] = {1.0, 0.0, 0.0, 0.0, 0.0, 0.0};
static const float wp2[6] = {0.0, 0.0, 0.0, 1.0, 0.0, 0.0};
@@ -156,11 +153,15 @@ float3 super_xbr(float wp[6], float4 P0, float4 B, float4 C, float4 P1, float4
/* Calc edgeness in horizontal/vertical directions. */
float hv_edge = (hv_wd(wp, f, i, e, h, c, i5, b, h5) - hv_wd(wp, e, f, h, i, d, f4, g, i4));
float limits = XBR_EDGE_STR_P0 + 0.000001;
float edge_strength = smoothstep(0.0, limits, abs(d_edge));
float4 w1, w2;
float3 c3, c4;
float weight1 = (XBR_WEIGHT*1.29633/10.0);
float weight2 = (XBR_WEIGHT*1.75068/10.0/2.0);
/* Filter weights. Two taps only. */
w1 = float4(-weight1, weight1+0.50, weight1+0.50, -weight1);
w2 = float4(-weight2, weight2+0.25, weight2+0.25, -weight2);
@@ -182,17 +183,15 @@ float3 super_xbr(float wp[6], float4 P0, float4 B, float4 C, float4 P1, float4
return color;
}
float4 PS_BackBufferY(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
float4 BackBufferY(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
{
float2 tc = (floor(vTexCoord / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize;
float3 color = tex2D(ReShade::BackBuffer, tc).rgb;
float3 color = tex2D(ReShade::BackBuffer, vTexCoord.xy).rgb;
return float4(color, luma(color));
}
float4 PS_Super_xBR_P0(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
float4 Super_xBR_P0(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
{
float2 ps = NormalizedNativePixelSize;
@@ -224,7 +223,7 @@ float4 PS_Super_xBR_P0(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : S
float4 PS_Super_xBR_P1(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
float4 Super_xBR_P1(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
{
float2 ps = NormalizedNativePixelSize;
@@ -267,7 +266,7 @@ float4 PS_Super_xBR_P1(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : S
}
float4 PS_Super_xBR_P2(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
float4 Super_xBR_P2(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
{
float2 ps = 0.5*NormalizedNativePixelSize;
@@ -326,7 +325,7 @@ float4 resampler(float4 x)
}
float4 PS_Jinc2(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
float4 Jinc2(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
{
float2 ps = 0.5*NormalizedNativePixelSize;
@@ -392,33 +391,33 @@ float4 PS_Jinc2(float4 pos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Targe
technique Super_xBR
{
pass
pass PS_BackBufferY
{
VertexShader = PostProcessVS;
PixelShader = PS_BackBufferY;
PixelShader = BackBufferY;
RenderTarget = tBackBufferY;
}
pass
pass PS_Super_xBR_P0
{
VertexShader = PostProcessVS;
PixelShader = PS_Super_xBR_P0;
PixelShader = Super_xBR_P0;
RenderTarget = tSuper_xBR_P0;
}
pass
pass PS_Super_xBR_P1
{
VertexShader = PostProcessVS;
PixelShader = PS_Super_xBR_P1;
PixelShader = Super_xBR_P1;
RenderTarget = tSuper_xBR_P1;
}
pass
pass PS_Super_xBR_P2
{
VertexShader = PostProcessVS;
PixelShader = PS_Super_xBR_P2;
PixelShader = Super_xBR_P2;
RenderTarget = tSuper_xBR_P2;
}
pass
pass PS_Jinc2
{
VertexShader = PostProcessVS;
PixelShader = PS_Jinc2;
PixelShader = Jinc2;
}
}

View File

@@ -1,4 +1,4 @@
#include "ReShade.fxh"
#include "../ReShade.fxh"
/*
Bicubic multipass Shader
@@ -32,21 +32,15 @@ uniform int BICUBIC_FILTER <
ui_tooltip = "Bicubic: balanced. Catmull-Rom: sharp. B-Spline: blurred. Hermite: soft pixelized.";
> = 0;
uniform float B_PRESCALE <
ui_type = "drag";
ui_min = 1.0;
ui_max = 8.0;
ui_step = 1.0;
ui_label = "Prescale factor";
> = 1.0;
uniform bool B_ANTI_RINGING <
ui_type = "radio";
ui_label = "Anti-Ringing";
ui_label = "Bicubic Anti-Ringing";
> = false;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float BufferWidth < source = "bufferwidth"; >;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
texture2D tBicubic_P0{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
sampler2D sBicubic_P0{Texture=tBicubic_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
@@ -92,18 +86,19 @@ float3 bicubic_ar(float fp, float3 C0, float3 C1, float3 C2, float3 C3)
}
float4 PS_Bicubic_X(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
float4 Bicubic_X(float4 pos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
{
// Both dimensions are unfiltered, so it looks for lores pixels.
float2 ps = NormalizedNativePixelSize/B_PRESCALE;
float2 pos = uv_tx.xy/ps - float2(0.5, 0.0);
float2 tc = (floor(pos) + 0.5.xx) * ps;
float2 fp = frac(pos);
float2 ps = NormalizedNativePixelSize;
float2 posi = uv_tx.xy + ps * float2(0.5, 0.0);
float2 fp = frac(posi / ps);
float3 C0 = tex2D(ReShade::BackBuffer, tc + ps*float2(-1.0, 0.0)).rgb;
float3 C1 = tex2D(ReShade::BackBuffer, tc + ps*float2( 0.0, 0.0)).rgb;
float3 C2 = tex2D(ReShade::BackBuffer, tc + ps*float2( 1.0, 0.0)).rgb;
float3 C3 = tex2D(ReShade::BackBuffer, tc + ps*float2( 2.0, 0.0)).rgb;
float2 tc = posi - (fp + 0.5) * ps;
float3 C0 = tex2D(ReShade::BackBuffer, tc + ps*float2(-1.0, 1.0)).rgb;
float3 C1 = tex2D(ReShade::BackBuffer, tc + ps*float2( 0.0, 1.0)).rgb;
float3 C2 = tex2D(ReShade::BackBuffer, tc + ps*float2( 1.0, 1.0)).rgb;
float3 C3 = tex2D(ReShade::BackBuffer, tc + ps*float2( 2.0, 1.0)).rgb;
float3 color = bicubic_ar(fp.x, C0, C1, C2, C3);
@@ -111,36 +106,38 @@ float4 PS_Bicubic_X(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Targ
}
float4 PS_Bicubic_Y(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
float4 Bicubic_Y(float4 pos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
{
// One must be careful here. Horizontal dimension is already filtered, so it looks for x in hires.
float2 ps = float2(1.0/BufferWidth, NormalizedNativePixelSize.y/B_PRESCALE);
float2 pos = uv_tx.xy/ps - float2(0.0, 0.5);
float2 tc = (floor(pos) + 0.5.xx) * ps;
float2 fp = frac(pos);
float2 ps = float2(1.0/(ViewportSize.x*BufferToViewportRatio.x), NormalizedNativePixelSize.y);
float2 posi = uv_tx.xy + ps * float2(0.5, 0.5);
float2 fp = frac(posi / ps);
float3 C0 = tex2D(sBicubic_P0, tc + ps*float2(0.0, -1.0)).rgb;
float3 C1 = tex2D(sBicubic_P0, tc + ps*float2(0.0, 0.0)).rgb;
float3 C2 = tex2D(sBicubic_P0, tc + ps*float2(0.0, 1.0)).rgb;
float3 C3 = tex2D(sBicubic_P0, tc + ps*float2(0.0, 2.0)).rgb;
float2 tc = posi - (fp + 0.5) * ps;
float3 C0 = tex2D(sBicubic_P0, tc + ps*float2(1.0, -1.0)).rgb;
float3 C1 = tex2D(sBicubic_P0, tc + ps*float2(1.0, 0.0)).rgb;
float3 C2 = tex2D(sBicubic_P0, tc + ps*float2(1.0, 1.0)).rgb;
float3 C3 = tex2D(sBicubic_P0, tc + ps*float2(1.0, 2.0)).rgb;
float3 color = bicubic_ar(fp.y, C0, C1, C2, C3);
return float4(color, 1.0);
}
technique Bicubic
{
pass
pass PS_Bicubic_X
{
VertexShader = PostProcessVS;
PixelShader = PS_Bicubic_X;
PixelShader = Bicubic_X;
RenderTarget = tBicubic_P0;
}
pass
pass PS_Bicubic_Y
{
VertexShader = PostProcessVS;
PixelShader = PS_Bicubic_Y;
PixelShader = Bicubic_Y;
}
}

View File

@@ -28,14 +28,6 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
http://www.gnu.org/copyleft/gpl.html
*/
uniform float L3_PRESCALE <
ui_type = "drag";
ui_min = 1.0;
ui_max = 8.0;
ui_step = 1.0;
ui_label = "Prescale factor";
> = 1.0;
uniform bool LANCZOS3_ANTI_RINGING <
ui_type = "radio";
@@ -43,12 +35,12 @@ uniform bool LANCZOS3_ANTI_RINGING <
> = true;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float BufferWidth < source = "bufferwidth"; >;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
texture2D tLanczos3_P0{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
sampler2D sLanczos3_P0{Texture=tLanczos3_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
#define AR_STRENGTH 1.0
#define FIX(c) (max(abs(c),1e-5))
#define PI 3.1415926535897932384626433832795
@@ -56,10 +48,10 @@ sampler2D sLanczos3_P0{Texture=tLanczos3_P0;AddressU=CLAMP;AddressV=CLAMP;Addres
float3 weight3(float x)
{
float3 Sampling = FIX(2.0 * PI * float3(x - 1.5, x - 0.5, x + 0.5));
float3 Sample = FIX(2.0 * PI * float3(x - 1.5, x - 0.5, x + 0.5));
// Lanczos3. Note: we normalize outside this function, so no point in multiplying by radius.
return sin(Sampling) * sin(Sampling / radius) / (Sampling * Sampling);
return sin(Sample) * sin(Sample / radius) / (Sample * Sample);
}
float3 lanczos3ar(float fp, float3 C0, float3 C1, float3 C2, float3 C3, float3 C4, float3 C5)
@@ -67,7 +59,7 @@ float3 lanczos3ar(float fp, float3 C0, float3 C1, float3 C2, float3 C3, float3 C
float3 w1 = weight3(0.5 - fp * 0.5);
float3 w2 = weight3(1.0 - fp * 0.5);
float sum = dot(w1, 1.0.xxx) + dot(w2, 1.0.xxx);
float sum = dot( w1, float3(1.,1.,1.)) + dot( w2, float3(1.,1.,1.));
w1 /= sum;
w2 /= sum;
@@ -87,21 +79,23 @@ float3 lanczos3ar(float fp, float3 C0, float3 C1, float3 C2, float3 C3, float3 C
}
float4 PS_Lanczos3_X(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
float4 Lanczos3_X(float4 pos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
{
// Both dimensions are unfiltered, so it looks for lores pixels.
float2 ps = NormalizedNativePixelSize/L3_PRESCALE;
float2 pos = uv_tx.xy/ps - float2(0.5, 0.0);
float2 tc = (floor(pos) + 0.5.xx) * ps;
float2 fp = frac(pos);
float2 ps = NormalizedNativePixelSize;
float2 posi = uv_tx.xy + ps * float2(0.5, 0.0);
float2 fp = frac(posi / ps);
float3 C0 = tex2D(ReShade::BackBuffer, tc + ps*float2(-2.0, 0.0)).rgb;
float3 C1 = tex2D(ReShade::BackBuffer, tc + ps*float2(-1.0, 0.0)).rgb;
float3 C2 = tex2D(ReShade::BackBuffer, tc + ps*float2( 0.0, 0.0)).rgb;
float3 C3 = tex2D(ReShade::BackBuffer, tc + ps*float2( 1.0, 0.0)).rgb;
float3 C4 = tex2D(ReShade::BackBuffer, tc + ps*float2( 2.0, 0.0)).rgb;
float3 C5 = tex2D(ReShade::BackBuffer, tc + ps*float2( 3.0, 0.0)).rgb;
float2 xystart = posi - (fp + 0.5) * ps;
float ypos = xystart.y + ps.y;
float3 C0 = tex2D(ReShade::BackBuffer, float2(xystart.x - ps.x * 2.0, ypos)).rgb;
float3 C1 = tex2D(ReShade::BackBuffer, float2(xystart.x - ps.x * 1.0, ypos)).rgb;
float3 C2 = tex2D(ReShade::BackBuffer, float2(xystart.x , ypos)).rgb;
float3 C3 = tex2D(ReShade::BackBuffer, float2(xystart.x + ps.x * 1.0, ypos)).rgb;
float3 C4 = tex2D(ReShade::BackBuffer, float2(xystart.x + ps.x * 2.0, ypos)).rgb;
float3 C5 = tex2D(ReShade::BackBuffer, float2(xystart.x + ps.x * 3.0, ypos)).rgb;
float3 color = lanczos3ar(fp.x, C0, C1, C2, C3, C4, C5);
@@ -109,38 +103,42 @@ float4 PS_Lanczos3_X(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Tar
}
float4 PS_Lanczos3_Y(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
float4 Lanczos3_Y(float4 pos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
{
// One must be careful here. Horizontal dimension is already filtered, so it looks for x in hires.
float2 ps = float2(1.0/BufferWidth, NormalizedNativePixelSize.y/L3_PRESCALE);
float2 pos = uv_tx.xy/ps - float2(0.0, 0.5);
float2 tc = (floor(pos) + 0.5.xx) * ps;
float2 fp = frac(pos);
float2 ps = float2(1.0/(ViewportSize.x*BufferToViewportRatio.x), NormalizedNativePixelSize.y);
float2 posi = uv_tx.xy + ps * float2(0.5, 0.5);
float2 fp = frac(posi / ps);
float3 C0 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, -2.0)).rgb;
float3 C1 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, -1.0)).rgb;
float3 C2 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, 0.0)).rgb;
float3 C3 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, 1.0)).rgb;
float3 C4 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, 2.0)).rgb;
float3 C5 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, 3.0)).rgb;
float2 xystart = posi - (fp + 0.5) * ps;
float xpos = xystart.x + ps.x;
float3 C0 = tex2D(sLanczos3_P0, float2(xpos, xystart.y - ps.y * 2.0)).rgb;
float3 C1 = tex2D(sLanczos3_P0, float2(xpos, xystart.y - ps.y * 1.0)).rgb;
float3 C2 = tex2D(sLanczos3_P0, float2(xpos, xystart.y )).rgb;
float3 C3 = tex2D(sLanczos3_P0, float2(xpos, xystart.y + ps.y * 1.0)).rgb;
float3 C4 = tex2D(sLanczos3_P0, float2(xpos, xystart.y + ps.y * 2.0)).rgb;
float3 C5 = tex2D(sLanczos3_P0, float2(xpos, xystart.y + ps.y * 3.0)).rgb;
float3 color = lanczos3ar(fp.y, C0, C1, C2, C3, C4, C5);
return float4(color, 1.0);
}
technique Lanczos3
{
pass
pass PS_Lanczos3_X
{
VertexShader = PostProcessVS;
PixelShader = PS_Lanczos3_X;
PixelShader = Lanczos3_X;
RenderTarget = tLanczos3_P0;
}
pass
pass PS_Lanczos3_Y
{
VertexShader = PostProcessVS;
PixelShader = PS_Lanczos3_Y;
PixelShader = Lanczos3_Y;
}
}

View File

@@ -32,8 +32,11 @@
uniform bool geom_curvature <
ui_type = "radio";
uniform float geom_curvature <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "Geom Curvature Toggle";
> = 1.0;
@@ -53,8 +56,11 @@ uniform float geom_d <
ui_label = "Geom Distance";
> = 1.5;
uniform bool geom_invert_aspect <
ui_type = "radio";
uniform float geom_invert_aspect <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "Geom Curvature Aspect Inversion";
> = 0.0;
@@ -76,16 +82,16 @@ uniform float geom_cornersmooth <
uniform float geom_x_tilt <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_min = -0.5;
ui_max = 0.5;
ui_step = 0.05;
ui_label = "Geom Horizontal Tilt";
> = 0.0;
uniform float geom_y_tilt <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_min = -0.5;
ui_max = 0.5;
ui_step = 0.05;
ui_label = "Geom Vertical Tilt";
> = 0.0;
@@ -106,22 +112,6 @@ uniform float geom_overscan_y <
ui_label = "Geom Vert. Overscan %";
> = 100.0;
uniform float centerx <
ui_type = "drag";
ui_min = -100.0;
ui_max = 100.0;
ui_step = 0.1;
ui_label = "Image Center X";
> = 0.00;
uniform float centery <
ui_type = "drag";
ui_min = -100.0;
ui_max = 100.0;
ui_step = 0.1;
ui_label = "Image Center Y";
> = 0.00;
uniform float geom_lum <
ui_type = "drag";
ui_min = 0.5;
@@ -147,13 +137,9 @@ uniform float geom_monitor_gamma <
> = 2.2;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 BufferViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform float ViewportWidth < source = "viewportwidth"; >;
uniform float ViewportHeight < source = "viewportheight"; >;
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
// Comment the next line to disable interpolation in linear gamma (and
// gain speed).
@@ -170,14 +156,14 @@ sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BO
#define PI 3.141592653589
#ifdef LINEAR_PROCESSING
# define TEX2D(c) pow(tex2D(sBackBuffer, (c)), geom_target_gamma.xxxx)
# define TEX2D(c) pow(tex2D(ReShade::BackBuffer, (c)), float4(geom_target_gamma,geom_target_gamma,geom_target_gamma,geom_target_gamma))
#else
# define TEX2D(c) tex2D(sBackBuffer, (c))
# define TEX2D(c) tex2D(ReShade::BackBuffer, (c))
#endif
// aspect ratio
#define aspect (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
#define overscan (1.01.xx);
#define aspect (geom_invert_aspect>0.5?float2(0.75,1.0):float2(1.0,0.75))
#define overscan (float2(1.01,1.01));
struct ST_VertexOut
@@ -189,21 +175,94 @@ struct ST_VertexOut
};
// Solves qa*t^2 + qb*t + qc = 0 built from xy, the tilt sines/cosines and the
// geom_R / geom_d uniforms, and returns the root that takes the minus sign in
// front of the square root.
// NOTE(review): presumably the view-ray / curved-screen intersection
// parameter (name and use in vs_bkwtrans suggest so) -- confirm.
float vs_intersect(float2 xy, float2 sinangle, float2 cosangle)
{
    float d_sq = geom_d*geom_d;
    float qa   = dot(xy,xy) + d_sq;
    float qb   = 2.0*(geom_R*(dot(xy,sinangle)-geom_d*cosangle.x*cosangle.y)-d_sq);
    float qc   = d_sq + 2.0*geom_R*geom_d*cosangle.x*cosangle.y;
    float disc = qb*qb-4.0*qa*qc;
    return (-qb-sqrt(disc))/(2.0*qa);
}
// Maps a 2D coordinate `xy` through the curvature model defined by geom_R,
// geom_d and the tilt sines/cosines, returning a 2D coordinate.
// NOTE(review): presumably the screen-to-texture ("backward") mapping that
// vs_fwtrans inverts -- inferred from the names, confirm.
float2 vs_bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
{
// Root of the quadratic set up in vs_intersect for this xy.
float c = vs_intersect(xy, sinangle, cosangle);
// Scale xy by c, add geom_R*sinangle, and normalise by geom_R.
float2 point = (float2(c, c)*xy - float2(-geom_R, -geom_R)*sinangle) / float2(geom_R, geom_R);
float2 poc = point/cosangle;
float2 tang = sinangle/cosangle;
// Second quadratic A*a^2 + B*a + C = 0; the '+' root is taken.
float A = dot(tang, tang) + 1.0;
float B = -2.0*dot(poc, tang);
float C = dot(poc, poc) - 1.0;
float a = (-B + sqrt(B*B - 4.0*A*C))/(2.0*A);
float2 uv = (point - a*sinangle)/cosangle;
// FIX is a macro defined elsewhere in the file; r also appears in the
// divisor sin(r/geom_R) below.
float r = FIX(geom_R*acos(a));
return uv*r/sin(r/geom_R);
}
// Maps `uv` through the curvature model in the direction opposite to
// vs_bkwtrans (per the function names), using geom_R, geom_d and the tilt
// sines/cosines.
// `len` is the length of uv passed through the FIX macro (defined elsewhere in
// the file) and is used as a divisor below.
float2 vs_fwtrans(float2 uv, float2 sinangle, float2 cosangle)
{
    float  len   = FIX(sqrt(dot(uv,uv)));
    float2 dir   = uv * (sin(len/geom_R)/len);
    float  drop  = 1.0-cos(len/geom_R);
    float  denom = geom_d/geom_R + drop*cosangle.x*cosangle.y+dot(dir,sinangle);
    return geom_d*(dir*cosangle-drop*sinangle)/denom;
}
// Computes the values stored in vVARS.stretch by VS_CRT_Geom.
// Returns float3(centre.xy, extent): xy is the midpoint of the lo/hi bounds
// (re-multiplied by `aspect`), z is the larger of the X and Y extents.
float3 vs_maxscale(float2 sinangle, float2 cosangle)
{
// Reference point obtained by pushing a tilt-dependent offset through
// vs_bkwtrans.
float2 c = vs_bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle);
// Half-size of the image in aspect-corrected units (`aspect` is a macro).
float2 a = float2(0.5,0.5)*aspect;
// Forward-map the left/top and right/bottom edge points; keep only the
// component that matters for each axis.
float2 lo = float2(vs_fwtrans(float2(-a.x, c.y), sinangle, cosangle).x,
vs_fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect;
float2 hi = float2(vs_fwtrans(float2(+a.x, c.y), sinangle, cosangle).x,
vs_fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect;
return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y));
}
// Vertex shader: emits one oversized triangle that covers the whole screen
// (vertex ids 0..2) and precomputes per-draw values for the pixel shader.
//   position / texcoord : clip-space position and matching UV
//   vVARS               : tilt sines/cosines, stretch and source size
void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
{
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // Source size in pixels.
    // (Alternative: 1.0/NormalizedNativePixelSize -- disabled.)
    float2 src_size = ViewportSize*BufferViewportRatio;

    // Tilt sines/cosines are computed once here and reused for the stretch.
    float2 tilt     = float2(geom_x_tilt, geom_y_tilt);
    float2 tilt_sin = sin(tilt);
    float2 tilt_cos = cos(tilt);

    vVARS.sinangle    = tilt_sin;
    vVARS.cosangle    = tilt_cos;
    vVARS.stretch     = vs_maxscale(tilt_sin, tilt_cos);
    vVARS.TextureSize = float2(src_size.x, src_size.y);
}
float intersect(float2 xy, float2 sinangle, float2 cosangle)
{
float A = dot(xy,xy) + geom_d*geom_d;
float B, C;
B = 2.0*(geom_R*(dot(xy,sinangle) - geom_d*cosangle.x*cosangle.y) - geom_d*geom_d);
C = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y;
B = 2.0*(geom_R*(dot(xy,sinangle) - geom_d*cosangle.x*cosangle.y) - geom_d*geom_d);
C = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y;
return (-B-sqrt(B*B - 4.0*A*C))/(2.0*A);
}
float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
{
float c = intersect(xy, sinangle, cosangle);
float2 point = (c.xx*xy + geom_R.xx*sinangle) / geom_R.xx;
float c = intersect(xy, sinangle, cosangle);
float2 point = (float2(c, c)*xy - float2(-geom_R, -geom_R)*sinangle) / float2(geom_R, geom_R);
float2 poc = point/cosangle;
float2 tang = sinangle/cosangle;
@@ -212,7 +271,7 @@ float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
float C = dot(poc, poc) - 1.0;
float a = (-B + sqrt(B*B - 4.0*A*C)) / (2.0*A);
float2 uv = (point - a*sinangle) / cosangle;
float2 uv = (point - a*sinangle) / cosangle;
float r = FIX(geom_R*acos(a));
return uv*r/sin(r/geom_R);
@@ -225,91 +284,74 @@ float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
float x = 1.0 - cos(r/geom_R);
float D;
D = geom_d/geom_R + x*cosangle.x*cosangle.y + dot(uv,sinangle);
D = geom_d/geom_R + x*cosangle.x*cosangle.y + dot(uv,sinangle);
return geom_d*(uv*cosangle - x*sinangle)/D;
}
float3 maxscale(float2 sinangle, float2 cosangle)
{
float2 c = bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle);
float2 a = 0.5.xx*aspect;
float2 c = bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle);
float2 a = float2(0.5, 0.5)*aspect;
float2 lo = float2(fwtrans(float2(-a.x, c.y), sinangle, cosangle).x,
fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect;
float2 hi = float2(fwtrans(float2(+a.x, c.y), sinangle, cosangle).x,
fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect;
float2 lo = float2(fwtrans(float2(-a.x, c.y), sinangle, cosangle).x,
fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect;
float2 hi = float2(fwtrans(float2(+a.x, c.y), sinangle, cosangle).x,
fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect;
return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x, hi.y-lo.y));
return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x, hi.y-lo.y));
}
float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)
{
coord = (coord - 0.5.xx)*aspect*stretch.z + stretch.xy;
coord = (coord - float2(0.5, 0.5))*aspect*stretch.z + stretch.xy;
return (bkwtrans(coord, sinangle, cosangle) /
float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0)/aspect + 0.5.xx);
float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0)/aspect + float2(0.5, 0.5));
}
// Vertex shader generating a triangle covering the entire screen
void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
// Screen centering
texcoord = texcoord - float2(centerx,centery)/100.0;
float2 SourceSize = 1.0/NormalizedNativePixelSize;
// Precalculate a bunch of useful values we'll need in the fragment
// shader.
vVARS.sinangle = sin(float2(geom_x_tilt, geom_y_tilt));
vVARS.cosangle = cos(float2(geom_x_tilt, geom_y_tilt));
vVARS.stretch = maxscale(vVARS.sinangle, vVARS.cosangle);
vVARS.TextureSize = float2(SourceSize.x, SourceSize.y);
}
float corner(float2 coord)
{
coord = min(coord, 1.0.xx - coord) * aspect;
float2 cdist = geom_cornersize.xx;
coord = (cdist - min(coord, cdist));
float dist = sqrt(dot(coord, coord));
coord = (coord - float2(0.5, 0.5)) * float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0) + float2(0.5, 0.5);
coord = min(coord, float2(1.0, 1.0) - coord) * aspect;
float2 cdist = float2(geom_cornersize, geom_cornersize);
coord = (cdist - min(coord, cdist));
float dist = sqrt(dot(coord, coord));
return clamp((cdist.x - dist)*geom_cornersmooth, 0.0, 1.0);
return clamp((cdist.x - dist)*geom_cornersmooth, 0.0, 1.0);
}
float fwidth(float value)
{
return abs(ddx(value)) + abs(ddy(value));
float fwidth(float value){
return abs(ddx(value)) + abs(ddy(value));
}
float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
{
// Texture coordinates of the texel containing the active pixel.
float2 xy = (geom_curvature == true) ? transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch) : vTexCoord;
float2 xy;
float cval = corner((xy-0.5.xx) * BufferToViewportRatio + 0.5.xx);
if (geom_curvature > 0.5)
xy = transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch);
else
xy = vTexCoord;
float2 uv_ratio = frac((xy * vVARS.TextureSize - 0.5.xx) / vVARS.TextureSize);
float cval = corner(xy);
float2 uv_ratio = frac((xy * vVARS.TextureSize - float2(0.5, 0.5)) / vVARS.TextureSize);
float4 col = TEX2D(xy);
#ifndef LINEAR_PROCESSING
col = pow(col, geom_target_gamma.xxxx);
col = pow(col , float4(geom_target_gamma, geom_target_gamma, geom_target_gamma, geom_target_gamma));
#endif
col.rgb *= (geom_lum * step(0.0, uv_ratio.y));
float3 mul_res = col.rgb * cval.xxx;
float3 mul_res = col.rgb * float3(cval, cval, cval);
// Convert the image gamma for display on our output device.
mul_res = pow(mul_res, 1.0 / geom_monitor_gamma.xxx);
mul_res = pow(mul_res, float3(1.0 / geom_monitor_gamma, 1.0 / geom_monitor_gamma, 1.0 / geom_monitor_gamma));
return float4(mul_res, 1.0);
}

View File

@@ -1,224 +0,0 @@
#ifndef GEOM_PARAMS_H
#define GEOM_PARAMS_H
/*
Geom Shader - a modified CRT-Geom without CRT features made to be appended/integrated
into any other shaders and provide curvature/warping/oversampling features.
Adapted by Hyllian (2024).
*/
/*
CRT-interlaced
Copyright (C) 2010-2012 cgwg, Themaister and DOLLS
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
(cgwg gave their consent to have the original version of this shader
distributed under the GPL in this message:
http://board.byuu.org/viewtopic.php?p=26075#p26075
"Feel free to distribute my shaders under the GPL. After all, the
barrel distortion code was taken from the Curvature shader, which is
under the GPL."
)
This shader variant is pre-configured with screen curvature
*/
// ---------------------------------------------------------------------------
// User-tunable parameters of the Geom curvature helpers.
// Boolean toggles are initialised with `false` (not the float literal 0.0) so
// the declared type and the default agree.
// ---------------------------------------------------------------------------

// Toggle for the curvature warp.
uniform bool geom_curvature <
    ui_type = "radio";
    ui_category = "Geom Curvature";
    ui_label = "Geom Curvature Toggle";
> = false;

// Radius term used by intersect(), bkwtrans(), fwtrans() and maxscale().
uniform float geom_R <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 10.0;
    ui_step = 0.1;
    ui_category = "Geom Curvature";
    ui_label = "Geom Curvature Radius";
> = 2.0;

// Distance term used by intersect(), fwtrans() and maxscale().
uniform float geom_d <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 3.0;
    ui_step = 0.1;
    ui_category = "Geom Curvature";
    ui_label = "Geom Distance";
> = 1.5;

// Selects which of the two float2 values the `aspect` macro evaluates to.
uniform bool geom_invert_aspect <
    ui_type = "radio";
    ui_category = "Geom Curvature";
    ui_label = "Geom Curvature Aspect Inversion";
> = false;

// Corner rounding distance read by corner().
uniform float geom_cornersize <
    ui_type = "drag";
    ui_min = 0.001;
    ui_max = 1.0;
    ui_step = 0.005;
    ui_category = "Geom Curvature";
    ui_label = "Geom Corner Size";
> = 0.03;

// Multiplier applied to the corner distance before clamping in corner().
uniform float geom_cornersmooth <
    ui_type = "drag";
    ui_min = 80.0;
    ui_max = 2000.0;
    ui_step = 100.0;
    ui_category = "Geom Curvature";
    ui_label = "Geom Corner Smoothness";
> = 1000.0;

// Tilt values; not read by any function in this header.
// NOTE(review): presumably fed to sin()/cos() by the including shader to
// build the sinangle/cosangle arguments - confirm against the includer.
uniform float geom_x_tilt <
    ui_type = "drag";
    ui_min = -1.0;
    ui_max = 1.0;
    ui_step = 0.05;
    ui_category = "Geom Curvature";
    ui_label = "Geom Horizontal Tilt";
> = 0.0;

uniform float geom_y_tilt <
    ui_type = "drag";
    ui_min = -1.0;
    ui_max = 1.0;
    ui_step = 0.05;
    ui_category = "Geom Curvature";
    ui_label = "Geom Vertical Tilt";
> = 0.0;

// Overscan percentages; transform() divides them by 100 before use.
uniform float geom_overscan_x <
    ui_type = "drag";
    ui_min = -125.0;
    ui_max = 125.0;
    ui_step = 0.5;
    ui_category = "Geom Curvature";
    ui_label = "Geom Horiz. Overscan %";
> = 100.0;

uniform float geom_overscan_y <
    ui_type = "drag";
    ui_min = -125.0;
    ui_max = 125.0;
    ui_step = 0.5;
    ui_category = "Geom Curvature";
    ui_label = "Geom Vert. Overscan %";
> = 100.0;

// Image centre offsets; not read by any function in this header.
// NOTE(review): consumer is outside this file - confirm units there.
uniform float centerx <
    ui_type = "drag";
    ui_min = -100.0;
    ui_max = 100.0;
    ui_step = 0.1;
    ui_category = "Geom Curvature";
    ui_label = "Image Center X";
> = 0.00;

uniform float centery <
    ui_type = "drag";
    ui_min = -100.0;
    ui_max = 100.0;
    ui_step = 0.1;
    ui_category = "Geom Curvature";
    ui_label = "Image Center Y";
> = 0.00;
// Macros.
// FIX(c): magnitude of c, floored at 1e-5 so it can be used as a divisor.
// The macro deliberately carries no trailing ';' - the call site supplies the
// statement terminator, which also lets FIX() appear inside an expression.
#define FIX(c) max(abs(c), 1e-5)
// aspect ratio
#define aspect (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
// Builds the quadratic coefficients from the point `xy`, the tilt sines and
// cosines and the geom_R / geom_d parameters, and returns the root obtained
// with the negative square root.
float intersect(float2 xy, float2 sinangle, float2 cosangle)
{
    float quad_a = dot(xy,xy) + geom_d*geom_d;
    float quad_b = 2.0*(geom_R*(dot(xy,sinangle) - geom_d*cosangle.x*cosangle.y) - geom_d*geom_d);
    float quad_c = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y;

    float discriminant = quad_b*quad_b - 4.0*quad_a*quad_c;
    return (-quad_b-sqrt(discriminant))/(2.0*quad_a);
}
// Backward transform: maps the point `xy` through intersect() and a second
// quadratic solve, then rescales the result by r / sin(r / geom_R).
float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
{
    float t = intersect(xy, sinangle, cosangle);
    float2 radius2 = float2(geom_R, geom_R);
    float2 hit = (float2(t, t)*xy + radius2*sinangle) / radius2;

    float2 hit_over_cos = hit/cosangle;
    float2 tangent = sinangle/cosangle;

    // Second quadratic, solved with the positive square root.
    float quad_a = dot(tangent, tangent) + 1.0;
    float quad_b = -2.0*dot(hit_over_cos, tangent);
    float quad_c = dot(hit_over_cos, hit_over_cos) - 1.0;
    float root = (-quad_b + sqrt(quad_b*quad_b - 4.0*quad_a*quad_c)) / (2.0*quad_a);

    float2 uv = (hit - root*sinangle) / cosangle;
    float r = FIX(geom_R*acos(root));
    return uv*r/sin(r/geom_R);
}
// Forward transform: rescales `uv` by sin(r / geom_R) / r (r = |uv|, floored
// by FIX) and projects it using the tilt terms and geom_d.
float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
{
    float r = FIX(sqrt(dot(uv, uv)));
    float2 scaled = uv * (sin(r/geom_R)/r);
    float drop = 1.0 - cos(r/geom_R);
    float denom = geom_d/geom_R + drop*cosangle.x*cosangle.y + dot(scaled,sinangle);
    return geom_d*(scaled*cosangle - drop*sinangle)/denom;
}
// Returns float3(offset.xy, scale): xy is the midpoint of the forward-mapped
// low/high extents (re-multiplied by aspect), z is the larger of the two
// extent widths.
float3 maxscale(float2 sinangle, float2 cosangle)
{
    float2 ar = aspect;
    float2 centre = bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle);
    float2 half_ext = float2(0.5, 0.5)*ar;

    float lo_x = fwtrans(float2(-half_ext.x, centre.y), sinangle, cosangle).x;
    float lo_y = fwtrans(float2( centre.x, -half_ext.y), sinangle, cosangle).y;
    float hi_x = fwtrans(float2(+half_ext.x, centre.y), sinangle, cosangle).x;
    float hi_y = fwtrans(float2( centre.x, +half_ext.y), sinangle, cosangle).y;

    float2 lo = float2(lo_x, lo_y)/ar;
    float2 hi = float2(hi_x, hi_y)/ar;
    return float3((hi+lo)*ar*0.5,max(hi.x-lo.x, hi.y-lo.y));
}
// Re-centres `coord` around 0, applies aspect and the stretch produced by
// maxscale(), runs bkwtrans(), then undoes overscan (given in percent) and
// aspect and shifts the result back by 0.5.
float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)
{
    float2 half2 = float2(0.5, 0.5);
    float2 overscan_scale = float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0);

    float2 centred = (coord - half2)*aspect*stretch.z + stretch.xy;
    float2 warped = bkwtrans(centred, sinangle, cosangle);
    return (warped / overscan_scale/aspect + half2);
}
// Corner mask in [0, 1]: measures how far `coord` sits inside a corner region
// of size geom_cornersize and scales that by geom_cornersmooth before clamping.
float corner(float2 coord)
{
    float2 edge_dist = min(coord, float2(1.0, 1.0) - coord) * aspect;
    float2 size2 = float2(geom_cornersize, geom_cornersize);
    float2 inset = (size2 - min(edge_dist, size2));
    float len = sqrt(dot(inset, inset));
    return clamp((size2.x - len)*geom_cornersmooth, 0.0, 1.0);
}
// Sum of the absolute screen-space derivatives of `value` in x and y.
float fwidth(float value)
{
    float grad_x = abs(ddx(value));
    float grad_y = abs(ddy(value));
    return grad_x + grad_y;
}
#endif // GEOM_PARAMS_H

View File

@@ -1,242 +0,0 @@
#ifndef MASK_PARAMS_H
#define MASK_PARAMS_H
// Strength of the dark mask subpixels, exposed as a 0..1 slider.
// NOTE(review): presumably forwarded as mask_dark_str to mask_weights() by the
// including shader - confirm against the caller.
uniform float MASK_DARK_STRENGTH <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_category = "CRT Mask";
ui_label = "MASK DARK SUBPIXEL STRENGTH";
> = 0.5;
// Strength of the lit mask subpixels, exposed as a 0..6 slider.
// NOTE(review): presumably forwarded as mask_light_str to mask_weights() -
// confirm against the caller.
uniform float MASK_LIGHT_STRENGTH <
ui_type = "drag";
ui_min = 0.0;
ui_max = 6.0;
ui_step = 0.01;
ui_category = "CRT Mask";
ui_label = "MASK LIGHT SUBPIXEL STRENGTH";
> = 0.5;
/* Mask code pasted from subpixel_masks.h. Masks 3 and 4 added. */
// Returns the per-channel RGB multiplier of the selected phosphor mask at the
// pixel position `coord`.
//   coord             - pixel position; only coord.x / coord.y modulo the
//                       pattern width / height select the table entry.
//   phosphor_layout   - pattern selector 0..15; 0 and any unlisted value
//                       return (1,1,1).
//   monitor_subpixels - 1.0 uses the colour definitions as written; any other
//                       value swaps the R and B channels of red, blue, yellow
//                       and cyan (green, magenta, black, white are unaffected).
//   mask_light_str    - lit channels are weighted 1 + mask_light_str.
//   mask_dark_str     - dark channels are weighted 1 - mask_dark_str.
// Each layout stores its pattern row-major in a flat array and indexes it with
// (row * pattern_width + column).
float3 mask_weights(float2 coord, int phosphor_layout, float monitor_subpixels, float mask_light_str, float mask_dark_str){
float3 weights = float3(1.,1.,1.);
float on = 1.+mask_light_str;
// float on = 1.;
float off = 1.-mask_dark_str;
float3 red = monitor_subpixels==1.0 ? float3(on, off, off) : float3(off, off, on );
float3 green = float3(off, on, off);
float3 blue = monitor_subpixels==1.0 ? float3(off, off, on ) : float3(on, off, off);
float3 magenta = float3(on, off, on );
float3 yellow = monitor_subpixels==1.0 ? float3(on, on, off) : float3(off, on, on );
float3 cyan = monitor_subpixels==1.0 ? float3(off, on, on ) : float3(on, on, off);
float3 black = float3(off, off, off);
float3 white = float3(on, on, on );
// w = row index, z = column index into the pattern tables below.
// Only z is initialised here; w is always assigned before it is read.
int w, z = 0;
// This pattern is used by a few layouts, so we'll define it here
float3 aperture_weights = lerp(magenta, green, floor(coord.x % 2.0));
if(phosphor_layout == 0) return weights;
else if(phosphor_layout == 1){
// classic aperture for RGB panels; good for 1080p, too small for 4K+
// aka aperture_1_2_bgr
weights = aperture_weights;
return weights;
}
else if(phosphor_layout == 2){
// Classic RGB layout; good for 1080p and lower
float3 bw3[3] = {red, green, blue};
// float3 bw3[3] = float3[](black, yellow, blue);
z = int(floor(coord.x % 3.0));
weights = bw3[z];
return weights;
}
else if(phosphor_layout == 3){
// black and white aperture; good for weird subpixel layouts and low brightness; good for 1080p and lower
float3 bw3[3] = {black, white, black};
z = int(floor(coord.x % 3.0));
weights = bw3[z];
return weights;
}
else if(phosphor_layout == 4){
// reduced TVL aperture for RGB panels. Good for 4k.
// aperture_2_4_rgb
float3 big_ap_rgb[4] = {red, yellow, cyan, blue};
w = int(floor(coord.x % 4.0));
weights = big_ap_rgb[w];
return weights;
}
else if(phosphor_layout == 5){
// black and white aperture; good for weird subpixel layouts and low brightness; good for 4k
float3 bw4[4] = {black, black, white, white};
z = int(floor(coord.x % 4.0));
weights = bw4[z];
return weights;
}
else if(phosphor_layout == 6){
// aperture_1_4_rgb; good for simulating lower
float3 ap4[4] = {red, green, blue, black};
z = int(floor(coord.x % 4.0));
weights = ap4[z];
return weights;
}
else if(phosphor_layout == 7){
// 2x2 shadow mask for RGB panels; good for 1080p, too small for 4K+
// aka delta_1_2x1_bgr
float3 inverse_aperture = lerp(green, magenta, floor(coord.x % 2.0));
weights = lerp(aperture_weights, inverse_aperture, floor(coord.y % 2.0));
return weights;
}
else if(phosphor_layout == 8){
// delta_2_4x1_rgb
float3 delta[8] = {
red, yellow, cyan, blue,
cyan, blue, red, yellow
};
w = int(floor(coord.y % 2.0));
z = int(floor(coord.x % 4.0));
weights = delta[4*w+z];
return weights;
}
else if(phosphor_layout == 9){
// delta_1_4x1_rgb; dunno why this is called 4x1 when it's obviously 4x2 /shrug
float3 delta1[8] = {
red, green, blue, black,
blue, black, red, green
};
w = int(floor(coord.y % 2.0));
z = int(floor(coord.x % 4.0));
weights = delta1[4*w+z];
return weights;
}
else if(phosphor_layout == 10){
// delta_2_4x2_rgb
float3 delta[16] = {
red, yellow, cyan, blue,
red, yellow, cyan, blue,
cyan, blue, red, yellow,
cyan, blue, red, yellow
};
w = int(floor(coord.y % 4.0));
z = int(floor(coord.x % 4.0));
weights = delta[4*w+z];
return weights;
}
else if(phosphor_layout == 11){
// slot mask for RGB panels; looks okay at 1080p, looks better at 4K
float3 slotmask[24] = {
red, green, blue, red, green, blue,
red, green, blue, black, black, black,
red, green, blue, red, green, blue,
black, black, black, red, green, blue,
};
w = int(floor(coord.y % 4.0));
z = int(floor(coord.x % 6.0));
// use the indexes to find which color to apply to the current pixel
weights = slotmask[6*w+z];
return weights;
}
else if(phosphor_layout == 12){
// slot mask for RGB panels; looks okay at 1080p, looks better at 4K
float3 slotmask[24] = {
black, white, black, black, white, black,
black, white, black, black, black, black,
black, white, black, black, white, black,
black, black, black, black, white, black
};
w = int(floor(coord.y % 4.0));
z = int(floor(coord.x % 6.0));
// use the indexes to find which color to apply to the current pixel
weights = slotmask[6*w+z];
return weights;
}
else if(phosphor_layout == 13){
// based on MajorPainInTheCactus' HDR slot mask
float3 slot[32] = {
red, green, blue, black, red, green, blue, black,
red, green, blue, black, black, black, black, black,
red, green, blue, black, red, green, blue, black,
black, black, black, black, red, green, blue, black
};
w = int(floor(coord.y % 4.0));
z = int(floor(coord.x % 8.0));
weights = slot[8*w+z];
return weights;
}
else if(phosphor_layout == 14){
// same as above but for RGB panels
float3 slot2[40] = {
red, yellow, green, blue, blue, red, yellow, green, blue, blue ,
black, green, green, blue, blue, red, red, black, black, black,
red, yellow, green, blue, blue, red, yellow, green, blue, blue ,
red, red, black, black, black, black, green, green, blue, blue
};
w = int(floor(coord.y % 4.0));
z = int(floor(coord.x % 10.0));
weights = slot2[10*w+z];
return weights;
}
else if(phosphor_layout == 15){
// slot_3_7x6_rgb
float3 slot[84] = {
red, red, yellow, green, cyan, blue, blue, red, red, yellow, green, cyan, blue, blue,
red, red, yellow, green, cyan, blue, blue, red, red, yellow, green, cyan, blue, blue,
red, red, yellow, green, cyan, blue, blue, black, black, black, black, black, black, black,
red, red, yellow, green, cyan, blue, blue, red, red, yellow, green, cyan, blue, blue,
red, red, yellow, green, cyan, blue, blue, red, red, yellow, green, cyan, blue, blue,
black, black, black, black, black, black, black, black, red, red, yellow, green, cyan, blue
};
w = int(floor(coord.y % 6.0));
z = int(floor(coord.x % 14.0));
weights = slot[14*w+z];
return weights;
}
else return weights;
}
#endif // MASK_PARAMS_H

View File

@@ -1,437 +0,0 @@
#include "ReShade.fxh"
// NTSC-Adaptive-Lite - Faster for 2-Phase games (only 15 taps!)
// based on Themaister's NTSC shader
// Preset selector (0 = Custom, 1 = Svideo, 2 = Composite, RF = 3). The vertex
// shader works with quality-1, so Custom maps to -1 and selects the cust_*
// values below instead of preset-derived fringing/artifacting.
uniform int quality <
ui_type = "combo";
ui_items = "Custom\0Svideo\0Composite\0RF\0";
ui_label = "NTSC Preset";
> = 2;
// Field merging switch; only consulted by the vertex shader when the preset
// is Custom.
uniform bool ntsc_fields <
ui_type = "radio";
ui_label = "NTSC Merge Fields";
> = false;
// Phase mode (0 = Auto, 1 = 2-Phase, 2 = 3-Phase). Auto picks 2-phase when the
// native width is above 300 pixels and 3-phase otherwise.
uniform int ntsc_phase <
ui_type = "combo";
ui_items = "Auto\0(2-Phase)\0(3-Phase)\0";
ui_label = "NTSC Phase";
> = 0;
// Resolution scale; read as `res` by both passes.
uniform float ntsc_scale <
ui_type = "drag";
ui_min = 0.20;
ui_max = 3.0;
ui_step = 0.05;
ui_label = "NTSC Resolution Scaling";
> = 1.0;
// Copied to vVARS.SATURATION and used (doubled) on the mix_mat diagonal.
uniform float ntsc_sat <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 0.01;
ui_label = "NTSC Color Saturation";
> = 1.0;
// Copied to vVARS.BRIGHTNESS, the luma gain in mix_mat.
uniform float ntsc_bright <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.5;
ui_step = 0.01;
ui_label = "NTSC Brightness";
> = 1.0;
// Fringing value used when the preset is Custom.
uniform float cust_fringing <
ui_type = "drag";
ui_min = 0.0;
ui_max = 5.0;
ui_step = 0.1;
ui_label = "NTSC Custom Fringing Value";
> = 0.0;
// Artifacting value used when the preset is Custom.
uniform float cust_artifacting <
ui_type = "drag";
ui_min = 0.0;
ui_max = 5.0;
ui_step = 0.1;
ui_label = "NTSC Custom Artifacting Value";
> = 0.0;
// Chroma sampling scale read by the second pass (capped at 2.2 for 3-phase,
// halved for 2-phase).
uniform float chroma_scale <
ui_type = "drag";
ui_min = 0.2;
ui_max = 4.0;
ui_step = 0.1;
ui_label = "NTSC Chroma Scaling";
> = 1.0;
// Scales the edge-detected `blend` term that perturbs the chroma phase in the
// first pass.
uniform float ntsc_artifacting_rainbow <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.1;
ui_label = "NTSC Artifacting Rainbow Effect";
> = 0.0;
// When true the second pass raises its output to the power 2.2.
uniform bool linearize <
ui_type = "radio";
ui_label = "NTSC Linearize Output Gamma";
> = false;
// Values supplied by the host through `source` annotations.
uniform float FrameCount < source = "framecount"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float BufferWidth < source = "bufferwidth"; >;
uniform float BufferHeight < source = "bufferheight"; >;
// RGBA16f is the same as float_framebuffer.
// Intermediate target written by pass 0 (YIQ signal) and read by pass 1.
texture2D tNTSC_P0 < pooled = false; > {Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D sNTSC_P0{Texture=tNTSC_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=LINEAR;MinFilter=LINEAR;};
#define PI 3.14159265
// Output resolution in pixels, assembled from the two buffer-size uniforms.
#define OutputSize float2(BufferWidth,BufferHeight)
// Per-vertex values computed once in VS_NTSC_ADAPTIVE_P0 and interpolated
// into PS_NTSC_ADAPTIVE_P0.
struct ST_VertexOut
{
// Pixel position used to derive the chroma carrier phase.
float2 pix_no : TEXCOORD1;
// 2.0 or 3.0: the selected NTSC phase mode.
float phase : TEXCOORD2;
// The next four feed the mix_mat cross-talk matrix.
float BRIGHTNESS : TEXCOORD3;
float SATURATION : TEXCOORD4;
float FRINGING : TEXCOORD5;
float ARTIFACTING : TEXCOORD6;
// Carrier phase advance per unit of pix_no.x.
float CHROMA_MOD_FREQ : TEXCOORD7;
// > 0.5 enables field merging in the pixel shader.
float MERGE : TEXCOORD8;
};
// Vertex shader generating a triangle covering the entire screen, and
// precomputing the per-frame NTSC parameters consumed by PS_NTSC_ADAPTIVE_P0.
//   id       - vertex index 0..2 of the full-screen triangle.
//   position - clip-space position.
//   TexCoord - texture coordinate (0..2 across the oversized triangle).
//   vVARS    - pixel index, phase mode, mix-matrix terms, carrier frequency
//              and the field-merge flag.
void VS_NTSC_ADAPTIVE_P0(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 TexCoord : TEXCOORD, out ST_VertexOut vVARS)
{
    TexCoord.x = (id == 2) ? 2.0 : 0.0;
    TexCoord.y = (id == 1) ? 2.0 : 0.0;
    position = float4(TexCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    float res = ntsc_scale;
    float OriginalSize = 1.0/NormalizedNativePixelSize.x;
    float2 SourceSize = 1.0/NormalizedNativePixelSize;

    // Below 1.0 the scale shrinks the pixel grid directly; at or above 1.0 it
    // is applied to pix_no.x further down.
    if (res < 1.0)
        vVARS.pix_no = TexCoord * SourceSize.xy * (res * OutputSize.xy / SourceSize.xy);
    else
        vVARS.pix_no = TexCoord * SourceSize.xy * ( OutputSize.xy / SourceSize.xy);

    // ntsc_phase: 0 = Auto, 1 = forced 2-phase, 2 = forced 3-phase.
    // The forced branch must test "> 1" (not "> 2"): the combo never yields a
    // value above 2, and PS_NTSC_ADAPTIVE_P1 uses the same "> 1" test, so both
    // passes now agree when 3-phase is selected.
    vVARS.phase = (ntsc_phase < 1) ? ((OriginalSize > 300.0) ? 2.0 : 3.0) : ((ntsc_phase > 1) ? 3.0 : 2.0);

    // -1 = Custom, 0 = Svideo, 1 = Composite, 2 = RF.
    float Quality = float(quality-1);

    res = max(res, 1.0);

    vVARS.CHROMA_MOD_FREQ = (vVARS.phase < 2.5) ? (4.0 * PI / 15.0) : (PI / 3.0);
    vVARS.ARTIFACTING = (Quality > -0.5) ? Quality * 0.5*(res+1.0) : cust_artifacting;
    vVARS.FRINGING = (Quality > -0.5) ? Quality : cust_fringing;
    vVARS.SATURATION = ntsc_sat;
    vVARS.BRIGHTNESS = ntsc_bright;
    vVARS.pix_no.x = vVARS.pix_no.x * res;

    // Presets merge fields only for 3-phase and never for RF; the Custom
    // preset follows the ntsc_fields toggle instead.
    vVARS.MERGE = (Quality == 2.0 || vVARS.phase < 2.5) ? 0.0 : 1.0;
    vVARS.MERGE = (Quality == -1.0) ? float(ntsc_fields == true) : vVARS.MERGE;
}
// Cross-talk matrix applied to the modulated YIQ signal. Expands in place, so
// it can only be used where an ST_VertexOut named `vVARS` is in scope.
#define mix_mat float3x3(vVARS.BRIGHTNESS, vVARS.FRINGING, vVARS.FRINGING, vVARS.ARTIFACTING, 2.0 * vVARS.SATURATION, 0.0, vVARS.ARTIFACTING, 0.0, 2.0 * vVARS.SATURATION)
// YIQ -> RGB conversion matrix; each row produces one output channel.
static const float3x3 yiq2rgb_mat = float3x3(
    1.0,  0.956,   0.6210,
    1.0, -0.2720, -0.6474,
    1.0, -1.1060,  1.7046
);

// Converts a YIQ triple to RGB by multiplying with yiq2rgb_mat.
float3 yiq2rgb(float3 yiq)
{
    float3 rgb = mul(yiq2rgb_mat, yiq);
    return rgb;
}
// RGB -> YIQ conversion matrix; each row produces one output channel.
static const float3x3 yiq_mat = float3x3(
    0.2989,  0.5870,  0.1140,
    0.5959, -0.2744, -0.3216,
    0.2115, -0.5229,  0.3114
);

// Converts an RGB triple to YIQ by multiplying with yiq_mat.
float3 rgb2yiq(float3 col)
{
    float3 yiq = mul(yiq_mat, col);
    return yiq;
}
// Luma weights used to collapse a per-channel response into one value.
static const float3 Y = float3( 0.299, 0.587, 0.114);

// Luma-weighted measure of how strongly `b` differs from both neighbours `a`
// and `c` in the same direction.
float df3(float3 a, float3 b, float3 c)
{
    float3 product = 3.0*(b - a) * (b - c);
    float3 response = smoothstep(0.0, 0.56, product);
    return dot(response, Y);
}
// Pass 0 pixel shader: converts the back buffer to YIQ, modulates the chroma
// onto the carrier, applies the mix_mat cross-talk and demodulates again.
// Returns the YIQ signal (alpha 1.0), not RGB.
//
// When vVARS.MERGE > 0.5 the signal is computed for both frame parities and
// averaged. The second field uses the opposite parity of the main one
// (FrameCount vs FrameCount+1); previously both used FrameCount+1, which made
// the average a no-op.
float4 PS_NTSC_ADAPTIVE_P0(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
{
    float mod1 = 2.0;
    float mod2 = 3.0;

    // One native texel step in each direction.
    float2 dx = float2(NormalizedNativePixelSize.x, 0.0);
    float2 dy = float2(0.0, NormalizedNativePixelSize.y);

    // 3x3 neighbourhood; the centre sample also provides the input colour.
    float3 C  = tex2D(ReShade::BackBuffer, vTexCoord        ).xyz;
    float3 L  = tex2D(ReShade::BackBuffer, vTexCoord -dx    ).xyz;
    float3 R  = tex2D(ReShade::BackBuffer, vTexCoord +dx    ).xyz;
    float3 U  = tex2D(ReShade::BackBuffer, vTexCoord     -dy).xyz;
    float3 D  = tex2D(ReShade::BackBuffer, vTexCoord     +dy).xyz;
    float3 UL = tex2D(ReShade::BackBuffer, vTexCoord -dx -dy).xyz;
    float3 UR = tex2D(ReShade::BackBuffer, vTexCoord +dx -dy).xyz;
    float3 DL = tex2D(ReShade::BackBuffer, vTexCoord -dx +dy).xyz;
    float3 DR = tex2D(ReShade::BackBuffer, vTexCoord +dx +dy).xyz;

    float3 yiq = rgb2yiq(C);
    float3 yiq2 = yiq;

    // blend is non-zero only where all three rows show a horizontal response
    // and the vertical response product stays below the threshold.
    float hori = step(0.01,(df3(L, C, R) * df3(UL, U, UR) * df3(DL, D, DR)));
    float vert = 1.0 - step(0.01,(df3(U, C, D) * df3(UL, L, DL) * df3(UR, R, DR)));
    float blend = hori * vert * ntsc_artifacting_rainbow;

    if (vVARS.MERGE > 0.5)
    {
        // Opposite frame parity to the main field below.
        float chroma_phase2 = (vVARS.phase < 2.5) ? PI * ((vVARS.pix_no.y % mod1) + (FrameCount % 2.)) : 0.6667 * PI * ((vVARS.pix_no.y % mod2) + (FrameCount % 2.));
        float mod_phase2 = (blend + 1.0) * chroma_phase2 + vVARS.pix_no.x * vVARS.CHROMA_MOD_FREQ;
        float i_mod2 = cos(mod_phase2);
        float q_mod2 = sin(mod_phase2);
        yiq2.yz *= float2(i_mod2, q_mod2); // Modulate.
        yiq2 = mul(mix_mat, yiq2); // Cross-talk.
        yiq2.yz *= float2(i_mod2, q_mod2); // Demodulate.
    }

    // Main field (unchanged): parity of FrameCount+1.
    float chroma_phase = (vVARS.phase < 2.5) ? PI * ((vVARS.pix_no.y % mod1) + ((FrameCount+1.) % 2.)) : 0.6667 * PI * ((vVARS.pix_no.y % mod2) + ((FrameCount+1.) % 2.));
    float mod_phase = (blend + 1.0) * chroma_phase + vVARS.pix_no.x * vVARS.CHROMA_MOD_FREQ;
    float i_mod = cos(mod_phase);
    float q_mod = sin(mod_phase);
    yiq.yz *= float2(i_mod, q_mod); // Modulate.
    yiq = mul(mix_mat, yiq); // Cross-talk.
    yiq.yz *= float2(i_mod, q_mod); // Demodulate.

    yiq = (vVARS.MERGE < 0.5) ? yiq : 0.5*(yiq+yiq2);
    return float4(yiq, 1.0);
}
// Full-screen-triangle vertex shader: vertex 0 -> uv (0,0), vertex 1 -> uv
// (0,2), vertex 2 -> uv (2,0), with the matching clip-space position.
void VS_NTSC_ADAPTIVE_P1(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 TexCoord : TEXCOORD)
{
    TexCoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    float2 clip_xy = TexCoord * float2(2.0, -2.0) + float2(-1.0, 1.0);
    position = float4(clip_xy, 0.0, 1.0);
}
// Samples one filter tap `offset` steps away horizontally. Luma (x) is read
// with the step one_x.x and chroma (yz) with the step one_x.y, which is how
// chroma scaling is applied (thanks to guest.r for the idea).
float3 fetch_offset(sampler2D Source, float2 tex, float offset, float2 one_x)
{
    float2 luma_uv = tex + float2((offset) * (one_x.x), 0.0);
    float2 chroma_uv = tex + float2((offset) * (one_x.y), 0.0);

    float luma = tex2D(Source, luma_uv).x;
    float2 chroma = tex2D(Source, chroma_uv).yz;
    return float3(luma, chroma);
}
/* These are accurate and normalized coeffs. */
// Layout shared by all four tables: entries 0..TAPS-1 are applied to the two
// samples at distance (TAPS - i) on either side of the pixel, and entry
// [TAPS] is the weight of the centre sample. Each table therefore holds
// TAPS + 1 values.
static const int TAPS_3_phase = 24;
static const float luma_filter_3_phase[25] = {
-0.0000120203033684164,
-0.0000221465589348544,
-0.0000131553320142694,
-0.0000120203033684164,
-0.0000499802614018372,
-0.000113942875690297,
-0.000122153082899506,
-5.61214E-06,
0.000170520303591422,
0.000237204986579451,
0.000169644281482376,
0.000285695210375719,
0.000984598849305758,
0.0020187339488074,
0.00200232553469184,
-0.000909904964181485,
-0.00704925890919635,
-0.0132231937269633,
-0.0126072491817548,
0.00246092210875218,
0.0358691302651096,
0.0840185734607569,
0.135566921437963,
0.175265691355518,
0.190181351796957};
/* These are accurate and normalized coeffs. */
static const float chroma_filter_3_phase[25] = {
-0.000135741056915795,
-0.000568115749081878,
-0.00130605691082327,
-0.00231369942971182,
-0.00350569685928248,
-0.00474731062446688,
-0.00585980203774502,
-0.00663114046295865,
-0.00683148404964774,
-0.00623234997205773,
-0.00462792764511295,
-0.00185665431957684,
0.00217899013894782,
0.00749647783836479,
0.0140227874371299,
0.021590863169257,
0.0299437436530477,
0.0387464461271303,
0.0476049759842373,
0.0560911497485196,
0.0637713405314321,
0.0702368383153846,
0.0751333078160781,
0.0781868487834974,
0.0792244191487085};
/* These are accurate and normalized coeffs. Though they don't produce ideal smooth vertical lines transparency. */
static const int TAPS_2_phase = 15;
static const float luma_filter_2_phase[16] = {
0.00134372867555492,
0.00294231678339247,
0.00399617683765551,
0.00303632635732925,
-0.00110556727614119,
-0.00839970341605087,
-0.0169515379999301,
-0.0229874881474188,
-0.0217113019865528,
-0.00889151239892142,
0.0173269874254282,
0.0550969075027442,
0.098655909675851,
0.139487291941771,
0.168591277052964,
0.17914037794465};
/* These are accurate and normalized coeffs. */
static const float chroma_filter_2_phase[16] = {
0.00406084767413046,
0.00578573638571078,
0.00804447474387669,
0.0109152541019797,
0.0144533032717188,
0.0186765858322351,
0.0235518468184291,
0.0289834149989225,
0.034807373222651,
0.0407934139180355,
0.0466558344725586,
0.0520737649339226,
0.0567190701585739,
0.0602887575746322,
0.0625375226221969,
0.0633055985408521};
// Pass 1 pixel shader: low-pass filters the YIQ signal written by pass 0
// (sNTSC_P0) with the symmetric 2-phase or 3-phase tap tables, converts the
// result back to RGB and optionally raises it to the power 2.2.
float4 PS_NTSC_ADAPTIVE_P1(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
{
    float res = ntsc_scale;
    float OriginalSize = 1.0/NormalizedNativePixelSize.x;
    float3 signal = float3(0.0, 0.0, 0.0);

    // ntsc_phase: 0 = Auto (by native width), 1 = 2-phase, 2 = 3-phase.
    float phase = (ntsc_phase < 1) ? ((OriginalSize > 300.0) ? 2.0 : 3.0) : ((ntsc_phase > 1) ? 3.0 : 2.0);

    // Effective chroma step scale. Kept under its own name so it does not
    // shadow (and get initialised from) the `chroma_scale` uniform.
    float chroma_stretch = phase > 2.5 ? min(chroma_scale, 2.2) : chroma_scale/2.0;

    // Horizontal tap spacing: x for luma, y for chroma.
    float2 one_x = ((1.0/BufferWidth) / res) * float2(1.0, 1.0 / chroma_stretch);
    float2 tex = vTexCoord;

    if(phase < 2.5)
    {
        // Symmetric taps: coefficient i weights the pair at distance TAPS - i.
        for (int i = 0; i < TAPS_2_phase; i++)
        {
            float offset = float(i);
            float3 sums = fetch_offset(sNTSC_P0, tex, offset - float(TAPS_2_phase), one_x) +
                          fetch_offset(sNTSC_P0, tex, float(TAPS_2_phase) - offset, one_x);
            signal += sums * float3(luma_filter_2_phase[i], chroma_filter_2_phase[i], chroma_filter_2_phase[i]);
        }
        // Centre tap.
        signal += tex2D(sNTSC_P0, vTexCoord).xyz *
                  float3(luma_filter_2_phase[TAPS_2_phase], chroma_filter_2_phase[TAPS_2_phase], chroma_filter_2_phase[TAPS_2_phase]);
    }
    else if(phase > 2.5)
    {
        for (int i = 0; i < TAPS_3_phase; i++)
        {
            float offset = float(i);
            float3 sums = fetch_offset(sNTSC_P0, tex, offset - float(TAPS_3_phase), one_x) +
                          fetch_offset(sNTSC_P0, tex, float(TAPS_3_phase) - offset, one_x);
            signal += sums * float3(luma_filter_3_phase[i], chroma_filter_3_phase[i], chroma_filter_3_phase[i]);
        }
        // Centre tap.
        signal += tex2D(sNTSC_P0, vTexCoord).xyz *
                  float3(luma_filter_3_phase[TAPS_3_phase], chroma_filter_3_phase[TAPS_3_phase], chroma_filter_3_phase[TAPS_3_phase]);
    }

    float3 rgb = yiq2rgb(signal);

    if(linearize == false) return float4(rgb, 1.0);
    else return pow(float4(rgb, 1.0), float4(2.2, 2.2, 2.2, 2.2));
}
// Two-pass technique: pass 0 encodes the back buffer into the YIQ target
// tNTSC_P0; pass 1 filters that target and writes RGB to the default target.
technique NTSC_ADAPTIVE
{
pass
{
VertexShader = VS_NTSC_ADAPTIVE_P0;
PixelShader = PS_NTSC_ADAPTIVE_P0;
RenderTarget = tNTSC_P0;
}
pass
{
// Uses PostProcessVS rather than the local VS_NTSC_ADAPTIVE_P1.
VertexShader = PostProcessVS;
PixelShader = PS_NTSC_ADAPTIVE_P1;
}
}

View File

@@ -1,415 +0,0 @@
#include "ReShade.fxh"
/*
Geom Shader - a modified CRT-Geom without CRT features made to be appended/integrated
into any other shaders and provide curvature/warping/oversampling features.
Adapted by Hyllian (2024).
*/
/*
CRT-interlaced
Copyright (C) 2010-2012 cgwg, Themaister and DOLLS
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
(cgwg gave their consent to have the original version of this shader
distributed under the GPL in this message:
http://board.byuu.org/viewtopic.php?p=26075#p26075
"Feel free to distribute my shaders under the GPL. After all, the
barrel distortion code was taken from the Curvature shader, which is
under the GPL."
)
This shader variant is pre-configured with screen curvature
*/
// ---------------------------------------------------------------------------
// User-tunable parameters. Boolean toggles are initialised with true/false
// (not a float literal) so the declared type and the default agree.
// ---------------------------------------------------------------------------

// Toggle for the curvature warp.
uniform bool geom_curvature <
    ui_type = "radio";
    ui_label = "Geom Curvature Toggle";
    ui_category = "Curvature";
    ui_tooltip = "This shader only works with Aspect Ratio: Stretch to Fill.";
> = true;

// Radius term used by the intersect / bkwtrans / fwtrans / maxscale helpers.
uniform float geom_R <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 10.0;
    ui_step = 0.1;
    ui_label = "Geom Curvature Radius";
> = 10.0;

// Distance term used by the intersect / fwtrans / maxscale helpers.
uniform float geom_d <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 10.0;
    ui_step = 0.1;
    ui_label = "Geom Distance";
> = 10.0;

// Selects which of the two float2 values the `aspect` macro evaluates to.
uniform bool geom_invert_aspect <
    ui_type = "radio";
    ui_label = "Geom Curvature Aspect Inversion";
> = false;

uniform float geom_cornersize <
    ui_type = "drag";
    ui_min = 0.001;
    ui_max = 1.0;
    ui_step = 0.005;
    ui_label = "Geom Corner Size";
> = 0.006;

uniform float geom_cornersmooth <
    ui_type = "drag";
    ui_min = 80.0;
    ui_max = 2000.0;
    ui_step = 100.0;
    ui_label = "Geom Corner Smoothness";
> = 200.0;

// Tilt values; VS_CRT_Geom feeds them to sin()/cos() to build the
// sinangle / cosangle pair.
uniform float geom_x_tilt <
    ui_type = "drag";
    ui_min = -1.0;
    ui_max = 1.0;
    ui_step = 0.05;
    ui_label = "Geom Horizontal Tilt";
> = 0.0;

uniform float geom_y_tilt <
    ui_type = "drag";
    ui_min = -1.0;
    ui_max = 1.0;
    ui_step = 0.05;
    ui_label = "Geom Vertical Tilt";
> = 0.0;

// Overscan, expressed in percent.
uniform float geom_overscan_x <
    ui_type = "drag";
    ui_min = -125.0;
    ui_max = 125.0;
    ui_step = 0.5;
    ui_label = "Geom Horiz. Overscan %";
> = 48.5;

uniform float geom_overscan_y <
    ui_type = "drag";
    ui_min = -125.0;
    ui_max = 125.0;
    ui_step = 0.5;
    ui_label = "Geom Vert. Overscan %";
> = 64.5;

// Image centre offsets.
// NOTE(review): the consumer is not visible in this part of the file -
// confirm units there.
uniform float centerx <
    ui_type = "drag";
    ui_min = -100.0;
    ui_max = 100.0;
    ui_step = 0.1;
    ui_label = "Image Center X";
> = 0.0;

uniform float centery <
    ui_type = "drag";
    ui_min = -100.0;
    ui_max = 100.0;
    ui_step = 0.1;
    ui_label = "Image Center Y";
> = -8.8;

uniform float geom_lum <
    ui_type = "drag";
    ui_min = 0.5;
    ui_max = 2.0;
    ui_step = 0.01;
    ui_label = "Geom Luminance";
> = 1.0;

// Exponent applied to sampled colours by the TEX2D macro when
// LINEAR_PROCESSING is defined.
uniform float geom_target_gamma <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 5.0;
    ui_step = 0.1;
    ui_label = "Geom Target Gamma";
> = 2.4;

uniform float geom_monitor_gamma <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 5.0;
    ui_step = 0.1;
    ui_label = "Geom Monitor Gamma";
> = 2.2;
// Values supplied by the host through `source` annotations.
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform float ViewportX < source = "viewportx"; >;
uniform float ViewportY < source = "viewporty"; >;
uniform float ViewportWidth < source = "viewportwidth"; >;
uniform float ViewportHeight < source = "viewportheight"; >;
uniform float2 ViewportOffset < source = "viewportoffset"; >;
// Back buffer sampler with BORDER addressing and linear filtering; this is
// what the TEX2D macro reads from.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
// Overlay image loaded from "overlay/psx.jpg", sized to the buffer.
texture tOverlay < source = "overlay/psx.jpg"; >
{
Width = BUFFER_WIDTH;
Height = BUFFER_HEIGHT;
MipLevels = 1;
};
sampler sOverlay { Texture = tOverlay; AddressU = BORDER; AddressV = BORDER; MinFilter = LINEAR; MagFilter = LINEAR;};
// Comment the next line to disable interpolation in linear gamma (and
// gain speed).
#define LINEAR_PROCESSING
// Enable 3x oversampling of the beam profile; improves moire effect caused by scanlines+curvature
#define OVERSAMPLE
// Use the older, purely gaussian beam profile; uncomment for speed
//#define USEGAUSSIAN
// Macros.
// FIX(c): magnitude of c, floored at 1e-5 so it can be used as a divisor.
// No trailing ';' here - the call site supplies the statement terminator,
// which also lets FIX() appear inside an expression.
#define FIX(c) max(abs(c), 1e-5)
#define PI 3.141592653589
#ifdef LINEAR_PROCESSING
# define TEX2D(c) pow(tex2D(sBackBuffer, (c)), float4(geom_target_gamma,geom_target_gamma,geom_target_gamma,geom_target_gamma))
#else
# define TEX2D(c) tex2D(sBackBuffer, (c))
#endif
// aspect ratio
// Evaluates to (H/W, 1) when geom_invert_aspect is set, (1, H/W) otherwise.
#define aspect (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
// NOTE(review): this macro ends in ';', so it can only be used as the last
// token of a statement; its use sites are not visible here - confirm before
// removing the semicolon.
#define overscan (float2(1.01,1.01));
// Values precomputed once per vertex in VS_CRT_Geom for the pixel shader.
struct ST_VertexOut
{
// sin / cos of (geom_x_tilt, geom_y_tilt).
float2 sinangle : TEXCOORD1;
float2 cosangle : TEXCOORD2;
// Result of vs_maxscale(): xy offset and z scale.
float3 stretch : TEXCOORD3;
// Reciprocal of NormalizedNativePixelSize.
float2 TextureSize : TEXCOORD4;
};
// Vertex-stage copy of intersect(): builds the quadratic coefficients from
// `xy`, the tilt terms and geom_R / geom_d and returns the root obtained with
// the negative square root.
float vs_intersect(float2 xy, float2 sinangle, float2 cosangle)
{
    float quad_a = dot(xy,xy) + geom_d*geom_d;
    float quad_b = 2.0*(geom_R*(dot(xy,sinangle)-geom_d*cosangle.x*cosangle.y)-geom_d*geom_d);
    float quad_c = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y;

    float discriminant = quad_b*quad_b-4.0*quad_a*quad_c;
    return (-quad_b-sqrt(discriminant))/(2.0*quad_a);
}
// Vertex-stage copy of bkwtrans(): maps `xy` through vs_intersect() and a
// second quadratic solve, then rescales by r / sin(r / geom_R).
float2 vs_bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
{
    float t = vs_intersect(xy, sinangle, cosangle);
    float2 hit = (float2(t, t)*xy - float2(-geom_R, -geom_R)*sinangle) / float2(geom_R, geom_R);

    float2 hit_over_cos = hit/cosangle;
    float2 tangent = sinangle/cosangle;

    // Second quadratic, solved with the positive square root.
    float quad_a = dot(tangent, tangent) + 1.0;
    float quad_b = -2.0*dot(hit_over_cos, tangent);
    float quad_c = dot(hit_over_cos, hit_over_cos) - 1.0;
    float root = (-quad_b + sqrt(quad_b*quad_b - 4.0*quad_a*quad_c))/(2.0*quad_a);

    float2 uv = (hit - root*sinangle)/cosangle;
    float r = FIX(geom_R*acos(root));
    return uv*r/sin(r/geom_R);
}
// Vertex-stage copy of fwtrans(): rescales `uv` by sin(r / geom_R) / r and
// projects it using the tilt terms and geom_d.
float2 vs_fwtrans(float2 uv, float2 sinangle, float2 cosangle)
{
    float r = FIX(sqrt(dot(uv,uv)));
    float2 scaled = uv * (sin(r/geom_R)/r);
    float drop = 1.0-cos(r/geom_R);
    float denom = geom_d/geom_R + drop*cosangle.x*cosangle.y+dot(scaled,sinangle);
    return geom_d*(scaled*cosangle-drop*sinangle)/denom;
}
// Vertex-stage copy of maxscale(): returns float3(offset.xy, scale), where xy
// is the midpoint of the forward-mapped low/high extents (re-multiplied by
// aspect) and z is the larger of the two extent widths.
float3 vs_maxscale(float2 sinangle, float2 cosangle)
{
    float2 ar = aspect;
    float2 centre = vs_bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle);
    float2 half_ext = float2(0.5,0.5)*ar;

    float lo_x = vs_fwtrans(float2(-half_ext.x, centre.y), sinangle, cosangle).x;
    float lo_y = vs_fwtrans(float2( centre.x, -half_ext.y), sinangle, cosangle).y;
    float hi_x = vs_fwtrans(float2(+half_ext.x, centre.y), sinangle, cosangle).x;
    float hi_y = vs_fwtrans(float2( centre.x, +half_ext.y), sinangle, cosangle).y;

    float2 lo = float2(lo_x, lo_y)/ar;
    float2 hi = float2(hi_x, hi_y)/ar;
    return float3((hi+lo)*ar*0.5,max(hi.x-lo.x,hi.y-lo.y));
}
// Vertex shader generating a triangle covering the entire screen.
// Vertex ids 0, 1, 2 produce texcoords (0,0), (0,2) and (2,0).
void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
{
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    float2 sourceSize = 1.0/NormalizedNativePixelSize;

    // Precalculate the values the pixel shader needs so they are computed
    // per vertex rather than per pixel.
    float2 tilt = float2(geom_x_tilt, geom_y_tilt);
    vVARS.sinangle = sin(tilt);
    vVARS.cosangle = cos(tilt);
    vVARS.stretch = vs_maxscale(vVARS.sinangle, vVARS.cosangle);
    vVARS.TextureSize = sourceSize;
}
// Pixel-shader-side name for the quadratic solver. The arithmetic is
// identical to vs_intersect(), so forward to it instead of keeping a second
// copy that could drift out of sync.
float intersect(float2 xy, float2 sinangle, float2 cosangle)
{
    return vs_intersect(xy, sinangle, cosangle);
}
// Pixel-shader-side name for the backward transform. The arithmetic is
// identical to vs_bkwtrans(), so forward to it instead of keeping a second
// copy that could drift out of sync.
float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
{
    return vs_bkwtrans(xy, sinangle, cosangle);
}
// Pixel-shader-side name for the forward transform. The arithmetic is
// identical to vs_fwtrans(), so forward to it instead of keeping a second
// copy that could drift out of sync.
float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
{
    return vs_fwtrans(uv, sinangle, cosangle);
}
// Pixel-shader-side name for the stretch computation. The arithmetic is
// identical to vs_maxscale(), so forward to it instead of keeping a second
// copy that could drift out of sync.
float3 maxscale(float2 sinangle, float2 cosangle)
{
    return vs_maxscale(sinangle, cosangle);
}
// Maps a texture coordinate through the backward transform:
// centre on (0.5, 0.5), apply aspect and stretch, run bkwtrans(), then divide
// by the overscan percentages and aspect and shift back by (0.5, 0.5).
float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)
{
    float2 centered = (coord - float2(0.5, 0.5))*aspect*stretch.z + stretch.xy;
    // geom_overscan_x / geom_overscan_y are divided by 100 here, i.e. treated as percentages.
    float2 overscanScale = float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0);
    float2 warped = bkwtrans(centered, sinangle, cosangle);
    return (warped / overscanScale/aspect + float2(0.5, 0.5));
}
// Returns a mask in [0, 1] derived from how far coord lies inside a corner
// region of size geom_cornersize; geom_cornersmooth scales the falloff.
float corner(float2 coord)
{
    // Distance to the nearest edge on each axis, aspect corrected.
    float2 edgeDist = min(coord, float2(1.0, 1.0) - coord) * aspect;
    float2 cornerSize = float2(geom_cornersize, geom_cornersize);
    float2 inset = (cornerSize - min(edgeDist, cornerSize));
    float dist = sqrt(dot(inset, inset));
    return clamp((cornerSize.x - dist)*geom_cornersmooth, 0.0, 1.0);
}
// Sum of the absolute screen-space derivatives of value along x and y.
float fwidth(float value)
{
    float dx = abs(ddx(value));
    float dy = abs(ddy(value));
    return dx + dy;
}
// Code snippet borrowed from crt-cyclon. (credits to DariusG)
// Remaps pos from [0,1] to [-1,1], applies the warp scale and maps it back.
// Both warp terms are multiplied by 0, so the scale is (1, 1) for finite input.
float2 Warp(float2 pos)
{
    float2 centered = pos*2.0 - 1.0;
    float2 scale = float2(1.0 + centered.y*centered.y*0, 1.0 + centered.x*centered.x*0);
    centered *= scale;
    return centered*0.5 + 0.5;
}
// Pixel shader: optionally applies the curvature transform, samples the back
// buffer, applies luminance, the corner mask and the monitor gamma, and
// substitutes the overlay texture outside the viewport rectangle.
float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
{
    // Texture coordinates of the texel containing the active pixel.
    float2 xy = vTexCoord;
    if (geom_curvature == true)
        xy = transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch);

    // center screen (centerx / centery are divided by 100 here)
    xy = Warp(xy - float2(centerx,centery)/100.0);

    float cornerMask = corner((xy-float2(0.5,0.5)) * BufferToViewportRatio + float2(0.5,0.5));
    float2 uv_ratio = frac((xy * vVARS.TextureSize - float2(0.5, 0.5)) / vVARS.TextureSize);

    float4 color = TEX2D(xy);
    // NOTE(review): presumably TEX2D already applies this pow() when
    // LINEAR_PROCESSING is defined - confirm against the macro's #if condition.
#ifndef LINEAR_PROCESSING
    color = pow(color , float4(geom_target_gamma, geom_target_gamma, geom_target_gamma, geom_target_gamma));
#endif
    color.rgb *= (geom_lum * step(0.0, uv_ratio.y));

    float3 result = color.rgb * float3(cornerMask, cornerMask, cornerMask);
    // Convert the image gamma for display on our output device.
    result = pow(result, float3(1.0 / geom_monitor_gamma, 1.0 / geom_monitor_gamma, 1.0 / geom_monitor_gamma));

    float4 overlay = tex2D(sOverlay, vTexCoord);
    float2 top_left = (float2(ViewportX, ViewportY) - ViewportOffset)/ViewportSize;
    float2 bottom_right = (float2(ViewportX + ViewportWidth, ViewportY + ViewportHeight) - ViewportOffset)/ViewportSize;

    // Outside the viewport rectangle the overlay colour replaces the image.
    bool outsideViewport = xy.x < top_left.x || xy.x > bottom_right.x || xy.y < top_left.y || xy.y > bottom_right.y;
    if (outsideViewport)
        result = overlay.rgb;

    return float4(result, 1.0);
}
// Single-pass technique: VS_CRT_Geom is the vertex shader and PS_CRT_Geom the
// pixel shader.
technique CRT_Geom
{
pass
{
VertexShader = VS_CRT_Geom;
PixelShader = PS_CRT_Geom;
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 214 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 202 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.9 KiB

View File

@@ -1,15 +0,0 @@
# To Use
Choose Aspect Ratio: Stretch to Fill.
# Psx.jpg Credits
To the Author: SOQUEROEU.
The "psx.jpg" background was edited from the one obtained from "Soqueroeu TV Backgrounds 2.0" repository: https://github.com/soqueroeu/Soqueroeu-TV-Backgrounds_V2.0/tree/main.
The material is free to use according to the agreement below:
## AGREEMENT
This pack is free. You should not pay for anything related to this graphics pack and shader preset. You may distribute and reproduce part from this content, as long as you give credit to the authors involved. You may not profit from the sale of products that contain material in this package without the author's prior permission.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 470 KiB

View File

@@ -9,12 +9,17 @@ add_subdirectory(libchdr EXCLUDE_FROM_ALL)
disable_compiler_warnings_for_target(libchdr)
add_subdirectory(xxhash EXCLUDE_FROM_ALL)
disable_compiler_warnings_for_target(xxhash)
add_subdirectory(rapidjson EXCLUDE_FROM_ALL)
add_subdirectory(imgui EXCLUDE_FROM_ALL)
disable_compiler_warnings_for_target(imgui)
add_subdirectory(simpleini EXCLUDE_FROM_ALL)
disable_compiler_warnings_for_target(simpleini)
add_subdirectory(vulkan EXCLUDE_FROM_ALL)
add_subdirectory(soundtouch EXCLUDE_FROM_ALL)
disable_compiler_warnings_for_target(soundtouch)
add_subdirectory(googletest EXCLUDE_FROM_ALL)
add_subdirectory(cpuinfo EXCLUDE_FROM_ALL)
disable_compiler_warnings_for_target(cpuinfo)
add_subdirectory(fast_float EXCLUDE_FROM_ALL)
add_subdirectory(reshadefx EXCLUDE_FROM_ALL)
disable_compiler_warnings_for_target(reshadefx)
@@ -25,6 +30,8 @@ disable_compiler_warnings_for_target(rapidyaml)
add_subdirectory(cubeb EXCLUDE_FROM_ALL)
disable_compiler_warnings_for_target(cubeb)
disable_compiler_warnings_for_target(speex)
add_subdirectory(discord-rpc EXCLUDE_FROM_ALL)
disable_compiler_warnings_for_target(discord-rpc)
add_subdirectory(kissfft EXCLUDE_FROM_ALL)
disable_compiler_warnings_for_target(kissfft)
add_subdirectory(freesurround EXCLUDE_FROM_ALL)

21
dep/cpuinfo/.gitignore vendored Normal file
View File

@@ -0,0 +1,21 @@
# Ninja files
build.ninja
# Build objects and artifacts
deps/
build/
bin/
lib/
libs/
obj/
*.pyc
*.pyo
# System files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

223
dep/cpuinfo/CMakeLists.txt Normal file
View File

@@ -0,0 +1,223 @@
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# ---[ Setup project
project(cpuinfo LANGUAGES C CXX)

# ---[ Options.
# Each option is a cache string; the STRINGS property lists its accepted values.
set(CPUINFO_LIBRARY_TYPE "default" CACHE STRING "Type of cpuinfo library (shared, static, or default) to build")
set_property(CACHE CPUINFO_LIBRARY_TYPE PROPERTY STRINGS default static shared)

set(CPUINFO_RUNTIME_TYPE "default" CACHE STRING "Type of runtime library (shared, static, or default) to use")
set_property(CACHE CPUINFO_RUNTIME_TYPE PROPERTY STRINGS default static shared)

set(CPUINFO_LOG_LEVEL "default" CACHE STRING "Minimum logging level (info with lower severity will be ignored)")
set_property(CACHE CPUINFO_LOG_LEVEL PROPERTY STRINGS default debug info warning error fatal none)
# Build `target` as C99 with compiler-specific extensions turned off.
macro(CPUINFO_TARGET_ENABLE_C99 target)
  set_target_properties(${target} PROPERTIES C_STANDARD 99 C_EXTENSIONS NO)
endmacro()
# Build `target` as C++11 with compiler-specific extensions turned off.
macro(CPUINFO_TARGET_ENABLE_CXX11 target)
  set_target_properties(${target} PROPERTIES CXX_STANDARD 11 CXX_EXTENSIONS NO)
endmacro()
# On MSVC, select the C runtime for `target` according to CPUINFO_RUNTIME_TYPE:
# "shared" adds /MD, "static" adds /MT, each with a "d" suffix in Debug
# configurations. "default" (or a non-MSVC compiler) adds nothing.
macro(CPUINFO_TARGET_RUNTIME_LIBRARY target)
  if(MSVC AND NOT CPUINFO_RUNTIME_TYPE STREQUAL "default")
    if(CPUINFO_RUNTIME_TYPE STREQUAL "shared")
      target_compile_options(${target} PRIVATE "/MD$<$<CONFIG:Debug>:d>")
    elseif(CPUINFO_RUNTIME_TYPE STREQUAL "static")
      target_compile_options(${target} PRIVATE "/MT$<$<CONFIG:Debug>:d>")
    endif()
  endif()
endmacro()
# -- [ Determine target processor
# Start from CMAKE_SYSTEM_PROCESSOR; on Darwin a single-architecture
# CMAKE_OSX_ARCHITECTURES value (x86_64 or arm64) takes precedence.
set(CPUINFO_TARGET_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}")
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$")
  set(CPUINFO_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}")
endif()
# ---[ Build flags
# CPUINFO_SUPPORTED_PLATFORM starts TRUE and is cleared when the target
# processor is missing (except on iOS) or not one of the recognised names.
set(CPUINFO_SUPPORTED_PLATFORM TRUE)
if(NOT CMAKE_SYSTEM_PROCESSOR)
  if(NOT IOS)
    message(WARNING
      "Target processor architecture is not specified. "
      "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
    set(CPUINFO_SUPPORTED_PLATFORM FALSE)
  endif()
elseif(NOT CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64)$")
  message(WARNING
    "Target processor architecture \"${CPUINFO_TARGET_PROCESSOR}\" is not supported in cpuinfo. "
    "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
  set(CPUINFO_SUPPORTED_PLATFORM FALSE)
endif()
# Clear CPUINFO_SUPPORTED_PLATFORM when the target operating system is missing
# or not one of the recognised names.
IF(NOT CMAKE_SYSTEM_NAME)
  MESSAGE(WARNING
    "Target operating system is not specified. "
    "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
  SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android)$")
  # "NOT ... VERSION_LESS" is used instead of VERSION_GREATER_EQUAL because the
  # latter only exists since CMake 3.7, while this file declares 3.5 as its
  # minimum version. The two forms are equivalent.
  IF(NOT CMAKE_VERSION VERSION_LESS "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
    MESSAGE(WARNING
      "Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. "
      "cpuinfo will compile, but cpuinfo_initialize() will always fail.")
    SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
  ENDIF()
ENDIF()
# ---[ cpuinfo library
# Sources that are always compiled.
set(CPUINFO_SRCS
  src/init.c
  src/api.c
  src/cache.c)

# Architecture- and OS-specific sources are only added on supported platforms.
if(CPUINFO_SUPPORTED_PLATFORM)
  if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$"))
    # x86 / x86-64
    list(APPEND CPUINFO_SRCS
      src/x86/init.c
      src/x86/info.c
      src/x86/vendor.c
      src/x86/uarch.c
      src/x86/name.c
      src/x86/topology.c
      src/x86/isa.c
      src/x86/cache/init.c
      src/x86/cache/descriptor.c
      src/x86/cache/deterministic.c)
    if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
      list(APPEND CPUINFO_SRCS
        src/x86/linux/init.c
        src/x86/linux/cpuinfo.c)
    elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
      list(APPEND CPUINFO_SRCS src/x86/mach/init.c)
    elseif(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
      list(APPEND CPUINFO_SRCS src/x86/windows/init.c)
    endif()
  elseif(CPUINFO_TARGET_PROCESSOR MATCHES "^(armv[5-8].*|aarch64|arm64)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$")
    # 32-bit and 64-bit ARM
    list(APPEND CPUINFO_SRCS
      src/arm/uarch.c
      src/arm/cache.c)
    if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
      list(APPEND CPUINFO_SRCS
        src/arm/linux/init.c
        src/arm/linux/cpuinfo.c
        src/arm/linux/clusters.c
        src/arm/linux/chipset.c
        src/arm/linux/midr.c
        src/arm/linux/hwcap.c)
      if(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]")
        list(APPEND CPUINFO_SRCS src/arm/linux/aarch32-isa.c)
        if(CMAKE_SYSTEM_NAME STREQUAL "Android" AND ANDROID_ABI STREQUAL "armeabi")
          # This one file is compiled with -marm on the Android armeabi ABI.
          set_source_files_properties(src/arm/linux/aarch32-isa.c PROPERTIES COMPILE_FLAGS -marm)
        endif()
      elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$")
        list(APPEND CPUINFO_SRCS src/arm/linux/aarch64-isa.c)
      endif()
    elseif(IOS OR (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CPUINFO_TARGET_PROCESSOR STREQUAL "arm64"))
      list(APPEND CPUINFO_SRCS src/arm/mach/init.c)
    endif()
    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
      list(APPEND CPUINFO_SRCS
        src/arm/android/properties.c)
    endif()
  endif()

  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
    list(APPEND CPUINFO_SRCS
      src/emscripten/init.c)
  endif()

  # OS-level helpers shared by all architectures.
  if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
    list(APPEND CPUINFO_SRCS
      src/linux/smallfile.c
      src/linux/multiline.c
      src/linux/cpulist.c
      src/linux/processors.c)
    # Linux and Android builds also require a threads package, preferring pthreads.
    set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
    set(THREADS_PREFER_PTHREAD_FLAG TRUE)
    find_package(Threads REQUIRED)
  elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
    list(APPEND CPUINFO_SRCS src/mach/topology.c)
  endif()
endif()
# Create the public `cpuinfo` target with the linkage selected by
# CPUINFO_LIBRARY_TYPE ("default" leaves the choice to CMake).
if(CPUINFO_LIBRARY_TYPE STREQUAL "default")
  add_library(cpuinfo ${CPUINFO_SRCS})
elseif(CPUINFO_LIBRARY_TYPE STREQUAL "shared")
  add_library(cpuinfo SHARED ${CPUINFO_SRCS})
elseif(CPUINFO_LIBRARY_TYPE STREQUAL "static")
  add_library(cpuinfo STATIC ${CPUINFO_SRCS})
else()
  message(FATAL_ERROR "Unsupported library type ${CPUINFO_LIBRARY_TYPE}")
endif()

# Always-static build of the same sources; unlike `cpuinfo` it also exposes
# src/ as a public include directory.
add_library(cpuinfo_internals STATIC ${CPUINFO_SRCS})

CPUINFO_TARGET_ENABLE_C99(cpuinfo)
CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals)
CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo)

if(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
  # Target Windows 7+ API
  foreach(_cpuinfo_target cpuinfo cpuinfo_internals)
    target_compile_definitions(${_cpuinfo_target} PRIVATE _WIN32_WINNT=0x0601)
  endforeach()
endif()

set_target_properties(cpuinfo PROPERTIES PUBLIC_HEADER include/cpuinfo.h)
target_include_directories(cpuinfo BEFORE PUBLIC include)
target_include_directories(cpuinfo BEFORE PRIVATE src)
target_include_directories(cpuinfo_internals BEFORE PUBLIC include src)
# Translate the CPUINFO_LOG_LEVEL option into the numeric value of the
# CPUINFO_LOG_LEVEL compile definition for the `cpuinfo` target.
if(CPUINFO_LOG_LEVEL STREQUAL "default")
  # default logging level: error (subject to change)
  set(_cpuinfo_log_level_value 2)
elseif(CPUINFO_LOG_LEVEL STREQUAL "debug")
  set(_cpuinfo_log_level_value 5)
elseif(CPUINFO_LOG_LEVEL STREQUAL "info")
  set(_cpuinfo_log_level_value 4)
elseif(CPUINFO_LOG_LEVEL STREQUAL "warning")
  set(_cpuinfo_log_level_value 3)
elseif(CPUINFO_LOG_LEVEL STREQUAL "error")
  set(_cpuinfo_log_level_value 2)
elseif(CPUINFO_LOG_LEVEL STREQUAL "fatal")
  set(_cpuinfo_log_level_value 1)
elseif(CPUINFO_LOG_LEVEL STREQUAL "none")
  set(_cpuinfo_log_level_value 0)
else()
  message(FATAL_ERROR "Unsupported logging level ${CPUINFO_LOG_LEVEL}")
endif()
target_compile_definitions(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=${_cpuinfo_log_level_value})

# cpuinfo_internals is always built with level 0.
target_compile_definitions(cpuinfo_internals PRIVATE CPUINFO_LOG_LEVEL=0)
# Tell consumers of `cpuinfo` (INTERFACE definition) whether the platform is
# supported, and add the Linux/Android link and compile requirements.
if(NOT CPUINFO_SUPPORTED_PLATFORM)
  target_compile_definitions(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=0)
else()
  target_compile_definitions(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=1)
  if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
    target_link_libraries(cpuinfo PUBLIC ${CMAKE_THREAD_LIBS_INIT})
    target_link_libraries(cpuinfo_internals PUBLIC ${CMAKE_THREAD_LIBS_INIT})
    target_compile_definitions(cpuinfo PRIVATE _GNU_SOURCE=1)
    target_compile_definitions(cpuinfo_internals PRIVATE _GNU_SOURCE=1)
  endif()
endif()
# ---[ cpuinfo dependencies: clog
# CLOG_SOURCE_DIR may be set by the caller; otherwise the bundled copy is used.
if(NOT DEFINED CLOG_SOURCE_DIR)
  set(CLOG_SOURCE_DIR "${PROJECT_SOURCE_DIR}/deps/clog")
endif()

# Only add clog when no `clog` target exists yet.
if(NOT TARGET clog)
  set(CLOG_BUILD_TESTS OFF CACHE BOOL "")
  set(CLOG_RUNTIME_TYPE "${CPUINFO_RUNTIME_TYPE}" CACHE STRING "")
  add_subdirectory("${CLOG_SOURCE_DIR}")
  # We build static version of clog but a dynamic library may indirectly depend on it
  set_property(TARGET clog PROPERTY POSITION_INDEPENDENT_CODE ON)
endif()

foreach(_cpuinfo_target cpuinfo cpuinfo_internals)
  target_link_libraries(${_cpuinfo_target} PRIVATE clog)
endforeach()

27
dep/cpuinfo/LICENSE Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) 2019 Google LLC
Copyright (c) 2017-2018 Facebook Inc.
Copyright (C) 2012-2017 Georgia Institute of Technology
Copyright (C) 2010-2012 Marat Dukhan
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

283
dep/cpuinfo/README.md Normal file
View File

@@ -0,0 +1,283 @@
# CPU INFOrmation library
[![BSD (2 clause) License](https://img.shields.io/badge/License-BSD%202--Clause%20%22Simplified%22%20License-blue.svg)](https://github.com/pytorch/cpuinfo/blob/master/LICENSE)
[![Linux/Mac build status](https://img.shields.io/travis/pytorch/cpuinfo.svg)](https://travis-ci.org/pytorch/cpuinfo)
[![Windows build status](https://ci.appveyor.com/api/projects/status/g5khy9nr0xm458t7/branch/master?svg=true)](https://ci.appveyor.com/project/MaratDukhan/cpuinfo/branch/master)
cpuinfo is a library for detecting information about the host CPU that is essential for performance optimization.
## Features
- **Cross-platform** availability:
- Linux, Windows, macOS, Android, and iOS operating systems
- x86, x86-64, ARM, and ARM64 architectures
- Modern **C/C++ interface**
- Thread-safe
- No memory allocation after initialization
- No exceptions thrown
- Detection of **supported instruction sets**, up to AVX512 (x86) and ARMv8.3 extensions
- Detection of SoC and core information:
- **Processor (SoC) name**
- Vendor and **microarchitecture** for each CPU core
- ID (**MIDR** on ARM, **CPUID** leaf 1 EAX value on x86) for each CPU core
- Detection of **cache information**:
- Cache type (instruction/data/unified), size and line size
- Cache associativity
- Cores and logical processors (hyper-threads) sharing the cache
- Detection of **topology information** (relative between logical processors, cores, and processor packages)
- Well-tested **production-quality** code:
- 60+ mock tests based on data from real devices
- Includes work-arounds for common bugs in hardware and OS kernels
- Supports systems with heterogeneous cores, such as **big.LITTLE** and Max.Med.Min
- Permissive **open-source** license (Simplified BSD)
## Examples
Log processor name:
```c
cpuinfo_initialize();
printf("Running on %s CPU\n", cpuinfo_get_package(0)->name);
```
Detect if target is a 32-bit or 64-bit ARM system:
```c
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
/* ARM-specific code here (applies to both 32-bit and 64-bit ARM) */
#endif
```
Check if the host CPU supports ARM NEON
```c
cpuinfo_initialize();
if (cpuinfo_has_arm_neon()) {
neon_implementation(arguments);
}
```
Check if the host CPU supports x86 AVX
```c
cpuinfo_initialize();
if (cpuinfo_has_x86_avx()) {
avx_implementation(arguments);
}
```
Check if the thread runs on a Cortex-A53 core
```c
cpuinfo_initialize();
switch (cpuinfo_get_current_core()->uarch) {
case cpuinfo_uarch_cortex_a53:
cortex_a53_implementation(arguments);
break;
default:
generic_implementation(arguments);
break;
}
```
Get the size of level 1 data cache on the fastest core in the processor (e.g. big core in big.LITTLE ARM systems):
```c
cpuinfo_initialize();
const size_t l1_size = cpuinfo_get_processor(0)->cache.l1d->size;
```
Pin thread to cores sharing L2 cache with the current core (Linux or Android)
```c
cpuinfo_initialize();
cpu_set_t cpu_set;
CPU_ZERO(&cpu_set);
const struct cpuinfo_cache* current_l2 = cpuinfo_get_current_processor()->cache.l2;
for (uint32_t i = 0; i < current_l2->processor_count; i++) {
CPU_SET(cpuinfo_get_processor(current_l2->processor_start + i)->linux_id, &cpu_set);
}
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_set);
```
## Use via pkg-config
If you would like to provide your project's build environment with the necessary compiler and linker flags in a portable manner, note that the library enables `CPUINFO_BUILD_PKG_CONFIG` by default and, when built, generates a [pkg-config](https://www.freedesktop.org/wiki/Software/pkg-config/) manifest (_libcpuinfo.pc_). Here are several examples of how to use it:
### Command Line
If you used your distro's package manager to install the library, you can verify that it is available to your build environment like so:
```console
$ pkg-config --cflags --libs libcpuinfo
-I/usr/include/x86_64-linux-gnu/ -L/lib/x86_64-linux-gnu/ -lcpuinfo
```
If you have installed the library from source into a non-standard prefix, pkg-config may need help finding it:
```console
$ PKG_CONFIG_PATH="/home/me/projects/cpuinfo/prefix/lib/pkgconfig/:$PKG_CONFIG_PATH" pkg-config --cflags --libs libcpuinfo
-I/home/me/projects/cpuinfo/prefix/include -L/home/me/projects/cpuinfo/prefix/lib -lcpuinfo
```
### GNU Autotools
To [use](https://autotools.io/pkgconfig/pkg_check_modules.html) with the GNU Autotools include the following snippet in your project's `configure.ac`:
```makefile
# CPU INFOrmation library...
PKG_CHECK_MODULES(
[libcpuinfo], [libcpuinfo], [],
[AC_MSG_ERROR([libcpuinfo missing...])])
YOURPROJECT_CXXFLAGS="$YOURPROJECT_CXXFLAGS $libcpuinfo_CFLAGS"
YOURPROJECT_LIBS="$YOURPROJECT_LIBS $libcpuinfo_LIBS"
```
### Meson
To use with Meson you just need to add `dependency('libcpuinfo')` as a dependency for your executable.
```meson
project(
'MyCpuInfoProject',
'cpp',
meson_version: '>=0.55.0'
)
executable(
'MyCpuInfoExecutable',
sources: 'main.cpp',
dependencies: dependency('libcpuinfo')
)
```
### CMake
To use with CMake use the [FindPkgConfig](https://cmake.org/cmake/help/latest/module/FindPkgConfig.html) module. Here is an example:
```cmake
cmake_minimum_required(VERSION 3.6)
project("MyCpuInfoProject")
find_package(PkgConfig)
pkg_check_modules(CpuInfo REQUIRED IMPORTED_TARGET libcpuinfo)
add_executable(${PROJECT_NAME} main.cpp)
target_link_libraries(${PROJECT_NAME} PkgConfig::CpuInfo)
```
### Makefile
To use within a vanilla makefile, you can call pkg-config directly to supply compiler and linker flags using shell substitution.
```makefile
CFLAGS=-g3 -Wall -Wextra -Werror ...
LDFLAGS=-lfoo ...
...
CFLAGS+= $(shell pkg-config --cflags libcpuinfo)
LDFLAGS+= $(shell pkg-config --libs libcpuinfo)
```
## Exposed information
- [x] Processor (SoC) name
- [x] Microarchitecture
- [x] Usable instruction sets
- [ ] CPU frequency
- [x] Cache
- [x] Size
- [x] Associativity
- [x] Line size
- [x] Number of partitions
- [x] Flags (unified, inclusive, complex hash function)
- [x] Topology (logical processors that share this cache level)
- [ ] TLB
- [ ] Number of entries
- [ ] Associativity
- [ ] Covered page types (instruction, data)
- [ ] Covered page sizes
- [x] Topology information
- [x] Logical processors
- [x] Cores
- [x] Packages (sockets)
## Supported environments:
- [x] Android
- [x] x86 ABI
- [x] x86_64 ABI
- [x] armeabi ABI
- [x] armeabiv7-a ABI
- [x] arm64-v8a ABI
- [ ] ~~mips ABI~~
- [ ] ~~mips64 ABI~~
- [x] Linux
- [x] x86
- [x] x86-64
- [x] 32-bit ARM (ARMv5T and later)
- [x] ARM64
- [ ] PowerPC64
- [x] iOS
- [x] x86 (iPhone simulator)
- [x] x86-64 (iPhone simulator)
- [x] ARMv7
- [x] ARM64
- [x] OS X
- [x] x86
- [x] x86-64
- [x] Windows
- [x] x86
- [x] x86-64
## Methods
- Processor (SoC) name detection
- [x] Using CPUID leaves 0x80000002–0x80000004 on x86/x86-64
- [x] Using `/proc/cpuinfo` on ARM
- [x] Using `ro.chipname`, `ro.board.platform`, `ro.product.board`, `ro.mediatek.platform`, `ro.arch` properties (Android)
- [ ] Using kernel log (`dmesg`) on ARM Linux
- Vendor and microarchitecture detection
- [x] Intel-designed x86/x86-64 cores (up to Sunny Cove, Goldmont Plus, and Knights Mill)
- [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen 2)
- [ ] VIA-designed x86/x86-64 cores
- [ ] Other x86 cores (DM&P, RDC, Transmeta, Cyrix, Rise)
- [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/N1)
- [x] Qualcomm-designed ARM cores (Scorpion, Krait, and Kryo)
- [x] Nvidia-designed ARM cores (Denver and Carmel)
- [x] Samsung-designed ARM cores (Exynos)
- [x] Intel-designed ARM cores (XScale up to 3rd-gen)
- [x] Apple-designed ARM cores (up to Lightning and Thunder)
- [x] Cavium-designed ARM cores (ThunderX)
- [x] AppliedMicro-designed ARM cores (X-Gene)
- Instruction set detection
- [x] Using CPUID (x86/x86-64)
- [x] Using `/proc/cpuinfo` on 32-bit ARM EABI (Linux)
- [x] Using microarchitecture heuristics on (32-bit ARM)
- [x] Using `FPSID` and `WCID` registers (32-bit ARM)
- [x] Using `getauxval` (Linux/ARM)
- [x] Using `/proc/self/auxv` (Android/ARM)
- [ ] Using instruction probing on ARM (Linux)
- [ ] Using CPUID registers on ARM64 (Linux)
- Cache detection
- [x] Using CPUID leaf 0x00000002 (x86/x86-64)
- [x] Using CPUID leaf 0x00000004 (non-AMD x86/x86-64)
- [ ] Using CPUID leaves 0x80000005-0x80000006 (AMD x86/x86-64)
- [x] Using CPUID leaf 0x8000001D (AMD x86/x86-64)
- [x] Using `/proc/cpuinfo` (Linux/pre-ARMv7)
- [x] Using microarchitecture heuristics (ARM)
- [x] Using chipset name (ARM)
- [x] Using `sysctlbyname` (Mach)
- [x] Using sysfs `topology` directories (ARM/Linux)
- [ ] Using sysfs `cache` directories (Linux)
- TLB detection
- [x] Using CPUID leaf 0x00000002 (x86/x86-64)
- [ ] Using CPUID leaves 0x80000005-0x80000006 and 0x80000019 (AMD x86/x86-64)
- [x] Using microarchitecture heuristics (ARM)
- Topology detection
- [x] Using CPUID leaf 0x00000001 on x86/x86-64 (legacy APIC ID)
- [x] Using CPUID leaf 0x0000000B on x86/x86-64 (Intel APIC ID)
- [ ] Using CPUID leaf 0x8000001E on x86/x86-64 (AMD APIC ID)
- [x] Using `/proc/cpuinfo` (Linux)
- [x] Using `host_info` (Mach)
- [x] Using `GetLogicalProcessorInformationEx` (Windows)
- [x] Using sysfs (Linux)
- [x] Using chipset name (ARM/Linux)

View File

@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- MSBuild project that builds cpuinfo (together with the bundled clog source) using the shared StaticLibrary.props settings. -->
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="..\msvc\vsprops\Configurations.props" />
<!-- Source files. Files under src\arm are excluded unless the platform is ARM64; files under src\x86 are excluded when it is ARM64. -->
<ItemGroup>
<ClCompile Include="deps\clog\src\clog.c" />
<ClCompile Include="src\api.c" />
<ClCompile Include="src\arm\cache.c">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\arm\uarch.c">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\arm\windows\init.c">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\cache.c" />
<ClCompile Include="src\init.c" />
<ClCompile Include="src\x86\cache\descriptor.c">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\x86\cache\deterministic.c">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\x86\cache\init.c">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\x86\info.c">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\x86\init.c">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\x86\isa.c">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\x86\name.c">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\x86\topology.c">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\x86\uarch.c">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\x86\vendor.c">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\x86\windows\init.c">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClCompile>
</ItemGroup>
<!-- Header files listed in the project. -->
<ItemGroup>
<ClInclude Include="deps\clog\include\clog.h" />
<ClInclude Include="include\cpuinfo.h" />
<ClInclude Include="src\arm\api.h" />
<ClInclude Include="src\arm\midr.h" />
<ClInclude Include="src\arm\windows\api.h" />
<ClInclude Include="src\cpuinfo\common.h" />
<ClInclude Include="src\cpuinfo\internal-api.h" />
<ClInclude Include="src\cpuinfo\log.h" />
<ClInclude Include="src\cpuinfo\utils.h" />
<ClInclude Include="src\x86\api.h" />
<ClInclude Include="src\x86\cpuid.h" />
<ClInclude Include="src\x86\windows\api.h" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{EE55AA65-EA6B-4861-810B-78354B53A807}</ProjectGuid>
</PropertyGroup>
<Import Project="..\msvc\vsprops\StaticLibrary.props" />
<ItemDefinitionGroup>
<ClCompile>
<!-- Compiler warnings are turned off for this project. -->
<WarningLevel>TurnOffAllWarnings</WarningLevel>
<AdditionalIncludeDirectories>$(ProjectDir)include;$(ProjectDir)src;$(ProjectDir)deps\clog\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<LanguageStandard>stdcpp14</LanguageStandard>
<!-- Object files go into a per-source-directory subfolder; several sources above share the file name init.c. -->
<ObjectFileName>$(IntDir)%(RelativeDir)</ObjectFileName>
</ClCompile>
</ItemDefinitionGroup>
<Import Project="..\msvc\vsprops\Targets.props" />
</Project>

View File

@@ -0,0 +1,112 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Filter (virtual folder) assignments for the source and header files of the cpuinfo project. -->
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Filter definitions. -->
<ItemGroup>
<Filter Include="x86">
<UniqueIdentifier>{8fc9f543-ff04-48fb-ae1a-7c575a8aed13}</UniqueIdentifier>
</Filter>
<Filter Include="x86\windows">
<UniqueIdentifier>{0b540baa-aafb-4e51-8cbf-b7e7c00d9a4d}</UniqueIdentifier>
</Filter>
<Filter Include="x86\descriptor">
<UniqueIdentifier>{53ef3c40-8e03-46d1-aeb3-6446c40469da}</UniqueIdentifier>
</Filter>
<Filter Include="cpuinfo">
<UniqueIdentifier>{26002d26-399a-41bb-93cb-42fb9be21c1f}</UniqueIdentifier>
</Filter>
<Filter Include="clog">
<UniqueIdentifier>{7f0aba4c-ca06-4a7b-aed1-4f1e6976e839}</UniqueIdentifier>
</Filter>
<Filter Include="arm">
<UniqueIdentifier>{f8cee8f2-6ab7-47cf-a5fb-3ae5e444000c}</UniqueIdentifier>
</Filter>
<Filter Include="arm\windows">
<UniqueIdentifier>{cca5126a-b401-4925-b163-d2e64b010c7b}</UniqueIdentifier>
</Filter>
</ItemGroup>
<!-- Source files; entries without a Filter child have no filter assigned. -->
<ItemGroup>
<ClCompile Include="src\x86\isa.c">
<Filter>x86</Filter>
</ClCompile>
<ClCompile Include="src\x86\name.c">
<Filter>x86</Filter>
</ClCompile>
<ClCompile Include="src\x86\topology.c">
<Filter>x86</Filter>
</ClCompile>
<ClCompile Include="src\x86\uarch.c">
<Filter>x86</Filter>
</ClCompile>
<ClCompile Include="src\x86\vendor.c">
<Filter>x86</Filter>
</ClCompile>
<ClCompile Include="src\x86\info.c">
<Filter>x86</Filter>
</ClCompile>
<ClCompile Include="src\x86\init.c">
<Filter>x86</Filter>
</ClCompile>
<ClCompile Include="src\x86\windows\init.c">
<Filter>x86\windows</Filter>
</ClCompile>
<!-- The files from src\x86\cache are placed in the "x86\descriptor" filter. -->
<ClCompile Include="src\x86\cache\deterministic.c">
<Filter>x86\descriptor</Filter>
</ClCompile>
<ClCompile Include="src\x86\cache\init.c">
<Filter>x86\descriptor</Filter>
</ClCompile>
<ClCompile Include="src\x86\cache\descriptor.c">
<Filter>x86\descriptor</Filter>
</ClCompile>
<ClCompile Include="src\api.c" />
<ClCompile Include="src\cache.c" />
<ClCompile Include="src\init.c" />
<ClCompile Include="deps\clog\src\clog.c">
<Filter>clog</Filter>
</ClCompile>
<ClCompile Include="src\arm\cache.c">
<Filter>arm</Filter>
</ClCompile>
<ClCompile Include="src\arm\uarch.c">
<Filter>arm</Filter>
</ClCompile>
<ClCompile Include="src\arm\windows\init.c">
<Filter>arm\windows</Filter>
</ClCompile>
</ItemGroup>
<!-- Header files. -->
<ItemGroup>
<ClInclude Include="src\x86\api.h">
<Filter>x86</Filter>
</ClInclude>
<ClInclude Include="src\x86\cpuid.h">
<Filter>x86</Filter>
</ClInclude>
<ClInclude Include="src\x86\windows\api.h">
<Filter>x86\windows</Filter>
</ClInclude>
<ClInclude Include="src\cpuinfo\internal-api.h">
<Filter>cpuinfo</Filter>
</ClInclude>
<ClInclude Include="src\cpuinfo\log.h">
<Filter>cpuinfo</Filter>
</ClInclude>
<ClInclude Include="src\cpuinfo\utils.h">
<Filter>cpuinfo</Filter>
</ClInclude>
<ClInclude Include="src\cpuinfo\common.h">
<Filter>cpuinfo</Filter>
</ClInclude>
<ClInclude Include="include\cpuinfo.h" />
<ClInclude Include="deps\clog\include\clog.h">
<Filter>clog</Filter>
</ClInclude>
<ClInclude Include="src\arm\api.h">
<Filter>arm</Filter>
</ClInclude>
<ClInclude Include="src\arm\midr.h">
<Filter>arm</Filter>
</ClInclude>
<ClInclude Include="src\arm\windows\api.h">
<Filter>arm\windows</Filter>
</ClInclude>
</ItemGroup>
</Project>

19
dep/cpuinfo/deps/clog/.gitignore vendored Normal file
View File

@@ -0,0 +1,19 @@
# Ninja files
build.ninja
# Build objects and artifacts
deps/
build/
bin/
lib/
*.pyc
*.pyo
# System files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

View File

@@ -0,0 +1,42 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)
# ---[ Project and semantic versioning.
PROJECT(clog C CXX)
# ---[ Options.
SET(CLOG_RUNTIME_TYPE "default" CACHE STRING "Type of runtime library (shared, static, or default) to use")
SET_PROPERTY(CACHE CLOG_RUNTIME_TYPE PROPERTY STRINGS default static shared)
IF(ANDROID)
OPTION(CLOG_LOG_TO_STDIO "Log errors, warnings, and information to stdout/stderr" OFF)
ELSE()
OPTION(CLOG_LOG_TO_STDIO "Log errors, warnings, and information to stdout/stderr" ON)
ENDIF()
# Applies the MSVC runtime-library compile flag selected by CLOG_RUNTIME_TYPE
# to `target`:
#   "shared" -> /MD (/MDd in Debug configurations)
#   "static" -> /MT (/MTd in Debug configurations)
# Nothing is added for "default", for any other value, or on non-MSVC
# toolchains.
MACRO(CLOG_TARGET_RUNTIME_LIBRARY target)
  IF(MSVC AND CLOG_RUNTIME_TYPE STREQUAL "shared")
    TARGET_COMPILE_OPTIONS(${target} PRIVATE "/MD$<$<CONFIG:Debug>:d>")
  ELSEIF(MSVC AND CLOG_RUNTIME_TYPE STREQUAL "static")
    TARGET_COMPILE_OPTIONS(${target} PRIVATE "/MT$<$<CONFIG:Debug>:d>")
  ENDIF()
ENDMACRO()
# ---[ clog library
# Static library built from the single translation unit src/clog.c.
ADD_LIBRARY(clog STATIC src/clog.c)
# Compile as C99 with compiler-specific language extensions disabled.
SET_TARGET_PROPERTIES(clog PROPERTIES
C_STANDARD 99
C_EXTENSIONS NO)
# Apply the /MD or /MT selection from CLOG_RUNTIME_TYPE (has an effect on MSVC only).
CLOG_TARGET_RUNTIME_LIBRARY(clog)
SET_TARGET_PROPERTIES(clog PROPERTIES PUBLIC_HEADER include/clog.h)
# PUBLIC: targets linking against clog also get include/ on their include path.
TARGET_INCLUDE_DIRECTORIES(clog BEFORE PUBLIC include)
# Always define CLOG_LOG_TO_STDIO explicitly (1 or 0) so src/clog.c does not fall back to its own default.
IF(CLOG_LOG_TO_STDIO)
TARGET_COMPILE_DEFINITIONS(clog PRIVATE CLOG_LOG_TO_STDIO=1)
ELSE()
TARGET_COMPILE_DEFINITIONS(clog PRIVATE CLOG_LOG_TO_STDIO=0)
ENDIF()
# The Android "log" library is only needed when messages are not sent to stdout/stderr.
IF(ANDROID AND NOT CLOG_LOG_TO_STDIO)
TARGET_LINK_LIBRARIES(clog PRIVATE log)
ENDIF()

View File

@@ -0,0 +1,26 @@
Copyright (C) 2018 Marat Dukhan
Copyright (c) 2017-2018 Facebook Inc.
Copyright (c) 2017 Georgia Institute of Technology
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,57 @@
# clog: C-style (a-la printf) logging library
[![BSD (2 clause) License](https://img.shields.io/badge/License-BSD%202--Clause%20%22Simplified%22%20License-blue.svg)](https://github.com/pytorch/cpuinfo/blob/master/deps/clog/LICENSE)
C-style library for logging errors, warnings, information notes, and debug information.
## Features
- printf-style interface for formatting variadic parameters.
- Separate functions for logging errors, warnings, information notes, and debug information.
- Independent logging settings for different modules.
- Logging to logcat on Android and stderr/stdout on other platforms.
- Compatible with C99 and C++.
- Covered with unit tests.
## Example
```c
#include <clog.h>
#ifndef MYMODULE_LOG_LEVEL
#define MYMODULE_LOG_LEVEL CLOG_DEBUG
#endif
CLOG_DEFINE_LOG_DEBUG(mymodule_log_debug, "My Module", MYMODULE_LOG_LEVEL);
CLOG_DEFINE_LOG_INFO(mymodule_log_info, "My Module", MYMODULE_LOG_LEVEL);
CLOG_DEFINE_LOG_WARNING(mymodule_log_warning, "My Module", MYMODULE_LOG_LEVEL);
CLOG_DEFINE_LOG_ERROR(mymodule_log_error, "My Module", MYMODULE_LOG_LEVEL);
...
void some_function(...) {
int status = ...
if (status != 0) {
mymodule_log_error(
"something really bad happened: "
"operation failed with status %d", status);
}
uint32_t expected_zero = ...
if (expected_zero != 0) {
mymodule_log_warning(
"something suspicious happened (var = %"PRIu32"), "
"fall back to generic implementation", expected_zero);
}
void* usually_non_null = ...
if (usually_non_null == NULL) {
mymodule_log_info(
"something unusual, but common, happened: "
"enabling work-around");
}
float a = ...
mymodule_log_debug("computed a = %.7f", a);
}
```

View File

@@ -0,0 +1,100 @@
#pragma once
#include <stdarg.h>
#include <stdlib.h>
#include <inttypes.h>
/*
 * Log levels in increasing order of verbosity. The CLOG_DEFINE_LOG_* macros
 * emit a message only when the module's level is >= the message's level.
 */
#define CLOG_NONE 0
#define CLOG_FATAL 1
#define CLOG_ERROR 2
#define CLOG_WARNING 3
#define CLOG_INFO 4
#define CLOG_DEBUG 5
/*
 * Symbol visibility attribute applied to the clog_vlog_* declarations:
 * "internal" when __ELF__ is defined, "hidden" when __MACH__ is defined,
 * empty otherwise. Define CLOG_VISIBILITY before including this header to
 * override the default.
 */
#ifndef CLOG_VISIBILITY
#if defined(__ELF__)
#define CLOG_VISIBILITY __attribute__((__visibility__("internal")))
#elif defined(__MACH__)
#define CLOG_VISIBILITY __attribute__((__visibility__("hidden")))
#else
#define CLOG_VISIBILITY
#endif
#endif
/*
 * Attribute placed on the generated logging functions. When __GNUC__ is
 * defined it marks parameter 1 as a printf-style format string whose
 * arguments start at parameter 2; otherwise it is empty. Can be overridden
 * by defining CLOG_ARGUMENTS_FORMAT before including this header.
 */
#ifndef CLOG_ARGUMENTS_FORMAT
#if defined(__GNUC__)
#define CLOG_ARGUMENTS_FORMAT __attribute__((__format__(__printf__, 1, 2)))
#else
#define CLOG_ARGUMENTS_FORMAT
#endif
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
 * va_list-based logging entry points, one per severity. Each takes a module
 * name, a printf-style format string and the format arguments.
 * These are the functions called by the wrappers that the CLOG_DEFINE_LOG_*
 * macros below generate.
 */
CLOG_VISIBILITY void clog_vlog_debug(const char* module, const char* format, va_list args);
CLOG_VISIBILITY void clog_vlog_info(const char* module, const char* format, va_list args);
CLOG_VISIBILITY void clog_vlog_warning(const char* module, const char* format, va_list args);
CLOG_VISIBILITY void clog_vlog_error(const char* module, const char* format, va_list args);
CLOG_VISIBILITY void clog_vlog_fatal(const char* module, const char* format, va_list args);
/*
 * CLOG_DEFINE_LOG_<SEVERITY>(function_name, module, level)
 *
 * Each macro defines a `static inline` printf-style function named exactly
 * `function_name` (the argument is used verbatim, it is not a prefix):
 *
 *     void function_name(const char* format, ...);
 *
 * function_name - full name of the function to define.
 * module        - module name forwarded to clog_vlog_<severity>.
 * level         - log level of the module (one of the CLOG_* level constants,
 *                 or an expression yielding one). The message is forwarded
 *                 only when level >= the severity of the macro.
 *
 * `level` is parenthesized in the comparison so that expression arguments
 * (e.g. a conditional expression) are compared as a whole.
 */

/* Defines a debug-level logger; forwards to clog_vlog_debug when (level) >= CLOG_DEBUG. */
#define CLOG_DEFINE_LOG_DEBUG(log_debug_function_name, module, level) \
    CLOG_ARGUMENTS_FORMAT \
    inline static void log_debug_function_name(const char* format, ...) { \
        if ((level) >= CLOG_DEBUG) { \
            va_list args; \
            va_start(args, format); \
            clog_vlog_debug(module, format, args); \
            va_end(args); \
        } \
    }

/* Defines an info-level logger; forwards to clog_vlog_info when (level) >= CLOG_INFO. */
#define CLOG_DEFINE_LOG_INFO(log_info_function_name, module, level) \
    CLOG_ARGUMENTS_FORMAT \
    inline static void log_info_function_name(const char* format, ...) { \
        if ((level) >= CLOG_INFO) { \
            va_list args; \
            va_start(args, format); \
            clog_vlog_info(module, format, args); \
            va_end(args); \
        } \
    }

/* Defines a warning-level logger; forwards to clog_vlog_warning when (level) >= CLOG_WARNING. */
#define CLOG_DEFINE_LOG_WARNING(log_warning_function_name, module, level) \
    CLOG_ARGUMENTS_FORMAT \
    inline static void log_warning_function_name(const char* format, ...) { \
        if ((level) >= CLOG_WARNING) { \
            va_list args; \
            va_start(args, format); \
            clog_vlog_warning(module, format, args); \
            va_end(args); \
        } \
    }

/* Defines an error-level logger; forwards to clog_vlog_error when (level) >= CLOG_ERROR. */
#define CLOG_DEFINE_LOG_ERROR(log_error_function_name, module, level) \
    CLOG_ARGUMENTS_FORMAT \
    inline static void log_error_function_name(const char* format, ...) { \
        if ((level) >= CLOG_ERROR) { \
            va_list args; \
            va_start(args, format); \
            clog_vlog_error(module, format, args); \
            va_end(args); \
        } \
    }

/*
 * Defines a fatal-level logger; forwards to clog_vlog_fatal when
 * (level) >= CLOG_FATAL. The generated function always calls abort(),
 * whether or not the message was emitted.
 */
#define CLOG_DEFINE_LOG_FATAL(log_fatal_function_name, module, level) \
    CLOG_ARGUMENTS_FORMAT \
    inline static void log_fatal_function_name(const char* format, ...) { \
        if ((level) >= CLOG_FATAL) { \
            va_list args; \
            va_start(args, format); \
            clog_vlog_fatal(module, format, args); \
            va_end(args); \
        } \
        abort(); \
    }
#ifdef __cplusplus
} /* extern "C" */
#endif

View File

@@ -0,0 +1,423 @@
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#ifdef _WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
#ifdef __ANDROID__
#include <android/log.h>
#endif
#ifndef CLOG_LOG_TO_STDIO
#ifdef __ANDROID__
#define CLOG_LOG_TO_STDIO 0
#else
#define CLOG_LOG_TO_STDIO 1
#endif
#endif
#include <clog.h>
/* Messages up to this size are formatted entirely on-stack, and don't allocate heap memory */
#define CLOG_STACK_BUFFER_SIZE 1024
#define CLOG_FATAL_PREFIX "Fatal error: "
#define CLOG_FATAL_PREFIX_LENGTH 13
#define CLOG_FATAL_PREFIX_FORMAT "Fatal error in %s: "
#define CLOG_ERROR_PREFIX "Error: "
#define CLOG_ERROR_PREFIX_LENGTH 7
#define CLOG_ERROR_PREFIX_FORMAT "Error in %s: "
#define CLOG_WARNING_PREFIX "Warning: "
#define CLOG_WARNING_PREFIX_LENGTH 9
#define CLOG_WARNING_PREFIX_FORMAT "Warning in %s: "
#define CLOG_INFO_PREFIX "Note: "
#define CLOG_INFO_PREFIX_LENGTH 6
#define CLOG_INFO_PREFIX_FORMAT "Note (%s): "
#define CLOG_DEBUG_PREFIX "Debug: "
#define CLOG_DEBUG_PREFIX_LENGTH 7
#define CLOG_DEBUG_PREFIX_FORMAT "Debug (%s): "
#define CLOG_SUFFIX_LENGTH 1
/*
 * Formats a fatal-error message and emits it with a single write.
 *
 * On Android builds with CLOG_LOG_TO_STDIO disabled the message is passed to
 * __android_log_vprint with ANDROID_LOG_FATAL priority. Otherwise the line
 * "<prefix><formatted message>\n" is assembled (on the stack when it fits,
 * on the heap otherwise) and written to stderr.
 *
 * module - name inserted into the prefix ("Fatal error in <module>: ");
 *          when NULL the generic "Fatal error: " prefix is used.
 * format - printf-style format string.
 * args   - arguments for `format`; consumed by this call.
 *
 * A formatting error in the message or a failed allocation drops the message
 * silently. This function does not terminate the process itself.
 */
void clog_vlog_fatal(const char* module, const char* format, va_list args) {
#if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO
    __android_log_vprint(ANDROID_LOG_FATAL, module, format, args);
#else
    char stack_buffer[CLOG_STACK_BUFFER_SIZE];
    char* heap_buffer = NULL;
    char* out_buffer = &stack_buffer[0];

    /* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */
    va_list args_copy;
    va_copy(args_copy, args);

    int prefix_chars = CLOG_FATAL_PREFIX_LENGTH;
    if (module == NULL) {
        memcpy(stack_buffer, CLOG_FATAL_PREFIX, CLOG_FATAL_PREFIX_LENGTH);
    } else {
        prefix_chars = snprintf(stack_buffer, CLOG_STACK_BUFFER_SIZE, CLOG_FATAL_PREFIX_FORMAT, module);
        if (prefix_chars < 0) {
            /* Format error in prefix (possible if prefix is modified): skip prefix and continue as if nothing happened. */
            prefix_chars = 0;
        }
    }

    int format_chars;
    if (prefix_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) {
        /*
         * Prefix + suffix alone would overflow the on-stack buffer, thus need to use on-heap buffer.
         * Do not even try to format the string into on-stack buffer.
         */
        format_chars = vsnprintf(NULL, 0, format, args);
    } else {
        format_chars =
            vsnprintf(
                &stack_buffer[prefix_chars],
                CLOG_STACK_BUFFER_SIZE - prefix_chars - CLOG_SUFFIX_LENGTH,
                format,
                args);
    }
    if (format_chars < 0) {
        /* Format error in the message: silently ignore this particular message. */
        goto cleanup;
    }

    /*
     * The on-stack vsnprintf above was given (SIZE - prefix - suffix) bytes, one of which it uses for
     * the '\0' terminator, so the message is complete on the stack only when
     * prefix + message + suffix < SIZE. Use >= here: with '>' a message that lands exactly on the
     * boundary would be emitted with its last character replaced by '\0'.
     */
    if (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) {
        /* Allocate a buffer on heap, and vsnprintf to this buffer */
        heap_buffer = malloc((size_t) (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH));
        if (heap_buffer == NULL) {
            goto cleanup;
        }

        /*
         * snprintf truncates as soon as the prefix needs SIZE or more characters (the last byte of the
         * stack buffer holds the terminator), hence >= rather than >.
         */
        if (prefix_chars >= CLOG_STACK_BUFFER_SIZE) {
            /* Prefix didn't fit into on-stack buffer, re-format it again to on-heap buffer */
            snprintf(heap_buffer, prefix_chars + 1 /* for '\0'-terminator */, CLOG_FATAL_PREFIX_FORMAT, module);
        } else {
            /* Copy pre-formatted prefix from on-stack buffer to on-heap buffer */
            memcpy(heap_buffer, stack_buffer, prefix_chars);
        }
        vsnprintf(heap_buffer + prefix_chars, format_chars + CLOG_SUFFIX_LENGTH, format, args_copy);
        out_buffer = heap_buffer;
    }
    /* Overwrite the '\0' terminator with the newline suffix; the buffer is written by length. */
    out_buffer[prefix_chars + format_chars] = '\n';
#ifdef _WIN32
    DWORD bytes_written;
    WriteFile(
        GetStdHandle(STD_ERROR_HANDLE),
        out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH,
        &bytes_written, NULL);
#else
    write(STDERR_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH);
#endif

cleanup:
    free(heap_buffer);
    va_end(args_copy);
#endif
}
/*
 * Formats an error message and emits it with a single write.
 *
 * On Android builds with CLOG_LOG_TO_STDIO disabled the message is passed to
 * __android_log_vprint with ANDROID_LOG_ERROR priority. Otherwise the line
 * "<prefix><formatted message>\n" is assembled (on the stack when it fits,
 * on the heap otherwise) and written to stderr.
 *
 * module - name inserted into the prefix ("Error in <module>: ");
 *          when NULL the generic "Error: " prefix is used.
 * format - printf-style format string.
 * args   - arguments for `format`; consumed by this call.
 *
 * A formatting error in the message or a failed allocation drops the message
 * silently.
 */
void clog_vlog_error(const char* module, const char* format, va_list args) {
#if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO
    __android_log_vprint(ANDROID_LOG_ERROR, module, format, args);
#else
    char stack_buffer[CLOG_STACK_BUFFER_SIZE];
    char* heap_buffer = NULL;
    char* out_buffer = &stack_buffer[0];

    /* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */
    va_list args_copy;
    va_copy(args_copy, args);

    int prefix_chars = CLOG_ERROR_PREFIX_LENGTH;
    if (module == NULL) {
        memcpy(stack_buffer, CLOG_ERROR_PREFIX, CLOG_ERROR_PREFIX_LENGTH);
    } else {
        prefix_chars = snprintf(stack_buffer, CLOG_STACK_BUFFER_SIZE, CLOG_ERROR_PREFIX_FORMAT, module);
        if (prefix_chars < 0) {
            /* Format error in prefix (possible if prefix is modified): skip prefix and continue as if nothing happened. */
            prefix_chars = 0;
        }
    }

    int format_chars;
    if (prefix_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) {
        /*
         * Prefix + suffix alone would overflow the on-stack buffer, thus need to use on-heap buffer.
         * Do not even try to format the string into on-stack buffer.
         */
        format_chars = vsnprintf(NULL, 0, format, args);
    } else {
        format_chars =
            vsnprintf(
                &stack_buffer[prefix_chars],
                CLOG_STACK_BUFFER_SIZE - prefix_chars - CLOG_SUFFIX_LENGTH,
                format,
                args);
    }
    if (format_chars < 0) {
        /* Format error in the message: silently ignore this particular message. */
        goto cleanup;
    }

    /*
     * The on-stack vsnprintf above was given (SIZE - prefix - suffix) bytes, one of which it uses for
     * the '\0' terminator, so the message is complete on the stack only when
     * prefix + message + suffix < SIZE. Use >= here: with '>' a message that lands exactly on the
     * boundary would be emitted with its last character replaced by '\0'.
     */
    if (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) {
        /* Allocate a buffer on heap, and vsnprintf to this buffer */
        heap_buffer = malloc((size_t) (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH));
        if (heap_buffer == NULL) {
            goto cleanup;
        }

        /*
         * snprintf truncates as soon as the prefix needs SIZE or more characters (the last byte of the
         * stack buffer holds the terminator), hence >= rather than >.
         */
        if (prefix_chars >= CLOG_STACK_BUFFER_SIZE) {
            /* Prefix didn't fit into on-stack buffer, re-format it again to on-heap buffer */
            snprintf(heap_buffer, prefix_chars + 1 /* for '\0'-terminator */, CLOG_ERROR_PREFIX_FORMAT, module);
        } else {
            /* Copy pre-formatted prefix from on-stack buffer to on-heap buffer */
            memcpy(heap_buffer, stack_buffer, prefix_chars);
        }
        vsnprintf(heap_buffer + prefix_chars, format_chars + CLOG_SUFFIX_LENGTH, format, args_copy);
        out_buffer = heap_buffer;
    }
    /* Overwrite the '\0' terminator with the newline suffix; the buffer is written by length. */
    out_buffer[prefix_chars + format_chars] = '\n';
#ifdef _WIN32
    DWORD bytes_written;
    WriteFile(
        GetStdHandle(STD_ERROR_HANDLE),
        out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH,
        &bytes_written, NULL);
#else
    write(STDERR_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH);
#endif

cleanup:
    free(heap_buffer);
    va_end(args_copy);
#endif
}
/*
 * Formats a warning message and emits it with a single write.
 *
 * On Android builds with CLOG_LOG_TO_STDIO disabled the message is passed to
 * __android_log_vprint with ANDROID_LOG_WARN priority. Otherwise the line
 * "<prefix><formatted message>\n" is assembled (on the stack when it fits,
 * on the heap otherwise) and written to stderr.
 *
 * module - name inserted into the prefix ("Warning in <module>: ");
 *          when NULL the generic "Warning: " prefix is used.
 * format - printf-style format string.
 * args   - arguments for `format`; consumed by this call.
 *
 * A formatting error in the message or a failed allocation drops the message
 * silently.
 */
void clog_vlog_warning(const char* module, const char* format, va_list args) {
#if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO
    __android_log_vprint(ANDROID_LOG_WARN, module, format, args);
#else
    char stack_buffer[CLOG_STACK_BUFFER_SIZE];
    char* heap_buffer = NULL;
    char* out_buffer = &stack_buffer[0];

    /* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */
    va_list args_copy;
    va_copy(args_copy, args);

    int prefix_chars = CLOG_WARNING_PREFIX_LENGTH;
    if (module == NULL) {
        memcpy(stack_buffer, CLOG_WARNING_PREFIX, CLOG_WARNING_PREFIX_LENGTH);
    } else {
        prefix_chars = snprintf(stack_buffer, CLOG_STACK_BUFFER_SIZE, CLOG_WARNING_PREFIX_FORMAT, module);
        if (prefix_chars < 0) {
            /* Format error in prefix (possible if prefix is modified): skip prefix and continue as if nothing happened. */
            prefix_chars = 0;
        }
    }

    int format_chars;
    if (prefix_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) {
        /*
         * Prefix + suffix alone would overflow the on-stack buffer, thus need to use on-heap buffer.
         * Do not even try to format the string into on-stack buffer.
         */
        format_chars = vsnprintf(NULL, 0, format, args);
    } else {
        format_chars =
            vsnprintf(
                &stack_buffer[prefix_chars],
                CLOG_STACK_BUFFER_SIZE - prefix_chars - CLOG_SUFFIX_LENGTH,
                format,
                args);
    }
    if (format_chars < 0) {
        /* Format error in the message: silently ignore this particular message. */
        goto cleanup;
    }

    /*
     * The on-stack vsnprintf above was given (SIZE - prefix - suffix) bytes, one of which it uses for
     * the '\0' terminator, so the message is complete on the stack only when
     * prefix + message + suffix < SIZE. Use >= here: with '>' a message that lands exactly on the
     * boundary would be emitted with its last character replaced by '\0'.
     */
    if (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) {
        /* Allocate a buffer on heap, and vsnprintf to this buffer */
        heap_buffer = malloc((size_t) (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH));
        if (heap_buffer == NULL) {
            goto cleanup;
        }

        /*
         * snprintf truncates as soon as the prefix needs SIZE or more characters (the last byte of the
         * stack buffer holds the terminator), hence >= rather than >.
         */
        if (prefix_chars >= CLOG_STACK_BUFFER_SIZE) {
            /* Prefix didn't fit into on-stack buffer, re-format it again to on-heap buffer */
            snprintf(heap_buffer, prefix_chars + 1 /* for '\0'-terminator */, CLOG_WARNING_PREFIX_FORMAT, module);
        } else {
            /* Copy pre-formatted prefix from on-stack buffer to on-heap buffer */
            memcpy(heap_buffer, stack_buffer, prefix_chars);
        }
        vsnprintf(heap_buffer + prefix_chars, format_chars + CLOG_SUFFIX_LENGTH, format, args_copy);
        out_buffer = heap_buffer;
    }
    /* Overwrite the '\0' terminator with the newline suffix; the buffer is written by length. */
    out_buffer[prefix_chars + format_chars] = '\n';
#ifdef _WIN32
    DWORD bytes_written;
    WriteFile(
        GetStdHandle(STD_ERROR_HANDLE),
        out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH,
        &bytes_written, NULL);
#else
    write(STDERR_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH);
#endif

cleanup:
    free(heap_buffer);
    va_end(args_copy);
#endif
}
/*
 * Formats an informational message and emits it with a single write.
 *
 * On Android builds with CLOG_LOG_TO_STDIO disabled the message is passed to
 * __android_log_vprint with ANDROID_LOG_INFO priority. Otherwise the line
 * "<prefix><formatted message>\n" is assembled (on the stack when it fits,
 * on the heap otherwise) and written to stdout.
 *
 * module - name inserted into the prefix ("Note (<module>): ");
 *          when NULL the generic "Note: " prefix is used.
 * format - printf-style format string.
 * args   - arguments for `format`; consumed by this call.
 *
 * A formatting error in the message or a failed allocation drops the message
 * silently.
 */
void clog_vlog_info(const char* module, const char* format, va_list args) {
#if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO
    __android_log_vprint(ANDROID_LOG_INFO, module, format, args);
#else
    char stack_buffer[CLOG_STACK_BUFFER_SIZE];
    char* heap_buffer = NULL;
    char* out_buffer = &stack_buffer[0];

    /* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */
    va_list args_copy;
    va_copy(args_copy, args);

    int prefix_chars = CLOG_INFO_PREFIX_LENGTH;
    if (module == NULL) {
        memcpy(stack_buffer, CLOG_INFO_PREFIX, CLOG_INFO_PREFIX_LENGTH);
    } else {
        prefix_chars = snprintf(stack_buffer, CLOG_STACK_BUFFER_SIZE, CLOG_INFO_PREFIX_FORMAT, module);
        if (prefix_chars < 0) {
            /* Format error in prefix (possible if prefix is modified): skip prefix and continue as if nothing happened. */
            prefix_chars = 0;
        }
    }

    int format_chars;
    if (prefix_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) {
        /*
         * Prefix + suffix alone would overflow the on-stack buffer, thus need to use on-heap buffer.
         * Do not even try to format the string into on-stack buffer.
         */
        format_chars = vsnprintf(NULL, 0, format, args);
    } else {
        format_chars =
            vsnprintf(
                &stack_buffer[prefix_chars],
                CLOG_STACK_BUFFER_SIZE - prefix_chars - CLOG_SUFFIX_LENGTH,
                format,
                args);
    }
    if (format_chars < 0) {
        /* Format error in the message: silently ignore this particular message. */
        goto cleanup;
    }

    /*
     * The on-stack vsnprintf above was given (SIZE - prefix - suffix) bytes, one of which it uses for
     * the '\0' terminator, so the message is complete on the stack only when
     * prefix + message + suffix < SIZE. Use >= here: with '>' a message that lands exactly on the
     * boundary would be emitted with its last character replaced by '\0'.
     */
    if (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) {
        /* Allocate a buffer on heap, and vsnprintf to this buffer */
        heap_buffer = malloc((size_t) (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH));
        if (heap_buffer == NULL) {
            goto cleanup;
        }

        /*
         * snprintf truncates as soon as the prefix needs SIZE or more characters (the last byte of the
         * stack buffer holds the terminator), hence >= rather than >.
         */
        if (prefix_chars >= CLOG_STACK_BUFFER_SIZE) {
            /* Prefix didn't fit into on-stack buffer, re-format it again to on-heap buffer */
            snprintf(heap_buffer, prefix_chars + 1 /* for '\0'-terminator */, CLOG_INFO_PREFIX_FORMAT, module);
        } else {
            /* Copy pre-formatted prefix from on-stack buffer to on-heap buffer */
            memcpy(heap_buffer, stack_buffer, prefix_chars);
        }
        vsnprintf(heap_buffer + prefix_chars, format_chars + CLOG_SUFFIX_LENGTH, format, args_copy);
        out_buffer = heap_buffer;
    }
    /* Overwrite the '\0' terminator with the newline suffix; the buffer is written by length. */
    out_buffer[prefix_chars + format_chars] = '\n';
#ifdef _WIN32
    DWORD bytes_written;
    WriteFile(
        GetStdHandle(STD_OUTPUT_HANDLE),
        out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH,
        &bytes_written, NULL);
#else
    write(STDOUT_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH);
#endif

cleanup:
    free(heap_buffer);
    va_end(args_copy);
#endif
}
/*
 * Formats a debug message and emits it with a single write.
 *
 * On Android builds with CLOG_LOG_TO_STDIO disabled the message is passed to
 * __android_log_vprint with ANDROID_LOG_DEBUG priority. Otherwise the line
 * "<prefix><formatted message>\n" is assembled (on the stack when it fits,
 * on the heap otherwise) and written to stdout.
 *
 * module - name inserted into the prefix ("Debug (<module>): ");
 *          when NULL the generic "Debug: " prefix is used.
 * format - printf-style format string.
 * args   - arguments for `format`; consumed by this call.
 *
 * A formatting error in the message or a failed allocation drops the message
 * silently.
 */
void clog_vlog_debug(const char* module, const char* format, va_list args) {
#if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO
    __android_log_vprint(ANDROID_LOG_DEBUG, module, format, args);
#else
    char stack_buffer[CLOG_STACK_BUFFER_SIZE];
    char* heap_buffer = NULL;
    char* out_buffer = &stack_buffer[0];

    /* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */
    va_list args_copy;
    va_copy(args_copy, args);

    int prefix_chars = CLOG_DEBUG_PREFIX_LENGTH;
    if (module == NULL) {
        memcpy(stack_buffer, CLOG_DEBUG_PREFIX, CLOG_DEBUG_PREFIX_LENGTH);
    } else {
        prefix_chars = snprintf(stack_buffer, CLOG_STACK_BUFFER_SIZE, CLOG_DEBUG_PREFIX_FORMAT, module);
        if (prefix_chars < 0) {
            /* Format error in prefix (possible if prefix is modified): skip prefix and continue as if nothing happened. */
            prefix_chars = 0;
        }
    }

    int format_chars;
    if (prefix_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) {
        /*
         * Prefix + suffix alone would overflow the on-stack buffer, thus need to use on-heap buffer.
         * Do not even try to format the string into on-stack buffer.
         */
        format_chars = vsnprintf(NULL, 0, format, args);
    } else {
        format_chars =
            vsnprintf(
                &stack_buffer[prefix_chars],
                CLOG_STACK_BUFFER_SIZE - prefix_chars - CLOG_SUFFIX_LENGTH,
                format,
                args);
    }
    if (format_chars < 0) {
        /* Format error in the message: silently ignore this particular message. */
        goto cleanup;
    }

    /*
     * The on-stack vsnprintf above was given (SIZE - prefix - suffix) bytes, one of which it uses for
     * the '\0' terminator, so the message is complete on the stack only when
     * prefix + message + suffix < SIZE. Use >= here: with '>' a message that lands exactly on the
     * boundary would be emitted with its last character replaced by '\0'.
     */
    if (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) {
        /* Allocate a buffer on heap, and vsnprintf to this buffer */
        heap_buffer = malloc((size_t) (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH));
        if (heap_buffer == NULL) {
            goto cleanup;
        }

        /*
         * snprintf truncates as soon as the prefix needs SIZE or more characters (the last byte of the
         * stack buffer holds the terminator), hence >= rather than >.
         */
        if (prefix_chars >= CLOG_STACK_BUFFER_SIZE) {
            /* Prefix didn't fit into on-stack buffer, re-format it again to on-heap buffer */
            snprintf(heap_buffer, prefix_chars + 1 /* for '\0'-terminator */, CLOG_DEBUG_PREFIX_FORMAT, module);
        } else {
            /* Copy pre-formatted prefix from on-stack buffer to on-heap buffer */
            memcpy(heap_buffer, stack_buffer, prefix_chars);
        }
        vsnprintf(heap_buffer + prefix_chars, format_chars + CLOG_SUFFIX_LENGTH, format, args_copy);
        out_buffer = heap_buffer;
    }
    /* Overwrite the '\0' terminator with the newline suffix; the buffer is written by length. */
    out_buffer[prefix_chars + format_chars] = '\n';
#ifdef _WIN32
    DWORD bytes_written;
    WriteFile(
        GetStdHandle(STD_OUTPUT_HANDLE),
        out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH,
        &bytes_written, NULL);
#else
    write(STDOUT_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH);
#endif

cleanup:
    free(heap_buffer);
    va_end(args_copy);
#endif
}

View File

@@ -0,0 +1,78 @@
#pragma once
#ifndef CPUINFO_MOCK_H
#define CPUINFO_MOCK_H
#include <stddef.h>
#include <stdint.h>
#include <cpuinfo.h>
#if defined(__linux__)
#include <sys/types.h>
#endif
#if !defined(CPUINFO_MOCK) || !(CPUINFO_MOCK)
#error This header is intended only for test use
#endif
#ifdef __cplusplus
extern "C" {
#endif
#if CPUINFO_ARCH_ARM
void CPUINFO_ABI cpuinfo_set_fpsid(uint32_t fpsid);
void CPUINFO_ABI cpuinfo_set_wcid(uint32_t wcid);
#endif /* CPUINFO_ARCH_ARM */
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
/*
 * One record of a mocked CPUID dump.
 * NOTE(review): the field names suggest input_eax/input_ecx identify the
 * queried leaf/sub-leaf and eax/ebx/ecx/edx hold the register values returned
 * for it -- confirm against the mock implementation.
 */
struct cpuinfo_mock_cpuid {
uint32_t input_eax;
uint32_t input_ecx;
uint32_t eax;
uint32_t ebx;
uint32_t ecx;
uint32_t edx;
};
/*
 * x86 test hooks. cpuinfo_mock_set_cpuid receives an array of `entries` dump
 * records; the two getters fill a 4-element register array for a given
 * eax (and ecx) input. Implementations are not part of this header.
 */
void CPUINFO_ABI cpuinfo_mock_set_cpuid(struct cpuinfo_mock_cpuid* dump, size_t entries);
void CPUINFO_ABI cpuinfo_mock_get_cpuid(uint32_t eax, uint32_t regs[4]);
void CPUINFO_ABI cpuinfo_mock_get_cpuidex(uint32_t eax, uint32_t ecx, uint32_t regs[4]);
#endif /* CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 */
/*
 * Describes one file of a mocked filesystem (the element type taken by
 * cpuinfo_mock_filesystem).
 * NOTE(review): presumably `content` holds `size` bytes and `offset` is the
 * current read position -- confirm against the mock implementation.
 */
struct cpuinfo_mock_file {
const char* path;
size_t size;
const char* content;
size_t offset;
};
/*
 * Key/value string pair (the element type taken by
 * cpuinfo_mock_android_properties).
 */
struct cpuinfo_mock_property {
const char* key;
const char* value;
};
#if defined(__linux__)
void CPUINFO_ABI cpuinfo_mock_filesystem(struct cpuinfo_mock_file* files);
int CPUINFO_ABI cpuinfo_mock_open(const char* path, int oflag);
int CPUINFO_ABI cpuinfo_mock_close(int fd);
ssize_t CPUINFO_ABI cpuinfo_mock_read(int fd, void* buffer, size_t capacity);
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
void CPUINFO_ABI cpuinfo_set_hwcap(uint32_t hwcap);
#endif
#if CPUINFO_ARCH_ARM
void CPUINFO_ABI cpuinfo_set_hwcap2(uint32_t hwcap2);
#endif
#endif
#if defined(__ANDROID__)
void CPUINFO_ABI cpuinfo_mock_android_properties(struct cpuinfo_mock_property* properties);
void CPUINFO_ABI cpuinfo_mock_gl_renderer(const char* renderer);
#endif
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* CPUINFO_MOCK_H */

File diff suppressed because it is too large Load Diff

410
dep/cpuinfo/src/api.c Normal file
View File

@@ -0,0 +1,410 @@
#include <stdbool.h>
#include <stddef.h>
#include <cpuinfo.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
#ifdef __linux__
#include <linux/api.h>
#include <unistd.h>
#include <sys/syscall.h>
#if !defined(__NR_getcpu)
#include <asm-generic/unistd.h>
#endif
#endif
/*
 * Global library state read by the accessor functions in this file.
 * Everything starts out empty (false / NULL / 0); nothing in this part of the
 * file writes to these variables.
 */
/* Checked by every accessor below before it returns data. */
bool cpuinfo_is_initialized = false;
/* Topology tables and their element counts. */
struct cpuinfo_processor* cpuinfo_processors = NULL;
struct cpuinfo_core* cpuinfo_cores = NULL;
struct cpuinfo_cluster* cpuinfo_clusters = NULL;
struct cpuinfo_package* cpuinfo_packages = NULL;
/* Per-cache-level tables, indexed by the cpuinfo_cache_level_* constants. */
struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max] = { NULL };
uint32_t cpuinfo_processors_count = 0;
uint32_t cpuinfo_cores_count = 0;
uint32_t cpuinfo_clusters_count = 0;
uint32_t cpuinfo_packages_count = 0;
uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 };
uint32_t cpuinfo_max_cache_size = 0;
/* ARM/ARM64 keep a table of microarchitectures; other architectures use a single global entry. */
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
struct cpuinfo_uarch_info* cpuinfo_uarchs = NULL;
uint32_t cpuinfo_uarchs_count = 0;
#else
struct cpuinfo_uarch_info cpuinfo_global_uarch = { cpuinfo_uarch_unknown };
#endif
/* Linux only: lookup tables indexed by the CPU number reported by getcpu, bounded by cpuinfo_linux_cpu_max. */
#ifdef __linux__
uint32_t cpuinfo_linux_cpu_max = 0;
const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL;
const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL;
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map = NULL;
#endif
#endif
/*
 * Returns the global processors table (cpuinfo_processors).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_processor* cpuinfo_get_processors(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors");
}
return cpuinfo_processors;
}
/*
 * Returns the global cores table (cpuinfo_cores).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_core* cpuinfo_get_cores(void) {
    if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
        /* The suffix must name this function ("cores"), not the indexed accessor cpuinfo_get_core. */
        cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cores");
    }
    return cpuinfo_cores;
}
/*
 * Returns the global clusters table (cpuinfo_clusters).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cluster* cpuinfo_get_clusters(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters");
}
return cpuinfo_clusters;
}
/*
 * Returns the global packages table (cpuinfo_packages).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_package* cpuinfo_get_packages(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages");
}
return cpuinfo_packages;
}
const struct cpuinfo_uarch_info* cpuinfo_get_uarchs() {
if (!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs");
}
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
return cpuinfo_uarchs;
#else
return &cpuinfo_global_uarch;
#endif
}
/*
 * Returns a pointer to entry `index` of the processors table, or NULL when
 * `index` is not below cpuinfo_processors_count.
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processor");
}
if CPUINFO_UNLIKELY(index >= cpuinfo_processors_count) {
return NULL;
}
return &cpuinfo_processors[index];
}
/*
 * Returns a pointer to entry `index` of the cores table, or NULL when
 * `index` is not below cpuinfo_cores_count.
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_core* cpuinfo_get_core(uint32_t index) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core");
}
if CPUINFO_UNLIKELY(index >= cpuinfo_cores_count) {
return NULL;
}
return &cpuinfo_cores[index];
}
/*
 * Returns a pointer to entry `index` of the clusters table, or NULL when
 * `index` is not below cpuinfo_clusters_count.
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cluster* cpuinfo_get_cluster(uint32_t index) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cluster");
}
if CPUINFO_UNLIKELY(index >= cpuinfo_clusters_count) {
return NULL;
}
return &cpuinfo_clusters[index];
}
/*
 * Returns a pointer to entry `index` of the packages table, or NULL when
 * `index` is not below cpuinfo_packages_count.
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_package* cpuinfo_get_package(uint32_t index) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "package");
}
if CPUINFO_UNLIKELY(index >= cpuinfo_packages_count) {
return NULL;
}
return &cpuinfo_packages[index];
}
/*
 * Returns a pointer to microarchitecture entry `index`, or NULL when `index`
 * is out of range. On ARM/ARM64 the valid range is [0, cpuinfo_uarchs_count);
 * on other architectures only index 0 (cpuinfo_global_uarch) is valid.
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_uarch_info* cpuinfo_get_uarch(uint32_t index) {
    if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
        cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarch");
    }
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
    if CPUINFO_UNLIKELY(index >= cpuinfo_uarchs_count) {
        return NULL;
    }
    return &cpuinfo_uarchs[index];
#else
    if CPUINFO_UNLIKELY(index != 0) {
        return NULL;
    }
    return &cpuinfo_global_uarch;
#endif
}
/*
 * Returns cpuinfo_processors_count.
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
uint32_t cpuinfo_get_processors_count(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors_count");
}
return cpuinfo_processors_count;
}
/*
 * Returns cpuinfo_cores_count.
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
uint32_t cpuinfo_get_cores_count(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cores_count");
}
return cpuinfo_cores_count;
}
/*
 * Returns cpuinfo_clusters_count.
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
uint32_t cpuinfo_get_clusters_count(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters_count");
}
return cpuinfo_clusters_count;
}
/*
 * Returns cpuinfo_packages_count.
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
uint32_t cpuinfo_get_packages_count(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages_count");
}
return cpuinfo_packages_count;
}
uint32_t cpuinfo_get_uarchs_count(void) {
if (!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs_count");
}
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
return cpuinfo_uarchs_count;
#else
return 1;
#endif
}
/*
 * Returns the L1 instruction cache table (cpuinfo_cache[cpuinfo_cache_level_1i]).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches");
}
return cpuinfo_cache[cpuinfo_cache_level_1i];
}
/*
 * Returns the L1 data cache table (cpuinfo_cache[cpuinfo_cache_level_1d]).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches");
}
return cpuinfo_cache[cpuinfo_cache_level_1d];
}
/*
 * Returns the L2 cache table (cpuinfo_cache[cpuinfo_cache_level_2]).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches");
}
return cpuinfo_cache[cpuinfo_cache_level_2];
}
/*
 * Returns the L3 cache table (cpuinfo_cache[cpuinfo_cache_level_3]).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_caches(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches");
}
return cpuinfo_cache[cpuinfo_cache_level_3];
}
/*
 * Returns the L4 cache table (cpuinfo_cache[cpuinfo_cache_level_4]).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_caches(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches");
}
return cpuinfo_cache[cpuinfo_cache_level_4];
}
/*
 * Returns a pointer to L1 instruction cache entry `index`, or NULL when
 * `index` is not below cpuinfo_cache_count[cpuinfo_cache_level_1i].
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_cache");
}
if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1i]) {
return NULL;
}
return &cpuinfo_cache[cpuinfo_cache_level_1i][index];
}
/*
 * Returns a pointer to L1 data cache entry `index`, or NULL when
 * `index` is not below cpuinfo_cache_count[cpuinfo_cache_level_1d].
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_cache");
}
if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1d]) {
return NULL;
}
return &cpuinfo_cache[cpuinfo_cache_level_1d][index];
}
/*
 * Returns a pointer to L2 cache entry `index`, or NULL when
 * `index` is not below cpuinfo_cache_count[cpuinfo_cache_level_2].
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_cache");
}
if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_2]) {
return NULL;
}
return &cpuinfo_cache[cpuinfo_cache_level_2][index];
}
/*
 * Returns a pointer to L3 cache entry `index`, or NULL when
 * `index` is not below cpuinfo_cache_count[cpuinfo_cache_level_3].
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_cache");
}
if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_3]) {
return NULL;
}
return &cpuinfo_cache[cpuinfo_cache_level_3][index];
}
/*
 * Returns a pointer to L4 cache entry `index`, or NULL when
 * `index` is not below cpuinfo_cache_count[cpuinfo_cache_level_4].
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_cache");
}
if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_4]) {
return NULL;
}
return &cpuinfo_cache[cpuinfo_cache_level_4][index];
}
/*
 * Returns the number of L1 instruction cache entries (cpuinfo_cache_count[cpuinfo_cache_level_1i]).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches_count");
}
return cpuinfo_cache_count[cpuinfo_cache_level_1i];
}
/*
 * Returns the number of L1 data cache entries (cpuinfo_cache_count[cpuinfo_cache_level_1d]).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches_count");
}
return cpuinfo_cache_count[cpuinfo_cache_level_1d];
}
/*
 * Returns the number of L2 cache entries (cpuinfo_cache_count[cpuinfo_cache_level_2]).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches_count");
}
return cpuinfo_cache_count[cpuinfo_cache_level_2];
}
/*
 * Returns the number of L3 cache entries (cpuinfo_cache_count[cpuinfo_cache_level_3]).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches_count");
}
return cpuinfo_cache_count[cpuinfo_cache_level_3];
}
/*
 * Returns the number of L4 cache entries (cpuinfo_cache_count[cpuinfo_cache_level_4]).
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches_count");
}
return cpuinfo_cache_count[cpuinfo_cache_level_4];
}
/*
 * Returns cpuinfo_max_cache_size.
 * Reports a fatal error through cpuinfo_log_fatal when called before initialization.
 */
uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void) {
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "max_cache_size");
}
return cpuinfo_max_cache_size;
}
/*
 * Returns the descriptor of the logical processor the calling thread is running on.
 *
 * Reports a fatal error through cpuinfo_log_fatal when called before cpuinfo is initialized.
 * On Linux the CPU number is obtained with the getcpu system call and translated through
 * cpuinfo_linux_cpu_to_processor_map; NULL is returned when the system call fails or the
 * CPU number is not below cpuinfo_linux_cpu_max. On other systems the result is always NULL.
 */
const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) {
	if (CPUINFO_UNLIKELY(!cpuinfo_is_initialized)) {
		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor");
	}
#ifndef __linux__
	return NULL;
#else
	/* Zero-initialized up front: this silences a MemorySanitizer error. */
	unsigned current_cpu = 0;
	if (CPUINFO_UNLIKELY(syscall(__NR_getcpu, &current_cpu, NULL, NULL) != 0) ||
		CPUINFO_UNLIKELY((uint32_t) current_cpu >= cpuinfo_linux_cpu_max))
	{
		return NULL;
	}
	return cpuinfo_linux_cpu_to_processor_map[current_cpu];
#endif
}
/*
 * Returns the descriptor of the core the calling thread is running on.
 *
 * Reports a fatal error through cpuinfo_log_fatal when called before cpuinfo is initialized.
 * On Linux the CPU number is obtained with the getcpu system call and translated through
 * cpuinfo_linux_cpu_to_core_map; NULL is returned when the system call fails or the
 * CPU number is not below cpuinfo_linux_cpu_max. On other systems the result is always NULL.
 */
const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) {
	if (CPUINFO_UNLIKELY(!cpuinfo_is_initialized)) {
		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core");
	}
#ifndef __linux__
	return NULL;
#else
	/* Zero-initialized up front: this silences a MemorySanitizer error. */
	unsigned current_cpu = 0;
	if (CPUINFO_UNLIKELY(syscall(__NR_getcpu, &current_cpu, NULL, NULL) != 0) ||
		CPUINFO_UNLIKELY((uint32_t) current_cpu >= cpuinfo_linux_cpu_max))
	{
		return NULL;
	}
	return cpuinfo_linux_cpu_to_core_map[current_cpu];
#endif
}
/*
 * Returns the microarchitecture index of the core the calling thread is running on.
 *
 * Reports a fatal error through cpuinfo_log_fatal when called before cpuinfo is initialized.
 * The result is 0 in every case except ARM/ARM64 Linux with a non-NULL
 * cpuinfo_linux_cpu_to_uarch_index_map, where the current CPU number (from the getcpu
 * system call) selects the entry of that map. A failed system call or a CPU number not
 * below cpuinfo_linux_cpu_max also yields 0.
 */
uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) {
	if (CPUINFO_UNLIKELY(!cpuinfo_is_initialized)) {
		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index");
	}
#if (CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64) && defined(__linux__)
	/* Special case: avoid the syscall on systems with only a single type of cores. */
	if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) {
		return 0;
	}
	/* General case. Zero-initializing the variable silences a MemorySanitizer error. */
	unsigned current_cpu = 0;
	if (CPUINFO_UNLIKELY(syscall(__NR_getcpu, &current_cpu, NULL, NULL) != 0) ||
		CPUINFO_UNLIKELY((uint32_t) current_cpu >= cpuinfo_linux_cpu_max))
	{
		return 0;
	}
	return cpuinfo_linux_cpu_to_uarch_index_map[current_cpu];
#else
	/*
	 * Two configurations end up here:
	 * - ARM/ARM64 without Linux: fallback, pretend to be on the big core.
	 * - Other architectures: only ARM/ARM64 processors may include cores of
	 *   different types in the same package.
	 */
	return 0;
#endif
}
/*
 * Returns the microarchitecture index of the core the calling thread is running on,
 * falling back to `default_uarch_index` when the current core cannot be determined.
 *
 * Reports a fatal error through cpuinfo_log_fatal when called before cpuinfo is initialized.
 * - Non-ARM builds: always 0.
 * - ARM/ARM64 builds without Linux: always `default_uarch_index`.
 * - ARM/ARM64 Linux: 0 when cpuinfo_linux_cpu_to_uarch_index_map is NULL; otherwise the
 *   map entry for the current CPU, or `default_uarch_index` when the getcpu system call
 *   fails or the CPU number is not below cpuinfo_linux_cpu_max.
 */
uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index) {
	if (CPUINFO_UNLIKELY(!cpuinfo_is_initialized)) {
		cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index_with_default");
	}
#if !(CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64)
	/* Only ARM/ARM64 processors may include cores of different types in the same package. */
	return 0;
#elif !defined(__linux__)
	/* Fallback: no API to query current core, use default uarch index. */
	return default_uarch_index;
#else
	/* Special case: avoid the syscall on systems with only a single type of cores. */
	if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) {
		return 0;
	}
	/* General case. Zero-initializing the variable silences a MemorySanitizer error. */
	unsigned current_cpu = 0;
	if (CPUINFO_UNLIKELY(syscall(__NR_getcpu, &current_cpu, NULL, NULL) != 0) ||
		CPUINFO_UNLIKELY((uint32_t) current_cpu >= cpuinfo_linux_cpu_max))
	{
		return default_uarch_index;
	}
	return cpuinfo_linux_cpu_to_uarch_index_map[current_cpu];
#endif
}

View File

@@ -0,0 +1,20 @@
#pragma once
#include <cpuinfo.h>
#include <cpuinfo/common.h>
#include <arm/api.h>
#include <arm/linux/api.h>
/*
 * Identifiers for the individual Android chipset-related properties.
 * The enumerator suffixes match the field names of struct cpuinfo_android_properties
 * (/proc/cpuinfo "Hardware" plus the ro.* system properties).
 * Values start at 0 and are consecutive, so cpuinfo_android_chipset_property_max
 * equals the number of properties.
 */
enum cpuinfo_android_chipset_property {
cpuinfo_android_chipset_property_proc_cpuinfo_hardware = 0,
cpuinfo_android_chipset_property_ro_product_board,
cpuinfo_android_chipset_property_ro_board_platform,
cpuinfo_android_chipset_property_ro_mediatek_platform,
cpuinfo_android_chipset_property_ro_arch,
cpuinfo_android_chipset_property_ro_chipname,
cpuinfo_android_chipset_property_ro_hardware_chipname,
/* Sentinel: number of enumerators above, not a real property. */
cpuinfo_android_chipset_property_max,
};
/*
 * Reads Android system properties into the ro_* fields of `properties`.
 * `properties` must point to at least one valid structure (never NULL).
 */
CPUINFO_INTERNAL void cpuinfo_arm_android_parse_properties(
struct cpuinfo_android_properties properties[restrict static 1]);

View File

@@ -0,0 +1,67 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <sys/system_properties.h>
#include <linux/api.h>
#include <arm/android/api.h>
#include <arm/linux/api.h>
#include <cpuinfo/log.h>
#if CPUINFO_MOCK
	#include <cpuinfo-mock.h>

	/* Table of mocked properties; NULL until cpuinfo_mock_android_properties is called. */
	static struct cpuinfo_mock_property* cpuinfo_mock_properties = NULL;

	/*
	 * Installs the table of mocked Android properties.
	 * The lookup below walks the table until it reaches an entry whose key is NULL,
	 * so the table must end with such an entry.
	 */
	void CPUINFO_ABI cpuinfo_mock_android_properties(struct cpuinfo_mock_property* properties) {
		cpuinfo_log_info("Android properties mocking enabled");
		cpuinfo_mock_properties = properties;
	}

	/*
	 * Mock replacement for __system_property_get.
	 *
	 * Copies the mocked value for `key` into `value` and returns its length.
	 * `value` must have room for CPUINFO_BUILD_PROP_VALUE_MAX bytes.
	 * When the key is not mocked (or mocking is disabled), `value` is set to the
	 * empty string and 0 is returned.
	 */
	static int cpuinfo_android_property_get(const char* key, char* value) {
		if (cpuinfo_mock_properties != NULL) {
			for (const struct cpuinfo_mock_property* prop = cpuinfo_mock_properties; prop->key != NULL; prop++) {
				if (strncmp(key, prop->key, CPUINFO_BUILD_PROP_NAME_MAX) == 0) {
					/*
					 * strncpy does not NUL-terminate the destination when the source
					 * fills the whole buffer. Copy at most MAX - 1 bytes and terminate
					 * explicitly so that `value` is always a valid C string, and report
					 * the length of what was actually stored.
					 */
					strncpy(value, prop->value, CPUINFO_BUILD_PROP_VALUE_MAX - 1);
					value[CPUINFO_BUILD_PROP_VALUE_MAX - 1] = '\0';
					return (int) strnlen(prop->value, CPUINFO_BUILD_PROP_VALUE_MAX - 1);
				}
			}
		}
		*value = '\0';
		return 0;
	}
#else
	/* Thin wrapper over the Android NDK property API; returns whatever __system_property_get returns. */
	static inline int cpuinfo_android_property_get(const char* key, char* value) {
		return __system_property_get(key, value);
	}
#endif
/*
 * Fills the ro_* fields of `properties` by querying the corresponding Android
 * system properties one after another, logging each value at debug level.
 * The proc_cpuinfo_hardware field is not touched here.
 */
void cpuinfo_arm_android_parse_properties(struct cpuinfo_android_properties properties[restrict static 1]) {
	/* ro.product.board */
	const int board_length = cpuinfo_android_property_get(
		"ro.product.board", properties->ro_product_board);
	cpuinfo_log_debug("read ro.product.board = \"%.*s\"",
		board_length, properties->ro_product_board);

	/* ro.board.platform */
	const int platform_length = cpuinfo_android_property_get(
		"ro.board.platform", properties->ro_board_platform);
	cpuinfo_log_debug("read ro.board.platform = \"%.*s\"",
		platform_length, properties->ro_board_platform);

	/* ro.mediatek.platform */
	const int mediatek_length = cpuinfo_android_property_get(
		"ro.mediatek.platform", properties->ro_mediatek_platform);
	cpuinfo_log_debug("read ro.mediatek.platform = \"%.*s\"",
		mediatek_length, properties->ro_mediatek_platform);

	/* ro.arch */
	const int arch_length = cpuinfo_android_property_get(
		"ro.arch", properties->ro_arch);
	cpuinfo_log_debug("read ro.arch = \"%.*s\"",
		arch_length, properties->ro_arch);

	/* ro.chipname */
	const int chipname_length = cpuinfo_android_property_get(
		"ro.chipname", properties->ro_chipname);
	cpuinfo_log_debug("read ro.chipname = \"%.*s\"",
		chipname_length, properties->ro_chipname);

	/* ro.hardware.chipname */
	const int hardware_chipname_length = cpuinfo_android_property_get(
		"ro.hardware.chipname", properties->ro_hardware_chipname);
	cpuinfo_log_debug("read ro.hardware.chipname = \"%.*s\"",
		hardware_chipname_length, properties->ro_hardware_chipname);
}

154
dep/cpuinfo/src/arm/api.h Normal file
View File

@@ -0,0 +1,154 @@
#pragma once
#include <stdbool.h>
#include <stdint.h>
#include <cpuinfo.h>
#include <cpuinfo/common.h>
/*
 * Chipset (SoC) vendors that can be recorded in struct cpuinfo_arm_chipset.
 * Values start at 0 (unknown) and are consecutive, so
 * cpuinfo_arm_chipset_vendor_max equals the number of enumerators before it.
 */
enum cpuinfo_arm_chipset_vendor {
cpuinfo_arm_chipset_vendor_unknown = 0,
cpuinfo_arm_chipset_vendor_qualcomm,
cpuinfo_arm_chipset_vendor_mediatek,
cpuinfo_arm_chipset_vendor_samsung,
cpuinfo_arm_chipset_vendor_hisilicon,
cpuinfo_arm_chipset_vendor_actions,
cpuinfo_arm_chipset_vendor_allwinner,
cpuinfo_arm_chipset_vendor_amlogic,
cpuinfo_arm_chipset_vendor_broadcom,
cpuinfo_arm_chipset_vendor_lg,
cpuinfo_arm_chipset_vendor_leadcore,
cpuinfo_arm_chipset_vendor_marvell,
cpuinfo_arm_chipset_vendor_mstar,
cpuinfo_arm_chipset_vendor_novathor,
cpuinfo_arm_chipset_vendor_nvidia,
cpuinfo_arm_chipset_vendor_pinecone,
cpuinfo_arm_chipset_vendor_renesas,
cpuinfo_arm_chipset_vendor_rockchip,
cpuinfo_arm_chipset_vendor_spreadtrum,
cpuinfo_arm_chipset_vendor_telechips,
cpuinfo_arm_chipset_vendor_texas_instruments,
cpuinfo_arm_chipset_vendor_wondermedia,
/* Sentinel: count of the enumerators above, not a real vendor. */
cpuinfo_arm_chipset_vendor_max,
};
/*
 * Chipset product series that can be recorded in struct cpuinfo_arm_chipset.
 * Each enumerator name is prefixed with the vendor it belongs to.
 * Values start at 0 (unknown) and are consecutive, so
 * cpuinfo_arm_chipset_series_max equals the number of enumerators before it.
 */
enum cpuinfo_arm_chipset_series {
cpuinfo_arm_chipset_series_unknown = 0,
cpuinfo_arm_chipset_series_qualcomm_qsd,
cpuinfo_arm_chipset_series_qualcomm_msm,
cpuinfo_arm_chipset_series_qualcomm_apq,
cpuinfo_arm_chipset_series_qualcomm_snapdragon,
cpuinfo_arm_chipset_series_mediatek_mt,
cpuinfo_arm_chipset_series_samsung_exynos,
cpuinfo_arm_chipset_series_hisilicon_k3v,
cpuinfo_arm_chipset_series_hisilicon_hi,
cpuinfo_arm_chipset_series_hisilicon_kirin,
cpuinfo_arm_chipset_series_actions_atm,
cpuinfo_arm_chipset_series_allwinner_a,
cpuinfo_arm_chipset_series_amlogic_aml,
cpuinfo_arm_chipset_series_amlogic_s,
cpuinfo_arm_chipset_series_broadcom_bcm,
cpuinfo_arm_chipset_series_lg_nuclun,
cpuinfo_arm_chipset_series_leadcore_lc,
cpuinfo_arm_chipset_series_marvell_pxa,
cpuinfo_arm_chipset_series_mstar_6a,
cpuinfo_arm_chipset_series_novathor_u,
cpuinfo_arm_chipset_series_nvidia_tegra_t,
cpuinfo_arm_chipset_series_nvidia_tegra_ap,
cpuinfo_arm_chipset_series_nvidia_tegra_sl,
cpuinfo_arm_chipset_series_pinecone_surge_s,
cpuinfo_arm_chipset_series_renesas_mp,
cpuinfo_arm_chipset_series_rockchip_rk,
cpuinfo_arm_chipset_series_spreadtrum_sc,
cpuinfo_arm_chipset_series_telechips_tcc,
cpuinfo_arm_chipset_series_texas_instruments_omap,
cpuinfo_arm_chipset_series_wondermedia_wm,
/* Sentinel: count of the enumerators above, not a real series. */
cpuinfo_arm_chipset_series_max,
};
/* Size, in bytes, of the suffix buffer in struct cpuinfo_arm_chipset. */
#define CPUINFO_ARM_CHIPSET_SUFFIX_MAX 8
/*
 * Decoded chipset identity: vendor, product series, numeric model and a short suffix.
 * For example, the ISA decoders in this library test for
 * series == cpuinfo_arm_chipset_series_samsung_exynos with model == 9810.
 */
struct cpuinfo_arm_chipset {
enum cpuinfo_arm_chipset_vendor vendor;
enum cpuinfo_arm_chipset_series series;
/* Numeric model within the series. */
uint32_t model;
/* Text following the model number. NOTE(review): NUL-termination when all 8 bytes are used is not shown here - confirm. */
char suffix[CPUINFO_ARM_CHIPSET_SUFFIX_MAX];
};
/* Size of the buffer passed to cpuinfo_arm_chipset_to_string; same as the package name limit. */
#define CPUINFO_ARM_CHIPSET_NAME_MAX CPUINFO_PACKAGE_NAME_MAX
/*
 * The same set of internal functions is declared up to three times below:
 * - C, non-MSVC compilers: parameters use the C99 `[restrict static N]` array syntax.
 * - C, MSVC (_MSC_VER defined): the same functions with plain `[N]` array parameters.
 * - C++: only cpuinfo_arm_decode_cache is declared, with plain `[1]` array parameters.
 */
#ifndef __cplusplus
#ifndef _MSC_VER
/* Writes a textual name for `chipset` into `name` (at least CPUINFO_ARM_CHIPSET_NAME_MAX bytes). */
CPUINFO_INTERNAL void cpuinfo_arm_chipset_to_string(
const struct cpuinfo_arm_chipset chipset[restrict static 1],
char name[restrict static CPUINFO_ARM_CHIPSET_NAME_MAX]);
/* Adjusts `chipset` in place, given the core count and the maximum CPU frequency. */
CPUINFO_INTERNAL void cpuinfo_arm_fixup_chipset(
struct cpuinfo_arm_chipset chipset[restrict static 1], uint32_t cores, uint32_t max_cpu_freq_max);
/* Derives vendor and microarchitecture from a MIDR value; 32-bit ARM builds take an extra VFPv4 hint. */
CPUINFO_INTERNAL void cpuinfo_arm_decode_vendor_uarch(
uint32_t midr,
#if CPUINFO_ARCH_ARM
bool has_vfpv4,
#endif
enum cpuinfo_vendor vendor[restrict static 1],
enum cpuinfo_uarch uarch[restrict static 1]);
/* Fills the L1I/L1D/L2/L3 cache descriptors for one cluster of cores. */
CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
enum cpuinfo_uarch uarch,
uint32_t cluster_cores,
uint32_t midr,
const struct cpuinfo_arm_chipset chipset[restrict static 1],
uint32_t cluster_id,
uint32_t arch_version,
struct cpuinfo_cache l1i[restrict static 1],
struct cpuinfo_cache l1d[restrict static 1],
struct cpuinfo_cache l2[restrict static 1],
struct cpuinfo_cache l3[restrict static 1]);
/* Returns the maximum cache size computed for `processor`. */
CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size(
const struct cpuinfo_processor processor[1]);
#else
/* MSVC variant: identical functions, declared without `restrict static`. */
CPUINFO_INTERNAL void cpuinfo_arm_chipset_to_string(
const struct cpuinfo_arm_chipset chipset[1],
char name[CPUINFO_ARM_CHIPSET_NAME_MAX]);
CPUINFO_INTERNAL void cpuinfo_arm_fixup_chipset(
struct cpuinfo_arm_chipset chipset[1], uint32_t cores, uint32_t max_cpu_freq_max);
CPUINFO_INTERNAL void cpuinfo_arm_decode_vendor_uarch(
uint32_t midr,
#if CPUINFO_ARCH_ARM
bool has_vfpv4,
#endif
enum cpuinfo_vendor vendor[1],
enum cpuinfo_uarch uarch[1]);
CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
enum cpuinfo_uarch uarch,
uint32_t cluster_cores,
uint32_t midr,
const struct cpuinfo_arm_chipset chipset[1],
uint32_t cluster_id,
uint32_t arch_version,
struct cpuinfo_cache l1i[1],
struct cpuinfo_cache l1d[1],
struct cpuinfo_cache l2[1],
struct cpuinfo_cache l3[1]);
CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size(
const struct cpuinfo_processor processor[1]);
#endif
#else /* defined(__cplusplus) */
/* C++ variant: only the cache decoder is exposed. */
CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
enum cpuinfo_uarch uarch,
uint32_t cluster_cores,
uint32_t midr,
const struct cpuinfo_arm_chipset chipset[1],
uint32_t cluster_id,
uint32_t arch_version,
struct cpuinfo_cache l1i[1],
struct cpuinfo_cache l1d[1],
struct cpuinfo_cache l2[1],
struct cpuinfo_cache l3[1]);
#endif

1687
dep/cpuinfo/src/arm/cache.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,267 @@
#include <stdint.h>
#if CPUINFO_MOCK
#include <cpuinfo-mock.h>
#endif
#include <arm/linux/api.h>
#include <arm/linux/cp.h>
#include <arm/midr.h>
#include <cpuinfo/log.h>
#if CPUINFO_MOCK
/*
 * Mock-only storage for coprocessor register values, all initially 0.
 * NOTE(review): presumably read by read_fpsid()/read_wcid() in mock builds - confirm in arm/linux/cp.h.
 */
uint32_t cpuinfo_arm_fpsid = 0;
uint32_t cpuinfo_arm_mvfr0 = 0;
uint32_t cpuinfo_arm_wcid = 0;
/* Sets the mocked FPSID value. */
void cpuinfo_set_fpsid(uint32_t fpsid) {
cpuinfo_arm_fpsid = fpsid;
}
/* Sets the mocked WCID value. */
void cpuinfo_set_wcid(uint32_t wcid) {
cpuinfo_arm_wcid = wcid;
}
#endif
/*
 * Sets the ISA extension flags in `isa` for a 32-bit ARM process.
 *
 * Parameters:
 * - features: bitmask tested against CPUINFO_ARM_LINUX_FEATURE_* bits.
 * - features2: bitmask tested against CPUINFO_ARM_LINUX_FEATURE2_* bits (AES, PMULL, SHA1, SHA2, CRC32).
 * - midr: Main ID Register value, used for per-core heuristics.
 * - architecture_version: ARM architecture version; may be adjusted locally (ARM11 fixup, ARMv7 feature upgrade).
 * - architecture_flags: bitmask tested against CPUINFO_ARM_LINUX_ARCH_* bits (T, E, J).
 * - chipset: decoded chipset, used for the Exynos 9810 special case.
 * - isa: output; flags are only ever set to true here, except `dot` and `armv7mp`,
 *   which are assigned a computed boolean in some branches.
 *
 * For architecture_version >= 8 a fixed baseline is assumed and FP16/RDM/DOT are decided from MIDR.
 * Otherwise the flags are derived from `features`, `architecture_flags` and MIDR heuristics.
 * The crypto/CRC32 flags from `features2` are applied in both cases.
 */
void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
uint32_t features,
uint32_t features2,
uint32_t midr,
uint32_t architecture_version,
uint32_t architecture_flags,
const struct cpuinfo_arm_chipset chipset[restrict static 1],
struct cpuinfo_arm_isa isa[restrict static 1])
{
if (architecture_version >= 8) {
/*
* ARMv7 code running on ARMv8: IDIV, VFP, NEON are always supported,
* but may be not reported in /proc/cpuinfo features.
*/
isa->armv5e = true;
isa->armv6 = true;
isa->armv6k = true;
isa->armv7 = true;
isa->armv7mp = true;
isa->armv8 = true;
isa->thumb = true;
isa->thumb2 = true;
isa->idiv = true;
isa->vfpv3 = true;
isa->d32 = true;
isa->fp16 = true;
isa->fma = true;
isa->neon = true;
/*
* NEON FP16 compute extension and VQRDMLAH/VQRDMLSH instructions are not indicated in /proc/cpuinfo.
* Use a MIDR-based heuristic to whitelist processors known to support it:
* - Processors with Cortex-A55 cores
* - Processors with Cortex-A65 cores
* - Processors with Cortex-A75 cores
* - Processors with Cortex-A76 cores
* - Processors with Cortex-A77 cores
* - Processors with Exynos M4 cores
* - Processors with Exynos M5 cores
* - Neoverse N1 cores
*/
if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
/* Only little cores of Exynos 9810 support FP16 & RDM */
cpuinfo_log_warning("FP16 arithmetics and RDM disabled: only little cores in Exynos 9810 support these extensions");
} else {
/* Match on implementer + part number only; variant and revision bits are masked out. */
switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
case UINT32_C(0x4100D050): /* Cortex-A55 */
case UINT32_C(0x4100D060): /* Cortex-A65 */
case UINT32_C(0x4100D0B0): /* Cortex-A76 */
case UINT32_C(0x4100D0C0): /* Neoverse N1 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
case UINT32_C(0x53000030): /* Exynos M4 */
case UINT32_C(0x53000040): /* Exynos M5 */
isa->fp16arith = true;
isa->rdm = true;
break;
}
}
/*
* NEON VDOT instructions are not indicated in /proc/cpuinfo.
* Use a MIDR-based heuristic to whitelist processors known to support it.
*/
switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
case UINT32_C(0x4100D0B0): /* Cortex-A76 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
case UINT32_C(0x53000030): /* Exynos-M4 */
case UINT32_C(0x53000040): /* Exynos-M5 */
isa->dot = true;
break;
case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */
isa->dot = !!(midr_get_variant(midr) >= 1);
break;
case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */
isa->dot = !!(midr_get_variant(midr) >= 2);
break;
}
} else {
/* ARMv7 or lower: use feature flags to detect optional features */
/*
* ARM11 (ARM 1136/1156/1176/11 MPCore) processors can report v7 architecture
* even though they support only ARMv6 instruction set.
*/
if (architecture_version == 7 && midr_is_arm11(midr)) {
cpuinfo_log_warning("kernel-reported architecture ARMv7 ignored due to mismatch with processor microarchitecture (ARM11)");
architecture_version = 6;
}
/* Conversely, any ARMv7-only feature bit upgrades a lower reported version to 7. */
if (architecture_version < 7) {
const uint32_t armv7_features_mask = CPUINFO_ARM_LINUX_FEATURE_VFPV3 | CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | CPUINFO_ARM_LINUX_FEATURE_VFPD32 |
CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON | CPUINFO_ARM_LINUX_FEATURE_IDIVT | CPUINFO_ARM_LINUX_FEATURE_IDIVA;
if (features & armv7_features_mask) {
architecture_version = 7;
}
}
/* From here on architecture_version is the adjusted value, not the kernel-reported one. */
if ((architecture_version >= 6) || (features & CPUINFO_ARM_LINUX_FEATURE_EDSP) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_E)) {
isa->armv5e = true;
}
if (architecture_version >= 6) {
isa->armv6 = true;
}
if (architecture_version >= 7) {
isa->armv6k = true;
isa->armv7 = true;
/*
* ARMv7 MP extension (PLDW instruction) is not indicated in /proc/cpuinfo.
* Use heuristic list of supporting processors:
* - Processors supporting UDIV/SDIV instructions ("idiva" + "idivt" features in /proc/cpuinfo)
* - Cortex-A5
* - Cortex-A9
* - Dual-Core Scorpion
* - Krait (supports UDIV/SDIV, but kernels may not report it in /proc/cpuinfo)
*
* TODO: check single-core Qualcomm Scorpion.
*/
switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
case UINT32_C(0x4100C050): /* Cortex-A5 */
case UINT32_C(0x4100C090): /* Cortex-A9 */
case UINT32_C(0x510002D0): /* Scorpion (dual-core) */
case UINT32_C(0x510004D0): /* Krait (dual-core) */
case UINT32_C(0x510006F0): /* Krait (quad-core) */
isa->armv7mp = true;
break;
default:
/* In practice IDIV instruction implies ARMv7+MP ISA */
isa->armv7mp = (features & CPUINFO_ARM_LINUX_FEATURE_IDIV) == CPUINFO_ARM_LINUX_FEATURE_IDIV;
break;
}
}
if (features & CPUINFO_ARM_LINUX_FEATURE_IWMMXT) {
/* Cross-check the kernel-reported iwmmxt flag against the coprocessor type field of WCID (bits 8-15). */
const uint32_t wcid = read_wcid();
cpuinfo_log_debug("WCID = 0x%08"PRIx32, wcid);
const uint32_t coprocessor_type = (wcid >> 8) & UINT32_C(0xFF);
if (coprocessor_type >= 0x10) {
isa->wmmx = true;
if (coprocessor_type >= 0x20) {
isa->wmmx2 = true;
}
} else {
cpuinfo_log_warning("WMMX ISA disabled: OS reported iwmmxt feature, "
"but WCID coprocessor type 0x%"PRIx32" indicates no WMMX support",
coprocessor_type);
}
}
if ((features & CPUINFO_ARM_LINUX_FEATURE_THUMB) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_T)) {
isa->thumb = true;
/*
* There is no separate feature flag for Thumb 2.
* All ARMv7 processors and ARM 1156 support Thumb 2.
*/
if (architecture_version >= 7 || midr_is_arm1156(midr)) {
isa->thumb2 = true;
}
}
if (features & CPUINFO_ARM_LINUX_FEATURE_THUMBEE) {
isa->thumbee = true;
}
if ((features & CPUINFO_ARM_LINUX_FEATURE_JAVA) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_J)) {
isa->jazelle = true;
}
/* Qualcomm Krait may have buggy kernel configuration that doesn't report IDIV */
if ((features & CPUINFO_ARM_LINUX_FEATURE_IDIV) == CPUINFO_ARM_LINUX_FEATURE_IDIV || midr_is_krait(midr)) {
isa->idiv = true;
}
/* Any VFP-family or NEON bit means some VFP version is present; the version is refined below. */
const uint32_t vfp_mask = \
CPUINFO_ARM_LINUX_FEATURE_VFP | CPUINFO_ARM_LINUX_FEATURE_VFPV3 | CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | \
CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON;
if (features & vfp_mask) {
const uint32_t vfpv3_mask = CPUINFO_ARM_LINUX_FEATURE_VFPV3 | CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | \
CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON;
if ((architecture_version >= 7) || (features & vfpv3_mask)) {
isa->vfpv3 = true;
const uint32_t d32_mask = CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON;
if (features & d32_mask) {
isa->d32 = true;
}
} else {
/* Only the plain "vfp" bit is set: a build targeting ARMv7+ assumes VFPv3, otherwise FPSID is consulted. */
#if defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
isa->vfpv3 = true;
#else
const uint32_t fpsid = read_fpsid();
cpuinfo_log_debug("FPSID = 0x%08"PRIx32, fpsid);
const uint32_t subarchitecture = (fpsid >> 16) & UINT32_C(0x7F);
if (subarchitecture >= 0x01) {
isa->vfpv2 = true;
}
#endif
}
}
if (features & CPUINFO_ARM_LINUX_FEATURE_NEON) {
isa->neon = true;
}
/*
* There is no separate feature flag for FP16 support.
* VFPv4 implies VFPv3-FP16 support (and in practice, NEON-HP as well).
* Additionally, ARM Cortex-A9 and Qualcomm Scorpion support FP16.
*/
if ((features & CPUINFO_ARM_LINUX_FEATURE_VFPV4) || midr_is_cortex_a9(midr) || midr_is_scorpion(midr)) {
isa->fp16 = true;
}
if (features & CPUINFO_ARM_LINUX_FEATURE_VFPV4) {
isa->fma = true;
}
}
/* Crypto and CRC32 extensions come from the second feature word regardless of architecture version. */
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_AES) {
isa->aes = true;
}
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_PMULL) {
isa->pmull = true;
}
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SHA1) {
isa->sha1 = true;
}
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SHA2) {
isa->sha2 = true;
}
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_CRC32) {
isa->crc32 = true;
}
}

View File

@@ -0,0 +1,127 @@
#include <stdint.h>
#include <arm/linux/api.h>
#include <cpuinfo/log.h>
/*
 * Sets the ISA extension flags in `isa` for an AArch64 process.
 *
 * Parameters:
 * - features: bitmask tested against CPUINFO_ARM_LINUX_FEATURE_* bits.
 * - features2: bitmask tested against CPUINFO_ARM_LINUX_FEATURE2_* bits (only SVE2 is consulted).
 * - midr: Main ID Register value, used to whitelist cores whose kernels may under-report features.
 * - chipset: decoded chipset, used for the Exynos 9810 special case.
 * - isa: output; flags are only ever set to true here, except `dot`, which is assigned
 *   a computed boolean for Cortex-A55 and Cortex-A75.
 */
void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
	uint32_t features,
	uint32_t features2,
	uint32_t midr,
	const struct cpuinfo_arm_chipset chipset[restrict static 1],
	struct cpuinfo_arm_isa isa[restrict static 1])
{
	if (features & CPUINFO_ARM_LINUX_FEATURE_AES) {
		isa->aes = true;
	}
	if (features & CPUINFO_ARM_LINUX_FEATURE_PMULL) {
		isa->pmull = true;
	}
	if (features & CPUINFO_ARM_LINUX_FEATURE_SHA1) {
		isa->sha1 = true;
	}
	if (features & CPUINFO_ARM_LINUX_FEATURE_SHA2) {
		isa->sha2 = true;
	}
	if (features & CPUINFO_ARM_LINUX_FEATURE_CRC32) {
		isa->crc32 = true;
	}
	if (features & CPUINFO_ARM_LINUX_FEATURE_ATOMICS) {
		isa->atomics = true;
	}

	/*
	 * Some phones ship with an old kernel configuration that doesn't report NEON FP16 compute extension and SQRDMLAH/SQRDMLSH/UQRDMLAH/UQRDMLSH instructions.
	 * Use a MIDR-based heuristic to whitelist processors known to support it:
	 * - Processors with Cortex-A55 cores
	 * - Processors with Cortex-A65 cores
	 * - Processors with Cortex-A75 cores
	 * - Processors with Cortex-A76 cores
	 * - Processors with Cortex-A77 cores
	 * - Processors with Exynos M4 cores
	 * - Processors with Exynos M5 cores
	 * - Neoverse N1 cores
	 */
	if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
		/* Exynos 9810 reports that it supports FP16 compute, but in fact only little cores do */
		cpuinfo_log_warning("FP16 arithmetics and RDM disabled: only little cores in Exynos 9810 support these extensions");
	} else {
		/* FP16 arithmetic is only enabled when both the scalar (FPHP) and SIMD (ASIMDHP) bits are reported. */
		const uint32_t fp16arith_mask = CPUINFO_ARM_LINUX_FEATURE_FPHP | CPUINFO_ARM_LINUX_FEATURE_ASIMDHP;
		/* Match on implementer + part number only; variant and revision bits are masked out. */
		switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
			case UINT32_C(0x4100D050): /* Cortex-A55 */
			case UINT32_C(0x4100D060): /* Cortex-A65 */
			case UINT32_C(0x4100D0B0): /* Cortex-A76 */
			case UINT32_C(0x4100D0C0): /* Neoverse N1 */
			case UINT32_C(0x4100D0D0): /* Cortex-A77 */
			case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
			case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
			case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
			case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
			case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
			case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
			case UINT32_C(0x53000030): /* Exynos M4 */
			case UINT32_C(0x53000040): /* Exynos M5 */
				isa->fp16arith = true;
				isa->rdm = true;
				break;
			default:
				/* Not whitelisted: trust the kernel-reported feature bits. */
				if ((features & fp16arith_mask) == fp16arith_mask) {
					isa->fp16arith = true;
				} else if (features & CPUINFO_ARM_LINUX_FEATURE_FPHP) {
					cpuinfo_log_warning("FP16 arithmetics disabled: detected support only for scalar operations");
				} else if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDHP) {
					cpuinfo_log_warning("FP16 arithmetics disabled: detected support only for SIMD operations");
				}
				if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM) {
					isa->rdm = true;
				}
				break;
		}
	}

	/*
	 * Many phones ship with an old kernel configuration that doesn't report UDOT/SDOT instructions.
	 * Use a MIDR-based heuristic to whitelist processors known to support it.
	 */
	switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
		case UINT32_C(0x4100D060): /* Cortex-A65 */
		case UINT32_C(0x4100D0B0): /* Cortex-A76 */
		case UINT32_C(0x4100D0C0): /* Neoverse N1 */
		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
		case UINT32_C(0x4100D4A0): /* Neoverse E1 */
		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
		case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
		case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */
		case UINT32_C(0x53000030): /* Exynos-M4 */
		case UINT32_C(0x53000040): /* Exynos-M5 */
			isa->dot = true;
			break;
		case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */
			isa->dot = !!(midr_get_variant(midr) >= 1);
			break;
		case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */
			isa->dot = !!(midr_get_variant(midr) >= 2);
			break;
		default:
			if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDDP) {
				isa->dot = true;
			}
			break;
	}

	/* The JSCVT check used to appear twice in a row; a single check is sufficient. */
	if (features & CPUINFO_ARM_LINUX_FEATURE_JSCVT) {
		isa->jscvt = true;
	}
	if (features & CPUINFO_ARM_LINUX_FEATURE_FCMA) {
		isa->fcma = true;
	}
	if (features & CPUINFO_ARM_LINUX_FEATURE_SVE) {
		isa->sve = true;
	}
	if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) {
		isa->sve2 = true;
	}
}

View File

@@ -0,0 +1,384 @@
#pragma once
#include <stdbool.h>
#include <stdint.h>
#include <cpuinfo.h>
#include <cpuinfo/common.h>
#include <arm/midr.h>
#include <arm/api.h>
#include <linux/api.h>
/* No hard limit in the kernel, maximum length observed on non-rogue kernels is 64 */
#define CPUINFO_HARDWARE_VALUE_MAX 64
/* No hard limit in the kernel, maximum length on Raspberry Pi is 8. Add 1 symbol to detect overly large revision strings */
#define CPUINFO_REVISION_VALUE_MAX 9
#ifdef __ANDROID__
/* As per include/sys/system_properties.h in Android NDK */
#define CPUINFO_BUILD_PROP_NAME_MAX 32
#define CPUINFO_BUILD_PROP_VALUE_MAX 92
/*
 * Raw strings from which the chipset is identified on Android.
 * The ro_* buffers are sized for Android system property values and are filled by
 * cpuinfo_arm_android_parse_properties; proc_cpuinfo_hardware is sized by the
 * /proc/cpuinfo "Hardware" limit above and is filled elsewhere.
 */
struct cpuinfo_android_properties {
char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX];
char ro_product_board[CPUINFO_BUILD_PROP_VALUE_MAX];
char ro_board_platform[CPUINFO_BUILD_PROP_VALUE_MAX];
char ro_mediatek_platform[CPUINFO_BUILD_PROP_VALUE_MAX];
char ro_arch[CPUINFO_BUILD_PROP_VALUE_MAX];
char ro_chipname[CPUINFO_BUILD_PROP_VALUE_MAX];
char ro_hardware_chipname[CPUINFO_BUILD_PROP_VALUE_MAX];
};
#endif
/*
 * Architecture flag bits (the `architecture_flags` argument of the 32-bit ISA decoder):
 * T enables Thumb, E enables ARMv5E, J enables Jazelle in that decoder.
 * TE and TEJ are the bitwise OR of the individual flags.
 */
#define CPUINFO_ARM_LINUX_ARCH_T UINT32_C(0x00000001)
#define CPUINFO_ARM_LINUX_ARCH_E UINT32_C(0x00000002)
#define CPUINFO_ARM_LINUX_ARCH_J UINT32_C(0x00000004)
#define CPUINFO_ARM_LINUX_ARCH_TE UINT32_C(0x00000003)
#define CPUINFO_ARM_LINUX_ARCH_TEJ UINT32_C(0x00000007)
/*
 * Cache parameters; the `i_` fields describe one cache and the `d_` fields another.
 * NOTE(review): presumably the instruction and data caches as reported in /proc/cpuinfo - confirm against the parser.
 */
struct cpuinfo_arm_linux_proc_cpuinfo_cache {
uint32_t i_size;
uint32_t i_assoc;
uint32_t i_line_length;
uint32_t i_sets;
uint32_t d_size;
uint32_t d_assoc;
uint32_t d_line_length;
uint32_t d_sets;
};
/*
 * Feature bit definitions, one bit per flag, for the `features` (FEATURE_*) and
 * `features2` (FEATURE2_*) words. The 32-bit ARM and ARM64 sets are different and
 * mutually exclusive; neither set is defined on other architectures.
 */
#if CPUINFO_ARCH_ARM
/* arch/arm/include/uapi/asm/hwcap.h */
#define CPUINFO_ARM_LINUX_FEATURE_SWP UINT32_C(0x00000001)
#define CPUINFO_ARM_LINUX_FEATURE_HALF UINT32_C(0x00000002)
#define CPUINFO_ARM_LINUX_FEATURE_THUMB UINT32_C(0x00000004)
#define CPUINFO_ARM_LINUX_FEATURE_26BIT UINT32_C(0x00000008)
#define CPUINFO_ARM_LINUX_FEATURE_FASTMULT UINT32_C(0x00000010)
#define CPUINFO_ARM_LINUX_FEATURE_FPA UINT32_C(0x00000020)
#define CPUINFO_ARM_LINUX_FEATURE_VFP UINT32_C(0x00000040)
#define CPUINFO_ARM_LINUX_FEATURE_EDSP UINT32_C(0x00000080)
#define CPUINFO_ARM_LINUX_FEATURE_JAVA UINT32_C(0x00000100)
#define CPUINFO_ARM_LINUX_FEATURE_IWMMXT UINT32_C(0x00000200)
#define CPUINFO_ARM_LINUX_FEATURE_CRUNCH UINT32_C(0x00000400)
#define CPUINFO_ARM_LINUX_FEATURE_THUMBEE UINT32_C(0x00000800)
#define CPUINFO_ARM_LINUX_FEATURE_NEON UINT32_C(0x00001000)
#define CPUINFO_ARM_LINUX_FEATURE_VFPV3 UINT32_C(0x00002000)
#define CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 UINT32_C(0x00004000) /* Also set for VFPv4 with 16 double-precision registers */
#define CPUINFO_ARM_LINUX_FEATURE_TLS UINT32_C(0x00008000)
#define CPUINFO_ARM_LINUX_FEATURE_VFPV4 UINT32_C(0x00010000)
#define CPUINFO_ARM_LINUX_FEATURE_IDIVA UINT32_C(0x00020000)
#define CPUINFO_ARM_LINUX_FEATURE_IDIVT UINT32_C(0x00040000)
/* Composite mask: IDIVA | IDIVT. Compare with == to require both bits. */
#define CPUINFO_ARM_LINUX_FEATURE_IDIV UINT32_C(0x00060000)
#define CPUINFO_ARM_LINUX_FEATURE_VFPD32 UINT32_C(0x00080000)
#define CPUINFO_ARM_LINUX_FEATURE_LPAE UINT32_C(0x00100000)
#define CPUINFO_ARM_LINUX_FEATURE_EVTSTRM UINT32_C(0x00200000)
/* Second feature word (tested against `features2`). */
#define CPUINFO_ARM_LINUX_FEATURE2_AES UINT32_C(0x00000001)
#define CPUINFO_ARM_LINUX_FEATURE2_PMULL UINT32_C(0x00000002)
#define CPUINFO_ARM_LINUX_FEATURE2_SHA1 UINT32_C(0x00000004)
#define CPUINFO_ARM_LINUX_FEATURE2_SHA2 UINT32_C(0x00000008)
#define CPUINFO_ARM_LINUX_FEATURE2_CRC32 UINT32_C(0x00000010)
#elif CPUINFO_ARCH_ARM64
/* arch/arm64/include/uapi/asm/hwcap.h */
#define CPUINFO_ARM_LINUX_FEATURE_FP UINT32_C(0x00000001)
#define CPUINFO_ARM_LINUX_FEATURE_ASIMD UINT32_C(0x00000002)
#define CPUINFO_ARM_LINUX_FEATURE_EVTSTRM UINT32_C(0x00000004)
#define CPUINFO_ARM_LINUX_FEATURE_AES UINT32_C(0x00000008)
#define CPUINFO_ARM_LINUX_FEATURE_PMULL UINT32_C(0x00000010)
#define CPUINFO_ARM_LINUX_FEATURE_SHA1 UINT32_C(0x00000020)
#define CPUINFO_ARM_LINUX_FEATURE_SHA2 UINT32_C(0x00000040)
#define CPUINFO_ARM_LINUX_FEATURE_CRC32 UINT32_C(0x00000080)
#define CPUINFO_ARM_LINUX_FEATURE_ATOMICS UINT32_C(0x00000100)
#define CPUINFO_ARM_LINUX_FEATURE_FPHP UINT32_C(0x00000200)
#define CPUINFO_ARM_LINUX_FEATURE_ASIMDHP UINT32_C(0x00000400)
#define CPUINFO_ARM_LINUX_FEATURE_CPUID UINT32_C(0x00000800)
#define CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM UINT32_C(0x00001000)
#define CPUINFO_ARM_LINUX_FEATURE_JSCVT UINT32_C(0x00002000)
#define CPUINFO_ARM_LINUX_FEATURE_FCMA UINT32_C(0x00004000)
#define CPUINFO_ARM_LINUX_FEATURE_LRCPC UINT32_C(0x00008000)
#define CPUINFO_ARM_LINUX_FEATURE_DCPOP UINT32_C(0x00010000)
#define CPUINFO_ARM_LINUX_FEATURE_SHA3 UINT32_C(0x00020000)
#define CPUINFO_ARM_LINUX_FEATURE_SM3 UINT32_C(0x00040000)
#define CPUINFO_ARM_LINUX_FEATURE_SM4 UINT32_C(0x00080000)
#define CPUINFO_ARM_LINUX_FEATURE_ASIMDDP UINT32_C(0x00100000)
#define CPUINFO_ARM_LINUX_FEATURE_SHA512 UINT32_C(0x00200000)
#define CPUINFO_ARM_LINUX_FEATURE_SVE UINT32_C(0x00400000)
#define CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM UINT32_C(0x00800000)
#define CPUINFO_ARM_LINUX_FEATURE_DIT UINT32_C(0x01000000)
#define CPUINFO_ARM_LINUX_FEATURE_USCAT UINT32_C(0x02000000)
#define CPUINFO_ARM_LINUX_FEATURE_ILRCPC UINT32_C(0x04000000)
#define CPUINFO_ARM_LINUX_FEATURE_FLAGM UINT32_C(0x08000000)
#define CPUINFO_ARM_LINUX_FEATURE_SSBS UINT32_C(0x10000000)
#define CPUINFO_ARM_LINUX_FEATURE_SB UINT32_C(0x20000000)
#define CPUINFO_ARM_LINUX_FEATURE_PACA UINT32_C(0x40000000)
#define CPUINFO_ARM_LINUX_FEATURE_PACG UINT32_C(0x80000000)
/* Second feature word (tested against `features2`). */
#define CPUINFO_ARM_LINUX_FEATURE2_DCPODP UINT32_C(0x00000001)
#define CPUINFO_ARM_LINUX_FEATURE2_SVE2 UINT32_C(0x00000002)
#define CPUINFO_ARM_LINUX_FEATURE2_SVEAES UINT32_C(0x00000004)
#define CPUINFO_ARM_LINUX_FEATURE2_SVEPMULL UINT32_C(0x00000008)
#define CPUINFO_ARM_LINUX_FEATURE2_SVEBITPERM UINT32_C(0x00000010)
#define CPUINFO_ARM_LINUX_FEATURE2_SVESHA3 UINT32_C(0x00000020)
#define CPUINFO_ARM_LINUX_FEATURE2_SVESM4 UINT32_C(0x00000040)
#define CPUINFO_ARM_LINUX_FEATURE2_FLAGM2 UINT32_C(0x00000080)
#define CPUINFO_ARM_LINUX_FEATURE2_FRINT UINT32_C(0x00000100)
#define CPUINFO_ARM_LINUX_FEATURE2_SVEI8MM UINT32_C(0x00000200)
#define CPUINFO_ARM_LINUX_FEATURE2_SVEF32MM UINT32_C(0x00000400)
#define CPUINFO_ARM_LINUX_FEATURE2_SVEF64MM UINT32_C(0x00000800)
#define CPUINFO_ARM_LINUX_FEATURE2_SVEBF16 UINT32_C(0x00001000)
#define CPUINFO_ARM_LINUX_FEATURE2_I8MM UINT32_C(0x00002000)
#define CPUINFO_ARM_LINUX_FEATURE2_BF16 UINT32_C(0x00004000)
#define CPUINFO_ARM_LINUX_FEATURE2_DGH UINT32_C(0x00008000)
#define CPUINFO_ARM_LINUX_FEATURE2_RNG UINT32_C(0x00010000)
#define CPUINFO_ARM_LINUX_FEATURE2_BTI UINT32_C(0x00020000)
#endif
/*
 * Validity bits, one per piece of per-processor information.
 * NOTE(review): presumably stored in cpuinfo_arm_linux_processor.flags to mark which fields were parsed - confirm against the parser.
 */
#define CPUINFO_ARM_LINUX_VALID_ARCHITECTURE UINT32_C(0x00010000)
#define CPUINFO_ARM_LINUX_VALID_IMPLEMENTER UINT32_C(0x00020000)
#define CPUINFO_ARM_LINUX_VALID_VARIANT UINT32_C(0x00040000)
#define CPUINFO_ARM_LINUX_VALID_PART UINT32_C(0x00080000)
#define CPUINFO_ARM_LINUX_VALID_REVISION UINT32_C(0x00100000)
#define CPUINFO_ARM_LINUX_VALID_PROCESSOR UINT32_C(0x00200000)
#define CPUINFO_ARM_LINUX_VALID_FEATURES UINT32_C(0x00400000)
#if CPUINFO_ARCH_ARM
/* Cache-related validity bits exist only on 32-bit ARM. */
#define CPUINFO_ARM_LINUX_VALID_ICACHE_SIZE UINT32_C(0x01000000)
#define CPUINFO_ARM_LINUX_VALID_ICACHE_SETS UINT32_C(0x02000000)
#define CPUINFO_ARM_LINUX_VALID_ICACHE_WAYS UINT32_C(0x04000000)
#define CPUINFO_ARM_LINUX_VALID_ICACHE_LINE UINT32_C(0x08000000)
#define CPUINFO_ARM_LINUX_VALID_DCACHE_SIZE UINT32_C(0x10000000)
#define CPUINFO_ARM_LINUX_VALID_DCACHE_SETS UINT32_C(0x20000000)
#define CPUINFO_ARM_LINUX_VALID_DCACHE_WAYS UINT32_C(0x40000000)
#define CPUINFO_ARM_LINUX_VALID_DCACHE_LINE UINT32_C(0x80000000)
#endif
/* Composite mask: all seven bits from ARCHITECTURE (0x00010000) through FEATURES (0x00400000). */
#define CPUINFO_ARM_LINUX_VALID_INFO UINT32_C(0x007F0000)
/* Composite mask: the six bits from ARCHITECTURE (0x00010000) through PROCESSOR (0x00200000). */
#define CPUINFO_ARM_LINUX_VALID_MIDR UINT32_C(0x003F0000)
#if CPUINFO_ARCH_ARM
/* Composite masks: all four ICACHE bits, all four DCACHE bits, and ICACHE_LINE | DCACHE_LINE. */
#define CPUINFO_ARM_LINUX_VALID_ICACHE UINT32_C(0x0F000000)
#define CPUINFO_ARM_LINUX_VALID_DCACHE UINT32_C(0xF0000000)
#define CPUINFO_ARM_LINUX_VALID_CACHE_LINE UINT32_C(0x88000000)
#endif
/*
 * Per-logical-processor record accumulated while parsing Linux-provided information on ARM systems.
 * Which fields hold meaningful values is tracked through the bits of the flags field.
 */
struct cpuinfo_arm_linux_processor {
/* NOTE(review): presumably the ARM architecture version reported by the kernel - confirm against the parser */
uint32_t architecture_version;
#if CPUINFO_ARCH_ARM
/* Only present in 32-bit ARM builds */
uint32_t architecture_flags;
struct cpuinfo_arm_linux_proc_cpuinfo_cache proc_cpuinfo_cache;
#endif
/* NOTE(review): presumably bitmasks of CPUINFO_ARM_LINUX_FEATURE_* / CPUINFO_ARM_LINUX_FEATURE2_* - confirm */
uint32_t features;
uint32_t features2;
/**
* Main ID Register value.
*/
uint32_t midr;
enum cpuinfo_vendor vendor;
enum cpuinfo_uarch uarch;
uint32_t uarch_index;
/**
* ID of the physical package which includes this logical processor.
* The value is parsed from /sys/devices/system/cpu/cpu<N>/topology/physical_package_id
*/
uint32_t package_id;
/**
* Minimum processor ID on the package which includes this logical processor.
* This value can serve as an ID for the cluster of logical processors: it is the
* same for all logical processors on the same package.
*/
uint32_t package_leader_id;
/**
* Number of logical processors in the package.
*/
uint32_t package_processor_count;
/**
* Maximum frequency, in kHz.
* The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_max_freq
* If failed to read or parse the file, the value is 0.
*/
uint32_t max_frequency;
/**
* Minimum frequency, in kHz.
* The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_min_freq
* If failed to read or parse the file, the value is 0.
*/
uint32_t min_frequency;
/** Linux processor ID */
uint32_t system_processor_id;
/**
* Bitfield combining CPUINFO_LINUX_FLAG_* and CPUINFO_ARM_LINUX_VALID_* bits.
* The cluster detection code tests it to find out which of the fields above are known.
*/
uint32_t flags;
};
/*
 * Range of logical processor IDs forming one cluster.
 * NOTE(review): presumably both bounds are inclusive - no use of this structure is visible here, confirm.
 */
struct cpuinfo_arm_linux_cluster {
uint32_t processor_id_min;
uint32_t processor_id_max;
};
/*
 * Returns true if the two processors do belong to the same cluster.
 *
 * Only details known for both processors are compared:
 * - A known but different minimum or maximum frequency rules the match out.
 * - A fully known and identical MIDR confirms the match, except for Cortex-A53 cores,
 *   which additionally need both frequencies to be known (and thus equal).
 * - Otherwise the match requires both frequencies to be known (and thus equal).
 */
static inline bool cpuinfo_arm_linux_processor_equals(
	struct cpuinfo_arm_linux_processor processor_i[restrict static 1],
	struct cpuinfo_arm_linux_processor processor_j[restrict static 1])
{
	const uint32_t common_flags = processor_i->flags & processor_j->flags;

	const bool max_frequency_known = (common_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) != 0;
	if (max_frequency_known && processor_i->max_frequency != processor_j->max_frequency) {
		return false;
	}

	const bool min_frequency_known = (common_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) != 0;
	if (min_frequency_known && processor_i->min_frequency != processor_j->min_frequency) {
		return false;
	}

	/* Past the guards above, a frequency known for both processors is a matching frequency */
	const bool frequencies_match = max_frequency_known && min_frequency_known;

	const bool midr_known = (common_flags & CPUINFO_ARM_LINUX_VALID_MIDR) == CPUINFO_ARM_LINUX_VALID_MIDR;
	if (midr_known && processor_i->midr == processor_j->midr) {
		if (!midr_is_cortex_a53(processor_i->midr)) {
			return true;
		}
	}
	return frequencies_match;
}
/*
 * Returns true if the two processors certainly don't belong to the same cluster,
 * i.e. a detail known for both of them (maximum frequency, minimum frequency, or
 * the complete MIDR) has different values.
 */
static inline bool cpuinfo_arm_linux_processor_not_equals(
	struct cpuinfo_arm_linux_processor processor_i[restrict static 1],
	struct cpuinfo_arm_linux_processor processor_j[restrict static 1])
{
	const uint32_t common_flags = processor_i->flags & processor_j->flags;

	const bool max_frequency_differs =
		(common_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) != 0 &&
		processor_i->max_frequency != processor_j->max_frequency;

	const bool min_frequency_differs =
		(common_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) != 0 &&
		processor_i->min_frequency != processor_j->min_frequency;

	const bool midr_differs =
		(common_flags & CPUINFO_ARM_LINUX_VALID_MIDR) == CPUINFO_ARM_LINUX_VALID_MIDR &&
		processor_i->midr != processor_j->midr;

	return max_frequency_differs || min_frequency_differs || midr_differs;
}
/*
 * Fills the hardware and revision buffers and up to max_processors_count processor records.
 * NOTE(review): presumably parses /proc/cpuinfo and returns false on failure - the definition is not visible here.
 */
CPUINFO_INTERNAL bool cpuinfo_arm_linux_parse_proc_cpuinfo(
char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
char revision[restrict static CPUINFO_REVISION_VALUE_MAX],
uint32_t max_processors_count,
struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count]);
/*
 * HWCAP queries and ISA decoding.
 * Note that cpuinfo_arm_linux_hwcap_from_getauxval returns bool in 32-bit ARM builds,
 * but returns nothing in ARM64 builds.
 */
#if CPUINFO_ARCH_ARM
CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_getauxval(
uint32_t hwcap[restrict static 1],
uint32_t hwcap2[restrict static 1]);
CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_procfs(
uint32_t hwcap[restrict static 1],
uint32_t hwcap2[restrict static 1]);
CPUINFO_INTERNAL void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
uint32_t features,
uint32_t features2,
uint32_t midr,
uint32_t architecture_version,
uint32_t architecture_flags,
const struct cpuinfo_arm_chipset chipset[restrict static 1],
struct cpuinfo_arm_isa isa[restrict static 1]);
#elif CPUINFO_ARCH_ARM64
CPUINFO_INTERNAL void cpuinfo_arm_linux_hwcap_from_getauxval(
uint32_t hwcap[restrict static 1],
uint32_t hwcap2[restrict static 1]);
CPUINFO_INTERNAL void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
uint32_t features,
uint32_t features2,
uint32_t midr,
const struct cpuinfo_arm_chipset chipset[restrict static 1],
struct cpuinfo_arm_isa isa[restrict static 1]);
#endif
/*
 * Chipset decoding entry point: Android builds decode from Android properties,
 * other builds decode from the hardware and revision strings.
 */
#ifdef __ANDROID__
CPUINFO_INTERNAL struct cpuinfo_arm_chipset
cpuinfo_arm_android_decode_chipset(
const struct cpuinfo_android_properties properties[restrict static 1],
uint32_t cores,
uint32_t max_cpu_freq_max);
#else
CPUINFO_INTERNAL struct cpuinfo_arm_chipset
cpuinfo_arm_linux_decode_chipset(
const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
const char revision[restrict static CPUINFO_REVISION_VALUE_MAX],
uint32_t cores,
uint32_t max_cpu_freq_max);
#endif
/* Chipset decoders for individual information sources; this one is declared for all builds */
CPUINFO_INTERNAL struct cpuinfo_arm_chipset
cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_hardware(
const char proc_cpuinfo_hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
uint32_t cores, uint32_t max_cpu_freq_max, bool is_tegra);
#ifdef __ANDROID__
/* Android-only decoders, one per property string (named after the parameter each of them takes) */
CPUINFO_INTERNAL struct cpuinfo_arm_chipset
cpuinfo_arm_android_decode_chipset_from_ro_product_board(
const char ro_product_board[restrict static CPUINFO_BUILD_PROP_VALUE_MAX],
uint32_t cores, uint32_t max_cpu_freq_max);
CPUINFO_INTERNAL struct cpuinfo_arm_chipset
cpuinfo_arm_android_decode_chipset_from_ro_board_platform(
const char ro_board_platform[restrict static CPUINFO_BUILD_PROP_VALUE_MAX],
uint32_t cores, uint32_t max_cpu_freq_max);
CPUINFO_INTERNAL struct cpuinfo_arm_chipset
cpuinfo_arm_android_decode_chipset_from_ro_mediatek_platform(
const char ro_mediatek_platform[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
CPUINFO_INTERNAL struct cpuinfo_arm_chipset
cpuinfo_arm_android_decode_chipset_from_ro_arch(
const char ro_arch[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
CPUINFO_INTERNAL struct cpuinfo_arm_chipset
cpuinfo_arm_android_decode_chipset_from_ro_chipname(
const char ro_chipname[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
CPUINFO_INTERNAL struct cpuinfo_arm_chipset
cpuinfo_arm_android_decode_chipset_from_ro_hardware_chipname(
const char ro_hardware_chipname[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]);
#else
/* Non-Android decoder working on the revision string */
CPUINFO_INTERNAL struct cpuinfo_arm_chipset
cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_revision(
const char proc_cpuinfo_revision[restrict static CPUINFO_REVISION_VALUE_MAX]);
#endif
/*
 * Tries to split processors into the typical cluster layouts for 5, 6, 8 and 10 cores.
 * Returns false, leaving the clusters unchanged, if the layout contradicts known processor details.
 */
CPUINFO_INTERNAL bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
uint32_t usable_processors,
uint32_t max_processors,
struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
/*
 * Assigns processors without a pre-assigned cluster in one sequential scan,
 * starting a new cluster whenever known details differ from the preceding cluster.
 */
CPUINFO_INTERNAL void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
uint32_t max_processors,
struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
/* Stores the number of logical processors of each cluster into package_processor_count of its members */
CPUINFO_INTERNAL void cpuinfo_arm_linux_count_cluster_processors(
uint32_t max_processors,
struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
/* NOTE(review): definition not visible here; the name suggests it determines MIDR values per cluster - confirm */
CPUINFO_INTERNAL uint32_t cpuinfo_arm_linux_detect_cluster_midr(
const struct cpuinfo_arm_chipset chipset[restrict static 1],
uint32_t max_processors,
uint32_t usable_processors,
struct cpuinfo_arm_linux_processor processors[restrict static max_processors]);
/* NOTE(review): presumably a table indexed by Linux CPU number, and its element count - confirm at the definition */
extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map;
extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,493 @@
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <cpuinfo.h>
#include <arm/linux/api.h>
#if defined(__ANDROID__)
#include <arm/android/api.h>
#endif
#include <arm/api.h>
#include <arm/midr.h>
#include <linux/api.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
/* Checks that every bit set in mask is also set in bitfield (trivially true for an empty mask). */
static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
	const uint32_t missing_bits = mask & ~bitfield;
	return missing_bits == 0;
}
/*
* Assigns logical processors to clusters of cores using heuristic based on the typical configuration of clusters for
* 5, 6, 8, and 10 cores:
* - 5 cores (ARM32 Android only): 2 clusters of 4+1 cores
* - 6 cores: 2 clusters of 4+2 cores
* - 8 cores: 2 clusters of 4+4 cores
* - 10 cores: 3 clusters of 4+4+2 cores
*
* The function must be called after parsing OS-provided information on core clusters.
* Its purpose is to detect clusters of cores when OS-provided information is lacking or incomplete, i.e.
* - Linux kernel is not configured to report information in sysfs topology leaf.
* - Linux kernel reports topology information only for online cores, and only cores on one cluster are online, e.g.:
* - Exynos 8890 has 8 cores in 4+4 clusters, but only the first cluster of 4 cores is reported, and cluster
* configuration of logical processors 4-7 is not reported (all remaining processors 4-7 form cluster 1)
* - MT6797 has 10 cores in 4+4+2, but only the first cluster of 4 cores is reported, and cluster configuration
* of logical processors 4-9 is not reported (processors 4-7 form cluster 1, and processors 8-9 form cluster 2).
*
* Heuristic assignment of processors to the above pre-defined clusters fails if such assignment would contradict
* information provided by the operating system:
* - Any of the OS-reported processor clusters is different than the corresponding heuristic cluster.
* - Processors in a heuristic cluster have no OS-provided cluster siblings information, but have known and different
* minimum/maximum frequency.
* - Processors in a heuristic cluster have no OS-provided cluster siblings information, but have known and different
* MIDR components.
*
* If the heuristic assignment of processors to clusters of cores fails, all processors' clusters are unchanged.
*
* @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags.
* @param max_processors - number of elements in the @p processors array.
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
* frequency, MIDR information, and core cluster (package siblings list) information.
*
* @retval true if the heuristic successfully assigned all processors into clusters of cores.
* @retval false if known details about processors contradict the heuristic configuration of core clusters.
*/
bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
	uint32_t usable_processors,
	uint32_t max_processors,
	struct cpuinfo_arm_linux_processor processors[restrict static max_processors])
{
	/* Expected number of processors in each cluster, and the number of clusters in the layout */
	uint32_t cluster_processors[3] = { 0, 0, 0 };
	uint32_t cluster_count = 0;
	switch (usable_processors) {
		case 10:
			cluster_processors[0] = 4;
			cluster_processors[1] = 4;
			cluster_processors[2] = 2;
			cluster_count = 3;
			break;
		case 8:
			cluster_processors[0] = 4;
			cluster_processors[1] = 4;
			cluster_count = 2;
			break;
		case 6:
			cluster_processors[0] = 4;
			cluster_processors[1] = 2;
			cluster_count = 2;
			break;
#if defined(__ANDROID__) && CPUINFO_ARCH_ARM
		case 5:
			/*
			 * The only processor with 5 cores is Leadcore L1860C (ARMv7, mobile),
			 * but this configuration is not too unreasonable for a virtualized ARM server.
			 */
			cluster_processors[0] = 4;
			cluster_processors[1] = 1;
			cluster_count = 2;
			break;
#endif
		default:
			return false;
	}

	/*
	 * Assignment of processors to core clusters is done in two passes:
	 * 1. Verify that the clusters proposed by heuristic are compatible with known details about processors.
	 * 2. If verification passed, update core clusters for the processors.
	 */

	uint32_t cluster = 0;
	uint32_t expected_cluster_processors = 0;
	/*
	 * Details accumulated for the cluster being verified. All of them start from a defined value:
	 * cluster_midr in particular is passed to the midr_copy_* helpers before any of its fields is set.
	 */
	uint32_t cluster_start = 0, cluster_flags = 0, cluster_midr = 0;
	uint32_t cluster_max_frequency = 0, cluster_min_frequency = 0;
	bool expected_cluster_exists = false;
	for (uint32_t i = 0; i < max_processors; i++) {
		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
			if (expected_cluster_processors == 0) {
				/* Expect this processor to start a new cluster */

				if (cluster >= cluster_count) {
					/*
					 * More valid processors in the array than the layout for usable_processors covers:
					 * fail instead of reading past the expected cluster sizes.
					 */
					cpuinfo_log_debug(
						"heuristic detection of core clusters failed: "
						"processor %"PRIu32" does not fit into any of the %"PRIu32" expected clusters",
						i, cluster_count);
					return false;
				}

				expected_cluster_exists = !!(processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER);
				if (expected_cluster_exists) {
					if (processors[i].package_leader_id != i) {
						cpuinfo_log_debug(
							"heuristic detection of core clusters failed: "
							"processor %"PRIu32" is expected to start a new cluster #%"PRIu32" with %"PRIu32" cores, "
							"but system siblings lists reported it as a sibling of processor %"PRIu32,
							i, cluster, cluster_processors[cluster], processors[i].package_leader_id);
						return false;
					}
				} else {
					cluster_flags = 0;
				}

				cluster_start = i;
				expected_cluster_processors = cluster_processors[cluster++];
			} else {
				/* Expect this processor to belong to the same cluster as processor cluster_start */

				if (expected_cluster_exists) {
					/*
					 * The cluster suggested by the heuristic was already parsed from system siblings lists.
					 * For all processors we expect in the cluster, check that:
					 * - They have pre-assigned cluster from siblings lists (CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER flag).
					 * - They were assigned to the same cluster based on siblings lists
					 *   (package_leader_id points to the first processor in the cluster).
					 */

					if ((processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER) == 0) {
						cpuinfo_log_debug(
							"heuristic detection of core clusters failed: "
							"processor %"PRIu32" is expected to belong to the cluster of processor %"PRIu32", "
							"but system siblings lists did not report it as a sibling of processor %"PRIu32,
							i, cluster_start, cluster_start);
						return false;
					}
					if (processors[i].package_leader_id != cluster_start) {
						/* The last argument is the leader reported by the system, not the expected one */
						cpuinfo_log_debug(
							"heuristic detection of core clusters failed: "
							"processor %"PRIu32" is expected to belong to the cluster of processor %"PRIu32", "
							"but system siblings lists reported it to belong to the cluster of processor %"PRIu32,
							i, cluster_start, processors[i].package_leader_id);
						return false;
					}
				} else {
					/*
					 * The cluster suggested by the heuristic was not parsed from system siblings lists.
					 * For all processors we expect in the cluster, check that:
					 * - They have no pre-assigned cluster from siblings lists.
					 * - If their min/max CPU frequency is known, it is the same.
					 * - If any part of their MIDR (Implementer, Variant, Part, Revision) is known, it is the same.
					 */

					if (processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER) {
						cpuinfo_log_debug(
							"heuristic detection of core clusters failed: "
							"processor %"PRIu32" is expected to be unassigned to any cluster, "
							"but system siblings lists reported it to belong to the cluster of processor %"PRIu32,
							i, processors[i].package_leader_id);
						return false;
					}

					if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
						if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
							if (cluster_min_frequency != processors[i].min_frequency) {
								cpuinfo_log_debug(
									"heuristic detection of core clusters failed: "
									"minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of its expected cluster (%"PRIu32" KHz)",
									i, processors[i].min_frequency, cluster_min_frequency);
								return false;
							}
						} else {
							cluster_min_frequency = processors[i].min_frequency;
							cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
						}
					}

					if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
						if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
							if (cluster_max_frequency != processors[i].max_frequency) {
								cpuinfo_log_debug(
									"heuristic detection of core clusters failed: "
									"maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of its expected cluster (%"PRIu32" KHz)",
									i, processors[i].max_frequency, cluster_max_frequency);
								return false;
							}
						} else {
							cluster_max_frequency = processors[i].max_frequency;
							cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
						}
					}

					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
							if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) {
								cpuinfo_log_debug(
									"heuristic detection of core clusters failed: "
									"CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of its expected cluster (0x%02"PRIx32")",
									i, midr_get_implementer(processors[i].midr), midr_get_implementer(cluster_midr));
								return false;
							}
						} else {
							cluster_midr = midr_copy_implementer(cluster_midr, processors[i].midr);
							cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER;
						}
					}

					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
							if ((cluster_midr & CPUINFO_ARM_MIDR_VARIANT_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_VARIANT_MASK)) {
								cpuinfo_log_debug(
									"heuristic detection of core clusters failed: "
									"CPU Variant of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32")",
									i, midr_get_variant(processors[i].midr), midr_get_variant(cluster_midr));
								return false;
							}
						} else {
							cluster_midr = midr_copy_variant(cluster_midr, processors[i].midr);
							cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT;
						}
					}

					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) {
						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_PART) {
							if ((cluster_midr & CPUINFO_ARM_MIDR_PART_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_PART_MASK)) {
								cpuinfo_log_debug(
									"heuristic detection of core clusters failed: "
									"CPU Part of processor %"PRIu32" (0x%03"PRIx32") is different than of its expected cluster (0x%03"PRIx32")",
									i, midr_get_part(processors[i].midr), midr_get_part(cluster_midr));
								return false;
							}
						} else {
							cluster_midr = midr_copy_part(cluster_midr, processors[i].midr);
							cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART;
						}
					}

					if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
						if (cluster_flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
							if ((cluster_midr & CPUINFO_ARM_MIDR_REVISION_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_REVISION_MASK)) {
								/* Processor value first, cluster value second, as the message states */
								cpuinfo_log_debug(
									"heuristic detection of core clusters failed: "
									"CPU Revision of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32")",
									i, midr_get_revision(processors[i].midr), midr_get_revision(cluster_midr));
								return false;
							}
						} else {
							cluster_midr = midr_copy_revision(cluster_midr, processors[i].midr);
							cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION;
						}
					}
				}
			}
			expected_cluster_processors--;
		}
	}

	/* Verification passed, assign all processors to new clusters */
	cluster = 0;
	expected_cluster_processors = 0;
	for (uint32_t i = 0; i < max_processors; i++) {
		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
			if (expected_cluster_processors == 0) {
				/* Expect this processor to start a new cluster */
				cluster_start = i;
				expected_cluster_processors = cluster_processors[cluster++];
			} else {
				/* Expect this processor to belong to the same cluster as processor cluster_start */
				if (!(processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) {
					cpuinfo_log_debug("assigned processor %"PRIu32" to cluster of processor %"PRIu32" based on heuristic",
						i, cluster_start);
				}
				processors[i].package_leader_id = cluster_start;
				processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
			}
			expected_cluster_processors--;
		}
	}
	return true;
}
/*
* Assigns logical processors to clusters of cores in sequential manner:
* - Clusters detected from OS-provided information are unchanged:
* - Processors assigned to these clusters stay assigned to the same clusters
* - No new processors are added to these clusters
* - Processors without pre-assigned cluster are clustered in one sequential scan:
* - If known details (min/max frequency, MIDR components) of a processor are compatible with a preceding
* processor without pre-assigned cluster, the processor is assigned to the cluster of the preceding processor.
* - If known details (min/max frequency, MIDR components) of a processor are not compatible with a preceding
* processor, the processor is assigned to a newly created cluster.
*
* The function must be called after parsing OS-provided information on core clusters, and usually is called only
* if heuristic assignment of processors to clusters (cpuinfo_arm_linux_detect_core_clusters_by_heuristic) failed.
*
* Its purpose is to detect clusters of cores when OS-provided information is lacking or incomplete, i.e.
* - Linux kernel is not configured to report information in sysfs topology leaf.
* - Linux kernel reports topology information only for online cores, and all cores on some of the clusters are offline.
*
* Sequential assignment of processors to clusters always succeeds, and upon exit, all usable processors in the
* @p processors array have cluster information.
*
* @param max_processors - number of elements in the @p processors array.
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
* frequency, MIDR information, and core cluster (package siblings list) information.
*
* The function returns no value: sequential assignment cannot fail, and its results are stored in the
* package_leader_id and flags fields of the affected @p processors entries.
*/
void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
	uint32_t max_processors,
	struct cpuinfo_arm_linux_processor processors[restrict static max_processors])
{
	uint32_t cluster_flags = 0;
	uint32_t cluster_processors = 0;
	/*
	 * Details of the cluster currently being extended. All of them start from a defined value:
	 * cluster_midr in particular is passed to the midr_copy_* helpers before any of its fields is set.
	 */
	uint32_t cluster_start = 0, cluster_midr = 0;
	uint32_t cluster_max_frequency = 0, cluster_min_frequency = 0;
	for (uint32_t i = 0; i < max_processors; i++) {
		/* Only valid processors without a pre-assigned cluster are considered */
		if ((processors[i].flags & (CPUINFO_LINUX_FLAG_VALID | CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) == CPUINFO_LINUX_FLAG_VALID) {
			if (cluster_processors == 0) {
				goto new_cluster;
			}

			if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
				if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
					if (cluster_min_frequency != processors[i].min_frequency) {
						cpuinfo_log_debug(
							"minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); "
							"processor %"PRIu32" starts to a new cluster",
							i, processors[i].min_frequency, cluster_min_frequency, i);
						goto new_cluster;
					}
				} else {
					cluster_min_frequency = processors[i].min_frequency;
					cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
				}
			}

			if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
				if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
					if (cluster_max_frequency != processors[i].max_frequency) {
						cpuinfo_log_debug(
							"maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); "
							"processor %"PRIu32" starts a new cluster",
							i, processors[i].max_frequency, cluster_max_frequency, i);
						goto new_cluster;
					}
				} else {
					cluster_max_frequency = processors[i].max_frequency;
					cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
				}
			}

			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
					if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) {
						cpuinfo_log_debug(
							"CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of preceeding cluster (0x%02"PRIx32"); "
							"processor %"PRIu32" starts to a new cluster",
							i, midr_get_implementer(processors[i].midr), midr_get_implementer(cluster_midr), i);
						goto new_cluster;
					}
				} else {
					cluster_midr = midr_copy_implementer(cluster_midr, processors[i].midr);
					cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER;
				}
			}

			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
					if ((cluster_midr & CPUINFO_ARM_MIDR_VARIANT_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_VARIANT_MASK)) {
						cpuinfo_log_debug(
							"CPU Variant of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32"); "
							"processor %"PRIu32" starts to a new cluster",
							i, midr_get_variant(processors[i].midr), midr_get_variant(cluster_midr), i);
						goto new_cluster;
					}
				} else {
					cluster_midr = midr_copy_variant(cluster_midr, processors[i].midr);
					cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT;
				}
			}

			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) {
				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_PART) {
					if ((cluster_midr & CPUINFO_ARM_MIDR_PART_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_PART_MASK)) {
						cpuinfo_log_debug(
							"CPU Part of processor %"PRIu32" (0x%03"PRIx32") is different than of its expected cluster (0x%03"PRIx32"); "
							"processor %"PRIu32" starts to a new cluster",
							i, midr_get_part(processors[i].midr), midr_get_part(cluster_midr), i);
						goto new_cluster;
					}
				} else {
					cluster_midr = midr_copy_part(cluster_midr, processors[i].midr);
					cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART;
				}
			}

			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
				if (cluster_flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
					if ((cluster_midr & CPUINFO_ARM_MIDR_REVISION_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_REVISION_MASK)) {
						/* Processor value first, cluster value second, as the message states */
						cpuinfo_log_debug(
							"CPU Revision of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32"); "
							"processor %"PRIu32" starts to a new cluster",
							i, midr_get_revision(processors[i].midr), midr_get_revision(cluster_midr), i);
						goto new_cluster;
					}
				} else {
					cluster_midr = midr_copy_revision(cluster_midr, processors[i].midr);
					cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION;
				}
			}

			/* All checks passed, attach processor to the preceding cluster */
			cluster_processors++;
			processors[i].package_leader_id = cluster_start;
			processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
			cpuinfo_log_debug("assigned processor %"PRIu32" to preceeding cluster of processor %"PRIu32, i, cluster_start);
			continue;

new_cluster:
			/* Create a new cluster starting with processor i */
			cluster_start = i;
			processors[i].package_leader_id = i;
			processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
			cluster_processors = 1;

			/* Copy known information from processor to cluster, and set the flags accordingly */
			cluster_flags = 0;
			if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) {
				cluster_min_frequency = processors[i].min_frequency;
				cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
			}
			if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
				cluster_max_frequency = processors[i].max_frequency;
				cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
			}
			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
				cluster_midr = midr_copy_implementer(cluster_midr, processors[i].midr);
				cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER;
			}
			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
				cluster_midr = midr_copy_variant(cluster_midr, processors[i].midr);
				cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT;
			}
			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) {
				cluster_midr = midr_copy_part(cluster_midr, processors[i].midr);
				cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART;
			}
			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
				cluster_midr = midr_copy_revision(cluster_midr, processors[i].midr);
				cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION;
			}
		}
	}
}
/*
* Counts the number of logical processors in each core cluster.
* This function should be called after all processors are assigned to core clusters.
*
* @param max_processors - number of elements in the @p processors array.
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags,
* and decoded core cluster (package_leader_id) information.
* The function expects the value of processors[i].package_processor_count to be zero.
* Upon return, processors[i].package_processor_count will contain the number of logical
* processors in the respective core cluster.
*/
void cpuinfo_arm_linux_count_cluster_processors(
uint32_t max_processors,
struct cpuinfo_arm_linux_processor processors[restrict static max_processors])
{
/* First pass: accumulate the number of processors at the group leader's package_processor_count */
for (uint32_t i = 0; i < max_processors; i++) {
if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
const uint32_t package_leader_id = processors[i].package_leader_id;
processors[package_leader_id].package_processor_count += 1;
}
}
/* Second pass: copy the package_processor_count from the group leader processor */
for (uint32_t i = 0; i < max_processors; i++) {
if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
const uint32_t package_leader_id = processors[i].package_leader_id;
processors[i].package_processor_count = processors[package_leader_id].package_processor_count;
}
}
}

View File

@@ -0,0 +1,44 @@
#include <stdint.h>
/*
 * Readers for ARM coprocessor registers.
 * With CPUINFO_MOCK enabled they return the values of externally defined variables;
 * otherwise they execute MRC instructions through inline assembly.
 */
#if CPUINFO_MOCK
/* Values returned by the mock readers below; defined outside of this header */
extern uint32_t cpuinfo_arm_fpsid;
extern uint32_t cpuinfo_arm_mvfr0;
extern uint32_t cpuinfo_arm_wcid;
/* Mock reader: returns cpuinfo_arm_fpsid */
static inline uint32_t read_fpsid(void) {
return cpuinfo_arm_fpsid;
}
/* Mock reader: returns cpuinfo_arm_mvfr0 */
static inline uint32_t read_mvfr0(void) {
return cpuinfo_arm_mvfr0;
}
/* Mock reader: returns cpuinfo_arm_wcid */
static inline uint32_t read_wcid(void) {
return cpuinfo_arm_wcid;
}
#else
/* read_fpsid and read_mvfr0 are only provided when the compilation target is older than ARMv7 */
#if !defined(__ARM_ARCH_7A__) && !defined(__ARM_ARCH_8A__) && !(defined(__ARM_ARCH) && (__ARM_ARCH >= 7))
/*
* CoProcessor 10 is inaccessible from user mode since ARMv7,
* and clang refuses to compile inline assembly when targeting ARMv7+
*/
/* Reads coprocessor p10 register cr0 (opcode 0x7, cr0, 0) with an MRC instruction */
static inline uint32_t read_fpsid(void) {
uint32_t fpsid;
__asm__ __volatile__("MRC p10, 0x7, %[fpsid], cr0, cr0, 0" : [fpsid] "=r" (fpsid));
return fpsid;
}
/* Reads coprocessor p10 register cr7 (opcode 0x7, cr0, 0) with an MRC instruction */
static inline uint32_t read_mvfr0(void) {
uint32_t mvfr0;
__asm__ __volatile__("MRC p10, 0x7, %[mvfr0], cr7, cr0, 0" : [mvfr0] "=r" (mvfr0));
return mvfr0;
}
#endif
/*
 * Reads coprocessor p1 register c0 with an MRC instruction.
 * NOTE(review): presumably the ID register of the Wireless MMX coprocessor - confirm.
 */
static inline uint32_t read_wcid(void) {
uint32_t wcid;
__asm__ __volatile__("MRC p1, 0, %[wcid], c0, c0" : [wcid] "=r" (wcid));
return wcid;
}
#endif

View File

@@ -0,0 +1,908 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <linux/api.h>
#include <arm/linux/api.h>
#include <arm/midr.h>
#include <cpuinfo/log.h>
/*
* Size, in chars, of the on-stack buffer used for parsing lines of /proc/cpuinfo.
* This is also the limit on the length of a single line.
*/
#define BUFFER_SIZE 1024
/*
 * Parses the decimal processor number in the character range [processor_start, processor_end).
 *
 * Parsing stops at the first character which is not a decimal digit; the digits before it
 * still contribute to the result, and a warning about the ignored suffix is logged.
 *
 * @param processor_start - pointer to the first character of the number.
 * @param processor_end - pointer past the last character of the number.
 *
 * @returns the parsed number, or 0 if the range is empty or does not start with a decimal digit.
 */
static uint32_t parse_processor_number(
	const char* processor_start,
	const char* processor_end)
{
	const size_t processor_length = (size_t) (processor_end - processor_start);

	if (processor_length == 0) {
		cpuinfo_log_warning("Processor number in /proc/cpuinfo is ignored: string is empty");
		return 0;
	}

	uint32_t processor_number = 0;
	for (const char* digit_ptr = processor_start; digit_ptr != processor_end; digit_ptr++) {
		/* Characters below '0' wrap around to large unsigned values, so one comparison covers both sides */
		const uint32_t digit = (uint32_t) (*digit_ptr - '0');
		/* Valid decimal digits are 0..9: the value 10 corresponds to ':' and must be rejected too */
		if (digit >= 10) {
			cpuinfo_log_warning("non-decimal suffix %.*s in /proc/cpuinfo processor number is ignored",
				(int) (processor_end - digit_ptr), digit_ptr);
			break;
		}

		processor_number = processor_number * 10 + digit;
	}

	return processor_number;
}
/*
* Full list of ARM features reported in /proc/cpuinfo:
*
* * swp - support for SWP instruction (deprecated in ARMv7, can be removed in future)
* * half - support for half-word loads and stores. These instructions are part of ARMv4,
* so no need to check it on supported CPUs.
* * thumb - support for 16-bit Thumb instruction set. Note that BX instruction is detected
* by ARMv4T architecture, not by this flag.
* * 26bit - old CPUs merged 26-bit PC and program status register (flags) into 32-bit PC
* and had special instructions for working with packed PC. Now it is all deprecated.
* * fastmult - most old ARM CPUs could only compute 2 bits of multiplication result per clock
* cycle, but CPUs with M suffix (e.g. ARM7TDMI) could compute 4 bits per cycle.
* Of course, now it makes no sense.
* * fpa - floating point accelerator available. On original ARM ABI all floating-point operations
* generated FPA instructions. If FPA was not available, these instructions generated
* "illegal operation" interrupts, and the OS processed them by emulating the FPA instructions.
* Debian used this ABI before it switched to EABI. Now FPA is deprecated.
* * vfp - vector floating point instructions. Available on most modern CPUs (as part of VFPv3).
* Required by Android ARMv7A ABI and by Ubuntu on ARM.
* Note: there is no flag for VFPv2.
* * edsp - V5E instructions: saturating add/sub and 16-bit x 16-bit -> 32/64-bit multiplications.
* Required on Android, supported by all CPUs in production.
* * java - Jazelle extension. Supported on most CPUs.
* * iwmmxt - Intel/Marvell Wireless MMX instructions. 64-bit integer SIMD.
* Supported on XScale (Since PXA270) and Sheeva (PJ1, PJ4) architectures.
* Note that there is no flag for WMMX2 instructions.
* * crunch - Maverick Crunch instructions. Junk.
* * thumbee - ThumbEE instructions. Almost no documentation is available.
* * neon - NEON instructions (aka Advanced SIMD). MVFR1 register gives more
* fine-grained information on particular supported features, but
* the Linux kernel exports only a single flag for all of them.
* According to ARMv7A docs it also implies the availability of VFPv3
* (with 32 double-precision registers d0-d31).
* * vfpv3 - VFPv3 instructions. Available on most modern CPUs. Augment VFPv2 by
* conversion to/from integers and load constant instructions.
* Required by Android ARMv7A ABI and by Ubuntu on ARM.
* * vfpv3d16 - VFPv3 instructions with only 16 double-precision registers (d0-d15).
* * tls - software thread ID registers.
* Used by kernel (and likely libc) for efficient implementation of TLS.
* * vfpv4 - fused multiply-add instructions.
* * idiva - DIV instructions available in ARM mode.
* * idivt - DIV instructions available in Thumb mode.
* * vfpd32 - VFP (of any version) with 32 double-precision registers d0-d31.
* * lpae - Large Physical Address Extension (physical address up to 40 bits).
* * evtstrm - generation of Event Stream by timer.
* * aes - AES instructions.
* * pmull - Polynomial Multiplication instructions.
* * sha1 - SHA1 instructions.
* * sha2 - SHA2 instructions.
* * crc32 - CRC32 instructions.
*
* /proc/cpuinfo on ARM is populated in file arch/arm/kernel/setup.c in Linux kernel
* Note that some devices may use patched Linux kernels with different feature names.
* However, the names above were checked on a large number of /proc/cpuinfo listings.
*/
/*
 * Decode the space-separated feature list of a "Features" line.
 *
 * The list is the character range [features_start, features_end). Each token is first
 * dispatched on its length and then compared with memcmp against the known feature names
 * of that length; a match sets the corresponding bit in processor->features (or
 * processor->features2 for some 32-bit ARM crypto/CRC features). Unrecognized tokens are
 * logged with a warning and skipped.
 *
 * Which names are recognized, and which bits they set, depends on CPUINFO_ARCH_ARM /
 * CPUINFO_ARCH_ARM64. Some names are matched on both architectures but only recorded on one,
 * so that they do not trigger the "unexpected feature" warning on the other.
 *
 * The range is expected to be non-empty and without trailing spaces: the first token scan
 * starts at feature_start + 1, and the loop only terminates when a token ends exactly at
 * features_end. parse_line trims the value before calling this function.
 */
static void parse_features(
const char* features_start,
const char* features_end,
struct cpuinfo_arm_linux_processor processor[restrict static 1])
{
const char* feature_start = features_start;
const char* feature_end;
/* Mark the features as valid */
processor->flags |= CPUINFO_ARM_LINUX_VALID_FEATURES | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
do {
/* Find the end of the current token: the next space or the end of the list */
feature_end = feature_start + 1;
for (; feature_end != features_end; feature_end++) {
if (*feature_end == ' ') {
break;
}
}
const size_t feature_length = (size_t) (feature_end - feature_start);
/* Dispatch on token length first so that each memcmp compares exactly feature_length bytes */
switch (feature_length) {
case 2:
if (memcmp(feature_start, "fp", feature_length) == 0) {
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_FP;
#endif
#if CPUINFO_ARCH_ARM
} else if (memcmp(feature_start, "wp", feature_length) == 0) {
/*
* Some AArch64 kernels, including the one on Nexus 5X,
* erroneously report "swp" as "wp" to AArch32 programs
*/
processor->features |= CPUINFO_ARM_LINUX_FEATURE_SWP;
#endif
} else {
goto unexpected;
}
break;
case 3:
if (memcmp(feature_start, "aes", feature_length) == 0) {
#if CPUINFO_ARCH_ARM
processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_AES;
#elif CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_AES;
#endif
#if CPUINFO_ARCH_ARM
} else if (memcmp(feature_start, "swp", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_SWP;
} else if (memcmp(feature_start, "fpa", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_FPA;
} else if (memcmp(feature_start, "vfp", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFP;
} else if (memcmp(feature_start, "tls", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_TLS;
#endif /* CPUINFO_ARCH_ARM */
} else {
goto unexpected;
}
break;
case 4:
if (memcmp(feature_start, "sha1", feature_length) == 0) {
#if CPUINFO_ARCH_ARM
processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_SHA1;
#elif CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_SHA1;
#endif
} else if (memcmp(feature_start, "sha2", feature_length) == 0) {
#if CPUINFO_ARCH_ARM
processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_SHA2;
#elif CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_SHA2;
#endif
} else if (memcmp(feature_start, "fphp", feature_length) == 0) {
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_FPHP;
#endif
} else if (memcmp(feature_start, "fcma", feature_length) == 0) {
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_FCMA;
#endif
#if CPUINFO_ARCH_ARM
} else if (memcmp(feature_start, "half", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_HALF;
} else if (memcmp(feature_start, "edsp", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_EDSP;
} else if (memcmp(feature_start, "java", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_JAVA;
} else if (memcmp(feature_start, "neon", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_NEON;
} else if (memcmp(feature_start, "lpae", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_LPAE;
} else if (memcmp(feature_start, "tlsi", feature_length) == 0) {
/*
* Some AArch64 kernels, including the one on Nexus 5X,
* erroneously report "tls" as "tlsi" to AArch32 programs
*/
processor->features |= CPUINFO_ARM_LINUX_FEATURE_TLS;
#endif /* CPUINFO_ARCH_ARM */
} else {
goto unexpected;
}
break;
case 5:
if (memcmp(feature_start, "pmull", feature_length) == 0) {
#if CPUINFO_ARCH_ARM
processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_PMULL;
#elif CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_PMULL;
#endif
} else if (memcmp(feature_start, "crc32", feature_length) == 0) {
#if CPUINFO_ARCH_ARM
processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_CRC32;
#elif CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_CRC32;
#endif
} else if (memcmp(feature_start, "asimd", feature_length) == 0) {
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMD;
#endif
} else if (memcmp(feature_start, "cpuid", feature_length) == 0) {
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_CPUID;
#endif
} else if (memcmp(feature_start, "jscvt", feature_length) == 0) {
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_JSCVT;
#endif
} else if (memcmp(feature_start, "lrcpc", feature_length) == 0) {
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_LRCPC;
#endif
#if CPUINFO_ARCH_ARM
} else if (memcmp(feature_start, "thumb", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_THUMB;
} else if (memcmp(feature_start, "26bit", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_26BIT;
} else if (memcmp(feature_start, "vfpv3", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV3;
} else if (memcmp(feature_start, "vfpv4", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV4;
} else if (memcmp(feature_start, "idiva", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_IDIVA;
} else if (memcmp(feature_start, "idivt", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_IDIVT;
#endif /* CPUINFO_ARCH_ARM */
} else {
goto unexpected;
}
break;
#if CPUINFO_ARCH_ARM
case 6:
if (memcmp(feature_start, "iwmmxt", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_IWMMXT;
} else if (memcmp(feature_start, "crunch", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_CRUNCH;
} else if (memcmp(feature_start, "vfpd32", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPD32;
} else {
goto unexpected;
}
break;
#endif /* CPUINFO_ARCH_ARM */
case 7:
if (memcmp(feature_start, "evtstrm", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_EVTSTRM;
} else if (memcmp(feature_start, "atomics", feature_length) == 0) {
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_ATOMICS;
#endif
} else if (memcmp(feature_start, "asimdhp", feature_length) == 0) {
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDHP;
#endif
#if CPUINFO_ARCH_ARM
} else if (memcmp(feature_start, "thumbee", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_THUMBEE;
#endif /* CPUINFO_ARCH_ARM */
} else {
goto unexpected;
}
break;
case 8:
if (memcmp(feature_start, "asimdrdm", feature_length) == 0) {
#if CPUINFO_ARCH_ARM64
processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM;
#endif
#if CPUINFO_ARCH_ARM
} else if (memcmp(feature_start, "fastmult", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_FASTMULT;
} else if (memcmp(feature_start, "vfpv3d16", feature_length) == 0) {
processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV3D16;
#endif /* CPUINFO_ARCH_ARM */
} else {
goto unexpected;
}
break;
default:
/* Shared landing point for tokens of an unknown length and for failed name comparisons */
unexpected:
cpuinfo_log_warning("unexpected /proc/cpuinfo feature \"%.*s\" is ignored",
(int) feature_length, feature_start);
break;
}
/* Advance to the start of the next token by skipping the separating spaces */
feature_start = feature_end;
for (; feature_start != features_end; feature_start++) {
if (*feature_start != ' ') {
break;
}
}
/* No separator was skipped only when the last token ended at features_end: stop there */
} while (feature_start != feature_end);
}
/*
 * Decode the value of a "CPU architecture" line.
 *
 * The value is the character range [cpu_architecture_start, cpu_architecture_end).
 * Accepted forms are the literal "AArch64" and a decimal number optionally followed by
 * architecture suffix letters (T, E, J are recorded on 32-bit ARM builds; spaces and tabs
 * are skipped; anything else is logged and skipped).
 *
 * On success the function records processor->architecture_version, sets the
 * VALID_ARCHITECTURE and VALID_PROCESSOR flags, and updates the architecture field
 * of processor->midr.
 *
 * NOTE(review): except for the "AArch64" early return, the MIDR architecture field is
 * updated even when the value was rejected with a warning; in that case it is derived
 * from whatever processor->architecture_version already holds.
 */
static void parse_cpu_architecture(
const char* cpu_architecture_start,
const char* cpu_architecture_end,
struct cpuinfo_arm_linux_processor processor[restrict static 1])
{
const size_t cpu_architecture_length = (size_t) (cpu_architecture_end - cpu_architecture_start);
/* Early AArch64 kernels report "CPU architecture: AArch64" instead of a numeric value 8 */
if (cpu_architecture_length == 7) {
if (memcmp(cpu_architecture_start, "AArch64", cpu_architecture_length) == 0) {
processor->midr = midr_set_architecture(processor->midr, UINT32_C(0xF));
processor->architecture_version = 8;
processor->flags |= CPUINFO_ARM_LINUX_VALID_ARCHITECTURE | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
return;
}
}
/* Accumulate the leading run of decimal digits */
uint32_t architecture = 0;
const char* cpu_architecture_ptr = cpu_architecture_start;
for (; cpu_architecture_ptr != cpu_architecture_end; cpu_architecture_ptr++) {
const uint32_t digit = (*cpu_architecture_ptr) - '0';
/* Verify that CPU architecture is a decimal number */
if (digit >= 10) {
break;
}
architecture = architecture * 10 + digit;
}
/* The pointer did not advance: the value does not start with a digit */
if (cpu_architecture_ptr == cpu_architecture_start) {
cpuinfo_log_warning("CPU architecture %.*s in /proc/cpuinfo is ignored due to non-digit at the beginning of the string",
(int) cpu_architecture_length, cpu_architecture_start);
} else {
if (architecture != 0) {
processor->architecture_version = architecture;
processor->flags |= CPUINFO_ARM_LINUX_VALID_ARCHITECTURE | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
/* Whatever follows the number is interpreted as one-letter architecture suffixes */
for (; cpu_architecture_ptr != cpu_architecture_end; cpu_architecture_ptr++) {
const char feature = *cpu_architecture_ptr;
switch (feature) {
#if CPUINFO_ARCH_ARM
case 'T':
processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_T;
break;
case 'E':
processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_E;
break;
case 'J':
processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_J;
break;
#endif /* CPUINFO_ARCH_ARM */
case ' ':
case '\t':
/* Ignore whitespace at the end */
break;
default:
cpuinfo_log_warning("skipped unknown architectural feature '%c' for ARMv%"PRIu32,
feature, architecture);
break;
}
}
} else {
cpuinfo_log_warning("CPU architecture %.*s in /proc/cpuinfo is ignored due to invalid value (0)",
(int) cpu_architecture_length, cpu_architecture_start);
}
}
/* Default MIDR architecture code; only ARMv5/ARMv6 on 32-bit ARM builds get a specific code below */
uint32_t midr_architecture = UINT32_C(0xF);
#if CPUINFO_ARCH_ARM
switch (processor->architecture_version) {
case 6:
midr_architecture = UINT32_C(0x7); /* ARMv6 */
break;
case 5:
if ((processor->architecture_flags & CPUINFO_ARM_LINUX_ARCH_TEJ) == CPUINFO_ARM_LINUX_ARCH_TEJ) {
midr_architecture = UINT32_C(0x6); /* ARMv5TEJ */
} else if ((processor->architecture_flags & CPUINFO_ARM_LINUX_ARCH_TE) == CPUINFO_ARM_LINUX_ARCH_TE) {
midr_architecture = UINT32_C(0x5); /* ARMv5TE */
} else {
midr_architecture = UINT32_C(0x4); /* ARMv5T */
}
break;
}
#endif
processor->midr = midr_set_architecture(processor->midr, midr_architecture);
}
/*
 * Decode the value of a "CPU part" line and store it in the part field of processor->midr.
 *
 * The value is the character range [cpu_part_start, cpu_part_end) and must be "0x"
 * followed by one to three hexadecimal digits (the MIDR part field is 12 bits wide).
 * Anything else is logged with a warning and leaves the processor record untouched.
 */
static void parse_cpu_part(
	const char* cpu_part_start,
	const char* cpu_part_end,
	struct cpuinfo_arm_linux_processor processor[restrict static 1])
{
	const size_t cpu_part_length = (size_t) (cpu_part_end - cpu_part_start);

	/* "0x" prefix plus 1..3 digits gives a total length of 3..5 characters */
	switch (cpu_part_length) {
		case 3:
		case 4:
		case 5:
			break;
		default:
			cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)",
				(int) cpu_part_length, cpu_part_start, cpu_part_length);
			return;
	}

	/* The value must begin with the hexadecimal prefix */
	if (memcmp(cpu_part_start, "0x", 2) != 0) {
		cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix",
			(int) cpu_part_length, cpu_part_start);
		return;
	}

	/* Decode the hexadecimal digits that follow the prefix, most significant first */
	uint32_t part = 0;
	for (size_t offset = 2; offset < cpu_part_length; offset++) {
		const char c = cpu_part_start[offset];
		uint32_t nibble;
		if (c >= '0' && c <= '9') {
			nibble = (uint32_t) (c - '0');
		} else if (c >= 'A' && c <= 'F') {
			nibble = 10 + (uint32_t) (c - 'A');
		} else if (c >= 'a' && c <= 'f') {
			nibble = 10 + (uint32_t) (c - 'a');
		} else {
			cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character %c at offset %zu",
				(int) cpu_part_length, cpu_part_start, c, offset);
			return;
		}
		part = (part << 4) | nibble;
	}

	processor->midr = midr_set_part(processor->midr, part);
	processor->flags |= CPUINFO_ARM_LINUX_VALID_PART | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
}
/*
 * Decode the value of a "CPU implementer" line and store it in the implementer field
 * of processor->midr.
 *
 * The value is the character range [cpu_implementer_start, cpu_implementer_end) and must
 * be "0x" followed by one or two hexadecimal digits (the MIDR implementer field is 8 bits wide).
 * Anything else is logged with a warning and leaves the processor record untouched.
 */
static void parse_cpu_implementer(
	const char* cpu_implementer_start,
	const char* cpu_implementer_end,
	struct cpuinfo_arm_linux_processor processor[restrict static 1])
{
	const size_t cpu_implementer_length = cpu_implementer_end - cpu_implementer_start;

	/* "0x" prefix plus 1..2 digits gives a total length of 3 or 4 characters */
	if (cpu_implementer_length != 3 && cpu_implementer_length != 4) {
		cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)",
			(int) cpu_implementer_length, cpu_implementer_start, cpu_implementer_length);
		return;
	}

	/* The value must begin with the hexadecimal prefix */
	if (memcmp(cpu_implementer_start, "0x", 2) != 0) {
		cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix",
			(int) cpu_implementer_length, cpu_implementer_start);
		return;
	}

	/* Decode the hexadecimal digits that follow the prefix, most significant first */
	uint32_t implementer = 0;
	for (size_t offset = 2; offset < cpu_implementer_length; offset++) {
		const char c = cpu_implementer_start[offset];
		uint32_t nibble;
		if (c >= '0' && c <= '9') {
			nibble = (uint32_t) (c - '0');
		} else if (c >= 'A' && c <= 'F') {
			nibble = 10 + (uint32_t) (c - 'A');
		} else if (c >= 'a' && c <= 'f') {
			nibble = 10 + (uint32_t) (c - 'a');
		} else {
			cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c' at offset %zu",
				(int) cpu_implementer_length, cpu_implementer_start, c, offset);
			return;
		}
		implementer = (implementer << 4) | nibble;
	}

	processor->midr = midr_set_implementer(processor->midr, implementer);
	processor->flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
}
/*
 * Decode the value of a "CPU variant" line and store it in the variant field of processor->midr.
 *
 * The value is the character range [cpu_variant_start, cpu_variant_end) and must be exactly
 * "0x" followed by a single hexadecimal digit (the MIDR variant field is 4 bits wide).
 * Anything else is logged with a warning and leaves the processor record untouched.
 */
static void parse_cpu_variant(
	const char* cpu_variant_start,
	const char* cpu_variant_end,
	struct cpuinfo_arm_linux_processor processor[restrict static 1])
{
	const size_t cpu_variant_length = cpu_variant_end - cpu_variant_start;

	/* Prefix (2 characters) + one digit */
	if (cpu_variant_length != 3) {
		cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)",
			(int) cpu_variant_length, cpu_variant_start, cpu_variant_length);
		return;
	}

	/* The value must begin with the hexadecimal prefix */
	if (memcmp(cpu_variant_start, "0x", 2) != 0) {
		cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix",
			(int) cpu_variant_length, cpu_variant_start);
		return;
	}

	/* Decode the single hexadecimal digit after the prefix */
	const char c = cpu_variant_start[2];
	uint32_t variant;
	if (c >= '0' && c <= '9') {
		variant = (uint32_t) (c - '0');
	} else if (c >= 'A' && c <= 'F') {
		variant = 10 + (uint32_t) (c - 'A');
	} else if (c >= 'a' && c <= 'f') {
		variant = 10 + (uint32_t) (c - 'a');
	} else {
		cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c'",
			(int) cpu_variant_length, cpu_variant_start, c);
		return;
	}

	processor->midr = midr_set_variant(processor->midr, variant);
	processor->flags |= CPUINFO_ARM_LINUX_VALID_VARIANT | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
}
/*
 * Decode the value of a "CPU revision" line and store it in the revision field of processor->midr.
 *
 * The value is the character range [cpu_revision_start, cpu_revision_end) and must consist
 * of decimal digits only. A non-digit character is logged with a warning and leaves the
 * processor record untouched.
 */
static void parse_cpu_revision(
	const char* cpu_revision_start,
	const char* cpu_revision_end,
	struct cpuinfo_arm_linux_processor processor[restrict static 1])
{
	const size_t revision_length = (size_t) (cpu_revision_end - cpu_revision_start);

	uint32_t revision = 0;
	for (size_t offset = 0; offset < revision_length; offset++) {
		const char c = cpu_revision_start[offset];
		/* Reject anything that is not a decimal digit */
		if (c < '0' || c > '9') {
			cpuinfo_log_warning("CPU revision %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu",
				(int) (cpu_revision_end - cpu_revision_start), cpu_revision_start,
				c, offset);
			return;
		}
		revision = revision * 10 + (uint32_t) (c - '0');
	}

	processor->midr = midr_set_revision(processor->midr, revision);
	processor->flags |= CPUINFO_ARM_LINUX_VALID_REVISION | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
}
#if CPUINFO_ARCH_ARM
/*
* Decode one of the cache-related numbers reported by Linux kernel
* for pre-ARMv7 architecture.
* An example cache-related information in /proc/cpuinfo:
*
* I size : 32768
* I assoc : 4
* I line length : 32
* I sets : 256
* D size : 16384
* D assoc : 4
* D line length : 32
* D sets : 128
*
*/
/*
 * Decode one decimal cache parameter and record it for the current processor.
 *
 * number_start/number_end - character range [start, end) holding the value.
 * number_name             - human-readable name of the parameter, used in log messages only.
 * number_ptr              - destination for the decoded value.
 * flags                   - processor flags word; number_mask and VALID_PROCESSOR are OR-ed
 *                           into it when the value is accepted.
 * number_mask             - validity flag that corresponds to this parameter.
 *
 * A value is rejected (logged with a warning, *number_ptr and *flags left untouched) when it
 * contains a non-digit character, when it is zero, or when it is a cache line size other than
 * 16, 32, 64, or 128. Rejected values must not be stored: the warnings state that the value
 * is ignored, and marking a bogus value as valid would hand it to the consumers of the flags.
 */
static void parse_cache_number(
	const char* number_start,
	const char* number_end,
	const char* number_name,
	uint32_t number_ptr[restrict static 1],
	uint32_t flags[restrict static 1],
	uint32_t number_mask)
{
	uint32_t number = 0;
	for (const char* digit_ptr = number_start; digit_ptr != number_end; digit_ptr++) {
		/* Characters below '0' wrap around to large unsigned values and are rejected as well */
		const uint32_t digit = *digit_ptr - '0';
		if (digit >= 10) {
			cpuinfo_log_warning("%s %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu",
				number_name, (int) (number_end - number_start), number_start,
				*digit_ptr, (size_t) (digit_ptr - number_start));
			return;
		}

		number = number * 10 + digit;
	}

	if (number == 0) {
		cpuinfo_log_warning("%s %.*s in /proc/cpuinfo is ignored due to invalid value of zero reported by the kernel",
			number_name, (int) (number_end - number_start), number_start);
		/* Actually ignore the value, as the message says */
		return;
	}

	/* If the number specifies a cache line size, verify that is a reasonable power of 2 */
	if (number_mask & CPUINFO_ARM_LINUX_VALID_CACHE_LINE) {
		switch (number) {
			case 16:
			case 32:
			case 64:
			case 128:
				break;
			default:
				cpuinfo_log_warning("invalid %s %.*s is ignored: a value of 16, 32, 64, or 128 expected",
					number_name, (int) (number_end - number_start), number_start);
				/* Actually ignore the value, as the message says */
				return;
		}
	}

	*number_ptr = number;
	*flags |= number_mask | CPUINFO_ARM_LINUX_VALID_PROCESSOR;
}
#endif /* CPUINFO_ARCH_ARM */
/* State carried across parse_line invocations while /proc/cpuinfo is scanned line by line. */
struct proc_cpuinfo_parser_state {
/* Destination buffer for the value of the "Hardware" line */
char* hardware;
/* Destination buffer for the value of the "Revision" line */
char* revision;
/* Index from the most recent "processor" line; selects the record that subsequent keys update */
uint32_t processor_index;
/* Number of entries in the processors array */
uint32_t max_processors_count;
/* Per-processor records filled in by the parser */
struct cpuinfo_arm_linux_processor* processors;
/* Scratch record that absorbs parsed values while processor_index is out of range */
struct cpuinfo_arm_linux_processor dummy_processor;
};
/*
* Decode a single line of /proc/cpuinfo information.
* Lines have format <words-with-spaces>[ ]*:[ ]<space-separated words>
* An example of /proc/cpuinfo (from Pandaboard-ES):
*
* Processor : ARMv7 Processor rev 10 (v7l)
* processor : 0
* BogoMIPS : 1392.74
*
* processor : 1
* BogoMIPS : 1363.33
*
* Features : swp half thumb fastmult vfp edsp thumbee neon vfpv3
* CPU implementer : 0x41
* CPU architecture: 7
* CPU variant : 0x2
* CPU part : 0xc09
* CPU revision : 10
*
* Hardware : OMAP4 Panda board
* Revision : 0020
* Serial : 0000000000000000
*/
/*
 * Callback invoked for every line of /proc/cpuinfo.
 *
 * line_start/line_end - character range [start, end) of the line (without the line terminator).
 * state               - parser state shared between invocations.
 * line_number         - number of the line in the file (currently unused).
 *
 * The line is split at the first ':' into a key and a value, surrounding spaces are trimmed,
 * and the key is dispatched on its length and then compared with memcmp against the known keys.
 * Per-processor keys update the record selected by the last seen "processor" line, or the
 * dummy record if that index is out of range. Unknown keys and malformed lines are logged
 * and skipped.
 *
 * Always returns true.
 */
static bool parse_line(
	const char* line_start,
	const char* line_end,
	struct proc_cpuinfo_parser_state state[restrict static 1],
	uint64_t line_number)
{
	/* Empty line. Skip. */
	if (line_start == line_end) {
		return true;
	}

	/* Search for ':' on the line. */
	const char* separator = line_start;
	for (; separator != line_end; separator++) {
		if (*separator == ':') {
			break;
		}
	}
	/* Skip line if no ':' separator was found. */
	if (separator == line_end) {
		cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key/value separator ':' not found",
			(int) (line_end - line_start), line_start);
		return true;
	}

	/* Skip trailing spaces in key part. */
	const char* key_end = separator;
	for (; key_end != line_start; key_end--) {
		if (key_end[-1] != ' ' && key_end[-1] != '\t') {
			break;
		}
	}
	/* Skip line if key contains nothing but spaces. */
	if (key_end == line_start) {
		cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key contains only spaces",
			(int) (line_end - line_start), line_start);
		return true;
	}

	/* Skip leading spaces in value part. */
	const char* value_start = separator + 1;
	for (; value_start != line_end; value_start++) {
		if (*value_start != ' ') {
			break;
		}
	}
	/* Value part contains nothing but spaces. Skip line. */
	if (value_start == line_end) {
		cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: value contains only spaces",
			(int) (line_end - line_start), line_start);
		return true;
	}

	/* Skip trailing spaces in value part (if any) */
	const char* value_end = line_end;
	for (; value_end != value_start; value_end--) {
		if (value_end[-1] != ' ') {
			break;
		}
	}

	/* Select the record to update; out-of-range indices are redirected to the dummy record */
	const uint32_t processor_index = state->processor_index;
	const uint32_t max_processors_count = state->max_processors_count;
	struct cpuinfo_arm_linux_processor* processors = state->processors;
	struct cpuinfo_arm_linux_processor* processor = &state->dummy_processor;
	if (processor_index < max_processors_count) {
		processor = &processors[processor_index];
	}

	/* Dispatch on key length first so that each memcmp compares exactly key_length bytes */
	const size_t key_length = key_end - line_start;
	switch (key_length) {
		case 6:
			if (memcmp(line_start, "Serial", key_length) == 0) {
				/* Usually contains just zeros, useless */
#if CPUINFO_ARCH_ARM
			} else if (memcmp(line_start, "I size", key_length) == 0) {
				parse_cache_number(value_start, value_end,
					"instruction cache size", &processor->proc_cpuinfo_cache.i_size,
					&processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_SIZE);
			} else if (memcmp(line_start, "I sets", key_length) == 0) {
				parse_cache_number(value_start, value_end,
					"instruction cache sets", &processor->proc_cpuinfo_cache.i_sets,
					&processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_SETS);
			} else if (memcmp(line_start, "D size", key_length) == 0) {
				parse_cache_number(value_start, value_end,
					"data cache size", &processor->proc_cpuinfo_cache.d_size,
					&processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_SIZE);
			} else if (memcmp(line_start, "D sets", key_length) == 0) {
				parse_cache_number(value_start, value_end,
					"data cache sets", &processor->proc_cpuinfo_cache.d_sets,
					&processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_SETS);
#endif /* CPUINFO_ARCH_ARM */
			} else {
				goto unknown;
			}
			break;
#if CPUINFO_ARCH_ARM
		case 7:
			if (memcmp(line_start, "I assoc", key_length) == 0) {
				parse_cache_number(value_start, value_end,
					"instruction cache associativity", &processor->proc_cpuinfo_cache.i_assoc,
					&processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_WAYS);
			} else if (memcmp(line_start, "D assoc", key_length) == 0) {
				parse_cache_number(value_start, value_end,
					"data cache associativity", &processor->proc_cpuinfo_cache.d_assoc,
					&processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_WAYS);
			} else {
				goto unknown;
			}
			break;
#endif /* CPUINFO_ARCH_ARM */
		case 8:
			if (memcmp(line_start, "CPU part", key_length) == 0) {
				parse_cpu_part(value_start, value_end, processor);
			} else if (memcmp(line_start, "Features", key_length) == 0) {
				parse_features(value_start, value_end, processor);
			} else if (memcmp(line_start, "BogoMIPS", key_length) == 0) {
				/* BogoMIPS is useless, don't parse */
			} else if (memcmp(line_start, "Hardware", key_length) == 0) {
				size_t value_length = value_end - value_start;
				if (value_length > CPUINFO_HARDWARE_VALUE_MAX) {
					cpuinfo_log_info(
						"length of Hardware value \"%.*s\" in /proc/cpuinfo exceeds limit (%d): truncating to the limit",
						(int) value_length, value_start, CPUINFO_HARDWARE_VALUE_MAX);
					value_length = CPUINFO_HARDWARE_VALUE_MAX;
				} else if (value_length < CPUINFO_HARDWARE_VALUE_MAX) {
					/*
					 * Terminate only when the terminator fits: the buffer is guaranteed to hold
					 * just CPUINFO_HARDWARE_VALUE_MAX chars, so a value of exactly that length
					 * fills it completely and is stored without a terminator, like a truncated one.
					 */
					state->hardware[value_length] = '\0';
				}
				memcpy(state->hardware, value_start, value_length);
				cpuinfo_log_debug("parsed /proc/cpuinfo Hardware = \"%.*s\"", (int) value_length, value_start);
			} else if (memcmp(line_start, "Revision", key_length) == 0) {
				size_t value_length = value_end - value_start;
				if (value_length > CPUINFO_REVISION_VALUE_MAX) {
					cpuinfo_log_info(
						"length of Revision value \"%.*s\" in /proc/cpuinfo exceeds limit (%d): truncating to the limit",
						(int) value_length, value_start, CPUINFO_REVISION_VALUE_MAX);
					value_length = CPUINFO_REVISION_VALUE_MAX;
				} else if (value_length < CPUINFO_REVISION_VALUE_MAX) {
					/*
					 * Terminate only when the terminator fits: the buffer is guaranteed to hold
					 * just CPUINFO_REVISION_VALUE_MAX chars, so a value of exactly that length
					 * fills it completely and is stored without a terminator, like a truncated one.
					 */
					state->revision[value_length] = '\0';
				}
				memcpy(state->revision, value_start, value_length);
				cpuinfo_log_debug("parsed /proc/cpuinfo Revision = \"%.*s\"", (int) value_length, value_start);
			} else {
				goto unknown;
			}
			break;
		case 9:
			if (memcmp(line_start, "processor", key_length) == 0) {
				const uint32_t new_processor_index = parse_processor_number(value_start, value_end);
				if (new_processor_index < processor_index) {
					/* Strange: decreasing processor number */
					cpuinfo_log_warning(
						"unexpectedly low processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo",
						new_processor_index, processor_index);
				} else if (new_processor_index > processor_index + 1) {
					/* Strange, but common: skipped processor $(processor_index + 1) */
					cpuinfo_log_info(
						"unexpectedly high processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo",
						new_processor_index, processor_index);
				}
				if (new_processor_index < max_processors_count) {
					/* Record that the processor was mentioned in /proc/cpuinfo */
					processors[new_processor_index].flags |= CPUINFO_ARM_LINUX_VALID_PROCESSOR;
				} else {
					/* Log and ignore processor */
					cpuinfo_log_warning("processor %"PRIu32" in /proc/cpuinfo is ignored: index exceeds system limit %"PRIu32,
						new_processor_index, max_processors_count - 1);
				}
				/* Subsequent per-processor keys apply to this index (or to the dummy record if out of range) */
				state->processor_index = new_processor_index;
				return true;
			} else if (memcmp(line_start, "Processor", key_length) == 0) {
				/* TODO: parse to fix misreported architecture, similar to Android's cpufeatures */
			} else {
				goto unknown;
			}
			break;
		case 11:
			if (memcmp(line_start, "CPU variant", key_length) == 0) {
				parse_cpu_variant(value_start, value_end, processor);
			} else {
				goto unknown;
			}
			break;
		case 12:
			if (memcmp(line_start, "CPU revision", key_length) == 0) {
				parse_cpu_revision(value_start, value_end, processor);
			} else {
				goto unknown;
			}
			break;
#if CPUINFO_ARCH_ARM
		case 13:
			if (memcmp(line_start, "I line length", key_length) == 0) {
				parse_cache_number(value_start, value_end,
					"instruction cache line size", &processor->proc_cpuinfo_cache.i_line_length,
					&processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_LINE);
			} else if (memcmp(line_start, "D line length", key_length) == 0) {
				parse_cache_number(value_start, value_end,
					"data cache line size", &processor->proc_cpuinfo_cache.d_line_length,
					&processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_LINE);
			} else {
				goto unknown;
			}
			break;
#endif /* CPUINFO_ARCH_ARM */
		case 15:
			if (memcmp(line_start, "CPU implementer", key_length) == 0) {
				parse_cpu_implementer(value_start, value_end, processor);
			} else if (memcmp(line_start, "CPU implementor", key_length) == 0) {
				/* Alternative spelling of the same key */
				parse_cpu_implementer(value_start, value_end, processor);
			} else {
				goto unknown;
			}
			break;
		case 16:
			if (memcmp(line_start, "CPU architecture", key_length) == 0) {
				parse_cpu_architecture(value_start, value_end, processor);
			} else {
				goto unknown;
			}
			break;
		default:
		/* Shared landing point for keys of an unknown length and for failed name comparisons */
		unknown:
			cpuinfo_log_debug("unknown /proc/cpuinfo key: %.*s", (int) key_length, line_start);
	}
	return true;
}
bool cpuinfo_arm_linux_parse_proc_cpuinfo(
char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX],
char revision[restrict static CPUINFO_REVISION_VALUE_MAX],
uint32_t max_processors_count,
struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count])
{
struct proc_cpuinfo_parser_state state = {
.hardware = hardware,
.revision = revision,
.processor_index = 0,
.max_processors_count = max_processors_count,
.processors = processors,
};
return cpuinfo_linux_parse_multiline_file("/proc/cpuinfo", BUFFER_SIZE,
(cpuinfo_line_callback) parse_line, &state);
}

View File

@@ -0,0 +1,159 @@
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <dlfcn.h>
#include <elf.h>
#if CPUINFO_MOCK
#include <cpuinfo-mock.h>
#endif
#include <cpuinfo.h>
#include <arm/linux/api.h>
#include <cpuinfo/log.h>
#if CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_ARM && !defined(__ANDROID__)
#include <sys/auxv.h>
#else
#define AT_HWCAP 16
#define AT_HWCAP2 26
#endif
#if CPUINFO_MOCK
/* Mock build: HWCAP value reported by the hwcap readers in this file; zero until set. */
static uint32_t mock_hwcap = 0;
/* Mock build: set the HWCAP value that the hwcap readers in this file will report. */
void cpuinfo_set_hwcap(uint32_t hwcap) {
mock_hwcap = hwcap;
}
/* Mock build: HWCAP2 value reported by the hwcap readers in this file; zero until set. */
static uint32_t mock_hwcap2 = 0;
/* Mock build: set the HWCAP2 value that the hwcap readers in this file will report. */
void cpuinfo_set_hwcap2(uint32_t hwcap2) {
mock_hwcap2 = hwcap2;
}
#endif
#if CPUINFO_ARCH_ARM
/* Signature of getauxval, used when the function is looked up at run time */
typedef unsigned long (*getauxval_function_t)(unsigned long);

/*
 * Query the AT_HWCAP and AT_HWCAP2 auxiliary vector entries through getauxval.
 *
 * hwcap, hwcap2 - destinations for the two values; written only on success.
 *
 * Mock builds return the values installed through cpuinfo_set_hwcap/cpuinfo_set_hwcap2.
 * On Android getauxval is resolved dynamically from libc.so and the function returns
 * false when the library or the symbol is unavailable. Elsewhere getauxval is called
 * directly and the function always returns true.
 */
bool cpuinfo_arm_linux_hwcap_from_getauxval(
	uint32_t hwcap[restrict static 1],
	uint32_t hwcap2[restrict static 1])
{
#if CPUINFO_MOCK
	*hwcap = mock_hwcap;
	*hwcap2 = mock_hwcap2;
	return true;
#elif defined(__ANDROID__)
	/* Android: dynamically check if getauxval is supported */
	getauxval_function_t getauxval = NULL;

	/* Clear any stale error state before the lookups */
	dlerror();
	void* libc = dlopen("libc.so", RTLD_LAZY);
	if (libc == NULL) {
		cpuinfo_log_warning("failed to load libc.so: %s", dlerror());
		return false;
	}

	getauxval = (getauxval_function_t) dlsym(libc, "getauxval");
	if (getauxval == NULL) {
		cpuinfo_log_info("failed to locate getauxval in libc.so: %s", dlerror());
	} else {
		*hwcap = getauxval(AT_HWCAP);
		*hwcap2 = getauxval(AT_HWCAP2);
	}

	/* The library handle is released on both the success and the failure path */
	dlclose(libc);
	return getauxval != NULL;
#else
	/* GNU/Linux: getauxval is always supported */
	*hwcap = getauxval(AT_HWCAP);
	*hwcap2 = getauxval(AT_HWCAP2);
	return true;
#endif
}
#ifdef __ANDROID__
/*
 * Reads AT_HWCAP and AT_HWCAP2 by scanning the Elf32_auxv_t records in /proc/self/auxv.
 *
 * Mock builds return the injected mock values. Otherwise the file is read one
 * record at a time until end of file. The outputs are written only if the whole
 * file was consumed without a read error or a truncated record; entries that are
 * absent from the file are reported as 0.
 *
 * Returns true on success, false if the file could not be opened or fully read.
 */
bool cpuinfo_arm_linux_hwcap_from_procfs(
	uint32_t hwcap[restrict static 1],
	uint32_t hwcap2[restrict static 1])
{
#if CPUINFO_MOCK
	hwcap[0] = mock_hwcap;
	hwcap2[0] = mock_hwcap2;
	return true;
#else
	const int fd = open("/proc/self/auxv", O_RDONLY);
	if (fd == -1) {
		cpuinfo_log_warning("failed to open /proc/self/auxv: %s", strerror(errno));
		return false;
	}

	uint32_t found_hwcap = 0, found_hwcap2 = 0;
	bool reached_eof = false;
	for (;;) {
		Elf32_auxv_t entry;
		const ssize_t bytes_read = read(fd, &entry, sizeof(entry));
		if (bytes_read < 0) {
			cpuinfo_log_warning("failed to read /proc/self/auxv: %s", strerror(errno));
			break;
		}
		if (bytes_read == 0) {
			/* End of file: every record was consumed */
			reached_eof = true;
			break;
		}
		if ((size_t) bytes_read != sizeof(entry)) {
			/* Truncated record: treat the whole scan as failed */
			cpuinfo_log_warning(
				"failed to read %zu bytes from /proc/self/auxv: %zu bytes available",
				sizeof(entry), (size_t) bytes_read);
			break;
		}

		if (entry.a_type == AT_HWCAP) {
			found_hwcap = (uint32_t) entry.a_un.a_val;
		} else if (entry.a_type == AT_HWCAP2) {
			found_hwcap2 = (uint32_t) entry.a_un.a_val;
		}
	}

	if (reached_eof) {
		/* Success, commit results */
		hwcap[0] = found_hwcap;
		hwcap2[0] = found_hwcap2;
	}
	close(fd);
	return reached_eof;
#endif
}
#endif /* __ANDROID__ */
#elif CPUINFO_ARCH_ARM64
/*
 * Reads AT_HWCAP and AT_HWCAP2 via getauxval (ARM64 variant).
 *
 * Mock builds return the injected mock values instead. Both outputs are
 * always written; the values are truncated to 32 bits.
 */
void cpuinfo_arm_linux_hwcap_from_getauxval(
	uint32_t hwcap[restrict static 1],
	uint32_t hwcap2[restrict static 1])
{
#if CPUINFO_MOCK
	hwcap[0] = mock_hwcap;
	hwcap2[0] = mock_hwcap2;
#else
	const unsigned long auxv_hwcap = getauxval(AT_HWCAP);
	const unsigned long auxv_hwcap2 = getauxval(AT_HWCAP2);
	hwcap[0] = (uint32_t) auxv_hwcap;
	hwcap2[0] = (uint32_t) auxv_hwcap2;
#endif
}
#endif

View File

@@ -0,0 +1,765 @@
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <cpuinfo.h>
#include <arm/linux/api.h>
#if defined(__ANDROID__)
#include <arm/android/api.h>
#endif
#include <arm/api.h>
#include <arm/midr.h>
#include <linux/api.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
/* ISA description for this system; filled in by the ISA decoding step of cpuinfo_arm_linux_init() */
struct cpuinfo_arm_isa cpuinfo_isa = { 0 };
/*
 * The single package descriptor published by cpuinfo_arm_linux_init():
 * its name and processor/core/cluster counts are set there, and
 * cpuinfo_packages is pointed at it with a package count of 1.
 */
static struct cpuinfo_package package = { { 0 } };
/* Returns true when every bit set in mask is also set in bitfield */
static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
	const uint32_t missing_bits = mask & ~bitfield;
	return missing_bits == 0;
}
/* Returns the smaller of two unsigned 32-bit values */
static inline uint32_t min(uint32_t a, uint32_t b) {
	if (b < a) {
		return b;
	}
	return a;
}
/* Three-way comparison: -1 when a < b, 0 when equal, 1 when a > b */
static inline int cmp(uint32_t a, uint32_t b) {
	if (a == b) {
		return 0;
	}
	return a < b ? -1 : 1;
}
/*
 * Siblings-list callback: merges a processor and a range of its siblings into one cluster.
 *
 * The processor and every valid sibling in [siblings_start, siblings_end) are flagged
 * with CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER. The smallest package leader id seen so far
 * is carried through the range and written to each valid sibling and, at the end,
 * to the processor itself. Siblings without CPUINFO_LINUX_FLAG_VALID are logged and skipped.
 *
 * Always returns true.
 */
static bool cluster_siblings_parser(
	uint32_t processor, uint32_t siblings_start, uint32_t siblings_end,
	struct cpuinfo_arm_linux_processor* processors)
{
	struct cpuinfo_arm_linux_processor* const self = &processors[processor];
	self->flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;

	uint32_t lowest_leader_id = self->package_leader_id;
	for (uint32_t index = siblings_start; index < siblings_end; index++) {
		struct cpuinfo_arm_linux_processor* const peer = &processors[index];
		if (bitmask_all(peer->flags, CPUINFO_LINUX_FLAG_VALID)) {
			const uint32_t peer_leader_id = peer->package_leader_id;
			if (peer_leader_id < lowest_leader_id) {
				lowest_leader_id = peer_leader_id;
			}
			peer->package_leader_id = lowest_leader_id;
			peer->flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER;
		} else {
			cpuinfo_log_info("invalid processor %"PRIu32" reported as a sibling for processor %"PRIu32,
				index, processor);
		}
	}

	self->package_leader_id = lowest_leader_id;
	return true;
}
/*
 * qsort comparator for struct cpuinfo_arm_linux_processor.
 *
 * Ordering keys, in priority order:
 *  1. valid processors before invalid ones;
 *  2. higher midr_score_core() first (only consulted when the MIDRs differ);
 *  3. higher maximum frequency first;
 *  4. higher package leader id first;
 *  5. lower system processor id first.
 */
static int cmp_arm_linux_processor(const void* ptr_a, const void* ptr_b) {
	const struct cpuinfo_arm_linux_processor* const lhs = ptr_a;
	const struct cpuinfo_arm_linux_processor* const rhs = ptr_b;

	/* Usable processors go towards the start of the array */
	const bool lhs_valid = bitmask_all(lhs->flags, CPUINFO_LINUX_FLAG_VALID);
	const bool rhs_valid = bitmask_all(rhs->flags, CPUINFO_LINUX_FLAG_VALID);
	if (lhs_valid != rhs_valid) {
		return lhs_valid ? -1 : 1;
	}

	/* Core type (e.g. Cortex-A57 sorts before Cortex-A53) */
	const uint32_t lhs_midr = lhs->midr;
	const uint32_t rhs_midr = rhs->midr;
	if (lhs_midr != rhs_midr) {
		const uint32_t lhs_score = midr_score_core(lhs_midr);
		const uint32_t rhs_score = midr_score_core(rhs_midr);
		if (lhs_score > rhs_score) {
			return -1;
		}
		if (lhs_score < rhs_score) {
			return 1;
		}
	}

	/* Core frequency (e.g. 2.0 GHz sorts before 1.2 GHz) */
	const uint32_t lhs_frequency = lhs->max_frequency;
	const uint32_t rhs_frequency = rhs->max_frequency;
	if (lhs_frequency > rhs_frequency) {
		return -1;
	}
	if (lhs_frequency < rhs_frequency) {
		return 1;
	}

	/* Cluster leader id (i.e. cluster 1 sorts before cluster 0) */
	const uint32_t lhs_leader = lhs->package_leader_id;
	const uint32_t rhs_leader = rhs->package_leader_id;
	if (lhs_leader > rhs_leader) {
		return -1;
	}
	if (lhs_leader < rhs_leader) {
		return 1;
	}

	/* System processor id (i.e. processor 0 sorts before processor 1) */
	const uint32_t lhs_id = lhs->system_processor_id;
	const uint32_t rhs_id = rhs->system_processor_id;
	return (lhs_id > rhs_id) - (lhs_id < rhs_id);
}
/*
 * Detects the processor topology, ISA features and cache hierarchy on ARM/ARM64 Linux
 * (including Android) and publishes them through the cpuinfo_* global tables.
 *
 * Outline:
 *  1. Size the per-processor scratch array from the kernel's possible/present processor lists.
 *  2. Parse /proc/cpuinfo and mark processors that are in the required lists as valid.
 *  3. Decode the chipset and the ISA features (hwcap, with /proc/cpuinfo fallback on 32-bit Android).
 *  4. Read frequencies and package ids, then group processors into clusters.
 *  5. Sort processors, then build the processor/core/cluster/uarch/cache tables.
 *  6. Commit the tables to the globals and set cpuinfo_is_initialized.
 *
 * On any failure the function logs an error and returns without setting
 * cpuinfo_is_initialized; every allocation made up to that point is released.
 */
void cpuinfo_arm_linux_init(void) {
	/* Everything allocated here starts as NULL so that the cleanup path can free unconditionally */
	struct cpuinfo_arm_linux_processor* arm_linux_processors = NULL;
	struct cpuinfo_processor* processors = NULL;
	struct cpuinfo_core* cores = NULL;
	struct cpuinfo_cluster* clusters = NULL;
	struct cpuinfo_uarch_info* uarchs = NULL;
	struct cpuinfo_cache* l1i = NULL;
	struct cpuinfo_cache* l1d = NULL;
	struct cpuinfo_cache* l2 = NULL;
	struct cpuinfo_cache* l3 = NULL;
	const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL;
	const struct cpuinfo_core** linux_cpu_to_core_map = NULL;
	uint32_t* linux_cpu_to_uarch_index_map = NULL;

	const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count();
	cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count);

	const uint32_t max_possible_processors_count = 1 +
		cpuinfo_linux_get_max_possible_processor(max_processors_count);
	cpuinfo_log_debug("maximum possible processors count: %"PRIu32, max_possible_processors_count);
	const uint32_t max_present_processors_count = 1 +
		cpuinfo_linux_get_max_present_processor(max_processors_count);
	cpuinfo_log_debug("maximum present processors count: %"PRIu32, max_present_processors_count);

	/*
	 * A count of zero is treated as "this list is unavailable": only the lists
	 * that are available contribute to the array size and to the validity mask.
	 */
	uint32_t valid_processor_mask = 0;
	uint32_t arm_linux_processors_count = max_processors_count;
	if (max_present_processors_count != 0) {
		arm_linux_processors_count = min(arm_linux_processors_count, max_present_processors_count);
		valid_processor_mask = CPUINFO_LINUX_FLAG_PRESENT;
	}
	if (max_possible_processors_count != 0) {
		arm_linux_processors_count = min(arm_linux_processors_count, max_possible_processors_count);
		valid_processor_mask |= CPUINFO_LINUX_FLAG_POSSIBLE;
	}
	if ((max_present_processors_count | max_possible_processors_count) == 0) {
		cpuinfo_log_error("failed to parse both lists of possible and present processors");
		return;
	}

	arm_linux_processors = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_arm_linux_processor));
	if (arm_linux_processors == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %"PRIu32" ARM logical processors",
			arm_linux_processors_count * sizeof(struct cpuinfo_arm_linux_processor),
			arm_linux_processors_count);
		return;
	}

	if (max_possible_processors_count) {
		cpuinfo_linux_detect_possible_processors(
			arm_linux_processors_count, &arm_linux_processors->flags,
			sizeof(struct cpuinfo_arm_linux_processor),
			CPUINFO_LINUX_FLAG_POSSIBLE);
	}

	if (max_present_processors_count) {
		cpuinfo_linux_detect_present_processors(
			arm_linux_processors_count, &arm_linux_processors->flags,
			sizeof(struct cpuinfo_arm_linux_processor),
			CPUINFO_LINUX_FLAG_PRESENT);
	}

	#if defined(__ANDROID__)
		struct cpuinfo_android_properties android_properties;
		cpuinfo_arm_android_parse_properties(&android_properties);
	#else
		char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX];
	#endif
	char proc_cpuinfo_revision[CPUINFO_REVISION_VALUE_MAX];

	if (!cpuinfo_arm_linux_parse_proc_cpuinfo(
	#if defined(__ANDROID__)
			android_properties.proc_cpuinfo_hardware,
	#else
			proc_cpuinfo_hardware,
	#endif
			proc_cpuinfo_revision,
			arm_linux_processors_count,
			arm_linux_processors)) {
		cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo");
		/* arm_linux_processors is already allocated: release it through the cleanup path */
		goto cleanup;
	}

	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
		if (bitmask_all(arm_linux_processors[i].flags, valid_processor_mask)) {
			arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID;
			cpuinfo_log_debug("parsed processor %"PRIu32" MIDR 0x%08"PRIx32,
				i, arm_linux_processors[i].midr);
		}
	}

	/* Count valid processors and remember the last MIDR/architecture reported by any of them */
	uint32_t valid_processors = 0, last_midr = 0;
	#if CPUINFO_ARCH_ARM
	uint32_t last_architecture_version = 0, last_architecture_flags = 0;
	#endif
	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
		arm_linux_processors[i].system_processor_id = i;
		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
			valid_processors += 1;

			if (!(arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR)) {
				/*
				 * Processor is in possible and present lists, but not reported in /proc/cpuinfo.
				 * This is fairly common: high-index processors can be not reported if they are offline.
				 */
				cpuinfo_log_info("processor %"PRIu32" is not listed in /proc/cpuinfo", i);
			}

			if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
				last_midr = arm_linux_processors[i].midr;
			}
			#if CPUINFO_ARCH_ARM
				if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_ARCHITECTURE)) {
					last_architecture_version = arm_linux_processors[i].architecture_version;
					last_architecture_flags = arm_linux_processors[i].architecture_flags;
				}
			#endif
		} else {
			/* Processor reported in /proc/cpuinfo, but not in possible and/or present lists: log and ignore */
			if (!(arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR)) {
				cpuinfo_log_warning("invalid processor %"PRIu32" reported in /proc/cpuinfo", i);
			}
		}
	}

	#if defined(__ANDROID__)
		const struct cpuinfo_arm_chipset chipset =
			cpuinfo_arm_android_decode_chipset(&android_properties, valid_processors, 0);
	#else
		const struct cpuinfo_arm_chipset chipset =
			cpuinfo_arm_linux_decode_chipset(proc_cpuinfo_hardware, proc_cpuinfo_revision, valid_processors, 0);
	#endif

	#if CPUINFO_ARCH_ARM
		uint32_t isa_features = 0, isa_features2 = 0;
		#ifdef __ANDROID__
			/*
			 * On Android before API 20, libc.so does not provide getauxval function.
			 * Thus, we try to dynamically find it, or use two fallback mechanisms:
			 * 1. dlopen libc.so, and try to find getauxval
			 * 2. Parse /proc/self/auxv procfs file
			 * 3. Use features reported in /proc/cpuinfo
			 */
			if (!cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2)) {
				/* getauxval can't be used, fall back to parsing /proc/self/auxv */
				if (!cpuinfo_arm_linux_hwcap_from_procfs(&isa_features, &isa_features2)) {
					/*
					 * Reading /proc/self/auxv failed, probably due to file permissions.
					 * Use information from /proc/cpuinfo to detect ISA.
					 *
					 * If different processors report different ISA features, take the intersection.
					 */
					uint32_t processors_with_features = 0;
					for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
						if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_ARM_LINUX_VALID_FEATURES)) {
							if (processors_with_features == 0) {
								isa_features = arm_linux_processors[i].features;
								isa_features2 = arm_linux_processors[i].features2;
							} else {
								isa_features &= arm_linux_processors[i].features;
								isa_features2 &= arm_linux_processors[i].features2;
							}
							processors_with_features += 1;
						}
					}
				}
			}
		#else
			/* On GNU/Linux getauxval is always available */
			cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2);
		#endif
		cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
			isa_features, isa_features2,
			last_midr, last_architecture_version, last_architecture_flags,
			&chipset, &cpuinfo_isa);
	#elif CPUINFO_ARCH_ARM64
		uint32_t isa_features = 0, isa_features2 = 0;
		/* getauxval is always available on ARM64 Android */
		cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2);
		cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
			isa_features, isa_features2, last_midr, &chipset, &cpuinfo_isa);
	#endif

	/* Detect min/max frequency and package ID */
	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
			const uint32_t max_frequency = cpuinfo_linux_get_processor_max_frequency(i);
			if (max_frequency != 0) {
				arm_linux_processors[i].max_frequency = max_frequency;
				arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
			}

			const uint32_t min_frequency = cpuinfo_linux_get_processor_min_frequency(i);
			if (min_frequency != 0) {
				arm_linux_processors[i].min_frequency = min_frequency;
				arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY;
			}

			if (cpuinfo_linux_get_processor_package_id(i, &arm_linux_processors[i].package_id)) {
				arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_ID;
			}
		}
	}

	/* Initialize topology group IDs */
	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
		arm_linux_processors[i].package_leader_id = i;
	}

	/* Propagate topology group IDs among siblings */
	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
		if (!bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
			continue;
		}

		if (arm_linux_processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_ID) {
			cpuinfo_linux_detect_core_siblings(
				arm_linux_processors_count, i,
				(cpuinfo_siblings_callback) cluster_siblings_parser,
				arm_linux_processors);
		}
	}

	/* Propagate all cluster IDs */
	uint32_t clustered_processors = 0;
	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) {
			clustered_processors += 1;

			const uint32_t package_leader_id = arm_linux_processors[i].package_leader_id;
			if (package_leader_id < i) {
				arm_linux_processors[i].package_leader_id = arm_linux_processors[package_leader_id].package_leader_id;
			}

			cpuinfo_log_debug("processor %"PRIu32" clustered with processor %"PRIu32" as inferred from system siblings lists",
				i, arm_linux_processors[i].package_leader_id);
		}
	}

	if (clustered_processors != valid_processors) {
		/*
		 * Topology information about some or all logical processors may be unavailable, for the following reasons:
		 * - Linux kernel is too old, or configured without support for topology information in sysfs.
		 * - Core is offline, and Linux kernel is configured to not report topology for offline cores.
		 *
		 * In this case, we assign processors to clusters using two methods:
		 * - Try heuristic cluster configurations (e.g. 6-core SoC usually has 4+2 big.LITTLE configuration).
		 * - If heuristic failed, assign processors to core clusters in a sequential scan.
		 */
		if (!cpuinfo_arm_linux_detect_core_clusters_by_heuristic(valid_processors, arm_linux_processors_count, arm_linux_processors)) {
			cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(arm_linux_processors_count, arm_linux_processors);
		}
	}

	cpuinfo_arm_linux_count_cluster_processors(arm_linux_processors_count, arm_linux_processors);

	const uint32_t cluster_count = cpuinfo_arm_linux_detect_cluster_midr(
		&chipset,
		arm_linux_processors_count, valid_processors, arm_linux_processors);

	/* Initialize core vendor, uarch, MIDR, and frequency for every logical processor */
	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
			const uint32_t cluster_leader = arm_linux_processors[i].package_leader_id;
			if (cluster_leader == i) {
				/* Cluster leader: decode core vendor and uarch */
				cpuinfo_arm_decode_vendor_uarch(
					arm_linux_processors[cluster_leader].midr,
	#if CPUINFO_ARCH_ARM
					!!(arm_linux_processors[cluster_leader].features & CPUINFO_ARM_LINUX_FEATURE_VFPV4),
	#endif
					&arm_linux_processors[cluster_leader].vendor,
					&arm_linux_processors[cluster_leader].uarch);
			} else {
				/* Cluster non-leader: copy vendor, uarch, MIDR, and frequency from cluster leader */
				arm_linux_processors[i].flags |= arm_linux_processors[cluster_leader].flags &
					(CPUINFO_ARM_LINUX_VALID_MIDR | CPUINFO_LINUX_FLAG_MAX_FREQUENCY);
				arm_linux_processors[i].midr = arm_linux_processors[cluster_leader].midr;
				arm_linux_processors[i].vendor = arm_linux_processors[cluster_leader].vendor;
				arm_linux_processors[i].uarch = arm_linux_processors[cluster_leader].uarch;
				arm_linux_processors[i].max_frequency = arm_linux_processors[cluster_leader].max_frequency;
			}
		}
	}

	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
			cpuinfo_log_debug("post-analysis processor %"PRIu32": MIDR %08"PRIx32" frequency %"PRIu32,
				i, arm_linux_processors[i].midr, arm_linux_processors[i].max_frequency);
		}
	}

	/* After sorting, the valid processors occupy the first valid_processors entries */
	qsort(arm_linux_processors, arm_linux_processors_count,
		sizeof(struct cpuinfo_arm_linux_processor), cmp_arm_linux_processor);

	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
			cpuinfo_log_debug("post-sort processor %"PRIu32": system id %"PRIu32" MIDR %08"PRIx32" frequency %"PRIu32,
				i, arm_linux_processors[i].system_processor_id, arm_linux_processors[i].midr, arm_linux_processors[i].max_frequency);
		}
	}

	/* Count runs of equal uarch among the sorted valid processors and assign uarch indices */
	uint32_t uarchs_count = 0;
	enum cpuinfo_uarch last_uarch;
	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
			if (uarchs_count == 0 || arm_linux_processors[i].uarch != last_uarch) {
				last_uarch = arm_linux_processors[i].uarch;
				uarchs_count += 1;
			}
			arm_linux_processors[i].uarch_index = uarchs_count - 1;
		}
	}

	/*
	 * Assumptions:
	 * - No SMT (i.e. each core supports only one hardware thread).
	 * - Level 1 instruction and data caches are private to the core clusters.
	 * - Level 2 and level 3 cache is shared between cores in the same cluster.
	 */
	cpuinfo_arm_chipset_to_string(&chipset, package.name);
	package.processor_count = valid_processors;
	package.core_count = valid_processors;
	package.cluster_count = cluster_count;

	processors = calloc(valid_processors, sizeof(struct cpuinfo_processor));
	if (processors == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
			valid_processors * sizeof(struct cpuinfo_processor), valid_processors);
		goto cleanup;
	}

	cores = calloc(valid_processors, sizeof(struct cpuinfo_core));
	if (cores == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
			valid_processors * sizeof(struct cpuinfo_core), valid_processors);
		goto cleanup;
	}

	clusters = calloc(cluster_count, sizeof(struct cpuinfo_cluster));
	if (clusters == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
			cluster_count * sizeof(struct cpuinfo_cluster), cluster_count);
		goto cleanup;
	}

	uarchs = calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info));
	if (uarchs == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" microarchitectures",
			uarchs_count * sizeof(struct cpuinfo_uarch_info), uarchs_count);
		goto cleanup;
	}

	linux_cpu_to_processor_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_processor*));
	if (linux_cpu_to_processor_map == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" logical processor mapping entries",
			arm_linux_processors_count * sizeof(struct cpuinfo_processor*), arm_linux_processors_count);
		goto cleanup;
	}

	linux_cpu_to_core_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_core*));
	if (linux_cpu_to_core_map == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" core mapping entries",
			arm_linux_processors_count * sizeof(struct cpuinfo_core*), arm_linux_processors_count);
		goto cleanup;
	}

	/* The uarch index map is only built when there is more than one uarch */
	if (uarchs_count > 1) {
		linux_cpu_to_uarch_index_map = calloc(arm_linux_processors_count, sizeof(uint32_t));
		if (linux_cpu_to_uarch_index_map == NULL) {
			cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" uarch index mapping entries",
				arm_linux_processors_count * sizeof(uint32_t), arm_linux_processors_count);
			goto cleanup;
		}
	}

	l1i = calloc(valid_processors, sizeof(struct cpuinfo_cache));
	if (l1i == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
			valid_processors * sizeof(struct cpuinfo_cache), valid_processors);
		goto cleanup;
	}

	l1d = calloc(valid_processors, sizeof(struct cpuinfo_cache));
	if (l1d == NULL) {
		cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
			valid_processors * sizeof(struct cpuinfo_cache), valid_processors);
		goto cleanup;
	}

	/* Fill the uarch table: one entry per run of equal uarch, with per-uarch processor/core counts */
	uint32_t uarchs_index = 0;
	for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
		if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
			if (uarchs_index == 0 || arm_linux_processors[i].uarch != last_uarch) {
				last_uarch = arm_linux_processors[i].uarch;
				uarchs[uarchs_index] = (struct cpuinfo_uarch_info) {
					.uarch = arm_linux_processors[i].uarch,
					.midr = arm_linux_processors[i].midr,
				};
				uarchs_index += 1;
			}
			uarchs[uarchs_index - 1].processor_count += 1;
			uarchs[uarchs_index - 1].core_count += 1;
		}
	}

	/* cluster_id starts at UINT32_MAX so that the first cluster leader increments it to 0 */
	uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX;
	/* Indication whether L3 (if it exists) is shared between all cores */
	bool shared_l3 = true;
	/* Populate cache information structures in l1i, l1d */
	for (uint32_t i = 0; i < valid_processors; i++) {
		if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
			cluster_id += 1;
			clusters[cluster_id] = (struct cpuinfo_cluster) {
				.processor_start = i,
				.processor_count = arm_linux_processors[i].package_processor_count,
				.core_start = i,
				.core_count = arm_linux_processors[i].package_processor_count,
				.cluster_id = cluster_id,
				.package = &package,
				.vendor = arm_linux_processors[i].vendor,
				.uarch = arm_linux_processors[i].uarch,
				.midr = arm_linux_processors[i].midr,
			};
		}

		processors[i].smt_id = 0;
		processors[i].core = cores + i;
		processors[i].cluster = clusters + cluster_id;
		processors[i].package = &package;
		processors[i].linux_id = (int) arm_linux_processors[i].system_processor_id;
		processors[i].cache.l1i = l1i + i;
		processors[i].cache.l1d = l1d + i;
		linux_cpu_to_processor_map[arm_linux_processors[i].system_processor_id] = &processors[i];

		cores[i].processor_start = i;
		cores[i].processor_count = 1;
		cores[i].core_id = i;
		cores[i].cluster = clusters + cluster_id;
		cores[i].package = &package;
		cores[i].vendor = arm_linux_processors[i].vendor;
		cores[i].uarch = arm_linux_processors[i].uarch;
		cores[i].midr = arm_linux_processors[i].midr;
		linux_cpu_to_core_map[arm_linux_processors[i].system_processor_id] = &cores[i];

		if (linux_cpu_to_uarch_index_map != NULL) {
			linux_cpu_to_uarch_index_map[arm_linux_processors[i].system_processor_id] =
				arm_linux_processors[i].uarch_index;
		}

		/* This pass stores L1 and only counts L2/L3; the L2/L3 tables are filled in a second pass below */
		struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 };
		cpuinfo_arm_decode_cache(
			arm_linux_processors[i].uarch,
			arm_linux_processors[i].package_processor_count,
			arm_linux_processors[i].midr,
			&chipset,
			cluster_id,
			arm_linux_processors[i].architecture_version,
			&l1i[i], &l1d[i], &temp_l2, &temp_l3);
		l1i[i].processor_start = l1d[i].processor_start = i;
		l1i[i].processor_count = l1d[i].processor_count = 1;
		#if CPUINFO_ARCH_ARM
			/* L1I reported in /proc/cpuinfo overrides defaults */
			if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_ICACHE)) {
				l1i[i] = (struct cpuinfo_cache) {
					.size = arm_linux_processors[i].proc_cpuinfo_cache.i_size,
					.associativity = arm_linux_processors[i].proc_cpuinfo_cache.i_assoc,
					.sets = arm_linux_processors[i].proc_cpuinfo_cache.i_sets,
					.partitions = 1,
					.line_size = arm_linux_processors[i].proc_cpuinfo_cache.i_line_length
				};
			}
			/* L1D reported in /proc/cpuinfo overrides defaults */
			if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_DCACHE)) {
				l1d[i] = (struct cpuinfo_cache) {
					.size = arm_linux_processors[i].proc_cpuinfo_cache.d_size,
					.associativity = arm_linux_processors[i].proc_cpuinfo_cache.d_assoc,
					.sets = arm_linux_processors[i].proc_cpuinfo_cache.d_sets,
					.partitions = 1,
					.line_size = arm_linux_processors[i].proc_cpuinfo_cache.d_line_length
				};
			}
		#endif

		if (temp_l3.size != 0) {
			/*
			 * Assumptions:
			 * - L2 is private to each core
			 * - L3 is shared by cores in the same cluster
			 * - If cores in different clusters report the same L3, it is shared between all cores.
			 */
			l2_count += 1;
			if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
				if (cluster_id == 0) {
					big_l3_size = temp_l3.size;
					l3_count = 1;
				} else if (temp_l3.size != big_l3_size) {
					/* If some cores have different L3 size, L3 is not shared between all cores */
					shared_l3 = false;
					l3_count += 1;
				}
			}
		} else {
			/* If some cores don't have L3 cache, L3 is not shared between all cores */
			shared_l3 = false;
			if (temp_l2.size != 0) {
				/* Assume L2 is shared by cores in the same cluster */
				if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
					l2_count += 1;
				}
			}
		}
	}

	if (l2_count != 0) {
		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
		if (l2 == NULL) {
			cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
				l2_count * sizeof(struct cpuinfo_cache), l2_count);
			goto cleanup;
		}

		if (l3_count != 0) {
			l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
			if (l3 == NULL) {
				cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches",
					l3_count * sizeof(struct cpuinfo_cache), l3_count);
				goto cleanup;
			}
		}
	}

	/* Second pass: decode the caches again and fill the L2/L3 tables allocated above */
	cluster_id = UINT32_MAX;
	uint32_t l2_index = UINT32_MAX, l3_index = UINT32_MAX;
	for (uint32_t i = 0; i < valid_processors; i++) {
		if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
			cluster_id++;
		}

		struct cpuinfo_cache dummy_l1i, dummy_l1d, temp_l2 = { 0 }, temp_l3 = { 0 };
		cpuinfo_arm_decode_cache(
			arm_linux_processors[i].uarch,
			arm_linux_processors[i].package_processor_count,
			arm_linux_processors[i].midr,
			&chipset,
			cluster_id,
			arm_linux_processors[i].architecture_version,
			&dummy_l1i, &dummy_l1d, &temp_l2, &temp_l3);

		if (temp_l3.size != 0) {
			/*
			 * Assumptions:
			 * - L2 is private to each core
			 * - L3 is shared by cores in the same cluster
			 * - If cores in different clusters report the same L3, it is shared between all cores.
			 */
			l2_index += 1;
			l2[l2_index] = (struct cpuinfo_cache) {
				.size = temp_l2.size,
				.associativity = temp_l2.associativity,
				.sets = temp_l2.sets,
				.partitions = 1,
				.line_size = temp_l2.line_size,
				.flags = temp_l2.flags,
				.processor_start = i,
				.processor_count = 1,
			};
			processors[i].cache.l2 = l2 + l2_index;
			if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
				l3_index += 1;
				/* Only the first l3_count leaders get their own L3 entry */
				if (l3_index < l3_count) {
					l3[l3_index] = (struct cpuinfo_cache) {
						.size = temp_l3.size,
						.associativity = temp_l3.associativity,
						.sets = temp_l3.sets,
						.partitions = 1,
						.line_size = temp_l3.line_size,
						.flags = temp_l3.flags,
						.processor_start = i,
						.processor_count =
							shared_l3 ? valid_processors : arm_linux_processors[i].package_processor_count,
					};
				}
			}
			if (shared_l3) {
				processors[i].cache.l3 = l3;
			} else if (l3_index < l3_count) {
				processors[i].cache.l3 = l3 + l3_index;
			}
		} else if (temp_l2.size != 0) {
			/* Assume L2 is shared by cores in the same cluster */
			if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) {
				l2_index += 1;
				l2[l2_index] = (struct cpuinfo_cache) {
					.size = temp_l2.size,
					.associativity = temp_l2.associativity,
					.sets = temp_l2.sets,
					.partitions = 1,
					.line_size = temp_l2.line_size,
					.flags = temp_l2.flags,
					.processor_start = i,
					.processor_count = arm_linux_processors[i].package_processor_count,
				};
			}
			processors[i].cache.l2 = l2 + l2_index;
		}
	}

	/* Commit */
	cpuinfo_processors = processors;
	cpuinfo_cores = cores;
	cpuinfo_clusters = clusters;
	cpuinfo_packages = &package;
	cpuinfo_uarchs = uarchs;
	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
	cpuinfo_cache[cpuinfo_cache_level_3] = l3;

	cpuinfo_processors_count = valid_processors;
	cpuinfo_cores_count = valid_processors;
	cpuinfo_clusters_count = cluster_count;
	cpuinfo_packages_count = 1;
	cpuinfo_uarchs_count = uarchs_count;
	cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors;
	cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors;
	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
	cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]);

	cpuinfo_linux_cpu_max = arm_linux_processors_count;
	cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
	cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
	cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map;

	/* Full memory barrier between filling the tables and raising the initialized flag */
	__sync_synchronize();

	cpuinfo_is_initialized = true;

	/* Ownership has moved to the globals: clear the locals so cleanup does not free them */
	processors = NULL;
	cores = NULL;
	clusters = NULL;
	uarchs = NULL;
	l1i = l1d = l2 = l3 = NULL;
	linux_cpu_to_processor_map = NULL;
	linux_cpu_to_core_map = NULL;
	linux_cpu_to_uarch_index_map = NULL;

cleanup:
	/* The scratch array is always released; the rest are NULL here unless an error occurred */
	free(arm_linux_processors);
	free(processors);
	free(cores);
	free(clusters);
	free(uarchs);
	free(l1i);
	free(l1d);
	free(l2);
	free(l3);
	free(linux_cpu_to_processor_map);
	free(linux_cpu_to_core_map);
	free(linux_cpu_to_uarch_index_map);
}

View File

@@ -0,0 +1,863 @@
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <cpuinfo.h>
#include <arm/linux/api.h>
#if defined(__ANDROID__)
#include <arm/android/api.h>
#endif
#include <arm/api.h>
#include <arm/midr.h>
#include <linux/api.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
#include <cpuinfo/common.h>
/* Capacity of the per-cluster arrays (cluster_cores, cluster_midr) in struct cluster_config */
#define CLUSTERS_MAX 3
/* Returns true when bitfield contains every bit that is set in mask */
static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
	const uint32_t common_bits = bitfield & mask;
	if (common_bits != mask) {
		return false;
	}
	return true;
}
/*
 * Description of the core cluster configuration of a chipset (identified by series and model number).
 * Entries of this type make up the cluster_configs table.
 */
struct cluster_config {
/* Number of cores (logical processors) */
uint8_t cores;
/* ARM chipset series (see cpuinfo_arm_chipset_series enum) */
uint8_t series;
/* Chipset model number (see cpuinfo_arm_chipset struct) */
uint16_t model;
/* Number of heterogeneous clusters in the CPU package */
uint8_t clusters;
/*
 * Number of cores in each cluster:
 * - Symmetric configurations: [0] = # cores
 * - big.LITTLE configurations: [0] = # LITTLE cores, [1] = # big cores
 * - Max.Med.Min configurations: [0] = # Min cores, [1] = # Med cores, [2] = # Max cores
 */
uint8_t cluster_cores[CLUSTERS_MAX];
/*
 * MIDR of cores in each cluster:
 * - Symmetric configurations: [0] = core MIDR
 * - big.LITTLE configurations: [0] = LITTLE core MIDR, [1] = big core MIDR
 * - Max.Med.Min configurations: [0] = Min core MIDR, [1] = Med core MIDR, [2] = Max core MIDR
 */
uint32_t cluster_midr[CLUSTERS_MAX];
};
/*
* The list of chipsets where MIDR may not be unambiguously decoded, at least on some devices.
* The typical reasons why MIDRs cannot be decoded are buggy kernels, which either do not report all MIDR
* information (e.g. on ATM7029 kernel doesn't report CPU Part), or chipsets have more than one type of cores
* (i.e. 4x Cortex-A53 + 4x Cortex-A53 is out) and buggy kernels report MIDR information only about some cores
* in /proc/cpuinfo (either only online cores, or only the core that reads /proc/cpuinfo). On these kernels/chipsets,
* it is not possible to detect all core types by just parsing /proc/cpuinfo, so we use chipset name and this table to
* find their MIDR (and thus microarchitecture, cache, etc).
*
* Note: not all chipsets with heterogeneous multiprocessing need an entry in this table. The following HMP
* chipsets always list information about all cores in /proc/cpuinfo:
*
* - Snapdragon 660
* - Snapdragon 820 (MSM8996)
* - Snapdragon 821 (MSM8996PRO)
* - Snapdragon 835 (MSM8998)
* - Exynos 8895
* - Kirin 960
*
* As these are all new processors, there is hope that this table won't uncontrollably grow over time.
*/
/*
 * Tabulated cluster configurations, keyed by chipset (series, model).
 *
 * Invariants for every entry:
 * - .cores equals the sum of the first .clusters elements of .cluster_cores
 *   (the lookup compares .cores with the number of usable processors, and each
 *   .cluster_cores element with the size of the corresponding detected cluster).
 * - .cluster_cores[k] and .cluster_midr[k] describe the same cluster.
 */
static const struct cluster_config cluster_configs[] = {
#if CPUINFO_ARCH_ARM
	{
		/*
		 * MSM8916 (Snapdragon 410): 4x Cortex-A53
		 * Some AArch32 phones use non-standard /proc/cpuinfo format.
		 */
		.cores = 4,
		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
		.model = UINT16_C(8916),
		.clusters = 1,
		.cluster_cores = {
			[0] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD030),
		},
	},
	{
		/*
		 * MSM8939 (Snapdragon 615): 4x Cortex-A53 + 4x Cortex-A53
		 * Some AArch32 phones use non-standard /proc/cpuinfo format.
		 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
		.model = UINT16_C(8939),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD034),
			[1] = UINT32_C(0x410FD034),
		},
	},
#endif
	{
		/* MSM8956 (Snapdragon 650): 2x Cortex-A72 + 4x Cortex-A53 */
		.cores = 6,
		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
		.model = UINT16_C(8956),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 2,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD034),
			[1] = UINT32_C(0x410FD080),
		},
	},
	{
		/* MSM8976/MSM8976PRO (Snapdragon 652/653): 4x Cortex-A72 + 4x Cortex-A53 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
		.model = UINT16_C(8976),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD034),
			[1] = UINT32_C(0x410FD080),
		},
	},
	{
		/* MSM8992 (Snapdragon 808): 2x Cortex-A57 + 4x Cortex-A53 */
		.cores = 6,
		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
		.model = UINT16_C(8992),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 2,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD033),
			[1] = UINT32_C(0x411FD072),
		},
	},
	{
		/* MSM8994/MSM8994V (Snapdragon 810): 4x Cortex-A57 + 4x Cortex-A53 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
		.model = UINT16_C(8994),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD032),
			[1] = UINT32_C(0x411FD071),
		},
	},
#if CPUINFO_ARCH_ARM
	{
		/* Exynos 5422: 4x Cortex-A15 + 4x Cortex-A7 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_samsung_exynos,
		.model = UINT16_C(5422),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FC073),
			[1] = UINT32_C(0x412FC0F3),
		},
	},
	{
		/* Exynos 5430: 4x Cortex-A15 + 4x Cortex-A7 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_samsung_exynos,
		.model = UINT16_C(5430),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FC074),
			[1] = UINT32_C(0x413FC0F3),
		},
	},
#endif /* CPUINFO_ARCH_ARM */
	{
		/* Exynos 5433: 4x Cortex-A57 + 4x Cortex-A53 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_samsung_exynos,
		.model = UINT16_C(5433),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD031),
			[1] = UINT32_C(0x411FD070),
		},
	},
	{
		/* Exynos 7420: 4x Cortex-A57 + 4x Cortex-A53 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_samsung_exynos,
		.model = UINT16_C(7420),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD032),
			[1] = UINT32_C(0x411FD070),
		},
	},
	{
		/* Exynos 8890: 4x Exynos M1 + 4x Cortex-A53 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_samsung_exynos,
		.model = UINT16_C(8890),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD034),
			[1] = UINT32_C(0x531F0011),
		},
	},
#if CPUINFO_ARCH_ARM
	{
		/* Kirin 920: 4x Cortex-A15 + 4x Cortex-A7 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_hisilicon_kirin,
		.model = UINT16_C(920),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FC075),
			[1] = UINT32_C(0x413FC0F3),
		},
	},
	{
		/* Kirin 925: 4x Cortex-A15 + 4x Cortex-A7 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_hisilicon_kirin,
		.model = UINT16_C(925),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FC075),
			[1] = UINT32_C(0x413FC0F3),
		},
	},
	{
		/* Kirin 928: 4x Cortex-A15 + 4x Cortex-A7 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_hisilicon_kirin,
		.model = UINT16_C(928),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FC075),
			[1] = UINT32_C(0x413FC0F3),
		},
	},
#endif /* CPUINFO_ARCH_ARM */
	{
		/* Kirin 950: 4x Cortex-A72 + 4x Cortex-A53 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_hisilicon_kirin,
		.model = UINT16_C(950),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD034),
			[1] = UINT32_C(0x410FD080),
		},
	},
	{
		/* Kirin 955: 4x Cortex-A72 + 4x Cortex-A53 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_hisilicon_kirin,
		.model = UINT16_C(955),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD034),
			[1] = UINT32_C(0x410FD080),
		},
	},
#if CPUINFO_ARCH_ARM
	{
		/* MediaTek MT8135: 2x Cortex-A7 + 2x Cortex-A15 */
		.cores = 4,
		.series = cpuinfo_arm_chipset_series_mediatek_mt,
		.model = UINT16_C(8135),
		.clusters = 2,
		.cluster_cores = {
			[0] = 2,
			[1] = 2,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FC073),
			[1] = UINT32_C(0x413FC0F2),
		},
	},
#endif
	{
		/* MediaTek MT8173: 2x Cortex-A72 + 2x Cortex-A53 */
		.cores = 4,
		.series = cpuinfo_arm_chipset_series_mediatek_mt,
		.model = UINT16_C(8173),
		.clusters = 2,
		.cluster_cores = {
			[0] = 2,
			[1] = 2,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD032),
			[1] = UINT32_C(0x410FD080),
		},
	},
	{
		/* MediaTek MT8176: 2x Cortex-A72 + 4x Cortex-A53 */
		.cores = 6,
		.series = cpuinfo_arm_chipset_series_mediatek_mt,
		.model = UINT16_C(8176),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 2,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD032),
			[1] = UINT32_C(0x410FD080),
		},
	},
#if CPUINFO_ARCH_ARM64
	{
		/*
		 * MediaTek MT8735: 4x Cortex-A53
		 * Some AArch64 phones use non-standard /proc/cpuinfo format.
		 */
		.cores = 4,
		.series = cpuinfo_arm_chipset_series_mediatek_mt,
		.model = UINT16_C(8735),
		.clusters = 1,
		.cluster_cores = {
			[0] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD034),
		},
	},
#endif
#if CPUINFO_ARCH_ARM
	{
		/*
		 * MediaTek MT6592: 4x Cortex-A7 + 4x Cortex-A7
		 * Some phones use non-standard /proc/cpuinfo format.
		 */
		/* Total across both 4-core clusters; must equal the sum of cluster_cores for the lookup to match */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_mediatek_mt,
		.model = UINT16_C(6592),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FC074),
			[1] = UINT32_C(0x410FC074),
		},
	},
	{
		/* MediaTek MT6595: 4x Cortex-A17 + 4x Cortex-A7 */
		.cores = 8,
		.series = cpuinfo_arm_chipset_series_mediatek_mt,
		.model = UINT16_C(6595),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FC075),
			[1] = UINT32_C(0x410FC0E0),
		},
	},
#endif
	{
		/* MediaTek MT6797: 2x Cortex-A72 + 4x Cortex-A53 + 4x Cortex-A53 */
		.cores = 10,
		.series = cpuinfo_arm_chipset_series_mediatek_mt,
		.model = UINT16_C(6797),
		.clusters = 3,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
			[2] = 2,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD034),
			[1] = UINT32_C(0x410FD034),
			[2] = UINT32_C(0x410FD081),
		},
	},
	{
		/* MediaTek MT6799: 2x Cortex-A73 + 4x Cortex-A53 + 4x Cortex-A35 */
		.cores = 10,
		.series = cpuinfo_arm_chipset_series_mediatek_mt,
		.model = UINT16_C(6799),
		.clusters = 3,
		.cluster_cores = {
			[0] = 4,
			[1] = 4,
			[2] = 2,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD041),
			[1] = UINT32_C(0x410FD034),
			[2] = UINT32_C(0x410FD092),
		},
	},
	{
		/* Rockchip RK3399: 2x Cortex-A72 + 4x Cortex-A53 */
		.cores = 6,
		.series = cpuinfo_arm_chipset_series_rockchip_rk,
		.model = UINT16_C(3399),
		.clusters = 2,
		.cluster_cores = {
			[0] = 4,
			[1] = 2,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FD034),
			[1] = UINT32_C(0x410FD082),
		},
	},
#if CPUINFO_ARCH_ARM
	{
		/*
		 * Actions ATM7029: 4x Cortex-A5
		 * Most devices use non-standard /proc/cpuinfo format.
		 */
		.cores = 4,
		.series = cpuinfo_arm_chipset_series_actions_atm,
		.model = UINT16_C(7029),
		.clusters = 1,
		.cluster_cores = {
			[0] = 4,
		},
		.cluster_midr = {
			[0] = UINT32_C(0x410FC051),
		},
	},
#endif
};
/*
 * Searches chipset name in mapping of chipset name to cores' MIDR values (the cluster_configs table).
 * If match is successful, initializes MIDR for all clusters' leaders with tabulated values.
 *
 * @param[in] chipset - chipset (SoC) name information.
 * @param clusters_count - number of CPU core clusters detected in the SoC.
 * @param cluster_leaders - indices of core clusters' leaders in the @p processors array.
 *                          Only the first @p clusters_count elements are read.
 * @param processors_count - number of usable logical processors in the system.
 * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
 *                             and decoded core cluster (package_leader_id) information.
 *                             Upon successful return, processors[i].midr for all clusters' leaders contains the
 *                             tabulated MIDR values.
 * @param verify_midr - indicates whether the function should check that the MIDR values to be assigned to leaders
 *                      of core clusters are consistent with known parts of their parsed values.
 *                      Set it to false if the only MIDR value parsed from /proc/cpuinfo is for the last processor
 *                      reported in /proc/cpuinfo and thus can't be unambiguously attributed to that processor.
 *
 * @retval true if the chipset was found in the mapping and core clusters' leaders initialized with MIDR values.
 * @retval false if the chipset was not found in the mapping, or any consistency check failed.
 */
static bool cpuinfo_arm_linux_detect_cluster_midr_by_chipset(
	const struct cpuinfo_arm_chipset chipset[restrict static 1],
	uint32_t clusters_count,
	const uint32_t cluster_leaders[restrict static CLUSTERS_MAX],
	uint32_t processors_count,
	struct cpuinfo_arm_linux_processor processors[restrict static processors_count],
	bool verify_midr)
{
	if (clusters_count > CLUSTERS_MAX) {
		/* cluster_leaders holds at most CLUSTERS_MAX entries, and no tabulated chipset has more clusters */
		return false;
	}

	for (uint32_t c = 0; c < CPUINFO_COUNT_OF(cluster_configs); c++) {
		if (cluster_configs[c].model != chipset->model || cluster_configs[c].series != chipset->series) {
			continue;
		}

		/* Verify that the total number of cores and clusters of cores matches expectation */
		if (cluster_configs[c].cores != processors_count || cluster_configs[c].clusters != clusters_count) {
			return false;
		}

		/* Verify that core cluster configuration matches expectation */
		for (uint32_t cluster = 0; cluster < clusters_count; cluster++) {
			const uint32_t cluster_leader = cluster_leaders[cluster];
			if (cluster_configs[c].cluster_cores[cluster] != processors[cluster_leader].package_processor_count) {
				return false;
			}
		}

		if (verify_midr) {
			/* Verify known parts of MIDR */
			for (uint32_t cluster = 0; cluster < clusters_count; cluster++) {
				const uint32_t cluster_leader = cluster_leaders[cluster];

				/* Create a mask of known midr bits */
				uint32_t midr_mask = 0;
				if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
					midr_mask |= CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
				}
				if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
					midr_mask |= CPUINFO_ARM_MIDR_VARIANT_MASK;
				}
				if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_PART) {
					midr_mask |= CPUINFO_ARM_MIDR_PART_MASK;
				}
				if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
					midr_mask |= CPUINFO_ARM_MIDR_REVISION_MASK;
				}

				/* Verify the bits under the mask */
				if ((processors[cluster_leader].midr ^ cluster_configs[c].cluster_midr[cluster]) & midr_mask) {
					/* MIDR values are bit fields: log them in hexadecimal, like the other MIDR messages */
					cpuinfo_log_debug("parsed MIDR of cluster %08"PRIx32" does not match tabulated value %08"PRIx32,
						processors[cluster_leader].midr, cluster_configs[c].cluster_midr[cluster]);
					return false;
				}
			}
		}

		/* Assign MIDRs according to tabulated configurations */
		for (uint32_t cluster = 0; cluster < clusters_count; cluster++) {
			const uint32_t cluster_leader = cluster_leaders[cluster];
			processors[cluster_leader].midr = cluster_configs[c].cluster_midr[cluster];
			processors[cluster_leader].flags |= CPUINFO_ARM_LINUX_VALID_MIDR;
			cpuinfo_log_debug("cluster %"PRIu32" MIDR = 0x%08"PRIx32, cluster, cluster_configs[c].cluster_midr[cluster]);
		}
		return true;
	}
	return false;
}
/*
 * Initializes MIDR for leaders of core clusters using a heuristic for big.LITTLE systems:
 * - If the only known MIDR is for the big core cluster, guess the matching MIDR for the LITTLE cluster.
 * - Estimate which of the clusters is big using maximum frequency, if known, otherwise assume that the first
 *   reported cluster is the LITTLE one.
 * - Initialize the MIDR for big and LITTLE core clusters using the guesstimated values.
 *
 * @param clusters_count - number of CPU core clusters detected in the SoC.
 * @param cluster_with_midr_count - number of CPU core clusters in the SoC with known MIDR values.
 * @param last_processor_with_midr - index of the last logical processor with known MIDR in the @p processors array.
 *                                   The element at this index is read, so it must be a valid index.
 * @param cluster_leaders - indices of core clusters' leaders in the @p processors array.
 * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
 *                             and decoded core cluster (package_leader_id) information.
 *                             Upon successful return, processors[i].midr for all core clusters' leaders contains
 *                             the heuristically detected MIDR value.
 * @param verify_midr - indicates whether the function should check that the MIDR values to be assigned to leaders
 *                      of core clusters are consistent with known parts of their parsed values.
 *                      Set it to false if the only MIDR value parsed from /proc/cpuinfo is for the last processor
 *                      reported in /proc/cpuinfo and thus can't be unambiguously attributed to that processor.
 *
 * @retval true if this is a big.LITTLE system with only one known MIDR and the CPU core clusters' leaders were
 *              initialized with MIDR values.
 * @retval false if this is not a big.LITTLE system with exactly one known MIDR, or if @p verify_midr is set and
 *               the guessed MIDR values contradict the parsed parts of the leaders' MIDR values.
 */
static bool cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic(
	uint32_t clusters_count,
	uint32_t cluster_with_midr_count,
	uint32_t last_processor_with_midr,
	const uint32_t cluster_leaders[restrict static CLUSTERS_MAX],
	struct cpuinfo_arm_linux_processor processors[restrict static last_processor_with_midr],
	bool verify_midr)
{
	if (clusters_count != 2 || cluster_with_midr_count != 1) {
		/* Not a big.LITTLE system, or MIDR is known for both/neither clusters */
		return false;
	}

	/* The known MIDR (taken from the leader of the cluster that reported it) is assumed to be the big core's */
	const uint32_t midr_flags =
		(processors[processors[last_processor_with_midr].package_leader_id].flags & CPUINFO_ARM_LINUX_VALID_MIDR);
	const uint32_t big_midr = processors[processors[last_processor_with_midr].package_leader_id].midr;
	const uint32_t little_midr = midr_little_core_for_big(big_midr);

	/* Default assumption: the first reported cluster is LITTLE cluster (this holds on most Linux kernels) */
	uint32_t little_cluster_leader = cluster_leaders[0];
	const uint32_t other_cluster_leader = cluster_leaders[1];
	/* If maximum frequency is known for both clusters, assume LITTLE cluster is the one with lower frequency */
	if (processors[little_cluster_leader].flags & processors[other_cluster_leader].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
		if (processors[little_cluster_leader].max_frequency > processors[other_cluster_leader].max_frequency) {
			little_cluster_leader = other_cluster_leader;
		}
	}

	if (verify_midr) {
		/* Verify known parts of MIDR */
		for (uint32_t cluster = 0; cluster < clusters_count; cluster++) {
			const uint32_t cluster_leader = cluster_leaders[cluster];

			/* Create a mask of known midr bits */
			uint32_t midr_mask = 0;
			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
				midr_mask |= CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
			}
			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
				midr_mask |= CPUINFO_ARM_MIDR_VARIANT_MASK;
			}
			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_PART) {
				midr_mask |= CPUINFO_ARM_MIDR_PART_MASK;
			}
			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
				midr_mask |= CPUINFO_ARM_MIDR_REVISION_MASK;
			}

			/* Verify the bits under the mask */
			const uint32_t midr = (cluster_leader == little_cluster_leader) ? little_midr : big_midr;
			if ((processors[cluster_leader].midr ^ midr) & midr_mask) {
				/* MIDR values are bit fields: log them in hexadecimal; the leader index stays decimal */
				cpuinfo_log_debug(
					"parsed MIDR %08"PRIx32" of cluster leader %"PRIu32" is inconsistent with expected value %08"PRIx32,
					processors[cluster_leader].midr, cluster_leader, midr);
				return false;
			}
		}
	}

	for (uint32_t c = 0; c < clusters_count; c++) {
		/* Skip cluster with already assigned MIDR */
		const uint32_t cluster_leader = cluster_leaders[c];
		if (bitmask_all(processors[cluster_leader].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
			continue;
		}

		const uint32_t midr = (cluster_leader == little_cluster_leader) ? little_midr : big_midr;
		cpuinfo_log_info("assume processor %"PRIu32" to have MIDR %08"PRIx32, cluster_leader, midr);
		/* To be consistent, we copy the MIDR entirely, rather than by parts */
		processors[cluster_leader].midr = midr;
		processors[cluster_leader].flags |= midr_flags;
	}
	return true;
}
/*
 * Assigns MIDR values to core cluster leaders in one pass over the processors, in index order:
 * - A leader that already has a complete MIDR keeps it, and that MIDR becomes the value propagated
 *   to the leaders that follow.
 * - A leader without a complete MIDR receives the most recently seen MIDR, or the default MIDR
 *   if no leader with a complete MIDR preceded it.
 *
 * @param default_midr - MIDR value that will be assigned to cluster leaders preceding any reported MIDR value.
 * @param processors_count - number of logical processor descriptions in the @p processors array.
 * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
 *                             and decoded core cluster (package_leader_id) information.
 *                             Upon return, processors[i].midr for all valid core clusters' leaders contains
 *                             the assigned MIDR value.
 */
static void cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
	uint32_t default_midr,
	uint32_t processors_count,
	struct cpuinfo_arm_linux_processor processors[restrict static processors_count])
{
	uint32_t propagated_midr = default_midr;
	for (uint32_t index = 0; index < processors_count; index++) {
		/* Only valid processors that lead their own cluster take part in the scan */
		if (!bitmask_all(processors[index].flags, CPUINFO_LINUX_FLAG_VALID)) {
			continue;
		}
		if (processors[index].package_leader_id != index) {
			continue;
		}

		if (bitmask_all(processors[index].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
			/* A reported MIDR replaces the value carried forward to later clusters */
			propagated_midr = processors[index].midr;
			continue;
		}

		cpuinfo_log_info("assume processor %"PRIu32" to have MIDR %08"PRIx32, index, propagated_midr);
		/* The MIDR is copied as a whole, not field by field, to keep it self-consistent */
		processors[index].midr = propagated_midr;
		processors[index].flags |= CPUINFO_ARM_LINUX_VALID_MIDR;
	}
}
/*
 * Detects MIDR of each CPU core clusters' leader.
 *
 * The function first counts clusters and propagates known maximum frequency and MIDR information from cluster
 * members to their leaders, and then reconstructs MIDR values for the leaders which still lack them.
 *
 * @param[in] chipset - chipset (SoC) name information.
 * @param max_processors - number of processor descriptions in the @p processors array.
 * @param usable_processors - number of processor descriptions in the @p processors array with both POSSIBLE and
 *                            PRESENT flags.
 * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
 *                             and decoded core cluster (package_leader_id) information.
 *                             Upon return, processors[i].midr for all clusters' leaders contains the MIDR value.
 *
 * @returns The number of core clusters
 */
uint32_t cpuinfo_arm_linux_detect_cluster_midr(
	const struct cpuinfo_arm_chipset chipset[restrict static 1],
	uint32_t max_processors,
	uint32_t usable_processors,
	struct cpuinfo_arm_linux_processor processors[restrict static max_processors])
{
	uint32_t clusters_count = 0;
	/* Only the first min(clusters_count, CLUSTERS_MAX) elements are initialized */
	uint32_t cluster_leaders[CLUSTERS_MAX];
	/* max_processors serves as the "not found" sentinel for both indices below */
	uint32_t last_processor_in_cpuinfo = max_processors;
	uint32_t last_processor_with_midr = max_processors;
	uint32_t processors_with_midr_count = 0;
	for (uint32_t i = 0; i < max_processors; i++) {
		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR) {
				last_processor_in_cpuinfo = i;
			}
			if (bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_IMPLEMENTER | CPUINFO_ARM_LINUX_VALID_PART)) {
				last_processor_with_midr = i;
				processors_with_midr_count += 1;
			}
			const uint32_t group_leader = processors[i].package_leader_id;
			if (group_leader == i) {
				if (clusters_count < CLUSTERS_MAX) {
					cluster_leaders[clusters_count] = i;
				}
				clusters_count += 1;
			} else {
				/* Copy known bits of information to cluster leader */

				if ((processors[i].flags & ~processors[group_leader].flags) & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
					processors[group_leader].max_frequency = processors[i].max_frequency;
					processors[group_leader].flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
				}
				if (!bitmask_all(processors[group_leader].flags, CPUINFO_ARM_LINUX_VALID_MIDR) &&
					bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR))
				{
					processors[group_leader].midr = processors[i].midr;
					processors[group_leader].flags |= CPUINFO_ARM_LINUX_VALID_MIDR;
				}
			}
		}
	}
	cpuinfo_log_debug("detected %"PRIu32" core clusters", clusters_count);

	/*
	 * Two relations between reported /proc/cpuinfo information, and cores is possible:
	 * - /proc/cpuinfo reports information for all or some of the cores below the corresponding
	 *   "processor : <number>" lines. Information on offline cores may be missing.
	 * - /proc/cpuinfo reports information only once, after all "processor : <number>" lines.
	 *   The reported information may relate to processor #0 or to the processor which
	 *   executed the system calls to read /proc/cpuinfo. It is also indistinguishable
	 *   from /proc/cpuinfo reporting information only for the last core (e.g. if all other
	 *   cores are offline).
	 *
	 * We detect the second case by checking if /proc/cpuinfo contains valid MIDR only for one,
	 * last reported, processor. Note, that the last reported core may be not the last
	 * present & possible processor, as /proc/cpuinfo may non-report high-index offline cores.
	 */
	if (processors_with_midr_count == 1 && last_processor_in_cpuinfo == last_processor_with_midr && clusters_count > 1) {
		/*
		 * There are multiple core clusters, but /proc/cpuinfo reported MIDR only for one
		 * processor, and we don't even know which logical processor this information refers to.
		 *
		 * We make three attempts to detect MIDR for all clusters:
		 * 1. Search tabulated MIDR values for chipsets which have heterogeneous clusters and ship with Linux
		 *    kernels which do not always report all cores in /proc/cpuinfo. If found, use the tabulated values.
		 * 2. For systems with 2 clusters and MIDR known for one cluster, assume big.LITTLE configuration,
		 *    and estimate MIDR for the other cluster under assumption that MIDR for the big cluster is known.
		 * 3. Initialize MIDRs for all core clusters to the only parsed MIDR value.
		 */
		cpuinfo_log_debug("the only reported MIDR can not be attributed to a particular processor");

		if (cpuinfo_arm_linux_detect_cluster_midr_by_chipset(
			chipset, clusters_count, cluster_leaders, usable_processors, processors, false))
		{
			return clusters_count;
		}

		/* Try big.LITTLE heuristic */
		if (cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic(
			clusters_count, 1, last_processor_with_midr,
			cluster_leaders, processors, false))
		{
			return clusters_count;
		}

		/* Fall back to sequential initialization of MIDR values for core clusters */
		cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
			processors[processors[last_processor_with_midr].package_leader_id].midr,
			max_processors, processors);
	} else if (processors_with_midr_count < usable_processors) {
		/*
		 * /proc/cpuinfo reported MIDR only for some processors, and probably some core clusters do not have MIDR
		 * for any of the cores. Check if this is the case.
		 */
		uint32_t clusters_with_midr_count = 0;
		for (uint32_t i = 0; i < max_processors; i++) {
			if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_ARM_LINUX_VALID_MIDR)) {
				if (processors[i].package_leader_id == i) {
					clusters_with_midr_count += 1;
				}
			}
		}

		if (clusters_with_midr_count < clusters_count) {
			/*
			 * /proc/cpuinfo reported MIDR only for some clusters, need to reconstruct others.
			 * We make three attempts to detect MIDR for clusters without it:
			 * 1. Search tabulated MIDR values for chipsets which have heterogeneous clusters and ship with Linux
			 *    kernels which do not always report all cores in /proc/cpuinfo. If found, use the tabulated values.
			 * 2. For systems with 2 clusters and MIDR known for one cluster, assume big.LITTLE configuration,
			 *    and estimate MIDR for the other cluster under assumption that MIDR for the big cluster is known.
			 * 3. Initialize MIDRs for core clusters in a single sequential scan:
			 *    - Clusters preceding the first reported MIDR value are assumed to have the last reported MIDR value.
			 *    - Clusters following any reported MIDR value to have that MIDR value.
			 */

			if (cpuinfo_arm_linux_detect_cluster_midr_by_chipset(
				chipset, clusters_count, cluster_leaders, usable_processors, processors, true))
			{
				return clusters_count;
			}

			if (last_processor_with_midr != max_processors) {
				/*
				 * Try big.LITTLE heuristic.
				 * The heuristic expects the number of CLUSTERS with known MIDR, not the number of
				 * processors: several cores of the same cluster may each report their MIDR, and the
				 * heuristic must still apply when exactly one cluster has a known MIDR.
				 */
				if (cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic(
					clusters_count, clusters_with_midr_count, last_processor_with_midr,
					cluster_leaders, processors, true))
				{
					return clusters_count;
				}

				/* Fall back to sequential initialization of MIDR values for core clusters */
				cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
					processors[processors[last_processor_with_midr].package_leader_id].midr,
					max_processors, processors);
			}
		}
	}
	return clusters_count;
}

View File

@@ -0,0 +1,619 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <alloca.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <mach/machine.h>
#include <cpuinfo.h>
#include <mach/api.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
/*
 * Polyfill recent CPUFAMILY_ARM_* values for older SDKs.
 * Each definition is guarded by #ifndef, so a value already provided by the included headers takes precedence.
 * These constants are used as case labels on the cpu_family argument in decode_uarch().
 */
#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL
#define CPUFAMILY_ARM_MONSOON_MISTRAL 0xE81E7EF6
#endif
#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST
#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F
#endif
#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER
#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2
#endif
#ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM
#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3
#endif
/*
 * Global description of the supported ARM instruction set features.
 * The initializer provides compile-time defaults per target architecture; fields not listed here are
 * zero-initialized. cpuinfo_arm_mach_init() sets further fields at run time depending on the detected CPU type.
 */
struct cpuinfo_arm_isa cpuinfo_isa = {
#if CPUINFO_ARCH_ARM
/* Defaults for 32-bit ARM builds */
.thumb = true,
.thumb2 = true,
.thumbee = false,
.jazelle = false,
.armv5e = true,
.armv6 = true,
.armv6k = true,
.armv7 = true,
.vfpv2 = false,
.vfpv3 = true,
.d32 = true,
.wmmx = false,
.wmmx2 = false,
.neon = true,
#endif
#if CPUINFO_ARCH_ARM64
/* Defaults for 64-bit ARM builds: cryptographic and CRC32 extensions are marked as available */
.aes = true,
.sha1 = true,
.sha2 = true,
.pmull = true,
.crc32 = true,
#endif
};
/*
 * Reads a 32-bit integer value from the CTL_HW sysctl namespace.
 *
 * @param type_specifier - second-level MIB identifier below CTL_HW.
 * @param[in] name - human-readable name of the value, used only in log messages.
 *
 * @returns The value reported by sysctl, or 0 if the query failed or the value is not 32 bits wide.
 */
static uint32_t get_sys_info(int type_specifier, const char* name) {
	size_t size = 0;
	uint32_t result = 0;
	int mib[2] = { CTL_HW, type_specifier };
	/* First call with a NULL buffer only queries the size of the value */
	if (sysctl(mib, 2, NULL, &size, NULL, 0) != 0) {
		cpuinfo_log_info("sysctl(\"%s\") failed: %s", name, strerror(errno));
	} else if (size == sizeof(uint32_t)) {
		if (sysctl(mib, 2, &result, &size, NULL, 0) != 0) {
			cpuinfo_log_info("sysctl(\"%s\") failed: %s", name, strerror(errno));
			/* Do not return a value the failed call may have left behind */
			result = 0;
		} else {
			cpuinfo_log_debug("%s: %"PRIu32 ", size = %zu", name, result, size);
		}
	} else {
		cpuinfo_log_info("sysctl does not support non-integer lookup for (\"%s\")", name);
	}
	return result;
}
/*
 * Reads a 32-bit integer sysctl value identified by its name.
 *
 * @param[in] type_specifier - name of the sysctl value (e.g. "hw.cpufamily").
 *
 * @returns The value reported by sysctlbyname, or 0 if the query failed or the value is not 32 bits wide.
 */
static uint32_t get_sys_info_by_name(const char* type_specifier) {
	size_t size = 0;
	uint32_t result = 0;
	/* First call with a NULL buffer only queries the size of the value */
	if (sysctlbyname(type_specifier, NULL, &size, NULL, 0) != 0) {
		cpuinfo_log_info("sysctlbyname(\"%s\") failed: %s", type_specifier, strerror(errno));
	} else if (size == sizeof(uint32_t)) {
		if (sysctlbyname(type_specifier, &result, &size, NULL, 0) != 0) {
			cpuinfo_log_info("sysctlbyname(\"%s\") failed: %s", type_specifier, strerror(errno));
			/* Do not return a value the failed call may have left behind */
			result = 0;
		} else {
			cpuinfo_log_debug("%s: %"PRIu32 ", size = %zu", type_specifier, result, size);
		}
	} else {
		cpuinfo_log_info("sysctl does not support non-integer lookup for (\"%s\")", type_specifier);
	}
	return result;
}
/*
 * Maps the CPU family (with CPU subtype as a fallback on 32-bit ARM) to a core microarchitecture.
 *
 * @param cpu_family - CPUFAMILY_ARM_* value of the processor.
 * @param cpu_subtype - CPU_SUBTYPE_ARM_* value; consulted only in 32-bit ARM builds when the family is not recognized.
 * @param core_index - index of the core being described.
 * @param core_count - total number of cores.
 *
 * @returns The microarchitecture of the core, or cpuinfo_uarch_unknown if it could not be determined.
 */
static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype, uint32_t core_index, uint32_t core_count) {
	/* Families where all cores share one microarchitecture */
	if (cpu_family == CPUFAMILY_ARM_SWIFT) {
		return cpuinfo_uarch_swift;
	}
	if (cpu_family == CPUFAMILY_ARM_CYCLONE) {
		return cpuinfo_uarch_cyclone;
	}
	if (cpu_family == CPUFAMILY_ARM_TYPHOON) {
		return cpuinfo_uarch_typhoon;
	}
	if (cpu_family == CPUFAMILY_ARM_TWISTER) {
		return cpuinfo_uarch_twister;
	}
	if (cpu_family == CPUFAMILY_ARM_HURRICANE) {
		return cpuinfo_uarch_hurricane;
	}

	/* 2x Monsoon + 4x Mistral cores: the first two cores are Monsoon */
	if (cpu_family == CPUFAMILY_ARM_MONSOON_MISTRAL) {
		if (core_index < 2) {
			return cpuinfo_uarch_monsoon;
		}
		return cpuinfo_uarch_mistral;
	}

	/*
	 * For the families below, the last four cores are the efficiency cores,
	 * and all cores before them are the performance cores.
	 */
	if (cpu_family == CPUFAMILY_ARM_VORTEX_TEMPEST) {
		/* Hexa-core: 2x Vortex + 4x Tempest; Octa-core: 4x Vortex + 4x Tempest */
		if (core_index + 4 < core_count) {
			return cpuinfo_uarch_vortex;
		}
		return cpuinfo_uarch_tempest;
	}
	if (cpu_family == CPUFAMILY_ARM_LIGHTNING_THUNDER) {
		/* Hexa-core: 2x Lightning + 4x Thunder; Octa-core (presumed): 4x Lightning + 4x Thunder */
		if (core_index + 4 < core_count) {
			return cpuinfo_uarch_lightning;
		}
		return cpuinfo_uarch_thunder;
	}
	if (cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
		/* Hexa-core: 2x Firestorm + 4x Icestorm; Octa-core: 4x Firestorm + 4x Icestorm */
		if (core_index + 4 < core_count) {
			return cpuinfo_uarch_firestorm;
		}
		return cpuinfo_uarch_icestorm;
	}

	/* Unrecognized family: fall back to hw.cpusubtype where it is informative */
#if CPUINFO_ARCH_ARM
	if (cpu_subtype == CPU_SUBTYPE_ARM_V7) {
		return cpuinfo_uarch_cortex_a8;
	}
	if (cpu_subtype == CPU_SUBTYPE_ARM_V7F) {
		return cpuinfo_uarch_cortex_a9;
	}
	if (cpu_subtype == CPU_SUBTYPE_ARM_V7K) {
		return cpuinfo_uarch_cortex_a7;
	}
	return cpuinfo_uarch_unknown;
#else
	return cpuinfo_uarch_unknown;
#endif
}
/*
 * Derives the SoC marketing name ("Apple A<N>[X]") from the "hw.machine" sysctl value and writes it
 * into @p package_name.
 *
 * @param[out] package_name - buffer of at least CPUINFO_PACKAGE_NAME_MAX bytes which receives the name.
 *                            It is left untouched if "hw.machine" can not be read or parsed, or if the
 *                            device is not recognized.
 */
static void decode_package_name(char* package_name) {
	size_t size = 0;
	/* First call with a NULL buffer only queries the length of the machine name */
	if (sysctlbyname("hw.machine", NULL, &size, NULL, 0) != 0) {
		cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno));
		return;
	}

	char *machine_name = alloca(size);
	if (sysctlbyname("hw.machine", machine_name, &size, NULL, 0) != 0) {
		cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno));
		return;
	}
	cpuinfo_log_debug("hw.machine: %s", machine_name);

	/* Expected format: <name><major>,<minor>, e.g. "iPhone10,3" */
	char name[10];
	uint32_t major = 0, minor = 0;
	if (sscanf(machine_name, "%9[^,0123456789]%"SCNu32",%"SCNu32, name, &major, &minor) != 3) {
		/* sscanf does not set errno on a matching failure, so report the offending value instead */
		cpuinfo_log_warning("parsing \"hw.machine\" failed: unexpected value \"%s\"", machine_name);
		return;
	}

	uint32_t chip_model = 0;
	char suffix = '\0';
	if (strcmp(name, "iPhone") == 0) {
		/*
		 * iPhone 4 and up are supported:
		 *  - iPhone 4       [A4]:  iPhone3,1, iPhone3,2, iPhone3,3
		 *  - iPhone 4S      [A5]:  iPhone4,1
		 *  - iPhone 5       [A6]:  iPhone5,1, iPhone5,2
		 *  - iPhone 5c      [A6]:  iPhone5,3, iPhone5,4
		 *  - iPhone 5s      [A7]:  iPhone6,1, iPhone6,2
		 *  - iPhone 6       [A8]:  iPhone7,2
		 *  - iPhone 6 Plus  [A8]:  iPhone7,1
		 *  - iPhone 6s      [A9]:  iPhone8,1
		 *  - iPhone 6s Plus [A9]:  iPhone8,2
		 *  - iPhone SE      [A9]:  iPhone8,4
		 *  - iPhone 7       [A10]: iPhone9,1, iPhone9,3
		 *  - iPhone 7 Plus  [A10]: iPhone9,2, iPhone9,4
		 *  - iPhone 8       [A11]: iPhone10,1, iPhone10,4
		 *  - iPhone 8 Plus  [A11]: iPhone10,2, iPhone10,5
		 *  - iPhone X       [A11]: iPhone10,3, iPhone10,6
		 *  - iPhone XS      [A12]: iPhone11,2
		 *  - iPhone XS Max  [A12]: iPhone11,4, iPhone11,6
		 *  - iPhone XR      [A12]: iPhone11,8
		 */
		chip_model = major + 1;
	} else if (strcmp(name, "iPad") == 0) {
		switch (major) {
			/* iPad 2 and up are supported */
			case 2:
				/*
				 * iPad 2    [A5]: iPad2,1, iPad2,2, iPad2,3, iPad2,4
				 * iPad mini [A5]: iPad2,5, iPad2,6, iPad2,7
				 */
				chip_model = major + 3;
				break;
			case 3:
				/*
				 * iPad 3rd Gen [A5X]: iPad3,1, iPad3,2, iPad3,3
				 * iPad 4th Gen [A6X]: iPad3,4, iPad3,5, iPad3,6
				 */
				chip_model = (minor <= 3) ? 5 : 6;
				suffix = 'X';
				break;
			case 4:
				/*
				 * iPad Air         [A7]: iPad4,1, iPad4,2, iPad4,3
				 * iPad mini Retina [A7]: iPad4,4, iPad4,5, iPad4,6
				 * iPad mini 3      [A7]: iPad4,7, iPad4,8, iPad4,9
				 */
				chip_model = major + 3;
				break;
			case 5:
				/*
				 * iPad mini 4 [A8]:  iPad5,1, iPad5,2
				 * iPad Air 2  [A8X]: iPad5,3, iPad5,4
				 */
				chip_model = major + 3;
				suffix = (minor <= 2) ? '\0' : 'X';
				break;
			case 6:
				/*
				 * iPad Pro 9.7" [A9X]: iPad6,3, iPad6,4
				 * iPad Pro      [A9X]: iPad6,7, iPad6,8
				 * iPad 5th Gen  [A9]:  iPad6,11, iPad6,12
				 */
				chip_model = major + 3;
				suffix = minor <= 8 ? 'X' : '\0';
				break;
			case 7:
				/*
				 * iPad Pro 12.9" [A10X]: iPad7,1, iPad7,2
				 * iPad Pro 10.5" [A10X]: iPad7,3, iPad7,4
				 * iPad 6th Gen   [A10]:  iPad7,5, iPad7,6
				 */
				chip_model = major + 3;
				suffix = minor <= 4 ? 'X' : '\0';
				break;
			default:
				cpuinfo_log_info("unknown iPad: %s", machine_name);
				break;
		}
	} else if (strcmp(name, "iPod") == 0) {
		switch (major) {
			case 5:
				/* iPod touch (5th Gen) [A5]: iPod5,1 */
				chip_model = 5;
				break;
			case 7:
				/* iPod touch (6th Gen, 2015) [A8]: iPod7,1 */
				chip_model = 8;
				break;
			default:
				cpuinfo_log_info("unknown iPod: %s", machine_name);
				break;
		}
	} else {
		cpuinfo_log_info("unknown device: %s", machine_name);
	}

	/* chip_model stays 0 for unrecognized devices, in which case the name is not written */
	if (chip_model != 0) {
		snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX, "Apple A%"PRIu32"%c", chip_model, suffix);
	}
}
void cpuinfo_arm_mach_init(void) {
struct cpuinfo_processor* processors = NULL;
struct cpuinfo_core* cores = NULL;
struct cpuinfo_cluster* clusters = NULL;
struct cpuinfo_package* packages = NULL;
struct cpuinfo_uarch_info* uarchs = NULL;
struct cpuinfo_cache* l1i = NULL;
struct cpuinfo_cache* l1d = NULL;
struct cpuinfo_cache* l2 = NULL;
struct cpuinfo_cache* l3 = NULL;
struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology();
processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor));
if (processors == NULL) {
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
mach_topology.threads * sizeof(struct cpuinfo_processor), mach_topology.threads);
goto cleanup;
}
cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core));
if (cores == NULL) {
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
mach_topology.cores * sizeof(struct cpuinfo_core), mach_topology.cores);
goto cleanup;
}
packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package));
if (packages == NULL) {
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" packages",
mach_topology.packages * sizeof(struct cpuinfo_package), mach_topology.packages);
goto cleanup;
}
const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores;
const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages;
const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages;
for (uint32_t i = 0; i < mach_topology.packages; i++) {
packages[i] = (struct cpuinfo_package) {
.processor_start = i * threads_per_package,
.processor_count = threads_per_package,
.core_start = i * cores_per_package,
.core_count = cores_per_package,
};
decode_package_name(packages[i].name);
}
const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
const uint32_t cpu_type = get_sys_info_by_name("hw.cputype");
const uint32_t cpu_subtype = get_sys_info_by_name("hw.cpusubtype");
switch (cpu_type) {
case CPU_TYPE_ARM64:
cpuinfo_isa.aes = true;
cpuinfo_isa.sha1 = true;
cpuinfo_isa.sha2 = true;
cpuinfo_isa.pmull = true;
cpuinfo_isa.crc32 = true;
break;
#if CPUINFO_ARCH_ARM
case CPU_TYPE_ARM:
switch (cpu_subtype) {
case CPU_SUBTYPE_ARM_V8:
cpuinfo_isa.armv8 = true;
cpuinfo_isa.aes = true;
cpuinfo_isa.sha1 = true;
cpuinfo_isa.sha2 = true;
cpuinfo_isa.pmull = true;
cpuinfo_isa.crc32 = true;
/* Fall-through to add ARMv7S features */
case CPU_SUBTYPE_ARM_V7S:
case CPU_SUBTYPE_ARM_V7K:
cpuinfo_isa.fma = true;
/* Fall-through to add ARMv7F features */
case CPU_SUBTYPE_ARM_V7F:
cpuinfo_isa.armv7mp = true;
cpuinfo_isa.fp16 = true;
/* Fall-through to add ARMv7 features */
case CPU_SUBTYPE_ARM_V7:
break;
default:
break;
}
break;
#endif
}
/*
* Support for ARMv8.1 Atomics & FP16 arithmetic instructions is supposed to be detected via
* sysctlbyname calls with "hw.optional.armv8_1_atomics" and "hw.optional.neon_fp16" arguments
* (see https://devstreaming-cdn.apple.com/videos/wwdc/2018/409t8zw7rumablsh/409/409_whats_new_in_llvm.pdf),
* but on new iOS versions these calls just fail with EPERM.
*
* Thus, we whitelist CPUs known to support these instructions.
*/
switch (cpu_family) {
case CPUFAMILY_ARM_MONSOON_MISTRAL:
case CPUFAMILY_ARM_VORTEX_TEMPEST:
case CPUFAMILY_ARM_LIGHTNING_THUNDER:
case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
#if CPUINFO_ARCH_ARM64
cpuinfo_isa.atomics = true;
#endif
cpuinfo_isa.fp16arith = true;
}
/*
* There does not yet seem to exist an OS mechanism to detect support for
* ARMv8.2 optional dot-product instructions, so we currently whitelist CPUs
* known to support these instruction.
*/
switch (cpu_family) {
case CPUFAMILY_ARM_LIGHTNING_THUNDER:
case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
cpuinfo_isa.dot = true;
}
uint32_t num_clusters = 1;
for (uint32_t i = 0; i < mach_topology.cores; i++) {
cores[i] = (struct cpuinfo_core) {
.processor_start = i * threads_per_core,
.processor_count = threads_per_core,
.core_id = i % cores_per_package,
.package = packages + i / cores_per_package,
.vendor = cpuinfo_vendor_apple,
.uarch = decode_uarch(cpu_family, cpu_subtype, i, mach_topology.cores),
};
if (i != 0 && cores[i].uarch != cores[i - 1].uarch) {
num_clusters++;
}
}
for (uint32_t i = 0; i < mach_topology.threads; i++) {
const uint32_t smt_id = i % threads_per_core;
const uint32_t core_id = i / threads_per_core;
const uint32_t package_id = i / threads_per_package;
processors[i].smt_id = smt_id;
processors[i].core = &cores[core_id];
processors[i].package = &packages[package_id];
}
clusters = calloc(num_clusters, sizeof(struct cpuinfo_cluster));
if (clusters == NULL) {
cpuinfo_log_error(
"failed to allocate %zu bytes for descriptions of %"PRIu32" clusters",
num_clusters * sizeof(struct cpuinfo_cluster), num_clusters);
goto cleanup;
}
uarchs = calloc(num_clusters, sizeof(struct cpuinfo_uarch_info));
if (uarchs == NULL) {
cpuinfo_log_error(
"failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs",
num_clusters * sizeof(enum cpuinfo_uarch), num_clusters);
goto cleanup;
}
uint32_t cluster_idx = UINT32_MAX;
for (uint32_t i = 0; i < mach_topology.cores; i++) {
if (i == 0 || cores[i].uarch != cores[i - 1].uarch) {
cluster_idx++;
uarchs[cluster_idx] = (struct cpuinfo_uarch_info) {
.uarch = cores[i].uarch,
.processor_count = 1,
.core_count = 1,
};
clusters[cluster_idx] = (struct cpuinfo_cluster) {
.processor_start = i * threads_per_core,
.processor_count = 1,
.core_start = i,
.core_count = 1,
.cluster_id = cluster_idx,
.package = cores[i].package,
.vendor = cores[i].vendor,
.uarch = cores[i].uarch,
};
} else {
uarchs[cluster_idx].processor_count++;
uarchs[cluster_idx].core_count++;
clusters[cluster_idx].processor_count++;
clusters[cluster_idx].core_count++;
}
cores[i].cluster = &clusters[cluster_idx];
}
for (uint32_t i = 0; i < mach_topology.threads; i++) {
const uint32_t core_id = i / threads_per_core;
processors[i].cluster = cores[core_id].cluster;
}
for (uint32_t i = 0; i < mach_topology.packages; i++) {
packages[i].cluster_start = 0;
packages[i].cluster_count = num_clusters;
}
const uint32_t cacheline_size = get_sys_info(HW_CACHELINE, "HW_CACHELINE");
const uint32_t l1d_cache_size = get_sys_info(HW_L1DCACHESIZE, "HW_L1DCACHESIZE");
const uint32_t l1i_cache_size = get_sys_info(HW_L1ICACHESIZE, "HW_L1ICACHESIZE");
const uint32_t l2_cache_size = get_sys_info(HW_L2CACHESIZE, "HW_L2CACHESIZE");
const uint32_t l3_cache_size = get_sys_info(HW_L3CACHESIZE, "HW_L3CACHESIZE");
const uint32_t l1_cache_associativity = 4;
const uint32_t l2_cache_associativity = 8;
const uint32_t l3_cache_associativity = 16;
const uint32_t cache_partitions = 1;
const uint32_t cache_flags = 0;
uint32_t threads_per_l1 = 0, l1_count = 0;
if (l1i_cache_size != 0 || l1d_cache_size != 0) {
/* Assume L1 caches are private to each core */
threads_per_l1 = 1;
l1_count = mach_topology.threads / threads_per_l1;
cpuinfo_log_debug("detected %"PRIu32" L1 caches", l1_count);
}
uint32_t threads_per_l2 = 0, l2_count = 0;
if (l2_cache_size != 0) {
/* Assume L2 cache is shared between all cores */
threads_per_l2 = mach_topology.cores;
l2_count = 1;
cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count);
}
uint32_t threads_per_l3 = 0, l3_count = 0;
if (l3_cache_size != 0) {
/* Assume L3 cache is shared between all cores */
threads_per_l3 = mach_topology.cores;
l3_count = 1;
cpuinfo_log_debug("detected %"PRIu32" L3 caches", l3_count);
}
if (l1i_cache_size != 0) {
l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
if (l1i == NULL) {
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
l1_count * sizeof(struct cpuinfo_cache), l1_count);
goto cleanup;
}
for (uint32_t c = 0; c < l1_count; c++) {
l1i[c] = (struct cpuinfo_cache) {
.size = l1i_cache_size,
.associativity = l1_cache_associativity,
.sets = l1i_cache_size / (l1_cache_associativity * cacheline_size),
.partitions = cache_partitions,
.line_size = cacheline_size,
.flags = cache_flags,
.processor_start = c * threads_per_l1,
.processor_count = threads_per_l1,
};
}
for (uint32_t t = 0; t < mach_topology.threads; t++) {
processors[t].cache.l1i = &l1i[t / threads_per_l1];
}
}
if (l1d_cache_size != 0) {
l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
if (l1d == NULL) {
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
l1_count * sizeof(struct cpuinfo_cache), l1_count);
goto cleanup;
}
for (uint32_t c = 0; c < l1_count; c++) {
l1d[c] = (struct cpuinfo_cache) {
.size = l1d_cache_size,
.associativity = l1_cache_associativity,
.sets = l1d_cache_size / (l1_cache_associativity * cacheline_size),
.partitions = cache_partitions,
.line_size = cacheline_size,
.flags = cache_flags,
.processor_start = c * threads_per_l1,
.processor_count = threads_per_l1,
};
}
for (uint32_t t = 0; t < mach_topology.threads; t++) {
processors[t].cache.l1d = &l1d[t / threads_per_l1];
}
}
if (l2_count != 0) {
l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
if (l2 == NULL) {
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
l2_count * sizeof(struct cpuinfo_cache), l2_count);
goto cleanup;
}
for (uint32_t c = 0; c < l2_count; c++) {
l2[c] = (struct cpuinfo_cache) {
.size = l2_cache_size,
.associativity = l2_cache_associativity,
.sets = l2_cache_size / (l2_cache_associativity * cacheline_size),
.partitions = cache_partitions,
.line_size = cacheline_size,
.flags = cache_flags,
.processor_start = c * threads_per_l2,
.processor_count = threads_per_l2,
};
}
for (uint32_t t = 0; t < mach_topology.threads; t++) {
processors[t].cache.l2 = &l2[0];
}
}
if (l3_count != 0) {
l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
if (l3 == NULL) {
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches",
l3_count * sizeof(struct cpuinfo_cache), l3_count);
goto cleanup;
}
for (uint32_t c = 0; c < l3_count; c++) {
l3[c] = (struct cpuinfo_cache) {
.size = l3_cache_size,
.associativity = l3_cache_associativity,
.sets = l3_cache_size / (l3_cache_associativity * cacheline_size),
.partitions = cache_partitions,
.line_size = cacheline_size,
.flags = cache_flags,
.processor_start = c * threads_per_l3,
.processor_count = threads_per_l3,
};
}
for (uint32_t t = 0; t < mach_topology.threads; t++) {
processors[t].cache.l3 = &l3[0];
}
}
/* Commit changes */
cpuinfo_processors = processors;
cpuinfo_cores = cores;
cpuinfo_clusters = clusters;
cpuinfo_packages = packages;
cpuinfo_uarchs = uarchs;
cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
cpuinfo_cache[cpuinfo_cache_level_2] = l2;
cpuinfo_cache[cpuinfo_cache_level_3] = l3;
cpuinfo_processors_count = mach_topology.threads;
cpuinfo_cores_count = mach_topology.cores;
cpuinfo_clusters_count = num_clusters;
cpuinfo_packages_count = mach_topology.packages;
cpuinfo_uarchs_count = num_clusters;
cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
__sync_synchronize();
cpuinfo_is_initialized = true;
processors = NULL;
cores = NULL;
clusters = NULL;
packages = NULL;
uarchs = NULL;
l1i = l1d = l2 = l3 = NULL;
cleanup:
free(processors);
free(cores);
free(clusters);
free(packages);
free(uarchs);
free(l1i);
free(l1d);
free(l2);
free(l3);
}

257
dep/cpuinfo/src/arm/midr.h Normal file
View File

@@ -0,0 +1,257 @@
#pragma once
#include <stdint.h>
/* Bit masks selecting each field of a 32-bit MIDR value */
#define CPUINFO_ARM_MIDR_IMPLEMENTER_MASK UINT32_C(0xFF000000)
#define CPUINFO_ARM_MIDR_VARIANT_MASK UINT32_C(0x00F00000)
#define CPUINFO_ARM_MIDR_ARCHITECTURE_MASK UINT32_C(0x000F0000)
#define CPUINFO_ARM_MIDR_PART_MASK UINT32_C(0x0000FFF0)
#define CPUINFO_ARM_MIDR_REVISION_MASK UINT32_C(0x0000000F)
/* Bit offsets (shift amounts) of the lowest bit of each field; they pair with the masks above */
#define CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET 24
#define CPUINFO_ARM_MIDR_VARIANT_OFFSET 20
#define CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET 16
#define CPUINFO_ARM_MIDR_PART_OFFSET 4
#define CPUINFO_ARM_MIDR_REVISION_OFFSET 0
/* Reference MIDR values with implementer 0x41: ARM11 and Cortex-A cores */
#define CPUINFO_ARM_MIDR_ARM1156 UINT32_C(0x410FB560)
#define CPUINFO_ARM_MIDR_CORTEX_A7 UINT32_C(0x410FC070)
#define CPUINFO_ARM_MIDR_CORTEX_A9 UINT32_C(0x410FC090)
#define CPUINFO_ARM_MIDR_CORTEX_A15 UINT32_C(0x410FC0F0)
#define CPUINFO_ARM_MIDR_CORTEX_A17 UINT32_C(0x410FC0E0)
#define CPUINFO_ARM_MIDR_CORTEX_A35 UINT32_C(0x410FD040)
#define CPUINFO_ARM_MIDR_CORTEX_A53 UINT32_C(0x410FD030)
#define CPUINFO_ARM_MIDR_CORTEX_A55 UINT32_C(0x410FD050)
#define CPUINFO_ARM_MIDR_CORTEX_A57 UINT32_C(0x410FD070)
#define CPUINFO_ARM_MIDR_CORTEX_A72 UINT32_C(0x410FD080)
#define CPUINFO_ARM_MIDR_CORTEX_A73 UINT32_C(0x410FD090)
#define CPUINFO_ARM_MIDR_CORTEX_A75 UINT32_C(0x410FD0A0)
/*
 * Reference MIDR values with implementer 0x51: Kryo cores.
 * The Kryo 280/385 values have non-zero variant and revision bits, so they
 * must be masked before being compared against a masked MIDR.
 */
#define CPUINFO_ARM_MIDR_KRYO280_GOLD UINT32_C(0x51AF8001)
#define CPUINFO_ARM_MIDR_KRYO280_SILVER UINT32_C(0x51AF8014)
#define CPUINFO_ARM_MIDR_KRYO385_GOLD UINT32_C(0x518F802D)
#define CPUINFO_ARM_MIDR_KRYO385_SILVER UINT32_C(0x518F803C)
#define CPUINFO_ARM_MIDR_KRYO_SILVER_821 UINT32_C(0x510F2010)
#define CPUINFO_ARM_MIDR_KRYO_GOLD UINT32_C(0x510F2050)
#define CPUINFO_ARM_MIDR_KRYO_SILVER_820 UINT32_C(0x510F2110)
/* Reference MIDR values for Exynos M1/M2 (implementer 0x53) and Denver 2 (implementer 0x4E) */
#define CPUINFO_ARM_MIDR_EXYNOS_M1_M2 UINT32_C(0x530F0010)
#define CPUINFO_ARM_MIDR_DENVER2 UINT32_C(0x4E0F0030)
/* Returns `midr` with its implementer field replaced by `implementer` (bits that do not fit the field are dropped). */
inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) {
	const uint32_t other_fields = midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
	const uint32_t new_field =
		(implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
	return other_fields | new_field;
}
/* Returns `midr` with its variant field replaced by `variant` (bits that do not fit the field are dropped). */
inline static uint32_t midr_set_variant(uint32_t midr, uint32_t variant) {
	const uint32_t other_fields = midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK;
	const uint32_t new_field =
		(variant << CPUINFO_ARM_MIDR_VARIANT_OFFSET) & CPUINFO_ARM_MIDR_VARIANT_MASK;
	return other_fields | new_field;
}
/* Returns `midr` with its architecture field replaced by `architecture` (bits that do not fit the field are dropped). */
inline static uint32_t midr_set_architecture(uint32_t midr, uint32_t architecture) {
	const uint32_t other_fields = midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
	const uint32_t new_field =
		(architecture << CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET) & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
	return other_fields | new_field;
}
/* Returns `midr` with its part field replaced by `part` (bits that do not fit the field are dropped). */
inline static uint32_t midr_set_part(uint32_t midr, uint32_t part) {
	const uint32_t other_fields = midr & ~CPUINFO_ARM_MIDR_PART_MASK;
	const uint32_t new_field =
		(part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK;
	return other_fields | new_field;
}
/* Returns `midr` with its revision field replaced by `revision` (bits that do not fit the field are dropped). */
inline static uint32_t midr_set_revision(uint32_t midr, uint32_t revision) {
	const uint32_t other_fields = midr & ~CPUINFO_ARM_MIDR_REVISION_MASK;
	const uint32_t new_field =
		(revision << CPUINFO_ARM_MIDR_REVISION_OFFSET) & CPUINFO_ARM_MIDR_REVISION_MASK;
	return other_fields | new_field;
}
/* Extracts the variant field of `midr`, shifted down to bit 0. */
inline static uint32_t midr_get_variant(uint32_t midr) {
	const uint32_t field_in_place = midr & CPUINFO_ARM_MIDR_VARIANT_MASK;
	return field_in_place >> CPUINFO_ARM_MIDR_VARIANT_OFFSET;
}
/* Extracts the implementer field of `midr`, shifted down to bit 0. */
inline static uint32_t midr_get_implementer(uint32_t midr) {
	const uint32_t field_in_place = midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
	return field_in_place >> CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET;
}
/* Extracts the part field of `midr`, shifted down to bit 0. */
inline static uint32_t midr_get_part(uint32_t midr) {
	const uint32_t field_in_place = midr & CPUINFO_ARM_MIDR_PART_MASK;
	return field_in_place >> CPUINFO_ARM_MIDR_PART_OFFSET;
}
/* Extracts the revision field of `midr`, shifted down to bit 0. */
inline static uint32_t midr_get_revision(uint32_t midr) {
	const uint32_t field_in_place = midr & CPUINFO_ARM_MIDR_REVISION_MASK;
	return field_in_place >> CPUINFO_ARM_MIDR_REVISION_OFFSET;
}
/* Returns `midr` with its implementer field taken from `other_midr`; all other fields come from `midr`. */
inline static uint32_t midr_copy_implementer(uint32_t midr, uint32_t other_midr) {
	const uint32_t kept = midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
	const uint32_t copied = other_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
	return kept | copied;
}
/* Returns `midr` with its variant field taken from `other_midr`; all other fields come from `midr`. */
inline static uint32_t midr_copy_variant(uint32_t midr, uint32_t other_midr) {
	const uint32_t kept = midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK;
	const uint32_t copied = other_midr & CPUINFO_ARM_MIDR_VARIANT_MASK;
	return kept | copied;
}
/* Returns `midr` with its architecture field taken from `other_midr`; all other fields come from `midr`. */
inline static uint32_t midr_copy_architecture(uint32_t midr, uint32_t other_midr) {
	const uint32_t kept = midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
	const uint32_t copied = other_midr & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK;
	return kept | copied;
}
/* Returns `midr` with its part field taken from `other_midr`; all other fields come from `midr`. */
inline static uint32_t midr_copy_part(uint32_t midr, uint32_t other_midr) {
	const uint32_t kept = midr & ~CPUINFO_ARM_MIDR_PART_MASK;
	const uint32_t copied = other_midr & CPUINFO_ARM_MIDR_PART_MASK;
	return kept | copied;
}
/* Returns `midr` with its revision field taken from `other_midr`; all other fields come from `midr`. */
inline static uint32_t midr_copy_revision(uint32_t midr, uint32_t other_midr) {
	const uint32_t kept = midr & ~CPUINFO_ARM_MIDR_REVISION_MASK;
	const uint32_t copied = other_midr & CPUINFO_ARM_MIDR_REVISION_MASK;
	return kept | copied;
}
/* True when the implementer and part fields of `midr` equal those of CPUINFO_ARM_MIDR_ARM1156. */
inline static bool midr_is_arm1156(uint32_t midr) {
	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	/* XOR leaves a 1 wherever the two values differ; only the compared fields matter */
	return ((midr ^ CPUINFO_ARM_MIDR_ARM1156) & compared_bits) == 0;
}
/* True when the implementer field is 0x41 and the top nibble of the part field is 0xB. */
inline static bool midr_is_arm11(uint32_t midr) {
	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | 0x0000F000;
	const uint32_t relevant = midr & compared_bits;
	return relevant == UINT32_C(0x4100B000);
}
/* True when the implementer and part fields of `midr` equal those of CPUINFO_ARM_MIDR_CORTEX_A9. */
inline static bool midr_is_cortex_a9(uint32_t midr) {
	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	/* XOR leaves a 1 wherever the two values differ; only the compared fields matter */
	return ((midr ^ CPUINFO_ARM_MIDR_CORTEX_A9) & compared_bits) == 0;
}
/* True when implementer is 0x51 and the part field is 0x00F or 0x02D. */
inline static bool midr_is_scorpion(uint32_t midr) {
	const uint32_t implementer_and_part =
		midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK);
	return implementer_and_part == UINT32_C(0x510000F0) ||
		implementer_and_part == UINT32_C(0x510002D0);
}
/* True when implementer is 0x51 and the part field is 0x04D or 0x06F. */
inline static bool midr_is_krait(uint32_t midr) {
	const uint32_t implementer_and_part =
		midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK);
	return implementer_and_part == UINT32_C(0x510004D0) ||
		implementer_and_part == UINT32_C(0x510006F0);
}
/* True when the implementer and part fields of `midr` equal those of CPUINFO_ARM_MIDR_CORTEX_A53. */
inline static bool midr_is_cortex_a53(uint32_t midr) {
	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	/* XOR leaves a 1 wherever the two values differ; only the compared fields matter */
	return ((midr ^ CPUINFO_ARM_MIDR_CORTEX_A53) & compared_bits) == 0;
}
/* True when the implementer and part fields of `midr` equal those of CPUINFO_ARM_MIDR_KRYO280_SILVER. */
inline static bool midr_is_qualcomm_cortex_a53_silver(uint32_t midr) {
	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	/* XOR leaves a 1 wherever the two values differ; only the compared fields matter */
	return ((midr ^ CPUINFO_ARM_MIDR_KRYO280_SILVER) & compared_bits) == 0;
}
/* True when the implementer and part fields of `midr` equal those of CPUINFO_ARM_MIDR_KRYO385_SILVER. */
inline static bool midr_is_qualcomm_cortex_a55_silver(uint32_t midr) {
	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	/* XOR leaves a 1 wherever the two values differ; only the compared fields matter */
	return ((midr ^ CPUINFO_ARM_MIDR_KRYO385_SILVER) & compared_bits) == 0;
}
/* True when the implementer and part fields of `midr` equal those of CPUINFO_ARM_MIDR_KRYO280_GOLD. */
inline static bool midr_is_kryo280_gold(uint32_t midr) {
	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	/* XOR leaves a 1 wherever the two values differ; only the compared fields matter */
	return ((midr ^ CPUINFO_ARM_MIDR_KRYO280_GOLD) & compared_bits) == 0;
}
/*
 * True when the implementer, architecture and part fields of `midr` equal
 * CPUINFO_ARM_MIDR_KRYO_SILVER_820 or CPUINFO_ARM_MIDR_KRYO_SILVER_821.
 */
inline static bool midr_is_kryo_silver(uint32_t midr) {
	const uint32_t relevant = midr &
		(CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK);
	return relevant == CPUINFO_ARM_MIDR_KRYO_SILVER_820 ||
		relevant == CPUINFO_ARM_MIDR_KRYO_SILVER_821;
}
/* True when the implementer and part fields of `midr` equal those of CPUINFO_ARM_MIDR_KRYO_GOLD. */
inline static bool midr_is_kryo_gold(uint32_t midr) {
	const uint32_t compared_bits = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	/* XOR leaves a 1 wherever the two values differ; only the compared fields matter */
	return ((midr ^ CPUINFO_ARM_MIDR_KRYO_GOLD) & compared_bits) == 0;
}
/*
 * Ranks a core by its implementer and part fields, from 1 (always LITTLE)
 * to 6 (big even relative to Cortex-A75/-A76/-A77/-A78). Cores that are not
 * listed get the middle score 3.
 */
inline static uint32_t midr_score_core(uint32_t midr) {
	const uint32_t implementer_and_part =
		midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK);
	uint32_t score;
	switch (implementer_and_part) {
		/* Score 6: cores in big role w.r.t. Cortex-A75/-A76/-A77/-A78 */
		case UINT32_C(0x4100D440): /* Cortex-X1 */
		case UINT32_C(0x53000030): /* Exynos M4 */
		case UINT32_C(0x53000040): /* Exynos M5 */
			score = 6;
			break;

		/* Score 5: cores that are always in big role */
		case UINT32_C(0x4100D080): /* Cortex-A72 */
		case UINT32_C(0x4100D090): /* Cortex-A73 */
		case UINT32_C(0x4100D0A0): /* Cortex-A75 */
		case UINT32_C(0x4100D0B0): /* Cortex-A76 */
		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
		case UINT32_C(0x4100D410): /* Cortex-A78 */
		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
		case UINT32_C(0x4E000030): /* Denver 2 */
		case UINT32_C(0x51002050): /* Kryo Gold */
		case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
		case UINT32_C(0x51008020): /* Kryo 385 Gold */
		case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */
		case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */
		case UINT32_C(0x53000020): /* Exynos M3 */
#if CPUINFO_ARCH_ARM
		case UINT32_C(0x4100C0C0): /* Cortex-A12 */
		case UINT32_C(0x4100C0D0): /* Rockchip RK3288 cores */
		case UINT32_C(0x4100C0E0): /* Cortex-A17 */
		case UINT32_C(0x4100C0F0): /* Cortex-A15 */
#endif /* CPUINFO_ARCH_ARM */
			score = 5;
			break;

		/* Score 4: Cortex-A57 is LITTLE w.r.t. Denver 2, but big w.r.t. Cortex-A53 */
		case UINT32_C(0x4100D070): /* Cortex-A57 */
			score = 4;
			break;

		/* Score 2: Cortex-A53 is usually LITTLE, but can be big w.r.t. Cortex-A35 */
		case UINT32_C(0x4100D030): /* Cortex-A53 */
		case UINT32_C(0x4100D050): /* Cortex-A55 */
#if CPUINFO_ARCH_ARM64
		case UINT32_C(0x4100D060): /* Cortex-A65 */
#endif /* CPUINFO_ARCH_ARM64 */
			score = 2;
			break;

		/* Score 1: cores that are always in LITTLE role */
		case UINT32_C(0x4100D040): /* Cortex-A35 */
#if CPUINFO_ARCH_ARM
		case UINT32_C(0x4100C070): /* Cortex-A7 */
#endif /* CPUINFO_ARCH_ARM */
		case UINT32_C(0x51002010): /* Kryo Silver (Snapdragon 821) */
		case UINT32_C(0x51002110): /* Kryo Silver (Snapdragon 820) */
		case UINT32_C(0x51008010): /* Kryo 260 / 280 Silver */
		case UINT32_C(0x51008030): /* Kryo 385 Silver */
		case UINT32_C(0x51008050): /* Kryo 485 Silver */
			score = 1;
			break;

		default:
			/*
			 * Unknown cores, or cores without big/LITTLE roles.
			 * Placing them between Cortex-A57/A72/A73/A75 and Cortex-A53/A55 keeps
			 * the scoring sensible for future cores that are paired with one of
			 * the cores recognized above.
			 */
			score = 3;
			break;
	}
	return score;
}
/*
 * Maps the MIDR of a known big core to the reference MIDR of the LITTLE core
 * it is paired with in this table. Only the implementer, architecture and
 * part fields of `midr` take part in the lookup; variant and revision are
 * ignored. Returns `midr` unchanged when the core is not in the table.
 */
inline static uint32_t midr_little_core_for_big(uint32_t midr) {
	const uint32_t core_mask =
		CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
	switch (midr & core_mask) {
		case CPUINFO_ARM_MIDR_CORTEX_A75:
			return CPUINFO_ARM_MIDR_CORTEX_A55;
		case CPUINFO_ARM_MIDR_CORTEX_A73:
		case CPUINFO_ARM_MIDR_CORTEX_A72:
		case CPUINFO_ARM_MIDR_CORTEX_A57:
		case CPUINFO_ARM_MIDR_EXYNOS_M1_M2:
			return CPUINFO_ARM_MIDR_CORTEX_A53;
		case CPUINFO_ARM_MIDR_CORTEX_A17:
		case CPUINFO_ARM_MIDR_CORTEX_A15:
			return CPUINFO_ARM_MIDR_CORTEX_A7;
		/*
		 * CPUINFO_ARM_MIDR_KRYO280_GOLD has non-zero variant and revision bits,
		 * which the switch operand never contains. The label is spelled with the
		 * macro masks (not `core_mask`) because a case label must be an integer
		 * constant expression; without the masking this case could never match.
		 */
		case CPUINFO_ARM_MIDR_KRYO280_GOLD &
			(CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK):
			return CPUINFO_ARM_MIDR_KRYO280_SILVER;
		case CPUINFO_ARM_MIDR_KRYO_GOLD:
			return CPUINFO_ARM_MIDR_KRYO_SILVER_820;
		case CPUINFO_ARM_MIDR_DENVER2:
			return CPUINFO_ARM_MIDR_CORTEX_A57;
		default:
			return midr;
	}
}

133
dep/cpuinfo/src/arm/tlb.c Normal file
View File

@@ -0,0 +1,133 @@
/*
 * Per-microarchitecture TLB notes. Every case below only records the TLB
 * organization quoted from the core's Technical Reference Manual and then
 * breaks; no state is set by this switch.
 */
switch (uarch) {
	case cpuinfo_uarch_cortex_a5:
		/*
		 * Cortex-A5 Technical Reference Manual:
		 * 6.3.1. Micro TLB
		 *   The first level of caching for the page table information is a micro TLB of
		 *   10 entries that is implemented on each of the instruction and data sides.
		 * 6.3.2. Main TLB
		 *   Misses from the instruction and data micro TLBs are handled by a unified main TLB.
		 *   The main TLB is 128-entry two-way set-associative.
		 */
		break;
	case cpuinfo_uarch_cortex_a7:
		/*
		 * Cortex-A7 MPCore Technical Reference Manual:
		 * 5.3.1. Micro TLB
		 *   The first level of caching for the page table information is a micro TLB of
		 *   10 entries that is implemented on each of the instruction and data sides.
		 * 5.3.2. Main TLB
		 *   Misses from the micro TLBs are handled by a unified main TLB. This is a 256-entry 2-way
		 *   set-associative structure. The main TLB supports all the VMSAv7 page sizes of
		 *   4KB, 64KB, 1MB and 16MB in addition to the LPAE page sizes of 2MB and 1G.
		 */
		break;
	case cpuinfo_uarch_cortex_a8:
		/*
		 * Cortex-A8 Technical Reference Manual:
		 * 6.1. About the MMU
		 *   The MMU features include the following:
		 *    - separate, fully-associative, 32-entry data and instruction TLBs
		 *    - TLB entries that support 4KB, 64KB, 1MB, and 16MB pages
		 */
		break;
	case cpuinfo_uarch_cortex_a9:
		/*
		 * ARM Cortex-A9 Technical Reference Manual:
		 * 6.2.1 Micro TLB
		 *   The first level of caching for the page table information is a micro TLB of 32 entries on the data side,
		 *   and configurable 32 or 64 entries on the instruction side.
		 * 6.2.2 Main TLB
		 *   The main TLB is implemented as a combination of:
		 *    - A fully-associative, lockable array of four elements.
		 *    - A 2-way associative structure of 2x32, 2x64, 2x128 or 2x256 entries.
		 */
		break;
	case cpuinfo_uarch_cortex_a15:
		/*
		 * ARM Cortex-A15 MPCore Processor Technical Reference Manual:
		 * 5.2.1. L1 instruction TLB
		 *   The L1 instruction TLB is a 32-entry fully-associative structure. This TLB caches entries at the 4KB
		 *   granularity of Virtual Address (VA) to Physical Address (PA) mapping only. If the page tables map the
		 *   memory region to a larger granularity than 4K, it only allocates one mapping for the particular 4K region
		 *   to which the current access corresponds.
		 * 5.2.2. L1 data TLB
		 *   There are two separate 32-entry fully-associative TLBs that are used for data loads and stores,
		 *   respectively. Similar to the L1 instruction TLB, both of these cache entries at the 4KB granularity of
		 *   VA to PA mappings only. At implementation time, the Cortex-A15 MPCore processor can be configured with
		 *   the -l1tlb_1m option, to have the L1 data TLB cache entries at both the 4KB and 1MB granularity.
		 *   With this configuration, any translation that results in a 1MB or larger page is cached in the L1 data
		 *   TLB as a 1MB entry. Any translation that results in a page smaller than 1MB is cached in the L1 data TLB
		 *   as a 4KB entry. By default, all translations are cached in the L1 data TLB as a 4KB entry.
		 * 5.2.3. L2 TLB
		 *   Misses from the L1 instruction and data TLBs are handled by a unified L2 TLB. This is a 512-entry 4-way
		 *   set-associative structure. The L2 TLB supports all the VMSAv7 page sizes of 4K, 64K, 1MB and 16MB in
		 *   addition to the LPAE page sizes of 2MB and 1GB.
		 */
		break;
	case cpuinfo_uarch_cortex_a17:
		/*
		 * ARM Cortex-A17 MPCore Processor Technical Reference Manual:
		 * 5.2.1. Instruction micro TLB
		 *   The instruction micro TLB is implemented as a 32, 48 or 64 entry, fully-associative structure. This TLB
		 *   caches entries at the 4KB and 1MB granularity of Virtual Address (VA) to Physical Address (PA) mapping
		 *   only. If the translation tables map the memory region to a larger granularity than 4KB or 1MB, it only
		 *   allocates one mapping for the particular 4KB region to which the current access corresponds.
		 * 5.2.2. Data micro TLB
		 *   The data micro TLB is a 32 entry fully-associative TLB that is used for data loads and stores. The cache
		 *   entries have a 4KB and 1MB granularity of VA to PA mappings only.
		 * 5.2.3. Unified main TLB
		 *   Misses from the instruction and data micro TLBs are handled by a unified main TLB. This is a 1024 entry
		 *   4-way set-associative structure. The main TLB supports all the VMSAv7 page sizes of 4K, 64K, 1MB and 16MB
		 *   in addition to the LPAE page sizes of 2MB and 1GB.
		 */
		break;
	case cpuinfo_uarch_cortex_a35:
		/*
		 * ARM Cortex-A35 Processor Technical Reference Manual:
		 * A6.2 TLB Organization
		 *   Micro TLB
		 *     The first level of caching for the translation table information is a micro TLB of ten entries that
		 *     is implemented on each of the instruction and data sides.
		 *   Main TLB
		 *     A unified main TLB handles misses from the micro TLBs. It has a 512-entry, 2-way, set-associative
		 *     structure and supports all VMSAv8 block sizes, except 1GB. If it fetches a 1GB block, the TLB splits
		 *     it into 512MB blocks and stores the appropriate block for the lookup.
		 */
		break;
	case cpuinfo_uarch_cortex_a53:
		/*
		 * ARM Cortex-A53 MPCore Processor Technical Reference Manual:
		 * 5.2.1. Micro TLB
		 *   The first level of caching for the translation table information is a micro TLB of ten entries that is
		 *   implemented on each of the instruction and data sides.
		 * 5.2.2. Main TLB
		 *   A unified main TLB handles misses from the micro TLBs. This is a 512-entry, 4-way, set-associative
		 *   structure. The main TLB supports all VMSAv8 block sizes, except 1GB. If a 1GB block is fetched, it is
		 *   split into 512MB blocks and the appropriate block for the lookup stored.
		 */
		break;
	case cpuinfo_uarch_cortex_a57:
		/*
		 * ARM® Cortex-A57 MPCore Processor Technical Reference Manual:
		 * 5.2.1 L1 instruction TLB
		 *   The L1 instruction TLB is a 48-entry fully-associative structure. This TLB caches entries of three
		 *   different page sizes, natively 4KB, 64KB, and 1MB, of VA to PA mappings. If the page tables map the memory
		 *   region to a larger granularity than 1MB, it only allocates one mapping for the particular 1MB region to
		 *   which the current access corresponds.
		 * 5.2.2 L1 data TLB
		 *   The L1 data TLB is a 32-entry fully-associative TLB that is used for data loads and stores. This TLB
		 *   caches entries of three different page sizes, natively 4KB, 64KB, and 1MB, of VA to PA mappings.
		 * 5.2.3 L2 TLB
		 *   Misses from the L1 instruction and data TLBs are handled by a unified L2 TLB. This is a 1024-entry 4-way
		 *   set-associative structure. The L2 TLB supports the page sizes of 4K, 64K, 1MB and 16MB. It also supports
		 *   page sizes of 2MB and 1GB for the long descriptor format translation in AArch32 state and in AArch64 state
		 *   when using the 4KB translation granule. In addition, the L2 TLB supports the 512MB page map size defined
		 *   for the AArch64 translations that use a 64KB translation granule.
		 */
		break;
}

367
dep/cpuinfo/src/arm/uarch.c Normal file
View File

@@ -0,0 +1,367 @@
#include <stdint.h>
#include <arm/api.h>
#include <arm/midr.h>
#include <cpuinfo/log.h>
void cpuinfo_arm_decode_vendor_uarch(
uint32_t midr,
#if CPUINFO_ARCH_ARM
bool has_vfpv4,
#endif /* CPUINFO_ARCH_ARM */
#ifndef _MSC_VER
enum cpuinfo_vendor vendor[restrict static 1],
enum cpuinfo_uarch uarch[restrict static 1])
#else
enum cpuinfo_vendor vendor[1],
enum cpuinfo_uarch uarch[1])
#endif
{
switch (midr_get_implementer(midr)) {
case 'A':
*vendor = cpuinfo_vendor_arm;
switch (midr_get_part(midr)) {
#if CPUINFO_ARCH_ARM
case 0xC05:
*uarch = cpuinfo_uarch_cortex_a5;
break;
case 0xC07:
*uarch = cpuinfo_uarch_cortex_a7;
break;
case 0xC08:
*uarch = cpuinfo_uarch_cortex_a8;
break;
case 0xC09:
*uarch = cpuinfo_uarch_cortex_a9;
break;
case 0xC0C:
*uarch = cpuinfo_uarch_cortex_a12;
break;
case 0xC0E:
*uarch = cpuinfo_uarch_cortex_a17;
break;
case 0xC0D:
/*
* Rockchip RK3288 only.
* Core information is ambiguous: some sources specify Cortex-A12, others - Cortex-A17.
* Assume it is Cortex-A12.
*/
*uarch = cpuinfo_uarch_cortex_a12;
break;
case 0xC0F:
*uarch = cpuinfo_uarch_cortex_a15;
break;
#endif /* CPUINFO_ARCH_ARM */
case 0xD01:
*uarch = cpuinfo_uarch_cortex_a32;
break;
case 0xD03:
*uarch = cpuinfo_uarch_cortex_a53;
break;
case 0xD04:
*uarch = cpuinfo_uarch_cortex_a35;
break;
case 0xD05:
// Note: use Variant, not Revision, field
*uarch = (midr & CPUINFO_ARM_MIDR_VARIANT_MASK) == 0 ?
cpuinfo_uarch_cortex_a55r0 : cpuinfo_uarch_cortex_a55;
break;
case 0xD06:
*uarch = cpuinfo_uarch_cortex_a65;
break;
case 0xD07:
*uarch = cpuinfo_uarch_cortex_a57;
break;
case 0xD08:
*uarch = cpuinfo_uarch_cortex_a72;
break;
case 0xD09:
*uarch = cpuinfo_uarch_cortex_a73;
break;
case 0xD0A:
*uarch = cpuinfo_uarch_cortex_a75;
break;
case 0xD0B:
*uarch = cpuinfo_uarch_cortex_a76;
break;
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
case 0xD0C:
*uarch = cpuinfo_uarch_neoverse_n1;
break;
#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
case 0xD0D:
*uarch = cpuinfo_uarch_cortex_a77;
break;
case 0xD0E: /* Cortex-A76AE */
*uarch = cpuinfo_uarch_cortex_a76;
break;
case 0xD41: /* Cortex-A78 */
*uarch = cpuinfo_uarch_cortex_a78;
break;
case 0xD44: /* Cortex-X1 */
*uarch = cpuinfo_uarch_cortex_x1;
break;
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
case 0xD4A:
*uarch = cpuinfo_uarch_neoverse_e1;
break;
#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
default:
switch (midr_get_part(midr) >> 8) {
#if CPUINFO_ARCH_ARM
case 7:
*uarch = cpuinfo_uarch_arm7;
break;
case 9:
*uarch = cpuinfo_uarch_arm9;
break;
case 11:
*uarch = cpuinfo_uarch_arm11;
break;
#endif /* CPUINFO_ARCH_ARM */
default:
cpuinfo_log_warning("unknown ARM CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
}
}
break;
case 'B':
*vendor = cpuinfo_vendor_broadcom;
switch (midr_get_part(midr)) {
case 0x00F:
*uarch = cpuinfo_uarch_brahma_b15;
break;
case 0x100:
*uarch = cpuinfo_uarch_brahma_b53;
break;
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
case 0x516:
/* Broadcom Vulkan was sold to Cavium before it reached the market, so we identify it as Cavium ThunderX2 */
*vendor = cpuinfo_vendor_cavium;
*uarch = cpuinfo_uarch_thunderx2;
break;
#endif
default:
cpuinfo_log_warning("unknown Broadcom CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
}
break;
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
case 'C':
*vendor = cpuinfo_vendor_cavium;
switch (midr_get_part(midr)) {
case 0x0A0: /* ThunderX */
case 0x0A1: /* ThunderX 88XX */
case 0x0A2: /* ThunderX 81XX */
case 0x0A3: /* ThunderX 83XX */
*uarch = cpuinfo_uarch_thunderx;
break;
case 0x0AF: /* ThunderX2 99XX */
*uarch = cpuinfo_uarch_thunderx2;
break;
default:
cpuinfo_log_warning("unknown Cavium CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
}
break;
#endif
case 'H':
*vendor = cpuinfo_vendor_huawei;
switch (midr_get_part(midr)) {
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
case 0xD01: /* Kunpeng 920 series */
*uarch = cpuinfo_uarch_taishan_v110;
break;
#endif
case 0xD40: /* Kirin 980 Big/Medium cores -> Cortex-A76 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a76;
break;
default:
cpuinfo_log_warning("unknown Huawei CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
}
break;
#if CPUINFO_ARCH_ARM
case 'i':
*vendor = cpuinfo_vendor_intel;
switch (midr_get_part(midr) >> 8) {
case 2: /* PXA 210/25X/26X */
case 4: /* PXA 27X */
case 6: /* PXA 3XX */
*uarch = cpuinfo_uarch_xscale;
break;
default:
cpuinfo_log_warning("unknown Intel CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
}
break;
#endif /* CPUINFO_ARCH_ARM */
case 'N':
*vendor = cpuinfo_vendor_nvidia;
switch (midr_get_part(midr)) {
case 0x000:
*uarch = cpuinfo_uarch_denver;
break;
case 0x003:
*uarch = cpuinfo_uarch_denver2;
break;
case 0x004:
*uarch = cpuinfo_uarch_carmel;
break;
default:
cpuinfo_log_warning("unknown Nvidia CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
}
break;
#if !defined(__ANDROID__)
case 'P':
*vendor = cpuinfo_vendor_apm;
switch (midr_get_part(midr)) {
case 0x000:
*uarch = cpuinfo_uarch_xgene;
break;
default:
cpuinfo_log_warning("unknown Applied Micro CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
}
break;
#endif
case 'Q':
*vendor = cpuinfo_vendor_qualcomm;
switch (midr_get_part(midr)) {
#if CPUINFO_ARCH_ARM
case 0x00F:
/* Mostly Scorpions, but some Cortex A5 may report this value as well */
if (has_vfpv4) {
/* Unlike Scorpion, Cortex-A5 comes with VFPv4 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a5;
} else {
*uarch = cpuinfo_uarch_scorpion;
}
break;
case 0x02D: /* Dual-core Scorpions */
*uarch = cpuinfo_uarch_scorpion;
break;
case 0x04D:
/*
* Dual-core Krait:
* - r1p0 -> Krait 200
* - r1p4 -> Krait 200
* - r2p0 -> Krait 300
*/
case 0x06F:
/*
* Quad-core Krait:
* - r0p1 -> Krait 200
* - r0p2 -> Krait 200
* - r1p0 -> Krait 300
* - r2p0 -> Krait 400 (Snapdragon 800 MSMxxxx)
* - r2p1 -> Krait 400 (Snapdragon 801 MSMxxxxPRO)
* - r3p1 -> Krait 450
*/
*uarch = cpuinfo_uarch_krait;
break;
#endif /* CPUINFO_ARCH_ARM */
case 0x201: /* Qualcomm Snapdragon 821: Low-power Kryo "Silver" */
case 0x205: /* Qualcomm Snapdragon 820 & 821: High-performance Kryo "Gold" */
case 0x211: /* Qualcomm Snapdragon 820: Low-power Kryo "Silver" */
*uarch = cpuinfo_uarch_kryo;
break;
case 0x800: /* High-performance Kryo 260 (r10p2) / Kryo 280 (r10p1) "Gold" -> Cortex-A73 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a73;
break;
case 0x801: /* Low-power Kryo 260 / 280 "Silver" -> Cortex-A53 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a53;
break;
case 0x802: /* High-performance Kryo 385 "Gold" -> Cortex-A75 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a75;
break;
case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55r0 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a55r0;
break;
case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a76;
break;
case 0x805: /* Low-performance Kryo 485 "Silver" -> Cortex-A55 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a55;
break;
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
case 0xC00:
*uarch = cpuinfo_uarch_falkor;
break;
case 0xC01:
*uarch = cpuinfo_uarch_saphira;
break;
#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
default:
cpuinfo_log_warning("unknown Qualcomm CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
}
break;
case 'S':
*vendor = cpuinfo_vendor_samsung;
switch (midr & (CPUINFO_ARM_MIDR_VARIANT_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
case 0x00100010:
/*
* Exynos 8890 MIDR = 0x531F0011, assume Exynos M1 has:
* - CPU variant 0x1
* - CPU part 0x001
*/
*uarch = cpuinfo_uarch_exynos_m1;
break;
case 0x00400010:
/*
* Exynos 8895 MIDR = 0x534F0010, assume Exynos M2 has:
* - CPU variant 0x4
* - CPU part 0x001
*/
*uarch = cpuinfo_uarch_exynos_m2;
break;
case 0x00100020:
/*
* Exynos 9810 MIDR = 0x531F0020, assume Exynos M3 has:
* - CPU variant 0x1
* - CPU part 0x002
*/
*uarch = cpuinfo_uarch_exynos_m3;
break;
case 0x00100030:
/*
* Exynos 9820 MIDR = 0x531F0030, assume Exynos M4 has:
* - CPU variant 0x1
* - CPU part 0x003
*/
*uarch = cpuinfo_uarch_exynos_m4;
break;
case 0x00100040:
/*
* Exynos 9820 MIDR = 0x531F0040, assume Exynos M5 has:
* - CPU variant 0x1
* - CPU part 0x004
*/
*uarch = cpuinfo_uarch_exynos_m5;
break;
default:
cpuinfo_log_warning("unknown Samsung CPU variant 0x%01"PRIx32" part 0x%03"PRIx32" ignored",
midr_get_variant(midr), midr_get_part(midr));
}
break;
#if CPUINFO_ARCH_ARM
case 'V':
*vendor = cpuinfo_vendor_marvell;
switch (midr_get_part(midr)) {
case 0x581: /* PJ4 / PJ4B */
case 0x584: /* PJ4B-MP / PJ4C */
*uarch = cpuinfo_uarch_pj4;
break;
default:
cpuinfo_log_warning("unknown Marvell CPU part 0x%03"PRIx32" ignored", midr_get_part(midr));
}
break;
#endif /* CPUINFO_ARCH_ARM */
default:
cpuinfo_log_warning("unknown CPU implementer '%c' (0x%02"PRIx32") with CPU part 0x%03"PRIx32" ignored",
(char) midr_get_implementer(midr), midr_get_implementer(midr), midr_get_part(midr));
}
}

View File

@@ -0,0 +1,41 @@
#pragma once
#include <stdbool.h>
#include <stdint.h>
#include <windows.h>
#include <cpuinfo.h>
#include <x86/api.h>
/**
 * Per-logical-processor record: package grouping information, frequency
 * limits, the system processor ID, and a flags word.
 *
 * NOTE(review): the struct is named "arm_linux" although this header includes
 * <windows.h> and <x86/api.h>, and several field comments below refer to Linux
 * sysfs paths -- confirm whether the name and comments are intentional.
 */
struct cpuinfo_arm_linux_processor {
    /**
     * Minimum processor ID on the package which includes this logical processor.
     * This value can serve as an ID for the cluster of logical processors: it is the
     * same for all logical processors on the same package.
     */
    uint32_t package_leader_id;
    /*
     * NOTE(review): a doc comment describing a per-core leader ID ("Minimum
     * processor ID on the core which includes this logical processor", the
     * same for all logical processors on the same core) used to sit here, but
     * no such field is declared in this struct. Either the field is missing or
     * the comment was left over -- confirm.
     */
    /**
     * Number of logical processors in the package.
     */
    uint32_t package_processor_count;
    /**
     * Maximum frequency, in kHz.
     * The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_max_freq
     * If failed to read or parse the file, the value is 0.
     */
    uint32_t max_frequency;
    /**
     * Minimum frequency, in kHz.
     * The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_min_freq
     * If failed to read or parse the file, the value is 0.
     */
    uint32_t min_frequency;
    /** Linux processor ID */
    uint32_t system_processor_id;
    /**
     * Flags word for this processor.
     * NOTE(review): the meaning of individual bits is not defined in this
     * header -- confirm against the code that sets and tests them.
     */
    uint32_t flags;
};

Some files were not shown because too many files have changed in this diff Show More