diff --git a/.travis.yml b/.travis.yml
index 8d6ddb2f201..6b50d49e143 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -52,6 +52,7 @@ matrix:
# Common
- xz-utils
- libexpat1-dev
+ - libx11-xcb-dev
- libelf-dev
- python3.5
- python3-pip
@@ -120,7 +121,6 @@ matrix:
- llvm-6.0-dev
# Common
- xz-utils
- - x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
@@ -150,7 +150,6 @@ matrix:
- llvm-6.0-dev
# Common
- xz-utils
- - x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
@@ -183,7 +182,6 @@ matrix:
- llvm-3.9-dev
# Common
- xz-utils
- - x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
@@ -222,7 +220,6 @@ matrix:
- libclang-3.9-dev
# Common
- xz-utils
- - x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
@@ -258,7 +255,6 @@ matrix:
- libclang-4.0-dev
# Common
- xz-utils
- - x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
@@ -294,7 +290,6 @@ matrix:
- libclang-5.0-dev
# Common
- xz-utils
- - x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
@@ -327,7 +322,6 @@ matrix:
- libclang-6.0-dev
# Common
- xz-utils
- - x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
@@ -361,7 +355,6 @@ matrix:
- libclang-7-dev
# Common
- xz-utils
- - x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
@@ -397,7 +390,6 @@ matrix:
- libedit-dev
# Common
- xz-utils
- - x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
@@ -427,7 +419,6 @@ matrix:
- llvm-6.0-dev
# Common
- xz-utils
- - x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
diff --git a/Android.common.mk b/Android.common.mk
index aa1b266a393..d7c5f20fabc 100644
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -31,6 +31,7 @@ LOCAL_C_INCLUDES += \
MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION)
LOCAL_CFLAGS += \
+ -O3 \
-Wno-error \
-Wno-unused-parameter \
-Wno-pointer-arith \
@@ -78,14 +79,23 @@ LOCAL_CFLAGS += \
-fvisibility=hidden \
-fno-math-errno \
-fno-trapping-math \
- -Wno-sign-compare
+ -Wno-sign-compare \
+ -Wno-self-assign \
+ -Wno-constant-logical-operand \
+ -Wno-format \
+ -Wno-incompatible-pointer-types \
+ -Wno-enum-conversion
LOCAL_CPPFLAGS += \
-D__STDC_CONSTANT_MACROS \
-D__STDC_FORMAT_MACROS \
-D__STDC_LIMIT_MACROS \
-Wno-error=non-virtual-dtor \
- -Wno-non-virtual-dtor
+ -Wno-non-virtual-dtor \
+ -Wno-delete-non-virtual-dtor \
+ -Wno-overloaded-virtual \
+ -Wno-missing-braces \
+ -Wno-deprecated-register
# mesa requires at least c99 compiler
LOCAL_CONLYFLAGS += \
@@ -112,7 +122,7 @@ LOCAL_CFLAGS_arm64 += -DUSE_AARCH64_ASM
ifneq ($(LOCAL_IS_HOST_MODULE),true)
LOCAL_CFLAGS += -DHAVE_LIBDRM
-LOCAL_SHARED_LIBRARIES += libdrm
+LOCAL_SHARED_LIBRARIES += libdrm_pri
endif
LOCAL_CFLAGS_32 += -DDEFAULT_DRIVER_DIR=\"/vendor/lib/$(MESA_DRI_MODULE_REL_PATH)\"
diff --git a/Readme.md b/Readme.md
new file mode 100644
index 00000000000..5df295abc3a
--- /dev/null
+++ b/Readme.md
@@ -0,0 +1,2 @@
+Any security related issues should be reported by following the instructions here:
+https://01.org/security
diff --git a/VERSION b/VERSION
index 8b16de0851f..a19b2d9a021 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-18.3.0-devel
+18.3.2
diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore
new file mode 100644
index 00000000000..92456c5c938
--- /dev/null
+++ b/bin/.cherry-ignore
@@ -0,0 +1,4 @@
+# fixes: Commit was squashed into the respective offenders
+c02390f8fcd367c7350db568feabb2f062efca14 egl/wayland: rather obvious build fix
+# fixes: The commit addresses b4476138d5ad3f8d30c14ee61f2f375edfdbab2a
+ff6f1dd0d3c6b4c15ca51b478b2884d14f6a1e06 meson: libfreedreno depends upon libdrm (for fence support)
diff --git a/bin/get-fixes-pick-list.sh b/bin/get-fixes-pick-list.sh
deleted file mode 100755
index 047ea3bec10..00000000000
--- a/bin/get-fixes-pick-list.sh
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/bin/sh
-
-# Script for generating a list of candidates [referenced by a Fixes tag] for
-# cherry-picking to a stable branch
-#
-# Usage examples:
-#
-# $ bin/get-fixes-pick-list.sh
-# $ bin/get-fixes-pick-list.sh > picklist
-# $ bin/get-fixes-pick-list.sh | tee picklist
-
-# Use the last branchpoint as our limit for the search
-latest_branchpoint=`git merge-base origin/master HEAD`
-
-# List all the commits between day 1 and the branch point...
-git log --reverse --pretty=%H $latest_branchpoint > already_landed
-
-# ... and the ones cherry-picked.
-git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
- grep "cherry picked from commit" |\
- sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
-
-# Grep for commits with Fixes tag
-git log --reverse --pretty=%H -i --grep="fixes:" $latest_branchpoint..origin/master |\
-while read sha
-do
- # Check to see whether the patch is on the ignore list ...
- if [ -f bin/.cherry-ignore ] ; then
- if grep -q ^$sha bin/.cherry-ignore ; then
- continue
- fi
- fi
-
- # Skip if it has been already cherry-picked.
- if grep -q ^$sha already_picked ; then
- continue
- fi
-
- # Place every "fixes:" tag on its own line and join with the next word
- # on its line or a later one.
- fixes=`git show --pretty=medium -s $sha | tr -d "\n" | sed -e 's/fixes:[[:space:]]*/\nfixes:/Ig' | grep "fixes:" | sed -e 's/\(fixes:[a-zA-Z0-9]*\).*$/\1/'`
-
- # For each one try to extract the tag
- fixes_count=`echo "$fixes" | wc -l`
- warn=`(test $fixes_count -gt 1 && echo $fixes_count) || echo 0`
- while [ $fixes_count -gt 0 ] ; do
- # Treat only the current line
- id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2`
- fixes_count=$(($fixes_count-1))
-
- # Bail out if we cannot find suitable id.
- # Any specific validation the $id is valid and not some junk, is
- # implied with the follow up code
- if [ "x$id" = x ] ; then
- continue
- fi
-
- # Check if the offending commit is in branch.
-
- # Be that cherry-picked ...
- # ... or landed before the branchpoint.
- if grep -q ^$id already_picked ||
- grep -q ^$id already_landed ; then
-
- printf "Commit \"%s\" fixes %s\n" \
- "`git log -n1 --pretty=oneline $sha`" \
- "$id"
- warn=$(($warn-1))
- fi
-
- done
-
- if [ $warn -gt 0 ] ; then
- printf "WARNING: Commit \"%s\" has more than one Fixes tag\n" \
- "`git log -n1 --pretty=oneline $sha`"
- fi
-
-done
-
-rm -f already_picked
-rm -f already_landed
diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh
index 9e9a39e494b..79b7a295ea6 100755
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -7,21 +7,107 @@
# $ bin/get-pick-list.sh
# $ bin/get-pick-list.sh > picklist
# $ bin/get-pick-list.sh | tee picklist
+#
+# The output is as follows:
+# [nomination_type] commit_sha commit summary
+
+is_stable_nomination() # succeeds if `git show --summary` of commit $1 contains "CC:...mesa-stable" (any case)
+{
+ git show --summary "$1" | grep -q -i -o "CC:.*mesa-stable"
+}
+
+is_typod_nomination() # succeeds if `git show --summary` of commit $1 contains "CC:...mesa-dev" (any case)
+{
+ git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev"
+}
+
+fixes=
+
+# Helper to handle various typos of the fixes tag.
+# $1 is the commit to inspect; $2 is the tag pattern (sed regex, matched
+# case-insensitively), which is normalised to "fixes:" within.
+# The result is stored in the global variable "fixes", one "fixes:$sha" entry
+# per line. Returns 0 if at least one entry was found, 1 otherwise.
+is_sha_nomination()
+{
+ fixes=`git show --pretty=medium -s $1 | tr -d "\n" | \
+ sed -e 's/'"$2"'/\nfixes:/Ig' | \
+ grep -Eo 'fixes:[a-f0-9]{8,40}'`
+
+ fixes_count=`echo "$fixes" | grep "fixes:" | wc -l`
+ if test $fixes_count -eq 0; then
+ return 1
+ fi
+
+ # Throw a warning for each invalid sha
+ while test $fixes_count -gt 0; do
+ # Treat only the current line
+ id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2`
+ fixes_count=$(($fixes_count-1))
+ # Portable redirection: "&>" is a bashism, a POSIX sh reads it as "&" then ">"
+ if ! git show $id >/dev/null 2>&1; then
+ echo WARNING: Commit $1 lists invalid sha $id
+ fi
+ done
+ return 0
+}
+
+# Succeeds (returns 0) as soon as one of the offending commits recorded in
+# the global "fixes" is found in the branch; returns 1 when none is.
+sha_in_range()
+{
+ fixes_count=$(echo "$fixes" | grep -c "fixes:")
+ until test $fixes_count -le 0; do
+ # Pick the entry that sits $fixes_count lines from the end
+ id=$(echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2)
+ fixes_count=$(($fixes_count-1))
+ # In branch means: listed as cherry-picked ...
+ # ... or landed before the branchpoint.
+ if grep -q ^$id already_picked; then
+ return 0
+ elif grep -q ^$id already_landed; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+is_fixes_nomination()
+{
+ # Try the canonical "fixes:" spelling (optional whitespace after it) first.
+ is_sha_nomination "$1" "fixes:[[:space:]]*" && return 0
+
+ # Otherwise accept the colon-less "fixes <sha>" form; its status is ours.
+ is_sha_nomination "$1" "fixes[[:space:]]\+"
+}
+
+is_brokenby_nomination() # runs is_sha_nomination on commit $1 with "broken by" as the tag; returns its status
+{
+ is_sha_nomination "$1" "broken by"
+}
+
+is_revert_nomination() # runs is_sha_nomination on commit $1 with "This reverts commit " as the tag; returns its status
+{
+ is_sha_nomination "$1" "This reverts commit "
+}
# Use the last branchpoint as our limit for the search
latest_branchpoint=`git merge-base origin/master HEAD`
-# Grep for commits with "cherry picked from commit" in the commit message.
+# List all the commits between day 1 and the branch point...
+git log --reverse --pretty=%H $latest_branchpoint > already_landed
+
+# ... and the ones cherry-picked.
git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
grep "cherry picked from commit" |\
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
-# Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable' $latest_branchpoint..origin/master |\
+# Grep for potential candidates
+git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\<fixes\>\|\<broken by\>\|This reverts commit' $latest_branchpoint..origin/master |\
while read sha
do
# Check to see whether the patch is on the ignore list.
- if [ -f bin/.cherry-ignore ] ; then
+ if test -f bin/.cherry-ignore; then
if grep -q ^$sha bin/.cherry-ignore ; then
continue
fi
@@ -32,7 +118,33 @@ do
continue
fi
- git log -n1 --pretty=oneline $sha | cat
+ if is_fixes_nomination "$sha"; then
+ tag=fixes
+ elif is_brokenby_nomination "$sha"; then
+ tag=brokenby
+ elif is_revert_nomination "$sha"; then
+ tag=revert
+ elif is_stable_nomination "$sha"; then
+ tag=stable
+ elif is_typod_nomination "$sha"; then
+ tag=typod
+ else
+ continue
+ fi
+
+ case "$tag" in
+ fixes | brokenby | revert )
+ if ! sha_in_range; then
+ continue
+ fi
+ ;;
+ * )
+ ;;
+ esac
+
+ printf "[ %8s ] " "$tag"
+ git --no-pager show --summary --oneline $sha
done
rm -f already_picked
+rm -f already_landed
diff --git a/bin/get-typod-pick-list.sh b/bin/get-typod-pick-list.sh
deleted file mode 100755
index eb4181d66b8..00000000000
--- a/bin/get-typod-pick-list.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/sh
-
-# Script for generating a list of candidates which have typos in the nomination line
-#
-# Usage examples:
-#
-# $ bin/get-typod-pick-list.sh
-# $ bin/get-typod-pick-list.sh > picklist
-# $ bin/get-typod-pick-list.sh | tee picklist
-
-# NB:
-# This script intentionally _never_ checks for specific version tag
-# Should we consider folding it with the original get-pick-list.sh
-
-# Use the last branchpoint as our limit for the search
-latest_branchpoint=`git merge-base origin/master HEAD`
-
-# Grep for commits with "cherry picked from commit" in the commit message.
-git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
- grep "cherry picked from commit" |\
- sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
-
-# Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' $latest_branchpoint..origin/master |\
-while read sha
-do
- # Check to see whether the patch is on the ignore list.
- if [ -f bin/.cherry-ignore ] ; then
- if grep -q ^$sha bin/.cherry-ignore ; then
- continue
- fi
- fi
-
- # Check to see if it has already been picked over.
- if grep -q ^$sha already_picked ; then
- continue
- fi
-
- git log -n1 --pretty=oneline $sha | cat
-done
-
-rm -f already_picked
diff --git a/configure.ac b/configure.ac
index d782f56205d..b1c6967afee 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1716,6 +1716,8 @@ xdri)
if test x"$enable_dri" = xyes; then
dri_modules="$dri_modules xcb-dri2 >= $XCBDRI2_REQUIRED"
fi
+
+ dri_modules="$dri_modules xxf86vm"
fi
if test x"$dri_platform" = xapple ; then
DEFINES="$DEFINES -DGLX_USE_APPLEGL"
@@ -1725,12 +1727,6 @@ xdri)
fi
fi
- # add xf86vidmode if available
- PKG_CHECK_MODULES([XF86VIDMODE], [xxf86vm], HAVE_XF86VIDMODE=yes, HAVE_XF86VIDMODE=no)
- if test "$HAVE_XF86VIDMODE" = yes ; then
- dri_modules="$dri_modules xxf86vm"
- fi
-
PKG_CHECK_MODULES([DRIGL], [$dri_modules])
GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV $dri_modules"
X11_INCLUDES="$X11_INCLUDES $DRIGL_CFLAGS"
@@ -1742,10 +1738,6 @@ xdri)
;;
esac
-# This is outside the case (above) so that it is invoked even for non-GLX
-# builds.
-AM_CONDITIONAL(HAVE_XF86VIDMODE, test "x$HAVE_XF86VIDMODE" = xyes)
-
GLESv1_CM_LIB_DEPS="$LIBDRM_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS"
GLESv1_CM_PC_LIB_PRIV="-lm $PTHREAD_LIBS $DLOPEN_LIBS"
GLESv2_LIB_DEPS="$LIBDRM_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS"
@@ -1762,8 +1754,6 @@ AC_SUBST([GLESv1_CM_PC_LIB_PRIV])
AC_SUBST([GLESv2_LIB_DEPS])
AC_SUBST([GLESv2_PC_LIB_PRIV])
-AC_SUBST([HAVE_XF86VIDMODE])
-
dnl
dnl More GLX setup
dnl
diff --git a/docs/releasing.html b/docs/releasing.html
index 52e102207d1..c79a020efa7 100644
--- a/docs/releasing.html
+++ b/docs/releasing.html
@@ -21,6 +21,7 @@ Releasing process
Overview
Release schedule
Cherry-pick and test
+Staging branch
Making a branchpoint
Pre-release announcement
Making a new release
@@ -209,6 +210,25 @@ Regression/functionality testing
idea too.
+
+A live branch, which contains the currently merged/rejected patches, is available
+in the main repository under staging/X.Y. For example:
+
+
-
Mesa 18.3.0 Release Notes / TBD
+
Mesa 18.3.0 Release Notes / December 7, 2018
Mesa 18.3.0 is a new development release. People who are concerned
@@ -40,7 +40,8 @@
Mesa 18.3.0 Release Notes / TBD
SHA256 checksums
-TBD.
+17a124d4dbc712505d22a7815c9b0cee22214c96c8abb91539a2b1351e38a000 mesa-18.3.0.tar.gz
+b63f947e735d6ef3dfaa30c789a9adfbae18aea671191eaacde95a18c17fc38a mesa-18.3.0.tar.xz
@@ -61,7 +62,6 @@
New features
GL_EXT_vertex_attrib_64bit on i965, nvc0, radeonsi.
GL_EXT_window_rectangles on radeonsi.
GL_KHR_texture_compression_astc_sliced_3d on radeonsi.
-
GL_INTEL_fragment_shader_ordering on i965.
GL_NV_fragment_shader_interlock on i965.
EGL_EXT_device_base for all drivers.
EGL_EXT_device_drm for all drivers.
@@ -71,8 +71,206 @@
New features
Bug fixes
+
+
Bug 13728 - [G965] Some objects in Neverwinter Nights Linux version not displayed correctly
+
+
Bug 91433 - piglit.spec.arb_depth_buffer_float.fbo-depth-gl_depth_component32f-copypixels fails
+
+
Bug 93355 - [BXT,SKLGT4e] intermittent ext_framebuffer_multisample.accuracy fails
+
+
Bug 94957 - dEQP failures on llvmpipe
+
+
Bug 98699 - "float[a+++4 ? 1:1] f;" crashes glsl_compiler
+
+
Bug 99507 - Corrupted frame contents with Vulkan version of DOTA2, Talos Principle and Sascha Willems' demos when they're run Vsynched in fullscreen
+
+
Bug 99730 - Metro Redux game(s) needs override for midshader extension declaration
+
+
Bug 100200 - Default Unreal Engine 4 frag shader fails to compile
+
+
Bug 101247 - Mesa fails to link GLSL programs with unused output blocks
+
+
Bug 102597 - [Regression] mpv, high rendering times (two to three times higher)
+
+
Bug 103241 - Anv crashes when using 64-bit vertex inputs
+
+
Bug 104602 - [apitrace] Graphical artifacts in Civilization VI on RX Vega
+
+
Bug 104809 - anv: DOOM 2016 and Wolfenstein II:The New Colossus crash due to not having depthBoundsTest
+
+
Bug 104926 - swrast: Mesa 17.3.3 produces: HW cursor for format 875713089 not supported
+
+
Bug 105333 - [gallium-nine] missing geometry after commit ac: replace ac_build_kill with ac_build_kill_if_false
+
+
Bug 105371 - r600_shader_from_tgsi - GPR limit exceeded - shader requires 360 registers
+
+
Bug 105731 - linker error "fragment shader input ... has no matching output in the previous stage" when previous stage's output declaration in a separate shader object
+
+
Bug 105904 - Needed to delete mesa shader cache after driver upgrade for 32 bit wine vulkan programs to work.
+
+
Bug 105975 - i965 always reports 0 viewport subpixel bits
+
+
Bug 106231 - llvmpipe blends produce bad code after llvm patch https://reviews.llvm.org/D44785
+
+
Bug 106283 - Shader replacements works only for limited use cases
+
+
Bug 106577 - broken rendering with nine and nouveau (GM107)
+
+
Bug 106833 - glLinkProgram is expected to fail when vertex attribute aliasing happens on ES3.0 context or later
+
+
Bug 106865 - [GLK] piglit.spec.ext_framebuffer_multisample.accuracy stencil tests fail
+
+
Bug 106980 - Basemark GPU vulkan benchmark hangs on GFX9
+
+
Bug 106997 - [Regression]. Dying light game is crashing on latest mesa
+
+
Bug 107088 - [GEN8+] Hang when discarding a fragment if dual source blending is enabled but shader doesn't support it
+
+
Bug 107098 - Segfault after munmap(kms_sw_dt->ro_mapped)
+
+
Bug 107212 - Dual-Core CPU E5500 / G45: RetroArch with reicast core results in corrupted graphics
+
+
Bug 107223 - [GEN9+] 50% perf drop in SynMark Fill* tests (E2E RBC gets disabled?)
+
+
Bug 107276 - radv: OpBitfieldUExtract returns incorrect result when count is zero
+
+
Bug 107280 - [DXVK] Batman: Arkham City with tessellation enabled hangs on SKL GT4
+
+
Bug 107313 - Meson instructions on web site are non-optimal
+
+
Bug 107359 - [Regression] [bisected] [OpenGL CTS] [SKL,BDW] KHR-GL46.texture_barrier*-texels, GTF-GL46.gtf21.GL2FixedTests.buffer_corners.buffer_corners, and GTF-GL46.gtf21.GL2FixedTests.stencil_plane_corners.stencil_plane_corners fail with some configuration
+
+
Bug 107460 - radv: OpControlBarrier does not always work correctly (bisected)
+
+
Bug 107477 - [DXVK] Setting high shader quality in GTA V results in LLVM error
+
+
Bug 107483 - DispatchSanity_test.GL31_CORE regression
+
+
Bug 107487 - [intel] [tools] intel gpu tools don't honor -D tools=[]
+
+
Bug 107488 - gl.h:2090: error: redefinition of typedef ‘GLeglImageOES’
+
+
Bug 107510 - [GEN8+] up to 10% perf drop on several 3D benchmarks
+
+
Bug 107511 - KHR/khrplatform.h not always installed when needed
+
+
Bug 107524 - Broken packDouble2x32 at llvmpipe
+
+
Bug 107544 - intel/decoder: out of bounds group_iter
+
+
Bug 107547 - shader crashing glsl_compiler (uniform block assigned to vec2, then component substraced by 1)
+
+
Bug 107550 - "0[2]" as function parameter hits assert
+
+
Bug 107563 - [RADV] Broken rendering in Unity demos
+
+
Bug 107565 - TypeError: __init__() got an unexpected keyword argument 'future_imports'
+
+
Bug 107579 - [SNB] The graphic corruption when we reuse the GS compiled and used for TFB when statebuffer contain magic trash in the unused space
+
+
Bug 107601 - Rise of the Tomb Raider Segmentation Fault when the game starts
+
+
Bug 107610 - Dolphin emulator mis-renders shadow overlay in Super Mario Sunshine
+
+
Bug 107626 - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test "arb_texture_multisample-large-float-texture" with parameter --fp16
+
+
Bug 107658 - [Regression] [bisected] [OpenGLES CTS] KHR-GLES3.packed_pixels.*rectangle.r*8_snorm
+
+
Bug 107734 - [GLSL] glsl-fface-invariant, glsl-fcoord-invariant and glsl-pcoord-invariant should fail
+
+
Bug 107745 - [bisected] [bdw bsw] piglit.spec.arb_fragment_shader_interlock.arb_fragment_shader_interlock-image-load-store failure
+
+
Bug 107760 - GPU Hang when Playing DiRT 3 Complete Edition using Steam Play with DXVK
+
+
Bug 107765 - [regression] Batman Arkham City crashes with DXVK under wine
+
+
Bug 107772 - Mesa preprocessor matches if(def)s & endifs incorrectly
+
+
Bug 107779 - Access violation with some games
+
+
Bug 107786 - [DXVK] MSAA reflections are broken in GTA V
+
+
Bug 107806 - glsl_get_natural_size_align_bytes() ABORT with GfxBench Vulkan AztecRuins
+
+
Bug 107810 - The 'va_end' call is missed after 'va_copy' in 'util_vsnprintf' function under windows
+
+
Bug 107832 - Gallium picking A16L16 formats when emulating INTENSITY16 conflicts with mesa
+
+
Bug 107843 - 32bit Mesa build failes with meson.
+
+
Bug 107856 - i965 incorrectly calculates the number of layers for texture views (assert)
+
+
Bug 107857 - GPU hang - GS_EMIT without shader outputs
+
+
Bug 107865 - swr fail to build with llvm-libs 6.0.1
+
+
Bug 107869 - u_thread.h:87:4: error: use of undeclared identifier 'cpu_set_t'
+
+
Bug 107870 - Undefined symbols for architecture x86_64: "_util_cpu_caps"
+
+
Bug 107879 - crash happens when link program
+
+
Bug 107891 - [wine, regression, bisected] RAGE, Wolfenstein The New Order hangs in menu
+
+
Bug 107923 - build_id.c:126: multiple definition of `build_id_length'
+
+
Bug 107926 - [anv] Rise of the Tomb Raider always misrendering, segfault and gpu hang.
+
+
Bug 107941 - GPU hang and system crash with Dota 2 using Vulkan
+
+
Bug 107971 - SPV_GOOGLE_hlsl_functionality1 / SPV_GOOGLE_decorate_string
+
+
Bug 108012 - Compiler crashes on access of non-existent member incremental operations
+
+
Bug 108024 - [Debian Stretch]Fail to build because "xcb_randr_lease_t"
+
+
Bug 108082 - warning: unknown warning option '-Wno-format-truncation' [-Wunknown-warning-option]
+
+
Bug 108109 - [GLSL] no-overloads.vert fails
+
+
Bug 108112 - [vulkancts] some of the coherent memory tests fail.
+
+
Bug 108113 - [vulkancts] r32g32b32 transfer operations not implemented
+
+
Bug 108115 - [vulkancts] dEQP-VK.subgroups.vote.graphics.subgroupallequal.* fails
+
+
Bug 108164 - [radv] VM faults since 5d6a560a2986c9ab421b3c7904d29bb7bc35e36f
+
+
Bug 108245 - RADV/Vega: Low mip levels of large BCn textures get corrupted by vkCmdCopyBufferToImage
+
+
Bug 108272 - [polaris10] opencl-mesa: Anything using OpenCL segfaults, XFX Radeon RX 580
+
+
Bug 108311 - Query buffer object support is broken on r600.
+
+
Bug 108319 - [GLK BXT BSW] Assertion in piglit.spec.arb_gpu_shader_fp64.execution.built-in-functions.vs-sign-sat-neg-abs
+
+
Bug 108491 - Commit baa38c14 causes output issues on my VEGA with RADV
+
+
Bug 108524 - [RADV] GPU lockup on event synchronization
+
+
Bug 108530 - (mesa-18.3) [Tracker] Mesa 18.3 Release Tracker
+
+
Bug 108532 - make check nir_copy_prop_vars_test.store_store_load_different_components regression
+
+
Bug 108560 - Mesa 32 is built without sse
+
+
Bug 108595 - ir3_compiler valgrind build error
+
+
Bug 108617 - [deqp] Mesa fails conformance for egl_ext_device
+
+
Bug 108630 - [G965] piglit.spec.!opengl 1_2.tex3d-maxsize spins forever
+
+
Bug 108635 - Mesa master commit 68dc591af16ebb36814e4c187e4998948103c99c causes XWayland to segfault
+
+
Bug 108713 - Gallium: use after free with transform feedback
+
+
Bug 108829 - [meson] libglapi exports internal API
+
+
Bug 108894 - [anv] vkCmdCopyBuffer() and vkCmdCopyQueryPoolResults() write-after-write hazard
+
+
Bug 108909 - Vkd3d test failure test_resolve_non_issued_query_data()
+
+
Bug 108914 - blocky shadow artifacts in The Forest with DXVK, RADV_DEBUG=nohiz fixes this
Changes
diff --git a/docs/relnotes/18.3.1.html b/docs/relnotes/18.3.1.html
new file mode 100644
index 00000000000..8acbfb7a5f2
--- /dev/null
+++ b/docs/relnotes/18.3.1.html
@@ -0,0 +1,63 @@
+
+
+
+
+
Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 18.3.1 Release Notes / December 11, 2018
+
+
+Mesa 18.3.1 is a bug fix release which fixes bugs found since the 18.3.0 release.
+
+
+Mesa 18.3.1 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5. OpenGL
+4.5 is only available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+
+
+
SHA256 checksums
+
+256d0c3d88e380c1b8e3fc5c6ac34001e3b7c30458b8b852407ec68b8ccd9fda mesa-18.3.1.tar.gz
+5b1f827d28684a25f6657289f8b7d47ac56395988c7ac23e0ec9a62b644bdc63 mesa-18.3.1.tar.xz
+
+
+
+
New features
+
None
+
+
+
Bug fixes
+
None
+
+
+
Changes
+
+
Emil Velikov (2):
+
+ docs: add sha256 checksums for 18.3.0
+ Update version to 18.3.1
+
+
+
Jason Ekstrand (1):
+
+ anv,radv: Disable VK_EXT_pci_bus_info
+
+
+
+
+
+
diff --git a/docs/relnotes/18.3.2.html b/docs/relnotes/18.3.2.html
new file mode 100644
index 00000000000..594b42cdf4e
--- /dev/null
+++ b/docs/relnotes/18.3.2.html
@@ -0,0 +1,265 @@
+
+
+
+
+
Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 18.3.2 Release Notes / January 17, 2019
+
+
+Mesa 18.3.2 is a bug fix release which fixes bugs found since the 18.3.1 release.
+
+
+Mesa 18.3.2 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5. OpenGL
+4.5 is only available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+
+
+
+
SHA256 checksums
+
+1cde4fafd40cd1ad4ee3a13b364b7a0175a08b7afdd127fb46f918c1e1dfd4b0 mesa-18.3.2.tar.gz
+f7ce7181c07b6d8e0132da879af1729523a6c8aa87f79a9d59dfd064024cfb35 mesa-18.3.2.tar.xz
+
+
+
+
New features
+
None
+
+
+
Bug fixes
+
+
+
+Bug 106595 - [RADV] Rendering distortions only when MSAA is enabled
+
+Bug 107728 - Wrong background in Sascha Willem's Multisampling Demo
+
+Bug 108114 - [vulkancts] new VK_KHR_16bit_storage tests fail.
+
+Bug 108116 - [vulkancts] stencil partial clear tests fail.
+
+Bug 108624 - [regression][bisected] "nir: Copy propagation between blocks" regression
+
+Bug 108910 - Vkd3d test failure test_multisample_array_texture()
+
+Bug 108911 - Vkd3d test failure test_clear_render_target_view()
+
+Bug 108943 - Build fails on ppc64le with meson
+
+Bug 109072 - GPU hang in blender 2.80
+
+Bug 109081 - [bisected] [HSW] Regression in clipping.user_defined.clip_* vulkancts tests
+
+Bug 109151 - [KBL-G][vulkan] dEQP-VK.texture.explicit_lod.2d.sizes.31x55_nearest_linear_mipmap_nearest_repeat failed verification.
+
+Bug 109202 - nv50_ir.cpp:749:19: error: cannot use typeid with -fno-rtti
+
+Bug 109204 - [regression, bisected] retroarch's crt-royale shader crash radv
+
+
+
+
+
Changes
+
+
Alex Deucher (3):
+
+ pci_ids: add new vega10 pci ids
+ pci_ids: add new vega20 pci id
+ pci_ids: add new VegaM pci id
+
+
+
Alexander von Gluck IV (1):
+
+ egl/haiku: Fix reference to disp vs dpy
+
+
+
Andres Gomez (2):
+
+ glsl: correct typo in GLSL compilation error message
+ glsl/linker: specify proper direction in location aliasing error
+
+
+
Axel Davy (3):
+
+ st/nine: Fix volumetexture dtor on ctor failure
+ st/nine: Bind src not dst in nine_context_box_upload
+ st/nine: Add src reference to nine_context_range_upload
+
+
+
Bas Nieuwenhuizen (5):
+
+ radv: Do a cache flush if needed before reading predicates.
+ radv: Implement buffer stores with less than 4 components.
+ anv/android: Do not reject storage images.
+ radv: Fix rasterization precision bits.
+ spirv: Fix matrix parameters in function calls.
+
+
+
Caio Marcelo de Oliveira Filho (3):
+
+ nir: properly clear the entry sources in copy_prop_vars
+ nir: properly find the entry to keep in copy_prop_vars
+ nir: remove dead code from copy_prop_vars
+
+
+
Dave Airlie (2):
+
+ radv/xfb: fix counter buffer bounds checks.
+ virgl/vtest: fix front buffer flush with protocol version 0.
+
+
+
Dylan Baker (6):
+
+ meson: Fix ppc64 little endian detection
+ meson: Add support for gnu hurd
+ meson: Add toggle for glx-direct
+ meson: Override C++ standard to gnu++11 when building with altivec on ppc64
+ meson: Error out if building nouveau and using LLVM without rtti
+ autotools: Remove tegra vdpau driver
+
+
+
Emil Velikov (12):
+
+ docs: add sha256 checksums for 18.3.1
+ bin/get-pick-list.sh: rework handing of sha nominations
+ bin/get-pick-list.sh: warn when commit lists invalid sha
+ cherry-ignore: meson: libfreedreno depends upon libdrm (for fence support)
+ glx: mandate xf86vidmode only for "drm" dri platforms
+ meson: don't require glx/egl/gbm with gallium drivers
+ pipe-loader: meson: reference correct library
+ TODO: glx: meson: build dri based glx tests, only with -Dglx=dri
+ glx: meson: drop includes from a link-only library
+ glx: meson: wire up the dispatch-index-check test
+ glx/test: meson: assorted include fixes
+ Update version to 18.3.2
+
+
+
Eric Anholt (6):
+
+ v3d: Fix a leak of the transfer helper on screen destroy.
+ vc4: Fix a leak of the transfer helper on screen destroy.
+ v3d: Fix a leak of the disassembled instruction string during debug dumps.
+ v3d: Make sure that a thrsw doesn't split a multop from its umul24.
+ v3d: Add missing flagging of SYNCB as a TSY op.
+ gallium/ttn: Fix setup of outputs_written.
+
+
+
Erik Faye-Lund (2):
+
+ virgl: wrap vertex element state in a struct
+ virgl: work around bad assumptions in virglrenderer
+
+
+
Francisco Jerez (5):
+
+ intel/fs: Handle source modifiers in lower_integer_multiplication().
+ intel/fs: Implement quad swizzles on ICL+.
+ intel/fs: Fix bug in lower_simd_width while splitting an instruction which was already split.
+ intel/eu/gen7: Fix brw_MOV() with DF destination and strided source.
+ intel/fs: Respect CHV/BXT regioning restrictions in copy propagation pass.
+
+
+
Ian Romanick (2):
+
+ i965/vec4/dce: Don't narrow the write mask if the flags are used
+ Revert "nir/lower_indirect: Bail early if modes == 0"
+
+
+
Jan Vesely (1):
+
+ clover: Fix build after clang r348827
+
+
+
Jason Ekstrand (6):
+
+ nir/constant_folding: Fix source bit size logic
+ intel/blorp: Be more conservative about copying clear colors
+ spirv: Handle any bit size in vector_insert/extract
+ anv/apply_pipeline_layout: Set the cursor in lower_res_reindex_intrinsic
+ spirv: Sign-extend array indices
+ intel/peephole_ffma: Fix swizzle propagation
+
+
+
Karol Herbst (1):
+
+ nv50/ir: fix use-after-free in ConstantFolding::visit
+
+
+
Kirill Burtsev (1):
+
+ loader: free error state, when checking the drawable type
+
+
+
Lionel Landwerlin (5):
+
+ anv: don't do partial resolve on layer > 0
+ i965: include draw_params/derived_draw_params for VF cache workaround
+ i965: add CS stall on VF invalidation workaround
+ anv: explictly specify format for blorp ccs/mcs op
+ anv: flush fast clear colors into compressed surfaces
+
+
+
Marek Olšák (1):
+
+ st/mesa: don't leak pipe_surface if pipe_context is not current
+
+
+
Mario Kleiner (1):
+
+ radeonsi: Fix use of 1- or 2- component GL_DOUBLE vbo's.
+
+
+
Nicolai Hähnle (1):
+
+ meson: link LLVM 'native' component when LLVM is available
+
+
+
Rhys Perry (3):
+
+ radv: don't set surf_index for stencil-only images
+ ac/nir,radv,radeonsi/nir: use correct indices for interpolation intrinsics
+ ac: split 16-bit ssbo loads that may not be dword aligned
+
+
+
Rob Clark (2):
+
+ freedreno/drm: fix memory leak
+ mesa/st/nir: fix missing nir_compact_varyings
+
+
+
Samuel Pitoiset (1):
+
+ radv: switch on EOP when primitive restart is enabled with triangle strips
+
+
+
Timothy Arceri (2):
+
+ tgsi/scan: fix loop exit point in tgsi_scan_tess_ctrl()
+ tgsi/scan: correctly walk instructions in tgsi_scan_tess_ctrl()
+
+
+
Vinson Lee (2):
+
+ meson: Fix typo.
+ meson: Fix libsensors detection.
+
+
+
+
+
+
+
diff --git a/docs/submittingpatches.html b/docs/submittingpatches.html
index e5350bdb2cf..d7ea0a310db 100644
--- a/docs/submittingpatches.html
+++ b/docs/submittingpatches.html
@@ -251,6 +251,9 @@
Nominating a commit for a stable branch
nomination request.
+
+The current patch status can be observed in the staging branch .
+
The stable tag
diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h
index 6f9c2c8b8cf..48060ac8de6 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -1334,6 +1334,10 @@ struct __DRIdri2ExtensionRec {
#define __DRI_IMAGE_FOURCC_YVU422 0x36315659
#define __DRI_IMAGE_FOURCC_YVU444 0x34325659
+#define __DRI_IMAGE_FOURCC_P010 0x30313050
+#define __DRI_IMAGE_FOURCC_P012 0x32313050
+#define __DRI_IMAGE_FOURCC_P016 0x36313050
+
/**
* Queryable on images created by createImageFromNames.
*
diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h
index 35ea3559b02..75ac7761bb4 100644
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -219,6 +219,7 @@ CHIPSET(0x699F, POLARIS12)
CHIPSET(0x694C, VEGAM)
CHIPSET(0x694E, VEGAM)
+CHIPSET(0x694F, VEGAM)
CHIPSET(0x6860, VEGA10)
CHIPSET(0x6861, VEGA10)
@@ -227,8 +228,14 @@ CHIPSET(0x6863, VEGA10)
CHIPSET(0x6864, VEGA10)
CHIPSET(0x6867, VEGA10)
CHIPSET(0x6868, VEGA10)
-CHIPSET(0x687F, VEGA10)
+CHIPSET(0x6869, VEGA10)
+CHIPSET(0x686A, VEGA10)
+CHIPSET(0x686B, VEGA10)
CHIPSET(0x686C, VEGA10)
+CHIPSET(0x686D, VEGA10)
+CHIPSET(0x686E, VEGA10)
+CHIPSET(0x686F, VEGA10)
+CHIPSET(0x687F, VEGA10)
CHIPSET(0x69A0, VEGA12)
CHIPSET(0x69A1, VEGA12)
@@ -240,6 +247,7 @@ CHIPSET(0x66A0, VEGA20)
CHIPSET(0x66A1, VEGA20)
CHIPSET(0x66A2, VEGA20)
CHIPSET(0x66A3, VEGA20)
+CHIPSET(0x66A4, VEGA20)
CHIPSET(0x66A7, VEGA20)
CHIPSET(0x66AF, VEGA20)
diff --git a/meson.build b/meson.build
index 18667988bac..5a20e1ea30d 100644
--- a/meson.build
+++ b/meson.build
@@ -54,6 +54,7 @@ with_valgrind = get_option('valgrind')
with_libunwind = get_option('libunwind')
with_asm = get_option('asm')
with_glx_read_only_text = get_option('glx-read-only-text')
+with_glx_direct = get_option('glx-direct')
with_osmesa = get_option('osmesa')
with_swr_arches = get_option('swr-arches')
with_tools = get_option('tools')
@@ -223,8 +224,6 @@ elif system_has_kms_drm
else
# FIXME: haiku doesn't use dri, and xlib doesn't use dri, probably should
# assert here that one of those cases has been met.
- # FIXME: GNU (hurd) ends up here as well, but meson doesn't officially
- # support Hurd at time of writing (2017/11)
# FIXME: illumos ends up here as well
with_dri_platform = 'none'
endif
@@ -370,9 +369,6 @@ if with_glvnd
endif
endif
-# TODO: toggle for this
-with_glx_direct = true
-
if with_vulkan_icd_dir == ''
with_vulkan_icd_dir = join_paths(get_option('datadir'), 'vulkan/icd.d')
endif
@@ -388,9 +384,9 @@ endif
if with_any_vk and (with_platform_x11 and not with_dri3)
error('Vulkan drivers require dri3 for X11 support')
endif
-if with_dri or with_gallium
- if with_glx == 'disabled' and not with_egl and not with_platform_haiku
- error('building dri or gallium drivers require at least one window system')
+if with_dri
+ if with_glx == 'disabled' and not with_egl and not with_gbm
+ error('building dri drivers require at least one windowing system')
endif
endif
@@ -620,7 +616,7 @@ if with_gallium_st_nine
error('The nine state tracker requires gallium softpipe/llvmpipe.')
elif not (with_gallium_radeonsi or with_gallium_nouveau or with_gallium_r600
or with_gallium_r300 or with_gallium_svga or with_gallium_i915)
- error('The nine state tracker requires at least on non-swrast gallium driver.')
+ error('The nine state tracker requires at least one non-swrast gallium driver.')
endif
if not with_dri3
error('Using nine with wine requires dri3')
@@ -628,7 +624,12 @@ if with_gallium_st_nine
endif
if get_option('power8') != 'false'
- if host_machine.cpu_family() == 'ppc64le'
+ # on old versions of meson the cpu family would return as ppc64le on little
+ # endian power8, this was changed in 0.48 such that the family would always
+ # be ppc64 regardless of endianness, and the machine.endian() value
+ # should be checked. Since we support versions < 0.48 we need to use
+ # startswith.
+ if host_machine.cpu_family().startswith('ppc64') and host_machine.endian() == 'little'
if cc.get_id() == 'gcc' and cc.version().version_compare('< 4.8')
error('Altivec is not supported with gcc version < 4.8.')
endif
@@ -650,6 +651,7 @@ if get_option('power8') != 'false'
endif
_opencl = get_option('gallium-opencl')
+clover_cpp_std = []
if _opencl != 'disabled'
if not with_gallium
error('OpenCL Clover implementation requires at least one gallium driver.')
@@ -658,6 +660,14 @@ if _opencl != 'disabled'
dep_clc = dependency('libclc')
with_gallium_opencl = true
with_opencl_icd = _opencl == 'icd'
+
+ if host_machine.cpu_family().startswith('ppc') and cpp.compiles('''
+ #if !defined(__VEC__) || !defined(__ALTIVEC__)
+ #error "AltiVec not enabled"
+ #endif''',
+ name : 'Altivec')
+ clover_cpp_std += ['cpp_std=gnu++11']
+ endif
else
dep_clc = null_dep
with_gallium_opencl = false
@@ -781,13 +791,13 @@ if cc.compiles('int foo(void) __attribute__((__noreturn__));',
endif
# TODO: this is very incomplete
-if ['linux', 'cygwin'].contains(host_machine.system())
+if ['linux', 'cygwin', 'gnu'].contains(host_machine.system())
pre_args += '-D_GNU_SOURCE'
endif
# Check for generic C arguments
c_args = []
-foreach a : ['-Wall', '-Werror=implicit-function-declaration',
+foreach a : ['-Werror=implicit-function-declaration',
'-Werror=missing-prototypes', '-Werror=return-type',
'-fno-math-errno',
'-fno-trapping-math', '-Qunused-arguments']
@@ -809,7 +819,7 @@ endif
# Check for generic C++ arguments
cpp_args = []
-foreach a : ['-Wall', '-Werror=return-type',
+foreach a : ['-Werror=return-type',
'-fno-math-errno', '-fno-trapping-math',
'-Qunused-arguments']
if cpp.has_argument(a)
@@ -905,8 +915,9 @@ if not cc.links('''#include
int main() {
return __sync_add_and_fetch(&v, (uint64_t)1);
}''',
+ dependencies : dep_atomic,
name : 'GCC 64bit atomics')
- pre_args += '-DMISSING_64_BIT_ATOMICS'
+ pre_args += '-DMISSING_64BIT_ATOMICS'
endif
# TODO: shared/static? Is this even worth doing?
@@ -939,7 +950,7 @@ endif
with_asm_arch = ''
if with_asm
if host_machine.cpu_family() == 'x86'
- if system_has_kms_drm
+ if system_has_kms_drm or host_machine.system() == 'gnu'
with_asm_arch = 'x86'
pre_args += ['-DUSE_X86_ASM', '-DUSE_MMX_ASM', '-DUSE_3DNOW_ASM',
'-DUSE_SSE_ASM']
@@ -968,7 +979,7 @@ if with_asm
with_asm_arch = 'sparc'
pre_args += ['-DUSE_SPARC_ASM']
endif
- elif host_machine.cpu_family() == 'ppc64le'
+ elif host_machine.cpu_family().startswith('ppc64') and host_machine.endian() == 'little'
if system_has_kms_drm
with_asm_arch = 'ppc64le'
pre_args += ['-DUSE_PPC64LE_ASM']
@@ -1162,7 +1173,7 @@ endif
llvm_modules = ['bitwriter', 'engine', 'mcdisassembler', 'mcjit']
llvm_optional_modules = []
if with_amd_vk or with_gallium_radeonsi or with_gallium_r600
- llvm_modules += ['amdgpu', 'bitreader', 'ipo']
+ llvm_modules += ['amdgpu', 'native', 'bitreader', 'ipo']
if with_gallium_r600
llvm_modules += 'asmparser'
endif
@@ -1223,6 +1234,9 @@ if with_llvm
# programs, so we need to build all C++ code in mesa without rtti as well to
# ensure that linking works.
if dep_llvm.get_configtool_variable('has-rtti') == 'NO'
+ if with_gallium_nouveau
+ error('The Nouveau driver requires rtti. You either need to turn off nouveau or use an LLVM built with LLVM_ENABLE_RTTI.')
+ endif
cpp_args += '-fno-rtti'
endif
elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
@@ -1317,13 +1331,6 @@ if with_platform_wayland
'linux-dmabuf', 'linux-dmabuf-unstable-v1.xml'
)
pre_args += ['-DHAVE_WAYLAND_PLATFORM', '-DWL_HIDE_DEPRECATED']
-else
- prog_wl_scanner = []
- wl_scanner_arg = ''
- dep_wl_protocols = null_dep
- dep_wayland_client = null_dep
- dep_wayland_server = null_dep
- wayland_dmabuf_xml = ''
endif
dep_x11 = null_dep
@@ -1356,7 +1363,6 @@ if with_platform_x11
dep_xdamage = dependency('xdamage', version : '>= 1.1')
dep_xfixes = dependency('xfixes')
dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1')
- dep_xxf86vm = dependency('xxf86vm', required : false)
endif
if (with_any_vk or with_glx == 'dri' or
(with_gallium_vdpau or with_gallium_xvmc or with_gallium_va or
@@ -1383,6 +1389,7 @@ if with_platform_x11
if with_glx == 'dri'
if with_dri_platform == 'drm'
dep_dri2proto = dependency('dri2proto', version : '>= 2.8')
+ dep_xxf86vm = dependency('xxf86vm')
endif
dep_glproto = dependency('glproto', version : '>= 1.4.14')
endif
@@ -1403,7 +1410,7 @@ endif
_sensors = get_option('lmsensors')
if _sensors != 'false'
- dep_lmsensors = cc.find_library('libsensors', required : _sensors == 'true')
+ dep_lmsensors = cc.find_library('sensors', required : _sensors == 'true')
if dep_lmsensors.found()
pre_args += '-DHAVE_LIBSENSORS=1'
endif
@@ -1433,14 +1440,12 @@ elif with_glx == 'dri'
'xcb-glx >= 1.8.1']
if with_dri_platform == 'drm'
gl_priv_reqs += 'xcb-dri2 >= 1.8'
+ gl_priv_reqs += 'xxf86vm'
endif
endif
if dep_libdrm.found()
gl_priv_reqs += 'libdrm >= 2.4.75'
endif
-if dep_xxf86vm.found()
- gl_priv_reqs += 'xxf86vm'
-endif
gl_priv_libs = []
if dep_thread.found()
diff --git a/meson_options.txt b/meson_options.txt
index a1d5ab0e185..589d10bb3f3 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -318,3 +318,9 @@ option(
choices : ['auto', 'true', 'false'],
description : 'Enable VK_EXT_acquire_xlib_display.'
)
+option(
+ 'glx-direct',
+ type : 'boolean',
+ value : true,
+ description : 'Enable direct rendering in GLX and EGL for DRI',
+)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1392ec0f238..8953da7f18d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2882,9 +2882,11 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
if (count == num_components)
return value;
- LLVMValueRef masks[] = {
- ctx->i32_0, ctx->i32_1,
- LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
+ LLVMValueRef masks[MAX2(count, 2)];
+ masks[0] = ctx->i32_0;
+ masks[1] = ctx->i32_1;
+ for (unsigned i = 2; i < count; i++)
+ masks[i] = LLVMConstInt(ctx->i32, i, false);
if (count == 1)
return LLVMBuildExtractElement(ctx->builder, value, masks[0],
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index e5fbe003f53..827cb5d85a8 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -311,9 +311,18 @@ static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
}
static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
- LLVMValueRef src0)
+ LLVMValueRef src0,
+ unsigned bitsize)
{
- return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
+ LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0,
+ LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""),
+ "");
+ result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, "");
+
+ if (bitsize == 32)
+ return result;
+
+ return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
}
static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
@@ -932,7 +941,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
break;
case nir_op_b2f:
- result = emit_b2f(&ctx->ac, src[0]);
+ result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
break;
case nir_op_f2b:
result = emit_f2b(&ctx->ac, src[0]);
@@ -1613,37 +1622,45 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
- LLVMValueRef results[2];
- int load_bytes;
int elem_size_bytes = instr->dest.ssa.bit_size / 8;
int num_components = instr->num_components;
- int num_bytes = num_components * elem_size_bytes;
enum gl_access_qualifier access = nir_intrinsic_access(instr);
LLVMValueRef glc = ctx->ac.i1false;
if (access & (ACCESS_VOLATILE | ACCESS_COHERENT))
glc = ctx->ac.i1true;
- for (int i = 0; i < num_bytes; i += load_bytes) {
- load_bytes = MIN2(num_bytes - i, 16);
- const char *load_name;
- LLVMTypeRef data_type;
- LLVMValueRef offset = get_src(ctx, instr->src[1]);
- LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i, false);
- LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
- get_src(ctx, instr->src[0]), false);
- LLVMValueRef vindex = ctx->ac.i32_0;
+ LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
+ get_src(ctx, instr->src[0]), false);
+ LLVMValueRef vindex = ctx->ac.i32_0;
+
+ LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
+ LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
- int idx = i ? 1 : 0;
+ LLVMValueRef results[4];
+ for (int i = 0; i < num_components;) {
+ int num_elems = num_components - i;
+ if (elem_size_bytes < 4)
+ num_elems = 1;
+ if (num_elems * elem_size_bytes > 16)
+ num_elems = 16 / elem_size_bytes;
+ int load_bytes = num_elems * elem_size_bytes;
+
+ LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
+
+ LLVMValueRef ret;
if (load_bytes == 2) {
- results[idx] = ac_build_tbuffer_load_short(&ctx->ac,
- rsrc,
- vindex,
- offset,
- ctx->ac.i32_0,
- immoffset,
- glc);
+ ret = ac_build_tbuffer_load_short(&ctx->ac,
+ rsrc,
+ vindex,
+ offset,
+ ctx->ac.i32_0,
+ immoffset,
+ glc);
} else {
+ const char *load_name;
+ LLVMTypeRef data_type;
switch (load_bytes) {
case 16:
case 12:
@@ -1669,33 +1686,23 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
glc,
ctx->ac.i1false,
};
- results[idx] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
- unsigned num_elems = ac_get_type_size(data_type) / elem_size_bytes;
- LLVMTypeRef resTy = LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), num_elems);
- results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], resTy, "");
+ ret = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
}
- }
- assume(results[0]);
- LLVMValueRef ret = results[0];
- if (num_bytes > 16 || num_components == 3) {
- LLVMValueRef masks[] = {
- LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
- LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
- };
+ LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
+ ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
+ ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
- if (num_bytes > 16 && num_components == 3) {
- /* we end up with a v2i64 and i64 but shuffle fails on that */
- results[1] = ac_build_expand(&ctx->ac, results[1], 1, 2);
- }
+ LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
+ ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
- LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
- ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
- results[num_bytes > 16 ? 1 : 0], swizzle, "");
+ for (unsigned j = 0; j < num_elems; j++) {
+ results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
+ }
+ i += num_elems;
}
- return LLVMBuildBitCast(ctx->ac.builder, ret,
- get_def_type(ctx, &instr->dest.ssa), "");
+ return ac_build_gather_values(&ctx->ac, results, num_components);
}
static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
@@ -2371,17 +2378,27 @@ static void visit_image_store(struct ac_nir_context *ctx,
glc = ctx->ac.i1true;
if (dim == GLSL_SAMPLER_DIM_BUF) {
+ char name[48];
+ const char *types[] = { "f32", "v2f32", "v4f32" };
LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
+ LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
+ unsigned src_channels = ac_get_llvm_num_components(src);
- params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); /* data */
+ if (src_channels == 3)
+ src = ac_build_expand(&ctx->ac, src, 3, 4);
+
+ params[0] = src; /* data */
params[1] = rsrc;
params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
ctx->ac.i32_0, ""); /* vindex */
params[3] = ctx->ac.i32_0; /* voffset */
+ snprintf(name, sizeof(name), "%s.%s",
+ "llvm.amdgcn.buffer.store.format",
+ types[CLAMP(src_channels, 1, 3) - 1]);
+
params[4] = glc; /* glc */
params[5] = ctx->ac.i1false; /* slc */
- ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt,
- params, 6, 0);
+ ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6, 0);
} else {
struct ac_image_args args = {};
args.opcode = ac_image_store;
@@ -2793,7 +2810,7 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
LLVMValueRef src0 = NULL;
nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
- int input_index = var->data.location - VARYING_SLOT_VAR0;
+ int input_index = ctx->abi->fs_input_attr_indices[var->data.location - VARYING_SLOT_VAR0];
switch (instr->intrinsic) {
case nir_intrinsic_interp_deref_at_centroid:
location = INTERP_CENTROID;
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index 6b9a91c92a9..ee18e6c1923 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -77,6 +77,9 @@ struct ac_shader_abi {
*/
LLVMValueRef *inputs;
+ /* Varying -> attribute number mapping. Also NIR-only */
+ unsigned fs_input_attr_indices[MAX_VARYING];
+
void (*emit_outputs)(struct ac_shader_abi *abi,
unsigned max_outputs,
LLVMValueRef *addrs);
diff --git a/src/amd/vulkan/Android.mk b/src/amd/vulkan/Android.mk
index 51b03561fa7..9574bf54e5a 100644
--- a/src/amd/vulkan/Android.mk
+++ b/src/amd/vulkan/Android.mk
@@ -74,7 +74,8 @@ LOCAL_C_INCLUDES := \
$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_vulkan_util,,)/util
LOCAL_WHOLE_STATIC_LIBRARIES := \
- libmesa_vulkan_util
+ libmesa_vulkan_util \
+ libmesa_git_sha1
LOCAL_GENERATED_SOURCES += $(intermediates)/radv_entrypoints.c
LOCAL_GENERATED_SOURCES += $(intermediates)/radv_entrypoints.h
diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build
index 0f1261d4809..cc2aa7fd17a 100644
--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -140,7 +140,7 @@ libvulkan_radeon = shared_library(
],
dependencies : [
dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m,
- dep_valgrind,
+ dep_valgrind, radv_deps,
idep_nir,
],
c_args : [c_vis_args, no_override_init_args, radv_flags],
diff --git a/src/amd/vulkan/radv_android.c b/src/amd/vulkan/radv_android.c
index f5d70825dd2..1a4425f26a5 100644
--- a/src/amd/vulkan/radv_android.c
+++ b/src/amd/vulkan/radv_android.c
@@ -110,17 +110,6 @@ radv_image_from_gralloc(VkDevice device_h,
struct radv_bo *bo = NULL;
VkResult result;
- result = radv_image_create(device_h,
- &(struct radv_image_create_info) {
- .vk_info = base_info,
- .scanout = true,
- .no_metadata_planes = true},
- alloc,
- &image_h);
-
- if (result != VK_SUCCESS)
- return result;
-
if (gralloc_info->handle->numFds != 1) {
return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR,
"VkNativeBufferANDROID::handle::numFds is %d, "
@@ -133,23 +122,14 @@ radv_image_from_gralloc(VkDevice device_h,
*/
int dma_buf = gralloc_info->handle->data[0];
- image = radv_image_from_handle(image_h);
-
VkDeviceMemory memory_h;
- const VkMemoryDedicatedAllocateInfoKHR ded_alloc = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR,
- .pNext = NULL,
- .buffer = VK_NULL_HANDLE,
- .image = image_h
- };
-
const VkImportMemoryFdInfoKHR import_info = {
.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
- .pNext = &ded_alloc,
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
.fd = dup(dma_buf),
};
+
/* Find the first VRAM memory type, or GART for PRIME images. */
int memory_type_index = -1;
for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
@@ -168,14 +148,49 @@ radv_image_from_gralloc(VkDevice device_h,
&(VkMemoryAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = &import_info,
- .allocationSize = image->size,
+ /* Max buffer size, unused for imports */
+ .allocationSize = 0x7FFFFFFF,
.memoryTypeIndex = memory_type_index,
},
alloc,
&memory_h);
+ if (result != VK_SUCCESS)
+ return result;
+
+ struct radeon_bo_metadata md;
+ device->ws->buffer_get_metadata(radv_device_memory_from_handle(memory_h)->bo, &md);
+
+ bool is_scanout;
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ /* Copied from radeonsi, but is hacky so should be cleaned up. */
+ is_scanout = md.u.gfx9.swizzle_mode == 0 || md.u.gfx9.swizzle_mode % 4 == 2;
+ } else {
+ is_scanout = md.u.legacy.scanout;
+ }
+
+ VkImageCreateInfo updated_base_info = *base_info;
+
+ VkExternalMemoryImageCreateInfo external_memory_info = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
+ .pNext = updated_base_info.pNext,
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+ };
+
+ updated_base_info.pNext = &external_memory_info;
+
+ result = radv_image_create(device_h,
+ &(struct radv_image_create_info) {
+ .vk_info = &updated_base_info,
+ .scanout = is_scanout,
+ .no_metadata_planes = true},
+ alloc,
+ &image_h);
+
if (result != VK_SUCCESS)
goto fail_create_image;
+ image = radv_image_from_handle(image_h);
+
radv_BindImageMemory(device_h, image_h, memory_h, 0);
image->owned_memory = memory_h;
@@ -185,9 +200,7 @@ radv_image_from_gralloc(VkDevice device_h,
return VK_SUCCESS;
fail_create_image:
-fail_size:
- radv_DestroyImage(device_h, image_h, alloc);
-
+ radv_FreeMemory(device_h, memory_h, alloc);
return result;
}
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c43e12f6d62..4ebb01c6810 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1068,7 +1068,7 @@ static void
radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
struct radv_ds_buffer_info *ds,
struct radv_image *image, VkImageLayout layout,
- bool requires_cond_write)
+ bool requires_cond_exec)
{
uint32_t db_z_info = ds->db_z_info;
uint32_t db_z_info_reg;
@@ -1092,38 +1092,21 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
}
/* When we don't know the last fast clear value we need to emit a
- * conditional packet, otherwise we can update DB_Z_INFO directly.
+ * conditional packet that will eventually skip the following
+ * SET_CONTEXT_REG packet.
*/
- if (requires_cond_write) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0));
-
- const uint32_t write_space = 0 << 8; /* register */
- const uint32_t poll_space = 1 << 4; /* memory */
- const uint32_t function = 3 << 0; /* equal to the reference */
- const uint32_t options = write_space | poll_space | function;
- radeon_emit(cmd_buffer->cs, options);
-
- /* poll address - location of the depth clear value */
+ if (requires_cond_exec) {
uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->clear_value_offset;
-
- /* In presence of stencil format, we have to adjust the base
- * address because the first value is the stencil clear value.
- */
- if (vk_format_is_stencil(image->vk_format))
- va += 4;
+ va += image->offset + image->tc_compat_zrange_offset;
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
-
- radeon_emit(cmd_buffer->cs, fui(0.0f)); /* reference value */
- radeon_emit(cmd_buffer->cs, (uint32_t)-1); /* comparison mask */
- radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */
- radeon_emit(cmd_buffer->cs, 0u); /* write address high */
- radeon_emit(cmd_buffer->cs, db_z_info);
- } else {
- radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
}
+
+ radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
}
static void
@@ -1270,6 +1253,45 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, fui(ds_clear_value.depth));
}
+/**
+ * Update the TC-compat metadata value for this image.
+ */
+static void
+radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ uint32_t value)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->tc_compat_zrange_offset;
+
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ S_370_WR_CONFIRM(1) |
+ S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, value);
+}
+
+static void
+radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ VkClearDepthStencilValue ds_clear_value)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->tc_compat_zrange_offset;
+ uint32_t cond_val;
+
+ /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
+ * depth clear value is 0.0f.
+ */
+ cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
+
+ radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val);
+}
+
/**
* Update the clear depth/stencil values for this image.
*/
@@ -1283,6 +1305,12 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects);
+ if (radv_image_is_tc_compat_htile(image) &&
+ (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
+ radv_update_tc_compat_zrange_metadata(cmd_buffer, image,
+ ds_clear_value);
+ }
+
radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value,
aspects);
}
@@ -1950,6 +1978,8 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
va = radv_buffer_get_va(buffer->bo) + buffer->offset;
+ va += sb[i].offset;
+
/* Set the descriptor.
*
* On VI, the format must be non-INVALID, otherwise
@@ -3518,8 +3548,13 @@ static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;
- /* Index & Vertex buffer don't change context regs, and pipeline is handled later. */
- used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_PIPELINE);
+ /* Index, vertex and streamout buffers don't change context regs, and
+ * pipeline is handled later.
+ */
+ used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER |
+ RADV_CMD_DIRTY_VERTEX_BUFFER |
+ RADV_CMD_DIRTY_STREAMOUT_BUFFER |
+ RADV_CMD_DIRTY_PIPELINE);
/* Assume all state changes except these two can imply context rolls. */
if (cmd_buffer->state.dirty & used_states)
@@ -4185,6 +4220,15 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects);
+
+ if (radv_image_is_tc_compat_htile(image)) {
+ /* Initialize the TC-compat metadata value to 0 because by
+ * default DB_Z_INFO.ZRANGE_PRECISION is set to 1, and we only
+ * need to conditionally update its value when performing
+ * a fast depth clear.
+ */
+ radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0);
+ }
}
static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
@@ -4613,6 +4657,8 @@ void radv_CmdBeginConditionalRenderingEXT(
draw_visible = false;
}
+ si_emit_cache_flush(cmd_buffer);
+
/* Enable predication for this command buffer. */
si_emit_set_predication_state(cmd_buffer, draw_visible, va);
cmd_buffer->state.predicating = true;
@@ -4741,28 +4787,30 @@ void radv_CmdBeginTransformFeedbackEXT(
struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
struct radv_streamout_state *so = &cmd_buffer->state.streamout;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t i;
radv_flush_vgt_streamout(cmd_buffer);
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
- for (uint32_t i = firstCounterBuffer; i < counterBufferCount; i++) {
- if (!(so->enabled_mask & (1 << i)))
- continue;
+ for_each_bit(i, so->enabled_mask) {
+ int32_t counter_buffer_idx = i - firstCounterBuffer;
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+ counter_buffer_idx = -1;
/* SI binds streamout buffers as shader resources.
* VGT only counts primitives and tells the shader through
* SGPRs what to do.
*/
radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
- radeon_emit(cs, (sb[i].offset + sb[i].size) >> 2); /* BUFFER_SIZE (in DW) */
+ radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */
radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */
- if (pCounterBuffers && pCounterBuffers[i]) {
+ if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
/* The array of counter buffers is optional. */
- RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[i]);
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
uint64_t va = radv_buffer_get_va(buffer->bo);
- va += buffer->offset + pCounterBufferOffsets[i];
+ va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx];
/* Append */
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
@@ -4783,7 +4831,7 @@ void radv_CmdBeginTransformFeedbackEXT(
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, sb[i].offset >> 2); /* buffer offset in DW */
+ radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
}
}
@@ -4801,20 +4849,22 @@ void radv_CmdEndTransformFeedbackEXT(
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_streamout_state *so = &cmd_buffer->state.streamout;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t i;
radv_flush_vgt_streamout(cmd_buffer);
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
- for (uint32_t i = firstCounterBuffer; i < counterBufferCount; i++) {
- if (!(so->enabled_mask & (1 << i)))
- continue;
+ for_each_bit(i, so->enabled_mask) {
+ int32_t counter_buffer_idx = i - firstCounterBuffer;
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+ counter_buffer_idx = -1;
- if (pCounterBuffers && pCounterBuffers[i]) {
+ if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
/* The array of counters buffer is optional. */
- RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[i]);
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
uint64_t va = radv_buffer_get_va(buffer->bo);
- va += buffer->offset + pCounterBufferOffsets[i];
+ va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx];
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index d68111c25bf..ac6cff23d58 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -936,9 +936,9 @@ void radv_GetPhysicalDeviceProperties(
2048,
2048
},
- .subPixelPrecisionBits = 4 /* FIXME */,
- .subTexelPrecisionBits = 4 /* FIXME */,
- .mipmapPrecisionBits = 4 /* FIXME */,
+ .subPixelPrecisionBits = 8,
+ .subTexelPrecisionBits = 8,
+ .mipmapPrecisionBits = 8,
.maxDrawIndexedIndexValue = UINT32_MAX,
.maxDrawIndirectCount = UINT32_MAX,
.maxSamplerLodBias = 16,
@@ -1054,16 +1054,14 @@ void radv_GetPhysicalDeviceProperties2(
(VkPhysicalDeviceSubgroupProperties*)ext;
properties->subgroupSize = 64;
properties->supportedStages = VK_SHADER_STAGE_ALL;
- /* TODO: Enable VK_SUBGROUP_FEATURE_VOTE_BIT when wwm
- * is fixed in LLVM.
- */
properties->supportedOperations =
- VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
VK_SUBGROUP_FEATURE_BASIC_BIT |
VK_SUBGROUP_FEATURE_BALLOT_BIT |
- VK_SUBGROUP_FEATURE_QUAD_BIT;
+ VK_SUBGROUP_FEATURE_QUAD_BIT |
+ VK_SUBGROUP_FEATURE_VOTE_BIT;
if (pdevice->rad_info.chip_class >= VI) {
properties->supportedOperations |=
+ VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
}
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index 6bdf988d117..4a28f8bf41c 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -105,7 +105,7 @@ def __init__(self, name, ext_version, enable):
Extension('VK_EXT_external_memory_dma_buf', 1, True),
Extension('VK_EXT_external_memory_host', 1, 'device->rad_info.has_userptr'),
Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'),
- Extension('VK_EXT_pci_bus_info', 1, True),
+ Extension('VK_EXT_pci_bus_info', 1, False),
Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'),
Extension('VK_EXT_shader_viewport_index_layer', 1, True),
Extension('VK_EXT_shader_stencil_export', 1, True),
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 64346aa340f..daabc489afb 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -691,7 +691,7 @@ radv_query_opaque_metadata(struct radv_device *device,
si_make_texture_descriptor(device, image, false,
(VkImageViewType)image->type, image->vk_format,
&fixedmapping, 0, image->info.levels - 1, 0,
- image->info.array_size,
+ image->info.array_size - 1,
image->info.width, image->info.height,
image->info.depth,
desc, NULL);
@@ -870,6 +870,14 @@ radv_image_alloc_htile(struct radv_image *image)
/* + 8 for storing the clear values */
image->clear_value_offset = image->htile_offset + image->surface.htile_size;
image->size = image->clear_value_offset + 8;
+ if (radv_image_is_tc_compat_htile(image)) {
+ /* Metadata for the TC-compatible HTILE hardware bug which
+ * has to be fixed by updating ZRANGE_PRECISION when doing
+ * fast depth clears to 0.0f.
+ */
+ image->tc_compat_zrange_offset = image->clear_value_offset + 8;
+ image->size = image->clear_value_offset + 16;
+ }
image->alignment = align64(image->alignment, image->surface.htile_alignment);
}
@@ -977,7 +985,7 @@ radv_image_create(VkDevice _device,
image->shareable = vk_find_struct_const(pCreateInfo->pNext,
EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL;
- if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
+ if (!vk_format_is_depth_or_stencil(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
image->info.surf_index = &device->image_mrt_offset_counter;
}
@@ -1014,8 +1022,8 @@ radv_image_create(VkDevice _device,
/* Otherwise, try to enable HTILE for depth surfaces. */
if (radv_image_can_enable_htile(image) &&
!(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
- radv_image_alloc_htile(image);
image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
+ radv_image_alloc_htile(image);
} else {
image->surface.htile_size = 0;
}
@@ -1175,8 +1183,6 @@ radv_image_view_init(struct radv_image_view *iview,
if (device->physical_device->rad_info.chip_class >= GFX9 &&
vk_format_is_compressed(image->vk_format) &&
!vk_format_is_compressed(iview->vk_format)) {
- unsigned rounded_img_w = util_next_power_of_two(iview->extent.width);
- unsigned rounded_img_h = util_next_power_of_two(iview->extent.height);
unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
@@ -1186,8 +1192,8 @@ radv_image_view_init(struct radv_image_view *iview,
lvl_width <<= range->baseMipLevel;
lvl_height <<= range->baseMipLevel;
- iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w);
- iview->extent.height = CLAMP(lvl_height, iview->extent.height, rounded_img_h);
+ iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->surface.u.gfx9.surf_pitch);
+ iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->surface.u.gfx9.surf_height);
}
}
diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c
index 6f074a70b4c..e9d680437e4 100644
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -2061,7 +2061,7 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
if (device->physical_device->rad_info.chip_class >= GFX9 &&
- src->image->type == VK_IMAGE_TYPE_3D)
+ (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index f56eb01dc52..8c21c423511 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2242,6 +2242,8 @@ handle_fs_inputs(struct radv_shader_context *ctx,
if (LLVMIsUndef(interp_param))
ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
+ if (i >= VARYING_SLOT_VAR0)
+ ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
++index;
} else if (i == VARYING_SLOT_CLIP_DIST0) {
int length = ctx->shader_info->info.ps.num_input_clips_culls;
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index bced19573c1..cc025f55ea3 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3396,8 +3396,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
(pipeline->graphics.prim_restart_enable &&
(device->physical_device->rad_info.family < CHIP_POLARIS10 ||
(prim != V_008958_DI_PT_POINTLIST &&
- prim != V_008958_DI_PT_LINESTRIP &&
- prim != V_008958_DI_PT_TRISTRIP))))
+ prim != V_008958_DI_PT_LINESTRIP))))
ia_multi_vgt_param.wd_switch_on_eop = true;
}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 7e9e82e3158..585702a88b2 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -595,6 +595,7 @@ struct radv_meta_state {
VkPipelineLayout p_layout;
VkPipeline occlusion_query_pipeline;
VkPipeline pipeline_statistics_query_pipeline;
+ VkPipeline tfb_query_pipeline;
} query;
};
@@ -1497,6 +1498,14 @@ struct radv_image {
uint64_t clear_value_offset;
uint64_t dcc_pred_offset;
+ /*
+ * Metadata for the TC-compat zrange workaround. If the 32-bit value
+ * stored at this offset is UINT_MAX, the driver will emit
+ * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
+ * SET_CONTEXT_REG packet.
+ */
+ uint64_t tc_compat_zrange_offset;
+
/* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
VkDeviceMemory owned_memory;
};
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 57ea22fb847..cdff336f8a3 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -512,11 +512,233 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
return b.shader;
}
+static nir_shader *
+build_tfb_query_shader(struct radv_device *device)
+{
+ /* the shader this builds is roughly
+ *
+ * uint32_t src_stride = 32;
+ *
+ * location(binding = 0) buffer dst_buf;
+ * location(binding = 1) buffer src_buf;
+ *
+ * void main() {
+ * uint64_t result[2] = {};
+ * bool available = false;
+ * uint64_t src_offset = src_stride * global_id.x;
+ * uint64_t dst_offset = dst_stride * global_id.x;
+ * uint64_t *src_data = src_buf[src_offset];
+ * uint32_t avail = (src_data[0] >> 32) &
+ * (src_data[1] >> 32) &
+ * (src_data[2] >> 32) &
+ * (src_data[3] >> 32);
+ * if (avail & 0x80000000) {
+ * result[0] = src_data[3] - src_data[1];
+ * result[1] = src_data[2] - src_data[0];
+ * available = true;
+ * }
+ * uint32_t result_size = flags & VK_QUERY_RESULT_64_BIT ? 16 : 8;
+ * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
+ * if (flags & VK_QUERY_RESULT_64_BIT) {
+ * dst_buf[dst_offset] = result;
+ * } else {
+ * dst_buf[dst_offset] = (uint32_t)result;
+ * }
+ * }
+ * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ * dst_buf[dst_offset + result_size] = available;
+ * }
+ * }
+ */
+ nir_builder b;
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "tfb_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
+
+ /* Create and initialize local variables. */
+ nir_variable *result =
+ nir_local_variable_create(b.impl,
+ glsl_vector_type(GLSL_TYPE_UINT64, 2),
+ "result");
+ nir_variable *available =
+ nir_local_variable_create(b.impl, glsl_int_type(), "available");
+
+ nir_store_var(&b, result,
+ nir_vec2(&b, nir_imm_int64(&b, 0),
+ nir_imm_int64(&b, 0)), 0x3);
+ nir_store_var(&b, available, nir_imm_int(&b, 0), 0x1);
+
+ nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags");
+
+ /* Load resources. */
+ nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
+ nir_intrinsic_vulkan_resource_index);
+ dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+ nir_intrinsic_set_desc_set(dst_buf, 0);
+ nir_intrinsic_set_binding(dst_buf, 0);
+ nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
+ nir_builder_instr_insert(&b, &dst_buf->instr);
+
+ nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
+ nir_intrinsic_vulkan_resource_index);
+ src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+ nir_intrinsic_set_desc_set(src_buf, 0);
+ nir_intrinsic_set_binding(src_buf, 1);
+ nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
+ nir_builder_instr_insert(&b, &src_buf->instr);
+
+ /* Compute global ID. */
+ nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
+ nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
+ nir_ssa_def *block_size = nir_imm_ivec4(&b,
+ b.shader->info.cs.local_size[0],
+ b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ global_id = nir_channel(&b, global_id, 0); // We only care about x here.
+
+ /* Compute src/dst strides. */
+ nir_ssa_def *input_stride = nir_imm_int(&b, 32);
+ nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
+ nir_ssa_def *output_stride = radv_load_push_int(&b, 4, "output_stride");
+ nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+
+ /* Load data from the query pool. */
+ nir_intrinsic_instr *load1 = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
+ load1->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
+ load1->src[1] = nir_src_for_ssa(input_base);
+ nir_ssa_dest_init(&load1->instr, &load1->dest, 4, 32, NULL);
+ load1->num_components = 4;
+ nir_builder_instr_insert(&b, &load1->instr);
+
+ nir_intrinsic_instr *load2 = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
+ load2->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
+ load2->src[1] = nir_src_for_ssa(nir_iadd(&b, input_base, nir_imm_int(&b, 16)));
+ nir_ssa_dest_init(&load2->instr, &load2->dest, 4, 32, NULL);
+ load2->num_components = 4;
+ nir_builder_instr_insert(&b, &load2->instr);
+
+ /* Check if result is available. */
+ nir_ssa_def *avails[2];
+ avails[0] = nir_iand(&b, nir_channel(&b, &load1->dest.ssa, 1),
+ nir_channel(&b, &load1->dest.ssa, 3));
+ avails[1] = nir_iand(&b, nir_channel(&b, &load2->dest.ssa, 1),
+ nir_channel(&b, &load2->dest.ssa, 3));
+ nir_ssa_def *result_is_available =
+ nir_iand(&b, nir_iand(&b, avails[0], avails[1]),
+ nir_imm_int(&b, 0x80000000));
+
+ /* Only compute result if available. */
+ nir_if *available_if = nir_if_create(b.shader);
+ available_if->condition = nir_src_for_ssa(result_is_available);
+ nir_cf_node_insert(b.cursor, &available_if->cf_node);
+
+ b.cursor = nir_after_cf_list(&available_if->then_list);
+
+ /* Pack values. */
+ nir_ssa_def *packed64[4];
+ packed64[0] = nir_pack_64_2x32(&b, nir_vec2(&b,
+ nir_channel(&b, &load1->dest.ssa, 0),
+ nir_channel(&b, &load1->dest.ssa, 1)));
+ packed64[1] = nir_pack_64_2x32(&b, nir_vec2(&b,
+ nir_channel(&b, &load1->dest.ssa, 2),
+ nir_channel(&b, &load1->dest.ssa, 3)));
+ packed64[2] = nir_pack_64_2x32(&b, nir_vec2(&b,
+ nir_channel(&b, &load2->dest.ssa, 0),
+ nir_channel(&b, &load2->dest.ssa, 1)));
+ packed64[3] = nir_pack_64_2x32(&b, nir_vec2(&b,
+ nir_channel(&b, &load2->dest.ssa, 2),
+ nir_channel(&b, &load2->dest.ssa, 3)));
+
+ /* Compute result. */
+ nir_ssa_def *num_primitive_written =
+ nir_isub(&b, packed64[3], packed64[1]);
+ nir_ssa_def *primitive_storage_needed =
+ nir_isub(&b, packed64[2], packed64[0]);
+
+ nir_store_var(&b, result,
+ nir_vec2(&b, num_primitive_written,
+ primitive_storage_needed), 0x3);
+ nir_store_var(&b, available, nir_imm_int(&b, 1), 0x1);
+
+ b.cursor = nir_after_cf_node(&available_if->cf_node);
+
+ /* Determine if result is 64 or 32 bit. */
+ nir_ssa_def *result_is_64bit =
+ nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_64_BIT));
+ nir_ssa_def *result_size =
+ nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16),
+ nir_imm_int(&b, 8));
+
+ /* Store the result if complete or partial results have been requested. */
+ nir_if *store_if = nir_if_create(b.shader);
+ store_if->condition =
+ nir_src_for_ssa(nir_ior(&b, nir_iand(&b, flags,
+ nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT)),
+ nir_load_var(&b, available)));
+ nir_cf_node_insert(b.cursor, &store_if->cf_node);
+
+ b.cursor = nir_after_cf_list(&store_if->then_list);
+
+ /* Store result. */
+ nir_if *store_64bit_if = nir_if_create(b.shader);
+ store_64bit_if->condition = nir_src_for_ssa(result_is_64bit);
+ nir_cf_node_insert(b.cursor, &store_64bit_if->cf_node);
+
+ b.cursor = nir_after_cf_list(&store_64bit_if->then_list);
+
+ nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
+ store->src[0] = nir_src_for_ssa(nir_load_var(&b, result));
+ store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
+ store->src[2] = nir_src_for_ssa(output_base);
+ nir_intrinsic_set_write_mask(store, 0x3);
+ store->num_components = 2;
+ nir_builder_instr_insert(&b, &store->instr);
+
+ b.cursor = nir_after_cf_list(&store_64bit_if->else_list);
+
+ store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
+ store->src[0] = nir_src_for_ssa(nir_u2u32(&b, nir_load_var(&b, result)));
+ store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
+ store->src[2] = nir_src_for_ssa(output_base);
+ nir_intrinsic_set_write_mask(store, 0x3);
+ store->num_components = 2;
+ nir_builder_instr_insert(&b, &store->instr);
+
+ b.cursor = nir_after_cf_node(&store_64bit_if->cf_node);
+
+ b.cursor = nir_after_cf_node(&store_if->cf_node);
+
+ /* Store the availability bit if requested. */
+ nir_if *availability_if = nir_if_create(b.shader);
+ availability_if->condition =
+ nir_src_for_ssa(nir_iand(&b, flags,
+ nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)));
+ nir_cf_node_insert(b.cursor, &availability_if->cf_node);
+
+ b.cursor = nir_after_cf_list(&availability_if->then_list);
+
+ store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
+ store->src[0] = nir_src_for_ssa(nir_load_var(&b, available));
+ store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
+ store->src[2] = nir_src_for_ssa(nir_iadd(&b, result_size, output_base));
+ nir_intrinsic_set_write_mask(store, 0x1);
+ store->num_components = 1;
+ nir_builder_instr_insert(&b, &store->instr);
+
+ b.cursor = nir_after_cf_node(&availability_if->cf_node);
+
+ return b.shader;
+}
+
static VkResult radv_device_init_meta_query_state_internal(struct radv_device *device)
{
VkResult result;
struct radv_shader_module occlusion_cs = { .nir = NULL };
struct radv_shader_module pipeline_statistics_cs = { .nir = NULL };
+ struct radv_shader_module tfb_cs = { .nir = NULL };
mtx_lock(&device->meta_state.mtx);
if (device->meta_state.query.pipeline_statistics_query_pipeline) {
@@ -525,6 +747,7 @@ static VkResult radv_device_init_meta_query_state_internal(struct radv_device *d
}
occlusion_cs.nir = build_occlusion_query_shader(device);
pipeline_statistics_cs.nir = build_pipeline_statistics_query_shader(device);
+ tfb_cs.nir = build_tfb_query_shader(device);
VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
@@ -611,12 +834,34 @@ static VkResult radv_device_init_meta_query_state_internal(struct radv_device *d
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &pipeline_statistics_vk_pipeline_info, NULL,
&device->meta_state.query.pipeline_statistics_query_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+ VkPipelineShaderStageCreateInfo tfb_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = radv_shader_module_to_handle(&tfb_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo tfb_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = tfb_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.query.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ 1, &tfb_pipeline_info, NULL,
+ &device->meta_state.query.tfb_query_pipeline);
fail:
if (result != VK_SUCCESS)
radv_device_finish_meta_query_state(device);
ralloc_free(occlusion_cs.nir);
ralloc_free(pipeline_statistics_cs.nir);
+ ralloc_free(tfb_cs.nir);
mtx_unlock(&device->meta_state.mtx);
return result;
}
@@ -631,6 +876,11 @@ VkResult radv_device_init_meta_query_state(struct radv_device *device, bool on_d
void radv_device_finish_meta_query_state(struct radv_device *device)
{
+ if (device->meta_state.query.tfb_query_pipeline)
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ device->meta_state.query.tfb_query_pipeline,
+ &device->meta_state.alloc);
+
if (device->meta_state.query.pipeline_statistics_query_pipeline)
radv_DestroyPipeline(radv_device_to_handle(device),
device->meta_state.query.pipeline_statistics_query_pipeline,
@@ -663,6 +913,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
{
struct radv_device *device = cmd_buffer->device;
struct radv_meta_saved_state saved_state;
+ bool old_predicating;
if (!*pipeline) {
VkResult ret = radv_device_init_meta_query_state_internal(device);
@@ -677,6 +928,12 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
RADV_META_SAVE_CONSTANTS |
RADV_META_SAVE_DESCRIPTORS);
+ /* VK_EXT_conditional_rendering says that copy commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
struct radv_buffer dst_buffer = {
.bo = dst_bo,
.offset = dst_offset,
@@ -758,6 +1015,8 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 |
RADV_CMD_FLAG_INV_VMEM_L1 |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
radv_meta_restore(&saved_state, cmd_buffer);
}
@@ -1082,10 +1341,13 @@ void radv_CmdCopyQueryPoolResults(
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ /* Wait on the high 32 bits of the timestamp in
+ * case the low part is 0xffffffff.
+ */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
- radeon_emit(cs, local_src_va);
- radeon_emit(cs, local_src_va >> 32);
+ radeon_emit(cs, local_src_va + 4);
+ radeon_emit(cs, (local_src_va + 4) >> 32);
radeon_emit(cs, TIMESTAMP_NOT_READY >> 32);
radeon_emit(cs, 0xffffffff);
radeon_emit(cs, 4);
@@ -1115,6 +1377,33 @@ void radv_CmdCopyQueryPoolResults(
assert(cs->cdw <= cdw_max);
}
break;
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ for(unsigned i = 0; i < queryCount; i++) {
+ unsigned query = firstQuery + i;
+ uint64_t src_va = va + query * pool->stride;
+
+ /* Wait on the upper word of all results. */
+ for (unsigned j = 0; j < 4; j++, src_va += 8) {
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(cs, WAIT_REG_MEM_GREATER_OR_EQUAL |
+ WAIT_REG_MEM_MEM_SPACE(1));
+ radeon_emit(cs, (src_va + 4));
+ radeon_emit(cs, (src_va + 4) >> 32);
+ radeon_emit(cs, 0x80000000); /* reference value */
+ radeon_emit(cs, 0xffffffff); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
+ }
+ }
+ }
+
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline,
+ pool->bo, dst_buffer->bo,
+ firstQuery * pool->stride,
+ dst_buffer->offset + dstOffset,
+ pool->stride, stride,
+ queryCount, flags, 0, 0);
+ break;
default:
unreachable("trying to get results of unhandled query type");
}
@@ -1161,6 +1450,22 @@ static unsigned event_type_for_stream(unsigned stream)
}
}
+static void emit_query_flush(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_query_pool *pool)
+{
+ if (cmd_buffer->pending_reset_query) {
+ if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
+ /* Only need to flush caches if the query pool size is
+ * large enough to be reset using the compute shader
+ * path. Small pools don't need any cache flushes
+ * because we use a CP dma clear.
+ */
+ si_emit_cache_flush(cmd_buffer);
+ cmd_buffer->pending_reset_query = false;
+ }
+ }
+}
+
static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer,
uint64_t va,
VkQueryType query_type,
@@ -1307,17 +1612,7 @@ void radv_CmdBeginQueryIndexedEXT(
radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo);
- if (cmd_buffer->pending_reset_query) {
- if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
- /* Only need to flush caches if the query pool size is
- * large enough to be resetted using the compute shader
- * path. Small pools don't need any cache flushes
- * because we use a CP dma clear.
- */
- si_emit_cache_flush(cmd_buffer);
- cmd_buffer->pending_reset_query = false;
- }
- }
+ emit_query_flush(cmd_buffer, pool);
va += pool->stride * query;
@@ -1394,6 +1689,8 @@ void radv_CmdWriteTimestamp(
radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo);
+ emit_query_flush(cmd_buffer, pool);
+
int num_queries = 1;
if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask)
num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask);
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index 7977d46229e..e9d541ab150 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -223,6 +223,8 @@ struct radeon_winsys {
void (*buffer_set_metadata)(struct radeon_winsys_bo *bo,
struct radeon_bo_metadata *md);
+ void (*buffer_get_metadata)(struct radeon_winsys_bo *bo,
+ struct radeon_bo_metadata *md);
void (*buffer_virtual_bind)(struct radeon_winsys_bo *parent,
uint64_t offset, uint64_t size,
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
index 25764d93f6a..ec126bfc7cb 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -304,8 +304,12 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
return NULL;
}
+ unsigned virt_alignment = alignment;
+ if (size >= ws->info.pte_fragment_size)
+ virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);
+
r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
- size, alignment, 0, &va, &va_handle,
+ size, virt_alignment, 0, &va, &va_handle,
(flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
AMDGPU_VA_RANGE_HIGH);
if (r)
@@ -536,6 +540,21 @@ radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
return true;
}
+static unsigned eg_tile_split(unsigned tile_split)
+{
+ switch (tile_split) {
+ case 0: tile_split = 64; break;
+ case 1: tile_split = 128; break;
+ case 2: tile_split = 256; break;
+ case 3: tile_split = 512; break;
+ default:
+ case 4: tile_split = 1024; break;
+ case 5: tile_split = 2048; break;
+ case 6: tile_split = 4096; break;
+ }
+ return tile_split;
+}
+
static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
switch (eg_tile_split) {
@@ -589,6 +608,43 @@ radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
amdgpu_bo_set_metadata(bo->bo, &metadata);
}
+static void
+radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
+ struct radeon_bo_metadata *md)
+{
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ struct amdgpu_bo_info info = {0};
+
+ int r = amdgpu_bo_query_info(bo->bo, &info);
+ if (r)
+ return;
+
+ uint64_t tiling_flags = info.metadata.tiling_info;
+
+ if (bo->ws->info.chip_class >= GFX9) {
+ md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
+ } else {
+ md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
+ md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
+
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
+ md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
+ else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
+ md->u.legacy.microtile = RADEON_LAYOUT_TILED;
+
+ md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+ md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
+ md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+ md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
+ }
+
+ md->size_metadata = info.metadata.size_metadata;
+ memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
+}
+
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
@@ -599,5 +655,6 @@ void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
+ ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
}
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index 4f3b621fd29..54483195952 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -392,6 +392,7 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
for (int i = 0; i < ARRAY_SIZE(state->last_r); i++)
add_write_dep(state, &state->last_r[i], n);
add_write_dep(state, &state->last_sf, n);
+ add_write_dep(state, &state->last_rtop, n);
/* Scoreboard-locking operations have to stay after the last
* thread switch.
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
index b5a7b841ef6..4baadce294c 100644
--- a/src/broadcom/compiler/vir_to_qpu.c
+++ b/src/broadcom/compiler/vir_to_qpu.c
@@ -364,6 +364,7 @@ v3d_dump_qpu(struct v3d_compile *c)
for (int i = 0; i < c->qpu_inst_count; i++) {
const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str);
+ ralloc_free((void *)str);
}
fprintf(stderr, "\n");
}
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
index 0846cc86174..147017a6594 100644
--- a/src/broadcom/qpu/qpu_instr.c
+++ b/src/broadcom/qpu/qpu_instr.c
@@ -551,6 +551,7 @@ bool
v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
{
return (waddr == V3D_QPU_WADDR_SYNC ||
+ waddr == V3D_QPU_WADDR_SYNCB ||
waddr == V3D_QPU_WADDR_SYNCU);
}
diff --git a/src/compiler/Android.glsl.mk b/src/compiler/Android.glsl.mk
index 0aabafa2673..37b3cb80251 100644
--- a/src/compiler/Android.glsl.mk
+++ b/src/compiler/Android.glsl.mk
@@ -48,7 +48,7 @@ LOCAL_STATIC_LIBRARIES := \
libmesa_nir
LOCAL_MODULE := libmesa_glsl
-
+LOCAL_CFLAGS += -Wno-error
include $(LOCAL_PATH)/Android.glsl.gen.mk
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
diff --git a/src/compiler/Android.nir.mk b/src/compiler/Android.nir.mk
index 75a247a245d..59da5dbdc1c 100644
--- a/src/compiler/Android.nir.mk
+++ b/src/compiler/Android.nir.mk
@@ -41,6 +41,9 @@ LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/gallium/include \
$(MESA_TOP)/src/gallium/auxiliary
+LOCAL_CFLAGS := \
+ -Wno-missing-braces
+
LOCAL_STATIC_LIBRARIES := libmesa_compiler
LOCAL_MODULE := libmesa_nir
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 084b7021a9f..f4bd8c17db3 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -892,7 +892,8 @@ validate_assignment(struct _mesa_glsl_parse_state *state,
}
if (unsized_array) {
if (is_initializer) {
- return rhs;
+ if (rhs->type->get_scalar_type() == lhs->type->get_scalar_type())
+ return rhs;
} else {
_mesa_glsl_error(&loc, state,
"implicitly sized arrays cannot be assigned");
@@ -7422,7 +7423,7 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
if (member_align == 0 ||
member_align & (member_align - 1)) {
_mesa_glsl_error(&loc, state, "align layout qualifier "
- "in not a power of 2");
+ "is not a power of 2");
} else {
fields[i].offset = glsl_align(offset, member_align);
next_offset = glsl_align(fields[i].offset + size, align);
diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp
index 5650365d1d5..b6018806865 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -525,12 +525,6 @@ supports_nv_fragment_shader_interlock(const _mesa_glsl_parse_state *state)
return state->NV_fragment_shader_interlock_enable;
}
-static bool
-supports_intel_fragment_shader_ordering(const _mesa_glsl_parse_state *state)
-{
- return state->INTEL_fragment_shader_ordering_enable;
-}
-
static bool
shader_clock(const _mesa_glsl_parse_state *state)
{
@@ -1311,11 +1305,6 @@ builtin_builder::create_intrinsics()
supports_arb_fragment_shader_interlock,
ir_intrinsic_end_invocation_interlock), NULL);
- add_function("__intrinsic_begin_fragment_shader_ordering",
- _invocation_interlock_intrinsic(
- supports_intel_fragment_shader_ordering,
- ir_intrinsic_begin_fragment_shader_ordering), NULL);
-
add_function("__intrinsic_shader_clock",
_shader_clock_intrinsic(shader_clock,
glsl_type::uvec2_type),
@@ -3430,12 +3419,6 @@ builtin_builder::create_builtins()
supports_nv_fragment_shader_interlock),
NULL);
- add_function("beginFragmentShaderOrderingINTEL",
- _invocation_interlock(
- "__intrinsic_begin_fragment_shader_ordering",
- supports_intel_fragment_shader_ordering),
- NULL);
-
add_function("anyInvocationARB",
_vote("__intrinsic_vote_any", vote),
NULL);
diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y
index 1c095cb66f9..c951d9526ac 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -224,10 +224,12 @@ expanded_line:
glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro);
_glcpp_parser_skip_stack_change_if (parser, & @1, "elif", $2.value);
}
-| LINE_EXPANDED integer_constant NEWLINE {
+| LINE_EXPANDED expression NEWLINE {
+ if (parser->is_gles && $2.undefined_macro)
+ glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro);
parser->has_new_line_number = 1;
- parser->new_line_number = $2;
- _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2);
+ parser->new_line_number = $2.value;
+ _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2.value);
}
| LINE_EXPANDED integer_constant integer_constant NEWLINE {
parser->has_new_line_number = 1;
@@ -238,6 +240,17 @@ expanded_line:
"#line %" PRIiMAX " %" PRIiMAX "\n",
$2, $3);
}
+| LINE_EXPANDED '(' expression ')' '(' expression ')' NEWLINE {
+ if (parser->is_gles && $3.undefined_macro)
+ glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $3.undefined_macro);
+ if (parser->is_gles && $6.undefined_macro)
+ glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $6.undefined_macro);
+ parser->has_new_line_number = 1;
+ parser->new_line_number = $3.value;
+ parser->has_new_source_number = 1;
+ parser->new_source_number = $6.value;
+ _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX " %" PRIiMAX "\n", $3.value, $6.value);
+ }
;
define:
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index 1bdd7c4bf17..efd1a013dbd 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -727,7 +727,6 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT_AEP(EXT_texture_buffer),
EXT_AEP(EXT_texture_cube_map_array),
EXT(INTEL_conservative_rasterization),
- EXT(INTEL_fragment_shader_ordering),
EXT(INTEL_shader_atomic_float_minmax),
EXT(MESA_shader_integer_functions),
EXT(NV_fragment_shader_interlock),
diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h
index 966d848509c..69aa6cf9cf3 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -812,8 +812,6 @@ struct _mesa_glsl_parse_state {
bool EXT_texture_cube_map_array_warn;
bool INTEL_conservative_rasterization_enable;
bool INTEL_conservative_rasterization_warn;
- bool INTEL_fragment_shader_ordering_enable;
- bool INTEL_fragment_shader_ordering_warn;
bool INTEL_shader_atomic_float_minmax_enable;
bool INTEL_shader_atomic_float_minmax_warn;
bool MESA_shader_integer_functions_enable;
diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
index 0479f8fcfe4..0956d2f6303 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -742,9 +742,6 @@ nir_visitor::visit(ir_call *ir)
case ir_intrinsic_end_invocation_interlock:
op = nir_intrinsic_end_invocation_interlock;
break;
- case ir_intrinsic_begin_fragment_shader_ordering:
- op = nir_intrinsic_begin_fragment_shader_ordering;
- break;
case ir_intrinsic_group_memory_barrier:
op = nir_intrinsic_group_memory_barrier;
break;
@@ -983,9 +980,6 @@ nir_visitor::visit(ir_call *ir)
case nir_intrinsic_end_invocation_interlock:
nir_builder_instr_insert(&b, &instr->instr);
break;
- case nir_intrinsic_begin_fragment_shader_ordering:
- nir_builder_instr_insert(&b, &instr->instr);
- break;
case nir_intrinsic_store_ssbo: {
exec_node *param = ir->actual_parameters.get_head();
ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index f478b29a6b5..d05d1998a50 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -1122,7 +1122,6 @@ enum ir_intrinsic_id {
ir_intrinsic_memory_barrier_shared,
ir_intrinsic_begin_invocation_interlock,
ir_intrinsic_end_invocation_interlock,
- ir_intrinsic_begin_fragment_shader_ordering,
ir_intrinsic_vote_all,
ir_intrinsic_vote_any,
diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index 52e493cb599..3969c0120b3 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -481,9 +481,10 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],
/* Component aliasing is not alloed */
if (comp >= component && comp < last_comp) {
linker_error(prog,
- "%s shader has multiple outputs explicitly "
+ "%s shader has multiple %sputs explicitly "
"assigned to location %d and component %d\n",
_mesa_shader_stage_to_string(stage),
+ var->data.mode == ir_var_shader_in ? "in" : "out",
location, comp);
return false;
} else {
@@ -502,10 +503,12 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],
if (info->interpolation != interpolation) {
linker_error(prog,
- "%s shader has multiple outputs at explicit "
+ "%s shader has multiple %sputs at explicit "
"location %u with different interpolation "
"settings\n",
- _mesa_shader_stage_to_string(stage), location);
+ _mesa_shader_stage_to_string(stage),
+ var->data.mode == ir_var_shader_in ?
+ "in" : "out", location);
return false;
}
@@ -513,9 +516,11 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],
info->sample != sample ||
info->patch != patch) {
linker_error(prog,
- "%s shader has multiple outputs at explicit "
+ "%s shader has multiple %sputs at explicit "
"location %u with different aux storage\n",
- _mesa_shader_stage_to_string(stage), location);
+ _mesa_shader_stage_to_string(stage),
+ var->data.mode == ir_var_shader_in ?
+ "in" : "out", location);
return false;
}
}
diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp
index 267700e7e78..26d8ec4b75b 100644
--- a/src/compiler/glsl/serialize.cpp
+++ b/src/compiler/glsl/serialize.cpp
@@ -360,13 +360,20 @@ read_xfb(struct blob_reader *metadata, struct gl_shader_program *shProg)
if (xfb_stage == ~0u)
return;
+ if (shProg->TransformFeedback.VaryingNames) {
+ for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; ++i)
+ free(shProg->TransformFeedback.VaryingNames[i]);
+ }
+
/* Data set by glTransformFeedbackVaryings. */
shProg->TransformFeedback.BufferMode = blob_read_uint32(metadata);
blob_copy_bytes(metadata, &shProg->TransformFeedback.BufferStride,
sizeof(shProg->TransformFeedback.BufferStride));
shProg->TransformFeedback.NumVarying = blob_read_uint32(metadata);
+
shProg->TransformFeedback.VaryingNames = (char **)
- malloc(shProg->TransformFeedback.NumVarying * sizeof(GLchar *));
+ realloc(shProg->TransformFeedback.VaryingNames,
+ shProg->TransformFeedback.NumVarying * sizeof(GLchar *));
/* Note, malloc used with VaryingNames. */
for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; i++)
shProg->TransformFeedback.VaryingNames[i] =
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index ec3049ca06d..910f9c336f8 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -199,7 +199,6 @@ def barrier(name):
barrier("memory_barrier_shared")
barrier("begin_invocation_interlock")
barrier("end_invocation_interlock")
-barrier("begin_fragment_shader_ordering")
# A conditional discard, with a single boolean source.
intrinsic("discard_if", src_comp=[1])
diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c
index de6f2481def..3845ed66b49 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -195,9 +195,12 @@ nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
}
static uint8_t
-get_interp_type(nir_variable *var, bool default_to_smooth_interp)
+get_interp_type(nir_variable *var, const struct glsl_type *type,
+ bool default_to_smooth_interp)
{
- if (var->data.interpolation != INTERP_MODE_NONE)
+ if (glsl_type_is_integer(type))
+ return INTERP_MODE_FLAT;
+ else if (var->data.interpolation != INTERP_MODE_NONE)
return var->data.interpolation;
else if (default_to_smooth_interp)
return INTERP_MODE_SMOOTH;
@@ -252,7 +255,7 @@ get_slot_component_masks_and_interp_types(struct exec_list *var_list,
unsigned comps_slot2 = 0;
for (unsigned i = 0; i < slots; i++) {
interp_type[location + i] =
- get_interp_type(var, default_to_smooth_interp);
+ get_interp_type(var, type, default_to_smooth_interp);
interp_loc[location + i] = get_interp_loc(var);
if (dual_slot) {
@@ -424,7 +427,7 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
continue;
bool found_new_offset = false;
- uint8_t interp = get_interp_type(var, default_to_smooth_interp);
+ uint8_t interp = get_interp_type(var, type, default_to_smooth_interp);
for (; cursor[interp] < 32; cursor[interp]++) {
uint8_t cursor_used_comps = comps[cursor[interp]];
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 0be3aba9456..7ef032cd164 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -194,6 +194,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
}
case nir_op_unpack_64_2x32:
+ case nir_op_unpack_32_2x16:
return false;
LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c
index 897a0620872..40b90e6a313 100644
--- a/src/compiler/nir/nir_lower_indirect_derefs.c
+++ b/src/compiler/nir/nir_lower_indirect_derefs.c
@@ -205,9 +205,6 @@ nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes)
{
bool progress = false;
- if (modes == 0)
- return false;
-
nir_foreach_function(function, shader) {
if (function->impl)
progress = lower_indirects_impl(function->impl, modes) || progress;
diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
index 5929a60aee8..be91a2a8fd6 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -64,9 +64,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
return false;
if (bit_size == 0 &&
- !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_sizes[i])) {
+ !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_types[i]))
bit_size = instr->src[i].src.ssa->bit_size;
- }
nir_instr *src_instr = instr->src[i].src.ssa->parent_instr;
diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c
index 7a21ad56c79..594c4ddd0c2 100644
--- a/src/compiler/nir/nir_opt_copy_prop_vars.c
+++ b/src/compiler/nir/nir_opt_copy_prop_vars.c
@@ -265,7 +265,7 @@ lookup_entry_and_kill_aliases(struct util_dynarray *copies,
{
/* TODO: Take into account the write_mask. */
- struct copy_entry *entry = NULL;
+ nir_deref_instr *dst_match = NULL;
util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
if (!iter->src.is_ssa) {
/* If this write aliases the source of some entry, get rid of it */
@@ -278,13 +278,26 @@ lookup_entry_and_kill_aliases(struct util_dynarray *copies,
nir_deref_compare_result comp = nir_compare_derefs(iter->dst, deref);
if (comp & nir_derefs_equal_bit) {
- assert(entry == NULL);
- entry = iter;
+ /* Removing entries invalidates previous iter pointers, so we'll
+ * collect the matching entry later. Just make sure it is unique.
+ */
+ assert(!dst_match);
+ dst_match = iter->dst;
} else if (comp & nir_derefs_may_alias_bit) {
copy_entry_remove(copies, iter);
}
}
+ struct copy_entry *entry = NULL;
+ if (dst_match) {
+ util_dynarray_foreach(copies, struct copy_entry, iter) {
+ if (iter->dst == dst_match) {
+ entry = iter;
+ break;
+ }
+ }
+ assert(entry);
+ }
return entry;
}
@@ -337,6 +350,9 @@ store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
const struct value *value, unsigned write_mask)
{
if (value->is_ssa) {
+ /* Clear src if it was being used as non-SSA. */
+ if (!entry->src.is_ssa)
+ memset(entry->src.ssa, 0, sizeof(entry->src.ssa));
entry->src.is_ssa = true;
/* Only overwrite the written components */
for (unsigned i = 0; i < 4; i++) {
@@ -705,9 +721,9 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
lookup_entry_for_deref(copies, src, nir_derefs_a_contains_b_bit);
struct value value;
if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
+ /* If load works, intrin (the copy_deref) is removed. */
if (value.is_ssa) {
nir_store_deref(b, dst, value.ssa[0], 0xf);
- intrin = nir_instr_as_intrinsic(nir_builder_last_instr(b));
} else {
/* If this would be a no-op self-copy, don't bother. */
if (nir_compare_derefs(value.deref, dst) & nir_derefs_equal_bit)
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 1fe95e53766..8a971c43f24 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -391,6 +391,43 @@ evaluate_if_condition(nir_if *nif, nir_cursor cursor, bool *value)
}
}
+/*
+ * Creates a copy of the ALU instruction "alu" and inserts it through the
+ * builder, using src_defs[i] as source i in place of the original SSA
+ * sources.  The opcode, exact flag, destination size and name, saturate,
+ * write mask and the per-source negate/abs/swizzle modifiers are all taken
+ * from "alu".
+ *
+ * src_defs must provide one entry for every input of alu->op.  Returns the
+ * SSA destination of the new instruction.
+ */
+static nir_ssa_def *
+clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
+                               nir_ssa_def **src_defs)
+{
+   nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op);
+   nalu->exact = alu->exact;
+
+   nir_ssa_dest_init(&nalu->instr, &nalu->dest.dest,
+                     alu->dest.dest.ssa.num_components,
+                     alu->dest.dest.ssa.bit_size, alu->dest.dest.ssa.name);
+
+   nalu->dest.saturate = alu->dest.saturate;
+   nalu->dest.write_mask = alu->dest.write_mask;
+
+   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+      assert(alu->src[i].src.is_ssa);
+      nalu->src[i].src = nir_src_for_ssa(src_defs[i]);
+      nalu->src[i].negate = alu->src[i].negate;
+      nalu->src[i].abs = alu->src[i].abs;
+      memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
+             sizeof(nalu->src[i].swizzle));
+   }
+
+   nir_builder_instr_insert(b, &nalu->instr);
+
+   return &nalu->dest.dest.ssa;
+}
+
/*
* This propagates if condition evaluation down the chain of some alu
* instructions. For example by checking the use of some of the following alu
@@ -448,7 +476,7 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
if (!evaluate_if_condition(nif, b->cursor, &bool_value))
return false;
- nir_ssa_def *def[2] = {0};
+ nir_ssa_def *def[4] = {0};
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
if (alu->src[i].src.ssa == use_src->ssa) {
def[i] = nir_imm_bool(b, bool_value);
@@ -456,7 +484,8 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
def[i] = alu->src[i].src.ssa;
}
}
- nir_ssa_def *nalu = nir_build_alu(b, alu->op, def[0], def[1], NULL, NULL);
+
+ nir_ssa_def *nalu = clone_alu_and_replace_src_defs(b, alu, def);
/* Rewrite use to use new alu instruction */
nir_src new_src = nir_src_for_ssa(nalu);
@@ -472,14 +501,21 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
static bool
can_propagate_through_alu(nir_src *src)
{
- if (src->parent_instr->type == nir_instr_type_alu &&
- (nir_instr_as_alu(src->parent_instr)->op == nir_op_ior ||
- nir_instr_as_alu(src->parent_instr)->op == nir_op_iand ||
- nir_instr_as_alu(src->parent_instr)->op == nir_op_inot ||
- nir_instr_as_alu(src->parent_instr)->op == nir_op_b2i))
- return true;
+ if (src->parent_instr->type != nir_instr_type_alu)
+ return false;
- return false;
+ nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr);
+ switch (alu->op) {
+ case nir_op_ior:
+ case nir_op_iand:
+ case nir_op_inot:
+ case nir_op_b2i:
+ return true;
+ case nir_op_bcsel:
+ return src == &alu->src[0].src;
+ default:
+ return false;
+ }
}
static bool
diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp
index d24f0941519..3cd61f66056 100644
--- a/src/compiler/nir_types.cpp
+++ b/src/compiler/nir_types.cpp
@@ -301,6 +301,11 @@ glsl_type_is_boolean(const struct glsl_type *type)
{
return type->is_boolean();
}
+bool
+glsl_type_is_integer(const struct glsl_type *type)
+{
+ return type->is_integer();
+}
const glsl_type *
glsl_void_type(void)
diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
index 77454fa9fab..70d593b96ab 100644
--- a/src/compiler/nir_types.h
+++ b/src/compiler/nir_types.h
@@ -142,6 +142,7 @@ bool glsl_type_is_image(const struct glsl_type *type);
bool glsl_type_is_dual_slot(const struct glsl_type *type);
bool glsl_type_is_numeric(const struct glsl_type *type);
bool glsl_type_is_boolean(const struct glsl_type *type);
+bool glsl_type_is_integer(const struct glsl_type *type);
bool glsl_sampler_type_is_shadow(const struct glsl_type *type);
bool glsl_sampler_type_is_array(const struct glsl_type *type);
bool glsl_contains_atomic(const struct glsl_type *type);
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index 96ff09c3659..16d9c92046e 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1811,6 +1811,35 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
src[j] = src_val->constant->values[0];
}
+         /* Fix up fixed size sources: the shift count (src[1]) is consumed
+          * as a 32-bit value whatever the bit size of the operation, so
+          * convert it for 8-, 16- and 64-bit shifts.
+          */
+         switch (op) {
+         case nir_op_ishl:
+         case nir_op_ishr:
+         case nir_op_ushr: {
+            if (bit_size == 32)
+               break;
+            /* Convert from a snapshot instead of in place.  The per-size
+             * arrays of nir_const_value share storage (NOTE(review): union,
+             * confirm against nir.h), so storing u32[i] while widening would
+             * overwrite narrower components that have not been read yet.
+             */
+            const nir_const_value shift_count = src[1];
+            for (unsigned i = 0; i < num_components; ++i) {
+               switch (bit_size) {
+               case 64: src[1].u32[i] = shift_count.u64[i]; break;
+               case 16: src[1].u32[i] = shift_count.u16[i]; break;
+               case 8: src[1].u32[i] = shift_count.u8[i]; break;
+               }
+            }
+            break;
+         }
+         default:
+            break;
+         }
+
val->constant->values[0] =
nir_eval_const_opcode(op, num_components, bit_size, src);
break;
@@ -2874,13 +2894,19 @@ vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert,
return &vec->dest.dest.ssa;
}
+static nir_ssa_def *
+nir_ieq_imm(nir_builder *b, nir_ssa_def *x, uint64_t i)
+{
+ return nir_ieq(b, x, nir_imm_intN_t(b, i, x->bit_size));
+}
+
nir_ssa_def *
vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
nir_ssa_def *index)
{
nir_ssa_def *dest = vtn_vector_extract(b, src, 0);
for (unsigned i = 1; i < src->num_components; i++)
- dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
+ dest = nir_bcsel(&b->nb, nir_ieq_imm(&b->nb, index, i),
vtn_vector_extract(b, src, i), dest);
return dest;
@@ -2892,7 +2918,7 @@ vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src,
{
nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0);
for (unsigned i = 1; i < src->num_components; i++)
- dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)),
+ dest = nir_bcsel(&b->nb, nir_ieq_imm(&b->nb, index, i),
vtn_vector_insert(b, src, insert, i), dest);
return dest;
diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
index 6860e7dc090..a23f8c29b5c 100644
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -696,6 +696,17 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
src[1] = tmp;
}
+ switch (op) {
+ case nir_op_ishl:
+ case nir_op_ishr:
+ case nir_op_ushr:
+ if (src[1]->bit_size != 32)
+ src[1] = nir_u2u32(&b->nb, src[1]);
+ break;
+ default:
+ break;
+ }
+
val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]);
break;
} /* default */
diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c
index 726f717e8d5..6406f4911df 100644
--- a/src/compiler/spirv/vtn_cfg.c
+++ b/src/compiler/spirv/vtn_cfg.c
@@ -47,6 +47,7 @@ vtn_type_count_function_params(struct vtn_type *type)
{
switch (type->base_type) {
case vtn_base_type_array:
+ case vtn_base_type_matrix:
return type->length * vtn_type_count_function_params(type->array_element);
case vtn_base_type_struct: {
@@ -76,6 +77,7 @@ vtn_type_add_to_function_params(struct vtn_type *type,
switch (type->base_type) {
case vtn_base_type_array:
+ case vtn_base_type_matrix:
for (unsigned i = 0; i < type->length; i++)
vtn_type_add_to_function_params(type->array_element, func, param_idx);
break;
@@ -123,6 +125,7 @@ vtn_ssa_value_add_to_call_params(struct vtn_builder *b,
{
switch (type->base_type) {
case vtn_base_type_array:
+ case vtn_base_type_matrix:
for (unsigned i = 0; i < type->length; i++) {
vtn_ssa_value_add_to_call_params(b, value->elems[i],
type->array_element,
@@ -152,6 +155,7 @@ vtn_ssa_value_load_function_param(struct vtn_builder *b,
{
switch (type->base_type) {
case vtn_base_type_array:
+ case vtn_base_type_matrix:
for (unsigned i = 0; i < type->length; i++) {
vtn_ssa_value_load_function_param(b, value->elems[i],
type->array_element, param_idx);
diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
index 06a49e48e3f..0d8100384d6 100644
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -807,10 +807,9 @@ handle_glsl450_interpolation(struct vtn_builder *b, enum GLSLstd450 opcode,
if (vec_array_deref) {
assert(vec_deref);
- nir_const_value *const_index = nir_src_as_const_value(vec_deref->arr.index);
- if (const_index) {
+ if (nir_src_is_const(vec_deref->arr.index)) {
val->ssa->def = vtn_vector_extract(b, &intrin->dest.ssa,
- const_index->u32[0]);
+ nir_src_as_uint(vec_deref->arr.index));
} else {
val->ssa->def = vtn_vector_extract_dynamic(b, &intrin->dest.ssa,
vec_deref->arr.index.ssa);
diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
index c5cf345d02a..0eb9f263436 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -132,12 +132,12 @@ vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link,
} else if (stride == 1) {
nir_ssa_def *ssa = vtn_ssa_value(b, link.id)->def;
if (ssa->bit_size != 32)
- ssa = nir_u2u32(&b->nb, ssa);
+ ssa = nir_i2i32(&b->nb, ssa);
return ssa;
} else {
nir_ssa_def *src0 = vtn_ssa_value(b, link.id)->def;
if (src0->bit_size != 32)
- src0 = nir_u2u32(&b->nb, src0);
+ src0 = nir_i2i32(&b->nb, src0);
return nir_imul(&b->nb, src0, nir_imm_int(&b->nb, stride));
}
}
@@ -512,9 +512,9 @@ vtn_local_load(struct vtn_builder *b, nir_deref_instr *src)
if (src_tail != src) {
val->type = src->type;
- nir_const_value *const_index = nir_src_as_const_value(src->arr.index);
- if (const_index)
- val->def = vtn_vector_extract(b, val->def, const_index->u32[0]);
+ if (nir_src_is_const(src->arr.index))
+ val->def = vtn_vector_extract(b, val->def,
+ nir_src_as_uint(src->arr.index));
else
val->def = vtn_vector_extract_dynamic(b, val->def, src->arr.index.ssa);
}
@@ -532,10 +532,9 @@ vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type);
_vtn_local_load_store(b, true, dest_tail, val);
- nir_const_value *const_index = nir_src_as_const_value(dest->arr.index);
- if (const_index)
+ if (nir_src_is_const(dest->arr.index))
val->def = vtn_vector_insert(b, val->def, src->def,
- const_index->u32[0]);
+ nir_src_as_uint(dest->arr.index));
else
val->def = vtn_vector_insert_dynamic(b, val->def, src->def,
dest->arr.index.ssa);
diff --git a/src/egl/Android.mk b/src/egl/Android.mk
index 42b391e6d86..bbc7df2aff8 100644
--- a/src/egl/Android.mk
+++ b/src/egl/Android.mk
@@ -45,7 +45,10 @@ LOCAL_CFLAGS := \
LOCAL_C_INCLUDES := \
$(MESA_TOP)/include/drm-uapi \
$(MESA_TOP)/src/egl/main \
- $(MESA_TOP)/src/egl/drivers/dri2
+ $(MESA_TOP)/src/egl/drivers/dri2 \
+ frameworks/native/libs/nativebase/include \
+ frameworks/native/libs/nativewindow/include \
+ frameworks/native/libs/arect/include
LOCAL_STATIC_LIBRARIES := \
libmesa_util \
@@ -64,6 +67,10 @@ ifeq ($(BOARD_USES_DRM_GRALLOC),true)
LOCAL_SHARED_LIBRARIES += libgralloc_drm
endif
+ifeq ($(strip $(BOARD_USES_GRALLOC1)),true)
+LOCAL_CFLAGS += -DHAVE_GRALLOC1
+endif
+
ifeq ($(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5 6 7),)
LOCAL_SHARED_LIBRARIES += libnativewindow
endif
@@ -79,8 +86,12 @@ ifneq ($(MESA_BUILD_GALLIUM),)
LOCAL_REQUIRED_MODULES += gallium_dri
endif
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0)
+LOCAL_HEADER_LIBRARIES += libnativebase_headers
+endif
+
LOCAL_MODULE := libGLES_mesa
LOCAL_MODULE_RELATIVE_PATH := egl
-
+LOCAL_CFLAGS += -Wno-error
include $(MESA_COMMON_MK)
include $(BUILD_SHARED_LIBRARY)
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 87e1a704c6e..81d4ea456b3 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -65,6 +65,38 @@
#include "util/u_vector.h"
#include "mapi/glapi/glapi.h"
+/* The kernel header drm_fourcc.h defines the DRM formats below. We duplicate
+ * some of the definitions here so that building Mesa won't require
+ * bleeding-edge kernel headers.
+ */
+#ifndef DRM_FORMAT_R8
+#define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
+#endif
+
+#ifndef DRM_FORMAT_RG88
+#define DRM_FORMAT_RG88 fourcc_code('R', 'G', '8', '8') /* [15:0] R:G 8:8 little endian */
+#endif
+
+#ifndef DRM_FORMAT_GR88
+#define DRM_FORMAT_GR88 fourcc_code('G', 'R', '8', '8') /* [15:0] G:R 8:8 little endian */
+#endif
+
+#ifndef DRM_FORMAT_R16
+#define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R 16 little endian */
+#endif
+
+#ifndef DRM_FORMAT_GR1616
+#define DRM_FORMAT_GR1616 fourcc_code('G', 'R', '3', '2') /* [31:0] G:R 16:16 little endian */
+#endif
+
+#ifndef DRM_FORMAT_P010
+#define DRM_FORMAT_P010 fourcc_code('P', '0', '1', '0') /* 2x2 subsampled Cb:Cr plane 10 bits per channel */
+#endif
+
+#ifndef DRM_FORMAT_MOD_INVALID
+#define DRM_FORMAT_MOD_INVALID ((1ULL<<56) - 1)
+#endif
+
#define NUM_ATTRIBS 12
static void
@@ -673,7 +705,7 @@ dri2_setup_screen(_EGLDisplay *disp)
dri2_renderer_query_integer(dri2_dpy,
__DRI2_RENDERER_HAS_CONTEXT_PRIORITY);
- disp->Extensions.EXT_pixel_format_float = EGL_TRUE;
+ disp->Extensions.EXT_pixel_format_float = EGL_FALSE;
if (dri2_renderer_query_integer(dri2_dpy,
__DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
@@ -2284,6 +2316,7 @@ dri2_num_fourcc_format_planes(EGLint format)
case DRM_FORMAT_NV21:
case DRM_FORMAT_NV16:
case DRM_FORMAT_NV61:
+ case DRM_FORMAT_P010:
return 2;
case DRM_FORMAT_YUV410:
@@ -2309,7 +2342,7 @@ dri2_check_dma_buf_format(const _EGLImageAttribs *attrs)
{
unsigned plane_n = dri2_num_fourcc_format_planes(attrs->DMABufFourCC.Value);
if (plane_n == 0) {
- _eglError(EGL_BAD_ATTRIBUTE, "invalid format");
+ _eglError(EGL_BAD_MATCH, "unknown drm fourcc format");
return 0;
}
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index 4abe1ba1952..3e5a567472c 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -69,6 +69,10 @@ struct zwp_linux_dmabuf_v1;
#include
#endif /* HAVE_ANDROID_PLATFORM */
+#ifdef HAVE_GRALLOC1
+#include
+#endif
+
#include "eglconfig.h"
#include "eglcontext.h"
#include "egldevice.h"
@@ -237,7 +241,14 @@ struct dri2_egl_display
#endif
#ifdef HAVE_ANDROID_PLATFORM
- const gralloc_module_t *gralloc;
+ const hw_module_t *gralloc;
+ uint16_t gralloc_version;
+#ifdef HAVE_GRALLOC1
+ gralloc1_device_t *gralloc1_dvc;
+ GRALLOC1_PFN_LOCK_FLEX pfn_lockflex;
+ GRALLOC1_PFN_GET_FORMAT pfn_getFormat;
+ GRALLOC1_PFN_UNLOCK pfn_unlock;
+#endif
#endif
bool is_render_node;
diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c
index 1e93ab4d4d2..0c79fe9b5e0 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -49,6 +49,8 @@
#define ALIGN(val, align) (((val) + (align) - 1) & ~((align) - 1))
+#define GRALLOC_DRM_GET_FORMAT 1
+
struct droid_yuv_format {
/* Lookup keys */
int native; /* HAL_PIXEL_FORMAT_ */
@@ -59,14 +61,26 @@ struct droid_yuv_format {
int fourcc; /* __DRI_IMAGE_FOURCC_ */
};
+/* This enumeration can be deleted if Android defined it in
+ * system/core/include/system/graphics.h
+ */
+enum {
+ HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL = 0x100,
+ HAL_PIXEL_FORMAT_NV12 = 0x10F,
+ HAL_PIXEL_FORMAT_P010_INTEL = 0x110
+};
+
/* The following table is used to look up a DRI image FourCC based
* on native format and information contained in android_ycbcr struct. */
static const struct droid_yuv_format droid_yuv_formats[] = {
/* Native format, YCrCb, Chroma step, DRI image FourCC */
{ HAL_PIXEL_FORMAT_YCbCr_420_888, 0, 2, __DRI_IMAGE_FOURCC_NV12 },
+ { HAL_PIXEL_FORMAT_P010_INTEL, 0, 4, __DRI_IMAGE_FOURCC_P010 },
{ HAL_PIXEL_FORMAT_YCbCr_420_888, 0, 1, __DRI_IMAGE_FOURCC_YUV420 },
{ HAL_PIXEL_FORMAT_YCbCr_420_888, 1, 1, __DRI_IMAGE_FOURCC_YVU420 },
{ HAL_PIXEL_FORMAT_YV12, 1, 1, __DRI_IMAGE_FOURCC_YVU420 },
+ { HAL_PIXEL_FORMAT_NV12, 0, 2, __DRI_IMAGE_FOURCC_NV12 },
+ { HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL, 0, 2, __DRI_IMAGE_FOURCC_NV12 },
/* HACK: See droid_create_image_from_prime_fd() and
* https://issuetracker.google.com/32077885. */
{ HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED, 0, 2, __DRI_IMAGE_FOURCC_NV12 },
@@ -248,6 +262,61 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf)
return EGL_TRUE;
}
+/**
+ * Returns the pixel format of a native buffer, resolving
+ * HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED to the format gralloc reports.
+ *
+ * A gralloc 1.0 module is asked through its getFormat entry point; any other
+ * module is asked through the gralloc0 perform(GRALLOC_DRM_GET_FORMAT) hook.
+ * Returns -1 (after logging a warning) if the format cannot be queried.
+ */
+static int
+droid_resolve_format(struct dri2_egl_display *dri2_dpy,
+                     struct ANativeWindowBuffer *buf)
+{
+   int format = -1;
+   int ret;
+
+   if (buf->format != HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED)
+      return buf->format;
+
+#ifdef HAVE_GRALLOC1
+   if (dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) {
+      if (!dri2_dpy->pfn_getFormat) {
+         _eglLog(_EGL_WARNING, "Gralloc does not support getFormat");
+         return -1;
+      }
+      ret = dri2_dpy->pfn_getFormat(dri2_dpy->gralloc1_dvc, buf->handle,
+                                    &format);
+      if (ret) {
+         _eglLog(_EGL_WARNING, "gralloc->getFormat failed: %d", ret);
+         return -1;
+      }
+      return format;
+   }
+#endif
+
+   /* gralloc0 path.  This has to be compiled when HAVE_GRALLOC1 is defined
+    * too, so that a display whose module is not gralloc 1.0 can still have
+    * its format resolved.
+    */
+   const gralloc_module_t *gralloc0 =
+      (const gralloc_module_t *)dri2_dpy->gralloc;
+
+   if (!gralloc0->perform) {
+      _eglLog(_EGL_WARNING, "gralloc->perform not supported");
+      return -1;
+   }
+   ret = gralloc0->perform(gralloc0, GRALLOC_DRM_GET_FORMAT,
+                           buf->handle, &format);
+   if (ret) {
+      _eglLog(_EGL_WARNING, "gralloc->perform failed with error: %d", ret);
+      return -1;
+   }
+
+   return format;
+}
+
static EGLBoolean
droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf)
{
@@ -462,7 +521,7 @@ droid_swap_interval(_EGLDriver *drv, _EGLDisplay *dpy,
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
struct ANativeWindow *window = dri2_surf->window;
- if (window->setSwapInterval(window, interval))
+ if (window && window->setSwapInterval(window, interval))
return EGL_FALSE;
surf->SwapInterval = interval;
@@ -663,11 +722,18 @@ droid_query_buffer_age(_EGLDriver *drv,
{
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surface);
+ /* To avoid blocking other EGL calls, release the display mutex before
+ * we enter droid_window_dequeue_buffer() and re-acquire the mutex upon
+ * return.
+ */
+ mtx_unlock(&disp->Mutex);
if (update_buffers(dri2_surf) < 0) {
_eglError(EGL_BAD_ALLOC, "droid_query_buffer_age");
+ mtx_lock(&disp->Mutex);
return -1;
}
+ mtx_lock(&disp->Mutex);
return dri2_surf->back ? dri2_surf->back->age : 0;
}
@@ -730,6 +796,40 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
return EGL_TRUE;
}
+/**
+ * Fills an android_ycbcr description from a flex layout.
+ *
+ * The Y plane provides y/ystride, the Cb plane cb/cstride and the Cr plane
+ * cr/chroma_step.  Planes carrying any other component are reported with a
+ * warning and skipped.  Always returns 0.
+ */
+static int
+get_ycbcr_from_flexlayout(struct android_flex_layout *outFlexLayout,
+                          struct android_ycbcr *ycbcr)
+{
+   for (int i = 0; i < outFlexLayout->num_planes; i++) {
+      switch (outFlexLayout->planes[i].component) {
+      case FLEX_COMPONENT_Y:
+         ycbcr->y = outFlexLayout->planes[i].top_left;
+         ycbcr->ystride = outFlexLayout->planes[i].v_increment;
+         break;
+      case FLEX_COMPONENT_Cb:
+         ycbcr->cb = outFlexLayout->planes[i].top_left;
+         ycbcr->cstride = outFlexLayout->planes[i].v_increment;
+         break;
+      case FLEX_COMPONENT_Cr:
+         ycbcr->cr = outFlexLayout->planes[i].top_left;
+         ycbcr->chroma_step = outFlexLayout->planes[i].h_increment;
+         break;
+      default:
+         _eglLog(_EGL_WARNING, "%s: unknown component 0x%x", __func__,
+                 outFlexLayout->planes[i].component);
+         break;
+      }
+   }
+   return 0;
+}
+
#if ANDROID_API_LEVEL >= 23
static EGLBoolean
droid_set_damage_region(_EGLDriver *drv,
@@ -773,30 +864,70 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx,
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct android_ycbcr ycbcr;
+#ifdef HAVE_GRALLOC1
+ struct android_flex_layout outFlexLayout;
+ gralloc1_rect_t accessRegion;
+#endif
size_t offsets[3];
size_t pitches[3];
int is_ycrcb;
int fourcc;
int ret;
- if (!dri2_dpy->gralloc->lock_ycbcr) {
- _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr");
+ int format = droid_resolve_format(dri2_dpy, buf);
+ if (format < 0) {
+ _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR");
return NULL;
}
memset(&ycbcr, 0, sizeof(ycbcr));
- ret = dri2_dpy->gralloc->lock_ycbcr(dri2_dpy->gralloc, buf->handle,
- 0, 0, 0, 0, 0, &ycbcr);
- if (ret) {
- /* HACK: See droid_create_image_from_prime_fd() and
- * https://issuetracker.google.com/32077885.*/
- if (buf->format == HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED)
- return NULL;
-
- _eglLog(_EGL_WARNING, "gralloc->lock_ycbcr failed: %d", ret);
- return NULL;
- }
- dri2_dpy->gralloc->unlock(dri2_dpy->gralloc, buf->handle);
+#ifdef HAVE_GRALLOC1
+ if(dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) {
+ if (!dri2_dpy->pfn_lockflex) {
+ _eglLog(_EGL_WARNING, "Gralloc does not support lockflex");
+ return NULL;
+ }
+
+ ret = dri2_dpy->pfn_lockflex(dri2_dpy->gralloc1_dvc, buf->handle,
+ 0, 0, &accessRegion, &outFlexLayout, -1);
+ if (ret) {
+ _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret);
+ return NULL;
+ }
+ ret = get_ycbcr_from_flexlayout(&outFlexLayout, &ycbcr);
+ if (ret) {
+ _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret);
+ return NULL;
+ }
+ int outReleaseFence = 0;
+ dri2_dpy->pfn_unlock(dri2_dpy->gralloc1_dvc, buf->handle, &outReleaseFence);
+ } else {
+#endif
+ const gralloc_module_t *gralloc0;
+ gralloc0 = dri2_dpy->gralloc;
+
+ if (!gralloc0->lock_ycbcr) {
+ _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr");
+ return NULL;
+ }
+
+ ret = gralloc0->lock_ycbcr(gralloc0, buf->handle,
+ 0, 0, 0, 0, 0, &ycbcr);
+
+ if (ret) {
+ /* HACK: See droid_create_image_from_prime_fd() and
+ * https://issuetracker.google.com/32077885.*/
+ if (buf->format == HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED)
+ return NULL;
+
+ _eglLog(_EGL_WARNING, "gralloc->lock_ycbcr failed: %d", ret);
+ return NULL;
+ }
+
+ gralloc0->unlock(dri2_dpy->gralloc, buf->handle);
+#ifdef HAVE_GRALLOC1
+ }
+#endif
/* When lock_ycbcr's usage argument contains no SW_READ/WRITE flags
* it will return the .y/.cb/.cr pointers based on a NULL pointer,
@@ -821,14 +952,15 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx,
/* .chroma_step is the byte distance between the same chroma channel
* values of subsequent pixels, assumed to be the same for Cb and Cr. */
- fourcc = get_fourcc_yuv(buf->format, is_ycrcb, ycbcr.chroma_step);
+ fourcc = get_fourcc_yuv(format, is_ycrcb, ycbcr.chroma_step);
if (fourcc == -1) {
_eglLog(_EGL_WARNING, "unsupported YUV format, native = %x, is_ycrcb = %d, chroma_step = %d",
- buf->format, is_ycrcb, ycbcr.chroma_step);
+ format, is_ycrcb, ycbcr.chroma_step);
return NULL;
}
- if (ycbcr.chroma_step == 2) {
+ /* FIXME? we should not rely on chroma_step */
+ if (ycbcr.chroma_step == 2 || ycbcr.chroma_step == 4) {
/* Semi-planar Y + CbCr or Y + CrCb format. */
const EGLint attr_list_2plane[] = {
EGL_WIDTH, buf->width,
@@ -870,9 +1002,16 @@ static _EGLImage *
droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx,
struct ANativeWindowBuffer *buf, int fd)
{
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
unsigned int pitch;
- if (is_yuv(buf->format)) {
+ int format = droid_resolve_format(dri2_dpy, buf);
+ if (format < 0) {
+ _eglLog(_EGL_WARNING, "Could not resolve buffer format");
+ return NULL;
+ }
+
+ if (is_yuv(format)) {
_EGLImage *image;
image = droid_create_image_from_prime_fd_yuv(disp, ctx, buf, fd);
@@ -887,13 +1026,13 @@ droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx,
return image;
}
- const int fourcc = get_fourcc(buf->format);
+ const int fourcc = get_fourcc(format);
if (fourcc == -1) {
_eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR");
return NULL;
}
- pitch = buf->stride * get_format_bpp(buf->format);
+ pitch = buf->stride * get_format_bpp(format);
if (pitch == 0) {
_eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR");
return NULL;
@@ -1529,6 +1668,7 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp)
_EGLDevice *dev;
struct dri2_egl_display *dri2_dpy;
const char *err;
+ hw_device_t *device;
int ret;
/* Not supported yet */
@@ -1548,6 +1688,27 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp)
err = "DRI2: failed to get gralloc module";
goto cleanup;
}
+ dri2_dpy->gralloc_version = dri2_dpy->gralloc->module_api_version;
+#ifdef HAVE_GRALLOC1
+ if (dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) {
+ ret = dri2_dpy->gralloc->methods->open(dri2_dpy->gralloc, GRALLOC_HARDWARE_MODULE_ID, &device);
+ if (ret) {
+ err = "Failed to open hw_device device";
+ goto cleanup;
+ } else {
+ dri2_dpy->gralloc1_dvc = (gralloc1_device_t *)device;
+
+ dri2_dpy->pfn_lockflex = (GRALLOC1_PFN_LOCK_FLEX)\
+ dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_LOCK_FLEX);
+
+ dri2_dpy->pfn_getFormat = (GRALLOC1_PFN_GET_FORMAT)\
+ dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_GET_FORMAT);
+
+ dri2_dpy->pfn_unlock = (GRALLOC1_PFN_UNLOCK)\
+ dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_UNLOCK);
+ }
+ }
+#endif
disp->DriverData = (void *) dri2_dpy;
diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
index eb9f5e2b1e2..817e9b1988a 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1127,13 +1127,22 @@ drm_handle_device(void *data, struct wl_drm *drm, const char *device)
if (dri2_dpy->fd == -1) {
_eglLog(_EGL_WARNING, "wayland-egl: could not open %s (%s)",
dri2_dpy->device_name, strerror(errno));
+ free(dri2_dpy->device_name);
+ dri2_dpy->device_name = NULL;
return;
}
if (drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER) {
dri2_dpy->authenticated = true;
} else {
- drmGetMagic(dri2_dpy->fd, &magic);
+ if (drmGetMagic(dri2_dpy->fd, &magic)) {
+ close(dri2_dpy->fd);
+ dri2_dpy->fd = -1;
+ free(dri2_dpy->device_name);
+ dri2_dpy->device_name = NULL;
+ _eglLog(_EGL_WARNING, "wayland-egl: drmGetMagic failed");
+ return;
+ }
wl_drm_authenticate(dri2_dpy->wl_drm, magic);
}
}
@@ -1661,8 +1670,8 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf)
if (dri2_surf->back)
return 0;
- if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width ||
- dri2_surf->base.Height != dri2_surf->wl_win->attached_height) {
+ if (dri2_surf->base.Width != dri2_surf->wl_win->width ||
+ dri2_surf->base.Height != dri2_surf->wl_win->height) {
dri2_wl_release_buffers(dri2_surf);
diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp
index a9c5cf8d29b..d4b046c79b4 100644
--- a/src/egl/drivers/haiku/egl_haiku.cpp
+++ b/src/egl/drivers/haiku/egl_haiku.cpp
@@ -29,6 +29,7 @@
#include "eglconfig.h"
#include "eglcontext.h"
+#include "egldevice.h"
#include "egldisplay.h"
#include "egldriver.h"
#include "eglcurrent.h"
@@ -215,7 +216,7 @@ init_haiku(_EGLDriver *drv, _EGLDisplay *dpy)
_eglError(EGL_NOT_INITIALIZED, "DRI2: failed to find EGLDevice");
return EGL_FALSE;
}
- disp->Device = dev;
+ dpy->Device = dev;
TRACE("Add configs\n");
if (!haiku_add_configs_for_visuals(dpy))
diff --git a/src/egl/generate/eglFunctionList.py b/src/egl/generate/eglFunctionList.py
index fb5b3c30bdf..2cd35557bc4 100644
--- a/src/egl/generate/eglFunctionList.py
+++ b/src/egl/generate/eglFunctionList.py
@@ -196,8 +196,18 @@ def _eglFunc(name, method, static=None, public=False, inheader=None, prefix="dis
# EGL_ANDROID_native_fence_sync
_eglFunc("eglDupNativeFenceFDANDROID", "display"),
+ # EGL_ANDROID_blob_cache
+ _eglFunc("eglSetBlobCacheFuncsANDROID", "display"),
+
# EGL_EXT_image_dma_buf_import_modifiers
_eglFunc("eglQueryDmaBufFormatsEXT", "display"),
_eglFunc("eglQueryDmaBufModifiersEXT", "display"),
+
+ # EGL_EXT_device_base
+ _eglFunc("eglQueryDeviceAttribEXT", "device"),
+ _eglFunc("eglQueryDeviceStringEXT", "device"),
+ _eglFunc("eglQueryDevicesEXT", "none"),
+ _eglFunc("eglQueryDisplayAttribEXT", "display"),
+
)
diff --git a/src/egl/main/eglcurrent.c b/src/egl/main/eglcurrent.c
index 7af3011b757..545697e5662 100644
--- a/src/egl/main/eglcurrent.c
+++ b/src/egl/main/eglcurrent.c
@@ -137,13 +137,37 @@ _eglDestroyThreadInfo(_EGLThreadInfo *t)
}
+/**
+ * TSD destructor: hand a _EGLThreadInfo to eglReleaseThread() or free it.
+ */
+static void
+_eglDestroyThreadInfoCallback(_EGLThreadInfo *t)
+{
+ /* If this callback is called on thread termination then also give the
+ * client drivers a chance to clean up. If called for module termination
+ * then just release the thread information, as calling eglReleaseThread
+ * would result in a deadlock.
+ */
+ if (_egl_TSDInitialized) {
+ /* The callback handler has replaced the TLS entry, which is passed in as
+ * 't', with NULL. Restore it here so that eglReleaseThread() finds it in
+ * the TLS entry.
+ */
+ _eglSetTSD(t);
+ eglReleaseThread();
+ } else {
+ _eglDestroyThreadInfo(t);
+ }
+}
+
+
/**
* Make sure TSD is initialized and return current value.
*/
static inline _EGLThreadInfo *
_eglCheckedGetTSD(void)
{
- if (_eglInitTSD(&_eglDestroyThreadInfo) != EGL_TRUE) {
+ if (_eglInitTSD(&_eglDestroyThreadInfoCallback) != EGL_TRUE) {
_eglLog(_EGL_FATAL, "failed to initialize \"current\" system");
return NULL;
}
diff --git a/src/egl/main/egldispatchstubs.c b/src/egl/main/egldispatchstubs.c
index bfc3195c779..96708aeb0dc 100644
--- a/src/egl/main/egldispatchstubs.c
+++ b/src/egl/main/egldispatchstubs.c
@@ -59,6 +59,11 @@ static __eglMustCastToProperFunctionPointerType FetchVendorFunc(__EGLvendorInfo
}
if (func == NULL) {
if (errorCode != EGL_SUCCESS) {
+ // Since we have no vendor, the follow-up eglGetError() call will
+ // end up using the GLVND error code. Set it here.
+ if (vendor == NULL) {
+ exports->setEGLError(errorCode);
+ }
_eglError(errorCode, __EGL_DISPATCH_FUNC_NAMES[index]);
}
return NULL;
diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk
index acd243b8346..7618c6fcd93 100644
--- a/src/gallium/auxiliary/Android.mk
+++ b/src/gallium/auxiliary/Android.mk
@@ -36,7 +36,8 @@ LOCAL_SRC_FILES := \
util/u_debug_stack_android.cpp
LOCAL_C_INCLUDES := \
- $(GALLIUM_TOP)/auxiliary/util
+ $(GALLIUM_TOP)/auxiliary/util \
+ $(MESA_TOP)/src/util
ifeq ($(MESA_ENABLE_LLVM),true)
LOCAL_SRC_FILES += \
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 0ad274b535a..4fa36cc7de4 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -375,7 +375,7 @@ ttn_emit_declaration(struct ttn_compile *c)
c->outputs[idx] = var;
for (int i = 0; i < array_size; i++)
- b->shader->info.outputs_written |= 1 << (var->data.location + i);
+ b->shader->info.outputs_written |= 1ull << (var->data.location + i);
}
break;
case TGSI_FILE_CONSTANT:
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
index 05be94cae31..9b264145347 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
@@ -142,7 +142,7 @@ pipe_loader_release(struct pipe_loader_device **devs, int ndev);
*/
bool
pipe_loader_sw_probe_dri(struct pipe_loader_device **devs,
- struct drisw_loader_funcs *drisw_lf);
+ const struct drisw_loader_funcs *drisw_lf);
/**
* Initialize a kms backed sw device given an fd.
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index d387ce90d32..587b6f8567b 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -132,7 +132,7 @@ pipe_loader_sw_probe_teardown_common(struct pipe_loader_sw_device *sdev)
#ifdef HAVE_PIPE_LOADER_DRI
bool
-pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf)
+pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, const struct drisw_loader_funcs *drisw_lf)
{
struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
int i;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index e13500a7f7b..75c2e08632e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -1004,11 +1004,12 @@ get_block_tessfactor_writemask(const struct tgsi_shader_info *info,
struct tgsi_full_instruction *inst;
unsigned writemask = 0;
- do {
- tgsi_parse_token(parse);
- assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
- inst = &parse->FullToken.FullInstruction;
- check_no_subroutines(inst);
+ tgsi_parse_token(parse);
+ assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+ inst = &parse->FullToken.FullInstruction;
+ check_no_subroutines(inst);
+
+ while (inst->Instruction.Opcode != end_opcode) {
/* Recursively process nested blocks. */
switch (inst->Instruction.Opcode) {
@@ -1016,20 +1017,26 @@ get_block_tessfactor_writemask(const struct tgsi_shader_info *info,
case TGSI_OPCODE_UIF:
writemask |=
get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDIF);
- continue;
+ break;
case TGSI_OPCODE_BGNLOOP:
writemask |=
get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP);
- continue;
+ break;
case TGSI_OPCODE_BARRIER:
unreachable("nested BARRIER is illegal");
- continue;
+ break;
+
+ default:
+ writemask |= get_inst_tessfactor_writemask(info, inst);
}
- writemask |= get_inst_tessfactor_writemask(info, inst);
- } while (inst->Instruction.Opcode != end_opcode);
+ tgsi_parse_token(parse);
+ assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+ inst = &parse->FullToken.FullInstruction;
+ check_no_subroutines(inst);
+ }
return writemask;
}
@@ -1043,18 +1050,20 @@ get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info,
struct tgsi_full_instruction *inst;
unsigned then_tessfactor_writemask = 0;
unsigned else_tessfactor_writemask = 0;
+ unsigned writemask;
bool is_then = true;
- do {
- tgsi_parse_token(parse);
- assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
- inst = &parse->FullToken.FullInstruction;
- check_no_subroutines(inst);
+ tgsi_parse_token(parse);
+ assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+ inst = &parse->FullToken.FullInstruction;
+ check_no_subroutines(inst);
+
+ while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF) {
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ELSE:
is_then = false;
- continue;
+ break;
/* Recursively process nested blocks. */
case TGSI_OPCODE_IF:
@@ -1063,28 +1072,33 @@ get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info,
is_then ? &then_tessfactor_writemask :
&else_tessfactor_writemask,
cond_block_tf_writemask);
- continue;
+ break;
case TGSI_OPCODE_BGNLOOP:
*cond_block_tf_writemask |=
get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP);
- continue;
+ break;
case TGSI_OPCODE_BARRIER:
unreachable("nested BARRIER is illegal");
- continue;
- }
-
- /* Process an instruction in the current block. */
- unsigned writemask = get_inst_tessfactor_writemask(info, inst);
+ break;
+ default:
+ /* Process an instruction in the current block. */
+ writemask = get_inst_tessfactor_writemask(info, inst);
- if (writemask) {
- if (is_then)
- then_tessfactor_writemask |= writemask;
- else
- else_tessfactor_writemask |= writemask;
+ if (writemask) {
+ if (is_then)
+ then_tessfactor_writemask |= writemask;
+ else
+ else_tessfactor_writemask |= writemask;
+ }
}
- } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF);
+
+ tgsi_parse_token(parse);
+ assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+ inst = &parse->FullToken.FullInstruction;
+ check_no_subroutines(inst);
+ }
if (then_tessfactor_writemask || else_tessfactor_writemask) {
/* If both statements write the same tess factor channels,
@@ -1147,7 +1161,7 @@ tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
case TGSI_OPCODE_BGNLOOP:
cond_block_tf_writemask |=
- get_block_tessfactor_writemask(info, &parse, TGSI_OPCODE_ENDIF);
+ get_block_tessfactor_writemask(info, &parse, TGSI_OPCODE_ENDLOOP);
continue;
case TGSI_OPCODE_BARRIER:
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index b06fb111709..fa1e920b509 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -154,6 +154,25 @@ pipe_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
*dst = src;
}
+/**
+ * Release the surface in *ptr and set *ptr to NULL. Same as
+ * pipe_surface_release, but used when pipe_context doesn't exist anymore.
+ */
+static inline void
+pipe_surface_release_no_context(struct pipe_surface **ptr)
+{
+ struct pipe_surface *surf = *ptr;
+
+ if (pipe_reference_described(&surf->reference, NULL,
+ (debug_reference_descriptor)
+ debug_describe_surface)) {
+ /* no context to destroy through: unreference texture, free surf here */
+ pipe_resource_reference(&surf->texture, NULL);
+ free(surf);
+ }
+ *ptr = NULL;
+}
+
/**
* Set *dst to \p src with proper reference counting.
*
diff --git a/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c b/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c
index f1e96740231..9736aebd7f6 100644
--- a/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c
+++ b/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c
@@ -97,6 +97,7 @@ static void
cmd_free(struct msm_cmd *cmd)
{
fd_bo_del(cmd->ring_bo);
+ free(cmd->relocs);
free(cmd);
}
@@ -655,6 +656,7 @@ msm_ringbuffer_destroy(struct fd_ringbuffer *ring)
_mesa_set_destroy(msm_ring->u.ring_set, unref_rings);
+ free(msm_ring->u.reloc_bos);
free(msm_ring);
} else {
struct fd_submit *submit = msm_ring->u.submit;
@@ -663,6 +665,7 @@ msm_ringbuffer_destroy(struct fd_ringbuffer *ring)
cmd_free(msm_ring->u.cmds[i]);
}
+ free(msm_ring->u.cmds);
slab_free_st(&to_msm_submit(submit)->ring_pool, msm_ring);
}
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 8767e5efb99..ca0192a9cc0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -370,7 +370,8 @@ class ConstantFolding : public Pass
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
- void opnd(Instruction *, ImmediateValue&, int s);
+ /* true if i was deleted */
+ bool opnd(Instruction *i, ImmediateValue&, int s);
void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
@@ -414,18 +415,21 @@ ConstantFolding::visit(BasicBlock *bb)
if (i->srcExists(2) &&
i->src(0).getImmediate(src0) &&
i->src(1).getImmediate(src1) &&
- i->src(2).getImmediate(src2))
+ i->src(2).getImmediate(src2)) {
expr(i, src0, src1, src2);
- else
+ } else
if (i->srcExists(1) &&
- i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1))
+ i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) {
expr(i, src0, src1);
- else
- if (i->srcExists(0) && i->src(0).getImmediate(src0))
- opnd(i, src0, 0);
- else
- if (i->srcExists(1) && i->src(1).getImmediate(src1))
- opnd(i, src1, 1);
+ } else
+ if (i->srcExists(0) && i->src(0).getImmediate(src0)) {
+ if (opnd(i, src0, 0))
+ continue;
+ } else
+ if (i->srcExists(1) && i->src(1).getImmediate(src1)) {
+ if (opnd(i, src1, 1))
+ continue;
+ }
if (i->srcExists(2) && i->src(2).getImmediate(src2))
opnd3(i, src2);
}
@@ -1010,12 +1014,13 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *
return false;
}
-void
+bool
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
const int t = !s;
const operation op = i->op;
Instruction *newi = i;
+ bool deleted = false;
switch (i->op) {
case OP_SPLIT: {
@@ -1035,6 +1040,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
val >>= bitsize;
}
delete_Instruction(prog, i);
+ deleted = true;
break;
}
case OP_MUL:
@@ -1049,6 +1055,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
TYPE_S32, i->getSrc(t), bld.mkImm(0));
delete_Instruction(prog, i);
+ deleted = true;
} else if (imm0.isInteger(0) || imm0.isInteger(1)) {
// The high bits can't be set in this case (either mul by 0 or
// unsigned by 1)
@@ -1099,8 +1106,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->src(t).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
} else
if (i->postFactor && i->sType == TYPE_F32) {
/* Can't emit a postfactor with an immediate, have to fold it in */
@@ -1136,8 +1145,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2)))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
}
break;
case OP_SUB:
@@ -1207,6 +1218,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
delete_Instruction(prog, i);
+ deleted = true;
} else
if (imm0.reg.data.s32 == -1) {
i->op = OP_NEG;
@@ -1239,6 +1251,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
delete_Instruction(prog, i);
+ deleted = true;
}
break;
@@ -1270,6 +1283,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2);
delete_Instruction(prog, i);
+ deleted = true;
}
} else if (s == 1) {
// In this case, we still want the optimized lowering that we get
@@ -1286,6 +1300,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi->src(1).mod = Modifier(NV50_IR_MOD_NEG);
delete_Instruction(prog, i);
+ deleted = true;
}
break;
@@ -1298,7 +1313,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
CondCode cc, ccZ;
if (imm0.reg.data.u32 != 0 || !si)
- return;
+ return false;
cc = si->setCond;
ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
// We do everything assuming var (cmp) 0, reverse the condition if 0 is
@@ -1324,7 +1339,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case CC_GT: break; // bool > 0 -- bool
case CC_NE: break; // bool != 0 -- bool
default:
- return;
+ return false;
}
// Update the condition of this SET to be identical to the origin set,
@@ -1359,13 +1374,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
} else if (src->asCmp()) {
CmpInstruction *cmp = src->asCmp();
if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1)
- return;
+ return false;
if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
- return;
+ return false;
if (imm0.reg.data.f32 != 1.0)
- return;
+ return false;
if (cmp->dType != TYPE_U32)
- return;
+ return false;
cmp->dType = TYPE_F32;
if (i->src(t).mod != Modifier(0)) {
@@ -1432,13 +1447,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case OP_MUL:
int muls;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->src(1).getImmediate(imm1))
muls = 1;
else if (si->src(0).getImmediate(imm1))
muls = 0;
else
- return;
+ return false;
bld.setPosition(i, false);
i->op = OP_MUL;
@@ -1449,15 +1464,15 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case OP_ADD:
int adds;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->op != OP_SUB && si->src(0).getImmediate(imm1))
adds = 0;
else if (si->src(1).getImmediate(imm1))
adds = 1;
else
- return;
+ return false;
if (si->src(!adds).mod != Modifier(0))
- return;
+ return false;
// SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z))
// This is more operations, but if one of x, y is an immediate, then
@@ -1472,7 +1487,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkImm(imm0.reg.data.u32)));
break;
default:
- return;
+ return false;
}
}
break;
@@ -1497,7 +1512,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break;
case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break;
default:
- return;
+ return false;
}
if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0)
res = 31 - res;
@@ -1523,11 +1538,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
// TODO: handle 64-bit values properly
if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8)
- return;
+ return false;
// TODO: handle single byte/word extractions
if (i->subOp)
- return;
+ return false;
bld.setPosition(i, true); /* make sure bld is init'ed */
@@ -1564,7 +1579,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CLAMP(imm0.reg.data.u16, umin, umax) : \
imm0.reg.data.u16; \
break; \
- default: return; \
+ default: return false; \
} \
i->setSrc(0, bld.mkImm(res.data.dst)); \
break
@@ -1591,7 +1606,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break;
default:
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f32));
break;
@@ -1612,12 +1627,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break;
default:
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f64));
break;
default:
- return;
+ return false;
}
#undef CASE
@@ -1628,7 +1643,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
break;
}
default:
- return;
+ return false;
}
// This can get left behind some of the optimizations which simplify
@@ -1643,6 +1658,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (newi->op != op)
foldCount++;
+ return deleted;
}
// =============================================================================
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index fb4a259ce16..e1b2e20810a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -600,25 +600,23 @@ static inline void
nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
unsigned nr, void **hwcso)
{
+ unsigned highest_found = 0;
unsigned i;
assert(nr <= PIPE_MAX_SAMPLERS);
for (i = 0; i < nr; ++i) {
struct nv50_tsc_entry *old = nv50->samplers[s][i];
+ if (hwcso[i])
+ highest_found = i;
+
nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]);
if (old)
nv50_screen_tsc_unlock(nv50->screen, old);
}
assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS);
- for (; i < nv50->num_samplers[s]; ++i) {
- if (nv50->samplers[s][i]) {
- nv50_screen_tsc_unlock(nv50->screen, nv50->samplers[s][i]);
- nv50->samplers[s][i] = NULL;
- }
- }
-
- nv50->num_samplers[s] = nr;
+ if (nr >= nv50->num_samplers[s])
+ nv50->num_samplers[s] = highest_found + 1;
nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index f2393cb27b5..9653de86fe9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -464,11 +464,15 @@ nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0,
unsigned s,
unsigned nr, void **hwcso)
{
+ unsigned highest_found = 0;
unsigned i;
for (i = 0; i < nr; ++i) {
struct nv50_tsc_entry *old = nvc0->samplers[s][i];
+ if (hwcso[i])
+ highest_found = i;
+
if (hwcso[i] == old)
continue;
nvc0->samplers_dirty[s] |= 1 << i;
@@ -477,14 +481,8 @@ nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0,
if (old)
nvc0_screen_tsc_unlock(nvc0->screen, old);
}
- for (; i < nvc0->num_samplers[s]; ++i) {
- if (nvc0->samplers[s][i]) {
- nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
- nvc0->samplers[s][i] = NULL;
- }
- }
-
- nvc0->num_samplers[s] = nr;
+ if (nr >= nvc0->num_samplers[s])
+ nvc0->num_samplers[s] = highest_found + 1;
}
static void
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 2680396c3d6..41e83af1db1 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -105,6 +105,12 @@ static void r600_destroy_context(struct pipe_context *context)
}
util_unreference_framebuffer_state(&rctx->framebuffer.state);
+ if (rctx->gs_rings.gsvs_ring.buffer)
+ pipe_resource_reference(&rctx->gs_rings.gsvs_ring.buffer, NULL);
+
+ if (rctx->gs_rings.esgs_ring.buffer)
+ pipe_resource_reference(&rctx->gs_rings.esgs_ring.buffer, NULL);
+
for (sh = 0; sh < PIPE_SHADER_TYPES; ++sh)
for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; ++i)
rctx->b.b.set_constant_buffer(context, sh, i, NULL);
diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
index ccabab9cdb0..92f243b5c9a 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -1636,7 +1636,7 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
}
if (query->buffer.previous) {
- u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 16,
+ u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 256,
&tmp_buffer_offset, &tmp_buffer);
if (!tmp_buffer)
return;
diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
index 5681fdc4425..b7d87eac9f4 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
@@ -567,7 +567,7 @@ int bc_builder::build_fetch_gds(fetch_node *n) {
const fetch_op_info *fop = bc.op_ptr;
unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f;
unsigned mem_op = 4;
- assert(fop->flags && FF_GDS);
+ assert(fop->flags & FF_GDS);
if (bc.op == FETCH_OP_TF_WRITE) {
mem_op = 5;
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index b440230d227..91f38329d59 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -580,10 +580,12 @@ static int si_get_video_param(struct pipe_screen *screen,
case PIPE_VIDEO_CAP_SUPPORTED:
return (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
(si_vce_is_fw_version_supported(sscreen) ||
- sscreen->info.family == CHIP_RAVEN)) ||
+ sscreen->info.family == CHIP_RAVEN ||
+ sscreen->info.family == CHIP_RAVEN2)) ||
(profile == PIPE_VIDEO_PROFILE_HEVC_MAIN &&
(sscreen->info.family == CHIP_RAVEN ||
- si_radeon_uvd_enc_supported(sscreen)));
+ sscreen->info.family == CHIP_RAVEN2 ||
+ si_radeon_uvd_enc_supported(sscreen)));
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
@@ -631,7 +633,8 @@ static int si_get_video_param(struct pipe_screen *screen,
return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
return false;
case PIPE_VIDEO_FORMAT_JPEG:
- if (sscreen->info.family == CHIP_RAVEN)
+ if (sscreen->info.family == CHIP_RAVEN ||
+ sscreen->info.family == CHIP_RAVEN2)
return true;
if (sscreen->info.family < CHIP_CARRIZO || sscreen->info.family >= CHIP_VEGA10)
return false;
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index 9b09c74d48a..7a2c7afdbfd 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -793,17 +793,10 @@ static void si_query_hw_do_emit_start(struct si_context *sctx,
emit_sample_streamout(cs, va + 32 * stream, stream);
break;
case PIPE_QUERY_TIME_ELAPSED:
- /* Write the timestamp from the CP not waiting for
- * outstanding draws (top-of-pipe).
- */
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_COUNT_SEL |
- COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
- COPY_DATA_DST_SEL(COPY_DATA_DST_MEM));
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
+ si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
+ EOP_DATA_SEL_TIMESTAMP, NULL, va,
+ 0, query->b.type);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 19522cc97b1..f1d5ad31365 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -561,6 +561,14 @@ void si_llvm_load_input_vs(
/* Do multiple loads for special formats. */
switch (fix_fetch) {
+ case SI_FIX_FETCH_RG_64_FLOAT:
+ num_fetches = 1; /* 1 2-dword or 4-dword load */
+ fetch_stride = 0;
+ if (util_last_bit(info->input_usage_mask[input_index]) >= 2)
+ num_channels = 4; /* 2 doubles in 4 dwords */
+ else
+ num_channels = 2; /* 1 double in 2 dwords */
+ break;
case SI_FIX_FETCH_RGB_64_FLOAT:
num_fetches = 3; /* 3 2-dword loads */
fetch_stride = 8;
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 87ca0161b45..cd38145daec 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -989,6 +989,9 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
LLVMValueRef data[4];
unsigned loc = variable->data.location;
+ if (loc >= VARYING_SLOT_VAR0 && nir->info.stage == MESA_SHADER_FRAGMENT)
+ ctx->abi.fs_input_attr_indices[loc - VARYING_SLOT_VAR0] = input_idx / 4;
+
for (unsigned i = 0; i < attrib_count; i++) {
/* Packed components share the same location so skip
* them if we have already processed the location.
diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c b/src/gallium/drivers/radeonsi/si_state_msaa.c
index b741bcadec8..e6d97fe6727 100644
--- a/src/gallium/drivers/radeonsi/si_state_msaa.c
+++ b/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -101,6 +101,10 @@ static const uint64_t centroid_priority_4x = 0x3210321032103210ull;
static const uint32_t sample_locs_8x[] = {
FILL_SREG(-3,-5, 5, 1, -1, 3, 7,-7),
FILL_SREG(-7,-1, 3, 7, -5, 5, 1,-3),
+ /* The following are unused by hardware, but we emit them to IBs
+ * instead of multiple SET_CONTEXT_REG packets. */
+ 0,
+ 0,
};
static const uint64_t centroid_priority_8x = 0x3546012735460127ull;
diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c
index 1a9d8f8d9fa..8c9553acbf3 100644
--- a/src/gallium/drivers/radeonsi/si_uvd.c
+++ b/src/gallium/drivers/radeonsi/si_uvd.c
@@ -146,7 +146,8 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templ)
{
struct si_context *ctx = (struct si_context *)context;
- bool vcn = (ctx->family == CHIP_RAVEN) ? true : false;
+ bool vcn = ctx->family == CHIP_RAVEN ||
+ ctx->family == CHIP_RAVEN2;
if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
if (vcn) {
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index 1d59dbfc12a..e8f0e291dc3 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -32,6 +32,7 @@
#include "util/u_format.h"
#include "util/u_hash_table.h"
#include "util/u_screen.h"
+#include "util/u_transfer_helper.h"
#include "util/ralloc.h"
#include
@@ -74,6 +75,7 @@ v3d_screen_destroy(struct pipe_screen *pscreen)
v3d_simulator_destroy(screen);
v3d_compiler_free(screen->compiler);
+ u_transfer_helper_destroy(pscreen->transfer_helper);
close(screen->fd);
ralloc_free(pscreen);
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 94784bbdc0a..41e6ec5c1cb 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -572,7 +572,15 @@ vc4_resource_create_with_modifiers(struct pipe_screen *pscreen,
goto fail;
}
- if (screen->ro && tmpl->bind & PIPE_BIND_SCANOUT) {
+ /* Set up the "scanout resource" (the dmabuf export of our buffer to
+ * the KMS handle) if the buffer might ever have
+ * resource_get_handle(WINSYS_HANDLE_TYPE_KMS) called on it.
+ * create_with_modifiers() doesn't give us usage flags, so we have to
+ * assume that all calls with modifiers are scanout-possible.
+ */
+ if (screen->ro &&
+ ((tmpl->bind & PIPE_BIND_SCANOUT) ||
+ !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) {
rsc->scanout =
renderonly_scanout_for_resource(prsc, screen->ro, NULL);
if (!rsc->scanout)
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 14ee6cf09e5..e7f7c82c271 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -33,6 +33,7 @@
#include "util/u_format.h"
#include "util/u_hash_table.h"
#include "util/u_screen.h"
+#include "util/u_transfer_helper.h"
#include "util/ralloc.h"
#include
@@ -110,6 +111,8 @@ vc4_screen_destroy(struct pipe_screen *pscreen)
vc4_simulator_destroy(screen);
#endif
+ u_transfer_helper_destroy(pscreen->transfer_helper);
+
close(screen->fd);
ralloc_free(pscreen);
}
diff --git a/src/gallium/drivers/virgl/virgl_buffer.c b/src/gallium/drivers/virgl/virgl_buffer.c
index 88a22b56f9a..f72c93f4995 100644
--- a/src/gallium/drivers/virgl/virgl_buffer.c
+++ b/src/gallium/drivers/virgl/virgl_buffer.c
@@ -106,7 +106,6 @@ static void virgl_buffer_transfer_unmap(struct pipe_context *ctx,
if (trans->base.usage & PIPE_TRANSFER_WRITE) {
if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
struct virgl_screen *vs = virgl_screen(ctx->screen);
- vbuf->base.clean = FALSE;
vctx->num_transfers++;
vs->vws->transfer_put(vs->vws, vbuf->base.hw_res,
&transfer->box, trans->base.stride, trans->base.layer_stride, trans->offset, transfer->level);
diff --git a/src/gallium/drivers/virgl/virgl_context.c b/src/gallium/drivers/virgl/virgl_context.c
index 4511bf3b2fb..61fb3f0636a 100644
--- a/src/gallium/drivers/virgl/virgl_context.c
+++ b/src/gallium/drivers/virgl/virgl_context.c
@@ -47,6 +47,12 @@
#include "virgl_resource.h"
#include "virgl_screen.h"
+struct virgl_vertex_elements_state {
+ uint32_t handle;
+ uint8_t binding_map[PIPE_MAX_ATTRIBS];
+ uint8_t num_bindings;
+};
+
static uint32_t next_handle;
uint32_t virgl_object_assign_handle(void)
{
@@ -385,29 +391,54 @@ static void *virgl_create_vertex_elements_state(struct pipe_context *ctx,
unsigned num_elements,
const struct pipe_vertex_element *elements)
{
+ struct pipe_vertex_element new_elements[PIPE_MAX_ATTRIBS];
struct virgl_context *vctx = virgl_context(ctx);
- uint32_t handle = virgl_object_assign_handle();
- virgl_encoder_create_vertex_elements(vctx, handle,
- num_elements, elements);
- return (void*)(unsigned long)handle;
+ struct virgl_vertex_elements_state *state =
+ CALLOC_STRUCT(virgl_vertex_elements_state);
+
+ for (int i = 0; i < num_elements; ++i) {
+ if (elements[i].instance_divisor) {
+ /* Virglrenderer doesn't deal with instance_divisor correctly if
+ * there isn't a 1:1 relationship between elements and bindings.
+ * So let's make sure there is, by duplicating bindings.
+ */
+ for (int j = 0; j < num_elements; ++j) {
+ new_elements[j] = elements[j];
+ new_elements[j].vertex_buffer_index = j;
+ state->binding_map[j] = elements[j].vertex_buffer_index;
+ }
+ elements = new_elements;
+ state->num_bindings = num_elements;
+ break;
+ }
+ }
+ state->handle = virgl_object_assign_handle();
+ virgl_encoder_create_vertex_elements(vctx, state->handle,
+ num_elements, elements);
+ return state;
}
static void virgl_delete_vertex_elements_state(struct pipe_context *ctx,
void *ve)
{
struct virgl_context *vctx = virgl_context(ctx);
- uint32_t handle = (unsigned long)ve;
-
- virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS);
+ struct virgl_vertex_elements_state *state =
+ (struct virgl_vertex_elements_state *)ve;
+ virgl_encode_delete_object(vctx, state->handle, VIRGL_OBJECT_VERTEX_ELEMENTS);
+ FREE(state);
}
static void virgl_bind_vertex_elements_state(struct pipe_context *ctx,
void *ve)
{
struct virgl_context *vctx = virgl_context(ctx);
- uint32_t handle = (unsigned long)ve;
- virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS);
+ struct virgl_vertex_elements_state *state =
+ (struct virgl_vertex_elements_state *)ve;
+ vctx->vertex_elements = state;
+ virgl_encode_bind_object(vctx, state ? state->handle : 0,
+ VIRGL_OBJECT_VERTEX_ELEMENTS);
+ vctx->vertex_array_dirty = TRUE;
}
static void virgl_set_vertex_buffers(struct pipe_context *ctx,
@@ -429,7 +460,19 @@ static void virgl_hw_set_vertex_buffers(struct pipe_context *ctx)
struct virgl_context *vctx = virgl_context(ctx);
if (vctx->vertex_array_dirty) {
- virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer);
+ struct virgl_vertex_elements_state *ve = vctx->vertex_elements;
+
+ /* ve is NULL when no vertex-elements state is bound (bind accepts
+ * NULL); send the buffers unremapped in that case. */
+ if (ve && ve->num_bindings) {
+ struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
+ for (int i = 0; i < ve->num_bindings; ++i)
+ vertex_buffers[i] = vctx->vertex_buffer[ve->binding_map[i]];
+
+ virgl_encoder_set_vertex_buffers(vctx, ve->num_bindings, vertex_buffers);
+ } else
+ virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer);
+
virgl_attach_res_vertex_buffers(vctx);
}
}
diff --git a/src/gallium/drivers/virgl/virgl_context.h b/src/gallium/drivers/virgl/virgl_context.h
index 20988baa3c7..09cf0db2ae4 100644
--- a/src/gallium/drivers/virgl/virgl_context.h
+++ b/src/gallium/drivers/virgl/virgl_context.h
@@ -32,6 +32,7 @@ struct pipe_screen;
struct tgsi_token;
struct u_upload_mgr;
struct virgl_cmd_buf;
+struct virgl_vertex_elements_state;
struct virgl_sampler_view {
struct pipe_sampler_view base;
@@ -53,6 +54,7 @@ struct virgl_context {
struct virgl_cmd_buf *cbuf;
struct virgl_textures_info samplers[PIPE_SHADER_TYPES];
+ struct virgl_vertex_elements_state *vertex_elements;
struct pipe_framebuffer_state framebuffer;
diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c
index e86d0711a57..ee2764d74ea 100644
--- a/src/gallium/drivers/virgl/virgl_encode.c
+++ b/src/gallium/drivers/virgl/virgl_encode.c
@@ -61,6 +61,12 @@ static void virgl_encoder_write_res(struct virgl_context *ctx,
}
}
+static void virgl_dirty_res(struct virgl_resource *res)
+{
+ if (res)
+ res->clean = FALSE;
+}
+
int virgl_encode_bind_object(struct virgl_context *ctx,
uint32_t handle, uint32_t object)
{
@@ -615,6 +621,7 @@ int virgl_encode_sampler_view(struct virgl_context *ctx,
if (res->u.b.target == PIPE_BUFFER) {
virgl_encoder_write_dword(ctx->cbuf, state->u.buf.offset / elem_size);
virgl_encoder_write_dword(ctx->cbuf, (state->u.buf.offset + state->u.buf.size) / elem_size - 1);
+ virgl_dirty_res(res);
} else {
virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_layer | state->u.tex.last_layer << 16);
virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_level | state->u.tex.last_level << 8);
@@ -949,6 +956,7 @@ int virgl_encode_set_shader_buffers(struct virgl_context *ctx,
virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset);
virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size);
virgl_encoder_write_res(ctx, res);
+ virgl_dirty_res(res);
} else {
virgl_encoder_write_dword(ctx->cbuf, 0);
virgl_encoder_write_dword(ctx->cbuf, 0);
@@ -972,6 +980,7 @@ int virgl_encode_set_hw_atomic_buffers(struct virgl_context *ctx,
virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset);
virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size);
virgl_encoder_write_res(ctx, res);
+ virgl_dirty_res(res);
} else {
virgl_encoder_write_dword(ctx->cbuf, 0);
virgl_encoder_write_dword(ctx->cbuf, 0);
@@ -999,6 +1008,7 @@ int virgl_encode_set_shader_images(struct virgl_context *ctx,
virgl_encoder_write_dword(ctx->cbuf, images[i].u.buf.offset);
virgl_encoder_write_dword(ctx->cbuf, images[i].u.buf.size);
virgl_encoder_write_res(ctx, res);
+ virgl_dirty_res(res);
} else {
virgl_encoder_write_dword(ctx->cbuf, 0);
virgl_encoder_write_dword(ctx->cbuf, 0);
diff --git a/src/gallium/drivers/virgl/virgl_resource.c b/src/gallium/drivers/virgl/virgl_resource.c
index db5e7dd61af..9174ec5cbbd 100644
--- a/src/gallium/drivers/virgl/virgl_resource.c
+++ b/src/gallium/drivers/virgl/virgl_resource.c
@@ -95,7 +95,11 @@ static void virgl_buffer_subdata(struct pipe_context *pipe,
usage |= PIPE_TRANSFER_DISCARD_RANGE;
u_box_1d(offset, size, &box);
- virgl_transfer_inline_write(pipe, resource, 0, usage, &box, data, 0, 0);
+
+ if (size >= (VIRGL_MAX_CMDBUF_DWORDS * 4))
+ u_default_buffer_subdata(pipe, resource, usage, offset, size, data);
+ else
+ virgl_transfer_inline_write(pipe, resource, 0, usage, &box, data, 0, 0);
}
void virgl_init_context_resource_functions(struct pipe_context *ctx)
diff --git a/src/gallium/drivers/virgl/virgl_winsys.h b/src/gallium/drivers/virgl/virgl_winsys.h
index 0e6cb7953f6..b44f8aaa54a 100644
--- a/src/gallium/drivers/virgl/virgl_winsys.h
+++ b/src/gallium/drivers/virgl/virgl_winsys.h
@@ -31,7 +31,7 @@ struct pipe_fence_handle;
struct winsys_handle;
struct virgl_hw_res;
-#define VIRGL_MAX_CMDBUF_DWORDS (16*1024)
+#define VIRGL_MAX_CMDBUF_DWORDS (64 * 1024)
struct virgl_drm_caps {
union virgl_caps caps;
diff --git a/src/gallium/state_trackers/clover/llvm/compat.hpp b/src/gallium/state_trackers/clover/llvm/compat.hpp
index 975012cbda4..b91cb95a295 100644
--- a/src/gallium/state_trackers/clover/llvm/compat.hpp
+++ b/src/gallium/state_trackers/clover/llvm/compat.hpp
@@ -58,9 +58,14 @@
#include
#include
-#include
#include
+#if HAVE_LLVM >= 0x0800
+#include
+#else
+#include
+#endif
+
namespace clover {
namespace llvm {
namespace compat {
diff --git a/src/gallium/state_trackers/clover/meson.build b/src/gallium/state_trackers/clover/meson.build
index 1a09d8f2ca9..a6729af2fb8 100644
--- a/src/gallium/state_trackers/clover/meson.build
+++ b/src/gallium/state_trackers/clover/meson.build
@@ -30,6 +30,7 @@ libcltgsi = static_library(
files('tgsi/compiler.cpp', 'tgsi/invocation.hpp'),
include_directories : clover_incs,
cpp_args : [cpp_vis_args],
+ override_options : clover_cpp_std,
)
libclllvm = static_library(
@@ -56,6 +57,7 @@ libclllvm = static_library(
)),
],
dependencies : [dep_llvm, dep_elf],
+ override_options : clover_cpp_std,
)
clover_files = files(
@@ -119,4 +121,5 @@ libclover = static_library(
include_directories : clover_incs,
cpp_args : [clover_cpp_args, cpp_vis_args],
link_with : [libcltgsi, libclllvm],
+ override_options : clover_cpp_std,
)
diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c
index a09787bb215..d99187c6eaa 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -176,6 +176,12 @@ static int convert_fourcc(int format, int *dri_components_p)
format = __DRI_IMAGE_FORMAT_R8;
dri_components = __DRI_IMAGE_COMPONENTS_Y_UV;
break;
+ case __DRI_IMAGE_FOURCC_P010:
+ case __DRI_IMAGE_FOURCC_P012:
+ case __DRI_IMAGE_FOURCC_P016:
+ format = __DRI_IMAGE_FORMAT_R16;
+ dri_components = __DRI_IMAGE_COMPONENTS_Y_UV;
+ break;
default:
return -1;
}
diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c
index 886f94dc02c..5a0d2e1354d 100644
--- a/src/gallium/state_trackers/dri/drisw.c
+++ b/src/gallium/state_trackers/dri/drisw.c
@@ -421,12 +421,19 @@ static const __DRIextension *drisw_screen_extensions[] = {
NULL
};
-static struct drisw_loader_funcs drisw_lf = {
+static const struct drisw_loader_funcs drisw_lf = {
.get_image = drisw_get_image,
.put_image = drisw_put_image,
.put_image2 = drisw_put_image2
};
+static const struct drisw_loader_funcs drisw_shm_lf = {
+ .get_image = drisw_get_image,
+ .put_image = drisw_put_image,
+ .put_image2 = drisw_put_image2,
+ .put_image_shm = drisw_put_image_shm
+};
+
static const __DRIconfig **
drisw_init_screen(__DRIscreen * sPriv)
{
@@ -434,6 +441,7 @@ drisw_init_screen(__DRIscreen * sPriv)
const __DRIconfig **configs;
struct dri_screen *screen;
struct pipe_screen *pscreen = NULL;
+ const struct drisw_loader_funcs *lf = &drisw_lf;
screen = CALLOC_STRUCT(dri_screen);
if (!screen)
@@ -448,10 +456,10 @@ drisw_init_screen(__DRIscreen * sPriv)
sPriv->extensions = drisw_screen_extensions;
if (loader->base.version >= 4) {
if (loader->putImageShm)
- drisw_lf.put_image_shm = drisw_put_image_shm;
+ lf = &drisw_shm_lf;
}
- if (pipe_loader_sw_probe_dri(&screen->dev, &drisw_lf)) {
+ if (pipe_loader_sw_probe_dri(&screen->dev, lf)) {
dri_init_options(screen);
pscreen = pipe_loader_create_screen(screen->dev);
diff --git a/src/gallium/state_trackers/nine/buffer9.h b/src/gallium/state_trackers/nine/buffer9.h
index b04a0a721bb..1803d8d6405 100644
--- a/src/gallium/state_trackers/nine/buffer9.h
+++ b/src/gallium/state_trackers/nine/buffer9.h
@@ -104,7 +104,9 @@ NineBuffer9_Upload( struct NineBuffer9 *This )
struct NineDevice9 *device = This->base.base.device;
assert(This->base.pool == D3DPOOL_MANAGED && This->managed.dirty);
- nine_context_range_upload(device, &This->managed.pending_upload, This->base.resource,
+ nine_context_range_upload(device, &This->managed.pending_upload,
+ (struct NineUnknown *)This,
+ This->base.resource,
This->managed.dirty_box.x,
This->managed.dirty_box.width,
(char *)This->managed.data + This->managed.dirty_box.x);
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 74aaf57a549..c5596a5ee94 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -2418,6 +2418,7 @@ CSMT_ITEM_NO_WAIT(nine_context_gen_mipmap,
}
CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_range_upload,
+ ARG_BIND_REF(struct NineUnknown, src_ref),
ARG_BIND_RES(struct pipe_resource, res),
ARG_VAL(unsigned, offset),
ARG_VAL(unsigned, size),
@@ -2425,11 +2426,14 @@ CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_range_upload,
{
struct nine_context *context = &device->context;
+ /* Binding src_ref avoids release before upload */
+ (void)src_ref;
+
context->pipe->buffer_subdata(context->pipe, res, 0, offset, size, data);
}
CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_box_upload,
- ARG_BIND_REF(struct NineUnknown, dst),
+ ARG_BIND_REF(struct NineUnknown, src_ref),
ARG_BIND_RES(struct pipe_resource, res),
ARG_VAL(unsigned, level),
ARG_COPY_REF(struct pipe_box, dst_box),
@@ -2444,8 +2448,8 @@ CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_box_upload,
struct pipe_transfer *transfer = NULL;
uint8_t *map;
- /* We just bind dst for the bind count */
- (void)dst;
+ /* Binding src_ref avoids release before upload */
+ (void)src_ref;
map = pipe->transfer_map(pipe,
res,
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index 51e5e326527..55960007bfb 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -560,6 +560,7 @@ nine_context_gen_mipmap(struct NineDevice9 *device,
void
nine_context_range_upload(struct NineDevice9 *device,
unsigned *counter,
+ struct NineUnknown *src_ref,
struct pipe_resource *res,
unsigned offset,
unsigned size,
@@ -568,7 +569,7 @@ nine_context_range_upload(struct NineDevice9 *device,
void
nine_context_box_upload(struct NineDevice9 *device,
unsigned *counter,
- struct NineUnknown *dst,
+ struct NineUnknown *src_ref,
struct pipe_resource *res,
unsigned level,
const struct pipe_box *dst_box,
diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c
index 5fd662fa049..10518219a0a 100644
--- a/src/gallium/state_trackers/nine/surface9.c
+++ b/src/gallium/state_trackers/nine/surface9.c
@@ -660,7 +660,7 @@ NineSurface9_CopyMemToDefault( struct NineSurface9 *This,
nine_context_box_upload(This->base.base.device,
&From->pending_uploads_counter,
- (struct NineUnknown *)This,
+ (struct NineUnknown *)From,
r_dst,
This->level,
&dst_box,
diff --git a/src/gallium/state_trackers/nine/threadpool.c b/src/gallium/state_trackers/nine/threadpool.c
index cc62fd25799..19721aab2dd 100644
--- a/src/gallium/state_trackers/nine/threadpool.c
+++ b/src/gallium/state_trackers/nine/threadpool.c
@@ -37,6 +37,7 @@
#include "os/os_thread.h"
#include "threadpool.h"
+/* POSIX thread function */
static void *
threadpool_worker(void *data)
{
@@ -76,6 +77,15 @@ threadpool_worker(void *data)
return NULL;
}
+/* Windows thread function */
+static DWORD NINE_WINAPI
+wthreadpool_worker(void *data)
+{
+ threadpool_worker(data);
+
+ return 0;
+}
+
struct threadpool *
_mesa_threadpool_create(struct NineSwapChain9 *swapchain)
{
@@ -87,7 +97,9 @@ _mesa_threadpool_create(struct NineSwapChain9 *swapchain)
pthread_mutex_init(&pool->m, NULL);
pthread_cond_init(&pool->new_work, NULL);
- pool->wthread = NineSwapChain9_CreateThread(swapchain, threadpool_worker, pool);
+ /* This uses WINE's CreateThread, so the thread function needs to use
+ * the Windows ABI */
+ pool->wthread = NineSwapChain9_CreateThread(swapchain, wthreadpool_worker, pool);
if (!pool->wthread) {
/* using pthread as fallback */
pthread_create(&pool->pthread, NULL, threadpool_worker, pool);
diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c
index ec811aeba13..840f01dae10 100644
--- a/src/gallium/state_trackers/nine/volume9.c
+++ b/src/gallium/state_trackers/nine/volume9.c
@@ -449,7 +449,7 @@ NineVolume9_CopyMemToDefault( struct NineVolume9 *This,
nine_context_box_upload(This->base.device,
&From->pending_uploads_counter,
- (struct NineUnknown *)This,
+ (struct NineUnknown *)From,
r_dst,
This->level,
&dst_box,
diff --git a/src/gallium/state_trackers/nine/volumetexture9.c b/src/gallium/state_trackers/nine/volumetexture9.c
index 5dec4844864..c7191bce688 100644
--- a/src/gallium/state_trackers/nine/volumetexture9.c
+++ b/src/gallium/state_trackers/nine/volumetexture9.c
@@ -141,7 +141,8 @@ NineVolumeTexture9_dtor( struct NineVolumeTexture9 *This )
if (This->volumes) {
for (l = 0; l <= This->base.base.info.last_level; ++l)
- NineUnknown_Destroy(&This->volumes[l]->base);
+ if (This->volumes[l])
+ NineUnknown_Destroy(&This->volumes[l]->base);
FREE(This->volumes);
}
diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c
index 5376be28531..9646427ea5f 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -598,10 +598,8 @@ surface_from_external_memory(VADriverContextP ctx, vlVaSurface *surface,
return VA_STATUS_SUCCESS;
fail:
- for (i = 0; i < VL_NUM_COMPONENTS; i++) {
- if (resources[i])
- pscreen->resource_destroy(pscreen, resources[i]);
- }
+ for (i = 0; i < VL_NUM_COMPONENTS; i++)
+ pipe_resource_reference(&resources[i], NULL);
return result;
}
diff --git a/src/gallium/state_trackers/xa/xa_context.c b/src/gallium/state_trackers/xa/xa_context.c
index ba220877c84..67d9eac53bb 100644
--- a/src/gallium/state_trackers/xa/xa_context.c
+++ b/src/gallium/state_trackers/xa/xa_context.c
@@ -91,6 +91,7 @@ xa_context_destroy(struct xa_context *r)
}
r->pipe->destroy(r->pipe);
+ free(r);
}
XA_EXPORT int
diff --git a/src/gallium/state_trackers/xvmc/Makefile.am b/src/gallium/state_trackers/xvmc/Makefile.am
index 85d0b5f4953..dc278099030 100644
--- a/src/gallium/state_trackers/xvmc/Makefile.am
+++ b/src/gallium/state_trackers/xvmc/Makefile.am
@@ -27,6 +27,7 @@ AM_CFLAGS = \
$(GALLIUM_CFLAGS) \
$(VISIBILITY_CFLAGS) \
$(VL_CFLAGS) \
+ $(X11_INCLUDES) \
$(XCB_DRI3_CFLAGS) \
$(XVMC_CFLAGS)
diff --git a/src/gallium/targets/d3dadapter9/meson.build b/src/gallium/targets/d3dadapter9/meson.build
index bd05b4f9692..bc72b1110a0 100644
--- a/src/gallium/targets/d3dadapter9/meson.build
+++ b/src/gallium/targets/d3dadapter9/meson.build
@@ -53,7 +53,7 @@ libgallium_nine = shared_library(
libswkmsdri,
],
dependencies : [
- dep_selinux, dep_expat, dep_libdrm, dep_llvm,
+ dep_selinux, dep_expat, dep_libdrm, dep_llvm, dep_thread,
driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
driver_i915, driver_svga,
],
diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk
index 9c43fa1e8fd..83f439071f8 100644
--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -43,9 +43,17 @@ LOCAL_SHARED_LIBRARIES := \
libbacktrace \
libdl \
libglapi \
- libexpat \
libz
+# Link libexpat statically when PLATFORM_SDK_VERSION >= 27 (Android 8.1+); otherwise link it dynamically.
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0)
+LOCAL_STATIC_LIBRARIES := \
+ libexpat
+else
+LOCAL_SHARED_LIBRARIES += \
+ libexpat
+endif
+
$(foreach d, $(MESA_BUILD_GALLIUM), $(eval LOCAL_CFLAGS += $(patsubst HAVE_%,-D%,$(d))))
# sort GALLIUM_LIBS to remove any duplicates
diff --git a/src/gallium/targets/pipe-loader/meson.build b/src/gallium/targets/pipe-loader/meson.build
index 5a44102a69d..e9454d5666a 100644
--- a/src/gallium/targets/pipe-loader/meson.build
+++ b/src/gallium/targets/pipe-loader/meson.build
@@ -31,7 +31,7 @@ if (with_gallium_va or with_gallium_vdpau or with_gallium_omx != 'disabled' or
with_gallium_xvmc or with_dri)
pipe_loader_link_with += libgalliumvl
else
- pipe_loader_link_with += libgalliumvl_stubs
+ pipe_loader_link_with += libgalliumvl_stub
endif
if (with_gallium_va or with_gallium_vdpau or with_gallium_omx != 'disabled' or
with_gallium_xvmc)
diff --git a/src/gallium/targets/vdpau/Makefile.am b/src/gallium/targets/vdpau/Makefile.am
index cd05a024451..2742c7acd44 100644
--- a/src/gallium/targets/vdpau/Makefile.am
+++ b/src/gallium/targets/vdpau/Makefile.am
@@ -57,8 +57,6 @@ include $(top_srcdir)/src/gallium/drivers/r300/Automake.inc
include $(top_srcdir)/src/gallium/drivers/r600/Automake.inc
include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc
-include $(top_srcdir)/src/gallium/drivers/tegra/Automake.inc
-
if HAVE_GALLIUM_STATIC_TARGETS
libvdpau_gallium_la_SOURCES += target.c
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 68f0562a644..f108058052d 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -1310,6 +1310,12 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
if (bo) {
p_atomic_inc(&bo->base.reference.count);
simple_mtx_unlock(&ws->bo_export_table_lock);
+
+ /* Release the buffer handle, because we don't need it anymore.
+ * This function is returning an existing buffer, which has its own
+ * handle.
+ */
+ amdgpu_bo_free(result.buf_handle);
return &bo->base;
}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index f32bbd9d086..b20d702670d 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -280,6 +280,12 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
if (ws) {
pipe_reference(NULL, &ws->reference);
simple_mtx_unlock(&dev_tab_mutex);
+
+ /* Release the device handle, because we don't need it anymore.
+ * This function is returning an existing winsys instance, which
+ * has its own device handle.
+ */
+ amdgpu_device_deinitialize(dev);
return &ws->base;
}
diff --git a/src/gallium/winsys/i915/drm/Android.mk b/src/gallium/winsys/i915/drm/Android.mk
index bab3e85c5dd..bc8cd0ebe2e 100644
--- a/src/gallium/winsys/i915/drm/Android.mk
+++ b/src/gallium/winsys/i915/drm/Android.mk
@@ -30,7 +30,7 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(C_SOURCES)
-LOCAL_SHARED_LIBRARIES := libdrm_intel
+LOCAL_SHARED_LIBRARIES := libdrm_intel_pri
LOCAL_MODULE := libmesa_winsys_i915
include $(GALLIUM_COMMON_MK)
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
index 739e4ea131f..0ec8c1abe11 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
@@ -1198,4 +1198,6 @@ void
vmw_ioctl_cleanup(struct vmw_winsys_screen *vws)
{
VMW_FUNC;
+
+ free(vws->ioctl.cap_3d);
}
diff --git a/src/gallium/winsys/sw/dri/dri_sw_winsys.c b/src/gallium/winsys/sw/dri/dri_sw_winsys.c
index d519bcfedd3..cd44b036c6f 100644
--- a/src/gallium/winsys/sw/dri/dri_sw_winsys.c
+++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.c
@@ -62,7 +62,7 @@ struct dri_sw_winsys
{
struct sw_winsys base;
- struct drisw_loader_funcs *lf;
+ const struct drisw_loader_funcs *lf;
};
static inline struct dri_sw_displaytarget *
@@ -282,7 +282,7 @@ dri_destroy_sw_winsys(struct sw_winsys *winsys)
}
struct sw_winsys *
-dri_create_sw_winsys(struct drisw_loader_funcs *lf)
+dri_create_sw_winsys(const struct drisw_loader_funcs *lf)
{
struct dri_sw_winsys *ws;
diff --git a/src/gallium/winsys/sw/dri/dri_sw_winsys.h b/src/gallium/winsys/sw/dri/dri_sw_winsys.h
index 329ac06a05b..47e3777d4cd 100644
--- a/src/gallium/winsys/sw/dri/dri_sw_winsys.h
+++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.h
@@ -33,6 +33,6 @@
struct sw_winsys;
-struct sw_winsys *dri_create_sw_winsys(struct drisw_loader_funcs *lf);
+struct sw_winsys *dri_create_sw_winsys(const struct drisw_loader_funcs *lf);
#endif
diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
index a589f694bb0..176d04388f2 100644
--- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
@@ -559,7 +559,7 @@ virgl_cs_create_fence(struct virgl_winsys *vws)
res = virgl_vtest_winsys_resource_cache_create(vws,
PIPE_BUFFER,
PIPE_FORMAT_R8_UNORM,
- PIPE_BIND_CUSTOM,
+ VIRGL_BIND_CUSTOM,
8, 1, 1, 0, 0, 0, 8);
return (struct pipe_fence_handle *)res;
@@ -639,7 +639,7 @@ static void virgl_vtest_flush_frontbuffer(struct virgl_winsys *vws,
* get the data. */
virgl_vtest_recv_transfer_get_data(vtws, map + offset, size, valid_stride,
&box, res->format,
- util_format_get_stride(res->format, res->width));
+ vtws->protocol_version == 0 ? valid_stride : util_format_get_stride(res->format, res->width));
vtws->sws->displaytarget_unmap(vtws->sws, res->dt);
diff --git a/src/gbm/meson.build b/src/gbm/meson.build
index 2e9d380c0b4..719f9c1a9b8 100644
--- a/src/gbm/meson.build
+++ b/src/gbm/meson.build
@@ -32,7 +32,6 @@ args_gbm = []
deps_gbm = []
incs_gbm = [
include_directories('main'), inc_include, inc_src, inc_loader,
- inc_wayland_drm,
]
if with_dri2
diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am
index 8f9d80c9f41..d06ae2972e9 100644
--- a/src/glx/Makefile.am
+++ b/src/glx/Makefile.am
@@ -24,10 +24,6 @@ SUBDIRS =
EXTRA_DIST = SConscript meson.build
-if HAVE_XF86VIDMODE
-EXTRA_DEFINES_XF86VIDMODE = -DXF86VIDMODE
-endif
-
AM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/include/GL/internal \
@@ -38,7 +34,6 @@ AM_CFLAGS = \
-I$(top_builddir)/src/mapi/glapi \
-I$(top_srcdir)/src/mapi/glapi \
$(VISIBILITY_CFLAGS) \
- $(EXTRA_DEFINES_XF86VIDMODE) \
-D_REENTRANT \
-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
$(DEFINES) \
diff --git a/src/glx/SConscript b/src/glx/SConscript
index 8ce17715814..051f55b7669 100644
--- a/src/glx/SConscript
+++ b/src/glx/SConscript
@@ -36,10 +36,7 @@ env.Prepend(LIBS = [
env.PkgUseModules('X11')
env.PkgUseModules('XCB')
env.PkgUseModules('DRM')
-
-if env['HAVE_XF86VIDMODE']:
- env.Append(CPPDEFINES = ['XF86VIDMODE'])
- env.PkgUseModules('XF86VIDMODE')
+env.PkgUseModules('XF86VIDMODE')
sources = [
'clientattrib.c',
diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
index 4db0228eaba..424008fd670 100644
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -46,9 +46,9 @@
#include "util/debug.h"
#else
#include
-#ifdef XF86VIDMODE
+#ifndef GLX_USE_WINDOWSGL
#include
-#endif
+#endif /* GLX_USE_WINDOWSGL */
#endif
#endif
@@ -2071,7 +2071,7 @@ _X_HIDDEN GLboolean
__glxGetMscRate(struct glx_screen *psc,
int32_t * numerator, int32_t * denominator)
{
-#ifdef XF86VIDMODE
+#if !defined(GLX_USE_WINDOWSGL)
XF86VidModeModeLine mode_line;
int dot_clock;
int i;
@@ -2118,7 +2118,6 @@ __glxGetMscRate(struct glx_screen *psc,
return True;
}
- else
#endif
return False;
@@ -2145,7 +2144,7 @@ _X_HIDDEN GLboolean
__glXGetMscRateOML(Display * dpy, GLXDrawable drawable,
int32_t * numerator, int32_t * denominator)
{
-#if defined( GLX_DIRECT_RENDERING ) && defined( XF86VIDMODE )
+#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) && !defined(GLX_USE_WINDOWSGL)
__GLXDRIdrawable *draw = GetGLXDRIDrawable(dpy, drawable);
if (draw == NULL)
diff --git a/src/glx/meson.build b/src/glx/meson.build
index dd8ba60ad80..a61f959e800 100644
--- a/src/glx/meson.build
+++ b/src/glx/meson.build
@@ -137,10 +137,6 @@ gl_lib_cargs = [
'-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path),
]
-if dep_xxf86vm.found()
- gl_lib_cargs += '-DHAVE_XF86VIDMODE'
-endif
-
libglx = static_library(
'glx',
[files_libglx, glx_generated],
@@ -154,26 +150,22 @@ libglx = static_library(
extra_libs_libglx,
],
dependencies : [dep_libdrm, dep_dri2proto, dep_glproto, dep_x11, dep_glvnd],
- build_by_default : false,
)
-if with_glx == 'dri'
- libgl = shared_library(
- gl_lib_name,
- [],
- include_directories : [inc_common, inc_glapi, inc_loader, inc_gl_internal],
- link_with : [libglapi_static, libglapi],
- link_whole : libglx,
- link_args : [ld_args_bsymbolic, ld_args_gc_sections, extra_ld_args_libgl],
- dependencies : [
- dep_libdrm, dep_dl, dep_m, dep_thread, dep_x11, dep_xcb_glx, dep_xcb,
- dep_x11_xcb, dep_xcb_dri2, dep_xext, dep_xfixes, dep_xdamage,
- extra_deps_libgl,
- ],
- version : gl_lib_version,
- install : true,
- )
-endif
+libgl = shared_library(
+ gl_lib_name,
+ [],
+ link_with : [libglapi_static, libglapi],
+ link_whole : libglx,
+ link_args : [ld_args_bsymbolic, ld_args_gc_sections, extra_ld_args_libgl],
+ dependencies : [
+ dep_libdrm, dep_dl, dep_m, dep_thread, dep_x11, dep_xcb_glx, dep_xcb,
+ dep_x11_xcb, dep_xcb_dri2, dep_xext, dep_xfixes, dep_xdamage, dep_xxf86vm,
+ extra_deps_libgl,
+ ],
+ version : gl_lib_version,
+ install : true,
+)
if with_tests
subdir('tests')
diff --git a/src/glx/tests/meson.build b/src/glx/tests/meson.build
index fd9d4d433b2..e59b42d19a6 100644
--- a/src/glx/tests/meson.build
+++ b/src/glx/tests/meson.build
@@ -33,6 +33,11 @@ if with_shared_glapi
files_glx_test += files('query_renderer_implementation_unittest.cpp')
endif
+ test(
+ 'dispatch-index-check',
+ files('dispatch-index-check'),
+ suite : ['glx'],
+ )
test(
'glx-test',
executable(
@@ -41,9 +46,9 @@ if with_shared_glapi
link_with : [libglx, libglapi],
include_directories : [
inc_src, inc_include, inc_mesa, inc_mapi, inc_gl_internal,
- include_directories('..'),
+ inc_glx,
],
- dependencies : [dep_libdrm, dep_thread, idep_gtest]
- )
+ dependencies : [dep_libdrm, dep_glproto, dep_thread, idep_gtest]
+ ),
)
endif
diff --git a/src/intel/Android.common.mk b/src/intel/Android.common.mk
index 12cea6e5472..12bd8947e2e 100644
--- a/src/intel/Android.common.mk
+++ b/src/intel/Android.common.mk
@@ -38,7 +38,17 @@ LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/mesa
-LOCAL_SHARED_LIBRARIES := libexpat libz
+LOCAL_SHARED_LIBRARIES := libz
+
+# On Android 8.1 (API level 27) and newer, Mesa links libexpat statically; older releases link it dynamically.
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0)
+LOCAL_STATIC_LIBRARIES := \
+ libexpat
+LOCAL_HEADER_LIBRARIES += liblog_headers
+else
+LOCAL_SHARED_LIBRARIES += \
+ libexpat
+endif
LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_genxml
diff --git a/src/intel/Android.compiler.mk b/src/intel/Android.compiler.mk
index c2b01221dfc..41af7b20b9c 100644
--- a/src/intel/Android.compiler.mk
+++ b/src/intel/Android.compiler.mk
@@ -28,7 +28,7 @@
# ---------------------------------------
include $(CLEAR_VARS)
-
+LOCAL_CFLAGS += -Wno-error
LOCAL_MODULE := libmesa_intel_compiler
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
diff --git a/src/intel/Android.dev.mk b/src/intel/Android.dev.mk
index cd2ed66a176..3011ee232ed 100644
--- a/src/intel/Android.dev.mk
+++ b/src/intel/Android.dev.mk
@@ -33,5 +33,8 @@ LOCAL_C_INCLUDES := $(MESA_TOP)/include/drm-uapi
LOCAL_SRC_FILES := $(DEV_FILES)
+LOCAL_CFLAGS := \
+ -Wno-gnu-variable-sized-type-not-at-end
+
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
diff --git a/src/intel/Android.vulkan.mk b/src/intel/Android.vulkan.mk
index db81fada277..f4fda3f86ce 100644
--- a/src/intel/Android.vulkan.mk
+++ b/src/intel/Android.vulkan.mk
@@ -38,7 +38,10 @@ VULKAN_COMMON_INCLUDES := \
$(MESA_TOP)/src/intel \
$(MESA_TOP)/include/drm-uapi \
$(MESA_TOP)/src/intel/vulkan \
- frameworks/native/vulkan/include
+ frameworks/native/vulkan/include \
+ frameworks/native/libs/nativebase/include \
+ frameworks/native/libs/nativewindow/include \
+ frameworks/native/libs/arect/include
# libmesa_anv_entrypoints with header and dummy.c
#
@@ -72,7 +75,9 @@ $(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c
LOCAL_EXPORT_C_INCLUDE_DIRS := \
$(intermediates)
-LOCAL_SHARED_LIBRARIES := libdrm
+LOCAL_SHARED_LIBRARIES := libdrm_pri
+
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
@@ -84,7 +89,7 @@ ANV_INCLUDES := \
$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_vulkan_common,,)/vulkan \
$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_vulkan_util,,)/util
-ANV_SHARED_LIBRARIES := libdrm
+ANV_SHARED_LIBRARIES := libdrm_pri
ifeq ($(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5 6 7),)
ANV_SHARED_LIBRARIES += libnativewindow
@@ -107,6 +112,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
@@ -120,13 +127,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
LOCAL_SRC_FILES := $(VULKAN_GEN75_FILES)
LOCAL_CFLAGS := -DGEN_VERSIONx10=75
-
+LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers
LOCAL_C_INCLUDES := $(ANV_INCLUDES)
LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
@@ -140,13 +149,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
LOCAL_SRC_FILES := $(VULKAN_GEN8_FILES)
LOCAL_CFLAGS := -DGEN_VERSIONx10=80
-
+LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers
LOCAL_C_INCLUDES := $(ANV_INCLUDES)
LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
@@ -160,13 +171,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
LOCAL_SRC_FILES := $(VULKAN_GEN9_FILES)
LOCAL_CFLAGS := -DGEN_VERSIONx10=90
-
+LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers
LOCAL_C_INCLUDES := $(ANV_INCLUDES)
LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
@@ -180,13 +193,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
LOCAL_SRC_FILES := $(VULKAN_GEN10_FILES)
LOCAL_CFLAGS := -DGEN_VERSIONx10=100
-
+LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers
LOCAL_C_INCLUDES := $(ANV_INCLUDES)
LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
@@ -207,6 +222,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
@@ -217,7 +234,7 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := libmesa_vulkan_common
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
-
+LOCAL_CFLAGS += -Wno-error
intermediates := $(call local-generated-sources-dir)
LOCAL_SRC_FILES := $(VULKAN_FILES)
@@ -261,6 +278,8 @@ $(intermediates)/vulkan/anv_extensions.h:
LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
@@ -310,5 +329,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) libz libsync liblog
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
include $(MESA_COMMON_MK)
include $(BUILD_SHARED_LIBRARY)
diff --git a/src/intel/Makefile.compiler.am b/src/intel/Makefile.compiler.am
index cd7e6882fb9..7c33e35816b 100644
--- a/src/intel/Makefile.compiler.am
+++ b/src/intel/Makefile.compiler.am
@@ -64,6 +64,7 @@ COMPILER_TESTS = \
compiler/test_vf_float_conversions \
compiler/test_vec4_cmod_propagation \
compiler/test_vec4_copy_propagation \
+ compiler/test_vec4_dead_code_eliminate \
compiler/test_vec4_register_coalesce
TESTS += $(COMPILER_TESTS)
@@ -97,6 +98,10 @@ compiler_test_vec4_cmod_propagation_SOURCES = \
compiler/test_vec4_cmod_propagation.cpp
compiler_test_vec4_cmod_propagation_LDADD = $(TEST_LIBS)
+compiler_test_vec4_dead_code_eliminate_SOURCES = \
+ compiler/test_vec4_dead_code_eliminate.cpp
+compiler_test_vec4_dead_code_eliminate_LDADD = $(TEST_LIBS)
+
# Strictly speaking this is neither a C++ test nor using gtest - we can address
# address that at a later point. Until then, this allows us a to simplify things.
compiler_test_eu_compact_SOURCES = \
diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h
index 065980616ec..01bea99d3d8 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -1326,7 +1326,7 @@ blorp_emit_memcpy(struct blorp_batch *batch,
static void
blorp_emit_surface_state(struct blorp_batch *batch,
const struct brw_blorp_surface_info *surface,
- enum isl_aux_op op,
+ enum isl_aux_op aux_op,
void *state, uint32_t state_offset,
const bool color_write_disables[4],
bool is_render_target)
@@ -1382,7 +1382,7 @@ blorp_emit_surface_state(struct blorp_batch *batch,
surface->aux_addr, *aux_addr);
}
- if (surface->clear_color_addr.buffer) {
+ if (aux_usage != ISL_AUX_USAGE_NONE && surface->clear_color_addr.buffer) {
#if GEN_GEN >= 10
assert((surface->clear_color_addr.offset & 0x3f) == 0);
uint32_t *clear_addr = state + isl_dev->ss.clear_color_state_offset;
@@ -1390,7 +1390,10 @@ blorp_emit_surface_state(struct blorp_batch *batch,
isl_dev->ss.clear_color_state_offset,
surface->clear_color_addr, *clear_addr);
#elif GEN_GEN >= 7
- if (op == ISL_AUX_OP_FULL_RESOLVE || op == ISL_AUX_OP_PARTIAL_RESOLVE) {
+ /* Fast clears just whack the AUX surface and don't actually use the
+ * clear color for anything. We can avoid the MI memcpy in that case.
+ */
+ if (aux_op != ISL_AUX_OP_FAST_CLEAR) {
struct blorp_address dst_addr = blorp_get_surface_base_address(batch);
dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset;
blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr,
diff --git a/src/intel/common/gen_batch_decoder.c b/src/intel/common/gen_batch_decoder.c
index 63f04627572..36ee7706e40 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -214,7 +214,7 @@ handle_state_base_address(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
surface_modify = iter.raw_value;
} else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
dynamic_modify = iter.raw_value;
- } else if (strcmp(iter.name, "Insntruction Base Address Modify Enable") == 0) {
+ } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
instruction_modify = iter.raw_value;
}
}
diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f5818..8990d208207 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
{ "nohiz", DEBUG_NO_HIZ },
{ "color", DEBUG_COLOR },
{ "reemit", DEBUG_REEMIT },
+ { "heur32", DEBUG_HEUR32 },
{ NULL, 0 }
};
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index 72d7ca20a39..c2ca2e2ebd6 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
#define DEBUG_NO_HIZ (1ull << 39)
#define DEBUG_COLOR (1ull << 40)
#define DEBUG_REEMIT (1ull << 41)
+#define DEBUG_HEUR32 (1ull << 42)
/* These flags are not compatible with the disk shader cache */
#define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME
@@ -90,7 +91,7 @@ extern uint64_t INTEL_DEBUG;
/* These flags may affect program generation */
#define DEBUG_DISK_CACHE_MASK \
(DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 | DEBUG_SPILL_FS | \
- DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32)
+ DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_HEUR32)
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index d8c9499065f..785acdb3343 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -38,6 +38,15 @@ struct ra_regs;
struct nir_shader;
struct brw_program;
+struct brw_simd32_heuristics_control {
+ bool grouped_sends_check;
+ int max_grouped_sends;
+ bool inst_count_check;
+ float inst_count_ratio;
+ bool mrt_check;
+ int max_mrts;
+};
+
struct brw_compiler {
const struct gen_device_info *devinfo;
@@ -118,6 +127,8 @@ struct brw_compiler {
* whether nir_opt_large_constants will be run.
*/
bool supports_shader_constants;
+
+ struct brw_simd32_heuristics_control simd32_heuristics_control;
};
/**
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 4630b83b1a0..2618e9c2e93 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -925,8 +925,8 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
const struct gen_device_info *devinfo = p->devinfo;
/* When converting F->DF on IVB/BYT, every odd source channel is ignored.
- * To avoid the problems that causes, we use a <1,2,0> source region to read
- * each element twice.
+ * To avoid the problems that causes, we use an <X,2,0> source region to
+ * read each element twice.
*/
if (devinfo->gen == 7 && !devinfo->is_haswell &&
brw_get_default_access_mode(p) == BRW_ALIGN_1 &&
@@ -935,11 +935,8 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
src0.type == BRW_REGISTER_TYPE_D ||
src0.type == BRW_REGISTER_TYPE_UD) &&
!has_scalar_region(src0)) {
- assert(src0.vstride == BRW_VERTICAL_STRIDE_4 &&
- src0.width == BRW_WIDTH_4 &&
- src0.hstride == BRW_HORIZONTAL_STRIDE_1);
-
- src0.vstride = BRW_VERTICAL_STRIDE_1;
+ assert(src0.vstride == src0.width + src0.hstride);
+ src0.vstride = src0.hstride;
src0.width = BRW_WIDTH_2;
src0.hstride = BRW_HORIZONTAL_STRIDE_0;
}
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 3e083723471..6826226e209 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -315,6 +315,24 @@ fs_inst::has_source_and_destination_hazard() const
* may stomp all over it.
*/
return true;
+ case SHADER_OPCODE_QUAD_SWIZZLE:
+ switch (src[1].ud) {
+ case BRW_SWIZZLE_XXXX:
+ case BRW_SWIZZLE_YYYY:
+ case BRW_SWIZZLE_ZZZZ:
+ case BRW_SWIZZLE_WWWW:
+ case BRW_SWIZZLE_XXZZ:
+ case BRW_SWIZZLE_YYWW:
+ case BRW_SWIZZLE_XYXY:
+ case BRW_SWIZZLE_ZWZW:
+ /* These can be implemented as a single Align1 region on all
+ * platforms, so there's never a hazard between source and
+ * destination. C.f. fs_generator::generate_quad_swizzle().
+ */
+ return false;
+ default:
+ return !is_uniform(src[0]);
+ }
default:
/* The SIMD16 compressed instruction
*
@@ -3853,6 +3871,9 @@ fs_visitor::lower_integer_multiplication()
high.offset = inst->dst.offset % REG_SIZE;
if (devinfo->gen >= 7) {
+ if (inst->src[1].abs)
+ lower_src_modifiers(this, block, inst, 1);
+
if (inst->src[1].file == IMM) {
ibld.MUL(low, inst->src[0],
brw_imm_uw(inst->src[1].ud & 0xffff));
@@ -3865,6 +3886,9 @@ fs_visitor::lower_integer_multiplication()
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1));
}
} else {
+ if (inst->src[0].abs)
+ lower_src_modifiers(this, block, inst, 0);
+
ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0),
inst->src[1]);
ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1),
@@ -3882,6 +3906,18 @@ fs_visitor::lower_integer_multiplication()
}
} else if (inst->opcode == SHADER_OPCODE_MULH) {
+ /* According to the BDW+ BSpec page for the "Multiply Accumulate
+ * High" instruction:
+ *
+ * "An added preliminary mov is required for source modification on
+ * src1:
+ * mov (8) r3.0<1>:d -r3<8;8,1>:d
+ * mul (8) acc0:d r2.0<8;8,1>:d r3.0<16;8,2>:uw
+ * mach (8) r5.0<1>:d r2.0<8;8,1>:d r3.0<8;8,1>:d"
+ */
+ if (devinfo->gen >= 8 && (inst->src[1].negate || inst->src[1].abs))
+ lower_src_modifiers(this, block, inst, 1);
+
/* Should have been lowered to 8-wide. */
assert(inst->exec_size <= get_lowered_simd_width(devinfo, inst));
const fs_reg acc = retype(brw_acc_reg(inst->exec_size),
@@ -3897,8 +3933,6 @@ fs_visitor::lower_integer_multiplication()
* On Gen8, the multiply instruction does a full 32x32-bit
* multiply, but in order to do a 64-bit multiply we can simulate
* the previous behavior and then use a MACH instruction.
- *
- * FINISHME: Don't use source modifiers on src1.
*/
assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
mul->src[1].type == BRW_REGISTER_TYPE_UD);
@@ -5534,9 +5568,14 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
return MIN2(8, inst->exec_size);
- case SHADER_OPCODE_QUAD_SWIZZLE:
- return 8;
-
+ case SHADER_OPCODE_QUAD_SWIZZLE: {
+ const unsigned swiz = inst->src[1].ud;
+ return (is_uniform(inst->src[0]) ?
+ get_fpu_lowered_simd_width(devinfo, inst) :
+ devinfo->gen < 11 && type_sz(inst->src[0].type) == 4 ? 8 :
+ swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ? 4 :
+ get_fpu_lowered_simd_width(devinfo, inst));
+ }
case SHADER_OPCODE_MOV_INDIRECT: {
/* From IVB and HSW PRMs:
*
@@ -5601,8 +5640,10 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i)
static fs_reg
emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i)
{
+ assert(lbld.group() >= inst->group);
+
/* Specified channel group from the source region. */
- const fs_reg src = horiz_offset(inst->src[i], lbld.group());
+ const fs_reg src = horiz_offset(inst->src[i], lbld.group() - inst->group);
if (needs_src_copy(lbld, inst, i)) {
/* Builder of the right width to perform the copy avoiding uninitialized
@@ -5691,9 +5732,10 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
{
assert(lbld_before.dispatch_width() == lbld_after.dispatch_width());
assert(lbld_before.group() == lbld_after.group());
+ assert(lbld_after.group() >= inst->group);
/* Specified channel group from the destination region. */
- const fs_reg dst = horiz_offset(inst->dst, lbld_after.group());
+ const fs_reg dst = horiz_offset(inst->dst, lbld_after.group() - inst->group);
const unsigned dst_size = inst->size_written /
inst->dst.component_size(inst->exec_size);
@@ -7127,6 +7169,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
char **error_str)
{
const struct gen_device_info *devinfo = compiler->devinfo;
+ bool simd16_failed = false;
+ bool simd16_spilled = false;
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
@@ -7194,10 +7238,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
shader_time_index16);
v16.import_uniforms(&v8);
if (!v16.run_fs(allow_spilling, use_rep_send)) {
+ simd16_failed = true;
compiler->shader_perf_log(log_data,
"SIMD16 shader failed to compile: %s",
v16.fail_msg);
} else {
+ simd16_spilled = v16.spilled_any_registers;
simd16_cfg = v16.cfg;
prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used);
@@ -7205,9 +7251,17 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
}
/* Currently, the compiler only supports SIMD32 on SNB+ */
+ const brw_simd32_heuristics_control *ctrl = &compiler->simd32_heuristics_control;
+ uint64_t mrts = shader->info.outputs_written << FRAG_RESULT_DATA0;
+
if (v8.max_dispatch_width >= 32 && !use_rep_send &&
compiler->devinfo->gen >= 6 &&
- unlikely(INTEL_DEBUG & DEBUG_DO32)) {
+ (unlikely(INTEL_DEBUG & DEBUG_DO32) ||
+ (unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
+ !simd16_failed && !simd16_spilled &&
+ (!ctrl->mrt_check ||
+ (ctrl->mrt_check &&
+ u_count_bits64(&mrts) <= ctrl->max_mrts))))) {
/* Try a SIMD32 compile */
fs_visitor v32(compiler, log_data, mem_ctx, key,
&prog_data->base, prog, shader, 32,
@@ -7218,9 +7272,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
"SIMD32 shader failed to compile: %s",
v32.fail_msg);
} else {
- simd32_cfg = v32.cfg;
- prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
- prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
+ if (likely(!(INTEL_DEBUG & DEBUG_HEUR32)) ||
+ v32.run_heuristic(ctrl)) {
+ simd32_cfg = v32.cfg;
+ prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
+ prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
+ }
}
}
@@ -7299,13 +7356,49 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
}
if (simd32_cfg) {
- prog_data->dispatch_32 = true;
- prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32);
+ uint32_t offset = g.generate_code(simd32_cfg, 32);
+
+ if (unlikely(INTEL_DEBUG & DEBUG_DO32) ||
+ (unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
+ (!simd16_cfg ||
+ (simd16_cfg &&
+ (!ctrl->inst_count_check ||
+ (ctrl->inst_count_check &&
+ (float)g.get_inst_count(32) / (float)g.get_inst_count(16) <= ctrl->inst_count_ratio)))))) {
+ prog_data->dispatch_32 = true;
+ prog_data->prog_offset_32 = offset;
+ }
}
return g.get_assembly();
}
+bool
+fs_visitor::run_heuristic(const struct brw_simd32_heuristics_control *ctrl) {
+ int grouped_sends = 0;
+ int max_grouped_sends = 0;
+ bool pass = true;
+
+ foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ if (inst->opcode >= SHADER_OPCODE_TEX && inst->opcode <= SHADER_OPCODE_SAMPLEINFO_LOGICAL) {
+ ++grouped_sends;
+ } else if (grouped_sends > 0) {
+ if (grouped_sends > max_grouped_sends) {
+ max_grouped_sends = grouped_sends;
+ }
+ grouped_sends = 0;
+ }
+ }
+
+ if (ctrl->grouped_sends_check) {
+ if (max_grouped_sends > ctrl->max_grouped_sends) {
+ pass = false;
+ }
+ }
+
+ return pass;
+}
+
fs_reg *
fs_visitor::emit_cs_work_group_id_setup()
{
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index aba19d5ab2c..26868c10107 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -286,6 +286,8 @@ class fs_visitor : public backend_shader
void dump_instruction(backend_instruction *inst);
void dump_instruction(backend_instruction *inst, FILE *file);
+ bool run_heuristic(const struct brw_simd32_heuristics_control *ctrl);
+
const void *const key;
const struct brw_sampler_prog_key_data *key_tex;
@@ -397,6 +399,7 @@ class fs_generator
void enable_debug(const char *shader_name);
int generate_code(const cfg_t *cfg, int dispatch_width);
+ int get_inst_count(int dispatch_width);
const unsigned *get_assembly();
private:
@@ -478,6 +481,10 @@ class fs_generator
struct brw_reg src,
struct brw_reg idx);
+ void generate_quad_swizzle(const fs_inst *inst,
+ struct brw_reg dst, struct brw_reg src,
+ unsigned swiz);
+
bool patch_discard_jumps_to_fb_writes();
const struct brw_compiler *compiler;
@@ -489,6 +496,7 @@ class fs_generator
struct brw_stage_prog_data * const prog_data;
unsigned dispatch_width; /**< 8, 16 or 32 */
+ int inst_count[3]; /* for 8, 16 and 32 */
exec_list discard_halt_patches;
unsigned promoted_constants;
@@ -529,6 +537,25 @@ namespace brw {
return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
}
}
+
+ /**
+ * Remove any modifiers from the \p i-th source region of the instruction,
+ * including negate, abs and any implicit type conversion to the execution
+ * type. Instead any source modifiers will be implemented as a separate
+ * MOV instruction prior to the original instruction.
+ */
+ inline bool
+ lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i)
+ {
+ assert(inst->components_read(i) == 1);
+ const fs_builder ibld(v, block, inst);
+ const fs_reg tmp = ibld.vgrf(get_exec_type(inst));
+
+ ibld.MOV(tmp, inst->src[i]);
+ inst->src[i] = tmp;
+
+ return true;
+ }
}
void shuffle_from_32bit_read(const brw::fs_builder &bld,
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index ab34b63748e..a76e0f3a6b5 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -315,6 +315,16 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned stride,
if (stride > 4)
return false;
+ /* Bail if the channels of the source need to be aligned to the byte offset
+ * of the corresponding channel of the destination, and the provided stride
+ * would break this restriction.
+ */
+ if (has_dst_aligned_region_restriction(devinfo, inst) &&
+ !(type_sz(inst->src[arg].type) * stride ==
+ type_sz(inst->dst.type) * inst->dst.stride ||
+ stride == 0))
+ return false;
+
/* 3-source instructions can only be Align16, which restricts what strides
* they can take. They can only take a stride of 1 (the usual case), or 0
* with a special "repctrl" bit. But the repctrl bit doesn't work for
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 08dd83dded7..4c452e1c38a 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -582,6 +582,72 @@ fs_generator::generate_shuffle(fs_inst *inst,
}
}
+void
+fs_generator::generate_quad_swizzle(const fs_inst *inst,
+ struct brw_reg dst, struct brw_reg src,
+ unsigned swiz)
+{
+ /* Requires a quad. */
+ assert(inst->exec_size >= 4);
+
+ if (src.file == BRW_IMMEDIATE_VALUE ||
+ has_scalar_region(src)) {
+ /* The value is uniform across all channels */
+ brw_MOV(p, dst, src);
+
+ } else if (devinfo->gen < 11 && type_sz(src.type) == 4) {
+ /* This only works on 8-wide 32-bit values */
+ assert(inst->exec_size == 8);
+ assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+ assert(src.vstride == src.width + 1);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
+ struct brw_reg swiz_src = stride(src, 4, 4, 1);
+ swiz_src.swizzle = swiz;
+ brw_MOV(p, dst, swiz_src);
+
+ } else {
+ assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+ assert(src.vstride == src.width + 1);
+ const struct brw_reg src_0 = suboffset(src, BRW_GET_SWZ(swiz, 0));
+
+ switch (swiz) {
+ case BRW_SWIZZLE_XXXX:
+ case BRW_SWIZZLE_YYYY:
+ case BRW_SWIZZLE_ZZZZ:
+ case BRW_SWIZZLE_WWWW:
+ brw_MOV(p, dst, stride(src_0, 4, 4, 0));
+ break;
+
+ case BRW_SWIZZLE_XXZZ:
+ case BRW_SWIZZLE_YYWW:
+ brw_MOV(p, dst, stride(src_0, 2, 2, 0));
+ break;
+
+ case BRW_SWIZZLE_XYXY:
+ case BRW_SWIZZLE_ZWZW:
+ assert(inst->exec_size == 4);
+ brw_MOV(p, dst, stride(src_0, 0, 2, 1));
+ break;
+
+ default:
+ assert(inst->force_writemask_all);
+ brw_set_default_exec_size(p, cvt(inst->exec_size / 4) - 1);
+
+ for (unsigned c = 0; c < 4; c++) {
+ brw_inst *insn = brw_MOV(
+ p, stride(suboffset(dst, c),
+ 4 * inst->dst.stride, 1, 4 * inst->dst.stride),
+ stride(suboffset(src, BRW_GET_SWZ(swiz, c)), 4, 1, 0));
+
+ brw_inst_set_no_dd_clear(devinfo, insn, c < 3);
+ brw_inst_set_no_dd_check(devinfo, insn, c > 0);
+ }
+
+ break;
+ }
+ }
+}
+
void
fs_generator::generate_urb_read(fs_inst *inst,
struct brw_reg dst,
@@ -2303,23 +2369,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break;
case SHADER_OPCODE_QUAD_SWIZZLE:
- /* This only works on 8-wide 32-bit values */
- assert(inst->exec_size == 8);
- assert(type_sz(src[0].type) == 4);
- assert(inst->force_writemask_all);
assert(src[1].file == BRW_IMMEDIATE_VALUE);
assert(src[1].type == BRW_REGISTER_TYPE_UD);
-
- if (src[0].file == BRW_IMMEDIATE_VALUE ||
- (src[0].vstride == 0 && src[0].hstride == 0)) {
- /* The value is uniform across all channels */
- brw_MOV(p, dst, src[0]);
- } else {
- brw_set_default_access_mode(p, BRW_ALIGN_16);
- struct brw_reg swiz_src = stride(src[0], 4, 4, 1);
- swiz_src.swizzle = inst->src[1].ud;
- brw_MOV(p, dst, swiz_src);
- }
+ generate_quad_swizzle(inst, dst, src[0], src[1].ud);
break;
case SHADER_OPCODE_CLUSTER_BROADCAST: {
@@ -2486,6 +2538,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
fill_count, promoted_constants, before_size,
after_size);
+ inst_count[ffs(dispatch_width) - 4] = before_size / 16;
+
return start_offset;
}
@@ -2494,3 +2548,13 @@ fs_generator::get_assembly()
{
return brw_get_program(p, &prog_data->program_size);
}
+
+int
+fs_generator::get_inst_count(int dispatch_width)
+{
+ if (dispatch_width == 8 || dispatch_width == 16 || dispatch_width == 32) {
+ return inst_count[ffs(dispatch_width) - 4];
+ } else {
+ return 0;
+ }
+}
\ No newline at end of file
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index c845d87d59b..c33394d10d4 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4804,7 +4804,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_begin_fragment_shader_ordering:
case nir_intrinsic_begin_invocation_interlock: {
const fs_builder ubld = bld.group(8, 0);
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index 07e7224e0f8..95b069a2e02 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -486,4 +486,32 @@ get_exec_type_size(const fs_inst *inst)
return type_sz(get_exec_type(inst));
}
+/**
+ * Return whether the following regioning restriction applies to the specified
+ * instruction. From the Cherryview PRM Vol 7. "Register Region
+ * Restrictions":
+ *
+ * "When source or destination datatype is 64b or operation is integer DWord
+ * multiply, regioning in Align1 must follow these rules:
+ *
+ * 1. Source and Destination horizontal stride must be aligned to the same qword.
+ * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
+ * 3. Source and Destination offset must be the same, except the case of
+ * scalar source."
+ */
+static inline bool
+has_dst_aligned_region_restriction(const gen_device_info *devinfo,
+ const fs_inst *inst)
+{
+ const brw_reg_type exec_type = get_exec_type(inst);
+ const bool is_int_multiply = !brw_reg_type_is_floating_point(exec_type) &&
+ (inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD);
+
+ if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 ||
+ (type_sz(exec_type) == 4 && is_int_multiply))
+ return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo);
+ else
+ return false;
+}
+
#endif
diff --git a/src/intel/compiler/brw_nir_opt_peephole_ffma.c b/src/intel/compiler/brw_nir_opt_peephole_ffma.c
index cc225e1847b..7271bdbca43 100644
--- a/src/intel/compiler/brw_nir_opt_peephole_ffma.c
+++ b/src/intel/compiler/brw_nir_opt_peephole_ffma.c
@@ -68,7 +68,7 @@ are_all_uses_fadd(nir_ssa_def *def)
}
static nir_alu_instr *
-get_mul_for_src(nir_alu_src *src, int num_components,
+get_mul_for_src(nir_alu_src *src, unsigned num_components,
uint8_t swizzle[4], bool *negate, bool *abs)
{
uint8_t swizzle_tmp[4];
@@ -93,16 +93,19 @@ get_mul_for_src(nir_alu_src *src, int num_components,
switch (alu->op) {
case nir_op_imov:
case nir_op_fmov:
- alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components,
+ swizzle, negate, abs);
break;
case nir_op_fneg:
- alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components,
+ swizzle, negate, abs);
*negate = !*negate;
break;
case nir_op_fabs:
- alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components,
+ swizzle, negate, abs);
*negate = false;
*abs = true;
break;
diff --git a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
index c09a3d7ebe9..99e4c9cacaf 100644
--- a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
+++ b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp
@@ -81,17 +81,46 @@ vec4_visitor::dead_code_eliminate()
result_live[3] = result;
}
- for (int c = 0; c < 4; c++) {
- if (!result_live[c] && inst->dst.writemask & (1 << c)) {
- inst->dst.writemask &= ~(1 << c);
+ if (inst->writes_flag()) {
+ /* Independently calculate the usage of the flag components and
+ * the destination value components.
+ */
+ uint8_t flag_mask = inst->dst.writemask;
+ uint8_t dest_mask = inst->dst.writemask;
+
+ for (int c = 0; c < 4; c++) {
+ if (!result_live[c] && dest_mask & (1 << c))
+ dest_mask &= ~(1 << c);
+
+ if (!BITSET_TEST(flag_live, c))
+ flag_mask &= ~(1 << c);
+ }
+
+ if (inst->dst.writemask != (flag_mask | dest_mask)) {
progress = true;
+ inst->dst.writemask = flag_mask | dest_mask;
+ }
- if (inst->dst.writemask == 0) {
- if (inst->writes_accumulator || inst->writes_flag()) {
- inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
- } else {
- inst->opcode = BRW_OPCODE_NOP;
- break;
+ /* If none of the destination components are read, replace the
+ * destination register with the NULL register.
+ */
+ if (dest_mask == 0) {
+ progress = true;
+ inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
+ }
+ } else {
+ for (int c = 0; c < 4; c++) {
+ if (!result_live[c] && inst->dst.writemask & (1 << c)) {
+ inst->dst.writemask &= ~(1 << c);
+ progress = true;
+
+ if (inst->dst.writemask == 0) {
+ if (inst->writes_accumulator) {
+ inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
+ } else {
+ inst->opcode = BRW_OPCODE_NOP;
+ break;
+ }
}
}
}
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
index 3cdeb6214a8..f2854be779a 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -145,7 +145,8 @@ if with_tests
foreach t : ['fs_cmod_propagation', 'fs_copy_propagation',
'fs_saturate_propagation', 'vf_float_conversions',
'vec4_register_coalesce', 'vec4_copy_propagation',
- 'vec4_cmod_propagation', 'eu_compact', 'eu_validate']
+ 'vec4_cmod_propagation', 'vec4_dead_code_eliminate',
+ 'eu_compact', 'eu_validate']
test(
t,
executable(
diff --git a/src/intel/compiler/test_vec4_dead_code_eliminate.cpp b/src/intel/compiler/test_vec4_dead_code_eliminate.cpp
new file mode 100644
index 00000000000..25739c2895a
--- /dev/null
+++ b/src/intel/compiler/test_vec4_dead_code_eliminate.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <gtest/gtest.h>
+#include "brw_vec4.h"
+#include "program/program.h"
+
+using namespace brw;
+
+class dead_code_eliminate_test : public ::testing::Test { /* gtest fixture for the vec4 dead-code-elimination tests */
+ virtual void SetUp(); /* allocates the members below and creates the visitor v */
+
+public:
+ struct brw_compiler *compiler; /* zero-filled by SetUp(); its devinfo field is pointed at the member below */
+ struct gen_device_info *devinfo; /* zero-filled by SetUp(), which then sets gen to 4 */
+ struct gl_context *ctx; /* zero-filled by SetUp() */
+ struct gl_shader_program *shader_prog; /* NOTE(review): never assigned by SetUp() in this file */
+ struct brw_vue_prog_data *prog_data; /* zero-filled by SetUp() and handed to the visitor */
+ vec4_visitor *v; /* visitor the tests emit instructions into, created by SetUp() */
+};
+
+class dead_code_eliminate_vec4_visitor : public vec4_visitor /* vec4_visitor subclass instantiated by the test fixture */
+{
+public:
+ dead_code_eliminate_vec4_visitor(struct brw_compiler *compiler,
+ nir_shader *shader,
+ struct brw_vue_prog_data *prog_data)
+ : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL,
+ false /* no_spills */, -1)
+ {
+ prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; /* the only state this constructor sets itself */
+ }
+
+protected: /* every override below is a stub: each one only calls unreachable() */
+ virtual dst_reg *make_reg_for_system_value(int /* location */)
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void setup_payload()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_prolog()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_thread_end()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_urb_write_header(int /* mrf */)
+ {
+ unreachable("Not reached");
+ }
+
+ virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */)
+ {
+ unreachable("Not reached");
+ }
+};
+
+
+void dead_code_eliminate_test::SetUp() /* fixture setup: allocate zeroed state, create a vertex shader and the visitor */
+{
+ ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); /* NOTE(review): none of these calloc results is checked, and nothing in this file frees them */
+ compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+ devinfo = (struct gen_device_info *)calloc(1, sizeof(*devinfo));
+ prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
+ compiler->devinfo = devinfo; /* the compiler and the fixture share the same devinfo object */
+
+ nir_shader *shader =
+ nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL, NULL);
+
+ v = new dead_code_eliminate_vec4_visitor(compiler, shader, prog_data); /* NOTE(review): no matching delete appears in this file */
+
+ devinfo->gen = 4; /* NOTE(review): assigned only after the visitor is constructed - confirm the constructor does not depend on gen */
+}
+
+static void
+dead_code_eliminate(vec4_visitor *v) /* test helper: compute the CFG of v, then run its dead_code_eliminate() pass once */
+{
+ bool print = false; /* debugging switch: change to true to dump the instructions to stderr around the pass */
+
+ if (print) {
+ fprintf(stderr, "instructions before:\n");
+ v->dump_instructions();
+ }
+
+ v->calculate_cfg(); /* called before the pass on every run of this helper */
+ v->dead_code_eliminate(); /* return value is ignored; callers inspect the instructions directly */
+
+ if (print) {
+ fprintf(stderr, "instructions after:\n");
+ v->dump_instructions();
+ }
+}
+
+TEST_F(dead_code_eliminate_test, some_dead_channels_all_flags_used) /* CMP result is only partly read, but its writemask must survive DCE */
+{
+ const vec4_builder bld = vec4_builder(v).at_end(); /* builder that appends to the fixture's visitor */
+ src_reg r1 = src_reg(v, glsl_type::vec4_type);
+ src_reg r2 = src_reg(v, glsl_type::vec4_type);
+ src_reg r3 = src_reg(v, glsl_type::vec4_type);
+ src_reg r4 = src_reg(v, glsl_type::vec4_type);
+ src_reg r5 = src_reg(v, glsl_type::vec4_type);
+ src_reg r6 = src_reg(v, glsl_type::vec4_type);
+
+ /* A sequence like the following should not be modified by DCE.
+ *
+ * cmp.l.f0(8) g4<1>F g2<4,4,1>.wF g1<4,4,1>.xF
+ * mov(8) g5<1>.xF g4<4,4,1>.xF
+ * (+f0.x) sel(8) g6<1>UD g3<4>UD g6<4>UD
+ */
+ vec4_instruction *test_cmp =
+ bld.CMP(dst_reg(r4), r2, r1, BRW_CONDITIONAL_L); /* r4 = (r2 < r1), with a conditional modifier */
+
+ test_cmp->src[0].swizzle = BRW_SWIZZLE_WWWW;
+ test_cmp->src[1].swizzle = BRW_SWIZZLE_XXXX;
+
+ vec4_instruction *test_mov =
+ bld.MOV(dst_reg(r5), r4);
+
+ test_mov->dst.writemask = WRITEMASK_X; /* the MOV writes only .x ... */
+ test_mov->src[0].swizzle = BRW_SWIZZLE_XXXX; /* ... and reads only the .x channel of the CMP result */
+
+ vec4_instruction *test_sel =
+ bld.SEL(dst_reg(r6), r3, r6);
+
+ set_predicate(BRW_PREDICATE_NORMAL, test_sel); /* make the SEL predicated, as in the listing above */
+
+ /* The scratch write is here just to make r5 and r6 be live so that the
+ * whole program doesn't get eliminated by DCE.
+ */
+ v->emit(v->SCRATCH_WRITE(dst_reg(r4), r6, r5));
+
+ dead_code_eliminate(v);
+
+ EXPECT_EQ(test_cmp->dst.writemask, WRITEMASK_XYZW); /* the CMP must still write all four channels after DCE */
+}
diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
index abd5da297d6..acded759335 100644
--- a/src/intel/genxml/gen10.xml
+++ b/src/intel/genxml/gen10.xml
@@ -3553,6 +3553,14 @@
+
+
+
+
+
+
+
+
diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml
index c69d7dc89c2..d39bf09a5d7 100644
--- a/src/intel/genxml/gen11.xml
+++ b/src/intel/genxml/gen11.xml
@@ -3551,6 +3551,14 @@
+
+
+
+
+
+
+
+
diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index ca268254503..b7ce3095ab4 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -3491,6 +3491,14 @@
+
+
+
+
+
+
+
+
diff --git a/src/intel/tools/aubinator_viewer_decoder.cpp b/src/intel/tools/aubinator_viewer_decoder.cpp
index 5311a8afc31..59cde530409 100644
--- a/src/intel/tools/aubinator_viewer_decoder.cpp
+++ b/src/intel/tools/aubinator_viewer_decoder.cpp
@@ -172,7 +172,7 @@ handle_state_base_address(struct aub_viewer_decode_ctx *ctx,
surface_modify = iter.raw_value;
} else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
dynamic_modify = iter.raw_value;
- } else if (strcmp(iter.name, "Insntruction Base Address Modify Enable") == 0) {
+ } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
instruction_modify = iter.raw_value;
}
}
diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c
index 46c41d57861..4720095c6cd 100644
--- a/src/intel/vulkan/anv_android.c
+++ b/src/intel/vulkan/anv_android.c
@@ -128,7 +128,7 @@ anv_image_from_gralloc(VkDevice device_h,
*/
int dma_buf = gralloc_info->handle->data[0];
- uint64_t bo_flags = 0;
+ uint64_t bo_flags = ANV_BO_EXTERNAL;
if (device->instance->physicalDevice.supports_48bit_addresses)
bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
if (device->instance->physicalDevice.use_softpin)
@@ -234,7 +234,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
*grallocUsage = 0;
intel_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage);
- /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
+ /* WARNING: Android's libvulkan.so hardcodes the VkImageUsageFlags
* returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
* The relevant code in libvulkan/swapchain.cpp contains this fun comment:
*
@@ -247,7 +247,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
* dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
*/
- const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
+ VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
.format = format,
.type = VK_IMAGE_TYPE_2D,
@@ -255,6 +255,17 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
.usage = imageUsage,
};
+ /* Android P and earlier do not check whether the physical device supports
+ * a given format and usage combination before calling this function. Omit
+ * the storage requirement to make the tests pass.
+ */
+#if ANDROID_API_LEVEL <= 28
+ if (format == VK_FORMAT_R8G8B8A8_SRGB ||
+ format == VK_FORMAT_R5G6B5_UNORM_PACK16) {
+ image_format_info.usage &= ~VK_IMAGE_USAGE_STORAGE_BIT;
+ }
+#endif
+
VkImageFormatProperties2KHR image_format_props = {
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR,
};
@@ -268,19 +279,13 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
"inside %s", __func__);
}
- /* Reject STORAGE here to avoid complexity elsewhere. */
- if (imageUsage & VK_IMAGE_USAGE_STORAGE_BIT) {
- return vk_errorf(device->instance, device, VK_ERROR_FORMAT_NOT_SUPPORTED,
- "VK_IMAGE_USAGE_STORAGE_BIT unsupported for gralloc "
- "swapchain");
- }
-
if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
*grallocUsage |= GRALLOC_USAGE_HW_RENDER;
if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT |
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
*grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 478b8e7a3db..acc9a22c484 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1658,6 +1658,7 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
+ enum isl_format format,
VkImageAspectFlagBits aspect,
uint32_t base_layer, uint32_t layer_count,
enum isl_aux_op mcs_op, union isl_color_value *clear_value,
@@ -1713,12 +1714,12 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
switch (mcs_op) {
case ISL_AUX_OP_FAST_CLEAR:
- blorp_fast_clear(&batch, &surf, surf.surf->format,
+ blorp_fast_clear(&batch, &surf, format,
0, base_layer, layer_count,
0, 0, image->extent.width, image->extent.height);
break;
case ISL_AUX_OP_PARTIAL_RESOLVE:
- blorp_mcs_partial_resolve(&batch, &surf, surf.surf->format,
+ blorp_mcs_partial_resolve(&batch, &surf, format,
base_layer, layer_count);
break;
case ISL_AUX_OP_FULL_RESOLVE:
@@ -1736,6 +1737,7 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
+ enum isl_format format,
VkImageAspectFlagBits aspect, uint32_t level,
uint32_t base_layer, uint32_t layer_count,
enum isl_aux_op ccs_op, union isl_color_value *clear_value,
@@ -1799,14 +1801,14 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
switch (ccs_op) {
case ISL_AUX_OP_FAST_CLEAR:
- blorp_fast_clear(&batch, &surf, surf.surf->format,
+ blorp_fast_clear(&batch, &surf, format,
level, base_layer, layer_count,
0, 0, level_width, level_height);
break;
case ISL_AUX_OP_FULL_RESOLVE:
case ISL_AUX_OP_PARTIAL_RESOLVE:
blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count,
- surf.surf->format, ccs_op);
+ format, ccs_op);
break;
case ISL_AUX_OP_AMBIGUATE:
for (uint32_t a = 0; a < layer_count; a++) {
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index ee35e013329..924470b3005 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -636,7 +636,7 @@ VkResult anv_CreateInstance(
}
if (instance->app_info.api_version == 0)
- anv_EnumerateInstanceVersion(&instance->app_info.api_version);
+ instance->app_info.api_version = VK_API_VERSION_1_0;
instance->enabled_extensions = enabled_extensions;
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index e9afe06bb13..dd7111b58e1 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -71,9 +71,9 @@ def __init__(self, version, enable):
EXTENSIONS = [
Extension('VK_ANDROID_native_buffer', 5, 'ANDROID'),
Extension('VK_KHR_16bit_storage', 1, 'device->info.gen >= 8'),
- Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8'),
+ Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8 && !ANDROID'),
Extension('VK_KHR_bind_memory2', 1, True),
- Extension('VK_KHR_create_renderpass2', 1, True),
+ Extension('VK_KHR_create_renderpass2', 1, '!ANDROID'),
Extension('VK_KHR_dedicated_allocation', 1, True),
Extension('VK_KHR_descriptor_update_template', 1, True),
Extension('VK_KHR_device_group', 1, True),
@@ -121,7 +121,7 @@ def __init__(self, version, enable):
Extension('VK_EXT_external_memory_dma_buf', 1, True),
Extension('VK_EXT_global_priority', 1,
'device->has_context_priority'),
- Extension('VK_EXT_pci_bus_info', 1, True),
+ Extension('VK_EXT_pci_bus_info', 1, False),
Extension('VK_EXT_shader_viewport_index_layer', 1, True),
Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen >= 9'),
Extension('VK_EXT_vertex_attribute_divisor', 3, True),
diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c
index ed1bc096c66..f6b8ded20a9 100644
--- a/src/intel/vulkan/anv_intel.c
+++ b/src/intel/vulkan/anv_intel.c
@@ -64,7 +64,8 @@ VkResult anv_CreateDmaBufImageINTEL(
.samples = 1,
/* FIXME: Need a way to use X tiling to allow scanout */
.tiling = VK_IMAGE_TILING_OPTIMAL,
- .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_SAMPLED_BIT,
.flags = 0,
}},
pAllocator, &image_h);
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 6868288e486..60c196e5c4f 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -171,6 +171,8 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
{
nir_builder *b = &state->builder;
+ b->cursor = nir_before_instr(&intrin->instr);
+
/* For us, the resource indices are just indices into the binding table and
* array elements are sequential. A resource_reindex just turns into an
* add of the two indices.
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index ad0f08253e7..f170366d030 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -446,6 +446,9 @@ anv_pipeline_hash_graphics(struct anv_pipeline *pipeline,
if (layout)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
+ const bool rba = pipeline->device->robust_buffer_access;
+ _mesa_sha1_update(&ctx, &rba, sizeof(rba));
+
for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
if (stages[s].entrypoint)
anv_pipeline_hash_shader(&ctx, &stages[s]);
@@ -466,6 +469,9 @@ anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
if (layout)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
+ const bool rba = pipeline->device->robust_buffer_access;
+ _mesa_sha1_update(&ctx, &rba, sizeof(rba));
+
anv_pipeline_hash_shader(&ctx, stage);
_mesa_sha1_final(&ctx, sha1_out);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index d8a08d9d67f..37c710ad09a 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1747,6 +1747,13 @@ enum anv_pipe_bits {
* we would have to CS stall on every flush which could be bad.
*/
ANV_PIPE_NEEDS_CS_STALL_BIT = (1 << 21),
+
+ /* This bit does not exist directly in PIPE_CONTROL. It means that render
+ * target operations are ongoing. Some operations like copies on the
+ * command streamer might need to be aware of this to trigger the
+ * appropriate stall before they can proceed with the copy.
+ */
+ ANV_PIPE_RENDER_TARGET_WRITES = (1 << 22),
};
#define ANV_PIPE_FLUSH_BITS ( \
@@ -2950,6 +2957,7 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
+ enum isl_format format,
VkImageAspectFlagBits aspect,
uint32_t base_layer, uint32_t layer_count,
enum isl_aux_op mcs_op, union isl_color_value *clear_value,
@@ -2957,6 +2965,7 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
+ enum isl_format format,
VkImageAspectFlagBits aspect, uint32_t level,
uint32_t base_layer, uint32_t layer_count,
enum isl_aux_op ccs_op, union isl_color_value *clear_value,
diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c
index 2035017ce0e..c573e890946 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -263,4 +263,5 @@ genX(blorp_exec)(struct blorp_batch *batch,
cmd_buffer->state.gfx.vb_dirty = ~0;
cmd_buffer->state.gfx.dirty = ~0;
cmd_buffer->state.push_constants_dirty = ~0;
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
}
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 43a02f22567..dcf37654954 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -737,6 +737,7 @@ anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
static void
anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
+ enum isl_format format,
VkImageAspectFlagBits aspect,
uint32_t level, uint32_t array_layer,
enum isl_aux_op resolve_op,
@@ -761,13 +762,14 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
resolve_op = ISL_AUX_OP_FULL_RESOLVE;
- anv_image_ccs_op(cmd_buffer, image, aspect, level,
+ anv_image_ccs_op(cmd_buffer, image, format, aspect, level,
array_layer, 1, resolve_op, NULL, true);
}
static void
anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
+ enum isl_format format,
VkImageAspectFlagBits aspect,
uint32_t array_layer,
enum isl_aux_op resolve_op,
@@ -781,7 +783,7 @@ anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer,
aspect, 0, array_layer,
resolve_op, fast_clear_supported);
- anv_image_mcs_op(cmd_buffer, image, aspect,
+ anv_image_mcs_op(cmd_buffer, image, format, aspect,
array_layer, 1, resolve_op, NULL, true);
#else
unreachable("MCS resolves are unsupported on Ivybridge and Bay Trail");
@@ -1037,8 +1039,9 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
uint32_t level_layer_count =
MIN2(layer_count, aux_layers - base_layer);
- anv_image_ccs_op(cmd_buffer, image, aspect, level,
- base_layer, level_layer_count,
+ anv_image_ccs_op(cmd_buffer, image,
+ image->planes[plane].surface.isl.format,
+ aspect, level, base_layer, level_layer_count,
ISL_AUX_OP_AMBIGUATE, NULL, false);
if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) {
@@ -1055,8 +1058,9 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
}
assert(base_level == 0 && level_count == 1);
- anv_image_mcs_op(cmd_buffer, image, aspect,
- base_layer, layer_count,
+ anv_image_mcs_op(cmd_buffer, image,
+ image->planes[plane].surface.isl.format,
+ aspect, base_layer, layer_count,
ISL_AUX_OP_FAST_CLEAR, NULL, false);
}
return;
@@ -1133,12 +1137,22 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
for (uint32_t a = 0; a < level_layer_count; a++) {
uint32_t array_layer = base_layer + a;
if (image->samples == 1) {
- anv_cmd_predicated_ccs_resolve(cmd_buffer, image, aspect,
- level, array_layer, resolve_op,
+ anv_cmd_predicated_ccs_resolve(cmd_buffer, image,
+ image->planes[plane].surface.isl.format,
+ aspect, level, array_layer, resolve_op,
final_fast_clear);
} else {
- anv_cmd_predicated_mcs_resolve(cmd_buffer, image, aspect,
- array_layer, resolve_op,
+ /* We only support fast-clear on the first layer so partial
+ * resolves should not be used on other layers as they will use
+ * the clear color stored in memory that is only valid for layer0.
+ */
+ if (resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE &&
+ array_layer != 0)
+ continue;
+
+ anv_cmd_predicated_mcs_resolve(cmd_buffer, image,
+ image->planes[plane].surface.isl.format,
+ aspect, array_layer, resolve_op,
final_fast_clear);
}
}
@@ -1758,6 +1772,12 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
pipe.StallAtPixelScoreboard = true;
}
+ /* If a render target flush was emitted, then we can toggle off the bit
+ * saying that render target writes are ongoing.
+ */
+ if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
+ bits &= ~(ANV_PIPE_RENDER_TARGET_WRITES);
+
bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT);
}
@@ -2769,6 +2789,8 @@ void genX(CmdDraw)(
prim.StartInstanceLocation = firstInstance;
prim.BaseVertexLocation = 0;
}
+
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
}
void genX(CmdDrawIndexed)(
@@ -2808,6 +2830,8 @@ void genX(CmdDrawIndexed)(
prim.StartInstanceLocation = firstInstance;
prim.BaseVertexLocation = vertexOffset;
}
+
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
}
/* Auto-Draw / Indirect Registers */
@@ -2941,6 +2965,8 @@ void genX(CmdDrawIndirect)(
offset += stride;
}
+
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
}
void genX(CmdDrawIndexedIndirect)(
@@ -2980,6 +3006,8 @@ void genX(CmdDrawIndexedIndirect)(
offset += stride;
}
+
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
}
static VkResult
@@ -3645,12 +3673,16 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
union isl_color_value clear_color = {};
anv_clear_color_from_att_state(&clear_color, att_state, iview);
if (iview->image->samples == 1) {
- anv_image_ccs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
+ anv_image_ccs_op(cmd_buffer, image,
+ iview->planes[0].isl.format,
+ VK_IMAGE_ASPECT_COLOR_BIT,
0, 0, 1, ISL_AUX_OP_FAST_CLEAR,
&clear_color,
false);
} else {
- anv_image_mcs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
+ anv_image_mcs_op(cmd_buffer, image,
+ iview->planes[0].isl.format,
+ VK_IMAGE_ASPECT_COLOR_BIT,
0, 1, ISL_AUX_OP_FAST_CLEAR,
&clear_color,
false);
@@ -3870,6 +3902,55 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
struct anv_image_view *iview = fb->attachments[a];
const struct anv_image *image = iview->image;
+ if ((image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) &&
+ image->vk_format != iview->vk_format) {
+ enum anv_fast_clear_type fast_clear_type =
+ anv_layout_to_fast_clear_type(&cmd_buffer->device->info,
+ image, VK_IMAGE_ASPECT_COLOR_BIT,
+ att_state->current_layout);
+
+ /* If any clear color was used, flush it down the aux surfaces. If we
+ * don't do it now using the view's format we might use the clear
+ * color incorrectly in the following resolves (for example with an
+ * SRGB view & a UNORM image).
+ */
+ if (fast_clear_type != ANV_FAST_CLEAR_NONE) {
+ anv_perf_warn(cmd_buffer->device->instance, fb,
+ "Doing a partial resolve to get rid of clear color at the "
+ "end of a renderpass due to an image/view format mismatch");
+
+ uint32_t base_layer, layer_count;
+ if (image->type == VK_IMAGE_TYPE_3D) {
+ base_layer = 0;
+ layer_count = anv_minify(iview->image->extent.depth,
+ iview->planes[0].isl.base_level);
+ } else {
+ base_layer = iview->planes[0].isl.base_array_layer;
+ layer_count = fb->layers;
+ }
+
+ for (uint32_t a = 0; a < layer_count; a++) {
+ uint32_t array_layer = base_layer + a;
+ if (image->samples == 1) {
+ anv_cmd_predicated_ccs_resolve(cmd_buffer, image,
+ iview->planes[0].isl.format,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ iview->planes[0].isl.base_level,
+ array_layer,
+ ISL_AUX_OP_PARTIAL_RESOLVE,
+ ANV_FAST_CLEAR_NONE);
+ } else {
+ anv_cmd_predicated_mcs_resolve(cmd_buffer, image,
+ iview->planes[0].isl.format,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ base_layer,
+ ISL_AUX_OP_PARTIAL_RESOLVE,
+ ANV_FAST_CLEAR_NONE);
+ }
+ }
+ }
+ }
+
/* Transition the image into the final layout for this render pass */
VkImageLayout target_layout =
cmd_state->pass->attachments[a].final_layout;
diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c
index 81522986550..1bee1c6dc17 100644
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -302,4 +302,5 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
}
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
}
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index ce8757f2643..71b7a1352f0 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -729,11 +729,19 @@ void genX(CmdCopyQueryPoolResults)(
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
- pc.CommandStreamerStallEnable = true;
- pc.StallAtPixelScoreboard = true;
- }
+ /* If render target writes are ongoing, request a render target cache flush
+ * to ensure proper ordering of the commands from the 3d pipe and the
+ * command streamer.
+ */
+ if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_WRITES) {
+ cmd_buffer->state.pending_pipe_bits |=
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+ }
+
+ if ((flags & VK_QUERY_RESULT_WAIT_BIT) ||
+ (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS)) {
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
struct anv_address dest_addr = anv_address_add(buffer->address, destOffset);
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index 42800a2581e..99b86f68b3a 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -231,6 +231,22 @@ genX(init_device_state)(struct anv_device *device)
#endif
}
+#if GEN_GEN >= 10
+ /* A fixed function pipe flush is required before modifying this field */
+ anv_batch_emit(&batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.PipeControlFlushEnable = true;
+ }
+ /* enable object level preemption */
+ uint32_t csc1;
+ anv_pack_struct(&csc1, GENX(CS_CHICKEN1),
+ .ReplayMode = ObjectLevelPreemption,
+ .ReplayModeMask = 1);
+ anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+ lri.RegisterOffset = GENX(CS_CHICKEN1_num);
+ lri.DataDWord = csc1;
+ }
+#endif
+
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
assert(batch.next <= batch.end);
diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
index 1981b5f0515..7cd6b1e8ab6 100644
--- a/src/loader/loader_dri3_helper.c
+++ b/src/loader/loader_dri3_helper.c
@@ -1509,6 +1509,7 @@ dri3_update_drawable(struct loader_dri3_drawable *draw)
mtx_unlock(&draw->mtx);
return false;
}
+ free(error);
draw->is_pixmap = true;
xcb_unregister_for_special_event(draw->conn, draw->special_event);
draw->special_event = NULL;
diff --git a/src/mapi/shared-glapi/meson.build b/src/mapi/shared-glapi/meson.build
index dcc6079af3d..3f041471fb9 100644
--- a/src/mapi/shared-glapi/meson.build
+++ b/src/mapi/shared-glapi/meson.build
@@ -40,7 +40,7 @@ libglapi = shared_library(
'glapi',
[files_mapi_glapi, files_mapi_util, shared_glapi_mapi_tmp_h],
c_args : [
- c_msvc_compat_args, '-DMAPI_MODE_GLAPI',
+ c_msvc_compat_args, c_vis_args, '-DMAPI_MODE_GLAPI',
'-DMAPI_ABI_HEADER="@0@"'.format(shared_glapi_mapi_tmp_h.full_path()),
],
link_args : [ld_args_gc_sections],
diff --git a/src/mesa/drivers/dri/Android.mk b/src/mesa/drivers/dri/Android.mk
index 53ff4b4f632..dc1f98364c8 100644
--- a/src/mesa/drivers/dri/Android.mk
+++ b/src/mesa/drivers/dri/Android.mk
@@ -49,11 +49,18 @@ MESA_DRI_WHOLE_STATIC_LIBRARIES := \
MESA_DRI_SHARED_LIBRARIES := \
libcutils \
libdl \
- libexpat \
libglapi \
liblog \
libz
+# Link libexpat statically when PLATFORM_SDK_VERSION >= 27 (Android 8.1 and later); otherwise link it dynamically.
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0)
+MESA_DRI_WHOLE_STATIC_LIBRARIES += \
+ libexpat
+else
+MESA_DRI_SHARED_LIBRARIES += \
+ libexpat
+endif
#-----------------------------------------------
# Build drivers and libmesa_dri_common
diff --git a/src/mesa/drivers/dri/i915/Android.mk b/src/mesa/drivers/dri/i915/Android.mk
index b1054aa6e28..7c9c8210dff 100644
--- a/src/mesa/drivers/dri/i915/Android.mk
+++ b/src/mesa/drivers/dri/i915/Android.mk
@@ -47,7 +47,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
LOCAL_SHARED_LIBRARIES := \
$(MESA_DRI_SHARED_LIBRARIES) \
- libdrm_intel
+ libdrm_intel_pri
LOCAL_GENERATED_SOURCES := \
$(MESA_DRI_OPTIONS_H) \
diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk
index fbad63a0824..580b5443965 100644
--- a/src/mesa/drivers/dri/i965/Android.mk
+++ b/src/mesa/drivers/dri/i965/Android.mk
@@ -310,6 +310,8 @@ LOCAL_LDFLAGS += $(MESA_DRI_LDFLAGS)
LOCAL_CFLAGS := \
$(MESA_DRI_CFLAGS)
+LOCAL_CFLAGS += -Wno-error
+
LOCAL_C_INCLUDES := \
$(MESA_DRI_C_INCLUDES) \
$(MESA_TOP)/include/drm-uapi
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index f1675b191c1..d4e6ba039c9 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -1487,7 +1487,7 @@ brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
brw_bo_make_external(bo);
if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
- DRM_CLOEXEC, prime_fd) != 0)
+ DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
return -errno;
bo->reusable = false;
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 6ba64e4e06d..8cc0529d7e8 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -893,6 +893,19 @@ brw_process_driconf_options(struct brw_context *brw)
ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
driComputeOptionsSha1(&brw->screen->optionCache,
ctx->Const.dri_config_options_sha1);
+
+ brw->screen->compiler->simd32_heuristics_control.grouped_sends_check =
+ driQueryOptionb(&brw->optionCache, "simd32_heuristic_grouped_check");
+ brw->screen->compiler->simd32_heuristics_control.max_grouped_sends =
+ driQueryOptioni(&brw->optionCache, "simd32_heuristic_grouped_sends");
+ brw->screen->compiler->simd32_heuristics_control.inst_count_check =
+ driQueryOptionb(&brw->optionCache, "simd32_heuristic_inst_check");
+ brw->screen->compiler->simd32_heuristics_control.inst_count_ratio =
+ driQueryOptionf(&brw->optionCache, "simd32_heuristic_inst_ratio");
+ brw->screen->compiler->simd32_heuristics_control.mrt_check =
+ driQueryOptionb(&brw->optionCache, "simd32_heuristic_mrt_check");
+ brw->screen->compiler->simd32_heuristics_control.max_mrts =
+ driQueryOptioni(&brw->optionCache, "simd32_heuristic_max_mrts");
}
GLboolean
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7fd15669eb9..47183da66bc 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -686,6 +686,7 @@ enum brw_query_kind {
OA_COUNTERS,
OA_COUNTERS_RAW,
PIPELINE_STATS,
+ NULL_RENDERER,
};
struct brw_perf_query_register_prog {
@@ -842,6 +843,8 @@ struct brw_context
GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */
+ bool object_preemption; /**< Object level preemption enabled. */
+
GLenum reduced_primitive;
/**
@@ -1242,6 +1245,7 @@ struct brw_context
int n_active_oa_queries;
int n_active_pipeline_stats_queries;
+ int n_active_null_renderers;
/* The number of queries depending on running OA counters which
* extends beyond brw_end_perf_query() since we need to wait until
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 97a787a2ab3..0ec50e1d27a 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1650,11 +1650,17 @@ enum brw_pixel_shader_coverage_mask_mode {
#define GEN10_CACHE_MODE_SS 0x0e420
#define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
-#define INSTPM 0x20c0
+#define INSTPM 0x20c0 /* Gen6-8 */
# define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
+# define INSTPM_GLOBAL_DEBUG_ENABLE (1 << 4)
+# define INSTPM_MEDIA_INSTRUCTION_DISABLE (1 << 3)
+# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE (1 << 2)
+# define INSTPM_3D_STATE_INSTRUCTION_DISABLE (1 << 1)
#define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */
# define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
+# define CSDBG2_MEDIA_INSTRUCTION_DISABLE (1 << 1)
+# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE (1 << 0)
#define GEN7_RPSTAT1 0xA01C
#define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7
@@ -1681,4 +1687,9 @@ enum brw_pixel_shader_coverage_mask_mode {
# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS (1 << 5)
# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK REG_MASK(1 << 5)
+#define CS_CHICKEN1 0x2580 /* Gen9+ */
+# define GEN9_REPLAY_MODE_MIDBUFFER (0 << 0)
+# define GEN9_REPLAY_MODE_MIDOBJECT (1 << 0)
+# define GEN9_REPLAY_MODE_MASK REG_MASK(1 << 0)
+
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 10e3d024f17..85d14a83c7e 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -330,6 +330,12 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void)
o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"),
obj->pipeline_stats.bo ? "yes" : "no");
break;
+ case NULL_RENDERER:
+ DBG("%4d: %-6s %-8s NULL_RENDERER\n",
+ id,
+ o->Used ? "Dirty," : "New,",
+ o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"));
+ break;
default:
unreachable("Unknown query type");
break;
@@ -431,6 +437,10 @@ brw_get_perf_query_info(struct gl_context *ctx,
*n_active = brw->perfquery.n_active_pipeline_stats_queries;
break;
+ case NULL_RENDERER:
+ *n_active = brw->perfquery.n_active_null_renderers;
+ break;
+
default:
unreachable("Unknown query type");
break;
@@ -1020,6 +1030,7 @@ brw_begin_perf_query(struct gl_context *ctx,
struct brw_context *brw = brw_context(ctx);
struct brw_perf_query_object *obj = brw_perf_query(o);
const struct brw_perf_query_info *query = obj->query;
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
/* We can assume the frontend hides mistaken attempts to Begin a
* query object multiple times before its End. Similarly if an
@@ -1104,7 +1115,6 @@ brw_begin_perf_query(struct gl_context *ctx,
/* If the OA counters aren't already on, enable them. */
if (brw->perfquery.oa_stream_fd == -1) {
__DRIscreen *screen = brw->screen->driScrnPriv;
- const struct gen_device_info *devinfo = &brw->screen->devinfo;
/* The period_exponent gives a sampling period as follows:
* sample_period = timestamp_period * 2^(period_exponent + 1)
@@ -1250,6 +1260,23 @@ brw_begin_perf_query(struct gl_context *ctx,
++brw->perfquery.n_active_pipeline_stats_queries;
break;
+ case NULL_RENDERER:
+ ++brw->perfquery.n_active_null_renderers;
+ if (devinfo->gen >= 9) {
+ brw_load_register_imm32(brw, CS_DEBUG_MODE2,
+ REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE) |
+ CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE);
+ } else {
+ brw_load_register_imm32(brw, INSTPM,
+ REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
+ INSTPM_MEDIA_INSTRUCTION_DISABLE) |
+ INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
+ INSTPM_MEDIA_INSTRUCTION_DISABLE);
+ }
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_LRI_WRITE_IMMEDIATE);
+ break;
+
default:
unreachable("Unknown query type");
break;
@@ -1270,6 +1297,7 @@ brw_end_perf_query(struct gl_context *ctx,
{
struct brw_context *brw = brw_context(ctx);
struct brw_perf_query_object *obj = brw_perf_query(o);
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
DBG("End(%d)\n", o->Id);
@@ -1312,6 +1340,21 @@ brw_end_perf_query(struct gl_context *ctx,
--brw->perfquery.n_active_pipeline_stats_queries;
break;
+ case NULL_RENDERER:
+ if (--brw->perfquery.n_active_null_renderers == 0) {
+ if (devinfo->gen >= 9) {
+ brw_load_register_imm32(brw, CS_DEBUG_MODE2,
+ REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE));
+ } else {
+ brw_load_register_imm32(brw, INSTPM,
+ REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
+ INSTPM_MEDIA_INSTRUCTION_DISABLE));
+ }
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_LRI_WRITE_IMMEDIATE);
+ }
+ break;
+
default:
unreachable("Unknown query type");
break;
@@ -1337,6 +1380,9 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o)
bo = obj->pipeline_stats.bo;
break;
+ case NULL_RENDERER:
+ break;
+
default:
unreachable("Unknown query type");
break;
@@ -1387,6 +1433,8 @@ brw_is_perf_query_ready(struct gl_context *ctx,
return (obj->pipeline_stats.bo &&
!brw_batch_references(&brw->batch, obj->pipeline_stats.bo) &&
!brw_bo_busy(obj->pipeline_stats.bo));
+ case NULL_RENDERER:
+ return true;
default:
unreachable("Unknown query type");
@@ -1602,6 +1650,9 @@ brw_get_perf_query_data(struct gl_context *ctx,
written = get_pipeline_stats_data(brw, obj, data_size, (uint8_t *)data);
break;
+ case NULL_RENDERER:
+ break;
+
default:
unreachable("Unknown query type");
break;
@@ -1672,6 +1723,9 @@ brw_delete_perf_query(struct gl_context *ctx,
}
break;
+ case NULL_RENDERER:
+ break;
+
default:
unreachable("Unknown query type");
break;
@@ -2152,6 +2206,15 @@ get_register_queries_function(const struct gen_device_info *devinfo)
return NULL;
}
+static void
+fill_null_renderer_perf_query_info(struct brw_context *brw,
+ struct brw_perf_query_info *query)
+{
+ query->kind = NULL_RENDERER;
+ query->name = "Intel_Null_Hardware_Query";
+ query->n_counters = 0;
+}
+
static unsigned
brw_init_perf_query_info(struct gl_context *ctx)
{
@@ -2210,6 +2273,10 @@ brw_init_perf_query_info(struct gl_context *ctx)
enumerate_sysfs_metrics(brw);
brw_perf_query_register_mdapi_oa_query(brw);
+
+ struct brw_perf_query_info *null_query =
+ brw_perf_query_append_query_info(brw);
+ fill_null_renderer_perf_query_info(brw, null_query);
}
brw->perfquery.unaccumulated =
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index f6acf81b899..546d103d1a4 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -128,7 +128,7 @@ void brw_disk_cache_write_compute_program(struct brw_context *brw);
void brw_disk_cache_write_render_programs(struct brw_context *brw);
/***********************************************************************
- * brw_state.c
+ * brw_state_upload.c
*/
void brw_upload_render_state(struct brw_context *brw);
void brw_render_state_finished(struct brw_context *brw);
@@ -138,6 +138,7 @@ void brw_init_state(struct brw_context *brw);
void brw_destroy_state(struct brw_context *brw);
void brw_emit_select_pipeline(struct brw_context *brw,
enum brw_pipeline pipeline);
+void brw_enable_obj_preemption(struct brw_context *brw, bool enable);
static inline void
brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 7f20579fb87..2e42dfb36d6 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -45,6 +45,28 @@
#include "brw_cs.h"
#include "main/framebuffer.h"
+void
+brw_enable_obj_preemption(struct brw_context *brw, bool enable)
+{
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ assert(devinfo->gen >= 9);
+
+ if (enable == brw->object_preemption)
+ return;
+
+ /* A fixed function pipe flush is required before modifying this field */
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
+
+ bool replay_mode = enable ?
+ GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER;
+
+ /* program the selected replay mode (mid-object or mid-buffer preemption) */
+ brw_load_register_imm32(brw, CS_CHICKEN1,
+ replay_mode | GEN9_REPLAY_MODE_MASK);
+
+ brw->object_preemption = enable;
+}
+
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
@@ -153,6 +175,9 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
ADVANCE_BATCH();
}
}
+
+ if (devinfo->gen >= 10)
+ brw_enable_obj_preemption(brw, true);
}
static inline const struct brw_tracked_state *
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 8d21cf5fa70..3286c222e5b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1499,18 +1499,6 @@ update_buffer_image_param(struct brw_context *brw,
param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}
-static unsigned
-get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
- unsigned level)
-{
- if (target == GL_TEXTURE_CUBE_MAP)
- return 6;
-
- return target == GL_TEXTURE_3D ?
- minify(mt->surf.logical_level0_px.depth, level) :
- mt->surf.logical_level0_px.array_len;
-}
-
static void
update_image_surface(struct brw_context *brw,
struct gl_image_unit *u,
@@ -1541,14 +1529,29 @@ update_image_surface(struct brw_context *brw,
} else {
struct intel_texture_object *intel_obj = intel_texture_object(obj);
struct intel_mipmap_tree *mt = intel_obj->mt;
- const unsigned num_layers = u->Layered ?
- get_image_num_layers(mt, obj->Target, u->Level) : 1;
+
+ unsigned base_layer, num_layers;
+ if (u->Layered) {
+ if (obj->Target == GL_TEXTURE_3D) {
+ base_layer = 0;
+ num_layers = minify(mt->surf.logical_level0_px.depth, u->Level);
+ } else {
+ assert(obj->Immutable || obj->MinLayer == 0);
+ base_layer = obj->MinLayer;
+ num_layers = obj->Immutable ?
+ obj->NumLayers :
+ mt->surf.logical_level0_px.array_len;
+ }
+ } else {
+ base_layer = obj->MinLayer + u->_Layer;
+ num_layers = 1;
+ }
struct isl_view view = {
.format = format,
.base_level = obj->MinLevel + u->Level,
.levels = 1,
- .base_array_layer = obj->MinLayer + u->_Layer,
+ .base_array_layer = base_layer,
.array_len = num_layers,
.swizzle = ISL_SWIZZLE_IDENTITY,
.usage = ISL_SURF_USAGE_STORAGE_BIT,
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index fd9ce93c6c7..97ae2707049 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -213,7 +213,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
}
if (need_invalidate) {
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL);
}
#endif
}
@@ -268,7 +268,7 @@ genX(blorp_exec)(struct blorp_batch *batch,
assert(batch->blorp->driver_ctx == batch->driver_batch);
struct brw_context *brw = batch->driver_batch;
struct gl_context *ctx = &brw->ctx;
- bool check_aperture_failed_once;
+ bool check_aperture_failed_once = false;
#if GEN_GEN >= 11
/* The PIPE_CONTROL command description says:
@@ -309,7 +309,7 @@ genX(blorp_exec)(struct blorp_batch *batch,
intel_batchbuffer_require_space(brw, 1400);
brw_require_statebuffer_space(brw, 600);
intel_batchbuffer_save_state(brw);
- check_aperture_failed_once = intel_batchbuffer_saved_state_is_empty(brw);
+ check_aperture_failed_once |= intel_batchbuffer_saved_state_is_empty(brw);
brw->batch.no_wrap = true;
#if GEN_GEN == 6
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 9cd017a5cff..5d2572cb4dc 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -505,9 +505,8 @@ vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw)
{
#if GEN_GEN >= 8
bool need_invalidate = false;
- unsigned i;
- for (i = 0; i < brw->vb.nr_buffers; i++) {
+ for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo);
if (high_bits != brw->vb.last_bo_high_bits[i]) {
@@ -516,12 +515,26 @@ vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw)
}
}
- /* Don't bother with draw parameter buffers - those are generated by
- * the driver so we can select a consistent memory zone.
- */
+ if (brw->draw.draw_params_bo) {
+ uint16_t high_bits = pinned_bo_high_bits(brw->draw.draw_params_bo);
+
+ if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers] != high_bits) {
+ need_invalidate = true;
+ brw->vb.last_bo_high_bits[brw->vb.nr_buffers] = high_bits;
+ }
+ }
+
+ if (brw->draw.derived_draw_params_bo) {
+ uint16_t high_bits = pinned_bo_high_bits(brw->draw.derived_draw_params_bo);
+
+ if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] != high_bits) {
+ need_invalidate = true;
+ brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] = high_bits;
+ }
+ }
if (need_invalidate) {
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL);
}
#endif
}
@@ -5602,6 +5615,50 @@ static const struct brw_tracked_state genX(blend_constant_color) = {
/* ---------------------------------------------------------------------- */
+#if GEN_GEN == 9
+
+/**
+ * Implement workarounds for preemption:
+ * - WaDisableMidObjectPreemptionForGSLineStripAdj
+ * - WaDisableMidObjectPreemptionForTrifanOrPolygon
+ */
+static void
+gen9_emit_preempt_wa(struct brw_context *brw)
+{
+ /* WaDisableMidObjectPreemptionForGSLineStripAdj
+ *
+ * WA: Disable mid-draw preemption when draw-call is a linestrip_adj and
+ * GS is enabled.
+ */
+ bool object_preemption =
+ !(brw->primitive == _3DPRIM_LINESTRIP_ADJ && brw->gs.enabled);
+
+ /* WaDisableMidObjectPreemptionForTrifanOrPolygon
+ *
+ * TriFan miscompare in Execlist Preemption test. Cut index that is on a
+ * previous context. End the previous, then resume another context with a
+ * tri-fan or polygon, and the vertex count is corrupted. If we preempt
+ * again we will cause corruption.
+ *
+ * WA: Disable mid-draw preemption when draw-call has a tri-fan.
+ */
+ object_preemption =
+ object_preemption && !(brw->primitive == _3DPRIM_TRIFAN);
+
+ brw_enable_obj_preemption(brw, object_preemption);
+}
+
+static const struct brw_tracked_state gen9_preempt_wa = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM,
+ },
+ .emit = gen9_emit_preempt_wa,
+};
+#endif
+
+/* ---------------------------------------------------------------------- */
+
void
genX(init_atoms)(struct brw_context *brw)
{
@@ -5906,6 +5963,9 @@ genX(init_atoms)(struct brw_context *brw)
&genX(cut_index),
&gen8_pma_fix,
+#if GEN_GEN == 9
+ &gen9_preempt_wa,
+#endif
};
#endif
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index d7e02efb54d..0cfe2acbdd4 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -247,7 +247,6 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.OES_primitive_bounding_box = true;
ctx->Extensions.OES_texture_buffer = true;
ctx->Extensions.ARB_fragment_shader_interlock = true;
- ctx->Extensions.INTEL_fragment_shader_ordering = true;
if (can_do_pipelined_register_writes(brw->screen)) {
ctx->Extensions.ARB_draw_indirect = true;
diff --git a/src/mesa/drivers/dri/i965/intel_image.h b/src/mesa/drivers/dri/i965/intel_image.h
index a8193c6def9..ca604159dc2 100644
--- a/src/mesa/drivers/dri/i965/intel_image.h
+++ b/src/mesa/drivers/dri/i965/intel_image.h
@@ -89,9 +89,6 @@ struct __DRIimageRec {
GLuint tile_y;
bool has_depthstencil;
- /** The image was created with EGL_EXT_image_dma_buf_import. */
- bool dma_buf_imported;
-
/** Offset of the auxiliary compression surface in the bo. */
uint32_t aux_offset;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index c3bd30f7837..89110e60a8d 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -61,6 +61,33 @@ DRI_CONF_BEGIN
DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
DRI_CONF_DESC_END
DRI_CONF_OPT_END
+
+ DRI_CONF_OPT_BEGIN_B(simd32_heuristic_grouped_check, "true")
+ DRI_CONF_DESC(en, "Enable/disable grouped texture fetch "
+ "check in the SIMD32 selection heuristic.")
+ DRI_CONF_OPT_END
+ DRI_CONF_OPT_BEGIN_V(simd32_heuristic_grouped_sends, int, 6, "1:999")
+ DRI_CONF_DESC(en, "How many grouped texture fetches should "
+ "the SIMD32 selection heuristic allow.")
+ DRI_CONF_OPT_END
+ DRI_CONF_OPT_BEGIN_B(simd32_heuristic_inst_check, "true")
+ DRI_CONF_DESC(en, "Enable/disable SIMD32/SIMD16 instruction "
+ "count ratio check in the SIMD32 selection "
+ "heuristic.")
+ DRI_CONF_OPT_END
+ DRI_CONF_OPT_BEGIN_V(simd32_heuristic_inst_ratio, float, 2.3, "1:999")
+ DRI_CONF_DESC(en, "SIMD32/SIMD16 instruction count ratio "
+ "the SIMD32 selection heuristic should allow.")
+ DRI_CONF_OPT_END
+ DRI_CONF_OPT_BEGIN_B(simd32_heuristic_mrt_check, "true")
+ DRI_CONF_DESC(en, "Enable/disable MRT write check in the "
+ "SIMD32 selection heuristic.")
+ DRI_CONF_OPT_END
+ DRI_CONF_OPT_BEGIN_V(simd32_heuristic_max_mrts, int, 1, "1:8")
+ DRI_CONF_DESC(en, "How many MRT writes should the SIMD32 "
+ "selection heuristic allow.")
+ DRI_CONF_OPT_END
+
DRI_CONF_MESA_NO_ERROR("false")
DRI_CONF_SECTION_END
@@ -282,6 +309,18 @@ static const struct intel_image_format intel_image_formats[] = {
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } },
+ { __DRI_IMAGE_FOURCC_P010, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
+ { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
+
+ { __DRI_IMAGE_FOURCC_P012, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
+ { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
+
+ { __DRI_IMAGE_FOURCC_P016, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
+ { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
+
{ __DRI_IMAGE_FOURCC_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } },
@@ -957,7 +996,6 @@ intel_dup_image(__DRIimage *orig_image, void *loaderPrivate)
image->tile_y = orig_image->tile_y;
image->has_depthstencil = orig_image->has_depthstencil;
image->data = loaderPrivate;
- image->dma_buf_imported = orig_image->dma_buf_imported;
image->aux_offset = orig_image->aux_offset;
image->aux_pitch = orig_image->aux_pitch;
@@ -1237,7 +1275,6 @@ intel_create_image_from_dma_bufs2(__DRIscreen *dri_screen,
return NULL;
}
- image->dma_buf_imported = true;
image->yuv_color_space = yuv_color_space;
image->sample_range = sample_range;
image->horizontal_siting = horizontal_siting;
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index bdcdb7736e6..674fa1c6fbf 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -614,16 +614,6 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
if (image == NULL)
return;
- /* We support external textures only for EGLImages created with
- * EGL_EXT_image_dma_buf_import. We may lift that restriction in the future.
- */
- if (target == GL_TEXTURE_EXTERNAL_OES && !image->dma_buf_imported) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glEGLImageTargetTexture2DOES(external target is enabled only "
- "for images created with EGL_EXT_image_dma_buf_import");
- return;
- }
-
/* Disallow depth/stencil textures: we don't have a way to pass the
* separate stencil miptree of a GL_DEPTH_STENCIL texture through.
*/
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 47db1583135..aac96290ded 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -317,7 +317,6 @@ EXT(IBM_texture_mirrored_repeat , dummy_true
EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999)
EXT(INTEL_conservative_rasterization , INTEL_conservative_rasterization , x , GLC, x , 31, 2013)
-EXT(INTEL_fragment_shader_ordering , INTEL_fragment_shader_ordering , GLL, GLC, x , x , 2013)
EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013)
EXT(INTEL_shader_atomic_float_minmax , INTEL_shader_atomic_float_minmax , GLL, GLC, x , x , 2018)
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 9ed49b7ff24..f30b778a7b1 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -4296,7 +4296,6 @@ struct gl_extensions
GLboolean ATI_fragment_shader;
GLboolean GREMEDY_string_marker;
GLboolean INTEL_conservative_rasterization;
- GLboolean INTEL_fragment_shader_ordering;
GLboolean INTEL_performance_query;
GLboolean INTEL_shader_atomic_float_minmax;
GLboolean KHR_blend_equation_advanced;
diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 556c860d393..d9a12d2387f 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -922,6 +922,8 @@ read_pixels_es3_error_check(struct gl_context *ctx, GLenum format, GLenum type,
case GL_RGBA:
if (type == GL_FLOAT && data_type == GL_FLOAT)
return GL_NO_ERROR; /* EXT_color_buffer_float */
+ if (type == GL_HALF_FLOAT && data_type == GL_FLOAT)
+ return GL_NO_ERROR;
if (type == GL_UNSIGNED_BYTE && data_type == GL_UNSIGNED_NORMALIZED)
return GL_NO_ERROR;
if (internalFormat == GL_RGB10_A2 &&
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 0ab9ed445d6..bb4f7006618 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -900,8 +900,7 @@ select_tex_image(const struct gl_texture_object *texObj, GLenum target,
/**
* Error-check the offset and size arguments to
- * glGet[Compressed]TextureSubImage(). Also checks if the specified
- * texture image is missing.
+ * glGet[Compressed]TextureSubImage().
* \return true if error, false if no error.
*/
static bool
@@ -913,6 +912,7 @@ dimensions_error_check(struct gl_context *ctx,
const char *caller)
{
const struct gl_texture_image *texImage;
+ GLuint imageWidth = 0, imageHeight = 0, imageDepth = 0;
if (xoffset < 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset = %d)", caller, xoffset);
@@ -981,82 +981,44 @@ dimensions_error_check(struct gl_context *ctx,
"%s(zoffset + depth = %d)", caller, zoffset + depth);
return true;
}
- /* According to OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries"):
- *
- * "An INVALID_OPERATION error is generated by GetTextureImage if the
- * effective target is TEXTURE_CUBE_MAP or TEXTURE_CUBE_MAP_ARRAY ,
- * and the texture object is not cube complete or cube array complete,
- * respectively."
- *
- * This applies also to GetTextureSubImage, GetCompressedTexImage,
- * GetCompressedTextureImage, and GetnCompressedTexImage.
- */
- if (!_mesa_cube_complete(texObj)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "%s(cube incomplete)", caller);
- return true;
- }
break;
default:
; /* nothing */
}
texImage = select_tex_image(texObj, target, level, zoffset);
- if (!texImage) {
- /* Trying to return a non-defined level is a valid operation per se, as
- * OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries") does not
- * handle this case as an error.
- *
- * Rather, we need to look at section 8.22 ("Texture State and Proxy
- * State"):
- *
- * "Each initial texture image is null. It has zero width, height, and
- * depth, internal format RGBA, or R8 for buffer textures, component
- * sizes set to zero and component types set to NONE, the compressed
- * flag set to FALSE, a zero compressed size, and the bound buffer
- * object name is zero."
- *
- * This means we need to assume the image for the non-defined level is
- * an empty image. With this assumption, we can go back to section
- * 8.11.4 and checking again the errors:
- *
- * "An INVALID_VALUE error is generated if xoffset + width is greater
- * than the texture’s width, yoffset + height is greater than the
- * texture’s height, or zoffset + depth is greater than the texture’s
- * depth."
- *
- * Thus why we return INVALID_VALUE.
- */
- _mesa_error(ctx, GL_INVALID_VALUE, "%s(missing image)", caller);
- return true;
+ if (texImage) {
+ imageWidth = texImage->Width;
+ imageHeight = texImage->Height;
+ imageDepth = texImage->Depth;
}
- if (xoffset + width > texImage->Width) {
+ if (xoffset + width > imageWidth) {
_mesa_error(ctx, GL_INVALID_VALUE,
"%s(xoffset %d + width %d > %u)",
- caller, xoffset, width, texImage->Width);
+ caller, xoffset, width, imageWidth);
return true;
}
- if (yoffset + height > texImage->Height) {
+ if (yoffset + height > imageHeight) {
_mesa_error(ctx, GL_INVALID_VALUE,
"%s(yoffset %d + height %d > %u)",
- caller, yoffset, height, texImage->Height);
+ caller, yoffset, height, imageHeight);
return true;
}
if (target != GL_TEXTURE_CUBE_MAP) {
/* Cube map error checking was done above */
- if (zoffset + depth > texImage->Depth) {
+ if (zoffset + depth > imageDepth) {
_mesa_error(ctx, GL_INVALID_VALUE,
"%s(zoffset %d + depth %d > %u)",
- caller, zoffset, depth, texImage->Depth);
+ caller, zoffset, depth, imageDepth);
return true;
}
}
/* Extra checks for compressed textures */
- {
+ if (texImage) {
GLuint bw, bh, bd;
_mesa_get_format_block_size_3d(texImage->TexFormat, &bw, &bh, &bd);
if (bw > 1 || bh > 1 || bd > 1) {
@@ -1162,53 +1124,15 @@ pbo_error_check(struct gl_context *ctx, GLenum target,
/**
- * Do error checking for all (non-compressed) get-texture-image functions.
- * \return true if any error, false if no errors.
+ * Do teximage-related error checking for getting uncompressed images.
+ * \return true if there was an error
*/
static bool
-getteximage_error_check(struct gl_context *ctx,
- struct gl_texture_object *texObj,
- GLenum target, GLint level,
- GLint xoffset, GLint yoffset, GLint zoffset,
- GLsizei width, GLsizei height, GLsizei depth,
- GLenum format, GLenum type, GLsizei bufSize,
- GLvoid *pixels, const char *caller)
+teximage_error_check(struct gl_context *ctx,
+ struct gl_texture_image *texImage,
+ GLenum format, const char *caller)
{
- struct gl_texture_image *texImage;
- GLenum baseFormat, err;
- GLint maxLevels;
-
- assert(texObj);
-
- if (texObj->Target == 0) {
- _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller);
- return true;
- }
-
- maxLevels = _mesa_max_texture_levels(ctx, target);
- if (level < 0 || level >= maxLevels) {
- _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level);
- return true;
- }
-
- err = _mesa_error_check_format_and_type(ctx, format, type);
- if (err != GL_NO_ERROR) {
- _mesa_error(ctx, err, "%s(format/type)", caller);
- return true;
- }
-
- if (dimensions_error_check(ctx, texObj, target, level,
- xoffset, yoffset, zoffset,
- width, height, depth, caller)) {
- return true;
- }
-
- if (pbo_error_check(ctx, target, width, height, depth,
- format, type, bufSize, pixels, caller)) {
- return true;
- }
-
- texImage = select_tex_image(texObj, target, level, zoffset);
+ GLenum baseFormat;
assert(texImage);
/*
@@ -1241,8 +1165,8 @@ getteximage_error_check(struct gl_context *ctx,
return true;
}
else if (_mesa_is_stencil_format(format)
- && !_mesa_is_depthstencil_format(baseFormat)
- && !_mesa_is_stencil_format(baseFormat)) {
+ && !_mesa_is_depthstencil_format(baseFormat)
+ && !_mesa_is_stencil_format(baseFormat)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(format mismatch)", caller);
return true;
@@ -1271,6 +1195,142 @@ getteximage_error_check(struct gl_context *ctx,
}
+/**
+ * Do the texture-object-level error checking common to getting uncompressed images.
+ * \return true if there was an error
+ */
+static bool
+common_error_check(struct gl_context *ctx,
+ struct gl_texture_object *texObj,
+ GLenum target, GLint level,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type, GLsizei bufSize,
+ GLvoid *pixels, const char *caller)
+{
+ GLenum err;
+ GLint maxLevels;
+
+ if (texObj->Target == 0) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller);
+ return true;
+ }
+
+ maxLevels = _mesa_max_texture_levels(ctx, target);
+ if (level < 0 || level >= maxLevels) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level);
+ return true;
+ }
+
+ err = _mesa_error_check_format_and_type(ctx, format, type);
+ if (err != GL_NO_ERROR) {
+ _mesa_error(ctx, err, "%s(format/type)", caller);
+ return true;
+ }
+
+ /* According to OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries"):
+ *
+ * "An INVALID_OPERATION error is generated by GetTextureImage if the
+ * effective target is TEXTURE_CUBE_MAP or TEXTURE_CUBE_MAP_ARRAY ,
+ * and the texture object is not cube complete or cube array complete,
+ * respectively."
+ *
+ * This applies also to GetTextureSubImage, GetCompressedTexImage,
+ * GetCompressedTextureImage, and GetnCompressedTexImage.
+ */
+ if (target == GL_TEXTURE_CUBE_MAP && !_mesa_cube_complete(texObj)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(cube incomplete)", caller);
+ return true;
+ }
+
+ return false;
+}
+
+
+/**
+ * Do error checking for all (non-compressed) get-texture-image functions.
+ * \return true if any error, false if no errors.
+ */
+static bool
+getteximage_error_check(struct gl_context *ctx,
+ struct gl_texture_object *texObj,
+ GLenum target, GLint level,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type, GLsizei bufSize,
+ GLvoid *pixels, const char *caller)
+{
+ struct gl_texture_image *texImage;
+
+ assert(texObj);
+
+ if (common_error_check(ctx, texObj, target, level, width, height, depth,
+ format, type, bufSize, pixels, caller)) {
+ return true;
+ }
+
+ if (width == 0 || height == 0 || depth == 0) {
+ /* Not an error, but nothing to do. Return 'true' so that the
+ * caller simply returns.
+ */
+ return true;
+ }
+
+ if (pbo_error_check(ctx, target, width, height, depth,
+ format, type, bufSize, pixels, caller)) {
+ return true;
+ }
+
+ texImage = select_tex_image(texObj, target, level, 0);
+ if (teximage_error_check(ctx, texImage, format, caller)) {
+ return true;
+ }
+
+ return false;
+}
+
+
+/**
+ * Do error checking for the (non-compressed) get-texture-sub-image functions.
+ * \return true if any error, false if no errors.
+ */
+static bool
+gettexsubimage_error_check(struct gl_context *ctx,
+ struct gl_texture_object *texObj,
+ GLenum target, GLint level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type, GLsizei bufSize,
+ GLvoid *pixels, const char *caller)
+{
+ struct gl_texture_image *texImage;
+
+ assert(texObj);
+
+ if (common_error_check(ctx, texObj, target, level, width, height, depth,
+ format, type, bufSize, pixels, caller)) {
+ return true;
+ }
+
+ if (dimensions_error_check(ctx, texObj, target, level,
+ xoffset, yoffset, zoffset,
+ width, height, depth, caller)) {
+ return true;
+ }
+
+ if (pbo_error_check(ctx, target, width, height, depth,
+ format, type, bufSize, pixels, caller)) {
+ return true;
+ }
+
+ texImage = select_tex_image(texObj, target, level, zoffset);
+ if (teximage_error_check(ctx, texImage, format, caller)) {
+ return true;
+ }
+
+ return false;
+}
+
+
/**
* Return the width, height and depth of a texture image.
* This function must be resilient to bad parameter values since
@@ -1399,7 +1459,7 @@ _mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format, GLenum type,
get_texture_image_dims(texObj, target, level, &width, &height, &depth);
if (getteximage_error_check(ctx, texObj, target, level,
- 0, 0, 0, width, height, depth,
+ width, height, depth,
format, type, bufSize, pixels, caller)) {
return;
}
@@ -1430,7 +1490,7 @@ _mesa_GetTexImage(GLenum target, GLint level, GLenum format, GLenum type,
get_texture_image_dims(texObj, target, level, &width, &height, &depth);
if (getteximage_error_check(ctx, texObj, target, level,
- 0, 0, 0, width, height, depth,
+ width, height, depth,
format, type, INT_MAX, pixels, caller)) {
return;
}
@@ -1464,7 +1524,7 @@ _mesa_GetTextureImage(GLuint texture, GLint level, GLenum format, GLenum type,
&width, &height, &depth);
if (getteximage_error_check(ctx, texObj, texObj->Target, level,
- 0, 0, 0, width, height, depth,
+ width, height, depth,
format, type, bufSize, pixels, caller)) {
return;
}
@@ -1497,9 +1557,10 @@ _mesa_GetTextureSubImage(GLuint texture, GLint level,
return;
}
- if (getteximage_error_check(ctx, texObj, texObj->Target, level,
- xoffset, yoffset, zoffset, width, height, depth,
- format, type, bufSize, pixels, caller)) {
+ if (gettexsubimage_error_check(ctx, texObj, texObj->Target, level,
+ xoffset, yoffset, zoffset,
+ width, height, depth,
+ format, type, bufSize, pixels, caller)) {
return;
}
diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c
index a46c9f94bca..8eccdc20b76 100644
--- a/src/mesa/main/transformfeedback.c
+++ b/src/mesa/main/transformfeedback.c
@@ -40,6 +40,7 @@
#include "shaderapi.h"
#include "shaderobj.h"
+#include "program/program.h"
#include "program/prog_parameter.h"
struct using_program_tuple
@@ -470,6 +471,7 @@ begin_transform_feedback(struct gl_context *ctx, GLenum mode, bool no_error)
if (obj->program != source) {
ctx->NewDriverState |= ctx->DriverFlags.NewTransformFeedbackProg;
+ _mesa_reference_program_(ctx, &obj->program, source);
obj->program = source;
}
@@ -504,6 +506,7 @@ end_transform_feedback(struct gl_context *ctx,
assert(ctx->Driver.EndTransformFeedback);
ctx->Driver.EndTransformFeedback(ctx, obj);
+ _mesa_reference_program_(ctx, &obj->program, NULL);
ctx->TransformFeedback.CurrentObject->Active = GL_FALSE;
ctx->TransformFeedback.CurrentObject->Paused = GL_FALSE;
ctx->TransformFeedback.CurrentObject->EndedAnytime = GL_TRUE;
diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk
index c6470e6289e..13d0da85882 100644
--- a/src/mesa/program/Android.mk
+++ b/src/mesa/program/Android.mk
@@ -41,7 +41,7 @@ endef
include $(MESA_TOP)/src/mesa/Makefile.sources
include $(CLEAR_VARS)
-
+LOCAL_CFLAGS += -Wno-error
LOCAL_MODULE := libmesa_program
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
LOCAL_STATIC_LIBRARIES := libmesa_nir \
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 0e535257cb4..bdc8dda82c2 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -285,8 +285,11 @@ st_renderbuffer_delete(struct gl_context *ctx, struct gl_renderbuffer *rb)
struct st_context *st = st_context(ctx);
pipe_surface_release(st->pipe, &strb->surface_srgb);
pipe_surface_release(st->pipe, &strb->surface_linear);
- strb->surface = NULL;
+ } else {
+ pipe_surface_release_no_context(&strb->surface_srgb);
+ pipe_surface_release_no_context(&strb->surface_linear);
}
+ strb->surface = NULL;
pipe_resource_reference(&strb->texture, NULL);
free(strb->data);
_mesa_delete_renderbuffer(ctx, rb);
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index c58deadc957..581a8639ef0 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -749,7 +749,8 @@ st_link_nir(struct gl_context *ctx,
* the pipe_stream_output->output_register field is based on the
* pre-compacted driver_locations.
*/
- if (!prev_shader->sh.LinkedTransformFeedback)
+ if (!(prev_shader->sh.LinkedTransformFeedback &&
+ prev_shader->sh.LinkedTransformFeedback->NumVarying > 0))
nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir,
nir, ctx->API != API_OPENGL_COMPAT);
}
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5322903b93a..0783f67f2b7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4072,7 +4072,6 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
case ir_intrinsic_generic_atomic_comp_swap:
case ir_intrinsic_begin_invocation_interlock:
case ir_intrinsic_end_invocation_interlock:
- case ir_intrinsic_begin_fragment_shader_ordering:
unreachable("Invalid intrinsic");
}
}
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index ceb48dd4903..776b563e50e 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -1069,15 +1069,6 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi,
* of the referenced drawables no longer exist.
*/
st_framebuffers_purge(st);
-
- /* Notify the driver that the context thread may have been changed.
- * This should pin all driver threads to a specific L3 cache for optimal
- * performance on AMD Zen CPUs.
- */
- struct glthread_state *glthread = st->ctx->GLThread;
- thrd_t *upper_thread = glthread ? &glthread->queue.threads[0] : NULL;
-
- util_context_thread_changed(st->pipe, upper_thread);
}
else {
ret = _mesa_make_current(NULL, NULL, NULL);
diff --git a/src/meson.build b/src/meson.build
index 73146d37143..3b91c6a88c5 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -51,8 +51,12 @@ subdir('util')
subdir('mapi')
# TODO: opengl
subdir('compiler')
-subdir('egl/wayland/wayland-drm')
-subdir('vulkan')
+if with_platform_wayland
+ subdir('egl/wayland/wayland-drm')
+endif
+if with_any_vk
+ subdir('vulkan')
+endif
if with_gallium_radeonsi or with_amd_vk
subdir('amd')
endif
@@ -67,7 +71,7 @@ subdir('loader')
if with_platform_haiku
subdir('hgl')
endif
-if with_glx != 'disabled'
+if with_glx == 'dri'
subdir('glx')
endif
if with_gbm
diff --git a/src/util/Android.mk b/src/util/Android.mk
index 2d59e1ae15e..6d770ca9575 100644
--- a/src/util/Android.mk
+++ b/src/util/Android.mk
@@ -41,8 +41,14 @@ LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/gallium/include \
$(MESA_TOP)/src/gallium/auxiliary
+# If PLATFORM_SDK_VERSION >= 27 (Android 8.1), link libexpat statically; otherwise link it dynamically.
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0)
+LOCAL_STATIC_LIBRARIES := \
+ libexpat
+else
LOCAL_SHARED_LIBRARIES := \
libexpat
+endif
LOCAL_MODULE := libmesa_util
diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index b857db8a866..4bda54c551d 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -60,7 +60,8 @@ libmesautil_la_LIBADD = \
$(PTHREAD_LIBS) \
$(CLOCK_LIB) \
$(ZLIB_LIBS) \
- $(LIBATOMIC_LIBS)
+ $(LIBATOMIC_LIBS) \
+ -lm
libxmlconfig_la_SOURCES = $(XMLCONFIG_FILES)
libxmlconfig_la_CFLAGS = \
diff --git a/src/util/bitscan.h b/src/util/bitscan.h
index dc89ac93f28..cdfecafaf01 100644
--- a/src/util/bitscan.h
+++ b/src/util/bitscan.h
@@ -112,6 +112,31 @@ u_bit_scan64(uint64_t *mask)
return i;
}
+/* Return the number of bits set in *mask (population count); *mask is only read. */
+static inline int
+u_count_bits(unsigned *mask)
+{
+ unsigned v = *mask;
+ int c;
+ v = v - ((v >> 1) & 0x55555555); /* each 2-bit field now holds its own bit count */
+ v = (v & 0x33333333) + ((v >> 2) & 0x33333333); /* add pairs: per-4-bit counts */
+ v = (v + (v >> 4)) & 0xF0F0F0F; /* add nibbles: per-byte counts */
+ c = (int)((v * 0x1010101) >> 24); /* multiply sums the bytes into bits 24..31; the masks assume a 32-bit unsigned */
+ return c;
+}
+
+static inline int
+u_count_bits64(uint64_t *mask) /* number of bits set in the 64-bit *mask; *mask is only read */
+{
+ uint64_t v = *mask;
+ int c;
+ v = v - ((v >> 1) & 0x5555555555555555ull); /* each 2-bit field now holds its own bit count */
+ v = (v & 0x3333333333333333ull) + ((v >> 2) & 0x3333333333333333ull); /* add pairs: per-4-bit counts */
+ v = (v + (v >> 4)) & 0xF0F0F0F0F0F0F0Full; /* add nibbles: per-byte counts */
+ c = (int)((v * 0x101010101010101ull) >> 56); /* multiply sums the eight bytes into the top byte */
+ return c;
+}
+
/* Determine if an unsigned value is a power of two.
*
* \note
diff --git a/src/util/meson.build b/src/util/meson.build
index 7caea27d660..156621aff65 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -119,7 +119,7 @@ libmesa_util = static_library(
'mesa_util',
[files_mesa_util, format_srgb],
include_directories : inc_common,
- dependencies : [dep_zlib, dep_clock, dep_thread, dep_atomic],
+ dependencies : [dep_zlib, dep_clock, dep_thread, dep_atomic, dep_m],
c_args : [c_msvc_compat_args, c_vis_args],
build_by_default : false
)
diff --git a/src/util/ralloc.c b/src/util/ralloc.c
index 5d77f75ee85..5a7fa7e84e9 100644
--- a/src/util/ralloc.c
+++ b/src/util/ralloc.c
@@ -554,10 +554,18 @@ ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt,
*/
#define MIN_LINEAR_BUFSIZE 2048
-#define SUBALLOC_ALIGNMENT sizeof(uintptr_t)
+#define SUBALLOC_ALIGNMENT 8
#define LMAGIC 0x87b9c7d3
-struct linear_header {
+struct
+#ifdef _MSC_VER
+ __declspec(align(8))
+#elif defined(__LP64__)
+ __attribute__((aligned(16)))
+#else
+ __attribute__((aligned(8)))
+#endif
+ linear_header {
#ifdef DEBUG
unsigned magic; /* for debugging */
#endif
@@ -651,6 +659,8 @@ linear_alloc_child(void *parent, unsigned size)
ptr = (linear_size_chunk *)((char*)&latest[1] + latest->offset);
ptr->size = size;
latest->offset += full_size;
+
+ assert((uintptr_t)&ptr[1] % SUBALLOC_ALIGNMENT == 0);
return &ptr[1];
}
diff --git a/src/vulkan/Android.mk b/src/vulkan/Android.mk
index 6253f1c3be9..730d036d18c 100644
--- a/src/vulkan/Android.mk
+++ b/src/vulkan/Android.mk
@@ -32,12 +32,15 @@ include $(LOCAL_PATH)/Makefile.sources
include $(CLEAR_VARS)
LOCAL_MODULE := libmesa_vulkan_util
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
-
+LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers
intermediates := $(call local-generated-sources-dir)
LOCAL_C_INCLUDES := \
$(MESA_TOP)/include/vulkan \
- $(MESA_TOP)/src/vulkan/util
+ $(MESA_TOP)/src/vulkan/util \
+ frameworks/native/libs/nativebase/include \
+ frameworks/native/libs/nativewindow/include \
+ frameworks/native/libs/arect/include
LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, \
$(VULKAN_UTIL_GENERATED_FILES))
diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
index 1cd5f8d62c5..58e25214149 100644
--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@@ -954,8 +954,8 @@ wsi_common_queue_present(const struct wsi_device *wsi,
/* We only need/want to wait on semaphores once. After that, we're
* guaranteed ordering since it all happens on the same queue.
*/
- submit_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount,
- submit_info.pWaitSemaphores = pPresentInfo->pWaitSemaphores,
+ submit_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount;
+ submit_info.pWaitSemaphores = pPresentInfo->pWaitSemaphores;
/* Set up the pWaitDstStageMasks */
stage_flags = vk_alloc(&swapchain->alloc,
diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c
index fd0d30ad80c..856040b4fe1 100644
--- a/src/vulkan/wsi/wsi_common_display.c
+++ b/src/vulkan/wsi/wsi_common_display.c
@@ -1062,6 +1062,8 @@ wsi_display_swapchain_destroy(struct wsi_swapchain *drv_chain,
for (uint32_t i = 0; i < chain->base.image_count; i++)
wsi_display_image_finish(drv_chain, allocator, &chain->images[i]);
+
+ wsi_swapchain_finish(&chain->base);
vk_free(allocator, chain);
return VK_SUCCESS;
}
diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c
index e9cc22ec603..3d3a60167bf 100644
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -455,10 +455,11 @@ wsi_wl_get_presentation_support(struct wsi_device *wsi_device,
(struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];
struct wsi_wl_display display;
- int ret = wsi_wl_display_init(wsi, &display, wl_display, false);
- wsi_wl_display_finish(&display);
+ VkResult ret = wsi_wl_display_init(wsi, &display, wl_display, false);
+ if (ret == VK_SUCCESS)
+ wsi_wl_display_finish(&display);
- return ret == 0;
+ return ret == VK_SUCCESS;
}
static VkResult