diff --git a/.travis.yml b/.travis.yml index 8d6ddb2f201..6b50d49e143 100644 --- a/.travis.yml +++ b/.travis.yml @@ -52,6 +52,7 @@ matrix: # Common - xz-utils - libexpat1-dev + - libx11-xcb-dev - libelf-dev - python3.5 - python3-pip @@ -120,7 +121,6 @@ matrix: - llvm-6.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -150,7 +150,6 @@ matrix: - llvm-6.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -183,7 +182,6 @@ matrix: - llvm-3.9-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -222,7 +220,6 @@ matrix: - libclang-3.9-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -258,7 +255,6 @@ matrix: - libclang-4.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -294,7 +290,6 @@ matrix: - libclang-5.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -327,7 +322,6 @@ matrix: - libclang-6.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -361,7 +355,6 @@ matrix: - libclang-7-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -397,7 +390,6 @@ matrix: - libedit-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev @@ -427,7 +419,6 @@ matrix: - llvm-6.0-dev # Common - xz-utils - - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev diff --git a/Android.common.mk b/Android.common.mk index aa1b266a393..d7c5f20fabc 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -31,6 +31,7 @@ LOCAL_C_INCLUDES += \ MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION) LOCAL_CFLAGS += \ + -O3 \ -Wno-error \ -Wno-unused-parameter \ -Wno-pointer-arith \ @@ -78,14 +79,23 @@ LOCAL_CFLAGS += \ -fvisibility=hidden \ 
-fno-math-errno \ -fno-trapping-math \ - -Wno-sign-compare + -Wno-sign-compare \ + -Wno-self-assign \ + -Wno-constant-logical-operand \ + -Wno-format \ + -Wno-incompatible-pointer-types \ + -Wno-enum-conversion LOCAL_CPPFLAGS += \ -D__STDC_CONSTANT_MACROS \ -D__STDC_FORMAT_MACROS \ -D__STDC_LIMIT_MACROS \ -Wno-error=non-virtual-dtor \ - -Wno-non-virtual-dtor + -Wno-non-virtual-dtor \ + -Wno-delete-non-virtual-dtor \ + -Wno-overloaded-virtual \ + -Wno-missing-braces \ + -Wno-deprecated-register # mesa requires at least c99 compiler LOCAL_CONLYFLAGS += \ @@ -112,7 +122,7 @@ LOCAL_CFLAGS_arm64 += -DUSE_AARCH64_ASM ifneq ($(LOCAL_IS_HOST_MODULE),true) LOCAL_CFLAGS += -DHAVE_LIBDRM -LOCAL_SHARED_LIBRARIES += libdrm +LOCAL_SHARED_LIBRARIES += libdrm_pri endif LOCAL_CFLAGS_32 += -DDEFAULT_DRIVER_DIR=\"/vendor/lib/$(MESA_DRI_MODULE_REL_PATH)\" diff --git a/Readme.md b/Readme.md new file mode 100644 index 00000000000..5df295abc3a --- /dev/null +++ b/Readme.md @@ -0,0 +1,2 @@ +Any security related issues should be reported by following the instructions here: +https://01.org/security diff --git a/VERSION b/VERSION index 8b16de0851f..a19b2d9a021 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.3.0-devel +18.3.2 diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore new file mode 100644 index 00000000000..92456c5c938 --- /dev/null +++ b/bin/.cherry-ignore @@ -0,0 +1,4 @@ +# fixes: Commit was squashed into the respective offenders +c02390f8fcd367c7350db568feabb2f062efca14 egl/wayland: rather obvious build fix +# fixes: The commit addresses b4476138d5ad3f8d30c14ee61f2f375edfdbab2a +ff6f1dd0d3c6b4c15ca51b478b2884d14f6a1e06 meson: libfreedreno depends upon libdrm (for fence support) diff --git a/bin/get-fixes-pick-list.sh b/bin/get-fixes-pick-list.sh deleted file mode 100755 index 047ea3bec10..00000000000 --- a/bin/get-fixes-pick-list.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/sh - -# Script for generating a list of candidates [referenced by a Fixes tag] for -# cherry-picking to a 
stable branch -# -# Usage examples: -# -# $ bin/get-fixes-pick-list.sh -# $ bin/get-fixes-pick-list.sh > picklist -# $ bin/get-fixes-pick-list.sh | tee picklist - -# Use the last branchpoint as our limit for the search -latest_branchpoint=`git merge-base origin/master HEAD` - -# List all the commits between day 1 and the branch point... -git log --reverse --pretty=%H $latest_branchpoint > already_landed - -# ... and the ones cherry-picked. -git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_branchpoint..HEAD |\ - grep "cherry picked from commit" |\ - sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked - -# Grep for commits with Fixes tag -git log --reverse --pretty=%H -i --grep="fixes:" $latest_branchpoint..origin/master |\ -while read sha -do - # Check to see whether the patch is on the ignore list ... - if [ -f bin/.cherry-ignore ] ; then - if grep -q ^$sha bin/.cherry-ignore ; then - continue - fi - fi - - # Skip if it has been already cherry-picked. - if grep -q ^$sha already_picked ; then - continue - fi - - # Place every "fixes:" tag on its own line and join with the next word - # on its line or a later one. - fixes=`git show --pretty=medium -s $sha | tr -d "\n" | sed -e 's/fixes:[[:space:]]*/\nfixes:/Ig' | grep "fixes:" | sed -e 's/\(fixes:[a-zA-Z0-9]*\).*$/\1/'` - - # For each one try to extract the tag - fixes_count=`echo "$fixes" | wc -l` - warn=`(test $fixes_count -gt 1 && echo $fixes_count) || echo 0` - while [ $fixes_count -gt 0 ] ; do - # Treat only the current line - id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2` - fixes_count=$(($fixes_count-1)) - - # Bail out if we cannot find suitable id. - # Any specific validation the $id is valid and not some junk, is - # implied with the follow up code - if [ "x$id" = x ] ; then - continue - fi - - # Check if the offending commit is in branch. - - # Be that cherry-picked ... - # ... or landed before the branchpoint. 
- if grep -q ^$id already_picked || - grep -q ^$id already_landed ; then - - printf "Commit \"%s\" fixes %s\n" \ - "`git log -n1 --pretty=oneline $sha`" \ - "$id" - warn=$(($warn-1)) - fi - - done - - if [ $warn -gt 0 ] ; then - printf "WARNING: Commit \"%s\" has more than one Fixes tag\n" \ - "`git log -n1 --pretty=oneline $sha`" - fi - -done - -rm -f already_picked -rm -f already_landed diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index 9e9a39e494b..79b7a295ea6 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -7,21 +7,107 @@ # $ bin/get-pick-list.sh # $ bin/get-pick-list.sh > picklist # $ bin/get-pick-list.sh | tee picklist +# +# The output is as follows: +# [nomination_type] commit_sha commit summary + +is_stable_nomination() +{ + git show --summary "$1" | grep -q -i -o "CC:.*mesa-stable" +} + +is_typod_nomination() +{ + git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev" +} + +fixes= + +# Helper to handle various mistypos of the fixes tag. +# The tag string itself is passed as argument and normalised within. +# +# Resulting string in the global variable "fixes" and contains entries +# in the form "fixes:$sha" +is_sha_nomination() +{ + fixes=`git show --pretty=medium -s $1 | tr -d "\n" | \ + sed -e 's/'"$2"'/\nfixes:/Ig' | \ + grep -Eo 'fixes:[a-f0-9]{8,40}'` + + fixes_count=`echo "$fixes" | grep "fixes:" | wc -l` + if test $fixes_count -eq 0; then + return 1 + fi + + # Throw a warning for each invalid sha + while test $fixes_count -gt 0; do + # Treat only the current line + id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2` + fixes_count=$(($fixes_count-1)) + if ! git show $id &>/dev/null; then + echo WARNING: Commit $1 lists invalid sha $id + fi + done + + return 0 +} + +# Checks if at least one of offending commits, listed in the global +# "fixes", is in branch. 
+sha_in_range() +{ + fixes_count=`echo "$fixes" | grep "fixes:" | wc -l` + while test $fixes_count -gt 0; do + # Treat only the current line + id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2` + fixes_count=$(($fixes_count-1)) + + # Be that cherry-picked ... + # ... or landed before the branchpoint. + if grep -q ^$id already_picked || + grep -q ^$id already_landed ; then + return 0 + fi + done + return 1 +} + +is_fixes_nomination() +{ + is_sha_nomination "$1" "fixes:[[:space:]]*" + if test $? -eq 0; then + return 0 + fi + is_sha_nomination "$1" "fixes[[:space:]]\+" +} + +is_brokenby_nomination() +{ + is_sha_nomination "$1" "broken by" +} + +is_revert_nomination() +{ + is_sha_nomination "$1" "This reverts commit " +} # Use the last branchpoint as our limit for the search latest_branchpoint=`git merge-base origin/master HEAD` -# Grep for commits with "cherry picked from commit" in the commit message. +# List all the commits between day 1 and the branch point... +git log --reverse --pretty=%H $latest_branchpoint > already_landed + +# ... and the ones cherry-picked. git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_branchpoint..HEAD |\ grep "cherry picked from commit" |\ sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked -# Grep for commits that were marked as a candidate for the stable tree. -git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable' $latest_branchpoint..origin/master |\ +# Grep for potential candidates +git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\\|\\|This reverts commit' $latest_branchpoint..origin/master |\ while read sha do # Check to see whether the patch is on the ignore list. 
- if [ -f bin/.cherry-ignore ] ; then + if test -f bin/.cherry-ignore; then if grep -q ^$sha bin/.cherry-ignore ; then continue fi @@ -32,7 +118,33 @@ do continue fi - git log -n1 --pretty=oneline $sha | cat + if is_fixes_nomination "$sha"; then + tag=fixes + elif is_brokenby_nomination "$sha"; then + tag=brokenby + elif is_revert_nomination "$sha"; then + tag=revert + elif is_stable_nomination "$sha"; then + tag=stable + elif is_typod_nomination "$sha"; then + tag=typod + else + continue + fi + + case "$tag" in + fixes | brokenby | revert ) + if ! sha_in_range; then + continue + fi + ;; + * ) + ;; + esac + + printf "[ %8s ] " "$tag" + git --no-pager show --summary --oneline $sha done rm -f already_picked +rm -f already_landed diff --git a/bin/get-typod-pick-list.sh b/bin/get-typod-pick-list.sh deleted file mode 100755 index eb4181d66b8..00000000000 --- a/bin/get-typod-pick-list.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/sh - -# Script for generating a list of candidates which have typos in the nomination line -# -# Usage examples: -# -# $ bin/get-typod-pick-list.sh -# $ bin/get-typod-pick-list.sh > picklist -# $ bin/get-typod-pick-list.sh | tee picklist - -# NB: -# This script intentionally _never_ checks for specific version tag -# Should we consider folding it with the original get-pick-list.sh - -# Use the last branchpoint as our limit for the search -latest_branchpoint=`git merge-base origin/master HEAD` - -# Grep for commits with "cherry picked from commit" in the commit message. -git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\ - grep "cherry picked from commit" |\ - sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked - -# Grep for commits that were marked as a candidate for the stable tree. -git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' $latest_branchpoint..origin/master |\ -while read sha -do - # Check to see whether the patch is on the ignore list. 
- if [ -f bin/.cherry-ignore ] ; then - if grep -q ^$sha bin/.cherry-ignore ; then - continue - fi - fi - - # Check to see if it has already been picked over. - if grep -q ^$sha already_picked ; then - continue - fi - - git log -n1 --pretty=oneline $sha | cat -done - -rm -f already_picked diff --git a/configure.ac b/configure.ac index d782f56205d..b1c6967afee 100644 --- a/configure.ac +++ b/configure.ac @@ -1716,6 +1716,8 @@ xdri) if test x"$enable_dri" = xyes; then dri_modules="$dri_modules xcb-dri2 >= $XCBDRI2_REQUIRED" fi + + dri_modules="$dri_modules xxf86vm" fi if test x"$dri_platform" = xapple ; then DEFINES="$DEFINES -DGLX_USE_APPLEGL" @@ -1725,12 +1727,6 @@ xdri) fi fi - # add xf86vidmode if available - PKG_CHECK_MODULES([XF86VIDMODE], [xxf86vm], HAVE_XF86VIDMODE=yes, HAVE_XF86VIDMODE=no) - if test "$HAVE_XF86VIDMODE" = yes ; then - dri_modules="$dri_modules xxf86vm" - fi - PKG_CHECK_MODULES([DRIGL], [$dri_modules]) GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV $dri_modules" X11_INCLUDES="$X11_INCLUDES $DRIGL_CFLAGS" @@ -1742,10 +1738,6 @@ xdri) ;; esac -# This is outside the case (above) so that it is invoked even for non-GLX -# builds. -AM_CONDITIONAL(HAVE_XF86VIDMODE, test "x$HAVE_XF86VIDMODE" = xyes) - GLESv1_CM_LIB_DEPS="$LIBDRM_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS" GLESv1_CM_PC_LIB_PRIV="-lm $PTHREAD_LIBS $DLOPEN_LIBS" GLESv2_LIB_DEPS="$LIBDRM_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS" @@ -1762,8 +1754,6 @@ AC_SUBST([GLESv1_CM_PC_LIB_PRIV]) AC_SUBST([GLESv2_LIB_DEPS]) AC_SUBST([GLESv2_PC_LIB_PRIV]) -AC_SUBST([HAVE_XF86VIDMODE]) - dnl dnl More GLX setup dnl diff --git a/docs/releasing.html b/docs/releasing.html index 52e102207d1..c79a020efa7 100644 --- a/docs/releasing.html +++ b/docs/releasing.html @@ -21,6 +21,7 @@

Releasing process

  • Overview
  • Release schedule
  • Cherry-pick and test +
  • Staging branch
  • Making a branchpoint
  • Pre-release announcement
  • Making a new release @@ -209,6 +210,25 @@

    Regression/functionality testing

    idea too.

    +

    Staging branch

    + +

    +A live branch, which contains the currently merge/rejected patches is available +in the main repository under staging/X.Y. For example: +

    +
    +	staging/18.1 - WIP branch for the 18.1 series
    +	staging/18.2 - WIP branch for the 18.2 series
    +
    + +

    +Notes: +

    +
      +
    • People are encouraged to test the branch and report regressions.
    • +
    • The branch history is not stable and it will be rebased,
    • +
    +

    Making a branchpoint

    diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html index 8af225a61e1..370d5e823e2 100644 --- a/docs/relnotes/18.3.0.html +++ b/docs/relnotes/18.3.0.html @@ -14,7 +14,7 @@

    The Mesa 3D Graphics Library

    -

    Mesa 18.3.0 Release Notes / TBD

    +

    Mesa 18.3.0 Release Notes / December 7, 2018

    Mesa 18.3.0 is a new development release. People who are concerned @@ -40,7 +40,8 @@

    Mesa 18.3.0 Release Notes / TBD

    SHA256 checksums

    -TBD.
    +17a124d4dbc712505d22a7815c9b0cee22214c96c8abb91539a2b1351e38a000  mesa-18.3.0.tar.gz
    +b63f947e735d6ef3dfaa30c789a9adfbae18aea671191eaacde95a18c17fc38a  mesa-18.3.0.tar.xz
     
    @@ -61,7 +62,6 @@

    New features

  • GL_EXT_vertex_attrib_64bit on i965, nvc0, radeonsi.
  • GL_EXT_window_rectangles on radeonsi.
  • GL_KHR_texture_compression_astc_sliced_3d on radeonsi.
  • -
  • GL_INTEL_fragment_shader_ordering on i965.
  • GL_NV_fragment_shader_interlock on i965.
  • EGL_EXT_device_base for all drivers.
  • EGL_EXT_device_drm for all drivers.
  • @@ -71,8 +71,206 @@

    New features

    Bug fixes

    + +
  • Bug 13728 - [G965] Some objects in Neverwinter Nights Linux version not displayed correctly
  • + +
  • Bug 91433 - piglit.spec.arb_depth_buffer_float.fbo-depth-gl_depth_component32f-copypixels fails
  • + +
  • Bug 93355 - [BXT,SKLGT4e] intermittent ext_framebuffer_multisample.accuracy fails
  • + +
  • Bug 94957 - dEQP failures on llvmpipe
  • + +
  • Bug 98699 - "float[a+++4 ? 1:1] f;" crashes glsl_compiler
  • + +
  • Bug 99507 - Corrupted frame contents with Vulkan version of DOTA2, Talos Principle and Sascha Willems' demos when they're run Vsynched in fullscreen
  • + +
  • Bug 99730 - Metro Redux game(s) needs override for midshader extension declaration
  • + +
  • Bug 100200 - Default Unreal Engine 4 frag shader fails to compile
  • + +
  • Bug 101247 - Mesa fails to link GLSL programs with unused output blocks
  • + +
  • Bug 102597 - [Regression] mpv, high rendering times (two to three times higher)
  • + +
  • Bug 103241 - Anv crashes when using 64-bit vertex inputs
  • + +
  • Bug 104602 - [apitrace] Graphical artifacts in Civilization VI on RX Vega
  • + +
  • Bug 104809 - anv: DOOM 2016 and Wolfenstein II:The New Colossus crash due to not having depthBoundsTest
  • + +
  • Bug 104926 - swrast: Mesa 17.3.3 produces: HW cursor for format 875713089 not supported
  • + +
  • Bug 105333 - [gallium-nine] missing geometry after commit ac: replace ac_build_kill with ac_build_kill_if_false
  • + +
  • Bug 105371 - r600_shader_from_tgsi - GPR limit exceeded - shader requires 360 registers
  • + +
  • Bug 105731 - linker error "fragment shader input ... has no matching output in the previous stage" when previous stage's output declaration in a separate shader object
  • + +
  • Bug 105904 - Needed to delete mesa shader cache after driver upgrade for 32 bit wine vulkan programs to work.
  • + +
  • Bug 105975 - i965 always reports 0 viewport subpixel bits
  • + +
  • Bug 106231 - llvmpipe blends produce bad code after llvm patch https://reviews.llvm.org/D44785
  • + +
  • Bug 106283 - Shader replacements works only for limited use cases
  • + +
  • Bug 106577 - broken rendering with nine and nouveau (GM107)
  • + +
  • Bug 106833 - glLinkProgram is expected to fail when vertex attribute aliasing happens on ES3.0 context or later
  • + +
  • Bug 106865 - [GLK] piglit.spec.ext_framebuffer_multisample.accuracy stencil tests fail
  • + +
  • Bug 106980 - Basemark GPU vulkan benchmark hangs on GFX9
  • + +
  • Bug 106997 - [Regression]. Dying light game is crashing on latest mesa
  • + +
  • Bug 107088 - [GEN8+] Hang when discarding a fragment if dual source blending is enabled but shader doesn't support it
  • + +
  • Bug 107098 - Segfault after munmap(kms_sw_dt->ro_mapped)
  • + +
  • Bug 107212 - Dual-Core CPU E5500 / G45: RetroArch with reicast core results in corrupted graphics
  • + +
  • Bug 107223 - [GEN9+] 50% perf drop in SynMark Fill* tests (E2E RBC gets disabled?)
  • + +
  • Bug 107276 - radv: OpBitfieldUExtract returns incorrect result when count is zero
  • + +
  • Bug 107280 - [DXVK] Batman: Arkham City with tessellation enabled hangs on SKL GT4
  • + +
  • Bug 107313 - Meson instructions on web site are non-optimal
  • + +
  • Bug 107359 - [Regression] [bisected] [OpenGL CTS] [SKL,BDW] KHR-GL46.texture_barrier*-texels, GTF-GL46.gtf21.GL2FixedTests.buffer_corners.buffer_corners, and GTF-GL46.gtf21.GL2FixedTests.stencil_plane_corners.stencil_plane_corners fail with some configuration
  • + +
  • Bug 107460 - radv: OpControlBarrier does not always work correctly (bisected)
  • + +
  • Bug 107477 - [DXVK] Setting high shader quality in GTA V results in LLVM error
  • + +
  • Bug 107483 - DispatchSanity_test.GL31_CORE regression
  • + +
  • Bug 107487 - [intel] [tools] intel gpu tools don't honor -D tools=[]
  • + +
  • Bug 107488 - gl.h:2090: error: redefinition of typedef ‘GLeglImageOES’
  • + +
  • Bug 107510 - [GEN8+] up to 10% perf drop on several 3D benchmarks
  • + +
  • Bug 107511 - KHR/khrplatform.h not always installed when needed
  • + +
  • Bug 107524 - Broken packDouble2x32 at llvmpipe
  • + +
  • Bug 107544 - intel/decoder: out of bounds group_iter
  • + +
  • Bug 107547 - shader crashing glsl_compiler (uniform block assigned to vec2, then component substraced by 1)
  • + +
  • Bug 107550 - "0[2]" as function parameter hits assert
  • + +
  • Bug 107563 - [RADV] Broken rendering in Unity demos
  • + +
  • Bug 107565 - TypeError: __init__() got an unexpected keyword argument 'future_imports'
  • + +
  • Bug 107579 - [SNB] The graphic corruption when we reuse the GS compiled and used for TFB when statebuffer contain magic trash in the unused space
  • + +
  • Bug 107601 - Rise of the Tomb Raider Segmentation Fault when the game starts
  • + +
  • Bug 107610 - Dolphin emulator mis-renders shadow overlay in Super Mario Sunshine
  • + +
  • Bug 107626 - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test "arb_texture_multisample-large-float-texture" with parameter --fp16
  • + +
  • Bug 107658 - [Regression] [bisected] [OpenGLES CTS] KHR-GLES3.packed_pixels.*rectangle.r*8_snorm
  • + +
  • Bug 107734 - [GLSL] glsl-fface-invariant, glsl-fcoord-invariant and glsl-pcoord-invariant should fail
  • + +
  • Bug 107745 - [bisected] [bdw bsw] piglit.­spec.­arb_fragment_shader_interlock.­arb_fragment_shader_interlock-image-load-store failure
  • + +
  • Bug 107760 - GPU Hang when Playing DiRT 3 Complete Edition using Steam Play with DXVK
  • + +
  • Bug 107765 - [regression] Batman Arkham City crashes with DXVK under wine
  • + +
  • Bug 107772 - Mesa preprocessor matches if(def)s & endifs incorrectly
  • + +
  • Bug 107779 - Access violation with some games
  • + +
  • Bug 107786 - [DXVK] MSAA reflections are broken in GTA V
  • + +
  • Bug 107806 - glsl_get_natural_size_align_bytes() ABORT with GfxBench Vulkan AztecRuins
  • + +
  • Bug 107810 - The 'va_end' call is missed after 'va_copy' in 'util_vsnprintf' function under windows
  • + +
  • Bug 107832 - Gallium picking A16L16 formats when emulating INTENSITY16 conflicts with mesa
  • + +
  • Bug 107843 - 32bit Mesa build failes with meson.
  • + +
  • Bug 107856 - i965 incorrectly calculates the number of layers for texture views (assert)
  • + +
  • Bug 107857 - GPU hang - GS_EMIT without shader outputs
  • + +
  • Bug 107865 - swr fail to build with llvm-libs 6.0.1
  • + +
  • Bug 107869 - u_thread.h:87:4: error: use of undeclared identifier 'cpu_set_t'
  • + +
  • Bug 107870 - Undefined symbols for architecture x86_64: "_util_cpu_caps"
  • + +
  • Bug 107879 - crash happens when link program
  • + +
  • Bug 107891 - [wine, regression, bisected] RAGE, Wolfenstein The New Order hangs in menu
  • + +
  • Bug 107923 - build_id.c:126: multiple definition of `build_id_length'
  • + +
  • Bug 107926 - [anv] Rise of the Tomb Raider always misrendering, segfault and gpu hang.
  • + +
  • Bug 107941 - GPU hang and system crash with Dota 2 using Vulkan
  • + +
  • Bug 107971 - SPV_GOOGLE_hlsl_functionality1 / SPV_GOOGLE_decorate_string
  • + +
  • Bug 108012 - Compiler crashes on access of non-existent member incremental operations
  • + +
  • Bug 108024 - [Debian Stretch]Fail to build because "xcb_randr_lease_t"
  • + +
  • Bug 108082 - warning: unknown warning option '-Wno-format-truncation' [-Wunknown-warning-option]
  • + +
  • Bug 108109 - [GLSL] no-overloads.vert fails
  • + +
  • Bug 108112 - [vulkancts] some of the coherent memory tests fail.
  • + +
  • Bug 108113 - [vulkancts] r32g32b32 transfer operations not implemented
  • + +
  • Bug 108115 - [vulkancts] dEQP-VK.subgroups.vote.graphics.subgroupallequal.* fails
  • + +
  • Bug 108164 - [radv] VM faults since 5d6a560a2986c9ab421b3c7904d29bb7bc35e36f
  • + +
  • Bug 108245 - RADV/Vega: Low mip levels of large BCn textures get corrupted by vkCmdCopyBufferToImage
  • + +
  • Bug 108272 - [polaris10] opencl-mesa: Anything using OpenCL segfaults, XFX Radeon RX 580
  • + +
  • Bug 108311 - Query buffer object support is broken on r600.
  • + +
  • Bug 108319 - [GLK BXT BSW] Assertion in piglit.spec.arb_gpu_shader_fp64.execution.built-in-functions.vs-sign-sat-neg-abs
  • + +
  • Bug 108491 - Commit baa38c14 causes output issues on my VEGA with RADV
  • + +
  • Bug 108524 - [RADV] GPU lockup on event synchronization
  • + +
  • Bug 108530 - (mesa-18.3) [Tracker] Mesa 18.3 Release Tracker
  • + +
  • Bug 108532 - make check nir_copy_prop_vars_test.store_store_load_different_components regression
  • + +
  • Bug 108560 - Mesa 32 is built without sse
  • + +
  • Bug 108595 - ir3_compiler valgrind build error
  • + +
  • Bug 108617 - [deqp] Mesa fails conformance for egl_ext_device
  • + +
  • Bug 108630 - [G965] piglit.spec.!opengl 1_2.tex3d-maxsize spins forever
  • + +
  • Bug 108635 - Mesa master commit 68dc591af16ebb36814e4c187e4998948103c99c causes XWayland to segfault
  • + +
  • Bug 108713 - Gallium: use after free with transform feedback
  • + +
  • Bug 108829 - [meson] libglapi exports internal API
  • + +
  • Bug 108894 - [anv] vkCmdCopyBuffer() and vkCmdCopyQueryPoolResults() write-after-write hazard
  • + +
  • Bug 108909 - Vkd3d test failure test_resolve_non_issued_query_data()
  • + +
  • Bug 108914 - blocky shadow artifacts in The Forest with DXVK, RADV_DEBUG=nohiz fixes this
  • Changes

    diff --git a/docs/relnotes/18.3.1.html b/docs/relnotes/18.3.1.html new file mode 100644 index 00000000000..8acbfb7a5f2 --- /dev/null +++ b/docs/relnotes/18.3.1.html @@ -0,0 +1,63 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.3.1 Release Notes / December 11, 2018

    + +

    +Mesa 18.3.1 is a bug fix release which fixes bugs found since the 18.3.0 release. +

    +

    +Mesa 18.3.0 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +256d0c3d88e380c1b8e3fc5c6ac34001e3b7c30458b8b852407ec68b8ccd9fda  mesa-18.3.1.tar.gz
    +5b1f827d28684a25f6657289f8b7d47ac56395988c7ac23e0ec9a62b644bdc63  mesa-18.3.1.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    +

    None

    + + +

    Changes

    + +

    Emil Velikov (2):

    +
      +
    • docs: add sha256 checksums for 18.3.0
    • +
    • Update version to 18.3.1
    • +
    + +

    Jason Ekstrand (1):

    +
      +
    • anv,radv: Disable VK_EXT_pci_bus_info
    • +
    + + +
    + + diff --git a/docs/relnotes/18.3.2.html b/docs/relnotes/18.3.2.html new file mode 100644 index 00000000000..594b42cdf4e --- /dev/null +++ b/docs/relnotes/18.3.2.html @@ -0,0 +1,265 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.3.2 Release Notes / January 17, 2019

    + +

    +Mesa 18.3.2 is a bug fix release which fixes bugs found since the 18.3.1 release. +

    +

    +Mesa 18.3.2 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +1cde4fafd40cd1ad4ee3a13b364b7a0175a08b7afdd127fb46f918c1e1dfd4b0  mesa-18.3.2.tar.gz
    +f7ce7181c07b6d8e0132da879af1729523a6c8aa87f79a9d59dfd064024cfb35  mesa-18.3.2.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 106595 - [RADV] Rendering distortions only when MSAA is enabled
    • + +
    • Bug 107728 - Wrong background in Sascha Willem's Multisampling Demo
    • + +
    • Bug 108114 - [vulkancts] new VK_KHR_16bit_storage tests fail.
    • + +
    • Bug 108116 - [vulkancts] stencil partial clear tests fail.
    • + +
    • Bug 108624 - [regression][bisected] "nir: Copy propagation between blocks" regression
    • + +
    • Bug 108910 - Vkd3d test failure test_multisample_array_texture()
    • + +
    • Bug 108911 - Vkd3d test failure test_clear_render_target_view()
    • + +
    • Bug 108943 - Build fails on ppc64le with meson
    • + +
    • Bug 109072 - GPU hang in blender 2.80
    • + +
    • Bug 109081 - [bisected] [HSW] Regression in clipping.user_defined.clip_* vulkancts tests
    • + +
    • Bug 109151 - [KBL-G][vulkan] dEQP-VK.texture.explicit_lod.2d.sizes.31x55_nearest_linear_mipmap_nearest_repeat failed verification.
    • + +
    • Bug 109202 - nv50_ir.cpp:749:19: error: cannot use typeid with -fno-rtti
    • + +
    • Bug 109204 - [regression, bisected] retroarch's crt-royale shader crash radv
    • + +
    + + +

    Changes

    + +

    Alex Deucher (3):

    +
      +
    • pci_ids: add new vega10 pci ids
    • +
    • pci_ids: add new vega20 pci id
    • +
    • pci_ids: add new VegaM pci id
    • +
    + +

    Alexander von Gluck IV (1):

    +
      +
    • egl/haiku: Fix reference to disp vs dpy
    • +
    + +

    Andres Gomez (2):

    +
      +
    • glsl: correct typo in GLSL compilation error message
    • +
    • glsl/linker: specify proper direction in location aliasing error
    • +
    + +

    Axel Davy (3):

    +
      +
    • st/nine: Fix volumetexture dtor on ctor failure
    • +
    • st/nine: Bind src not dst in nine_context_box_upload
    • +
    • st/nine: Add src reference to nine_context_range_upload
    • +
    + +

    Bas Nieuwenhuizen (5):

    +
      +
    • radv: Do a cache flush if needed before reading predicates.
    • +
    • radv: Implement buffer stores with less than 4 components.
    • +
    • anv/android: Do not reject storage images.
    • +
    • radv: Fix rasterization precision bits.
    • +
    • spirv: Fix matrix parameters in function calls.
    • +
    + +

    Caio Marcelo de Oliveira Filho (3):

    +
      +
    • nir: properly clear the entry sources in copy_prop_vars
    • +
    • nir: properly find the entry to keep in copy_prop_vars
    • +
    • nir: remove dead code from copy_prop_vars
    • +
    + +

    Dave Airlie (2):

    +
      +
    • radv/xfb: fix counter buffer bounds checks.
    • +
    • virgl/vtest: fix front buffer flush with protocol version 0.
    • +
    + +

    Dylan Baker (6):

    +
      +
    • meson: Fix ppc64 little endian detection
    • +
    • meson: Add support for gnu hurd
    • +
    • meson: Add toggle for glx-direct
    • +
    • meson: Override C++ standard to gnu++11 when building with altivec on ppc64
    • +
    • meson: Error out if building nouveau and using LLVM without rtti
    • +
    • autotools: Remove tegra vdpau driver
    • +
    + +

    Emil Velikov (12):

    +
      +
    • docs: add sha256 checksums for 18.3.1
    • +
    • bin/get-pick-list.sh: rework handing of sha nominations
    • +
    • bin/get-pick-list.sh: warn when commit lists invalid sha
    • +
    • cherry-ignore: meson: libfreedreno depends upon libdrm (for fence support)
    • +
    • glx: mandate xf86vidmode only for "drm" dri platforms
    • +
    • meson: don't require glx/egl/gbm with gallium drivers
    • +
    • pipe-loader: meson: reference correct library
    • +
    • TODO: glx: meson: build dri based glx tests, only with -Dglx=dri
    • +
    • glx: meson: drop includes from a link-only library
    • +
    • glx: meson: wire up the dispatch-index-check test
    • +
    • glx/test: meson: assorted include fixes
    • +
    • Update version to 18.3.2
    • +
    + +

    Eric Anholt (6):

    +
      +
    • v3d: Fix a leak of the transfer helper on screen destroy.
    • +
    • vc4: Fix a leak of the transfer helper on screen destroy.
    • +
    • v3d: Fix a leak of the disassembled instruction string during debug dumps.
    • +
    • v3d: Make sure that a thrsw doesn't split a multop from its umul24.
    • +
    • v3d: Add missing flagging of SYNCB as a TSY op.
    • +
    • gallium/ttn: Fix setup of outputs_written.
    • +
    + +

    Erik Faye-Lund (2):

    +
      +
    • virgl: wrap vertex element state in a struct
    • +
    • virgl: work around bad assumptions in virglrenderer
    • +
    + +

    Francisco Jerez (5):

    • intel/fs: Handle source modifiers in lower_integer_multiplication().
    • intel/fs: Implement quad swizzles on ICL+.
    • intel/fs: Fix bug in lower_simd_width while splitting an instruction which was already split.
    • intel/eu/gen7: Fix brw_MOV() with DF destination and strided source.
    • intel/fs: Respect CHV/BXT regioning restrictions in copy propagation pass.

    Ian Romanick (2):

    • i965/vec4/dce: Don't narrow the write mask if the flags are used
    • Revert "nir/lower_indirect: Bail early if modes == 0"

    Jan Vesely (1):

    • clover: Fix build after clang r348827

    Jason Ekstrand (6):

    • nir/constant_folding: Fix source bit size logic
    • intel/blorp: Be more conservative about copying clear colors
    • spirv: Handle any bit size in vector_insert/extract
    • anv/apply_pipeline_layout: Set the cursor in lower_res_reindex_intrinsic
    • spirv: Sign-extend array indices
    • intel/peephole_ffma: Fix swizzle propagation

    Karol Herbst (1):

    • nv50/ir: fix use-after-free in ConstantFolding::visit

    Kirill Burtsev (1):

    • loader: free error state, when checking the drawable type

    Lionel Landwerlin (5):

    • anv: don't do partial resolve on layer > 0
    • i965: include draw_params/derived_draw_params for VF cache workaround
    • i965: add CS stall on VF invalidation workaround
    • anv: explictly specify format for blorp ccs/mcs op
    • anv: flush fast clear colors into compressed surfaces

    Marek Olšák (1):

    • st/mesa: don't leak pipe_surface if pipe_context is not current

    Mario Kleiner (1):

    • radeonsi: Fix use of 1- or 2- component GL_DOUBLE vbo's.

    Nicolai Hähnle (1):

    • meson: link LLVM 'native' component when LLVM is available

    Rhys Perry (3):

    • radv: don't set surf_index for stencil-only images
    • ac/nir,radv,radeonsi/nir: use correct indices for interpolation intrinsics
    • ac: split 16-bit ssbo loads that may not be dword aligned

    Rob Clark (2):

    • freedreno/drm: fix memory leak
    • mesa/st/nir: fix missing nir_compact_varyings

    Samuel Pitoiset (1):

    • radv: switch on EOP when primitive restart is enabled with triangle strips

    Timothy Arceri (2):

    • tgsi/scan: fix loop exit point in tgsi_scan_tess_ctrl()
    • tgsi/scan: correctly walk instructions in tgsi_scan_tess_ctrl()

    Vinson Lee (2):

    • meson: Fix typo.
    • meson: Fix libsensors detection.
    + + + +
    + + diff --git a/docs/submittingpatches.html b/docs/submittingpatches.html index e5350bdb2cf..d7ea0a310db 100644 --- a/docs/submittingpatches.html +++ b/docs/submittingpatches.html @@ -251,6 +251,9 @@

    Nominating a commit for a stable branch

    nomination request.

    The current patch status can be observed in the staging branch.

    The stable tag

    diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index 6f9c2c8b8cf..48060ac8de6 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -1334,6 +1334,10 @@ struct __DRIdri2ExtensionRec { #define __DRI_IMAGE_FOURCC_YVU422 0x36315659 #define __DRI_IMAGE_FOURCC_YVU444 0x34325659 +#define __DRI_IMAGE_FOURCC_P010 0x30313050 +#define __DRI_IMAGE_FOURCC_P012 0x32313050 +#define __DRI_IMAGE_FOURCC_P016 0x36313050 + /** * Queryable on images created by createImageFromNames. * diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h index 35ea3559b02..75ac7761bb4 100644 --- a/include/pci_ids/radeonsi_pci_ids.h +++ b/include/pci_ids/radeonsi_pci_ids.h @@ -219,6 +219,7 @@ CHIPSET(0x699F, POLARIS12) CHIPSET(0x694C, VEGAM) CHIPSET(0x694E, VEGAM) +CHIPSET(0x694F, VEGAM) CHIPSET(0x6860, VEGA10) CHIPSET(0x6861, VEGA10) @@ -227,8 +228,14 @@ CHIPSET(0x6863, VEGA10) CHIPSET(0x6864, VEGA10) CHIPSET(0x6867, VEGA10) CHIPSET(0x6868, VEGA10) -CHIPSET(0x687F, VEGA10) +CHIPSET(0x6869, VEGA10) +CHIPSET(0x686A, VEGA10) +CHIPSET(0x686B, VEGA10) CHIPSET(0x686C, VEGA10) +CHIPSET(0x686D, VEGA10) +CHIPSET(0x686E, VEGA10) +CHIPSET(0x686F, VEGA10) +CHIPSET(0x687F, VEGA10) CHIPSET(0x69A0, VEGA12) CHIPSET(0x69A1, VEGA12) @@ -240,6 +247,7 @@ CHIPSET(0x66A0, VEGA20) CHIPSET(0x66A1, VEGA20) CHIPSET(0x66A2, VEGA20) CHIPSET(0x66A3, VEGA20) +CHIPSET(0x66A4, VEGA20) CHIPSET(0x66A7, VEGA20) CHIPSET(0x66AF, VEGA20) diff --git a/meson.build b/meson.build index 18667988bac..5a20e1ea30d 100644 --- a/meson.build +++ b/meson.build @@ -54,6 +54,7 @@ with_valgrind = get_option('valgrind') with_libunwind = get_option('libunwind') with_asm = get_option('asm') with_glx_read_only_text = get_option('glx-read-only-text') +with_glx_direct = get_option('glx-direct') with_osmesa = get_option('osmesa') with_swr_arches = get_option('swr-arches') with_tools = get_option('tools') @@ -223,8 +224,6 @@ elif system_has_kms_drm 
else # FIXME: haiku doesn't use dri, and xlib doesn't use dri, probably should # assert here that one of those cases has been met. - # FIXME: GNU (hurd) ends up here as well, but meson doesn't officially - # support Hurd at time of writing (2017/11) # FIXME: illumos ends up here as well with_dri_platform = 'none' endif @@ -370,9 +369,6 @@ if with_glvnd endif endif -# TODO: toggle for this -with_glx_direct = true - if with_vulkan_icd_dir == '' with_vulkan_icd_dir = join_paths(get_option('datadir'), 'vulkan/icd.d') endif @@ -388,9 +384,9 @@ endif if with_any_vk and (with_platform_x11 and not with_dri3) error('Vulkan drivers require dri3 for X11 support') endif -if with_dri or with_gallium - if with_glx == 'disabled' and not with_egl and not with_platform_haiku - error('building dri or gallium drivers require at least one window system') +if with_dri + if with_glx == 'disabled' and not with_egl and not with_gbm + error('building dri drivers require at least one windowing system') endif endif @@ -620,7 +616,7 @@ if with_gallium_st_nine error('The nine state tracker requires gallium softpipe/llvmpipe.') elif not (with_gallium_radeonsi or with_gallium_nouveau or with_gallium_r600 or with_gallium_r300 or with_gallium_svga or with_gallium_i915) - error('The nine state tracker requires at least on non-swrast gallium driver.') + error('The nine state tracker requires at least one non-swrast gallium driver.') endif if not with_dri3 error('Using nine with wine requires dri3') @@ -628,7 +624,12 @@ if with_gallium_st_nine endif if get_option('power8') != 'false' - if host_machine.cpu_family() == 'ppc64le' + # on old versions of meson the cpu family would return as ppc64le on little + # endian power8, this was changed in 0.48 such that the family would always + # be ppc64 regardless of endianness, and the the machine.endian() value + # should be checked. Since we support versions < 0.48 we need to use + # startswith. 
+ if host_machine.cpu_family().startswith('ppc64') and host_machine.endian() == 'little' if cc.get_id() == 'gcc' and cc.version().version_compare('< 4.8') error('Altivec is not supported with gcc version < 4.8.') endif @@ -650,6 +651,7 @@ if get_option('power8') != 'false' endif _opencl = get_option('gallium-opencl') +clover_cpp_std = [] if _opencl != 'disabled' if not with_gallium error('OpenCL Clover implementation requires at least one gallium driver.') @@ -658,6 +660,14 @@ if _opencl != 'disabled' dep_clc = dependency('libclc') with_gallium_opencl = true with_opencl_icd = _opencl == 'icd' + + if host_machine.cpu_family().startswith('ppc') and cpp.compiles(''' + #if !defined(__VEC__) || !defined(__ALTIVEC__) + #error "AltiVec not enabled" + #endif''', + name : 'Altivec') + clover_cpp_std += ['cpp_std=gnu++11'] + endif else dep_clc = null_dep with_gallium_opencl = false @@ -781,13 +791,13 @@ if cc.compiles('int foo(void) __attribute__((__noreturn__));', endif # TODO: this is very incomplete -if ['linux', 'cygwin'].contains(host_machine.system()) +if ['linux', 'cygwin', 'gnu'].contains(host_machine.system()) pre_args += '-D_GNU_SOURCE' endif # Check for generic C arguments c_args = [] -foreach a : ['-Wall', '-Werror=implicit-function-declaration', +foreach a : ['-Werror=implicit-function-declaration', '-Werror=missing-prototypes', '-Werror=return-type', '-fno-math-errno', '-fno-trapping-math', '-Qunused-arguments'] @@ -809,7 +819,7 @@ endif # Check for generic C++ arguments cpp_args = [] -foreach a : ['-Wall', '-Werror=return-type', +foreach a : ['-Werror=return-type', '-fno-math-errno', '-fno-trapping-math', '-Qunused-arguments'] if cpp.has_argument(a) @@ -905,8 +915,9 @@ if not cc.links('''#include int main() { return __sync_add_and_fetch(&v, (uint64_t)1); }''', + dependencies : dep_atomic, name : 'GCC 64bit atomics') - pre_args += '-DMISSING_64_BIT_ATOMICS' + pre_args += '-DMISSING_64BIT_ATOMICS' endif # TODO: shared/static? Is this even worth doing? 
@@ -939,7 +950,7 @@ endif with_asm_arch = '' if with_asm if host_machine.cpu_family() == 'x86' - if system_has_kms_drm + if system_has_kms_drm or host_machine.system() == 'gnu' with_asm_arch = 'x86' pre_args += ['-DUSE_X86_ASM', '-DUSE_MMX_ASM', '-DUSE_3DNOW_ASM', '-DUSE_SSE_ASM'] @@ -968,7 +979,7 @@ if with_asm with_asm_arch = 'sparc' pre_args += ['-DUSE_SPARC_ASM'] endif - elif host_machine.cpu_family() == 'ppc64le' + elif host_machine.cpu_family().startswith('ppc64') and host_machine.endian() == 'little' if system_has_kms_drm with_asm_arch = 'ppc64le' pre_args += ['-DUSE_PPC64LE_ASM'] @@ -1162,7 +1173,7 @@ endif llvm_modules = ['bitwriter', 'engine', 'mcdisassembler', 'mcjit'] llvm_optional_modules = [] if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 - llvm_modules += ['amdgpu', 'bitreader', 'ipo'] + llvm_modules += ['amdgpu', 'native', 'bitreader', 'ipo'] if with_gallium_r600 llvm_modules += 'asmparser' endif @@ -1223,6 +1234,9 @@ if with_llvm # programs, so we need to build all C++ code in mesa without rtti as well to # ensure that linking works. if dep_llvm.get_configtool_variable('has-rtti') == 'NO' + if with_gallium_nouveau + error('The Nouveau driver requires rtti. 
You either need to turn off nouveau or use an LLVM built with LLVM_ENABLE_RTTI.') + endif cpp_args += '-fno-rtti' endif elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr @@ -1317,13 +1331,6 @@ if with_platform_wayland 'linux-dmabuf', 'linux-dmabuf-unstable-v1.xml' ) pre_args += ['-DHAVE_WAYLAND_PLATFORM', '-DWL_HIDE_DEPRECATED'] -else - prog_wl_scanner = [] - wl_scanner_arg = '' - dep_wl_protocols = null_dep - dep_wayland_client = null_dep - dep_wayland_server = null_dep - wayland_dmabuf_xml = '' endif dep_x11 = null_dep @@ -1356,7 +1363,6 @@ if with_platform_x11 dep_xdamage = dependency('xdamage', version : '>= 1.1') dep_xfixes = dependency('xfixes') dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1') - dep_xxf86vm = dependency('xxf86vm', required : false) endif if (with_any_vk or with_glx == 'dri' or (with_gallium_vdpau or with_gallium_xvmc or with_gallium_va or @@ -1383,6 +1389,7 @@ if with_platform_x11 if with_glx == 'dri' if with_dri_platform == 'drm' dep_dri2proto = dependency('dri2proto', version : '>= 2.8') + dep_xxf86vm = dependency('xxf86vm') endif dep_glproto = dependency('glproto', version : '>= 1.4.14') endif @@ -1403,7 +1410,7 @@ endif _sensors = get_option('lmsensors') if _sensors != 'false' - dep_lmsensors = cc.find_library('libsensors', required : _sensors == 'true') + dep_lmsensors = cc.find_library('sensors', required : _sensors == 'true') if dep_lmsensors.found() pre_args += '-DHAVE_LIBSENSORS=1' endif @@ -1433,14 +1440,12 @@ elif with_glx == 'dri' 'xcb-glx >= 1.8.1'] if with_dri_platform == 'drm' gl_priv_reqs += 'xcb-dri2 >= 1.8' + gl_priv_reqs += 'xxf86vm' endif endif if dep_libdrm.found() gl_priv_reqs += 'libdrm >= 2.4.75' endif -if dep_xxf86vm.found() - gl_priv_reqs += 'xxf86vm' -endif gl_priv_libs = [] if dep_thread.found() diff --git a/meson_options.txt b/meson_options.txt index a1d5ab0e185..589d10bb3f3 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -318,3 +318,9 @@ option( choices : ['auto', 'true', 'false'], 
description : 'Enable VK_EXT_acquire_xlib_display.' ) +option( + 'glx-direct', + type : 'boolean', + value : true, + description : 'Enable direct rendering in GLX and EGL for DRI', +) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 1392ec0f238..8953da7f18d 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -2882,9 +2882,11 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, if (count == num_components) return value; - LLVMValueRef masks[] = { - ctx->i32_0, ctx->i32_1, - LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)}; + LLVMValueRef masks[MAX2(count, 2)]; + masks[0] = ctx->i32_0; + masks[1] = ctx->i32_1; + for (unsigned i = 2; i < count; i++) + masks[i] = LLVMConstInt(ctx->i32, i, false); if (count == 1) return LLVMBuildExtractElement(ctx->builder, value, masks[0], diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index e5fbe003f53..827cb5d85a8 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -311,9 +311,18 @@ static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, } static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, - LLVMValueRef src0) + LLVMValueRef src0, + unsigned bitsize) { - return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), ""); + LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, + LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), + ""); + result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, ""); + + if (bitsize == 32) + return result; + + return LLVMBuildFPExt(ctx->builder, result, ctx->f64, ""); } static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, @@ -932,7 +941,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]); break; case nir_op_b2f: - 
result = emit_b2f(&ctx->ac, src[0]); + result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; case nir_op_f2b: result = emit_f2b(&ctx->ac, src[0]); @@ -1613,37 +1622,45 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { - LLVMValueRef results[2]; - int load_bytes; int elem_size_bytes = instr->dest.ssa.bit_size / 8; int num_components = instr->num_components; - int num_bytes = num_components * elem_size_bytes; enum gl_access_qualifier access = nir_intrinsic_access(instr); LLVMValueRef glc = ctx->ac.i1false; if (access & (ACCESS_VOLATILE | ACCESS_COHERENT)) glc = ctx->ac.i1true; - for (int i = 0; i < num_bytes; i += load_bytes) { - load_bytes = MIN2(num_bytes - i, 16); - const char *load_name; - LLVMTypeRef data_type; - LLVMValueRef offset = get_src(ctx, instr->src[1]); - LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i, false); - LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, - get_src(ctx, instr->src[0]), false); - LLVMValueRef vindex = ctx->ac.i32_0; + LLVMValueRef offset = get_src(ctx, instr->src[1]); + LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, + get_src(ctx, instr->src[0]), false); + LLVMValueRef vindex = ctx->ac.i32_0; + + LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa); + LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type; - int idx = i ? 
1 : 0; + LLVMValueRef results[4]; + for (int i = 0; i < num_components;) { + int num_elems = num_components - i; + if (elem_size_bytes < 4) + num_elems = 1; + if (num_elems * elem_size_bytes > 16) + num_elems = 16 / elem_size_bytes; + int load_bytes = num_elems * elem_size_bytes; + + LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false); + + LLVMValueRef ret; if (load_bytes == 2) { - results[idx] = ac_build_tbuffer_load_short(&ctx->ac, - rsrc, - vindex, - offset, - ctx->ac.i32_0, - immoffset, - glc); + ret = ac_build_tbuffer_load_short(&ctx->ac, + rsrc, + vindex, + offset, + ctx->ac.i32_0, + immoffset, + glc); } else { + const char *load_name; + LLVMTypeRef data_type; switch (load_bytes) { case 16: case 12: @@ -1669,33 +1686,23 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, glc, ctx->ac.i1false, }; - results[idx] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); - unsigned num_elems = ac_get_type_size(data_type) / elem_size_bytes; - LLVMTypeRef resTy = LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), num_elems); - results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], resTy, ""); + ret = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); } - } - assume(results[0]); - LLVMValueRef ret = results[0]; - if (num_bytes > 16 || num_components == 3) { - LLVMValueRef masks[] = { - LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), - LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), - }; + LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret))); + ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, ""); + ret = ac_trim_vector(&ctx->ac, ret, load_bytes); - if (num_bytes > 16 && num_components == 3) { - /* we end up with a v2i64 and i64 but shuffle fails on that */ - results[1] = ac_build_expand(&ctx->ac, results[1], 1, 2); - } + LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, 
num_elems); + ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, ""); - LLVMValueRef swizzle = LLVMConstVector(masks, num_components); - ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0], - results[num_bytes > 16 ? 1 : 0], swizzle, ""); + for (unsigned j = 0; j < num_elems; j++) { + results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), ""); + } + i += num_elems; } - return LLVMBuildBitCast(ctx->ac.builder, ret, - get_def_type(ctx, &instr->dest.ssa), ""); + return ac_build_gather_values(&ctx->ac, results, num_components); } static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, @@ -2371,17 +2378,27 @@ static void visit_image_store(struct ac_nir_context *ctx, glc = ctx->ac.i1true; if (dim == GLSL_SAMPLER_DIM_BUF) { + char name[48]; + const char *types[] = { "f32", "v2f32", "v4f32" }; LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true); + LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); + unsigned src_channels = ac_get_llvm_num_components(src); - params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); /* data */ + if (src_channels == 3) + src = ac_build_expand(&ctx->ac, src, 3, 4); + + params[0] = src; /* data */ params[1] = rsrc; params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); /* vindex */ params[3] = ctx->ac.i32_0; /* voffset */ + snprintf(name, sizeof(name), "%s.%s", + "llvm.amdgcn.buffer.store.format", + types[CLAMP(src_channels, 1, 3) - 1]); + params[4] = glc; /* glc */ params[5] = ctx->ac.i1false; /* slc */ - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, - params, 6, 0); + ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6, 0); } else { struct ac_image_args args = {}; args.opcode = ac_image_store; @@ -2793,7 +2810,7 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx, LLVMValueRef src0 = NULL; nir_variable *var = 
nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); - int input_index = var->data.location - VARYING_SLOT_VAR0; + int input_index = ctx->abi->fs_input_attr_indices[var->data.location - VARYING_SLOT_VAR0]; switch (instr->intrinsic) { case nir_intrinsic_interp_deref_at_centroid: location = INTERP_CENTROID; diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h index 6b9a91c92a9..ee18e6c1923 100644 --- a/src/amd/common/ac_shader_abi.h +++ b/src/amd/common/ac_shader_abi.h @@ -77,6 +77,9 @@ struct ac_shader_abi { */ LLVMValueRef *inputs; + /* Varying -> attribute number mapping. Also NIR-only */ + unsigned fs_input_attr_indices[MAX_VARYING]; + void (*emit_outputs)(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs); diff --git a/src/amd/vulkan/Android.mk b/src/amd/vulkan/Android.mk index 51b03561fa7..9574bf54e5a 100644 --- a/src/amd/vulkan/Android.mk +++ b/src/amd/vulkan/Android.mk @@ -74,7 +74,8 @@ LOCAL_C_INCLUDES := \ $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_vulkan_util,,)/util LOCAL_WHOLE_STATIC_LIBRARIES := \ - libmesa_vulkan_util + libmesa_vulkan_util \ + libmesa_git_sha1 LOCAL_GENERATED_SOURCES += $(intermediates)/radv_entrypoints.c LOCAL_GENERATED_SOURCES += $(intermediates)/radv_entrypoints.h diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 0f1261d4809..cc2aa7fd17a 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -140,7 +140,7 @@ libvulkan_radeon = shared_library( ], dependencies : [ dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m, - dep_valgrind, + dep_valgrind, radv_deps, idep_nir, ], c_args : [c_vis_args, no_override_init_args, radv_flags], diff --git a/src/amd/vulkan/radv_android.c b/src/amd/vulkan/radv_android.c index f5d70825dd2..1a4425f26a5 100644 --- a/src/amd/vulkan/radv_android.c +++ b/src/amd/vulkan/radv_android.c @@ -110,17 +110,6 @@ radv_image_from_gralloc(VkDevice device_h, struct radv_bo *bo 
= NULL; VkResult result; - result = radv_image_create(device_h, - &(struct radv_image_create_info) { - .vk_info = base_info, - .scanout = true, - .no_metadata_planes = true}, - alloc, - &image_h); - - if (result != VK_SUCCESS) - return result; - if (gralloc_info->handle->numFds != 1) { return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, "VkNativeBufferANDROID::handle::numFds is %d, " @@ -133,23 +122,14 @@ radv_image_from_gralloc(VkDevice device_h, */ int dma_buf = gralloc_info->handle->data[0]; - image = radv_image_from_handle(image_h); - VkDeviceMemory memory_h; - const VkMemoryDedicatedAllocateInfoKHR ded_alloc = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, - .pNext = NULL, - .buffer = VK_NULL_HANDLE, - .image = image_h - }; - const VkImportMemoryFdInfoKHR import_info = { .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, - .pNext = &ded_alloc, .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, .fd = dup(dma_buf), }; + /* Find the first VRAM memory type, or GART for PRIME images. */ int memory_type_index = -1; for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) { @@ -168,14 +148,49 @@ radv_image_from_gralloc(VkDevice device_h, &(VkMemoryAllocateInfo) { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = &import_info, - .allocationSize = image->size, + /* Max buffer size, unused for imports */ + .allocationSize = 0x7FFFFFFF, .memoryTypeIndex = memory_type_index, }, alloc, &memory_h); + if (result != VK_SUCCESS) + return result; + + struct radeon_bo_metadata md; + device->ws->buffer_get_metadata(radv_device_memory_from_handle(memory_h)->bo, &md); + + bool is_scanout; + if (device->physical_device->rad_info.chip_class >= GFX9) { + /* Copied from radeonsi, but is hacky so should be cleaned up. 
*/ + is_scanout = md.u.gfx9.swizzle_mode == 0 || md.u.gfx9.swizzle_mode % 4 == 2; + } else { + is_scanout = md.u.legacy.scanout; + } + + VkImageCreateInfo updated_base_info = *base_info; + + VkExternalMemoryImageCreateInfo external_memory_info = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = updated_base_info.pNext, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + + updated_base_info.pNext = &external_memory_info; + + result = radv_image_create(device_h, + &(struct radv_image_create_info) { + .vk_info = &updated_base_info, + .scanout = is_scanout, + .no_metadata_planes = true}, + alloc, + &image_h); + if (result != VK_SUCCESS) goto fail_create_image; + image = radv_image_from_handle(image_h); + radv_BindImageMemory(device_h, image_h, memory_h, 0); image->owned_memory = memory_h; @@ -185,9 +200,7 @@ radv_image_from_gralloc(VkDevice device_h, return VK_SUCCESS; fail_create_image: -fail_size: - radv_DestroyImage(device_h, image_h, alloc); - + radv_FreeMemory(device_h, memory_h, alloc); return result; } diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c43e12f6d62..4ebb01c6810 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1068,7 +1068,7 @@ static void radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, struct radv_image *image, VkImageLayout layout, - bool requires_cond_write) + bool requires_cond_exec) { uint32_t db_z_info = ds->db_z_info; uint32_t db_z_info_reg; @@ -1092,38 +1092,21 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, } /* When we don't know the last fast clear value we need to emit a - * conditional packet, otherwise we can update DB_Z_INFO directly. + * conditional packet that will eventually skip the following + * SET_CONTEXT_REG packet. 
*/ - if (requires_cond_write) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0)); - - const uint32_t write_space = 0 << 8; /* register */ - const uint32_t poll_space = 1 << 4; /* memory */ - const uint32_t function = 3 << 0; /* equal to the reference */ - const uint32_t options = write_space | poll_space | function; - radeon_emit(cmd_buffer->cs, options); - - /* poll address - location of the depth clear value */ + if (requires_cond_exec) { uint64_t va = radv_buffer_get_va(image->bo); - va += image->offset + image->clear_value_offset; - - /* In presence of stencil format, we have to adjust the base - * address because the first value is the stencil clear value. - */ - if (vk_format_is_stencil(image->vk_format)) - va += 4; + va += image->offset + image->tc_compat_zrange_offset; + radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0)); radeon_emit(cmd_buffer->cs, va); radeon_emit(cmd_buffer->cs, va >> 32); - - radeon_emit(cmd_buffer->cs, fui(0.0f)); /* reference value */ - radeon_emit(cmd_buffer->cs, (uint32_t)-1); /* comparison mask */ - radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */ - radeon_emit(cmd_buffer->cs, 0u); /* write address high */ - radeon_emit(cmd_buffer->cs, db_z_info); - } else { - radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); + radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */ } + + radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); } static void @@ -1270,6 +1253,45 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cs, fui(ds_clear_value.depth)); } +/** + * Update the TC-compat metadata value for this image. 
+ */ +static void +radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + uint32_t value) +{ + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->tc_compat_zrange_offset; + + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, value); +} + +static void +radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + VkClearDepthStencilValue ds_clear_value) +{ + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->tc_compat_zrange_offset; + uint32_t cond_val; + + /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last + * depth clear value is 0.0f. + */ + cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0; + + radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val); +} + /** * Update the clear depth/stencil values for this image. */ @@ -1283,6 +1305,12 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects); + if (radv_image_is_tc_compat_htile(image) && + (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { + radv_update_tc_compat_zrange_metadata(cmd_buffer, image, + ds_clear_value); + } + radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value, aspects); } @@ -1950,6 +1978,8 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) va = radv_buffer_get_va(buffer->bo) + buffer->offset; + va += sb[i].offset; + /* Set the descriptor. 
* * On VI, the format must be non-INVALID, otherwise @@ -3518,8 +3548,13 @@ static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; - /* Index & Vertex buffer don't change context regs, and pipeline is handled later. */ - used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_PIPELINE); + /* Index, vertex and streamout buffers don't change context regs, and + * pipeline is handled later. + */ + used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | + RADV_CMD_DIRTY_VERTEX_BUFFER | + RADV_CMD_DIRTY_STREAMOUT_BUFFER | + RADV_CMD_DIRTY_PIPELINE); /* Assume all state changes except these two can imply context rolls. */ if (cmd_buffer->state.dirty & used_states) @@ -4185,6 +4220,15 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects); + + if (radv_image_is_tc_compat_htile(image)) { + /* Initialize the TC-compat metada value to 0 because by + * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only + * need have to conditionally update its value when performing + * a fast depth clear. + */ + radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0); + } } static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, @@ -4613,6 +4657,8 @@ void radv_CmdBeginConditionalRenderingEXT( draw_visible = false; } + si_emit_cache_flush(cmd_buffer); + /* Enable predication for this command buffer. 
*/ si_emit_set_predication_state(cmd_buffer, draw_visible, va); cmd_buffer->state.predicating = true; @@ -4741,28 +4787,30 @@ void radv_CmdBeginTransformFeedbackEXT( struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; struct radv_streamout_state *so = &cmd_buffer->state.streamout; struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint32_t i; radv_flush_vgt_streamout(cmd_buffer); assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - for (uint32_t i = firstCounterBuffer; i < counterBufferCount; i++) { - if (!(so->enabled_mask & (1 << i))) - continue; + for_each_bit(i, so->enabled_mask) { + int32_t counter_buffer_idx = i - firstCounterBuffer; + if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) + counter_buffer_idx = -1; /* SI binds streamout buffers as shader resources. * VGT only counts primitives and tells the shader through * SGPRs what to do. */ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2); - radeon_emit(cs, (sb[i].offset + sb[i].size) >> 2); /* BUFFER_SIZE (in DW) */ + radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */ - if (pCounterBuffers && pCounterBuffers[i]) { + if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { /* The array of counter buffers is optional. 
*/ - RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[i]); + RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); uint64_t va = radv_buffer_get_va(buffer->bo); - va += buffer->offset + pCounterBufferOffsets[i]; + va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx]; /* Append */ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); @@ -4783,7 +4831,7 @@ void radv_CmdBeginTransformFeedbackEXT( STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */ radeon_emit(cs, 0); /* unused */ radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, sb[i].offset >> 2); /* buffer offset in DW */ + radeon_emit(cs, 0); /* unused */ radeon_emit(cs, 0); /* unused */ } } @@ -4801,20 +4849,22 @@ void radv_CmdEndTransformFeedbackEXT( RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_streamout_state *so = &cmd_buffer->state.streamout; struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint32_t i; radv_flush_vgt_streamout(cmd_buffer); assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - for (uint32_t i = firstCounterBuffer; i < counterBufferCount; i++) { - if (!(so->enabled_mask & (1 << i))) - continue; + for_each_bit(i, so->enabled_mask) { + int32_t counter_buffer_idx = i - firstCounterBuffer; + if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) + counter_buffer_idx = -1; - if (pCounterBuffers && pCounterBuffers[i]) { + if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { /* The array of counters buffer is optional. 
*/ - RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[i]); + RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); uint64_t va = radv_buffer_get_va(buffer->bo); - va += buffer->offset + pCounterBufferOffsets[i]; + va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx]; radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index d68111c25bf..ac6cff23d58 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -936,9 +936,9 @@ void radv_GetPhysicalDeviceProperties( 2048, 2048 }, - .subPixelPrecisionBits = 4 /* FIXME */, - .subTexelPrecisionBits = 4 /* FIXME */, - .mipmapPrecisionBits = 4 /* FIXME */, + .subPixelPrecisionBits = 8, + .subTexelPrecisionBits = 8, + .mipmapPrecisionBits = 8, .maxDrawIndexedIndexValue = UINT32_MAX, .maxDrawIndirectCount = UINT32_MAX, .maxSamplerLodBias = 16, @@ -1054,16 +1054,14 @@ void radv_GetPhysicalDeviceProperties2( (VkPhysicalDeviceSubgroupProperties*)ext; properties->subgroupSize = 64; properties->supportedStages = VK_SHADER_STAGE_ALL; - /* TODO: Enable VK_SUBGROUP_FEATURE_VOTE_BIT when wwm - * is fixed in LLVM. 
- */ properties->supportedOperations = - VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT | - VK_SUBGROUP_FEATURE_QUAD_BIT; + VK_SUBGROUP_FEATURE_QUAD_BIT | + VK_SUBGROUP_FEATURE_VOTE_BIT; if (pdevice->rad_info.chip_class >= VI) { properties->supportedOperations |= + VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT; } diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index 6bdf988d117..4a28f8bf41c 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -105,7 +105,7 @@ def __init__(self, name, ext_version, enable): Extension('VK_EXT_external_memory_dma_buf', 1, True), Extension('VK_EXT_external_memory_host', 1, 'device->rad_info.has_userptr'), Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'), - Extension('VK_EXT_pci_bus_info', 1, True), + Extension('VK_EXT_pci_bus_info', 1, False), Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), Extension('VK_EXT_shader_stencil_export', 1, True), diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 64346aa340f..daabc489afb 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -691,7 +691,7 @@ radv_query_opaque_metadata(struct radv_device *device, si_make_texture_descriptor(device, image, false, (VkImageViewType)image->type, image->vk_format, &fixedmapping, 0, image->info.levels - 1, 0, - image->info.array_size, + image->info.array_size - 1, image->info.width, image->info.height, image->info.depth, desc, NULL); @@ -870,6 +870,14 @@ radv_image_alloc_htile(struct radv_image *image) /* + 8 for storing the clear values */ image->clear_value_offset = image->htile_offset + image->surface.htile_size; image->size = image->clear_value_offset + 8; + if (radv_image_is_tc_compat_htile(image)) { + /* 
Metadata for the TC-compatible HTILE hardware bug which + * have to be fixed by updating ZRANGE_PRECISION when doing + * fast depth clears to 0.0f. + */ + image->tc_compat_zrange_offset = image->clear_value_offset + 8; + image->size = image->clear_value_offset + 16; + } image->alignment = align64(image->alignment, image->surface.htile_alignment); } @@ -977,7 +985,7 @@ radv_image_create(VkDevice _device, image->shareable = vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL; - if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) { + if (!vk_format_is_depth_or_stencil(pCreateInfo->format) && !create_info->scanout && !image->shareable) { image->info.surf_index = &device->image_mrt_offset_counter; } @@ -1014,8 +1022,8 @@ radv_image_create(VkDevice _device, /* Otherwise, try to enable HTILE for depth surfaces. */ if (radv_image_can_enable_htile(image) && !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) { - radv_image_alloc_htile(image); image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE; + radv_image_alloc_htile(image); } else { image->surface.htile_size = 0; } @@ -1175,8 +1183,6 @@ radv_image_view_init(struct radv_image_view *iview, if (device->physical_device->rad_info.chip_class >= GFX9 && vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) { - unsigned rounded_img_w = util_next_power_of_two(iview->extent.width); - unsigned rounded_img_h = util_next_power_of_two(iview->extent.height); unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel); unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel); @@ -1186,8 +1192,8 @@ radv_image_view_init(struct radv_image_view *iview, lvl_width <<= range->baseMipLevel; lvl_height <<= range->baseMipLevel; - iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w); - iview->extent.height = CLAMP(lvl_height, iview->extent.height, 
rounded_img_h); + iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->surface.u.gfx9.surf_pitch); + iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->surface.u.gfx9.surf_height); } } diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c index 6f074a70b4c..e9d680437e4 100644 --- a/src/amd/vulkan/radv_meta_bufimage.c +++ b/src/amd/vulkan/radv_meta_bufimage.c @@ -2061,7 +2061,7 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view); if (device->physical_device->rad_info.chip_class >= GFX9 && - src->image->type == VK_IMAGE_TYPE_3D) + (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D)) pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d; radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index f56eb01dc52..8c21c423511 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2242,6 +2242,8 @@ handle_fs_inputs(struct radv_shader_context *ctx, if (LLVMIsUndef(interp_param)) ctx->shader_info->fs.flat_shaded_mask |= 1u << index; + if (i >= VARYING_SLOT_VAR0) + ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index; ++index; } else if (i == VARYING_SLOT_CLIP_DIST0) { int length = ctx->shader_info->info.ps.num_input_clips_culls; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index bced19573c1..cc025f55ea3 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3396,8 +3396,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline, (pipeline->graphics.prim_restart_enable && (device->physical_device->rad_info.family < CHIP_POLARIS10 || (prim != V_008958_DI_PT_POINTLIST && - prim != V_008958_DI_PT_LINESTRIP && - prim != V_008958_DI_PT_TRISTRIP)))) + 
prim != V_008958_DI_PT_LINESTRIP)))) ia_multi_vgt_param.wd_switch_on_eop = true; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 7e9e82e3158..585702a88b2 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -595,6 +595,7 @@ struct radv_meta_state { VkPipelineLayout p_layout; VkPipeline occlusion_query_pipeline; VkPipeline pipeline_statistics_query_pipeline; + VkPipeline tfb_query_pipeline; } query; }; @@ -1497,6 +1498,14 @@ struct radv_image { uint64_t clear_value_offset; uint64_t dcc_pred_offset; + /* + * Metadata for the TC-compat zrange workaround. If the 32-bit value + * stored at this offset is UINT_MAX, the driver will emit + * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the + * SET_CONTEXT_REG packet. + */ + uint64_t tc_compat_zrange_offset; + /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */ VkDeviceMemory owned_memory; }; diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 57ea22fb847..cdff336f8a3 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -512,11 +512,233 @@ build_pipeline_statistics_query_shader(struct radv_device *device) { return b.shader; } +static nir_shader * +build_tfb_query_shader(struct radv_device *device) +{ + /* the shader this builds is roughly + * + * uint32_t src_stride = 32; + * + * location(binding = 0) buffer dst_buf; + * location(binding = 1) buffer src_buf; + * + * void main() { + * uint64_t result[2] = {}; + * bool available = false; + * uint64_t src_offset = src_stride * global_id.x; + * uint64_t dst_offset = dst_stride * global_id.x; + * uint64_t *src_data = src_buf[src_offset]; + * uint32_t avail = (src_data[0] >> 32) & + * (src_data[1] >> 32) & + * (src_data[2] >> 32) & + * (src_data[3] >> 32); + * if (avail & 0x80000000) { + * result[0] = src_data[3] - src_data[1]; + * result[1] = src_data[2] - src_data[0]; + * available = true; + * } + * uint32_t result_size = flags & 
VK_QUERY_RESULT_64_BIT ? 16 : 8; + * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) { + * if (flags & VK_QUERY_RESULT_64_BIT) { + * dst_buf[dst_offset] = result; + * } else { + * dst_buf[dst_offset] = (uint32_t)result; + * } + * } + * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + * dst_buf[dst_offset + result_size] = available; + * } + * } + */ + nir_builder b; + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "tfb_query"); + b.shader->info.cs.local_size[0] = 64; + b.shader->info.cs.local_size[1] = 1; + b.shader->info.cs.local_size[2] = 1; + + /* Create and initialize local variables. */ + nir_variable *result = + nir_local_variable_create(b.impl, + glsl_vector_type(GLSL_TYPE_UINT64, 2), + "result"); + nir_variable *available = + nir_local_variable_create(b.impl, glsl_int_type(), "available"); + + nir_store_var(&b, result, + nir_vec2(&b, nir_imm_int64(&b, 0), + nir_imm_int64(&b, 0)), 0x3); + nir_store_var(&b, available, nir_imm_int(&b, 0), 0x1); + + nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags"); + + /* Load resources. */ + nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader, + nir_intrinsic_vulkan_resource_index); + dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + nir_intrinsic_set_desc_set(dst_buf, 0); + nir_intrinsic_set_binding(dst_buf, 0); + nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL); + nir_builder_instr_insert(&b, &dst_buf->instr); + + nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader, + nir_intrinsic_vulkan_resource_index); + src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + nir_intrinsic_set_desc_set(src_buf, 0); + nir_intrinsic_set_binding(src_buf, 1); + nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL); + nir_builder_instr_insert(&b, &src_buf->instr); + + /* Compute global ID. 
*/ + nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); + nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *block_size = nir_imm_ivec4(&b, + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); + nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); + global_id = nir_channel(&b, global_id, 0); // We only care about x here. + + /* Compute src/dst strides. */ + nir_ssa_def *input_stride = nir_imm_int(&b, 32); + nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id); + nir_ssa_def *output_stride = radv_load_push_int(&b, 4, "output_stride"); + nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id); + + /* Load data from the query pool. */ + nir_intrinsic_instr *load1 = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); + load1->src[0] = nir_src_for_ssa(&src_buf->dest.ssa); + load1->src[1] = nir_src_for_ssa(input_base); + nir_ssa_dest_init(&load1->instr, &load1->dest, 4, 32, NULL); + load1->num_components = 4; + nir_builder_instr_insert(&b, &load1->instr); + + nir_intrinsic_instr *load2 = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); + load2->src[0] = nir_src_for_ssa(&src_buf->dest.ssa); + load2->src[1] = nir_src_for_ssa(nir_iadd(&b, input_base, nir_imm_int(&b, 16))); + nir_ssa_dest_init(&load2->instr, &load2->dest, 4, 32, NULL); + load2->num_components = 4; + nir_builder_instr_insert(&b, &load2->instr); + + /* Check if result is available. */ + nir_ssa_def *avails[2]; + avails[0] = nir_iand(&b, nir_channel(&b, &load1->dest.ssa, 1), + nir_channel(&b, &load1->dest.ssa, 3)); + avails[1] = nir_iand(&b, nir_channel(&b, &load2->dest.ssa, 1), + nir_channel(&b, &load2->dest.ssa, 3)); + nir_ssa_def *result_is_available = + nir_iand(&b, nir_iand(&b, avails[0], avails[1]), + nir_imm_int(&b, 0x80000000)); + + /* Only compute result if available. 
*/ + nir_if *available_if = nir_if_create(b.shader); + available_if->condition = nir_src_for_ssa(result_is_available); + nir_cf_node_insert(b.cursor, &available_if->cf_node); + + b.cursor = nir_after_cf_list(&available_if->then_list); + + /* Pack values. */ + nir_ssa_def *packed64[4]; + packed64[0] = nir_pack_64_2x32(&b, nir_vec2(&b, + nir_channel(&b, &load1->dest.ssa, 0), + nir_channel(&b, &load1->dest.ssa, 1))); + packed64[1] = nir_pack_64_2x32(&b, nir_vec2(&b, + nir_channel(&b, &load1->dest.ssa, 2), + nir_channel(&b, &load1->dest.ssa, 3))); + packed64[2] = nir_pack_64_2x32(&b, nir_vec2(&b, + nir_channel(&b, &load2->dest.ssa, 0), + nir_channel(&b, &load2->dest.ssa, 1))); + packed64[3] = nir_pack_64_2x32(&b, nir_vec2(&b, + nir_channel(&b, &load2->dest.ssa, 2), + nir_channel(&b, &load2->dest.ssa, 3))); + + /* Compute result. */ + nir_ssa_def *num_primitive_written = + nir_isub(&b, packed64[3], packed64[1]); + nir_ssa_def *primitive_storage_needed = + nir_isub(&b, packed64[2], packed64[0]); + + nir_store_var(&b, result, + nir_vec2(&b, num_primitive_written, + primitive_storage_needed), 0x3); + nir_store_var(&b, available, nir_imm_int(&b, 1), 0x1); + + b.cursor = nir_after_cf_node(&available_if->cf_node); + + /* Determine if result is 64 or 32 bit. */ + nir_ssa_def *result_is_64bit = + nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_64_BIT)); + nir_ssa_def *result_size = + nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), + nir_imm_int(&b, 8)); + + /* Store the result if complete or partial results have been requested. */ + nir_if *store_if = nir_if_create(b.shader); + store_if->condition = + nir_src_for_ssa(nir_ior(&b, nir_iand(&b, flags, + nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT)), + nir_load_var(&b, available))); + nir_cf_node_insert(b.cursor, &store_if->cf_node); + + b.cursor = nir_after_cf_list(&store_if->then_list); + + /* Store result. 
*/ + nir_if *store_64bit_if = nir_if_create(b.shader); + store_64bit_if->condition = nir_src_for_ssa(result_is_64bit); + nir_cf_node_insert(b.cursor, &store_64bit_if->cf_node); + + b.cursor = nir_after_cf_list(&store_64bit_if->then_list); + + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); + store->src[0] = nir_src_for_ssa(nir_load_var(&b, result)); + store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); + store->src[2] = nir_src_for_ssa(output_base); + nir_intrinsic_set_write_mask(store, 0x3); + store->num_components = 2; + nir_builder_instr_insert(&b, &store->instr); + + b.cursor = nir_after_cf_list(&store_64bit_if->else_list); + + store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); + store->src[0] = nir_src_for_ssa(nir_u2u32(&b, nir_load_var(&b, result))); + store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); + store->src[2] = nir_src_for_ssa(output_base); + nir_intrinsic_set_write_mask(store, 0x3); + store->num_components = 2; + nir_builder_instr_insert(&b, &store->instr); + + b.cursor = nir_after_cf_node(&store_64bit_if->cf_node); + + b.cursor = nir_after_cf_node(&store_if->cf_node); + + /* Store the availability bit if requested. 
*/ + nir_if *availability_if = nir_if_create(b.shader); + availability_if->condition = + nir_src_for_ssa(nir_iand(&b, flags, + nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))); + nir_cf_node_insert(b.cursor, &availability_if->cf_node); + + b.cursor = nir_after_cf_list(&availability_if->then_list); + + store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); + store->src[0] = nir_src_for_ssa(nir_load_var(&b, available)); + store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); + store->src[2] = nir_src_for_ssa(nir_iadd(&b, result_size, output_base)); + nir_intrinsic_set_write_mask(store, 0x1); + store->num_components = 1; + nir_builder_instr_insert(&b, &store->instr); + + b.cursor = nir_after_cf_node(&availability_if->cf_node); + + return b.shader; +} + static VkResult radv_device_init_meta_query_state_internal(struct radv_device *device) { VkResult result; struct radv_shader_module occlusion_cs = { .nir = NULL }; struct radv_shader_module pipeline_statistics_cs = { .nir = NULL }; + struct radv_shader_module tfb_cs = { .nir = NULL }; mtx_lock(&device->meta_state.mtx); if (device->meta_state.query.pipeline_statistics_query_pipeline) { @@ -525,6 +747,7 @@ static VkResult radv_device_init_meta_query_state_internal(struct radv_device *d } occlusion_cs.nir = build_occlusion_query_shader(device); pipeline_statistics_cs.nir = build_pipeline_statistics_query_shader(device); + tfb_cs.nir = build_tfb_query_shader(device); VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, @@ -611,12 +834,34 @@ static VkResult radv_device_init_meta_query_state_internal(struct radv_device *d radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, &pipeline_statistics_vk_pipeline_info, NULL, &device->meta_state.query.pipeline_statistics_query_pipeline); + if (result != VK_SUCCESS) + goto fail; + VkPipelineShaderStageCreateInfo tfb_pipeline_shader_stage = { + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = radv_shader_module_to_handle(&tfb_cs), + .pName = "main", + .pSpecializationInfo = NULL, + }; + + VkComputePipelineCreateInfo tfb_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = tfb_pipeline_shader_stage, + .flags = 0, + .layout = device->meta_state.query.p_layout, + }; + + result = radv_CreateComputePipelines(radv_device_to_handle(device), + radv_pipeline_cache_to_handle(&device->meta_state.cache), + 1, &tfb_pipeline_info, NULL, + &device->meta_state.query.tfb_query_pipeline); fail: if (result != VK_SUCCESS) radv_device_finish_meta_query_state(device); ralloc_free(occlusion_cs.nir); ralloc_free(pipeline_statistics_cs.nir); + ralloc_free(tfb_cs.nir); mtx_unlock(&device->meta_state.mtx); return result; } @@ -631,6 +876,11 @@ VkResult radv_device_init_meta_query_state(struct radv_device *device, bool on_d void radv_device_finish_meta_query_state(struct radv_device *device) { + if (device->meta_state.query.tfb_query_pipeline) + radv_DestroyPipeline(radv_device_to_handle(device), + device->meta_state.query.tfb_query_pipeline, + &device->meta_state.alloc); + if (device->meta_state.query.pipeline_statistics_query_pipeline) radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.query.pipeline_statistics_query_pipeline, @@ -663,6 +913,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, { struct radv_device *device = cmd_buffer->device; struct radv_meta_saved_state saved_state; + bool old_predicating; if (!*pipeline) { VkResult ret = radv_device_init_meta_query_state_internal(device); @@ -677,6 +928,12 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); + /* VK_EXT_conditional_rendering says that copy commands should not be + * affected by conditional rendering. 
+ */ + old_predicating = cmd_buffer->state.predicating; + cmd_buffer->state.predicating = false; + struct radv_buffer dst_buffer = { .bo = dst_bo, .offset = dst_offset, @@ -758,6 +1015,8 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 | RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH; + /* Restore conditional rendering. */ + cmd_buffer->state.predicating = old_predicating; radv_meta_restore(&saved_state, cmd_buffer); } @@ -1082,10 +1341,13 @@ void radv_CmdCopyQueryPoolResults( if (flags & VK_QUERY_RESULT_WAIT_BIT) { + /* Wait on the high 32 bits of the timestamp in + * case the low part is 0xffffffff. + */ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false)); radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, local_src_va); - radeon_emit(cs, local_src_va >> 32); + radeon_emit(cs, local_src_va + 4); + radeon_emit(cs, (local_src_va + 4) >> 32); radeon_emit(cs, TIMESTAMP_NOT_READY >> 32); radeon_emit(cs, 0xffffffff); radeon_emit(cs, 4); @@ -1115,6 +1377,33 @@ void radv_CmdCopyQueryPoolResults( assert(cs->cdw <= cdw_max); } break; + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + for(unsigned i = 0; i < queryCount; i++) { + unsigned query = firstQuery + i; + uint64_t src_va = va + query * pool->stride; + + /* Wait on the upper word of all results. 
*/ + for (unsigned j = 0; j < 4; j++, src_va += 8) { + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); + radeon_emit(cs, WAIT_REG_MEM_GREATER_OR_EQUAL | + WAIT_REG_MEM_MEM_SPACE(1)); + radeon_emit(cs, (src_va + 4)); + radeon_emit(cs, (src_va + 4) >> 32); + radeon_emit(cs, 0x80000000); /* reference value */ + radeon_emit(cs, 0xffffffff); /* mask */ + radeon_emit(cs, 4); /* poll interval */ + } + } + } + + radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline, + pool->bo, dst_buffer->bo, + firstQuery * pool->stride, + dst_buffer->offset + dstOffset, + pool->stride, stride, + queryCount, flags, 0, 0); + break; default: unreachable("trying to get results of unhandled query type"); } @@ -1161,6 +1450,22 @@ static unsigned event_type_for_stream(unsigned stream) } } +static void emit_query_flush(struct radv_cmd_buffer *cmd_buffer, + struct radv_query_pool *pool) +{ + if (cmd_buffer->pending_reset_query) { + if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) { + /* Only need to flush caches if the query pool size is + * large enough to be resetted using the compute shader + * path. Small pools don't need any cache flushes + * because we use a CP dma clear. + */ + si_emit_cache_flush(cmd_buffer); + cmd_buffer->pending_reset_query = false; + } + } +} + static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkQueryType query_type, @@ -1307,17 +1612,7 @@ void radv_CmdBeginQueryIndexedEXT( radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo); - if (cmd_buffer->pending_reset_query) { - if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) { - /* Only need to flush caches if the query pool size is - * large enough to be resetted using the compute shader - * path. Small pools don't need any cache flushes - * because we use a CP dma clear. 
- */ - si_emit_cache_flush(cmd_buffer); - cmd_buffer->pending_reset_query = false; - } - } + emit_query_flush(cmd_buffer, pool); va += pool->stride * query; @@ -1394,6 +1689,8 @@ void radv_CmdWriteTimestamp( radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo); + emit_query_flush(cmd_buffer, pool); + int num_queries = 1; if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask); diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index 7977d46229e..e9d541ab150 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -223,6 +223,8 @@ struct radeon_winsys { void (*buffer_set_metadata)(struct radeon_winsys_bo *bo, struct radeon_bo_metadata *md); + void (*buffer_get_metadata)(struct radeon_winsys_bo *bo, + struct radeon_bo_metadata *md); void (*buffer_virtual_bind)(struct radeon_winsys_bo *parent, uint64_t offset, uint64_t size, diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 25764d93f6a..ec126bfc7cb 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -304,8 +304,12 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, return NULL; } + unsigned virt_alignment = alignment; + if (size >= ws->info.pte_fragment_size) + virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size); + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, - size, alignment, 0, &va, &va_handle, + size, virt_alignment, 0, &va, &va_handle, (flags & RADEON_FLAG_32BIT ? 
AMDGPU_VA_RANGE_32_BIT : 0) | AMDGPU_VA_RANGE_HIGH); if (r) @@ -536,6 +540,21 @@ radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, return true; } +static unsigned eg_tile_split(unsigned tile_split) +{ + switch (tile_split) { + case 0: tile_split = 64; break; + case 1: tile_split = 128; break; + case 2: tile_split = 256; break; + case 3: tile_split = 512; break; + default: + case 4: tile_split = 1024; break; + case 5: tile_split = 2048; break; + case 6: tile_split = 4096; break; + } + return tile_split; +} + static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split) { switch (eg_tile_split) { @@ -589,6 +608,43 @@ radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo, amdgpu_bo_set_metadata(bo->bo, &metadata); } +static void +radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo, + struct radeon_bo_metadata *md) +{ + struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + struct amdgpu_bo_info info = {0}; + + int r = amdgpu_bo_query_info(bo->bo, &info); + if (r) + return; + + uint64_t tiling_flags = info.metadata.tiling_info; + + if (bo->ws->info.chip_class >= GFX9) { + md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE); + } else { + md->u.legacy.microtile = RADEON_LAYOUT_LINEAR; + md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR; + + if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */ + md->u.legacy.macrotile = RADEON_LAYOUT_TILED; + else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */ + md->u.legacy.microtile = RADEON_LAYOUT_TILED; + + md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); + md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); + md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); + md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT)); + md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); + md->u.legacy.num_banks = 2 << 
AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); + md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */ + } + + md->size_metadata = info.metadata.size_metadata; + memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata)); +} + void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws) { ws->base.buffer_create = radv_amdgpu_winsys_bo_create; @@ -599,5 +655,6 @@ void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws) ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd; ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd; ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata; + ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata; ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind; } diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 4f3b621fd29..54483195952 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -392,6 +392,7 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) for (int i = 0; i < ARRAY_SIZE(state->last_r); i++) add_write_dep(state, &state->last_r[i], n); add_write_dep(state, &state->last_sf, n); + add_write_dep(state, &state->last_rtop, n); /* Scoreboard-locking operations have to stay after the last * thread switch. 
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c index b5a7b841ef6..4baadce294c 100644 --- a/src/broadcom/compiler/vir_to_qpu.c +++ b/src/broadcom/compiler/vir_to_qpu.c @@ -364,6 +364,7 @@ v3d_dump_qpu(struct v3d_compile *c) for (int i = 0; i < c->qpu_inst_count; i++) { const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]); fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str); + ralloc_free((void *)str); } fprintf(stderr, "\n"); } diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 0846cc86174..147017a6594 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -551,6 +551,7 @@ bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) { return (waddr == V3D_QPU_WADDR_SYNC || + waddr == V3D_QPU_WADDR_SYNCB || waddr == V3D_QPU_WADDR_SYNCU); } diff --git a/src/compiler/Android.glsl.mk b/src/compiler/Android.glsl.mk index 0aabafa2673..37b3cb80251 100644 --- a/src/compiler/Android.glsl.mk +++ b/src/compiler/Android.glsl.mk @@ -48,7 +48,7 @@ LOCAL_STATIC_LIBRARIES := \ libmesa_nir LOCAL_MODULE := libmesa_glsl - +LOCAL_CFLAGS += -Wno-error include $(LOCAL_PATH)/Android.glsl.gen.mk include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/compiler/Android.nir.mk b/src/compiler/Android.nir.mk index 75a247a245d..59da5dbdc1c 100644 --- a/src/compiler/Android.nir.mk +++ b/src/compiler/Android.nir.mk @@ -41,6 +41,9 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary +LOCAL_CFLAGS := \ + -Wno-missing-braces + LOCAL_STATIC_LIBRARIES := libmesa_compiler LOCAL_MODULE := libmesa_nir diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 084b7021a9f..f4bd8c17db3 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -892,7 +892,8 @@ validate_assignment(struct _mesa_glsl_parse_state *state, } if (unsized_array) { if (is_initializer) { - return rhs; + if 
(rhs->type->get_scalar_type() == lhs->type->get_scalar_type()) + return rhs; } else { _mesa_glsl_error(&loc, state, "implicitly sized arrays cannot be assigned"); @@ -7422,7 +7423,7 @@ ast_process_struct_or_iface_block_members(exec_list *instructions, if (member_align == 0 || member_align & (member_align - 1)) { _mesa_glsl_error(&loc, state, "align layout qualifier " - "in not a power of 2"); + "is not a power of 2"); } else { fields[i].offset = glsl_align(offset, member_align); next_offset = glsl_align(fields[i].offset + size, align); diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 5650365d1d5..b6018806865 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -525,12 +525,6 @@ supports_nv_fragment_shader_interlock(const _mesa_glsl_parse_state *state) return state->NV_fragment_shader_interlock_enable; } -static bool -supports_intel_fragment_shader_ordering(const _mesa_glsl_parse_state *state) -{ - return state->INTEL_fragment_shader_ordering_enable; -} - static bool shader_clock(const _mesa_glsl_parse_state *state) { @@ -1311,11 +1305,6 @@ builtin_builder::create_intrinsics() supports_arb_fragment_shader_interlock, ir_intrinsic_end_invocation_interlock), NULL); - add_function("__intrinsic_begin_fragment_shader_ordering", - _invocation_interlock_intrinsic( - supports_intel_fragment_shader_ordering, - ir_intrinsic_begin_fragment_shader_ordering), NULL); - add_function("__intrinsic_shader_clock", _shader_clock_intrinsic(shader_clock, glsl_type::uvec2_type), @@ -3430,12 +3419,6 @@ builtin_builder::create_builtins() supports_nv_fragment_shader_interlock), NULL); - add_function("beginFragmentShaderOrderingINTEL", - _invocation_interlock( - "__intrinsic_begin_fragment_shader_ordering", - supports_intel_fragment_shader_ordering), - NULL); - add_function("anyInvocationARB", _vote("__intrinsic_vote_any", vote), NULL); diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y index 1c095cb66f9..c951d9526ac 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -224,10 +224,12 @@ expanded_line: glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); _glcpp_parser_skip_stack_change_if (parser, & @1, "elif", $2.value); } -| LINE_EXPANDED integer_constant NEWLINE { +| LINE_EXPANDED expression NEWLINE { + if (parser->is_gles && $2.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); parser->has_new_line_number = 1; - parser->new_line_number = $2; - _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2); + parser->new_line_number = $2.value; + _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2.value); } | LINE_EXPANDED integer_constant integer_constant NEWLINE { parser->has_new_line_number = 1; @@ -238,6 +240,17 @@ expanded_line: "#line %" PRIiMAX " %" PRIiMAX "\n", $2, $3); } +| LINE_EXPANDED '(' expression ')' '(' expression ')' NEWLINE { + if (parser->is_gles && $3.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $3.undefined_macro); + if (parser->is_gles && $6.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $6.undefined_macro); + parser->has_new_line_number = 1; + parser->new_line_number = $3.value; + parser->has_new_source_number = 1; + parser->new_source_number = $6.value; + _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX " %" PRIiMAX "\n", $3.value, $6.value); + } ; define: diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 1bdd7c4bf17..efd1a013dbd 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -727,7 +727,6 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { 
EXT_AEP(EXT_texture_buffer), EXT_AEP(EXT_texture_cube_map_array), EXT(INTEL_conservative_rasterization), - EXT(INTEL_fragment_shader_ordering), EXT(INTEL_shader_atomic_float_minmax), EXT(MESA_shader_integer_functions), EXT(NV_fragment_shader_interlock), diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h index 966d848509c..69aa6cf9cf3 100644 --- a/src/compiler/glsl/glsl_parser_extras.h +++ b/src/compiler/glsl/glsl_parser_extras.h @@ -812,8 +812,6 @@ struct _mesa_glsl_parse_state { bool EXT_texture_cube_map_array_warn; bool INTEL_conservative_rasterization_enable; bool INTEL_conservative_rasterization_warn; - bool INTEL_fragment_shader_ordering_enable; - bool INTEL_fragment_shader_ordering_warn; bool INTEL_shader_atomic_float_minmax_enable; bool INTEL_shader_atomic_float_minmax_warn; bool MESA_shader_integer_functions_enable; diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 0479f8fcfe4..0956d2f6303 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -742,9 +742,6 @@ nir_visitor::visit(ir_call *ir) case ir_intrinsic_end_invocation_interlock: op = nir_intrinsic_end_invocation_interlock; break; - case ir_intrinsic_begin_fragment_shader_ordering: - op = nir_intrinsic_begin_fragment_shader_ordering; - break; case ir_intrinsic_group_memory_barrier: op = nir_intrinsic_group_memory_barrier; break; @@ -983,9 +980,6 @@ nir_visitor::visit(ir_call *ir) case nir_intrinsic_end_invocation_interlock: nir_builder_instr_insert(&b, &instr->instr); break; - case nir_intrinsic_begin_fragment_shader_ordering: - nir_builder_instr_insert(&b, &instr->instr); - break; case nir_intrinsic_store_ssbo: { exec_node *param = ir->actual_parameters.get_head(); ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h index f478b29a6b5..d05d1998a50 100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@ -1122,7 
+1122,6 @@ enum ir_intrinsic_id { ir_intrinsic_memory_barrier_shared, ir_intrinsic_begin_invocation_interlock, ir_intrinsic_end_invocation_interlock, - ir_intrinsic_begin_fragment_shader_ordering, ir_intrinsic_vote_all, ir_intrinsic_vote_any, diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index 52e493cb599..3969c0120b3 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -481,9 +481,10 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4], /* Component aliasing is not alloed */ if (comp >= component && comp < last_comp) { linker_error(prog, - "%s shader has multiple outputs explicitly " + "%s shader has multiple %sputs explicitly " "assigned to location %d and component %d\n", _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? "in" : "out", location, comp); return false; } else { @@ -502,10 +503,12 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4], if (info->interpolation != interpolation) { linker_error(prog, - "%s shader has multiple outputs at explicit " + "%s shader has multiple %sputs at explicit " "location %u with different interpolation " "settings\n", - _mesa_shader_stage_to_string(stage), location); + _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? + "in" : "out", location); return false; } @@ -513,9 +516,11 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4], info->sample != sample || info->patch != patch) { linker_error(prog, - "%s shader has multiple outputs at explicit " + "%s shader has multiple %sputs at explicit " "location %u with different aux storage\n", - _mesa_shader_stage_to_string(stage), location); + _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? 
+ "in" : "out", location); return false; } } diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp index 267700e7e78..26d8ec4b75b 100644 --- a/src/compiler/glsl/serialize.cpp +++ b/src/compiler/glsl/serialize.cpp @@ -360,13 +360,20 @@ read_xfb(struct blob_reader *metadata, struct gl_shader_program *shProg) if (xfb_stage == ~0u) return; + if (shProg->TransformFeedback.VaryingNames) { + for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; ++i) + free(shProg->TransformFeedback.VaryingNames[i]); + } + /* Data set by glTransformFeedbackVaryings. */ shProg->TransformFeedback.BufferMode = blob_read_uint32(metadata); blob_copy_bytes(metadata, &shProg->TransformFeedback.BufferStride, sizeof(shProg->TransformFeedback.BufferStride)); shProg->TransformFeedback.NumVarying = blob_read_uint32(metadata); + shProg->TransformFeedback.VaryingNames = (char **) - malloc(shProg->TransformFeedback.NumVarying * sizeof(GLchar *)); + realloc(shProg->TransformFeedback.VaryingNames, + shProg->TransformFeedback.NumVarying * sizeof(GLchar *)); /* Note, malloc used with VaryingNames. */ for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; i++) shProg->TransformFeedback.VaryingNames[i] = diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index ec3049ca06d..910f9c336f8 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -199,7 +199,6 @@ def barrier(name): barrier("memory_barrier_shared") barrier("begin_invocation_interlock") barrier("end_invocation_interlock") -barrier("begin_fragment_shader_ordering") # A conditional discard, with a single boolean source. 
intrinsic("discard_if", src_comp=[1]) diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c index de6f2481def..3845ed66b49 100644 --- a/src/compiler/nir/nir_linking_helpers.c +++ b/src/compiler/nir/nir_linking_helpers.c @@ -195,9 +195,12 @@ nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer) } static uint8_t -get_interp_type(nir_variable *var, bool default_to_smooth_interp) +get_interp_type(nir_variable *var, const struct glsl_type *type, + bool default_to_smooth_interp) { - if (var->data.interpolation != INTERP_MODE_NONE) + if (glsl_type_is_integer(type)) + return INTERP_MODE_FLAT; + else if (var->data.interpolation != INTERP_MODE_NONE) return var->data.interpolation; else if (default_to_smooth_interp) return INTERP_MODE_SMOOTH; @@ -252,7 +255,7 @@ get_slot_component_masks_and_interp_types(struct exec_list *var_list, unsigned comps_slot2 = 0; for (unsigned i = 0; i < slots; i++) { interp_type[location + i] = - get_interp_type(var, default_to_smooth_interp); + get_interp_type(var, type, default_to_smooth_interp); interp_loc[location + i] = get_interp_loc(var); if (dual_slot) { @@ -424,7 +427,7 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps, continue; bool found_new_offset = false; - uint8_t interp = get_interp_type(var, default_to_smooth_interp); + uint8_t interp = get_interp_type(var, type, default_to_smooth_interp); for (; cursor[interp] < 32; cursor[interp]++) { uint8_t cursor_used_comps = comps[cursor[interp]]; diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index 0be3aba9456..7ef032cd164 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -194,6 +194,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) } case nir_op_unpack_64_2x32: + case nir_op_unpack_32_2x16: return false; LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd); diff --git 
a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c index 897a0620872..40b90e6a313 100644 --- a/src/compiler/nir/nir_lower_indirect_derefs.c +++ b/src/compiler/nir/nir_lower_indirect_derefs.c @@ -205,9 +205,6 @@ nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes) { bool progress = false; - if (modes == 0) - return false; - nir_foreach_function(function, shader) { if (function->impl) progress = lower_indirects_impl(function->impl, modes) || progress; diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c index 5929a60aee8..be91a2a8fd6 100644 --- a/src/compiler/nir/nir_opt_constant_folding.c +++ b/src/compiler/nir/nir_opt_constant_folding.c @@ -64,9 +64,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) return false; if (bit_size == 0 && - !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_sizes[i])) { + !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_types[i])) bit_size = instr->src[i].src.ssa->bit_size; - } nir_instr *src_instr = instr->src[i].src.ssa->parent_instr; diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index 7a21ad56c79..594c4ddd0c2 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -265,7 +265,7 @@ lookup_entry_and_kill_aliases(struct util_dynarray *copies, { /* TODO: Take into account the write_mask. 
*/ - struct copy_entry *entry = NULL; + nir_deref_instr *dst_match = NULL; util_dynarray_foreach_reverse(copies, struct copy_entry, iter) { if (!iter->src.is_ssa) { /* If this write aliases the source of some entry, get rid of it */ @@ -278,13 +278,26 @@ lookup_entry_and_kill_aliases(struct util_dynarray *copies, nir_deref_compare_result comp = nir_compare_derefs(iter->dst, deref); if (comp & nir_derefs_equal_bit) { - assert(entry == NULL); - entry = iter; + /* Removing entries invalidate previous iter pointers, so we'll + * collect the matching entry later. Just make sure it is unique. + */ + assert(!dst_match); + dst_match = iter->dst; } else if (comp & nir_derefs_may_alias_bit) { copy_entry_remove(copies, iter); } } + struct copy_entry *entry = NULL; + if (dst_match) { + util_dynarray_foreach(copies, struct copy_entry, iter) { + if (iter->dst == dst_match) { + entry = iter; + break; + } + } + assert(entry); + } return entry; } @@ -337,6 +350,9 @@ store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry, const struct value *value, unsigned write_mask) { if (value->is_ssa) { + /* Clear src if it was being used as non-SSA. */ + if (!entry->src.is_ssa) + memset(entry->src.ssa, 0, sizeof(entry->src.ssa)); entry->src.is_ssa = true; /* Only overwrite the written components */ for (unsigned i = 0; i < 4; i++) { @@ -705,9 +721,9 @@ copy_prop_vars_block(struct copy_prop_var_state *state, lookup_entry_for_deref(copies, src, nir_derefs_a_contains_b_bit); struct value value; if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) { + /* If load works, intrin (the copy_deref) is removed. */ if (value.is_ssa) { nir_store_deref(b, dst, value.ssa[0], 0xf); - intrin = nir_instr_as_intrinsic(nir_builder_last_instr(b)); } else { /* If this would be a no-op self-copy, don't bother. 
*/ if (nir_compare_derefs(value.deref, dst) & nir_derefs_equal_bit) diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index 1fe95e53766..8a971c43f24 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -391,6 +391,34 @@ evaluate_if_condition(nir_if *nif, nir_cursor cursor, bool *value) } } +static nir_ssa_def * +clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu, + nir_ssa_def **src_defs) +{ + nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op); + nalu->exact = alu->exact; + + nir_ssa_dest_init(&nalu->instr, &nalu->dest.dest, + alu->dest.dest.ssa.num_components, + alu->dest.dest.ssa.bit_size, alu->dest.dest.ssa.name); + + nalu->dest.saturate = alu->dest.saturate; + nalu->dest.write_mask = alu->dest.write_mask; + + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + assert(alu->src[i].src.is_ssa); + nalu->src[i].src = nir_src_for_ssa(src_defs[i]); + nalu->src[i].negate = alu->src[i].negate; + nalu->src[i].abs = alu->src[i].abs; + memcpy(nalu->src[i].swizzle, alu->src[i].swizzle, + sizeof(nalu->src[i].swizzle)); + } + + nir_builder_instr_insert(b, &nalu->instr); + + return &nalu->dest.dest.ssa;; +} + /* * This propagates if condition evaluation down the chain of some alu * instructions. 
For example by checking the use of some of the following alu @@ -448,7 +476,7 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src, if (!evaluate_if_condition(nif, b->cursor, &bool_value)) return false; - nir_ssa_def *def[2] = {0}; + nir_ssa_def *def[4] = {0}; for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { if (alu->src[i].src.ssa == use_src->ssa) { def[i] = nir_imm_bool(b, bool_value); @@ -456,7 +484,8 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src, def[i] = alu->src[i].src.ssa; } } - nir_ssa_def *nalu = nir_build_alu(b, alu->op, def[0], def[1], NULL, NULL); + + nir_ssa_def *nalu = clone_alu_and_replace_src_defs(b, alu, def); /* Rewrite use to use new alu instruction */ nir_src new_src = nir_src_for_ssa(nalu); @@ -472,14 +501,21 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src, static bool can_propagate_through_alu(nir_src *src) { - if (src->parent_instr->type == nir_instr_type_alu && - (nir_instr_as_alu(src->parent_instr)->op == nir_op_ior || - nir_instr_as_alu(src->parent_instr)->op == nir_op_iand || - nir_instr_as_alu(src->parent_instr)->op == nir_op_inot || - nir_instr_as_alu(src->parent_instr)->op == nir_op_b2i)) - return true; + if (src->parent_instr->type != nir_instr_type_alu) + return false; - return false; + nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr); + switch (alu->op) { + case nir_op_ior: + case nir_op_iand: + case nir_op_inot: + case nir_op_b2i: + return true; + case nir_op_bcsel: + return src == &alu->src[0].src; + default: + return false; + } } static bool diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index d24f0941519..3cd61f66056 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -301,6 +301,11 @@ glsl_type_is_boolean(const struct glsl_type *type) { return type->is_boolean(); } +bool +glsl_type_is_integer(const struct glsl_type *type) +{ + return type->is_integer(); +} const glsl_type * 
glsl_void_type(void) diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index 77454fa9fab..70d593b96ab 100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@ -142,6 +142,7 @@ bool glsl_type_is_image(const struct glsl_type *type); bool glsl_type_is_dual_slot(const struct glsl_type *type); bool glsl_type_is_numeric(const struct glsl_type *type); bool glsl_type_is_boolean(const struct glsl_type *type); +bool glsl_type_is_integer(const struct glsl_type *type); bool glsl_sampler_type_is_shadow(const struct glsl_type *type); bool glsl_sampler_type_is_array(const struct glsl_type *type); bool glsl_contains_atomic(const struct glsl_type *type); diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 96ff09c3659..16d9c92046e 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1811,6 +1811,26 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, src[j] = src_val->constant->values[0]; } + /* fix up fixed size sources */ + switch (op) { + case nir_op_ishl: + case nir_op_ishr: + case nir_op_ushr: { + if (bit_size == 32) + break; + for (unsigned i = 0; i < num_components; ++i) { + switch (bit_size) { + case 64: src[1].u32[i] = src[1].u64[i]; break; + case 16: src[1].u32[i] = src[1].u16[i]; break; + case 8: src[1].u32[i] = src[1].u8[i]; break; + } + } + break; + } + default: + break; + } + val->constant->values[0] = nir_eval_const_opcode(op, num_components, bit_size, src); break; @@ -2874,13 +2894,19 @@ vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, return &vec->dest.dest.ssa; } +static nir_ssa_def * +nir_ieq_imm(nir_builder *b, nir_ssa_def *x, uint64_t i) +{ + return nir_ieq(b, x, nir_imm_intN_t(b, i, x->bit_size)); +} + nir_ssa_def * vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *index) { nir_ssa_def *dest = vtn_vector_extract(b, src, 0); for (unsigned i = 1; i < src->num_components; i++) - dest = 
nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + dest = nir_bcsel(&b->nb, nir_ieq_imm(&b->nb, index, i), vtn_vector_extract(b, src, i), dest); return dest; @@ -2892,7 +2918,7 @@ vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, { nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); for (unsigned i = 1; i < src->num_components; i++) - dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + dest = nir_bcsel(&b->nb, nir_ieq_imm(&b->nb, index, i), vtn_vector_insert(b, src, insert, i), dest); return dest; diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c index 6860e7dc090..a23f8c29b5c 100644 --- a/src/compiler/spirv/vtn_alu.c +++ b/src/compiler/spirv/vtn_alu.c @@ -696,6 +696,17 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, src[1] = tmp; } + switch (op) { + case nir_op_ishl: + case nir_op_ishr: + case nir_op_ushr: + if (src[1]->bit_size != 32) + src[1] = nir_u2u32(&b->nb, src[1]); + break; + default: + break; + } + val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); break; } /* default */ diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c index 726f717e8d5..6406f4911df 100644 --- a/src/compiler/spirv/vtn_cfg.c +++ b/src/compiler/spirv/vtn_cfg.c @@ -47,6 +47,7 @@ vtn_type_count_function_params(struct vtn_type *type) { switch (type->base_type) { case vtn_base_type_array: + case vtn_base_type_matrix: return type->length * vtn_type_count_function_params(type->array_element); case vtn_base_type_struct: { @@ -76,6 +77,7 @@ vtn_type_add_to_function_params(struct vtn_type *type, switch (type->base_type) { case vtn_base_type_array: + case vtn_base_type_matrix: for (unsigned i = 0; i < type->length; i++) vtn_type_add_to_function_params(type->array_element, func, param_idx); break; @@ -123,6 +125,7 @@ vtn_ssa_value_add_to_call_params(struct vtn_builder *b, { switch (type->base_type) { case vtn_base_type_array: + case vtn_base_type_matrix: for 
(unsigned i = 0; i < type->length; i++) { vtn_ssa_value_add_to_call_params(b, value->elems[i], type->array_element, @@ -152,6 +155,7 @@ vtn_ssa_value_load_function_param(struct vtn_builder *b, { switch (type->base_type) { case vtn_base_type_array: + case vtn_base_type_matrix: for (unsigned i = 0; i < type->length; i++) { vtn_ssa_value_load_function_param(b, value->elems[i], type->array_element, param_idx); diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index 06a49e48e3f..0d8100384d6 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -807,10 +807,9 @@ handle_glsl450_interpolation(struct vtn_builder *b, enum GLSLstd450 opcode, if (vec_array_deref) { assert(vec_deref); - nir_const_value *const_index = nir_src_as_const_value(vec_deref->arr.index); - if (const_index) { + if (nir_src_is_const(vec_deref->arr.index)) { val->ssa->def = vtn_vector_extract(b, &intrin->dest.ssa, - const_index->u32[0]); + nir_src_as_uint(vec_deref->arr.index)); } else { val->ssa->def = vtn_vector_extract_dynamic(b, &intrin->dest.ssa, vec_deref->arr.index.ssa); diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index c5cf345d02a..0eb9f263436 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -132,12 +132,12 @@ vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link, } else if (stride == 1) { nir_ssa_def *ssa = vtn_ssa_value(b, link.id)->def; if (ssa->bit_size != 32) - ssa = nir_u2u32(&b->nb, ssa); + ssa = nir_i2i32(&b->nb, ssa); return ssa; } else { nir_ssa_def *src0 = vtn_ssa_value(b, link.id)->def; if (src0->bit_size != 32) - src0 = nir_u2u32(&b->nb, src0); + src0 = nir_i2i32(&b->nb, src0); return nir_imul(&b->nb, src0, nir_imm_int(&b->nb, stride)); } } @@ -512,9 +512,9 @@ vtn_local_load(struct vtn_builder *b, nir_deref_instr *src) if (src_tail != src) { val->type = src->type; - nir_const_value *const_index = 
nir_src_as_const_value(src->arr.index); - if (const_index) - val->def = vtn_vector_extract(b, val->def, const_index->u32[0]); + if (nir_src_is_const(src->arr.index)) + val->def = vtn_vector_extract(b, val->def, + nir_src_as_uint(src->arr.index)); else val->def = vtn_vector_extract_dynamic(b, val->def, src->arr.index.ssa); } @@ -532,10 +532,9 @@ vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type); _vtn_local_load_store(b, true, dest_tail, val); - nir_const_value *const_index = nir_src_as_const_value(dest->arr.index); - if (const_index) + if (nir_src_is_const(dest->arr.index)) val->def = vtn_vector_insert(b, val->def, src->def, - const_index->u32[0]); + nir_src_as_uint(dest->arr.index)); else val->def = vtn_vector_insert_dynamic(b, val->def, src->def, dest->arr.index.ssa); diff --git a/src/egl/Android.mk b/src/egl/Android.mk index 42b391e6d86..bbc7df2aff8 100644 --- a/src/egl/Android.mk +++ b/src/egl/Android.mk @@ -45,7 +45,10 @@ LOCAL_CFLAGS := \ LOCAL_C_INCLUDES := \ $(MESA_TOP)/include/drm-uapi \ $(MESA_TOP)/src/egl/main \ - $(MESA_TOP)/src/egl/drivers/dri2 + $(MESA_TOP)/src/egl/drivers/dri2 \ + frameworks/native/libs/nativebase/include \ + frameworks/native/libs/nativewindow/include \ + frameworks/native/libs/arect/include LOCAL_STATIC_LIBRARIES := \ libmesa_util \ @@ -64,6 +67,10 @@ ifeq ($(BOARD_USES_DRM_GRALLOC),true) LOCAL_SHARED_LIBRARIES += libgralloc_drm endif +ifeq ($(strip $(BOARD_USES_GRALLOC1)),true) +LOCAL_CFLAGS += -DHAVE_GRALLOC1 +endif + ifeq ($(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5 6 7),) LOCAL_SHARED_LIBRARIES += libnativewindow endif @@ -79,8 +86,12 @@ ifneq ($(MESA_BUILD_GALLIUM),) LOCAL_REQUIRED_MODULES += gallium_dri endif +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_HEADER_LIBRARIES += libnativebase_headers +endif + LOCAL_MODULE := libGLES_mesa LOCAL_MODULE_RELATIVE_PATH := egl - +LOCAL_CFLAGS += -Wno-error include 
$(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 87e1a704c6e..81d4ea456b3 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -65,6 +65,38 @@ #include "util/u_vector.h" #include "mapi/glapi/glapi.h" +/* The kernel header drm_fourcc.h defines the DRM formats below. We duplicate + * some of the definitions here so that building Mesa won't bleeding-edge + * kernel headers. + */ +#ifndef DRM_FORMAT_R8 +#define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ +#endif + +#ifndef DRM_FORMAT_RG88 +#define DRM_FORMAT_RG88 fourcc_code('R', 'G', '8', '8') /* [15:0] R:G 8:8 little endian */ +#endif + +#ifndef DRM_FORMAT_GR88 +#define DRM_FORMAT_GR88 fourcc_code('G', 'R', '8', '8') /* [15:0] G:R 8:8 little endian */ +#endif + +#ifndef DRM_FORMAT_R16 +#define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R 16 little endian */ +#endif + +#ifndef DRM_FORMAT_GR1616 +#define DRM_FORMAT_GR1616 fourcc_code('G', 'R', '3', '2') /* [31:0] R:G 16:16 little endian */ +#endif + +#ifndef DRM_FORMAT_P010 +#define DRM_FORMAT_P010 fourcc_code('P', '0', '1', '0') /* 2x2 subsampled Cb:Cr plane 10 bits per channel */ +#endif + +#ifndef DRM_FORMAT_MOD_INVALID +#define DRM_FORMAT_MOD_INVALID ((1ULL<<56) - 1) +#endif + #define NUM_ATTRIBS 12 static void @@ -673,7 +705,7 @@ dri2_setup_screen(_EGLDisplay *disp) dri2_renderer_query_integer(dri2_dpy, __DRI2_RENDERER_HAS_CONTEXT_PRIORITY); - disp->Extensions.EXT_pixel_format_float = EGL_TRUE; + disp->Extensions.EXT_pixel_format_float = EGL_FALSE; if (dri2_renderer_query_integer(dri2_dpy, __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB)) @@ -2284,6 +2316,7 @@ dri2_num_fourcc_format_planes(EGLint format) case DRM_FORMAT_NV21: case DRM_FORMAT_NV16: case DRM_FORMAT_NV61: + case DRM_FORMAT_P010: return 2; case DRM_FORMAT_YUV410: @@ -2309,7 +2342,7 @@ dri2_check_dma_buf_format(const _EGLImageAttribs *attrs) { unsigned plane_n = 
dri2_num_fourcc_format_planes(attrs->DMABufFourCC.Value); if (plane_n == 0) { - _eglError(EGL_BAD_ATTRIBUTE, "invalid format"); + _eglError(EGL_BAD_MATCH, "unknown drm fourcc format"); return 0; } diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 4abe1ba1952..3e5a567472c 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -69,6 +69,10 @@ struct zwp_linux_dmabuf_v1; #include #endif /* HAVE_ANDROID_PLATFORM */ +#ifdef HAVE_GRALLOC1 +#include +#endif + #include "eglconfig.h" #include "eglcontext.h" #include "egldevice.h" @@ -237,7 +241,14 @@ struct dri2_egl_display #endif #ifdef HAVE_ANDROID_PLATFORM - const gralloc_module_t *gralloc; + const hw_module_t *gralloc; + uint16_t gralloc_version; +#ifdef HAVE_GRALLOC1 + gralloc1_device_t *gralloc1_dvc; + GRALLOC1_PFN_LOCK_FLEX pfn_lockflex; + GRALLOC1_PFN_GET_FORMAT pfn_getFormat; + GRALLOC1_PFN_UNLOCK pfn_unlock; +#endif #endif bool is_render_node; diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index 1e93ab4d4d2..0c79fe9b5e0 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -49,6 +49,8 @@ #define ALIGN(val, align) (((val) + (align) - 1) & ~((align) - 1)) +#define GRALLOC_DRM_GET_FORMAT 1 + struct droid_yuv_format { /* Lookup keys */ int native; /* HAL_PIXEL_FORMAT_ */ @@ -59,14 +61,26 @@ struct droid_yuv_format { int fourcc; /* __DRI_IMAGE_FOURCC_ */ }; +/* This enumeration can be deleted if Android defined it in + * system/core/include/system/graphics.h + */ +enum { + HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL = 0x100, + HAL_PIXEL_FORMAT_NV12 = 0x10F, + HAL_PIXEL_FORMAT_P010_INTEL = 0x110 +}; + /* The following table is used to look up a DRI image FourCC based * on native format and information contained in android_ycbcr struct. 
*/ static const struct droid_yuv_format droid_yuv_formats[] = { /* Native format, YCrCb, Chroma step, DRI image FourCC */ { HAL_PIXEL_FORMAT_YCbCr_420_888, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, + { HAL_PIXEL_FORMAT_P010_INTEL, 0, 4, __DRI_IMAGE_FOURCC_P010 }, { HAL_PIXEL_FORMAT_YCbCr_420_888, 0, 1, __DRI_IMAGE_FOURCC_YUV420 }, { HAL_PIXEL_FORMAT_YCbCr_420_888, 1, 1, __DRI_IMAGE_FOURCC_YVU420 }, { HAL_PIXEL_FORMAT_YV12, 1, 1, __DRI_IMAGE_FOURCC_YVU420 }, + { HAL_PIXEL_FORMAT_NV12, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, + { HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, /* HACK: See droid_create_image_from_prime_fd() and * https://issuetracker.google.com/32077885. */ { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, @@ -248,6 +262,51 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf) return EGL_TRUE; } +static int +droid_resolve_format(struct dri2_egl_display *dri2_dpy, + struct ANativeWindowBuffer *buf) +{ + int format = -1; + int ret; + + if (buf->format != HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED) + return buf->format; +#ifdef HAVE_GRALLOC1 + if(dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + + if (!dri2_dpy->pfn_getFormat) { + _eglLog(_EGL_WARNING, "Gralloc does not support getFormat"); + return -1; + } + ret = dri2_dpy->pfn_getFormat(dri2_dpy->gralloc1_dvc, buf->handle, + &format); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->getFormat failed: %d", ret); + return -1; + } + } else { +#else + const gralloc_module_t *gralloc0; + gralloc0 = dri2_dpy->gralloc; + + if (!gralloc0->perform) { + _eglLog(_EGL_WARNING, "gralloc->perform not supported"); + return -1; + } + ret = gralloc0->perform(dri2_dpy->gralloc, + GRALLOC_DRM_GET_FORMAT, + buf->handle, &format); + if (ret){ + _eglLog(_EGL_WARNING, "gralloc->perform failed with error: %d", ret); + return -1; + } +#endif +#ifdef HAVE_GRALLOC1 + } +#endif + return format; +} + static EGLBoolean droid_window_enqueue_buffer(_EGLDisplay *disp, struct 
dri2_egl_surface *dri2_surf) { @@ -462,7 +521,7 @@ droid_swap_interval(_EGLDriver *drv, _EGLDisplay *dpy, struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); struct ANativeWindow *window = dri2_surf->window; - if (window->setSwapInterval(window, interval)) + if (window && window->setSwapInterval(window, interval)) return EGL_FALSE; surf->SwapInterval = interval; @@ -663,11 +722,18 @@ droid_query_buffer_age(_EGLDriver *drv, { struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surface); + /* To avoid blocking other EGL calls, release the display mutex before + * we enter droid_window_dequeue_buffer() and re-acquire the mutex upon + * return. + */ + mtx_unlock(&disp->Mutex); if (update_buffers(dri2_surf) < 0) { _eglError(EGL_BAD_ALLOC, "droid_query_buffer_age"); + mtx_lock(&disp->Mutex); return -1; } + mtx_lock(&disp->Mutex); return dri2_surf->back ? dri2_surf->back->age : 0; } @@ -730,6 +796,31 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) return EGL_TRUE; } +static int get_ycbcr_from_flexlayout(struct android_flex_layout *outFlexLayout, struct android_ycbcr *ycbcr) +{ + + for( int i = 0; i < outFlexLayout->num_planes; i++) { + switch(outFlexLayout->planes[i].component){ + case FLEX_COMPONENT_Y: + ycbcr->y = outFlexLayout->planes[i].top_left; + ycbcr->ystride = outFlexLayout->planes[i].v_increment; + break; + case FLEX_COMPONENT_Cb: + ycbcr->cb = outFlexLayout->planes[i].top_left; + ycbcr->cstride = outFlexLayout->planes[i].v_increment; + break; + case FLEX_COMPONENT_Cr: + ycbcr->cr = outFlexLayout->planes[i].top_left; + ycbcr->chroma_step = outFlexLayout->planes[i].h_increment; + break; + default: + _eglLog(_EGL_WARNING,"unknown component 0x%x", __func__, outFlexLayout->planes[i].component); + break; + } + } + return 0; +} + #if ANDROID_API_LEVEL >= 23 static EGLBoolean droid_set_damage_region(_EGLDriver *drv, @@ -773,30 +864,70 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, { struct 
dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct android_ycbcr ycbcr; +#ifdef HAVE_GRALLOC1 + struct android_flex_layout outFlexLayout; + gralloc1_rect_t accessRegion; +#endif size_t offsets[3]; size_t pitches[3]; int is_ycrcb; int fourcc; int ret; - if (!dri2_dpy->gralloc->lock_ycbcr) { - _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr"); + int format = droid_resolve_format(dri2_dpy, buf); + if (format < 0) { + _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; } memset(&ycbcr, 0, sizeof(ycbcr)); - ret = dri2_dpy->gralloc->lock_ycbcr(dri2_dpy->gralloc, buf->handle, - 0, 0, 0, 0, 0, &ycbcr); - if (ret) { - /* HACK: See droid_create_image_from_prime_fd() and - * https://issuetracker.google.com/32077885.*/ - if (buf->format == HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED) - return NULL; - - _eglLog(_EGL_WARNING, "gralloc->lock_ycbcr failed: %d", ret); - return NULL; - } - dri2_dpy->gralloc->unlock(dri2_dpy->gralloc, buf->handle); +#ifdef HAVE_GRALLOC1 + if(dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + if (!dri2_dpy->pfn_lockflex) { + _eglLog(_EGL_WARNING, "Gralloc does not support lockflex"); + return NULL; + } + + ret = dri2_dpy->pfn_lockflex(dri2_dpy->gralloc1_dvc, buf->handle, + 0, 0, &accessRegion, &outFlexLayout, -1); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret); + return NULL; + } + ret = get_ycbcr_from_flexlayout(&outFlexLayout, &ycbcr); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret); + return NULL; + } + int outReleaseFence = 0; + dri2_dpy->pfn_unlock(dri2_dpy->gralloc1_dvc, buf->handle, &outReleaseFence); + } else { +#endif + const gralloc_module_t *gralloc0; + gralloc0 = dri2_dpy->gralloc; + + if (!gralloc0->lock_ycbcr) { + _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr"); + return NULL; + } + + ret = gralloc0->lock_ycbcr(gralloc0, buf->handle, + 0, 0, 0, 0, 0, &ycbcr); + + if (ret) { + /* HACK: See droid_create_image_from_prime_fd() 
and + * https://issuetracker.google.com/32077885.*/ + if (buf->format == HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED) + return NULL; + + _eglLog(_EGL_WARNING, "gralloc->lock_ycbcr failed: %d", ret); + return NULL; + } + + gralloc0->unlock(dri2_dpy->gralloc, buf->handle); +#ifdef HAVE_GRALLOC1 + } +#endif /* When lock_ycbcr's usage argument contains no SW_READ/WRITE flags * it will return the .y/.cb/.cr pointers based on a NULL pointer, @@ -821,14 +952,15 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, /* .chroma_step is the byte distance between the same chroma channel * values of subsequent pixels, assumed to be the same for Cb and Cr. */ - fourcc = get_fourcc_yuv(buf->format, is_ycrcb, ycbcr.chroma_step); + fourcc = get_fourcc_yuv(format, is_ycrcb, ycbcr.chroma_step); if (fourcc == -1) { _eglLog(_EGL_WARNING, "unsupported YUV format, native = %x, is_ycrcb = %d, chroma_step = %d", - buf->format, is_ycrcb, ycbcr.chroma_step); + format, is_ycrcb, ycbcr.chroma_step); return NULL; } - if (ycbcr.chroma_step == 2) { + /* FIXME? we should not rely on chroma_step */ + if (ycbcr.chroma_step == 2 || ycbcr.chroma_step == 4) { /* Semi-planar Y + CbCr or Y + CrCb format. 
*/ const EGLint attr_list_2plane[] = { EGL_WIDTH, buf->width, @@ -870,9 +1002,16 @@ static _EGLImage * droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx, struct ANativeWindowBuffer *buf, int fd) { + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); unsigned int pitch; - if (is_yuv(buf->format)) { + int format = droid_resolve_format(dri2_dpy, buf); + if (format < 0) { + _eglLog(_EGL_WARNING, "Could not resolve buffer format"); + return NULL; + } + + if (is_yuv(format)) { _EGLImage *image; image = droid_create_image_from_prime_fd_yuv(disp, ctx, buf, fd); @@ -887,13 +1026,13 @@ droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx, return image; } - const int fourcc = get_fourcc(buf->format); + const int fourcc = get_fourcc(format); if (fourcc == -1) { _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; } - pitch = buf->stride * get_format_bpp(buf->format); + pitch = buf->stride * get_format_bpp(format); if (pitch == 0) { _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; @@ -1529,6 +1668,7 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp) _EGLDevice *dev; struct dri2_egl_display *dri2_dpy; const char *err; + hw_device_t *device; int ret; /* Not supported yet */ @@ -1548,6 +1688,27 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp) err = "DRI2: failed to get gralloc module"; goto cleanup; } + dri2_dpy->gralloc_version = dri2_dpy->gralloc->module_api_version; +#ifdef HAVE_GRALLOC1 + if (dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + ret = dri2_dpy->gralloc->methods->open(dri2_dpy->gralloc, GRALLOC_HARDWARE_MODULE_ID, &device); + if (ret) { + err = "Failed to open hw_device device"; + goto cleanup; + } else { + dri2_dpy->gralloc1_dvc = (gralloc1_device_t *)device; + + dri2_dpy->pfn_lockflex = (GRALLOC1_PFN_LOCK_FLEX)\ + dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_LOCK_FLEX); + + dri2_dpy->pfn_getFormat = 
(GRALLOC1_PFN_GET_FORMAT)\ + dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_GET_FORMAT); + + dri2_dpy->pfn_unlock = (GRALLOC1_PFN_UNLOCK)\ + dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_UNLOCK); + } + } +#endif disp->DriverData = (void *) dri2_dpy; diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index eb9f5e2b1e2..817e9b1988a 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -1127,13 +1127,22 @@ drm_handle_device(void *data, struct wl_drm *drm, const char *device) if (dri2_dpy->fd == -1) { _eglLog(_EGL_WARNING, "wayland-egl: could not open %s (%s)", dri2_dpy->device_name, strerror(errno)); + free(dri2_dpy->device_name); + dri2_dpy->device_name = NULL; return; } if (drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER) { dri2_dpy->authenticated = true; } else { - drmGetMagic(dri2_dpy->fd, &magic); + if (drmGetMagic(dri2_dpy->fd, &magic)) { + close(dri2_dpy->fd); + dri2_dpy->fd = -1; + free(dri2_dpy->device_name); + dri2_dpy->device_name = NULL; + _eglLog(_EGL_WARNING, "wayland-egl: drmGetMagic failed"); + return; + } wl_drm_authenticate(dri2_dpy->wl_drm, magic); } } @@ -1661,8 +1670,8 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf) if (dri2_surf->back) return 0; - if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width || - dri2_surf->base.Height != dri2_surf->wl_win->attached_height) { + if (dri2_surf->base.Width != dri2_surf->wl_win->width || + dri2_surf->base.Height != dri2_surf->wl_win->height) { dri2_wl_release_buffers(dri2_surf); diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp index a9c5cf8d29b..d4b046c79b4 100644 --- a/src/egl/drivers/haiku/egl_haiku.cpp +++ b/src/egl/drivers/haiku/egl_haiku.cpp @@ -29,6 +29,7 @@ #include "eglconfig.h" #include "eglcontext.h" +#include "egldevice.h" #include "egldisplay.h" #include "egldriver.h" #include 
"eglcurrent.h" @@ -215,7 +216,7 @@ init_haiku(_EGLDriver *drv, _EGLDisplay *dpy) _eglError(EGL_NOT_INITIALIZED, "DRI2: failed to find EGLDevice"); return EGL_FALSE; } - disp->Device = dev; + dpy->Device = dev; TRACE("Add configs\n"); if (!haiku_add_configs_for_visuals(dpy)) diff --git a/src/egl/generate/eglFunctionList.py b/src/egl/generate/eglFunctionList.py index fb5b3c30bdf..2cd35557bc4 100644 --- a/src/egl/generate/eglFunctionList.py +++ b/src/egl/generate/eglFunctionList.py @@ -196,8 +196,18 @@ def _eglFunc(name, method, static=None, public=False, inheader=None, prefix="dis # EGL_ANDROID_native_fence_sync _eglFunc("eglDupNativeFenceFDANDROID", "display"), + # EGL_ANDROID_blob_cache + _eglFunc("eglSetBlobCacheFuncsANDROID", "display"), + # EGL_EXT_image_dma_buf_import_modifiers _eglFunc("eglQueryDmaBufFormatsEXT", "display"), _eglFunc("eglQueryDmaBufModifiersEXT", "display"), + + # EGL_EXT_device_base + _eglFunc("eglQueryDeviceAttribEXT", "device"), + _eglFunc("eglQueryDeviceStringEXT", "device"), + _eglFunc("eglQueryDevicesEXT", "none"), + _eglFunc("eglQueryDisplayAttribEXT", "display"), + ) diff --git a/src/egl/main/eglcurrent.c b/src/egl/main/eglcurrent.c index 7af3011b757..545697e5662 100644 --- a/src/egl/main/eglcurrent.c +++ b/src/egl/main/eglcurrent.c @@ -137,13 +137,37 @@ _eglDestroyThreadInfo(_EGLThreadInfo *t) } +/** + * Delete/free a _EGLThreadInfo object. + */ +static void +_eglDestroyThreadInfoCallback(_EGLThreadInfo *t) +{ + /* If this callback is called on thread termination then try to also give a + * chance to cleanup to the client drivers. If called for module termination + * then just release the thread information as calling eglReleaseThread + * would result in a deadlock. + */ + if (_egl_TSDInitialized) { + /* The callback handler has replaced the TLS entry, which is passed in as + * 't', with NULL. Restore it here so that the release thread finds it in + * the TLS entry. 
+ */ + _eglSetTSD(t); + eglReleaseThread(); + } else { + _eglDestroyThreadInfo(t); + } +} + + /** * Make sure TSD is initialized and return current value. */ static inline _EGLThreadInfo * _eglCheckedGetTSD(void) { - if (_eglInitTSD(&_eglDestroyThreadInfo) != EGL_TRUE) { + if (_eglInitTSD(&_eglDestroyThreadInfoCallback) != EGL_TRUE) { _eglLog(_EGL_FATAL, "failed to initialize \"current\" system"); return NULL; } diff --git a/src/egl/main/egldispatchstubs.c b/src/egl/main/egldispatchstubs.c index bfc3195c779..96708aeb0dc 100644 --- a/src/egl/main/egldispatchstubs.c +++ b/src/egl/main/egldispatchstubs.c @@ -59,6 +59,11 @@ static __eglMustCastToProperFunctionPointerType FetchVendorFunc(__EGLvendorInfo } if (func == NULL) { if (errorCode != EGL_SUCCESS) { + // Since we have no vendor, the follow-up eglGetError() call will + // end up using the GLVND error code. Set it here. + if (vendor == NULL) { + exports->setEGLError(errorCode); + } _eglError(errorCode, __EGL_DISPATCH_FUNC_NAMES[index]); } return NULL; diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk index acd243b8346..7618c6fcd93 100644 --- a/src/gallium/auxiliary/Android.mk +++ b/src/gallium/auxiliary/Android.mk @@ -36,7 +36,8 @@ LOCAL_SRC_FILES := \ util/u_debug_stack_android.cpp LOCAL_C_INCLUDES := \ - $(GALLIUM_TOP)/auxiliary/util + $(GALLIUM_TOP)/auxiliary/util \ + $(MESA_TOP)/src/util ifeq ($(MESA_ENABLE_LLVM),true) LOCAL_SRC_FILES += \ diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 0ad274b535a..4fa36cc7de4 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -375,7 +375,7 @@ ttn_emit_declaration(struct ttn_compile *c) c->outputs[idx] = var; for (int i = 0; i < array_size; i++) - b->shader->info.outputs_written |= 1 << (var->data.location + i); + b->shader->info.outputs_written |= 1ull << (var->data.location + i); } break; case TGSI_FILE_CONSTANT: diff --git 
a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h index 05be94cae31..9b264145347 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h @@ -142,7 +142,7 @@ pipe_loader_release(struct pipe_loader_device **devs, int ndev); */ bool pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, - struct drisw_loader_funcs *drisw_lf); + const struct drisw_loader_funcs *drisw_lf); /** * Initialize a kms backed sw device given an fd. diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c index d387ce90d32..587b6f8567b 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c @@ -132,7 +132,7 @@ pipe_loader_sw_probe_teardown_common(struct pipe_loader_sw_device *sdev) #ifdef HAVE_PIPE_LOADER_DRI bool -pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf) +pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, const struct drisw_loader_funcs *drisw_lf) { struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); int i; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index e13500a7f7b..75c2e08632e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -1004,11 +1004,12 @@ get_block_tessfactor_writemask(const struct tgsi_shader_info *info, struct tgsi_full_instruction *inst; unsigned writemask = 0; - do { - tgsi_parse_token(parse); - assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); - inst = &parse->FullToken.FullInstruction; - check_no_subroutines(inst); + tgsi_parse_token(parse); + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); + inst = &parse->FullToken.FullInstruction; + check_no_subroutines(inst); + + while (inst->Instruction.Opcode != end_opcode) { /* 
Recursively process nested blocks. */ switch (inst->Instruction.Opcode) { @@ -1016,20 +1017,26 @@ get_block_tessfactor_writemask(const struct tgsi_shader_info *info, case TGSI_OPCODE_UIF: writemask |= get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDIF); - continue; + break; case TGSI_OPCODE_BGNLOOP: writemask |= get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP); - continue; + break; case TGSI_OPCODE_BARRIER: unreachable("nested BARRIER is illegal"); - continue; + break; + + default: + writemask |= get_inst_tessfactor_writemask(info, inst); } - writemask |= get_inst_tessfactor_writemask(info, inst); - } while (inst->Instruction.Opcode != end_opcode); + tgsi_parse_token(parse); + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); + inst = &parse->FullToken.FullInstruction; + check_no_subroutines(inst); + } return writemask; } @@ -1043,18 +1050,20 @@ get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info, struct tgsi_full_instruction *inst; unsigned then_tessfactor_writemask = 0; unsigned else_tessfactor_writemask = 0; + unsigned writemask; bool is_then = true; - do { - tgsi_parse_token(parse); - assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); - inst = &parse->FullToken.FullInstruction; - check_no_subroutines(inst); + tgsi_parse_token(parse); + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); + inst = &parse->FullToken.FullInstruction; + check_no_subroutines(inst); + + while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF) { switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ELSE: is_then = false; - continue; + break; /* Recursively process nested blocks. */ case TGSI_OPCODE_IF: @@ -1063,28 +1072,33 @@ get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info, is_then ? 
&then_tessfactor_writemask : &else_tessfactor_writemask, cond_block_tf_writemask); - continue; + break; case TGSI_OPCODE_BGNLOOP: *cond_block_tf_writemask |= get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP); - continue; + break; case TGSI_OPCODE_BARRIER: unreachable("nested BARRIER is illegal"); - continue; - } - - /* Process an instruction in the current block. */ - unsigned writemask = get_inst_tessfactor_writemask(info, inst); + break; + default: + /* Process an instruction in the current block. */ + writemask = get_inst_tessfactor_writemask(info, inst); - if (writemask) { - if (is_then) - then_tessfactor_writemask |= writemask; - else - else_tessfactor_writemask |= writemask; + if (writemask) { + if (is_then) + then_tessfactor_writemask |= writemask; + else + else_tessfactor_writemask |= writemask; + } } - } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF); + + tgsi_parse_token(parse); + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); + inst = &parse->FullToken.FullInstruction; + check_no_subroutines(inst); + } if (then_tessfactor_writemask || else_tessfactor_writemask) { /* If both statements write the same tess factor channels, @@ -1147,7 +1161,7 @@ tgsi_scan_tess_ctrl(const struct tgsi_token *tokens, case TGSI_OPCODE_BGNLOOP: cond_block_tf_writemask |= - get_block_tessfactor_writemask(info, &parse, TGSI_OPCODE_ENDIF); + get_block_tessfactor_writemask(info, &parse, TGSI_OPCODE_ENDLOOP); continue; case TGSI_OPCODE_BARRIER: diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index b06fb111709..fa1e920b509 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -154,6 +154,25 @@ pipe_resource_reference(struct pipe_resource **dst, struct pipe_resource *src) *dst = src; } +/** + * Same as pipe_surface_release, but used when pipe_context doesn't exist + * anymore. 
+ */ +static inline void +pipe_surface_release_no_context(struct pipe_surface **ptr) +{ + struct pipe_surface *surf = *ptr; + + if (pipe_reference_described(&surf->reference, NULL, + (debug_reference_descriptor) + debug_describe_surface)) { + /* trivially destroy pipe_surface */ + pipe_resource_reference(&surf->texture, NULL); + free(surf); + } + *ptr = NULL; +} + /** * Set *dst to \p src with proper reference counting. * diff --git a/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c b/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c index f1e96740231..9736aebd7f6 100644 --- a/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c +++ b/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c @@ -97,6 +97,7 @@ static void cmd_free(struct msm_cmd *cmd) { fd_bo_del(cmd->ring_bo); + free(cmd->relocs); free(cmd); } @@ -655,6 +656,7 @@ msm_ringbuffer_destroy(struct fd_ringbuffer *ring) _mesa_set_destroy(msm_ring->u.ring_set, unref_rings); + free(msm_ring->u.reloc_bos); free(msm_ring); } else { struct fd_submit *submit = msm_ring->u.submit; @@ -663,6 +665,7 @@ msm_ringbuffer_destroy(struct fd_ringbuffer *ring) cmd_free(msm_ring->u.cmds[i]); } + free(msm_ring->u.cmds); slab_free_st(&to_msm_submit(submit)->ring_pool, msm_ring); } } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 8767e5efb99..ca0192a9cc0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -370,7 +370,8 @@ class ConstantFolding : public Pass void expr(Instruction *, ImmediateValue&, ImmediateValue&); void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&); - void opnd(Instruction *, ImmediateValue&, int s); + /* true if i was deleted */ + bool opnd(Instruction *i, ImmediateValue&, int s); void opnd3(Instruction *, ImmediateValue&); void unary(Instruction *, const ImmediateValue&); @@ -414,18 +415,21 @@ 
ConstantFolding::visit(BasicBlock *bb) if (i->srcExists(2) && i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1) && - i->src(2).getImmediate(src2)) + i->src(2).getImmediate(src2)) { expr(i, src0, src1, src2); - else + } else if (i->srcExists(1) && - i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) + i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) { expr(i, src0, src1); - else - if (i->srcExists(0) && i->src(0).getImmediate(src0)) - opnd(i, src0, 0); - else - if (i->srcExists(1) && i->src(1).getImmediate(src1)) - opnd(i, src1, 1); + } else + if (i->srcExists(0) && i->src(0).getImmediate(src0)) { + if (opnd(i, src0, 0)) + continue; + } else + if (i->srcExists(1) && i->src(1).getImmediate(src1)) { + if (opnd(i, src1, 1)) + continue; + } if (i->srcExists(2) && i->src(2).getImmediate(src2)) opnd3(i, src2); } @@ -1010,12 +1014,13 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value * return false; } -void +bool ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) { const int t = !s; const operation op = i->op; Instruction *newi = i; + bool deleted = false; switch (i->op) { case OP_SPLIT: { @@ -1035,6 +1040,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) val >>= bitsize; } delete_Instruction(prog, i); + deleted = true; break; } case OP_MUL: @@ -1049,6 +1055,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0), TYPE_S32, i->getSrc(t), bld.mkImm(0)); delete_Instruction(prog, i); + deleted = true; } else if (imm0.isInteger(0) || imm0.isInteger(1)) { // The high bits can't be set in this case (either mul by 0 or // unsigned by 1) @@ -1099,8 +1106,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) if (!isFloatType(i->dType) && !i->src(t).mod) { bld.setPosition(i, false); int64_t b = typeSizeof(i->dType) == 8 ? 
imm0.reg.data.s64 : imm0.reg.data.s32; - if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) + if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) { delete_Instruction(prog, i); + deleted = true; + } } else if (i->postFactor && i->sType == TYPE_F32) { /* Can't emit a postfactor with an immediate, have to fold it in */ @@ -1136,8 +1145,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) { bld.setPosition(i, false); int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32; - if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) + if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) { delete_Instruction(prog, i); + deleted = true; + } } break; case OP_SUB: @@ -1207,6 +1218,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s)); delete_Instruction(prog, i); + deleted = true; } else if (imm0.reg.data.s32 == -1) { i->op = OP_NEG; @@ -1239,6 +1251,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB); delete_Instruction(prog, i); + deleted = true; } break; @@ -1270,6 +1283,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2); delete_Instruction(prog, i); + deleted = true; } } else if (s == 1) { // In this case, we still want the optimized lowering that we get @@ -1286,6 +1300,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) newi->src(1).mod = Modifier(NV50_IR_MOD_NEG); delete_Instruction(prog, i); + deleted = true; } break; @@ -1298,7 +1313,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t)); CondCode cc, ccZ; if (imm0.reg.data.u32 != 0 || !si) - return; + return false; cc = 
si->setCond; ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U); // We do everything assuming var (cmp) 0, reverse the condition if 0 is @@ -1324,7 +1339,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case CC_GT: break; // bool > 0 -- bool case CC_NE: break; // bool != 0 -- bool default: - return; + return false; } // Update the condition of this SET to be identical to the origin set, @@ -1359,13 +1374,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) } else if (src->asCmp()) { CmpInstruction *cmp = src->asCmp(); if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1) - return; + return false; if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32)) - return; + return false; if (imm0.reg.data.f32 != 1.0) - return; + return false; if (cmp->dType != TYPE_U32) - return; + return false; cmp->dType = TYPE_F32; if (i->src(t).mod != Modifier(0)) { @@ -1432,13 +1447,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case OP_MUL: int muls; if (isFloatType(si->dType)) - return; + return false; if (si->src(1).getImmediate(imm1)) muls = 1; else if (si->src(0).getImmediate(imm1)) muls = 0; else - return; + return false; bld.setPosition(i, false); i->op = OP_MUL; @@ -1449,15 +1464,15 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case OP_ADD: int adds; if (isFloatType(si->dType)) - return; + return false; if (si->op != OP_SUB && si->src(0).getImmediate(imm1)) adds = 0; else if (si->src(1).getImmediate(imm1)) adds = 1; else - return; + return false; if (si->src(!adds).mod != Modifier(0)) - return; + return false; // SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z)) // This is more operations, but if one of x, y is an immediate, then @@ -1472,7 +1487,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) bld.mkImm(imm0.reg.data.u32))); break; default: - return; + return false; } } break; @@ -1497,7 +1512,7 @@ ConstantFolding::opnd(Instruction *i, 
ImmediateValue &imm0, int s) case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break; case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break; default: - return; + return false; } if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0) res = 31 - res; @@ -1523,11 +1538,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) // TODO: handle 64-bit values properly if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8) - return; + return false; // TODO: handle single byte/word extractions if (i->subOp) - return; + return false; bld.setPosition(i, true); /* make sure bld is init'ed */ @@ -1564,7 +1579,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) CLAMP(imm0.reg.data.u16, umin, umax) : \ imm0.reg.data.u16; \ break; \ - default: return; \ + default: return false; \ } \ i->setSrc(0, bld.mkImm(res.data.dst)); \ break @@ -1591,7 +1606,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break; case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break; default: - return; + return false; } i->setSrc(0, bld.mkImm(res.data.f32)); break; @@ -1612,12 +1627,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break; case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break; default: - return; + return false; } i->setSrc(0, bld.mkImm(res.data.f64)); break; default: - return; + return false; } #undef CASE @@ -1628,7 +1643,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) break; } default: - return; + return false; } // This can get left behind some of the optimizations which simplify @@ -1643,6 +1658,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) if (newi->op != op) foldCount++; + return deleted; } // ============================================================================= diff --git 
a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index fb4a259ce16..e1b2e20810a 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -600,25 +600,23 @@ static inline void nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s, unsigned nr, void **hwcso) { + unsigned highest_found = 0; unsigned i; assert(nr <= PIPE_MAX_SAMPLERS); for (i = 0; i < nr; ++i) { struct nv50_tsc_entry *old = nv50->samplers[s][i]; + if (hwcso[i]) + highest_found = i; + nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]); if (old) nv50_screen_tsc_unlock(nv50->screen, old); } assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS); - for (; i < nv50->num_samplers[s]; ++i) { - if (nv50->samplers[s][i]) { - nv50_screen_tsc_unlock(nv50->screen, nv50->samplers[s][i]); - nv50->samplers[s][i] = NULL; - } - } - - nv50->num_samplers[s] = nr; + if (nr >= nv50->num_samplers[s]) + nv50->num_samplers[s] = highest_found + 1; nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index f2393cb27b5..9653de86fe9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -464,11 +464,15 @@ nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, unsigned s, unsigned nr, void **hwcso) { + unsigned highest_found = 0; unsigned i; for (i = 0; i < nr; ++i) { struct nv50_tsc_entry *old = nvc0->samplers[s][i]; + if (hwcso[i]) + highest_found = i; + if (hwcso[i] == old) continue; nvc0->samplers_dirty[s] |= 1 << i; @@ -477,14 +481,8 @@ nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, if (old) nvc0_screen_tsc_unlock(nvc0->screen, old); } - for (; i < nvc0->num_samplers[s]; ++i) { - if (nvc0->samplers[s][i]) { - nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]); - nvc0->samplers[s][i] = NULL; - } - } - - nvc0->num_samplers[s] = nr; + if (nr >= 
nvc0->num_samplers[s]) + nvc0->num_samplers[s] = highest_found + 1; } static void diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 2680396c3d6..41e83af1db1 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -105,6 +105,12 @@ static void r600_destroy_context(struct pipe_context *context) } util_unreference_framebuffer_state(&rctx->framebuffer.state); + if (rctx->gs_rings.gsvs_ring.buffer) + pipe_resource_reference(&rctx->gs_rings.gsvs_ring.buffer, NULL); + + if (rctx->gs_rings.esgs_ring.buffer) + pipe_resource_reference(&rctx->gs_rings.esgs_ring.buffer, NULL); + for (sh = 0; sh < PIPE_SHADER_TYPES; ++sh) for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; ++i) rctx->b.b.set_constant_buffer(context, sh, i, NULL); diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index ccabab9cdb0..92f243b5c9a 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -1636,7 +1636,7 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx, } if (query->buffer.previous) { - u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 16, + u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 256, &tmp_buffer_offset, &tmp_buffer); if (!tmp_buffer) return; diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp index 5681fdc4425..b7d87eac9f4 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp @@ -567,7 +567,7 @@ int bc_builder::build_fetch_gds(fetch_node *n) { const fetch_op_info *fop = bc.op_ptr; unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f; unsigned mem_op = 4; - assert(fop->flags && FF_GDS); + assert(fop->flags & FF_GDS); if (bc.op == FETCH_OP_TF_WRITE) { mem_op = 5; diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 
b440230d227..91f38329d59 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -580,10 +580,12 @@ static int si_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_SUPPORTED: return (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC && (si_vce_is_fw_version_supported(sscreen) || - sscreen->info.family == CHIP_RAVEN)) || + sscreen->info.family == CHIP_RAVEN || + sscreen->info.family == CHIP_RAVEN2)) || (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN && (sscreen->info.family == CHIP_RAVEN || - si_radeon_uvd_enc_supported(sscreen))); + sscreen->info.family == CHIP_RAVEN2 || + si_radeon_uvd_enc_supported(sscreen))); case PIPE_VIDEO_CAP_NPOT_TEXTURES: return 1; case PIPE_VIDEO_CAP_MAX_WIDTH: @@ -631,7 +633,8 @@ static int si_get_video_param(struct pipe_screen *screen, return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN; return false; case PIPE_VIDEO_FORMAT_JPEG: - if (sscreen->info.family == CHIP_RAVEN) + if (sscreen->info.family == CHIP_RAVEN || + sscreen->info.family == CHIP_RAVEN2) return true; if (sscreen->info.family < CHIP_CARRIZO || sscreen->info.family >= CHIP_VEGA10) return false; diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 9b09c74d48a..7a2c7afdbfd 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -793,17 +793,10 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, emit_sample_streamout(cs, va + 32 * stream, stream); break; case PIPE_QUERY_TIME_ELAPSED: - /* Write the timestamp from the CP not waiting for - * outstanding draws (top-of-pipe). 
- */ - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_COUNT_SEL | - COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | - COPY_DATA_DST_SEL(COPY_DATA_DST_MEM)); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, + EOP_DATA_SEL_TIMESTAMP, NULL, va, + 0, query->b.type); break; case PIPE_QUERY_PIPELINE_STATISTICS: radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 19522cc97b1..f1d5ad31365 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -561,6 +561,14 @@ void si_llvm_load_input_vs( /* Do multiple loads for special formats. */ switch (fix_fetch) { + case SI_FIX_FETCH_RG_64_FLOAT: + num_fetches = 1; /* 1 2-dword or 4-dword load */ + fetch_stride = 0; + if (util_last_bit(info->input_usage_mask[input_index]) >= 2) + num_channels = 4; /* 2 doubles in 4 dwords */ + else + num_channels = 2; /* 1 double in 2 dwords */ + break; case SI_FIX_FETCH_RGB_64_FLOAT: num_fetches = 3; /* 3 2-dword loads */ fetch_stride = 8; diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 87ca0161b45..cd38145daec 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -989,6 +989,9 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) LLVMValueRef data[4]; unsigned loc = variable->data.location; + if (loc >= VARYING_SLOT_VAR0 && nir->info.stage == MESA_SHADER_FRAGMENT) + ctx->abi.fs_input_attr_indices[loc - VARYING_SLOT_VAR0] = input_idx / 4; + for (unsigned i = 0; i < attrib_count; i++) { /* Packed components share the same location so skip * them if we have already processed the location. 
diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c b/src/gallium/drivers/radeonsi/si_state_msaa.c index b741bcadec8..e6d97fe6727 100644 --- a/src/gallium/drivers/radeonsi/si_state_msaa.c +++ b/src/gallium/drivers/radeonsi/si_state_msaa.c @@ -101,6 +101,10 @@ static const uint64_t centroid_priority_4x = 0x3210321032103210ull; static const uint32_t sample_locs_8x[] = { FILL_SREG(-3,-5, 5, 1, -1, 3, 7,-7), FILL_SREG(-7,-1, 3, 7, -5, 5, 1,-3), + /* The following are unused by hardware, but we emit them to IBs + * instead of multiple SET_CONTEXT_REG packets. */ + 0, + 0, }; static const uint64_t centroid_priority_8x = 0x3546012735460127ull; diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c index 1a9d8f8d9fa..8c9553acbf3 100644 --- a/src/gallium/drivers/radeonsi/si_uvd.c +++ b/src/gallium/drivers/radeonsi/si_uvd.c @@ -146,7 +146,8 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context, const struct pipe_video_codec *templ) { struct si_context *ctx = (struct si_context *)context; - bool vcn = (ctx->family == CHIP_RAVEN) ? 
true : false; + bool vcn = ctx->family == CHIP_RAVEN || + ctx->family == CHIP_RAVEN2; if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { if (vcn) { diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 1d59dbfc12a..e8f0e291dc3 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -32,6 +32,7 @@ #include "util/u_format.h" #include "util/u_hash_table.h" #include "util/u_screen.h" +#include "util/u_transfer_helper.h" #include "util/ralloc.h" #include @@ -74,6 +75,7 @@ v3d_screen_destroy(struct pipe_screen *pscreen) v3d_simulator_destroy(screen); v3d_compiler_free(screen->compiler); + u_transfer_helper_destroy(pscreen->transfer_helper); close(screen->fd); ralloc_free(pscreen); diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 94784bbdc0a..41e6ec5c1cb 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -572,7 +572,15 @@ vc4_resource_create_with_modifiers(struct pipe_screen *pscreen, goto fail; } - if (screen->ro && tmpl->bind & PIPE_BIND_SCANOUT) { + /* Set up the "scanout resource" (the dmabuf export of our buffer to + * the KMS handle) if the buffer might ever have + * resource_get_handle(WINSYS_HANDLE_TYPE_KMS) called on it. + * create_with_modifiers() doesn't give us usage flags, so we have to + * assume that all calls with modifiers are scanout-possible. 
+ */ + if (screen->ro && + ((tmpl->bind & PIPE_BIND_SCANOUT) || + !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) { rsc->scanout = renderonly_scanout_for_resource(prsc, screen->ro, NULL); if (!rsc->scanout) diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 14ee6cf09e5..e7f7c82c271 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -33,6 +33,7 @@ #include "util/u_format.h" #include "util/u_hash_table.h" #include "util/u_screen.h" +#include "util/u_transfer_helper.h" #include "util/ralloc.h" #include @@ -110,6 +111,8 @@ vc4_screen_destroy(struct pipe_screen *pscreen) vc4_simulator_destroy(screen); #endif + u_transfer_helper_destroy(pscreen->transfer_helper); + close(screen->fd); ralloc_free(pscreen); } diff --git a/src/gallium/drivers/virgl/virgl_buffer.c b/src/gallium/drivers/virgl/virgl_buffer.c index 88a22b56f9a..f72c93f4995 100644 --- a/src/gallium/drivers/virgl/virgl_buffer.c +++ b/src/gallium/drivers/virgl/virgl_buffer.c @@ -106,7 +106,6 @@ static void virgl_buffer_transfer_unmap(struct pipe_context *ctx, if (trans->base.usage & PIPE_TRANSFER_WRITE) { if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { struct virgl_screen *vs = virgl_screen(ctx->screen); - vbuf->base.clean = FALSE; vctx->num_transfers++; vs->vws->transfer_put(vs->vws, vbuf->base.hw_res, &transfer->box, trans->base.stride, trans->base.layer_stride, trans->offset, transfer->level); diff --git a/src/gallium/drivers/virgl/virgl_context.c b/src/gallium/drivers/virgl/virgl_context.c index 4511bf3b2fb..61fb3f0636a 100644 --- a/src/gallium/drivers/virgl/virgl_context.c +++ b/src/gallium/drivers/virgl/virgl_context.c @@ -47,6 +47,12 @@ #include "virgl_resource.h" #include "virgl_screen.h" +struct virgl_vertex_elements_state { + uint32_t handle; + uint8_t binding_map[PIPE_MAX_ATTRIBS]; + uint8_t num_bindings; +}; + static uint32_t next_handle; uint32_t virgl_object_assign_handle(void) { @@ -385,29 
+391,54 @@ static void *virgl_create_vertex_elements_state(struct pipe_context *ctx, unsigned num_elements, const struct pipe_vertex_element *elements) { + struct pipe_vertex_element new_elements[PIPE_MAX_ATTRIBS]; struct virgl_context *vctx = virgl_context(ctx); - uint32_t handle = virgl_object_assign_handle(); - virgl_encoder_create_vertex_elements(vctx, handle, - num_elements, elements); - return (void*)(unsigned long)handle; + struct virgl_vertex_elements_state *state = + CALLOC_STRUCT(virgl_vertex_elements_state); + + for (int i = 0; i < num_elements; ++i) { + if (elements[i].instance_divisor) { + /* Virglrenderer doesn't deal with instance_divisor correctly if + * there isn't a 1:1 relationship between elements and bindings. + * So let's make sure there is, by duplicating bindings. + */ + for (int j = 0; j < num_elements; ++j) { + new_elements[j] = elements[j]; + new_elements[j].vertex_buffer_index = j; + state->binding_map[j] = elements[j].vertex_buffer_index; + } + elements = new_elements; + state->num_bindings = num_elements; + break; + } + } + state->handle = virgl_object_assign_handle(); + virgl_encoder_create_vertex_elements(vctx, state->handle, + num_elements, elements); + return state; } static void virgl_delete_vertex_elements_state(struct pipe_context *ctx, void *ve) { struct virgl_context *vctx = virgl_context(ctx); - uint32_t handle = (unsigned long)ve; - - virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS); + struct virgl_vertex_elements_state *state = + (struct virgl_vertex_elements_state *)ve; + virgl_encode_delete_object(vctx, state->handle, VIRGL_OBJECT_VERTEX_ELEMENTS); + FREE(state); } static void virgl_bind_vertex_elements_state(struct pipe_context *ctx, void *ve) { struct virgl_context *vctx = virgl_context(ctx); - uint32_t handle = (unsigned long)ve; - virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS); + struct virgl_vertex_elements_state *state = + (struct virgl_vertex_elements_state *)ve; + 
vctx->vertex_elements = state; + virgl_encode_bind_object(vctx, state ? state->handle : 0, + VIRGL_OBJECT_VERTEX_ELEMENTS); + vctx->vertex_array_dirty = TRUE; } static void virgl_set_vertex_buffers(struct pipe_context *ctx, @@ -429,7 +460,17 @@ static void virgl_hw_set_vertex_buffers(struct pipe_context *ctx) struct virgl_context *vctx = virgl_context(ctx); if (vctx->vertex_array_dirty) { - virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer); + struct virgl_vertex_elements_state *ve = vctx->vertex_elements; + + if (ve->num_bindings) { + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + for (int i = 0; i < ve->num_bindings; ++i) + vertex_buffers[i] = vctx->vertex_buffer[ve->binding_map[i]]; + + virgl_encoder_set_vertex_buffers(vctx, ve->num_bindings, vertex_buffers); + } else + virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer); + virgl_attach_res_vertex_buffers(vctx); } } diff --git a/src/gallium/drivers/virgl/virgl_context.h b/src/gallium/drivers/virgl/virgl_context.h index 20988baa3c7..09cf0db2ae4 100644 --- a/src/gallium/drivers/virgl/virgl_context.h +++ b/src/gallium/drivers/virgl/virgl_context.h @@ -32,6 +32,7 @@ struct pipe_screen; struct tgsi_token; struct u_upload_mgr; struct virgl_cmd_buf; +struct virgl_vertex_elements_state; struct virgl_sampler_view { struct pipe_sampler_view base; @@ -53,6 +54,7 @@ struct virgl_context { struct virgl_cmd_buf *cbuf; struct virgl_textures_info samplers[PIPE_SHADER_TYPES]; + struct virgl_vertex_elements_state *vertex_elements; struct pipe_framebuffer_state framebuffer; diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c index e86d0711a57..ee2764d74ea 100644 --- a/src/gallium/drivers/virgl/virgl_encode.c +++ b/src/gallium/drivers/virgl/virgl_encode.c @@ -61,6 +61,12 @@ static void virgl_encoder_write_res(struct virgl_context *ctx, } } +static void virgl_dirty_res(struct virgl_resource *res) +{ + if 
(res) + res->clean = FALSE; +} + int virgl_encode_bind_object(struct virgl_context *ctx, uint32_t handle, uint32_t object) { @@ -615,6 +621,7 @@ int virgl_encode_sampler_view(struct virgl_context *ctx, if (res->u.b.target == PIPE_BUFFER) { virgl_encoder_write_dword(ctx->cbuf, state->u.buf.offset / elem_size); virgl_encoder_write_dword(ctx->cbuf, (state->u.buf.offset + state->u.buf.size) / elem_size - 1); + virgl_dirty_res(res); } else { virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_layer | state->u.tex.last_layer << 16); virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_level | state->u.tex.last_level << 8); @@ -949,6 +956,7 @@ int virgl_encode_set_shader_buffers(struct virgl_context *ctx, virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset); virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size); virgl_encoder_write_res(ctx, res); + virgl_dirty_res(res); } else { virgl_encoder_write_dword(ctx->cbuf, 0); virgl_encoder_write_dword(ctx->cbuf, 0); @@ -972,6 +980,7 @@ int virgl_encode_set_hw_atomic_buffers(struct virgl_context *ctx, virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset); virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size); virgl_encoder_write_res(ctx, res); + virgl_dirty_res(res); } else { virgl_encoder_write_dword(ctx->cbuf, 0); virgl_encoder_write_dword(ctx->cbuf, 0); @@ -999,6 +1008,7 @@ int virgl_encode_set_shader_images(struct virgl_context *ctx, virgl_encoder_write_dword(ctx->cbuf, images[i].u.buf.offset); virgl_encoder_write_dword(ctx->cbuf, images[i].u.buf.size); virgl_encoder_write_res(ctx, res); + virgl_dirty_res(res); } else { virgl_encoder_write_dword(ctx->cbuf, 0); virgl_encoder_write_dword(ctx->cbuf, 0); diff --git a/src/gallium/drivers/virgl/virgl_resource.c b/src/gallium/drivers/virgl/virgl_resource.c index db5e7dd61af..9174ec5cbbd 100644 --- a/src/gallium/drivers/virgl/virgl_resource.c +++ b/src/gallium/drivers/virgl/virgl_resource.c @@ -95,7 +95,11 @@ static void 
virgl_buffer_subdata(struct pipe_context *pipe, usage |= PIPE_TRANSFER_DISCARD_RANGE; u_box_1d(offset, size, &box); - virgl_transfer_inline_write(pipe, resource, 0, usage, &box, data, 0, 0); + + if (size >= (VIRGL_MAX_CMDBUF_DWORDS * 4)) + u_default_buffer_subdata(pipe, resource, usage, offset, size, data); + else + virgl_transfer_inline_write(pipe, resource, 0, usage, &box, data, 0, 0); } void virgl_init_context_resource_functions(struct pipe_context *ctx) diff --git a/src/gallium/drivers/virgl/virgl_winsys.h b/src/gallium/drivers/virgl/virgl_winsys.h index 0e6cb7953f6..b44f8aaa54a 100644 --- a/src/gallium/drivers/virgl/virgl_winsys.h +++ b/src/gallium/drivers/virgl/virgl_winsys.h @@ -31,7 +31,7 @@ struct pipe_fence_handle; struct winsys_handle; struct virgl_hw_res; -#define VIRGL_MAX_CMDBUF_DWORDS (16*1024) +#define VIRGL_MAX_CMDBUF_DWORDS (64 * 1024) struct virgl_drm_caps { union virgl_caps caps; diff --git a/src/gallium/state_trackers/clover/llvm/compat.hpp b/src/gallium/state_trackers/clover/llvm/compat.hpp index 975012cbda4..b91cb95a295 100644 --- a/src/gallium/state_trackers/clover/llvm/compat.hpp +++ b/src/gallium/state_trackers/clover/llvm/compat.hpp @@ -58,9 +58,14 @@ #include #include -#include #include +#if HAVE_LLVM >= 0x0800 +#include +#else +#include +#endif + namespace clover { namespace llvm { namespace compat { diff --git a/src/gallium/state_trackers/clover/meson.build b/src/gallium/state_trackers/clover/meson.build index 1a09d8f2ca9..a6729af2fb8 100644 --- a/src/gallium/state_trackers/clover/meson.build +++ b/src/gallium/state_trackers/clover/meson.build @@ -30,6 +30,7 @@ libcltgsi = static_library( files('tgsi/compiler.cpp', 'tgsi/invocation.hpp'), include_directories : clover_incs, cpp_args : [cpp_vis_args], + override_options : clover_cpp_std, ) libclllvm = static_library( @@ -56,6 +57,7 @@ libclllvm = static_library( )), ], dependencies : [dep_llvm, dep_elf], + override_options : clover_cpp_std, ) clover_files = files( @@ -119,4 +121,5 @@ 
libclover = static_library( include_directories : clover_incs, cpp_args : [clover_cpp_args, cpp_vis_args], link_with : [libcltgsi, libclllvm], + override_options : clover_cpp_std, ) diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index a09787bb215..d99187c6eaa 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -176,6 +176,12 @@ static int convert_fourcc(int format, int *dri_components_p) format = __DRI_IMAGE_FORMAT_R8; dri_components = __DRI_IMAGE_COMPONENTS_Y_UV; break; + case __DRI_IMAGE_FOURCC_P010: + case __DRI_IMAGE_FOURCC_P012: + case __DRI_IMAGE_FOURCC_P016: + format = __DRI_IMAGE_FORMAT_R16; + dri_components = __DRI_IMAGE_COMPONENTS_Y_UV; + break; default: return -1; } diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c index 886f94dc02c..5a0d2e1354d 100644 --- a/src/gallium/state_trackers/dri/drisw.c +++ b/src/gallium/state_trackers/dri/drisw.c @@ -421,12 +421,19 @@ static const __DRIextension *drisw_screen_extensions[] = { NULL }; -static struct drisw_loader_funcs drisw_lf = { +static const struct drisw_loader_funcs drisw_lf = { .get_image = drisw_get_image, .put_image = drisw_put_image, .put_image2 = drisw_put_image2 }; +static const struct drisw_loader_funcs drisw_shm_lf = { + .get_image = drisw_get_image, + .put_image = drisw_put_image, + .put_image2 = drisw_put_image2, + .put_image_shm = drisw_put_image_shm +}; + static const __DRIconfig ** drisw_init_screen(__DRIscreen * sPriv) { @@ -434,6 +441,7 @@ drisw_init_screen(__DRIscreen * sPriv) const __DRIconfig **configs; struct dri_screen *screen; struct pipe_screen *pscreen = NULL; + const struct drisw_loader_funcs *lf = &drisw_lf; screen = CALLOC_STRUCT(dri_screen); if (!screen) @@ -448,10 +456,10 @@ drisw_init_screen(__DRIscreen * sPriv) sPriv->extensions = drisw_screen_extensions; if (loader->base.version >= 4) { if (loader->putImageShm) - drisw_lf.put_image_shm = 
drisw_put_image_shm; + lf = &drisw_shm_lf; } - if (pipe_loader_sw_probe_dri(&screen->dev, &drisw_lf)) { + if (pipe_loader_sw_probe_dri(&screen->dev, lf)) { dri_init_options(screen); pscreen = pipe_loader_create_screen(screen->dev); diff --git a/src/gallium/state_trackers/nine/buffer9.h b/src/gallium/state_trackers/nine/buffer9.h index b04a0a721bb..1803d8d6405 100644 --- a/src/gallium/state_trackers/nine/buffer9.h +++ b/src/gallium/state_trackers/nine/buffer9.h @@ -104,7 +104,9 @@ NineBuffer9_Upload( struct NineBuffer9 *This ) struct NineDevice9 *device = This->base.base.device; assert(This->base.pool == D3DPOOL_MANAGED && This->managed.dirty); - nine_context_range_upload(device, &This->managed.pending_upload, This->base.resource, + nine_context_range_upload(device, &This->managed.pending_upload, + (struct NineUnknown *)This, + This->base.resource, This->managed.dirty_box.x, This->managed.dirty_box.width, (char *)This->managed.data + This->managed.dirty_box.x); diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 74aaf57a549..c5596a5ee94 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -2418,6 +2418,7 @@ CSMT_ITEM_NO_WAIT(nine_context_gen_mipmap, } CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_range_upload, + ARG_BIND_REF(struct NineUnknown, src_ref), ARG_BIND_RES(struct pipe_resource, res), ARG_VAL(unsigned, offset), ARG_VAL(unsigned, size), @@ -2425,11 +2426,14 @@ CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_range_upload, { struct nine_context *context = &device->context; + /* Binding src_ref avoids release before upload */ + (void)src_ref; + context->pipe->buffer_subdata(context->pipe, res, 0, offset, size, data); } CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_box_upload, - ARG_BIND_REF(struct NineUnknown, dst), + ARG_BIND_REF(struct NineUnknown, src_ref), ARG_BIND_RES(struct pipe_resource, res), ARG_VAL(unsigned, level), ARG_COPY_REF(struct 
pipe_box, dst_box), @@ -2444,8 +2448,8 @@ CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_box_upload, struct pipe_transfer *transfer = NULL; uint8_t *map; - /* We just bind dst for the bind count */ - (void)dst; + /* Binding src_ref avoids release before upload */ + (void)src_ref; map = pipe->transfer_map(pipe, res, diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index 51e5e326527..55960007bfb 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -560,6 +560,7 @@ nine_context_gen_mipmap(struct NineDevice9 *device, void nine_context_range_upload(struct NineDevice9 *device, unsigned *counter, + struct NineUnknown *src_ref, struct pipe_resource *res, unsigned offset, unsigned size, @@ -568,7 +569,7 @@ nine_context_range_upload(struct NineDevice9 *device, void nine_context_box_upload(struct NineDevice9 *device, unsigned *counter, - struct NineUnknown *dst, + struct NineUnknown *src_ref, struct pipe_resource *res, unsigned level, const struct pipe_box *dst_box, diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c index 5fd662fa049..10518219a0a 100644 --- a/src/gallium/state_trackers/nine/surface9.c +++ b/src/gallium/state_trackers/nine/surface9.c @@ -660,7 +660,7 @@ NineSurface9_CopyMemToDefault( struct NineSurface9 *This, nine_context_box_upload(This->base.base.device, &From->pending_uploads_counter, - (struct NineUnknown *)This, + (struct NineUnknown *)From, r_dst, This->level, &dst_box, diff --git a/src/gallium/state_trackers/nine/threadpool.c b/src/gallium/state_trackers/nine/threadpool.c index cc62fd25799..19721aab2dd 100644 --- a/src/gallium/state_trackers/nine/threadpool.c +++ b/src/gallium/state_trackers/nine/threadpool.c @@ -37,6 +37,7 @@ #include "os/os_thread.h" #include "threadpool.h" +/* POSIX thread function */ static void * threadpool_worker(void *data) { @@ -76,6 +77,15 @@ threadpool_worker(void 
*data) return NULL; } +/* Windows thread function */ +static DWORD NINE_WINAPI +wthreadpool_worker(void *data) +{ + threadpool_worker(data); + + return 0; +} + struct threadpool * _mesa_threadpool_create(struct NineSwapChain9 *swapchain) { @@ -87,7 +97,9 @@ _mesa_threadpool_create(struct NineSwapChain9 *swapchain) pthread_mutex_init(&pool->m, NULL); pthread_cond_init(&pool->new_work, NULL); - pool->wthread = NineSwapChain9_CreateThread(swapchain, threadpool_worker, pool); + /* This uses WINE's CreateThread, so the thread function needs to use + * the Windows ABI */ + pool->wthread = NineSwapChain9_CreateThread(swapchain, wthreadpool_worker, pool); if (!pool->wthread) { /* using pthread as fallback */ pthread_create(&pool->pthread, NULL, threadpool_worker, pool); diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c index ec811aeba13..840f01dae10 100644 --- a/src/gallium/state_trackers/nine/volume9.c +++ b/src/gallium/state_trackers/nine/volume9.c @@ -449,7 +449,7 @@ NineVolume9_CopyMemToDefault( struct NineVolume9 *This, nine_context_box_upload(This->base.device, &From->pending_uploads_counter, - (struct NineUnknown *)This, + (struct NineUnknown *)From, r_dst, This->level, &dst_box, diff --git a/src/gallium/state_trackers/nine/volumetexture9.c b/src/gallium/state_trackers/nine/volumetexture9.c index 5dec4844864..c7191bce688 100644 --- a/src/gallium/state_trackers/nine/volumetexture9.c +++ b/src/gallium/state_trackers/nine/volumetexture9.c @@ -141,7 +141,8 @@ NineVolumeTexture9_dtor( struct NineVolumeTexture9 *This ) if (This->volumes) { for (l = 0; l <= This->base.base.info.last_level; ++l) - NineUnknown_Destroy(&This->volumes[l]->base); + if (This->volumes[l]) + NineUnknown_Destroy(&This->volumes[l]->base); FREE(This->volumes); } diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c index 5376be28531..9646427ea5f 100644 --- a/src/gallium/state_trackers/va/surface.c +++ 
b/src/gallium/state_trackers/va/surface.c @@ -598,10 +598,8 @@ surface_from_external_memory(VADriverContextP ctx, vlVaSurface *surface, return VA_STATUS_SUCCESS; fail: - for (i = 0; i < VL_NUM_COMPONENTS; i++) { - if (resources[i]) - pscreen->resource_destroy(pscreen, resources[i]); - } + for (i = 0; i < VL_NUM_COMPONENTS; i++) + pipe_resource_reference(&resources[i], NULL); return result; } diff --git a/src/gallium/state_trackers/xa/xa_context.c b/src/gallium/state_trackers/xa/xa_context.c index ba220877c84..67d9eac53bb 100644 --- a/src/gallium/state_trackers/xa/xa_context.c +++ b/src/gallium/state_trackers/xa/xa_context.c @@ -91,6 +91,7 @@ xa_context_destroy(struct xa_context *r) } r->pipe->destroy(r->pipe); + free(r); } XA_EXPORT int diff --git a/src/gallium/state_trackers/xvmc/Makefile.am b/src/gallium/state_trackers/xvmc/Makefile.am index 85d0b5f4953..dc278099030 100644 --- a/src/gallium/state_trackers/xvmc/Makefile.am +++ b/src/gallium/state_trackers/xvmc/Makefile.am @@ -27,6 +27,7 @@ AM_CFLAGS = \ $(GALLIUM_CFLAGS) \ $(VISIBILITY_CFLAGS) \ $(VL_CFLAGS) \ + $(X11_INCLUDES) \ $(XCB_DRI3_CFLAGS) \ $(XVMC_CFLAGS) diff --git a/src/gallium/targets/d3dadapter9/meson.build b/src/gallium/targets/d3dadapter9/meson.build index bd05b4f9692..bc72b1110a0 100644 --- a/src/gallium/targets/d3dadapter9/meson.build +++ b/src/gallium/targets/d3dadapter9/meson.build @@ -53,7 +53,7 @@ libgallium_nine = shared_library( libswkmsdri, ], dependencies : [ - dep_selinux, dep_expat, dep_libdrm, dep_llvm, + dep_selinux, dep_expat, dep_libdrm, dep_llvm, dep_thread, driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau, driver_i915, driver_svga, ], diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index 9c43fa1e8fd..83f439071f8 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -43,9 +43,17 @@ LOCAL_SHARED_LIBRARIES := \ libbacktrace \ libdl \ libglapi \ - libexpat \ libz +# If Android version 
>=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_STATIC_LIBRARIES := \ + libexpat +else +LOCAL_SHARED_LIBRARIES += \ + libexpat +endif + $(foreach d, $(MESA_BUILD_GALLIUM), $(eval LOCAL_CFLAGS += $(patsubst HAVE_%,-D%,$(d)))) # sort GALLIUM_LIBS to remove any duplicates diff --git a/src/gallium/targets/pipe-loader/meson.build b/src/gallium/targets/pipe-loader/meson.build index 5a44102a69d..e9454d5666a 100644 --- a/src/gallium/targets/pipe-loader/meson.build +++ b/src/gallium/targets/pipe-loader/meson.build @@ -31,7 +31,7 @@ if (with_gallium_va or with_gallium_vdpau or with_gallium_omx != 'disabled' or with_gallium_xvmc or with_dri) pipe_loader_link_with += libgalliumvl else - pipe_loader_link_with += libgalliumvl_stubs + pipe_loader_link_with += libgalliumvl_stub endif if (with_gallium_va or with_gallium_vdpau or with_gallium_omx != 'disabled' or with_gallium_xvmc) diff --git a/src/gallium/targets/vdpau/Makefile.am b/src/gallium/targets/vdpau/Makefile.am index cd05a024451..2742c7acd44 100644 --- a/src/gallium/targets/vdpau/Makefile.am +++ b/src/gallium/targets/vdpau/Makefile.am @@ -57,8 +57,6 @@ include $(top_srcdir)/src/gallium/drivers/r300/Automake.inc include $(top_srcdir)/src/gallium/drivers/r600/Automake.inc include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc -include $(top_srcdir)/src/gallium/drivers/tegra/Automake.inc - if HAVE_GALLIUM_STATIC_TARGETS libvdpau_gallium_la_SOURCES += target.c diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 68f0562a644..f108058052d 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -1310,6 +1310,12 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws, if (bo) { p_atomic_inc(&bo->base.reference.count); simple_mtx_unlock(&ws->bo_export_table_lock); + + /* Release the buffer handle, because we don't 
need it anymore. + * This function is returning an existing buffer, which has its own + * handle. + */ + amdgpu_bo_free(result.buf_handle); return &bo->base; } diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index f32bbd9d086..b20d702670d 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -280,6 +280,12 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, if (ws) { pipe_reference(NULL, &ws->reference); simple_mtx_unlock(&dev_tab_mutex); + + /* Release the device handle, because we don't need it anymore. + * This function is returning an existing winsys instance, which + * has its own device handle. + */ + amdgpu_device_deinitialize(dev); return &ws->base; } diff --git a/src/gallium/winsys/i915/drm/Android.mk b/src/gallium/winsys/i915/drm/Android.mk index bab3e85c5dd..bc8cd0ebe2e 100644 --- a/src/gallium/winsys/i915/drm/Android.mk +++ b/src/gallium/winsys/i915/drm/Android.mk @@ -30,7 +30,7 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := $(C_SOURCES) -LOCAL_SHARED_LIBRARIES := libdrm_intel +LOCAL_SHARED_LIBRARIES := libdrm_intel_pri LOCAL_MODULE := libmesa_winsys_i915 include $(GALLIUM_COMMON_MK) diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c index 739e4ea131f..0ec8c1abe11 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c @@ -1198,4 +1198,6 @@ void vmw_ioctl_cleanup(struct vmw_winsys_screen *vws) { VMW_FUNC; + + free(vws->ioctl.cap_3d); } diff --git a/src/gallium/winsys/sw/dri/dri_sw_winsys.c b/src/gallium/winsys/sw/dri/dri_sw_winsys.c index d519bcfedd3..cd44b036c6f 100644 --- a/src/gallium/winsys/sw/dri/dri_sw_winsys.c +++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.c @@ -62,7 +62,7 @@ struct dri_sw_winsys { struct sw_winsys base; - struct drisw_loader_funcs *lf; + const struct drisw_loader_funcs *lf; }; 
static inline struct dri_sw_displaytarget * @@ -282,7 +282,7 @@ dri_destroy_sw_winsys(struct sw_winsys *winsys) } struct sw_winsys * -dri_create_sw_winsys(struct drisw_loader_funcs *lf) +dri_create_sw_winsys(const struct drisw_loader_funcs *lf) { struct dri_sw_winsys *ws; diff --git a/src/gallium/winsys/sw/dri/dri_sw_winsys.h b/src/gallium/winsys/sw/dri/dri_sw_winsys.h index 329ac06a05b..47e3777d4cd 100644 --- a/src/gallium/winsys/sw/dri/dri_sw_winsys.h +++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.h @@ -33,6 +33,6 @@ struct sw_winsys; -struct sw_winsys *dri_create_sw_winsys(struct drisw_loader_funcs *lf); +struct sw_winsys *dri_create_sw_winsys(const struct drisw_loader_funcs *lf); #endif diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c index a589f694bb0..176d04388f2 100644 --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c @@ -559,7 +559,7 @@ virgl_cs_create_fence(struct virgl_winsys *vws) res = virgl_vtest_winsys_resource_cache_create(vws, PIPE_BUFFER, PIPE_FORMAT_R8_UNORM, - PIPE_BIND_CUSTOM, + VIRGL_BIND_CUSTOM, 8, 1, 1, 0, 0, 0, 8); return (struct pipe_fence_handle *)res; @@ -639,7 +639,7 @@ static void virgl_vtest_flush_frontbuffer(struct virgl_winsys *vws, * get the data. */ virgl_vtest_recv_transfer_get_data(vtws, map + offset, size, valid_stride, &box, res->format, - util_format_get_stride(res->format, res->width)); + vtws->protocol_version == 0 ? 
valid_stride : util_format_get_stride(res->format, res->width)); vtws->sws->displaytarget_unmap(vtws->sws, res->dt); diff --git a/src/gbm/meson.build b/src/gbm/meson.build index 2e9d380c0b4..719f9c1a9b8 100644 --- a/src/gbm/meson.build +++ b/src/gbm/meson.build @@ -32,7 +32,6 @@ args_gbm = [] deps_gbm = [] incs_gbm = [ include_directories('main'), inc_include, inc_src, inc_loader, - inc_wayland_drm, ] if with_dri2 diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am index 8f9d80c9f41..d06ae2972e9 100644 --- a/src/glx/Makefile.am +++ b/src/glx/Makefile.am @@ -24,10 +24,6 @@ SUBDIRS = EXTRA_DIST = SConscript meson.build -if HAVE_XF86VIDMODE -EXTRA_DEFINES_XF86VIDMODE = -DXF86VIDMODE -endif - AM_CFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/include/GL/internal \ @@ -38,7 +34,6 @@ AM_CFLAGS = \ -I$(top_builddir)/src/mapi/glapi \ -I$(top_srcdir)/src/mapi/glapi \ $(VISIBILITY_CFLAGS) \ - $(EXTRA_DEFINES_XF86VIDMODE) \ -D_REENTRANT \ -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \ $(DEFINES) \ diff --git a/src/glx/SConscript b/src/glx/SConscript index 8ce17715814..051f55b7669 100644 --- a/src/glx/SConscript +++ b/src/glx/SConscript @@ -36,10 +36,7 @@ env.Prepend(LIBS = [ env.PkgUseModules('X11') env.PkgUseModules('XCB') env.PkgUseModules('DRM') - -if env['HAVE_XF86VIDMODE']: - env.Append(CPPDEFINES = ['XF86VIDMODE']) - env.PkgUseModules('XF86VIDMODE') +env.PkgUseModules('XF86VIDMODE') sources = [ 'clientattrib.c', diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c index 4db0228eaba..424008fd670 100644 --- a/src/glx/glxcmds.c +++ b/src/glx/glxcmds.c @@ -46,9 +46,9 @@ #include "util/debug.h" #else #include -#ifdef XF86VIDMODE +#ifndef GLX_USE_WINDOWSGL #include -#endif +#endif /* GLX_USE_WINDOWSGL */ #endif #endif @@ -2071,7 +2071,7 @@ _X_HIDDEN GLboolean __glxGetMscRate(struct glx_screen *psc, int32_t * numerator, int32_t * denominator) { -#ifdef XF86VIDMODE +#if !defined(GLX_USE_WINDOWSGL) XF86VidModeModeLine mode_line; int dot_clock; int i; @@ -2118,7 
+2118,6 @@ __glxGetMscRate(struct glx_screen *psc, return True; } - else #endif return False; @@ -2145,7 +2144,7 @@ _X_HIDDEN GLboolean __glXGetMscRateOML(Display * dpy, GLXDrawable drawable, int32_t * numerator, int32_t * denominator) { -#if defined( GLX_DIRECT_RENDERING ) && defined( XF86VIDMODE ) +#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) && !defined(GLX_USE_WINDOWSGL) __GLXDRIdrawable *draw = GetGLXDRIDrawable(dpy, drawable); if (draw == NULL) diff --git a/src/glx/meson.build b/src/glx/meson.build index dd8ba60ad80..a61f959e800 100644 --- a/src/glx/meson.build +++ b/src/glx/meson.build @@ -137,10 +137,6 @@ gl_lib_cargs = [ '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path), ] -if dep_xxf86vm.found() - gl_lib_cargs += '-DHAVE_XF86VIDMODE' -endif - libglx = static_library( 'glx', [files_libglx, glx_generated], @@ -154,26 +150,22 @@ libglx = static_library( extra_libs_libglx, ], dependencies : [dep_libdrm, dep_dri2proto, dep_glproto, dep_x11, dep_glvnd], - build_by_default : false, ) -if with_glx == 'dri' - libgl = shared_library( - gl_lib_name, - [], - include_directories : [inc_common, inc_glapi, inc_loader, inc_gl_internal], - link_with : [libglapi_static, libglapi], - link_whole : libglx, - link_args : [ld_args_bsymbolic, ld_args_gc_sections, extra_ld_args_libgl], - dependencies : [ - dep_libdrm, dep_dl, dep_m, dep_thread, dep_x11, dep_xcb_glx, dep_xcb, - dep_x11_xcb, dep_xcb_dri2, dep_xext, dep_xfixes, dep_xdamage, - extra_deps_libgl, - ], - version : gl_lib_version, - install : true, - ) -endif +libgl = shared_library( + gl_lib_name, + [], + link_with : [libglapi_static, libglapi], + link_whole : libglx, + link_args : [ld_args_bsymbolic, ld_args_gc_sections, extra_ld_args_libgl], + dependencies : [ + dep_libdrm, dep_dl, dep_m, dep_thread, dep_x11, dep_xcb_glx, dep_xcb, + dep_x11_xcb, dep_xcb_dri2, dep_xext, dep_xfixes, dep_xdamage, dep_xxf86vm, + extra_deps_libgl, + ], + version : gl_lib_version, + install : true, +) if with_tests 
subdir('tests') diff --git a/src/glx/tests/meson.build b/src/glx/tests/meson.build index fd9d4d433b2..e59b42d19a6 100644 --- a/src/glx/tests/meson.build +++ b/src/glx/tests/meson.build @@ -33,6 +33,11 @@ if with_shared_glapi files_glx_test += files('query_renderer_implementation_unittest.cpp') endif + test( + 'dispatch-index-check', + files('dispatch-index-check'), + suite : ['glx'], + ) test( 'glx-test', executable( @@ -41,9 +46,9 @@ if with_shared_glapi link_with : [libglx, libglapi], include_directories : [ inc_src, inc_include, inc_mesa, inc_mapi, inc_gl_internal, - include_directories('..'), + inc_glx, ], - dependencies : [dep_libdrm, dep_thread, idep_gtest] - ) + dependencies : [dep_libdrm, dep_glproto, dep_thread, idep_gtest] + ), ) endif diff --git a/src/intel/Android.common.mk b/src/intel/Android.common.mk index 12cea6e5472..12bd8947e2e 100644 --- a/src/intel/Android.common.mk +++ b/src/intel/Android.common.mk @@ -38,7 +38,17 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa -LOCAL_SHARED_LIBRARIES := libexpat libz +LOCAL_SHARED_LIBRARIES := libz + +# If Android version >=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_STATIC_LIBRARIES := \ + libexpat +LOCAL_HEADER_LIBRARIES += liblog_headers +else +LOCAL_SHARED_LIBRARIES += \ + libexpat +endif LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_genxml diff --git a/src/intel/Android.compiler.mk b/src/intel/Android.compiler.mk index c2b01221dfc..41af7b20b9c 100644 --- a/src/intel/Android.compiler.mk +++ b/src/intel/Android.compiler.mk @@ -28,7 +28,7 @@ # --------------------------------------- include $(CLEAR_VARS) - +LOCAL_CFLAGS += -Wno-error LOCAL_MODULE := libmesa_intel_compiler LOCAL_MODULE_CLASS := STATIC_LIBRARIES diff --git a/src/intel/Android.dev.mk b/src/intel/Android.dev.mk index cd2ed66a176..3011ee232ed 100644 --- a/src/intel/Android.dev.mk +++ b/src/intel/Android.dev.mk @@ -33,5 +33,8 @@ LOCAL_C_INCLUDES 
:= $(MESA_TOP)/include/drm-uapi LOCAL_SRC_FILES := $(DEV_FILES) +LOCAL_CFLAGS := \ + -Wno-gnu-variable-sized-type-not-at-end + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/intel/Android.vulkan.mk b/src/intel/Android.vulkan.mk index db81fada277..f4fda3f86ce 100644 --- a/src/intel/Android.vulkan.mk +++ b/src/intel/Android.vulkan.mk @@ -38,7 +38,10 @@ VULKAN_COMMON_INCLUDES := \ $(MESA_TOP)/src/intel \ $(MESA_TOP)/include/drm-uapi \ $(MESA_TOP)/src/intel/vulkan \ - frameworks/native/vulkan/include + frameworks/native/vulkan/include \ + frameworks/native/libs/nativebase/include \ + frameworks/native/libs/nativewindow/include \ + frameworks/native/libs/arect/include # libmesa_anv_entrypoints with header and dummy.c # @@ -72,7 +75,9 @@ $(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c LOCAL_EXPORT_C_INCLUDE_DIRS := \ $(intermediates) -LOCAL_SHARED_LIBRARIES := libdrm +LOCAL_SHARED_LIBRARIES := libdrm_pri + +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -84,7 +89,7 @@ ANV_INCLUDES := \ $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_vulkan_common,,)/vulkan \ $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_vulkan_util,,)/util -ANV_SHARED_LIBRARIES := libdrm +ANV_SHARED_LIBRARIES := libdrm_pri ifeq ($(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5 6 7),) ANV_SHARED_LIBRARIES += libnativewindow @@ -107,6 +112,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -120,13 +127,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN75_FILES) LOCAL_CFLAGS := -DGEN_VERSIONx10=75 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := 
libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -140,13 +149,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN8_FILES) LOCAL_CFLAGS := -DGEN_VERSIONx10=80 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -160,13 +171,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN9_FILES) LOCAL_CFLAGS := -DGEN_VERSIONx10=90 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -180,13 +193,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN10_FILES) LOCAL_CFLAGS := -DGEN_VERSIONx10=100 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -207,6 +222,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -217,7 +234,7 @@ include $(BUILD_STATIC_LIBRARY) include 
$(CLEAR_VARS) LOCAL_MODULE := libmesa_vulkan_common LOCAL_MODULE_CLASS := STATIC_LIBRARIES - +LOCAL_CFLAGS += -Wno-error intermediates := $(call local-generated-sources-dir) LOCAL_SRC_FILES := $(VULKAN_FILES) @@ -261,6 +278,8 @@ $(intermediates)/vulkan/anv_extensions.h: LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -310,5 +329,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) libz libsync liblog +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/intel/Makefile.compiler.am b/src/intel/Makefile.compiler.am index cd7e6882fb9..7c33e35816b 100644 --- a/src/intel/Makefile.compiler.am +++ b/src/intel/Makefile.compiler.am @@ -64,6 +64,7 @@ COMPILER_TESTS = \ compiler/test_vf_float_conversions \ compiler/test_vec4_cmod_propagation \ compiler/test_vec4_copy_propagation \ + compiler/test_vec4_dead_code_eliminate \ compiler/test_vec4_register_coalesce TESTS += $(COMPILER_TESTS) @@ -97,6 +98,10 @@ compiler_test_vec4_cmod_propagation_SOURCES = \ compiler/test_vec4_cmod_propagation.cpp compiler_test_vec4_cmod_propagation_LDADD = $(TEST_LIBS) +compiler_test_vec4_dead_code_eliminate_SOURCES = \ + compiler/test_vec4_dead_code_eliminate.cpp +compiler_test_vec4_dead_code_eliminate_LDADD = $(TEST_LIBS) + # Strictly speaking this is neither a C++ test nor using gtest - we can address # address that at a later point. Until then, this allows us a to simplify things. 
compiler_test_eu_compact_SOURCES = \ diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 065980616ec..01bea99d3d8 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -1326,7 +1326,7 @@ blorp_emit_memcpy(struct blorp_batch *batch, static void blorp_emit_surface_state(struct blorp_batch *batch, const struct brw_blorp_surface_info *surface, - enum isl_aux_op op, + enum isl_aux_op aux_op, void *state, uint32_t state_offset, const bool color_write_disables[4], bool is_render_target) @@ -1382,7 +1382,7 @@ blorp_emit_surface_state(struct blorp_batch *batch, surface->aux_addr, *aux_addr); } - if (surface->clear_color_addr.buffer) { + if (aux_usage != ISL_AUX_USAGE_NONE && surface->clear_color_addr.buffer) { #if GEN_GEN >= 10 assert((surface->clear_color_addr.offset & 0x3f) == 0); uint32_t *clear_addr = state + isl_dev->ss.clear_color_state_offset; @@ -1390,7 +1390,10 @@ blorp_emit_surface_state(struct blorp_batch *batch, isl_dev->ss.clear_color_state_offset, surface->clear_color_addr, *clear_addr); #elif GEN_GEN >= 7 - if (op == ISL_AUX_OP_FULL_RESOLVE || op == ISL_AUX_OP_PARTIAL_RESOLVE) { + /* Fast clears just whack the AUX surface and don't actually use the + * clear color for anything. We can avoid the MI memcpy on that case. 
+ */ + if (aux_op != ISL_AUX_OP_FAST_CLEAR) { struct blorp_address dst_addr = blorp_get_surface_base_address(batch); dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset; blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr, diff --git a/src/intel/common/gen_batch_decoder.c b/src/intel/common/gen_batch_decoder.c index 63f04627572..36ee7706e40 100644 --- a/src/intel/common/gen_batch_decoder.c +++ b/src/intel/common/gen_batch_decoder.c @@ -214,7 +214,7 @@ handle_state_base_address(struct gen_batch_decode_ctx *ctx, const uint32_t *p) surface_modify = iter.raw_value; } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) { dynamic_modify = iter.raw_value; - } else if (strcmp(iter.name, "Insntruction Base Address Modify Enable") == 0) { + } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) { instruction_modify = iter.raw_value; } } diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c index a978f2f5818..8990d208207 100644 --- a/src/intel/common/gen_debug.c +++ b/src/intel/common/gen_debug.c @@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = { { "nohiz", DEBUG_NO_HIZ }, { "color", DEBUG_COLOR }, { "reemit", DEBUG_REEMIT }, + { "heur32", DEBUG_HEUR32 }, { NULL, 0 } }; diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h index 72d7ca20a39..c2ca2e2ebd6 100644 --- a/src/intel/common/gen_debug.h +++ b/src/intel/common/gen_debug.h @@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_NO_HIZ (1ull << 39) #define DEBUG_COLOR (1ull << 40) #define DEBUG_REEMIT (1ull << 41) +#define DEBUG_HEUR32 (1ull << 42) /* These flags are not compatible with the disk shader cache */ #define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME @@ -90,7 +91,7 @@ extern uint64_t INTEL_DEBUG; /* These flags may affect program generation */ #define DEBUG_DISK_CACHE_MASK \ (DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 | DEBUG_SPILL_FS | \ - DEBUG_SPILL_VEC4 | 
DEBUG_NO_COMPACTION | DEBUG_DO32) + DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_HEUR32) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index d8c9499065f..785acdb3343 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -38,6 +38,15 @@ struct ra_regs; struct nir_shader; struct brw_program; +struct brw_simd32_heuristics_control { + bool grouped_sends_check; + int max_grouped_sends; + bool inst_count_check; + float inst_count_ratio; + bool mrt_check; + int max_mrts; +}; + struct brw_compiler { const struct gen_device_info *devinfo; @@ -118,6 +127,8 @@ struct brw_compiler { * whether nir_opt_large_constants will be run. */ bool supports_shader_constants; + + struct brw_simd32_heuristics_control simd32_heuristics_control; }; /** diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 4630b83b1a0..2618e9c2e93 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -925,8 +925,8 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) const struct gen_device_info *devinfo = p->devinfo; /* When converting F->DF on IVB/BYT, every odd source channel is ignored. - * To avoid the problems that causes, we use a <1,2,0> source region to read - * each element twice. + * To avoid the problems that causes, we use an source region to + * read each element twice. 
*/ if (devinfo->gen == 7 && !devinfo->is_haswell && brw_get_default_access_mode(p) == BRW_ALIGN_1 && @@ -935,11 +935,8 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) src0.type == BRW_REGISTER_TYPE_D || src0.type == BRW_REGISTER_TYPE_UD) && !has_scalar_region(src0)) { - assert(src0.vstride == BRW_VERTICAL_STRIDE_4 && - src0.width == BRW_WIDTH_4 && - src0.hstride == BRW_HORIZONTAL_STRIDE_1); - - src0.vstride = BRW_VERTICAL_STRIDE_1; + assert(src0.vstride == src0.width + src0.hstride); + src0.vstride = src0.hstride; src0.width = BRW_WIDTH_2; src0.hstride = BRW_HORIZONTAL_STRIDE_0; } diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 3e083723471..6826226e209 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -315,6 +315,24 @@ fs_inst::has_source_and_destination_hazard() const * may stomp all over it. */ return true; + case SHADER_OPCODE_QUAD_SWIZZLE: + switch (src[1].ud) { + case BRW_SWIZZLE_XXXX: + case BRW_SWIZZLE_YYYY: + case BRW_SWIZZLE_ZZZZ: + case BRW_SWIZZLE_WWWW: + case BRW_SWIZZLE_XXZZ: + case BRW_SWIZZLE_YYWW: + case BRW_SWIZZLE_XYXY: + case BRW_SWIZZLE_ZWZW: + /* These can be implemented as a single Align1 region on all + * platforms, so there's never a hazard between source and + * destination. C.f. fs_generator::generate_quad_swizzle(). 
+ */ + return false; + default: + return !is_uniform(src[0]); + } default: /* The SIMD16 compressed instruction * @@ -3853,6 +3871,9 @@ fs_visitor::lower_integer_multiplication() high.offset = inst->dst.offset % REG_SIZE; if (devinfo->gen >= 7) { + if (inst->src[1].abs) + lower_src_modifiers(this, block, inst, 1); + if (inst->src[1].file == IMM) { ibld.MUL(low, inst->src[0], brw_imm_uw(inst->src[1].ud & 0xffff)); @@ -3865,6 +3886,9 @@ fs_visitor::lower_integer_multiplication() subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1)); } } else { + if (inst->src[0].abs) + lower_src_modifiers(this, block, inst, 0); + ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0), inst->src[1]); ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1), @@ -3882,6 +3906,18 @@ fs_visitor::lower_integer_multiplication() } } else if (inst->opcode == SHADER_OPCODE_MULH) { + /* According to the BDW+ BSpec page for the "Multiply Accumulate + * High" instruction: + * + * "An added preliminary mov is required for source modification on + * src1: + * mov (8) r3.0<1>:d -r3<8;8,1>:d + * mul (8) acc0:d r2.0<8;8,1>:d r3.0<16;8,2>:uw + * mach (8) r5.0<1>:d r2.0<8;8,1>:d r3.0<8;8,1>:d" + */ + if (devinfo->gen >= 8 && (inst->src[1].negate || inst->src[1].abs)) + lower_src_modifiers(this, block, inst, 1); + /* Should have been lowered to 8-wide. */ assert(inst->exec_size <= get_lowered_simd_width(devinfo, inst)); const fs_reg acc = retype(brw_acc_reg(inst->exec_size), @@ -3897,8 +3933,6 @@ fs_visitor::lower_integer_multiplication() * On Gen8, the multiply instruction does a full 32x32-bit * multiply, but in order to do a 64-bit multiply we can simulate * the previous behavior and then use a MACH instruction. - * - * FINISHME: Don't use source modifiers on src1. 
*/ assert(mul->src[1].type == BRW_REGISTER_TYPE_D || mul->src[1].type == BRW_REGISTER_TYPE_UD); @@ -5534,9 +5568,14 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: return MIN2(8, inst->exec_size); - case SHADER_OPCODE_QUAD_SWIZZLE: - return 8; - + case SHADER_OPCODE_QUAD_SWIZZLE: { + const unsigned swiz = inst->src[1].ud; + return (is_uniform(inst->src[0]) ? + get_fpu_lowered_simd_width(devinfo, inst) : + devinfo->gen < 11 && type_sz(inst->src[0].type) == 4 ? 8 : + swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ? 4 : + get_fpu_lowered_simd_width(devinfo, inst)); + } case SHADER_OPCODE_MOV_INDIRECT: { /* From IVB and HSW PRMs: * @@ -5601,8 +5640,10 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i) static fs_reg emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i) { + assert(lbld.group() >= inst->group); + /* Specified channel group from the source region. */ - const fs_reg src = horiz_offset(inst->src[i], lbld.group()); + const fs_reg src = horiz_offset(inst->src[i], lbld.group() - inst->group); if (needs_src_copy(lbld, inst, i)) { /* Builder of the right width to perform the copy avoiding uninitialized @@ -5691,9 +5732,10 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after, { assert(lbld_before.dispatch_width() == lbld_after.dispatch_width()); assert(lbld_before.group() == lbld_after.group()); + assert(lbld_after.group() >= inst->group); /* Specified channel group from the destination region. 
*/ - const fs_reg dst = horiz_offset(inst->dst, lbld_after.group()); + const fs_reg dst = horiz_offset(inst->dst, lbld_after.group() - inst->group); const unsigned dst_size = inst->size_written / inst->dst.component_size(inst->exec_size); @@ -7127,6 +7169,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, char **error_str) { const struct gen_device_info *devinfo = compiler->devinfo; + bool simd16_failed = false; + bool simd16_spilled = false; nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true); @@ -7194,10 +7238,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, shader_time_index16); v16.import_uniforms(&v8); if (!v16.run_fs(allow_spilling, use_rep_send)) { + simd16_failed = true; compiler->shader_perf_log(log_data, "SIMD16 shader failed to compile: %s", v16.fail_msg); } else { + simd16_spilled = v16.spilled_any_registers; simd16_cfg = v16.cfg; prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs; prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used); @@ -7205,9 +7251,17 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, } /* Currently, the compiler only supports SIMD32 on SNB+ */ + const brw_simd32_heuristics_control *ctrl = &compiler->simd32_heuristics_control; + uint64_t mrts = shader->info.outputs_written << FRAG_RESULT_DATA0; + if (v8.max_dispatch_width >= 32 && !use_rep_send && compiler->devinfo->gen >= 6 && - unlikely(INTEL_DEBUG & DEBUG_DO32)) { + (unlikely(INTEL_DEBUG & DEBUG_DO32) || + (unlikely(INTEL_DEBUG & DEBUG_HEUR32) && + !simd16_failed && !simd16_spilled && + (!ctrl->mrt_check || + (ctrl->mrt_check && + u_count_bits64(&mrts) <= ctrl->max_mrts))))) { /* Try a SIMD32 compile */ fs_visitor v32(compiler, log_data, mem_ctx, key, &prog_data->base, prog, shader, 32, @@ -7218,9 +7272,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, "SIMD32 shader failed to compile: %s", 
v32.fail_msg); } else { - simd32_cfg = v32.cfg; - prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs; - prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used); + if (likely(!(INTEL_DEBUG & DEBUG_HEUR32)) || + v32.run_heuristic(ctrl)) { + simd32_cfg = v32.cfg; + prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs; + prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used); + } } } @@ -7299,13 +7356,49 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, } if (simd32_cfg) { - prog_data->dispatch_32 = true; - prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32); + uint32_t offset = g.generate_code(simd32_cfg, 32); + + if (unlikely(INTEL_DEBUG & DEBUG_DO32) || + (unlikely(INTEL_DEBUG & DEBUG_HEUR32) && + (!simd16_cfg || + (simd16_cfg && + (!ctrl->inst_count_check || + (ctrl->inst_count_check && + (float)g.get_inst_count(32) / (float)g.get_inst_count(16) <= ctrl->inst_count_ratio)))))) { + prog_data->dispatch_32 = true; + prog_data->prog_offset_32 = offset; + } } return g.get_assembly(); } +bool +fs_visitor::run_heuristic(const struct brw_simd32_heuristics_control *ctrl) { + int grouped_sends = 0; + int max_grouped_sends = 0; + bool pass = true; + + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->opcode >= SHADER_OPCODE_TEX && inst->opcode <= SHADER_OPCODE_SAMPLEINFO_LOGICAL) { + ++grouped_sends; + } else if (grouped_sends > 0) { + if (grouped_sends > max_grouped_sends) { + max_grouped_sends = grouped_sends; + } + grouped_sends = 0; + } + } + + if (ctrl->grouped_sends_check) { + if (max_grouped_sends > ctrl->max_grouped_sends) { + pass = false; + } + } + + return pass; +} + fs_reg * fs_visitor::emit_cs_work_group_id_setup() { diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index aba19d5ab2c..26868c10107 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -286,6 +286,8 @@ class fs_visitor : public backend_shader void dump_instruction(backend_instruction 
*inst); void dump_instruction(backend_instruction *inst, FILE *file); + bool run_heuristic(const struct brw_simd32_heuristics_control *ctrl); + const void *const key; const struct brw_sampler_prog_key_data *key_tex; @@ -397,6 +399,7 @@ class fs_generator void enable_debug(const char *shader_name); int generate_code(const cfg_t *cfg, int dispatch_width); + int get_inst_count(int dispatch_width); const unsigned *get_assembly(); private: @@ -478,6 +481,10 @@ class fs_generator struct brw_reg src, struct brw_reg idx); + void generate_quad_swizzle(const fs_inst *inst, + struct brw_reg dst, struct brw_reg src, + unsigned swiz); + bool patch_discard_jumps_to_fb_writes(); const struct brw_compiler *compiler; @@ -489,6 +496,7 @@ class fs_generator struct brw_stage_prog_data * const prog_data; unsigned dispatch_width; /**< 8, 16 or 32 */ + int inst_count[3]; /* for 8, 16 and 32 */ exec_list discard_halt_patches; unsigned promoted_constants; @@ -529,6 +537,25 @@ namespace brw { return fs_reg(retype(brw_vec8_grf(regs[0], 0), type)); } } + + /** + * Remove any modifiers from the \p i-th source region of the instruction, + * including negate, abs and any implicit type conversion to the execution + * type. Instead any source modifiers will be implemented as a separate + * MOV instruction prior to the original instruction. 
+ */ + inline bool + lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) + { + assert(inst->components_read(i) == 1); + const fs_builder ibld(v, block, inst); + const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); + + ibld.MOV(tmp, inst->src[i]); + inst->src[i] = tmp; + + return true; + } } void shuffle_from_32bit_read(const brw::fs_builder &bld, diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index ab34b63748e..a76e0f3a6b5 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -315,6 +315,16 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned stride, if (stride > 4) return false; + /* Bail if the channels of the source need to be aligned to the byte offset + * of the corresponding channel of the destination, and the provided stride + * would break this restriction. + */ + if (has_dst_aligned_region_restriction(devinfo, inst) && + !(type_sz(inst->src[arg].type) * stride == + type_sz(inst->dst.type) * inst->dst.stride || + stride == 0)) + return false; + /* 3-source instructions can only be Align16, which restricts what strides * they can take. They can only take a stride of 1 (the usual case), or 0 * with a special "repctrl" bit. But the repctrl bit doesn't work for diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 08dd83dded7..4c452e1c38a 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -582,6 +582,72 @@ fs_generator::generate_shuffle(fs_inst *inst, } } +void +fs_generator::generate_quad_swizzle(const fs_inst *inst, + struct brw_reg dst, struct brw_reg src, + unsigned swiz) +{ + /* Requires a quad. 
*/ + assert(inst->exec_size >= 4); + + if (src.file == BRW_IMMEDIATE_VALUE || + has_scalar_region(src)) { + /* The value is uniform across all channels */ + brw_MOV(p, dst, src); + + } else if (devinfo->gen < 11 && type_sz(src.type) == 4) { + /* This only works on 8-wide 32-bit values */ + assert(inst->exec_size == 8); + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src.vstride == src.width + 1); + brw_set_default_access_mode(p, BRW_ALIGN_16); + struct brw_reg swiz_src = stride(src, 4, 4, 1); + swiz_src.swizzle = swiz; + brw_MOV(p, dst, swiz_src); + + } else { + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src.vstride == src.width + 1); + const struct brw_reg src_0 = suboffset(src, BRW_GET_SWZ(swiz, 0)); + + switch (swiz) { + case BRW_SWIZZLE_XXXX: + case BRW_SWIZZLE_YYYY: + case BRW_SWIZZLE_ZZZZ: + case BRW_SWIZZLE_WWWW: + brw_MOV(p, dst, stride(src_0, 4, 4, 0)); + break; + + case BRW_SWIZZLE_XXZZ: + case BRW_SWIZZLE_YYWW: + brw_MOV(p, dst, stride(src_0, 2, 2, 0)); + break; + + case BRW_SWIZZLE_XYXY: + case BRW_SWIZZLE_ZWZW: + assert(inst->exec_size == 4); + brw_MOV(p, dst, stride(src_0, 0, 2, 1)); + break; + + default: + assert(inst->force_writemask_all); + brw_set_default_exec_size(p, cvt(inst->exec_size / 4) - 1); + + for (unsigned c = 0; c < 4; c++) { + brw_inst *insn = brw_MOV( + p, stride(suboffset(dst, c), + 4 * inst->dst.stride, 1, 4 * inst->dst.stride), + stride(suboffset(src, BRW_GET_SWZ(swiz, c)), 4, 1, 0)); + + brw_inst_set_no_dd_clear(devinfo, insn, c < 3); + brw_inst_set_no_dd_check(devinfo, insn, c > 0); + } + + break; + } + } +} + void fs_generator::generate_urb_read(fs_inst *inst, struct brw_reg dst, @@ -2303,23 +2369,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case SHADER_OPCODE_QUAD_SWIZZLE: - /* This only works on 8-wide 32-bit values */ - assert(inst->exec_size == 8); - assert(type_sz(src[0].type) == 4); - assert(inst->force_writemask_all); assert(src[1].file == BRW_IMMEDIATE_VALUE); 
assert(src[1].type == BRW_REGISTER_TYPE_UD); - - if (src[0].file == BRW_IMMEDIATE_VALUE || - (src[0].vstride == 0 && src[0].hstride == 0)) { - /* The value is uniform across all channels */ - brw_MOV(p, dst, src[0]); - } else { - brw_set_default_access_mode(p, BRW_ALIGN_16); - struct brw_reg swiz_src = stride(src[0], 4, 4, 1); - swiz_src.swizzle = inst->src[1].ud; - brw_MOV(p, dst, swiz_src); - } + generate_quad_swizzle(inst, dst, src[0], src[1].ud); break; case SHADER_OPCODE_CLUSTER_BROADCAST: { @@ -2486,6 +2538,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) fill_count, promoted_constants, before_size, after_size); + inst_count[ffs(dispatch_width) - 4] = before_size / 16; + return start_offset; } @@ -2494,3 +2548,13 @@ fs_generator::get_assembly() { return brw_get_program(p, &prog_data->program_size); } + +int +fs_generator::get_inst_count(int dispatch_width) +{ + if (dispatch_width == 8 || dispatch_width == 16 || dispatch_width == 32) { + return inst_count[ffs(dispatch_width) - 4]; + } else { + return 0; + } +} \ No newline at end of file diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index c845d87d59b..c33394d10d4 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4804,7 +4804,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_begin_fragment_shader_ordering: case nir_intrinsic_begin_invocation_interlock: { const fs_builder ubld = bld.group(8, 0); const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 07e7224e0f8..95b069a2e02 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -486,4 +486,32 @@ get_exec_type_size(const fs_inst *inst) return type_sz(get_exec_type(inst)); } +/** + * Return whether the following regioning restriction applies to the specified + * instruction. 
From the Cherryview PRM Vol 7. "Register Region + * Restrictions": + * + * "When source or destination datatype is 64b or operation is integer DWord + * multiply, regioning in Align1 must follow these rules: + * + * 1. Source and Destination horizontal stride must be aligned to the same qword. + * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. + * 3. Source and Destination offset must be the same, except the case of + * scalar source." + */ +static inline bool +has_dst_aligned_region_restriction(const gen_device_info *devinfo, + const fs_inst *inst) +{ + const brw_reg_type exec_type = get_exec_type(inst); + const bool is_int_multiply = !brw_reg_type_is_floating_point(exec_type) && + (inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD); + + if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 || + (type_sz(exec_type) == 4 && is_int_multiply)) + return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo); + else + return false; +} + #endif diff --git a/src/intel/compiler/brw_nir_opt_peephole_ffma.c b/src/intel/compiler/brw_nir_opt_peephole_ffma.c index cc225e1847b..7271bdbca43 100644 --- a/src/intel/compiler/brw_nir_opt_peephole_ffma.c +++ b/src/intel/compiler/brw_nir_opt_peephole_ffma.c @@ -68,7 +68,7 @@ are_all_uses_fadd(nir_ssa_def *def) } static nir_alu_instr * -get_mul_for_src(nir_alu_src *src, int num_components, +get_mul_for_src(nir_alu_src *src, unsigned num_components, uint8_t swizzle[4], bool *negate, bool *abs) { uint8_t swizzle_tmp[4]; @@ -93,16 +93,19 @@ get_mul_for_src(nir_alu_src *src, int num_components, switch (alu->op) { case nir_op_imov: case nir_op_fmov: - alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components, + swizzle, negate, abs); break; case nir_op_fneg: - alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components, + 
swizzle, negate, abs); *negate = !*negate; break; case nir_op_fabs: - alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs); + alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components, + swizzle, negate, abs); *negate = false; *abs = true; break; diff --git a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp index c09a3d7ebe9..99e4c9cacaf 100644 --- a/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp +++ b/src/intel/compiler/brw_vec4_dead_code_eliminate.cpp @@ -81,17 +81,46 @@ vec4_visitor::dead_code_eliminate() result_live[3] = result; } - for (int c = 0; c < 4; c++) { - if (!result_live[c] && inst->dst.writemask & (1 << c)) { - inst->dst.writemask &= ~(1 << c); + if (inst->writes_flag()) { + /* Independently calculate the usage of the flag components and + * the destination value components. + */ + uint8_t flag_mask = inst->dst.writemask; + uint8_t dest_mask = inst->dst.writemask; + + for (int c = 0; c < 4; c++) { + if (!result_live[c] && dest_mask & (1 << c)) + dest_mask &= ~(1 << c); + + if (!BITSET_TEST(flag_live, c)) + flag_mask &= ~(1 << c); + } + + if (inst->dst.writemask != (flag_mask | dest_mask)) { progress = true; + inst->dst.writemask = flag_mask | dest_mask; + } - if (inst->dst.writemask == 0) { - if (inst->writes_accumulator || inst->writes_flag()) { - inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); - } else { - inst->opcode = BRW_OPCODE_NOP; - break; + /* If none of the destination components are read, replace the + * destination register with the NULL register. 
+ */ + if (dest_mask == 0) { + progress = true; + inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); + } + } else { + for (int c = 0; c < 4; c++) { + if (!result_live[c] && inst->dst.writemask & (1 << c)) { + inst->dst.writemask &= ~(1 << c); + progress = true; + + if (inst->dst.writemask == 0) { + if (inst->writes_accumulator) { + inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); + } else { + inst->opcode = BRW_OPCODE_NOP; + break; + } } } } diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build index 3cdeb6214a8..f2854be779a 100644 --- a/src/intel/compiler/meson.build +++ b/src/intel/compiler/meson.build @@ -145,7 +145,8 @@ if with_tests foreach t : ['fs_cmod_propagation', 'fs_copy_propagation', 'fs_saturate_propagation', 'vf_float_conversions', 'vec4_register_coalesce', 'vec4_copy_propagation', - 'vec4_cmod_propagation', 'eu_compact', 'eu_validate'] + 'vec4_cmod_propagation', 'vec4_dead_code_eliminate', + 'eu_compact', 'eu_validate'] test( t, executable( diff --git a/src/intel/compiler/test_vec4_dead_code_eliminate.cpp b/src/intel/compiler/test_vec4_dead_code_eliminate.cpp new file mode 100644 index 00000000000..25739c2895a --- /dev/null +++ b/src/intel/compiler/test_vec4_dead_code_eliminate.cpp @@ -0,0 +1,163 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include "brw_vec4.h" +#include "program/program.h" + +using namespace brw; + +class dead_code_eliminate_test : public ::testing::Test { + virtual void SetUp(); + +public: + struct brw_compiler *compiler; + struct gen_device_info *devinfo; + struct gl_context *ctx; + struct gl_shader_program *shader_prog; + struct brw_vue_prog_data *prog_data; + vec4_visitor *v; +}; + +class dead_code_eliminate_vec4_visitor : public vec4_visitor +{ +public: + dead_code_eliminate_vec4_visitor(struct brw_compiler *compiler, + nir_shader *shader, + struct brw_vue_prog_data *prog_data) + : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL, + false /* no_spills */, -1) + { + prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; + } + +protected: + virtual dst_reg *make_reg_for_system_value(int /* location */) + { + unreachable("Not reached"); + } + + virtual void setup_payload() + { + unreachable("Not reached"); + } + + virtual void emit_prolog() + { + unreachable("Not reached"); + } + + virtual void emit_thread_end() + { + unreachable("Not reached"); + } + + virtual void emit_urb_write_header(int /* mrf */) + { + unreachable("Not reached"); + } + + virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */) + { + unreachable("Not reached"); + } +}; + + +void dead_code_eliminate_test::SetUp() +{ + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct gen_device_info *)calloc(1, 
sizeof(*devinfo)); + prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data)); + compiler->devinfo = devinfo; + + nir_shader *shader = + nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL, NULL); + + v = new dead_code_eliminate_vec4_visitor(compiler, shader, prog_data); + + devinfo->gen = 4; +} + +static void +dead_code_eliminate(vec4_visitor *v) +{ + bool print = false; + + if (print) { + fprintf(stderr, "instructions before:\n"); + v->dump_instructions(); + } + + v->calculate_cfg(); + v->dead_code_eliminate(); + + if (print) { + fprintf(stderr, "instructions after:\n"); + v->dump_instructions(); + } +} + +TEST_F(dead_code_eliminate_test, some_dead_channels_all_flags_used) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + src_reg r1 = src_reg(v, glsl_type::vec4_type); + src_reg r2 = src_reg(v, glsl_type::vec4_type); + src_reg r3 = src_reg(v, glsl_type::vec4_type); + src_reg r4 = src_reg(v, glsl_type::vec4_type); + src_reg r5 = src_reg(v, glsl_type::vec4_type); + src_reg r6 = src_reg(v, glsl_type::vec4_type); + + /* Sequence like the following should not be modified by DCE. + * + * cmp.l.f0(8) g4<1>F g2<4,4,1>.wF g1<4,4,1>.xF + * mov(8) g5<1>.xF g4<4,4,1>.xF + * (+f0.x) sel(8) g6<1>UD g3<4>UD g6<4>UD + */ + vec4_instruction *test_cmp = + bld.CMP(dst_reg(r4), r2, r1, BRW_CONDITIONAL_L); + + test_cmp->src[0].swizzle = BRW_SWIZZLE_WWWW; + test_cmp->src[1].swizzle = BRW_SWIZZLE_XXXX; + + vec4_instruction *test_mov = + bld.MOV(dst_reg(r5), r4); + + test_mov->dst.writemask = WRITEMASK_X; + test_mov->src[0].swizzle = BRW_SWIZZLE_XXXX; + + vec4_instruction *test_sel = + bld.SEL(dst_reg(r6), r3, r6); + + set_predicate(BRW_PREDICATE_NORMAL, test_sel); + + /* The scratch write is here just to make r5 and r6 be live so that the + * whole program doesn't get eliminated by DCE. 
+ */ + v->emit(v->SCRATCH_WRITE(dst_reg(r4), r6, r5)); + + dead_code_eliminate(v); + + EXPECT_EQ(test_cmp->dst.writemask, WRITEMASK_XYZW); +} diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml index abd5da297d6..acded759335 100644 --- a/src/intel/genxml/gen10.xml +++ b/src/intel/genxml/gen10.xml @@ -3553,6 +3553,14 @@ + + + + + + + + diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml index c69d7dc89c2..d39bf09a5d7 100644 --- a/src/intel/genxml/gen11.xml +++ b/src/intel/genxml/gen11.xml @@ -3551,6 +3551,14 @@ + + + + + + + + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index ca268254503..b7ce3095ab4 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -3491,6 +3491,14 @@ + + + + + + + + diff --git a/src/intel/tools/aubinator_viewer_decoder.cpp b/src/intel/tools/aubinator_viewer_decoder.cpp index 5311a8afc31..59cde530409 100644 --- a/src/intel/tools/aubinator_viewer_decoder.cpp +++ b/src/intel/tools/aubinator_viewer_decoder.cpp @@ -172,7 +172,7 @@ handle_state_base_address(struct aub_viewer_decode_ctx *ctx, surface_modify = iter.raw_value; } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) { dynamic_modify = iter.raw_value; - } else if (strcmp(iter.name, "Insntruction Base Address Modify Enable") == 0) { + } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) { instruction_modify = iter.raw_value; } } diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c index 46c41d57861..4720095c6cd 100644 --- a/src/intel/vulkan/anv_android.c +++ b/src/intel/vulkan/anv_android.c @@ -128,7 +128,7 @@ anv_image_from_gralloc(VkDevice device_h, */ int dma_buf = gralloc_info->handle->data[0]; - uint64_t bo_flags = 0; + uint64_t bo_flags = ANV_BO_EXTERNAL; if (device->instance->physicalDevice.supports_48bit_addresses) bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; if (device->instance->physicalDevice.use_softpin) @@ -234,7 +234,7 @@ 
VkResult anv_GetSwapchainGrallocUsageANDROID( *grallocUsage = 0; intel_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage); - /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags + /* WARNING: Android's libvulkan.so hardcodes the VkImageUsageFlags * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags. * The relevant code in libvulkan/swapchain.cpp contains this fun comment: * @@ -247,7 +247,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID( * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. */ - const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { + VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, .format = format, .type = VK_IMAGE_TYPE_2D, @@ -255,6 +255,17 @@ VkResult anv_GetSwapchainGrallocUsageANDROID( .usage = imageUsage, }; + /* Android P and earlier doesn't check if the physical device supports a + * given format and usage combination before calling this function. Omit the + * storage requirement to make the tests pass. + */ +#if ANDROID_API_LEVEL <= 28 + if (format == VK_FORMAT_R8G8B8A8_SRGB || + format == VK_FORMAT_R5G6B5_UNORM_PACK16) { + image_format_info.usage &= ~VK_IMAGE_USAGE_STORAGE_BIT; + } +#endif + VkImageFormatProperties2KHR image_format_props = { .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, }; @@ -268,19 +279,13 @@ VkResult anv_GetSwapchainGrallocUsageANDROID( "inside %s", __func__); } - /* Reject STORAGE here to avoid complexity elsewhere. 
*/ - if (imageUsage & VK_IMAGE_USAGE_STORAGE_BIT) { - return vk_errorf(device->instance, device, VK_ERROR_FORMAT_NOT_SUPPORTED, - "VK_IMAGE_USAGE_STORAGE_BIT unsupported for gralloc " - "swapchain"); - } - if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) *grallocUsage |= GRALLOC_USAGE_HW_RENDER; if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE; diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 478b8e7a3db..acc9a22c484 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1658,6 +1658,7 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, void anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t base_layer, uint32_t layer_count, enum isl_aux_op mcs_op, union isl_color_value *clear_value, @@ -1713,12 +1714,12 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, switch (mcs_op) { case ISL_AUX_OP_FAST_CLEAR: - blorp_fast_clear(&batch, &surf, surf.surf->format, + blorp_fast_clear(&batch, &surf, format, 0, base_layer, layer_count, 0, 0, image->extent.width, image->extent.height); break; case ISL_AUX_OP_PARTIAL_RESOLVE: - blorp_mcs_partial_resolve(&batch, &surf, surf.surf->format, + blorp_mcs_partial_resolve(&batch, &surf, format, base_layer, layer_count); break; case ISL_AUX_OP_FULL_RESOLVE: @@ -1736,6 +1737,7 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, void anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t level, uint32_t base_layer, uint32_t layer_count, enum isl_aux_op ccs_op, union isl_color_value *clear_value, @@ -1799,14 +1801,14 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, switch (ccs_op) { case 
ISL_AUX_OP_FAST_CLEAR: - blorp_fast_clear(&batch, &surf, surf.surf->format, + blorp_fast_clear(&batch, &surf, format, level, base_layer, layer_count, 0, 0, level_width, level_height); break; case ISL_AUX_OP_FULL_RESOLVE: case ISL_AUX_OP_PARTIAL_RESOLVE: blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count, - surf.surf->format, ccs_op); + format, ccs_op); break; case ISL_AUX_OP_AMBIGUATE: for (uint32_t a = 0; a < layer_count; a++) { diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index ee35e013329..924470b3005 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -636,7 +636,7 @@ VkResult anv_CreateInstance( } if (instance->app_info.api_version == 0) - anv_EnumerateInstanceVersion(&instance->app_info.api_version); + instance->app_info.api_version = VK_API_VERSION_1_0; instance->enabled_extensions = enabled_extensions; diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py index e9afe06bb13..dd7111b58e1 100644 --- a/src/intel/vulkan/anv_extensions.py +++ b/src/intel/vulkan/anv_extensions.py @@ -71,9 +71,9 @@ def __init__(self, version, enable): EXTENSIONS = [ Extension('VK_ANDROID_native_buffer', 5, 'ANDROID'), Extension('VK_KHR_16bit_storage', 1, 'device->info.gen >= 8'), - Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8'), + Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8 && !ANDROID'), Extension('VK_KHR_bind_memory2', 1, True), - Extension('VK_KHR_create_renderpass2', 1, True), + Extension('VK_KHR_create_renderpass2', 1, '!ANDROID'), Extension('VK_KHR_dedicated_allocation', 1, True), Extension('VK_KHR_descriptor_update_template', 1, True), Extension('VK_KHR_device_group', 1, True), @@ -121,7 +121,7 @@ def __init__(self, version, enable): Extension('VK_EXT_external_memory_dma_buf', 1, True), Extension('VK_EXT_global_priority', 1, 'device->has_context_priority'), - Extension('VK_EXT_pci_bus_info', 1, True), + Extension('VK_EXT_pci_bus_info', 1, 
False), Extension('VK_EXT_shader_viewport_index_layer', 1, True), Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen >= 9'), Extension('VK_EXT_vertex_attribute_divisor', 3, True), diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c index ed1bc096c66..f6b8ded20a9 100644 --- a/src/intel/vulkan/anv_intel.c +++ b/src/intel/vulkan/anv_intel.c @@ -64,7 +64,8 @@ VkResult anv_CreateDmaBufImageINTEL( .samples = 1, /* FIXME: Need a way to use X tiling to allow scanout */ .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT, .flags = 0, }}, pAllocator, &image_h); diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 6868288e486..60c196e5c4f 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -171,6 +171,8 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin, { nir_builder *b = &state->builder; + b->cursor = nir_before_instr(&intrin->instr); + /* For us, the resource indices are just indices into the binding table and * array elements are sequential. A resource_reindex just turns into an * add of the two indices. 
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index ad0f08253e7..f170366d030 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -446,6 +446,9 @@ anv_pipeline_hash_graphics(struct anv_pipeline *pipeline, if (layout) _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); + const bool rba = pipeline->device->robust_buffer_access; + _mesa_sha1_update(&ctx, &rba, sizeof(rba)); + for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { if (stages[s].entrypoint) anv_pipeline_hash_shader(&ctx, &stages[s]); @@ -466,6 +469,9 @@ anv_pipeline_hash_compute(struct anv_pipeline *pipeline, if (layout) _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); + const bool rba = pipeline->device->robust_buffer_access; + _mesa_sha1_update(&ctx, &rba, sizeof(rba)); + anv_pipeline_hash_shader(&ctx, stage); _mesa_sha1_final(&ctx, sha1_out); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index d8a08d9d67f..37c710ad09a 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1747,6 +1747,13 @@ enum anv_pipe_bits { * we would have to CS stall on every flush which could be bad. */ ANV_PIPE_NEEDS_CS_STALL_BIT = (1 << 21), + + /* This bit does not exist directly in PIPE_CONTROL. It means that render + * target operations are ongoing. Some operations like copies on the + * command streamer might need to be aware of this to trigger the + * appropriate stall before they can proceed with the copy. 
+ */ + ANV_PIPE_RENDER_TARGET_WRITES = (1 << 22), }; #define ANV_PIPE_FLUSH_BITS ( \ @@ -2950,6 +2957,7 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, void anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t base_layer, uint32_t layer_count, enum isl_aux_op mcs_op, union isl_color_value *clear_value, @@ -2957,6 +2965,7 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, void anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t level, uint32_t base_layer, uint32_t layer_count, enum isl_aux_op ccs_op, union isl_color_value *clear_value, diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 2035017ce0e..c573e890946 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -263,4 +263,5 @@ genX(blorp_exec)(struct blorp_batch *batch, cmd_buffer->state.gfx.vb_dirty = ~0; cmd_buffer->state.gfx.dirty = ~0; cmd_buffer->state.push_constants_dirty = ~0; + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 43a02f22567..dcf37654954 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -737,6 +737,7 @@ anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer, static void anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t level, uint32_t array_layer, enum isl_aux_op resolve_op, @@ -761,13 +762,14 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE) resolve_op = ISL_AUX_OP_FULL_RESOLVE; - anv_image_ccs_op(cmd_buffer, image, aspect, level, + anv_image_ccs_op(cmd_buffer, image, 
format, aspect, level, array_layer, 1, resolve_op, NULL, true); } static void anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + enum isl_format format, VkImageAspectFlagBits aspect, uint32_t array_layer, enum isl_aux_op resolve_op, @@ -781,7 +783,7 @@ anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer, aspect, 0, array_layer, resolve_op, fast_clear_supported); - anv_image_mcs_op(cmd_buffer, image, aspect, + anv_image_mcs_op(cmd_buffer, image, format, aspect, array_layer, 1, resolve_op, NULL, true); #else unreachable("MCS resolves are unsupported on Ivybridge and Bay Trail"); @@ -1037,8 +1039,9 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, uint32_t level_layer_count = MIN2(layer_count, aux_layers - base_layer); - anv_image_ccs_op(cmd_buffer, image, aspect, level, - base_layer, level_layer_count, + anv_image_ccs_op(cmd_buffer, image, + image->planes[plane].surface.isl.format, + aspect, level, base_layer, level_layer_count, ISL_AUX_OP_AMBIGUATE, NULL, false); if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) { @@ -1055,8 +1058,9 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, } assert(base_level == 0 && level_count == 1); - anv_image_mcs_op(cmd_buffer, image, aspect, - base_layer, layer_count, + anv_image_mcs_op(cmd_buffer, image, + image->planes[plane].surface.isl.format, + aspect, base_layer, layer_count, ISL_AUX_OP_FAST_CLEAR, NULL, false); } return; @@ -1133,12 +1137,22 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, for (uint32_t a = 0; a < level_layer_count; a++) { uint32_t array_layer = base_layer + a; if (image->samples == 1) { - anv_cmd_predicated_ccs_resolve(cmd_buffer, image, aspect, - level, array_layer, resolve_op, + anv_cmd_predicated_ccs_resolve(cmd_buffer, image, + image->planes[plane].surface.isl.format, + aspect, level, array_layer, resolve_op, final_fast_clear); } else { - anv_cmd_predicated_mcs_resolve(cmd_buffer, image, aspect, - 
array_layer, resolve_op, + /* We only support fast-clear on the first layer so partial + * resolves should not be used on other layers as they will use + * the clear color stored in memory that is only valid for layer0. + */ + if (resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE && + array_layer != 0) + continue; + + anv_cmd_predicated_mcs_resolve(cmd_buffer, image, + image->planes[plane].surface.isl.format, + aspect, array_layer, resolve_op, final_fast_clear); } } @@ -1758,6 +1772,12 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) pipe.StallAtPixelScoreboard = true; } + /* If a render target flush was emitted, then we can toggle off the bit + * saying that render target writes are ongoing. + */ + if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT) + bits &= ~(ANV_PIPE_RENDER_TARGET_WRITES); + bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT); } @@ -2769,6 +2789,8 @@ void genX(CmdDraw)( prim.StartInstanceLocation = firstInstance; prim.BaseVertexLocation = 0; } + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } void genX(CmdDrawIndexed)( @@ -2808,6 +2830,8 @@ void genX(CmdDrawIndexed)( prim.StartInstanceLocation = firstInstance; prim.BaseVertexLocation = vertexOffset; } + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } /* Auto-Draw / Indirect Registers */ @@ -2941,6 +2965,8 @@ void genX(CmdDrawIndirect)( offset += stride; } + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } void genX(CmdDrawIndexedIndirect)( @@ -2980,6 +3006,8 @@ void genX(CmdDrawIndexedIndirect)( offset += stride; } + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } static VkResult @@ -3645,12 +3673,16 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, union isl_color_value clear_color = {}; anv_clear_color_from_att_state(&clear_color, att_state, iview); if (iview->image->samples == 1) { - anv_image_ccs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, + 
anv_image_ccs_op(cmd_buffer, image, + iview->planes[0].isl.format, + VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1, ISL_AUX_OP_FAST_CLEAR, &clear_color, false); } else { - anv_image_mcs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT, + anv_image_mcs_op(cmd_buffer, image, + iview->planes[0].isl.format, + VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, ISL_AUX_OP_FAST_CLEAR, &clear_color, false); @@ -3870,6 +3902,55 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) struct anv_image_view *iview = fb->attachments[a]; const struct anv_image *image = iview->image; + if ((image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && + image->vk_format != iview->vk_format) { + enum anv_fast_clear_type fast_clear_type = + anv_layout_to_fast_clear_type(&cmd_buffer->device->info, + image, VK_IMAGE_ASPECT_COLOR_BIT, + att_state->current_layout); + + /* If any clear color was used, flush it down the aux surfaces. If we + * don't do it now using the view's format we might use the clear + * color incorrectly in the following resolves (for example with an + * SRGB view & a UNORM image). 
+ */ + if (fast_clear_type != ANV_FAST_CLEAR_NONE) { + anv_perf_warn(cmd_buffer->device->instance, fb, + "Doing a partial resolve to get rid of clear color at the " + "end of a renderpass due to an image/view format mismatch"); + + uint32_t base_layer, layer_count; + if (image->type == VK_IMAGE_TYPE_3D) { + base_layer = 0; + layer_count = anv_minify(iview->image->extent.depth, + iview->planes[0].isl.base_level); + } else { + base_layer = iview->planes[0].isl.base_array_layer; + layer_count = fb->layers; + } + + for (uint32_t a = 0; a < layer_count; a++) { + uint32_t array_layer = base_layer + a; + if (image->samples == 1) { + anv_cmd_predicated_ccs_resolve(cmd_buffer, image, + iview->planes[0].isl.format, + VK_IMAGE_ASPECT_COLOR_BIT, + iview->planes[0].isl.base_level, + array_layer, + ISL_AUX_OP_PARTIAL_RESOLVE, + ANV_FAST_CLEAR_NONE); + } else { + anv_cmd_predicated_mcs_resolve(cmd_buffer, image, + iview->planes[0].isl.format, + VK_IMAGE_ASPECT_COLOR_BIT, + base_layer, + ISL_AUX_OP_PARTIAL_RESOLVE, + ANV_FAST_CLEAR_NONE); + } + } + } + } + /* Transition the image into the final layout for this render pass */ VkImageLayout target_layout = cmd_state->pass->attachments[a].final_layout; diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index 81522986550..1bee1c6dc17 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -302,4 +302,5 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer, } cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index ce8757f2643..71b7a1352f0 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -729,11 +729,19 @@ void genX(CmdCopyQueryPoolResults)( ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - if (flags & 
VK_QUERY_RESULT_WAIT_BIT) { - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.CommandStreamerStallEnable = true; - pc.StallAtPixelScoreboard = true; - } + /* If render target writes are ongoing, request a render target cache flush + * to ensure proper ordering of the commands from the 3d pipe and the + * command streamer. + */ + if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_WRITES) { + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + } + + if ((flags & VK_QUERY_RESULT_WAIT_BIT) || + (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS)) { + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); } struct anv_address dest_addr = anv_address_add(buffer->address, destOffset); diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 42800a2581e..99b86f68b3a 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -231,6 +231,22 @@ genX(init_device_state)(struct anv_device *device) #endif } +#if GEN_GEN >= 10 + /* A fixed function pipe flush is required before modifying this field */ + anv_batch_emit(&batch, GENX(PIPE_CONTROL), pipe) { + pipe.PipeControlFlushEnable = true; + } + /* enable object level preemption */ + uint32_t csc1; + anv_pack_struct(&csc1, GENX(CS_CHICKEN1), + .ReplayMode = ObjectLevelPreemption, + .ReplayModeMask = 1); + anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) { + lri.RegisterOffset = GENX(CS_CHICKEN1_num); + lri.DataDWord = csc1; + } +#endif + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe); assert(batch.next <= batch.end); diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c index 1981b5f0515..7cd6b1e8ab6 100644 --- a/src/loader/loader_dri3_helper.c +++ b/src/loader/loader_dri3_helper.c @@ -1509,6 +1509,7 @@ dri3_update_drawable(struct loader_dri3_drawable *draw) mtx_unlock(&draw->mtx); return false; } + free(error); 
draw->is_pixmap = true; xcb_unregister_for_special_event(draw->conn, draw->special_event); draw->special_event = NULL; diff --git a/src/mapi/shared-glapi/meson.build b/src/mapi/shared-glapi/meson.build index dcc6079af3d..3f041471fb9 100644 --- a/src/mapi/shared-glapi/meson.build +++ b/src/mapi/shared-glapi/meson.build @@ -40,7 +40,7 @@ libglapi = shared_library( 'glapi', [files_mapi_glapi, files_mapi_util, shared_glapi_mapi_tmp_h], c_args : [ - c_msvc_compat_args, '-DMAPI_MODE_GLAPI', + c_msvc_compat_args, c_vis_args, '-DMAPI_MODE_GLAPI', '-DMAPI_ABI_HEADER="@0@"'.format(shared_glapi_mapi_tmp_h.full_path()), ], link_args : [ld_args_gc_sections], diff --git a/src/mesa/drivers/dri/Android.mk b/src/mesa/drivers/dri/Android.mk index 53ff4b4f632..dc1f98364c8 100644 --- a/src/mesa/drivers/dri/Android.mk +++ b/src/mesa/drivers/dri/Android.mk @@ -49,11 +49,18 @@ MESA_DRI_WHOLE_STATIC_LIBRARIES := \ MESA_DRI_SHARED_LIBRARIES := \ libcutils \ libdl \ - libexpat \ libglapi \ liblog \ libz +# If Android version >=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +MESA_DRI_WHOLE_STATIC_LIBRARIES += \ + libexpat +else +MESA_DRI_SHARED_LIBRARIES += \ + libexpat +endif #----------------------------------------------- # Build drivers and libmesa_dri_common diff --git a/src/mesa/drivers/dri/i915/Android.mk b/src/mesa/drivers/dri/i915/Android.mk index b1054aa6e28..7c9c8210dff 100644 --- a/src/mesa/drivers/dri/i915/Android.mk +++ b/src/mesa/drivers/dri/i915/Android.mk @@ -47,7 +47,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ LOCAL_SHARED_LIBRARIES := \ $(MESA_DRI_SHARED_LIBRARIES) \ - libdrm_intel + libdrm_intel_pri LOCAL_GENERATED_SOURCES := \ $(MESA_DRI_OPTIONS_H) \ diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index fbad63a0824..580b5443965 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -310,6 +310,8 @@ LOCAL_LDFLAGS += 
$(MESA_DRI_LDFLAGS) LOCAL_CFLAGS := \ $(MESA_DRI_CFLAGS) +LOCAL_CFLAGS += -Wno-error + LOCAL_C_INCLUDES := \ $(MESA_DRI_C_INCLUDES) \ $(MESA_TOP)/include/drm-uapi diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index f1675b191c1..d4e6ba039c9 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -1487,7 +1487,7 @@ brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd) brw_bo_make_external(bo); if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle, - DRM_CLOEXEC, prime_fd) != 0) + DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0) return -errno; bo->reusable = false; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 6ba64e4e06d..8cc0529d7e8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -893,6 +893,19 @@ brw_process_driconf_options(struct brw_context *brw) ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20); driComputeOptionsSha1(&brw->screen->optionCache, ctx->Const.dri_config_options_sha1); + + brw->screen->compiler->simd32_heuristics_control.grouped_sends_check = + driQueryOptionb(&brw->optionCache, "simd32_heuristic_grouped_check"); + brw->screen->compiler->simd32_heuristics_control.max_grouped_sends = + driQueryOptioni(&brw->optionCache, "simd32_heuristic_grouped_sends"); + brw->screen->compiler->simd32_heuristics_control.inst_count_check = + driQueryOptionb(&brw->optionCache, "simd32_heuristic_inst_check"); + brw->screen->compiler->simd32_heuristics_control.inst_count_ratio = + driQueryOptionf(&brw->optionCache, "simd32_heuristic_inst_ratio"); + brw->screen->compiler->simd32_heuristics_control.mrt_check = + driQueryOptionb(&brw->optionCache, "simd32_heuristic_mrt_check"); + brw->screen->compiler->simd32_heuristics_control.max_mrts = + driQueryOptioni(&brw->optionCache, "simd32_heuristic_max_mrts"); } GLboolean diff --git 
a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 7fd15669eb9..47183da66bc 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -686,6 +686,7 @@ enum brw_query_kind { OA_COUNTERS, OA_COUNTERS_RAW, PIPELINE_STATS, + NULL_RENDERER, }; struct brw_perf_query_register_prog { @@ -842,6 +843,8 @@ struct brw_context GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */ + bool object_preemption; /**< Object level preemption enabled. */ + GLenum reduced_primitive; /** @@ -1242,6 +1245,7 @@ struct brw_context int n_active_oa_queries; int n_active_pipeline_stats_queries; + int n_active_null_renderers; /* The number of queries depending on running OA counters which * extends beyond brw_end_perf_query() since we need to wait until diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 97a787a2ab3..0ec50e1d27a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1650,11 +1650,17 @@ enum brw_pixel_shader_coverage_mask_mode { #define GEN10_CACHE_MODE_SS 0x0e420 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) -#define INSTPM 0x20c0 +#define INSTPM 0x20c0 /* Gen6-8 */ # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6) +# define INSTPM_GLOBAL_DEBUG_ENABLE (1 << 4) +# define INSTPM_MEDIA_INSTRUCTION_DISABLE (1 << 3) +# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE (1 << 2) +# define INSTPM_3D_STATE_INSTRUCTION_DISABLE (1 << 1) #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */ # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4) +# define CSDBG2_MEDIA_INSTRUCTION_DISABLE (1 << 1) +# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE (1 << 0) #define GEN7_RPSTAT1 0xA01C #define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7 @@ -1681,4 +1687,9 @@ enum brw_pixel_shader_coverage_mask_mode { # define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS (1 << 5) # define 
HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK REG_MASK(1 << 5) +#define CS_CHICKEN1 0x2580 /* Gen9+ */ +# define GEN9_REPLAY_MODE_MIDBUFFER (0 << 0) +# define GEN9_REPLAY_MODE_MIDOBJECT (1 << 0) +# define GEN9_REPLAY_MODE_MASK REG_MASK(1 << 0) + #endif diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 10e3d024f17..85d14a83c7e 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -330,6 +330,12 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void) o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"), obj->pipeline_stats.bo ? "yes" : "no"); break; + case NULL_RENDERER: + DBG("%4d: %-6s %-8s NULL_RENDERER\n", + id, + o->Used ? "Dirty," : "New,", + o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,")); + break; default: unreachable("Unknown query type"); break; @@ -431,6 +437,10 @@ brw_get_perf_query_info(struct gl_context *ctx, *n_active = brw->perfquery.n_active_pipeline_stats_queries; break; + case NULL_RENDERER: + *n_active = brw->perfquery.n_active_null_renderers; + break; + default: unreachable("Unknown query type"); break; @@ -1020,6 +1030,7 @@ brw_begin_perf_query(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); struct brw_perf_query_object *obj = brw_perf_query(o); const struct brw_perf_query_info *query = obj->query; + const struct gen_device_info *devinfo = &brw->screen->devinfo; /* We can assume the frontend hides mistaken attempts to Begin a * query object multiple times before its End. Similarly if an @@ -1104,7 +1115,6 @@ brw_begin_perf_query(struct gl_context *ctx, /* If the OA counters aren't already on, enable them. 
*/ if (brw->perfquery.oa_stream_fd == -1) { __DRIscreen *screen = brw->screen->driScrnPriv; - const struct gen_device_info *devinfo = &brw->screen->devinfo; /* The period_exponent gives a sampling period as follows: * sample_period = timestamp_period * 2^(period_exponent + 1) @@ -1250,6 +1260,23 @@ brw_begin_perf_query(struct gl_context *ctx, ++brw->perfquery.n_active_pipeline_stats_queries; break; + case NULL_RENDERER: + ++brw->perfquery.n_active_null_renderers; + if (devinfo->gen >= 9) { + brw_load_register_imm32(brw, CS_DEBUG_MODE2, + REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE) | + CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE); + } else { + brw_load_register_imm32(brw, INSTPM, + REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE) | + INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE); + } + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_LRI_WRITE_IMMEDIATE); + break; + default: unreachable("Unknown query type"); break; @@ -1270,6 +1297,7 @@ brw_end_perf_query(struct gl_context *ctx, { struct brw_context *brw = brw_context(ctx); struct brw_perf_query_object *obj = brw_perf_query(o); + const struct gen_device_info *devinfo = &brw->screen->devinfo; DBG("End(%d)\n", o->Id); @@ -1312,6 +1340,21 @@ brw_end_perf_query(struct gl_context *ctx, --brw->perfquery.n_active_pipeline_stats_queries; break; + case NULL_RENDERER: + if (--brw->perfquery.n_active_null_renderers == 0) { + if (devinfo->gen >= 9) { + brw_load_register_imm32(brw, CS_DEBUG_MODE2, + REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE)); + } else { + brw_load_register_imm32(brw, INSTPM, + REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE)); + } + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_LRI_WRITE_IMMEDIATE); + } + break; + default: unreachable("Unknown query type"); break; @@ -1337,6 +1380,9 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) bo = obj->pipeline_stats.bo; break; + 
case NULL_RENDERER: + break; + default: unreachable("Unknown query type"); break; @@ -1387,6 +1433,8 @@ brw_is_perf_query_ready(struct gl_context *ctx, return (obj->pipeline_stats.bo && !brw_batch_references(&brw->batch, obj->pipeline_stats.bo) && !brw_bo_busy(obj->pipeline_stats.bo)); + case NULL_RENDERER: + return true; default: unreachable("Unknown query type"); @@ -1602,6 +1650,9 @@ brw_get_perf_query_data(struct gl_context *ctx, written = get_pipeline_stats_data(brw, obj, data_size, (uint8_t *)data); break; + case NULL_RENDERER: + break; + default: unreachable("Unknown query type"); break; @@ -1672,6 +1723,9 @@ brw_delete_perf_query(struct gl_context *ctx, } break; + case NULL_RENDERER: + break; + default: unreachable("Unknown query type"); break; @@ -2152,6 +2206,15 @@ get_register_queries_function(const struct gen_device_info *devinfo) return NULL; } +static void +fill_null_renderer_perf_query_info(struct brw_context *brw, + struct brw_perf_query_info *query) +{ + query->kind = NULL_RENDERER; + query->name = "Intel_Null_Hardware_Query"; + query->n_counters = 0; +} + static unsigned brw_init_perf_query_info(struct gl_context *ctx) { @@ -2210,6 +2273,10 @@ brw_init_perf_query_info(struct gl_context *ctx) enumerate_sysfs_metrics(brw); brw_perf_query_register_mdapi_oa_query(brw); + + struct brw_perf_query_info *null_query = + brw_perf_query_append_query_info(brw); + fill_null_renderer_perf_query_info(brw, null_query); } brw->perfquery.unaccumulated = diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index f6acf81b899..546d103d1a4 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -128,7 +128,7 @@ void brw_disk_cache_write_compute_program(struct brw_context *brw); void brw_disk_cache_write_render_programs(struct brw_context *brw); /*********************************************************************** - * brw_state.c + * brw_state_upload.c */ void 
brw_upload_render_state(struct brw_context *brw); void brw_render_state_finished(struct brw_context *brw); @@ -138,6 +138,7 @@ void brw_init_state(struct brw_context *brw); void brw_destroy_state(struct brw_context *brw); void brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline); +void brw_enable_obj_preemption(struct brw_context *brw, bool enable); static inline void brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 7f20579fb87..2e42dfb36d6 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -45,6 +45,28 @@ #include "brw_cs.h" #include "main/framebuffer.h" +void +brw_enable_obj_preemption(struct brw_context *brw, bool enable) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + assert(devinfo->gen >= 9); + + if (enable == brw->object_preemption) + return; + + /* A fixed function pipe flush is required before modifying this field */ + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); + + bool replay_mode = enable ? 
+ GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER; + + /* enable object level preemption */ + brw_load_register_imm32(brw, CS_CHICKEN1, + replay_mode | GEN9_REPLAY_MODE_MASK); + + brw->object_preemption = enable; +} + static void brw_upload_initial_gpu_state(struct brw_context *brw) { @@ -153,6 +175,9 @@ brw_upload_initial_gpu_state(struct brw_context *brw) ADVANCE_BATCH(); } } + + if (devinfo->gen >= 10) + brw_enable_obj_preemption(brw, true); } static inline const struct brw_tracked_state * diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 8d21cf5fa70..3286c222e5b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1499,18 +1499,6 @@ update_buffer_image_param(struct brw_context *brw, param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat); } -static unsigned -get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target, - unsigned level) -{ - if (target == GL_TEXTURE_CUBE_MAP) - return 6; - - return target == GL_TEXTURE_3D ? - minify(mt->surf.logical_level0_px.depth, level) : - mt->surf.logical_level0_px.array_len; -} - static void update_image_surface(struct brw_context *brw, struct gl_image_unit *u, @@ -1541,14 +1529,29 @@ update_image_surface(struct brw_context *brw, } else { struct intel_texture_object *intel_obj = intel_texture_object(obj); struct intel_mipmap_tree *mt = intel_obj->mt; - const unsigned num_layers = u->Layered ? - get_image_num_layers(mt, obj->Target, u->Level) : 1; + + unsigned base_layer, num_layers; + if (u->Layered) { + if (obj->Target == GL_TEXTURE_3D) { + base_layer = 0; + num_layers = minify(mt->surf.logical_level0_px.depth, u->Level); + } else { + assert(obj->Immutable || obj->MinLayer == 0); + base_layer = obj->MinLayer; + num_layers = obj->Immutable ? 
+ obj->NumLayers : + mt->surf.logical_level0_px.array_len; + } + } else { + base_layer = obj->MinLayer + u->_Layer; + num_layers = 1; + } struct isl_view view = { .format = format, .base_level = obj->MinLevel + u->Level, .levels = 1, - .base_array_layer = obj->MinLayer + u->_Layer, + .base_array_layer = base_layer, .array_len = num_layers, .swizzle = ISL_SWIZZLE_IDENTITY, .usage = ISL_SURF_USAGE_STORAGE_BIT, diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index fd9ce93c6c7..97ae2707049 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -213,7 +213,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch, } if (need_invalidate) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE); + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL); } #endif } @@ -268,7 +268,7 @@ genX(blorp_exec)(struct blorp_batch *batch, assert(batch->blorp->driver_ctx == batch->driver_batch); struct brw_context *brw = batch->driver_batch; struct gl_context *ctx = &brw->ctx; - bool check_aperture_failed_once; + bool check_aperture_failed_once = false; #if GEN_GEN >= 11 /* The PIPE_CONTROL command description says: @@ -309,7 +309,7 @@ genX(blorp_exec)(struct blorp_batch *batch, intel_batchbuffer_require_space(brw, 1400); brw_require_statebuffer_space(brw, 600); intel_batchbuffer_save_state(brw); - check_aperture_failed_once = intel_batchbuffer_saved_state_is_empty(brw); + check_aperture_failed_once |= intel_batchbuffer_saved_state_is_empty(brw); brw->batch.no_wrap = true; #if GEN_GEN == 6 diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 9cd017a5cff..5d2572cb4dc 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -505,9 +505,8 @@ vf_invalidate_for_vb_48bit_transitions(struct brw_context 
*brw) { #if GEN_GEN >= 8 bool need_invalidate = false; - unsigned i; - for (i = 0; i < brw->vb.nr_buffers; i++) { + for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo); if (high_bits != brw->vb.last_bo_high_bits[i]) { @@ -516,12 +515,26 @@ vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw) } } - /* Don't bother with draw parameter buffers - those are generated by - * the driver so we can select a consistent memory zone. - */ + if (brw->draw.draw_params_bo) { + uint16_t high_bits = pinned_bo_high_bits(brw->draw.draw_params_bo); + + if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers] != high_bits) { + need_invalidate = true; + brw->vb.last_bo_high_bits[brw->vb.nr_buffers] = high_bits; + } + } + + if (brw->draw.derived_draw_params_bo) { + uint16_t high_bits = pinned_bo_high_bits(brw->draw.derived_draw_params_bo); + + if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] != high_bits) { + need_invalidate = true; + brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] = high_bits; + } + } if (need_invalidate) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE); + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL); } #endif } @@ -5602,6 +5615,50 @@ static const struct brw_tracked_state genX(blend_constant_color) = { /* ---------------------------------------------------------------------- */ +#if GEN_GEN == 9 + +/** + * Implement workarounds for preemption: + * - WaDisableMidObjectPreemptionForGSLineStripAdj + * - WaDisableMidObjectPreemptionForTrifanOrPolygon + */ +static void +gen9_emit_preempt_wa(struct brw_context *brw) +{ + /* WaDisableMidObjectPreemptionForGSLineStripAdj + * + * WA: Disable mid-draw preemption when draw-call is a linestrip_adj and + * GS is enabled. 
+ */ + bool object_preemption = + !(brw->primitive == _3DPRIM_LINESTRIP_ADJ && brw->gs.enabled); + + /* WaDisableMidObjectPreemptionForTrifanOrPolygon + * + * TriFan miscompare in Execlist Preemption test. Cut index that is on a + * previous context. End the previous, the resume another context with a + * tri-fan or polygon, and the vertex count is corrupted. If we prempt + * again we will cause corruption. + * + * WA: Disable mid-draw preemption when draw-call has a tri-fan. + */ + object_preemption = + object_preemption && !(brw->primitive == _3DPRIM_TRIFAN); + + brw_enable_obj_preemption(brw, object_preemption); +} + +static const struct brw_tracked_state gen9_preempt_wa = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM, + }, + .emit = gen9_emit_preempt_wa, +}; +#endif + +/* ---------------------------------------------------------------------- */ + void genX(init_atoms)(struct brw_context *brw) { @@ -5906,6 +5963,9 @@ genX(init_atoms)(struct brw_context *brw) &genX(cut_index), &gen8_pma_fix, +#if GEN_GEN == 9 + &gen9_preempt_wa, +#endif }; #endif diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index d7e02efb54d..0cfe2acbdd4 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -247,7 +247,6 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.OES_primitive_bounding_box = true; ctx->Extensions.OES_texture_buffer = true; ctx->Extensions.ARB_fragment_shader_interlock = true; - ctx->Extensions.INTEL_fragment_shader_ordering = true; if (can_do_pipelined_register_writes(brw->screen)) { ctx->Extensions.ARB_draw_indirect = true; diff --git a/src/mesa/drivers/dri/i965/intel_image.h b/src/mesa/drivers/dri/i965/intel_image.h index a8193c6def9..ca604159dc2 100644 --- a/src/mesa/drivers/dri/i965/intel_image.h +++ b/src/mesa/drivers/dri/i965/intel_image.h @@ -89,9 +89,6 @@ struct __DRIimageRec { GLuint tile_y; bool 
has_depthstencil; - /** The image was created with EGL_EXT_image_dma_buf_import. */ - bool dma_buf_imported; - /** Offset of the auxiliary compression surface in the bo. */ uint32_t aux_offset; diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index c3bd30f7837..89110e60a8d 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -61,6 +61,33 @@ DRI_CONF_BEGIN DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") DRI_CONF_DESC_END DRI_CONF_OPT_END + + DRI_CONF_OPT_BEGIN_B(simd32_heuristic_grouped_check, "true") + DRI_CONF_DESC(en, "Enable/disable grouped texture fetch " + "check in the SIMD32 selection heuristic.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_V(simd32_heuristic_grouped_sends, int, 6, "1:999") + DRI_CONF_DESC(en, "How many grouped texture fetches should " + "the SIMD32 selection heuristic allow.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_B(simd32_heuristic_inst_check, "true") + DRI_CONF_DESC(en, "Enable/disable SIMD32/SIMD16 instruction " + "count ratio check in the SIMD32 selection " + "heuristic.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_V(simd32_heuristic_inst_ratio, float, 2.3, "1:999") + DRI_CONF_DESC(en, "SIMD32/SIMD16 instruction count ratio " + "the SIMD32 selection heuristic should allow.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_B(simd32_heuristic_mrt_check, "true") + DRI_CONF_DESC(en, "Enable/disable MRT write check in the " + "SIMD32 selection heuristic.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_V(simd32_heuristic_max_mrts, int, 1, "1:8") + DRI_CONF_DESC(en, "How many MRT writes should the SIMD32 " + "selection heuristic allow.") + DRI_CONF_OPT_END + DRI_CONF_MESA_NO_ERROR("false") DRI_CONF_SECTION_END @@ -282,6 +309,18 @@ static const struct intel_image_format intel_image_formats[] = { { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } }, + { __DRI_IMAGE_FOURCC_P010, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 
0, 0, __DRI_IMAGE_FORMAT_R16, 2 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } }, + + { __DRI_IMAGE_FOURCC_P012, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } }, + + { __DRI_IMAGE_FOURCC_P016, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } }, + { __DRI_IMAGE_FOURCC_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2, { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } }, @@ -957,7 +996,6 @@ intel_dup_image(__DRIimage *orig_image, void *loaderPrivate) image->tile_y = orig_image->tile_y; image->has_depthstencil = orig_image->has_depthstencil; image->data = loaderPrivate; - image->dma_buf_imported = orig_image->dma_buf_imported; image->aux_offset = orig_image->aux_offset; image->aux_pitch = orig_image->aux_pitch; @@ -1237,7 +1275,6 @@ intel_create_image_from_dma_bufs2(__DRIscreen *dri_screen, return NULL; } - image->dma_buf_imported = true; image->yuv_color_space = yuv_color_space; image->sample_range = sample_range; image->horizontal_siting = horizontal_siting; diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index bdcdb7736e6..674fa1c6fbf 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -614,16 +614,6 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, if (image == NULL) return; - /* We support external textures only for EGLImages created with - * EGL_EXT_image_dma_buf_import. We may lift that restriction in the future. 
- */ - if (target == GL_TEXTURE_EXTERNAL_OES && !image->dma_buf_imported) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glEGLImageTargetTexture2DOES(external target is enabled only " - "for images created with EGL_EXT_image_dma_buf_import"); - return; - } - /* Disallow depth/stencil textures: we don't have a way to pass the * separate stencil miptree of a GL_DEPTH_STENCIL texture through. */ diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 47db1583135..aac96290ded 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -317,7 +317,6 @@ EXT(IBM_texture_mirrored_repeat , dummy_true EXT(INGR_blend_func_separate , EXT_blend_func_separate , GLL, x , x , x , 1999) EXT(INTEL_conservative_rasterization , INTEL_conservative_rasterization , x , GLC, x , 31, 2013) -EXT(INTEL_fragment_shader_ordering , INTEL_fragment_shader_ordering , GLL, GLC, x , x , 2013) EXT(INTEL_performance_query , INTEL_performance_query , GLL, GLC, x , ES2, 2013) EXT(INTEL_shader_atomic_float_minmax , INTEL_shader_atomic_float_minmax , GLL, GLC, x , x , 2018) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 9ed49b7ff24..f30b778a7b1 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -4296,7 +4296,6 @@ struct gl_extensions GLboolean ATI_fragment_shader; GLboolean GREMEDY_string_marker; GLboolean INTEL_conservative_rasterization; - GLboolean INTEL_fragment_shader_ordering; GLboolean INTEL_performance_query; GLboolean INTEL_shader_atomic_float_minmax; GLboolean KHR_blend_equation_advanced; diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 556c860d393..d9a12d2387f 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -922,6 +922,8 @@ read_pixels_es3_error_check(struct gl_context *ctx, GLenum format, GLenum type, case GL_RGBA: if (type == GL_FLOAT && data_type == GL_FLOAT) return GL_NO_ERROR; /* EXT_color_buffer_float */ + if (type == GL_HALF_FLOAT && data_type == 
GL_FLOAT) + return GL_NO_ERROR; if (type == GL_UNSIGNED_BYTE && data_type == GL_UNSIGNED_NORMALIZED) return GL_NO_ERROR; if (internalFormat == GL_RGB10_A2 && diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 0ab9ed445d6..bb4f7006618 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -900,8 +900,7 @@ select_tex_image(const struct gl_texture_object *texObj, GLenum target, /** * Error-check the offset and size arguments to - * glGet[Compressed]TextureSubImage(). Also checks if the specified - * texture image is missing. + * glGet[Compressed]TextureSubImage(). * \return true if error, false if no error. */ static bool @@ -913,6 +912,7 @@ dimensions_error_check(struct gl_context *ctx, const char *caller) { const struct gl_texture_image *texImage; + GLuint imageWidth = 0, imageHeight = 0, imageDepth = 0; if (xoffset < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset = %d)", caller, xoffset); @@ -981,82 +981,44 @@ dimensions_error_check(struct gl_context *ctx, "%s(zoffset + depth = %d)", caller, zoffset + depth); return true; } - /* According to OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries"): - * - * "An INVALID_OPERATION error is generated by GetTextureImage if the - * effective target is TEXTURE_CUBE_MAP or TEXTURE_CUBE_MAP_ARRAY , - * and the texture object is not cube complete or cube array complete, - * respectively." - * - * This applies also to GetTextureSubImage, GetCompressedTexImage, - * GetCompressedTextureImage, and GetnCompressedTexImage. - */ - if (!_mesa_cube_complete(texObj)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(cube incomplete)", caller); - return true; - } break; default: ; /* nothing */ } texImage = select_tex_image(texObj, target, level, zoffset); - if (!texImage) { - /* Trying to return a non-defined level is a valid operation per se, as - * OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries") does not - * handle this case as an error. 
- * - * Rather, we need to look at section 8.22 ("Texture State and Proxy - * State"): - * - * "Each initial texture image is null. It has zero width, height, and - * depth, internal format RGBA, or R8 for buffer textures, component - * sizes set to zero and component types set to NONE, the compressed - * flag set to FALSE, a zero compressed size, and the bound buffer - * object name is zero." - * - * This means we need to assume the image for the non-defined level is - * an empty image. With this assumption, we can go back to section - * 8.11.4 and checking again the errors: - * - * "An INVALID_VALUE error is generated if xoffset + width is greater - * than the texture’s width, yoffset + height is greater than the - * texture’s height, or zoffset + depth is greater than the texture’s - * depth." - * - * Thus why we return INVALID_VALUE. - */ - _mesa_error(ctx, GL_INVALID_VALUE, "%s(missing image)", caller); - return true; + if (texImage) { + imageWidth = texImage->Width; + imageHeight = texImage->Height; + imageDepth = texImage->Depth; } - if (xoffset + width > texImage->Width) { + if (xoffset + width > imageWidth) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset %d + width %d > %u)", - caller, xoffset, width, texImage->Width); + caller, xoffset, width, imageWidth); return true; } - if (yoffset + height > texImage->Height) { + if (yoffset + height > imageHeight) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(yoffset %d + height %d > %u)", - caller, yoffset, height, texImage->Height); + caller, yoffset, height, imageHeight); return true; } if (target != GL_TEXTURE_CUBE_MAP) { /* Cube map error checking was done above */ - if (zoffset + depth > texImage->Depth) { + if (zoffset + depth > imageDepth) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(zoffset %d + depth %d > %u)", - caller, zoffset, depth, texImage->Depth); + caller, zoffset, depth, imageDepth); return true; } } /* Extra checks for compressed textures */ - { + if (texImage) { GLuint bw, bh, bd; 
_mesa_get_format_block_size_3d(texImage->TexFormat, &bw, &bh, &bd); if (bw > 1 || bh > 1 || bd > 1) { @@ -1162,53 +1124,15 @@ pbo_error_check(struct gl_context *ctx, GLenum target, /** - * Do error checking for all (non-compressed) get-texture-image functions. - * \return true if any error, false if no errors. + * Do teximage-related error checking for getting uncompressed images. + * \return true if there was an error */ static bool -getteximage_error_check(struct gl_context *ctx, - struct gl_texture_object *texObj, - GLenum target, GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, GLsizei bufSize, - GLvoid *pixels, const char *caller) +teximage_error_check(struct gl_context *ctx, + struct gl_texture_image *texImage, + GLenum format, const char *caller) { - struct gl_texture_image *texImage; - GLenum baseFormat, err; - GLint maxLevels; - - assert(texObj); - - if (texObj->Target == 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller); - return true; - } - - maxLevels = _mesa_max_texture_levels(ctx, target); - if (level < 0 || level >= maxLevels) { - _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level); - return true; - } - - err = _mesa_error_check_format_and_type(ctx, format, type); - if (err != GL_NO_ERROR) { - _mesa_error(ctx, err, "%s(format/type)", caller); - return true; - } - - if (dimensions_error_check(ctx, texObj, target, level, - xoffset, yoffset, zoffset, - width, height, depth, caller)) { - return true; - } - - if (pbo_error_check(ctx, target, width, height, depth, - format, type, bufSize, pixels, caller)) { - return true; - } - - texImage = select_tex_image(texObj, target, level, zoffset); + GLenum baseFormat; assert(texImage); /* @@ -1241,8 +1165,8 @@ getteximage_error_check(struct gl_context *ctx, return true; } else if (_mesa_is_stencil_format(format) - && !_mesa_is_depthstencil_format(baseFormat) - && 
!_mesa_is_stencil_format(baseFormat)) { + && !_mesa_is_depthstencil_format(baseFormat) + && !_mesa_is_stencil_format(baseFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(format mismatch)", caller); return true; @@ -1271,6 +1195,142 @@ getteximage_error_check(struct gl_context *ctx, } +/** + * Do common teximage-related error checking for getting uncompressed images. + * \return true if there was an error + */ +static bool +common_error_check(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLint level, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLsizei bufSize, + GLvoid *pixels, const char *caller) +{ + GLenum err; + GLint maxLevels; + + if (texObj->Target == 0) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller); + return true; + } + + maxLevels = _mesa_max_texture_levels(ctx, target); + if (level < 0 || level >= maxLevels) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level); + return true; + } + + err = _mesa_error_check_format_and_type(ctx, format, type); + if (err != GL_NO_ERROR) { + _mesa_error(ctx, err, "%s(format/type)", caller); + return true; + } + + /* According to OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries"): + * + * "An INVALID_OPERATION error is generated by GetTextureImage if the + * effective target is TEXTURE_CUBE_MAP or TEXTURE_CUBE_MAP_ARRAY , + * and the texture object is not cube complete or cube array complete, + * respectively." + * + * This applies also to GetTextureSubImage, GetCompressedTexImage, + * GetCompressedTextureImage, and GetnCompressedTexImage. + */ + if (target == GL_TEXTURE_CUBE_MAP && !_mesa_cube_complete(texObj)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(cube incomplete)", caller); + return true; + } + + return false; +} + + +/** + * Do error checking for all (non-compressed) get-texture-image functions. + * \return true if any error, false if no errors. 
+ */ +static bool +getteximage_error_check(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLint level, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLsizei bufSize, + GLvoid *pixels, const char *caller) +{ + struct gl_texture_image *texImage; + + assert(texObj); + + if (common_error_check(ctx, texObj, target, level, width, height, depth, + format, type, bufSize, pixels, caller)) { + return true; + } + + if (width == 0 || height == 0 || depth == 0) { + /* Not an error, but nothing to do. Return 'true' so that the + * caller simply returns. + */ + return true; + } + + if (pbo_error_check(ctx, target, width, height, depth, + format, type, bufSize, pixels, caller)) { + return true; + } + + texImage = select_tex_image(texObj, target, level, 0); + if (teximage_error_check(ctx, texImage, format, caller)) { + return true; + } + + return false; +} + + +/** + * Do error checking for all (non-compressed) get-texture-image functions. + * \return true if any error, false if no errors. 
+ */ +static bool +gettexsubimage_error_check(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLsizei bufSize, + GLvoid *pixels, const char *caller) +{ + struct gl_texture_image *texImage; + + assert(texObj); + + if (common_error_check(ctx, texObj, target, level, width, height, depth, + format, type, bufSize, pixels, caller)) { + return true; + } + + if (dimensions_error_check(ctx, texObj, target, level, + xoffset, yoffset, zoffset, + width, height, depth, caller)) { + return true; + } + + if (pbo_error_check(ctx, target, width, height, depth, + format, type, bufSize, pixels, caller)) { + return true; + } + + texImage = select_tex_image(texObj, target, level, zoffset); + if (teximage_error_check(ctx, texImage, format, caller)) { + return true; + } + + return false; +} + + /** * Return the width, height and depth of a texture image. 
* This function must be resilient to bad parameter values since @@ -1399,7 +1459,7 @@ _mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format, GLenum type, get_texture_image_dims(texObj, target, level, &width, &height, &depth); if (getteximage_error_check(ctx, texObj, target, level, - 0, 0, 0, width, height, depth, + width, height, depth, format, type, bufSize, pixels, caller)) { return; } @@ -1430,7 +1490,7 @@ _mesa_GetTexImage(GLenum target, GLint level, GLenum format, GLenum type, get_texture_image_dims(texObj, target, level, &width, &height, &depth); if (getteximage_error_check(ctx, texObj, target, level, - 0, 0, 0, width, height, depth, + width, height, depth, format, type, INT_MAX, pixels, caller)) { return; } @@ -1464,7 +1524,7 @@ _mesa_GetTextureImage(GLuint texture, GLint level, GLenum format, GLenum type, &width, &height, &depth); if (getteximage_error_check(ctx, texObj, texObj->Target, level, - 0, 0, 0, width, height, depth, + width, height, depth, format, type, bufSize, pixels, caller)) { return; } @@ -1497,9 +1557,10 @@ _mesa_GetTextureSubImage(GLuint texture, GLint level, return; } - if (getteximage_error_check(ctx, texObj, texObj->Target, level, - xoffset, yoffset, zoffset, width, height, depth, - format, type, bufSize, pixels, caller)) { + if (gettexsubimage_error_check(ctx, texObj, texObj->Target, level, + xoffset, yoffset, zoffset, + width, height, depth, + format, type, bufSize, pixels, caller)) { return; } diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index a46c9f94bca..8eccdc20b76 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -40,6 +40,7 @@ #include "shaderapi.h" #include "shaderobj.h" +#include "program/program.h" #include "program/prog_parameter.h" struct using_program_tuple @@ -470,6 +471,7 @@ begin_transform_feedback(struct gl_context *ctx, GLenum mode, bool no_error) if (obj->program != source) { ctx->NewDriverState |= 
ctx->DriverFlags.NewTransformFeedbackProg; + _mesa_reference_program_(ctx, &obj->program, source); obj->program = source; } @@ -504,6 +506,7 @@ end_transform_feedback(struct gl_context *ctx, assert(ctx->Driver.EndTransformFeedback); ctx->Driver.EndTransformFeedback(ctx, obj); + _mesa_reference_program_(ctx, &obj->program, NULL); ctx->TransformFeedback.CurrentObject->Active = GL_FALSE; ctx->TransformFeedback.CurrentObject->Paused = GL_FALSE; ctx->TransformFeedback.CurrentObject->EndedAnytime = GL_TRUE; diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk index c6470e6289e..13d0da85882 100644 --- a/src/mesa/program/Android.mk +++ b/src/mesa/program/Android.mk @@ -41,7 +41,7 @@ endef include $(MESA_TOP)/src/mesa/Makefile.sources include $(CLEAR_VARS) - +LOCAL_CFLAGS += -Wno-error LOCAL_MODULE := libmesa_program LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_STATIC_LIBRARIES := libmesa_nir \ diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 0e535257cb4..bdc8dda82c2 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -285,8 +285,11 @@ st_renderbuffer_delete(struct gl_context *ctx, struct gl_renderbuffer *rb) struct st_context *st = st_context(ctx); pipe_surface_release(st->pipe, &strb->surface_srgb); pipe_surface_release(st->pipe, &strb->surface_linear); - strb->surface = NULL; + } else { + pipe_surface_release_no_context(&strb->surface_srgb); + pipe_surface_release_no_context(&strb->surface_linear); } + strb->surface = NULL; pipe_resource_reference(&strb->texture, NULL); free(strb->data); _mesa_delete_renderbuffer(ctx, rb); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index c58deadc957..581a8639ef0 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -749,7 +749,8 @@ st_link_nir(struct gl_context *ctx, * the pipe_stream_output->output_register field is based on the * 
pre-compacted driver_locations. */ - if (!prev_shader->sh.LinkedTransformFeedback) + if (!(prev_shader->sh.LinkedTransformFeedback && + prev_shader->sh.LinkedTransformFeedback->NumVarying > 0)) nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir, nir, ctx->API != API_OPENGL_COMPAT); } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5322903b93a..0783f67f2b7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4072,7 +4072,6 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) case ir_intrinsic_generic_atomic_comp_swap: case ir_intrinsic_begin_invocation_interlock: case ir_intrinsic_end_invocation_interlock: - case ir_intrinsic_begin_fragment_shader_ordering: unreachable("Invalid intrinsic"); } } diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index ceb48dd4903..776b563e50e 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -1069,15 +1069,6 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi, * of the referenced drawables no longer exist. */ st_framebuffers_purge(st); - - /* Notify the driver that the context thread may have been changed. - * This should pin all driver threads to a specific L3 cache for optimal - * performance on AMD Zen CPUs. - */ - struct glthread_state *glthread = st->ctx->GLThread; - thrd_t *upper_thread = glthread ? 
&glthread->queue.threads[0] : NULL; - - util_context_thread_changed(st->pipe, upper_thread); } else { ret = _mesa_make_current(NULL, NULL, NULL); diff --git a/src/meson.build b/src/meson.build index 73146d37143..3b91c6a88c5 100644 --- a/src/meson.build +++ b/src/meson.build @@ -51,8 +51,12 @@ subdir('util') subdir('mapi') # TODO: opengl subdir('compiler') -subdir('egl/wayland/wayland-drm') -subdir('vulkan') +if with_platform_wayland + subdir('egl/wayland/wayland-drm') +endif +if with_any_vk + subdir('vulkan') +endif if with_gallium_radeonsi or with_amd_vk subdir('amd') endif @@ -67,7 +71,7 @@ subdir('loader') if with_platform_haiku subdir('hgl') endif -if with_glx != 'disabled' +if with_glx == 'dri' subdir('glx') endif if with_gbm diff --git a/src/util/Android.mk b/src/util/Android.mk index 2d59e1ae15e..6d770ca9575 100644 --- a/src/util/Android.mk +++ b/src/util/Android.mk @@ -41,8 +41,14 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary +# If Android version >=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_STATIC_LIBRARIES := \ + libexpat +else LOCAL_SHARED_LIBRARIES := \ libexpat +endif LOCAL_MODULE := libmesa_util diff --git a/src/util/Makefile.am b/src/util/Makefile.am index b857db8a866..4bda54c551d 100644 --- a/src/util/Makefile.am +++ b/src/util/Makefile.am @@ -60,7 +60,8 @@ libmesautil_la_LIBADD = \ $(PTHREAD_LIBS) \ $(CLOCK_LIB) \ $(ZLIB_LIBS) \ - $(LIBATOMIC_LIBS) + $(LIBATOMIC_LIBS) \ + -lm libxmlconfig_la_SOURCES = $(XMLCONFIG_FILES) libxmlconfig_la_CFLAGS = \ diff --git a/src/util/bitscan.h b/src/util/bitscan.h index dc89ac93f28..cdfecafaf01 100644 --- a/src/util/bitscan.h +++ b/src/util/bitscan.h @@ -112,6 +112,31 @@ u_bit_scan64(uint64_t *mask) return i; } +/* Count bits set in mask */ +static inline int +u_count_bits(unsigned *mask) +{ + unsigned v = *mask; + int c; + v = v - ((v >> 1) & 0x55555555); + v = (v & 0x33333333) 
+ ((v >> 2) & 0x33333333); + v = (v + (v >> 4)) & 0xF0F0F0F; + c = (int)((v * 0x1010101) >> 24); + return c; +} + +static inline int +u_count_bits64(uint64_t *mask) +{ + uint64_t v = *mask; + int c; + v = v - ((v >> 1) & 0x5555555555555555ull); + v = (v & 0x3333333333333333ull) + ((v >> 2) & 0x3333333333333333ull); + v = (v + (v >> 4)) & 0xF0F0F0F0F0F0F0Full; + c = (int)((v * 0x101010101010101ull) >> 56); + return c; +} + /* Determine if an unsigned value is a power of two. * * \note diff --git a/src/util/meson.build b/src/util/meson.build index 7caea27d660..156621aff65 100644 --- a/src/util/meson.build +++ b/src/util/meson.build @@ -119,7 +119,7 @@ libmesa_util = static_library( 'mesa_util', [files_mesa_util, format_srgb], include_directories : inc_common, - dependencies : [dep_zlib, dep_clock, dep_thread, dep_atomic], + dependencies : [dep_zlib, dep_clock, dep_thread, dep_atomic, dep_m], c_args : [c_msvc_compat_args, c_vis_args], build_by_default : false ) diff --git a/src/util/ralloc.c b/src/util/ralloc.c index 5d77f75ee85..5a7fa7e84e9 100644 --- a/src/util/ralloc.c +++ b/src/util/ralloc.c @@ -554,10 +554,18 @@ ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt, */ #define MIN_LINEAR_BUFSIZE 2048 -#define SUBALLOC_ALIGNMENT sizeof(uintptr_t) +#define SUBALLOC_ALIGNMENT 8 #define LMAGIC 0x87b9c7d3 -struct linear_header { +struct +#ifdef _MSC_VER + __declspec(align(8)) +#elif defined(__LP64__) + __attribute__((aligned(16))) +#else + __attribute__((aligned(8))) +#endif + linear_header { #ifdef DEBUG unsigned magic; /* for debugging */ #endif @@ -651,6 +659,8 @@ linear_alloc_child(void *parent, unsigned size) ptr = (linear_size_chunk *)((char*)&latest[1] + latest->offset); ptr->size = size; latest->offset += full_size; + + assert((uintptr_t)&ptr[1] % SUBALLOC_ALIGNMENT == 0); return &ptr[1]; } diff --git a/src/vulkan/Android.mk b/src/vulkan/Android.mk index 6253f1c3be9..730d036d18c 100644 --- a/src/vulkan/Android.mk +++ b/src/vulkan/Android.mk 
@@ -32,12 +32,15 @@ include $(LOCAL_PATH)/Makefile.sources include $(CLEAR_VARS) LOCAL_MODULE := libmesa_vulkan_util LOCAL_MODULE_CLASS := STATIC_LIBRARIES - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers intermediates := $(call local-generated-sources-dir) LOCAL_C_INCLUDES := \ $(MESA_TOP)/include/vulkan \ - $(MESA_TOP)/src/vulkan/util + $(MESA_TOP)/src/vulkan/util \ + frameworks/native/libs/nativebase/include \ + frameworks/native/libs/nativewindow/include \ + frameworks/native/libs/arect/include LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, \ $(VULKAN_UTIL_GENERATED_FILES)) diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c index 1cd5f8d62c5..58e25214149 100644 --- a/src/vulkan/wsi/wsi_common.c +++ b/src/vulkan/wsi/wsi_common.c @@ -954,8 +954,8 @@ wsi_common_queue_present(const struct wsi_device *wsi, /* We only need/want to wait on semaphores once. After that, we're * guaranteed ordering since it all happens on the same queue. */ - submit_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount, - submit_info.pWaitSemaphores = pPresentInfo->pWaitSemaphores, + submit_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount; + submit_info.pWaitSemaphores = pPresentInfo->pWaitSemaphores; /* Set up the pWaitDstStageMasks */ stage_flags = vk_alloc(&swapchain->alloc, diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c index fd0d30ad80c..856040b4fe1 100644 --- a/src/vulkan/wsi/wsi_common_display.c +++ b/src/vulkan/wsi/wsi_common_display.c @@ -1062,6 +1062,8 @@ wsi_display_swapchain_destroy(struct wsi_swapchain *drv_chain, for (uint32_t i = 0; i < chain->base.image_count; i++) wsi_display_image_finish(drv_chain, allocator, &chain->images[i]); + + wsi_swapchain_finish(&chain->base); vk_free(allocator, chain); return VK_SUCCESS; } diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index e9cc22ec603..3d3a60167bf 100644 --- 
a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -455,10 +455,11 @@ wsi_wl_get_presentation_support(struct wsi_device *wsi_device, (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; struct wsi_wl_display display; - int ret = wsi_wl_display_init(wsi, &display, wl_display, false); - wsi_wl_display_finish(&display); + VkResult ret = wsi_wl_display_init(wsi, &display, wl_display, false); + if (ret == VK_SUCCESS) + wsi_wl_display_finish(&display); - return ret == 0; + return ret == VK_SUCCESS; } static VkResult