diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 764 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 34 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 479 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 36 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 23 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_events.c | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 29 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 30 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h | 291 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 12 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 27 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 371 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 6 |
15 files changed, 973 insertions, 1142 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index c7118843db05..0c4c5499bb5c 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -2495,442 +2495,444 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx11_hex[] = { - 0xbfa00001, 0xbfa0021e, + 0xbfa00001, 0xbfa00221, 0xb0804006, 0xb8f8f802, 0x9178ff78, 0x00020006, - 0xb8fbf803, 0xbf0d9f6d, - 0xbfa20006, 0x8b6eff78, - 0x00002000, 0xbfa10009, - 0x8b6eff6d, 0x00ff0000, - 0xbfa2001e, 0x8b6eff7b, - 0x00000400, 0xbfa20041, - 0xbf830010, 0xb8fbf803, - 0xbfa0fffa, 0x8b6eff7b, - 0x00000900, 0xbfa20015, - 0x8b6eff7b, 0x000071ff, - 0xbfa10008, 0x8b6fff7b, - 0x00007080, 0xbfa10001, - 0xbeee1287, 0xb8eff801, - 0x846e8c6e, 0x8b6e6f6e, - 0xbfa2000a, 0x8b6eff6d, - 0x00ff0000, 0xbfa20007, - 0xb8eef801, 0x8b6eff6e, - 0x00000800, 0xbfa20003, + 0xb8fbf803, 0xbf0d9e6d, + 0xbfa10001, 0xbfbd0000, + 0xbf0d9f6d, 0xbfa20006, + 0x8b6eff78, 0x00002000, + 0xbfa10009, 0x8b6eff6d, + 0x00ff0000, 0xbfa2001e, 0x8b6eff7b, 0x00000400, - 0xbfa20026, 0xbefa4d82, - 0xbf89fc07, 0x84fa887a, - 0xf4005bbd, 0xf8000010, - 0xbf89fc07, 0x846e976e, - 0x9177ff77, 0x00800000, - 0x8c776e77, 0xf4045bbd, - 0xf8000000, 0xbf89fc07, - 0xf4045ebd, 0xf8000008, - 0xbf89fc07, 0x8bee6e6e, - 0xbfa10001, 0xbe80486e, - 0x8b6eff6d, 0x01ff0000, - 0xbfa20005, 0x8c78ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xbfa00005, - 0x8b6eff6d, 0x01000000, - 0xbfa20002, 0x806c846c, - 0x826d806d, 0x8b6dff6d, - 0x0000ffff, 0x8bfe7e7e, - 0x8bea6a6a, 0xb978f802, - 0xbe804a6c, 0x8b6dff6d, - 0x0000ffff, 0xbefa0080, - 0xb97a0283, 0xbeee007e, - 0xbeef007f, 0xbefe0180, - 0xbefe4d84, 0xbf89fc07, - 0x8b7aff7f, 0x04000000, - 0x847a857a, 0x8c6d7a6d, - 0xbefa007e, 0x8b7bff7f, - 0x0000ffff, 0xbefe00c1, - 0xbeff00c1, 0xdca6c000, - 0x007a0000, 0x7e000280, - 0xbefe007a, 0xbeff007b, - 0xb8fb02dc, 0x847b997b, - 0xb8fa3b05, 0x807a817a, - 0xbf0d997b, 0xbfa20002, - 0x847a897a, 0xbfa00001, - 0x847a8a7a, 0xb8fb1e06, - 0x847b8a7b, 0x807a7b7a, + 0xbfa20041, 0xbf830010, + 0xb8fbf803, 0xbfa0fffa, + 0x8b6eff7b, 0x00000900, + 0xbfa20015, 0x8b6eff7b, + 0x000071ff, 0xbfa10008, + 0x8b6fff7b, 0x00007080, + 0xbfa10001, 0xbeee1287, + 0xb8eff801, 0x846e8c6e, + 0x8b6e6f6e, 0xbfa2000a, + 0x8b6eff6d, 0x00ff0000, + 0xbfa20007, 0xb8eef801, + 0x8b6eff6e, 0x00000800, + 0xbfa20003, 0x8b6eff7b, + 0x00000400, 0xbfa20026, + 0xbefa4d82, 0xbf89fc07, + 0x84fa887a, 0xf4005bbd, + 0xf8000010, 0xbf89fc07, + 0x846e976e, 0x9177ff77, + 0x00800000, 0x8c776e77, + 0xf4045bbd, 0xf8000000, + 0xbf89fc07, 0xf4045ebd, + 0xf8000008, 0xbf89fc07, + 0x8bee6e6e, 0xbfa10001, + 0xbe80486e, 0x8b6eff6d, + 0x01ff0000, 0xbfa20005, + 0x8c78ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xbfa00005, 0x8b6eff6d, + 0x01000000, 0xbfa20002, + 0x806c846c, 0x826d806d, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb978f802, 0xbe804a6c, + 0x8b6dff6d, 0x0000ffff, + 0xbefa0080, 0xb97a0283, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbefe4d84, + 0xbf89fc07, 0x8b7aff7f, + 0x04000000, 0x847a857a, + 0x8c6d7a6d, 0xbefa007e, 0x8b7bff7f, 0x0000ffff, - 0x807aff7a, 0x00000200, - 0x807a7e7a, 0x827b807b, - 0xd7610000, 0x00010870, - 0xd7610000, 0x00010a71, - 0xd7610000, 0x00010c72, - 0xd7610000, 0x00010e73, - 0xd7610000, 0x00011074, - 0xd7610000, 0x00011275, - 0xd7610000, 0x00011476, - 0xd7610000, 0x00011677, - 0xd7610000, 0x00011a79, - 0xd7610000, 0x00011c7e, - 0xd7610000, 0x00011e7f, - 0xbefe00ff, 0x00003fff, - 0xbeff0080, 0xdca6c040, - 0x007a0000, 0xd760007a, - 0x00011d00, 0xd760007b, - 0x00011f00, 0xbefe007a, - 0xbeff007b, 0xbef4007e, - 0x8b75ff7f, 0x0000ffff, - 0x8c75ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x10807fac, 0xbef1007d, - 0xbef00080, 0xb8f302dc, - 0x84739973, 0xbefe00c1, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00002, - 0xbeff00c1, 0xbfa00009, + 0xbefe00c1, 0xbeff00c1, + 0xdca6c000, 0x007a0000, + 0x7e000280, 0xbefe007a, + 0xbeff007b, 0xb8fb02dc, + 0x847b997b, 0xb8fa3b05, + 0x807a817a, 0xbf0d997b, + 0xbfa20002, 0x847a897a, + 0xbfa00001, 0x847a8a7a, + 0xb8fb1e06, 0x847b8a7b, + 0x807a7b7a, 0x8b7bff7f, + 0x0000ffff, 0x807aff7a, + 0x00000200, 0x807a7e7a, + 0x827b807b, 0xd7610000, + 0x00010870, 0xd7610000, + 0x00010a71, 0xd7610000, + 0x00010c72, 0xd7610000, + 0x00010e73, 0xd7610000, + 0x00011074, 0xd7610000, + 0x00011275, 0xd7610000, + 0x00011476, 0xd7610000, + 0x00011677, 0xd7610000, + 0x00011a79, 0xd7610000, + 0x00011c7e, 0xd7610000, + 0x00011e7f, 0xbefe00ff, + 0x00003fff, 0xbeff0080, + 0xdca6c040, 0x007a0000, + 0xd760007a, 0x00011d00, + 0xd760007b, 0x00011f00, + 0xbefe007a, 0xbeff007b, + 0xbef4007e, 0x8b75ff7f, + 0x0000ffff, 0x8c75ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x10807fac, + 0xbef1007d, 0xbef00080, + 0xb8f302dc, 0x84739973, + 0xbefe00c1, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00002, 0xbeff00c1, + 0xbfa00009, 0xbef600ff, + 0x01000000, 0xe0685080, + 0x701d0100, 0xe0685100, + 0x701d0200, 0xe0685180, + 0x701d0300, 0xbfa00008, 0xbef600ff, 0x01000000, - 0xe0685080, 0x701d0100, - 0xe0685100, 0x701d0200, - 0xe0685180, 0x701d0300, - 0xbfa00008, 0xbef600ff, - 0x01000000, 0xe0685100, - 0x701d0100, 0xe0685200, - 0x701d0200, 0xe0685300, - 0x701d0300, 0xb8f03b05, - 0x80708170, 0xbf0d9973, - 0xbfa20002, 0x84708970, - 0xbfa00001, 0x84708a70, - 0xb8fa1e06, 0x847a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0xbef600ff, - 0x01000000, 0x7e000280, - 0x7e020280, 0x7e040280, - 0xbefd0080, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xd7610002, 0x0000fa6c, - 0x807d817d, 0x917aff6d, - 0x80000000, 0xd7610002, - 0x0000fa7a, 0x807d817d, - 0xd7610002, 0x0000fa6e, - 0x807d817d, 0xd7610002, - 0x0000fa6f, 0x807d817d, - 0xd7610002, 0x0000fa78, - 0x807d817d, 0xb8faf803, - 0xd7610002, 0x0000fa7a, - 0x807d817d, 0xd7610002, - 0x0000fa7b, 0x807d817d, - 0xb8f1f801, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f814, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f815, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xbefe00ff, 0x0000ffff, - 0xbeff0080, 0xe0685000, - 0x701d0200, 0xbefe00c1, + 0xe0685100, 0x701d0100, + 0xe0685200, 0x701d0200, + 0xe0685300, 0x701d0300, 0xb8f03b05, 0x80708170, 0xbf0d9973, 0xbfa20002, 0x84708970, 0xbfa00001, 0x84708a70, 0xb8fa1e06, 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, 0xbef600ff, 0x01000000, - 0xbef90080, 0xbefd0080, - 0xbf800000, 0xbe804100, - 0xbe824102, 0xbe844104, - 0xbe864106, 0xbe884108, - 0xbe8a410a, 0xbe8c410c, - 0xbe8e410e, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, + 0x7e000280, 0x7e020280, + 0x7e040280, 0xbefd0080, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xd7610002, + 0x0000fa6c, 0x807d817d, + 0x917aff6d, 0x80000000, + 0xd7610002, 0x0000fa7a, + 0x807d817d, 0xd7610002, + 0x0000fa6e, 0x807d817d, + 0xd7610002, 0x0000fa6f, + 0x807d817d, 0xd7610002, + 0x0000fa78, 0x807d817d, + 0xb8faf803, 0xd7610002, + 0x0000fa7a, 0x807d817d, + 0xd7610002, 0x0000fa7b, + 0x807d817d, 0xb8f1f801, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xb8f1f814, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xb8f1f815, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xbefe00ff, + 0x0000ffff, 0xbeff0080, + 0xe0685000, 0x701d0200, + 0xbefe00c1, 0xb8f03b05, + 0x80708170, 0xbf0d9973, + 0xbfa20002, 0x84708970, + 0xbfa00001, 0x84708a70, + 0xb8fa1e06, 0x847a8a7a, + 0x80707a70, 0xbef600ff, + 0x01000000, 0xbef90080, + 0xbefd0080, 0xbf800000, + 0xbe804100, 0xbe824102, + 0xbe844104, 0xbe864106, + 0xbe884108, 0xbe8a410a, + 0xbe8c410c, 0xbe8e410e, + 0xd7610002, 0x0000f200, 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, 0x80798179, 0xd7610002, - 0x0000f20c, 0x80798179, - 0xd7610002, 0x0000f20d, + 0x0000f20b, 0x80798179, + 0xd7610002, 0x0000f20c, 0x80798179, 0xd7610002, - 0x0000f20e, 0x80798179, - 0xd7610002, 0x0000f20f, - 0x80798179, 0xbf06a079, - 0xbfa10006, 0xe0685000, - 0x701d0200, 0x8070ff70, - 0x00000080, 0xbef90080, - 0x7e040280, 0x807d907d, - 0xbf0aff7d, 0x00000060, - 0xbfa2ffbc, 0xbe804100, - 0xbe824102, 0xbe844104, - 0xbe864106, 0xbe884108, - 0xbe8a410a, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, + 0x0000f20d, 0x80798179, + 0xd7610002, 0x0000f20e, 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, + 0x0000f20f, 0x80798179, + 0xbf06a079, 0xbfa10006, + 0xe0685000, 0x701d0200, + 0x8070ff70, 0x00000080, + 0xbef90080, 0x7e040280, + 0x807d907d, 0xbf0aff7d, + 0x00000060, 0xbfa2ffbc, + 0xbe804100, 0xbe824102, + 0xbe844104, 0xbe864106, + 0xbe884108, 0xbe8a410a, + 0xd7610002, 0x0000f200, 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, - 0x80798179, 0xe0685000, - 0x701d0200, 0xbefe00c1, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8fb4306, - 0x8b7bc17b, 0xbfa10044, - 0xbfbd0000, 0x8b7aff6d, - 0x80000000, 0xbfa10040, - 0x847b867b, 0x847b827b, - 0xbef6007b, 0xb8f03b05, - 0x80708170, 0xbf0d9973, - 0xbfa20002, 0x84708970, - 0xbfa00001, 0x84708a70, - 0xb8fa1e06, 0x847a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xd71f0000, - 0x000100c1, 0xd7200000, - 0x000200c1, 0x16000084, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbefd0080, - 0xbfa20012, 0xbe8300ff, - 0x00000080, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf890000, 0xe0685000, - 0x701d0100, 0x807d037d, - 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000080, - 0xbf0a7b7d, 0xbfa2fff4, - 0xbfa00011, 0xbe8300ff, - 0x00000100, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf890000, 0xe0685000, - 0x701d0100, 0x807d037d, - 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000100, - 0xbf0a7b7d, 0xbfa2fff4, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, + 0x80798179, 0xd7610002, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, + 0x80798179, 0xd7610002, + 0x0000f20b, 0x80798179, + 0xe0685000, 0x701d0200, 0xbefe00c1, 0x857d9973, 0x8b7d817d, 0xbf06817d, - 0xbfa20004, 0xbef000ff, - 0x00000200, 0xbeff0080, - 0xbfa00003, 0xbef000ff, - 0x00000400, 0xbeff00c1, - 0xb8fb3b05, 0x807b817b, - 0x847b827b, 0x857d9973, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8fb4306, 0x8b7bc17b, + 0xbfa10044, 0xbfbd0000, + 0x8b7aff6d, 0x80000000, + 0xbfa10040, 0x847b867b, + 0x847b827b, 0xbef6007b, + 0xb8f03b05, 0x80708170, + 0xbf0d9973, 0xbfa20002, + 0x84708970, 0xbfa00001, + 0x84708a70, 0xb8fa1e06, + 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xd71f0000, 0x000100c1, + 0xd7200000, 0x000200c1, + 0x16000084, 0x857d9973, 0x8b7d817d, 0xbf06817d, - 0xbfa20017, 0xbef600ff, - 0x01000000, 0xbefd0084, - 0xbf0a7b7d, 0xbfa10037, - 0x7e008700, 0x7e028701, - 0x7e048702, 0x7e068703, - 0xe0685000, 0x701d0000, - 0xe0685080, 0x701d0100, - 0xe0685100, 0x701d0200, - 0xe0685180, 0x701d0300, - 0x807d847d, 0x8070ff70, - 0x00000200, 0xbf0a7b7d, - 0xbfa2ffef, 0xbfa00025, + 0xbefd0080, 0xbfa20012, + 0xbe8300ff, 0x00000080, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf890000, + 0xe0685000, 0x701d0100, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000080, 0xbf0a7b7d, + 0xbfa2fff4, 0xbfa00011, + 0xbe8300ff, 0x00000100, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf890000, + 0xe0685000, 0x701d0100, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000100, 0xbf0a7b7d, + 0xbfa2fff4, 0xbefe00c1, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20004, + 0xbef000ff, 0x00000200, + 0xbeff0080, 0xbfa00003, + 0xbef000ff, 0x00000400, + 0xbeff00c1, 0xb8fb3b05, + 0x807b817b, 0x847b827b, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20017, 0xbef600ff, 0x01000000, 0xbefd0084, 0xbf0a7b7d, - 0xbfa10011, 0x7e008700, + 0xbfa10037, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xe0685000, - 0x701d0000, 0xe0685100, - 0x701d0100, 0xe0685200, - 0x701d0200, 0xe0685300, + 0x701d0000, 0xe0685080, + 0x701d0100, 0xe0685100, + 0x701d0200, 0xe0685180, 0x701d0300, 0x807d847d, - 0x8070ff70, 0x00000400, + 0x8070ff70, 0x00000200, 0xbf0a7b7d, 0xbfa2ffef, - 0xb8fb1e06, 0x8b7bc17b, - 0xbfa1000c, 0x847b837b, - 0x807b7d7b, 0xbefe00c1, - 0xbeff0080, 0x7e008700, + 0xbfa00025, 0xbef600ff, + 0x01000000, 0xbefd0084, + 0xbf0a7b7d, 0xbfa10011, + 0x7e008700, 0x7e028701, + 0x7e048702, 0x7e068703, 0xe0685000, 0x701d0000, - 0x807d817d, 0x8070ff70, - 0x00000080, 0xbf0a7b7d, - 0xbfa2fff8, 0xbfa00146, - 0xbef4007e, 0x8b75ff7f, - 0x0000ffff, 0x8c75ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x10807fac, - 0xb8f202dc, 0x84729972, - 0x8b6eff7f, 0x04000000, - 0xbfa1003a, 0xbefe00c1, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8ef4306, - 0x8b6fc16f, 0xbfa1002f, - 0x846f866f, 0x846f826f, - 0xbef6006f, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x8078ff78, - 0x00000080, 0xbef600ff, - 0x01000000, 0x857d9972, - 0x8b7d817d, 0xbf06817d, - 0xbefd0080, 0xbfa2000c, - 0xe0500000, 0x781d0000, - 0xbf8903f7, 0xdac00000, - 0x00000000, 0x807dff7d, - 0x00000080, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff5, 0xbfa0000b, - 0xe0500000, 0x781d0000, - 0xbf8903f7, 0xdac00000, - 0x00000000, 0x807dff7d, - 0x00000100, 0x8078ff78, - 0x00000100, 0xbf0a6f7d, - 0xbfa2fff5, 0xbef80080, + 0xe0685100, 0x701d0100, + 0xe0685200, 0x701d0200, + 0xe0685300, 0x701d0300, + 0x807d847d, 0x8070ff70, + 0x00000400, 0xbf0a7b7d, + 0xbfa2ffef, 0xb8fb1e06, + 0x8b7bc17b, 0xbfa1000c, + 0x847b837b, 0x807b7d7b, + 0xbefe00c1, 0xbeff0080, + 0x7e008700, 0xe0685000, + 0x701d0000, 0x807d817d, + 0x8070ff70, 0x00000080, + 0xbf0a7b7d, 0xbfa2fff8, + 0xbfa00146, 0xbef4007e, + 0x8b75ff7f, 0x0000ffff, + 0x8c75ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x10807fac, 0xb8f202dc, + 0x84729972, 0x8b6eff7f, + 0x04000000, 0xbfa1003a, 0xbefe00c1, 0x857d9972, 0x8b7d817d, 0xbf06817d, 0xbfa20002, 0xbeff0080, 0xbfa00001, 0xbeff00c1, - 0xb8ef3b05, 0x806f816f, - 0x846f826f, 0x857d9972, - 0x8b7d817d, 0xbf06817d, - 0xbfa20024, 0xbef600ff, - 0x01000000, 0xbeee0078, + 0xb8ef4306, 0x8b6fc16f, + 0xbfa1002f, 0x846f866f, + 0x846f826f, 0xbef6006f, + 0xb8f83b05, 0x80788178, + 0xbf0d9972, 0xbfa20002, + 0x84788978, 0xbfa00001, + 0x84788a78, 0xb8ee1e06, + 0x846e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0xbefd0084, 0xbf0a6f7d, - 0xbfa10050, 0xe0505000, - 0x781d0000, 0xe0505080, - 0x781d0100, 0xe0505100, - 0x781d0200, 0xe0505180, - 0x781d0300, 0xbf8903f7, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807d847d, 0x8078ff78, - 0x00000200, 0xbf0a6f7d, - 0xbfa2ffee, 0xe0505000, - 0x6e1d0000, 0xe0505080, - 0x6e1d0100, 0xe0505100, - 0x6e1d0200, 0xe0505180, - 0x6e1d0300, 0xbf8903f7, - 0xbfa00034, 0xbef600ff, - 0x01000000, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefd0084, 0xbf0a6f7d, - 0xbfa10012, 0xe0505000, - 0x781d0000, 0xe0505100, - 0x781d0100, 0xe0505200, - 0x781d0200, 0xe0505300, - 0x781d0300, 0xbf8903f7, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807d847d, 0x8078ff78, - 0x00000400, 0xbf0a6f7d, - 0xbfa2ffee, 0xb8ef1e06, - 0x8b6fc16f, 0xbfa1000e, - 0x846f836f, 0x806f7d6f, - 0xbefe00c1, 0xbeff0080, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbefd0080, + 0xbfa2000c, 0xe0500000, + 0x781d0000, 0xbf8903f7, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000080, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff5, + 0xbfa0000b, 0xe0500000, + 0x781d0000, 0xbf8903f7, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000100, + 0x8078ff78, 0x00000100, + 0xbf0a6f7d, 0xbfa2fff5, + 0xbef80080, 0xbefe00c1, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbfa20002, + 0xbeff0080, 0xbfa00001, + 0xbeff00c1, 0xb8ef3b05, + 0x806f816f, 0x846f826f, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbfa20024, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000200, 0xbefd0084, + 0xbf0a6f7d, 0xbfa10050, 0xe0505000, 0x781d0000, + 0xe0505080, 0x781d0100, + 0xe0505100, 0x781d0200, + 0xe0505180, 0x781d0300, 0xbf8903f7, 0x7e008500, - 0x807d817d, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff7, 0xbeff00c1, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807d847d, + 0x8078ff78, 0x00000200, + 0xbf0a6f7d, 0xbfa2ffee, 0xe0505000, 0x6e1d0000, - 0xe0505100, 0x6e1d0100, - 0xe0505200, 0x6e1d0200, - 0xe0505300, 0x6e1d0300, - 0xbf8903f7, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x80f8ff78, - 0x00000050, 0xbef600ff, - 0x01000000, 0xbefd00ff, - 0x0000006c, 0x80f89078, - 0xf428403a, 0xf0000000, - 0xbf89fc07, 0x80fd847d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0x80f8a078, - 0xf42c403a, 0xf0000000, - 0xbf89fc07, 0x80fd887d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0x80f8c078, - 0xf430403a, 0xf0000000, - 0xbf89fc07, 0x80fd907d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0xbe884308, - 0xbe8a430a, 0xbe8c430c, - 0xbe8e430e, 0xbf06807d, - 0xbfa1fff0, 0xb980f801, - 0x00000000, 0xbfbd0000, + 0xe0505080, 0x6e1d0100, + 0xe0505100, 0x6e1d0200, + 0xe0505180, 0x6e1d0300, + 0xbf8903f7, 0xbfa00034, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefd0084, + 0xbf0a6f7d, 0xbfa10012, + 0xe0505000, 0x781d0000, + 0xe0505100, 0x781d0100, + 0xe0505200, 0x781d0200, + 0xe0505300, 0x781d0300, + 0xbf8903f7, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807d847d, + 0x8078ff78, 0x00000400, + 0xbf0a6f7d, 0xbfa2ffee, + 0xb8ef1e06, 0x8b6fc16f, + 0xbfa1000e, 0x846f836f, + 0x806f7d6f, 0xbefe00c1, + 0xbeff0080, 0xe0505000, + 0x781d0000, 0xbf8903f7, + 0x7e008500, 0x807d817d, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff7, + 0xbeff00c1, 0xe0505000, + 0x6e1d0000, 0xe0505100, + 0x6e1d0100, 0xe0505200, + 0x6e1d0200, 0xe0505300, + 0x6e1d0300, 0xbf8903f7, 0xb8f83b05, 0x80788178, 0xbf0d9972, 0xbfa20002, 0x84788978, 0xbfa00001, 0x84788a78, 0xb8ee1e06, 0x846e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, + 0x80f8ff78, 0x00000050, 0xbef600ff, 0x01000000, - 0xf4205bfa, 0xf0000000, - 0x80788478, 0xf4205b3a, + 0xbefd00ff, 0x0000006c, + 0x80f89078, 0xf428403a, + 0xf0000000, 0xbf89fc07, + 0x80fd847d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0x80f8a078, 0xf42c403a, + 0xf0000000, 0xbf89fc07, + 0x80fd887d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0x80f8c078, 0xf430403a, + 0xf0000000, 0xbf89fc07, + 0x80fd907d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0xbe884308, 0xbe8a430a, + 0xbe8c430c, 0xbe8e430e, + 0xbf06807d, 0xbfa1fff0, + 0xb980f801, 0x00000000, + 0xbfbd0000, 0xb8f83b05, + 0x80788178, 0xbf0d9972, + 0xbfa20002, 0x84788978, + 0xbfa00001, 0x84788a78, + 0xb8ee1e06, 0x846e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0xbef600ff, + 0x01000000, 0xf4205bfa, 0xf0000000, 0x80788478, - 0xf4205b7a, 0xf0000000, - 0x80788478, 0xf4205c3a, + 0xf4205b3a, 0xf0000000, + 0x80788478, 0xf4205b7a, 0xf0000000, 0x80788478, - 0xf4205c7a, 0xf0000000, - 0x80788478, 0xf4205eba, + 0xf4205c3a, 0xf0000000, + 0x80788478, 0xf4205c7a, 0xf0000000, 0x80788478, - 0xf4205efa, 0xf0000000, - 0x80788478, 0xf4205e7a, + 0xf4205eba, 0xf0000000, + 0x80788478, 0xf4205efa, 0xf0000000, 0x80788478, - 0xf4205cfa, 0xf0000000, - 0x80788478, 0xf4205bba, + 0xf4205e7a, 0xf0000000, + 0x80788478, 0xf4205cfa, 0xf0000000, 0x80788478, - 0xbf89fc07, 0xb96ef814, 0xf4205bba, 0xf0000000, 0x80788478, 0xbf89fc07, - 0xb96ef815, 0xbefd006f, - 0xbefe0070, 0xbeff0071, - 0x8b6f7bff, 0x000003ff, - 0xb96f4803, 0x8b6f7bff, - 0xfffff800, 0x856f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee3b05, 0x806e816e, - 0xbf0d9972, 0xbfa20002, - 0x846e896e, 0xbfa00001, - 0x846e8a6e, 0xb8ef1e06, - 0x846f8a6f, 0x806e6f6e, - 0x806eff6e, 0x00000200, - 0x806e746e, 0x826f8075, - 0x8b6fff6f, 0x0000ffff, - 0xf4085c37, 0xf8000050, - 0xf4085d37, 0xf8000060, - 0xf4005e77, 0xf8000074, - 0xbf89fc07, 0x8b6dff6d, - 0x0000ffff, 0x8bfe7e7e, - 0x8bea6a6a, 0xb8eef802, - 0xbf0d866e, 0xbfa20002, - 0xb97af802, 0xbe80486c, - 0xb97af802, 0xbe804a6c, - 0xbfb00000, 0xbf9f0000, + 0xb96ef814, 0xf4205bba, + 0xf0000000, 0x80788478, + 0xbf89fc07, 0xb96ef815, + 0xbefd006f, 0xbefe0070, + 0xbeff0071, 0x8b6f7bff, + 0x000003ff, 0xb96f4803, + 0x8b6f7bff, 0xfffff800, + 0x856f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee3b05, + 0x806e816e, 0xbf0d9972, + 0xbfa20002, 0x846e896e, + 0xbfa00001, 0x846e8a6e, + 0xb8ef1e06, 0x846f8a6f, + 0x806e6f6e, 0x806eff6e, + 0x00000200, 0x806e746e, + 0x826f8075, 0x8b6fff6f, + 0x0000ffff, 0xf4085c37, + 0xf8000050, 0xf4085d37, + 0xf8000060, 0xf4005e77, + 0xf8000074, 0xbf89fc07, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb8eef802, 0xbf0d866e, + 0xbfa20002, 0xb97af802, + 0xbe80486c, 0xb97af802, + 0xbe804a6c, 0xbfb00000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0x00000000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 0f81670f6f9c..8b92c33c2a7c 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -186,6 +186,12 @@ L_SKIP_RESTORE: s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) #if SW_SA_TRAP + // If ttmp1[30] is set then issue s_barrier to unblock dependent waves. + s_bitcmp1_b32 s_save_pc_hi, 30 + s_cbranch_scc0 L_TRAP_NO_BARRIER + s_barrier + +L_TRAP_NO_BARRIER: // If ttmp1[31] is set then trap may occur early. // Spin wait until SAVECTX exception is raised. s_bitcmp1_b32 s_save_pc_hi, 31 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 5feaba6a77de..6d291aa6386b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1950,7 +1950,7 @@ static int criu_checkpoint(struct file *filep, { int ret; uint32_t num_devices, num_bos, num_objects; - uint64_t priv_size, priv_offset = 0; + uint64_t priv_size, priv_offset = 0, bo_priv_offset; if (!args->devices || !args->bos || !args->priv_data) return -EINVAL; @@ -1994,38 +1994,34 @@ static int criu_checkpoint(struct file *filep, if (ret) goto exit_unlock; - ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos, - (uint8_t __user *)args->priv_data, &priv_offset); - if (ret) - goto exit_unlock; + /* Leave room for BOs in the private data. They need to be restored + * before events, but we checkpoint them last to simplify the error + * handling. + */ + bo_priv_offset = priv_offset; + priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data); if (num_objects) { ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data, &priv_offset); if (ret) - goto close_bo_fds; + goto exit_unlock; ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data, &priv_offset); if (ret) - goto close_bo_fds; + goto exit_unlock; ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset); if (ret) - goto close_bo_fds; + goto exit_unlock; } -close_bo_fds: - if (ret) { - /* If IOCTL returns err, user assumes all FDs opened in criu_dump_bos are closed */ - uint32_t i; - struct kfd_criu_bo_bucket *bo_buckets = (struct kfd_criu_bo_bucket *) args->bos; - - for (i = 0; i < num_bos; i++) { - if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) - close_fd(bo_buckets[i].dmabuf_fd); - } - } + /* This must be the last thing in this function that can fail. + * Otherwise we leak dmabuf file descriptors. + */ + ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos, + (uint8_t __user *)args->priv_data, &bo_priv_offset); exit_unlock: mutex_unlock(&p->mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cd5f8b219bf9..3251f4783ba1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -50,16 +50,6 @@ static inline unsigned int get_and_inc_gpu_processor_id( return current_id; } -/* Static table to describe GPU Cache information */ -struct kfd_gpu_cache_info { - uint32_t cache_size; - uint32_t cache_level; - uint32_t flags; - /* Indicates how many Compute Units share this cache - * within a SA. Value = 1 indicates the cache is not shared - */ - uint32_t num_cu_shared; -}; static struct kfd_gpu_cache_info kaveri_cache_info[] = { { @@ -795,6 +785,150 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = { }, }; +static struct kfd_gpu_cache_info gfx1037_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 256, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, +}; + +static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 256, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, +}; + +static struct kfd_gpu_cache_info dummy_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache per SQC */ + .cache_size = 32, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache per SQC */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* GL1 Data Cache per SA */ + .cache_size = 128, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 6, + }, + { + /* L2 Data Cache per GPU (Total Tex Cache) */ + .cache_size = 2048, + .cache_level = 2, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 6, + }, +}; + static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) { @@ -975,8 +1109,12 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, props->cachelines_per_tag = cache->lines_per_tag; props->cache_assoc = cache->associativity; props->cache_latency = cache->cache_latency; + memcpy(props->sibling_map, cache->sibling_map, - sizeof(props->sibling_map)); + CRAT_SIBLINGMAP_SIZE); + + /* set the sibling_map_size as 32 for CRAT from ACPI */ + props->sibling_map_size = CRAT_SIBLINGMAP_SIZE; if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) props->cache_type |= HSA_CACHE_TYPE_DATA; @@ -987,7 +1125,6 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) props->cache_type |= HSA_CACHE_TYPE_HSACU; - dev->cache_count++; dev->node_props.caches_count++; list_add_tail(&props->list, &dev->cache_props); @@ -1195,125 +1332,6 @@ err: return ret; } -/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ -static int fill_in_l1_pcache(struct crat_subtype_cache *pcache, - struct kfd_gpu_cache_info *pcache_info, - struct kfd_cu_info *cu_info, - int mem_available, - int cu_bitmask, - int cache_type, unsigned int cu_processor_id, - int cu_block) -{ - unsigned int cu_sibling_map_mask; - int first_active_cu; - - /* First check if enough memory is available */ - if (sizeof(struct crat_subtype_cache) > mem_available) - return -ENOMEM; - - cu_sibling_map_mask = cu_bitmask; - cu_sibling_map_mask >>= cu_block; - cu_sibling_map_mask &= - ((1 << pcache_info[cache_type].num_cu_shared) - 1); - first_active_cu = ffs(cu_sibling_map_mask); - - /* CU could be inactive. In case of shared cache find the first active - * CU. and incase of non-shared cache check if the CU is inactive. If - * inactive active skip it - */ - if (first_active_cu) { - memset(pcache, 0, sizeof(struct crat_subtype_cache)); - pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; - pcache->length = sizeof(struct crat_subtype_cache); - pcache->flags = pcache_info[cache_type].flags; - pcache->processor_id_low = cu_processor_id - + (first_active_cu - 1); - pcache->cache_level = pcache_info[cache_type].cache_level; - pcache->cache_size = pcache_info[cache_type].cache_size; - - /* Sibling map is w.r.t processor_id_low, so shift out - * inactive CU - */ - cu_sibling_map_mask = - cu_sibling_map_mask >> (first_active_cu - 1); - - pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); - pcache->sibling_map[1] = - (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); - pcache->sibling_map[2] = - (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); - pcache->sibling_map[3] = - (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); - return 0; - } - return 1; -} - -/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ -static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache, - struct kfd_gpu_cache_info *pcache_info, - struct kfd_cu_info *cu_info, - int mem_available, - int cache_type, unsigned int cu_processor_id) -{ - unsigned int cu_sibling_map_mask; - int first_active_cu; - int i, j, k; - - /* First check if enough memory is available */ - if (sizeof(struct crat_subtype_cache) > mem_available) - return -ENOMEM; - - cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; - cu_sibling_map_mask &= - ((1 << pcache_info[cache_type].num_cu_shared) - 1); - first_active_cu = ffs(cu_sibling_map_mask); - - /* CU could be inactive. In case of shared cache find the first active - * CU. and incase of non-shared cache check if the CU is inactive. If - * inactive active skip it - */ - if (first_active_cu) { - memset(pcache, 0, sizeof(struct crat_subtype_cache)); - pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; - pcache->length = sizeof(struct crat_subtype_cache); - pcache->flags = pcache_info[cache_type].flags; - pcache->processor_id_low = cu_processor_id - + (first_active_cu - 1); - pcache->cache_level = pcache_info[cache_type].cache_level; - pcache->cache_size = pcache_info[cache_type].cache_size; - - /* Sibling map is w.r.t processor_id_low, so shift out - * inactive CU - */ - cu_sibling_map_mask = - cu_sibling_map_mask >> (first_active_cu - 1); - k = 0; - for (i = 0; i < cu_info->num_shader_engines; i++) { - for (j = 0; j < cu_info->num_shader_arrays_per_engine; - j++) { - pcache->sibling_map[k] = - (uint8_t)(cu_sibling_map_mask & 0xFF); - pcache->sibling_map[k+1] = - (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); - pcache->sibling_map[k+2] = - (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); - pcache->sibling_map[k+3] = - (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); - k += 4; - cu_sibling_map_mask = - cu_info->cu_bitmap[i % 4][j + i / 4]; - cu_sibling_map_mask &= ( - (1 << pcache_info[cache_type].num_cu_shared) - - 1); - } - } - return 0; - } - return 1; -} - -#define KFD_MAX_CACHE_TYPES 6 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, struct kfd_gpu_cache_info *pcache_info) @@ -1387,222 +1405,134 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, return i; } -/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info - * tables - * - * @kdev - [IN] GPU device - * @gpu_processor_id - [IN] GPU processor ID to which these caches - * associate - * @available_size - [IN] Amount of memory available in pcache - * @cu_info - [IN] Compute Unit info obtained from KGD - * @pcache - [OUT] memory into which cache data is to be filled in. - * @size_filled - [OUT] amount of data used up in pcache. - * @num_of_entries - [OUT] number of caches added - */ -static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, - int gpu_processor_id, - int available_size, - struct kfd_cu_info *cu_info, - struct crat_subtype_cache *pcache, - int *size_filled, - int *num_of_entries) +int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info) { - struct kfd_gpu_cache_info *pcache_info; - struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; int num_of_cache_types = 0; - int i, j, k; - int ct = 0; - int mem_available = available_size; - unsigned int cu_processor_id; - int ret; - unsigned int num_cu_shared; switch (kdev->adev->asic_type) { case CHIP_KAVERI: - pcache_info = kaveri_cache_info; + *pcache_info = kaveri_cache_info; num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); break; case CHIP_HAWAII: - pcache_info = hawaii_cache_info; + *pcache_info = hawaii_cache_info; num_of_cache_types = ARRAY_SIZE(hawaii_cache_info); break; case CHIP_CARRIZO: - pcache_info = carrizo_cache_info; + *pcache_info = carrizo_cache_info; num_of_cache_types = ARRAY_SIZE(carrizo_cache_info); break; case CHIP_TONGA: - pcache_info = tonga_cache_info; + *pcache_info = tonga_cache_info; num_of_cache_types = ARRAY_SIZE(tonga_cache_info); break; case CHIP_FIJI: - pcache_info = fiji_cache_info; + *pcache_info = fiji_cache_info; num_of_cache_types = ARRAY_SIZE(fiji_cache_info); break; case CHIP_POLARIS10: - pcache_info = polaris10_cache_info; + *pcache_info = polaris10_cache_info; num_of_cache_types = ARRAY_SIZE(polaris10_cache_info); break; case CHIP_POLARIS11: - pcache_info = polaris11_cache_info; + *pcache_info = polaris11_cache_info; num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); break; case CHIP_POLARIS12: - pcache_info = polaris12_cache_info; + *pcache_info = polaris12_cache_info; num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); break; case CHIP_VEGAM: - pcache_info = vegam_cache_info; + *pcache_info = vegam_cache_info; num_of_cache_types = ARRAY_SIZE(vegam_cache_info); break; default: switch (KFD_GC_VERSION(kdev)) { case IP_VERSION(9, 0, 1): - pcache_info = vega10_cache_info; + *pcache_info = vega10_cache_info; num_of_cache_types = ARRAY_SIZE(vega10_cache_info); break; case IP_VERSION(9, 2, 1): - pcache_info = vega12_cache_info; + *pcache_info = vega12_cache_info; num_of_cache_types = ARRAY_SIZE(vega12_cache_info); break; case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): - pcache_info = vega20_cache_info; + *pcache_info = vega20_cache_info; num_of_cache_types = ARRAY_SIZE(vega20_cache_info); break; case IP_VERSION(9, 4, 2): - pcache_info = aldebaran_cache_info; + *pcache_info = aldebaran_cache_info; num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); break; case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): - pcache_info = raven_cache_info; + *pcache_info = raven_cache_info; num_of_cache_types = ARRAY_SIZE(raven_cache_info); break; case IP_VERSION(9, 3, 0): - pcache_info = renoir_cache_info; + *pcache_info = renoir_cache_info; num_of_cache_types = ARRAY_SIZE(renoir_cache_info); break; case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 3): case IP_VERSION(10, 1, 4): - pcache_info = navi10_cache_info; + *pcache_info = navi10_cache_info; num_of_cache_types = ARRAY_SIZE(navi10_cache_info); break; case IP_VERSION(10, 1, 1): - pcache_info = navi14_cache_info; + *pcache_info = navi14_cache_info; num_of_cache_types = ARRAY_SIZE(navi14_cache_info); break; case IP_VERSION(10, 3, 0): - pcache_info = sienna_cichlid_cache_info; + *pcache_info = sienna_cichlid_cache_info; num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); break; case IP_VERSION(10, 3, 2): - pcache_info = navy_flounder_cache_info; + *pcache_info = navy_flounder_cache_info; num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); break; case IP_VERSION(10, 3, 4): - pcache_info = dimgrey_cavefish_cache_info; + *pcache_info = dimgrey_cavefish_cache_info; num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); break; case IP_VERSION(10, 3, 1): - pcache_info = vangogh_cache_info; + *pcache_info = vangogh_cache_info; num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); break; case IP_VERSION(10, 3, 5): - pcache_info = beige_goby_cache_info; + *pcache_info = beige_goby_cache_info; num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); break; case IP_VERSION(10, 3, 3): - case IP_VERSION(10, 3, 6): /* TODO: Double check these on production silicon */ - case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */ - pcache_info = yellow_carp_cache_info; + *pcache_info = yellow_carp_cache_info; num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); break; + case IP_VERSION(10, 3, 6): + *pcache_info = gc_10_3_6_cache_info; + num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info); + break; + case IP_VERSION(10, 3, 7): + *pcache_info = gfx1037_cache_info; + num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info); + break; case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - pcache_info = cache_info; + case IP_VERSION(11, 0, 4): num_of_cache_types = - kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info); + kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); break; default: - return -EINVAL; - } - } - - *size_filled = 0; - *num_of_entries = 0; - - /* For each type of cache listed in the kfd_gpu_cache_info table, - * go through all available Compute Units. - * The [i,j,k] loop will - * if kfd_gpu_cache_info.num_cu_shared = 1 - * will parse through all available CU - * If (kfd_gpu_cache_info.num_cu_shared != 1) - * then it will consider only one CU from - * the shared unit - */ - - for (ct = 0; ct < num_of_cache_types; ct++) { - cu_processor_id = gpu_processor_id; - if (pcache_info[ct].cache_level == 1) { - for (i = 0; i < cu_info->num_shader_engines; i++) { - for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { - for (k = 0; k < cu_info->num_cu_per_sh; - k += pcache_info[ct].num_cu_shared) { - ret = fill_in_l1_pcache(pcache, - pcache_info, - cu_info, - mem_available, - cu_info->cu_bitmap[i % 4][j + i / 4], - ct, - cu_processor_id, - k); - - if (ret < 0) + *pcache_info = dummy_cache_info; + num_of_cache_types = ARRAY_SIZE(dummy_cache_info); + pr_warn("dummy cache info is used temporarily and real cache info need update later.\n"); break; - - if (!ret) { - pcache++; - (*num_of_entries)++; - mem_available -= sizeof(*pcache); - (*size_filled) += sizeof(*pcache); - } - - /* Move to next CU block */ - num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= - cu_info->num_cu_per_sh) ? - pcache_info[ct].num_cu_shared : - (cu_info->num_cu_per_sh - k); - cu_processor_id += num_cu_shared; } - } - } - } else { - ret = fill_in_l2_l3_pcache(pcache, - pcache_info, - cu_info, - mem_available, - ct, - cu_processor_id); - - if (ret < 0) - break; - - if (!ret) { - pcache++; - (*num_of_entries)++; - mem_available -= sizeof(*pcache); - (*size_filled) += sizeof(*pcache); - } - } } - - pr_debug("Added [%d] GPU cache entries\n", *num_of_entries); - - return 0; + return num_of_cache_types; } static bool kfd_ignore_crat(void) @@ -1961,8 +1891,8 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) struct acpi_table_header *table_header = NULL; struct acpi_subtable_header *sub_header = NULL; unsigned long table_end, subtable_len; - u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 | - pci_dev_id(kdev->pdev); + u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 | + pci_dev_id(kdev->adev->pdev); u32 bdf; acpi_status status; struct acpi_srat_cpu_affinity *cpu; @@ -2037,7 +1967,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) numa_node = 0; if (numa_node != NUMA_NO_NODE) - set_dev_node(&kdev->pdev->dev, numa_node); + set_dev_node(&kdev->adev->pdev->dev, numa_node); } #endif @@ -2098,14 +2028,14 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, sub_type_hdr->proximity_domain_from = proximity_domain; #ifdef CONFIG_ACPI_NUMA - if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) + if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE) kfd_find_numa_node_in_srat(kdev); #endif #ifdef CONFIG_NUMA - if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) + if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE) sub_type_hdr->proximity_domain_to = 0; else - sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node; + sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node; #else sub_type_hdr->proximity_domain_to = 0; #endif @@ -2161,8 +2091,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, struct kfd_cu_info cu_info; int avail_size = *size; uint32_t total_num_of_cu; - int num_of_cache_entries = 0; - int cache_mem_filled = 0; uint32_t nid = 0; int ret = 0; @@ -2263,31 +2191,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, crat_table->length += sizeof(struct crat_subtype_memory); crat_table->total_entries++; - /* TODO: Fill in cache information. This information is NOT readily - * available in KGD - */ - sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + - sub_type_hdr->length); - ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low, - avail_size, - &cu_info, - (struct crat_subtype_cache *)sub_type_hdr, - &cache_mem_filled, - &num_of_cache_entries); - - if (ret < 0) - return ret; - - crat_table->length += cache_mem_filled; - crat_table->total_entries += num_of_cache_entries; - avail_size -= cache_mem_filled; - /* Fill in Subtype: IO_LINKS * Only direct links are added here which is Link from GPU to * its NUMA node. Indirect links are added by userspace. */ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + - cache_mem_filled); + sub_type_hdr->length); ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev, (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 482ba84a728d..8d1e8ba58dee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -29,11 +29,10 @@ #pragma pack(1) /* - * 4CC signature values for the CRAT and CDIT ACPI tables + * 4CC signature value for the CRAT ACPI table */ #define CRAT_SIGNATURE "CRAT" -#define CDIT_SIGNATURE "CDIT" /* * Component Resource Association Table (CRAT) @@ -292,31 +291,22 @@ struct crat_subtype_generic { uint32_t flags; }; -/* - * Component Locality Distance Information Table (CDIT) - */ -#define CDIT_OEMID_LENGTH 6 -#define CDIT_OEMTABLEID_LENGTH 8 - -struct cdit_header { - uint32_t signature; - uint32_t length; - uint8_t revision; - uint8_t checksum; - uint8_t oem_id[CDIT_OEMID_LENGTH]; - uint8_t oem_table_id[CDIT_OEMTABLEID_LENGTH]; - uint32_t oem_revision; - uint32_t creator_id; - uint32_t creator_revision; - uint32_t total_entries; - uint16_t num_domains; - uint8_t entry[1]; -}; - #pragma pack() struct kfd_dev; +/* Static table to describe GPU Cache information */ +struct kfd_gpu_cache_info { + uint32_t cache_size; + uint32_t cache_level; + uint32_t flags; + /* Indicates how many Compute Units share this cache + * within a SA. Value = 1 indicates the cache is not shared + */ + uint32_t num_cu_shared; +}; +int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info); + int kfd_create_crat_image_acpi(void **crat_image, size_t *size); void kfd_destroy_crat_image(void *crat_image); int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 65a1d4f9004b..b8936340742b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -153,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; break; default: @@ -227,7 +228,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) { struct kfd_dev *kfd = NULL; const struct kfd2kgd_calls *f2g = NULL; - struct pci_dev *pdev = adev->pdev; uint32_t gfx_target_version = 0; switch (adev->asic_type) { @@ -395,6 +395,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) f2g = &gfx_v11_kfd2kgd; break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): gfx_target_version = 110003; f2g = &gfx_v11_kfd2kgd; break; @@ -429,7 +430,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) kfd->adev = adev; kfd_device_info_init(kfd, vf, gfx_target_version); - kfd->pdev = pdev; kfd->init_complete = false; kfd->kfd2kgd = f2g; atomic_set(&kfd->compute_profile, 0); @@ -497,7 +497,10 @@ static int kfd_gws_init(struct kfd_dev *kfd) (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1) && kfd->mec2_fw_version >= 0x30) || (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) - && kfd->mec2_fw_version >= 0x28)))) + && kfd->mec2_fw_version >= 0x28) || + (KFD_GC_VERSION(kfd) >= IP_VERSION(10, 3, 0) + && KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0) + && kfd->mec2_fw_version >= 0x6b)))) ret = amdgpu_amdkfd_alloc_gws(kfd->adev, kfd->adev->gds.gws_size, &kfd->gws); @@ -511,12 +514,10 @@ static void kfd_smi_init(struct kfd_dev *dev) } bool kgd2kfd_device_init(struct kfd_dev *kfd, - struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) { unsigned int size, map_process_packet_size; - kfd->ddev = ddev; kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_MEC1); kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, @@ -541,7 +542,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) { dev_info(kfd_device, "skipped device %x:%x, PCI rejects atomics %d<%d\n", - kfd->pdev->vendor, kfd->pdev->device, + kfd->adev->pdev->vendor, kfd->adev->pdev->device, kfd->mec_fw_version, kfd->device_info.no_atomic_fw_version); return false; @@ -650,8 +651,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_smi_init(kfd); kfd->init_complete = true; - dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, - kfd->pdev->device); + dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor, + kfd->adev->pdev->device); pr_debug("Starting kfd with the following scheduling policy %d\n", kfd->dqm->sched_policy); @@ -676,7 +677,7 @@ alloc_gtt_mem_failure: amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); dev_err(kfd_device, "device %x:%x NOT added due to errors\n", - kfd->pdev->vendor, kfd->pdev->device); + kfd->adev->pdev->vendor, kfd->adev->pdev->device); out: return kfd->init_complete; } @@ -789,7 +790,7 @@ int kgd2kfd_resume_iommu(struct kfd_dev *kfd) if (err) dev_err(kfd_device, "Failed to resume IOMMU for device %x:%x\n", - kfd->pdev->vendor, kfd->pdev->device); + kfd->adev->pdev->vendor, kfd->adev->pdev->device); return err; } @@ -801,7 +802,7 @@ static int kfd_resume(struct kfd_dev *kfd) if (err) dev_err(kfd_device, "Error starting queue manager for device %x:%x\n", - kfd->pdev->vendor, kfd->pdev->device); + kfd->adev->pdev->vendor, kfd->adev->pdev->device); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 83e3ce9f6049..729d26d648af 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -506,6 +506,7 @@ int kfd_criu_restore_event(struct file *devkfd, ret = create_other_event(p, ev, &ev_priv->event_id); break; } + mutex_unlock(&p->event_mutex); exit: if (ret) @@ -513,8 +514,6 @@ exit: kfree(ev_priv); - mutex_unlock(&p->event_mutex); - return ret; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index fbd0afe4da42..ec1bf611624e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -49,7 +49,7 @@ int kfd_iommu_check_device(struct kfd_dev *kfd) return -ENODEV; iommu_info.flags = 0; - err = amd_iommu_device_info(kfd->pdev, &iommu_info); + err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info); if (err) return err; @@ -71,7 +71,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd) return 0; iommu_info.flags = 0; - err = amd_iommu_device_info(kfd->pdev, &iommu_info); + err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info); if (err < 0) { dev_err(kfd_device, "error getting iommu info. is the iommu enabled?\n"); @@ -121,7 +121,7 @@ int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd) return -EINVAL; } - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); + err = amd_iommu_bind_pasid(dev->adev->pdev, p->pasid, p->lead_thread); if (!err) pdd->bound = PDD_BOUND; @@ -139,7 +139,8 @@ void kfd_iommu_unbind_process(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) if (p->pdds[i]->bound == PDD_BOUND) - amd_iommu_unbind_pasid(p->pdds[i]->dev->pdev, p->pasid); + amd_iommu_unbind_pasid(p->pdds[i]->dev->adev->pdev, + p->pasid); } /* Callback for process shutdown invoked by the IOMMU driver */ @@ -222,7 +223,7 @@ static int kfd_bind_processes_to_device(struct kfd_dev *kfd) continue; } - err = amd_iommu_bind_pasid(kfd->pdev, p->pasid, + err = amd_iommu_bind_pasid(kfd->adev->pdev, p->pasid, p->lead_thread); if (err < 0) { pr_err("Unexpected pasid 0x%x binding failure\n", @@ -282,9 +283,9 @@ void kfd_iommu_suspend(struct kfd_dev *kfd) kfd_unbind_processes_from_device(kfd); - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); - amd_iommu_free_device(kfd->pdev); + amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL); + amd_iommu_free_device(kfd->adev->pdev); } /** kfd_iommu_resume - Restore IOMMU after resume @@ -302,20 +303,20 @@ int kfd_iommu_resume(struct kfd_dev *kfd) pasid_limit = kfd_get_pasid_limit(); - err = amd_iommu_init_device(kfd->pdev, pasid_limit); + err = amd_iommu_init_device(kfd->adev->pdev, pasid_limit); if (err) return -ENXIO; - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, + amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, iommu_pasid_shutdown_callback); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, + amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, iommu_invalid_ppr_cb); err = kfd_bind_processes_to_device(kfd); if (err) { - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); - amd_iommu_free_device(kfd->pdev); + amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL); + amd_iommu_free_device(kfd->adev->pdev); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 2797029bd500..10048ce16aea 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -28,7 +28,6 @@ #include "amdgpu_sync.h" #include "amdgpu_object.h" #include "amdgpu_vm.h" -#include "amdgpu_mn.h" #include "amdgpu_res_cursor.h" #include "kfd_priv.h" #include "kfd_svm.h" @@ -65,8 +64,11 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); num_bytes = npages * 8; - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, - AMDGPU_IB_POOL_DELAYED, &job); + r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + num_dw * 4 + num_bytes, + AMDGPU_IB_POOL_DELAYED, + &job); if (r) return r; @@ -89,18 +91,10 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, cpu_addr = &job->ibs[0].ptr[num_dw]; amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr); - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &fence); - if (r) - goto error_free; - + fence = amdgpu_job_submit(job); dma_fence_put(fence); return r; - -error_free: - amdgpu_job_free(job); - return r; } /** @@ -529,8 +523,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, for (addr = start; addr < end;) { unsigned long next; - vma = find_vma(mm, addr); - if (!vma || addr < vma->vm_start) + vma = vma_lookup(mm, addr); + if (!vma) break; next = min(vma->vm_end, end); @@ -798,8 +792,8 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, for (addr = start; addr < end;) { unsigned long next; - vma = find_vma(mm, addr); - if (!vma || addr < vma->vm_start) { + vma = vma_lookup(mm, addr); + if (!vma) { pr_debug("failed to find vma for prange %p\n", prange); r = -EFAULT; break; @@ -973,12 +967,10 @@ out_unlock_prange: out_unlock_svms: mutex_unlock(&p->svms.lock); out_unref_process: + pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); kfd_unref_process(p); out_mmput: mmput(mm); - - pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); - return r ? VM_FAULT_SIGBUS : 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h deleted file mode 100644 index f9cd28690151..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h +++ /dev/null @@ -1,291 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -/* - * Copyright 2014-2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef KFD_PM4_HEADERS_DIQ_H_ -#define KFD_PM4_HEADERS_DIQ_H_ - -/*--------------------_INDIRECT_BUFFER-------------------- */ - -#ifndef _PM4__INDIRECT_BUFFER_DEFINED -#define _PM4__INDIRECT_BUFFER_DEFINED -enum _INDIRECT_BUFFER_cache_policy_enum { - cache_policy___indirect_buffer__lru = 0, - cache_policy___indirect_buffer__stream = 1, - cache_policy___indirect_buffer__bypass = 2 -}; - -enum { - IT_INDIRECT_BUFFER_PASID = 0x5C -}; - -struct pm4__indirect_buffer_pasid { - union { - union PM4_MES_TYPE_3_HEADER header; /* header */ - unsigned int ordinal1; - }; - - union { - struct { - unsigned int reserved1:2; - unsigned int ib_base_lo:30; - } bitfields2; - unsigned int ordinal2; - }; - - union { - struct { - unsigned int ib_base_hi:16; - unsigned int reserved2:16; - } bitfields3; - unsigned int ordinal3; - }; - - union { - unsigned int control; - unsigned int ordinal4; - }; - - union { - struct { - unsigned int pasid:10; - unsigned int reserved4:22; - } bitfields5; - unsigned int ordinal5; - }; - -}; - -#endif - -/*--------------------_RELEASE_MEM-------------------- */ - -#ifndef _PM4__RELEASE_MEM_DEFINED -#define _PM4__RELEASE_MEM_DEFINED -enum _RELEASE_MEM_event_index_enum { - event_index___release_mem__end_of_pipe = 5, - event_index___release_mem__shader_done = 6 -}; - -enum _RELEASE_MEM_cache_policy_enum { - cache_policy___release_mem__lru = 0, - cache_policy___release_mem__stream = 1, - cache_policy___release_mem__bypass = 2 -}; - -enum _RELEASE_MEM_dst_sel_enum { - dst_sel___release_mem__memory_controller = 0, - dst_sel___release_mem__tc_l2 = 1, - dst_sel___release_mem__queue_write_pointer_register = 2, - dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3 -}; - -enum _RELEASE_MEM_int_sel_enum { - int_sel___release_mem__none = 0, - int_sel___release_mem__send_interrupt_only = 1, - int_sel___release_mem__send_interrupt_after_write_confirm = 2, - int_sel___release_mem__send_data_after_write_confirm = 3 -}; - -enum _RELEASE_MEM_data_sel_enum { - data_sel___release_mem__none = 0, - data_sel___release_mem__send_32_bit_low = 1, - data_sel___release_mem__send_64_bit_data = 2, - data_sel___release_mem__send_gpu_clock_counter = 3, - data_sel___release_mem__send_cp_perfcounter_hi_lo = 4, - data_sel___release_mem__store_gds_data_to_memory = 5 -}; - -struct pm4__release_mem { - union { - union PM4_MES_TYPE_3_HEADER header; /*header */ - unsigned int ordinal1; - }; - - union { - struct { - unsigned int event_type:6; - unsigned int reserved1:2; - enum _RELEASE_MEM_event_index_enum event_index:4; - unsigned int tcl1_vol_action_ena:1; - unsigned int tc_vol_action_ena:1; - unsigned int reserved2:1; - unsigned int tc_wb_action_ena:1; - unsigned int tcl1_action_ena:1; - unsigned int tc_action_ena:1; - unsigned int reserved3:6; - unsigned int atc:1; - enum _RELEASE_MEM_cache_policy_enum cache_policy:2; - unsigned int reserved4:5; - } bitfields2; - unsigned int ordinal2; - }; - - union { - struct { - unsigned int reserved5:16; - enum _RELEASE_MEM_dst_sel_enum dst_sel:2; - unsigned int reserved6:6; - enum _RELEASE_MEM_int_sel_enum int_sel:3; - unsigned int reserved7:2; - enum _RELEASE_MEM_data_sel_enum data_sel:3; - } bitfields3; - unsigned int ordinal3; - }; - - union { - struct { - unsigned int reserved8:2; - unsigned int address_lo_32b:30; - } bitfields4; - struct { - unsigned int reserved9:3; - unsigned int address_lo_64b:29; - } bitfields5; - unsigned int ordinal4; - }; - - unsigned int address_hi; - - unsigned int data_lo; - - unsigned int data_hi; - -}; -#endif - - -/*--------------------_SET_CONFIG_REG-------------------- */ - -#ifndef _PM4__SET_CONFIG_REG_DEFINED -#define _PM4__SET_CONFIG_REG_DEFINED - -struct pm4__set_config_reg { - union { - union PM4_MES_TYPE_3_HEADER header; /*header */ - unsigned int ordinal1; - }; - - union { - struct { - unsigned int reg_offset:16; - unsigned int reserved1:7; - unsigned int vmid_shift:5; - unsigned int insert_vmid:1; - unsigned int reserved2:3; - } bitfields2; - unsigned int ordinal2; - }; - - unsigned int reg_data[1]; /*1..N of these fields */ - -}; -#endif - -/*--------------------_WAIT_REG_MEM-------------------- */ - -#ifndef _PM4__WAIT_REG_MEM_DEFINED -#define _PM4__WAIT_REG_MEM_DEFINED -enum _WAIT_REG_MEM_function_enum { - function___wait_reg_mem__always_pass = 0, - function___wait_reg_mem__less_than_ref_value = 1, - function___wait_reg_mem__less_than_equal_to_the_ref_value = 2, - function___wait_reg_mem__equal_to_the_reference_value = 3, - function___wait_reg_mem__not_equal_reference_value = 4, - function___wait_reg_mem__greater_than_or_equal_reference_value = 5, - function___wait_reg_mem__greater_than_reference_value = 6, - function___wait_reg_mem__reserved = 7 -}; - -enum _WAIT_REG_MEM_mem_space_enum { - mem_space___wait_reg_mem__register_space = 0, - mem_space___wait_reg_mem__memory_space = 1 -}; - -enum _WAIT_REG_MEM_operation_enum { - operation___wait_reg_mem__wait_reg_mem = 0, - operation___wait_reg_mem__wr_wait_wr_reg = 1 -}; - -struct pm4__wait_reg_mem { - union { - union PM4_MES_TYPE_3_HEADER header; /*header */ - unsigned int ordinal1; - }; - - union { - struct { - enum _WAIT_REG_MEM_function_enum function:3; - unsigned int reserved1:1; - enum _WAIT_REG_MEM_mem_space_enum mem_space:2; - enum _WAIT_REG_MEM_operation_enum operation:2; - unsigned int reserved2:24; - } bitfields2; - unsigned int ordinal2; - }; - - union { - struct { - unsigned int reserved3:2; - unsigned int memory_poll_addr_lo:30; - } bitfields3; - struct { - unsigned int register_poll_addr:16; - unsigned int reserved4:16; - } bitfields4; - struct { - unsigned int register_write_addr:16; - unsigned int reserved5:16; - } bitfields5; - unsigned int ordinal3; - }; - - union { - struct { - unsigned int poll_address_hi:16; - unsigned int reserved6:16; - } bitfields6; - struct { - unsigned int register_write_addr:16; - unsigned int reserved7:16; - } bitfields7; - unsigned int ordinal4; - }; - - unsigned int reference; - - unsigned int mask; - - union { - struct { - unsigned int poll_interval:16; - unsigned int reserved8:16; - } bitfields8; - unsigned int ordinal7; - }; - -}; -#endif - - -#endif /* KFD_PM4_HEADERS_DIQ_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index bf610e3b683b..552c3ac85a13 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -254,8 +254,6 @@ struct kfd_dev { struct amdgpu_device *adev; struct kfd_device_info device_info; - struct pci_dev *pdev; - struct drm_device *ddev; unsigned int id; /* topology stub index */ @@ -1365,7 +1363,7 @@ void kfd_dec_compute_active(struct kfd_dev *dev); static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd) { #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) - struct drm_device *ddev = kfd->ddev; + struct drm_device *ddev = adev_to_drm(kfd->adev); return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, ddev->render->index, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 951b63677248..a26257171ab7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1050,8 +1050,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) * for auto suspend */ if (pdd->runtime_inuse) { - pm_runtime_mark_last_busy(pdd->dev->ddev->dev); - pm_runtime_put_autosuspend(pdd->dev->ddev->dev); + pm_runtime_mark_last_busy(adev_to_drm(pdd->dev->adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(pdd->dev->adev)->dev); pdd->runtime_inuse = false; } @@ -1633,9 +1633,9 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, * pdd is destroyed. */ if (!pdd->runtime_inuse) { - err = pm_runtime_get_sync(dev->ddev->dev); + err = pm_runtime_get_sync(adev_to_drm(dev->adev)->dev); if (err < 0) { - pm_runtime_put_autosuspend(dev->ddev->dev); + pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev); return ERR_PTR(err); } } @@ -1655,8 +1655,8 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, out: /* balance runpm reference count and exit with error */ if (!pdd->runtime_inuse) { - pm_runtime_mark_last_busy(dev->ddev->dev); - pm_runtime_put_autosuspend(dev->ddev->dev); + pm_runtime_mark_last_busy(adev_to_drm(dev->adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev); } return ERR_PTR(err); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 64fdf63093a0..814f99888ab1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -26,7 +26,7 @@ #include "amdgpu_sync.h" #include "amdgpu_object.h" #include "amdgpu_vm.h" -#include "amdgpu_mn.h" +#include "amdgpu_hmm.h" #include "amdgpu.h" #include "amdgpu_xgmi.h" #include "kfd_priv.h" @@ -259,7 +259,7 @@ void svm_range_free_dma_mappings(struct svm_range *prange) pr_debug("failed to find device idx %d\n", gpuidx); continue; } - dev = &pdd->dev->pdev->dev; + dev = &pdd->dev->adev->pdev->dev; svm_range_dma_unmap(dev, dma_addr, 0, prange->npages); kvfree(dma_addr); prange->dma_addr[gpuidx] = NULL; @@ -1586,8 +1586,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm, unsigned long npages; bool readonly; - vma = find_vma(mm, addr); - if (!vma || addr < vma->vm_start) { + vma = vma_lookup(mm, addr); + if (!vma) { r = -EFAULT; goto unreserve_out; } @@ -1596,9 +1596,9 @@ static int svm_range_validate_and_map(struct mm_struct *mm, next = min(vma->vm_end, end); npages = (next - addr) >> PAGE_SHIFT; WRITE_ONCE(p->svms.faulting_task, current); - r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, - addr, npages, &hmm_range, - readonly, true, owner); + r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages, + readonly, owner, NULL, + &hmm_range); WRITE_ONCE(p->svms.faulting_task, NULL); if (r) { pr_debug("failed %d to get svm range pages\n", r); @@ -2542,8 +2542,8 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, struct interval_tree_node *node; unsigned long start_limit, end_limit; - vma = find_vma(p->mm, addr << PAGE_SHIFT); - if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { + vma = vma_lookup(p->mm, addr << PAGE_SHIFT); + if (!vma) { pr_debug("VMA does not exist in address [0x%llx]\n", addr); return -EFAULT; } @@ -2871,8 +2871,8 @@ retry_write_locked: /* __do_munmap removed VMA, return success as we are handling stale * retry fault. */ - vma = find_vma(mm, addr << PAGE_SHIFT); - if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { + vma = vma_lookup(mm, addr << PAGE_SHIFT); + if (!vma) { pr_debug("address 0x%llx VMA is removed\n", addr); r = 0; goto out_unlock_range; @@ -3152,9 +3152,8 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size) start <<= PAGE_SHIFT; end = start + (size << PAGE_SHIFT); do { - vma = find_vma(p->mm, start); - if (!vma || start < vma->vm_start || - (vma->vm_flags & device_vma)) + vma = vma_lookup(p->mm, start); + if (!vma || (vma->vm_flags & device_vma)) return -EFAULT; start = min(end, vma->vm_end); } while (start < end); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3f0a4a415907..bceb1a5b2518 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -115,7 +115,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu && top_dev->gpu->pdev == pdev) { + if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) { device = top_dev->gpu; break; } @@ -364,7 +364,6 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, /* Making sure that the buffer is an empty string */ buffer[0] = 0; - cache = container_of(attr, struct kfd_cache_properties, attr); if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) return -EPERM; @@ -379,12 +378,13 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); + offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); - for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++) + for (i = 0; i < cache->sibling_map_size; i++) for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) /* Check each bit */ offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", - (cache->sibling_map[i] >> j) & 1); + (cache->sibling_map[i] >> j) & 1); /* Replace the last "," with end of line */ buffer[offs-1] = '\n'; @@ -1169,13 +1169,12 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) local_mem_size = gpu->local_mem_info.local_mem_size_private + gpu->local_mem_info.local_mem_size_public; - - buf[0] = gpu->pdev->devfn; - buf[1] = gpu->pdev->subsystem_vendor | - (gpu->pdev->subsystem_device << 16); - buf[2] = pci_domain_nr(gpu->pdev->bus); - buf[3] = gpu->pdev->device; - buf[4] = gpu->pdev->bus->number; + buf[0] = gpu->adev->pdev->devfn; + buf[1] = gpu->adev->pdev->subsystem_vendor | + (gpu->adev->pdev->subsystem_device << 16); + buf[2] = pci_domain_nr(gpu->adev->pdev->bus); + buf[3] = gpu->adev->pdev->device; + buf[4] = gpu->adev->pdev->bus->number; buf[5] = lower_32_bits(local_mem_size); buf[6] = upper_32_bits(local_mem_size); @@ -1198,7 +1197,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) struct kfd_iolink_properties *iolink; struct kfd_iolink_properties *p2plink; - down_write(&topology_lock); list_for_each_entry(dev, &topology_device_list, list) { /* Discrete GPUs need their own topology device list * entries. Don't assign them to CPU/APU nodes. @@ -1222,7 +1220,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) break; } } - up_write(&topology_lock); return out_dev; } @@ -1269,7 +1266,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, if (target_gpu_dev) { uint32_t cap; - pcie_capability_read_dword(target_gpu_dev->gpu->pdev, + pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev, PCI_EXP_DEVCAP2, &cap); if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | @@ -1593,21 +1590,290 @@ out: return ret; } + +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ +static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, + struct kfd_gpu_cache_info *pcache_info, + struct kfd_cu_info *cu_info, + int cu_bitmask, + int cache_type, unsigned int cu_processor_id, + int cu_block) +{ + unsigned int cu_sibling_map_mask; + int first_active_cu; + struct kfd_cache_properties *pcache = NULL; + + cu_sibling_map_mask = cu_bitmask; + cu_sibling_map_mask >>= cu_block; + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + first_active_cu = ffs(cu_sibling_map_mask); + + /* CU could be inactive. In case of shared cache find the first active + * CU. and incase of non-shared cache check if the CU is inactive. If + * inactive active skip it + */ + if (first_active_cu) { + pcache = kfd_alloc_struct(pcache); + if (!pcache) + return -ENOMEM; + + memset(pcache, 0, sizeof(struct kfd_cache_properties)); + pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); + pcache->cache_level = pcache_info[cache_type].cache_level; + pcache->cache_size = pcache_info[cache_type].cache_size; + + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_DATA; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_CPU; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_HSACU; + + /* Sibling map is w.r.t processor_id_low, so shift out + * inactive CU + */ + cu_sibling_map_mask = + cu_sibling_map_mask >> (first_active_cu - 1); + + pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); + pcache->sibling_map[1] = + (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); + pcache->sibling_map[2] = + (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); + pcache->sibling_map[3] = + (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); + + pcache->sibling_map_size = 4; + *props_ext = pcache; + + return 0; + } + return 1; +} + +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ +static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, + struct kfd_gpu_cache_info *pcache_info, + struct kfd_cu_info *cu_info, + int cache_type, unsigned int cu_processor_id) +{ + unsigned int cu_sibling_map_mask; + int first_active_cu; + int i, j, k; + struct kfd_cache_properties *pcache = NULL; + + cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; + cu_sibling_map_mask &= + ((1 << pcache_info[cache_type].num_cu_shared) - 1); + first_active_cu = ffs(cu_sibling_map_mask); + + /* CU could be inactive. In case of shared cache find the first active + * CU. and incase of non-shared cache check if the CU is inactive. If + * inactive active skip it + */ + if (first_active_cu) { + pcache = kfd_alloc_struct(pcache); + if (!pcache) + return -ENOMEM; + + memset(pcache, 0, sizeof(struct kfd_cache_properties)); + pcache->processor_id_low = cu_processor_id + + (first_active_cu - 1); + pcache->cache_level = pcache_info[cache_type].cache_level; + pcache->cache_size = pcache_info[cache_type].cache_size; + + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_DATA; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_CPU; + if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) + pcache->cache_type |= HSA_CACHE_TYPE_HSACU; + + /* Sibling map is w.r.t processor_id_low, so shift out + * inactive CU + */ + cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); + k = 0; + + for (i = 0; i < cu_info->num_shader_engines; i++) { + for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { + pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); + pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); + pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); + pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); + k += 4; + + cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4]; + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + } + } + pcache->sibling_map_size = k; + *props_ext = pcache; + return 0; + } + return 1; +} + +#define KFD_MAX_CACHE_TYPES 6 + +/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info + * tables + */ +static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev) +{ + struct kfd_gpu_cache_info *pcache_info = NULL; + int i, j, k; + int ct = 0; + unsigned int cu_processor_id; + int ret; + unsigned int num_cu_shared; + struct kfd_cu_info cu_info; + struct kfd_cu_info *pcu_info; + int gpu_processor_id; + struct kfd_cache_properties *props_ext; + int num_of_entries = 0; + int num_of_cache_types = 0; + struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; + + amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); + pcu_info = &cu_info; + + gpu_processor_id = dev->node_props.simd_id_base; + + pcache_info = cache_info; + num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info); + if (!num_of_cache_types) { + pr_warn("no cache info found\n"); + return; + } + + /* For each type of cache listed in the kfd_gpu_cache_info table, + * go through all available Compute Units. + * The [i,j,k] loop will + * if kfd_gpu_cache_info.num_cu_shared = 1 + * will parse through all available CU + * If (kfd_gpu_cache_info.num_cu_shared != 1) + * then it will consider only one CU from + * the shared unit + */ + for (ct = 0; ct < num_of_cache_types; ct++) { + cu_processor_id = gpu_processor_id; + if (pcache_info[ct].cache_level == 1) { + for (i = 0; i < pcu_info->num_shader_engines; i++) { + for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { + for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { + + ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, + pcu_info->cu_bitmap[i % 4][j + i / 4], ct, + cu_processor_id, k); + + if (ret < 0) + break; + + if (!ret) { + num_of_entries++; + list_add_tail(&props_ext->list, &dev->cache_props); + } + + /* Move to next CU block */ + num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= + pcu_info->num_cu_per_sh) ? + pcache_info[ct].num_cu_shared : + (pcu_info->num_cu_per_sh - k); + cu_processor_id += num_cu_shared; + } + } + } + } else { + ret = fill_in_l2_l3_pcache(&props_ext, pcache_info, + pcu_info, ct, cu_processor_id); + + if (ret < 0) + break; + + if (!ret) { + num_of_entries++; + list_add_tail(&props_ext->list, &dev->cache_props); + } + } + } + dev->node_props.caches_count += num_of_entries; + pr_debug("Added [%d] GPU cache entries\n", num_of_entries); +} + +static int kfd_topology_add_device_locked(struct kfd_dev *gpu, uint32_t gpu_id, + struct kfd_topology_device **dev) +{ + int proximity_domain = ++topology_crat_proximity_domain; + struct list_head temp_topology_device_list; + void *crat_image = NULL; + size_t image_size = 0; + int res; + + res = kfd_create_crat_image_virtual(&crat_image, &image_size, + COMPUTE_UNIT_GPU, gpu, + proximity_domain); + if (res) { + pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", + gpu_id); + topology_crat_proximity_domain--; + goto err; + } + + INIT_LIST_HEAD(&temp_topology_device_list); + + res = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); + if (res) { + pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", + gpu_id); + topology_crat_proximity_domain--; + goto err; + } + + kfd_topology_update_device_list(&temp_topology_device_list, + &topology_device_list); + + *dev = kfd_assign_gpu(gpu); + if (WARN_ON(!*dev)) { + res = -ENODEV; + goto err; + } + + /* Fill the cache affinity information here for the GPUs + * using VCRAT + */ + kfd_fill_cache_non_crat_info(*dev, gpu); + + /* Update the SYSFS tree, since we added another topology + * device + */ + res = kfd_topology_update_sysfs(); + if (!res) + sys_props.generation_count++; + else + pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", + gpu_id, res); + +err: + kfd_destroy_crat_image(crat_image); + return res; +} + int kfd_topology_add_device(struct kfd_dev *gpu) { uint32_t gpu_id; struct kfd_topology_device *dev; struct kfd_cu_info cu_info; int res = 0; - struct list_head temp_topology_device_list; - void *crat_image = NULL; - size_t image_size = 0; - int proximity_domain; int i; const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; - INIT_LIST_HEAD(&temp_topology_device_list); - gpu_id = kfd_generate_gpu_id(gpu); pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); @@ -1617,50 +1883,13 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * CRAT to create a new topology device. Once created assign the gpu to * that topology device */ + down_write(&topology_lock); dev = kfd_assign_gpu(gpu); - if (!dev) { - down_write(&topology_lock); - proximity_domain = ++topology_crat_proximity_domain; - - res = kfd_create_crat_image_virtual(&crat_image, &image_size, - COMPUTE_UNIT_GPU, gpu, - proximity_domain); - if (res) { - pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", - gpu_id); - topology_crat_proximity_domain--; - return res; - } - res = kfd_parse_crat_table(crat_image, - &temp_topology_device_list, - proximity_domain); - if (res) { - pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", - gpu_id); - topology_crat_proximity_domain--; - goto err; - } - - kfd_topology_update_device_list(&temp_topology_device_list, - &topology_device_list); - - /* Update the SYSFS tree, since we added another topology - * device - */ - res = kfd_topology_update_sysfs(); - up_write(&topology_lock); - - if (!res) - sys_props.generation_count++; - else - pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", - gpu_id, res); - dev = kfd_assign_gpu(gpu); - if (WARN_ON(!dev)) { - res = -ENODEV; - goto err; - } - } + if (!dev) + res = kfd_topology_add_device_locked(gpu, gpu_id, &dev); + up_write(&topology_lock); + if (res) + return res; dev->gpu_id = gpu_id; gpu->id = gpu_id; @@ -1688,13 +1917,13 @@ int kfd_topology_add_device(struct kfd_dev *gpu) cu_info.num_shader_arrays_per_engine; dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version; - dev->node_props.vendor_id = gpu->pdev->vendor; - dev->node_props.device_id = gpu->pdev->device; + dev->node_props.vendor_id = gpu->adev->pdev->vendor; + dev->node_props.device_id = gpu->adev->pdev->device; dev->node_props.capability |= ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & HSA_CAP_ASIC_REVISION_MASK); - dev->node_props.location_id = pci_dev_id(gpu->pdev); - dev->node_props.domain = pci_domain_nr(gpu->pdev->bus); + dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); + dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); dev->node_props.max_engine_clk_fcompute = amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); dev->node_props.max_engine_clk_ccompute = @@ -1783,11 +2012,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu) kfd_debug_print_topology(); - if (!res) - kfd_notify_gpu_change(gpu_id, 1); -err: - kfd_destroy_crat_image(crat_image); - return res; + kfd_notify_gpu_change(gpu_id, 1); + + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 9f6c949186c1..fca30d00a9bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -80,6 +80,8 @@ struct kfd_mem_properties { struct attribute attr; }; +#define CACHE_SIBLINGMAP_SIZE 64 + struct kfd_cache_properties { struct list_head list; uint32_t processor_id_low; @@ -90,10 +92,11 @@ struct kfd_cache_properties { uint32_t cache_assoc; uint32_t cache_latency; uint32_t cache_type; - uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; + uint8_t sibling_map[CACHE_SIBLINGMAP_SIZE]; struct kfd_dev *gpu; struct kobject *kobj; struct attribute attr; + uint32_t sibling_map_size; }; struct kfd_iolink_properties { @@ -128,7 +131,6 @@ struct kfd_topology_device { uint32_t proximity_domain; struct kfd_node_properties node_props; struct list_head mem_props; - uint32_t cache_count; struct list_head cache_props; struct list_head io_link_props; struct list_head p2p_link_props; |