0113-drm-vc4-Add-an-interface-for-capturing-the-GPU-state.patch 9.6 KB

From 4deea5f5ad38d8b398355a636b9113150ca9da9f Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 30 Oct 2015 10:09:02 -0700
Subject: [PATCH] drm/vc4: Add an interface for capturing the GPU state after a
 hang.

This can be parsed with vc4-gpu-tools tools for trying to figure out
what was going on.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_bo.c  |   4 +-
 drivers/gpu/drm/vc4/vc4_drv.c |   1 +
 drivers/gpu/drm/vc4/vc4_drv.h |   4 +
 drivers/gpu/drm/vc4/vc4_gem.c | 185 ++++++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/vc4_drm.h    |  45 ++++++++++
 5 files changed, 237 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -415,8 +415,8 @@ int vc4_mmap(struct file *filp, struct v
 	gem_obj = vma->vm_private_data;
 	bo = to_vc4_bo(gem_obj);
 
-	if (bo->validated_shader) {
-		DRM_ERROR("mmaping of shader BOs not allowed.\n");
+	if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
+		DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
 		return -EINVAL;
 	}
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -81,6 +81,7 @@ static const struct drm_ioctl_desc vc4_d
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY),
 };
 
 static struct drm_driver vc4_drm_driver = {
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -20,6 +20,8 @@ struct vc4_dev {
 	struct drm_fbdev_cma *fbdev;
 	struct rpi_firmware *firmware;
 
+	struct vc4_hang_state *hang_state;
+
 	/* The kernel-space BO cache. Tracks buffers that have been
 	 * unreferenced by all other users (refcounts of 0!) but not
 	 * yet freed, so we can do cheap allocations.
@@ -366,6 +368,8 @@ int vc4_create_shader_bo_ioctl(struct dr
 			       struct drm_file *file_priv);
 int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
+int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv);
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
 void *vc4_prime_vmap(struct drm_gem_object *obj);
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -40,6 +40,186 @@ vc4_queue_hangcheck(struct drm_device *d
 		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
 }
 
+struct vc4_hang_state {
+	struct drm_vc4_get_hang_state user_state;
+
+	u32 bo_count;
+	struct drm_gem_object **bo;
+};
+
+static void
+vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
+{
+	unsigned int i;
+
+	mutex_lock(&dev->struct_mutex);
+	for (i = 0; i < state->user_state.bo_count; i++) {
+		drm_gem_object_unreference(state->bo[i]);
+	}
+	mutex_unlock(&dev->struct_mutex);
+
+	kfree(state);
+}
+
+int
+vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv)
+{
+	struct drm_vc4_get_hang_state *get_state = data;
+	struct drm_vc4_get_hang_state_bo *bo_state;
+	struct vc4_hang_state *kernel_state;
+	struct drm_vc4_get_hang_state *state;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	unsigned long irqflags;
+	u32 i;
+	int ret;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	kernel_state = vc4->hang_state;
+	if (!kernel_state) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return -ENOENT;
+	}
+	state = &kernel_state->user_state;
+
+	/* If the user's array isn't big enough, just return the
+	 * required array size.
+	 */
+	if (get_state->bo_count < state->bo_count) {
+		get_state->bo_count = state->bo_count;
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return 0;
+	}
+
+	vc4->hang_state = NULL;
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
+	state->bo = get_state->bo;
+	memcpy(get_state, state, sizeof(*state));
+
+	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
+	if (!bo_state) {
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	for (i = 0; i < state->bo_count; i++) {
+		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
+		u32 handle;
+		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
+					    &handle);
+
+		if (ret) {
+			state->bo_count = i - 1;
+			goto err;
+		}
+		bo_state[i].handle = handle;
+		bo_state[i].paddr = vc4_bo->base.paddr;
+		bo_state[i].size = vc4_bo->base.base.size;
+	}
+
+	ret = copy_to_user((void __user *)(uintptr_t)get_state->bo,
+			   bo_state,
+			   state->bo_count * sizeof(*bo_state));
+	kfree(bo_state);
+
+err_free:
+
+	vc4_free_hang_state(dev, kernel_state);
+
+err:
+	return ret;
+}
+
+static void
+vc4_save_hang_state(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_vc4_get_hang_state *state;
+	struct vc4_hang_state *kernel_state;
+	struct vc4_exec_info *exec;
+	struct vc4_bo *bo;
+	unsigned long irqflags;
+	unsigned int i, unref_list_count;
+
+	kernel_state = kcalloc(1, sizeof(*state), GFP_KERNEL);
+	if (!kernel_state)
+		return;
+
+	state = &kernel_state->user_state;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	exec = vc4_first_job(vc4);
+	if (!exec) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return;
+	}
+
+	unref_list_count = 0;
+	list_for_each_entry(bo, &exec->unref_list, unref_head)
+		unref_list_count++;
+
+	state->bo_count = exec->bo_count + unref_list_count;
+	kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
+				   GFP_ATOMIC);
+	if (!kernel_state->bo) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return;
+	}
+
+	for (i = 0; i < exec->bo_count; i++) {
+		drm_gem_object_reference(&exec->bo[i].bo->base);
+		kernel_state->bo[i] = &exec->bo[i].bo->base;
+	}
+
+	list_for_each_entry(bo, &exec->unref_list, unref_head) {
+		drm_gem_object_reference(&bo->base.base);
+		kernel_state->bo[i] = &bo->base.base;
+		i++;
+	}
+
+	state->start_bin = exec->ct0ca;
+	state->start_render = exec->ct1ca;
+
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+	state->ct0ca = V3D_READ(V3D_CTNCA(0));
+	state->ct0ea = V3D_READ(V3D_CTNEA(0));
+
+	state->ct1ca = V3D_READ(V3D_CTNCA(1));
+	state->ct1ea = V3D_READ(V3D_CTNEA(1));
+
+	state->ct0cs = V3D_READ(V3D_CTNCS(0));
+	state->ct1cs = V3D_READ(V3D_CTNCS(1));
+
+	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
+	state->ct1ra0 = V3D_READ(V3D_CT01RA0);
+
+	state->bpca = V3D_READ(V3D_BPCA);
+	state->bpcs = V3D_READ(V3D_BPCS);
+	state->bpoa = V3D_READ(V3D_BPOA);
+	state->bpos = V3D_READ(V3D_BPOS);
+
+	state->vpmbase = V3D_READ(V3D_VPMBASE);
+
+	state->dbge = V3D_READ(V3D_DBGE);
+	state->fdbgo = V3D_READ(V3D_FDBGO);
+	state->fdbgb = V3D_READ(V3D_FDBGB);
+	state->fdbgr = V3D_READ(V3D_FDBGR);
+	state->fdbgs = V3D_READ(V3D_FDBGS);
+	state->errstat = V3D_READ(V3D_ERRSTAT);
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	if (vc4->hang_state) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		vc4_free_hang_state(dev, kernel_state);
+	} else {
+		vc4->hang_state = kernel_state;
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+	}
+}
+
 static void
 vc4_reset(struct drm_device *dev)
 {
@@ -64,6 +244,8 @@ vc4_reset_work(struct work_struct *work)
 	struct vc4_dev *vc4 =
 		container_of(work, struct vc4_dev, hangcheck.reset_work);
 
+	vc4_save_hang_state(vc4->dev);
+
 	vc4_reset(vc4->dev);
 }
 
@@ -673,4 +855,7 @@ vc4_gem_destroy(struct drm_device *dev)
 	}
 
 	vc4_bo_cache_destroy(dev);
+
+	if (vc4->hang_state)
+		vc4_free_hang_state(dev, vc4->hang_state);
 }
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -32,6 +32,7 @@
 #define DRM_VC4_CREATE_BO                         0x03
 #define DRM_VC4_MMAP_BO                           0x04
 #define DRM_VC4_CREATE_SHADER_BO                  0x05
+#define DRM_VC4_GET_HANG_STATE                    0x06
 
 #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
 #define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
@@ -39,6 +40,7 @@
 #define DRM_IOCTL_VC4_CREATE_BO           DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
 #define DRM_IOCTL_VC4_MMAP_BO             DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
 #define DRM_IOCTL_VC4_CREATE_SHADER_BO    DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
+#define DRM_IOCTL_VC4_GET_HANG_STATE      DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
 
 struct drm_vc4_submit_rcl_surface {
 	uint32_t hindex; /* Handle index, or ~0 if not present. */
@@ -226,4 +228,47 @@ struct drm_vc4_mmap_bo {
 	uint64_t offset;
 };
 
+struct drm_vc4_get_hang_state_bo {
+	uint32_t handle;
+	uint32_t paddr;
+	uint32_t size;
+	uint32_t pad;
+};
+
+/**
+ * struct drm_vc4_get_hang_state - ioctl argument for collecting state
+ * from a GPU hang for analysis.
+*/
+struct drm_vc4_get_hang_state {
+	/** Pointer to array of struct drm_vc4_get_hang_state_bo. */
+	uint64_t bo;
+	/**
+	 * On input, the size of the bo array. Output is the number
+	 * of bos to be returned.
+	 */
+	uint32_t bo_count;
+
+	uint32_t start_bin, start_render;
+
+	uint32_t ct0ca, ct0ea;
+	uint32_t ct1ca, ct1ea;
+	uint32_t ct0cs, ct1cs;
+	uint32_t ct0ra0, ct1ra0;
+
+	uint32_t bpca, bpcs;
+	uint32_t bpoa, bpos;
+
+	uint32_t vpmbase;
+
+	uint32_t dbge;
+	uint32_t fdbgo;
+	uint32_t fdbgb;
+	uint32_t fdbgr;
+	uint32_t fdbgs;
+	uint32_t errstat;
+
+	/* Pad that we may save more registers into in the future. */
+	uint32_t pad[16];
+};
+
 #endif /* _UAPI_VC4_DRM_H_ */
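
Usage note (not part of the patch): userspace drives the new ioctl with the same size-query handshake implemented in vc4_get_hang_state_ioctl() above. A first call with bo_count == 0 only reports how many BOs were captured; a second call with a large-enough array consumes the saved state, returning the register snapshot plus one GEM handle/paddr/size record per BO, which the dumper can then mmap and write out (shader BOs included, since the vc4_bo.c hunk now permits read-only mappings of them). The C sketch below is illustrative only: the dump_vc4_hang_state() helper and main() wrapper, the /dev/dri/card0 path, and the include path are assumptions rather than part of the patch; a real consumer would live in vc4-gpu-tools. The ioctl is DRM_ROOT_ONLY, so it must be run as root.

/* Hypothetical hang-state dumper sketch; build against the UAPI header
 * added above (the include path may differ, e.g. via libdrm's -I flags).
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/vc4_drm.h>

static int dump_vc4_hang_state(int fd)
{
	struct drm_vc4_get_hang_state get;
	struct drm_vc4_get_hang_state_bo *bos;
	uint32_t i;

	/* First call: bo_count == 0, so the kernel only fills in the
	 * required array size (or fails with ENOENT if no hang was saved).
	 */
	memset(&get, 0, sizeof(get));
	if (ioctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, &get) == -1)
		return -1;

	bos = calloc(get.bo_count, sizeof(*bos));
	if (!bos)
		return -1;
	get.bo = (uintptr_t)bos;

	/* Second call: consumes the saved state, filling the register
	 * snapshot and one handle/paddr/size record per captured BO.
	 */
	if (ioctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, &get) == -1) {
		free(bos);
		return -1;
	}

	printf("bin CT 0x%08x..0x%08x, render CT 0x%08x..0x%08x, errstat 0x%08x\n",
	       get.ct0ca, get.ct0ea, get.ct1ca, get.ct1ea, get.errstat);

	for (i = 0; i < get.bo_count; i++) {
		struct drm_vc4_mmap_bo map = { .handle = bos[i].handle };
		void *ptr;

		if (ioctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map) == -1)
			continue;
		/* Read-only mapping works even for shader BOs after this patch. */
		ptr = mmap(NULL, bos[i].size, PROT_READ, MAP_SHARED, fd, map.offset);
		if (ptr == MAP_FAILED)
			continue;
		printf("BO %u: paddr 0x%08x, %u bytes\n", i, bos[i].paddr, bos[i].size);
		/* ... a real tool would write ptr[0..size) out for vc4-gpu-tools ... */
		munmap(ptr, bos[i].size);
	}

	free(bos);
	return 0;
}

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);	/* assumed vc4 node; run as root */
	int ret = -1;

	if (fd >= 0) {
		ret = dump_vc4_hang_state(fd);
		close(fd);
	}
	if (ret != 0)
		perror("vc4 hang state dump");
	return ret ? 1 : 0;
}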