0281-drm-vc4-Add-support-for-scaling-of-display-planes.patch 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. From 446bf09980764ce077b74a85af9f141e6192d3dc Mon Sep 17 00:00:00 2001
  2. From: Eric Anholt <eric@anholt.net>
  3. Date: Tue, 20 Oct 2015 16:06:57 +0100
  4. Subject: [PATCH] drm/vc4: Add support for scaling of display planes.
  5. This implements a simple policy for choosing scaling modes
  6. (trapezoidal for decimation, PPF for magnification), and a single PPF
  7. filter (Mitchell/Netravali's recommendation).
  8. Signed-off-by: Eric Anholt <eric@anholt.net>
  9. (cherry picked from commit 21af94cf1a4c2d3450ab7fead58e6e2291ab92a9)
  10. ---
  11. drivers/gpu/drm/vc4/vc4_drv.h | 4 +
  12. drivers/gpu/drm/vc4/vc4_hvs.c | 84 +++++++++++++
  13. drivers/gpu/drm/vc4/vc4_plane.c | 253 +++++++++++++++++++++++++++++++++++++---
  14. drivers/gpu/drm/vc4/vc4_regs.h | 46 ++++++++
  15. 4 files changed, 374 insertions(+), 13 deletions(-)
  16. --- a/drivers/gpu/drm/vc4/vc4_drv.h
  17. +++ b/drivers/gpu/drm/vc4/vc4_drv.h
  18. @@ -156,7 +156,11 @@ struct vc4_hvs {
  19. * list. Units are dwords.
  20. */
  21. struct drm_mm dlist_mm;
  22. + /* Memory manager for the LBM memory used by HVS scaling. */
  23. + struct drm_mm lbm_mm;
  24. spinlock_t mm_lock;
  25. +
  26. + struct drm_mm_node mitchell_netravali_filter;
  27. };
  28. struct vc4_plane {
  29. --- a/drivers/gpu/drm/vc4/vc4_hvs.c
  30. +++ b/drivers/gpu/drm/vc4/vc4_hvs.c
  31. @@ -100,12 +100,76 @@ int vc4_hvs_debugfs_regs(struct seq_file
  32. }
  33. #endif
  34. +/* The filter kernel is composed of dwords each containing 3 9-bit
  35. + * signed integers packed next to each other (c0 in bits 8:0, c1 in bits 17:9, c2 in bits 26:18).
  36. + */
  37. +#define VC4_INT_TO_COEFF(coeff) (coeff & 0x1ff) /* NOTE(review): coeff is not parenthesized in the expansion; no user of this macro is visible in this patch */
  38. +#define VC4_PPF_FILTER_WORD(c0, c1, c2) \
  39. + ((((c0) & 0x1ff) << 0) | \
  40. + (((c1) & 0x1ff) << 9) | \
  41. + (((c2) & 0x1ff) << 18)) /* each coefficient is masked to 9 bits before it is shifted into place */
  42. +
  43. +/* The whole filter kernel is arranged as the coefficients 0-16 going
  44. + * up, then a pad, then 17-31 going down and reversed within the
  45. + * dwords. This means that a linear phase kernel (where it is
  46. + * symmetrical at the boundary between 15 and 16) has the last 5
  47. + * dwords matching the first 5, but reversed.
  48. + */
  49. +#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8, \
  50. + c9, c10, c11, c12, c13, c14, c15) \
  51. + {VC4_PPF_FILTER_WORD(c0, c1, c2), \
  52. + VC4_PPF_FILTER_WORD(c3, c4, c5), \
  53. + VC4_PPF_FILTER_WORD(c6, c7, c8), \
  54. + VC4_PPF_FILTER_WORD(c9, c10, c11), \
  55. + VC4_PPF_FILTER_WORD(c12, c13, c14), \
  56. + VC4_PPF_FILTER_WORD(c15, c15, 0)} /* c15 is repeated as coefficient 16 (the symmetric partner); the last slot is the pad */
  57. +
  58. +#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6 /* number of dwords the initializer above produces */
  59. +#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1) /* 11: the 6 dwords above plus 5 mirrored ones */
  60. +
  61. +/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali.
  62. + * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf
  63. + */
  64. +static const u32 mitchell_netravali_1_3_1_3_kernel[] =
  65. + VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18,
  66. + 50, 82, 119, 155, 187, 213, 227); /* only c0..c15 are listed; vc4_hvs_upload_linear_kernel() writes the mirrored second half */
  67. +
  68. +static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs, /* Reserves dlist space and writes a mirrored linear-phase kernel into it. */
  69. + struct drm_mm_node *space, /* node that receives the reservation made in hvs->dlist_mm */
  70. + const u32 *kernel) /* first-half dwords; indices 0..VC4_LINEAR_PHASE_KERNEL_DWORDS-1 are read */
  71. +{ /* Returns 0 on success, otherwise the error code from drm_mm_insert_node(). */
  72. + int ret, i;
  73. + u32 __iomem *dst_kernel;
  74. +
  75. + ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS, 1, /* room for the full 11-dword kernel; NOTE(review): mm_lock is not taken here, unlike the lbm_mm insert in vc4_plane_mode_set; presumably fine because this is called from vc4_hvs_bind, confirm */
  76. + 0);
  77. + if (ret) {
  78. + DRM_ERROR("Failed to allocate space for filter kernel: %d\n",
  79. + ret);
  80. + return ret;
  81. + }
  82. +
  83. + dst_kernel = hvs->dlist + space->start; /* the node start is used as an offset from hvs->dlist */
  84. +
  85. + for (i = 0; i < VC4_KERNEL_DWORDS; i++) {
  86. + if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS) /* dwords 0..5: copied in the order given */
  87. + writel(kernel[i], &dst_kernel[i]);
  88. + else {
  89. + writel(kernel[VC4_KERNEL_DWORDS - i - 1], /* dwords 6..10: kernel[4] down to kernel[0], so kernel[5] is written only once */
  90. + &dst_kernel[i]);
  91. + }
  92. + }
  93. +
  94. + return 0;
  95. +}
  96. +
  97. static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
  98. {
  99. struct platform_device *pdev = to_platform_device(dev);
  100. struct drm_device *drm = dev_get_drvdata(master);
  101. struct vc4_dev *vc4 = drm->dev_private;
  102. struct vc4_hvs *hvs = NULL;
  103. + int ret;
  104. hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL);
  105. if (!hvs)
  106. @@ -130,6 +194,22 @@ static int vc4_hvs_bind(struct device *d
  107. HVS_BOOTLOADER_DLIST_END,
  108. (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END);
  109. + /* Set up the HVS LBM memory manager. We could have some more
  110. + * complicated data structure that allowed reuse of LBM areas
  111. + * between planes when they don't overlap on the screen, but
  112. + * for now we just allocate globally.
  113. + */
  114. + drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024);
  115. +
  116. + /* Upload filter kernels. We only have the one for now, so we
  117. + * keep it around for the lifetime of the driver.
  118. + */
  119. + ret = vc4_hvs_upload_linear_kernel(hvs,
  120. + &hvs->mitchell_netravali_filter,
  121. + mitchell_netravali_1_3_1_3_kernel);
  122. + if (ret)
  123. + return ret;
  124. +
  125. vc4->hvs = hvs;
  126. return 0;
  127. }
  128. @@ -140,7 +220,11 @@ static void vc4_hvs_unbind(struct device
  129. struct drm_device *drm = dev_get_drvdata(master);
  130. struct vc4_dev *vc4 = drm->dev_private;
  131. + if (vc4->hvs->mitchell_netravali_filter.allocated)
  132. + drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter);
  133. +
  134. drm_mm_takedown(&vc4->hvs->dlist_mm);
  135. + drm_mm_takedown(&vc4->hvs->lbm_mm);
  136. vc4->hvs = NULL;
  137. }
  138. --- a/drivers/gpu/drm/vc4/vc4_plane.c
  139. +++ b/drivers/gpu/drm/vc4/vc4_plane.c
  140. @@ -24,6 +24,12 @@
  141. #include "drm_fb_cma_helper.h"
  142. #include "drm_plane_helper.h"
  143. +enum vc4_scaling_mode { /* per-axis scaling choice; vc4_get_scl_field() packs two of these into a 4-bit switch key */
  144. + VC4_SCALING_NONE, /* source and destination sizes are equal */
  145. + VC4_SCALING_TPZ, /* trapezoidal; picked when the destination is smaller than the source */
  146. + VC4_SCALING_PPF, /* PPF; picked when the destination is larger than the source */
  147. +};
  148. +
  149. struct vc4_plane_state {
  150. struct drm_plane_state base;
  151. /* System memory copy of the display list for this element, computed
  152. @@ -47,13 +53,19 @@ struct vc4_plane_state {
  153. /* Clipped coordinates of the plane on the display. */
  154. int crtc_x, crtc_y, crtc_w, crtc_h;
  155. - /* Clipped size of the area scanned from in the FB. */
  156. - u32 src_w, src_h;
  157. + /* Clipped area being scanned from in the FB. */
  158. + u32 src_x, src_y, src_w, src_h;
  159. +
  160. + enum vc4_scaling_mode x_scaling, y_scaling;
  161. + bool is_unity;
  162. /* Offset to start scanning out from the start of the plane's
  163. * BO.
  164. */
  165. u32 offset;
  166. +
  167. + /* Our allocation in LBM for temporary storage during scaling. */
  168. + struct drm_mm_node lbm;
  169. };
  170. static inline struct vc4_plane_state *
  171. @@ -106,6 +118,16 @@ static const struct hvs_format *vc4_get_
  172. return NULL;
  173. }
  174. +static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) /* Picks the scaling mode for one axis from its source and destination sizes. */
  175. +{
  176. + if (dst > src) /* magnification */
  177. + return VC4_SCALING_PPF;
  178. + else if (dst < src) /* decimation */
  179. + return VC4_SCALING_TPZ;
  180. + else /* equal sizes: nothing to scale on this axis */
  181. + return VC4_SCALING_NONE;
  182. +}
  183. +
  184. static bool plane_enabled(struct drm_plane_state *state)
  185. {
  186. return state->fb && state->crtc;
  187. @@ -122,6 +144,8 @@ static struct drm_plane_state *vc4_plane
  188. if (!vc4_state)
  189. return NULL;
  190. + memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
  191. +
  192. __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
  193. if (vc4_state->dlist) {
  194. @@ -141,8 +165,17 @@ static struct drm_plane_state *vc4_plane
  195. static void vc4_plane_destroy_state(struct drm_plane *plane,
  196. struct drm_plane_state *state)
  197. {
  198. + struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
  199. struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
  200. + if (vc4_state->lbm.allocated) {
  201. + unsigned long irqflags;
  202. +
  203. + spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
  204. + drm_mm_remove_node(&vc4_state->lbm);
  205. + spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
  206. + }
  207. +
  208. kfree(vc4_state->dlist);
  209. __drm_atomic_helper_plane_destroy_state(plane, &vc4_state->base);
  210. kfree(state);
  211. @@ -181,23 +214,60 @@ static void vc4_dlist_write(struct vc4_p
  212. vc4_state->dlist[vc4_state->dlist_count++] = val;
  213. }
  214. +/* Returns the scl0/scl1 field based on whether the dimensions need to
  215. + * be up/down/non-scaled.
  216. + * The switch key carries x_scaling in bits 3:2 and y_scaling in bits 1:0.
  217. + * This is a replication of a table from the spec.
  218. + */
  219. +static u32 vc4_get_scl_field(struct drm_plane_state *state)
  220. +{
  221. + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
  222. +
  223. + switch (vc4_state->x_scaling << 2 | vc4_state->y_scaling) {
  224. + case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
  225. + return SCALER_CTL0_SCL_H_PPF_V_PPF;
  226. + case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
  227. + return SCALER_CTL0_SCL_H_TPZ_V_PPF;
  228. + case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
  229. + return SCALER_CTL0_SCL_H_PPF_V_TPZ;
  230. + case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
  231. + return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
  232. + case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
  233. + return SCALER_CTL0_SCL_H_PPF_V_NONE;
  234. + case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
  235. + return SCALER_CTL0_SCL_H_NONE_V_PPF;
  236. + case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
  237. + return SCALER_CTL0_SCL_H_NONE_V_TPZ;
  238. + case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
  239. + return SCALER_CTL0_SCL_H_TPZ_V_NONE;
  240. + default: /* any key not listed above shares the NONE/NONE result */
  241. + case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
  242. + /* The unity case is independently handled by
  243. + * SCALER_CTL0_UNITY. Note that the 0 returned here is numerically
  244. + * the same as SCALER_CTL0_SCL_H_PPF_V_PPF. */
  245. + return 0;
  246. + }
  247. +}
  248. +
  249. static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
  250. {
  251. + struct drm_plane *plane = state->plane;
  252. struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
  253. struct drm_framebuffer *fb = state->fb;
  254. + u32 subpixel_src_mask = (1 << 16) - 1;
  255. vc4_state->offset = fb->offsets[0];
  256. - if (state->crtc_w << 16 != state->src_w ||
  257. - state->crtc_h << 16 != state->src_h) {
  258. - /* We don't support scaling yet, which involves
  259. - * allocating the LBM memory for scaling temporary
  260. - * storage, and putting filter kernels in the HVS
  261. - * context.
  262. - */
  263. + /* We don't support subpixel source positioning for scaling. */
  264. + if ((state->src_x & subpixel_src_mask) ||
  265. + (state->src_y & subpixel_src_mask) ||
  266. + (state->src_w & subpixel_src_mask) ||
  267. + (state->src_h & subpixel_src_mask)) {
  268. return -EINVAL;
  269. }
  270. + vc4_state->src_x = state->src_x >> 16;
  271. + vc4_state->src_y = state->src_y >> 16;
  272. vc4_state->src_w = state->src_w >> 16;
  273. vc4_state->src_h = state->src_h >> 16;
  274. @@ -206,6 +276,23 @@ static int vc4_plane_setup_clipping_and_
  275. vc4_state->crtc_w = state->crtc_w;
  276. vc4_state->crtc_h = state->crtc_h;
  277. + vc4_state->x_scaling = vc4_get_scaling_mode(vc4_state->src_w,
  278. + vc4_state->crtc_w);
  279. + vc4_state->y_scaling = vc4_get_scaling_mode(vc4_state->src_h,
  280. + vc4_state->crtc_h);
  281. + vc4_state->is_unity = (vc4_state->x_scaling == VC4_SCALING_NONE &&
  282. + vc4_state->y_scaling == VC4_SCALING_NONE);
  283. +
  284. + /* No configuring scaling on the cursor plane, since it gets
  285. non-vblank-synced updates, and scaling requires
  286. + LBM changes which have to be vblank-synced.
  287. + */
  288. + if (plane->type == DRM_PLANE_TYPE_CURSOR && !vc4_state->is_unity)
  289. + return -EINVAL;
  290. +
  291. + /* Clamp the on-screen start x/y to 0. The hardware doesn't
  292. + * support negative y, and negative x wastes bandwidth.
  293. + */
  294. if (vc4_state->crtc_x < 0) {
  295. vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format,
  296. 0) *
  297. @@ -223,6 +310,87 @@ static int vc4_plane_setup_clipping_and_
  298. return 0;
  299. }
  300. +static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) /* Appends the two TPZ dlist words for one axis: scale plus initial phase, then the reciprocal. */
  301. +{
  302. + u32 scale, recip;
  303. +
  304. + scale = (1 << 16) * src / dst; /* src/dst ratio scaled by 1 << 16; NOTE(review): dst == 0 is not checked in this function, confirm callers exclude it */
  305. +
  306. + /* The specs note that while the reciprocal would be defined
  307. + * as (1<<32)/scale, ~0 is close enough.
  308. + */
  309. + recip = ~0 / scale;
  310. +
  311. + vc4_dlist_write(vc4_state,
  312. + VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
  313. + VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE)); /* the initial phase is always written as 0 */
  314. + vc4_dlist_write(vc4_state,
  315. + VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP)); /* SCALER_TPZ1_RECIP is declared as bits 15:0 in vc4_regs.h */
  316. +}
  317. +
  318. +static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) /* Appends the single PPF scaling-parameter dlist word for one axis. */
  319. +{
  320. + u32 scale = (1 << 16) * src / dst; /* src/dst ratio scaled by 1 << 16; NOTE(review): dst == 0 is not checked in this function, confirm callers exclude it */
  321. +
  322. + vc4_dlist_write(vc4_state,
  323. + SCALER_PPF_AGC | /* AGC is always enabled here; see the SCALER_PPF_AGC comment in vc4_regs.h */
  324. + VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
  325. + VC4_SET_FIELD(0, SCALER_PPF_IPHASE)); /* the initial phase is always written as 0 */
  326. +}
  327. +
  328. +static u32 vc4_lbm_size(struct drm_plane_state *state) /* Returns the LBM size to reserve for this plane state, or 0 when neither axis is scaled. */
  329. +{
  330. + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
  331. + /* This is the worst case number. One of the two sizes will
  332. + * be used depending on the scaling configuration.
  333. + */
  334. + u32 pix_per_line = max(vc4_state->src_w, (u32)vc4_state->crtc_w); /* the wider of the source and on-screen widths */
  335. + u32 lbm;
  336. +
  337. + if (vc4_state->is_unity)
  338. + return 0;
  339. + else if (vc4_state->y_scaling == VC4_SCALING_TPZ)
  340. + lbm = pix_per_line * 8;
  341. + else { /* NOTE(review): taken for vertical PPF and also for horizontal-only scaling (y_scaling NONE), a case where vc4_plane_mode_set writes no LBM base word; confirm the reservation is wanted then */
  342. + /* In special cases, this multiplier might be 12. */
  343. + lbm = pix_per_line * 16;
  344. + }
  345. +
  346. + lbm = roundup(lbm, 32); /* round the size up to a multiple of 32 */
  347. +
  348. + return lbm;
  349. +}
  350. +
  351. +static void vc4_write_scaling_parameters(struct drm_plane_state *state) /* Appends the per-axis scaling words to the plane dlist, in the order H-PPF, V-PPF, H-TPZ, V-TPZ. */
  352. +{
  353. + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
  354. +
  355. + /* Ch0 H-PPF Word 0: Scaling Parameters */
  356. + if (vc4_state->x_scaling == VC4_SCALING_PPF) {
  357. + vc4_write_ppf(vc4_state,
  358. + vc4_state->src_w, vc4_state->crtc_w);
  359. + }
  360. +
  361. + /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
  362. + if (vc4_state->y_scaling == VC4_SCALING_PPF) {
  363. + vc4_write_ppf(vc4_state,
  364. + vc4_state->src_h, vc4_state->crtc_h);
  365. + vc4_dlist_write(vc4_state, 0xc0c0c0c0); /* the Context word; NOTE(review): the meaning of this constant is not visible in this patch */
  366. + }
  367. +
  368. + /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
  369. + if (vc4_state->x_scaling == VC4_SCALING_TPZ) {
  370. + vc4_write_tpz(vc4_state,
  371. + vc4_state->src_w, vc4_state->crtc_w);
  372. + }
  373. +
  374. + /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
  375. + if (vc4_state->y_scaling == VC4_SCALING_TPZ) {
  376. + vc4_write_tpz(vc4_state,
  377. + vc4_state->src_h, vc4_state->crtc_h);
  378. + vc4_dlist_write(vc4_state, 0xc0c0c0c0); /* the Context word, same constant as the V-PPF case */
  379. + }
  380. +}
  381. /* Writes out a full display list for an active plane to the plane's
  382. * private dlist state.
  383. @@ -230,22 +398,50 @@ static int vc4_plane_setup_clipping_and_
  384. static int vc4_plane_mode_set(struct drm_plane *plane,
  385. struct drm_plane_state *state)
  386. {
  387. + struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
  388. struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
  389. struct drm_framebuffer *fb = state->fb;
  390. struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
  391. u32 ctl0_offset = vc4_state->dlist_count;
  392. const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format);
  393. + u32 scl;
  394. + u32 lbm_size;
  395. + unsigned long irqflags;
  396. int ret;
  397. ret = vc4_plane_setup_clipping_and_scaling(state);
  398. if (ret)
  399. return ret;
  400. + /* Allocate the LBM memory that the HVS will use for temporary
  401. + * storage due to our scaling/format conversion.
  402. + */
  403. + lbm_size = vc4_lbm_size(state);
  404. + if (lbm_size) {
  405. + if (!vc4_state->lbm.allocated) {
  406. + spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
  407. + ret = drm_mm_insert_node(&vc4->hvs->lbm_mm,
  408. + &vc4_state->lbm,
  409. + lbm_size, 32, 0);
  410. + spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
  411. + } else {
  412. + WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
  413. + }
  414. + }
  415. +
  416. + if (ret)
  417. + return ret;
  418. +
  419. + scl = vc4_get_scl_field(state);
  420. +
  421. + /* Control word */
  422. vc4_dlist_write(vc4_state,
  423. SCALER_CTL0_VALID |
  424. (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
  425. (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
  426. - SCALER_CTL0_UNITY);
  427. + (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
  428. + VC4_SET_FIELD(scl, SCALER_CTL0_SCL0) |
  429. + VC4_SET_FIELD(scl, SCALER_CTL0_SCL1));
  430. /* Position Word 0: Image Positions and Alpha Value */
  431. vc4_state->pos0_offset = vc4_state->dlist_count;
  432. @@ -254,9 +450,14 @@ static int vc4_plane_mode_set(struct drm
  433. VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
  434. VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
  435. - /* Position Word 1: Scaled Image Dimensions.
  436. - * Skipped due to SCALER_CTL0_UNITY scaling.
  437. - */
  438. + /* Position Word 1: Scaled Image Dimensions. */
  439. + if (!vc4_state->is_unity) {
  440. + vc4_dlist_write(vc4_state,
  441. + VC4_SET_FIELD(vc4_state->crtc_w,
  442. + SCALER_POS1_SCL_WIDTH) |
  443. + VC4_SET_FIELD(vc4_state->crtc_h,
  444. + SCALER_POS1_SCL_HEIGHT));
  445. + }
  446. /* Position Word 2: Source Image Size, Alpha Mode */
  447. vc4_state->pos2_offset = vc4_state->dlist_count;
  448. @@ -282,6 +483,32 @@ static int vc4_plane_mode_set(struct drm
  449. vc4_dlist_write(vc4_state,
  450. VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH));
  451. + if (!vc4_state->is_unity) {
  452. + /* LBM Base Address. */
  453. + if (vc4_state->y_scaling != VC4_SCALING_NONE)
  454. + vc4_dlist_write(vc4_state, vc4_state->lbm.start);
  455. +
  456. + vc4_write_scaling_parameters(state);
  457. +
  458. + /* If any PPF setup was done, then all the kernel
  459. + * pointers get uploaded.
  460. + */
  461. + if (vc4_state->x_scaling == VC4_SCALING_PPF ||
  462. + vc4_state->y_scaling == VC4_SCALING_PPF) {
  463. + u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
  464. + SCALER_PPF_KERNEL_OFFSET);
  465. +
  466. + /* HPPF plane 0 */
  467. + vc4_dlist_write(vc4_state, kernel);
  468. + /* VPPF plane 0 */
  469. + vc4_dlist_write(vc4_state, kernel);
  470. + /* HPPF plane 1 */
  471. + vc4_dlist_write(vc4_state, kernel);
  472. + /* VPPF plane 1 */
  473. + vc4_dlist_write(vc4_state, kernel);
  474. + }
  475. + }
  476. +
  477. vc4_state->dlist[ctl0_offset] |=
  478. VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
  479. --- a/drivers/gpu/drm/vc4/vc4_regs.h
  480. +++ b/drivers/gpu/drm/vc4/vc4_regs.h
  481. @@ -536,6 +536,21 @@ enum hvs_pixel_format {
  482. #define SCALER_CTL0_ORDER_MASK VC4_MASK(14, 13)
  483. #define SCALER_CTL0_ORDER_SHIFT 13
  484. +#define SCALER_CTL0_SCL1_MASK VC4_MASK(10, 8)
  485. +#define SCALER_CTL0_SCL1_SHIFT 8
  486. +
  487. +#define SCALER_CTL0_SCL0_MASK VC4_MASK(7, 5)
  488. +#define SCALER_CTL0_SCL0_SHIFT 5
  489. +
  490. +#define SCALER_CTL0_SCL_H_PPF_V_PPF 0
  491. +#define SCALER_CTL0_SCL_H_TPZ_V_PPF 1
  492. +#define SCALER_CTL0_SCL_H_PPF_V_TPZ 2
  493. +#define SCALER_CTL0_SCL_H_TPZ_V_TPZ 3
  494. +#define SCALER_CTL0_SCL_H_PPF_V_NONE 4
  495. +#define SCALER_CTL0_SCL_H_NONE_V_PPF 5
  496. +#define SCALER_CTL0_SCL_H_NONE_V_TPZ 6
  497. +#define SCALER_CTL0_SCL_H_TPZ_V_NONE 7
  498. +
  499. /* Set to indicate no scaling. */
  500. #define SCALER_CTL0_UNITY BIT(4)
  501. @@ -551,6 +566,12 @@ enum hvs_pixel_format {
  502. #define SCALER_POS0_START_X_MASK VC4_MASK(11, 0)
  503. #define SCALER_POS0_START_X_SHIFT 0
  504. +#define SCALER_POS1_SCL_HEIGHT_MASK VC4_MASK(27, 16)
  505. +#define SCALER_POS1_SCL_HEIGHT_SHIFT 16
  506. +
  507. +#define SCALER_POS1_SCL_WIDTH_MASK VC4_MASK(11, 0)
  508. +#define SCALER_POS1_SCL_WIDTH_SHIFT 0
  509. +
  510. #define SCALER_POS2_ALPHA_MODE_MASK VC4_MASK(31, 30)
  511. #define SCALER_POS2_ALPHA_MODE_SHIFT 30
  512. #define SCALER_POS2_ALPHA_MODE_PIPELINE 0
  513. @@ -564,6 +585,31 @@ enum hvs_pixel_format {
  514. #define SCALER_POS2_WIDTH_MASK VC4_MASK(11, 0)
  515. #define SCALER_POS2_WIDTH_SHIFT 0
  516. +#define SCALER_TPZ0_VERT_RECALC BIT(31)
  517. +#define SCALER_TPZ0_SCALE_MASK VC4_MASK(28, 8)
  518. +#define SCALER_TPZ0_SCALE_SHIFT 8
  519. +#define SCALER_TPZ0_IPHASE_MASK VC4_MASK(7, 0)
  520. +#define SCALER_TPZ0_IPHASE_SHIFT 0
  521. +#define SCALER_TPZ1_RECIP_MASK VC4_MASK(15, 0)
  522. +#define SCALER_TPZ1_RECIP_SHIFT 0
  523. +
  524. +/* Skips interpolating coefficients to 64 phases, so just 8 are used.
  525. + * Required for nearest neighbor.
  526. + */
  527. +#define SCALER_PPF_NOINTERP BIT(31)
  528. +/* Replaces the highest valued coefficient with one that makes all 4
  529. + * sum to unity.
  530. + */
  531. +#define SCALER_PPF_AGC BIT(30)
  532. +#define SCALER_PPF_SCALE_MASK VC4_MASK(24, 8)
  533. +#define SCALER_PPF_SCALE_SHIFT 8
  534. +#define SCALER_PPF_IPHASE_MASK VC4_MASK(6, 0)
  535. +#define SCALER_PPF_IPHASE_SHIFT 0
  536. +
  537. +#define SCALER_PPF_KERNEL_OFFSET_MASK VC4_MASK(13, 0)
  538. +#define SCALER_PPF_KERNEL_OFFSET_SHIFT 0
  539. +#define SCALER_PPF_KERNEL_UNCACHED BIT(31)
  540. +
  541. #define SCALER_SRC_PITCH_MASK VC4_MASK(15, 0)
  542. #define SCALER_SRC_PITCH_SHIFT 0