123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358 |
- From 50cb4c343d766b0a3efa441a2c62fb890f0b3e45 Mon Sep 17 00:00:00 2001
- From: Mario Kleiner <mario.kleiner.de@gmail.com>
- Date: Thu, 23 Jun 2016 08:17:50 +0200
- Subject: [PATCH] drm/vc4: Implement precise vblank timestamping.
- Precise vblank timestamping is implemented via the
- usual scanout position based method. On VC4 the
- pixelvalves (PV) do not have a scanout position
- register. Only the hardware video scaler (HVS) has a
- similar register which describes which scanline for
- the output is currently composited and stored in the
- HVS fifo for later consumption by the PV.
- This causes a problem in that the HVS runs at a much
- faster clock (system clock / audio gate) than the PV
- which runs at video mode dot clock, so unless the
- fifo between HVS and PV is full, the HVS will progress
- faster in its observable read line position than video
- scan rate, so the HVS position reading can't be directly
- translated into a scanout position for timestamp correction.
- Additionally when the PV is in vblank, it doesn't consume
- from the fifo, so the fifo gets full very quickly and then
- the HVS stops compositing until the PV enters active scanout
- and starts consuming scanlines from the fifo again, making
- new space for the HVS to composite.
- Therefore a simple translation of HVS read position into
- elapsed time since (or to) start of active scanout does
- not work, but for the most interesting cases we can still
- get useful and sufficiently accurate results:
- 1. The PV enters active scanout of a new frame with the
- fifo of the HVS completely full, and the HVS can refill
- any fifo line which gets consumed and thereby freed up by
- the PV during active scanout very quickly. Therefore the
- PV and HVS work effectively in lock-step during active
- scanout with the fifo never having more than 1 scanline
- freed up by the PV before it gets refilled. The PV's
- real scanout position is therefore trailing the HVS
- compositing position as scanoutpos = hvspos - fifosize
- and we can get the true scanoutpos as HVS readpos minus
- fifo size, so precise timestamping works while in active
- scanout, except for the last few scanlines of the frame,
- when the HVS reaches end of frame, stops compositing and
- the PV catches up and drains the fifo. This special case
- would only introduce minor errors though.
- 2. If we are in vblank, then we can only guess something
- reasonable. If called from vblank irq, we assume the irq is
- usually dispatched with minimum delay, so we can take a
- timestamp taken at entry into the vblank irq handler as a
- baseline and then add a full vblank duration until the
- guessed start of active scanout. As irq dispatch is usually
- pretty low latency this works with relatively low jitter and
- good results.
- If we aren't called from vblank irq then we could be anywhere
- within the vblank interval, so we return a neutral result,
- simply the current system timestamp, and hope for the best.
- Measurement shows the generated timestamps to be rather precise,
- and at least never off by more than 1 vblank duration worst-case.
- Limitations: Doesn't work well yet for interlaced video modes,
- therefore disabled in interlaced mode for now.
- v2: Use the DISPBASE registers to determine the FIFO size (changes
- by anholt)
- Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
- Signed-off-by: Eric Anholt <eric@anholt.net>
- Reviewed-and-tested-by: Mario Kleiner <mario.kleiner.de@gmail.com> (v2)
- (cherry picked from commit 1bf59f1dcbe25272f6b5d870054647e58a8a9c55)
- ---
- drivers/gpu/drm/vc4/vc4_crtc.c | 162 +++++++++++++++++++++++++++++++++++++++++
- drivers/gpu/drm/vc4/vc4_drv.c | 2 +
- drivers/gpu/drm/vc4/vc4_drv.h | 7 ++
- drivers/gpu/drm/vc4/vc4_regs.h | 22 +++++-
- 4 files changed, 192 insertions(+), 1 deletion(-)
- --- a/drivers/gpu/drm/vc4/vc4_crtc.c
- +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
- @@ -47,12 +47,17 @@ struct vc4_crtc {
- const struct vc4_crtc_data *data;
- void __iomem *regs;
-
- + /* Timestamp at start of vblank irq - unaffected by lock delays. */
- + ktime_t t_vblank;
- +
- /* Which HVS channel we're using for our CRTC. */
- int channel;
-
- u8 lut_r[256];
- u8 lut_g[256];
- u8 lut_b[256];
- + /* Size in pixels of the COB memory allocated to this CRTC. */
- + u32 cob_size;
-
- struct drm_pending_vblank_event *event;
- };
- @@ -134,6 +139,144 @@ int vc4_crtc_debugfs_regs(struct seq_fil
- }
- #endif
-
- +int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
- + unsigned int flags, int *vpos, int *hpos,
- + ktime_t *stime, ktime_t *etime,
- + const struct drm_display_mode *mode)
- +{
- + struct vc4_dev *vc4 = to_vc4_dev(dev);
- + struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
- + u32 val;
- + int fifo_lines;
- + int vblank_lines;
- + int ret = 0;
- +
- + /*
- + * XXX Doesn't work well in interlaced mode yet, partially due
- + * to problems in vc4 kms or drm core interlaced mode handling,
- + * so disable for now in interlaced mode.
- + */
- + if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- + return ret;
- +
- + /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
- +
- + /* Get optional system timestamp before query. */
- + if (stime)
- + *stime = ktime_get();
- +
- + /*
- + * Read vertical scanline which is currently composed for our
- + * pixelvalve by the HVS, and also the scaler status.
- + */
- + val = HVS_READ(SCALER_DISPSTATX(vc4_crtc->channel));
- +
- + /* Get optional system timestamp after query. */
- + if (etime)
- + *etime = ktime_get();
- +
- + /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
- +
- + /* Vertical position of hvs composed scanline. */
- + *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE);
- +
- + /* No hpos info available. */
- + if (hpos)
- + *hpos = 0;
- +
- + /* This is the offset we need for translating hvs -> pv scanout pos. */
- + fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay;
- +
- + if (fifo_lines > 0)
- + ret |= DRM_SCANOUTPOS_VALID;
- +
- + /* HVS more than fifo_lines into frame for compositing? */
- + if (*vpos > fifo_lines) {
- + /*
- + * We are in active scanout and can get some meaningful results
- + * from HVS. The actual PV scanout can not trail behind more
- + * than fifo_lines as that is the fifo's capacity. Assume that
- + * in active scanout the HVS and PV work in lockstep wrt. HVS
- + * refilling the fifo and PV consuming from the fifo, ie.
- + * whenever the PV consumes and frees up a scanline in the
- + * fifo, the HVS will immediately refill it, therefore
- + * incrementing vpos. Therefore we choose HVS read position -
- + * fifo size in scanlines as a estimate of the real scanout
- + * position of the PV.
- + */
- + *vpos -= fifo_lines + 1;
- + if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- + *vpos /= 2;
- +
- + ret |= DRM_SCANOUTPOS_ACCURATE;
- + return ret;
- + }
- +
- + /*
- + * Less: This happens when we are in vblank and the HVS, after getting
- + * the VSTART restart signal from the PV, just started refilling its
- + * fifo with new lines from the top-most lines of the new framebuffers.
- + * The PV does not scan out in vblank, so does not remove lines from
- + * the fifo, so the fifo will be full quickly and the HVS has to pause.
- + * We can't get meaningful readings wrt. scanline position of the PV
- + * and need to make things up in a approximative but consistent way.
- + */
- + ret |= DRM_SCANOUTPOS_IN_VBLANK;
- + vblank_lines = mode->crtc_vtotal - mode->crtc_vdisplay;
- +
- + if (flags & DRM_CALLED_FROM_VBLIRQ) {
- + /*
- + * Assume the irq handler got called close to first
- + * line of vblank, so PV has about a full vblank
- + * scanlines to go, and as a base timestamp use the
- + * one taken at entry into vblank irq handler, so it
- + * is not affected by random delays due to lock
- + * contention on event_lock or vblank_time lock in
- + * the core.
- + */
- + *vpos = -vblank_lines;
- +
- + if (stime)
- + *stime = vc4_crtc->t_vblank;
- + if (etime)
- + *etime = vc4_crtc->t_vblank;
- +
- + /*
- + * If the HVS fifo is not yet full then we know for certain
- + * we are at the very beginning of vblank, as the hvs just
- + * started refilling, and the stime and etime timestamps
- + * truly correspond to start of vblank.
- + */
- + if ((val & SCALER_DISPSTATX_FULL) != SCALER_DISPSTATX_FULL)
- + ret |= DRM_SCANOUTPOS_ACCURATE;
- + } else {
- + /*
- + * No clue where we are inside vblank. Return a vpos of zero,
- + * which will cause calling code to just return the etime
- + * timestamp uncorrected. At least this is no worse than the
- + * standard fallback.
- + */
- + *vpos = 0;
- + }
- +
- + return ret;
- +}
- +
- +int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
- + int *max_error, struct timeval *vblank_time,
- + unsigned flags)
- +{
- + struct vc4_dev *vc4 = to_vc4_dev(dev);
- + struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
- + struct drm_crtc *crtc = &vc4_crtc->base;
- + struct drm_crtc_state *state = crtc->state;
- +
- + /* Helper routine in DRM core does all the work: */
- + return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc_id, max_error,
- + vblank_time, flags,
- + &state->adjusted_mode);
- +}
- +
- static void vc4_crtc_destroy(struct drm_crtc *crtc)
- {
- drm_crtc_cleanup(crtc);
- @@ -535,6 +678,7 @@ static irqreturn_t vc4_crtc_irq_handler(
- irqreturn_t ret = IRQ_NONE;
-
- if (stat & PV_INT_VFP_START) {
- + vc4_crtc->t_vblank = ktime_get();
- CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
- drm_crtc_handle_vblank(&vc4_crtc->base);
- vc4_crtc_handle_page_flip(vc4_crtc);
- @@ -759,6 +903,22 @@ static void vc4_set_crtc_possible_masks(
- }
- }
-
- +static void
- +vc4_crtc_get_cob_allocation(struct vc4_crtc *vc4_crtc)
- +{
- + struct drm_device *drm = vc4_crtc->base.dev;
- + struct vc4_dev *vc4 = to_vc4_dev(drm);
- + u32 dispbase = HVS_READ(SCALER_DISPBASEX(vc4_crtc->channel));
- + /* Top/base are supposed to be 4-pixel aligned, but the
- + * Raspberry Pi firmware fills the low bits (which are
- + * presumably ignored).
- + */
- + u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3;
- + u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3;
- +
- + vc4_crtc->cob_size = top - base + 4;
- +}
- +
- static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
- {
- struct platform_device *pdev = to_platform_device(dev);
- @@ -835,6 +995,8 @@ static int vc4_crtc_bind(struct device *
- crtc->cursor = cursor_plane;
- }
-
- + vc4_crtc_get_cob_allocation(vc4_crtc);
- +
- CRTC_WRITE(PV_INTEN, 0);
- CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
- ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
- --- a/drivers/gpu/drm/vc4/vc4_drv.c
- +++ b/drivers/gpu/drm/vc4/vc4_drv.c
- @@ -116,6 +116,8 @@ static struct drm_driver vc4_drm_driver
- .enable_vblank = vc4_enable_vblank,
- .disable_vblank = vc4_disable_vblank,
- .get_vblank_counter = drm_vblank_no_hw_counter,
- + .get_scanout_position = vc4_crtc_get_scanoutpos,
- + .get_vblank_timestamp = vc4_crtc_get_vblank_timestamp,
-
- #if defined(CONFIG_DEBUG_FS)
- .debugfs_init = vc4_debugfs_init,
- --- a/drivers/gpu/drm/vc4/vc4_drv.h
- +++ b/drivers/gpu/drm/vc4/vc4_drv.h
- @@ -419,6 +419,13 @@ int vc4_enable_vblank(struct drm_device
- void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id);
- void vc4_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file);
- int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg);
- +int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
- + unsigned int flags, int *vpos, int *hpos,
- + ktime_t *stime, ktime_t *etime,
- + const struct drm_display_mode *mode);
- +int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
- + int *max_error, struct timeval *vblank_time,
- + unsigned flags);
-
- /* vc4_debugfs.c */
- int vc4_debugfs_init(struct drm_minor *minor);
- --- a/drivers/gpu/drm/vc4/vc4_regs.h
- +++ b/drivers/gpu/drm/vc4/vc4_regs.h
- @@ -368,7 +368,6 @@
- # define SCALER_DISPBKGND_FILL BIT(24)
-
- #define SCALER_DISPSTAT0 0x00000048
- -#define SCALER_DISPBASE0 0x0000004c
- # define SCALER_DISPSTATX_MODE_MASK VC4_MASK(31, 30)
- # define SCALER_DISPSTATX_MODE_SHIFT 30
- # define SCALER_DISPSTATX_MODE_DISABLED 0
- @@ -377,6 +376,24 @@
- # define SCALER_DISPSTATX_MODE_EOF 3
- # define SCALER_DISPSTATX_FULL BIT(29)
- # define SCALER_DISPSTATX_EMPTY BIT(28)
- +# define SCALER_DISPSTATX_FRAME_COUNT_MASK VC4_MASK(17, 12)
- +# define SCALER_DISPSTATX_FRAME_COUNT_SHIFT 12
- +# define SCALER_DISPSTATX_LINE_MASK VC4_MASK(11, 0)
- +# define SCALER_DISPSTATX_LINE_SHIFT 0
- +
- +#define SCALER_DISPBASE0 0x0000004c
- +/* Last pixel in the COB (display FIFO memory) allocated to this HVS
- + * channel. Must be 4-pixel aligned (and thus 4 pixels less than the
- + * next COB base).
- + */
- +# define SCALER_DISPBASEX_TOP_MASK VC4_MASK(31, 16)
- +# define SCALER_DISPBASEX_TOP_SHIFT 16
- +/* First pixel in the COB (display FIFO memory) allocated to this HVS
- + * channel. Must be 4-pixel aligned.
- + */
- +# define SCALER_DISPBASEX_BASE_MASK VC4_MASK(15, 0)
- +# define SCALER_DISPBASEX_BASE_SHIFT 0
- +
- #define SCALER_DISPCTRL1 0x00000050
- #define SCALER_DISPBKGND1 0x00000054
- #define SCALER_DISPBKGNDX(x) (SCALER_DISPBKGND0 + \
- @@ -387,6 +404,9 @@
- (x) * (SCALER_DISPSTAT1 - \
- SCALER_DISPSTAT0))
- #define SCALER_DISPBASE1 0x0000005c
- +#define SCALER_DISPBASEX(x) (SCALER_DISPBASE0 + \
- + (x) * (SCALER_DISPBASE1 - \
- + SCALER_DISPBASE0))
- #define SCALER_DISPCTRL2 0x00000060
- #define SCALER_DISPCTRLX(x) (SCALER_DISPCTRL0 + \
- (x) * (SCALER_DISPCTRL1 - \
|