// Copyright 2017 The Periph Authors. All rights reserved. // Use of this source code is governed under the Apache License, Version 2.0 // that can be found in the LICENSE file. // The DMA controller can be used for two functionality: // - implement zero-CPU continuous PWM. // - bitbang a large stream of bits over a GPIO pin, for example for WS2812b // support. // // The way it works under the hood is that the bcm283x has two registers, one // to set a bit and one to clear a bit. // // So two DMA controllers are used, one writing a "clear bit" stream and one // for the "set bit" stream. This requires two independent 32 bits wide streams // per period for write but only one for read. // // References // // Page 7: // " Software accessing RAM directly must use physical addresses (based at // 0x00000000). Software accessing RAM using the DMA engines must use bus // addresses (based at 0xC0000000) " ... to skip the L1 cache. // // " The BCM2835 DMA Controller provides a total of 16 DMA channels. Each // channel operates independently from the others and is internally arbitrated // onto one of the 3 system buses. This means that the amount of bandwidth that // a DMA channel may consume can be controlled by the arbiter settings. " // // The CPU has 16 DMA channels but only the first 7 (#0 to #6) can do strides. // 7~15 have half the bandwidth. // // References // // DMA channel allocation: // https://github.com/raspberrypi/linux/issues/1327 // // DMA location: // https://www.raspberrypi.org/forums/viewtopic.php?f=71&t=19797 package bcm283x import ( "errors" "fmt" "log" "os" "strings" "time" "periph.io/x/periph" "periph.io/x/periph/conn/gpio/gpiostream" "periph.io/x/periph/conn/physic" "periph.io/x/periph/host/pmem" "periph.io/x/periph/host/videocore" ) const ( periphMask = 0x00FFFFFF periphBus = 0x7E000000 // maxLite is the maximum transfer allowed by a lite channel. maxLite = 65535 ) // Pages 47-50 type dmaStatus uint32 const ( dmaReset dmaStatus = 1 << 31 // RESET; Writing a 1 to this bit will reset the DMA dmaAbort dmaStatus = 1 << 30 // ABORT; Writing a 1 to this bit will abort the current DMA CB. The DMA will load the next CB and attempt to continue. dmaDisableDebug dmaStatus = 1 << 29 // DISDEBUG; When set to 1, the DMA will not stop when the debug pause signal is asserted. // When set to 1, the DMA will keep a tally of the AXI writes going out and // the write responses coming in. At the very end of the current DMA transfer // it will wait until the last outstanding write response has been received // before indicating the transfer is complete. Whilst waiting it will load // the next CB address (but will not fetch the CB), clear the active flag (if // the next CB address = zero), and it will defer setting the END flag or the // INT flag until the last outstanding write response has been received. // In this mode, the DMA will pause if it has more than 13 outstanding writes // at any one time. dmaWaitForOutstandingWrites dmaStatus = 1 << 28 // WAIT_FOR_OUTSTANDING_WRITES // 27:24 reserved // 23:20 Lowest has higher priority on AXI. dmaPanicPriorityShift = 20 dmaPanicPriorityMask = 0xF << 20 // PANIC_PRIORITY // 19:16 Lowest has higher priority on AXI. dmaPriorityShift = 16 dmaPriorityMask = 0xF << dmaPriorityShift // PRIORITY // 15:9 reserved dmaErrorStatus dmaStatus = 1 << 8 // ERROR DMA error was detected; must be cleared manually. // 7 reserved dmaWaitingForOutstandingWrites dmaStatus = 1 << 6 // WAITING_FOR_OUTSTANDING_WRITES; Indicates if the DMA is currently waiting for any outstanding writes to be received, and is not transferring data. dmaDreqStopsDMA dmaStatus = 1 << 5 // DREQ_STOPS_DMA; Indicates if the DMA is currently paused and not transferring data due to the DREQ being inactive. // Indicates if the DMA is currently paused and not transferring data. This // will occur if: the active bit has been cleared, if the DMA is currently // executing wait cycles or if the debug_pause signal has been set by the // debug block, or the number of outstanding writes has exceeded the max // count. dmaPaused dmaStatus = 1 << 4 // PAUSED // Indicates the state of the selected DREQ (Data Request) signal, ie. the // DREQ selected by the PERMAP field of the transfer info. // 1 = Requesting data. This will only be valid once the DMA has started and // the PERMAP field has been loaded from the CB. It will remain valid, // indicating the selected DREQ signal, until a new CB is loaded. If // PERMAP is set to zero (unpaced transfer) then this bit will read back // as 1. // 0 = No data request. dmaDreq dmaStatus = 1 << 3 // DREQ // This is set when the transfer for the CB ends and INTEN is set to 1. Once // set it must be manually cleared down, even if the next CB has INTEN = 0. // Write 1 to clear. dmaInterrupt dmaStatus = 1 << 2 // INT // Set when the transfer described by the current control block is complete. // Write 1 to clear. dmaEnd dmaStatus = 1 << 1 // END // This bit enables the DMA. The DMA will start if this bit is set and the // CB_ADDR is non zero. The DMA transfer can be paused and resumed by // clearing, then setting it again. // This bit is automatically cleared at the end of the complete DMA transfer, // ie. after a NEXTCONBK = 0x0000_0000 has been loaded. dmaActive dmaStatus = 1 << 0 // ACTIVE ) var dmaStatusMap = []struct { v dmaStatus s string }{ {dmaReset, "Reset"}, {dmaAbort, "Abort"}, {dmaDisableDebug, "DisableDebug"}, {dmaWaitForOutstandingWrites, "WaitForOutstandingWrites"}, {dmaErrorStatus, "ErrorStatus"}, {dmaWaitingForOutstandingWrites, "WaitingForOutstandingWrites"}, {dmaDreqStopsDMA, "DreqStopsDMA"}, {dmaPaused, "Paused"}, {dmaDreq, "Dreq"}, {dmaInterrupt, "Interrupt"}, {dmaEnd, "End"}, {dmaActive, "Active"}, } func (d dmaStatus) String() string { var out []string for _, l := range dmaStatusMap { if d&l.v != 0 { d &^= l.v out = append(out, l.s) } } if v := d & dmaPanicPriorityMask; v != 0 { out = append(out, fmt.Sprintf("pp%d", v>>dmaPanicPriorityShift)) d &^= dmaPanicPriorityMask } if v := d & dmaPriorityMask; v != 0 { out = append(out, fmt.Sprintf("p%d", v>>dmaPriorityShift)) d &^= dmaPriorityMask } if d != 0 { out = append(out, fmt.Sprintf("dmaStatus(0x%x)", uint32(d))) } if len(out) == 0 { return "0" } return strings.Join(out, "|") } // Pages 50-52 type dmaTransferInfo uint32 const ( // 31:27 reserved // Don't do wide writes as 2 beat burst; only for channels 0 to 6 dmaNoWideBursts dmaTransferInfo = 1 << 26 // NO_WIDE_BURSTS // 25:21 Slows down the DMA throughput by setting the number of dummy cycles // burnt after each DMA read or write is completed. dmaWaitCyclesShift = 21 dmaWaitcyclesMax = 0x1F dmaWaitCyclesMask dmaTransferInfo = dmaWaitcyclesMax << dmaWaitCyclesShift // WAITS // 20:16 Peripheral mapping (1-31) whose ready signal shall be used to // control the rate of the transfers. 0 means continuous un-paced transfer. // // It is the source used to pace the data reads and writes operations, each // pace being a DReq (Data Request). // // Page 61 dmaPerMapShift = 16 dmaPerMapMask dmaTransferInfo = 31 << dmaPerMapShift dmaFire dmaTransferInfo = 0 << dmaPerMapShift // PERMAP; Continuous trigger dmaDSI dmaTransferInfo = 1 << dmaPerMapShift // Display Serial Interface (?) dmaPCMTX dmaTransferInfo = 2 << dmaPerMapShift // dmaPCMRX dmaTransferInfo = 3 << dmaPerMapShift // dmaSMI dmaTransferInfo = 4 << dmaPerMapShift // Secondary Memory Interface (?) dmaPWM dmaTransferInfo = 5 << dmaPerMapShift // dmaSPITX dmaTransferInfo = 6 << dmaPerMapShift // dmaSPIRX dmaTransferInfo = 7 << dmaPerMapShift // dmaBscSPIslaveTX dmaTransferInfo = 8 << dmaPerMapShift // dmaBscSPIslaveRX dmaTransferInfo = 9 << dmaPerMapShift // dmaUnused dmaTransferInfo = 10 << dmaPerMapShift // dmaEMMC dmaTransferInfo = 11 << dmaPerMapShift // dmaUARTTX dmaTransferInfo = 12 << dmaPerMapShift // dmaSDHost dmaTransferInfo = 13 << dmaPerMapShift // dmaUARTRX dmaTransferInfo = 14 << dmaPerMapShift // dmaDSI2 dmaTransferInfo = 15 << dmaPerMapShift // Same as DSI dmaSlimBusMCTX dmaTransferInfo = 16 << dmaPerMapShift // dmaHDMI dmaTransferInfo = 17 << dmaPerMapShift // 216MHz; potentially a (216MHz/(26+1)) 8MHz copy rate but it fails if HDMI is disabled dmaSlimBusMCRX dmaTransferInfo = 18 << dmaPerMapShift // dmaSlimBusDC0 dmaTransferInfo = 19 << dmaPerMapShift // dmaSlimBusDC1 dmaTransferInfo = 20 << dmaPerMapShift // dmaSlimBusDC2 dmaTransferInfo = 21 << dmaPerMapShift // dmaSlimBusDC3 dmaTransferInfo = 22 << dmaPerMapShift // dmaSlimBusDC4 dmaTransferInfo = 23 << dmaPerMapShift // dmaScalerFIFO0 dmaTransferInfo = 24 << dmaPerMapShift // Also on SMI; SMI can be disabled with smiDisable dmaScalerFIFO1 dmaTransferInfo = 25 << dmaPerMapShift // dmaScalerFIFO2 dmaTransferInfo = 26 << dmaPerMapShift // dmaSlimBusDC5 dmaTransferInfo = 27 << dmaPerMapShift // dmaSlimBusDC6 dmaTransferInfo = 28 << dmaPerMapShift // dmaSlimBusDC7 dmaTransferInfo = 29 << dmaPerMapShift // dmaSlimBusDC8 dmaTransferInfo = 30 << dmaPerMapShift // dmaSlimBusDC9 dmaTransferInfo = 31 << dmaPerMapShift // dmaBurstLengthShift = 12 dmaBurstLengthMask dmaTransferInfo = 0xF << dmaBurstLengthShift // BURST_LENGTH 15:12 0 means a single transfer. dmaSrcIgnore dmaTransferInfo = 1 << 11 // SRC_IGNORE Source won't be read, output will be zeros. dmaSrcDReq dmaTransferInfo = 1 << 10 // SRC_DREQ dmaSrcWidth128 dmaTransferInfo = 1 << 9 // SRC_WIDTH 128 bits reads if set, 32 bits otherwise. dmaSrcInc dmaTransferInfo = 1 << 8 // SRC_INC Increment read pointer by 32/128bits at each read if set. dmaDstIgnore dmaTransferInfo = 1 << 7 // DEST_IGNORE Do not write. dmaDstDReq dmaTransferInfo = 1 << 6 // DEST_DREQ dmaDstWidth128 dmaTransferInfo = 1 << 5 // DEST_WIDTH 128 bits writes if set, 32 bits otherwise. dmaDstInc dmaTransferInfo = 1 << 4 // DEST_INC Increment write pointer by 32/128bits at each read if set. dmaWaitResp dmaTransferInfo = 1 << 3 // WAIT_RESP DMA waits for AXI write response. // 2 reserved // 2D mode interpret of txLen; linear if unset; only for channels 0 to 6. dmaTransfer2DMode dmaTransferInfo = 1 << 1 // TDMODE dmaInterruptEnable dmaTransferInfo = 1 << 0 // INTEN Generate an interrupt upon completion. ) var dmaTransferInfoMap = []struct { v dmaTransferInfo s string }{ {dmaNoWideBursts, "NoWideBursts"}, {dmaSrcIgnore, "SrcIgnore"}, {dmaSrcDReq, "SrcDReq"}, {dmaSrcWidth128, "SrcWidth128"}, {dmaSrcInc, "SrcInc"}, {dmaDstIgnore, "DstIgnore"}, {dmaDstDReq, "DstDReq"}, {dmaDstWidth128, "DstWidth128"}, {dmaDstInc, "DstInc"}, {dmaWaitResp, "WaitResp"}, {dmaTransfer2DMode, "Transfer2DMode"}, {dmaInterruptEnable, "InterruptEnable"}, } var dmaPerMap = []string{ "Fire", "DSI", "PCMTX", "PCMRX", "SMI", "PWM", "SPITX", "SPIRX", "BscSPISlaveTX", "BscSPISlaveRX", "Unused", "EMMC", "UARTTX", "SDHOST", "UARTRX", "DSI2", "SlimBusMCTX", "HDMI", "SlimBusMCRX", "SlimBusDC0", "SlimBusDC1", "SlimBusDC2", "SlimBusDC3", "SlimBusDC4", "ScalerFIFO0", "ScalerFIFO1", "ScalerFIFO2", "SlimBusDC5", "SlimBusDC6", "SlimBusDC7", "SlimBusDC8", "SlimBusDC9", } func (d dmaTransferInfo) String() string { var out []string for _, l := range dmaTransferInfoMap { if d&l.v != 0 { d &^= l.v out = append(out, l.s) } } if v := d & dmaWaitCyclesMask; v != 0 { out = append(out, fmt.Sprintf("waits=%d", v>>dmaWaitCyclesShift)) d &^= dmaWaitCyclesMask } if v := d & dmaBurstLengthMask; v != 0 { out = append(out, fmt.Sprintf("burst=%d", v>>dmaBurstLengthShift)) d &^= dmaBurstLengthMask } out = append(out, dmaPerMap[(d&dmaPerMapMask)>>dmaPerMapShift]) d &^= dmaPerMapMask if d != 0 { out = append(out, fmt.Sprintf("dmaTransferInfo(0x%x)", uint32(d))) } return strings.Join(out, "|") } // Page 55 type dmaDebug uint32 const ( // 31:29 reserved dmaLite dmaDebug = 1 << 28 // LITE RO set for lite DMA controllers // 27:25 version dmaVersionShift = 25 dmaVersionMask dmaDebug = 7 << dmaVersionShift // VERSION // 24:16 dmaState dmaStateShift = 16 dmaStateMask dmaDebug = 0x1FF << dmaStateShift // DMA_STATE; the actual states are not documented // 15:8 dmaID dmaIDShift = 8 dmaIDMask = 0xFF << dmaIDShift // DMA_ID; the index of the DMA controller // 7:4 outstandingWrites dmaOutstandingWritesShift = 4 dmaOutstandingWritesMask = 0xF << dmaOutstandingWritesShift // OUTSTANDING_WRITES // 3 reserved dmaReadError dmaDebug = 1 << 2 // READ_ERROR slave read error; clear by writing a 1 dmaFIFOError dmaDebug = 1 << 1 // FIF_ERROR fifo error; clear by writing a 1 dmaReadLastNotSetError dmaDebug = 1 << 0 // READ_LAST_NOT_SET_ERROR last AXI read signal was not set when expected ) var dmaDebugMap = []struct { v dmaDebug s string }{ {dmaLite, "Lite"}, {dmaReadError, "ReadError"}, {dmaFIFOError, "FIFOError"}, {dmaReadLastNotSetError, "ReadLastNotSetError"}, } func (d dmaDebug) String() string { var out []string for _, l := range dmaDebugMap { if d&l.v != 0 { d &^= l.v out = append(out, l.s) } } if v := d & dmaVersionMask; v != 0 { out = append(out, fmt.Sprintf("v%d", uint32(v>>dmaVersionShift))) d &^= dmaVersionMask } if v := d & dmaStateMask; v != 0 { out = append(out, fmt.Sprintf("state(%x)", uint32(v>>dmaStateShift))) d &^= dmaStateMask } if v := d & dmaIDMask; v != 0 { out = append(out, fmt.Sprintf("#%x", uint32(v>>dmaIDShift))) d &^= dmaIDMask } if v := d & dmaOutstandingWritesMask; v != 0 { out = append(out, fmt.Sprintf("OutstandingWrites=%d", uint32(v>>dmaOutstandingWritesShift))) d &^= dmaOutstandingWritesMask } if d != 0 { out = append(out, fmt.Sprintf("dmaDebug(0x%x)", uint32(d))) } if len(out) == 0 { return "0" } return strings.Join(out, "|") } // 31:30 0 // 29:16 yLength (only for channels #0 to #6) // 15:0 xLength type dmaTransferLen uint32 // 31:16 dstStride byte increment to apply at the end of each row in 2D mode // 15:0 srcStride byte increment to apply at the end of each row in 2D mode type dmaStride uint32 func (d dmaStride) String() string { y := (d >> 16) & 0xFFFF if y != 0 { return fmt.Sprintf("0x%x,0x%x", uint32(y), uint32(d&0xFFFF)) } return fmt.Sprintf("0x%x", uint32(d&0xFFFF)) } // controlBlock is 256 bits (32 bytes) in length. // // https://www.raspberrypi.org/wp-content/uploads/2012/02/BCM2835-ARM-Peripherals.pdf // Page 40. type controlBlock struct { transferInfo dmaTransferInfo // 0x00 TI srcAddr uint32 // 0x04 SOURCE_AD pointer to source in physical address space dstAddr uint32 // 0x08 DEST_AD pointer to destination in physical address space txLen dmaTransferLen // 0x0C TXFR_LEN length in bytes stride dmaStride // 0x10 STRIDE // Pointer to the next chained controlBlock; must be 32 bytes aligned. // Set it to 0 to stop. nextCB uint32 // 0x14 NEXTCONBK reserved [2]uint32 // 0x18+0x1C } // initBlock initializes a controlBlock for any valid DMA operation. // // l is in bytes, not in words. // // dreq can be dmaFire, dmaPwm, dmaPcmTx, etc. waits is additional wait state // between clocks. func (c *controlBlock) initBlock(srcAddr, dstAddr, l uint32, srcIO, dstIO, srcInc, dstInc bool, dreq dmaTransferInfo) error { if srcIO && dstIO { return errors.New("only one of src and dst can be I/O") } if srcAddr == 0 && dstAddr == 0 { return errors.New("at least one source or destination is required") } if srcAddr == 0 && srcIO { return errors.New("using src as I/O requires src") } if dstAddr == 0 && dstIO { return errors.New("using dst as I/O requires dst") } if dreq&^dmaPerMapMask != 0 { return errors.New("dreq must be one of the clock source, nothing else") } t := dmaNoWideBursts | dmaWaitResp if srcAddr == 0 { t |= dmaSrcIgnore c.srcAddr = 0 } else { if srcIO { // Memory mapped register c.srcAddr = physToBus(srcAddr) } else { // Normal memory c.srcAddr = physToUncachedPhys(srcAddr) } if srcInc { t |= dmaSrcInc } } if dstAddr == 0 { t |= dmaDstIgnore c.dstAddr = 0 } else { if dstIO { // Memory mapped register c.dstAddr = physToBus(dstAddr) } else { // Normal memory c.dstAddr = physToUncachedPhys(dstAddr) } if dstInc { t |= dmaDstInc } } if dreq != dmaFire { // Inserting a wait prevents multiple transfers in a single DReq cycle. waits := 1 t |= dreq | dmaTransferInfo(waits<= 0; i-- { for _, exclude := range blacklist { if i == exclude { goto skip } } if drvDMA.dmaMemory.channels[i].isAvailable() { drvDMA.dmaMemory.channels[i].reset() return i, &drvDMA.dmaMemory.channels[i] } skip: } } // Uncomment to understand the state of the DMA channels. //log.Printf("%#v", drvDMA.dmaMemory) return -1, nil } // runIO picks a DMA channel, initialize it and runs a transfer. // // It tries to release the channel as soon as it can. func runIO(pCB pmem.Mem, liteOk bool) error { var blacklist []int if !liteOk { blacklist = []int{7, 8, 9, 10, 11, 12, 13, 14, 15} } _, ch := pickChannel(blacklist...) if ch == nil { return errors.New("bcm283x-dma: no channel available") } defer ch.reset() ch.startIO(uint32(pCB.PhysAddr())) return ch.wait() } func allocateCB(size int) ([]controlBlock, *videocore.Mem, error) { buf, err := drvDMA.dmaBufAllocator((size + 0xFFF) &^ 0xFFF) if err != nil { return nil, nil, err } var cb []controlBlock if err := buf.AsPOD(&cb); err != nil { _ = buf.Close() return nil, nil, err } return cb, buf, nil } // dmaWriteStreamPCM streams data to a PCM enabled pin as a half-duplex I²S // channel. func dmaWriteStreamPCM(p *Pin, w gpiostream.Stream) error { d := w.Duration() if d == 0 { return nil } f := w.Frequency() _, _, _, actualfreq, err := calcSource(f, 1) if err != nil { return err } if actualfreq != f { return errors.New("TODO(maruel): handle oversampling") } // Start clock earlier. drvDMA.pcmMemory.reset() _, _, err = setPCMClockSource(f) if err != nil { return err } // Calculate the number of bytes needed. l := (int(w.Frequency()/f) + 7) / 8 // Bytes buf, err := drvDMA.dmaBufAllocator((l + 0xFFF) &^ 0xFFF) if err != nil { return err } defer buf.Close() if err := copyStreamToDMABuf(w, buf.Uint32()); err != nil { return err } cb, pCB, err := allocateCB(4096) if err != nil { return err } defer pCB.Close() reg := drvDMA.pcmBaseAddr + 0x4 // pcmMap.fifo if err = cb[0].initBlock(uint32(buf.PhysAddr()), reg, uint32(l), false, true, true, false, dmaPCMTX); err != nil { return err } defer drvDMA.pcmMemory.reset() // Start transfer drvDMA.pcmMemory.set() err = runIO(pCB, l <= maxLite) // We have to wait PCM to be finished even after DMA finished. for drvDMA.pcmMemory.cs&pcmTXErr == 0 { Nanospin(10 * time.Nanosecond) } return err } func dmaWritePWMFIFO() (*dmaChannel, *videocore.Mem, error) { if drvDMA.dmaMemory == nil { return nil, nil, errors.New("bcm283x-dma is not initialized; try running as root?") } cb, buf, err := allocateCB(32 + 4) // CB + data if err != nil { return nil, nil, err } u := buf.Uint32() offsetBytes := uint32(32) u[offsetBytes/4] = 0x0 physBuf := uint32(buf.PhysAddr()) physBit := physBuf + offsetBytes dest := drvDMA.pwmBaseAddr + 0x18 // PWM FIFO if err := cb[0].initBlock(physBit, dest, 4, false, true, false, false, dmaPWM); err != nil { _ = buf.Close() return nil, nil, err } cb[0].nextCB = physBuf // Loop back to self. _, ch := pickChannel() if ch == nil { _ = buf.Close() return nil, nil, errors.New("bcm283x-dma: no channel available") } ch.startIO(physBuf) return ch, buf, nil } func startPWMbyDMA(p *Pin, rng, data uint32) (*dmaChannel, *videocore.Mem, error) { if drvDMA.dmaMemory == nil { return nil, nil, errors.New("bcm283x-dma is not initialized; try running as root?") } cb, buf, err := allocateCB(2*32 + 4) // 2 CBs + mask if err != nil { return nil, nil, err } u := buf.Uint32() cbBytes := uint32(32) offsetBytes := cbBytes * 2 u[offsetBytes/4] = uint32(1) << uint(p.number&31) physBuf := uint32(buf.PhysAddr()) physBit := physBuf + offsetBytes dest := [2]uint32{ drvGPIO.gpioBaseAddr + 0x28 + 4*uint32(p.number/32), // clear drvGPIO.gpioBaseAddr + 0x1C + 4*uint32(p.number/32), // set } // High if err := cb[0].initBlock(physBit, dest[1], data*4, false, true, false, false, dmaPWM); err != nil { _ = buf.Close() return nil, nil, err } cb[0].nextCB = physBuf + cbBytes // Low if err := cb[1].initBlock(physBit, dest[0], (rng-data)*4, false, true, false, false, dmaPWM); err != nil { _ = buf.Close() return nil, nil, err } cb[1].nextCB = physBuf // Loop back to cb[0] var blacklist []int if data*4 >= 1<<16 || (rng-data)*4 >= 1<<16 { // Don't use lite channels. blacklist = []int{7, 8, 9, 10, 11, 12, 13, 14, 15} } _, ch := pickChannel(blacklist...) if ch == nil { _ = buf.Close() return nil, nil, errors.New("bcm283x-dma: no channel available") } ch.startIO(physBuf) return ch, buf, nil } // overSamples calculates the skip value which are the values that are read but // discarded as the clock is too fast. func overSamples(s gpiostream.Stream) (int, error) { desired := s.Frequency() skip := drvDMA.pwmDMAFreq / desired if skip < 1 { return 0, fmt.Errorf("frequency is too high(%s)", desired) } actualFreq := drvDMA.pwmDMAFreq / skip errorPercent := 100 * (actualFreq - desired) / desired if errorPercent < -10 || errorPercent > 10 { return 0, fmt.Errorf("actual resolution differs more than 10%%(%s vs %s)", desired, actualFreq) } return int(skip), nil } // dmaReadStream streams input from a pin. func dmaReadStream(p *Pin, b *gpiostream.BitStream) error { skip, err := overSamples(b) if err != nil { return err } if _, err := setPWMClockSource(); err != nil { return err } // Needs 32x the memory since each read is one full uint32. On the other // hand one could read 32 contiguous pins simultaneously at no cost. // TODO(simokawa): Implement a function to get number of bits for all type of // Stream l := len(b.Bits) * 8 * 4 * int(skip) // TODO(simokawa): Allocate multiple pages and CBs for huge buffer. buf, err := drvDMA.dmaBufAllocator((l + 0xFFF) &^ 0xFFF) if err != nil { return err } defer buf.Close() cb, pCB, err := allocateCB(4) if err != nil { return err } defer pCB.Close() reg := drvGPIO.gpioBaseAddr + 0x34 + 4*uint32(p.number/32) // GPIO Pin Level 0 if err := cb[0].initBlock(reg, uint32(buf.PhysAddr()), uint32(l), true, false, false, true, dmaPWM); err != nil { return err } err = runIO(pCB, l <= maxLite) uint32ToBitLSBF(b.Bits, buf.Bytes(), uint8(p.number&31), skip*4) return err } // dmaWriteStreamEdges streams data to a pin as a half-duplex one controlBlock // per bit toggle DMA stream. // // Memory usage is 32 bytes x number of bit changes rounded up to nearest // 4Kb, so an arbitrary stream of 1s or 0s only takes 4Kb but a stream of // 101010s will takes 256x the memory. // // TODO(maruel): Use huffman-coding-like repeated patterns detection to // "compress" the bitstream. This trades off upfront computation for lower // memory usage. The "compressing" function should be public, so the user can // call it only once yet stream multiple times. // // TODO(maruel): Mutate the program as it goes to reduce duplication by having // the DMA controller write in a following controlBlock.nextCB. // handling gpiostream.Program explicitly. func dmaWriteStreamEdges(p *Pin, w gpiostream.Stream) error { d := w.Duration() if d == 0 { return nil } var bits []byte var msb bool switch v := w.(type) { case *gpiostream.BitStream: bits = v.Bits msb = !v.LSBF default: return fmt.Errorf("Unknown type: %T", v) } skip, err := overSamples(w) if err != nil { return err } // Calculate the number of controlBlock needed. count := 1 stride := uint32(skip) last := getBit(bits[0], 0, msb) l := int(int64(d) * int64(w.Frequency()) / int64(physic.Hertz)) // Bits for i := 1; i < l; i++ { if v := getBit(bits[i/8], i%8, msb); v != last || stride == maxLite { last = v count++ stride = 0 } stride += uint32(skip) } // 32 bytes for each CB and 4 bytes for the mask. bufBytes := count*32 + 4 cb, buf, err := allocateCB((bufBytes + 0xFFF) &^ 0xFFF) if err != nil { return err } defer buf.Close() // Setup the single mask buffer of 4Kb. mask := uint32(1) << uint(p.number&31) u := buf.Uint32() offset := (len(buf.Bytes()) - 4) u[offset/4] = mask physBit := uint32(buf.PhysAddr()) + uint32(offset) // Other constants during the loop. // Waits does not seem to work as expected. Not counted as DREQ pulses? // Use PWM's rng1 instead for this. //waits := divs - 1 dest := [2]uint32{ drvGPIO.gpioBaseAddr + 0x28 + 4*uint32(p.number/32), // clear drvGPIO.gpioBaseAddr + 0x1C + 4*uint32(p.number/32), // set } // Render the controlBlock's to trigger the bit trigger for either Set or // Clear GPIO memory registers. last = getBit(bits[0], 0, msb) index := 0 stride = uint32(skip) for i := 1; i < l; i++ { if v := getBit(bits[i/8], i%8, msb); v != last || stride == maxLite { if err := cb[index].initBlock(physBit, dest[last], stride*4, false, true, false, false, dmaPWM); err != nil { return err } // Hardcoded len(controlBlock) == 32. It is not necessary to use // physToUncachedPhys() here. cb[index].nextCB = uint32(buf.PhysAddr()) + uint32(32*(index+1)) index++ stride = 0 last = v } stride += uint32(skip) } if err := cb[index].initBlock(physBit, dest[last], stride*4, false, true, false, false, dmaPWM); err != nil { return err } // Start clock before DMA _, err = setPWMClockSource() if err != nil { return err } return runIO(buf, true) } // dmaWriteStreamDualChannel streams data to a pin using two DMA channels. // // In practice this leads to a glitchy stream. func dmaWriteStreamDualChannel(p *Pin, w gpiostream.Stream) error { // TODO(maruel): Analyse 'w' to figure out the programs to load, and create // the number of controlBlock needed to reduce memory usage. // TODO(maruel): When only one channel is needed, it is much more memory // efficient to use DMA to write to PWM FIFO. skip, err := overSamples(w) if err != nil { return err } // Calculates the number of needed bytes. l := int(int64(w.Duration())*int64(w.Frequency())/int64(physic.Hertz)) * skip * 4 bufLen := (l + 0xFFF) &^ 0xFFF bufSet, err := drvDMA.dmaBufAllocator(bufLen) if err != nil { return err } defer bufSet.Close() bufClear, err := drvDMA.dmaBufAllocator(bufLen) if err != nil { return err } defer bufClear.Close() cb, pCB, err := allocateCB(4096) if err != nil { return err } defer pCB.Close() // Needs 64x the memory since each write is 2 full uint32. On the other // hand one could write 32 contiguous pins simultaneously at no cost. mask := uint32(1) << uint(p.number&31) if err := raster32(w, skip, bufClear.Uint32(), bufSet.Uint32(), mask); err != nil { return err } // Start clock before DMA start _, err = setPWMClockSource() if err != nil { return err } regSet := drvGPIO.gpioBaseAddr + 0x1C + 4*uint32(p.number/32) if err := cb[0].initBlock(uint32(bufSet.PhysAddr()), regSet, uint32(l), false, true, true, false, dmaPWM); err != nil { return err } regClear := drvGPIO.gpioBaseAddr + 0x28 + 4*uint32(p.number/32) if err := cb[1].initBlock(uint32(bufClear.PhysAddr()), regClear, uint32(l), false, true, true, false, dmaPWM); err != nil { return err } // The first channel must be a full bandwidth one. The "light" ones are // effectively a single one, which means that they are interleaved. If both // are "light" then the jitter is largely increased. x, chSet := pickChannel(6, 7, 8, 9, 10, 11, 12, 13, 14, 15) if chSet == nil { return errors.New("bcm283x-dma: no channel available") } defer chSet.reset() _, chClear := pickChannel(x) if chClear == nil { return errors.New("bcm283x-dma: no secondary channel available") } defer chClear.reset() // Two channel need to be synchronized but there is not such a mechanism. chSet.startIO(uint32(pCB.PhysAddr())) // cb[0] chClear.startIO(uint32(pCB.PhysAddr()) + 32) // cb[1] err1 := chSet.wait() err2 := chClear.wait() if err1 == nil { return err2 } return err1 } // physToUncachedPhys returns the uncached physical memory address backing a // physical memory address. // // p must be rooted at a page boundary (4096). func physToUncachedPhys(p uint32) uint32 { // http://en.wikibooks.org/wiki/Aros/Platforms/Arm_Raspberry_Pi_support#Framebuffer return p | drvGPIO.dramBus } func physToBus(p uint32) uint32 { return (p & periphMask) | periphBus } // smokeTest allocates two physical pages, ask the DMA controller to copy the // data from one page to another and make sure the content is as expected. // // This should take a fraction of a second and will make sure the driver is // usable. This ensures there's at least one DMA channel available. func smokeTest() error { // If these are commented out due to a new processor having different // characteristics, the corresponding code needs to be updated. if drvDMA.dmaMemory.channels[6].debug&dmaLite != 0 { return errors.New("unexpected hardware: DMA channel #6 shouldn't be lite") } if drvDMA.dmaMemory.channels[7].debug&dmaLite == 0 { return errors.New("unexpected hardware: DMA channel #7 should be lite") } if drvDMA.dmaMemory.enable != 0x7FFF { return errors.New("unexpected hardware: DMA enable is not fully set") } const size = 4096 * 4 // 16kb const holeSize = 1 // Minimum DMA alignment alloc := func(s int) (pmem.Mem, error) { return videocore.Alloc(s) } copyMem := func(pDst, pSrc uint64) error { // Allocate a control block and initialize it. pCB, err2 := videocore.Alloc(4096) if err2 != nil { return err2 } defer pCB.Close() var cb *controlBlock if err := pCB.AsPOD(&cb); err != nil { return err } if false { // This code is not run by default because it resets the PWM clock on // process startup, which may cause undesirable glitches. // Initializes the PWM clock right away to 1MHz. _, err := setPWMClockSource() if err != nil { return err } if err := cb.initBlock(uint32(pSrc), uint32(pDst)+holeSize, size-2*holeSize, false, false, true, true, dmaPWM); err != nil { return err } } else { // Use maximum performance. if err := cb.initBlock(uint32(pSrc), uint32(pDst)+holeSize, size-2*holeSize, false, false, true, true, dmaFire); err != nil { return err } } return runIO(pCB, size-2*holeSize <= maxLite) } return pmem.TestCopy(size, holeSize, alloc, copyMem) } // driverDMA implements periph.Driver. // // It implements much more than the DMA controller, it also exposes the clocks, // the PWM and PCM controllers. type driverDMA struct { pcmBaseAddr uint32 pwmBaseAddr uint32 dmaMemory *dmaMap dmaChannel15 *dmaChannel pcmMemory *pcmMap clockMemory *clockMap timerMemory *timerMap gpioPadMemory *gpioPadMap // Page 138 // - Two independent bit-streams // - Each channel either a PWM or serialised version of a 32-bit word // - Variable input and output resolutions. // - Load data from a FIFO storage block, to extent to 8 32-bit words (256 // bits). // // Author note: 100Mhz base resolution with a 256 bits 1-bit stream is actually // good enough to generate a DAC. pwmMemory *pwmMap // These clocks are shared with hardware PWM, DMA driven PWM and BitStream. pwmBaseFreq physic.Frequency pwmDMAFreq physic.Frequency pwmDMACh *dmaChannel pwmDMABuf *videocore.Mem // dmaBufAllocator is overriden for unit testing. dmaBufAllocator func(s int) (*videocore.Mem, error) // Set to videocore.Alloc } func (d *driverDMA) Close() error { // TODO(maruel): Stop DMA and PWM controllers. d.pcmBaseAddr = 0 d.pwmBaseAddr = 0 d.dmaMemory = nil d.dmaChannel15 = nil d.pcmMemory = nil d.clockMemory = nil d.timerMemory = nil d.pwmMemory = nil d.pwmBaseFreq = 0 d.pwmDMAFreq = 0 d.pwmDMACh = nil d.pwmDMABuf = nil d.dmaBufAllocator = nil return nil } func (d *driverDMA) String() string { return "bcm283x-dma" } func (d *driverDMA) Prerequisites() []string { return []string{"bcm283x-gpio"} } func (d *driverDMA) After() []string { return nil } func (d *driverDMA) Init() (bool, error) { d.dmaBufAllocator = videocore.Alloc d.pwmBaseFreq = 25 * physic.MegaHertz d.pwmDMAFreq = 200 * physic.KiloHertz // baseAddr is initialized by prerequisite driver bcm283x-gpio. if err := pmem.MapAsPOD(uint64(drvGPIO.baseAddr+0x7000), &d.dmaMemory); err != nil { if os.IsPermission(err) { return true, fmt.Errorf("need more access, try as root: %v", err) } return true, err } // Channel #15 is "physically removed from the other DMA Channels so it has a // different address base". if err := pmem.MapAsPOD(uint64(drvGPIO.baseAddr+0xE05000), &d.dmaChannel15); err != nil { return true, err } d.pcmBaseAddr = drvGPIO.baseAddr + 0x203000 if err := pmem.MapAsPOD(uint64(d.pcmBaseAddr), &d.pcmMemory); err != nil { return true, err } d.pwmBaseAddr = drvGPIO.baseAddr + 0x20C000 if err := pmem.MapAsPOD(uint64(d.pwmBaseAddr), &d.pwmMemory); err != nil { return true, err } if err := pmem.MapAsPOD(uint64(drvGPIO.baseAddr+0x101000), &d.clockMemory); err != nil { return true, err } if err := pmem.MapAsPOD(uint64(drvGPIO.baseAddr+0x3000), &d.timerMemory); err != nil { return true, err } if err := pmem.MapAsPOD(uint64(drvGPIO.baseAddr+0x100000), &d.gpioPadMemory); err != nil { return true, err } // Do not run smokeTest() unless it's clear it is not dangerous. return true, nil } func debugDMA() { for i, ch := range drvDMA.dmaMemory.channels { log.Println(i, ch.cs.String()) if ch.cs&dmaActive != 0 { log.Printf("%x: %s", ch.cbAddr, ch.GoString()) } } log.Println(15, drvDMA.dmaChannel15.cs.String()) } func resetDMA(ch int) error { if ch < len(drvDMA.dmaMemory.channels) { drvDMA.dmaMemory.channels[ch].reset() } else if ch == 15 { drvDMA.dmaChannel15.reset() } else { return fmt.Errorf("invalid dma channel %d", ch) } return nil } func init() { if isArm { periph.MustRegister(&drvDMA) } } var drvDMA driverDMA