rpi-rgb-led-matrix/led-matrix.cc
2014-09-04 05:00:04 +00:00

182 lines
6.4 KiB
C++

// -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
// Some experimental code.
// (c) H. Zeller <h.zeller@acm.org>. License: do whatever you want with it :)
//
// Using GPIO to control a 32x32 rgb LED panel (typically you find them with a
// suffix such as P4 or P5: that is the pitch in mm).
// So "32x32 rgb led p5" should find you something on 'the internets'.
#include "led-matrix.h"
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "gpio.h"
// Shifting one row of pixel data out to the panel costs about 3.4usec
// (TODO: this is actually per board). The LEDs are already lit while that
// happens, so the clocking time counts towards the 'on' period of a bit plane
// and is subtracted from every sleep time below.
static const int kRowClockTime = 3400;
static const int kBaseTime = kRowClockTime;  // smallest possible value.

// Extra time (in nanoseconds) to keep a row lit after it was clocked in, one
// entry per PWM bit weight; each weight doubles the previous one.
// UpdateScreen() reads entry [b + (7 - kPWMBits)], i.e. with fewer PWM bits it
// uses the longer entries, and the very last entry is never reached.
const long row_sleep_nanos[8] = {
  (kBaseTime << 0) - kRowClockTime,  // weight   1
  (kBaseTime << 1) - kRowClockTime,  // weight   2
  (kBaseTime << 2) - kRowClockTime,  // weight   4
  (kBaseTime << 3) - kRowClockTime,  // weight   8
  (kBaseTime << 4) - kRowClockTime,  // weight  16
  (kBaseTime << 5) - kRowClockTime,  // weight  32
  (kBaseTime << 6) - kRowClockTime,  // weight  64
  // Too much flicker with 8 bits. We should have a separate screen pass
  // with this bit plane. Or interlace. Or trick with -OE switch on in the
  // middle of row-clocking, thus have kRowClockTime / 2
  (kBaseTime << 7) - kRowClockTime,  // weight 128: too much flicker.
};
// Waits for roughly the given number of nanoseconds.
//
// nanos: requested delay in nanoseconds. Zero or negative values return
//        immediately.
//
// Delays above 28usec are handed to nanosleep(); anything shorter is a
// calibrated busy loop, because nanosleep() is not accurate in that range.
static void sleep_nanos(long nanos) {
  // Nothing to wait for. Without this guard a negative value would start the
  // busy loop below at a negative counter and spin (almost) forever.
  if (nanos <= 0) return;

  // For sleep times above 20usec, nanosleep seems to be fine, but it has
  // an offset of about 20usec (on the RPi distribution I was testing it on).
  // That means, we need to give it 80us to get 100us.
  // For values lower than roughly 30us, this is not accurate anymore and we
  // need to switch to busy wait.
  // TODO: compile Linux kernel realtime extensions and watch if the offset-time
  // changes and hope for less jitter.
  if (nanos > 28000) {
    const long kNanosPerSecond = 1000000000L;
    const long adjusted = nanos - 20000;
    // tv_nsec has to stay below one second, otherwise nanosleep() rejects the
    // request with EINVAL and does not sleep at all; so split off the seconds.
    struct timespec sleep_time;
    sleep_time.tv_sec = adjusted / kNanosPerSecond;
    sleep_time.tv_nsec = adjusted % kNanosPerSecond;
    nanosleep(&sleep_time, NULL);
  } else {
    // The following loop is determined empirically on a 700Mhz RPi
    for (int i = nanos >> 2; i > 0; --i) {
      asm("");  // force GCC not to optimize this away.
    }
  }
}
// Binds the matrix to the given GPIO object, registers every output line the
// panel is driven through and starts out with a cleared frame buffer.
RGBMatrix::RGBMatrix(GPIO *io) : io_(io) {
  // Build the set of all bits we are going to toggle later on.
  IoBits wanted;
  wanted.raw = 0;

  // Control lines.
  wanted.bits.strobe = 1;
  wanted.bits.clock = 1;
  wanted.bits.output_enable = 1;

  // Color lines of the first and second sub-panel.
  wanted.bits.b1 = 1;
  wanted.bits.g1 = 1;
  wanted.bits.r1 = 1;
  wanted.bits.b2 = 1;
  wanted.bits.g2 = 1;
  wanted.bits.r2 = 1;

  // Row address lines.
  wanted.bits.row = kRowMask;

  // Register the outputs; every requested bit has to come back as supported.
  const uint32_t granted = io_->InitOutputs(wanted.raw);
  assert(granted == wanted.raw);

  assert(kPWMBits < 8);  // only up to 7 makes sense.

  ClearScreen();
}
// Resets the complete frame buffer by zeroing every byte of bitplane_, which
// clears all color bits that SetPixel() may have set.
void RGBMatrix::ClearScreen() {
  void *const frame = &bitplane_;
  memset(frame, 0, sizeof(bitplane_));
}
// Paints every pixel of the display with the same color by calling SetPixel()
// for each coordinate, column by column.
void RGBMatrix::FillScreen(uint8_t red, uint8_t green, uint8_t blue) {
  const int columns = width();
  const int rows = height();
  for (int col = 0; col < columns; ++col) {
    int line = 0;
    while (line < rows) {
      SetPixel(col, line, red, green, blue);
      ++line;
    }
  }
}
void RGBMatrix::SetPixel(uint8_t x, uint8_t y,
uint8_t red, uint8_t green, uint8_t blue) {
if (x >= width() || y >= height()) return;
#if 0
// My setup: having four panels connected [>] [>]
// v
// [<] [<]
// So we have up to column 64 one direction, then folding around. Lets map
// that backward
if (y > 31) {
x = 127 - x;
y = 63 - y;
}
#endif
// TODO: re-map values to be luminance corrected (sometimes called 'gamma').
// Ideally, we had like 10PWM bits for this, but we're too slow for that :/
// Scale to the number of bit planes we actually have, so that MSB matches
// MSB of PWM.
red >>= 8 - kPWMBits;
green >>= 8 - kPWMBits;
blue >>= 8 - kPWMBits;
for (int b = 0; b < kPWMBits; ++b) {
uint8_t mask = 1 << b;
IoBits *bits = &bitplane_[b].row[y & kRowMask].column[x];
if (y < kDoubleRows) { // Upper sub-panel.
bits->bits.r1 = (red & mask) == mask;
bits->bits.g1 = (green & mask) == mask;
bits->bits.b1 = (blue & mask) == mask;
} else { // Lower sub-panel.
bits->bits.r2 = (red & mask) == mask;
bits->bits.g2 = (green & mask) == mask;
bits->bits.b2 = (blue & mask) == mask;
}
}
}
// Sends the frame buffer to the panel once: for each of the kDoubleRows row
// addresses, every bit plane is clocked in, latched with the strobe line and
// then displayed for the time given by row_sleep_nanos.
// NOTE(review): one call produces a single refresh pass; presumably the caller
// has to invoke this continuously to keep an image visible -- confirm.
void RGBMatrix::UpdateScreen() {
// NOTE(review): unlike the constructor, none of the IoBits below get .raw
// zeroed explicitly; this presumably relies on IoBits default-constructing to
// all-zero -- confirm against the declaration in led-matrix.h.
IoBits serial_mask; // Mask of bits we need to set while clocking in.
serial_mask.bits.r1 = serial_mask.bits.g1 = serial_mask.bits.b1 = 1;
serial_mask.bits.r2 = serial_mask.bits.g2 = serial_mask.bits.b2 = 1;
serial_mask.bits.clock = 1;
// Mask covering the row address lines.
IoBits row_mask;
row_mask.bits.row = kRowMask;
// Single-line masks for the three control signals.
IoBits clock, output_enable, strobe;
clock.bits.clock = 1;
output_enable.bits.output_enable = 1;
strobe.bits.strobe = 1;
// Holds the address of the row currently being displayed.
IoBits row_bits;
for (uint8_t row = 0; row < kDoubleRows; ++row) {
// Rows can't be switched very quickly without ghosting, so we do the
// full PWM of one row before switching rows.
for (int b = 0; b < kPWMBits; ++b) {
const DoubleRow &rowdata = bitplane_[b].row[row];
// Clock in the row. The time this takes is the smallest time we can
// leave the LEDs on, thus the smallest time-constant we can use for
// PWM (doubling the sleep time with each bit).
// So this is the critical path; I'd love to know if we can employ some
// DMA techniques to speed this up.
// (With this code, one row roughly takes 3.0 - 3.4usec to clock in).
//
// However, in particular for longer chaining, it seems we need some more
// wait time to settle.
// (5ns is far below the nanosleep threshold in sleep_nanos(), so each of
// these waits is a very short busy loop.)
const long kIOStabilizeWaitNanos = 5;
for (uint8_t col = 0; col < kColumns; ++col) {
const IoBits &out = rowdata.column[col];
// Pull low every color line that is 0 for this column. The clock bit is
// part of serial_mask and is not set in the stored data, so it goes low too.
io_->ClearBits(~out.raw & serial_mask.raw); // also: resets clock.
sleep_nanos(kIOStabilizeWaitNanos);
// Raise the color lines that are 1 for this column.
io_->SetBits(out.raw & serial_mask.raw);
sleep_nanos(kIOStabilizeWaitNanos);
// Raise the clock line now that the color lines are set for this column.
io_->SetBits(clock.raw);
sleep_nanos(kIOStabilizeWaitNanos);
}
io_->SetBits(output_enable.raw); // switch off while strobe.
// Put the row address on the row lines: set the 1-bits, clear the 0-bits.
row_bits.bits.row = row;
io_->SetBits(row_bits.raw & row_mask.raw);
io_->ClearBits(~row_bits.raw & row_mask.raw);
// Pulse the strobe line (high, then low) now that the row is clocked in.
io_->SetBits(strobe.raw); // Strobe
io_->ClearBits(strobe.raw);
// Now switch on for the given sleep time.
io_->ClearBits(output_enable.raw);
// If we use less bits, then use the upper areas which leaves us more
// CPU time to do other stuff.
// (Plane b maps to table entry b + (7 - kPWMBits), so the highest plane
// always uses entry 6.)
sleep_nanos(row_sleep_nanos[b + (7 - kPWMBits)]);
}
}
}