/* Copyright (C) 2000-2003 Constantin Kaplinsky.  All Rights Reserved.
 * Copyright (C) 2011 D. R. Commander.  All Rights Reserved.
 * Copyright 2014-2018 Pierre Ossman for Cendio AB
 * Copyright 2018 Peter Astrand for Cendio AB
 * Copyright (C) 2020-2025 m-privacy GmbH, Berlin
 * 
 * This is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this software; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
 * USA.
 */

#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#ifndef WIN32
#include <pwd.h>
#endif

#include <rfb/EncodeManager.h>
#include <rfb/Encoder.h>
#include <rfb/Palette.h>
#include <rfb/SConnection.h>
#include <rfb/SMsgWriter.h>
#include <rfb/UpdateTracker.h>
#include <rfb/LogWriter.h>
#include <rfb/Exception.h>
#include <rfb/Configuration.h>

#include <rfb/RawEncoder.h>
#include <rfb/RREEncoder.h>
#include <rfb/HextileEncoder.h>
#include <rfb/ZRLEEncoder.h>
#include <rfb/TightEncoder.h>
#include <rfb/TightJPEGEncoder.h>
#include <rfb/TightMPEncoder.h>
#include <rfb/MP.h>

#include <rdr/MultiOutStream.h>

using namespace rfb;

static LogWriter vlog("EncodeManager");

// Runtime-tunable server parameters. The three numbers following each
// description are presumably (default, minimum, maximum) -- TODO confirm
// against rfb/Configuration.h.

// Multiplied by 1024 in the EncodeManager constructor to set SubRectMaxArea.
IntParameter subRectMaxAreaK("SubRectMaxAreaK", "Max Tight rect size in KPixel", 64, 64, 1024);
// Number of encoder sets created, and of worker threads started, by the
// EncodeManager constructor.
IntParameter encoderThreads("EncoderThreads", "Number of encoder threads", ENCODEMANAGERTHREADSDEF, 1, ENCODEMANAGERTHREADSMAX);
// Not referenced in the functions documented in this part of the file; see
// the description string for its intent.
IntParameter threadWakeupInterval("ThreadWakeupInterval", "Wake up next encoder thread every n rects", 8, 1, 1024);
// Cap on how many rects rectQueueAppend() chains under one queue head.
IntParameter maxRectsPerThreadRun("MaxRectsPerThreadRun", "Max number of rects to be processed per thread run", 32, 1, 1024);
// Seconds pressureMonitorThread() sleeps between iterations.
IntParameter pressureCheckInterval("PressureCheckInterval", "Number of seconds to wait between pressure checks", 10, 1, 120);
// When set, prepareEncoders() never selects the Tight MP encoder.
BoolParameter noMP("NoMP", "Disable MP compression in Tight encoding", false);
// Multiplied by 1000 in getLosslessRefresh() to bound the area of one
// lossless refresh.
IntParameter maxRectK("MaxRectK", "Maximum Rect size in KPixel", 30, 1, 1000);

// Thread count chosen by prepareEncoders() for the current update.
int encoderThreadsUsed;
// Number of worker threads created by the EncodeManager constructor.
int encoderThreadsStarted;
// Starting value for encoderThreadsUsed; adjusted by pressureMonitorThread().
int encoderThreadsMaxUse;

// Split each rectangle into smaller ones no larger than this area,
// and no wider than this width.
// SubRectMaxArea is not const: the EncodeManager constructor overwrites it
// with subRectMaxAreaK * 1024.
static int SubRectMaxArea = 64 * 1024;
static const int SubRectMaxWidth = 2048;

// The size in pixels of either side of each block tested when looking
// for solid blocks.
static const int SolidSearchBlock = 16;
// Don't bother with blocks smaller than this
static const int SolidBlockMinArea = 2048;

// How long we consider a region recently changed (in ms)
static const int RecentChangeTimeout = 50;

// Polled by the worker and pressure monitor thread loops; cleared by the
// EncodeManager destructor to ask them to stop.
// NOTE(review): plain bool shared between threads without synchronisation.
bool runThreads = true;
// Storage for the static synchronisation members of EncodeManager.
// threadsReadyCondition is signalled by a worker that finished a batch and
// sees an empty queue; workers wait on rectsWaitingCondition while idle.
TGVNC_CONDITION_TYPE EncodeManager::threadsReadyCondition;
MUTEX_TYPE EncodeManager::threadsReadyConditionLock;
TGVNC_CONDITION_TYPE EncodeManager::rectsWaitingCondition;
MUTEX_TYPE EncodeManager::rectsWaitingConditionLock;


namespace rfb {

// Index into the per-thread encoders[] vectors (one encoder instance per
// class). encoderClassMax is the element count, not a real class.
enum EncoderClass {
  encoderRaw,
  encoderRRE,
  encoderHextile,
  encoderTight,
  encoderTightJPEG,
  encoderZRLE,
  encoderTightMP,
  encoderClassMax,
};

// Index into the per-thread activeEncoders[] vectors, which map each kind
// of rect content to the EncoderClass chosen for it. encoderTypeMax is the
// element count, not a real type.
enum EncoderType {
  encoderSolid,
  encoderBitmap,
  encoderBitmapRLE,
  encoderIndexed,
  encoderIndexedRLE,
  encoderFullColour,
  encoderTypeMax,
};

// Per-rect analysis result: RLE run count plus the colour palette.
struct RectInfo {
  int rleRuns;
  Palette palette;
};

// One queued rect (corners tl/br within pixel buffer pb), singly linked to
// the next rect of the same batch. Allocated with malloc() in
// rectQueueAppend() and released with free() by the worker thread.
struct RectQueueItem {
  Point tl;
  Point br;
  const PixelBuffer *pb;
  struct RectQueueItem * next;
};

// One batch of queued rects. A worker thread takes a whole batch at once in
// rectQueuePop(); count is the number of items chained from first to last,
// and next links to the following batch.
struct RectQueueHead {
  struct RectQueueItem * first;
  struct RectQueueItem * last;
  struct RectQueueHead * next;
  int count;
};

// Start-up argument for writeSubRectThread(); allocated by the constructor
// and freed by the thread once it has copied both fields.
struct threadParam {
  EncodeManager* myself;
  int threadNum;
};

};

// Translate an EncoderClass value into the label used in the statistics
// log. Values without a label (including encoderClassMax) yield
// "Unknown Encoder Class".
static const char *encoderClassName(EncoderClass klass)
{
  const char *label = "Unknown Encoder Class";

  switch (klass) {
  case encoderRaw:
    label = "Raw";
    break;
  case encoderRRE:
    label = "RRE";
    break;
  case encoderHextile:
    label = "Hextile";
    break;
  case encoderTight:
    label = "Tight";
    break;
  case encoderTightJPEG:
    label = "Tight (JPEG)";
    break;
  case encoderTightMP:
    label = "Tight (MP)";
    break;
  case encoderZRLE:
    label = "ZRLE";
    break;
  case encoderClassMax:
    // Sentinel only; keeps the fallback label.
    break;
  }

  return label;
}

// Translate an EncoderType value into the label used in the statistics
// log. Values without a label (including encoderTypeMax) yield
// "Unknown Encoder Type".
static const char *encoderTypeName(EncoderType type)
{
  const char *label = "Unknown Encoder Type";

  switch (type) {
  case encoderSolid:
    label = "Solid";
    break;
  case encoderBitmap:
    label = "Bitmap";
    break;
  case encoderBitmapRLE:
    label = "Bitmap RLE";
    break;
  case encoderIndexed:
    label = "Indexed";
    break;
  case encoderIndexedRLE:
    label = "Indexed RLE";
    break;
  case encoderFullColour:
    label = "Full Colour";
    break;
  case encoderTypeMax:
    // Sentinel only; keeps the fallback label.
    break;
  }

  return label;
}

// Set up the encode manager for one connection: initialise the
// synchronisation objects, create one full set of encoders per configured
// thread, start the worker threads and the pressure monitor thread, and
// zero the statistics.
//
// conn_: connection whose client settings and output stream the encoders
//        use.
EncodeManager::EncodeManager(SConnection* conn_)
  : rectQueueFirstHead(NULL), rectQueueLastHead(NULL), conn(conn_), recentChangeTimer(this), maxRectsUsed(0)
{
  StatsVector::iterator iter;

  // All three global thread counters start out at the configured value.
  encoderThreadsMaxUse = encoderThreadsUsed = encoderThreadsStarted = encoderThreads;

  // NOTE(review): the first four objects are static class members, so every
  // EncodeManager instance re-initialises the same objects -- confirm only
  // one instance exists per process.
  MUTEX_INIT(&threadsReadyConditionLock);
  TGVNC_CONDITION_INIT(&threadsReadyCondition);
  MUTEX_INIT(&rectsWaitingConditionLock);
  TGVNC_CONDITION_INIT(&rectsWaitingCondition);
  MUTEX_INIT(&rectQueueMutex);
  MUTEX_INIT(&regionMutex);

  // One private encoder instance of every class per thread slot; the
  // active mapping defaults to Raw until prepareEncoders() runs.
  for (int threadNum = 0; threadNum < encoderThreads ; threadNum++) {
    encoders[threadNum].resize(encoderClassMax, NULL);
    activeEncoders[threadNum].resize(encoderTypeMax, encoderRaw);
    encoders[threadNum][encoderRaw] = new RawEncoder(conn);
    encoders[threadNum][encoderRRE] = new RREEncoder(conn);
    encoders[threadNum][encoderHextile] = new HextileEncoder(conn);
    encoders[threadNum][encoderTight] = new TightEncoder(conn);
    encoders[threadNum][encoderTightJPEG] = new TightJPEGEncoder(conn);
    encoders[threadNum][encoderTightMP] = new TightMPEncoder(conn);
    encoders[threadNum][encoderZRLE] = new ZRLEEncoder(conn);
  }
  /* we start one less threads than we have sets of encoders, because one set of
   * encoders is reserved for direct use
   *
   * NOTE(review): as written, encoderThreadsStarted equals encoderThreads
   * (see the assignment at the top), so the number of threads started here
   * matches the number of encoder sets -- confirm which is intended.
   */
  vlog.info("starting %u encoder threads", encoderThreadsStarted);
  for (int threadNum = 0; threadNum < encoderThreadsStarted ; threadNum++) {
    struct threadParam * threadP;

    // Ownership of threadP passes to the new thread, which frees it.
    // NOTE(review): the malloc() result is not checked before use.
    threadP = (struct threadParam *) malloc(sizeof(struct threadParam));
    threadP->threadNum = threadNum;
    threadP->myself = this;
    threadBusy[threadNum] = false;
    THREAD_CREATE(writeSubRectThread, writeSubRectThreadId[threadNum], threadP);
    THREAD_SET_NAME(writeSubRectThreadId[threadNum], "tg-writeSubRect");
  }
  vlog.info("starting pressure monitor thread");
  THREAD_CREATE(pressureMonitorThread, pressureMonitorThreadId, this);
  THREAD_SET_NAME(pressureMonitorThreadId, "tg-pressureMonitor");

  // Reset all statistics: one EncoderStats per (class, type) pair.
  updates = 0;
  memset(&copyStats, 0, sizeof(copyStats));
  stats.resize(encoderClassMax);
  for (iter = stats.begin();iter != stats.end();++iter) {
    StatsVector::value_type::iterator iter2;
    iter->resize(encoderTypeMax);
    for (iter2 = iter->begin();iter2 != iter->end();++iter2)
      memset(&*iter2, 0, sizeof(EncoderStats));
  }
  // Apply the configured sub-rect area limit (parameter is in KPixel).
  SubRectMaxArea = subRectMaxAreaK * 1024;
  vlog.info("SubRectMaxArea: %u", SubRectMaxArea);
}

// Log the final statistics, ask all helper threads to stop, cancel them and
// release every encoder instance.
EncodeManager::~EncodeManager()
{
  logStats();

  // Tell the thread loops to finish, then cancel the monitor followed by
  // each worker.
  runThreads = false;
  THREAD_CANCEL(pressureMonitorThreadId);
  int worker = 0;
  while (worker < encoderThreadsStarted) {
    THREAD_CANCEL(writeSubRectThreadId[worker]);
    worker++;
  }

  // Free the per-thread encoder sets created in the constructor.
  for (int set = 0; set < encoderThreads; set++) {
    std::vector<Encoder*>& encoderSet = encoders[set];
    for (size_t idx = 0; idx < encoderSet.size(); idx++)
      delete encoderSet[idx];
  }
#if 0
  MUTEX_DESTROY(&rectQueueMutex);
  MUTEX_DESTROY(&regionMutex);
  TGVNC_CONDITION_DESTROY(&threadsReadyCondition);
  MUTEX_DESTROY(&threadsReadyConditionLock);
  TGVNC_CONDITION_DESTROY(&rectsWaitingCondition);
  MUTEX_DESTROY(&rectsWaitingConditionLock);
#endif
}

// Detach the oldest batch of rects from the queue and hand it to the
// calling worker thread.
//
// threadNum: index of the calling worker; its threadBusy flag is set when a
//            batch is returned.
// Returns the first item of the batch (the caller walks ->next and frees
// each item), or NULL when the queue is empty.
struct RectQueueItem * EncodeManager::rectQueuePop(int threadNum) {
  struct RectQueueHead * head;
  struct RectQueueItem * item;

  MUTEX_LOCK(&rectQueueMutex);
  head = rectQueueFirstHead;
  if (!head) {
    MUTEX_UNLOCK(&rectQueueMutex);
    return NULL;
  }

  rectQueueFirstHead = head->next;
  if (!rectQueueFirstHead)
    rectQueueLastHead = NULL;
  // Mark the worker busy before the lock is released. Otherwise there is a
  // window in which the batch has left the queue but no thread is flagged
  // as working on it, so an observer could conclude all work is finished.
  threadBusy[threadNum] = true;
  MUTEX_UNLOCK(&rectQueueMutex);

  // The head is only a container for the batch; the items live on.
  item = head->first;
  free(head);
  return item;
}

// Queue one rect for the worker threads.
//
// The rect is added to the newest batch while that batch holds fewer than
// maxRectsPerThreadRun items; otherwise a new batch is started. On
// allocation failure the rect is dropped and an error is logged.
//
// rect: area to encode.
// pb:   pixel buffer the area refers to; only the pointer is stored.
void EncodeManager::rectQueueAppend(const Rect& rect, const PixelBuffer *pb)
{
  struct RectQueueItem * item;

  item = (struct RectQueueItem *) malloc(sizeof(struct RectQueueItem));
  if (!item) {
    vlog.error("rectQueueAppend(): failed to alloc item");
    return;
  }
  item->tl = rect.tl;
  item->br = rect.br;
  item->pb = pb;
  item->next = NULL;

  MUTEX_LOCK(&rectQueueMutex);
  if (rectQueueLastHead && rectQueueLastHead->count < maxRectsPerThreadRun) {
    // Room left in the newest batch: chain the item at its tail.
    rectQueueLastHead->last->next = item;
    rectQueueLastHead->last = item;
    rectQueueLastHead->count++;
  } else {
    struct RectQueueHead * head;

    head = (struct RectQueueHead *) malloc(sizeof(struct RectQueueHead));
    if (!head) {
      vlog.error("rectQueueAppend(): failed to alloc head");
      MUTEX_UNLOCK(&rectQueueMutex);
      // The item was never linked into the queue, so release it here.
      free(item);
      return;
    }
    head->first = item;
    head->last = item;
    head->count = 1;
    head->next = NULL;
    if (rectQueueLastHead)
      rectQueueLastHead->next = head;
    else
      rectQueueFirstHead = head;
    rectQueueLastHead = head;
  }
  MUTEX_UNLOCK(&rectQueueMutex);
}

// Worker thread body: repeatedly take a batch of rects from the queue and
// encode each one with this thread's encoder set.
//
// param: malloc()ed struct threadParam carrying the owning EncodeManager
//        and this worker's index; freed here after both fields are copied.
THREAD_FUNC EncodeManager::writeSubRectThread(void* param) {
  EncodeManager* myself = ((struct threadParam *) param)->myself;
  int myNum = ((struct threadParam *) param)->threadNum;
  struct RectQueueItem * queueItem;
  struct RectQueueItem * nextQueueItem;

  free(param);
#ifdef WIN32
  vlog.debug("writeSubRectThread %u (tid %lu) created", myNum, GetCurrentThreadId());
#else
#if defined(__APPLE__)
  vlog.debug("writeSubRectThread %u (tid %u) created", myNum, gettid());
#else
  // Cast so the variadic argument matches %lu whatever integer type
  // gettid() returns on this platform.
  vlog.debug("writeSubRectThread %u (tid %lu) created", myNum, (unsigned long) gettid());
#endif
#endif

  while (runThreads) {
    /* rectQueuePop() sets threadBusy[myNum], if item returned */
    while (runThreads && !(queueItem = myself->rectQueuePop(myNum))) {
      // Nothing queued: sleep until signalled, or for at most the timeout
      // passed below, then look again.
      MUTEX_LOCK(&rectsWaitingConditionLock);
      TGVNC_CONDITION_TIMED_WAIT(&rectsWaitingCondition, &rectsWaitingConditionLock, 2000);
      MUTEX_UNLOCK(&rectsWaitingConditionLock);
    }
    if (!runThreads) {
      myself->threadBusy[myNum] = false;
      THREAD_EXIT(THREAD_NULL);
    }
    // Encode the whole batch, releasing each item once it is written.
    while (queueItem) {
      myself->writeSubRect(myNum, Rect(queueItem->tl, queueItem->br), queueItem->pb);
      nextQueueItem = queueItem->next;
      free(queueItem);
      queueItem = nextQueueItem;
    }
    myself->threadBusy[myNum] = false;
    // Queue looks drained: signal whoever waits on threadsReadyCondition.
    // NOTE(review): rectQueueFirstHead is read here without rectQueueMutex.
    if (!myself->rectQueueFirstHead)
      TGVNC_CONDITION_SEND_SIG(&threadsReadyCondition);
  }
  THREAD_EXIT(THREAD_NULL);
}

// Monitor thread body. Once per pressureCheckInterval seconds it:
//  - (non-Windows) checks for per-user marker files that request
//    bandwidth-saving modes, runs the Pulseaudio tunnel start script if it
//    is executable, and adjusts the Pulseaudio zstd level accordingly;
//  - reads the integer part of "avg10=" from the per-user cgroup
//    cpu.pressure file, falling back to /proc/pressure/cpu;
//  - sets encoderThreadsMaxUse depending on whether the client's MP
//    compression mode is reported as slow;
//  - forwards the results to MPCompressor.
//
// param: the owning EncodeManager.
THREAD_FUNC EncodeManager::pressureMonitorThread(void* param) {
  const EncodeManager * myself = (EncodeManager *) param;
#ifndef WIN32
  const char * cgroupFileBase = "/cgroup/";
  const char * cgroupFileCpu = "/cpu.pressure";
  const char * bwFileBase = "/home/user/.bandwidth/";
  const char * bwFileBaseHigh = "/home/user/.bandwidthhigh/";
  const char * bwPaStart = "/usr/local/bin/startpatunnel";
#endif
  const char * globalFile = "/proc/pressure/cpu";
  char cpuFilename[256];
  bool saveBandwidth = false;
  bool saveBandwidthHigh = false;
#ifndef WIN32
  struct passwd * pw;
#endif
  FILE *file;
  char cpuString[256];
  char * p;
  char * q;
  rdr::U8 cpuVal = 0;

#ifdef WIN32
  vlog.debug("pressureMonitorThread (tid %lu) created", GetCurrentThreadId());
#else
#if defined(__APPLE__)
  vlog.debug("pressureMonitorThread (tid %u) created", gettid());
#else
  // Cast so the variadic argument matches %lu whatever integer type
  // gettid() returns on this platform.
  vlog.debug("pressureMonitorThread (tid %lu) created", (unsigned long) gettid());
#endif
#endif

  while (runThreads) {
#ifndef WIN32
    pw = getpwuid(getuid());
    if (pw) {
      char bwFilename[256];
      bool changed = false;

      // Prefer the per-user cgroup pressure file when it is readable.
      snprintf(cpuFilename, 255, "%s%s%s", cgroupFileBase, pw->pw_name, cgroupFileCpu);
      cpuFilename[255] = 0;
      if (!access(cpuFilename, R_OK)) {
        vlog.verbose("pressureMonitorThread: using cgroup file %s", cpuFilename);
      } else {
        vlog.debug("pressureMonitorThread: cgroup file %s not readable, using global file %s", cpuFilename, globalFile);
        snprintf(cpuFilename, sizeof(cpuFilename), "%s", globalFile);
      }

      // The "high" marker file takes precedence over the normal one.
      snprintf(bwFilename, 255, "%s%s", bwFileBaseHigh, pw->pw_name);
      bwFilename[255] = 0;
      if (!access(bwFilename, F_OK)) {
        if (!saveBandwidthHigh) {
          vlog.debug("pressureMonitorThread: changed to save bandwidth high");
          changed = true;
        }
        saveBandwidthHigh = true;
        saveBandwidth = false;
      } else {
        snprintf(bwFilename, 255, "%s%s", bwFileBase, pw->pw_name);
        bwFilename[255] = 0;
        if (!access(bwFilename, F_OK)) {
          if (!saveBandwidth) {
            vlog.debug("pressureMonitorThread: changed to save bandwidth");
            changed = true;
          }
          saveBandwidth = true;
        } else {
          if (saveBandwidth || saveBandwidthHigh) {
            vlog.debug("pressureMonitorThread: changed to full bandwidth");
            changed = true;
          }
          saveBandwidth = false;
        }
        saveBandwidthHigh = false;
      }

      // The tunnel script is run on every iteration; "changed" only
      // controls whether the mode switch is logged.
      // NOTE(review): "&>" is a bash extension; a POSIX /bin/sh parses it
      // as "&" followed by ">/dev/null" -- confirm the intended behaviour.
      if (!access(bwPaStart, X_OK)) {
        char cmd[256];

        if (saveBandwidthHigh) {
          snprintf(cmd, 255, "%s high &>/dev/null", bwPaStart);
          if (changed)
            vlog.debug("pressureMonitorThread: starting Pulseaudio resampling tunnel for high");
          if (rdr::MultiOutStream::pulseZstdLevel != 10)
            rdr::MultiOutStream::pulseZstdLevel.setParam(10);
        } else if (saveBandwidth) {
          snprintf(cmd, 255, "%s low &>/dev/null", bwPaStart);
          if (changed)
            vlog.debug("pressureMonitorThread: starting Pulseaudio resampling tunnel for low");
          if (rdr::MultiOutStream::pulseZstdLevel != 5)
            rdr::MultiOutStream::pulseZstdLevel.setParam(5);
        } else {
          snprintf(cmd, 255, "%s none &>/dev/null", bwPaStart);
          if (changed)
            vlog.debug("pressureMonitorThread: starting Pulseaudio resampling tunnel for none");
          if (rdr::MultiOutStream::pulseZstdLevel != 3)
            rdr::MultiOutStream::pulseZstdLevel.setParam(3);
        }
        cmd[255] = 0;
        system(cmd);
      }
    } else {
#endif
      vlog.debug("pressureMonitorThread: failed to get username, using global file %s", globalFile);
      snprintf(cpuFilename, sizeof(cpuFilename), "%s", globalFile);
#ifndef WIN32
    }
#endif

    file = fopen(cpuFilename, "r");
    if (file) {
      // fread() does not NUL-terminate; terminate explicitly so strstr()
      // and the scan below never read past the bytes actually read.
      const size_t len = fread(cpuString, 1, sizeof(cpuString) - 1, file);
      cpuString[len] = 0;
      if (len > 5) {
        p = strstr(cpuString, "avg10=");
        if (p) {
          // Keep only the integer part: cut the string at the decimal point.
          p += 6;
          q = p;
          while (*q && *q != '.')
            q++;
          if (*q) {
            *q = 0;
            cpuVal = atoi(p);
            vlog.verbose("pressureMonitorThread: cpu pressure %u", cpuVal);
          } else {
            cpuVal = 0;
            vlog.debug("pressureMonitorThread: invalid cpu pressure value");
          }
        } else {
          cpuVal = 0;
          vlog.debug("pressureMonitorThread: cpu pressure value avg10 not found");
        }
      } else {
        cpuVal = 0;
        vlog.debug("pressureMonitorThread: failed to read %s", cpuFilename);
      }
      fclose(file);
    } else {
      cpuVal = 0;
      vlog.debug("pressureMonitorThread: failed to open %s", cpuFilename);
    }

    // Slow MP compression modes may use every started worker; all other
    // modes are limited to one thread.
    if (mpCompressionSlow(myself->conn->client.mpCompression)) {
      if (encoderThreadsMaxUse != encoderThreadsStarted) {
        vlog.debug("pressureMonitorThread: change encoderThreadsMaxUse to %u for mpCompression %u", encoderThreadsStarted, myself->conn->client.mpCompression);
        encoderThreadsMaxUse = encoderThreadsStarted;
      }
      rdr::MultiOutStream::setMultiMaxVNCChunk(true);
    } else {
      if (encoderThreadsMaxUse != 1) {
        vlog.debug("pressureMonitorThread: change encoderThreadsMaxUse to 1 for mpCompression %u", myself->conn->client.mpCompression);
        encoderThreadsMaxUse = 1;
      }
      rdr::MultiOutStream::setMultiMaxVNCChunk(false);
    }
    MPCompressor::setCpuPressure(cpuVal);
    MPCompressor::setSaveBandwidth(saveBandwidth);
    MPCompressor::setSaveBandwidthHigh(saveBandwidthHigh);
#if !defined(__APPLE__)
    MPCompressor::setSupportsTightMPJpegXL(myself->conn->client.supportsTightMPJpegXL);
#endif

    sleep(pressureCheckInterval);
  }
  THREAD_EXIT(THREAD_NULL);
}

// Write the accumulated encoding statistics to the log: the CopyRect
// figures first, then one section per encoder class that produced at least
// one rect, and finally the grand totals.
void EncodeManager::logStats()
{
  char countText[1024], sizeText[1024];

  unsigned totalRects = 0;
  unsigned long long totalPixels = 0;
  unsigned long long totalBytes = 0;
  unsigned long long totalEquivalent = 0;

  vlog.info("Framebuffer updates: %u", updates);

  if (copyStats.rects != 0) {
    const double copyRatio = (double)copyStats.equivalent / copyStats.bytes;

    totalRects += copyStats.rects;
    totalPixels += copyStats.pixels;
    totalBytes += copyStats.bytes;
    totalEquivalent += copyStats.equivalent;

    vlog.info("  %s:", "CopyRect");
    siPrefix(copyStats.rects, "rects", countText, sizeof(countText));
    siPrefix(copyStats.pixels, "pixels", sizeText, sizeof(sizeText));
    vlog.info("    %s: %s, %s", "Copies", countText, sizeText);
    iecPrefix(copyStats.bytes, "B", countText, sizeof(countText));
    vlog.info("    %*s  %s (1:%g ratio)",
              (int)strlen("Copies"), "",
              countText, copyRatio);
  }

  for (size_t klass = 0; klass < stats.size(); klass++) {
    // The class heading is emitted lazily, right before the first type
    // that has data, so unused classes print nothing at all.
    bool headingDone = false;

    for (size_t type = 0; type < stats[klass].size(); type++) {
      if (stats[klass][type].rects == 0)
        continue;

      if (!headingDone) {
        vlog.info("  %s:", encoderClassName((EncoderClass)klass));
        headingDone = true;
      }

      const char *typeLabel = encoderTypeName((EncoderType)type);
      const double typeRatio =
        (double)stats[klass][type].equivalent / stats[klass][type].bytes;

      totalRects += stats[klass][type].rects;
      totalPixels += stats[klass][type].pixels;
      totalBytes += stats[klass][type].bytes;
      totalEquivalent += stats[klass][type].equivalent;

      siPrefix(stats[klass][type].rects, "rects", countText, sizeof(countText));
      siPrefix(stats[klass][type].pixels, "pixels", sizeText, sizeof(sizeText));
      vlog.info("    %s: %s, %s", typeLabel, countText, sizeText);
      iecPrefix(stats[klass][type].bytes, "B", countText, sizeof(countText));
      vlog.info("    %*s  %s (1:%g ratio)",
                (int)strlen(typeLabel), "",
                countText, typeRatio);
    }
  }

  const double totalRatio = (double)totalEquivalent / totalBytes;

  siPrefix(totalRects, "rects", countText, sizeof(countText));
  siPrefix(totalPixels, "pixels", sizeText, sizeof(sizeText));
  vlog.info("  Total: %s, %s", countText, sizeText);
  iecPrefix(totalBytes, "B", countText, sizeof(countText));
  vlog.info("         %s (1:%g ratio)", countText, totalRatio);
  vlog.info("  Max rects used: %u", maxRectsUsed);
}

// Report whether the given RFB encoding number is one this manager can
// produce: Raw, RRE, Hextile, ZRLE or Tight.
bool EncodeManager::supported(int encoding)
{
  return encoding == encodingRaw ||
         encoding == encodingRRE ||
         encoding == encodingHextile ||
         encoding == encodingZRLE ||
         encoding == encodingTight;
}

// True when part of req is currently tracked as lossy and the
// recent-change timer is running. Both conditions are evaluated while
// holding regionMutex.
bool EncodeManager::needsLosslessRefresh(const Region& req)
{
  MUTEX_LOCK(&regionMutex);
  const bool overlapsLossy = !lossyRegion.intersect(req).is_empty();
  const bool timerRunning = recentChangeTimer.isStarted();
  MUTEX_UNLOCK(&regionMutex);

  return overlapsLossy && timerRunning;
}

// Time until a lossless refresh of req can be sent.
//
// Returns 0 when part of req is already pending a refresh, or when the
// recent-change timer is not running; otherwise returns the timer's next
// timeout.
int EncodeManager::getNextLosslessRefresh(const Region& req)
{
  // Only the region test needs the lock.
  MUTEX_LOCK(&regionMutex);
  const bool refreshReady = !pendingRefreshRegion.intersect(req).is_empty();
  MUTEX_UNLOCK(&regionMutex);

  if (refreshReady || !recentChangeTimer.isStarted())
    return 0;

  assert(needsLosslessRefresh(req));

  return recentChangeTimer.getNextTimeout();
}

// Clip both refresh-tracking regions to limits, discarding everything
// outside it.
void EncodeManager::pruneLosslessRefresh(const Region& limits)
{
  MUTEX_LOCK(&regionMutex);
  // The two regions are independent, so the order of clipping is irrelevant.
  pendingRefreshRegion.assign_intersect(limits);
  lossyRegion.assign_intersect(limits);
  MUTEX_UNLOCK(&regionMutex);
}

void EncodeManager::forceRefresh(const Region& req)
{
  lossyRegion.assign_union(req);
  if (!recentChangeTimer.isStarted())
    pendingRefreshRegion.assign_union(req);
}

// Send a regular (lossy-allowed) framebuffer update described by ui, then
// record the touched areas as recently changed and make sure the
// recent-change timer is running.
void EncodeManager::writeUpdate(const UpdateInfo& ui, const PixelBuffer* pb,
                                const RenderedCursor* renderedCursor)
{
  const bool lossyAllowed = true;
  doUpdate(lossyAllowed, ui.changed, ui.copied, ui.copy_delta, pb, renderedCursor);

  MUTEX_LOCK(&regionMutex);
  recentlyChangedRegion.assign_union(ui.changed);
  recentlyChangedRegion.assign_union(ui.copied);
  MUTEX_UNLOCK(&regionMutex);

  // A timer that is already running is left untouched.
  if (recentChangeTimer.isStarted())
    return;
  recentChangeTimer.start(RecentChangeTimeout);
}

// Send a lossless refresh for the part of req chosen by
// getLosslessRefresh(). No copy rects are involved, so an empty copied
// region and a zero delta are passed on.
void EncodeManager::writeLosslessRefresh(const Region& req, const PixelBuffer* pb,
                                         const RenderedCursor* renderedCursor)
{
  const bool lossyAllowed = false;
  doUpdate(lossyAllowed, getLosslessRefresh(req),
           Region(), Point(),
           pb, renderedCursor);
}

// Timer callback. For the recent-change timer: every lossy area that was
// not changed during the last interval becomes pending for refresh, and the
// set of recently changed areas is reset.
//
// Returns true when lossy areas remain that are not yet pending; false
// otherwise, and for any other timer.
bool EncodeManager::handleTimeout(Timer* t)
{
  if (t != &recentChangeTimer)
    return false;

  MUTEX_LOCK(&regionMutex);
  pendingRefreshRegion.assign_union(lossyRegion.subtract(recentlyChangedRegion));
  recentlyChangedRegion.clear();

  // Anything lossy that is still not scheduled needs a further pass.
  const bool anotherRound = !lossyRegion.subtract(pendingRefreshRegion).is_empty();
  MUTEX_UNLOCK(&regionMutex);

  return anotherRound;
}

void EncodeManager::doUpdate(bool allowLossy, const Region& changed_,
                             const Region& copied, const Point& copyDelta,
                             const PixelBuffer* pb,
                             const RenderedCursor* renderedCursor)
{
    int nRects;
    Region changed, cursorRegion;

    updates++;

    prepareEncoders(allowLossy);

    changed = changed_;

    if (!conn->client.supportsEncoding(encodingCopyRect))
      changed.assign_union(copied);

    /*
     * We need to render the cursor seperately as it has its own
     * magical pixel buffer, so split it out from the changed region.
     */
    if (renderedCursor != NULL) {
      cursorRegion = changed.intersect(renderedCursor->getEffectiveRect());
      changed.assign_subtract(renderedCursor->getEffectiveRect());
    }

    if (conn->client.supportsEncoding(pseudoEncodingLastRect))
      nRects = 0xFFFF;
    else {
      nRects = 0;
      if (conn->client.supportsEncoding(encodingCopyRect))
        nRects += copied.numRects();
      nRects += computeNumRects(changed);
      nRects += computeNumRects(cursorRegion);
    }

    conn->writer()->writeFramebufferUpdateStart(nRects);

    if (conn->client.supportsEncoding(encodingCopyRect))
      writeCopyRects(copied, copyDelta);

    /*
     * We start by searching for solid rects, which are then removed
     * from the changed region.
     */
    if (conn->client.supportsEncoding(pseudoEncodingLastRect))
      writeSolidRects(&changed, pb);

    writeRects(changed, pb);
    writeRects(cursorRegion, renderedCursor);

    conn->writer()->writeFramebufferUpdateEnd();
}

// Choose, for every encoder thread, which encoder class handles each rect
// content type, and push the client's compression/quality settings into
// the chosen encoders. Also derives encoderThreadsUsed: it starts at
// encoderThreadsMaxUse and is forced to 1 by several of the selections
// below.
//
// allowLossy: false for lossless refreshes; JPEG is then only allowed if
//             the encoder has a lossless quality level, and quality is
//             raised to at least that level.
void EncodeManager::prepareEncoders(bool allowLossy)
{
  enum EncoderClass solid, bitmap, bitmapRLE;
  enum EncoderClass indexed, indexedRLE, fullColour;

  bool allowJPEG;

  rdr::S32 preferred;

  std::vector<int>::iterator iter;

  allowJPEG = conn->client.pf().bpp >= 16;

  for (int threadNum = 0; threadNum < encoderThreads ; threadNum++) {
    const bool MPEnabled = !noMP && conn->client.supportsTightMP && encoders[threadNum][encoderTightMP]->isSupported();

    // Start every thread's selection from scratch. encoderThreadsUsed is
    // reset below on each iteration; if the choices were carried over from
    // the previous iteration, the "still unassigned" fallbacks (and the
    // encoderThreadsUsed = 1 limits attached to them) would be skipped for
    // every thread after the first.
    solid = bitmap = bitmapRLE = encoderRaw;
    indexed = indexedRLE = fullColour = encoderRaw;

    if (!allowLossy) {
      if (encoders[threadNum][encoderTightJPEG]->losslessQuality == -1)
        allowJPEG = false;
    }
    encoders[threadNum][encoderTightMP]->setMPLevel(conn->client.mpLevel);
    encoders[threadNum][encoderTightMP]->setMPCompression(conn->client.mpCompression);

    // Try to respect the client's wishes
    preferred = conn->getPreferredEncoding();
    encoderThreadsUsed = encoderThreadsMaxUse;
    switch (preferred) {
    case encodingRRE:
      // Horrible for anything high frequency and/or lots of colours
      bitmapRLE = indexedRLE = encoderRRE;
      break;
    case encodingHextile:
      // Slightly less horrible
      bitmapRLE = indexedRLE = fullColour = encoderHextile;
      break;
    case encodingTight:
      if (MPEnabled) {
        fullColour = encoderTightMP;
      } else if (encoders[threadNum][encoderTightJPEG]->isSupported() && allowJPEG) {
        fullColour = encoderTightJPEG;
        encoderThreadsUsed = 1;
      } else {
        // Plain Tight for everything
        fullColour = encoderTight;
        indexed = indexedRLE = encoderTight;
        bitmap = bitmapRLE = encoderTight;
        encoderThreadsUsed = 1;
      }
      break;
    case encodingZRLE:
      fullColour = encoderZRLE;
      bitmapRLE = indexedRLE = encoderZRLE;
      bitmap = indexed = encoderZRLE;
      encoderThreadsUsed = 1;
      break;
    }

    // Any encoders still unassigned?

    if (fullColour == encoderRaw) {
      if (MPEnabled) {
        fullColour = encoderTightMP;
      } else if (encoders[threadNum][encoderTightJPEG]->isSupported() && allowJPEG) {
        fullColour = encoderTightJPEG;
        encoderThreadsUsed = 1;
      } else if (encoders[threadNum][encoderZRLE]->isSupported()) {
        fullColour = encoderZRLE;
        encoderThreadsUsed = 1;
      } else if (encoders[threadNum][encoderTight]->isSupported()) {
        fullColour = encoderTight;
        encoderThreadsUsed = 1;
      } else if (encoders[threadNum][encoderHextile]->isSupported())
        fullColour = encoderHextile;
    }

    if (solid == encoderRaw) {
      if (MPEnabled && conn->client.supportsTightMPSolid) {
        solid = encoderTightMP;
      } else if (encoders[threadNum][encoderTight]->isSupported()) {
        solid = encoderTight;
        encoderThreadsUsed = 1;
      } else if (encoders[threadNum][encoderRRE]->isSupported())
        solid = encoderRRE;
      else if (encoders[threadNum][encoderZRLE]->isSupported()) {
        solid = encoderZRLE;
        encoderThreadsUsed = 1;
      } else if (encoders[threadNum][encoderHextile]->isSupported())
        solid = encoderHextile;
    }

    if (indexed == encoderRaw) {
      // While still multi-threaded, Hextile is tried first here.
      if (encoderThreadsUsed > 1 && encoders[threadNum][encoderHextile]->isSupported())
        indexed = encoderHextile;
      else if (encoders[threadNum][encoderZRLE]->isSupported()) {
        indexed = encoderZRLE;
        encoderThreadsUsed = 1;
      } else if (encoders[threadNum][encoderTight]->isSupported()) {
        indexed = encoderTight;
        encoderThreadsUsed = 1;
      } else if (encoders[threadNum][encoderHextile]->isSupported())
        indexed = encoderHextile;
    }

    if (indexedRLE == encoderRaw)
      indexedRLE = indexed;

    if (bitmap == encoderRaw)
      bitmap = indexed;
    if (bitmapRLE == encoderRaw)
      bitmapRLE = bitmap;

    // JPEG is the only encoder that can reduce things to grayscale
    if ((conn->client.subsampling == subsampleGray) &&
        encoders[threadNum][encoderTightJPEG]->isSupported() && allowLossy) {
      solid = bitmap = bitmapRLE = encoderTightJPEG;
      indexed = indexedRLE = fullColour = encoderTightJPEG;
      encoderThreadsUsed = 1;
    }

    activeEncoders[threadNum][encoderSolid] = solid;
    activeEncoders[threadNum][encoderBitmap] = bitmap;
    activeEncoders[threadNum][encoderBitmapRLE] = bitmapRLE;
    activeEncoders[threadNum][encoderIndexed] = indexed;
    activeEncoders[threadNum][encoderIndexedRLE] = indexedRLE;
    activeEncoders[threadNum][encoderFullColour] = fullColour;

    // Configure every encoder that was selected for this thread.
    for (iter = activeEncoders[threadNum].begin(); iter != activeEncoders[threadNum].end(); ++iter) {
      Encoder *encoder;

      encoder = encoders[threadNum][*iter];

      encoder->setCompressLevel(conn->client.compressLevel);
      encoder->setMPLevel(conn->client.mpLevel);
      encoder->setMPCompression(conn->client.mpCompression);

      if (allowLossy) {
        encoder->setQualityLevel(conn->client.qualityLevel);
        encoder->setFineQualityLevel(conn->client.fineQualityLevel,
                                     conn->client.subsampling);
      } else {
        // Never go below the encoder's lossless quality for a refresh.
        int level = __rfbmax(conn->client.qualityLevel,
                             encoder->losslessQuality);
        encoder->setQualityLevel(level);
        encoder->setFineQualityLevel(-1, subsampleUndefined);
      }
    }
  }
}

// Pick the part of the pending refresh region inside req that should be
// refreshed now, limited to roughly maxRectK * 1000 pixels.
//
// Rects are chosen in random order so the same rect is not damaged and
// restored over and over. The rect that would exceed the budget is trimmed
// (to at least one pixel along the trimmed axis) and ends the selection.
Region EncodeManager::getLosslessRefresh(const Region& req)
{
  const size_t budget = maxRectK * 1000;
  size_t used = 0;
  std::vector<Rect> candidates;
  Region result;

  MUTEX_LOCK(&regionMutex);
  pendingRefreshRegion.intersect(req).get_rects(&candidates);
  while (!candidates.empty()) {
    const size_t pick = rand() % candidates.size();
    Rect chosen = candidates[pick];

    // Fits entirely: take it and keep going.
    if ((used + chosen.area()) <= budget) {
      used += chosen.area();
      result.assign_union(Region(chosen));
      candidates.erase(candidates.begin() + pick);
      continue;
    }

    // Over budget: shrink along the longer side, so the rect that is kept
    // does not become too thin, then stop.
    if (chosen.width() > chosen.height()) {
      int width = (budget - used) / chosen.height();
      chosen.br.x = chosen.tl.x + __rfbmax(1, width);
    } else {
      int height = (budget - used) / chosen.width();
      chosen.br.y = chosen.tl.y + __rfbmax(1, height);
    }
    result.assign_union(Region(chosen));
    break;
  }
  MUTEX_UNLOCK(&regionMutex);

  return result;
}

int EncodeManager::computeNumRects(const Region& changed)
{
  int numRects;
  std::vector<Rect> rects;
  std::vector<Rect>::const_iterator rect;

  numRects = 0;
  changed.get_rects(&rects);
  for (rect = rects.begin(); rect != rects.end(); ++rect) {
    int w, h, sw, sh;

    w = rect->width();
    h = rect->height();

    // No split necessary?
    if (((w*h) < SubRectMaxArea) && (w < SubRectMaxWidth)) {
      numRects += 1;
      continue;
    }

    if (w <= SubRectMaxWidth)
      sw = w;
    else
      sw = SubRectMaxWidth;

    sh = SubRectMaxArea / sw;

    // ceil(w/sw) * ceil(h/sh)
    numRects += (((w - 1)/sw) + 1) * (((h - 1)/sh) + 1);
  }

  return numRects;
}

// Bookkeeping done before a rect is encoded: remember the content type and
// current output stream length, update the statistics, and adjust the
// lossy / pending-refresh regions for the rect.
//
// threadNum: selects the encoder set.
// rect:      area about to be encoded.
// type:      EncoderType describing the rect's content.
// Returns the encoder that handles this type for the given thread.
//
// NOTE(review): activeType, beforeLength and stats are members written here
// without a lock -- confirm how callers on different threads are serialised.
Encoder *EncodeManager::startRectPrepare(int threadNum, const Rect& rect, int type)
{
  activeType = type;
  const int klass = activeEncoders[threadNum][activeType];

  beforeLength = conn->getOutStream()->length();

  // 12 bytes plus the raw pixel data at the client's pixel size.
  const int equiv = 12 + rect.area() * (conn->client.pf().bpp/8);
  stats[klass][activeType].rects++;
  stats[klass][activeType].pixels += rect.area();
  stats[klass][activeType].equivalent += equiv;

  Encoder *encoder = encoders[threadNum][klass];

  MUTEX_LOCK(&regionMutex);
  // The result counts as lossy when the encoder is a lossy one and either
  // has no lossless quality level or currently runs below it.
  const bool producesLoss =
    (encoder->flags & EncoderLossy) &&
    ((encoder->losslessQuality == -1) ||
     (encoder->getQualityLevel() < encoder->losslessQuality));
  if (producesLoss)
    lossyRegion.assign_union(Region(rect));
  else
    lossyRegion.assign_subtract(Region(rect));

  // Whether this is a refresh or fresh content, the rect no longer needs
  // to be refreshed.
  pendingRefreshRegion.assign_subtract(Region(rect));
  MUTEX_UNLOCK(&regionMutex);

  return encoder;
}

// Adds the number of bytes written to the output stream since the
// matching startRectPrepare() call to the statistics of the encoder
// class that is active for threadNum and the current activeType.
void EncodeManager::endRectStats(int threadNum)
{
  const int written = conn->getOutStream()->length() - beforeLength;
  const int klass = activeEncoders[threadNum][activeType];

  stats[klass][activeType].bytes += written;
}

void EncodeManager::writeCopyRects(const Region& copied, const Point& delta)
{
  std::vector<Rect> rects;
  std::vector<Rect>::const_iterator rect;

  Region lossyCopy;

  beforeLength = conn->getOutStream()->length();

  copied.get_rects(&rects, delta.x <= 0, delta.y <= 0);
  for (rect = rects.begin(); rect != rects.end(); ++rect) {
    int equiv;

    copyStats.rects++;
    copyStats.pixels += rect->area();
    equiv = 12 + rect->area() * (conn->client.pf().bpp/8);
    copyStats.equivalent += equiv;

    conn->writer()->writeCopyRect(*rect, rect->tl.x - delta.x,
                                   rect->tl.y - delta.y);
  }

  copyStats.bytes += conn->getOutStream()->length() - beforeLength;

  MUTEX_LOCK(&regionMutex);
  lossyCopy = lossyRegion;
  lossyCopy.translate(delta);
  lossyCopy.assign_intersect(copied);
  lossyRegion.assign_union(lossyCopy);

  // Stop any pending refresh as a copy is enough that we consider
  // this region to be recently changed
  pendingRefreshRegion.assign_subtract(copied);
  MUTEX_UNLOCK(&regionMutex);
}

// Runs the solid area search on every rect of *changed. The rect list
// is taken once up front, since findSolidRect() modifies *changed
// while the list is being walked.
void EncodeManager::writeSolidRects(Region *changed, const PixelBuffer* pb)
{
  std::vector<Rect> snapshot;

  changed->get_rects(&snapshot);

  for (size_t i = 0; i < snapshot.size(); i++)
    findSolidRect(snapshot[i], changed, pb);
}

// Searches rect for an area of a single colour. The first solid
// SolidSearchBlock-sized tile found is grown (first block-wise, then
// pixel-wise), sent to the client as a solid rect and removed from
// *changed. The parts of rect to the left of, to the right of and below
// the solid area are then searched recursively.
//
// rect           - area to search
// changed        - region that every solid area sent is subtracted from
// pb             - pixel buffer the pixels are read from
// recursionLevel - 0 for the outermost call; only that level calls
//                  conn->writer()->endRect(), so there is one such call
//                  per top-level search instead of one per solid rect
void EncodeManager::findSolidRect(const Rect& rect, Region *changed,
                                  const PixelBuffer* pb, int recursionLevel)
{
  Rect sr;
  int dx, dy, dw, dh;

  // We start by finding a solid SolidSearchBlock-sized block
  for (dy = rect.tl.y; dy < rect.br.y; dy += SolidSearchBlock) {

    // Clip the block against the bottom edge
    dh = SolidSearchBlock;
    if (dy + dh > rect.br.y)
      dh = rect.br.y - dy;

    for (dx = rect.tl.x; dx < rect.br.x; dx += SolidSearchBlock) {
      // We define it like this to guarantee alignment
      rdr::U32 _buffer;
      rdr::U8* colourValue = (rdr::U8*)&_buffer;

      // Clip the block against the right edge
      dw = SolidSearchBlock;
      if (dx + dw > rect.br.x)
        dw = rect.br.x - dx;

      // The top left pixel of the block is the reference colour
      pb->getImage(colourValue, Rect(dx, dy, dx+1, dy+1));

      sr.setXYWH(dx, dy, dw, dh);
      if (checkSolidTile(sr, colourValue, pb)) {
        Rect erb, erp;

        Encoder *encoder;

        // We then try extending the area by adding more blocks
        // in both directions and pick the combination that gives
        // the largest area.
        sr.setXYWH(dx, dy, rect.br.x - dx, rect.br.y - dy);
        extendSolidAreaByBlock(sr, colourValue, pb, &erb);

        // Did we end up getting the entire rectangle?
        if (erb.equals(rect))
          erp = erb;
        else {
          // Don't bother with sending tiny rectangles; move on to the
          // next block in this row instead
          if (erb.area() < SolidBlockMinArea)
            continue;

          // Extend the area again, but this time one pixel
          // row/column at a time.
          extendSolidAreaByPixel(rect, erb, colourValue, pb, &erp);
        }

        // Send solid-color rectangle.
        encoder = startRectPrepare(ENCODEMANAGERSOLIDENCODER, erp, encoderSolid);

//        vlog.verbose("writeSolidRect() %ux%u", erp.width(), erp.height());

        // The header has to describe the solid area that is actually
        // sent (erp), not the area that was searched (rect): erp is
        // what the statistics above, the writeSolidRect() size below
        // and the subtraction from *changed all refer to.
        conn->writer()->startRect(erp, encoder->encoding);

        if (encoder->flags & EncoderUseNativePF) {
          encoder->writeSolidRect(erp.width(), erp.height(),
                                  pb->getPF(), colourValue);
        } else {
          // The encoder wants the client's pixel format, so convert
          // the single reference pixel first
          rdr::U32 _buffer2;
          rdr::U8* converted = (rdr::U8*)&_buffer2;

          conn->client.pf().bufferFromBuffer(converted, pb->getPF(),
                                         colourValue, 1);

          encoder->writeSolidRect(erp.width(), erp.height(),
                                  conn->client.pf(), converted);
        }

        /* moved this os->flush() to end of recursion top level to reduce flush load */
//        conn->writer()->endRect();

        endRectStats(ENCODEMANAGERSOLIDENCODER);

        changed->assign_subtract(Region(erp));

        // Search remaining areas by recursion
        // FIXME: Is this the best way to divide things up?

        // Left? (Note that we've already searched a SolidSearchBlock
        //        pixels high strip here)
        if ((erp.tl.x != rect.tl.x) && (erp.height() > SolidSearchBlock)) {
          sr.setXYWH(rect.tl.x, erp.tl.y + SolidSearchBlock,
                     erp.tl.x - rect.tl.x, erp.height() - SolidSearchBlock);
          findSolidRect(sr, changed, pb, recursionLevel + 1);
        }

        // Right?
        if (erp.br.x != rect.br.x) {
          sr.setXYWH(erp.br.x, erp.tl.y, rect.br.x - erp.br.x, erp.height());
          findSolidRect(sr, changed, pb, recursionLevel + 1);
        }

        // Below?
        if (erp.br.y != rect.br.y) {
          sr.setXYWH(rect.tl.x, erp.br.y, rect.width(), rect.br.y - erp.br.y);
          findSolidRect(sr, changed, pb, recursionLevel + 1);
        }

        // The recursive calls have covered everything that was left of
        // rect, so this call is done
        if (recursionLevel == 0)
          conn->writer()->endRect();
        return;
      }
    }
  }

  // Nothing solid (or nothing large enough) was found in rect
  if (recursionLevel == 0)
    conn->writer()->endRect();
}

// Returns true while there is still encoding work outstanding: either
// rectQueueFirstHead is set (checked without taking the lock), or at
// least one of the first encoderThreadsStarted entries of threadBusy is
// set (checked while holding rectQueueMutex).
inline bool EncodeManager::checkThreadsBusy()
{
  if (rectQueueFirstHead)
    return true;

  bool anyBusy = false;

  MUTEX_LOCK(&rectQueueMutex);
  for (int t = 0; t < encoderThreadsStarted && !anyBusy; t++) {
    if (threadBusy[t])
      anyBusy = true;
  }
  MUTEX_UNLOCK(&rectQueueMutex);

  return anyBusy;
}

void EncodeManager::writeRects(const Region& changed, const PixelBuffer* pb)
{
  std::vector<Rect> rects;
  std::vector<Rect>::const_iterator rect;
  int count = 0;

  changed.get_rects(&rects);
  for (rect = rects.begin(); rect != rects.end(); ++rect) {
    int w, h, sw, sh;
    Rect sr;

    w = rect->width();
    h = rect->height();

    // No split necessary?
    if (((w*h) < SubRectMaxArea) && (w < SubRectMaxWidth)) {
      if (encoderThreadsUsed > 1) {
        rectQueueAppend(*rect, pb);
        count++;
        if (count % threadWakeupInterval == 0)
          TGVNC_CONDITION_SEND_SIG(&rectsWaitingCondition);
      } else {
        writeSubRect(ENCODEMANAGERSOLIDENCODER, *rect, pb);
      }
      continue;
    }

    if (w <= SubRectMaxWidth)
      sw = w;
    else
      sw = SubRectMaxWidth;

    sh = SubRectMaxArea / sw;

    for (sr.tl.y = rect->tl.y; sr.tl.y < rect->br.y; sr.tl.y += sh) {
      sr.br.y = sr.tl.y + sh;
      if (sr.br.y > rect->br.y)
        sr.br.y = rect->br.y;

      for (sr.tl.x = rect->tl.x; sr.tl.x < rect->br.x; sr.tl.x += sw) {
        sr.br.x = sr.tl.x + sw;
        if (sr.br.x > rect->br.x)
          sr.br.x = rect->br.x;

        if (encoderThreadsUsed > 1) {
          rectQueueAppend(sr, pb);
          count++;
          if (count % threadWakeupInterval == 0)
            TGVNC_CONDITION_SEND_SIG(&rectsWaitingCondition);
        } else {
          writeSubRect(ENCODEMANAGERSOLIDENCODER, sr, pb);
        }
      }
    }
  }
  // sleep to wait for threads
  if (count > 0) {
    int loopCountTimeout = 0;

//    vlog.verbose2("writeRects(): queued %u rects, waiting for %u threads", count, encoderThreadsUsed);
    if (count > maxRectsUsed)
      maxRectsUsed = count;
    while (checkThreadsBusy()) {
      TGVNC_CONDITION_SEND_SIG(&rectsWaitingCondition);
      MUTEX_LOCK(&threadsReadyConditionLock);
      if (TGVNC_CONDITION_TIMED_WAIT(&threadsReadyCondition, &threadsReadyConditionLock, 50) < 0)
        loopCountTimeout++;
      MUTEX_UNLOCK(&threadsReadyConditionLock);
    }
    vlog.verbose("writeRects(): all %u rects processed by %u threads with %u loop timeouts, pressureLevel %u", count, encoderThreadsUsed, loopCountTimeout, MPCompressor::getPressureLevel());
  }
}

/*
 * Called by threads.
 *
 * Analyses the pixels of rect, picks the encoder type that fits the
 * number of colours found (solid, bitmap, indexed or full colour, with
 * or without RLE) and has the encoder selected for threadNum write the
 * rect.
 */
void EncodeManager::writeSubRect(int threadNum, const Rect& rect, const PixelBuffer *pb)
{
  struct RectInfo info;
  PixelBuffer *ppb;
  Encoder *encoder;
  EncoderType type;

  // FIXME: This is roughly the algorithm previously used by the Tight
  //        encoder. It seems a bit backwards though, that higher
  //        compression setting means spending less effort in building
  //        a palette. It might be that they figured the increase in
  //        zlib setting compensated for the loss.
  unsigned int divisor = (conn->client.compressLevel == -1)
                           ? 2 * 8
                           : conn->client.compressLevel * 8;
  if (divisor < 4)
    divisor = 4;

  unsigned int maxColours = rect.area()/divisor;

  // Fixed palette limits for some full colour encoders (the first one
  // is a special exception inherited from the Tight encoder)
  if (activeEncoders[threadNum][encoderFullColour] == encoderTightJPEG) {
    const bool lowCompression = (conn->client.compressLevel != -1) &&
                                (conn->client.compressLevel < 2);
    maxColours = lowCompression ? 24 : 96;
  } else if (activeEncoders[threadNum][encoderFullColour] == encoderTightMP) {
    maxColours = 16;
  }

  if (maxColours < 2)
    maxColours = 2;

  // Neither of the two indexed encoders may be handed a palette that
  // is larger than it supports
  encoder = encoders[threadNum][activeEncoders[threadNum][encoderIndexedRLE]];
  if (maxColours > encoder->maxPaletteSize)
    maxColours = encoder->maxPaletteSize;
  encoder = encoders[threadNum][activeEncoders[threadNum][encoderIndexed]];
  if (maxColours > encoder->maxPaletteSize)
    maxColours = encoder->maxPaletteSize;

  // The analysis is done on pixels in the client's format
  ppb = preparePixelBuffer(rect, pb, true, threadNum);

  // An empty palette is what selects full colour below
  if (!analyseRect(ppb, &info, maxColours))
    info.palette.clear();

  // Different encoders might have different RLE overhead, but
  // here we do a guess at RLE being the better choice if reduces
  // the pixel count by 50%.
  // NOTE(review): the comparison is against twice the rect area, which
  // does not obviously match the "50%" above -- confirm the intent.
  const bool useRLE = info.rleRuns <= (rect.area() * 2);

  const int colours = info.palette.size();
  if (colours == 0)
    type = encoderFullColour;
  else if (colours == 1)
    type = encoderSolid;
  else if (colours == 2)
    type = useRLE ? encoderBitmapRLE : encoderBitmap;
  else
    type = useRLE ? encoderIndexedRLE : encoderIndexed;

  encoder = startRectPrepare(threadNum, rect, type);

  // Encoders that work on the native pixel format get the unconverted
  // pixels instead
  if (encoder->flags & EncoderUseNativePF)
    ppb = preparePixelBuffer(rect, pb, false, threadNum);

//  vlog.verbose("writeSubRect(): encoder %u", encoder->encoding);
  encoder->writeRect(ppb, info.palette, true, rect);
  endRectStats(threadNum);
}

// Forwards to the checkSolidTile() overload that matches the pixel
// size of pb. colourValue is read as a 32, 16 or 8 bit pixel value
// according to the bpp of the buffer's pixel format; any bpp other
// than 32 or 16 is handled as 8.
bool EncodeManager::checkSolidTile(const Rect& r, const rdr::U8* colourValue,
                                   const PixelBuffer *pb)
{
  const int bpp = pb->getPF().bpp;

  if (bpp == 32)
    return checkSolidTile(r, *reinterpret_cast<const rdr::U32*>(colourValue), pb);

  if (bpp == 16)
    return checkSolidTile(r, *reinterpret_cast<const rdr::U16*>(colourValue), pb);

  return checkSolidTile(r, *colourValue, pb);
}

// Starting at the top left corner of r, finds the width/height
// combination of SolidSearchBlock-sized tiles (clipped against r) that
// is entirely of the colour colourValue and covers the largest area.
// The result is stored in *er and always has the same top left corner
// as r.
void EncodeManager::extendSolidAreaByBlock(const Rect& r,
                                           const rdr::U8* colourValue,
                                           const PixelBuffer *pb, Rect* er)
{
  const int originX = r.tl.x;
  const int originY = r.tl.y;

  // How far the previous row of tiles got; a row never searches
  // further than the one above it
  int widthLimit = r.width();

  int bestWidth = 0;
  int bestHeight = 0;

  Rect tile;

  // Width is searched first; when a different colour is hit we back
  // off and carry on with the next row of tiles, remembering the
  // width/height pair with the largest area seen so far.
  for (int rowTop = originY; rowTop < r.br.y; rowTop += SolidSearchBlock) {
    const int rowHeight = (rowTop + SolidSearchBlock > r.br.y)
                            ? (r.br.y - rowTop)
                            : SolidSearchBlock;

    // The first tile of the row is tested outside of the column loop
    // so that the row loop can be left right away.
    int tileWidth = (SolidSearchBlock > widthLimit) ? widthLimit
                                                    : SolidSearchBlock;

    tile.setXYWH(originX, rowTop, tileWidth, rowHeight);
    if (!checkSolidTile(tile, colourValue, pb))
      break;

    const int rowEnd = originX + widthLimit;
    int x = originX + tileWidth;
    while (x < rowEnd) {
      tileWidth = (x + SolidSearchBlock > rowEnd) ? (rowEnd - x)
                                                  : SolidSearchBlock;

      tile.setXYWH(x, rowTop, tileWidth, rowHeight);
      if (!checkSolidTile(tile, colourValue, pb))
        break;

      x += tileWidth;
    }

    widthLimit = x - originX;

    const int heightSoFar = rowTop + rowHeight - originY;
    if (widthLimit * heightSoFar > bestWidth * bestHeight) {
      bestWidth = widthLimit;
      bestHeight = heightSoFar;
    }
  }

  er->tl.x = originX;
  er->tl.y = originY;
  er->br.x = originX + bestWidth;
  er->br.y = originY + bestHeight;
}

// Grows the solid area sr one pixel row/column at a time, staying
// within r, for as long as the added row/column consists only of the
// colour colourValue. The order is up, down, left, right; the left and
// right steps already use the vertical extent found by the first two.
// The grown area is stored in *er.
void EncodeManager::extendSolidAreaByPixel(const Rect& r, const Rect& sr,
                                           const rdr::U8* colourValue,
                                           const PixelBuffer *pb, Rect* er)
{
  Rect line;
  int pos;

  // Upwards, one row at a time
  pos = sr.tl.y - 1;
  while (pos >= r.tl.y) {
    line.setXYWH(sr.tl.x, pos, sr.width(), 1);
    if (!checkSolidTile(line, colourValue, pb))
      break;
    pos--;
  }
  er->tl.y = pos + 1;

  // Downwards
  pos = sr.br.y;
  while (pos < r.br.y) {
    line.setXYWH(sr.tl.x, pos, sr.width(), 1);
    if (!checkSolidTile(line, colourValue, pb))
      break;
    pos++;
  }
  er->br.y = pos;

  // To the left, one column of the new height at a time
  pos = sr.tl.x - 1;
  while (pos >= r.tl.x) {
    line.setXYWH(pos, er->tl.y, 1, er->height());
    if (!checkSolidTile(line, colourValue, pb))
      break;
    pos--;
  }
  er->tl.x = pos + 1;

  // To the right
  pos = sr.br.x;
  while (pos < r.br.x) {
    line.setXYWH(pos, er->tl.y, 1, er->height());
    if (!checkSolidTile(line, colourValue, pb))
      break;
    pos++;
  }
  er->br.x = pos;
}

// Returns a pixel buffer that holds just the pixels of rect from pb,
// with its own coordinates starting at the origin.
//
// If convert is set and the client's pixel format differs from that of
// pb, the pixels are converted into convertedPixelBuffer[threadNum].
// Otherwise offsetPixelBuffer[threadNum] is pointed at the pixels of
// pb without copying them.
PixelBuffer* EncodeManager::preparePixelBuffer(const Rect& rect,
                                               const PixelBuffer *pb,
                                               bool convert,
                                               int threadNum)
{
  const rdr::U8* pixels;
  int stride;

  const bool needsConversion = convert &&
                               !conn->client.pf().equal(pb->getPF());

  if (!needsConversion) {
    // No conversion, but the coordinates still have to be shifted. We
    // have our own abusive subclass of FullFramePixelBuffer for this.
    pixels = pb->getBuffer(rect, &stride);

    offsetPixelBuffer[threadNum].update(pb->getPF(),
                                        rect.width(), rect.height(),
                                        pixels, stride);

    return &offsetPixelBuffer[threadNum];
  }

  // Convert the pixels of rect into the client's pixel format
  convertedPixelBuffer[threadNum].setPF(conn->client.pf());
  convertedPixelBuffer[threadNum].setSize(rect.width(), rect.height());

  pixels = pb->getBuffer(rect, &stride);
  convertedPixelBuffer[threadNum].imageRect(pb->getPF(),
                                   convertedPixelBuffer[threadNum].getRect(),
                                   pixels, stride);

  return &convertedPixelBuffer[threadNum];
}

bool EncodeManager::analyseRect(const PixelBuffer *pb,
                                struct RectInfo *info, int maxColours)
{
  const rdr::U8* buffer;
  int stride;

  buffer = pb->getBuffer(pb->getRect(), &stride);

  switch (pb->getPF().bpp) {
  case 32:
    return analyseRect(pb->width(), pb->height(),
                       (const rdr::U32*)buffer, stride,
                       info, maxColours);
  case 16:
    return analyseRect(pb->width(), pb->height(),
                       (const rdr::U16*)buffer, stride,
                       info, maxColours);
  default:
    return analyseRect(pb->width(), pb->height(),
                       (const rdr::U8*)buffer, stride,
                       info, maxColours);
  }
}

// Points this buffer at externally owned pixel data of the given
// format, size and stride.
void EncodeManager::OffsetPixelBuffer::update(const PixelFormat& pf,
                                              int width, int height,
                                              const rdr::U8* data_,
                                              int stride_)
{
  format = pf;

  // setBuffer() wants a writable pointer, so the const has to be cast
  // away. We never write anything though, so it should be safe.
  rdr::U8* writable = const_cast<rdr::U8*>(data_);
  setBuffer(width, height, writable, stride_);
}

// Write access is not supported by this buffer: always throws
// rfb::Exception, the arguments are ignored.
rdr::U8* EncodeManager::OffsetPixelBuffer::getBufferRW(const Rect& /*r*/,
                                                       int* /*stride*/)
{
  throw rfb::Exception("Invalid write attempt to OffsetPixelBuffer");
}

// Preprocessor generated, optimised methods
//
// EncodeManagerBPP.cxx is included three times, with BPP defined as 8,
// 16 and 32 in turn, and BPP is undefined again after each pass.
// NOTE(review): presumably each pass provides the rdr::U8/U16/U32
// overloads of checkSolidTile() and analyseRect() that the bpp
// dispatchers in this file call -- confirm against EncodeManagerBPP.cxx.

#define BPP 8
#include "EncodeManagerBPP.cxx"
#undef BPP
#define BPP 16
#include "EncodeManagerBPP.cxx"
#undef BPP
#define BPP 32
#include "EncodeManagerBPP.cxx"
#undef BPP
