#include <stdio.h>
#include <string.h>

#include "tilegfx.h"
#include "tiledata.h"
#include "world.h"
#include "parse.h"

// returns carry flag
// FIXME: NEED TO RETURN WIDTH AND HEIGHT
// so this actually unpacks *multiple* layers into multiple slots?
// based on the "base tile" ID?

// initialise=1 will wipe all the tile slots and start us
// from a blank set of tile slots, so the layers will start from 0

// Forward declarations for the file-local helpers defined further down.

// Byte subtraction with a carry/borrow input; see the definition for the
// exact treatment of *carry_inout.
static u8_t subtract_with_carry (u8_t a, u8_t b, u8_t *carry_inout);

// Reads one byte at ptr955, swaps its bit groups and writes the result at
// t_400_pos (both accesses go through the tile_data_* API).
static citerr_t tiledata_to_t400  (tile_data_t *tiledata,
                                   u16_t ptr955,
                                   u16_t t_400_pos);
                                   
// Plot path chosen by tile_plot_specify_tile_slot() when slot->use_branch_B
// is zero and branch B is not forced.
static citerr_t tile_plot_branch_A (tile_data_t *tiledata,
                                    u8_t t_500[T_500_LEN],
                                    tile_slot_t *slot,
                                    u8_t plot_mode,
                                    u8_t xcoord,
                                    u8_t ycoord,
                                    vram_t *vram);
                                    
// Plot path chosen by tile_plot_specify_tile_slot() when slot->use_branch_B
// is non-zero or branch B is forced by the caller.
static citerr_t tile_plot_branch_B  (tile_data_t *tiledata,
                                     u8_t t_500[T_500_LEN],
                                     tile_slot_t *slot,
                                     u8_t override_500buf_offset,
                                     u8_t overridden_500buf_offset,
                                     u8_t plot_mode,
                                     u8_t xcoord,
                                     u8_t ycoord,
                                     vram_t *vram);
                                     
// Looks up one byte in t500 and sends it to vram_op() at vram_ptr + vram_offset.
static citerr_t two_pixels  (u8_t t500[T_500_LEN],
                             u8_t t500_offset,  // applied via selfmod of low byte
                             u8_t *t500_sub_offset_inout,     // sub-offset into t500
                             s8_t vram_offset,
                             u8_t or_modifier,
                             u8_t operation,
                             u16_t vram_ptr,
                             u16_t *misc16_inout, // probably not used for this purpose
                             vram_t *vram,
                             u8_t final);

// Computes (a - b - borrow) modulo 256, where borrow is 1 when *carry_inout
// is zero on entry and 0 otherwise.
//
// a, b         operands, treated as unsigned bytes
// carry_inout  in:  non-zero means "no extra borrow"
//              out: forced to 1 when the subtraction went below zero;
//                   left untouched otherwise
//
// NOTE(review): the carry output is only ever set, never cleared, and it is
// set on underflow -- confirm this is the intended flag convention.
static u8_t subtract_with_carry (u8_t a, u8_t b, u8_t *carry_inout) {
  const s16_t borrow_in  = (0 == *carry_inout) ? 1 : 0;
  const s16_t subtrahend = (s16_t) ((b & 0xff) + borrow_in);
  const s16_t difference = (s16_t) ((a & 0xff) - subtrahend);

  if (difference < 0) {
    *carry_inout = 1;
  }

  return (u8_t) (difference & 0xff);
}

// Unpacks the packed tile record number `tile_id` into one or more tile slots.
//
// The record is located by starting at TILEDATA_START and skipping `tile_id`
// length-prefixed records. Its header bytes populate the current slot, then
// each "layer" of the record is processed by the P_tile_long loop until the
// low byte of the tile pointer reaches the end of the record or a layer with
// control-flow value 3 has been handled. Hex numbers in comments (e.g. 31cc)
// are addresses in the original assembly that this function was ported from.
//
// tiledata                    tile data accessor; its pointer is moved, and
//                             bytes are written through tile_data_write()
// tile_id                     number of records to skip before the wanted one
//                             (NOTE(review): the skip counter is an s8_t)
// t_500_inout                 pixel-pair lookup buffer; cleared when
//                             `initialise` is set, then filled per layer
// t_xyzzy_out                 [0] is zeroed; [1]..[3] receive base pixel pairs
// next_tile_slot_inout        index of the slot to fill next; reset to 0 when
//                             `initialise` is set, advanced once per layer
// t400_ptr_L_initial,
// t400_ptr_H_initial          starting t400 pointer, used when !initialise
// tile_500buf_pos_1_initial,
// tile_500buf_pos_2_initial   starting t500 positions, used when !initialise
// tile_width_maybe_out        receives the last layer's width value (1..8),
//                             or -1 if no layer was processed
// tileslots_inout             slot array that receives the unpacked layers
// initialise                  non-zero: start from slot 0, t400 pointer
//                             0x0400, and an all-zero t500 buffer
// pixel_base_pairs            table indexed by the 3-bit base-pair fields
// animation_speeds            table indexed by the upper nibble of tile[2]
// carry_out                   receives the final value of the carry variable
//
// Returns CE_OK on success. On failure an error code is returned and neither
// *carry_out nor *tile_width_maybe_out is written; other outputs may already
// have been partially updated.
citerr_t unpack_tile_gfx (tile_data_t *tiledata,
                          u8_t tile_id,
                          u8_t t_500_inout[T_500_LEN],
                          u8_t t_xyzzy_out[4],
                          u8_t *next_tile_slot_inout,
                          // FIXME: just pass in a single 16-bit number for these two, i.e. 0x400 usually:
                          u8_t t400_ptr_L_initial, // don't know if these are needed; they're usually 0
                          u8_t t400_ptr_H_initial, // and 4, if initialised, but the NYI animation code seems to call it w/o initialisation at 39ad
                          u8_t tile_500buf_pos_1_initial,
                          u8_t tile_500buf_pos_2_initial,
                          s8_t *tile_width_maybe_out, // s8? u8? tile width? returned value only needed by flood tile fill, discarded elsewhere
                          tile_slot_t tileslots_inout[NUM_TILESLOTS],
                          u8_t initialise,
                          u8_t pixel_base_pairs[WORLD_NUM_PXPAIRS],
                          u8_t animation_speeds[WORLD_NUM_ANIMSPEEDS],
                          u8_t *carry_out) {

  u8_t tile_500buf_pos_1, tile_500buf_pos_2;
  u8_t t400_ptr_L, t400_ptr_H;
  u8_t tileV_lo2_ctrlflow;
  u8_t j, x;
  citerr_t e;
  u8_t carry;
  u8_t tileptr_end_L;
  u32_t d;
  u8_t stripe, tile_permeable_mod;
  u8_t index;
  u16_t alt_tileptr;
  s8_t k;
  s8_t tile_width_maybe;

  tile_width_maybe = -1;
  t_xyzzy_out[0] = 0;

  // NOTE(review): this check runs before `initialise` resets the slot index.
  if (*next_tile_slot_inout >= NUM_TILESLOTS) {
    printf("ERROR: unpack_tile_gfx() called with bad tile slot (%u)\n",
           *next_tile_slot_inout);
    return CE_TILESLOTS_OVERFLOW;
  }

  if (initialise) {
    *next_tile_slot_inout = 0;
    tile_500buf_pos_1 = 0;
    tile_500buf_pos_2 = 0;
    t400_ptr_L = 0;
    t400_ptr_H = 4;
    memset(t_500_inout, 0, T_500_LEN);
  } else {
    t400_ptr_L = t400_ptr_L_initial;
    t400_ptr_H = t400_ptr_H_initial;
    tile_500buf_pos_1 = tile_500buf_pos_1_initial;
    tile_500buf_pos_2 = tile_500buf_pos_2_initial;
  }

  // unpack_tile_gfx_4a

  tile_data_setptr (tiledata, TILEDATA_START);

  tileV_lo2_ctrlflow = 0;

  // find pointer to requested tile (walk chain): each record starts with its
  // own length, so skipping a record means advancing by that byte
  // FIXME: consider moving this onto TileData
  for ( k = tile_id - 1; k >= 0; k-- ) {
    u8_t tb;
    e = tile_data_read(tiledata, &tb);
    if (CE_OK != e) { return e; }
    // sanity: a zero length would never advance
    if (0 == tb) {
      printf("ERROR: bug or corrupted tile data; tile len = 0\n");
      return CE_FIND_TILE_ZL;
    }
    e = tile_data_advance(tiledata, tb, &carry);
    if (CE_OK != e) { return e; }
  }

  // get length of this record, use it to work out the end pointer

  j=0;
  e = tile_data_read(tiledata, &j);
  if (CE_OK != e) { return e; }
  d = tile_data_getptr(tiledata) + j;
  if (d > 0xffff) {
    printf("ERROR: tileptr_end_L overflow\n");
    return CE_TILEPTR_END_L_OVERFLOW;
  }

  // only the low byte of the end address is kept and compared later
  tileptr_end_L = 0xff & d;

  // tile[1]
  // bits 0-3:  tslots_anim1
  // bit  4:    ?? never used ? goes unto t_tslots_unk2[], apparently unused??
  // bits 5-6:  permeability modifier?
  // bit  7:    stripe permeability modifier?

  e = tile_data_offread (tiledata, 1, &x);
  if (CE_OK != e) { return e; }

  tile_slot_init (tileslots_inout + *next_tile_slot_inout);
  tileslots_inout[*next_tile_slot_inout].anim1 = 0xf & x;
  tileslots_inout[*next_tile_slot_inout].unk2 = 0x10 & x;
  stripe = 0x80 & x;                    // collision modifier?
  tile_permeable_mod = (x << 1) & 0xc0; // collision / colour modifier? object is permeable?

  // tile[2]
  // bits 0-3:  animation subroutine to use
  // bits 4-7:  index into animation_speeds (values from the 40d1 table)
  // NOTE(review): an older comment said bits 5-7; the code uses all four
  // upper bits, so the index can reach 15 -- confirm the table is that long.

  e = tile_data_offread(tiledata, 2, &x);
  if (CE_OK != e) { return e; }

  tileslots_inout[*next_tile_slot_inout].anim_type = x & 0xf;
  index = (x >> 4) & 0xf;
  tileslots_inout[*next_tile_slot_inout].anim_speed = animation_speeds[index];

  // step over the three header bytes
  e = tile_data_advance(tiledata, 3, &carry);
  if (CE_OK != e) { return e; }

  carry=0;

  alt_tileptr = 0;

  // One iteration per layer. Label nesting in the original:
  //   P_tile_long > Lpunk_top > Lpunk_middle > Lpunk_bottom
  while ((tileV_lo2_ctrlflow != 3) && (tiledata->ptr_L != tileptr_end_L)) { // P_tile_long

    u8_t jmp_P_tile_long;
    u8_t x_low_2, tileheight_copy;
    u8_t base_pair_1_index, base_pair_2_index, base_pair_3_index;
    u8_t skip_31cc;
    s8_t left_plane, right_plane;
    u8_t left_pixel_colour, right_pixel_colour;
    u8_t pos500;
    u8_t xreg, yreg;
    u8_t wide;
    u8_t punk_top, punk_middle, punk_bottom;
    u16_t last_955_alt_tileptr;
    s32_t unk19_areg_thing;

    jmp_P_tile_long = 0;

    // Ltile_carry_on

    // tile[U]
    // bits 0-1: tile height (minus one): each unit is 8 pixels, so one MODE 2 "stripe"
    // bits 2-4: 3-bit value (a T_40C1 index -- base-pairs index)
    // bits 5-7: 3-bit value (another base-pairs index)

    e = tile_data_read(tiledata, &x);
    if (CE_OK != e) { return e; }

    x_low_2 = (x & 3);
    tileheight_copy = x_low_2;
    tileslots_inout[*next_tile_slot_inout].height = x_low_2;
    base_pair_1_index = (x >> 2) & 7;
    base_pair_2_index = (x >> 5) & 7;
    tileslots_inout[*next_tile_slot_inout].stripe = stripe;

    // tile[U+1]

    e = tile_data_offread(tiledata, 1, &x);
    if (CE_OK != e) { return e; }
    e = tile_data_advance(tiledata, 1, &j); // dummy carry
    if (CE_OK != e) { return e; }

    if (x == 0) {
      // optional byte
      // tile[U+1] == 0 boosts tile height to >= 32, used by monks
      tileheight_copy |= 4;
      tileslots_inout[*next_tile_slot_inout].height = tileheight_copy;
      // and then tile[V] is tile[U+2]:
      // proceed using value from tile[U+2] instead of tile[U+1] ...
      e = tile_data_offread(tiledata, 1, &x);
      if (CE_OK != e) { return e; }
      e = tile_data_advance(tiledata, 1, &j); // dummy carry
      if (CE_OK != e) { return e; }
    }

    e = tile_data_advance(tiledata, 1, &j); // dummy carry
    if (CE_OK != e) { return e; }

    // call this position tile[V] (which is either tile[U+1] or tile[U+2])
    //
    // bits 0-1 of tile[V]: determine punk top/middle/bottom control flow
    // bits 2-4           : determine which base px pair to use in t_xyzzy[3];
    //                      also, if non-black base pair chosen, branch B will
    //                      be used to draw the tile
    // bits 5-7           : tile width? in pairs of pixels?

    tileV_lo2_ctrlflow = (x & 3);
    base_pair_3_index = (x >> 2) & 7;
    tileslots_inout[*next_tile_slot_inout].use_branch_B = base_pair_3_index; // this decides whether to use branch A or B in tile_plot when tile is blitted

    tile_width_maybe = ((x >> 5) & 7) + 1; // 1 to 8

    if ((base_pair_1_index != 0) || (base_pair_2_index == 0)) {

      // 317f

      skip_31cc = 0;

      tile_500buf_pos_1 = tile_500buf_pos_2;

      t_xyzzy_out[1] = pixel_base_pairs[base_pair_1_index];
      t_xyzzy_out[2] = pixel_base_pairs[base_pair_2_index];
      t_xyzzy_out[3] = (base_pair_3_index == 0) ? 0 : pixel_base_pairs[base_pair_3_index];

      if (base_pair_3_index != 0) {

        // 31a7: write all 16 left/right combinations into the 500 buffer,
        // filling from offset 0xf downwards

        for (left_plane = 3, pos500 = 0xf;  left_plane >= 0;  left_plane--) { // P_unpk_fill500_O: four iterations: loop for collision, then B, G, R
          left_pixel_colour = (t_xyzzy_out[left_plane] << 1) & 0xfe; // left-hand pixel colour value
          for (right_plane = 3;  right_plane >= 0;  right_plane--, pos500--) { // P_unpk_fill500_I
            right_pixel_colour = t_xyzzy_out[right_plane];
            t_500_inout[tile_500buf_pos_1 + pos500] = (left_pixel_colour | right_pixel_colour | tile_permeable_mod);
          }
        }

        // 31c4

        tile_500buf_pos_2 = 0xff & (tile_500buf_pos_1 + 0x10); // +C ?

        // the four-byte fill below only runs if the position wrapped to 0
        if (tile_500buf_pos_2 != 0) {
          skip_31cc = 1;
        }
      }

      if ( ! skip_31cc ) { // ?? branch A version? only fills four bytes
        // 31cc: the four combinations of base pairs 1 and 2
        yreg = tile_500buf_pos_1;
        xreg = ((t_xyzzy_out[1] << 1) & 0xfe) | tile_permeable_mod;
        t_500_inout[yreg] = xreg | t_xyzzy_out[1];
        yreg++;
        t_500_inout[yreg] = xreg | t_xyzzy_out[2];
        yreg++;
        xreg = ((t_xyzzy_out[2] << 1) & 0xfe) | tile_permeable_mod;
        t_500_inout[yreg] = xreg | t_xyzzy_out[1];
        yreg++;
        t_500_inout[yreg] = xreg | t_xyzzy_out[2];
        yreg++;
        tile_500buf_pos_2 = yreg;
      }

    }

    // 31f6

    tileslots_inout[*next_tile_slot_inout].offset_500buf = tile_500buf_pos_1;
    wide = 0;
    // NOTE(review): tile_width_maybe is 1..8 at this point, so bit 5 is never
    // set and `wide` stays 0 -- confirm against the original assembly.
    if (tile_width_maybe & 0x20) {
      // extra wide tile??
      wide = 1;
    }

    // 3204
    tileslots_inout[*next_tile_slot_inout].vram_target = ((tile_width_maybe * 8) - (4 + ((wide==0)?1:0))); // & 0xff ??

    // control flow hackery; at least one of these must be set every loop iteration, or the thing just hangs:
    punk_top = 0;
    punk_middle = 0;
    punk_bottom = 0;

    if (tileV_lo2_ctrlflow == 0) {
      punk_middle = 1;
    } else if (tileV_lo2_ctrlflow == 1) {
      punk_top = 1; // orig code just falls through into punk_top at 320e
      punk_middle = 1;
    } else { // if tileV_lo2_ctrlflow is 2 or 3
      punk_bottom = 1;
      carry = tileV_lo2_ctrlflow & 1;
    }

    last_955_alt_tileptr = TILEDATA_START;
    unk19_areg_thing = -1;

    // "punk" loop

    while ( 1 ) {  // Punk_top, note that P_tile_long is the loop outside this one

      u8_t areg; // FIXME ... hmm ... u8???
      u16_t alt_tileptr_saved;

      // punk_top sets alt_tileptr = tileptr, then advances tileptr by unk19 (maybe multiplied by v13)
      if (punk_top) {

        punk_top = 0;

        // alt_tileptr doesn't always point to tile data
        // sometimes it will point to t400 etc, so be careful

        alt_tileptr = tile_data_getptr (tiledata); // 320e

        carry = 0;
        areg = tile_width_maybe;
        if (t_xyzzy_out[3] != 0) {
          carry = (areg & 0x80); // result of ASL A
          areg = ((areg << 1) & 0xfe);
        }
        if (tileheight_copy != 0) {
          unk19_areg_thing = areg;
          // 3224
          areg += (tileheight_copy * areg); // FIXME: overflow check?
        }
        // 3229 Lpunk_top_mult_done
        // sbc #00 ???
        areg = subtract_with_carry (areg, 0, &carry);
        e = tile_data_advance(tiledata, areg+1, &carry);
        if (CE_OK != e) { return e; }
        // carry?
        punk_middle = 1;
      }

      if (punk_middle) { // 322e: copies alt_tileptr into t_slots_tileptr[tile_slot]
        punk_middle = 0;
        // t_tslots_tileptr values must be adjusted before being used to index into the buffer
        tileslots_inout[*next_tile_slot_inout].ptr = alt_tileptr;
        // if tileptr's high byte is 0, run punk_bottom, else go back to P_next_tile_slot
        // this seems to determine the multiple layers thing ...
        if ((alt_tileptr & 0xff00) != 0) {
          // 323a bne Lnext_tile_slot
          (*next_tile_slot_inout)++;
          if (*next_tile_slot_inout >= NUM_TILESLOTS) {
            printf("ERROR: punk_middle: tile slot overflow\n");
            return CE_TILESLOTS_OVERFLOW;
          }
          tile_slot_init(tileslots_inout + *next_tile_slot_inout);
          jmp_P_tile_long = 1; // 3299 jmp P_tile_long
          break;
        } else {
          // 323c falling through to punk_bottom
          punk_bottom = 1;
        }
      }

      if (punk_bottom) { // 323c
        punk_bottom = 0;
        if (carry) { // go back to previous tile and then do it all again
          // suspect that this causes reading of the same tile multiple times
          // possibly to draw flipped versions
          // 329c Lregress_tileptr
          e = tile_data_regress(tiledata, &carry);
          if (CE_OK != e) { return e; }
          punk_top = 1;
          punk_middle = 1; // ??
          punk_bottom = 1;
          continue; // 32a8 jmp Lpunk_top
        }
        // 323e no tileptr regression
        unk19_areg_thing = tileheight_copy;
        // this is the fateful moment when all eight dr4_selfmod_Xx instructions are updated
        // using alt_tileptr

        // use old alt_tileptr to get offset into tile data (saved to a load of selfmods in asm)
        // two datasources: alt_tileptr might point to 0x400 buffer, or it
        // might point to static tile data

        // then replace alt_tileptr with t400_ptr ...
        if (alt_tileptr >= TILEDATA_START) {
          last_955_alt_tileptr = alt_tileptr;
        }
        alt_tileptr_saved = alt_tileptr;
        alt_tileptr = to_16bit(t400_ptr_H, t400_ptr_L); // 3260

        tileslots_inout[*next_tile_slot_inout].ptr = alt_tileptr;

        xreg = 0; // 326e

        // handles block from 3274 . 3299
        if (t_xyzzy_out[3] == 0) {

          // Copy (tileheight_copy + 1) rows into the t400 area. Each row is
          // written in reverse byte order and every byte has its odd and
          // even bits exchanged.
          for ( ;
               unk19_areg_thing >= 0;
               unk19_areg_thing--, --yreg) { // 3274: P_tile_flip_outer

            yreg = tile_width_maybe; // - 1;

            for (;
                 yreg > 0;
                 yreg--, xreg++) { // 3277: P_tile_flip_inner

              u8_t lol;
              u16_t t400_ptr;

              // FIXME: overrun/underrun check
              // OK, this is weird ... does t400 actually contain
              // a modified set of tile data? i.e. packed tile data? not a bitmap?

              // "copy" from src to t_400
              // "src" may be either t_400 or TILEDATA

              e = tile_data_extread(tiledata, alt_tileptr_saved + xreg, &lol);
              if (CE_OK != e) { return e; }

              t400_ptr = to_16bit(t400_ptr_H, t400_ptr_L);
              t400_ptr += yreg - 1; // no idea why it's -1 ...

              e = tile_data_write (tiledata,
                                   t400_ptr,
                                   ((lol >> 1) & 0x55) | ((lol << 1) & 0xaa));  // also different; 0x55 not 0xaa
              if (CE_OK != e) { return e; }

            }

            t400_ptr_L += tile_width_maybe; // 328d; tile width?

          }

          // falls through at 3295

          (*next_tile_slot_inout)++;
          if (*next_tile_slot_inout >= NUM_TILESLOTS) {
            printf("ERROR: 3295 tile slot overflow\n");
            return CE_TILESLOTS_OVERFLOW;
          }
          tile_slot_init(tileslots_inout + *next_tile_slot_inout);
          jmp_P_tile_long = 1;
          break; // 3299 jmp P_tile_long

        } else { // resumes at P_fill_t_400_outer, 32ab
          break; // 3272: bne P_fill_t_400_outer; breaks out of Punk_top loop
        }

      } // endif (punk_bottom)

    } // next Punk_top loop

    if (jmp_P_tile_long) {
      // this layer is finished; go round P_tile_long again
      continue;
    }

    // 329c trampoline was implemented earlier

    xreg = 0;

    do { // 32ab, P_fill_t_400_outer

      u16_t t400_ptr;
      u16_t mm;
      s16_t mm2;

      t400_ptr = to_16bit(t400_ptr_H, t400_ptr_L);
      yreg = (tile_width_maybe << 1) & 0xfe;

      do {

        // this processes two bytes of tile bytecode, in reverse order
        yreg -= 2; // -2

        // Errors from the converter are propagated; they were previously
        // discarded, unlike the equivalent calls in the flip loop above.
        e = tiledata_to_t400 (tiledata,
                              last_955_alt_tileptr + xreg,
                              yreg + t400_ptr); // looks like MODE 2 plotting to t400?
        if (CE_OK != e) { return e; }
        yreg++;    // +1
        xreg++;

        // 32c6

        e = tiledata_to_t400 (tiledata,
                              last_955_alt_tileptr + xreg,
                              yreg + t400_ptr); // looks like MODE 2 plotting to t400?
        if (CE_OK != e) { return e; }
        yreg--;    // -1
        xreg++;

      } while (yreg > 0);

      // advance the t400 pointer by one row (two bytes per width unit),
      // refusing to wrap past the end of the page
      mm = ((tile_width_maybe << 1) & 0xfe) + t400_ptr_L;
      if (mm > 0xff) {
        printf("ERROR: mm overflow\n");
        return CE_MM_OVERFLOW;
      }
      mm2 = mm - t400_ptr_L;
      if (mm2 < 0) {
        printf("ERROR: mm2 underflow\n");
        return CE_MM2_UNDERFLOW;
      }
      t400_ptr_L = mm; // 32e2
      unk19_areg_thing--;

    } while (unk19_areg_thing >= 0);

    // 32e6 controversy
    (*next_tile_slot_inout)++;
    if (*next_tile_slot_inout >= NUM_TILESLOTS) {
      printf("ERROR: 32e6 tile slot overflow\n");
      return CE_TILESLOTS_OVERFLOW;
    }
    tile_slot_init(tileslots_inout + *next_tile_slot_inout);

  } // end P_tile_long (finally ...)

  // 32f3 tile unpack done [a]

  *carry_out = carry;
  *tile_width_maybe_out = tile_width_maybe;

  return CE_OK;

}


// may have been a macro
// Converts one source byte for storage in the t400 area (may once have been
// a macro): the byte at `ptr955` is fetched with tile_data_extread(), its bit
// groups are exchanged, and the result is stored at `t_400_pos` with
// tile_data_write().
//
// The exchange moves bits 0-1 and 4-5 up by two places and bits 2-3 and 6-7
// down by two places.
//
// Returns the first error reported by the read or the write, else the
// write's own result.
static citerr_t tiledata_to_t400  (tile_data_t *tiledata,
                                   u16_t ptr955,
                                   u16_t t_400_pos) {

  u8_t source_byte;
  u8_t shifted_up, shifted_down;
  citerr_t status;

  status = tile_data_extread (tiledata, ptr955, &source_byte);
  if (status != CE_OK) {
    return status;
  }

  // looks like mode 2 plotting to t_400?
  shifted_up   = (source_byte << 2) & 0xcc; // collision & blue, two pixels
  shifted_down = (source_byte >> 2) & 0x33; // green & red, two pixels

  return tile_data_write(tiledata, t_400_pos, shifted_up | shifted_down);

}

// _a variant enforces tile slot = 0
// Convenience entry point (2590) for tile_plot_specify_tile_slot(): plots
// the given slot without forcing branch B and without a branch-B entry-point
// override, passing 0xff as the overridden 500buf offset.
//
// The "_a ... slot 0" naming suggests callers pass tile slot 0 here; nothing
// in this function enforces that.
//
// Returns whatever tile_plot_specify_tile_slot() returns.
citerr_t tile_plot_a_slot_0 (tile_data_t *tiledata,
                             tile_slot_t *ts0,
                             u8_t t_500[T_500_LEN],
                             u8_t plot_mode,
                             u8_t x,
                             u8_t y,          // in A register
                             vram_t *vram) {

  const u8_t dont_force_branch_b  = 0;
  const u8_t no_entry_point_h     = 0;
  const u8_t unused_500buf_offset = 0xff; // -1
  citerr_t result;

  result = tile_plot_specify_tile_slot (tiledata, ts0, t_500, plot_mode,
                                        x, y,
                                        dont_force_branch_b,
                                        no_entry_point_h,
                                        unused_500buf_offset,
                                        vram);
  return result;

}



// Plots the tile described by `slot` at (xcoord, ycoord).
//
// Branch A is used when slot->use_branch_B is zero and `force_branch_b` is
// zero; otherwise branch B is used and receives the two branch_B_* arguments.
// The tile-data pointer is saved on entry and put back before returning, on
// the error path as well as the success path, so a failed plot no longer
// leaves the shared pointer wherever the plot routine stopped.
//
// Returns CE_OK, or the error reported by the chosen plot branch.
citerr_t tile_plot_specify_tile_slot (tile_data_t *tiledata,
                                      tile_slot_t *slot,
                                      u8_t t_500[T_500_LEN],
                                      u8_t plot_mode,
                                      u8_t xcoord,
                                      u8_t ycoord,                  // in A register
                                      u8_t force_branch_b,          // simulates direct jump to branch B in asm
                                      u8_t branch_B_entry_point_h,  // FIXME: remove this and just pass entry_point_h_overridden_500buf_offset or NULL/FALSE/-1
                                      u8_t branch_B_entry_point_h_overridden_500buf_offset, // FIXME: just always pass in separate 500buf offset
                                      vram_t *vram) {

  u16_t saved_ptr;
  citerr_t e;

  saved_ptr = tile_data_getptr(tiledata);

  if ( (slot->use_branch_B == 0) && ( ! force_branch_b ) ) {

    e = tile_plot_branch_A(tiledata,
                           t_500,
                           slot,
                           plot_mode,
                           xcoord,
                           ycoord,
                           vram);

  } else {

    e = tile_plot_branch_B(tiledata,
                           t_500,
                           slot,
                           branch_B_entry_point_h, // override slot->offset_500buf?
                           branch_B_entry_point_h_overridden_500buf_offset,
                           plot_mode,
                           xcoord, // x
                           ycoord, // y
                           vram);

  }

  // restore tileptr regardless of whether the plot succeeded
  tile_data_setptr(tiledata, saved_ptr);

  return e;

}


// Branch A tile plotter (259c).
//
// Sets the tile-data pointer to slot->ptr, then draws (slot->height + 1)
// rows. For every row the inner loop consumes one tile-data byte per step
// and turns it into four two_pixels() calls at descending VRAM offsets,
// starting from slot->vram_target. The third of the four calls ORs in
// slot->stripe; the fourth is flagged as final.
//
// Errors from two_pixels() are returned to the caller. They used to be
// stored and then overwritten without being examined.
//
// tiledata   tile data accessor; its pointer is moved by this function
// t_500      lookup buffer indexed via slot->offset_500buf
// slot       slot to draw (reads ptr, height, vram_target, offset_500buf,
//            stripe)
// plot_mode  passed through to two_pixels() as the VRAM operation
// xcoord,
// ycoord     position, converted with coords_to_vram_ptr()
// vram       destination passed through to two_pixels()
//
// Returns CE_OK, or the first error from tile_data_read(), two_pixels() or
// tile_data_advance().
static citerr_t tile_plot_branch_A  (tile_data_t *tiledata,
                                     u8_t t_500[T_500_LEN],
                                     tile_slot_t *slot,
                                     u8_t plot_mode,
                                     u8_t xcoord,
                                     u8_t ycoord,
                                     vram_t *vram) {

  // Row counter. two_pixels() also receives this as its misc16 scratch word
  // and rewrites the high byte, so only the low byte counts rows; the loop
  // ends when decrementing sets bit 7.
  u16_t tileheight;
  u8_t selfmods_500_ptr_low;
  u16_t vram_ptr;
  u8_t half_block_number;

  selfmods_500_ptr_low = slot->offset_500buf;
  vram_ptr = coords_to_vram_ptr (xcoord, ycoord);

  // & 4 gives 1111000011110000 etc. as Y increases; this is the half-block number (0 or 4)
  half_block_number = ycoord & 4; // y-coordinate, converted to half-block number (formerly t_xyzzy[0])

  // *** DOES BRANCH A EVER USE 0x400, 0x600 ???

  // looks like 0x400 buf is only needed for *horizontal* flips, not vertical ones;
  // well wheel is drawn as four quarters, but only the second and the fourth use t400

  tile_data_setptr(tiledata, slot->ptr);

  for (tileheight = slot->height;
       ! (tileheight & 0x80);
       tileheight--) {

    s8_t vram_off;
    citerr_t e;

    // each outer loop iteration blits one row of MODE 2 blocks (eight scanlines)

    for (vram_off = slot->vram_target;
         vram_off > 0;
         vram_off -= 5 ) {

      u8_t a, dummy_carry;
      u8_t line;

      // 25d7
      // each inner loop iteration blits half of one MODE 2 "block" (2x4 double-width pixels, one VRAM byte)

      e = tile_data_read(tiledata, &a);
      if (CE_OK != e) { return e; }

      // Four lines per tile byte. two_pixels() shifts `a` down by two bits
      // after each non-final line, so each line uses its own bit pair.
      for (line = 0; line < 4; line++) {
        e = two_pixels(t_500,
                       selfmods_500_ptr_low,          // Lselfmod_brchA_8 to Lselfmod_brchA_11; offset into t500
                       &a,                            // sub-offset into t500, from tile data
                       vram_off,                      // offset into vram
                       (2 == line) ? slot->stripe : 0, // line 3 only: OR modifier is Lselfmod_brchA_14 (lighter stripe?)
                       plot_mode,                     // plotting mode
                       vram_ptr,
                       &tileheight,
                       vram,
                       (3 == line) ? 1 : 0);          // the final one is nerfed
        if (CE_OK != e) { return e; }
        if (line < 3) {
          vram_off--; // next line sits one VRAM byte lower
        }
      }

      e = tile_data_advance(tiledata, 1, &dummy_carry);
      if (CE_OK != e) { return e; }

    }

    half_block_number ^= 4; // update half-block number for next half-block (0 or 4)
    if (half_block_number) { // if (bottom half-block)
      vram_ptr |= half_block_number; // force vram_ptr bottom half-block?
      if ((vram_ptr & 0xff) != 0) { // continue drawing until we hit the start of a page? (256 bytes is 32 x MODE 2 blocks)
        continue;
      }
    }
    // advances 636 bytes (that's (640 - 4), equivalent to eight lines) so skips to the next row of MODE 2 blocks
    vram_ptr += 0x27c; // only happens for tiles taller than 8 px?
  }
  return CE_OK;
}

// Blits one byte (two double-width pixels) to the screen.
//
//   t500                   lookup buffer the pixel byte is fetched from
//   t500_offset            base index into t500 (a selfmodded low byte in the
//                          original code)
//   t500_sub_offset_inout  sub-offset added to t500_offset; rewritten on
//                          non-final calls (see below)
//   vram_offset            added to vram_ptr to form the target address;
//                          a negative value is rejected
//   or_modifier            bits ORed into the fetched byte before plotting
//   operation              passed straight through to vram_op()
//   misc16_inout           scratch word; on non-final calls its high byte is
//                          replaced with the incoming sub-offset
//   final                  nonzero for the last call of a group: the
//                          sub-offset is used unmasked and nothing is
//                          written back to either inout parameter
//
// Returns CE_2PX_VRAM_SUBZERO for a negative vram_offset, whatever vram_op()
// returns if that fails, and CE_OK otherwise.
static citerr_t two_pixels  (u8_t t500[T_500_LEN],
                             u8_t t500_offset,  // applied via selfmod of low byte
                             u8_t *t500_sub_offset_inout,     // sub-offset into t500
                             s8_t vram_offset,
                             u8_t or_modifier,
                             u8_t operation,
                             u16_t vram_ptr,
                             u16_t *misc16_inout, // probably not used for this purpose
                             vram_t *vram,
                             u8_t final) {

  u8_t lookup_index;
  u8_t pixel_pair;
  citerr_t status;

  if (vram_offset < 0) {
    printf("ERROR: two_pixels: vram_offset < 0\n");
    return CE_2PX_VRAM_SUBZERO;
  }

  if (final) {
    // last call of the group: take the sub-offset exactly as given
    lookup_index = *t500_sub_offset_inout;
  } else {
    // park the incoming sub-offset in the high byte of misc16 (low byte kept)
    *misc16_inout = to_16bit(*t500_sub_offset_inout,
                             *misc16_inout & 0xff);
    // only the bottom two bits select the t500 entry, so a mode-A packed
    // tile can address at most four t500 entries per call
    lookup_index = *t500_sub_offset_inout & 3;
  }

  // fetch the byte from t500 and apply the optional OR mask
  pixel_pair = or_modifier | t500[t500_offset + lookup_index];

  status = vram_op (vram, operation, pixel_pair, vram_ptr + vram_offset);
  if (status != CE_OK) {
    return status;
  }

  if (final) {
    return CE_OK;
  }

  // hand back bits 10..15 of misc16 as the next sub-offset
  // (presumably the parked sub-offset shifted right by two, if to_16bit()
  // places its first argument in the high byte -- confirm)
  *t500_sub_offset_inout = ((*misc16_inout >> 10) & 0x3f);

  return CE_OK;

}


// Tile plotter, "branch B" (267a in the original code).
//
// Tile data is consumed two bytes at a time, starting at slot->ptr.  Each
// byte pair carries four 4-bit indices into t_500; each index selects one
// byte, which is written to VRAM with vram_op() using plot_mode.  The four
// bytes of a pair go to four consecutive, descending VRAM offsets, after
// which the offset steps down to the next group (a total step of 8 per pair).
//
//   override_500buf_offset    nonzero: use overridden_500buf_offset as the
//                             t_500 base instead of slot->offset_500buf
//   xcoord, ycoord            converted to the starting VRAM address by
//                             coords_to_vram_ptr(); bit 2 of ycoord picks
//                             the starting half-block
//
// Returns the first error reported by tile_data_offread() or vram_op(),
// otherwise CE_OK.
static citerr_t tile_plot_branch_B  (tile_data_t *tiledata,
                                     u8_t t_500[T_500_LEN],
                                     tile_slot_t *slot,
                                     u8_t override_500buf_offset,
                                     u8_t overridden_500buf_offset,
                                     u8_t plot_mode,
                                     u8_t xcoord,
                                     u8_t ycoord,
                                     vram_t *vram) {

  // low byte: pass counter; high byte: overwritten from the tile data below
  u16_t height_word = slot->height;
  u16_t screen_ptr = coords_to_vram_ptr (xcoord, ycoord);
  u8_t half_block = (ycoord & 4); // 0 = top half-block, 4 = bottom half-block
  u8_t t500_base = override_500buf_offset ? overridden_500buf_offset
                                          : slot->offset_500buf;
  u8_t read_pos = 0; // running offset into the tile data; never reset between passes

  tile_data_setptr (tiledata, slot->ptr);

  for (;;) { // 2687 P_blit500_brchB_O

    s8_t col_off = slot->vram_target;

    while (col_off >= 0) { // 2689 P_blit500_brchB_I

      u8_t first_byte, second_byte;
      u8_t nybble[4];
      u8_t step;
      citerr_t e;

      // the data source behind tiledata can be T600, t_400, or constant tile data
      e = tile_data_offread(tiledata, read_pos, &first_byte);
      if (e != CE_OK) { return e; }
      read_pos++;

      e = tile_data_offread (tiledata, read_pos, &second_byte);
      if (e != CE_OK) { return e; }
      read_pos++;

      // the second byte replaces the high byte of the height word
      height_word = to_16bit(second_byte, height_word);

      // plotting order: second byte low nybble, the top nybble of the height
      // word (i.e. of the byte just stored there), then first byte low
      // nybble, first byte high nybble
      nybble[0] = second_byte & 0xf;
      nybble[1] = (height_word >> 12) & 0xf;
      nybble[2] = (first_byte & 0xf);
      nybble[3] = (first_byte >> 4) & 0xf;

      for (step = 0; step < 4; step++) {
        u8_t pixel_pair = t_500[t500_base + nybble[step]];

        if (step == 2) {
          pixel_pair |= slot->stripe; // only the third byte receives the stripe bits
        }

        e = vram_op(vram, plot_mode, pixel_pair, screen_ptr + col_off);
        if (e != CE_OK) { return e; }

        if (step != 3) {
          col_off--; // next byte goes one address lower
        }
      }

      col_off -= 5; // together with the three steps above: 8 bytes per pair
    }

    // stop once the low byte of the height word has run out; it is tested
    // before being decremented, so a low byte of N yields N+1 passes
    if ((height_word & 0xff) == 0) {
      break; // Ldr2_return_2
    }
    height_word--;

    half_block ^= 4;  // flip bit 2: alternate between top (0) and bottom (4)

    if (half_block != 0) {
      // moving to a bottom half-block: force bit 2 of the address
      screen_ptr |= half_block;
    }

    // After the OR above, bit 2 of screen_ptr is set, so its low byte cannot
    // be zero: in practice the advance below only runs when half_block == 0.
    // The low-byte test is retained to mirror the original routine.
    if (half_block == 0 || (screen_ptr & 0xff) == 0) {
      screen_ptr += 0x27c; // 636 = 640 - 4: next eight scanlines
    }

  }

  return CE_OK;

}
