src/third_party/libvpx/vp9/encoder/vp9_multi_thread.c - cobalt - Git at Google

 /*
  *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include <assert.h>

 #include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_ethread.h"
 #include "vp9/encoder/vp9_multi_thread.h"
 #include "vp9/encoder/vp9_temporal_filter.h"

 // Pops the next pending job from the job queue of tile `tile_id`.
 //
 // Returns a pointer to the JobNode of the dequeued entry, or NULL when the
 // queue handle's `next` pointer is NULL. When CONFIG_MULTITHREAD is enabled,
 // the queue head is read and advanced while holding the tile's job_mutex.
 void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt,
                                int tile_id) {
   RowMTInfo *const tile_info = &multi_thread_ctxt->row_mt_info[tile_id];
   JobQueueHandle *const queue = &tile_info->job_queue_hdl;
   JobQueue *head;
   JobNode *job = NULL;

 #if CONFIG_MULTITHREAD
   // Serialize access to the queue head.
   pthread_mutex_lock(&tile_info->job_mutex);
 #endif

   head = (JobQueue *)queue->next;
   if (head != NULL) {
     // Hand out the head entry and advance the queue past it.
     job = &head->job_info;
     queue->next = head->next;
     queue->num_jobs_acquired++;
   }

 #if CONFIG_MULTITHREAD
   pthread_mutex_unlock(&tile_info->job_mutex);
 #endif

   return job;
 }

 // Allocates `this_tile->row_base_thresh_freq_fact` and sets every entry to
 // RD_THRESH_INIT_FACT.
 //
 // The table holds sb_rows * BLOCK_SIZES * MAX_MODES ints, where sb_rows is
 // (mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1.
 //
 // Any previous value of the pointer is overwritten without being freed, so
 // the caller is responsible for releasing an existing table first.
 //
 // If the allocation fails, the pointer is left NULL and the function returns
 // without initializing anything; callers must check
 // this_tile->row_base_thresh_freq_fact before using it.
 void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi,
                                 TileDataEnc *const this_tile) {
   VP9_COMMON *const cm = &cpi->common;
   const int sb_rows =
       (mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1;
   const int num_entries = sb_rows * BLOCK_SIZES * MAX_MODES;
   int i;

   this_tile->row_base_thresh_freq_fact = (int *)vpx_calloc(
       num_entries, sizeof(*(this_tile->row_base_thresh_freq_fact)));
   // Do not write through a NULL pointer when the allocation fails.
   if (this_tile->row_base_thresh_freq_fact == NULL) return;

   for (i = 0; i < num_entries; i++)
     this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT;
 }

 // Allocates the row-based multi-threading resources for the current frame
 // geometry:
 //   - one job queue buffer sized for every tile column,
 //   - one job mutex per tile column (CONFIG_MULTITHREAD only),
 //   - row-sync memory for each tile in tile row 0, which is then copied by
 //     value into the tiles of every later tile row,
 //   - the per-tile rd-threshold table when
 //     cpi->sf.adaptive_rd_thresh_row_mt is set,
 //   - the number of vertical superblock units of each tile row.
 // The allocated dimensions are recorded in the multi-thread context.
 void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
   struct VP9Common *cm = &cpi->common;
   MultiThreadHandle *mt = &cpi->multi_thread_ctxt;
   const int num_tile_cols = 1 << cm->log2_tile_cols;
   const int num_tile_rows = 1 << cm->log2_tile_rows;
   const int num_sb_rows =
       mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
   // Size the queue for the largest row_mt stage. First pass uses 16x16
   // block size, so it is bounded by cm->mb_rows.
   const int rows_per_col = VPXMAX(cm->mb_rows, num_sb_rows);
   const int queue_len = rows_per_col * num_tile_cols;
   int row, col;

   mt->allocated_tile_cols = num_tile_cols;
   mt->allocated_tile_rows = num_tile_rows;
   mt->allocated_vert_unit_rows = rows_per_col;

   mt->job_queue = (JobQueue *)vpx_memalign(32, queue_len * sizeof(JobQueue));

 #if CONFIG_MULTITHREAD
   // One job mutex per tile column.
   for (col = 0; col < num_tile_cols; ++col)
     pthread_mutex_init(&mt->row_mt_info[col].job_mutex, NULL);
 #endif

   // Row-sync memory (and optionally the rd-threshold table) for tile row 0.
   for (col = 0; col < num_tile_cols; ++col) {
     TileDataEnc *const tile = &cpi->tile_data[col];

     vp9_row_mt_sync_mem_alloc(&tile->row_mt_sync, cm, rows_per_col);
     if (!cpi->sf.adaptive_rd_thresh_row_mt) continue;

     // Release a table left over from an earlier allocation.
     if (tile->row_base_thresh_freq_fact != NULL) {
       vpx_free(tile->row_base_thresh_freq_fact);
       tile->row_base_thresh_freq_fact = NULL;
     }
     vp9_row_mt_alloc_rd_thresh(cpi, tile);
   }

   // Tiles in rows > 0 reuse the sync state of the tile-row-0 tile in the
   // same column.
   for (row = 1; row < num_tile_rows; ++row) {
     TileDataEnc *const row_tiles = &cpi->tile_data[row * num_tile_cols];
     for (col = 0; col < num_tile_cols; ++col)
       row_tiles[col].row_mt_sync = cpi->tile_data[col].row_mt_sync;
   }

   // Vertical unit count of each tile row, taken from its first tile.
   for (row = 0; row < num_tile_rows; ++row) {
     TileInfo *const first_tile_info =
         &cpi->tile_data[row * num_tile_cols].tile_info;
     mt->num_tile_vert_sbs[row] =
         get_num_vert_units(*first_tile_info, MI_BLOCK_SIZE_LOG2);
   }
 }

 // Releases the resources set up by vp9_row_mt_mem_alloc(), using the tile
 // dimensions recorded in the multi-thread context at allocation time:
 //   - the job queue buffer (the pointer is reset to NULL afterwards so it
 //     cannot be reused or freed twice),
 //   - the per-tile-column job mutexes (CONFIG_MULTITHREAD only),
 //   - the row-sync memory owned by the tiles of tile row 0,
 //   - every tile's rd-threshold table.
 //
 // The rd-threshold tables are freed regardless of CONFIG_MULTITHREAD because
 // vp9_row_mt_mem_alloc() allocates them regardless of that setting.
 void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
   MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
   const int tile_cols = multi_thread_ctxt->allocated_tile_cols;
   const int tile_rows = multi_thread_ctxt->allocated_tile_rows;
   int tile_row, tile_col;

   // Deallocate memory for job queue
   if (multi_thread_ctxt->job_queue) {
     vpx_free(multi_thread_ctxt->job_queue);
     multi_thread_ctxt->job_queue = NULL;
   }

 #if CONFIG_MULTITHREAD
   // Destroy mutex for each tile
   for (tile_col = 0; tile_col < tile_cols; tile_col++) {
     RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col];
     pthread_mutex_destroy(&row_mt_info->job_mutex);
   }
 #endif

   // Free row based multi-threading sync memory. Only tile row 0 is visited;
   // later tile rows hold by-value copies of the same sync state.
   for (tile_col = 0; tile_col < tile_cols; tile_col++) {
     TileDataEnc *this_tile = &cpi->tile_data[tile_col];
     vp9_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync);
   }

   // Free the rd-threshold table of every tile that has one.
   for (tile_row = 0; tile_row < tile_rows; tile_row++) {
     for (tile_col = 0; tile_col < tile_cols; tile_col++) {
       TileDataEnc *this_tile =
           &cpi->tile_data[tile_row * tile_cols + tile_col];
       if (this_tile->row_base_thresh_freq_fact != NULL) {
         vpx_free(this_tile->row_base_thresh_freq_fact);
         this_tile->row_base_thresh_freq_fact = NULL;
       }
     }
   }
 }

 // Resets the per-tile state of every tile in tile row 0 before a row-MT run:
 // every `cur_col` entry of the tile's row-sync data is set to -1, and the
 // tile's first-pass data is zeroed with `image_data_start_row` marked
 // INVALID_ROW.
 void vp9_multi_thread_tile_init(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   const int num_tile_cols = 1 << cm->log2_tile_cols;
   const int num_sb_rows =
       mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
   // Pass 1 works on cm->mb_rows rows; every other pass on superblock rows.
   const int num_rows = (cpi->oxcf.pass == 1) ? cm->mb_rows : num_sb_rows;
   int col;

   for (col = 0; col < num_tile_cols; ++col) {
     TileDataEnc *const tile = &cpi->tile_data[col];

     // Filling every byte with 0xFF yields -1 in each cur_col entry.
     memset(tile->row_mt_sync.cur_col, -1,
            sizeof(*tile->row_mt_sync.cur_col) * num_rows);
     vp9_zero(tile->fp_data);
     tile->fp_data.image_data_start_row = INVALID_ROW;
   }
 }

 // Fills thread_id_to_tile_id[0 .. num_workers - 1] with tile column ids,
 // handing them out in increasing order starting at 0 and wrapping back to 0
 // each time the next id would equal `tile_cols`.
 void vp9_assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt,
                                int tile_cols, int num_workers) {
   int next_tile = 0;
   int worker;

   for (worker = 0; worker < num_workers; ++worker) {
     multi_thread_ctxt->thread_id_to_tile_id[worker] = next_tile;
     next_tile = (next_tile + 1 == tile_cols) ? 0 : next_tile + 1;
   }
 }

 // Returns the number of jobs of tile `cur_tile_id` that have not been
 // acquired yet: jobs_per_tile_col minus the queue's num_jobs_acquired.
 // When CONFIG_MULTITHREAD is enabled, the counter is read while holding the
 // tile's job_mutex.
 int vp9_get_job_queue_status(MultiThreadHandle *multi_thread_ctxt,
                              int cur_tile_id) {
   RowMTInfo *const tile_info = &multi_thread_ctxt->row_mt_info[cur_tile_id];
   int remaining;

 #if CONFIG_MULTITHREAD
   pthread_mutex_lock(&tile_info->job_mutex);
 #endif
   remaining = multi_thread_ctxt->jobs_per_tile_col -
               tile_info->job_queue_hdl.num_jobs_acquired;
 #if CONFIG_MULTITHREAD
   pthread_mutex_unlock(&tile_info->job_mutex);
 #endif

   return remaining;
 }

 // Builds one linked job list per tile column inside the shared job queue
 // buffer and resets the per-worker bookkeeping.
 //
 // The number of jobs per tile column depends on `job_type`:
 //   ENCODE_JOB      -> superblock rows of the frame,
 //   FIRST_PASS_JOB  -> cm->mb_rows,
 //   ARNR_JOB        -> (cm->mi_rows + TF_ROUND) >> TF_SHIFT.
 // Each tile column owns a contiguous slice of `jobs_per_tile_col` entries;
 // entry k links to entry k + 1 and the last entry links to NULL. For
 // ENCODE_JOB the tile row id of an entry advances each time the running
 // count reaches num_tile_vert_sbs[tile_row]; for the other job types it
 // stays 0.
 //
 // If the job count is not positive (for example an unknown job type in a
 // build where assert() is compiled out), every tile column gets an empty
 // queue (next == NULL) and no queue entry is written.
 void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type) {
   VP9_COMMON *const cm = &cpi->common;
   MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
   JobQueue *job_queue = multi_thread_ctxt->job_queue;
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
   int jobs_per_tile_col = 0;
   int total_jobs;
   int tile_col, i;

   switch (job_type) {
     case ENCODE_JOB: jobs_per_tile_col = sb_rows; break;
     case FIRST_PASS_JOB: jobs_per_tile_col = cm->mb_rows; break;
     case ARNR_JOB:
       jobs_per_tile_col = ((cm->mi_rows + TF_ROUND) >> TF_SHIFT);
       break;
     default: assert(0); break;
   }

   total_jobs = jobs_per_tile_col * tile_cols;

   multi_thread_ctxt->jobs_per_tile_col = jobs_per_tile_col;
   // memset the part of the job queue buffer that is about to be used
   if (total_jobs > 0) memset(job_queue, 0, total_jobs * sizeof(JobQueue));

   // Job queue preparation
   for (tile_col = 0; tile_col < tile_cols; tile_col++) {
     RowMTInfo *tile_ctxt = &multi_thread_ctxt->row_mt_info[tile_col];
     int tile_row = 0;
     int job_row_num, jobs_per_tile;

     tile_ctxt->job_queue_hdl.num_jobs_acquired = 0;

     // With no jobs there is no last entry to terminate; publish an empty
     // queue instead of writing before the start of this tile's slice.
     if (jobs_per_tile_col <= 0) {
       tile_ctxt->job_queue_hdl.next = NULL;
       continue;
     }

     tile_ctxt->job_queue_hdl.next = (void *)job_queue;

     // loop over all the vertical rows
     for (job_row_num = 0, jobs_per_tile = 0; job_row_num < jobs_per_tile_col;
          job_row_num++, jobs_per_tile++) {
       JobQueue *const node = &job_queue[job_row_num];

       node->job_info.vert_unit_row_num = job_row_num;
       node->job_info.tile_col_id = tile_col;
       node->job_info.tile_row_id = tile_row;
       node->next = (void *)(node + 1);

       // Move on to the next tile row once the current one is full. The
       // counter is set to -1 so the loop increment brings it back to 0.
       if (ENCODE_JOB == job_type &&
           jobs_per_tile >=
               multi_thread_ctxt->num_tile_vert_sbs[tile_row] - 1) {
         tile_row++;
         jobs_per_tile = -1;
       }
     }

     // Set the last pointer to NULL
     job_queue[jobs_per_tile_col - 1].next = (void *)NULL;

     // Move to the next tile
     job_queue += jobs_per_tile_col;
   }

   // Reset per-worker state: thread id and tile completion flags.
   for (i = 0; i < cpi->num_workers; i++) {
     EncWorkerData *thread_data = &cpi->tile_thr_data[i];
     thread_data->thread_id = i;

     for (tile_col = 0; tile_col < tile_cols; tile_col++)
       thread_data->tile_completion_status[tile_col] = 0;
   }
 }

 // Marks the tile `*cur_tile_id` as complete and looks for another tile to
 // work on.
 //
 // Every tile column not yet flagged in `tile_completion_status` is queried
 // via vp9_get_job_queue_status(); tiles with no remaining jobs are flagged
 // complete, and the tile with the most remaining jobs is selected (the
 // lowest column wins ties).
 //
 // Returns 1 when no tile with remaining jobs was found. Otherwise stores the
 // selected tile id in `*cur_tile_id` and returns 0.
 int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt,
                               int *tile_completion_status, int *cur_tile_id,
                               int tile_cols) {
   int busiest_tile = -1;
   int most_remaining = 0;
   int col;

   // The current tile is done; flag it so the scan below skips it.
   tile_completion_status[*cur_tile_id] = 1;

   for (col = 0; col < tile_cols; ++col) {
     int remaining;

     if (tile_completion_status[col] != 0) continue;

     remaining = vp9_get_job_queue_status(multi_thread_ctxt, col);
     // Remember exhausted tiles so later scans do not query them again.
     if (remaining == 0) tile_completion_status[col] = 1;
     if (remaining > most_remaining) {
       most_remaining = remaining;
       busiest_tile = col;
     }
   }

   if (busiest_tile == -1) return 1;

   // Switch to the least processed tile.
   *cur_tile_id = busiest_tile;
   return 0;
 }
	/*
	* Copyright (c) 2017 The WebM project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#include <assert.h>

	#include "vp9/encoder/vp9_encoder.h"
	#include "vp9/encoder/vp9_ethread.h"
	#include "vp9/encoder/vp9_multi_thread.h"
	#include "vp9/encoder/vp9_temporal_filter.h"

	// Pops the next pending job from the job queue of tile `tile_id`.
	//
	// Returns a pointer to the JobNode of the dequeued entry, or NULL when the
	// queue handle's `next` pointer is NULL. When CONFIG_MULTITHREAD is enabled,
	// the queue head is read and advanced while holding the tile's job_mutex.
	void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt,
	                               int tile_id) {
	  RowMTInfo *const tile_info = &multi_thread_ctxt->row_mt_info[tile_id];
	  JobQueueHandle *const queue = &tile_info->job_queue_hdl;
	  JobQueue *head;
	  JobNode *job = NULL;

	#if CONFIG_MULTITHREAD
	  // Serialize access to the queue head.
	  pthread_mutex_lock(&tile_info->job_mutex);
	#endif

	  head = (JobQueue *)queue->next;
	  if (head != NULL) {
	    // Hand out the head entry and advance the queue past it.
	    job = &head->job_info;
	    queue->next = head->next;
	    queue->num_jobs_acquired++;
	  }

	#if CONFIG_MULTITHREAD
	  pthread_mutex_unlock(&tile_info->job_mutex);
	#endif

	  return job;
	}

	// Allocates `this_tile->row_base_thresh_freq_fact` and sets every entry to
	// RD_THRESH_INIT_FACT.
	//
	// The table holds sb_rows * BLOCK_SIZES * MAX_MODES ints, where sb_rows is
	// (mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1.
	//
	// Any previous value of the pointer is overwritten without being freed, so
	// the caller is responsible for releasing an existing table first.
	//
	// If the allocation fails, the pointer is left NULL and the function returns
	// without initializing anything; callers must check
	// this_tile->row_base_thresh_freq_fact before using it.
	void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi,
	                                TileDataEnc *const this_tile) {
	  VP9_COMMON *const cm = &cpi->common;
	  const int sb_rows =
	      (mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1;
	  const int num_entries = sb_rows * BLOCK_SIZES * MAX_MODES;
	  int i;

	  this_tile->row_base_thresh_freq_fact = (int *)vpx_calloc(
	      num_entries, sizeof(*(this_tile->row_base_thresh_freq_fact)));
	  // Do not write through a NULL pointer when the allocation fails.
	  if (this_tile->row_base_thresh_freq_fact == NULL) return;

	  for (i = 0; i < num_entries; i++)
	    this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT;
	}

	// Allocates the row-based multi-threading resources for the current frame
	// geometry:
	//   - one job queue buffer sized for every tile column,
	//   - one job mutex per tile column (CONFIG_MULTITHREAD only),
	//   - row-sync memory for each tile in tile row 0, which is then copied by
	//     value into the tiles of every later tile row,
	//   - the per-tile rd-threshold table when
	//     cpi->sf.adaptive_rd_thresh_row_mt is set,
	//   - the number of vertical superblock units of each tile row.
	// The allocated dimensions are recorded in the multi-thread context.
	void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
	  struct VP9Common *cm = &cpi->common;
	  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
	  int tile_row, tile_col;
	  const int tile_cols = 1 << cm->log2_tile_cols;
	  const int tile_rows = 1 << cm->log2_tile_rows;
	  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
	  int jobs_per_tile_col, total_jobs;

	  // Allocate memory that is large enough for all row_mt stages. First pass
	  // uses 16x16 block size.
	  jobs_per_tile_col = VPXMAX(cm->mb_rows, sb_rows);
	  // Calculate the total number of jobs
	  total_jobs = jobs_per_tile_col * tile_cols;

	  multi_thread_ctxt->allocated_tile_cols = tile_cols;
	  multi_thread_ctxt->allocated_tile_rows = tile_rows;
	  multi_thread_ctxt->allocated_vert_unit_rows = jobs_per_tile_col;

	  multi_thread_ctxt->job_queue =
	      (JobQueue *)vpx_memalign(32, total_jobs * sizeof(JobQueue));

	#if CONFIG_MULTITHREAD
	  // Create mutex for each tile
	  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
	    RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col];
	    pthread_mutex_init(&row_mt_info->job_mutex, NULL);
	  }
	#endif

	  // Allocate memory for row based multi-threading
	  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
	    TileDataEnc *this_tile = &cpi->tile_data[tile_col];
	    vp9_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, jobs_per_tile_col);
	    if (cpi->sf.adaptive_rd_thresh_row_mt) {
	      // Release a table left over from an earlier allocation.
	      if (this_tile->row_base_thresh_freq_fact != NULL) {
	        vpx_free(this_tile->row_base_thresh_freq_fact);
	        this_tile->row_base_thresh_freq_fact = NULL;
	      }
	      vp9_row_mt_alloc_rd_thresh(cpi, this_tile);
	    }
	  }

	  // Assign the sync pointer of tile row zero for every tile row > 0
	  for (tile_row = 1; tile_row < tile_rows; tile_row++) {
	    for (tile_col = 0; tile_col < tile_cols; tile_col++) {
	      TileDataEnc *this_tile =
	          &cpi->tile_data[tile_row * tile_cols + tile_col];
	      TileDataEnc *this_col_tile = &cpi->tile_data[tile_col];
	      this_tile->row_mt_sync = this_col_tile->row_mt_sync;
	    }
	  }

	  // Calculate the number of vertical units in the given tile row
	  for (tile_row = 0; tile_row < tile_rows; tile_row++) {
	    TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols];
	    TileInfo *tile_info = &this_tile->tile_info;
	    multi_thread_ctxt->num_tile_vert_sbs[tile_row] =
	        get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
	  }
	}

	// Releases the resources set up by vp9_row_mt_mem_alloc(), using the tile
	// dimensions recorded in the multi-thread context at allocation time:
	//   - the job queue buffer (the pointer is reset to NULL afterwards so it
	//     cannot be reused or freed twice),
	//   - the per-tile-column job mutexes (CONFIG_MULTITHREAD only),
	//   - the row-sync memory owned by the tiles of tile row 0,
	//   - every tile's rd-threshold table.
	//
	// The rd-threshold tables are freed regardless of CONFIG_MULTITHREAD because
	// vp9_row_mt_mem_alloc() allocates them regardless of that setting.
	void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
	  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
	  const int tile_cols = multi_thread_ctxt->allocated_tile_cols;
	  const int tile_rows = multi_thread_ctxt->allocated_tile_rows;
	  int tile_row, tile_col;

	  // Deallocate memory for job queue
	  if (multi_thread_ctxt->job_queue) {
	    vpx_free(multi_thread_ctxt->job_queue);
	    multi_thread_ctxt->job_queue = NULL;
	  }

	#if CONFIG_MULTITHREAD
	  // Destroy mutex for each tile
	  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
	    RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col];
	    pthread_mutex_destroy(&row_mt_info->job_mutex);
	  }
	#endif

	  // Free row based multi-threading sync memory. Only tile row 0 is visited;
	  // later tile rows hold by-value copies of the same sync state.
	  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
	    TileDataEnc *this_tile = &cpi->tile_data[tile_col];
	    vp9_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync);
	  }

	  // Free the rd-threshold table of every tile that has one.
	  for (tile_row = 0; tile_row < tile_rows; tile_row++) {
	    for (tile_col = 0; tile_col < tile_cols; tile_col++) {
	      TileDataEnc *this_tile =
	          &cpi->tile_data[tile_row * tile_cols + tile_col];
	      if (this_tile->row_base_thresh_freq_fact != NULL) {
	        vpx_free(this_tile->row_base_thresh_freq_fact);
	        this_tile->row_base_thresh_freq_fact = NULL;
	      }
	    }
	  }
	}

	// Resets the per-tile state of every tile in tile row 0 before a row-MT run:
	// every `cur_col` entry of the tile's row-sync data is set to -1, and the
	// tile's first-pass data is zeroed with `image_data_start_row` marked
	// INVALID_ROW.
	void vp9_multi_thread_tile_init(VP9_COMP *cpi) {
	  VP9_COMMON *const cm = &cpi->common;
	  const int tile_cols = 1 << cm->log2_tile_cols;
	  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
	  int i;

	  for (i = 0; i < tile_cols; i++) {
	    TileDataEnc *this_tile = &cpi->tile_data[i];
	    // Pass 1 works on cm->mb_rows rows; every other pass on superblock rows.
	    int jobs_per_tile_col = cpi->oxcf.pass == 1 ? cm->mb_rows : sb_rows;

	    // Initialize cur_col to -1 for all rows. The size is one element of
	    // cur_col times the number of rows.
	    memset(this_tile->row_mt_sync.cur_col, -1,
	           sizeof(*this_tile->row_mt_sync.cur_col) * jobs_per_tile_col);
	    vp9_zero(this_tile->fp_data);
	    this_tile->fp_data.image_data_start_row = INVALID_ROW;
	  }
	}

	// Fills thread_id_to_tile_id[0 .. num_workers - 1] with tile column ids,
	// handing them out in increasing order starting at 0 and wrapping back to 0
	// each time the next id would equal `tile_cols`.
	void vp9_assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt,
	                               int tile_cols, int num_workers) {
	  int next_tile = 0;
	  int worker;

	  for (worker = 0; worker < num_workers; ++worker) {
	    multi_thread_ctxt->thread_id_to_tile_id[worker] = next_tile;
	    next_tile = (next_tile + 1 == tile_cols) ? 0 : next_tile + 1;
	  }
	}

	// Returns the number of jobs of tile `cur_tile_id` that have not been
	// acquired yet: jobs_per_tile_col minus the queue's num_jobs_acquired.
	// When CONFIG_MULTITHREAD is enabled, the counter is read while holding the
	// tile's job_mutex.
	int vp9_get_job_queue_status(MultiThreadHandle *multi_thread_ctxt,
	                             int cur_tile_id) {
	  RowMTInfo *const tile_info = &multi_thread_ctxt->row_mt_info[cur_tile_id];
	  int remaining;

	#if CONFIG_MULTITHREAD
	  pthread_mutex_lock(&tile_info->job_mutex);
	#endif
	  remaining = multi_thread_ctxt->jobs_per_tile_col -
	              tile_info->job_queue_hdl.num_jobs_acquired;
	#if CONFIG_MULTITHREAD
	  pthread_mutex_unlock(&tile_info->job_mutex);
	#endif

	  return remaining;
	}

	// Builds one linked job list per tile column inside the shared job queue
	// buffer and resets the per-worker bookkeeping.
	//
	// The number of jobs per tile column depends on `job_type`:
	//   ENCODE_JOB      -> superblock rows of the frame,
	//   FIRST_PASS_JOB  -> cm->mb_rows,
	//   ARNR_JOB        -> (cm->mi_rows + TF_ROUND) >> TF_SHIFT.
	// Each tile column owns a contiguous slice of `jobs_per_tile_col` entries;
	// entry k links to entry k + 1 and the last entry links to NULL. For
	// ENCODE_JOB the tile row id of an entry advances each time the running
	// count reaches num_tile_vert_sbs[tile_row]; for the other job types it
	// stays 0.
	//
	// If the job count is not positive (for example an unknown job type in a
	// build where assert() is compiled out), every tile column gets an empty
	// queue (next == NULL) and no queue entry is written.
	void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type) {
	  VP9_COMMON *const cm = &cpi->common;
	  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
	  JobQueue *job_queue = multi_thread_ctxt->job_queue;
	  const int tile_cols = 1 << cm->log2_tile_cols;
	  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
	  int jobs_per_tile_col = 0;
	  int total_jobs;
	  int tile_col, i;

	  switch (job_type) {
	    case ENCODE_JOB: jobs_per_tile_col = sb_rows; break;
	    case FIRST_PASS_JOB: jobs_per_tile_col = cm->mb_rows; break;
	    case ARNR_JOB:
	      jobs_per_tile_col = ((cm->mi_rows + TF_ROUND) >> TF_SHIFT);
	      break;
	    default: assert(0); break;
	  }

	  total_jobs = jobs_per_tile_col * tile_cols;

	  multi_thread_ctxt->jobs_per_tile_col = jobs_per_tile_col;
	  // memset the part of the job queue buffer that is about to be used
	  if (total_jobs > 0) memset(job_queue, 0, total_jobs * sizeof(JobQueue));

	  // Job queue preparation
	  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
	    RowMTInfo *tile_ctxt = &multi_thread_ctxt->row_mt_info[tile_col];
	    int tile_row = 0;
	    int job_row_num, jobs_per_tile;

	    tile_ctxt->job_queue_hdl.num_jobs_acquired = 0;

	    // With no jobs there is no last entry to terminate; publish an empty
	    // queue instead of writing before the start of this tile's slice.
	    if (jobs_per_tile_col <= 0) {
	      tile_ctxt->job_queue_hdl.next = NULL;
	      continue;
	    }

	    tile_ctxt->job_queue_hdl.next = (void *)job_queue;

	    // loop over all the vertical rows
	    for (job_row_num = 0, jobs_per_tile = 0; job_row_num < jobs_per_tile_col;
	         job_row_num++, jobs_per_tile++) {
	      JobQueue *const node = &job_queue[job_row_num];

	      node->job_info.vert_unit_row_num = job_row_num;
	      node->job_info.tile_col_id = tile_col;
	      node->job_info.tile_row_id = tile_row;
	      node->next = (void *)(node + 1);

	      // Move on to the next tile row once the current one is full. The
	      // counter is set to -1 so the loop increment brings it back to 0.
	      if (ENCODE_JOB == job_type &&
	          jobs_per_tile >=
	              multi_thread_ctxt->num_tile_vert_sbs[tile_row] - 1) {
	        tile_row++;
	        jobs_per_tile = -1;
	      }
	    }

	    // Set the last pointer to NULL
	    job_queue[jobs_per_tile_col - 1].next = (void *)NULL;

	    // Move to the next tile
	    job_queue += jobs_per_tile_col;
	  }

	  // Reset per-worker state: thread id and tile completion flags.
	  for (i = 0; i < cpi->num_workers; i++) {
	    EncWorkerData *thread_data = &cpi->tile_thr_data[i];
	    thread_data->thread_id = i;

	    for (tile_col = 0; tile_col < tile_cols; tile_col++)
	      thread_data->tile_completion_status[tile_col] = 0;
	  }
	}

	// Marks the tile `*cur_tile_id` as complete and looks for another tile to
	// work on.
	//
	// Every tile column not yet flagged in `tile_completion_status` is queried
	// via vp9_get_job_queue_status(); tiles with no remaining jobs are flagged
	// complete, and the tile with the most remaining jobs is selected (the
	// lowest column wins ties).
	//
	// Returns 1 when no tile with remaining jobs was found. Otherwise stores the
	// selected tile id in `*cur_tile_id` and returns 0.
	int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt,
	                              int *tile_completion_status, int *cur_tile_id,
	                              int tile_cols) {
	  int tile_col;
	  int tile_id = -1;  // Stores the tile ID with minimum proc done
	  int max_num_jobs_remaining = 0;
	  int num_jobs_remaining;

	  // Mark the completion to avoid check in the loop
	  tile_completion_status[*cur_tile_id] = 1;
	  // Check for the status of all the tiles
	  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
	    if (tile_completion_status[tile_col] == 0) {
	      num_jobs_remaining =
	          vp9_get_job_queue_status(multi_thread_ctxt, tile_col);
	      // Mark the completion to avoid checks during future switches across
	      // tiles
	      if (num_jobs_remaining == 0) tile_completion_status[tile_col] = 1;
	      if (num_jobs_remaining > max_num_jobs_remaining) {
	        max_num_jobs_remaining = num_jobs_remaining;
	        tile_id = tile_col;
	      }
	    }
	  }

	  if (-1 == tile_id) {
	    return 1;
	  } else {
	    // Update the cur ID to the next tile ID that will be processed,
	    // which will be the least processed tile
	    *cur_tile_id = tile_id;
	    return 0;
	  }
	}