fork download
  1. #include <stdio.h>
  2. #include <stdint.h>
  3. #include <stdlib.h>
  4. #include <math.h>
  5. #include <string.h>
  6.  
  7. #define FIXED_POINT_SCALE 256
  8. #define CORDIC_NTAB 64
  9. #define N_MAX 16
  10.  
  11. // === Function prototypes for printing helpers (updated) ===
  12. void print_matrix(const char* title, float* mat, int n);
  13. void print_matrix_double(const char* title, double* mat, int n);
  14. void print_matrix_int64(const char* title, int64_t* mat, int n);
  15. void print_zigzag_int64(const char* title, int64_t* arr, int n);
  16.  
  17. // === Quantization Matrix Selector ===
  18. void select_quant_matrix(int n, int quant_matrix[N_MAX][N_MAX]) {
  19. int i, j;
  20.  
  21. memset(quant_matrix, 0, sizeof(int) * N_MAX * N_MAX);
  22.  
  23. if (n == 4) {
  24. int tmp[4][4] = {
  25. {16, 12, 34, 61},
  26. {14, 20, 57, 58},
  27. {22, 52, 95, 87},
  28. {72, 96, 104, 99}
  29. };
  30. for (i = 0; i < 4; i++)
  31. for (j = 0; j < 4; j++)
  32. quant_matrix[i][j] = tmp[i][j];
  33. } else if (n == 6) {
  34. int tmp[6][6] = {
  35. {16, 11, 15, 27, 46, 61},
  36. {13, 13, 19, 35, 61, 55},
  37. {14, 18, 26, 54, 79, 61},
  38. {19, 30, 54, 77, 106, 80},
  39. {37, 57, 75, 97, 116, 97},
  40. {72, 93, 97, 109, 102, 99}
  41. };
  42. for (i = 0; i < 6; i++)
  43. for (j = 0; j < 6; j++)
  44. quant_matrix[i][j] = tmp[i][j];
  45. } else if (n == 8) {
  46. int tmp[8][8] = {
  47. {16, 11, 10, 16, 24, 40, 51, 61},
  48. {12, 12, 14, 19, 26, 58, 60, 55},
  49. {14, 13, 16, 24, 40, 57, 69, 56},
  50. {14, 17, 22, 29, 51, 87, 80, 62},
  51. {18, 22, 37, 56, 68, 109, 103, 77},
  52. {24, 35, 55, 64, 81, 104, 113, 92},
  53. {49, 64, 78, 87, 103, 121, 120, 101},
  54. {72, 92, 95, 98, 112, 100, 103, 99}
  55. };
  56. for (i = 0; i < 8; i++)
  57. for (j = 0; j < 8; j++)
  58. quant_matrix[i][j] = tmp[i][j];
  59. } else if (n == 10) {
  60. int tmp[10][10] = {
  61. {16, 12, 10, 12, 17, 23, 34, 45, 53, 61},
  62. {13, 12, 12, 15, 19, 25, 42, 55, 58, 56},
  63. {13, 13, 14, 17, 23, 32, 48, 61, 63, 56},
  64. {14, 14, 16, 20, 27, 41, 57, 69, 69, 58},
  65. {14, 17, 21, 26, 33, 50, 75, 86, 78, 64},
  66. {18, 20, 28, 40, 53, 64, 91, 104, 94, 75},
  67. {22, 28, 39, 52, 63, 75, 95, 107, 104, 87},
  68. {33, 43, 55, 67, 75, 88, 104, 113, 111, 96},
  69. {53, 65, 76, 84, 91, 103, 112, 116, 112, 101},
  70. {72, 87, 94, 96, 99, 110, 104, 101, 102, 99}
  71. };
  72. for (i = 0; i < 10; i++)
  73. for (j = 0; j < 10; j++)
  74. quant_matrix[i][j] = tmp[i][j];
  75. } else if (n == 12) {
  76. int tmp[12][12] = {
  77. {16, 13, 11, 10, 13, 17, 22, 30, 41, 48, 54, 61},
  78. {13, 12, 12, 12, 15, 19, 24, 35, 51, 55, 57, 57},
  79. {13, 12, 13, 14, 17, 22, 27, 40, 58, 61, 60, 55},
  80. {14, 13, 14, 16, 20, 26, 35, 46, 58, 65, 63, 56},
  81. {14, 15, 16, 19, 23, 29, 41, 56, 72, 74, 69, 59},
  82. {15, 17, 19, 24, 29, 36, 49, 68, 90, 86, 76, 64},
  83. {17, 20, 24, 32, 42, 52, 62, 80, 104, 100, 89, 74},
  84. {21, 25, 31, 42, 52, 62, 71, 87, 107, 107, 98, 83},
  85. {26, 32, 42, 55, 62, 69, 79, 92, 106, 111, 106, 93},
  86. {40, 49, 58, 69, 76, 83, 93, 105, 116, 118, 111, 98},
  87. {56, 66, 76, 83, 88, 93, 103, 109, 113, 113, 109, 100},
  88. {72, 84, 93, 95, 97, 100, 109, 106, 100, 102, 102, 99}
  89. };
  90. for (i = 0; i < 12; i++)
  91. for (j = 0; j < 12; j++)
  92. quant_matrix[i][j] = tmp[i][j];
  93. } else if (n == 14) {
  94. int tmp[14][14] = {
  95. {16, 13, 11, 10, 11, 14, 18, 22, 28, 37, 44, 50, 55, 61},
  96. {14, 12, 12, 12, 13, 16, 19, 23, 31, 44, 51, 55, 57, 58},
  97. {12, 12, 12, 13, 15, 18, 21, 25, 34, 51, 59, 60, 58, 55},
  98. {13, 13, 13, 14, 16, 20, 24, 31, 40, 53, 60, 65, 61, 56},
  99. {14, 14, 14, 15, 18, 22, 28, 37, 47, 57, 64, 70, 64, 57},
  100. {14, 15, 16, 18, 21, 25, 31, 42, 55, 71, 76, 76, 68, 60},
  101. {15, 16, 18, 22, 26, 31, 38, 49, 64, 85, 89, 85, 75, 65},
  102. {17, 19, 21, 28, 35, 43, 51, 60, 74, 96, 101, 98, 85, 73},
  103. {20, 23, 26, 35, 44, 53, 61, 68, 81, 101, 107, 106, 94, 81},
  104. {23, 28, 34, 43, 53, 59, 66, 75, 86, 100, 107, 111, 101, 90},
  105. {32, 38, 45, 55, 64, 69, 76, 85, 95, 107, 112, 115, 106, 95},
  106. {46, 54, 62, 70, 77, 82, 88, 97, 106, 117, 120, 119, 110, 100},
  107. {59, 67, 76, 82, 86, 90, 95, 103, 108, 110, 111, 112, 106, 100},
  108. {72, 82, 92, 94, 95, 97, 101, 109, 108, 102, 101, 103, 101, 99}
  109. };
  110. for (i = 0; i < 14; i++)
  111. for (j = 0; j < 14; j++)
  112. quant_matrix[i][j] = tmp[i][j];
  113. } else if (n == 16) {
  114. int tmp[16][16] = {
  115. {16, 13, 11, 11, 10, 12, 15, 18, 22, 27, 34, 41, 46, 52, 56, 61},
  116. {14, 13, 12, 12, 12, 13, 16, 19, 23, 28, 38, 49, 52, 55, 57, 58},
  117. {12, 12, 12, 13, 13, 15, 18, 20, 24, 30, 44, 57, 58, 59, 57, 55},
  118. {13, 13, 12, 13, 14, 17, 19, 23, 28, 35, 47, 58, 61, 63, 59, 55},
  119. {14, 13, 13, 14, 15, 18, 22, 26, 33, 41, 50, 58, 63, 67, 61, 56},
  120. {14, 14, 14, 16, 17, 20, 24, 29, 38, 47, 57, 67, 70, 71, 64, 58},
  121. {14, 15, 16, 18, 20, 23, 26, 32, 42, 54, 68, 80, 79, 76, 68, 61},
  122. {15, 16, 18, 21, 24, 28, 32, 39, 49, 61, 78, 91, 88, 84, 74, 66},
  123. {17, 18, 20, 25, 30, 37, 44, 51, 58, 69, 87, 102, 99, 94, 83, 73},
  124. {19, 21, 24, 30, 37, 45, 54, 61, 67, 77, 94, 108, 106, 103, 91, 80},
  125. {22, 25, 29, 36, 45, 52, 58, 65, 72, 82, 95, 106, 108, 108, 97, 87},
  126. {26, 31, 37, 45, 54, 60, 65, 71, 79, 88, 98, 107, 111, 112, 102, 93},
  127. {37, 43, 49, 57, 65, 71, 75, 81, 89, 97, 107, 114, 116, 116, 106, 97},
  128. {50, 57, 64, 71, 77, 82, 86, 92, 99, 107, 114, 119, 119, 117, 109, 101},
  129. {60, 68, 76, 81, 85, 89, 91, 96, 103, 108, 109, 109, 110, 110, 105, 100},
  130. {72, 81, 91, 93, 95, 96, 97, 102, 108, 109, 104, 100, 102, 103, 101, 99}
  131. };
  132. for (i = 0; i < 16; i++)
  133. for (j = 0; j < 16; j++)
  134. quant_matrix[i][j] = tmp[i][j];
  135. }
  136. }
  137.  
  138. int rotations(int bpp){
  139. if(bpp>3 & bpp<9){
  140. return 16;
  141. }
  142. else if(bpp>9 & bpp<17){
  143. return 32;
  144. }
  145. else{
  146. return 55;
  147. }
  148. }
  149.  
  150. static const __uint128_t atanh_fixed55[56] = {
  151. 0,
  152. 231808622658467921920, 136844620538003570688, 72304939235532316672, 36703116139066482688, 8422781014094860288,
  153. 9220371395095598080, 4611310773424436736, 2305796098435487488, 1152915640598518656, 576460019297349376,
  154. 288230284525795200, 144115176622611392, 72057592606272224, 36028796840007000, 18014398487112362,
  155. 9007199251944789, 4503599627020970, 2251799813641557, 1125899906837163, 562949953420629,
  156. 281474976710571, 140737488355317, 70368744177663, 35184372088832, 17592186044416,
  157. 8796093022208, 4398046511104, 2199023255552, 1099511627776, 549755813888,
  158. 274877906944, 137438953472, 68719476736, 34359738368, 17179869184,
  159. 8589934592, 4294967296, 2147483648, 1073741824, 536870912,
  160. 268435456, 134217728, 67108864, 33554432, 16777216,
  161. 8388608, 4194304, 2097152, 1048576, 524288,
  162. 262144, 131072, 65536, 32768, 16384
  163. };
  164.  
  165. const uint64_t k = 11201839480116572000ULL;
  166.  
  167. //uint64_t K = k >> (63 - TP_WR_WIDTH - 12);
  168.  
  169. int clog2_n2(int n) {
  170. int v = n*n, count = 0;
  171. while (v > 1) { v >>= 1; ++count; }
  172. return count + 1;
  173. }
  174.  
  175. void print_matrix(const char* title, float* mat, int n) {
  176. printf("\n%s:\n", title);
  177. for (int i = 0; i < n; i++) {
  178. for (int j = 0; j < n; j++) printf("%8.4f ", mat[i*n + j]);
  179. printf("\n");
  180. }
  181. }
  182.  
  183. void print_matrix_double(const char* title, double* mat, int n) {
  184. printf("\n%s (double):\n", title);
  185. for (int i = 0; i < n; i++) {
  186. for (int j = 0; j < n; j++) printf("%12.4f ", mat[i*n + j]);
  187. printf("\n");
  188. }
  189. }
  190.  
  191. void print_matrix_int64(const char* title, int64_t* mat, int n) {
  192. printf("\n%s (int64_t):\n", title);
  193. for (int i = 0; i < n; i++) {
  194. for (int j = 0; j < n; j++)
  195. printf("%12lld ", (long long)mat[i*n + j]);
  196. printf("\n");
  197. }
  198. }
  199.  
  200. void print_zigzag_int64(const char* title, int64_t* arr, int n) {
  201. int total = n * n;
  202. printf("\n%s (length %d):\n", title, total);
  203. for (int i = 0; i < total; ++i) {
  204. printf("[%3d]=%6lld ", i, (long long)arr[i]);
  205. if ((i + 1) % 8 == 0) printf("\n");
  206. }
  207. if (total % 8 != 0) printf("\n");
  208. }
  209.  
  210. void cordic_rotation(uint64_t theta, int rot, int64_t* c, int64_t* s, int bpp,int tc_mode,int idct_mode) {
  211. uint64_t atanh_fixed[56];
  212. const uint64_t m_pi = 14488038916154245000LL;
  213. int TP_WR_WIDTH = bpp/2+bpp+3+(1-tc_mode)*(1-idct_mode);
  214. uint64_t K = k >> (64 - TP_WR_WIDTH - 12);
  215. for (int i = 1; i <= 56; i++) {
  216. atanh_fixed[i] = (uint64_t)(atanh_fixed55[i] >> (63 - TP_WR_WIDTH - 12));
  217. }
  218. while (theta < -m_pi)
  219. theta += (m_pi << 1);
  220. while (theta > m_pi)
  221. theta -= (m_pi << 1);
  222. int sign = 1;
  223. if(theta < -(2*m_pi)){
  224. theta += m_pi;
  225. sign = -1;
  226. }
  227. else if(theta > (2*m_pi)){
  228. theta -= m_pi;
  229. sign = -1;
  230. }
  231. uint64_t x = K, y = 0, z = theta;
  232. for (int i = 0; i < rot; ++i){
  233. int64_t di = (z >= 0) ? 1 : -1;
  234. int64_t x_new = x - di * (y >> i);
  235. int64_t y_new = y + di * (x >> i);
  236. int64_t z_new = z - di * atanh_fixed[i];
  237. x = x_new;
  238. y = y_new;
  239. z = z_new;
  240. }
  241. *c = sign * x;
  242. *s = sign * y;
  243. }
  244.  
  245. void cordic_dct_1d(const uint64_t* in, uint64_t* out, int n, int rot, int bpp, int tc_mode, int idct_mode) {
  246. static int64_t alpha[N_MAX];
  247. const uint64_t m_pi = 14488038916154245000LL;
  248. static int inited = 0;
  249. if (!inited){
  250. alpha[0] = sqrt(1 / n);
  251. for (int k = 1; k < n; ++k)
  252. alpha[k] = sqrt(2 / n);
  253. inited = 1;
  254. }
  255. for (int k = 0; k < n; ++k) {
  256. int64_t acc = 0;
  257. for(int m = 0; m < n; ++m) {
  258. int64_t angle = (m_pi * (2LL * m + 1LL) * k) / (2LL * n);
  259. int64_t c, s;
  260. cordic_rotation(angle, rot, &c, &s, bpp, tc_mode, idct_mode);
  261. acc += in[m] * c;
  262. }
  263. out[k] = alpha[k] * acc;
  264. }
  265. }
  266.  
  267. // quantize from int64_t (signed) to int64_t (signed)
  268. void quantize_fixed(int64_t* in, int64_t* out, int n,int rt_mode, int quant_matrix[N_MAX][N_MAX]) {
  269. for (int i = 0; i < n; i++)
  270. for (int j = 0; j < n; j++) {
  271. int q = quant_matrix[i][j];
  272. if (q == 0) q = 1; // safety
  273. if(rt_mode==0) {
  274. double v = (double)in[i*n + j] / (double)q;
  275. out[i*n + j] = (int64_t)llround(v);
  276. }
  277. else{
  278. out[i*n+j] = in[i*n+j]/q;
  279. }
  280. }
  281. }
  282.  
  283. void zigzag_traversal_int64(int64_t* mat, int64_t* out, int n) {
  284. int i = 0, j = 0, index = 0, up = 1;
  285. for (int k = 0; k < n * n; k++) {
  286. out[index++] = mat[i*n + j];
  287. if (up) {
  288. if (j == n - 1) { i++; up = 0; }
  289. else if (i == 0) { j++; up = 0; }
  290. else { i--; j++; }
  291. } else {
  292. if (i == n - 1) { j++; up = 1; }
  293. else if (j == 0) { i++; up = 1; }
  294. else { i++; j--; }
  295. }
  296. }
  297. }
  298.  
  299. static inline int64_t limit_bits_signed(int64_t x, int bits) {
  300. int64_t min, max;
  301. if (bits >= 63) return x;
  302. min = -(1LL << (bits - 1));
  303. max = (1LL << (bits - 1)) - 1;
  304. if (x < min) x = min;
  305. else if (x > max) x = max;
  306. return x;
  307. }
  308.  
  309. static inline int64_t inverse_limit_bits_signed(int64_t x, int bits)
  310. {
  311. if (bits >= 63) return x; // No limit needed
  312.  
  313. int64_t max = (1LL << bits) - 1; // Max unsigned value with N bits
  314.  
  315. if (x < 0)
  316. return 0; // Negative → clamp to 0
  317.  
  318. if (x > max)
  319. return max; // Overflow → clamp to max
  320.  
  321. return x; // Within range
  322. }
  323.  
  324. // === MAIN FUNCTION ===
  325. int main() {
  326. int n = 8, bpp = 8;
  327.  
  328. printf("Set matrix size n (even, 4–16, default 8): ");
  329. int tmpn;
  330. if (scanf("%d", &tmpn) == 1 && tmpn >= 4 && tmpn <= 16 && tmpn % 2 == 0)
  331. n = tmpn;
  332.  
  333. printf("Set bits per pixel bpp (4–32, default 8): ");
  334. int tmpb;
  335. if (scanf("%d", &tmpb) == 1 && tmpb >= 4 && tmpb <= 32)
  336. bpp = tmpb;
  337.  
  338. int tc_mode;
  339. printf("enter the tc_mode (0=normal signed, 1=normal signed – currently same): ");
  340. scanf("%d", &tc_mode); // kept for interface; not changing behavior
  341.  
  342. int idct_mode;
  343. printf("enter the idct_mode (0=DCT, 1=IDCT): ");
  344. scanf("%d", &idct_mode);
  345.  
  346. int rt_mode;
  347. printf("enter the rt_mode (0 :rounded up 1:truncate): ");
  348. scanf("%d", &rt_mode);
  349.  
  350. int addr_bits = clog2_n2(n);
  351. printf("For n=%d, RAM address width: %d bits\n", n, addr_bits);
  352.  
  353. // Select quantization matrix
  354. int quant_matrix[N_MAX][N_MAX], rot;
  355. select_quant_matrix(n, quant_matrix);
  356. rot = rotations(bpp);
  357. printf("\n%d:\n", rot);
  358.  
  359. // Effective input width (for range limit only)
  360. // int input_bits = (idct_mode == 0) ? bpp : (n/2 + bpp);
  361. // unsigned long long max_val = (input_bits >= 63) ? ((1ULL << 62) - 1ULL)
  362. // : ((1ULL << (input_bits - 1)) - 1ULL);
  363.  
  364. // Buffers
  365. int64_t dct_mem[N_MAX*N_MAX]; // generic float buffer (spatial or freq)
  366. int64_t tp_mem[N_MAX*N_MAX]; // after row transform
  367. int64_t tp_transposed[N_MAX*N_MAX]; // transposed intermediate
  368. int64_t fixed[N_MAX*N_MAX]; // integer-rounded values
  369. int64_t quantized[N_MAX*N_MAX]; // quantized coefficients
  370. int64_t zigzag[N_MAX*N_MAX]; // zigzag order
  371. int64_t zigzag_matrix[N_MAX*N_MAX]; // zigzag array reshaped as matrix
  372. int64_t idct_matrix[N_MAX*N_MAX]; // de-zigzagged quantized
  373. int64_t idct_mem[N_MAX*N_MAX]; // dequantized freq-domain (int64)
  374.  
  375.  
  376.  
  377. if (idct_mode == 0) {
  378. int done=0;
  379. // ==========================
  380. // Forward 2D DCT + Quant + Zigzag
  381. // ==========================
  382. int input_bits = (idct_mode == 0) ? bpp : (n/2 + bpp);
  383. unsigned long long max_val = (input_bits >= 64) ? ~0ULL : ((1ULL << input_bits) - 1ULL);
  384. printf("Enter %d input values (decimal, 0–%llu):\n", n * n, max_val);
  385. for (int addr = 0; addr < n * n; ++addr) {
  386. unsigned long long uval;
  387. if (scanf("%llu", &uval) != 1 || uval > max_val) {
  388. fprintf(stderr, "Input out of range (0–%llu)\n", max_val);
  389. return 1;
  390. }
  391.  
  392. // Interpret as unsigned or signed (two's complement) depending on tc_mode
  393. if (tc_mode == 1) {
  394. dct_mem[addr] = (~uval+1) & ((((uint64_t)1 << input_bits) - 1));;
  395. printf("WR dct_mem[%d] = %.0f\n", addr, dct_mem[addr]);
  396. done=1;
  397. //int64_t sval = tc_to_signed(uval, input_bits);
  398. // dct_mem[addr] = twos_complement_to_signed(uval, input_bits);
  399. // printf("WR dct_mem[%d] = %lld (from 0x%llX, %d-bit two's complement)\n",
  400. // addr, (long long)sval, uval, input_bits);
  401. } else {
  402. dct_mem[addr] = (int64_t)uval;
  403. printf("WR dct_mem[%d] = %.0f\n", addr, dct_mem[addr]);
  404. done=1;
  405. }
  406. }
  407.  
  408. // Step 1: Row-wise DCT
  409. for (int i = 0; i < n; i++) {
  410. int64_t in_row[N_MAX], out_row[N_MAX];
  411. for (int j = 0; j < n; j++)
  412. in_row[j] = dct_mem[i*n + j];
  413. cordic_dct_1d(in_row, out_row, n, rot, bpp, tc_mode, idct_mode);
  414. for (int j = 0; j < n; j++)
  415. tp_mem[i*n + j] = (int64_t)out_row[j];
  416. }
  417. print_matrix_int64("1D DCT (Rows) Output", tp_mem, n);
  418.  
  419. // Step 2: Transpose after rounding
  420. //float_to_int64(tp_mem, fixed, n, rt_mode);
  421.  
  422. for (int i = 0; i < n; i++)
  423. for (int j = 0; j < n; j++)
  424. tp_transposed[i*n + j] = (int64_t)fixed[j*n + i];
  425.  
  426. //print_matrix_double("Transpose after 1D DCT (double)", tp_transposed, n);
  427.  
  428. // Step 3: Column-wise DCT on transposed
  429. for (int i = 0; i < n; i++) {
  430. int64_t in_col[N_MAX], out_col[N_MAX];
  431. for (int j = 0; j < n; j++)
  432. in_col[j] = tp_transposed[i*n + j];
  433. cordic_dct_1d(in_col, out_col, n, rot, bpp, tc_mode, idct_mode);
  434. for (int j = 0; j < n; j++)
  435. dct_mem[i*n + j] = (int64_t)out_col[j];
  436. }
  437. print_matrix_int64("Final 2D DCT Result", dct_mem, n);
  438.  
  439. // Step 4: Quantization + Zigzag
  440. //float_to_int64(dct_mem, fixed, n, rt_mode);
  441. print_matrix_int64("DCT rounded to int64 (before quant)", fixed, n);
  442.  
  443. quantize_fixed(fixed, quantized, n,rt_mode, quant_matrix);
  444. int dct_bits=(n/2+bpp);
  445. for (int i = 0; i < n*n; ++i)
  446. quantized[i] = limit_bits_signed(quantized[i], dct_bits);
  447. print_matrix_int64("Quantized Matrix (int64)", quantized, n);
  448.  
  449. zigzag_traversal_int64(quantized, zigzag, n);
  450. print_zigzag_int64("Zig-Zag Output Array", zigzag, n);
  451.  
  452. // Also reshape zigzag as n x n matrix (for easier visual)
  453. for (int idx = 0; idx < n*n; idx++)
  454. zigzag_matrix[idx] = zigzag[idx];
  455. print_matrix_int64("Zig-Zag Output (as n x n matrix)", zigzag_matrix, n);
  456.  
  457. } return 0;
  458. }
Success #stdin #stdout 0.01s 5276KB
stdin
4 
4
0
0
0
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 2
stdout
Set matrix size n (even, 4–16, default 8): Set bits per pixel bpp (4–32, default 8): enter the tc_mode (0=normal signed, 1=normal signed – currently same): enter the idct_mode (0=DCT, 1=IDCT): enter the rt_mode (0 :rounded up 1:truncate): For n=4, RAM address width: 5 bits

16:
Enter 16 input values (decimal, 0–15):
WR dct_mem[0] = 0
WR dct_mem[1] = 0
WR dct_mem[2] = 0
WR dct_mem[3] = 0
WR dct_mem[4] = 0
WR dct_mem[5] = 0
WR dct_mem[6] = 0
WR dct_mem[7] = 0
WR dct_mem[8] = 0
WR dct_mem[9] = 0
WR dct_mem[10] = 0
WR dct_mem[11] = 0
WR dct_mem[12] = 0
WR dct_mem[13] = 0
WR dct_mem[14] = 0
WR dct_mem[15] = 0

1D DCT (Rows) Output (int64_t):
           0            0            0            0 
           0            0            0            0 
           0            0            0            0 
           0            0            0            0 

Final 2D DCT Result (int64_t):
           0            0            0            0 
           0            0            0            0 
           0            0            0            0 
           0            0            0            0 

DCT rounded to int64 (before quant) (int64_t):
           0            0            0            0 
           0            0            0            0 
           0            0            0            0 
           0            0            0            0 

Quantized Matrix (int64) (int64_t):
           0            0            0            0 
           0            0            0            0 
           0            0            0            0 
           0            0            0            0 

Zig-Zag Output Array (length 16):
[  0]=     0  [  1]=     0  [  2]=     0  [  3]=     0  [  4]=     0  [  5]=     0  [  6]=     0  [  7]=     0  
[  8]=     0  [  9]=     0  [ 10]=     0  [ 11]=     0  [ 12]=     0  [ 13]=     0  [ 14]=     0  [ 15]=     0  

Zig-Zag Output (as n x n matrix) (int64_t):
           0            0            0            0 
           0            0            0            0 
           0            0            0            0 
           0            0            0            0