// 演算法 / Algorithm:
//   1) 對每一層,從左上角依順時針順序把所有元素攤平成一維陣列 arr。
//      For each layer, flatten the ring clockwise (top→right→bottom→left) into arr.
//   2) 因為轉 L 次回到原狀,實際旋轉次數是 kk = k % L。
//      Effective rotation is kk = k % L since rotating L times is identity.
//   3) 用同樣的順時針順序寫回:new[i] = arr[(i + kk) % L]。
//      Write back in the same clockwise order using new[i] = arr[(i + kk) % L].

#include <stdlib.h>  // malloc / free,以便動態配置暫存陣列 / for malloc/free

int** rotateGrid(int** grid, int gridSize, int* gridColSize, int k,
                 int* returnSize, int** returnColumnSizes) {
    int m = gridSize;            // 矩陣列數 / number of rows
    int n = gridColSize[0];      // 矩陣行數 / number of columns (every row same length)

    // 層數 = min(m, n) / 2;m, n 皆為偶數,所以一定整除。
    // Number of layers = min(m, n) / 2 (exact since m, n are even).
    int layers = (m < n ? m : n) / 2;

    for (int d = 0; d < layers; d++) {
        // 第 d 層的四個邊界 / four boundaries of layer d
        int top = d, bottom = m - 1 - d;
        int left = d, right = n - 1 - d;

        // 周長 = 兩條垂直邊 + 兩條水平邊;再扣掉重複算的 4 個角。
        // Perimeter: 2*(height) + 2*(width) - 4, written here in expanded form.
        int L = 2 * (bottom - top) + 2 * (right - left);

        // 暫存這一層的元素;只配置長度 L,用完即釋放。
        // Temporary buffer for this layer; freed at the end of the loop body.
        int* arr = (int*)malloc(sizeof(int) * L);

        // ---- 攤平:順時針把這一層複製到 arr ----
        // ---- Flatten the ring clockwise into arr ----
        int idx = 0;
        // 上邊:由左到右 / top edge: left → right
        for (int j = left; j <= right; j++)        arr[idx++] = grid[top][j];
        // 右邊:由上到下(跳過已加入的右上角)/ right edge: top+1 → bottom
        for (int i = top + 1; i <= bottom; i++)    arr[idx++] = grid[i][right];
        // 下邊:由右到左(跳過已加入的右下角)/ bottom edge: right-1 → left
        for (int j = right - 1; j >= left; j--)    arr[idx++] = grid[bottom][j];
        // 左邊:由下到上(跳過已加入的左下角與左上角)/ left edge: bottom-1 → top+1
        for (int i = bottom - 1; i > top; i--)     arr[idx++] = grid[i][left];

        // 把超大的 k 壓成有效旋轉次數;否則迴圈會浪費時間。
        // Reduce k modulo L; without this, large k would blow up the runtime.
        int kk = k % L;

        // ---- 寫回:同樣順時針走一遍,讀 arr[(i + kk) % L] ----
        // ---- Restore: walk the same clockwise path, but read shifted indices ----
        idx = 0;
        for (int j = left; j <= right; j++)
            grid[top][j]    = arr[(idx++ + kk) % L];   // 上邊 / top edge
        for (int i = top + 1; i <= bottom; i++)
            grid[i][right]  = arr[(idx++ + kk) % L];   // 右邊 / right edge
        for (int j = right - 1; j >= left; j--)
            grid[bottom][j] = arr[(idx++ + kk) % L];   // 下邊 / bottom edge
        for (int i = bottom - 1; i > top; i--)
            grid[i][left]   = arr[(idx++ + kk) % L];   // 左邊 / left edge

        free(arr);   // 釋放這層的暫存記憶體 / release the per-layer buffer
    }

    // LeetCode 的 C 介面要求回傳列數與每列長度。
    // The LeetCode C harness asks for the number of rows and the length of each row.
    *returnSize = m;
    *returnColumnSizes = (int*)malloc(sizeof(int) * m);
    for (int i = 0; i < m; i++) (*returnColumnSizes)[i] = n;

    return grid;   // 我們是原地修改,所以可以直接回傳同一個指標 / in-place, return same ptr
}
