void advect128x64(fix* dye, fix* vx, fix* vy, fix* outx, fix* outy, fix* outz) { for (int j = 1; j < H-1; j++) { for (int i = 1; i < W2-1; i++) { int id = ID2(i, j); fix x = FIX(i) - fmul(dtRdx, vx[id]); fix y = FIX(j) - fmul(dtRdx, vy[id]); if (x < 0) x = 0; else if (x > MAX_X2) x = MAX_X2; if (y < 0) y = 0; else if (y > MAX_Y) y = MAX_Y; int x1 = UNFIX(x); int y1 = UNFIX(y); int x2 = x1 + 1; int y2 = y1 + 1; int tl = ID2(x1, y1); int tr = ID2(x2, y1); int bl = ID2(x1, y2); int br = ID2(x2, y2); fix s1 = ffrac(x); fix s0 = ONE - s1; fix t1 = ffrac(y); fix t0 = ONE - t1; outx[id] = fmul(s0, fmul(t0, vx[tl]) + fmul(t1, vx[bl])) + fmul(s1, fmul(t0, vx[tr]) + fmul(t1, vx[br])); outy[id] = fmul(s0, fmul(t0, vy[tl]) + fmul(t1, vy[bl])) + fmul(s1, fmul(t0, vy[tr]) + fmul(t1, vy[br])); outz[id] = fmul(s0, fmul(t0, dye[tl]) + fmul(t1, dye[bl])) + fmul(s1, fmul(t0, dye[tr]) + fmul(t1, dye[br])); } } } void fluid_step128x64() { // Advect the velocity field and the dye (27.7ms) advect128x64(dye, vx, vy, tx, ty, tz); // Swap the pointers swap(vx, tx); swap(vy, ty); swap(dye, tz); // Velocity boundary conditions (0.1ms) for (int i = 0; i < W2; i++) { vy[ID2(i, 0)] = -vy[ID2(i, 1)]; vy[ID2(i, H-1)] = -vy[ID2(i, H-2)]; vx[ID2(i, 0)] = vx[ID2(i, 1)]; vx[ID2(i, H-1)] = vx[ID2(i, H-2)]; } for (int j = 0; j < H; j++) { vx[ID2(0, j)] = -vx[ID2(1, j)]; vx[ID2(W2-1, j)] = -vx[ID2(W2-2, j)]; vy[ID2(0, j)] = vy[ID2(1, j)]; vy[ID2(W2-1, j)] = vy[ID2(W2-2, j)]; } // Calculate divergence & first pressure iteration (3.6ms) for (int j = 1; j < H-1; j++) { for (int i = 1; i < W2-1; i++) { div[ID2(i, j)] = fmul(alphaHalfRdx, vx[ID2(i+1,j)] - vx[ID2(i-1, j)] + vy[ID2(i, j+1)] - vy[ID2(i, j-1)]); p[ID2(i, j)] = div[ID2(i, j)] >> 2; } } // Initial Pressure boundary conditions (0ms) for (int i = 0; i < W2; i++) { p[ID2(i, 0)] = p[ID2(i, 1)]; p[ID2(i, H-1)] = p[ID2(i, H-2)]; } for (int j = 0; j < H; j++) { p[ID2(0, j)] = p[ID2(1, j)]; p[ID2(W2-1, j)] = p[ID2(W2-2, j)]; } // Poisson pressure solver (20.8ms) for (int k = 0; k < pressureIterations-1; k++) { // Jacobi iteration (20.4ms) for (int j = 1; j < H-1; j++) { for (int i = 1; i < W2-1; i++) { int id = ID2(i, j); p[id] = (div[id] + p[ID2(i-1, j)] + p[ID2(i+1, j)] + p[ID2(i, j-1)] + p[ID2(i, j+1)]) >> 2; } } // Pressure boundary conditions (0.4ms) for (int i = 0; i < W2; i++) { p[ID2(i, 0)] = p[ID2(i, 1)]; p[ID2(i, H-1)] = p[ID2(i, H-2)]; } for (int j = 0; j < H; j++) { p[ID2(0, j)] = p[ID2(1, j)]; p[ID2(W2-1, j)] = p[ID2(W2-2, j)]; } } // Apply Forces if (currentView == 0) { int lastX = addX; int lastY = addY; if (keydown(KEY_LEFT)) { addX -= 1; if (addX < 1) addX = W2-2; } else if (keydown(KEY_RIGHT)) { addX += 1; if (addX > W2-2) addX = 1; } if (keydown(KEY_UP)) { addY -= 1; if (addY < 1) addY = H-2; } else if (keydown(KEY_DOWN)) { addY += 1; if (addY > H-2) addY = 1; } fix adx = FIX(addX - lastX); fix ady = FIX(addY - lastY); if (adx || ady) { fix ddx = abs(adx); fix ddy = abs(ady); fix vdx = adx / 10; fix vdy = ady / 10; fix ddxy = ddx + ddy; for (int j = addY-3; j < addY+3; j++) { for (int i = addX-3; i < addX+3; i++) { if (i<=0||j<=0||i>=W2-1||j>=H-1) continue; int id = ID2(i, j); fix dist = radius - FIX((addX-i)*(addX-i)+(addY-j)*(addY-j)); if (dist < 0) continue; fix dyeD = fmul(dist, dyeIntensity); fix velD = fmul(dist, velIntensity); dye[id] = dye[id] + fmul(ddxy, dyeD); if (dye[id] > maxDyeIntensity) dye[id] = maxDyeIntensity; vx[id] = vx[id] + fmul(vdx, velD); vy[id] = vy[id] + fmul(vdy, velD); } } } } // Subtract pressure from velocity & Diffuse dye and velocity (3.4ms) for (int j = 1; j < H-1; j++) { for (int i = 1; i < W2-1; i++) { int id = ID2(i, j); vx[id] = fmul(vx[id] - fmul(halfRdx, p[ID2(i+1, j)] - p[ID2(i-1, j)]), velDiffusion); vy[id] = fmul(vy[id] - fmul(halfRdx, p[ID2(i, j+1)] - p[ID2(i, j-1)]), velDiffusion); } } // Draw dye to vram (3.1ms) uint32_t *vram = gint_vram+4; for (int j = 1; j < H-1; j++) { uint32_t data = 0; data = (data << 1); for(int k = 1; k < 32; k++) { int x = 32*0+k; int id = ID2(x, j); dye[id] = fmul(dye[id], dyeDiffusion); data = (data << 1) | (dye[id] > saturateTreshold || (dye[id] > emptyTreshold && (x+j)%2)); } vram[0] = data; data = 0; data = (data << 1); for(int k = 0; k < 32; k++) { int x = 32*1+k; int id = ID2(x, j); dye[id] = fmul(dye[id], dyeDiffusion); data = (data << 1) | (dye[id] > saturateTreshold || (dye[id] > emptyTreshold && (x+j)%2)); } vram[1] = data; data = 0; data = (data << 1); for(int k = 0; k < 32; k++) { int x = 32*2+k; int id = ID2(x, j); dye[id] = fmul(dye[id], dyeDiffusion); data = (data << 1) | (dye[id] > saturateTreshold || (dye[id] > emptyTreshold && (x+j)%2)); } vram[2] = data; data = 0; for(int k = 0; k < 31; k++) { int x = 32*3+k; int id = ID2(x, j); dye[id] = fmul(dye[id], dyeDiffusion); data = (data << 1) | (dye[id] > saturateTreshold || (dye[id] > emptyTreshold && (x+j)%2)); } data = (data << 1); vram[3] = data; vram+=4; } }