// Source path: root/Minecraft.Client/PS3/SPU_Tasks/LevelRenderer_cull/LevelRenderer_cull.cpp
// Source blob: f2a7bca0f600a2764598f96f1506305748829bbf
/* SCE CONFIDENTIAL
PlayStation(R)3 Programmer Tool Runtime Library 430.001
* Copyright (C) 2007 Sony Computer Entertainment Inc.
* All Rights Reserved.
*/

/* common headers */
#include <stdint.h>
#include <stdlib.h>
#include <alloca.h>
#include <spu_intrinsics.h>
#include <cell/spurs.h>
#include <cell/dma.h>
#include <cell/spurs/job_queue.h>

#include "LevelRenderer_cull.h"
#include "..\Common\DmaData.h"
#include <vectormath/c/vectormath_aos_v.h>



// #define SPU_HEAPSIZE (128*1024)
// #define SPU_STACKSIZE (16*1024)
// 
// CELL_SPU_LS_PARAM(128*1024, 16*1024);	// can't use #defines here as it seems to  create an asm instruction


// When true, cellSpursJobQueueMain emits spu_print trace messages at start,
// after the input DMA, and at exit.
static const bool sc_verbose = false;

// Set to the job context passed to cellSpursJobQueueMain; not read anywhere else in this file.
CellSpursJobContext2* g_pSpursJobContext;


// The flag definitions
// Bit values tested against the per-chunk flag bytes that cull() fetches from pGlobalChunkFlags.
static const int    CHUNK_FLAG_COMPILED		= 0x01;	// must be set for a chunk to be frustum-tested in cull()
static const int    CHUNK_FLAG_DIRTY		= 0x02;	// not referenced in this file
static const int    CHUNK_FLAG_EMPTY0		= 0x04;	// when set, cull() emits no layer 0 entry for the chunk
static const int    CHUNK_FLAG_EMPTY1		= 0x08;	// when set, cull() emits no layer 1 entry for the chunk
static const int	CHUNK_FLAG_EMPTYBOTH	= 0x0c;	// CHUNK_FLAG_EMPTY0 | CHUNK_FLAG_EMPTY1
static const int    CHUNK_FLAG_NOTSKYLIT	= 0x10;	// not referenced in this file
static const int	CHUNK_FLAG_REF_MASK		= 0x07;	// not referenced in this file; presumably a 3-bit reference count mask - TODO confirm
static const int	CHUNK_FLAG_REF_SHIFT	= 5;	// not referenced in this file; presumably the bit position of that count - TODO confirm


// Tests a box against six planes.
//
// bb      - six floats; bb[0]/bb[3] are the two x extents, bb[1]/bb[4] the two y
//           extents and bb[2]/bb[5] the two z extents (presumably min then max).
// frustum - 24 floats, read as six consecutive planes of four coefficients (a, b, c, d).
//
// Returns false as soon as one plane has none of the eight box corners satisfying
// a*x + b*y + c*z + d > 0; returns true if every plane has at least one such corner.
bool inline clip(float *bb, float *frustum)
{
	const float *plane = frustum;
	for (int planeIdx = 0; planeIdx < 6; ++planeIdx, plane += 4)
	{
		bool anyCornerInFront = false;

		// Bits 0, 1 and 2 of the corner index pick which x, y and z extent to use.
		for (int corner = 0; corner < 8 && !anyCornerInFront; ++corner)
		{
			const float x = bb[(corner & 1) ? 3 : 0];
			const float y = bb[(corner & 2) ? 4 : 1];
			const float z = bb[(corner & 4) ? 5 : 2];
			anyCornerInFront = (plane[0] * x + plane[1] * y + plane[2] * z + plane[3] > 0);
		}

		if (!anyCornerInFront)
			return false;
	}
	return true;
}

// Append-only writer for an int array that lives at a PPU-side address.
// Values are collected in a small local buffer and pushed out with
// DmaData_SPU::put whenever the buffer fills up or flush() is called.
class PPUStoreArray
{
	static const int sc_cacheSize = 128;	// number of ints buffered locally between puts
	int m_localCache[sc_cacheSize];
	int* m_pDataPPU;						// base address of the destination array
	int m_cachePos;							// number of ints currently waiting in m_localCache
	int m_ppuPos;							// number of ints already handed to DmaData_SPU::put

public:
	// pDataPPU - address of the destination int array.
	PPUStoreArray(uintptr_t pDataPPU)
		: m_pDataPPU((int*)pDataPPU)
		, m_cachePos(0)
		, m_ppuPos(0)
	{
	}

	// Appends one value, flushing automatically once the local buffer is full.
	void store(int val)
	{
		m_localCache[m_cachePos++] = val;
		if(m_cachePos < sc_cacheSize)
			return;
		flush();
	}

	// Writes any buffered values to the destination array, directly after the
	// values written so far, and empties the local buffer. Does nothing when
	// the buffer is empty.
	void flush()
	{
		if(m_cachePos <= 0)
			return;

		// The transfer size goes through DmaData_SPU::roundUpDMASize, so the put may
		// cover more bytes than were actually stored.
		// NOTE(review): this presumably relies on m_localCache and the destination
		// address meeting the DMA alignment rules - confirm against DmaData_SPU.
		int* const pDest = &m_pDataPPU[m_ppuPos];
		DmaData_SPU::put(m_localCache, (uintptr_t)pDest, DmaData_SPU::roundUpDMASize(m_cachePos*sizeof(int)));
		m_ppuPos += m_cachePos;
		m_cachePos = 0;
	}

	// Number of ints flushed so far; values still sitting in the local buffer are not counted.
	int getSize() { return m_ppuPos; }
};

// Decides which clip chunks are visible and builds the per-layer render lists.
//
// dataIn   - local copy of the job input. Its clipMat is modified in place (the
//            [3][0..2] entries are zeroed).
// eaDataIn - address the input block was fetched from; only used as the destination
//            for the numToRender_layer0 / numToRender_layer1 counts.
//
// For every clip chunk the visible flag is computed and the first 16 bytes of the chunk
// are written back to pClipChunks[i]. For each visible chunk with a valid globalIdx, one
// list id and one depth value are appended per non-empty layer to that layer's
// list/zDepth output arrays.
void cull(LevelRenderer_cull_DataIn* dataIn, LevelRenderer_cull_DataIn* eaDataIn)
{
	ClipChunk_SPU chunkDoubleBuffer[2];
	int chunkIdx = 0;

	// Pull the complete per-chunk flag table into local memory.
	// NOTE(review): this is a very large stack allocation (the original comment says 164K).
	unsigned char* globalChunkFlags = (unsigned char*)alloca(dataIn->numGlobalChunks);		// 164K !!!
	DmaData_SPU::getAndWait(globalChunkFlags, (uintptr_t)dataIn->pGlobalChunkFlags, sizeof(unsigned char)*dataIn->numGlobalChunks);

	PPUStoreArray layer0List((uintptr_t)dataIn->listArray_layer0);
	PPUStoreArray layer1List((uintptr_t)dataIn->listArray_layer1);

	PPUStoreArray layer0ZDepth((uintptr_t)dataIn->zDepth_layer0);
	PPUStoreArray layer1ZDepth((uintptr_t)dataIn->zDepth_layer1);

	// Take the [3][0..2] entries out of the matrix and add them to each chunk position
	// by hand before the multiply instead.
	float xOff = dataIn->clipMat[3][0];
	float yOff = dataIn->clipMat[3][1];
	float zOff = dataIn->clipMat[3][2];
	dataIn->clipMat[3][0] = 0;
	dataIn->clipMat[3][1] = 0;
	dataIn->clipMat[3][2] = 0;

	// DMA up the first chunk
	if(dataIn->numClipChunks > 0)
		DmaData_SPU::get(&chunkDoubleBuffer[chunkIdx], (uintptr_t)&dataIn->pClipChunks[0], sizeof(ClipChunk_SPU));

	for(int i=0;i<dataIn->numClipChunks; i++)
	{
		DmaData_SPU::wait();// wait for the current chunk to have been uploaded
		ClipChunk_SPU& chunk = chunkDoubleBuffer[chunkIdx];
		chunkIdx ^= 1;

		// Queue up the next chunk into the other buffer. Skipped on the last iteration so we
		// neither read past the end of pClipChunks nor leave a get in flight into the stack.
		if(i + 1 < dataIn->numClipChunks)
			DmaData_SPU::get(&chunkDoubleBuffer[chunkIdx], (uintptr_t)&dataIn->pClipChunks[i+1], sizeof(ClipChunk_SPU));

		Vectormath::Aos::Vector4 pos(chunk.aabb[0] + xOff, chunk.aabb[1] + yOff, chunk.aabb[2]  + zOff, 1.0f);
		Vectormath::Aos::Vector4 transPos = dataIn->clipMat * pos;
		float zDepth = -transPos.getZ();
// 		spu_print("z val = %f : maxDepth = %f\n", zDepth, dataIn->maxDepthRender);
		if(zDepth > dataIn->maxDepthRender || chunk.aabb[1] < dataIn->maxHeightRender )
			chunk.visible = false;
		else
		{
			// Chunks without a global index are treated as having no flags set, so never visible.
			unsigned char flags = chunk.globalIdx == -1 ? 0 : globalChunkFlags[ chunk.globalIdx ];
			chunk.visible = false;
			if ( (flags & CHUNK_FLAG_COMPILED ) && ( ( flags & CHUNK_FLAG_EMPTYBOTH ) != CHUNK_FLAG_EMPTYBOTH ) )
			{
				chunk.visible = clip(chunk.aabb, dataIn->fdraw);
			}
		}
		// write the visible flag directly into the chunk structure on PPU
		DmaData_SPU::put(&chunk, (uintptr_t)&dataIn->pClipChunks[i], 16);// only DMA the first 16 bytes, as they contain the visible flag we need to pass back

		// add the data for renderChunks
		if( !chunk.visible )
			continue;													// This will be set if the chunk isn't visible, or isn't compiled, or has both empty flags set
		if( chunk.globalIdx == -1 )
			continue;												// Not sure if we should ever encounter this... TODO check
		int layer = 0;
// 		zDepth /= transPos.getW();
		if( ( globalChunkFlags[chunk.globalIdx] & CHUNK_FLAG_EMPTY0 ) == 0)	// Check that this particular layer isn't empty
		{
			// List can be calculated directly from the chunk's global index
			int list = chunk.globalIdx * 2 + layer;
			list += dataIn->chunkLists;
			layer0List.store(list);
			layer0ZDepth.store(*((int*)&zDepth));	// raw float bits, stored through the int array
		}

		layer = 1;
		if( ( globalChunkFlags[chunk.globalIdx] & CHUNK_FLAG_EMPTY1 ) == 0)	// Check that this particular layer isn't empty
		{
			// List can be calculated directly from the chunk's global index
			int list = chunk.globalIdx * 2 + layer;
			list += dataIn->chunkLists;
			layer1List.store(list);
			// Layer 1 depths belong in the layer 1 array so they stay index-aligned with
			// layer1List (this used to append to layer0ZDepth).
			layer1ZDepth.store(*((int*)&zDepth));
		}
	}
	layer0List.flush();
	layer1List.flush();
	layer0ZDepth.flush();
	layer1ZDepth.flush();

	// Let the outstanding puts finish before the counts are published and before the
	// stack buffers they read from go out of scope.
	// NOTE(review): assumes DmaData_SPU::put can return before the transfer completes - confirm.
	DmaData_SPU::wait();

	cellDmaPutUint32(layer0List.getSize(), (uintptr_t)&eaDataIn->numToRender_layer0, 0, 0, 0);
	cellDmaPutUint32(layer1List.getSize(), (uintptr_t)&eaDataIn->numToRender_layer1, 0, 0, 0);

}




// Job entry point: fetches the LevelRenderer_cull_DataIn block whose address is in
// pJob->workArea.userData[0] and runs cull() on it.
//
// pContext - job context; recorded in g_pSpursJobContext.
// pJob     - job descriptor carrying the input block address.
void cellSpursJobQueueMain(CellSpursJobContext2 *pContext, CellSpursJob256 *pJob)
{
	unsigned int idSpu = cellSpursGetCurrentSpuId();
	if(sc_verbose)
	{
		spu_print("LevelRenderer_cull [SPU#%u] start\n", idSpu);
	}

	g_pSpursJobContext = pContext;

	// Bring the input block into local memory.
	uint32_t eaDataIn = pJob->workArea.userData[0];
	LevelRenderer_cull_DataIn dataIn;
	DmaData_SPU::getAndWait(&dataIn, eaDataIn, sizeof(LevelRenderer_cull_DataIn));
	if(sc_verbose)
	{
		spu_print("LevelRenderer_cull [SPU#%u] DMA'd input data, ready to cull %d chunks\n", idSpu, dataIn.numClipChunks);
	}

	// Chunk count rounded up to the next multiple of 16. Only the disabled output
	// DMA below refers to it, so it is currently unused.
	int numForDMA = (dataIn.numClipChunks + 0x0f) & ~0x0f;

	// cull() receives the local copy plus the original address, which it uses to
	// write the render counts back.
	cull(&dataIn, (LevelRenderer_cull_DataIn*)eaDataIn);
// 	DmaData_SPU::putAndWait(outputData, eaDataOut, sizeof(char) * numForDMA);

	if(sc_verbose)
	{
		spu_print("LevelRenderer_cull [SPU#%u] exit\n", idSpu);
	}
}