#include "sweep.h"
#include <cmath>

/**
 * Tangent-initialisation hook of the kernel generator.
 *
 * Currently a no-op: the unconditional `return` on the first line means
 * nothing below it is ever executed, so this function emits no kernel code.
 * NOTE(review): the reason for disabling the body is not visible here --
 * confirm it is intentional before either deleting the dead code or removing
 * the early return.
 */
void SweepKernel::initTangent() {
	return;
	// ---- Unreachable from here on (see the early return above). ----
	if (d_lookAhead) {
		/*append("float2 nextV;\n");
		append("float2 prevV = -pLocal;\n");*/
		// Would invoke sample() with "li.startPos" and "pLocal"; presumably this
		// emits a height-field fetch at li.startPos into pLocal -- TODO confirm.
		sample("li.startPos", "pLocal");
		//append("prevV += pLocal;\n");
		// Would emit code advancing the start position by one step direction.
		// The trailing comment is the remainder of an older variant that scaled
		// the step by stepSkip().
		append("li.startPos += li.stepDir;\n"); //*%Ef;\n", (float)d_config->stepSkip());
	}

	// Would declare the prevHeight kernel variable when step interpolation is 3
	// and mode() evaluates to false.
	if (d_config->stepInterpolation() == 3 && !d_config->mode()) {
		append("float prevHeight;\n");
	}
}

/**
 * Emits the prologue of the sweep kernel.
 *
 * The generated code:
 *  - loads the thread's LineInfo record (`li`) and returns early when the
 *    line has fewer than two steps;
 *  - sets up the store location: `storeX`/`storeY` (plus `storeDepthX` in
 *    edge-aware mode) when writing to a sweep surface, otherwise `destIndex`
 *    and, with matchOpposite(), a direction-dependent offset of the output
 *    pointer and the `myStripe` value;
 *  - computes the normalised step direction `dirStep`;
 *  - declares `normal` when tangentType() is 3 and `prevHeight` when
 *    stepInterpolation() is 2.
 */
void SweepKernel::initKernel() {
	// Per-thread setup shared by every configuration.
	append("int tid = #globalThreadX#;\n");
	append("struct LineInfo li = liIn[tid];\n");
	append("if (li.numSteps < 2) return;\n");

	if (!d_config->sweepSurface()) {
		append("int destIndex = li.layerDistance;\n");

		if (d_config->matchOpposite()) {
			const int halfDirs = d_config->dirs()/2;

			// Only the half-buffer + edge-aware combination writes through a
			// separate half-typed pointer; every other combination offsets
			// `out` directly.
			const bool halfEdgeAware = d_config->useHalfBuffers() && d_config->edgeAwareAcc();
			if (halfEdgeAware) {
				append("#globalType# #halfType# *outOcc = (#globalType# #halfType#*)out;\n");
				append("outOcc += (li.dirIndex < %d) ? 2 : 3;\n", halfDirs);
			} else {
				append("out += (li.dirIndex < %d) ? 0 : 1;\n", halfDirs);
			}

			const int stripeWidth = d_config->sweepWidth();
			append("int myStripe = (li.dirIndex < %d) ? %d : %d;\n",
					halfDirs, stripeWidth, -stripeWidth);
		}
	} else {
		// log2 of the stripe size, rounded to the nearest integer, is used as
		// the shift that turns layerDistance into the surface row.
		const double stripeLog2 = std::log2(static_cast<double>(d_config->sweepStorageStripe()));
		const int shiftAmount = static_cast<int>(stripeLog2 + 0.5);
		append("int storeY = li.layerDistance>>%d;\n", shiftAmount);

		const bool edgeAware = d_config->edgeAwareAcc();
		const int stripeMask = d_config->sweepStorageStripe()-1;
		const int halfDirs = d_config->dirs()/2;
		if (edgeAware) {
			append("int storeDepthX = (li.layerDistance&%d)*8;\n", stripeMask);
			append("int storeX = storeDepthX + (li.dirIndex < %d ? 4 : 6);\n", halfDirs);
		} else {
			append("int storeX = (li.layerDistance&%d)*4 + ((li.dirIndex < %d) ? 0 : 2);\n",
					stripeMask, halfDirs);
		}
	}

	// Step direction scaled by the height-field dimensions, then normalised.
	const float fieldWidth = static_cast<float>(d_config->hfWidth());
	const float fieldHeight = static_cast<float>(d_config->hfHeight());
	append("float2 dirStep = SSEOnormalize2(#float2Ctor{li.stepDir.x*%Ef}{li.stepDir.y*%Ef}#);\n",
			fieldWidth, fieldHeight);
	append("// FIXME:  Having zCoef might help with circular assembly artefacts\n");

	// Optional kernel-local declarations.
	if (d_config->tangentType() == 3) {
		append("float2 normal;\n");
	}

	if (d_config->stepInterpolation() == 2) {
		append("float prevHeight;\n");
	}
}

/**
 * Emits a braced, indented kernel block that assigns the `occlusion` variable.
 *
 * With a tangent type configured, the emitted expression is
 * `tangentSin + (horizonSin - tangentSin)`, optionally multiplied by
 * `vecFalloff(horVec)` when d_useFalloff is set. Without one, the expression
 * text is whatever genOcc("horVec") returns.
 */
void SweepKernel::genOcclusion() {
	append("\n/* Calculate occlusion START */\n");
	append("{\n");
	incrementIndent();

	if (!d_config->tangentType()) {
		// No tangent in play: the whole right-hand side comes from genOcc().
		append("occlusion = %s;\n", genOcc("horVec").c_str());
	} else {
		append("float horizonSin = SSEOdot2(upVec, SSEOnormalize2(horVec));\n");

		// The assignment is emitted in pieces so that the falloff factor can
		// be spliced in before the terminating semicolon.
		append("occlusion = tangentSin + (horizonSin - tangentSin)");
		if (d_useFalloff) {
			append("*vecFalloff(horVec)");
		}
		append(";\n");
	}

	decrementIndent();
	append("}\n");
	append("/* Calculate occlusion END */\n\n");
}

/**
 * Emits the kernel statements that store the computed occlusion value.
 *
 * The destination depends on the configuration:
 *  - occlusionScatter(): atomically adds (1 - occlusion) / dirs() to `out`
 *    at the pixel derived from tempSnapCoord.
 *  - sweepSurface(): writes the half-converted occlusion to `sweepSurf` at
 *    (storeX, storeY); in edge-aware mode, lines with dirIndex below
 *    dirs()/2 additionally store pLocal.y at (storeDepthX, storeY).
 *  - matchOpposite() with half buffers: writes a half value through `outOcc`
 *    (edge-aware) or through `out` (otherwise).
 *  - matchOpposite() without half buffers: stores pLocal.y and occlusion in
 *    consecutive elements of `out`, with a per-entry stride of 4 when
 *    useV4Buffers() is set and 3 otherwise.
 *  - otherwise: a plain `out[destIndex] = ...` store.
 *
 * The store variables used here (storeX, storeY, storeDepthX, destIndex,
 * outOcc) are the ones declared by the code initKernel() emits.
 */
void SweepKernel::writeOut() {
	append("// Writing out\n");

	if (d_config->occlusionScatter()) {
		append("int writeY = %d - tempSnapCoord.y*%Ef + 0.5f;\n", d_config->hfHeight(), (float)d_config->hfHeight());
		append("int writeX = tempSnapCoord.x*%Ef + 0.5f;\n", (float)d_config->hfWidth());
		// Several lines can land on the same pixel, hence the atomic add.
		append("#atomicAdd{out[writeY*%d + writeX]}{(1.0f - occlusion)*%Ef}#;\n", d_config->hfWidth(), 1.0f/(float)d_config->dirs());
		return;
	}

	// Value text for the plain (non-surface, non-matchOpposite) store below.
	std::string outValue;
	if (d_config->edgeAwareAcc() == 1) {
		outValue = genStr("#float2Ctor{occlusion}{pLocal.y}#");
	} else {
		outValue = genStr("occlusion");
	}

	if (d_config->sweepSurface()) {
		if (d_config->edgeAwareAcc()) {
			append("if (li.dirIndex < %d) {\n", d_config->dirs()/2);
			incrementIndent();
			append("#surf2DWrite{sweepSurf}{storeDepthX}{storeY}{float}{pLocal.y}#;\n");
			decrementIndent();
			append("}\n");
		}
		append("#surf2DWrite{sweepSurf}{storeX}{storeY}{unsigned short}{__float2half_rn(occlusion)}#;\n");
		return;
	}

	if (!d_config->matchOpposite()) {
		// Terminate the statement with a newline like every other emitted
		// line, so that whatever is appended next starts on its own line.
		append("out[destIndex] = %s;\n", outValue.c_str());
		return;
	}

	if (d_config->useHalfBuffers()) { // This goes totally differently
		if (d_config->edgeAwareAcc()) {
			append("if (li.dirIndex < %d) {\n", d_config->dirs()/2);
			incrementIndent();
			append("out[destIndex*2] = pLocal.y;\n");
			decrementIndent();
			append("}\n");
			append("#writeHalf{outOcc[destIndex*4]}{occlusion}#;\n");
		} else {
			// initKernel() declares `outOcc` only for the edge-aware half-buffer
			// case; in this branch it offsets `out` itself, so the store has
			// to go through `out`.
			append("#writeHalf{out[destIndex*2]}{occlusion}#;\n");
		}
	} else {
		// Number of elements per output entry.
		const int entryStride = d_config->useV4Buffers() ? 4 : 3;
		append("if (li.dirIndex < %d) {\n", d_config->dirs()/2);
		incrementIndent();
		append("out[destIndex*%d] = pLocal.y;\n", entryStride);
		decrementIndent();
		append("}\n");
		append("out[destIndex*%d+1] = occlusion;\n", entryStride);
	}
}

/**
 * Emits the code that runs after the tangent step.
 *
 * When look-ahead is enabled, the generated kernel copies pLocal into prevP;
 * otherwise nothing is emitted.
 */
void SweepKernel::postTangent() {
	if (!d_lookAhead) {
		return;
	}

	append("prevP = pLocal;\n");
}
