#include "sweep.h"

#define INDEX2D

/**
 * Emits the kernel-side declarations used by the convex hull stack.
 *
 * Generated text, in order:
 *  - a FIXME note that is copied verbatim into the kernel source,
 *  - the "convexIndex" cursor (starts at 1 when REGH3 is defined, else 0),
 *  - the "convexHull" storage, whose element type follows hullType()
 *    ("unsigned int" when non-zero, "float2" otherwise) and whose shape
 *    follows hullStorage():
 *      0     -> plain array of hullSize() elements,
 *      1     -> "#globalType#" pointer into hullPool at tid*hullSize(),
 *      2     -> "#sharedMem#" array, [hullSize()][sweepBlock()] with INDEX2D,
 *               otherwise a flat array plus a per-thread base pointer,
 *      other -> "#sharedMem#" array laid out [sweepBlock()][hullSize()],
 *  - the h1/h2 (and h3 with REGH3) float2 variables when optiPath() is set.
 */
void SweepKernel::genHullInit() {
	// FIXME
	append("// FIXME:  Instead of clamping when the stack is full, rotate (bitwise and the index)\n");

	// Stack cursor.
	#ifdef REGH3
	append("int convexIndex = 1;\n");
	#else
	append("int convexIndex = 0;\n");
	#endif

	// Element type of the hull storage.
	const std::string elemType = d_config->hullType() ? "unsigned int" : "float2";
	const char *elem = elemType.c_str();

	// Storage declaration, one layout per storage mode.
	switch (d_config->hullStorage()) {
	case 0:
		append("%s convexHull[%d];\n", elem, d_config->hullSize());
		break;

	case 1:
		append("#globalType# %s *convexHull = hullPool + tid*%d;\n", elem, d_config->hullSize());
		break;

	case 2:
		#ifdef INDEX2D
		append("#sharedMem# %s convexHull[%d][%d];\n", elem, d_config->hullSize(), d_config->sweepBlock());
		#else
		append("#sharedMem# %s convexHullS[%d*%d];\n", elem, d_config->hullSize(), d_config->sweepBlock());
		append("%s *convexHull = convexHullS + #threadX#;\n", elem);
		#endif
		break;

	default:
		append("#sharedMem# %s convexHull[%d][%d];\n", elem, d_config->sweepBlock(), d_config->hullSize());
		break;
	}

	// Extra hull point variables, only declared for the optimised path.
	if (d_config->optiPath()) {
		#ifdef REGH3
		append("float2 h1, h2, h3;\n");
		#else
		append("float2 h1, h2;\n");
		#endif
	}
}

/** Returns the kernel-source expression that refers to the hull slot at convexIndex. */
std::string SweepKernel::hullRefCurrent() {
	static const char kCurrentSlot[] = "convexHull[convexIndex]";
	return std::string(kCurrentSlot);
}

/** Emits the kernel statement that advances convexIndex by one. */
void SweepKernel::hullIndexIncrement() {
	static const char kAdvance[] = "convexIndex++;\n";
	append(kAdvance);
}

/** Emits the kernel statement that moves convexIndex back by one. */
void SweepKernel::hullIndexDecrement() {
	static const char kRetreat[] = "convexIndex--;\n";
	append(kRetreat);
}

/**
 * Builds the kernel-source expression for the occlusion term of a horizon
 * vector, without any falloff factor.
 *
 * @param horVec  expression naming the horizon vector
 * @param tanSin  expression for the tangent sine; only consulted when
 *                tangentType() is non-zero, and may be empty
 * @param altSin  optional precomputed sine expression; when empty the sine is
 *                generated as SSEOdot2(upVec, SSEOnormalize2(horVec))
 * @param upVec   expression naming the up vector
 * @return the generated expression text
 */
std::string SweepKernel::genOccDot(std::string horVec, std::string tanSin, std::string altSin, std::string upVec) {
	// Sine of the horizon angle: either supplied by the caller or derived here.
	std::string sine;
	if (altSin == "")
		sine = genStr("SSEOdot2(%s, SSEOnormalize2(%s))", 
				upVec.c_str(), horVec.c_str());
	else
		sine = "(" + altSin + ")";

	// Per-call tangent: clamp against the supplied tangent, or fall back to
	// the plain "1 + sine" form when no tangent expression was given.
	if (d_config->tangentType()) {
		if (tanSin == "")
			return "(1.0f + " + sine + ")";

		return tanSin + " + (#maxf{0.0f}{" + sine + " - " + tanSin + "}#)";
	}

	// Fixed tangent, general form: tangent + max(0, sine - tangent).
	const bool optimisedMode0 = d_config->optiPath() && d_config->mode() == 0;
	if (!optimisedMode0) {
		return genStr("%Ef + (#maxf{0.0f}{%s - %Ef}#)",
				d_config->fixedTangent(),
				sine.c_str(),
				d_config->fixedTangent());
	}

	// Fixed tangent, optimised path in mode 0: the leading tangent term is
	// left out of the expression here.
	if (d_config->fixedTangent() == -1.0f)
		return genStr("(%s + 1.0f)", sine.c_str());

	return genStr("(#maxf{0.0f}{%s - %Ef}#)",
			sine.c_str(),
			d_config->fixedTangent());
}

/**
 * Builds the falloff factor expression for a horizon vector.
 *
 * @param horVec  expression naming the horizon vector
 * @return "vecFalloff(<horVec>)" when d_useFalloff is set, otherwise an empty
 *         string
 */
std::string SweepKernel::genFalloff(std::string horVec) {
	if (!d_useFalloff)
		return std::string();

	std::string factor;
	factor += genStr("vecFalloff(%s)", horVec.c_str());
	return factor;
}

std::string SweepKernel::genOcc(std::string horVec, std::string tanSin, std::string altSin, std::string upVec) {

	/*//return genStr("(1.0f + SSEOdot2(%s, %s)/SSEOdot2(%s, %s))", upVec.c_str(), horVec.c_str(), horVec.c_str(), horVec.c_str());
	return genStr("(1.0f + SSEOdot2(%s, SSEOnormalize2(%s)))", upVec.c_str(), horVec.c_str());*/

	/*if (d_config->tangentType() && tanSin == "")
		throw std::string("Tangent needs to be supplied to genOcc when tangentType>0 is being used");*/
	if (d_config->jitterTangent())
		throw std::string("Jitter tangent used with an obsoleted functions");

	return genOccDot(horVec, tanSin, altSin, upVec) + "*" + genFalloff(horVec);
}

/**
 * Builds the occlusion expression used when comparing hull points.
 *
 * The tanSin argument is ignored: the tangent passed on to genOcc() is always
 * the literal "0.0f". This is the hook to change if a different hull metric
 * is wanted.
 *
 * @param horVec  expression naming the horizon vector
 * @param tanSin  unused
 * @param altSin  optional precomputed sine expression, forwarded to genOcc()
 */
std::string SweepKernel::genConvexOcc(std::string horVec, std::string tanSin, std::string altSin) {
	const std::string zeroTangent("0.0f");
	return genOcc(horVec, zeroTangent, altSin);
}

/**
 * Emits the kernel helper function used to compare two hull candidates.
 *
 * Without d_occlusionCompare a cross-product based "convexCompare" is
 * emitted. With it, an "occlusionCompare" is emitted that compares the
 * genConvexOcc() expressions of v1 and v2; its signature gains a trailing
 * "tangent" parameter when jitterTangent() is enabled.
 */
void SweepKernel::genHullCompare() {
	if (!d_occlusionCompare) {
		// Purely geometric comparison.
		append("#funcDecl{convexCompare}{bool}{const float2 v1, const float2 v2}# {\n");
		append("  return (v2.y*v1.x > v2.x*v1.y);\n");
		append("}\n\n");
		return;
	}

	// Signature depends on whether a per-call tangent is passed in.
	if (d_config->jitterTangent()) {
		append("#funcDecl{occlusionCompare}{bool}{const float2 v1, const float2 v2, const float2 upVec, const float tangent}# {\n");
	} else {
		append("#funcDecl{occlusionCompare}{bool}{const float2 v1, const float2 v2, const float2 upVec}# {\n");
	}

	// Body: v1 wins when its occlusion expression is strictly larger.
	incrementIndent();
	append("return \n" + genConvexOcc("v1") + "\n > " + genConvexOcc("v2") + ";\n");
	decrementIndent();

	append("}\n\n");
}

/**
 * Emits one hull update step for a caller-named sample point.
 *
 * The generated code pops hull entries while occlusionCompare() rejects the
 * top two entries relative to the point, then pushes the point if
 * convexIndex is below hullSize()-1.
 *
 * @param write   when true (and horOut is non-empty) a float2 named horOut is
 *                declared, holding convexHull[convexIndex-2] minus the point
 * @param plocal  expression naming the current sample point
 * @param upvec   expression naming the up vector passed to occlusionCompare()
 * @param horOut  name of the horizon vector variable to declare; may be empty
 */
void SweepKernel::stepConvexUnroll(bool write, std::string plocal, std::string upvec, std::string horOut) {
	const char *point = plocal.c_str();
	const char *up = upvec.c_str();

	append("\n/* Updating convex hull START */\n");

	// Pop entries that the new point makes redundant.
	append("while (convexIndex != 1 && ");
	append("!occlusionCompare(convexHull[convexIndex-1] - %s, convexHull[convexIndex-2] - %s, %s)) {\n",
			point, point, up);
	append("  convexIndex--;\n");
	append("}\n");

	// Push the new point unless the stack limit has been reached.
	append("if (convexIndex < %d)\n", d_config->hullSize()-1);
	append("  convexHull[convexIndex++] = %s;\n\n", point);

	// Optionally expose the vector towards the entry below the new top.
	const bool declareHorizon = write && horOut != "";
	if (declareHorizon)
		append("float2 %s = convexHull[convexIndex-2] - %s;\n", horOut.c_str(), point);

	append("/* Updating convex hull END */\n");
}

/**
 * Builds a "<smaller> <= <larger> (with tolerance)" comparison expression.
 *
 * hullThresholdType() selects how hullThreshold() is applied:
 *   0 -> additive:        smaller <= larger + threshold
 *   1 -> multiplicative:  smaller <= larger * (threshold + 1)
 *
 * @param smaller  expression for the left-hand side
 * @param larger   expression for the right-hand side
 * @return the generated comparison text
 * @throws std::string for any other threshold type
 */
std::string SweepKernel::thresholdCompare(std::string smaller, std::string larger) {
	const char *lhs = smaller.c_str();
	const char *rhs = larger.c_str();

	switch (d_config->hullThresholdType()) {
	case 0:
		return genStr("%s <= %s + %Ef", lhs, rhs, d_config->hullThreshold());

	case 1:
		return genStr("%s <= %s*%Ef", lhs, rhs, d_config->hullThreshold()+1.0f);

	default:
		throw std::string("Unknown threshold type");
	}
}

/**
 * Emits the hull update step used by stepConvex() when optiPath() is enabled.
 *
 * Two code shapes exist, selected at compile time of the generator:
 *  - ROTATE_HULL_POINTS: the generated code works on the variables h1/h2
 *    (and h3 with REGH3) and only reads/writes convexHull[] when the hull is
 *    unwound or a point has to be spilled.
 *  - otherwise: the generated code reads the top two convexHull[] entries
 *    directly, pops while occ1 < occ2, then pushes pLocal.
 *
 * In both shapes the generated code leaves the winning occlusion value in
 * occ1.
 *
 * @param write  when true, a final "occlusion = <fixedTangent> + occ1;"
 *               statement is emitted
 */
void SweepKernel::stepConvexSimple(bool write) {
	// Storage label handed to d_keywords->readHalf2()/writeHalf2() below.
	// Only the ROTATE_HULL_POINTS path consumes it.
	std::string hullStorage;
	if (d_config->hullStorage() == 0)
		hullStorage = "reg";
	else if (d_config->hullStorage() == 1)
		hullStorage = "global";
	else
		hullStorage = "shared";

	#ifdef ROTATE_HULL_POINTS
	// Vectors from the current sample to the two most recent hull points.
	append("float2 v1 = h1 - pLocal;\n");
	append("float2 v2 = h2 - pLocal;\n");

	// With stopAtMax() the raw dot term is kept separately (dot1/dot2) so it
	// can take part in the loop condition; otherwise only the combined
	// occlusion values are generated.
	if (d_config->stopAtMax()) {
		append("float dot1 = " + genOccDot("v1") + ";\n");
		append("float dot2 = " + genOccDot("v2") + ";\n");
		append("float occ1 = dot1*" + genFalloff("v1") + ";\n");
		append("float occ2 = dot2*" + genFalloff("v2") + ";\n");
	} else {
		append("float occ1 = " + genConvexOcc("v1") + ";\n");
		append("float occ2 = " + genConvexOcc("v2") + ";\n");
	}

	// Iteration budget for unwinding the hull; also used further down to
	// detect whether any unwinding happened at all.
	append("int fullIters = %d;\n", d_config->hullSize()-1);

	// Additional loop condition on the dot terms, only with stopAtMax().
	std::string extraChecks;
	if (d_config->stopAtMax())
		//extraChecks += " && SSEOdot2(upVec, SSEOnormalize2(v1)) < SSEOdot2(upVec, SSEOnormalize2(v2))";
		extraChecks += genStr(" && %s", thresholdCompare("dot1", "dot2").c_str()); //dot1 < dot2 + %Ef", d_config->hullThreshold()); //SSEOdot2(upVec, SSEOnormalize2(v1))+1.0f < SSEOdot2(upVec, SSEOnormalize2(v2))+1.0f";

	#ifdef REGH3
	// With REGH3 the first unwind step is emitted as an "if" that takes the
	// next point from h3 instead of reading convexHull[]; the while loop
	// emitted below ends up nested inside this "if".
	/*if (d_config->stopAtMax())
	else*/
		append("if (convexIndex && %s%s) {\n", thresholdCompare("occ1", "occ2").c_str(), extraChecks.c_str());

	incrementIndent();

	if (d_config->stopAtMax())
		append("dot1 = dot2;\n");
	
	append("occ1 = occ2;\n");

	//append("v1 = v2;\n");
	append("h1 = h2;\n");
	append("h2 = h3;\n");
	append("convexIndex--;\n");
	append("fullIters--;\n");
	append("v2 = h2 - pLocal;\n");

	if (d_config->stopAtMax()) {
		append("dot2 = " + genOccDot("v2") + ";\n");
		append("occ2 = dot2*" + genFalloff("v2") + ";\n");
	} else
		append("occ2 = " + genConvexOcc("v2") + ";\n");

	#endif // REGH3
	
	// Main unwind loop: while the older point (h2) is at least as good as the
	// newer one (h1) within the threshold, discard h1 and fetch the next
	// older point from convexHull[].
	append("while (fullIters && convexIndex && %s%s) {\n", thresholdCompare("occ1", "occ2").c_str(), extraChecks.c_str());
	//append("while (fullIters && occ1 < occ2+0.01f) {\n");
	incrementIndent();

	if (d_config->stopAtMax())
		append("dot1 = dot2;\n");

	append("occ1 = occ2;\n");
	//append("v1 = v2;\n");
	append("h1 = h2;\n");

	// Subscript text used for every convexHull access on this path. The
	// index is masked with hullSize()-1.
	// NOTE(review): the mask only acts as a wrap-around if hullSize() is a
	// power of two - confirm against the configuration constraints.
	std::string indexing;
	if (d_config->hullStorage() == 2) {
		#ifdef INDEX2D
		indexing = genStr("[convexIndex&%d][#threadX#]", d_config->hullSize()-1);
		#else
		indexing = genStr("[(convexIndex&%d)*%d]", d_config->hullSize()-1, d_config->sweepBlock());
		#endif
	} else if (d_config->hullStorage() == 3)
		indexing = genStr("[#threadX#][convexIndex&%d]", d_config->hullSize()-1);
	else
		indexing = genStr("[convexIndex&%d]", d_config->hullSize()-1);

	append("convexIndex--;\n");

	// Load the next older hull point into h2; with hullType() set the read
	// goes through d_keywords->readHalf2() instead of a plain assignment.
	if (d_config->hullType()) {
		append("%s;\n", d_keywords->readHalf2("h2",
					genStr("&convexHull%s", indexing.c_str()), hullStorage, "0").c_str());
	} else 
		append("h2 = convexHull%s;\n", indexing.c_str());
	append("fullIters--;\n");

	append("v2 = h2 - pLocal;\n");

	if (d_config->stopAtMax()) {
		append("dot2 = " + genOccDot("v2") + ";\n");
		append("occ2 = dot2*" + genFalloff("v2") + ";\n");
	} else 
		append("occ2 = " + genConvexOcc("v2") + ";\n");

	decrementIndent();
	append("}\n");

	#ifdef REGH3
	// Close the "if" opened for the first unwind step.
	decrementIndent();
	append("}\n");

	append("h3 = h2;\n");
	#endif

	// If we did not go backwards, we have to drop current h2
	// in case next loop unravels the hull and requires h3..
	//append("if (fullIters == %d && convexIndex < %d) {\n", d_config->hullSize()-1, d_config->hullSize()-1);
	append("if (fullIters == %d) {\n", d_config->hullSize()-1);
	if (d_config->hullType())
		append("  %s;\n", d_keywords->writeHalf2(
					genStr("&convexHull%s", indexing.c_str()), "h2", hullStorage).c_str());
	else
		append("  convexHull%s = h2;\n", indexing.c_str());
	append("}\n");
	append("convexIndex++;\n");

	//append("if (convexIndex < %d)\n", d_config->hullSize()-1);
	//append("  convexHull[convexIndex++] = pLocal;\n\n");

	// Rotate the point variables: the current sample becomes the newest
	// hull point.
	append("h2 = h1;\n");
	append("h1 = pLocal;\n");

	#else // ROTATE_HULL_POINTS
	// Array-only variant: compare the top two hull entries against pLocal,
	// pop while the older entry gives the larger occlusion, then push pLocal
	// if convexIndex is below hullSize()-1.
	append("float2 v1 = convexHull[convexIndex-1] - pLocal;\n");
	append("float2 v2 = convexHull[convexIndex-2] - pLocal;\n");
	append("float occ1 = " + genConvexOcc("v1") + ";\n");
	append("float occ2 = " + genConvexOcc("v2") + ";\n");
	append("while (convexIndex != 2 && occ1 < occ2) {\n");
	incrementIndent();

	append("v1 = v2; occ1 = occ2;\n");
	append("v2 = convexHull[--convexIndex - 2] - pLocal;\n");
	append("occ2 = " + genConvexOcc("v2") + ";\n");

	decrementIndent();
	append("}\n");

	append("if (convexIndex < %d)\n", d_config->hullSize()-1);
	append("  convexHull[convexIndex++] = pLocal;\n\n");

	#endif // ROTATE_HULL_POINTS
	// Final occlusion value: the fixed tangent plus the winning occ1.
	if (write)
		append("occlusion = %Ef + occ1;\n",
				d_config->fixedTangent());
}

/**
 * Emits one hull update step for the current sample point "pLocal".
 *
 * When optiPath() is enabled the work is delegated to stepConvexSimple().
 * Otherwise the generated code pops hull entries while the configured
 * comparison (convexCompare, or occlusionCompare with or without the
 * li.tangent argument) rejects the top two entries, then pushes pLocal if
 * convexIndex is below hullSize().
 *
 * @param write  when true, a "float2 horVec" declaration is emitted after the
 *               update; with SKIPWRITES it is preceded by an opening
 *               "if (li.numSteps <= li.layerOffset) {" and one extra indent
 *               level, neither of which is closed in this function
 *
 * Ideas noted by the original author, not implemented here:
 *  - process two texels at a time;
 *  - read values into shared memory, discard texels that step too little
 *    along the step direction, compress, calculate, then write expanded out;
 *  - use linear interpolation until a depth discontinuity is hit, then snap.
 */
void SweepKernel::stepConvex(bool write) {
	// The optimised path has its own generator.
	if (d_config->optiPath()) {
		stepConvexSimple(write);
		return;
	}

	append("\n/* Updating convex hull START */\n");

	// Loop header: first half is fixed, second half is the chosen comparison.
	append("while (convexIndex != 1 && ");
	if (!d_occlusionCompare) {
		append("!convexCompare(convexHull[convexIndex-1] - pLocal, convexHull[convexIndex-2] - pLocal)) {\n");
	} else if (d_config->jitterTangent()) {
		append("!occlusionCompare(convexHull[convexIndex-1] - pLocal, convexHull[convexIndex-2] - pLocal, upVec, li.tangent)) {\n");
	} else {
		append("!occlusionCompare(convexHull[convexIndex-1] - pLocal, convexHull[convexIndex-2] - pLocal, upVec)) {\n");
	}

	// Loop body and push of the new point.
	append("  convexIndex--;\n");
	append("}\n");
	append("if (convexIndex < %d)\n", d_config->hullSize());
	append("  convexHull[convexIndex++] = pLocal;\n\n");

	// Horizon vector towards the entry below the new top.
	if (write) {
		#ifdef SKIPWRITES
		append("if (li.numSteps <= li.layerOffset) {\n");
		incrementIndent();
		#endif
		append("float2 horVec = convexHull[convexIndex-2] - pLocal;\n");
	}

	append("/* Updating convex hull END */\n");
}
