library SIMD;

uses	Windows, Classes, ContNrs, Exentia, Exentia64;


{$R *.res}

type
		// Opaque handle passed across the DLL boundary. The exported routines
		// cast it to TFVector after validating it against FSSEVectorList.
		TSSEHandle					= pointer;

      // function prototypes
      // Method-pointer types ("of object") used to hand a bound TFVector
      // method to the SSE_*Function helpers below.
      // NOTE(review): TFVector is not declared in this file; presumably it
      // comes from the Exentia unit - confirm.
      TSSEVectorProc				= procedure(V:TFVector) of object;
      TSSEDualVectorProc  		= procedure(U,V:TFVector) of object;
      TSSEDualVectorExProc  	= procedure(U,V:TFVector;const StartIndex,Count:integer) of object;
      TSSEVectorSingleFunc		= function(V:TFVector):single of object;

// Registry of every TFVector handed out by SSE_CreateVector; handles are
// validated by looking them up here (see FastIndexOf).
var	FSSEVectorList		: TObjectList;
		// Second registry created/freed in DLLEntryPoint. Nothing in this file
		// ever adds to it (SSE2_CreateVector is a stub).
		FSSE2VectorList	: TObjectList;
		// True once FSSEVectorList has been sorted by address, which lets
		// FastIndexOf use a binary search.
		FSSESorted			: boolean;

//---------------------------------------------------------------------
// List help functions
//---------------------------------------------------------------------

// Comparison callback for TList.Sort: orders two list items by their
// address value (treated as unsigned). Returns -1, 0 or 1.
function SortVectorList(Item1,Item2:Pointer): Integer;
var	Left,Right : Cardinal;
begin
   Left:=Cardinal(Item1);
   Right:=Cardinal(Item2);
   Result:=0;
   if Left<Right then
      Result:=-1
   else if Left>Right then
      Result:=1;
end;

//---------------------------------------------------------------------

// Returns the position of P in FSSEVectorList, or -1 when P is not in the
// list. When FSSESorted is set, the list is taken to be ordered by address
// (the order produced by SortVectorList) and a binary search is used;
// otherwise the list is scanned linearly.
function FastIndexOf(P:Pointer):Integer;
var	L,H,I : Integer;
		Found	: boolean;
begin
	if FSSESorted then
   begin
      Found:=false;
      L := 0;
      H := FSSEVectorList.Count - 1;
      while L <= H do
      begin
         I := (L + H) shr 1;
         if Cardinal(FSSEVectorList[I])<Cardinal(P)
            then L := I + 1
            else
            begin
               // Keep narrowing towards the left so that L finishes on the
               // matching slot when one exists.
               H := I - 1;
               if Cardinal(FSSEVectorList[I])=Cardinal(P)
                  then Found:=True;
            end;
      end;
      if Found
         then Result:=L
         else Result:=-1;
   end
   // Unsorted list: plain linear scan. The previous code called FastIndexOf
   // itself here, which recursed without end.
   else Result:=FSSEVectorList.IndexOf(TObject(P));
end;

//---------------------------------------------------------------------
// SSE help functions
//---------------------------------------------------------------------

// Validates handle V against the vector registry and, when it is known,
// invokes VProc with V cast to TFVector.
// Returns 0 after VProc was called, -1 when V is not a registered handle.
function SSE_VectorIntegerFunction(V:TSSEHandle;VProc:TSSEVectorProc):integer;
begin
   if FastIndexOf(TFVector(V))<0 then
   begin
      Result:=-1;
      Exit;
   end;
   VProc(TFVector(V));
   Result:=0;
end;

//---------------------------------------------------------------------

// Validates handle V against the vector registry and, when it is known,
// stores the value returned by VProc(V) in Res.
// Returns 0 on success; returns -1 and leaves Res untouched when V is not
// a registered handle.
function SSE_VectorSingleFunction(V:TSSEHandle;VProc:TSSEVectorSingleFunc;var Res:single):integer;
begin
   if FastIndexOf(TFVector(V))<0 then
   begin
      Result:=-1;
      Exit;
   end;
   Res:=VProc(TFVector(V));
   Result:=0;
end;

//---------------------------------------------------------------------

// Validates both operand handles against the vector registry and, when
// both are known, invokes VProc(V1,V2).
// Returns 0 after VProc was called, -1 when either handle is unknown.
function SSE_DualVectorIntegerFunction(V1,V2:TSSEHandle;VProc:TSSEDualVectorProc):integer;
var	FirstPos,SecondPos	: integer;
begin
   FirstPos:=FastIndexOf(TFVector(V1));
   SecondPos:=FastIndexOf(TFVector(V2));
   if (FirstPos<0) or (SecondPos<0) then
   begin
      Result:=-1;
      Exit;
   end;
   VProc(TFVector(V1),TFVector(V2));
   Result:=0;
end;

//---------------------------------------------------------------------

// Ranged variant of SSE_DualVectorIntegerFunction: validates both operand
// handles and, when both are known, invokes VProc(V1,V2,StartIndex,Count).
// StartIndex and Count are forwarded unchanged; no range check is done here.
// Returns 0 after VProc was called, -1 when either handle is unknown.
function SSE_DualVectorExIntegerFunction(V1,V2:TSSEHandle;StartIndex,Count:integer;VProc:TSSEDualVectorExProc):integer;
var	FirstPos,SecondPos	: integer;
begin
   FirstPos:=FastIndexOf(TFVector(V1));
   SecondPos:=FastIndexOf(TFVector(V2));
   if (FirstPos<0) or (SecondPos<0) then
   begin
      Result:=-1;
      Exit;
   end;
   VProc(TFVector(V1),TFVector(V2),StartIndex,Count);
   Result:=0;
end;

//---------------------------------------------------------------------
//	SSE functions
//---------------------------------------------------------------------

// Creates a TFVector over the caller-supplied Data/Count via
// TFVector.BuildUsingAlignedArrayPart, registers it in FSSEVectorList and
// returns it as an opaque handle.
// NOTE(review): whether the vector copies Data or only references it is
// decided by TFVector, which is not visible here - confirm.
function SSE_CreateVector(Count:integer;Data:PSingleArray):TSSEHandle; stdcall; export;
var	SSEVector	: TFVector;
begin
	SSEVector:=TFVector.BuildUsingAlignedArrayPart(Data,Count);
   FSSEVectorList.Add(SSEVector);
   Result:=TSSEHandle(SSEVector);
   // Add appends at the end, so the list has to be re-sorted whenever it is
   // in sorted mode - not only while it holds more than 100 entries.
   // Otherwise FSSESorted would stay true over an unsorted list after the
   // count dropped back to 100 or less, and the binary search in
   // FastIndexOf could miss valid handles.
   if FSSESorted or (FSSEVectorList.Count>100) then
   begin
   	FSSEVectorList.Sort(@SortVectorList);
      FSSESorted:=true;
   end;
end;

//---------------------------------------------------------------------

// Placeholder for a double-precision vector constructor: ignores Count and
// Data and always returns nil. It is not listed in the exports clause of
// this file.
function SSE2_CreateVector(Count:integer;Data:PDoubleArray):TSSEHandle; stdcall; export;
begin
   Result:=nil;
end;

//---------------------------------------------------------------------

// Removes the vector identified by aHandle from whichever registry holds it.
// Unknown handles are ignored silently.
// NOTE(review): both lists are created with the parameterless
// TObjectList.Create, which owns its items by default, so Delete is expected
// to destroy the vector as well - confirm against the RTL in use.
procedure SSE_FreeVector(aHandle:TSSEHandle); stdcall; export;
var	aIndex	: integer;
begin
	aIndex:=FastIndexOf(TFVector(aHandle));
	if aIndex>=0
   	then FSSEVectorList.Delete(aIndex)
      else
      begin
      	// Not a single-precision vector: look it up in the second registry.
      	// The previous code repeated the FSSEVectorList lookup here, so this
      	// branch could never find anything.
      	aIndex:=FSSE2VectorList.IndexOf(TObject(aHandle));
			if aIndex>=0
   			then FSSE2VectorList.Delete(aIndex);
      end;
end;

//---------------------------------------------------------------------

// Gives access to a registered vector's storage: returns its DataArray and
// writes its Length to Count. For an unknown handle Count is set to 0 and
// nil is returned.
function SSE_VectorData(aHandle:TSSEHandle;var Count:integer):PSingleArray stdcall; export;
var	Vec	: TFVector;
begin
   if FastIndexOf(TFVector(aHandle))<0 then
   begin
      Count:=0;
      Result:=nil;
      Exit;
   end;
   Vec:=TFVector(aHandle);
   Count:=Vec.Length;
   Result:=Vec.DataArray;
end;

//---------------------------------------------------------------------

// Applies TFVector(Source).Add to V.
// Returns 0 on success, -1 when Source or V is not a registered handle.
function SSE_AddVector(Source,V:TSSEHandle):integer; stdcall; export;
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_VectorIntegerFunction(V,TFVector(Source).Add);
end;

//---------------------------------------------------------------------

// Applies TFVector(Source).Sub to V.
// Returns 0 on success, -1 when Source or V is not a registered handle.
function SSE_SubtractVector(Source,V:TSSEHandle):integer; stdcall; export;
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_VectorIntegerFunction(V,TFVector(Source).Sub);
end;

//---------------------------------------------------------------------

// Applies TFVector(Source).Mul to V.
// Returns 0 on success, -1 when Source or V is not a registered handle.
function SSE_MultiplyVector(Source,V:TSSEHandle):integer; stdcall; export;
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_VectorIntegerFunction(V,TFVector(Source).Mul);
end;

//---------------------------------------------------------------------

// Applies TFVector(Source).Divide to V.
// Returns 0 on success, -1 when Source or V is not a registered handle.
function SSE_DivideVector(Source,V:TSSEHandle):integer; stdcall; export;
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_VectorIntegerFunction(V,TFVector(Source).Divide);
end;

//---------------------------------------------------------------------

// Applies TFVector(Source).AddSquare to V.
// Returns 0 on success, -1 when Source or V is not a registered handle.
function SSE_AddSquare(Source,V:TSSEHandle):integer; stdcall; export;   // Source = Source + V^2
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_VectorIntegerFunction(V,TFVector(Source).AddSquare);
end;

//---------------------------------------------------------------------

// Stores TFVector(Source).InnerSum in Res.
// Returns 0 on success; returns -1 and leaves Res untouched when Source is
// not a registered handle.
function SSE_InnerSum(Source:TSSEHandle;var Res:single):integer; stdcall; export;  // result = Sum(Source[i])
begin
   if FastIndexOf(TFVector(Source))<0 then
   begin
      Result:=-1;
      Exit;
   end;
   Res:=TFVector(Source).InnerSum;
   Result:=0;
end;

//---------------------------------------------------------------------

// Stores TFVector(Source).InnerSumAbs in Res.
// Returns 0 on success; returns -1 and leaves Res untouched when Source is
// not a registered handle.
function SSE_InnerSumAbs(Source:TSSEHandle;var Res:single):integer; stdcall; export;  // result = Sum(abs(Source[i]))
begin
   if FastIndexOf(TFVector(Source))<0 then
   begin
      Result:=-1;
      Exit;
   end;
   Res:=TFVector(Source).InnerSumAbs;
   Result:=0;
end;

//---------------------------------------------------------------------

// Stores the value of TFVector(Source).DotProduct(V) in Res.
// Returns 0 on success, -1 when Source or V is not a registered handle
// (Res is left untouched in that case).
function SSE_DotProduct(Source,V:TSSEHandle;var Res:single):integer; stdcall; export;  // result = Sum(Source[i] * V[i])
var	Index	: integer;
begin
   Index:=FastIndexOf(TFVector(Source));
   // The helper writes the dot product into Res and returns the status code.
   // The previous code assigned that status code to Res, overwriting the
   // computed value, and reported success even for an invalid V.
   if (Index>=0)
   	then Result:=SSE_VectorSingleFunction(V,TFVector(Source).DotProduct,Res)
	   else Result:=-1;
end;

//---------------------------------------------------------------------

// Applies TFVector(Source).Sqr to V.
// Returns 0 on success, -1 when Source or V is not a registered handle.
function SSE_SqrVector(Source,V:TSSEHandle):integer; stdcall; export;  // Source = Sqr(V)
var	Index	: integer;
begin
   Index:=FastIndexOf(TFVector(Source));
   // An unknown Source must be reported as -1, as in the other vector
   // operations; the previous code returned 0 (success) here.
   if (Index>=0)
   	then Result:=SSE_VectorIntegerFunction(V,TFVector(Source).Sqr)
	   else Result:=-1;
end;

//---------------------------------------------------------------------

// Stores TFVector(Source).MaxValue in Res.
// Returns 0 on success; returns -1 and leaves Res untouched when Source is
// not a registered handle.
function SSE_MaxValue(Source:TSSEHandle;var Res:single):integer; stdcall; export;  // result = Max(Source[i])
var	Index	: integer;
begin
   Index:=FastIndexOf(TFVector(Source));
   if (Index>=0) then
   begin
   	Res:=TFVector(Source).MaxValue;
      Result:=0;
   end
   // Was 0, which made an invalid handle indistinguishable from success.
   else Result:=-1;
end;

//---------------------------------------------------------------------

// Stores TFVector(Source).MaxAbsValue in Res.
// Returns 0 on success; returns -1 and leaves Res untouched when Source is
// not a registered handle.
function SSE_MaxAbsValue(Source:TSSEHandle;var Res:single):integer; stdcall; export;  // result = Max(abs(Source[i]))
var	Index	: integer;
begin
   Index:=FastIndexOf(TFVector(Source));
   if (Index>=0) then
   begin
   	Res:=TFVector(Source).MaxAbsValue;
      Result:=0;
   end
   // Was 0, which made an invalid handle indistinguishable from success.
   else Result:=-1;
end;

//---------------------------------------------------------------------

// Stores TFVector(Source).IndexMaxValue in Res.
// Returns 0 on success; returns -1 and leaves Res untouched when Source is
// not a registered handle.
function SSE_IndexMaxValue(Source:TSSEHandle;var Res:integer):integer; stdcall; export;  // result = index of Max(Source)
var	Index	: integer;
begin
   Index:=FastIndexOf(TFVector(Source));
   if (Index>=0) then
   begin
   	Res:=TFVector(Source).IndexMaxValue;
      Result:=0;
   end
   // Was 0, which made an invalid handle indistinguishable from success.
   else Result:=-1;
end;

//---------------------------------------------------------------------

// Stores TFVector(Source).MinValue in Res.
// Returns 0 on success; returns -1 and leaves Res untouched when Source is
// not a registered handle.
function SSE_MinValue(Source:TSSEHandle;var Res:single):integer; stdcall; export;  // result = Min(Source[i])
begin
   if FastIndexOf(TFVector(Source))<0 then
   begin
      Result:=-1;
      Exit;
   end;
   Res:=TFVector(Source).MinValue;
   Result:=0;
end;

//---------------------------------------------------------------------

// Stores TFVector(Source).MinAbsValue in Res.
// Returns 0 on success; returns -1 and leaves Res untouched when Source is
// not a registered handle.
function SSE_MinAbsValue(Source:TSSEHandle;var Res:single):integer; stdcall; export;  // result = Min(abs(Source[i]))
begin
   if FastIndexOf(TFVector(Source))<0 then
   begin
      Result:=-1;
      Exit;
   end;
   Res:=TFVector(Source).MinAbsValue;
   Result:=0;
end;

//---------------------------------------------------------------------

// Stores TFVector(Source).IndexMinValue in Res.
// Returns 0 on success; returns -1 and leaves Res untouched when Source is
// not a registered handle.
function SSE_IndexMinValue(Source:TSSEHandle;var Res:integer):integer; stdcall; export;  // result = index of Min(Source)
begin
   if FastIndexOf(TFVector(Source))<0 then
   begin
      Result:=-1;
      Exit;
   end;
   Res:=TFVector(Source).IndexMinValue;
   Result:=0;
end;

//---------------------------------------------------------------------

// Stores TFVector(Source).Mean in Res.
// Returns 0 on success; returns -1 and leaves Res untouched when Source is
// not a registered handle.
function SSE_MeanValue(Source:TSSEHandle;var Res:single):integer; stdcall; export;  // result = Sum(Source[i]) / NumElements
var	Index	: integer;
begin
   Index:=FastIndexOf(TFVector(Source));
   if (Index>=0) then
   begin
   	Res:=TFVector(Source).Mean;
      Result:=0;
   end
   // Was 0, which made an invalid handle indistinguishable from success.
   else Result:=-1;
end;

//---------------------------------------------------------------------

// Calls TFVector(Source).Scale(Factor).
// Returns 0 on success, -1 when Source is not a registered handle.
function SSE_Scale(Source:TSSEHandle;Factor:single):integer; stdcall; export;  // Source = value * Source
begin
   if FastIndexOf(TFVector(Source))<0 then
   begin
      Result:=-1;
      Exit;
   end;
   TFVector(Source).Scale(Factor);
   Result:=0;
end;

//---------------------------------------------------------------------

// Applies the two-operand TFVector(Source).Add to V1 and V2.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualAdd(Source,V1,V2:TSSEHandle):integer; stdcall; export;  // Source := V1 + V2
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorIntegerFunction(V1,V2,TFVector(Source).Add);
end;

//---------------------------------------------------------------------

// Ranged form of SSE_DualAdd: forwards xStartInd/xCount to the ranged
// TFVector(Source).Add overload.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualAddEx(Source,V1,V2:TSSEHandle;const xStartInd,xCount:integer):integer; stdcall; export;  // Source = V1 + V2; applies for elements [StartInd] ... [StartInd + Count -1]
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorExIntegerFunction(V1,V2,xStartInd,xCount,TFVector(Source).Add);
end;

//---------------------------------------------------------------------

// Applies the two-operand TFVector(Source).Sub to V1 and V2.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualSub(Source,V1,V2:TSSEHandle):integer; stdcall; export;  // Source := V1 - V2
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorIntegerFunction(V1,V2,TFVector(Source).Sub);
end;

//---------------------------------------------------------------------

// Ranged form of SSE_DualSub: forwards xStartInd/xCount to the ranged
// TFVector(Source).Sub overload.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualSubEx(Source,V1,V2:TSSEHandle;const xStartInd,xCount:integer):integer; stdcall; export;
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorExIntegerFunction(V1,V2,xStartInd,xCount,TFVector(Source).Sub);
end;

//---------------------------------------------------------------------

// Applies the two-operand TFVector(Source).Mul to V1 and V2.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualMul(Source,V1,V2:TSSEHandle):integer; stdcall; export;  // Source := V1*V2
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorIntegerFunction(V1,V2,TFVector(Source).Mul);
end;

//---------------------------------------------------------------------

// Ranged form of SSE_DualMul: forwards xStartInd/xCount to the ranged
// TFVector(Source).Mul overload.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualMulEx(Source,V1,V2:TSSEHandle;const xStartInd,xCount:integer):integer; stdcall; export;
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorExIntegerFunction(V1,V2,xStartInd,xCount,TFVector(Source).Mul);
end;

//---------------------------------------------------------------------

// Applies the two-operand TFVector(Source).Divide to V1 and V2.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualDivide(Source,V1,V2:TSSEHandle):integer; stdcall; export; // Source := V1/V2
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorIntegerFunction(V1,V2,TFVector(Source).Divide);
end;

//---------------------------------------------------------------------

// Ranged form of SSE_DualDivide: forwards xStartInd/xCount to the ranged
// TFVector(Source).Divide overload.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualDivideEx(Source,V1,V2:TSSEHandle;const xStartInd,xCount:integer):integer; stdcall; export;
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorExIntegerFunction(V1,V2,xStartInd,xCount,TFVector(Source).Divide);
end;

//---------------------------------------------------------------------

// Applies TFVector(Source).MulDiv to V1 and V2.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualMulDiv(Source,V1,V2:TSSEHandle):integer; stdcall; export;  // Source = Source*V1 / V2
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorIntegerFunction(V1,V2,TFVector(Source).MulDiv);
end;

//---------------------------------------------------------------------

// Applies the two-operand TFVector(Source).AddSquare to V1 and V2.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualAddSquare(Source,V1,V2:TSSEHandle):integer; stdcall; export;   // Source = V1^2 + V2^2
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorIntegerFunction(V1,V2,TFVector(Source).AddSquare);
end;

//---------------------------------------------------------------------

// Ranged form of SSE_DualAddSquare: forwards xStartInd/xCount to the ranged
// TFVector(Source).AddSquare overload.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualAddSquareEx(Source,V1,V2:TSSEHandle;const xStartInd,xCount:integer):integer; stdcall; export;
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorExIntegerFunction(V1,V2,xStartInd,xCount,TFVector(Source).AddSquare);
end;

//---------------------------------------------------------------------

// Applies the two-operand TFVector(Source).Max to V1 and V2.
// Returns 0 on success, -1 when any of the three handles is unknown.
// NOTE(review): the trailing comment says Max(Source, V) although two
// operands are passed; presumably Source = Max(V1, V2), as for SSE_DualMin -
// confirm against TFVector.Max.
function SSE_DualMax(Source,V1,V2:TSSEHandle):integer; stdcall; export;  // Source = Max(Source, V)
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorIntegerFunction(V1,V2,TFVector(Source).Max);
end;

//---------------------------------------------------------------------

// Ranged form of SSE_DualMax: forwards xStartInd/xCount to the ranged
// TFVector(Source).Max overload.
// Returns 0 on success, -1 when any of the three handles is unknown.
// NOTE(review): the trailing comment says Max(Source, V) although two
// operands are passed; presumably Source = Max(V1, V2) - confirm.
function SSE_DualMaxEx(Source,V1,V2:TSSEHandle;const xStartInd,xCount:integer):integer; stdcall; export;  // Source = Max(Source, V)
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorExIntegerFunction(V1,V2,xStartInd,xCount,TFVector(Source).Max);
end;

//---------------------------------------------------------------------

// Applies the two-operand TFVector(Source).Min to V1 and V2.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualMin(Source,V1,V2:TSSEHandle):integer; stdcall; export;  // Source = Min(V1, V2)
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorIntegerFunction(V1,V2,TFVector(Source).Min);
end;

//---------------------------------------------------------------------

// Ranged form of SSE_DualMin: forwards xStartInd/xCount to the ranged
// TFVector(Source).Min overload.
// Returns 0 on success, -1 when any of the three handles is unknown.
function SSE_DualMinEx(Source,V1,V2:TSSEHandle;const xStartInd,xCount:integer):integer; stdcall; export;  // Source = Min(V1, V2)
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_DualVectorExIntegerFunction(V1,V2,xStartInd,xCount,TFVector(Source).Min);
end;

//---------------------------------------------------------------------

// Calls the scalar overload TFVector(Source).Add(Value).
// Returns 0 on success, -1 when Source is not a registered handle.
function SSE_Add(Source:TSSEHandle;Value:single):integer; stdcall; export; // Source = Source + value
begin
   if FastIndexOf(TFVector(Source))<0 then
   begin
      Result:=-1;
      Exit;
   end;
   TFVector(Source).Add(Value);
   Result:=0;
end;

//---------------------------------------------------------------------

// Calls TFVector(Source).Combine(V1,V2,xA,xB) after validating all three
// handles.
// Returns 0 on success, -1 when any handle is not registered.
function SSE_DualCombine(Source,V1,V2:TSSEHandle;xA,xB:single):integer; stdcall; export; // Source := A*V1 + B*V2
var	TargetPos,FirstPos,SecondPos	: integer;
begin
   TargetPos:=FastIndexOf(TFVector(Source));
   FirstPos:=FastIndexOf(TFVector(V1));
   SecondPos:=FastIndexOf(TFVector(V2));
   if (TargetPos<0) or (FirstPos<0) or (SecondPos<0) then
   begin
      Result:=-1;
      Exit;
   end;
   TFVector(Source).Combine(TFVector(V1),TFVector(V2),xA,xB);
   Result:=0;
end;

//---------------------------------------------------------------------

// Calls TFVector(Source).Combine(V,xA) after validating both handles.
// Returns 0 on success, -1 when either handle is not registered.
function SSE_CombineVector(Source,V:TSSEHandle;xA:single):integer; stdcall; export; // Source := Source + A*V1
var	TargetPos,OperandPos	: integer;
begin
   TargetPos:=FastIndexOf(TFVector(Source));
   OperandPos:=FastIndexOf(TFVector(V));
   if (TargetPos<0) or (OperandPos<0) then
   begin
      Result:=-1;
      Exit;
   end;
   TFVector(Source).Combine(TFVector(V),xA);
   Result:=0;
end;

//---------------------------------------------------------------------

// Calls TFVector(Source).Lerp(V,xT) after validating both handles.
// Returns 0 on success, -1 when either handle is not registered.
function SSE_LerpVector(Source,V:TSSEHandle;xT:single):integer; stdcall; export;       // Source := Source + (V - Source) * t;
var	TargetPos,OperandPos	: integer;
begin
   TargetPos:=FastIndexOf(TFVector(Source));
   OperandPos:=FastIndexOf(TFVector(V));
   if (TargetPos<0) or (OperandPos<0) then
   begin
      Result:=-1;
      Exit;
   end;
   TFVector(Source).Lerp(TFVector(V),xT);
   Result:=0;
end;

//---------------------------------------------------------------------

// Calls TFVector(Source).Lerp(V1,V2,xT) after validating all three handles.
// Returns 0 on success, -1 when any handle is not registered.
function SSE_DualLerp(Source,V1,V2:TSSEHandle;xT:single):integer; stdcall; export;       // Source := V1 + (V2 - V1) * t;
var	TargetPos,FirstPos,SecondPos	: integer;
begin
   TargetPos:=FastIndexOf(TFVector(Source));
   FirstPos:=FastIndexOf(TFVector(V1));
   SecondPos:=FastIndexOf(TFVector(V2));
   if (TargetPos<0) or (FirstPos<0) or (SecondPos<0) then
   begin
      Result:=-1;
      Exit;
   end;
   TFVector(Source).Lerp(TFVector(V1),TFVector(V2),xT);
   Result:=0;
end;

//---------------------------------------------------------------------

// Stores TFVector(Source).InnerSqrSum in Res.
// Returns 0 on success; returns -1 and leaves Res untouched when Source is
// not a registered handle.
function SSE_InnerSqrSum(Source:TSSEHandle;var Res:single):integer; stdcall; export;  // result = Sum(sqr(Source[i]))
var	Index	: integer;
begin
   Index:=FastIndexOf(TFVector(Source));
   if (Index>=0) then
   begin
   	Res:=TFVector(Source).InnerSqrSum;
      Result:=0;
   end
   // Was 0, which made an invalid handle indistinguishable from success.
   else Result:=-1;
end;

//---------------------------------------------------------------------

// Stores the value of TFVector(Source).NormL1(V) in Res.
// Returns 0 on success, -1 when Source or V is not a registered handle
// (Res is left untouched in that case).
function SSE_NormL1(Source,V:TSSEHandle;var Res:single):integer; stdcall; export; // result = Sum(abs(Source[i] - V[i]))
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_VectorSingleFunction(V,TFVector(Source).NormL1,Res);
end;

//---------------------------------------------------------------------

// Stores the value of TFVector(Source).NormL2(V) in Res.
// Returns 0 on success, -1 when Source or V is not a registered handle
// (Res is left untouched in that case).
function SSE_NormL2(Source,V:TSSEHandle;var Res:single):integer; stdcall; export; // result = Sum(sqr(Source[i] - V[i]))
begin
   Result:=-1;
   if FastIndexOf(TFVector(Source))>=0 then
      Result:=SSE_VectorSingleFunction(V,TFVector(Source).NormL2,Res);
end;

//---------------------------------------------------------------------

// Public entry points of the DLL. Note that SSE2_CreateVector carries the
// "export" directive above but is not listed here.
exports
	SSE_CreateVector,
	SSE_FreeVector,
   SSE_VectorData,
	SSE_AddVector,
   SSE_SubtractVector,
	SSE_MultiplyVector,
	SSE_DivideVector,
	SSE_AddSquare,
	SSE_InnerSum,
	SSE_InnerSumAbs,
	SSE_DotProduct,
	SSE_MaxValue,
	SSE_MaxAbsValue,
	SSE_IndexMaxValue,
	SSE_MinValue,
	SSE_MinAbsValue,
	SSE_IndexMinValue,
	SSE_MeanValue,
	SSE_Scale,
	SSE_DualAdd,
	SSE_DualAddEx,
	SSE_DualSub,
	SSE_DualSubEx,
	SSE_DualMul,
	SSE_DualMulEx,
	SSE_DualDivide,
	SSE_DualDivideEx,
	SSE_DualMulDiv,
   SSE_SqrVector,
   SSE_DualAddSquare,
   SSE_DualAddSquareEx,
   SSE_DualMax,
   SSE_DualMaxEx,
   SSE_DualMin,
   SSE_DualMinEx,
   SSE_Add,
   SSE_DualCombine,
   SSE_CombineVector,
   SSE_LerpVector,
   SSE_DualLerp,
   SSE_InnerSqrSum,
   SSE_NormL1,
   SSE_NormL2;

//---------------------------------------------------------------------

// DLL notification handler installed through DllProc.
// On process attach it creates both vector registries and clears the
// sorted flag; on process detach it frees both registries. Any other
// reason code is ignored.
procedure DLLEntryPoint(Reason: Integer);
begin
   if Reason=Dll_Process_Attach then
   begin
      FSSEVectorList:=TObjectList.Create;
      FSSE2VectorList:=TObjectList.Create;
      FSSESorted:=false;
   end
   else if Reason=Dll_Process_Detach then
   begin
      FSSEVectorList.Free;
      FSSE2VectorList.Free;
   end;
end;

//---------------------------------------------------------------------

// Library initialization: unless a DllProc handler is already assigned,
// install DLLEntryPoint as the handler and run the process-attach setup
// by calling it directly.
begin
	if not assigned(DllProc) then
	begin
		DllProc := @DLLEntryPoint;
		DllEntryPoint(Dll_Process_Attach);
  end;
end.
