thysol
BIOS-Overclocker(in)
Hi,
Ich habe einen OpenCL-GPU-Benchmark geschrieben, der allerdings sehr schlecht auf Radeons läuft. Wie nutze ich die 4D/5D-Shader aktueller Radeons? Momentan habe ich nur SIMD-Code:
Was muss ich ändern, damit die 4D/5D-Shader aktueller Radeons voll ausgelastet werden?
Ich habe einen OpenCL-GPU-Benchmark geschrieben, der allerdings sehr schlecht auf Radeons läuft. Wie nutze ich die 4D/5D-Shader aktueller Radeons? Momentan habe ich nur SIMD-Code:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Threading;
using Cloo;
namespace CPU_Benchmark__Single_threaded_
{
    /// <summary>
    /// Main window of the benchmark. Clicking the button compiles an OpenCL
    /// kernel, runs it once over a fixed number of elements on the first device
    /// of the first platform, and shows the elapsed wall-clock time.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Number of elements in every input/output array (one work-item each).</summary>
        private const int ElementCount = 1000000;

        /// <summary>
        /// OpenCL C source of the <c>orbit</c> kernel. Every work-item reads and
        /// writes only its own element <c>[i]</c> of the eight buffers and repeats
        /// the same arithmetic 1000 times.
        /// NOTE(review): <c>mu</c> looks like Earth's gravitational parameter in
        /// m^3/s^2 - confirm against the intended formula.
        /// </summary>
        private const string OrbitKernelSource = @"
__kernel void
orbit(__global float * v1,
      __global float * v2,
      __global float * v3,
      __global float * v4,
      __global float * v5,
      __global float * v6,
      __global float * v7,
      __global float * v8)
{
    const float mu = 3.986005e14f;
    int i = get_global_id(0);
    int b = 0;
    while (b < 1000)
    {
        v4[i] = v3[i] / (2 * mu / (v3[i] * (v1[i] * v1[i])) - 1);
        v5[i] = v3[i] * (v1[i] * v1[i]) / mu - 1;
        v6[i] = v4[i] / (1 + v5[i]);
        v7[i] = v5[i] * v6[i];
        v6[i] = v6[i] * v6[i];
        v7[i] = v7[i] * v7[i];
        v8[i] = v6[i] - v7[i];
        v8[i] = sqrt( v8[i] );
        b++;
    }
}";

        /// <summary>Creates the form and its designer-generated controls.</summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Runs the benchmark and writes the elapsed seconds to <c>label1</c>.
        /// The measured time covers everything between the start of the handler
        /// and the read-back of the result buffer, i.e. it includes context
        /// creation, kernel compilation and host-side data generation.
        /// </summary>
        /// <param name="sender">Control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            Random RandomClass = new Random();
            System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
            sw.Start();

            // Guard the hard-coded [0] lookups so a machine without an OpenCL
            // runtime or device gets a message instead of an index exception.
            if (ComputePlatform.Platforms.Count == 0)
            {
                MessageBox.Show("No OpenCL platform found.");
                return;
            }
            ComputePlatform platform = ComputePlatform.Platforms[0];
            if (platform.Devices.Count == 0)
            {
                MessageBox.Show("The first OpenCL platform exposes no devices.");
                return;
            }
            ComputeDevice device = platform.Devices[0];

            // Only the device the queue runs on needs a build; the previous
            // unconditional Devices[1] lookup failed on single-device platforms.
            List<ComputeDevice> Devs = new List<ComputeDevice>();
            Devs.Add(device);

            ComputeContextPropertyList Properties = new ComputeContextPropertyList(platform);
            using (ComputeContext Context = new ComputeContext(ComputeDeviceTypes.All, Properties, null, IntPtr.Zero))
            using (ComputeProgram prog = new ComputeProgram(Context, OrbitKernelSource))
            {
                try
                {
                    prog.Build(Devs, "", null, IntPtr.Zero);
                }
                catch (Exception ex)
                {
                    // A failed build must stop the run; continuing would only
                    // fail later when the kernel is created.
                    MessageBox.Show("OpenCL kernel build failed: " + ex.Message);
                    return;
                }

                int n = ElementCount;
                float[] v1 = new float[n], v2 = new float[n], v3 = new float[n], v4 = new float[n],
                        v5 = new float[n], v6 = new float[n], v7 = new float[n], v8 = new float[n];
                for (int i = 0; i < n; i++)
                {
                    v1[i] = RandomClass.Next(1000, 100000);
                    v2[i] = RandomClass.Next(200000, 10000000);
                    v3[i] = 6378140 + v2[i];
                }

                const ComputeMemoryFlags flags = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer;

                using (ComputeKernel kernelVecSum = prog.CreateKernel("orbit"))
                using (ComputeBuffer<float> bufV1 = new ComputeBuffer<float>(Context, flags, v1))
                using (ComputeBuffer<float> bufV2 = new ComputeBuffer<float>(Context, flags, v2))
                using (ComputeBuffer<float> bufV3 = new ComputeBuffer<float>(Context, flags, v3))
                using (ComputeBuffer<float> bufV4 = new ComputeBuffer<float>(Context, flags, v4))
                using (ComputeBuffer<float> bufV5 = new ComputeBuffer<float>(Context, flags, v5))
                using (ComputeBuffer<float> bufV6 = new ComputeBuffer<float>(Context, flags, v6))
                using (ComputeBuffer<float> bufV7 = new ComputeBuffer<float>(Context, flags, v7))
                using (ComputeBuffer<float> bufV8 = new ComputeBuffer<float>(Context, flags, v8))
                using (ComputeCommandQueue Queue = new ComputeCommandQueue(Context, device, ComputeCommandQueueFlags.None))
                {
                    kernelVecSum.SetMemoryArgument(0, bufV1);
                    kernelVecSum.SetMemoryArgument(1, bufV2);
                    kernelVecSum.SetMemoryArgument(2, bufV3);
                    kernelVecSum.SetMemoryArgument(3, bufV4);
                    kernelVecSum.SetMemoryArgument(4, bufV5);
                    kernelVecSum.SetMemoryArgument(5, bufV6);
                    kernelVecSum.SetMemoryArgument(6, bufV7);
                    kernelVecSum.SetMemoryArgument(7, bufV8);

                    // One work-item per array element.
                    Queue.Execute(kernelVecSum, null, new long[] { v1.Length }, null, null);

                    // Reading the result back is what the timing waits on.
                    v8 = Queue.Read<float>(bufV8, null);
                }
            }

            sw.Stop();
            label1.Text = sw.Elapsed.TotalSeconds.ToString();
        }
    }
}
Was muss ich ändern, damit die 4D/5D-Shader aktueller Radeons voll ausgelastet werden?