<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss2full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" version="2.0">
<channel>
 
  <title>Khronos.org Message Boards</title>
  <link>http://www.khronos.org/message_boards</link>
  <description>Public discussions about the Khronos Dynamic Media APIs</description>
  <language>en</language>
  <copyright>  Khronos.org Message Boards</copyright>     
  <managingEditor>webmaster@khronos.org (Khronos.org Message Boards)</managingEditor>  
   <generator>Khronos.org Message Boards</generator>
  <ttl>1</ttl>  

                                  <atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/rss+xml" href="http://feeds.feedburner.com/khr-message-board-topics" /><feedburner:info uri="khr-message-board-topics" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><feedburner:emailServiceId>khr-message-board-topics</feedburner:emailServiceId><feedburner:feedburnerHostname>http://feedburner.google.com</feedburner:feedburnerHostname><item>
                                  <title>Confusion on output</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/EzjSTnU4PHQ/viewtopic.php</link>
                                  <description>by mathuin (Posted Thu Feb 09, 2012 11:52 am)&lt;br/&gt;I was able to solve my own problem.  My main trouble was argument agreement -- don't send an integer when a float is expected, et cetera.  The next step is to figure out how to handle arbitrarily large arrays, but that will be a different thread if necessary.&lt;br /&gt;&lt;br /&gt;Here is the resulting code:&lt;br /&gt;&lt;br /&gt;First the OpenCL code:&lt;br /&gt;&lt;br /&gt;&lt;div class="codetitle"&gt;&lt;b&gt;Code:&lt;/b&gt;&lt;/div&gt;&lt;div class="codecontent"&gt;struct point {&lt;br /&gt;&amp;nbsp; int x;&lt;br /&gt;&amp;nbsp; int z;&lt;br /&gt;};&lt;br /&gt;&lt;br /&gt;__kernel void nearest(__global struct point *coords, __global int *values, __global struct point *base, __global int *output, const unsigned int lencoords) {&lt;br /&gt;&amp;nbsp; // base index&lt;br /&gt;&amp;nbsp; int g_dataset_id = get_global_id(0);&lt;br /&gt;&lt;br /&gt;&amp;nbsp; int dmin = -1;&lt;br /&gt;&amp;nbsp; int d, dx, dz;&lt;br /&gt;&lt;br /&gt;&amp;nbsp; for (int i=0; i&amp;lt;lencoords; i++) {&lt;br /&gt;&amp;nbsp; &amp;nbsp; dx = coords&amp;#91;i&amp;#93;.x-base&amp;#91;g_dataset_id&amp;#93;.x;&lt;br /&gt;&amp;nbsp; &amp;nbsp; dx = dx * dx;&lt;br /&gt;&amp;nbsp; &amp;nbsp; dz = coords&amp;#91;i&amp;#93;.z-base&amp;#91;g_dataset_id&amp;#93;.z;&lt;br /&gt;&amp;nbsp; &amp;nbsp; dz = dz * dz;&lt;br /&gt;&amp;nbsp; &amp;nbsp; d = dx + dz;&lt;br /&gt;&amp;nbsp; &amp;nbsp; if (dmin == -1 || d &amp;lt; dmin) {&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; output&amp;#91;g_dataset_id&amp;#93; = values&amp;#91;i&amp;#93;;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; dmin = d;&lt;br /&gt;&amp;nbsp; &amp;nbsp; }&lt;br /&gt;&amp;nbsp; }&lt;br /&gt;}&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;Now the Python code:&lt;br /&gt;&lt;br /&gt;&lt;div class="codetitle"&gt;&lt;b&gt;Code:&lt;/b&gt;&lt;/div&gt;&lt;div class="codecontent"&gt;import pyopencl as cl&lt;br /&gt;import numpy&lt;br 
/&gt;from itertools import product&lt;br /&gt;from random import randint, uniform&lt;br /&gt;&lt;br /&gt;class CL:&lt;br /&gt;&amp;nbsp; &amp;nbsp; def __init__(self):&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.ctx = cl.create_some_context()&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.queue = cl.CommandQueue(self.ctx)&lt;br /&gt;&lt;br /&gt;&amp;nbsp; &amp;nbsp; def loadProgram(self, filename):&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; #read in the OpenCL source file as a string&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; f = open(filename, 'r')&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; fstr = &amp;quot;&amp;quot;.join(f.readlines())&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; #print fstr&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; #create the program&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.program = cl.Program(self.ctx, fstr).build()&lt;br /&gt;&lt;br /&gt;&amp;nbsp; &amp;nbsp; def popCorn(self):&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; mf = cl.mem_flags&lt;br /&gt;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; #initialize client side (CPU) arrays&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.xsize = 256&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.zsize = 256&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.lenbase = self.xsize * self.zsize&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.lencoords = max(1, int(self.lenbase*0.05))&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.coords = numpy.array(&amp;#91;(randint(0, self.xsize-1),randint(0, self.xsize-1)) for elem in xrange(self.lencoords)&amp;#93;, dtype=numpy.int32)&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.values = numpy.array(&amp;#91;uniform(1,5) for elem in xrange(self.lencoords)&amp;#93;, dtype=numpy.int32)&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.base = 
numpy.array(&amp;#91;(index / self.zsize, index % self.zsize) for index in xrange(self.lenbase)&amp;#93;, dtype=numpy.int32)&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.output = numpy.zeros((self.lenbase), dtype=numpy.int32)&lt;br /&gt;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; #create OpenCL buffers&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.coords_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.coords)&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.values_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.values)&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.base_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.base)&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.output_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, size=self.output.nbytes)&lt;br /&gt;&lt;br /&gt;&amp;nbsp; &amp;nbsp; def execute(self):&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.program.nearest(self.queue, self.base.shape, None, self.coords_buf, self.values_buf, self.base_buf, self.output_buf, numpy.int32(self.lencoords))&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.results = numpy.empty_like(self.output)&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; cl.enqueue_read_buffer(self.queue, self.output_buf, self.results).wait()&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; self.results.resize((self.zsize, self.xsize))&lt;br /&gt;&lt;br /&gt;&amp;nbsp; &amp;nbsp; def dumpit(self):&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; for xind in xrange(self.xsize):&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; line = ''&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; for zind in xrange(self.zsize):&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; line += 
str(int(self.results&amp;#91;xind,zind&amp;#93;))&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; print line&lt;br /&gt;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&lt;br /&gt;if __name__ == &amp;quot;__main__&amp;quot;:&lt;br /&gt;&amp;nbsp; &amp;nbsp; example = CL()&lt;br /&gt;&amp;nbsp; &amp;nbsp; example.loadProgram(&amp;quot;nearest.cl&amp;quot;)&lt;br /&gt;&amp;nbsp; &amp;nbsp; example.popCorn()&lt;br /&gt;&amp;nbsp; &amp;nbsp; example.execute()&lt;br /&gt;&amp;nbsp; &amp;nbsp; example.dumpit()&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4740"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/EzjSTnU4PHQ" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Thu Feb 09, 2012 11:52 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4740#p13932</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4740#p13932</feedburner:origLink></item>
                                  <item>
                                  <title>Cross-device bandwidth for discrete GPU (HD 5870)</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/EXkJysoOs0s/viewtopic.php</link>
                                  <description>by Cadorino (Posted Thu Feb 09, 2012 11:41 am)&lt;br/&gt;Hi,&lt;br /&gt;I'm testing a system equipped with a Fusion A8-3850 and an HD 5870 gpu. I was planning to test the memory access bandwidth in the following cases:&lt;br /&gt; &lt;br /&gt;1) The discrete GPU (HD 5870) reads from a buffer allocated in the host memory (CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY)&lt;br /&gt;2) The integrated GPU (6550D) reads from a buffer allocated in the host memory (CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY)&lt;br /&gt; &lt;br /&gt;Reads are performed linearly (each thread reads a fixed-size memory range starting from its own global index).&lt;br /&gt;&lt;br /&gt;I was assuming that the result of the first test (discrete gpu) would have never been higher than the PCI-express bandwidth (approx 8GB/s), but I'm getting a bandwidth that is around 40 GB/s.&lt;br /&gt;I'm checking the bandwidth by using both the GlobalMemoryTest sample shipped with the AMD SDK and a program written by myself. The results are very similar.&lt;br /&gt; &lt;br /&gt;Can you explain me if it is (and why it is) possible to get a cross-domain (gpu-&amp;gt;cpu) read bandwidth higher than the PCI one from a discrete GPU?.&lt;br /&gt; &lt;br /&gt;Thank you very much!&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4744"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/EXkJysoOs0s" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Thu Feb 09, 2012 11:41 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4744#p13931</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4744#p13931</feedburner:origLink></item>
                                  <item>
                                  <title>CPU + GPU processing of same image</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/ENHDjWiaVFE/viewtopic.php</link>
                                  <description>by Photovore (Posted Thu Feb 09, 2012 11:01 am)&lt;br/&gt;When I tried it, it was pretty easy and well worth the effort.&lt;br /&gt;&lt;br /&gt;If it's working on CL_DEVICE_TYPE_GPU, the same code should work on the cpu if you specify CL_DEVICE_TYPE_CPU.  Then make two contexts, two command queues, two programs, etc; just do everything twice, once for each.  I read the data back to a big array in ram, so I just figure out the offset into that array when I read back the bottom half.  I also send in a parameter so each kernel knows where it is.  (I also do a 3-way, cpu+big gpu+little gpu, to try to get just a little more performance.)&lt;br /&gt;&lt;br /&gt;What I don't know about is how you could merge the CPU half of the data up onto the graphics card itself, to display with opengl, without having to copy the gpu's data off and back on.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4713"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/ENHDjWiaVFE" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Thu Feb 09, 2012 11:01 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4713#p13930</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4713#p13930</feedburner:origLink></item>
                                  <item>
                                  <title>Real-time raytracing -- Is MIMD req'd?</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/otZGYycNSbo/viewtopic.php</link>
                                  <description>by Nick Wiggill (Posted Thu Feb 09, 2012 10:23 am)&lt;br/&gt;(Rest the latter's soul.) Thanks for the input. These are new concepts to me (I've heard vaguely of branchless logic though I don't grasp the underlying mechanism yet. Any pointers on that would help.&lt;br /&gt;&lt;br /&gt;This is why I mentioned books; I feel there are a lot of blanks I need to fill in.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4723"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/otZGYycNSbo" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Thu Feb 09, 2012 10:23 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4723#p13929</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4723#p13929</feedburner:origLink></item>
                                  <item>
                                  <title>Command queue goes invalid after kernel execution</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/k5yjRl-hPEk/viewtopic.php</link>
                                  <description>by Photovore (Posted Thu Feb 09, 2012 9:53 am)&lt;br/&gt;&lt;div class="quotetitle"&gt;&lt;b&gt;Quote:&lt;/b&gt;&lt;/div&gt;&lt;div class="quotecontent"&gt;...invalid command queue usually means your code has crashed on the device. / Without the complete source, it's anyone's guess as to why.&lt;/div&gt;&lt;br /&gt;Even _with_ the complete source, for my ~2000 lines of code I ultimately used the &amp;quot;méthode tedieuse&amp;quot; -- comment out huge chunks, verify that it runs.  Uncomment out something, try again, till &amp;quot;command queue invalid&amp;quot; occurs.  Of that chunk you've just uncommented, subdivide it and comment out bits of that.  Find culprit and fix.  On to next section.  Tedious as I have many helper subroutines, and the strangest things seemed to set it off.&lt;br /&gt;Of course, this was my first attempt at opencl; I had a project I'd worked on for decades that was a good candidate and I just dove into converting it.  Made it work first, optimized later.&lt;br /&gt;&lt;br /&gt;&lt;div class="quotetitle"&gt;&lt;b&gt;Quote:&lt;/b&gt;&lt;/div&gt;&lt;div class="quotecontent"&gt;(op on advice re: possibly running off the end of an array)&lt;/div&gt;&lt;br /&gt;Make sure that it doesn't!  One of those was the final gotcha for me; kernel would run fine for a random number of seconds, then command queue invalid.  Was running off the end of an array only occasionally.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4704"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/k5yjRl-hPEk" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Thu Feb 09, 2012 9:53 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4704#p13927</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4704#p13927</feedburner:origLink></item>
                                  <item>
                                  <title>Is OpenCL can be used here?</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/hC8B_DcZmh8/viewtopic.php</link>
                                  <description>by notzed (Posted Thu Feb 09, 2012 6:51 am)&lt;br/&gt;&lt;div class="quotetitle"&gt;lloydkl wrote:&lt;/div&gt;&lt;div class="quotecontent"&gt;Thanks. The errors are only due to the lack of my knowledge in OpenCL. I hope I can learn and fix them. &lt;br /&gt;&lt;br /&gt;My suspicion was also about the efficiency - as in my present code there is large amount of memory reallocation. As I mentioned earlier there is a large file, I have 'memory mapped&amp;quot; this file in the host (CPU) program (only file reading is needed). To pass this data (memory buffer) to OpenCL I again have created memory buffers for OpenCL, then copied data...&lt;br /&gt;&lt;br /&gt;I didn't feel it as &amp;quot;efficient&amp;quot;. I feel there must a better mechanism to &amp;quot;share&amp;quot; data between host and GPU without reallocation, isn't it?&lt;br /&gt;&lt;br /&gt;Thanks,&lt;br /&gt;  Lloyd&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;I'm not sure why you'd need to 'reallocate', unless it's part of the algorithm.&lt;br /&gt;&lt;br /&gt;No matter what you do the data still has to get to the cpu or the gpu, mmap still needs to read the disk, but it has no idea how the application will use the data so has to guess (read-ahead).  If you're just streaming data, then you know the precise access patten so can read-ahead yourself both accurately and trivially: it should be possible to beat (or at least equal) mmap since the latency is from the disk access and not from the memcpy's.&lt;br /&gt;&lt;br /&gt;Since the CPU is just shunting data around and GPU is doing the work, it's not like you're saving the cpu cycles for processing either.&lt;br /&gt;&lt;br /&gt;For streaming in opencl you'd just allocate a few buffers and use them cyclically ('multi-buffer'), loading the next one while the current one is being sent to the gpu and so on.&lt;br /&gt;&lt;br /&gt;This multi-buffer approach is very efficient and can hide the i/o and bus latencies.  
Assuming the processing takes longer than the PCI transfers, the cpu should just be queuing work and spending most of it's time waiting around for the GPU to finish the currently-oldest buffer queued.  And if it takes more time than the disk i/o, the disk reads should be finished by the time the gpu is ready for the data too.&lt;br /&gt;&lt;br /&gt;Obviously you have to 'copy' the data to the gpu device, since it uses different memory (unless you're using an APU, in which case the 'copy' functions do nothing).&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4741"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/hC8B_DcZmh8" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Thu Feb 09, 2012 6:51 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4741#p13924</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4741#p13924</feedburner:origLink></item>
                                  <item>
                                  <title>Face/Object detection - are they OpenMAX IL components?</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/yamdzBATgHY/viewtopic.php</link>
                                  <description>by ngnir (Posted Thu Feb 09, 2012 6:16 am)&lt;br/&gt;We are developing a face detection lib.&lt;br /&gt;&lt;br /&gt;Is this kind of lib has a place in the OpenMAX IL?&lt;br /&gt;If so, under which component class?&lt;br /&gt;Which roles?&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Thanks&lt;br /&gt;&lt;br /&gt;Nir&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=31&amp;amp;t=4743"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/yamdzBATgHY" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Thu Feb 09, 2012 6:16 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=31&amp;t=4743#p13923</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=31&amp;t=4743#p13923</feedburner:origLink></item>
                                  <item>
                                  <title>doubts with work items and groups</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/Kx48gGTEtAs/viewtopic.php</link>
                                  <description>by mustang (Posted Thu Feb 09, 2012 5:42 am)&lt;br/&gt;Thank you again!!! And regarding execution, I have an nvidia card (it is a 9400/ION), is it correct that each work group is executed by only one CUDA core (in test deviceQuery it says that it has 2*8=16 CUDA cores) but each CUDA core may execute more than 1 work group as there is no limits for the amount of them?&lt;br /&gt;thank you very much for paying attention to my questions which I guess are very basic!!&lt;br /&gt;&lt;br /&gt;Pablo&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4729"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/Kx48gGTEtAs" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Thu Feb 09, 2012 5:42 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4729#p13922</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4729#p13922</feedburner:origLink></item>
                                  <item>
                                  <title>problem environment mapping using samplerCube</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/PXs4qnW33-8/viewtopic.php</link>
                                  <description>by xcode_dave (Posted Thu Feb 09, 2012 2:31 am)&lt;br/&gt;Hi everyone,&lt;br /&gt;&lt;br /&gt;I'm having a strange issue with environment mapping by using samplerCube for reflections (on an iPhone 4).&lt;br /&gt;&lt;br /&gt;I don't think the issue is with my shader code because I've tested the output without the samplerCube (by using the reflection vector as the gl_FragColor), and it seems to output as I'd expect.&lt;br /&gt;&lt;br /&gt;When I try using the samplerCube though, all fragments are just output as black colour.&lt;br /&gt;&lt;br /&gt;Here is my shader code, first of all the vertex shader:&lt;br /&gt;&lt;br /&gt;&lt;div class="codetitle"&gt;&lt;b&gt;Code:&lt;/b&gt;&lt;/div&gt;&lt;div class="codecontent"&gt;//&amp;nbsp; Shader.vsh&lt;br /&gt;&lt;br /&gt;attribute vec4 position;&lt;br /&gt;attribute vec3 normal;&lt;br /&gt;&lt;br /&gt;uniform mat4 Projection;&lt;br /&gt;uniform mat4 Modelview;&lt;br /&gt;uniform mat3 Model;&lt;br /&gt;uniform vec3 EyePosition;&lt;br /&gt;&lt;br /&gt;varying vec3 ReflectDir;&lt;br /&gt;&lt;br /&gt;void main()&lt;br /&gt;{&lt;br /&gt;&amp;nbsp; &amp;nbsp; gl_Position = Projection * Modelview * position;&lt;br /&gt;&amp;nbsp; &amp;nbsp; // Compute eye direction in object space:&lt;br /&gt;&amp;nbsp; &amp;nbsp; highp vec3 eyeDir = normalize(position.xyz - EyePosition);&lt;br /&gt;&lt;br /&gt;&amp;nbsp; &amp;nbsp; // Reflect eye direction over normal and transform to world space:&lt;br /&gt;&amp;nbsp; &amp;nbsp; ReflectDir = Model * reflect(eyeDir, normal);&lt;br /&gt;}&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;And here is my fragment shader:&lt;br /&gt;&lt;br /&gt;&lt;div class="codetitle"&gt;&lt;b&gt;Code:&lt;/b&gt;&lt;/div&gt;&lt;div class="codecontent"&gt;//&amp;nbsp; Shader.fsh&lt;br /&gt;varying highp vec3 ReflectDir;&lt;br /&gt;uniform samplerCube cubeMap;&lt;br /&gt;&lt;br /&gt;void main()&lt;br /&gt;{&lt;br /&gt;&amp;nbsp; &amp;nbsp; gl_FragColor = vec4(ReflectDir.x, 
ReflectDir.y, ReflectDir.z, 1.0);&lt;br /&gt;&amp;nbsp; &amp;nbsp; //gl_FragColor = textureCube(cubeMap, ReflectDir);&lt;br /&gt;}&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;In my fragment shader, you can see I've commented out the line causing the issue which tries to use the reflection direction vector to sample the cube map.&lt;br /&gt;Above it, you can see my debug code which is using the reflection direction as a colour (to visually debug).&lt;br /&gt;&lt;br /&gt;I get no errors during build or run, just the wrong output.&lt;br /&gt;&lt;br /&gt;To load my cube map, I'm using GLKit for convenience. Here is my code for that:&lt;br /&gt;&lt;br /&gt;&lt;div class="codetitle"&gt;&lt;b&gt;Code:&lt;/b&gt;&lt;/div&gt;&lt;div class="codecontent"&gt;//setup the cube map&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSDictionary *options = &amp;#91;NSDictionary dictionaryWithObject:&amp;#91;NSNumber numberWithBool:YES&amp;#93; forKey:GLKTextureLoaderGenerateMipmaps&amp;#93;;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSString *rightTex = &amp;#91;&amp;#91;NSBundle mainBundle&amp;#93; pathForResource:@&amp;quot;right&amp;quot; ofType:@&amp;quot;png&amp;quot;&amp;#93;;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSString *leftTex = &amp;#91;&amp;#91;NSBundle mainBundle&amp;#93; pathForResource:@&amp;quot;left&amp;quot; ofType:@&amp;quot;png&amp;quot;&amp;#93;;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSString *topTex = &amp;#91;&amp;#91;NSBundle mainBundle&amp;#93; pathForResource:@&amp;quot;top&amp;quot; ofType:@&amp;quot;png&amp;quot;&amp;#93;;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSString *bottomTex = &amp;#91;&amp;#91;NSBundle mainBundle&amp;#93; pathForResource:@&amp;quot;down&amp;quot; ofType:@&amp;quot;png&amp;quot;&amp;#93;;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSString *frontTex = &amp;#91;&amp;#91;NSBundle mainBundle&amp;#93; 
pathForResource:@&amp;quot;front&amp;quot; ofType:@&amp;quot;png&amp;quot;&amp;#93;;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSString *backTex = &amp;#91;&amp;#91;NSBundle mainBundle&amp;#93; pathForResource:@&amp;quot;back&amp;quot; ofType:@&amp;quot;png&amp;quot;&amp;#93;;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSError *error;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; cubeMapInfo = &amp;#91;GLKTextureLoader cubeMapWithContentsOfFiles:&amp;#91;NSArray arrayWithObjects:rightTex, leftTex, topTex, bottomTex, frontTex, backTex, nil&amp;#93; options:options error:&amp;amp;error&amp;#93;;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if (error!=nil){&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSLog(@&amp;quot;ERROR: %@&amp;quot;, error.localizedDescription);&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSLog(@&amp;quot;Failure reason: %@&amp;quot;, error.description);&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSLog(@&amp;quot;Error code: %i&amp;quot;, error.code);&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; } else {&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSLog(@&amp;quot;No error!&amp;quot;);&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;#91;cubeMapInfo retain&amp;#93;;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; NSLog(@&amp;quot;Cube map name: %i&amp;quot;, cubeMapInfo.name);&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; glUniform1i(uniforms&amp;#91;UNIFORM_CUBE_MAP&amp;#93;, cubeMapInfo.name);&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;I'm not sure how I can debug further or what to do next. 
If anyone has any idea then your help would be much appreciated.&lt;br /&gt;&lt;br /&gt;Thanks!&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=9&amp;amp;t=4742"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/PXs4qnW33-8" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Thu Feb 09, 2012 2:31 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=9&amp;t=4742#p13915</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=9&amp;t=4742#p13915</feedburner:origLink></item>
                                  <item>
                                  <title>OpenWF - C  on Android</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/Nr2dT4lImE4/viewtopic.php</link>
                                  <description>by lremes (Posted Wed Feb 08, 2012 11:54 pm)&lt;br/&gt;I'm not aware of any public attempts to port and integrate OpenWF to Android platform.&lt;br /&gt;I haven't taken a closer look, but I assume you would somehow need to integrate OpenWF below SurfaceFlinger in the graphics stack.&lt;br /&gt;&lt;br /&gt;Please note that the Sample Implementation does not use HW acceleration at all. If you want to HW accelerate the composition, for example, using OpenGL ES, you need to write a new backend.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=48&amp;amp;t=4733"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/Nr2dT4lImE4" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Wed Feb 08, 2012 11:54 pm</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=48&amp;t=4733#p13914</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=48&amp;t=4733#p13914</feedburner:origLink></item>
                                  <item>
                                  <title>WFD functions ported to Intel Sandy Bridge chip</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/4KcZBagq2uk/viewtopic.php</link>
                                  <description>by lremes (Posted Wed Feb 08, 2012 11:48 pm)&lt;br/&gt;The public reference implementation is available at &lt;!-- m --&gt;&lt;a class="postlink" href="http://www.khronos.org/registry/wf/"&gt;http://www.khronos.org/registry/wf/&lt;/a&gt;&lt;!-- m --&gt;&lt;br /&gt;&lt;br /&gt;I have not heard that anyone would yet have a public port of WFD for Sandy Bridge.&lt;br /&gt;Any platform specific ports are usually property of the vendors.&lt;br /&gt;&lt;br /&gt;The Sample Implementation contains a generic porting layer implementation for Linux that uses X11/SDL, but it’s just an example. As WFD is an abstraction of display controller and you probably want to get rid of any external dependencies, using that as the baseline does not work.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=48&amp;amp;t=4735"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/4KcZBagq2uk" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Wed Feb 08, 2012 11:48 pm</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=48&amp;t=4735#p13913</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=48&amp;t=4735#p13913</feedburner:origLink></item>
                                  <item>
                                  <title>Can Native Kernels Enqueue Non-Native Kernels?</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/H0w0sQ3hgfk/viewtopic.php</link>
                                  <description>by sean.settle (Posted Wed Feb 08, 2012 5:58 pm)&lt;br/&gt;I know non-native kernels cannot enqueue non-native kernels, but what about native kernels enqueueing non-native kernels?  Say you have a quad-core CPU and four GPUs, then partition the CPU into four single-core devices, each of which would then control a distinct GPU.&lt;br /&gt;&lt;br /&gt;When will out-of-order command queues be supported in almost all platforms?&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4739"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/H0w0sQ3hgfk" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Wed Feb 08, 2012 5:58 pm</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4739#p13909</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4739#p13909</feedburner:origLink></item>
                                  <item>
                                  <title>kernel freezes video output</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/jQ4ksmjHsjI/viewtopic.php</link>
                                  <description>by Yours3lf (Posted Wed Feb 08, 2012 4:52 pm)&lt;br/&gt;Hi,&lt;br /&gt;&lt;br /&gt;I wrote a tile based deferred shading kernel, it compiles, but it makes the video output freeze after a few seconds of running with ~70FPS. Only the graphics card is hanging, the system is still responsive through ssh. I'm using Linux Mint 12.1 64 bit with Catalyst 12.1 64 bit, and APP SDK 2.6&lt;br /&gt;&lt;br /&gt;here's the kernel:&lt;br /&gt;&lt;div class="codetitle"&gt;&lt;b&gt;Code:&lt;/b&gt;&lt;/div&gt;&lt;div class="codecontent"&gt;&amp;nbsp; &amp;nbsp; __constant float far = -10000.0f; //far plane distance&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; __constant float near = -1.0f; //near plane distance&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; __constant sampler_t the_sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; __constant float cutoff = 0.25f; //0.005f&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; __constant int attenuation_type = 0; //linear or full attenuation?&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; float my_abs( float var ) //these floating point operations aren't supported in opencl 1.1&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; if ( var &amp;lt; 0 )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; return -var;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; else&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; return var;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; float my_mix( 
float x, float y, float weigth )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; return x * ( 1.0f - weigth ) + y * weigth;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; float3 my_reflect( float3 incident, float3 normal )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; return incident - 2.0f * dot( normal, incident ) * normal;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; float3 decode_normals_spheremap( float4 n ) //decode normals from spheremap encoding&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float4 nn = n * ( float4 )( 2.0f, 2.0f, 0.0f, 0.0f ) + ( float4 )( -1.0f, -1.0f, 1.0f, -1.0f );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float l = dot( nn.xyz, -nn.xyw );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; nn.z = l;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; nn.xy *= sqrt( l );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; return nn.xyz * 2.0f + ( float3 )( 0.0f, 0.0f, -1.0f );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; float3 decode_linear_depth( float4 linear_depth, float4 position ) //decode linear depth into view space position&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; return ( float3 )( position.xy * ( far / position.z ), far ) * linear_depth.x;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; __kernel void main( __read_only image2d_t albedo, //diffuse surface color from the g-buffer&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; 
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; __read_only image2d_t normals, //normals encoded using spheremap encoding&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; __read_only image2d_t depth, //linear depth&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; __write_only image2d_t result, //the output buffer that stores lighting data&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; __global const float* far_plane, //the lower left and upper right corners of the far plane&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; __global const float* in_view_pos, //view space camera position&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; __global const float* in_lights, //1024 lights {light pos&amp;#91;3&amp;#93;, diffuse_color&amp;#91;3&amp;#93;, radius&amp;#91;1&amp;#93;, specular intensity&amp;#91;1&amp;#93; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; __global const float* in_num_of_lights, //number of incoming lights (1024)&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; __global const float* in_projection_matrix ) //the projection matrix is used for frustum culling&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; /* &lt;br 
/&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;* Per pixel calculations (global) &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;*/&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; int2 coords = ( int2 )( get_global_id( 0 ), get_global_id( 1 ) );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float4 raw_albedo = read_imagef( albedo, the_sampler, coords );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float4 raw_normal = read_imagef( normals, the_sampler, coords ); //this will store the decoded normals&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float4 out_color = ( float4 )( 0.0f ); //this will store the resulting color&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; out_color.w = 1.0f;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float4 raw_depth; //this will store the decoded view space position&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; int2 global_size = ( int2 )( get_global_size( 0 ), get_global_size( 1 ) );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; /* &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;* Per tile data &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;*/&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float3 view_pos; //this will store the view space position (uniform among the workgroups, but stored as local for speedup)&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; int num_of_lights; //num of lights (same here)&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; int2 local_coords;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; int2 local_size;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; int workgroup_index;&amp;nbsp; &lt;br /&gt;&amp;nbsp; 
&amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; __local int tile_lights&amp;#91;1024&amp;#93;; //index of the lights visible per tile&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; __local int num_of_tile_lights; //number of lights per tile&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float2 tile_scale; //used for calculating frustum culling, taken from Intel's sample&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float2 tile_bias;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float4 column_1;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float4 column_2;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float4 column_4;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; float4 frustum_planes&amp;#91;6&amp;#93;;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; /* &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;* Check for skybox &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;*/&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; bool early_rejection = ( length( raw_normal.xy ) == 0.0f );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; if ( early_rejection )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; out_color = raw_albedo;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; else&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; local_coords = ( int2 )( get_local_id( 0 ), get_local_id( 1 ) );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; local_size = ( int2 )( 
get_local_size( 0 ), get_local_size( 1 ) );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; workgroup_index = local_coords.y * local_size.x + local_coords.x;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float4 ll, ur;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; raw_depth = read_imagef( depth, the_sampler, coords );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; ll = ( float4 )( far_plane&amp;#91;0&amp;#93;, far_plane&amp;#91;1&amp;#93;, far_plane&amp;#91;2&amp;#93;, 1.0f );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; ur = ( float4 )( far_plane&amp;#91;3&amp;#93;, far_plane&amp;#91;4&amp;#93;, far_plane&amp;#91;5&amp;#93;, 1.0f );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; //texture coordinate &amp;#91;0...1&amp;#93; for input processing&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float2 texel = ( float2 )(( float )( coords.x ) / ( float )( global_size.x ), ( float )( coords.y ) / ( float )( global_size.y ) );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; raw_depth.xyz = decode_linear_depth( raw_depth, ( float4 )( my_mix( ll.x, ur.x, texel.x ), my_mix( ll.y, ur.y, texel.y ), ll.z, 1.0f ) );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; raw_normal.xyz = decode_normals_spheremap( raw_normal );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; view_pos = vload3( 0, in_view_pos );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; num_of_lights = ( int )in_num_of_lights&amp;#91;0&amp;#93;;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; 
&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; //I'm not sure if the maths here is correct due to OpenGL and DirectX using different matrices, but this shouldnt matter&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; tile_scale = ( float2 )( global_size.x, global_size.y ) * ( 1.0f / ( float )( 2.0f * local_size.x ) );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; tile_bias = tile_scale - ( float2 )( local_coords.x, local_coords.y );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; column_1 = ( float4 )( in_projection_matrix&amp;#91;5&amp;#93; * tile_scale.x, 0.0f, tile_bias.x, 0.0f );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; column_2 = ( float4 )( 0.0f, -in_projection_matrix&amp;#91;10&amp;#93; * tile_scale.y, tile_bias.y, 0.0f );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; column_4 = ( float4 )( 0.0f, 0.0f, 1.0f, 0.0f );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; frustum_planes&amp;#91;0&amp;#93; = column_4 - column_1;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; frustum_planes&amp;#91;1&amp;#93; = column_4 + column_1;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; frustum_planes&amp;#91;2&amp;#93; = column_4 - column_2;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; frustum_planes&amp;#91;3&amp;#93; = column_4 + column_2;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; frustum_planes&amp;#91;4&amp;#93; = ( float4 )( 0.0f, 0.0f, -1.0f, near );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; frustum_planes&amp;#91;5&amp;#93; = ( float4 )( 0.0f, 0.0f, 1.0f, far );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; for ( int c = 0; c &amp;lt; 4; c++ ) //normalize frustum plane normals&amp;nbsp; 
&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; frustum_planes&amp;#91;c&amp;#93;.xyz *= 1.0f / length( frustum_planes&amp;#91;c&amp;#93;.xyz );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; * Per workgroup (tile) calculations (local) &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; */&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if ( workgroup_index == 0 )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; num_of_tile_lights = 0;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; barrier( CLK_LOCAL_MEM_FENCE );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; if ( !early_rejection )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; for ( int c = workgroup_index; c &amp;lt; num_of_lights; c += local_size.x * local_size.y ) //cull each light per tile, each thread in a tile processes one light&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if ( c &amp;lt; num_of_lights )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; bool in_frustum = true;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 
&amp;nbsp; float attenuation_end = 0.0f;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if ( attenuation_type == 0 )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; attenuation_end = ( float )( in_lights&amp;#91;c * 8 + 6&amp;#93; ) / ( float )( cutoff ); //radius / cutoff&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; else&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; attenuation_end = ( float )( in_lights&amp;#91;c * 8 + 6&amp;#93; ); //radius&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; for ( int d = 0; d &amp;lt; 6; d++ ) //cull each light based on the distance where it will shine and the frustum defined by the tile&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float e = dot( frustum_planes&amp;#91;d&amp;#93;, ( float4 )( in_lights&amp;#91;c * 8 + 0&amp;#93;, in_lights&amp;#91;c * 8 + 1&amp;#93;, in_lights&amp;#91;c * 8 + 2&amp;#93;, 1.0f ) );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; in_frustum = in_frustum &amp;amp;&amp;amp; ( e &amp;gt;= -attenuation_end );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; 
&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if ( in_frustum ) //if the light is in the frustum, then store its index (if I comment this out, the kernel runs, but doesn't cull lights)&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; int index = atomic_inc( &amp;amp;num_of_tile_lights );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; tile_lights&amp;#91;index&amp;#93; = c;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if ( num_of_tile_lights != 0 )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; out_color = ( float4 )( 0.0f, 1.0f, 0.0f, 1.0f );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; barrier( CLK_LOCAL_MEM_FENCE );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; /* &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;* Per light calculations &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;*/&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; if ( !early_rejection )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; 
&amp;nbsp; &amp;nbsp; for ( int c = 0; c &amp;lt; num_of_tile_lights; c++ ) //draw each light per tile&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; int index = tile_lights&amp;#91;c&amp;#93;; //get back the light index&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float3 light_pos = ( float3 )( in_lights&amp;#91;index * 8 + 0&amp;#93;, in_lights&amp;#91;index * 8 + 1&amp;#93;, in_lights&amp;#91;index * 8 + 2&amp;#93; ); //gather light data using the index&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float light_radius = in_lights&amp;#91;index * 8 + 6&amp;#93;;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; //calculate blinn-phong lighting with custom attenuation&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float3 light_dir = light_pos - raw_depth.xyz;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float distance = length( light_dir );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; light_dir /= distance;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float coeff, attenuation;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if ( attenuation_type == 0 )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; coeff = max( distance - light_radius, 0.0f ) / light_radius + 1.0f;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; attenuation = max(( 1.0f / ( coeff * coeff ) - cutoff ) / ( 1.0f - cutoff ), 0.0f );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; 
&amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; else&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; attenuation = ( light_radius - distance ) / ( light_radius * 0.01f ) * 0.01f;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if ( attenuation &amp;gt; 0.0f )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float3 light_diffuse_color = ( float3 )( in_lights&amp;#91;index * 8 + 3&amp;#93;, in_lights&amp;#91;index * 8 + 4&amp;#93;, in_lights&amp;#91;index * 8 + 5&amp;#93; );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float light_specular_power = ( float )in_lights&amp;#91;index * 8 + 3&amp;#93;;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float3 view_dir = normalize( view_pos - raw_depth.xyz );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float3 half_vector = ( light_dir + view_dir ) * 0.5f;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float n_dot_l = max( dot( raw_normal.xyz, light_dir ), 0.0f );&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; out_color.xyz += raw_albedo.xyz * light_diffuse_color * n_dot_l * attenuation;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; float n_dot_h = pow( max( dot( raw_normal.xyz, half_vector ), 0.0f ), light_specular_power );&amp;nbsp; &lt;br 
/&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; out_color.xyz += light_diffuse_color * n_dot_h * attenuation;&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &amp;nbsp;&lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; if ( coords.x &amp;lt; global_size.x &amp;amp;&amp;amp; coords.y &amp;lt; global_size.y )&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; {&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; write_imagef( result, coords, out_color ); //write the calculated light data to the result buffer (texture)&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; }&amp;nbsp; &lt;br /&gt;&amp;nbsp; &amp;nbsp; }&amp;nbsp; &amp;nbsp;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;best regards,&lt;br /&gt;Yours3!f&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4738"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/jQ4ksmjHsjI" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Wed Feb 08, 2012 4:52 pm</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4738#p13907</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4738#p13907</feedburner:origLink></item>
                                  <item>
                                  <title>multiple OpenCL applications and global memory effects</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/OBU4FyP4Qv0/viewtopic.php</link>
                                  <description>by david.garcia (Posted Wed Feb 08, 2012 4:07 pm)&lt;br/&gt;Different OpenCL contexts are isolated from each other in a similar manner that different processes are isolated.&lt;br /&gt;&lt;br /&gt;Note I'm not saying that different OpenCL contexts are the same thing as different processes.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4737"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/OBU4FyP4Qv0" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Wed Feb 08, 2012 4:07 pm</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4737#p13906</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4737#p13906</feedburner:origLink></item>
                                  <item>
                                  <title>Why can kernels take __local pointer arguments?</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/6sIvJchFlDU/viewtopic.php</link>
                                  <description>by homemade-jam (Posted Wed Feb 08, 2012 3:51 am)&lt;br/&gt;Great thanks. I was clarifying since the docs I have read are very clear on what it can do but not on what it &lt;span style="font-weight: bold"&gt;can't&lt;/span&gt; do, so just wanted to clarify.&lt;br /&gt;&lt;br /&gt;Thanks.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4730"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/6sIvJchFlDU" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Wed Feb 08, 2012 3:51 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4730#p13904</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4730#p13904</feedburner:origLink></item>
                                  <item>
                                  <title>Required atomic built-in functions</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/KK1blyDWrFs/viewtopic.php</link>
                                  <description>by notzed (Posted Tue Feb 07, 2012 4:16 pm)&lt;br/&gt;&lt;div class="quotetitle"&gt;sean.settle wrote:&lt;/div&gt;&lt;div class="quotecontent"&gt;I think I just found part of my answer.  In OpenCL 1.0 basic atomic functions were optional, but as of OpenCL 1.1 they are required.  If that is wrong please correct me.&lt;br /&gt;&lt;br /&gt;Well, atomic functions pretty much ruins my hope of making an abstract device that integrates several similar devices.  I could only think of how to do it without any atomic functions.&lt;br /&gt;&lt;br /&gt;Any reasoning behind making it obligatory in OpenCL 1.1?&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;OpenCL is a software abstraction: you can implement atomics however you want, they just have to honour the contract.&lt;br /&gt;&lt;br /&gt;e.g. for many devices you could break the kernels up into atomically bounded sections and run the kernel parts separately and then synchronise on the host.&lt;br /&gt;&lt;br /&gt;I'm not saying it would be efficient, but I mean what do you really expect to be able to do anyway?  The atomic operations require very specific specialised hardware in order to run fast, and without that you will have no choice but to resort to *host-based* software.&lt;br /&gt;&lt;br /&gt;Global atomics are so slow on AMD hardware for example I wouldn't use them except for very rarely-executed code (i.e. it's possible calling the host already), so a high overhead is already expected.  But they have global counters implemented in hardware to get around that ...&lt;br /&gt;&lt;br /&gt;Re your earlier query there's nothing to say a research project implemented the full specification in the first place.  It is possible to do without atomics entirely, at a cost of memory and extra processing steps.&lt;br /&gt;&lt;br /&gt;Amalgamating different hardware with different performance characteristics will be a challenge!  
Often different hardware requires a different coding approach, it runs at a different speed, and so on: managing all that scheduling and keeping the memory close to the right kernels will be difficult.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4716"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/KK1blyDWrFs" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Tue Feb 07, 2012 4:16 pm</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4716#p13898</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4716#p13898</feedburner:origLink></item>
                                  <item>
                                  <title>Huge kernel overhead on Mac</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/XYYtvJU5ijI/viewtopic.php</link>
                                  <description>by notzed (Posted Tue Feb 07, 2012 4:03 pm)&lt;br/&gt;&lt;div class="quotetitle"&gt;yoavhacohen wrote:&lt;/div&gt;&lt;div class="quotecontent"&gt;That makes sense, thanks.&lt;br /&gt;I think that the specification should allow prevention of such lazy operations by adding flags to the kernel and the buffer constructors.&lt;br /&gt;It might be the case that the application can do the allocation asynchronously but not the running of the kernel itself. In such cases, the lazy approach is a waste of time.&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;How is the lazy approach a waste of time?  It only happens once, it still has to happen once no matter what happens.  If you're doing any micro-benchmarks it is a given that you cannot get reliable results if you do not let the system `warm up' first - i.e. do a couple of dummy runs.&lt;br /&gt;&lt;br /&gt;BTW the 'lazy allocation' is at the operating system level, and beyond such a specification's scope.  Although a unix process can be given a big virtual address space, those pages do not exist until they are accessed.  This is not something a driver could change.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4720"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/XYYtvJU5ijI" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Tue Feb 07, 2012 4:03 pm</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4720#p13897</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4720#p13897</feedburner:origLink></item>
                                  <item>
                                  <title>picking</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/4bSksdRNA88/viewtopic.php</link>
                                  <description>by gibubu (Posted Tue Feb 07, 2012 4:03 pm)&lt;br/&gt;On user click of a particular object, I want to find the position of that click within the texture, &lt;br /&gt;&lt;br /&gt;After that, I'll then update that texture with a red dot at that position.&lt;br /&gt;&lt;br /&gt;I only need help with finding the position of that click.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=35&amp;amp;t=4736"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/4bSksdRNA88" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Tue Feb 07, 2012 4:03 pm</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=35&amp;t=4736#p13896</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=35&amp;t=4736#p13896</feedburner:origLink></item>
                                  <item>
                                  <title>Memory Allocation and time collection</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/3dsewqgGrb8/viewtopic.php</link>
                                  <description>by notzed (Posted Tue Feb 07, 2012 3:58 pm)&lt;br/&gt;&lt;div class="quotetitle"&gt;homemade-jam wrote:&lt;/div&gt;&lt;div class="quotecontent"&gt;&lt;div class="quotetitle"&gt;notzed wrote:&lt;/div&gt;&lt;div class="quotecontent"&gt;Private memory is just the same as global memory if it isn't in a register.  It's in a register if it can fit, and if it is an array with fixed indexing (or indexing that can be calculated at compile time).&lt;br /&gt;&lt;br /&gt;Anyway this is pretty fundamental computer architecture: yes, if you can always use the fastest memory, then the next, and so on.  So that means registers, then shared memory, then private/global memory.&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;Ok so I'm slightly confused now. Private memory is only accessible within a workitem; ie. you can't reach it from other workitems? Therefore, how does OpenCL know that it can be put into private memory? When you say shared memory, do you mean local memory? Or are we getting muddled with CUDA/OpenCL?&lt;br /&gt;&lt;br /&gt;In my use case, caching isn't of much use since I only use each piece of data once.&lt;/div&gt;&lt;br /&gt;Ahh yeah sorry, shared==local: apart from picking it up from nvidia's 'ported' opencl docs, it is the only memory shared amongst work items.&lt;br /&gt;&lt;br /&gt;Private memory is private yes (oddly enough ...).  If it fits/can be it goes into a register.  The compiler knows it can go into private as private is the default qualifier for variable declarations.  local and global are the other qualifiers.&lt;br /&gt;&lt;br /&gt;Well you implied you were looping on the data in your original question.&lt;br /&gt;&lt;br /&gt;But like i said, it depends on the problem.  I'm sure you'll end up having more than a single problem to solve though, so you will eventually come across a use for it.  
Local memory can be used for more than a cache too, for example to re-arrange scatter-gather global memory requests for more efficient lookup, or particularly to communicate partial results amongst work-items (for reductions).  But if you are only reading data once, and it's only being read sequentially (i.e. fully coalesced), then local memory just adds overhead.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4722"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/3dsewqgGrb8" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Tue Feb 07, 2012 3:58 pm</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4722#p13895</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4722#p13895</feedburner:origLink></item>
                                  <item>
                                  <title>OpenCL device problems</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/ntxCIvQmzVw/viewtopic.php</link>
                                  <description>by wishgranter (Posted Tue Feb 07, 2012 3:29 pm)&lt;br/&gt;have uninstalled the AMD CPU driver and the app sees my Nvidia GPU back...... Thanx&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4725"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/ntxCIvQmzVw" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Tue Feb 07, 2012 3:29 pm</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4725#p13891</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4725#p13891</feedburner:origLink></item>
                                  <item>
                                  <title>Example for Random Number Generator?</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/jwcTjwL7bc0/viewtopic.php</link>
                                  <description>by yoavhacohen (Posted Tue Feb 07, 2012 10:04 am)&lt;br/&gt;&lt;div class="quotetitle"&gt;&lt;b&gt;Quote:&lt;/b&gt;&lt;/div&gt;&lt;div class="quotecontent"&gt;If so, I can't really make it fast without consuming more memory and have a states vector per work-item, right?&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;Not right: it's possible to save the memory overhead using a counter-based RNG:&lt;br /&gt;&lt;!-- m --&gt;&lt;a class="postlink" href="http://www.openclblog.com/2011/11/gpus-and-random-number-generation.html"&gt;http://www.openclblog.com/2011/11/gpus- ... ation.html&lt;/a&gt;&lt;!-- m --&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=3522"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/jwcTjwL7bc0" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Tue Feb 07, 2012 10:04 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=3522#p13887</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=3522#p13887</feedburner:origLink></item>
                                  <item>
                                  <title>Drawing masked sprites (textured quad with alpha channel)</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/VfIEu42-f2I/viewtopic.php</link>
                                  <description>by lucafr (Posted Tue Feb 07, 2012 9:39 am)&lt;br/&gt;Hi, i would like to know how to draw a sprite (rendered using a textured quad) on the screen in such a way that some parts of it are not rendered at all (masked).&lt;br /&gt;&lt;br /&gt;For example, suppose i have a wall with a window at its center. The window is open and i want to render a sprite inside the window. Suppose that the sprite is animated: the character is walking from left to right. I want it to appear behind the window gradually.&lt;br /&gt;&lt;br /&gt;The easy way is to use glScissor. Even if the window is not perfectly rectangular, i can use, for glScissor, a slightly bigger rectangle.&lt;br /&gt;&lt;br /&gt;But what if I want to use a non-rectangular mask (and even a non-polygonal but complex mask)?&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=4&amp;amp;t=4734"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/VfIEu42-f2I" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Tue Feb 07, 2012 9:39 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=4&amp;t=4734#p13886</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=4&amp;t=4734#p13886</feedburner:origLink></item>
                                  <item>
                                  <title>OpenMAX AL DTV sample implementation</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/-IWkqcN6jtA/viewtopic.php</link>
                                  <description>by francois (Posted Tue Feb 07, 2012 3:24 am)&lt;br/&gt;Hi&lt;br /&gt;&lt;br /&gt;The DTV extension looks very promising.&lt;br /&gt;Is there a plan for a sample implementation ?&lt;br /&gt;&lt;br /&gt;Thanks&lt;br /&gt;Francois&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=51&amp;amp;t=4732"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/-IWkqcN6jtA" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Tue Feb 07, 2012 3:24 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=51&amp;t=4732#p13883</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=51&amp;t=4732#p13883</feedburner:origLink></item>
                                  <item>
                                  <title>Add camera functions</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/nc1eO5RE_2A/viewtopic.php</link>
                                  <description>by davepetalcurin (Posted Tue Feb 07, 2012 12:32 am)&lt;br/&gt;Hi, maybe you can demonstrate it or further describe what you wanted so we can figure it out.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=5&amp;amp;t=450"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/nc1eO5RE_2A" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Tue Feb 07, 2012 12:32 am</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=5&amp;t=450#p13880</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=5&amp;t=450#p13880</feedburner:origLink></item>
                                  <item>
                                  <title>PTX file to work with clCreateProgramWithBinary</title>
                                  <link>http://feedproxy.google.com/~r/khr-message-board-topics/~3/KhWTrAPKC9M/viewtopic.php</link>
                                  <description>by luizdrumond (Posted Mon Feb 06, 2012 3:44 pm)&lt;br/&gt;Hi,&lt;br /&gt;&lt;br /&gt;My question is very simple.&lt;br /&gt;&lt;br /&gt;I want to use clCreateProgramWithBinary, however i need the .ptx file to work with it.&lt;br /&gt;Where is the file? I can't find it. &lt;br /&gt;I already used clCreateProgramWithSource before to try to use clCreateProgramWithBinary.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I work with VS2010.&lt;br /&gt;&lt;br /&gt;In AMD SDK the file is also .ptx ?&lt;br /&gt;&lt;br /&gt;Thanks,&lt;br /&gt;&lt;br /&gt;Luiz.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;amp;t=4728"&gt;&lt;/a&gt;&lt;hr /&gt;&lt;img src="http://feeds.feedburner.com/~r/khr-message-board-topics/~4/KhWTrAPKC9M" height="1" width="1"/&gt;</description>
                                        					    <pubDate>Mon Feb 06, 2012 3:44 pm</pubDate>                                        
                            	    <guid isPermaLink="false">http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4728#p13870</guid>		   					
                                      <feedburner:origLink>http://www.khronos.org/message_boards/viewtopic.php?f=28&amp;t=4728#p13870</feedburner:origLink></item></channel></rss>

