<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet href="http://feeds.feedburner.com/~d/styles/rss1full.xsl" type="text/xsl" media="screen"?><?xml-stylesheet href="http://feeds.feedburner.com/~d/styles/itemcontent.css" type="text/css" media="screen"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/" xmlns="http://purl.org/rss/1.0/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">

<channel rdf:about="http://www.ldodds.com/blog/">
<title>Lost Boy</title>
<link>http://www.ldodds.com/blog/</link>
<description>A journal of no fixed aims or direction by Leigh Dodds. If you see him wandering, point him in the direction of home.</description>
<dc:language>en-us</dc:language>
<dc:creator />
<dc:date>2008-07-18T00:00:00-05:00</dc:date>
<admin:generatorAgent rdf:resource="http://www.movabletype.org/?v=2.61" />

<items>
<rdf:Seq><rdf:li rdf:resource="http://del.icio.us/ldodds#2008-07-17" /><rdf:li rdf:resource="http://del.icio.us/ldodds#2008-07-15" /><rdf:li rdf:resource="http://del.icio.us/ldodds#2008-07-13" /><rdf:li rdf:resource="http://del.icio.us/ldodds#2008-07-12" /><rdf:li rdf:resource="http://del.icio.us/ldodds#2008-07-11" /><rdf:li rdf:resource="http://del.icio.us/ldodds#2008-07-10" /><rdf:li rdf:resource="http://del.icio.us/ldodds#2008-07-08" /><rdf:li rdf:resource="tag:flickr.com,2004:/photo/2613153088" /><rdf:li rdf:resource="tag:flickr.com,2004:/photo/2521030398" /><rdf:li rdf:resource="tag:flickr.com,2004:/photo/2521028864" /><rdf:li rdf:resource="tag:flickr.com,2004:/photo/2521027506" /><rdf:li rdf:resource="tag:flickr.com,2004:/photo/2521026236" /><rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000331.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000330.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000329.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000328.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000327.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000326.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000325.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000324.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000323.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000322.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000321.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000320.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000318.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000316.html" />
<rdf:li rdf:resource="http://www.ldodds.com/blog/archives/000315.html" />
</rdf:Seq>
</items>

<cc:license xmlns:cc="http://web.resource.org/cc/" rdf:resource="http://creativecommons.org/licenses/by-nc-sa/2.0/" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" href="http://feeds.feedburner.com/LostBoy" type="application/rss+xml" /><feedburner:browserFriendly>This is an XML content feed. It is intended to be viewed in a newsreader or syndicated to another site.</feedburner:browserFriendly></channel>

<item rdf:about="http://del.icio.us/ldodds#2008-07-17"><title>Links for 2008-07-17 [del.icio.us]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/338691380/ldodds</link><dc:date>2008-07-18T00:00:00-05:00</dc:date><content:encoded><![CDATA[<ul>
<li><a href="http://ejohn.org/projects/jspark/">John Resig - Javascript Sparklines Library</a></li>
<li><a href="http://thisissand.com/">thisissand.com</a></li>
</ul>]]></content:encoded><description>&lt;ul&gt;
&lt;li&gt;&lt;a href="http://ejohn.org/projects/jspark/"&gt;John Resig - Javascript Sparklines Library&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://thisissand.com/"&gt;thisissand.com&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;</description><feedburner:origLink>http://del.icio.us/ldodds#2008-07-17</feedburner:origLink></item><item rdf:about="http://del.icio.us/ldodds#2008-07-15"><title>Links for 2008-07-15 [del.icio.us]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/336764952/ldodds</link><dc:date>2008-07-16T00:00:00-05:00</dc:date><content:encoded><![CDATA[<ul>
<li><a href="http://code.google.com/p/persevere-framework/">persevere-framework - Google Code</a><br/>
Persevere is an open source set of tools for persistence and distributed computing using an intuitive standards-based JSON interfaces of HTTP REST, JSON-RPC, JSONPath, and HTTP Channels</li>
<li><a href="http://code.google.com/p/the-cassandra-project/">the-cassandra-project - Google Code</a><br/>
Cassandra is a distributed storage system for managing structured data while providing reliability at a massive scale. Structured storage on a P2P network</li>
</ul>]]></content:encoded><description>&lt;ul&gt;
&lt;li&gt;&lt;a href="http://code.google.com/p/persevere-framework/"&gt;persevere-framework - Google Code&lt;/a&gt;&lt;br/&gt;
Persevere is an open source set of tools for persistence and distributed computing using an intuitive standards-based JSON interfaces of HTTP REST, JSON-RPC, JSONPath, and HTTP Channels&lt;/li&gt;
&lt;li&gt;&lt;a href="http://code.google.com/p/the-cassandra-project/"&gt;the-cassandra-project - Google Code&lt;/a&gt;&lt;br/&gt;
Cassandra is a distributed storage system for managing structured data while providing reliability at a massive scale. Structured storage on a P2P network&lt;/li&gt;
&lt;/ul&gt;</description><feedburner:origLink>http://del.icio.us/ldodds#2008-07-15</feedburner:origLink></item><item rdf:about="http://del.icio.us/ldodds#2008-07-13"><title>Links for 2008-07-13 [del.icio.us]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/334790846/ldodds</link><dc:date>2008-07-14T00:00:00-05:00</dc:date><content:encoded><![CDATA[<ul>
<li><a href="http://www.mulgara.org/">Welcome to the new Mulgara project!</a><br/>
Mulgara is a scalable RDF database written entirely in Java.
Mulgara is an Open Source fork of Kowari.</li>
<li><a href="http://jrdf.sourceforge.net/">JRDF - Java RDF API</a><br/>
JRDF is an attempt to create a standard set of APIs and base implementations to RDF using the latest version of the Java language. A key aspect will be to ensure a high degree of modularity and to follow standard Java conventions.</li>
</ul>]]></content:encoded><description>&lt;ul&gt;
&lt;li&gt;&lt;a href="http://www.mulgara.org/"&gt;Welcome to the new Mulgara project!&lt;/a&gt;&lt;br/&gt;
Mulgara is a scalable RDF database written entirely in Java.
Mulgara is an Open Source fork of Kowari.&lt;/li&gt;
&lt;li&gt;&lt;a href="http://jrdf.sourceforge.net/"&gt;JRDF - Java RDF API&lt;/a&gt;&lt;br/&gt;
JRDF is an attempt to create a standard set of APIs and base implementations to RDF using the latest version of the Java language. A key aspect will be to ensure a high degree of modularity and to follow standard Java conventions.&lt;/li&gt;
&lt;/ul&gt;</description><feedburner:origLink>http://del.icio.us/ldodds#2008-07-13</feedburner:origLink></item><item rdf:about="http://del.icio.us/ldodds#2008-07-12"><title>Links for 2008-07-12 [del.icio.us]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/334026575/ldodds</link><dc:date>2008-07-13T00:00:00-05:00</dc:date><content:encoded><![CDATA[<ul>
<li><a href="http://code.google.com/p/hrdf/">hrdf - Google Code</a></li>
<li><a href="http://sw.deri.org/2007/02/swsepaper/iswc2007.pdf">YARS2: A Federated Repository for Querying Graph Structured Data from the Web</a></li>
<li><a href="http://bnode.org/blog/2005/12/19/pragmatic-design-considerations-for-a-php-based-sparql-store">Pragmatic design considerations for a PHP-based SPARQL store - benjamin nowack's blog</a><br/>
Notes on indexing and storage strategies</li>
<li><a href="http://web.mit.edu/dna/www/abadirdf.pdf">Scalable Semantic Web Data Management Using Vertical Partitioning</a></li>
<li><a href="http://en.wikipedia.org/wiki/C-Store">C-Store - Wikipedia, the free encyclopedia</a><br/>
C-Store differs from most traditional relational database management system (RDBMS) designs in many ways, perhaps most notably in that it stores data by column and not by row, and that it is not optimized for writing.</li>
<li><a href="http://db.lcs.mit.edu/projects/cstore/abadicidr07.pdf">Scalable Semantic Web Data Management Using Vertical</a></li>
<li><a href="http://www.hpl.hp.com/techreports/2005/HPL-2005-171.html">Note on database layouts for SPARQL datastores</a><br/>
This report summarizes some lessons learnt while implementing a SPARQL datastore on top of ModelRDB, the database backend of the Jena Semantic Web Framework, and puts forward recommendations for the database layout of a future, dedicated SPARQL datastore.</li>
<li><a href="http://gearon.blogspot.com/2004/05/addendum-i-noticed-someone-referring.html">Working notes</a><br/>
Notes on Kowari's indexing strategies for storing RDF data</li>
<li><a href="http://www.phildawes.net/blog/2004/10/14/optimising-mysql-tables-for-rdf-store/">Optimising mysql tables for rdf storage</a><br/>
Notes on MySQL table structures for RDF storage</li>
<li><a href="http://citeseerx.ist.psu.edu/viewdoc/summary;jsessionid=95F8796D76A99F32F1D0919C6DEBB11F?doi=10.1.1.99.8423">ABSTRACT Semantic Storage: Overview and Assessment - CiteSeerX</a><br/>
Literature review (from around 2005?) looking at different triple stores</li>
<li><a href="http://citeseerx.ist.psu.edu/viewdoc/summary;jsessionid=95F8796D76A99F32F1D0919C6DEBB11F?doi=10.1.1.100.4934">SPARQL query processing with conventional relational database systems - CiteSeerX</a><br/>
Steve Harris paper discussing evolution of 3Store triplestore to accommodate SPARQL</li>
<li><a href="http://en.wikipedia.org/wiki/Table_of_mathematical_symbols">Table of mathematical symbols - Wikipedia, the free encyclopedia</a><br/>
handy reference for the mathematically challenged</li>
</ul>]]></content:encoded><description>&lt;ul&gt;
&lt;li&gt;&lt;a href="http://code.google.com/p/hrdf/"&gt;hrdf - Google Code&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://sw.deri.org/2007/02/swsepaper/iswc2007.pdf"&gt;YARS2: A Federated Repository for Querying Graph Structured Data from the Web&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://bnode.org/blog/2005/12/19/pragmatic-design-considerations-for-a-php-based-sparql-store"&gt;Pragmatic design considerations for a PHP-based SPARQL store - benjamin nowack's blog&lt;/a&gt;&lt;br/&gt;
Notes on indexing and storage strategies&lt;/li&gt;
&lt;li&gt;&lt;a href="http://web.mit.edu/dna/www/abadirdf.pdf"&gt;Scalable Semantic Web Data Management Using Vertical Partitioning&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://en.wikipedia.org/wiki/C-Store"&gt;C-Store - Wikipedia, the free encyclopedia&lt;/a&gt;&lt;br/&gt;
C-Store differs from most traditional relational database management system (RDBMS) designs in many ways, perhaps most notably in that it stores data by column and not by row, and that it is not optimized for writing.&lt;/li&gt;
&lt;li&gt;&lt;a href="http://db.lcs.mit.edu/projects/cstore/abadicidr07.pdf"&gt;Scalable Semantic Web Data Management Using Vertical&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.hpl.hp.com/techreports/2005/HPL-2005-171.html"&gt;Note on database layouts for SPARQL datastores&lt;/a&gt;&lt;br/&gt;
This report summarizes some lessons learnt while implementing a SPARQL datastore on top of ModelRDB, the database backend of the Jena Semantic Web Framework, and puts forward recommendations for the database layout of a future, dedicated SPARQL datastore.&lt;/li&gt;
&lt;li&gt;&lt;a href="http://gearon.blogspot.com/2004/05/addendum-i-noticed-someone-referring.html"&gt;Working notes&lt;/a&gt;&lt;br/&gt;
Notes on Kowari's indexing strategies for storing RDF data&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.phildawes.net/blog/2004/10/14/optimising-mysql-tables-for-rdf-store/"&gt;Optimising mysql tables for rdf storage&lt;/a&gt;&lt;br/&gt;
Notes on MySQL table structures for RDF storage&lt;/li&gt;
&lt;li&gt;&lt;a href="http://citeseerx.ist.psu.edu/viewdoc/summary;jsessionid=95F8796D76A99F32F1D0919C6DEBB11F?doi=10.1.1.99.8423"&gt;ABSTRACT Semantic Storage: Overview and Assessment - CiteSeerX&lt;/a&gt;&lt;br/&gt;
Literature review (from around 2005?) looking at different triple stores&lt;/li&gt;
&lt;li&gt;&lt;a href="http://citeseerx.ist.psu.edu/viewdoc/summary;jsessionid=95F8796D76A99F32F1D0919C6DEBB11F?doi=10.1.1.100.4934"&gt;SPARQL query processing with conventional relational database systems - CiteSeerX&lt;/a&gt;&lt;br/&gt;
Steve Harris paper discussing evolution of 3Store triplestore to accommodate SPARQL&lt;/li&gt;
&lt;li&gt;&lt;a href="http://en.wikipedia.org/wiki/Table_of_mathematical_symbols"&gt;Table of mathematical symbols - Wikipedia, the free encyclopedia&lt;/a&gt;&lt;br/&gt;
handy reference for the mathematically challenged&lt;/li&gt;
&lt;/ul&gt;</description><feedburner:origLink>http://del.icio.us/ldodds#2008-07-12</feedburner:origLink></item><item rdf:about="http://del.icio.us/ldodds#2008-07-11"><title>Links for 2008-07-11 [del.icio.us]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/333284169/ldodds</link><dc:date>2008-07-12T00:00:00-05:00</dc:date><content:encoded><![CDATA[<ul>
<li><a href="http://helm.cs.unibo.it/mml-widget/">GtkMathView Home Page</a><br/>
GtkMathView is a C++ rendering engine for MathML documents. It provides an interactive view that can be used for browsing and editing MathML markup.</li>
</ul>]]></content:encoded><description>&lt;ul&gt;
&lt;li&gt;&lt;a href="http://helm.cs.unibo.it/mml-widget/"&gt;GtkMathView Home Page&lt;/a&gt;&lt;br/&gt;
GtkMathView is a C++ rendering engine for MathML documents. It provides an interactive view that can be used for browsing and editing MathML markup.&lt;/li&gt;
&lt;/ul&gt;</description><feedburner:origLink>http://del.icio.us/ldodds#2008-07-11</feedburner:origLink></item><item rdf:about="http://del.icio.us/ldodds#2008-07-10"><title>Links for 2008-07-10 [del.icio.us]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/332375702/ldodds</link><dc:date>2008-07-11T00:00:00-05:00</dc:date><content:encoded><![CDATA[<ul>
<li><a href="http://research.yahoo.com/node/90">Pig | Yahoo! Research</a><br/>
We are creating infrastructure to support ad-hoc analysis of very large data sets. Parallel processing is the name of the game. Our system runs on a cluster computing architecture, on top of which sit several layers of abstraction that ultimately bring th</li>
<li><a href="http://hadoop.apache.org/hbase/">Welcome to HBase!</a><br/>
HBase is the Hadoop database. Its an open-source, distributed, column-oriented store modeled after BigTable</li>
<li><a href="https://issues.apache.org/jira/browse/HADOOP-3601">[#HADOOP-3601] Hive as a contrib project - ASF JIRA</a><br/>
Hive is a data warehouse built on top of flat files (stored primarily in HDFS)</li>
<li><a href="http://www.cascading.org/">Cascading</a><br/>
Cascading is a feature rich API for defining and executing complex and fault tolerant data processing workflows on a Hadoop cluster</li>
<li><a href="http://zookeeper.sourceforge.net/">ZooKeeper</a><br/>
ZooKeeper is a service for coordinating processes of distributed applications. Historically distributed processes are coordinated using group messaging, shared registers, or distributed lock services. ZooKeeper incorporates elements from all these servers</li>
<li><a href="http://heroku.com/">Heroku</a></li>
</ul>]]></content:encoded><description>&lt;ul&gt;
&lt;li&gt;&lt;a href="http://research.yahoo.com/node/90"&gt;Pig | Yahoo! Research&lt;/a&gt;&lt;br/&gt;
We are creating infrastructure to support ad-hoc analysis of very large data sets. Parallel processing is the name of the game. Our system runs on a cluster computing architecture, on top of which sit several layers of abstraction that ultimately bring th&lt;/li&gt;
&lt;li&gt;&lt;a href="http://hadoop.apache.org/hbase/"&gt;Welcome to HBase!&lt;/a&gt;&lt;br/&gt;
HBase is the Hadoop database. Its an open-source, distributed, column-oriented store modeled after BigTable&lt;/li&gt;
&lt;li&gt;&lt;a href="https://issues.apache.org/jira/browse/HADOOP-3601"&gt;[#HADOOP-3601] Hive as a contrib project - ASF JIRA&lt;/a&gt;&lt;br/&gt;
Hive is a data warehouse built on top of flat files (stored primarily in HDFS)&lt;/li&gt;
&lt;li&gt;&lt;a href="http://www.cascading.org/"&gt;Cascading&lt;/a&gt;&lt;br/&gt;
Cascading is a feature rich API for defining and executing complex and fault tolerant data processing workflows on a Hadoop cluster&lt;/li&gt;
&lt;li&gt;&lt;a href="http://zookeeper.sourceforge.net/"&gt;ZooKeeper&lt;/a&gt;&lt;br/&gt;
ZooKeeper is a service for coordinating processes of distributed applications. Historically distributed processes are coordinated using group messaging, shared registers, or distributed lock services. ZooKeeper incorporates elements from all these servers&lt;/li&gt;
&lt;li&gt;&lt;a href="http://heroku.com/"&gt;Heroku&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;</description><feedburner:origLink>http://del.icio.us/ldodds#2008-07-10</feedburner:origLink></item><item rdf:about="http://del.icio.us/ldodds#2008-07-08"><title>Links for 2008-07-08 [del.icio.us]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/330466375/ldodds</link><dc:date>2008-07-09T00:00:00-05:00</dc:date><content:encoded><![CDATA[<ul>
<li><a href="http://blog.secondlife.com/2008/07/08/ibm-linden-lab-interoperability-announcement/">IBM and Linden Lab Interoperability Announcement &laquo; Official Second Life Blog</a><br/>
This is a historic day for Second Life, and for virtual worlds in general. IBM and Linden Lab have announced that research teams from the two companies successfully teleported avatars from the Second Life Preview Grid into a virtual world running on an Op</li>
</ul>]]></content:encoded><description>&lt;ul&gt;
&lt;li&gt;&lt;a href="http://blog.secondlife.com/2008/07/08/ibm-linden-lab-interoperability-announcement/"&gt;IBM and Linden Lab Interoperability Announcement &amp;laquo; Official Second Life Blog&lt;/a&gt;&lt;br/&gt;
This is a historic day for Second Life, and for virtual worlds in general. IBM and Linden Lab have announced that research teams from the two companies successfully teleported avatars from the Second Life Preview Grid into a virtual world running on an Op&lt;/li&gt;
&lt;/ul&gt;</description><feedburner:origLink>http://del.icio.us/ldodds#2008-07-08</feedburner:origLink></item><item rdf:about="tag:flickr.com,2004:/photo/2613153088"><title>Wordle Tag Cloud (June 2008) [Flickr]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/320430956/</link><dc:subject>tagcloud wordle</dc:subject><dc:creator>ldodds</dc:creator><dc:date>2008-06-26T05:22:09-05:00</dc:date><description>&lt;p&gt;&lt;a href="http://www.flickr.com/people/ldodds/"&gt;ldodds&lt;/a&gt; posted a photo:&lt;/p&gt;
&lt;p&gt;&lt;a href="http://www.flickr.com/photos/ldodds/2613153088/" title="Wordle Tag Cloud (June 2008)"&gt;&lt;img src="http://farm4.static.flickr.com/3065/2613153088_7dd54df03f_m.jpg" width="240" height="157" alt="Wordle Tag Cloud (June 2008)" /&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;A tag cloud of &lt;a href="http://del.icio.us/ldodds"&gt;my del.icio.us tags&lt;/a&gt; generated using the &lt;a href="http://wordle.net/"&gt;wordle&lt;/a&gt; word cloud generator&lt;/p&gt;</description><dc:date.Taken>2008-06-26T11:22:09-08:00</dc:date.Taken><feedburner:origLink>http://www.flickr.com/photos/ldodds/2613153088/</feedburner:origLink></item><item rdf:about="tag:flickr.com,2004:/photo/2521030398"><title>IMG_3159 [Flickr]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/297714458/</link><dc:creator>ldodds</dc:creator><dc:date>2008-05-25T06:21:26-05:00</dc:date><description>&lt;p&gt;&lt;a href="http://www.flickr.com/people/ldodds/"&gt;ldodds&lt;/a&gt; posted a photo:&lt;/p&gt;
&lt;p&gt;&lt;a href="http://www.flickr.com/photos/ldodds/2521030398/" title="IMG_3159"&gt;&lt;img src="http://farm4.static.flickr.com/3131/2521030398_1ac100c9b9_m.jpg" width="240" height="180" alt="IMG_3159" /&gt;&lt;/a&gt;&lt;/p&gt;

</description><dc:date.Taken>2008-05-10T12:55:20-08:00</dc:date.Taken><feedburner:origLink>http://www.flickr.com/photos/ldodds/2521030398/</feedburner:origLink></item><item rdf:about="tag:flickr.com,2004:/photo/2521028864"><title>IMG_3157 [Flickr]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/297714459/</link><dc:creator>ldodds</dc:creator><dc:date>2008-05-25T06:20:27-05:00</dc:date><description>&lt;p&gt;&lt;a href="http://www.flickr.com/people/ldodds/"&gt;ldodds&lt;/a&gt; posted a photo:&lt;/p&gt;
&lt;p&gt;&lt;a href="http://www.flickr.com/photos/ldodds/2521028864/" title="IMG_3157"&gt;&lt;img src="http://farm3.static.flickr.com/2080/2521028864_f1d0f53498_m.jpg" width="240" height="180" alt="IMG_3157" /&gt;&lt;/a&gt;&lt;/p&gt;

</description><dc:date.Taken>2008-05-10T12:54:26-08:00</dc:date.Taken><feedburner:origLink>http://www.flickr.com/photos/ldodds/2521028864/</feedburner:origLink></item><item rdf:about="tag:flickr.com,2004:/photo/2521027506"><title>IMG_3156 [Flickr]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/297714460/</link><dc:creator>ldodds</dc:creator><dc:date>2008-05-25T06:19:36-05:00</dc:date><description>&lt;p&gt;&lt;a href="http://www.flickr.com/people/ldodds/"&gt;ldodds&lt;/a&gt; posted a photo:&lt;/p&gt;
&lt;p&gt;&lt;a href="http://www.flickr.com/photos/ldodds/2521027506/" title="IMG_3156"&gt;&lt;img src="http://farm3.static.flickr.com/2180/2521027506_63af132922_m.jpg" width="180" height="240" alt="IMG_3156" /&gt;&lt;/a&gt;&lt;/p&gt;

</description><dc:date.Taken>2008-05-10T12:53:01-08:00</dc:date.Taken><feedburner:origLink>http://www.flickr.com/photos/ldodds/2521027506/</feedburner:origLink></item><item rdf:about="tag:flickr.com,2004:/photo/2521026236"><title>IMG_3155 [Flickr]</title><link>http://feeds.feedburner.com/~r/LostBoy/~3/297714461/</link><dc:creator>ldodds</dc:creator><dc:date>2008-05-25T06:18:44-05:00</dc:date><description>&lt;p&gt;&lt;a href="http://www.flickr.com/people/ldodds/"&gt;ldodds&lt;/a&gt; posted a photo:&lt;/p&gt;
&lt;p&gt;&lt;a href="http://www.flickr.com/photos/ldodds/2521026236/" title="IMG_3155"&gt;&lt;img src="http://farm4.static.flickr.com/3165/2521026236_7fe8a78074_m.jpg" width="180" height="240" alt="IMG_3155" /&gt;&lt;/a&gt;&lt;/p&gt;

</description><dc:date.Taken>2008-05-10T12:52:40-08:00</dc:date.Taken><feedburner:origLink>http://www.flickr.com/photos/ldodds/2521026236/</feedburner:origLink></item><item rdf:about="http://www.ldodds.com/blog/archives/000331.html">
<title>Say Everything</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/294196376/000331.html</link>
<description>Reading this interesting post containing lessons for ontology writers by Ian Davis this morning, it occurred to me that the key lesson is applicable to open data publishing in general and not just to ontology design. Ian's post describes some of the techniques introduced in the Taming the Open World session at SemTech. I won't repeat them all here. Go and read the post. The majority of the techniques relate to schema (i.e. ontology) design, e.g. identifying what types of resource a property relates, whether two types of resource are completely unrelated, etc. I think these all boil down to...</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-05-20T12:17:16+00:00</dc:date>
<content:encoded><![CDATA[<p>Reading this interesting post containing <a href="http://blogs.talis.com/nodalities/2008/05/lessons-for-ontology-writers.php">lessons for ontology writers</a> by Ian Davis this morning, it occurred to me that the key lesson is applicable to open data publishing in general and not just to ontology design.</p>

<p>Ian's post describes some of the techniques introduced in the Taming the Open World session at <a href="http://www.semantic-conference.com/">SemTech</a>. I won't repeat them all here. Go and read the post. The majority of the techniques relate to schema (i.e. ontology) design, e.g. identifying what types of resource a property relates, whether two types of resource are completely unrelated, etc.</p>

<p>I think these all boil down to a general principle to: <i>say everything</i>. i.e. if you know something is true, if you have a fact that you can share, then share it. Commonly in open data discussions we tend to focus on the basic facts: the data we want to see opened up, and build cool stuff against. But we mustn't forget the need to share the metadata too. All data has metadata, even metadata. And schemas are a form of metadata.</p>

<p>Some of the advice quoted in Ian's post was new to me. It hadn't occurred to me that there were some real benefits in the additional precision. And given I've moaned before about performance of reasoners, I can now see where I've not been helping them out. As usual it's all obvious in hindsight. I'm sure there are other easy wins. For example, I rarely see RDF data that specifies its data type. Why not? If you know what type a literal is, then why not say so?</p>

<p>So remember, say everything. <i>Speak to the machine</i>.</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000331.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000330.html">
<title>Google AppEngine for Personal Web Presence?</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/269903085/000330.html</link>
<description>Some thinking aloud... I've browsed through the Google App Engine gallery and the applications you can find there at the moment are pretty much what you'd expect: lots of Web 2.0 "share this, share that" sites. These are what you'd expect because firstly they're the kind of simple application you'd build whilst exploring any new environment. Secondly because they're exactly the kind of sites that are currently being released every which way you turn. But for me App Engine is intriguing as it might provide an interesting new perspective on distributing shrink-wrapped packaged software. When Google take the lid off...</description>
<dc:subject>Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-04-14T10:11:21+00:00</dc:date>
<content:encoded><![CDATA[<p>Some thinking aloud...</p>

<p>I've browsed through <a href="http://appgallery.appspot.com/">the Google App Engine gallery</a> and the applications you can find there at the moment are pretty much what you'd expect: lots of Web 2.0 "share this, share that" sites. These are what you'd expect because firstly they're the kind of simple application you'd build whilst exploring any new environment. Secondly because they're exactly the kind of sites that are currently being released every which way you turn.</p>

<p>But for me App Engine is intriguing as it might provide an interesting new perspective on distributing shrink-wrapped packaged software. When Google take the lid off of the number of sign-ups, it's going to be a simple matter for anyone to have their own App Engine environment. Forget cheap web hosting and the expense and configuration overhead that that entails: just sign up for an App Engine account. </p>

<p>App Engine has the potential to provide an enormous number of people with a well-documented stable environment into which an application can be deployed.</p>

<p>It will be interesting to see if anyone seizes on App Engine as an opportunity to create a simple <i>personal</i> application that combines elements of all of the Web 2.0 favourites: bookmarks, blogging, calendar, photos, travel, and perhaps an OpenId provider. One that makes me the administrator of all of my own data, but doesn't scrimp on the options for other people to harvest, syndicate and browse what I'm uploading.</p>

<p>At the moment our online identities start out fragmented, because we have to push data into a number of different services. And then we strive for ways to bring that data together and knit it into other sites that we, or our social network, use. </p>

<p>But why not turn this on its head? And seize on App Engine as a way to avoid this early fragmentation and instead start out with a centralized, personal web presence; but one which seamlessly integrates with data in other spaces. The potential is in open data, and services that are built around it. So why aren't we managing our own open data repositories and letting others offer us services against particular aspects of it?</p>

<p>The App Engine environment doesn't involve any configuration on behalf of the end user, and I suspect you could probably create an App Engine Deployer using App Engine itself. So sign-up, deployment and upgrades could also be pretty straight-forward. Python seems well suited for creating a simple modular web application that could be extended to cover new areas as users needed.</p>

<p>Instead of using lots of different web applications, we can each have our own modular web application that is intimately linked into the web, and becomes the primary repository for the data you want on the web. Data portability follows from the fact that you'd be the administrator of your own data.</p>

<p>This would also change the nature of the kinds of applications that we'd need elsewhere on the web. Instead of lots of specialist databases, we need more generic services and more community/local/temporary aggregations.</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000330.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000329.html">
<title>Teaching a Six Year Old About Triples</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/262202118/000329.html</link>
<description>I've written in the past about how both of my kids are star wars geeks thanks to Lego Star Wars. My son had a Star Wars Annual for Christmas which he's been poring over, in that obsessive way that young boys do. Anyway, we got to talking about some of the relationships between the different characters: that Luke was Anakin's son; that Anakin and Vader are the same person; etc. We went back and forth a bit as he was getting confused by some of the secret identities and the overall timeline (he's not seen all of the films of...</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-04-01T20:01:17+00:00</dc:date>
<content:encoded><![CDATA[<p>I've written in the past about how both of my kids are <a href="http://www.ldodds.com/blog/archives/000308.html">star wars geeks</a> thanks to Lego Star Wars. My son had a Star Wars Annual for Christmas which he's been poring over, in that obsessive way that young boys do. Anyway, we got to talking about some of the relationships between the different characters: that Luke was Anakin's son; that Anakin and Vader are the same person; etc. We went back and forth a bit as he was getting confused by some of the secret identities and the overall timeline (he's not seen all of the films of course; he's only six).</p>

<p>I suggested to him that we try drawing it out. I thought that this might help him get a better mental picture. I explained to him that we could try writing down the characters' names and start drawing lines between them to illustrate the relationships.</p>

<p>He got it straight away. </p>

<p>Here's what we came up with after about half an hour's work. Click through to see the larger image:</p>

<p><a href="http://flickr.com/photos/ldodds/2381025770/"><img border="0" src="http://farm4.static.flickr.com/3225/2381025770_588307637e_m_d.jpg" /></a></p>

<p>We started out in the bottom left with Luke, drawing an arc from him to Anakin. I suggested he label the line with "dad" to describe the relationship.</p>

<p>He then decided we should move on to Anakin and capture a fact about him next. We discussed this a bit, in particular, what would be a good name for the label between Anakin and Vader. He settled on "became". We then recorded a further fact, that Darth Sidious "trained" Vader.</p>

<p>So far so good. He easily grasped the simple pattern of "X relates to Y" and also grokked a number of other things quite quickly. Firstly he reused "became" to record that Palpatine was also Darth Sidious, he saw that it was basically the same relationship. Secondly, he pointed out to me that  "Anakin became Darth Vader" is actually a sentence. He also noted that my original suggestion of "Luke dad Vader" didn't read very well! Finally he also saw that the technique was quite general: he observed that we could also capture facts about which weapons each of the characters used.</p>

<p>We also recorded a relationship between Luke and Vader: "fights". After we'd drawn this out I pointed out to him that if Luke fights Vader, Vader is actually Anakin, and Anakin is Luke's dad, then Luke must have been fighting his dad. This was the source of much hilarity. But he was easily able to see how this made sense from his drawing.</p>

<p>We rounded off the drawing with a few facts about the droids, which are a particular favourite of his.</p>

<p>I found the whole exercise quite interesting as it seemed to be a good way to lay out some facts in a way that was both amenable to teaching and fun. I certainly didn't force him into doing it. And a bit of father and son time never hurts.</p>

<p>Really it's a testament to the simplicity at the heart of the RDF model, the simple triple, that it can be understood by a six year old.</p>

<p>(No, this isn't an April Fool)</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000329.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000328.html">
<title>Twinkle on code.google.com</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/249738851/000328.html</link>
<description>I've created a Google Code project for Twinkle. It's called twinkle-sparql-tools. If you're a Java developer and/or a user of the tool and are interested in contributing code then drop me a mail and I'll set you up with source access....</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-03-11T20:59:28+00:00</dc:date>
<content:encoded><![CDATA[<p>I've created a <a href="http://code.google.com">Google Code</a> project for <a href="http://www.ldodds.com/projects/twinkle">Twinkle</a>. It's called <a href="http://code.google.com/p/twinkle-sparql-tools/">twinkle-sparql-tools</a>.</p>

<p>If you're a Java developer and/or a user of the tool and are interested in contributing code then drop me a mail and I'll set you up with source access.</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000328.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000327.html">
<title>Set Algebra For Updating a Triple Store</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/249498192/000327.html</link>
<description>Let's assume we have a stored graph Gstore. Also that we have been given another graph of incoming data Gin that contains some modifications to a specific sub-graph. Let's also assume that we have a function view() that can extract the "equivalent" sub-graph (i.e. equivalent view) of the original data. In pseudo code to apply these updates we do the following: Gview = view(Gstore) Gdelete = Gview - Gin Ginsert = Gin - Gview Gstore' = Gstore.remove(Gdelete).add(Ginsert) Job done. The Jena API provides methods for handling the basic operations see, for example, the difference method. You can also wrap the...</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-03-11T12:44:50+00:00</dc:date>
<content:encoded><![CDATA[<p>Let's assume we have a stored graph <i>G<sub>store</sub></i>. Also that we have been given another graph of incoming data <i>G<sub>in</sub></i> that contains some modifications to a specific sub-graph.</p>

<p>Let's also assume that we have a function <i>view()</i> that can extract the "equivalent" sub-graph (i.e. equivalent view) of the original data.</p>

<p>In pseudo code to apply these updates we do the following:</p>

<p><code><br />
G<sub>view</sub> = view(G<sub>store</sub>)<br />
G<sub>delete</sub> = G<sub>view</sub> - G<sub>in</sub><br />
G<sub>insert</sub> = G<sub>in</sub> - G<sub>view</sub><br />
G<sub>store'</sub> = G<sub>store</sub>.remove(G<sub>delete</sub>).add(G<sub>insert</sub>)<br />
</code></p>

<p>Job done. The Jena API provides methods for handling the basic operations see, for example, the <a href="http://jena.sourceforge.net/javadoc/com/hp/hpl/jena/rdf/model/Model.html#difference(com.hp.hpl.jena.rdf.model.Model)">difference</a> method. You can also wrap the modifications to <i>G<sub>store</sub></i> in a transaction.</p>

<p>The nice thing is that this is agnostic to the actual data being updated, we don't care <i>which</i> triples are being deleted or inserted. This differentiates it from the SPARQL Update Language, specifically <a href="http://jena.hpl.hp.com/~afs/SPARQL-Update.html#sec_updateLanguage">the MODIFY operation</a>, which requires the patterns being inserted or deleted to be added to the query. <a href="http://vocab.org/changeset/schema">Changesets</a> are much the same.</p>

<p>In the above approach the detail of what is being changed (or is being <i>allowed</i> to change) is shifted out of the triple store update code and into the <i>view()</i> function. The extent of the graph that is returned by this function must match that being passed as input. So we've defined a specific "<a href="http://www.ldodds.com/blog/archives/000324.html">document type</a>". As it turns out this is quite reasonable as you can generally match, e.g. a RESTful service call, to a view based on the identifier of the item to which the content is being posted, its media-type, other service parameters, etc.</p>

<p>In terms of implementing the <i>view()</i> function, it turns out you can go a long way with a SPARQL <code>CONSTRUCT</code> operation. <code>DESCRIBE</code> isn't suitable as you don't have control over how the sub-graph is built.</p>

<p>I think there are strengths and weaknesses to all of the different approaches to updating RDF stores and suspect that there isn't going to be a one size fits all approach. For example <a href="http://jena.hpl.hp.com/~afs/SPARQL-Update.html">SPARQL Update</a> looks like a handy syntax to use when the modifications all follow predictable patterns, e.g. I'm doing parameterized updates to some stored data, much like parameterized updates in a SQL database. Changesets offer some extra functionality around store versioning which doesn't drop out of the set logic approach (although it could be added).</p>

<p>Oh, and the keen eyed amongst you will notice that this approach does involve some "thrashing" of updates for bnodes, because they don't compare as equal. But <a href="http://iandavis.com/blog/2007/03/bnodes-out">what ya gonna do</a>?! :)</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000327.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000326.html">
<title>Graph Shape Sorting</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/249110470/000326.html</link>
<description>On Sunday I posted about how constrained views of RDF can be useful in order to document the inputs into an application, validate those inputs, and also manage updates via application of set algebra. I explored the idea that a system may support many such views or "document types" without blessing any as the primary view of the data. And, importantly, that this approach doesn't ultimately constrain the range of data that you can put into a triple store. It just occurred to me that there's another way to explain the concept: a shape sorter. A shape sorter can contain...</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-03-10T20:58:10+00:00</dc:date>
<content:encoded><![CDATA[<p>On Sunday I posted about how <a href="http://www.ldodds.com/blog/archives/000324.html">constrained views of RDF</a> can be useful in order to document the inputs into an application, validate those inputs, and also manage updates via application of set algebra. I explored the idea that a system may support many such views or "document types" without blessing any as the primary view of the data. And, importantly, that this approach doesn't ultimately constrain the range of data that you can put into a triple store.</p>

<p>It just occurred to me that there's another way to explain the concept: a shape sorter.</p>

<p><a href="http://www.flickr.com/photos/ellasdad/425813314/" alt="Photo by ellas dad"><img src="http://farm1.static.flickr.com/170/425813314_8868fd6d4b_m.jpg" border="0" alt="Photo by ellas dad" /></a></p>

<p>A shape sorter can contain many different sizes, shapes, and colours of block. Each can only be put into the box through a specific hole, but once in they're all mixed together. And one can reach in and pick out any or all of them. Depending on which face of the shape sorter you're looking at the options may look quite limited. But the sorter as a whole has a lot of different faces and options.</p>

<p>The inside of the box is the triple store. It can contain many different things. Each block is a specific data format or the shape of a specific sub-graph. Passing a block through a shape is the validation process, and the shape sorter offers many different forms of validation.</p>

<p>Useful alternate explanation or excuse to post a pointer to <a href="http://www.flickr.com/photos/ellasdad/425813314/">a pretty picture</a>?</p>

<p><br />
</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000326.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000325.html">
<title>Modelling Statistical Publications: Some Notes</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/248825323/000325.html</link>
<description>Lee Feigenbaum has put together a really nice posting discussing different ways of modelling statistical data using RDF. I wanted to contribute to that discussion and add in a few comments about how I've been modelling some of the OECD's statistical publications using RDF. Note the emphasis: what I've been doing is capturing metadata about individual statistical tables and graphs, their association with specific publications, their metadata, etc. I've not attempted to capture the detail of the statistics themselves, but do have a few relevant comments there. The background to this is that I'm currently technically leading a project to...</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-03-10T11:07:21+00:00</dc:date>
<content:encoded><![CDATA[<p>Lee Feigenbaum has put together a really nice posting discussing different ways of <a href="http://www.thefigtrees.net/lee/blog/2008/03/modeling_statistics_in_rdf_a_s.html">modelling statistical data using RDF</a>. I wanted to contribute to that discussion and add in a few comments about how I've been modelling some of the <a href="http://www.oecd.org">OECD</a>'s statistical <i>publications</i> using RDF.</p>

<p>Note the emphasis: what I've been doing is capturing metadata about individual statistical tables and graphs, their association with specific publications, their metadata, etc. I've not attempted to capture the detail of the statistics themselves, but do have a few relevant comments there.</p>

<p>The background to this is that I'm currently technically leading a project to build the latest version of OECD's electronic library. All of the metadata is stored in RDF, with content available as HTML, PDFs, Excel spreadsheets or as views into the <a href="http://www.sourceoecd.org/database/OECDStat">OECD.stat</a>   application that the OECD have developed as a power tool for housing and delivering their statistical data.</p>

<p>As Lee discovered in the EuroStat data, regions and countries are core concepts. All of the OECD's statistical output can be classified by country and region, and these are types defined within our schema.  We assign URIs to the countries using either the ISO 3166-1 alpha-2 country code or, in the case of classifying data that refers to countries that no longer exist as a specific entity (e.g. Yugoslavia), we use the ISO 3166-3 4 letter country code. </p>

<p>A country may be associated with zero or more Regions, using an Is Part Of relationship. A region may be the European Union, OECD member states or other arbitrary grouping. I suspect the same basic requirements will apply to other statistical datasets.</p>

<p>There are some other types of classification that we associate with the tables:</p>

<ul>
<li>An indicator of whether the table is a "comparative table": e.g. does it include data from multiple countries?</li>
<li>An association between the table and a "Table Series" which constitute a collection of tables published over time</li>
<li>The statistical Variables that the table contains, e.g. GDP</li>
<li>A summary of the time range that the table covers, e.g. "2007", "2005-2007", "2000, 2002-2005", etc. These are captured as simple literals for now as we have to do little/no processing on them at this level.</li>
</ul>

<p>And then there's the usual collection of title, description, etc. all as multi-lingual literals. All tables are also assigned a <a href="http://www.doi.org">DOI</a> to provide a stable link that can be cited in publications. If the table was originally published in a specific Book or journal Article then that relationship is also captured.</p>

<p>Obviously this metadata is, largely, at a level above that which Lee has been exploring, but I thought this might provide some useful context. For anyone looking at capturing statistical data in RDF, there are some other useful places to look at for defining terms and drawing on prior experience. </p>

<p>Firstly <a href="http://www.aeaweb.org/journal/jel_class_system.html">the Journal of Economic Literature Classification</a> provides some terms that can be associated with statistical publications to help categorize them. The <a href="http://stats.oecd.org/glossary/index.htm">OECD's statistical glossary</a> fills a similar role.</p>

<p>Secondly, the <a href="http://www.sdmx.org/index.php">Statistical Data and Metadata EXchange</a> (SDMX) initiative is also worthy of a look. It's not RDF but, as well as defining XML Schemas and web services for exchanging statistical data, the <a href="http://www.sdmx.org/index.php?page_id=11">guidelines</a> include lists of cross-domain concepts and their mappings to those in use by EuroStat, OECD, IMF, etc. So plenty of scope for grounding RDF vocabularies for statistics in a lot of prior art.</p>

<p>Finally, the OECD have some public documentation about the design and implementation of their "MetaStore" database that supports OECD.stat (it's a different beast to the Ingenta MetaStore, I should point out). For example, the document "<a href="http://www.oecd.org/dataoecd/26/33/33869551.pdf">Management of Statistical Metadata at the OECD</a>" (PDF) has some interesting detail about the different types of metadata (structural, technical, publishing) that is stored in  these multi-dimensional data cubes.</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000325.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000324.html">
<title>Documents Types in RDF</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/248397143/000324.html</link>
<description>The notion of a "document" and a "document type" are core concepts in XML. The specification includes a precise description of document, what it means for a document to be well-formed, valid, and so on. Even if you're not using a DTD or XML schema, and are just using XML as a syntax for exchanging structured or semi-structured data, the concept of document is still a useful one. For example a document has a clear boundary and content, and so there is a limited scope for the data that an application has to deal with. The ability to define classes...</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-03-09T15:56:04+00:00</dc:date>
<content:encoded><![CDATA[<p>The notion of a "document" and a "document type" are core concepts in XML. The specification includes a precise description of <a href="http://www.w3.org/TR/REC-xml/#NT-document">document</a>, what it means for a document to be well-formed, valid, and so on. Even if you're not using a DTD or XML schema, and are just using XML as a syntax for exchanging structured or semi-structured data, the concept of document is still a useful one. For example a document has a clear boundary and content, and so there is a limited scope for the data that an application has to deal with. </p>

<p>The ability to define classes of documents ("document types") brings other benefits: the structure and content of documents can be standardized. The document type becomes both a contract that can be enforced by an application prior to its processing of any given document, and a description of the acceptable inputs of that application. </p>

<p>The concepts of "document" and "document type" are quite general and aren't limited to XML applications. See, for example, the <a href="http://groups.google.com/group/json-schema">JSON schema</a> discussion. The same concepts and their attendant benefits also <a href="http://www.enterpriseintegrationpatterns.com/DocumentMessage.html">crop up</a> in <a href="http://www.enterpriseintegrationpatterns.com/CanonicalDataModel.html">messaging systems</a>.</p>

<p>But you don't see much discussion about the concept of a document or their types in RDF applications. Granted, <a href="http://www.w3.org/TR/rdf-syntax-grammar/">RDF/XML</a> does define a document type for serializing RDF graphs, but we all know that the large variation in how any single RDF graph could be encoded in valid RDF/XML means that the same benefits we get from non-RDF XML vocabularies are lost. The document scope can be highly variable, as can content and syntax. Of course it is possible to create "RDF profiles" that constrain the RDF/XML syntax so that an XML schema can be used to validate documents. Jeni Tennison has recently discussed <a href="http://www.jenitennison.com/blog/node/74">some approaches</a> to this, and I've <a href="http://www.ldodds.com/blog/archives/000104.html">explored the topic myself in the past</a>. In fact I regularly apply it when designing RDF based systems: it is extremely useful (essential) to be able to validate incoming data.</p>

<p>But generally the notion of document types doesn't sit well with RDF. RDF is a data model for semi-structured data. It assumes an "open world model" in which missing information is not invalid, or as Dan Brickley has put it "<a href="http://rdfweb.org/mt/foaflog/archives/000047.html">missing isn't broken</a>". This wild and woolly nature of RDF is, I think, one of the reasons many people struggle with it. As Dan says:<blockquote><cite>If nothing is mandatory, then how can they write code that knows what to expect?</cite></blockquote></p>

<p>Dan concludes that posting by suggesting that there are certain bedrocks which application authors can still rely on, e.g. XML+Namespaces, conformance to the RDF model, etc. But lately I've come around to the view that we need to go beyond that and offer tighter ways to document, declare and validate data that is being exchanged in RDF applications. I don't know of any applications that adopt an open world model; quite the opposite in fact. I think there are benefits in looking at the notion of "document" and "document type" in an RDF context. Although "document" may not be the right term here, a better one may be "view".</p>

<p>So how might we achieve this, and what are the benefits in doing so?</p>

<p>We can use the aforementioned "profiling" option to create a constrained RDF/XML vocabulary that can be validated using XML schema (of whatever kind). Where two parties need to have an agreed on format for data exchange this works well. So for example, the OECD are supplying <a href="http://www.ingenta.com">us</a> with XML documents according to an XML schema. The documents are valid RDF/XML so we can simply pour them into a triple store for our application to use. Each XML document is basically a packet of RDF that describes one section (or sub-graph) of the entire data set. Those same packets are used as the basic message format for passing between internal components (e.g. the search indexer). So this is one useful application of the document concept in an application which is otherwise entirely RDF-driven and which goes to some length to be agnostic to the details of the data it contains.</p>

<p>In a scenario where there isn't any prior co-ordination between the parties exchanging data then there are other options. A typical scenario here might be submitting my FOAF document (either directly, or referenced via an OpenId) to register/configure some online service. There are many ways I might structure my FOAF document, so how does the service validate or check that the required data is present? The answer here is SPARQL. SPARQL can be used to validate a graph by <a href="http://isegserv.itd.rl.ac.uk/schemarama/">testing whether specific graph patterns are present</a> using ASK or CONSTRUCT. It can also be used to CONSTRUCT a constrained "view" of the submitted data that throws away anything that the application isn't directly interested in. The other side benefit to using SPARQL is that it doesn't really matter that RDF syntax is being used: the validation and data extraction is happening at the level of the data model not the syntax.</p>

<p>We use the technique of defining RDF views using CONSTRUCT elsewhere in our applications. The primary one being fetching the data required to present some aspect of the RDF graph to an end user. I've described this, and the underlying system and its assumptions <a href="http://www.slideshare.net/ldodds/facet-building-web-pages-with-sparql/">in a recent presentation</a>. Here the "view" or "document type" is used to drive a simple data binding layer, and is essentially the contract between the application logic and the presentation layer. The application doesn't need to deal with the entire graph, just useful use case specific subsets. And these are different "document types" to that used when loading the original data. The application doesn't have a single document type: it has many and they're used in different contexts. This avoids overly constraining the model (we want to be able to store arbitrary additional properties) but imposes local scoping to gain the benefits of validation, known contents, etc.</p>

<p>It turns out that there's another use case where RDF document types or views are useful: managing updates to a triple store. If you know that some incoming data is constrained to a particular view (e.g. by prior agreement, or through extracting only those graph patterns that are of interest) then applying the incoming message as an update to the store is simply a matter of doing some set algebra. Extract the equivalent view from the store (i.e. the relevant sub-graph) and then look for the difference between the stored and incoming sub-graphs. The end result is a list of triples to delete and add to the store.</p>

<p>I'll follow up more on the topics in this posting, as I think there are huge benefits to be had here from looking at how the notion of documents and document types can add value to RDF systems. It's very easy to get caught up in the completely general case of a highly-distributed, wild and woolly world of RDF and the Semantic Web. But the majority of applications will have a much more limited world view, and my experience so far is that applying some additional constraints here and there can have huge benefits. Embracing the notion of multiple document types is one of these.</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000324.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000323.html">
<title>Oxford SWIG Talks: Twinkle &amp; SPARQL Query Forms</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/247289156/000323.html</link>
<description>I finally found time to attend one of the Oxford SWIG sessions last night and had a thoroughly enjoyable time. I gave a couple of presentations which I've posted to slideshare, and which I'll embed below. The first was a general introduction and mini-demonstration of Twinkle. I gave a basic overview of the key features and showed how the configuration drives the user interface: | View | Upload your own The second talk was about the different SPARQL query forms. I started by asking the question "why are there four different query forms?" and then proceeded to examine each one...</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-03-07T09:18:57+00:00</dc:date>
<content:encoded><![CDATA[<p>I finally found time to attend one of the Oxford SWIG sessions last night and had a thoroughly enjoyable time. </p>

<p>I gave a couple of presentations which I've posted to slideshare, and which I'll embed below.</p>

<p>The first was a general introduction and mini-demonstration of <a href="http://www.ldodds.com/projects/twinkle">Twinkle</a>. I gave a basic overview of the key features and showed how the configuration drives the user interface:</p>

<div style="width:425px;text-align:left" id="__ss_294188"><object style="margin:0px" width="425" height="355"><param name="movie" value="http://static.slideshare.net/swf/ssplayer2.swf?doc=twinkle-a-sparql-query-tool-1204750634891766-2"/><param name="allowFullScreen" value="true"/><param name="allowScriptAccess" value="always"/><embed src="http://static.slideshare.net/swf/ssplayer2.swf?doc=twinkle-a-sparql-query-tool-1204750634891766-2" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="355"></embed></object><div style="font-size:11px;font-family:tahoma,arial;height:26px;padding-top:2px;"><a href="http://www.slideshare.net/?src=embed"><img src="http://static.slideshare.net/swf/logo_embd.png" style="border:0px none;margin-bottom:-5px" alt="SlideShare"/></a> | <a href="http://www.slideshare.net/ldodds/twinkle-a-sparql-query-tool?src=embed" title="View 'Twinkle: A SPARQL Query Tool' on SlideShare">View</a> | <a href="http://www.slideshare.net/upload?src=embed">Upload your own</a></div></div>

<p>The second talk was about the different SPARQL query forms. I started by asking the question "why are there four different query forms?" and then proceeded to examine each one and talk about the benefits and their applied use.</p>

<div style="width:425px;text-align:left" id="__ss_295333"><object style="margin:0px" width="425" height="355"><param name="movie" value="http://static.slideshare.net/swf/ssplayer2.swf?doc=sparql-query-forms-1204820309554821-3"/><param name="allowFullScreen" value="true"/><param name="allowScriptAccess" value="always"/><embed src="http://static.slideshare.net/swf/ssplayer2.swf?doc=sparql-query-forms-1204820309554821-3" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="355"></embed></object><div style="font-size:11px;font-family:tahoma,arial;height:26px;padding-top:2px;"><a href="http://www.slideshare.net/?src=embed"><img src="http://static.slideshare.net/swf/logo_embd.png" style="border:0px none;margin-bottom:-5px" alt="SlideShare"/></a> | <a href="http://www.slideshare.net/ldodds/sparql-query-forms?src=embed" title="View 'SPARQL Query Forms' on SlideShare">View</a> | <a href="http://www.slideshare.net/upload?src=embed">Upload your own</a></div></div>

<p>The talk was streamed online via <a href="http://live.yahoo.com/">Yahoo Live</a> which was a nice touch as one SWIGger was at home with a broken ankle (get well soon Katie!). It'd be nice to see more use of free video streaming at other events.</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000323.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000322.html">
<title>Bee Node Deconstructed</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/227275935/000322.html</link>
<description> As with my first "FOAF tale", "Joe Triple" yesterday's story "Bee Node" was intended as more than an exercise in punning. The original story was intended to help illustrate a few aspects of Semantic Web technology which I think are worth drawing attention to. But this time around the focus is mainly on SPARQL rather than on RDF modelling and ontologies. The SPARQL queries in the story illustrate a general pattern of interaction that I expect will become common in clients accessing data via SPARQL endpoints. This pattern is: ASK, DESCRIBE, CONSTRUCT which I'll call "ADC" from now on....</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-02-01T14:17:10+00:00</dc:date>
<content:encoded><![CDATA[<p>
As with my first "FOAF tale", "<a href="http://www.ldodds.com/blog/archives/000315.html">Joe Triple</a>" yesterday's story "<a href="http://www.ldodds.com/blog/archives/000321.html">Bee Node</a>" was intended as more than an exercise in punning. The original story was intended to help illustrate a few aspects of Semantic Web technology which I think are worth drawing attention to. But this time around the focus is mainly on SPARQL rather than on <a href="http://www.ldodds.com/blog/archives/000316.html">RDF modelling and ontologies</a>.
</p>
<p>
The SPARQL queries in the story illustrate a general pattern of interaction that I expect will become common in clients accessing data via SPARQL endpoints.
</p>

<p>
This pattern is: <code>ASK</code>, <code>DESCRIBE</code>, <code>CONSTRUCT</code> which I'll call "ADC" from now on. What the ADC pattern provides is a way to probe a remote data set to see if it has information that is of interest and then extract information from that data set with increasing levels of precision and control.
</p>

<h3>The ADC Pattern: ASK</h3>

<p>
The initial step is the <code>ASK</code> query. When I was first learning SPARQL I didn't really see the usefulness in ASK. It seemed that the same effect, i.e. detecting whether a given graph pattern can be matched against the data, could be achieved with a SELECT query:
</p>

<pre>
<code>
SELECT *
WHERE {
  ...pattern of interest...
}
LIMIT 1
</code>
</pre>

<p>
If there's at least one row, then we know there's matching data. This kind of query is useful when checking for existence of data in a relational database for example.
</p>
<p>
But, as I understand it, a SPARQL query engine can optimize for this common usage as it need not return any data (as it must do with a SELECT), it can simply terminate the query once it has found the first query solution. Better all round really as the query form better reflects the intent of the query than the "LIMIT 1" 
hack does.
</p>
<p>
Detective Sparql practically applies this query form in his investigation. His first query attempts to find sources that have the location of Bee Node and just asks whether the endpoint has the specific data items:
</p>

<pre>
<code>
PREFIX geo: &lt;http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX foaf: &lt;http://xmlns.com/foaf/0.1/>
ASK WHERE {
  {
  &lt;/person/bnode> 
     geo:lat ?lat;
     geo:long ?long. 
  }
  UNION
  {
  ?person
     foaf:mbox &lt;mailto:bnode@example.com>;
     geo:lat ?lat;
     geo:long ?long.      
  }
}
</code>
</pre>

<p>
The query uses a UNION to ask the same question in slightly different ways. The first pattern uses a URI for Bee Node, the second references her via <a href="http://esw.w3.org/topic/InverseFunctionalProperty">an identifying property</a>. This is a realistic and likely scenario as different endpoints may have different URIs for the same resource.</p>
<p>
The second ASK query that Piotr uses does essentially the same thing, but instead of looking for specific triples, e.g. <code>geo:lat</code> it ASKs a more general question: does the endpoint have <i>any</i> triples for a specified subject; in this case Bee Node. It does this by using a variable in place of both the predicate and object:
</p>

<code>
&lt;/person/bnode> ?p ?o.
</code>

<p>
Queries that use wildcards for properties are a brilliantly useful feature in SPARQL as it allows one to describe very general, reusable graph patterns.
</p>

<p>
Actually Detective Sparql missed a trick here as what he should have asked is:
</p>

<pre>
<code>
ASK WHERE {
  {
  &lt;/person/bnode> ?p ?o.  
  }
  UNION
  {
   ?s ?p &lt;/person/bnode>.
  }
}
</code>
</pre>

<p>
...as that query would have checked for both facts <i>about</i> Bee Node and facts <i>relating to</i> Bee Node.
</p>

<h3>The ADC Pattern: DESCRIBE</h3>

<p>
Following up on the ASK queries, Detective Sparql uses a DESCRIBE query to request that specific sources "spill the beans" and demonstrate what they know and provide whatever information they find useful.
</p>

<p>
This provides a good way to extract some useful view of the data context within which a specific resource sits: its literal properties and relationships to other resources in the dataset. Depending on the algorithm the endpoint uses to generate these views (and the shape of the underlying data set) the amount of data returned by a DESCRIBE query can vary wildly.
</p>

<p>
This is very useful in some contexts; particularly web crawling where the client just wants to execute some general queries and use that as a starting point for further accesses. However in many others this unpredictability may not be suitable, particularly where the client wants or needs to control the shape of the result graph and the amount of information returned.
</p>

<h3>The ADC Pattern: CONSTRUCT</h3>

<p>
It's at this point where the CONSTRUCT query becomes useful.
</p>
<p>
The advantage of a CONSTRUCT query is that it provides the client with complete control over how the result graph is constructed. The client can specify exactly what resources it wants returned and which properties it's interested in.
</p>
<p>
Like ASK I originally wrote off CONSTRUCT and DESCRIBE as being specialized queries that would only be of limited interest. I expected that SELECT queries, which line up very nicely with their SQL equivalents, would be the primary SPARQL query form. But I was mistaken. Now that I've actually begun writing applications that make heavy use of SPARQL I've found that CONSTRUCT is the query form that has most flexibility. There's more to say about that, but <a href="http://www.slideshare.net/ldodds/facet-building-web-pages-with-sparql/">the presentation I gave at a recent SWIG meeting</a> is useful background reading.
</p>
<p>
One important utility of CONSTRUCT is the ability to <i>transform</i> the underlying data set. Currently a CONSTRUCT query is the closest thing that RDF has to XSLT. Using CONSTRUCT a data set can be transformed into a particular shape that may fit the processing expectations of the client application. Although it should be said that CONSTRUCT is a poor cousin to XSLT (or SQL for that matter), in that it's limited in what it can achieve. At least until SPARQL gets more basic functions for things like string manipulation.
</p>
<p>
Detective Sparql uses this feature to transform SIOC data into his preferred ontology. This is going to be inevitable where vocabularies don't neatly line up with one another as is the case with SIOC and FOAF.
</p>
<p>
CONSTRUCT also provides a limited form of inferencing capability without requiring a full reasoner.
</p>
<p>
Where CONSTRUCT is limited is in its ability to traverse an RDF graph. Limited in the sense that the traversal must be explicitly specified. DESCRIBE doesn't suffer from this, except that you have to rely on the SPARQL endpoint deciding where and how far to traverse. It'd be interesting to see DESCRIBE extended to allow the client to specify the algorithm for generating the view.
</p>

<p>
Hopefully this posting demonstrates some aspects of SPARQL which go beyond the simple query language, and illustrates how the different query forms have their own strengths and weaknesses and how they can be combined to work with data out in the wild.
</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000322.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000321.html">
<title>Bee Node: A FOAF Tale</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/226416483/000321.html</link>
<description><![CDATA[ Detective Piotr Sparql lent back in his chair cradling a tumbler of vodka and reflected on his most recent case. It had started as a simple missing person; he'd been assigned to investigate the disappearance of Beatrice "Bee" Node: @prefix foaf: &lt;http://xmlns.com/foaf/0.1> . &lt;/person/bnode> a foaf:Person; foaf:name "Beatrice Node"; foaf:nick "Bee" foaf:mbox &lt;mailto:bnode@example.com>. His investigation had started out routinely enough: trawling his usual sources to see if any of them had word of Bee's location: PREFIX geo &lt;http://www.w3.org/2003/01/geo/wgs84_pos#> PREFIX foaf &lt;http://xmlns.com/foaf/0.1> ASK WHERE { { &lt;/person/bnode> geo:lat ?lat; geo:long ?long. } UNION { ?person foaf:mbox &lt;mailto:bnode@example.com>; geo:lat ?lat; geo:long...]]></description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-01-31T12:18:09+00:00</dc:date>
<content:encoded><![CDATA[<p>
Detective Piotr Sparql leant back in his chair cradling a tumbler of vodka and reflected on his most recent case. It had started as a simple missing person; he'd been assigned to investigate the disappearance of Beatrice "Bee" Node:
</p>

<pre>
<code>
@prefix foaf:  &lt;http://xmlns.com/foaf/0.1/> .
&lt;/person/bnode> a foaf:Person;
   foaf:name "Beatrice Node";
   foaf:nick "Bee";
   foaf:mbox &lt;mailto:bnode@example.com>.
</code>
</pre>

<p>
His investigation had started out routinely enough: trawling his usual sources to see if any of them had word of Bee's location:
</p>

<pre>
<code>
PREFIX geo: &lt;http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX foaf: &lt;http://xmlns.com/foaf/0.1/>
ASK WHERE {
  {
  &lt;/person/bnode> 
     geo:lat ?lat;
     geo:long ?long. 
  }
  UNION
  {
  ?person
     foaf:mbox &lt;mailto:bnode@example.com>;
     geo:lat ?lat;
     geo:long ?long.      
  }
}
</code>
</pre>

<p>
And then Bee turned up. Dead.
</p>

<pre>
<code>
@prefix xsd: &lt;http://www.w3.org/2001/XMLSchema#>.
@prefix foaf:  &lt;http://xmlns.com/foaf/0.1/> .
@prefix bio: &lt;http://purl.org/vocab/bio/0.1/> .
&lt;/person/bnode> a foaf:Person;
   foaf:name "Beatrice Node";
   bio:event [ a bio:Death;
               bio:date "2008-01-29"^^xsd:date
             ].   
</code>
</pre>

<p>
So he'd begun leaning on his sources harder, attempting to find those that had anything on Bee that might be useful in tracking down her murderer:
</p>

<pre>
<code>
PREFIX foaf: &lt;http://xmlns.com/foaf/0.1/>
ASK WHERE {
  {
  &lt;/person/bnode> ?p ?o.  }
  UNION
  {
  ?person
     foaf:mbox &lt;mailto:bnode@example.com>;
     ?p ?o.
  }
}
</code>
</pre>

<p>
...and then getting them to spill what they knew:
</p>

<pre>
<code>
DESCRIBE &lt;/person/bnode>
</code>
</pre>

<p>Pickings were slim. He tried a few obvious tacks:</p>

<pre>
<code>
PREFIX rel: &lt;http://purl.org/vocab/relationship/>
PREFIX foaf: &lt;http://xmlns.com/foaf/0.1/>
SELECT ?name ?mbox
WHERE {
  ?suspect rel:enemyOf &lt;/person/bnode>.
  ?suspect foaf:name ?name.
  ?suspect foaf:mbox ?mbox.
}
</code>
</pre>

<p>But Bee had had few enemies and all of them had alibis. He widened his search through the social networks:</p>

<pre>
<code>
PREFIX rel: &lt;http://purl.org/vocab/relationship/>
PREFIX foaf: &lt;http://xmlns.com/foaf/0.1/>
SELECT ?name ?mbox
WHERE {
  ?suspect rel:enemyOf &lt;/person/bnode>.
  ?suspect foaf:knows ?otherSuspect.
  ?otherSuspect foaf:name ?name.
  ?otherSuspect foaf:mbox ?mbox.
}
</code>
</pre>

<p>But everyone's alibis were water-tight. At this point he'd gone back to basics, gathering everything he could on the late lamented Bee Node. On a hunch he probed for more background on Bee's social network. She'd been active in a number of forums and he'd figured that she may have unknowingly upset someone:</p>

<pre>
<code>
PREFIX sioc: &lt;http://rdfs.org/sioc/ns#>
PREFIX foaf: &lt;http://xmlns.com/foaf/0.1/>

CONSTRUCT {
  ?suspect a foaf:Person;
           foaf:name ?name;
           foaf:mbox ?mbox.
}
WHERE {
  {
    ?post a sioc:Post;
          sioc:has_creator ?bee;
          sioc:has_reply ?reply.
          
    ?bee sioc:email &lt;mailto:bnode@example.com>.
       
    ?reply sioc:has_creator ?suspect.
    
    ?suspect sioc:name ?name;
    	     sioc:email ?mbox.
    	  
  }
  UNION
  {
    ?post a sioc:Post;
          sioc:has_creator ?suspect;
          sioc:has_reply ?reply.
                     
    ?reply sioc:has_creator ?bee.

    ?bee sioc:email &lt;mailto:bnode@example.com>.    
    
    ?suspect sioc:name ?name;
    	     sioc:email ?mbox.
  }
}
</code>
</pre>

<p>
Cross-referencing the email addresses on the short list of suspects, with data taken from <a href="http://blog.nominet.org.uk/tech/2006/06/06/the-semantic-web-web-ontology-language-owl-example/">a contact at nominet</a>, he'd managed to gather some addresses:
</p>

<pre>
<code>
PREFIX whois: &lt;http://xml.nominet.org.uk/rdf/nom/domain#>
PREFIX foaf: &lt;http://xmlns.com/foaf/0.1/>
PREFIX util: &lt;http://www.example.org/sparql/util/>
SELECT 
  ?name ?mbox ?line1 ?line2 ?postcode ?country
  
WHERE {
  ?suspect foaf:name ?name;
           foaf:mbox ?mbox.
  
  ?d a whois:domainName;
      whois:domainNameValue ?domainName;
      whois:hasRegistrant ?registrant.
    
  ?registrant whois:registrantAddress ?address.

  ?address whois:addressline1 ?line1;
         whois:addressline2 ?line2;
         whois:postcode ?postcode;
         whois:country ?country;
         
  FILTER ( ?domainName = util:ExtractMailDomain(?mbox) )         
}
ORDER BY ?name
</code>
</pre>

<p>
The rest had come down to old fashioned legwork. He cursed himself softly as he finished his drink, pouring himself another slug of Absolut from the bottle in his desk drawer. In his haste he'd missed the obvious angles; hadn't bothered to check out the family. After all they'd all seemed so...anonymous at first glance.</p>

<p>The murderer? Her relative: Uri. He'd been masquerading under an 
alias.</p>
]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000321.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000320.html">
<title>Self-Description for Service Connection</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/225462032/000320.html</link>
<description>I hate quoting myself, as I worry about it making me seem like a pompous ass, but I feel moved to do it in this instance after reading Danny's posting about DataPortability Service Discovery, in which he discusses the current blueprint from the DataPortability group. Danny rightly points out that FOAF already provides a means for listing all of the accounts that a person uses as part of their online activity. The vocabulary allows the service to be identified along with their account username. This is typically sufficient information to start interacting with a service API to extract useful information...</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2008-01-29T20:53:29+00:00</dc:date>
<content:encoded><![CDATA[<p>I hate quoting myself, as I worry about it making me seem like a pompous ass, but I feel moved to do it in this instance after reading Danny's posting about <a href="http://dannyayers.com/2008/01/29/dataportability-service">DataPortability Service Discovery</a>, in which he discusses <a href="http://groups.google.com/group/dataportability-public/web/reference-design">the current blueprint</a> from the DataPortability group.</p>

<p>Danny rightly points out that FOAF already provides a means for listing all of the accounts that a person uses as part of their online activity. The vocabulary allows the service to be identified along with their account username. This is typically sufficient information to start interacting with a service API to extract useful information about the user. E.g. for importing into another site.</p>

<p>Here's the example that I included in <a href="http://www.idealliance.org/proceedings/xtech05/papers/02-07-04/">an XTech paper I presented in 2005</a>:</p>

<pre>
<code>&lt;foaf:Person>
 &lt;foaf:holdsAccount>
  &lt;foaf:OnlineAccount>
	&lt;foaf:accountName>ldodds&lt;/foaf:accountName>
	  &lt;foaf:accountServiceHomepage 
	      rdf:resource="http://del.icio.us"/>
  &lt;/foaf:OnlineAccount>
 &lt;/foaf:holdsAccount>
&lt;/foaf:Person>
</code>
</pre>

<p>With that bit of information you can easily get access to my del.icio.us bookmarks, for example. The limitation in this kind of approach, whether it's implemented using FOAF, or using the protocol outlined in the DataPortability blueprint, is that a third-party service wanting to extract data about the user needs some prior knowledge of the service it will be interacting with: it needs knowledge of the API (i.e. a client) and also what kind of information it holds about the user (i.e. does it contain relevant data?).</p>

<p>And in my opinion this doesn't scale. For truly distributed, ad hoc service integration, I think you need a slightly different approach to the problem. And in my opinion to achieve this means embracing a more RESTful approach, and one that ideally takes advantage of the flexibility of RDF.</p>

<p>Rather than simply providing a list of services, I should <i>point to the data</i>. Towards the end of my paper (see the section "Self-Description as Service Connectors") I suggested that use of <code>rdfs:seeAlso</code> to create RDF hyperlinks between documents <i>and</i> appropriately typing the linked resources will bring two advantages. Firstly it avoids the need to trawl through unnecessary services in order to get at the data that's of interest, the user can explicitly point to it. Secondly there's no need for API specific clients beyond the need for an HTTP GET request.</p>

<p>Here's the example in the paper rewritten to address a particular <a href="http://groups.google.com/group/dataportability-public/web/use-cases">DataPortability use case</a>: "Aggregate your, and your friend's,  "Status" (eg Twitter) from all the "Status" systems you belong to."</p>

<p>Firstly "my friends" can be those people listed in my FOAF document. FOAF provides the basic data substrate for glueing the services together. Secondly, I point to a web resource from which my "Status" message(s) can be retrieved:</p>

<pre>
<code>
&lt;foaf:Person>
  &lt;eg:statuses>
     &lt;eg:Status 
         rdf:resource="http://twitter.com/statuses/user_timeline/14813.atom"/>
  &lt;/eg:statuses>
&lt;/foaf:Person>
</code>
</pre>

<p>So a third-party service that needs to find my current Status simply identifies the relevant resource and then takes that URI and does an HTTP GET on it.</p>

<p>Then let's say I decide to move from Twitter and use some other service. Here's what happens:</p>

<pre>
<code>
&lt;foaf:Person>
  &lt;eg:statuses>
     &lt;eg:Status 
         rdf:resource="http://example.com/status/ldodds"/>
  &lt;/eg:statuses>
&lt;/foaf:Person>
</code>
</pre>

<p>See what I did? And guess what that Status aggregator has to do: Nothing.</p>

<p>In my opinion this rightly shifts the emphasis away from the details of individual service APIs and encourages standardization on data formats. Surely this has to be the most important aspect to Data Portability? For example it will encourage sites that produce Status messages to agree on how these will be published onto the web, whether that involves explicit standardization or simple adoption of a standard like Atom.</p>

<p>As I've written before, RDF does have some nice properties for <a href="http://www.ldodds.com/blog/archives/000314.html">enabling data integration</a> and allowing for independent evolution of community specific vocabularies which are worth exploring in this context.</p>

<p>I really don't see the need for intermediary services at all to create this kind of connection beyond services that allow for maintenance of a FOAF profile. The other nice property of this form of interaction is that I don't need to use <i>any</i> services. If I decide to manage my own online presence, manage my own OpenID, and publish all my public data as a collection of hand-crafted static data files on my own server, then that's fine: it's all just URIs. </p>

<p>If we want true ownership of our own data, and true portability, then the means of integration needs to support this at the most fundamental level.</p>

<p>Pompous ass mode off.<br />
</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000320.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000318.html">
<title>Twinkle 2.0</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/204066931/000318.html</link>
<description>Nearly three years ago now I announced a little GUI tool , called Twinkle for working with SPARQL queries. Since then a number of nice people have asked whether I'm going to update the tool to add various features, like support for querying persistent data, inferencing, etc. And Danny pointed out that the UI wasn't exactly twinkly; and he was right it was crap. Anyway, I've finally gotten around to releasing a new version, Twinkle 2.0, which has all the features that everyone has asked for, and I've even tried to accommodate Danny and make the user interface a bit...</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2007-12-21T14:09:22+00:00</dc:date>
<content:encoded><![CDATA[<p>Nearly three years ago now I <a href="http://www.ldodds.com/blog/archives/000182.html">announced</a> a little GUI tool, called <a href="http://www.ldodds.com/projects/twinkle/">Twinkle</a>, for working with SPARQL queries. Since then a number of nice people have asked whether I'm going to update the tool to add various features, like support for querying persistent data, inferencing, etc. And Danny pointed out that the UI wasn't exactly twinkly; and he was right: it was crap.</p>

<p>Anyway, I've finally gotten around to releasing a new version, <a href="http://www.ldodds.com/projects/twinkle/">Twinkle 2.0</a>, which has all the features that everyone has asked for, and I've even tried to accommodate Danny and make <a href="http://flickr.com/photos/ldodds/tags/twinkle/">the user interface</a> a bit nicer. The project page lists the <a href="http://www.ldodds.com/projects/twinkle/#features">features</a> which I think are reasonably comprehensive.</p>

<p>The tool isn't going to languish for another three years as it's going to become part of our toolset at work. So, I've got a growing list of further improvements I'd like to make to it, including adding SPARQL update support, I18N of the user interface, and some performance improvements. If you have a feature you'd like to see then <a href="mailto:leigh@ldodds.com">drop me a mail</a>.</p>

<p>So, happy christmas to all you nice Semantic Web folk, this is an Xmas present from me to you.</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000318.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000316.html">
<title>Joe Triple: Deconstructed</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/193412876/000316.html</link>
<description> I'm sure that the majority of you realised that yesterdays posting, Joe Triple: A FOAF Tale was more than just a bit of whimsy. The story is intended as an illustration of a few semantic web principles. In this post I wanted to review those principles in a clearer way, although I can't promise to have shed all the whimsy! The first thing you need to understand is that... There Is No Spoon RDF is the Resource Description Framework, so it's all about Resources, correct? Well yes, and no, it depends on your perspective. Obviously a key concept in...</description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2007-12-01T11:05:53+00:00</dc:date>
<content:encoded><![CDATA[<p>
I'm sure that the majority of you realised that yesterday's posting, 
<a href="http://www.ldodds.com/blog/archives/000315.html">Joe Triple: A FOAF Tale</a> was more 
than just a bit of whimsy. The story is intended as an 
illustration of a few semantic web principles.
</p>
<p>
In this post I wanted to review those principles in a 
clearer way, although I can't promise to have shed all the whimsy!
</p>
<p>
The first thing you need to understand is that...
</p>
<h2>There Is No Spoon</h2>
<p>
RDF is the Resource Description Framework, so it's all about
Resources, correct?
</p>
<p>
Well yes, and no, it depends on your perspective. Obviously
a key concept in RDF is the notion of a Resource and the
primary goal of the technology is to provide the ability to
associate metadata with those resources.
</p>
<p>
But that's only one perspective. The human one. A more
valid perspective is that RDF is all about <i>properties</i>.
In the RDF model a Resource does not, <i>cannot</i>, exist
if there are no statements that refer to it. A Resource
does not exist as an entity in its own right and it comes into
being when a statement is made about it. A Resource is therefore fully
defined by its properties. And, in the closed world of
an RDF triple store, if you delete all the statements about a
Resource then it ceases to exist.
</p>
<p>
So, you see, there is no spoon.
</p>

<h2>Just the Facts Ma'am</h2>
<p>
When you make the first assertion about a Resource you create it (again,
within the closed world of a given store), and this is where the story of Joe Triple began, with the creation of a Resource:
</p>
<pre><code>
@prefix foaf:  &lt;http://xmlns.com/foaf/0.1/&gt; .
&lt;/joe&gt; foaf:name "Joe Triple".
</code></pre>
<p>
But, for a RDF Schema-aware processor, by making
this assertion I've actually done a bit more: I've
also assigned a type to the Resources referenced in the
statement. In effect I've made some additional assertions
without knowing it.
</p>
<p>
If we look in the FOAF schema at the definition of the
<code><a href="http://xmlns.com/foaf/spec/#term_name">foaf:name</a></code>
property, then we'll find something like this (I've translated
it into N3):</p>
<pre><code>
@prefix rdf:  &lt;http://www.w3.org/1999/02/22-rdf-syntax-ns#&gt; .
@prefix rdfs: &lt;http://www.w3.org/2000/01/rdf-schema#&gt; .
@prefix owl:  &lt;http://www.w3.org/2002/07/owl#&gt; .
@prefix foaf:  &lt;http://xmlns.com/foaf/0.1/&gt; .
foaf:name a rdf:Property;
  rdfs:domain owl:Thing;
  rdfs:range rdfs:Literal.
</code></pre>
<p>
Which defines <code>foaf:name</code> as an RDF property whose
values are Literals. It also says that when this property is
used then the subject of the statement will be of type
<code>owl:Thing</code>. The subject of the statement may have
other types either explicitly stated or implied, but the subject will <i>always</i>
be of type <code>owl:Thing</code>.
</p>

<p>
So, to an RDF application that has loaded (or been coded to understand)
the FOAF schema then, after asserting that Joe has a name, I've
also said:
</p>

<pre><code>
@prefix owl:  &lt;http://www.w3.org/2002/07/owl#&gt; .
&lt;/joe&gt; a owl:Thing.
</code></pre>
<p>
The rest of the story which charts Joe's journey from being a
lonely <code>owl:Thing</code> to a fully rounded <code>foaf:Person</code>
is entirely defined by the progressive disclosure of more facts:
when we learn Joe has a <code>foaf:birthday</code> we know that
he's also a <code>foaf:Agent</code>; once we know Joe has some
<code>foaf:interest</code>s we know that Joe is a <code>foaf:Person</code>.</p>
<p>
And at the end of the story, when we learn that Joe has become
the <code>rel:mentorOf</code> some other resources, we know that
they too <i>must</i> be of type <code>foaf:Person</code>, even without
knowing anything else about them, because that's what's defined in the
<a href="http://vocab.org/relationship">relationship schema</a>. That's
the "world view" that the schema embodies.
</p>

<p>
There's a certain conciseness here that appeals to me; a single
assertion may yield additional implied statements. While the
examples here are trivial, it's not hard to see how in a more
complex data set with more complex ontologies, these additional
"implied" assertions may actually yield useful data for an
application. This data could be used to influence the application's
behaviour in various ways. For example by prompting the user for more information; adapting its user interface to better present the data; or recognising that additional information could be gleaned through some directed crawling or searching of a particular web service.
</p>

<h2>The World View of Crowds</h2>

<p>
Publishing an RDF schema or an OWL ontology involves expressing a
particular world view to which you believe your data conforms. And
when you reuse a particular vocabulary you're subscribing to that
world view. And when we begin to connect up vocabularies, using
some of the techniques I described in <a href="http://www.ldodds.com/blog/archives/000314.html">my post on integration</a>,
we're coming to agreement on a particular view or interpretation
of data that uses those schemas. It seems to me that this presents
further ways to benefit from "The Wisdom of Crowds", as using Semantic Web technologies we can also share interpretations of data.
</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000316.html</feedburner:origLink></item>
<item rdf:about="http://www.ldodds.com/blog/archives/000315.html">
<title>Joe Triple: A FOAF Tale</title>
<link>http://feeds.feedburner.com/~r/LostBoy/~3/192622383/000315.html</link>
<description><![CDATA[Once upon a time there was a Resource whose name was Joe Triple: @prefix foaf: &lt;http://xmlns.com/foaf/0.1> . &lt;/joe> foaf:name "Joe Triple". Joe was a lonely Thing, and resolved to set off into the world to learn more about himself. On this quest for self discovery Joe learnt that his birthday was 29th November 2007: @prefix foaf: &lt;http://xmlns.com/foaf/0.1> . &lt;/joe> foaf:name "Joe Triple"; foaf:birthday "2007-11-29". Heartened by learning this new fact about himself and feeling more and more like a free Agent, Joe was encouraged to continue his journey of self-discovery. Joe acquired an email address and started a blog in...]]></description>
<dc:subject>Semantic Web</dc:subject>
<dc:creator>ldodds</dc:creator>
<dc:date>2007-11-29T22:04:24+00:00</dc:date>
<content:encoded><![CDATA[<p>Once upon a time there was a Resource whose <a href="http://xmlns.com/foaf/spec/#term_name">name</a> was Joe Triple:</p>
<pre><code>
@prefix foaf:  &lt;http://xmlns.com/foaf/0.1/> .
&lt;/joe> foaf:name "Joe Triple".
</code></pre>
<p>Joe was a lonely <a href="http://www.w3.org/TR/owl-guide/#DefiningSimpleClasses">Thing</a>, and resolved to set off into the world to learn more about himself.</p>
<p>On this quest for self discovery Joe learnt that his <a href="http://xmlns.com/foaf/spec/#term_birthday">birthday</a> was 29th November 2007:</p>
<pre><code>
@prefix foaf:  &lt;http://xmlns.com/foaf/0.1/> .
&lt;/joe> foaf:name "Joe Triple";
         foaf:birthday "2007-11-29".
</code></pre>
<p>Heartened by learning this new fact about himself and feeling more and more like a free <a href="http://xmlns.com/foaf/spec/#term_Agent">Agent</a>, Joe was encouraged to continue his journey of self-discovery. Joe acquired an <a href="http://xmlns.com/foaf/spec/#term_mbox">email address</a> and started a <a href="http://xmlns.com/foaf/spec/#term_weblog">blog</a> in order to have further contact with the world:</p>
<pre><code>
@prefix foaf:  &lt;http://xmlns.com/foaf/0.1/> .
&lt;/joe> foaf:name "Joe Triple";
         foaf:birthday "2007-11-29";
         foaf:weblog &lt;http://www.example.org/~joe/blog>;
         foaf:mbox &lt;mailto:joe.triple@example.org>.
</code></pre>
<p>Through his travels and researches Joe developed many new <a href="http://xmlns.com/foaf/spec/#term_interest">interests</a>:</p>
<pre><code>
@prefix foaf:  &lt;http://xmlns.com/foaf/0.1/> .
&lt;/joe> foaf:name "Joe Triple";
         foaf:birthday "2007-11-29";
         foaf:weblog &lt;http://www.example.org/~joe/blog>;
         foaf:mbox &lt;mailto:joe.triple@example.org>;
         foaf:interest &lt;http://www.w3.org/RDF/>;
         foaf:interest &lt;http://en.wikipedia.org/wiki/Travel>.
</code></pre>
<p>These interests helped Joe to define himself as a <a href="http://xmlns.com/foaf/spec/#term_Person">Person</a>. In addition Joe developed a network of many friends, for many of whom he became a <a href="http://vocab.org/relationship/#term-mentorOf">mentor</a>:</p>
<pre><code>
@prefix foaf:  &lt;http://xmlns.com/foaf/0.1/> .
@prefix rel: &lt;http://purl.org/vocab/relationship/> .
&lt;/joe> foaf:name "Joe Triple";
         foaf:birthday "2007-11-29";
         foaf:weblog &lt;http://www.example.org/~joe/blog>;
         foaf:mbox &lt;mailto:joe.triple@example.org>;
         foaf:interest &lt;http://www.w3.org/RDF/>;
         foaf:interest &lt;http://en.wikipedia.org/wiki/Travel>;
         rel:mentorOf &lt;/rod>;
         rel:mentorOf &lt;/jane>;
         rel:mentorOf &lt;/fred>.
</code></pre>
<p>All of these people were enriched by their encounters with Joe, who helped each of them define themselves as <a href="http://xmlns.com/foaf/spec/#term_Person">people</a>.</p>
<p>The End.</p>]]></content:encoded>
<feedburner:origLink>http://www.ldodds.com/blog/archives/000315.html</feedburner:origLink></item>


<cc:License xmlns:cc="http://web.resource.org/cc/" rdf:about="http://creativecommons.org/licenses/by-nc-sa/2.0/"><cc:permits rdf:resource="http://web.resource.org/cc/Reproduction" /><cc:permits rdf:resource="http://web.resource.org/cc/Distribution" /><cc:permits rdf:resource="http://web.resource.org/cc/DerivativeWorks" /><cc:requires rdf:resource="http://web.resource.org/cc/Notice" /><cc:requires rdf:resource="http://web.resource.org/cc/Attribution" /><cc:requires rdf:resource="http://web.resource.org/cc/ShareAlike" /><cc:prohibits rdf:resource="http://web.resource.org/cc/CommercialUse" /></cc:License></rdf:RDF>
