<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/atom10full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearch/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" gd:etag="W/&quot;A0UEQnYyfCp7ImA9WhRaEUk.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812</id><updated>2012-02-13T11:06:43.894-05:00</updated><category term="sandbox" /><category term="data integration" /><category term="Talend" /><category term="bloggers" /><category term="teradata" /><category term="tools" /><category term="Air2Web" /><category term="mergers" /><category term="networking event" /><category term="MIT IQ Conference" /><category term="ofac" /><category term="dedupe records" /><category term="books" /><category term="information engineer" /><category term="deterministic" /><category term="open source" /><category term="conference" /><category term="Oracle" /><category term="green technology" /><category term="address verification" /><category term="mission statement" /><category term="mainframe" /><category term="big data" /><category term="data migration" /><category term="information quality" /><category term="data modeling" /><category term="direct mail" /><category term="Identex" /><category term="SAP" /><category term="postal validation" /><category term="consulting services" /><category term="trillium software" /><category term="financial services" /><category term="Business Objects" /><category term="supply chain" /><category term="data steward" /><category term="business strategy" /><category term="NetWeaver" /><category term="call center" /><category term="national ID database" /><category term="HP" 
/><category term="change management" /><category term="master data management" /><category term="UMB" /><category term="data quality" /><category term="CRM" /><category term="Informatica" /><category term="data governance team" /><category term="global information quality" /><category term="roundup" /><category term="politics" /><category term="economy" /><category term="donation" /><category term="Rob Karel" /><category term="sdn" /><category term="banks" /><category term="mdm" /><category term="webinars" /><category term="CSR" /><category term="data profiling" /><category term="butterfly effect" /><category term="agile data management" /><category term="Forrester Research" /><category term="Internation MIT IQ Conference" /><category term="data governance" /><category term="carnival" /><category term="IDQ Conference" /><category term="matching algorithms" /><category term="reference data" /><category term="probabilistic" /><category term="social media" /><category term="SAP CRM" /><category term="unstructured data" /><category term="erp" /><category term="Fuzzy Informatik" /><category term="MGM Grand Resorts" /><title>Data Governance and Data Quality Insider</title><subtitle type="html">Covering the world of data integration, data governance, and data quality.</subtitle><link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/posts/default" /><link rel="alternate" type="text/html" href="http://data-governance.blogspot.com/" /><link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default?start-index=26&amp;max-results=25&amp;redirect=false&amp;v=2" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" 
src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><generator version="7.00" uri="http://www.blogger.com">Blogger</generator><openSearch:totalResults>118</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/atom+xml" href="http://feeds.feedburner.com/blogspot/fCls" /><feedburner:info uri="blogspot/fcls" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><geo:lat>42.54805</geo:lat><geo:long>-71.250547</geo:long><feedburner:emailServiceId>blogspot/fCls</feedburner:emailServiceId><feedburner:feedburnerHostname>http://feedburner.google.com</feedburner:feedburnerHostname><feedburner:feedFlare href="http://add.my.yahoo.com/rss?url=http%3A%2F%2Ffeeds.feedburner.com%2Fblogspot%2FfCls" src="http://us.i1.yimg.com/us.yimg.com/i/us/my/addtomyyahoo4.gif">Subscribe with My Yahoo!</feedburner:feedFlare><feedburner:feedFlare href="http://www.newsgator.com/ngs/subscriber/subext.aspx?url=http%3A%2F%2Ffeeds.feedburner.com%2Fblogspot%2FfCls" src="http://www.newsgator.com/images/ngsub1.gif">Subscribe with NewsGator</feedburner:feedFlare><feedburner:feedFlare href="http://feeds.my.aol.com/add.jsp?url=http%3A%2F%2Ffeeds.feedburner.com%2Fblogspot%2FfCls" src="http://o.aolcdn.com/favorites.my.aol.com/webmaster/ffclient/webroot/locale/en-US/images/myAOLButtonSmall.gif">Subscribe with My AOL</feedburner:feedFlare><feedburner:feedFlare href="http://www.bloglines.com/sub/http://feeds.feedburner.com/blogspot/fCls" src="http://www.bloglines.com/images/sub_modern11.gif">Subscribe with Bloglines</feedburner:feedFlare><feedburner:feedFlare href="http://www.netvibes.com/subscribe.php?url=http%3A%2F%2Ffeeds.feedburner.com%2Fblogspot%2FfCls" src="http://www.netvibes.com/img/add2netvibes.gif">Subscribe with 
Netvibes</feedburner:feedFlare><feedburner:feedFlare href="http://fusion.google.com/add?feedurl=http%3A%2F%2Ffeeds.feedburner.com%2Fblogspot%2FfCls" src="http://buttons.googlesyndication.com/fusion/add.gif">Subscribe with Google</feedburner:feedFlare><feedburner:feedFlare href="http://www.pageflakes.com/subscribe.aspx?url=http%3A%2F%2Ffeeds.feedburner.com%2Fblogspot%2FfCls" src="http://www.pageflakes.com/ImageFile.ashx?instanceId=Static_4&amp;fileName=ATP_blu_91x17.gif">Subscribe with Pageflakes</feedburner:feedFlare><entry gd:etag="W/&quot;A0UEQnc7fSp7ImA9WhRaEUk.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-8935775883342167125</id><published>2012-01-24T10:14:00.001-05:00</published><updated>2012-02-13T11:06:43.905-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-02-13T11:06:43.905-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="big data" /><title>Big Data, Enterprise Data and Discrete Data</title><content type="html">&lt;b&gt;Total Data Management&lt;/b&gt;©&lt;br /&gt;
The data management world is buzzing about big data. &amp;nbsp;Many blog posts, articles, and white papers are covering this new area. Just about every data management vendor is scrambling to build tools to meet the needs of big data.&lt;br /&gt;
&lt;br /&gt;
The world is right to take notice. The ability for companies to handle big data represents exciting innovation where large relational databases with high price tags are sometimes replaced with flat files, technologies like Hadoop and intelligent parsers to create analytics from massive amounts of data. &amp;nbsp;It’s a game-changer for those in the Business Intelligence and relational database business. &amp;nbsp;It’s about managing an increasingly common huge data problem more effectively and at lower cost.&lt;br /&gt;
&lt;br /&gt;
However, where there is big data, there is also enterprise (medium) data and discrete (small) data. With each size of data come very specific challenges. &amp;nbsp; &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;table align="left" border="1" cellpadding="0" cellspacing="0" class="MsoTableGrid" style="border-collapse: collapse; border: none; margin-left: 6.75pt; margin-right: 6.75pt; mso-border-alt: solid windowtext .5pt; mso-padding-alt: 0in 5.4pt 0in 5.4pt; mso-table-anchor-horizontal: margin; mso-table-anchor-vertical: paragraph; mso-table-left: left; mso-table-lspace: 9.0pt; mso-table-rspace: 9.0pt; mso-table-top: 131.65pt; mso-yfti-tbllook: 1184;"&gt;&lt;tbody&gt;
&lt;tr style="height: 14.25pt; mso-yfti-firstrow: yes; mso-yfti-irow: 0; page-break-inside: avoid;"&gt;   &lt;td style="border: solid windowtext 1.0pt; height: 8.25pt; mso-border-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 72.9pt;" valign="top" width="97"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-left: none; border: solid windowtext 1.0pt; height: 8.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 139.5pt;" valign="top" width="186"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;b&gt;&lt;span style="font-size: x-small;"&gt;BIG DATA&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-left: none; border: solid windowtext 1.0pt; height: 8.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.9in;" valign="top" width="182"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;b&gt;&lt;span style="font-size: x-small;"&gt;ENTERPRISE DATA&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-left: none; border: solid windowtext 1.0pt; height: 8.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.8in;" valign="top" width="173"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;DISCRETE DATA&lt;/b&gt;&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;
&lt;tr style="height: 98.95pt; mso-yfti-irow: 1; page-break-inside: avoid;"&gt;   &lt;td style="border-top: none; border: solid windowtext 1.0pt; height: 98.95pt; mso-border-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 72.9pt;" valign="top" width="97"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Technologies&lt;/b&gt;&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 98.95pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 139.5pt;" valign="top" width="186"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Hadoop and flat   files to reduce costs and avoid relational database costs.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 98.95pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.9in;" valign="top" width="182"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Relational databases&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 98.95pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.8in;" valign="top" width="173"&gt;&lt;div class="MsoNormal" 
style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Spreadsheets and   flat files and flat databases. May come from other non-relational sources,   such as e-mail attachments, social media JSON, and XML data.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;
&lt;tr style="height: 98.95pt; mso-yfti-irow: 2; page-break-inside: avoid;"&gt;   &lt;td style="border-top: none; border: solid windowtext 1.0pt; height: 98.95pt; mso-border-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 72.9pt;" valign="top" width="97"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Use Cases&lt;/b&gt;&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 98.95pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 139.5pt;" valign="top" width="186"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Real-time analytics   of a large number of transactions, including web analytics, SaaS up-time   optimization, mission-critical analysis of transactions&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 98.95pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.9in;" valign="top" width="182"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Just about every   business application today, including CRM, ERP, Data Warehouse, and MDM.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 98.95pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; 
mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.8in;" valign="top" width="173"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Companies with no or   little data management strategy, or for those companies dealing with immature   data architecture. Companies who receive mission-critical data via   e-mail.&amp;nbsp; Companies who need to closely   follow social media streams.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;
&lt;tr style="height: 56.2pt; mso-yfti-irow: 3; page-break-inside: avoid;"&gt;   &lt;td style="border-top: none; border: solid windowtext 1.0pt; height: 56.2pt; mso-border-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 72.9pt;" valign="top" width="97"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Innovation&lt;/b&gt;&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 56.2pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 139.5pt;" valign="top" width="186"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Handles huge amounts   of data that is predominantly used for business analytics and operational BI.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 56.2pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.9in;" valign="top" width="182"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Provides a power   data management architecture that can be accessed by a common language (SQL).&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 56.2pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 
0in 5.4pt 0in 5.4pt; width: 1.8in;" valign="top" width="173"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Handles more diverse   and more dynamic sources.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;
&lt;tr style="height: 14.25pt; mso-yfti-irow: 4; page-break-inside: avoid;"&gt;   &lt;td style="border-top: none; border: solid windowtext 1.0pt; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 72.9pt;" valign="top" width="97"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Positives&lt;/b&gt;&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 139.5pt;" valign="top" width="186"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Replaces high cost   multi-server relational databases with lower costs flat files and Hadoop   server farms.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.9in;" valign="top" width="182"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Provides a scalable,   reproducible environment in which database applications and solutions can be   developed. Replaces unwieldy human-intensive data processes with streamlined   central repository of information. 
Used in many businesses in day-to-day   operations.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.8in;" valign="top" width="173"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;‘Simplifies’ the data   management process to the point of being completely within the grasp of the   business users without too much complicated technology.&amp;nbsp; In the long run, however, data management   is more costly and unwieldy when it is in spreadmarts.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;
&lt;tr style="height: 14.25pt; mso-yfti-irow: 5; page-break-inside: avoid;"&gt;   &lt;td style="border-top: none; border: solid windowtext 1.0pt; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 72.9pt;" valign="top" width="97"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Negatives&lt;/b&gt;&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 139.5pt;" valign="top" width="186"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Relatively new   technology with limited pool of Big Data experts. 
Legacy medium-sized systems   can sometimes scale.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.9in;" valign="top" width="182"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Can be costly when   data volumes become high, as new servers and new enterprise licenses get more   common.&amp;nbsp; Also, the number of sources   and diversity of data types.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.8in;" valign="top" width="173"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Error-prone and   labor intensive. &lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;
&lt;tr style="height: 14.25pt; mso-yfti-irow: 6; mso-yfti-lastrow: yes; page-break-inside: avoid;"&gt;   &lt;td style="border-top: none; border: solid windowtext 1.0pt; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 72.9pt;" valign="top" width="97"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;&lt;b&gt;Cost Focus&lt;/b&gt;&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 139.5pt;" valign="top" width="186"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Expertise&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.9in;" valign="top" width="182"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Servers and   licenses/ Connectors and database technology&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-bottom: solid windowtext 1.0pt; border-left: none; border-right: solid windowtext 1.0pt; border-top: none; height: 14.25pt; mso-border-alt: solid windowtext .5pt; mso-border-left-alt: solid windowtext .5pt; mso-border-top-alt: solid windowtext .5pt; padding: 0in 5.4pt 0in 5.4pt; width: 1.8in;" valign="top" width="173"&gt;&lt;div class="MsoNormal" 
style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: x-small;"&gt;Efficiency and   productivity &lt;/span&gt;&lt;o:p&gt;&lt;/o:p&gt;&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;br /&gt;
&lt;b&gt;Growing Up&lt;/b&gt;&lt;br /&gt;
An organization’s data management maturity plays a role in big and little data. &amp;nbsp;If you’re still managing your customer list in a spreadsheet, it’s probably something you started when your company was fairly young. &amp;nbsp;Now, the uses for the data should be expanded, but you are still stuck in the young company’s process. Something that was agile when you were young is inefficient today.&lt;br /&gt;
&lt;br /&gt;
Your pain may also have something to do with your partners’ data management maturity. &amp;nbsp;While the other companies you do business with are good at what they do, supplying products and services to your company, they may not be as good at data management. The new parts catalog comes every so often as an e-mail attachment. &amp;nbsp;You need an efficient process to update whoever uses it.&lt;br /&gt;
&lt;br /&gt;
No matter how mature you are, it is likely that you will have to deal with all types of data. When selecting tools, make sure you examine the cost and efficiency of all of these types, not just big data.&lt;br /&gt;
&lt;br /&gt;
&lt;div&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-8935775883342167125?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=ZJGk5GFTAng:L_Gsi_cpVTU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=ZJGk5GFTAng:L_Gsi_cpVTU:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=ZJGk5GFTAng:L_Gsi_cpVTU:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=ZJGk5GFTAng:L_Gsi_cpVTU:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=ZJGk5GFTAng:L_Gsi_cpVTU:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/8935775883342167125/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=8935775883342167125" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/8935775883342167125?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/8935775883342167125?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/ZJGk5GFTAng/big-data-enterprise-data-and-discrete.html" title="Big Data, Enterprise Data and Discrete Data" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2012/01/big-data-enterprise-data-and-discrete.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkcAQXYzfCp7ImA9WhRVEk8.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-9151267940838283608</id><published>2012-01-10T15:40:00.000-05:00</published><updated>2012-01-10T15:40:40.884-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2012-01-10T15:40:40.884-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Talend" /><category scheme="http://www.blogger.com/atom/ns#" term="data governance team" /><category scheme="http://www.blogger.com/atom/ns#" term="data governance" /><title>What is Data Governance?</title><content type="html">I recently did a quick movie for a &lt;a href="http://www.talend.com/"&gt;Talend 
&lt;/a&gt;promotion to define data governance. It turns out that defining data governance is trickier than you think. Here, I examine the characteristics of data management initiatives and how they define data governance.&lt;br /&gt;
&lt;br /&gt;
&lt;object data="http://www.brainshark.com/brainshark/viewer/getplayer.ashx" height="366" id="bsplayer59581" name="bsplayer59581" type="application/x-shockwave-flash" width="440"&gt;&lt;param name="movie" value="http://www.brainshark.com/brainshark/viewer/getplayer.ashx" /&gt;&lt;param name="allowFullScreen" value="true" /&gt;&lt;param name="allowscriptaccess" value="always" /&gt;&lt;param name="flashvars" value="pi=789047103&amp;dm=5&amp;pause=1&amp;eurl=zHBzuyGQPz3T2ez0" /&gt;&lt;a href="http://www.brainshark.com/brainshark/viewer/fallback.ashx?pi=789047103"&gt;&lt;video width="440" height="366" controls="true" poster="http://www.brainshark.com/brainshark/brainshark.net/common/getimage.ashx?pi=789047103&amp;w=440&amp;h=366&amp;sln=1"&gt;&lt;source src="http://www.brainshark.com/brainshark/brainshark.net/apppresentation/getmovie.aspx?pi=789047103&amp;fmt=2" /&gt;&lt;img src="http://www.brainshark.com/brainshark/brainshark.net/apppresentation/splash.aspx?pi=789047103" width="440" height="366" border="0" /&gt;&lt;/video&gt;&lt;/a&gt;&lt;/object&gt;&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-9151267940838283608?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=FqcJ7_O0XFg:ps4XEBrIXis:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=FqcJ7_O0XFg:ps4XEBrIXis:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=FqcJ7_O0XFg:ps4XEBrIXis:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=FqcJ7_O0XFg:ps4XEBrIXis:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=FqcJ7_O0XFg:ps4XEBrIXis:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/9151267940838283608/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=9151267940838283608" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/9151267940838283608?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/9151267940838283608?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/FqcJ7_O0XFg/what-is-data-governance.html" title="What is Data Governance?" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2012/01/what-is-data-governance.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEYBSX46fip7ImA9WhRSEUw.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-1691620496933699082</id><published>2011-11-12T10:35:00.000-05:00</published><updated>2011-11-12T10:35:58.016-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-11-12T10:35:58.016-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="mainframe" /><category scheme="http://www.blogger.com/atom/ns#" term="agile data management" /><category scheme="http://www.blogger.com/atom/ns#" term="information quality" /><category scheme="http://www.blogger.com/atom/ns#" term="data governance" /><title>The ‘Time’ Factor in Data Management</title><content type="html">&lt;div class="separator" 
style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-qUNlpJHEI8I/Tr6QP_cZ1jI/AAAAAAAAAS0/5EGwndLD0GM/s1600/TIme.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="132" src="http://4.bp.blogspot.com/-qUNlpJHEI8I/Tr6QP_cZ1jI/AAAAAAAAAS0/5EGwndLD0GM/s200/TIme.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;I've been thinking about how many ways time influences the data management world. When it comes to managing data, we think about improving processes, coercing the needs and desires of people and how technology comes to help us manage it all. However, an often overlooked aspect of data management is time. Time impacts data management from many different directions.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Time Means Technology Will Improve&lt;/b&gt;&lt;br /&gt;
As time marches on, technology offers twists and turns to the data steward through innovation.&amp;nbsp; 20 years ago, mainframes ruled the world.&amp;nbsp; We’ve migrated through relational databases on powerful servers to a place where we see our immediate future in cloud and big data. As technology shifts, you must consider the impact of data.&lt;br /&gt;
&lt;br /&gt;
The good news is that with these huge challenges, you also get access to new tools.&amp;nbsp; In general, tools have become less arcane and more business-user focused as time marches on.&amp;nbsp; &lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
Time Causes People to Change&lt;/b&gt;&lt;br /&gt;
Like changes in technology, people also mature, change careers, retire. With regard to data management, the corporation must think about the expertise needed to complete the data mission. Data management must pass the “hit by a bus” test where the company would not suffer if one or more key people were to be hit by a Greyhound traveling from Newark to Richmond.&lt;br /&gt;
&lt;br /&gt;
Here, time is requiring us to be more diligent in documenting our processes.&amp;nbsp; It is requiring us to avoid undocumented hand-coding and pick a reproducible data management platform.&amp;nbsp; It helps to have third-party continuity, like consultants who, although they will also experience changes in personnel, will change on a different schedule than their clients.&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
Time Leads to Clarity in the Imperative of Data Management&lt;/b&gt;&lt;br /&gt;
With regard to data management, corporations have a maturity process they go through. They often start as chaotic immature organizations and realize the power of data management in a tactical maturity stage. Finally, they realize data management is a strategic initiative when they begin to govern the data.&amp;nbsp; Throughout it all, people, process and technologies change.&lt;br /&gt;
&lt;br /&gt;
Knowing where you are in this maturity cycle can help you plan where you want to go from here and what tactics you need to put in place to get there. For example, very few companies go from chaotic, ad hoc data management to full-blown MDM. For the most part, they get there through making little changes, seeing the positive impact of the little changes and wanting more. Rather, a chaotic organization might be more apt to evolve their data management maturity by consolidating two or more ERP systems and revel in its efficiency.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Time Prevents Us from Achieving Successful Projects&lt;/b&gt;&lt;br /&gt;
When it comes to specific projects, taking too much time can lead to failure in projects.&amp;nbsp; In the not so distant past, circa 2007, the industry commonly took on massive, multi-year, multimillion dollar MDM projects. We now know that these projects are not the best way to manage data. Why? Think about how much your own company has changed in the last two years.&amp;nbsp; If it is a dynamic, growing company, it likely has different goals, different markets, different partners and new leadership. The world has changed significantly, too.&amp;nbsp; Today’s worldwide economy is so much different than even one year ago. (Have you heard about the recession and European debt crisis?) The goals of a project that you set up two years ago will never achieve success today. &lt;br /&gt;
&lt;br /&gt;
Time makes us take an agile approach to data management. It requires that we pick off small portions of our problems, solve them, prove value and re-use what we’ve learned on the next agile project.&amp;nbsp; Limit and hold scope to achieve success.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Time Achieves Corporate Growth&lt;/b&gt; (which is counter to data management)&lt;br /&gt;
Companies who are just starting out generally have fewer data management problems than those who are mature. Time pushes our data complexity deeper and deeper. Therefore time dictates that even small companies should have some sort of data management strategy.&amp;nbsp; The good news is that this is now achievable with help from open source and lower cost data management solutions. Proper data management tools are affordable by both Fortune 1000 and small to medium-sized enterprises.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Time Holds Us Responsible&lt;/b&gt;&lt;br /&gt;
That said, the longer a corporation is in business, the longer it can be held responsible for lower revenue, decreased efficiency and lack of compliance due to poor data management. The company decides how it is going to govern (or not govern) data, what data is acceptable in the CRM and who is responsible for the mistakes that happen due to poor data management. The longer you are in business, the more responsible the corporation is for its governance. Time holds us responsible if the problems aren’t solved.&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
Time and Success Lead to Apathy &lt;/b&gt;&lt;br /&gt;
Finally, time often brings us success in data management.&amp;nbsp; With success, there is a propensity for corporations to take the eye off the prize and spend monies on more pressing issues.&amp;nbsp; Time and success can lead to a certain apathy, believing that the data management problem is solved.&amp;nbsp; But, as time marches on, new partners, new data sources and new business processes emerge. Time requires us to be ever vigilant in our efforts to manage data.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-1691620496933699082?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=ggrU840bmgk:iwTE6aZSUdk:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=ggrU840bmgk:iwTE6aZSUdk:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=ggrU840bmgk:iwTE6aZSUdk:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=ggrU840bmgk:iwTE6aZSUdk:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=ggrU840bmgk:iwTE6aZSUdk:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/1691620496933699082/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=1691620496933699082" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/1691620496933699082?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/1691620496933699082?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/ggrU840bmgk/time-factor-in-data-management.html" title="The ‘Time’ Factor in Data Management" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-qUNlpJHEI8I/Tr6QP_cZ1jI/AAAAAAAAAS0/5EGwndLD0GM/s72-c/TIme.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/11/time-factor-in-data-management.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0AAQX88fSp7ImA9WhdXF0U.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-5270431920144806969</id><published>2011-08-31T05:09:00.010-04:00</published><updated>2011-08-31T05:09:00.175-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-31T05:09:00.175-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="agile data management" /><category scheme="http://www.blogger.com/atom/ns#" 
term="data governance team" /><category scheme="http://www.blogger.com/atom/ns#" term="politics" /><title>Top Ten Root Causes of Data Quality Problems: Part Five</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-J4emDUEeVWY/TlqrNi7LE9I/AAAAAAAAASg/ch6KBJUJr4U/s1600/Checklistblu.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="150" src="http://3.bp.blogspot.com/-J4emDUEeVWY/TlqrNi7LE9I/AAAAAAAAASg/ch6KBJUJr4U/s200/Checklistblu.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;b&gt;Part 5 of 5: People Issues&lt;/b&gt;&lt;br /&gt;
In this continuing series, we're looking at root causes of data quality problems and the business processes you can put in place to solve them.&amp;nbsp; Companies rely on data to make significant decisions that can affect customer service, regulatory compliance, supply chain and many other areas. As you collect more and more information about customers, products, suppliers, transactions and billing, you must attack the root causes of data quality.&amp;nbsp; &lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
Root Cause Number Nine: Defining Data Quality&lt;/b&gt;&lt;br /&gt;
More and more companies recognize the need for data quality, but there are different ways to&amp;nbsp;&amp;nbsp; clean data and improve data quality.&amp;nbsp;&amp;nbsp; You can:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Write some code and cleanse manually&lt;/li&gt;
&lt;li&gt;Handle data quality within the source application&lt;/li&gt;
&lt;li&gt;Buy tools to cleanse data&lt;/li&gt;
&lt;/ul&gt;However, consider what happens when you have two or more of these types of data quality processes adjusting and massaging the data. Sales has one definition of customer, while billing has another.&amp;nbsp; Due to differing processes, they don’t agree on whether two records are a duplicate.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Root Cause Attack Plan&lt;/b&gt;&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Standardize Tools – Whenever possible, choose tools that aren’t tied to a particular solution. Having data quality only in SAP, for example, won’t help your Oracle, Salesforce and MySQL data sets.&amp;nbsp; When picking a solution, select one that is capable of accessing any data, anywhere, at any time.&amp;nbsp; It shouldn't cost you a bundle to leverage a common solution across multiple platforms and solutions.&lt;/li&gt;
&lt;li&gt;Data Governance – By setting up a cross-functional data governance team, you will have the people in place to define a common data model.&lt;/li&gt;
&lt;/ul&gt;&lt;b&gt;&lt;br /&gt;
Root Cause Number Ten: Loss of Expertise&lt;/b&gt;&lt;br /&gt;
On almost every data intensive project, there is one person whose legacy data expertise is outstanding. These are the folks who understand why some employee date of hire information is stored in the date of birth field and why some of the name attributes also contain tax ID numbers.&amp;nbsp; &lt;br /&gt;
Data might be a kind of historical record for an organization. It might have come from legacy systems. In some cases, the same value in the same field will mean a totally different thing in different records. Knowledge of these anomalies allows experts to use the data properly. &lt;br /&gt;
If you encounter this situation, there are some business processes you can follow.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Root Cause Attack Plan&lt;/b&gt;&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Profile and Monitor – Profiling the data will help you identify most of these types of issues.&amp;nbsp; For example, if you have a tax ID number embedded in the name field, analysis will let you quickly spot it. Monitoring will prevent a recurrence.&lt;/li&gt;
&lt;li&gt;Document – Although they may be reluctant to do so for fear of losing job security, make sure experts document all of the anomalies and transformations that need to happen every time the data is moved.&lt;/li&gt;
&lt;li&gt;Use Consultants – Expert employees may be so valuable and busy that there is no time to document the legacy anomalies. Outside consulting firms are usually very good at documenting issues and providing continuity between legacy and new employees.&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
This post is an excerpt from a white paper available &lt;a href="http://info.talend.com/DQ_10_Root_Causes.html?src=datagovblog"&gt;here&lt;/a&gt;. More to come on this subject in the days ahead.&lt;br /&gt;
&lt;br /&gt;
See also:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Part One: &lt;a href="http://data-governance.blogspot.com/2011/08/top-ten-root-causes-of-data-quality.html"&gt;The Basics&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Part Two: &lt;a href="http://data-governance.blogspot.com/2011/08/top-ten-root-causes-of-data-quality_25.html"&gt;Renegades and Pirates&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Part Three: &lt;a href="http://data-governance.blogspot.com/2011/08/top-ten-root-causes-of-data-quality_29.html"&gt;Secret Code and Corporate Evolution&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;Part Four: &lt;a href="http://data-governance.blogspot.com/2011/08/top-ten-root-causes-of-data-quality_30.html"&gt;Data Flow&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&lt;br /&gt;
&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-5270431920144806969?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=Fzz3xPe0jv0:7Kj1MtgeFEM:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=Fzz3xPe0jv0:7Kj1MtgeFEM:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=Fzz3xPe0jv0:7Kj1MtgeFEM:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=Fzz3xPe0jv0:7Kj1MtgeFEM:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=Fzz3xPe0jv0:7Kj1MtgeFEM:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/5270431920144806969/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=5270431920144806969" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5270431920144806969?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5270431920144806969?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/Fzz3xPe0jv0/top-ten-root-causes-of-data-quality_31.html" title="Top Ten Root Causes of Data Quality Problems: Part Five" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-J4emDUEeVWY/TlqrNi7LE9I/AAAAAAAAASg/ch6KBJUJr4U/s72-c/Checklistblu.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/08/top-ten-root-causes-of-data-quality_31.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0QAQXc6cSp7ImA9WhdXFkQ.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-5471338305820538222</id><published>2011-08-30T05:09:00.001-04:00</published><updated>2011-08-30T05:09:00.919-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-30T05:09:00.919-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="agile data management" 
/><category scheme="http://www.blogger.com/atom/ns#" term="information quality" /><category scheme="http://www.blogger.com/atom/ns#" term="business strategy" /><category scheme="http://www.blogger.com/atom/ns#" term="master data management" /><category scheme="http://www.blogger.com/atom/ns#" term="data profiling" /><title>Top Ten Root Causes of Data Quality Problems: Part Four</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-sEHLApnc-kM/Tlql2ee9EcI/AAAAAAAAASc/KHuMu_6QwG0/s1600/Checklistgrn.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="150" src="http://1.bp.blogspot.com/-sEHLApnc-kM/Tlql2ee9EcI/AAAAAAAAASc/KHuMu_6QwG0/s200/Checklistgrn.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;b&gt;Part 4 of 5: Data Flow&lt;/b&gt;&lt;br /&gt;
In this continuing series, we're looking at root causes of data quality problems and the business processes you can put in place to solve them.&amp;nbsp; In part four, we examine some of the areas involving the pervasive nature of data and how it flows to and fro within an organization.&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
Root Cause Number Seven: Transaction Transition&lt;/b&gt;&lt;br /&gt;
More and more data is exchanged between systems through real-time (or near real-time) interfaces. As soon as the data enters one database, it triggers procedures necessary to send transactions to other downstream databases. The advantage is immediate propagation of data to all relevant databases.&lt;br /&gt;
&lt;br /&gt;
However, what happens when transactions go awry? A malfunctioning system could cause problems with downstream business applications.&amp;nbsp; In fact, even a small data model change could cause issues.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Root Cause Attack Plan&lt;/b&gt;&lt;ul&gt;&lt;li&gt;Schema Checks – Employ schema checks in your job streams to make sure your real-time applications are producing consistent data.&amp;nbsp; Schema checks will do basic testing to make sure your data is complete and formatted correctly before loading.&lt;/li&gt;
&lt;li&gt;Real-time Data Monitoring – One level beyond schema checks is to proactively monitor data with profiling and data monitoring tools.&amp;nbsp; Tools like the &lt;a href="http://www.talend.com/products-data-quality/talend-data-quality.php%20"&gt;Talend Data Quality Portal&lt;/a&gt; and others will ensure the data contains the right kind of information.&amp;nbsp; For example, if your part numbers are always a certain shape and length, and contain a finite set of values, any variation on that attribute can be monitored. When variations occur, the monitoring software can notify you.&lt;/li&gt;
&lt;/ul&gt;&lt;b&gt;&lt;br /&gt;
Root Cause Number Eight: Metadata Metamorphosis&lt;/b&gt;&lt;br /&gt;
A metadata repository should be able to be shared by multiple projects, with an audit trail maintained on usage and access.&amp;nbsp; For example, your company might have part numbers and descriptions that are universal to CRM, billing, ERP systems, and so on.&amp;nbsp; When a part number becomes obsolete in the ERP system, the CRM system should know. Metadata changes and needs to be shared.&lt;br /&gt;
&lt;br /&gt;
In theory, documenting the complete picture of what is going on in the database and how various processes are interrelated would allow you to completely mitigate the problem. Sharing the descriptions and part numbers among all applicable applications needs to happen. To get started, you could then analyze the data quality implications of any changes in code, processes, data structure, or data collection procedures and thus eliminate unexpected data errors. In practice, this is a huge task.&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
Root Cause Attack Plan &lt;/b&gt;&lt;ul&gt;&lt;li&gt;Predefined Data Models – Many industries now have basic definitions of what should be in any given set of data.&amp;nbsp; For example, the automotive industry follows certain ISO 8000 standards.&amp;nbsp; The energy industry follows Petroleum Industry Data Exchange standards or PIDX.&amp;nbsp; Look for a data model in your industry to help.&lt;/li&gt;
&lt;li&gt;Agile Data Management – Data governance is achieved by starting small and building out a process that first fixes the most important problems from a business perspective. You can leverage agile solutions to share metadata and set up optional processes across the enterprise.&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
This post is an excerpt from a white paper available &lt;a href="http://info.talend.com/DQ_10_Root_Causes.html?src=datagovblog"&gt;here&lt;/a&gt;. My final post on this subject will come in the days ahead.&lt;br /&gt;
&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-5471338305820538222?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=4wyqdTfecLs:ynPKOIM2qr0:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=4wyqdTfecLs:ynPKOIM2qr0:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=4wyqdTfecLs:ynPKOIM2qr0:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=4wyqdTfecLs:ynPKOIM2qr0:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=4wyqdTfecLs:ynPKOIM2qr0:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/5471338305820538222/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=5471338305820538222" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5471338305820538222?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5471338305820538222?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/4wyqdTfecLs/top-ten-root-causes-of-data-quality_30.html" title="Top Ten Root Causes of Data Quality Problems: Part Four" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-sEHLApnc-kM/Tlql2ee9EcI/AAAAAAAAASc/KHuMu_6QwG0/s72-c/Checklistgrn.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/08/top-ten-root-causes-of-data-quality_30.html</feedburner:origLink></entry><entry gd:etag="W/&quot;Ak4AQX06cCp7ImA9WhdXFk0.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-7886958658123987283</id><published>2011-08-29T05:09:00.002-04:00</published><updated>2011-08-29T05:09:00.318-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-29T05:09:00.318-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="reference data" /><category scheme="http://www.blogger.com/atom/ns#" term="business strategy" /><category 
scheme="http://www.blogger.com/atom/ns#" term="politics" /><category scheme="http://www.blogger.com/atom/ns#" term="data governance" /><title>Top Ten Root Causes of Data Quality Problems: Part Three</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-M9f3t2M09KI/Tlqgrc1WM0I/AAAAAAAAASY/FHGkRI3CTLc/s1600/Checklistblu.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="150" src="http://4.bp.blogspot.com/-M9f3t2M09KI/Tlqgrc1WM0I/AAAAAAAAASY/FHGkRI3CTLc/s200/Checklistblu.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;b&gt;Part 3 of 5: Secret Code and Corporate Evolution&lt;/b&gt;&lt;br /&gt;
In this continuing series, we're looking at root causes of data quality problems and the business processes you can put in place to solve them.&amp;nbsp; In part three, we examine secret code and corporate evolution as two of the root causes for data quality problems.&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
&lt;/b&gt;&lt;b&gt;Root Cause Number Five: &lt;/b&gt;&lt;b&gt;Corporate Evolution&lt;/b&gt;&lt;br /&gt;
&lt;i&gt;Change is good… except for data quality&lt;/i&gt;&lt;br /&gt;
An organization undergoes business process change to improve itself. Good, right?&amp;nbsp; Prime examples include:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Company expansion into new markets&lt;/li&gt;
&lt;li&gt;New partnership deals&lt;/li&gt;
&lt;li&gt;New regulatory reporting laws&lt;/li&gt;
&lt;li&gt;Financial reporting to a parent company&lt;/li&gt;
&lt;li&gt;Downsizing &lt;/li&gt;
&lt;/ul&gt;If data quality is defined as “fitness for purpose,” what happens when the purpose changes? It’s these new data uses that bring about changes in perceived level of data quality even though underlying data is the same. It’s natural for data to change.&amp;nbsp; As it does, the data quality rules, business rules and data integration layers must also change.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Root Cause Attack Plan&lt;/b&gt; &lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Data Governance – By setting up a cross-functional data governance team, you will always have a team who will be looking at the changes your company is undergoing and considering its impact on information. In fact, this should be in the charter of a data governance team.&lt;/li&gt;
&lt;li&gt;Communication – Regular communication and a well-documented metadata model will make the process of change much easier.&lt;/li&gt;
&lt;li&gt;Tool Flexibility – One of the challenges of buying data quality tools embedded within enterprise applications is that they may not work in ALL enterprise applications. When you choose tools, make sure they are flexible enough to work with data from any application and that the company is committed to flexibility and openness.&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&lt;b&gt;Root Cause Number Six: &lt;/b&gt;&lt;b&gt;Secret Code&lt;/b&gt;&lt;br /&gt;
Databases rarely begin their life empty. The starting point is typically a data conversion from some previously existing data source. The problem is that while the data may work perfectly well in the source application, it may fail in the target. It’s difficult to see all the custom code and special processes that happen beneath the data unless you profile. &lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Root Cause Attack Plan&lt;/b&gt;&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Profile Early and Often – Don’t assume your data is fit for purpose because it works in the source application. Profiling will give you an exact evaluation of the shape and syntax of the data in the source.&amp;nbsp; It also will let you know how much work you need to do to make it work in the target.&lt;/li&gt;
&lt;li&gt;Corporate Standards - Data governance will help you define corporate standards for data quality. &lt;/li&gt;
&lt;li&gt;Apply Reusable Data Quality Tools When Possible – Rather than custom code in the application, a better strategy is to let data quality tools apply standards.&amp;nbsp; Data quality tools will apply corporate standards in a uniform way, leading to more accurate sharing of data.&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
This post is an excerpt from a white paper available &lt;a href="http://info.talend.com/DQ_10_Root_Causes.html?src=datagovblog"&gt;here&lt;/a&gt;. The final posts on this subject will come in the days ahead.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-7886958658123987283?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=9JuCv4TeAec:7QXO0dRPimU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=9JuCv4TeAec:7QXO0dRPimU:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=9JuCv4TeAec:7QXO0dRPimU:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=9JuCv4TeAec:7QXO0dRPimU:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=9JuCv4TeAec:7QXO0dRPimU:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/7886958658123987283/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=7886958658123987283" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/7886958658123987283?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/7886958658123987283?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/9JuCv4TeAec/top-ten-root-causes-of-data-quality_29.html" title="Top Ten Root Causes of Data Quality Problems: Part Three" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/-M9f3t2M09KI/Tlqgrc1WM0I/AAAAAAAAASY/FHGkRI3CTLc/s72-c/Checklistblu.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/08/top-ten-root-causes-of-data-quality_29.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUQAQX07cCp7ImA9WhdXEks.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-3130186554232391828</id><published>2011-08-25T05:09:00.010-04:00</published><updated>2011-08-25T05:09:00.308-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-25T05:09:00.308-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="sandbox" /><category 
scheme="http://www.blogger.com/atom/ns#" term="agile data management" /><category scheme="http://www.blogger.com/atom/ns#" term="mergers" /><category scheme="http://www.blogger.com/atom/ns#" term="data governance" /><title>Top Ten Root Causes of Data Quality Problems: Part Two</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-dlCgbGBWsh8/TlTGGc_HfQI/AAAAAAAAASU/SkicLKPJGcs/s1600/Checklist.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="150" src="http://3.bp.blogspot.com/-dlCgbGBWsh8/TlTGGc_HfQI/AAAAAAAAASU/SkicLKPJGcs/s200/Checklist.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;b&gt;Part 2 of 5: Renegades and Pirates&lt;/b&gt;&lt;br /&gt;
In this continuing series, we're looking at root causes of data quality problems and the business processes you can put in place to solve them.&amp;nbsp; In part two, we examine IT renegades and corporate pirates as two of the root causes for data quality problems.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Root Cause Number Three: Renegade IT and Spreadmarts&lt;/b&gt;&lt;br /&gt;
A renegade is a person who deserts and betrays an organizational set of principles. That’s exactly what some impatient business owners unknowingly do by moving data in and out of business solutions, databases and the like. Rather than wait for some professional help from IT, eager business units may decide to create their own set of local applications without the knowledge of IT. While the application may meet the immediate departmental need, it is unlikely to adhere to standards of data, data model or interfaces. The database might start by making a copy of a sanctioned database to a local application on team desktops. So-called “spreadmarts,” which are important pieces of data stored in Excel spreadsheets, are easily replicated to team desktops. In this scenario, you lose control of versions as well as standards. There are no backups, versioning or business rules.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Root Cause Attack Plan&lt;/b&gt; &lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Corporate Culture – There should be a consequence for renegade data, making it more difficult for the renegades to create local data applications.&lt;/li&gt;
&lt;li&gt;Communication – Educate and train your employees on the negative impact of renegade data.&lt;/li&gt;
&lt;li&gt;Sandbox – Having tools that can help business users and IT professionals experiment with the data in a safe environment is crucial. A sandbox, where users are experimenting on data subsets and copies of production data, has proven successful for many for limiting renegade IT.&lt;/li&gt;
&lt;li&gt;Locking Down the Data – A culture where creating unsanctioned spreadmarts is shunned is the goal.&amp;nbsp; Some organizations have found success in locking down the data to make it more difficult to export.&lt;/li&gt;
&lt;/ul&gt;&lt;b&gt;&lt;br /&gt;
Root Cause Number Four: Corporate Mergers&lt;/b&gt;&lt;br /&gt;
Corporate mergers increase the likelihood for data quality errors because they usually happen fast and are unforeseen by IT departments. Almost immediately, there is pressure to consolidate and take shortcuts on proper planning. The consolidation will likely include the need to share data among a varied set of disjointed applications. Many shortcuts are taken to “make it happen,” often involving known or unknown risks to the data quality. &lt;br /&gt;
On top of the quick schedule, merging IT departments may encounter culture clash and a different definition of truth.&amp;nbsp; Additionally, mergers can result in a loss of expertise when key people leave midway through the project to seek new ventures.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Root Cause Attack Plan&lt;/b&gt;&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Corporate Awareness – Whenever possible, a civil division of labor should be mandated by management to avoid culture clashes and data grabs by the power hungry.&lt;/li&gt;
&lt;li&gt;Document – Your IT initiative should survive even if the entire team leaves, disbands or gets hit by a bus when crossing the street.&amp;nbsp; You can do this with proper documentation of the infrastructure.&lt;/li&gt;
&lt;li&gt;Third-party Consultants – Management should be aware that there is extra work to do and that conflicts can arise after a merger. Consultants can provide the continuity needed to get through the transition.&lt;/li&gt;
&lt;li&gt;Agile Data Management – Choose solutions and strategies that will keep your organization agile, giving you the ability to divide and conquer the workload without expensive licensing of commercial applications.&lt;/li&gt;
&lt;/ul&gt;This post is an excerpt from a white paper available &lt;a href="http://info.talend.com/DQ_10_Root_Causes.html?src=datagovblog"&gt;here&lt;/a&gt;. More to come on this subject in the days ahead.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-3130186554232391828?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=tL8G3cpgCE8:yrHmimhE2gM:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=tL8G3cpgCE8:yrHmimhE2gM:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=tL8G3cpgCE8:yrHmimhE2gM:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=tL8G3cpgCE8:yrHmimhE2gM:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=tL8G3cpgCE8:yrHmimhE2gM:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/3130186554232391828/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=3130186554232391828" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/3130186554232391828?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/3130186554232391828?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/tL8G3cpgCE8/top-ten-root-causes-of-data-quality_25.html" title="Top Ten Root Causes of Data Quality Problems: Part Two" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-dlCgbGBWsh8/TlTGGc_HfQI/AAAAAAAAASU/SkicLKPJGcs/s72-c/Checklist.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/08/top-ten-root-causes-of-data-quality_25.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEMMRHc6eSp7ImA9WhdXEUo.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-2624880194598042286</id><published>2011-08-24T05:01:00.000-04:00</published><updated>2011-08-24T05:01:25.911-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-08-24T05:01:25.911-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="business strategy" /><title>Top Ten 
Root Causes of Data Quality Problems: Part One</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-nxbK9iJhQgs/TlS9X_bJ0BI/AAAAAAAAASQ/PiL-Gg_rV6Q/s1600/Checklist.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="150" src="http://2.bp.blogspot.com/-nxbK9iJhQgs/TlS9X_bJ0BI/AAAAAAAAASQ/PiL-Gg_rV6Q/s200/Checklist.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;b&gt;Part 1 of 5: The Basics&lt;/b&gt;&lt;br /&gt;
We all know data quality problems when we see them.&amp;nbsp; They can undermine your organization’s ability to work efficiently, comply with government regulations and make revenue. The specific technical problems include missing data, misfielded attributes, duplicate records and broken data models to name just a few.&lt;br /&gt;
But rather than merely patching up bad data, most experts agree that the best strategy for fighting data quality issues is to understand the root causes and put new processes in place to prevent them.&amp;nbsp; This five part blog series discusses the top ten root causes of data quality problems and suggests steps the business can implement to prevent them. &lt;br /&gt;
In this first blog post, we'll confront some of the more obvious root causes of data quality problems.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Root Cause Number One: Typographical Errors and Non-Conforming Data&lt;/b&gt;&lt;br /&gt;
Despite a lot of automation in our data architecture these days, data is still typed into Web forms and other user interfaces by people. A common source of data inaccuracy is that the person manually entering the data just makes a mistake. People mistype. They choose the wrong entry from a list. They enter the right data value into the wrong box.&lt;br /&gt;
&lt;br /&gt;
Given complete freedom on a data field, those who enter data have to go from memory.&amp;nbsp; Is the vendor named Grainger, WW Granger, or W. W. Grainger? Ideally, there should be a corporate-wide set of reference data so that forms help users find the right vendor, customer name, city, part number, and so on.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Root Cause Attack Plan &lt;/b&gt;&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Training – Make sure that those people who enter data know the impact they have on downstream applications.&lt;/li&gt;
&lt;li&gt;Metadata Definitions – By locking down exactly what people can enter into a field using a definitive list, many problems can be alleviated. This metadata (for vendor names, part numbers, and so on) can become part of data quality in data integration, business applications and other solutions.&lt;/li&gt;
&lt;li&gt;Monitoring – Make public the results of poorly entered data and praise those who enter data correctly. You can keep track of this with data monitoring software such as the Talend Data Quality Portal.&lt;/li&gt;
&lt;li&gt;Real-time Validation – In addition to forms, validation data quality tools can be implemented to validate addresses, e-mail addresses and other important information as it is entered. Ensure that your data quality solution provides the ability to deploy data quality in application server environments, in the cloud or in an enterprise service bus (ESB).&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&lt;b&gt;Root Cause Number Two: Information Obfuscation&lt;/b&gt;&lt;br /&gt;
Data entry errors might not be completely by mistake. How often do people give incomplete or incorrect information to safeguard their privacy?&amp;nbsp; If there is nothing at stake for those who enter data, there will be a tendency to fudge.&lt;br /&gt;
&lt;br /&gt;
Even if the people entering data want to do the right thing, sometimes they cannot. If a field is not available, an alternate field is often used. This can lead to such data quality issues as having Tax ID numbers in the name field or contact information in the comments field.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Root Cause Attack Plan&lt;/b&gt;&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Reward – Offer an incentive for those who enter personal data correctly. This should be focused on those who enter data from the outside, like those using Web forms. Employees should not need a reward to do their job. The type of reward will depend upon how important it is to have the correct information.&lt;/li&gt;
&lt;li&gt;Accessibility – As a technologist in charge of data stewardship, be open and accessible about criticism from users. Give them a voice when processes change requiring technology change.&amp;nbsp; If you’re not accessible, users will look for quiet ways around your forms validation. &lt;/li&gt;
&lt;li&gt;Real-time Validation – In addition to forms, validation data quality tools can be implemented to validate addresses, e-mail addresses and other important information as it is entered.&lt;/li&gt;
&lt;/ul&gt;This post is an excerpt from a white paper available &lt;a href="http://info.talend.com/DQ_10_Root_Causes.html?src=datagovblog"&gt;here&lt;/a&gt;. More to come on this subject in the days ahead.&lt;br /&gt;
&lt;br /&gt;
&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-2624880194598042286?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=syDYYq6GGik:FNNLsrcF58k:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=syDYYq6GGik:FNNLsrcF58k:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=syDYYq6GGik:FNNLsrcF58k:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=syDYYq6GGik:FNNLsrcF58k:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=syDYYq6GGik:FNNLsrcF58k:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/2624880194598042286/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=2624880194598042286" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/2624880194598042286?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/2624880194598042286?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/syDYYq6GGik/top-ten-root-causes-of-data-quality.html" title="Top Ten Root Causes of Data Quality Problems: Part One" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-nxbK9iJhQgs/TlS9X_bJ0BI/AAAAAAAAASQ/PiL-Gg_rV6Q/s72-c/Checklist.jpg" height="72" width="72" /><thr:total>2</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/08/top-ten-root-causes-of-data-quality.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CE8AQno8eSp7ImA9WhZUGUQ.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-992687192069897552</id><published>2011-06-13T14:50:00.002-04:00</published><updated>2011-06-13T15:20:43.471-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-06-13T15:20:43.471-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="data migration" /><category 
scheme="http://www.blogger.com/atom/ns#" term="big data" /><category scheme="http://www.blogger.com/atom/ns#" term="data profiling" /><category scheme="http://www.blogger.com/atom/ns#" term="data integration" /><title>The Differences Between Small and Big Data</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-s0c9s30YHNI/TfZbu6b1uPI/AAAAAAAAARg/NYTPabTNmNw/s1600/Fish.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="136" src="http://2.bp.blogspot.com/-s0c9s30YHNI/TfZbu6b1uPI/AAAAAAAAARg/NYTPabTNmNw/s200/Fish.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;There is a lot of buzz today about big data and companies stepping up to meet the challenge of ever increasing data volumes. At the center of it all are Hadoop and the Cloud.&amp;nbsp; Hadoop can intelligently manage the distribution of processing and your files. It manages the infrastructure needed to break down big data into more manageable chunks for processing by multiple servers. Likewise, a cloud strategy can take data management outside the walls of a corporation into a highly scalable infrastructure.&lt;br /&gt;
&lt;br /&gt;
Do you have big data?&amp;nbsp; It’s difficult to know precisely whether you do because big data is vaguely defined. You may qualify for big data technology if you face hundreds of gigabytes of data, or it may be hundreds or thousands of terabytes. The classification of “big data” is not strictly defined by data size, but by other business processes, too. Your data management infrastructure needs to take into account factors like future data volumes, peaks and lulls in requirements, business requirements and much more.&lt;br /&gt;
&lt;b&gt;&lt;br /&gt;
Small and Medium-Sized Data&lt;/b&gt;&lt;br /&gt;
What about “small” and medium-sized data? For example, data from spreadsheets, the occasional flat file, leads from a trade show, and catalog data from vendors may be vital to your business processes. With a new industry focus on transparency, business user involvement and sharing of data, small data is a constant issue.&amp;nbsp; Spreadsheets and flat files are the preferred method to share data today because most companies have some process for handling them. When you get these small to medium sized data sets,&amp;nbsp; it is still necessary to:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;profile them&lt;/li&gt;
&lt;li&gt;integrate them into your relational database&lt;/li&gt;
&lt;li&gt;aggregate data from these sources, or extract only the vital parts&lt;/li&gt;
&lt;li&gt;apply data quality standards when necessary&lt;/li&gt;
&lt;li&gt;use them as part of a master data management (MDM) initiative&lt;/li&gt;
&lt;/ul&gt;&lt;br /&gt;
&lt;b&gt;The Different Goals of Big Data and Little Data &lt;/b&gt;&lt;br /&gt;
With big data, the concern is usually about your data management technology’s ability to handle massive quantities in order to provide you aggregates that are meaningful.&amp;nbsp; You need solutions that will scale to meet your data management needs.&amp;nbsp; However, handling small and medium data sets is more about short and long term costs.&amp;nbsp; How can you quickly and easily integrate data without a lot of red tape, big license fees, pain and suffering?&lt;br /&gt;
&lt;br /&gt;
Think about it. When you need to handle small and medium data, you have options:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Hand-coding: Using hand-coding is sometimes faster than any solution and it still may be OK for ad-hoc, one-off data integration.&amp;nbsp; Once you find yourself hand-coding again and again, you’ll find yourself rethinking that strategy. Eventually managing all that code will waste time and cost you a bundle. If your data volumes grow, hand-coding quickly becomes obsolete due to lack of scaling. Hand-coding gets high marks on speed to value, but falters in sustainability and long-term costs.&lt;/li&gt;
&lt;li&gt;Open Source: Open source data management tools provide a quick way to get started, low overall costs and high sustainability.&amp;nbsp; By just downloading and learning the tools, you’re on your way to getting data management done.&amp;nbsp; The open source solutions may have some limitations on scalability, but most open source providers have low-cost commercial upgrades that meet these needs.&amp;nbsp; In other words, it's easy to start today and leverage Hadoop and the Cloud if you need it later. Open source gets high marks on speed to value, sustainability and costs.&lt;/li&gt;
&lt;li&gt;Traditional Data Management Vendors: Small data is a tough issue for the mega-vendors. Even for 50K-100K records, the license cost in both the short term and long term could be prohibitive.&amp;nbsp; The mega-vendor solutions do tend to scale well, making them sustainable at a cost. However, mergers in the data management business do happen. The sustainability of a product can be affected by these mergers.&amp;nbsp; Commercial vendors get respectable marks in speed to value and sustainability, but falter in high up-front costs and maintenance fees.&lt;/li&gt;
&lt;/ul&gt;I've heard it a million times in this business - start small and fast with technology that gives you a fast success but also scales to future tasks. &lt;br /&gt;
&lt;ul&gt;&lt;/ul&gt;&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-992687192069897552?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=fvKghjzfUCQ:psNoDDq6gM0:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=fvKghjzfUCQ:psNoDDq6gM0:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=fvKghjzfUCQ:psNoDDq6gM0:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=fvKghjzfUCQ:psNoDDq6gM0:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=fvKghjzfUCQ:psNoDDq6gM0:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/992687192069897552/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=992687192069897552" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/992687192069897552?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/992687192069897552?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/fvKghjzfUCQ/lets-get-small.html" title="The Differences Between Small and Big Data" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-s0c9s30YHNI/TfZbu6b1uPI/AAAAAAAAARg/NYTPabTNmNw/s72-c/Fish.jpg" height="72" width="72" /><thr:total>3</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/06/lets-get-small.html</feedburner:origLink></entry><entry gd:etag="W/&quot;D0YMQXg7fip7ImA9WhZWFUU.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-8722310663073228410</id><published>2011-05-16T18:33:00.000-04:00</published><updated>2011-05-16T18:33:00.606-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-05-16T18:33:00.606-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="Talend" /><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="data migration" /><category 
scheme="http://www.blogger.com/atom/ns#" term="supply chain" /><category scheme="http://www.blogger.com/atom/ns#" term="erp" /><category scheme="http://www.blogger.com/atom/ns#" term="butterfly effect" /><title>The Butterfly Effect and Data Quality</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-HRT7QjXe65M/TdGlRsiK_VI/AAAAAAAAARY/TbCTOlxLwrw/s1600/Butterfly.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="178" src="http://2.bp.blogspot.com/-HRT7QjXe65M/TdGlRsiK_VI/AAAAAAAAARY/TbCTOlxLwrw/s200/Butterfly.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;I just wrote a paper called the &lt;a href="http://www.talend.com/document-download.php?doc=butterfly&amp;amp;src=DataGovernanceBlog"&gt;‘Butterfly Effect’ of poor data quality&lt;/a&gt; for Talend.&lt;br /&gt;
&lt;br /&gt;
The term butterfly effect refers to the way a minor event – like the movement of a butterfly’s wing – can have a major impact on a complex system – like the weather. The movement of the butterfly wing represents a small change in the initial condition of the system, but it starts a chain of events: moving pollen through the air, which causes a gazelle to sneeze, which triggers a stampede of gazelles, which raises a cloud of dust, which partially blocks the sun, which alters the atmospheric temperature, which ultimately alters the path of a tornado on the other side of the world.&lt;br /&gt;
&lt;br /&gt;
Enterprise data is equally susceptible to the butterfly effect.&amp;nbsp; When poor quality data enters the complex system of enterprise data, even a small error – the transposed letters in a street address or part number – can lead to 1) revenue loss; 2) process inefficiency; and 3) failure to comply with industry and government regulations. Organizations depend on the movement and sharing of data throughout the organization, so the impact of data quality errors is costly and far reaching. Data issues often begin with a tiny mistake in one part of the organization, but the butterfly effect can produce far reaching results. &lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;The Pervasiveness of Data&lt;/b&gt;&lt;br /&gt;
When data enters the corporate ecosystem, it rarely stays in one place.&amp;nbsp; Data is pervasive. As it moves throughout a corporation, data impacts systems and business processes. The negative impact of poor data quality reverberates as it crosses departments, business units and cross-functional systems.&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;b&gt;Customer Relationship Management (CRM)&lt;/b&gt; - By standardizing customer data, you will be able to offer better, more personalized customer service.&amp;nbsp; And you will be better able to contact your customers and prospects for cross-sell, up-sell, notification and services.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;ERP / Supply Chain Data&lt;/b&gt;- If you have clean data in your supply chain, you can achieve some tangible benefits.&amp;nbsp; First, the company will have a clear picture about delivery times on orders because of a completely transparent supply chain. Next, you will avoid unnecessary warehouse costs by holding the right amount of inventory in stock.&amp;nbsp; Finally, you will be able to see all the buying patterns and use that information when negotiating supply contracts.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Orders / Billing System &lt;/b&gt;- If you have clean data in your billing systems, you can achieve the tangible benefits of more accurate financial reporting and correct invoices that reach the customer in a timely manner.&amp;nbsp; An accurate bill not only leads to trust among workers in the billing department, but customer attrition rates will be lower if invoices are delivered accurately and on time.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Data Warehouse&lt;/b&gt; - If you have standardized the data feeding into your data warehouse, you can dramatically improve business intelligence. Employees can access the data warehouse and be assured that the data they use for reports, analysis and decision making is accurate. Using the clean data in a warehouse can help you find trends, see relationships between data, and understand the competition in a new light.&lt;/li&gt;
&lt;/ul&gt;To read more about the butterfly effect of data quality, &lt;a href="http://www.talend.com/document-download.php?doc=butterfly&amp;amp;src=DataGovernanceBlog"&gt;download it from the Talend site&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-8722310663073228410?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=_EgnoqAF5k4:iHoD1z4Khb8:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=_EgnoqAF5k4:iHoD1z4Khb8:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=_EgnoqAF5k4:iHoD1z4Khb8:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=_EgnoqAF5k4:iHoD1z4Khb8:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=_EgnoqAF5k4:iHoD1z4Khb8:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/8722310663073228410/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=8722310663073228410" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/8722310663073228410?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/8722310663073228410?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/_EgnoqAF5k4/butterfly-effect-and-data-quality.html" title="The Butterfly Effect and Data Quality" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/-HRT7QjXe65M/TdGlRsiK_VI/AAAAAAAAARY/TbCTOlxLwrw/s72-c/Butterfly.jpg" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/05/butterfly-effect-and-data-quality.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEUMQX08cCp7ImA9WhZXGUs.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-6095926809193852174</id><published>2011-05-09T14:38:00.000-04:00</published><updated>2011-05-09T14:38:00.378-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-05-09T14:38:00.378-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="MIT IQ Conference" /><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category 
scheme="http://www.blogger.com/atom/ns#" term="conference" /><category scheme="http://www.blogger.com/atom/ns#" term="data governance" /><title>MIT Information Quality Symposium</title><content type="html">This year I’m planning to attend the &lt;a href="http://mitiq.mit.edu/iqis/2011/"&gt;MIT IQ symposium&lt;/a&gt; again.&amp;nbsp; I’m also one of the vice chairs of the event. The symposium is a July event in Boston that is a discussion and exchange of ideas about data quality between practitioners and academicians. &lt;br /&gt;
&lt;br /&gt;
I return to this conference and participate in the planning every year because I think it’s one of the most important data quality events.&amp;nbsp; The people here really do change the course of information management.&amp;nbsp; On these hot summer days in Boston, government, healthcare and general business professionals collaborate on the latest updates about data quality.&amp;nbsp; This event has the potential to dramatically change the world – the people, organizations, and governments who manage data. I’ve grown to really enjoy the combination of ground-breaking presentations, high ranking government officials, sharp consultants and MIT hallway chat that you find here.&lt;br /&gt;
&lt;br /&gt;
If you have some travel budget, please consider joining me for this event.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-6095926809193852174?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=5CyA54R9P28:6a1DyBSWjzU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=5CyA54R9P28:6a1DyBSWjzU:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=5CyA54R9P28:6a1DyBSWjzU:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=5CyA54R9P28:6a1DyBSWjzU:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=5CyA54R9P28:6a1DyBSWjzU:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/6095926809193852174/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=6095926809193852174" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/6095926809193852174?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/6095926809193852174?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/5CyA54R9P28/mit-information-quality-symposium.html" title="MIT Information Quality Symposium" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/05/mit-information-quality-symposium.html</feedburner:origLink></entry><entry gd:etag="W/&quot;AkMGQXY5cSp7ImA9WhZXEEU.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-8445772812935164751</id><published>2011-04-29T10:47:00.006-04:00</published><updated>2011-04-29T10:47:00.829-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-29T10:47:00.829-04:00</app:edited><title>Open Source and Data Quality</title><content type="html">My latest video on the &lt;a href="http://www.youtube.com/user/TalendChannel#p/u/3/jP7T2ga_rf8"&gt;Talend Channel&lt;/a&gt; about data quality and open source.&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;object width="320" height="266" class="BLOGGER-youtube-video" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0" data-thumbnail-src="http://3.gvt0.com/vi/jP7T2ga_rf8/0.jpg"&gt;&lt;param name="movie" value="http://www.youtube.com/v/jP7T2ga_rf8&amp;fs=1&amp;source=uds" /&gt;&lt;param name="bgcolor" value="#FFFFFF" /&gt;&lt;embed width="320" height="266" src="http://www.youtube.com/v/jP7T2ga_rf8&amp;fs=1&amp;source=uds" type="application/x-shockwave-flash"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;/div&gt;&lt;br /&gt;
This was filmed in the Paris office in January. I can get excited in any time zone when it comes to data quality.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-8445772812935164751?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=lA6T6s4i1xc:v1TawUdNxGc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=lA6T6s4i1xc:v1TawUdNxGc:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=lA6T6s4i1xc:v1TawUdNxGc:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=lA6T6s4i1xc:v1TawUdNxGc:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=lA6T6s4i1xc:v1TawUdNxGc:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/8445772812935164751/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=8445772812935164751" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/8445772812935164751?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/8445772812935164751?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/lA6T6s4i1xc/open-source-and-data-quality.html" title="Open Source and Data Quality" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/04/open-source-and-data-quality.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUQFQHozfyp7ImA9WhZQF0s.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-7221920059855540524</id><published>2011-04-25T16:28:00.000-04:00</published><updated>2011-04-25T16:28:31.487-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-25T16:28:31.487-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="tools" /><category scheme="http://www.blogger.com/atom/ns#" term="data governance team" /><category scheme="http://www.blogger.com/atom/ns#" term="data profiling" /><title>Data Quality Scorecard: Making Data Quality Relevant</title><content type="html">&lt;!--[if gte mso 9]&gt;&lt;xml&gt;  &lt;o:OfficeDocumentSettings&gt;   
&lt;o:AllowPNG/&gt;  &lt;/o:OfficeDocumentSettings&gt; &lt;/xml&gt;&lt;![endif]--&gt;&lt;!--[if gte mso 9]&gt;&lt;xml&gt;  &lt;w:WordDocument&gt;   &lt;w:View&gt;Normal&lt;/w:View&gt;   &lt;w:Zoom&gt;0&lt;/w:Zoom&gt;   &lt;w:TrackMoves/&gt;   &lt;w:TrackFormatting/&gt;   &lt;w:PunctuationKerning/&gt;   &lt;w:ValidateAgainstSchemas/&gt;   &lt;w:SaveIfXMLInvalid&gt;false&lt;/w:SaveIfXMLInvalid&gt;   &lt;w:IgnoreMixedContent&gt;false&lt;/w:IgnoreMixedContent&gt;   &lt;w:AlwaysShowPlaceholderText&gt;false&lt;/w:AlwaysShowPlaceholderText&gt;   &lt;w:DoNotPromoteQF/&gt;   &lt;w:LidThemeOther&gt;EN-US&lt;/w:LidThemeOther&gt;   &lt;w:LidThemeAsian&gt;X-NONE&lt;/w:LidThemeAsian&gt;   &lt;w:LidThemeComplexScript&gt;X-NONE&lt;/w:LidThemeComplexScript&gt;   &lt;w:Compatibility&gt;    &lt;w:BreakWrappedTables/&gt;    &lt;w:SnapToGridInCell/&gt;    &lt;w:WrapTextWithPunct/&gt;    &lt;w:UseAsianBreakRules/&gt;    &lt;w:DontGrowAutofit/&gt;    &lt;w:SplitPgBreakAndParaMark/&gt;    &lt;w:EnableOpenTypeKerning/&gt;    &lt;w:DontFlipMirrorIndents/&gt;    &lt;w:OverrideTableStyleHps/&gt;   &lt;/w:Compatibility&gt;   &lt;m:mathPr&gt;    &lt;m:mathFont m:val="Cambria Math"/&gt;    &lt;m:brkBin m:val="before"/&gt;    &lt;m:brkBinSub m:val="&amp;#45;-"/&gt;    &lt;m:smallFrac m:val="off"/&gt;    &lt;m:dispDef/&gt;    &lt;m:lMargin m:val="0"/&gt;    &lt;m:rMargin m:val="0"/&gt;    &lt;m:defJc m:val="centerGroup"/&gt;    &lt;m:wrapIndent m:val="1440"/&gt;    &lt;m:intLim m:val="subSup"/&gt;    &lt;m:naryLim m:val="undOvr"/&gt;   &lt;/m:mathPr&gt;&lt;/w:WordDocument&gt; &lt;/xml&gt;&lt;![endif]--&gt;&lt;!--[if gte mso 9]&gt;&lt;xml&gt;  &lt;w:LatentStyles DefLockedState="false" DefUnhideWhenUsed="true"
  DefSemiHidden="true" DefQFormat="false" DefPriority="99"
  LatentStyleCount="267"&gt;   &lt;w:LsdException Locked="false" Priority="0" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Normal"/&gt;   &lt;w:LsdException Locked="false" Priority="9" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="heading 1"/&gt;   &lt;w:LsdException Locked="false" Priority="9" QFormat="true" Name="heading 2"/&gt;   &lt;w:LsdException Locked="false" Priority="9" QFormat="true" Name="heading 3"/&gt;   &lt;w:LsdException Locked="false" Priority="9" QFormat="true" Name="heading 4"/&gt;   &lt;w:LsdException Locked="false" Priority="9" QFormat="true" Name="heading 5"/&gt;   &lt;w:LsdException Locked="false" Priority="9" QFormat="true" Name="heading 6"/&gt;   &lt;w:LsdException Locked="false" Priority="9" QFormat="true" Name="heading 7"/&gt;   &lt;w:LsdException Locked="false" Priority="9" QFormat="true" Name="heading 8"/&gt;   &lt;w:LsdException Locked="false" Priority="9" QFormat="true" Name="heading 9"/&gt;   &lt;w:LsdException Locked="false" Priority="39" Name="toc 1"/&gt;   &lt;w:LsdException Locked="false" Priority="39" Name="toc 2"/&gt;   &lt;w:LsdException Locked="false" Priority="39" Name="toc 3"/&gt;   &lt;w:LsdException Locked="false" Priority="39" Name="toc 4"/&gt;   &lt;w:LsdException Locked="false" Priority="39" Name="toc 5"/&gt;   &lt;w:LsdException Locked="false" Priority="39" Name="toc 6"/&gt;   &lt;w:LsdException Locked="false" Priority="39" Name="toc 7"/&gt;   &lt;w:LsdException Locked="false" Priority="39" Name="toc 8"/&gt;   &lt;w:LsdException Locked="false" Priority="39" Name="toc 9"/&gt;   &lt;w:LsdException Locked="false" Priority="35" QFormat="true" Name="caption"/&gt;   &lt;w:LsdException Locked="false" Priority="10" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Title"/&gt;   &lt;w:LsdException Locked="false" Priority="1" Name="Default Paragraph Font"/&gt;   &lt;w:LsdException Locked="false" Priority="11" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Subtitle"/&gt;   &lt;w:LsdException Locked="false" Priority="22" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Strong"/&gt;   &lt;w:LsdException Locked="false" Priority="20" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Emphasis"/&gt;   &lt;w:LsdException Locked="false" Priority="59" SemiHidden="false"
   UnhideWhenUsed="false" Name="Table Grid"/&gt;   &lt;w:LsdException Locked="false" UnhideWhenUsed="false" Name="Placeholder Text"/&gt;   &lt;w:LsdException Locked="false" Priority="1" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="No Spacing"/&gt;   &lt;w:LsdException Locked="false" Priority="60" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Shading"/&gt;   &lt;w:LsdException Locked="false" Priority="61" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light List"/&gt;   &lt;w:LsdException Locked="false" Priority="62" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Grid"/&gt;   &lt;w:LsdException Locked="false" Priority="63" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 1"/&gt;   &lt;w:LsdException Locked="false" Priority="64" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 2"/&gt;   &lt;w:LsdException Locked="false" Priority="65" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 1"/&gt;   &lt;w:LsdException Locked="false" Priority="66" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 2"/&gt;   &lt;w:LsdException Locked="false" Priority="67" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 1"/&gt;   &lt;w:LsdException Locked="false" Priority="68" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 2"/&gt;   &lt;w:LsdException Locked="false" Priority="69" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 3"/&gt;   &lt;w:LsdException Locked="false" Priority="70" SemiHidden="false"
   UnhideWhenUsed="false" Name="Dark List"/&gt;   &lt;w:LsdException Locked="false" Priority="71" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Shading"/&gt;   &lt;w:LsdException Locked="false" Priority="72" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful List"/&gt;   &lt;w:LsdException Locked="false" Priority="73" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Grid"/&gt;   &lt;w:LsdException Locked="false" Priority="60" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Shading Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="61" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light List Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="62" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Grid Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="63" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 1 Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="64" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 2 Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="65" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 1 Accent 1"/&gt;   &lt;w:LsdException Locked="false" UnhideWhenUsed="false" Name="Revision"/&gt;   &lt;w:LsdException Locked="false" Priority="34" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="List Paragraph"/&gt;   &lt;w:LsdException Locked="false" Priority="29" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Quote"/&gt;   &lt;w:LsdException Locked="false" Priority="30" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Intense Quote"/&gt;   &lt;w:LsdException Locked="false" Priority="66" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 2 Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="67" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 1 Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="68" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 2 Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="69" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 3 Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="70" SemiHidden="false"
   UnhideWhenUsed="false" Name="Dark List Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="71" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Shading Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="72" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful List Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="73" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Grid Accent 1"/&gt;   &lt;w:LsdException Locked="false" Priority="60" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Shading Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="61" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light List Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="62" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Grid Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="63" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 1 Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="64" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 2 Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="65" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 1 Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="66" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 2 Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="67" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 1 Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="68" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 2 Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="69" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 3 Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="70" SemiHidden="false"
   UnhideWhenUsed="false" Name="Dark List Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="71" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Shading Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="72" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful List Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="73" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Grid Accent 2"/&gt;   &lt;w:LsdException Locked="false" Priority="60" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Shading Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="61" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light List Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="62" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Grid Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="63" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 1 Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="64" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 2 Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="65" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 1 Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="66" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 2 Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="67" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 1 Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="68" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 2 Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="69" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 3 Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="70" SemiHidden="false"
   UnhideWhenUsed="false" Name="Dark List Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="71" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Shading Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="72" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful List Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="73" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Grid Accent 3"/&gt;   &lt;w:LsdException Locked="false" Priority="60" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Shading Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="61" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light List Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="62" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Grid Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="63" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 1 Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="64" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 2 Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="65" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 1 Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="66" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 2 Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="67" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 1 Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="68" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 2 Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="69" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 3 Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="70" SemiHidden="false"
   UnhideWhenUsed="false" Name="Dark List Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="71" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Shading Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="72" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful List Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="73" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Grid Accent 4"/&gt;   &lt;w:LsdException Locked="false" Priority="60" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Shading Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="61" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light List Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="62" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Grid Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="63" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 1 Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="64" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 2 Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="65" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 1 Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="66" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 2 Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="67" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 1 Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="68" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 2 Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="69" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 3 Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="70" SemiHidden="false"
   UnhideWhenUsed="false" Name="Dark List Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="71" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Shading Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="72" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful List Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="73" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Grid Accent 5"/&gt;   &lt;w:LsdException Locked="false" Priority="60" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Shading Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="61" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light List Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="62" SemiHidden="false"
   UnhideWhenUsed="false" Name="Light Grid Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="63" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 1 Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="64" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Shading 2 Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="65" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 1 Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="66" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium List 2 Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="67" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 1 Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="68" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 2 Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="69" SemiHidden="false"
   UnhideWhenUsed="false" Name="Medium Grid 3 Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="70" SemiHidden="false"
   UnhideWhenUsed="false" Name="Dark List Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="71" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Shading Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="72" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful List Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="73" SemiHidden="false"
   UnhideWhenUsed="false" Name="Colorful Grid Accent 6"/&gt;   &lt;w:LsdException Locked="false" Priority="19" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Subtle Emphasis"/&gt;   &lt;w:LsdException Locked="false" Priority="21" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Intense Emphasis"/&gt;   &lt;w:LsdException Locked="false" Priority="31" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Subtle Reference"/&gt;   &lt;w:LsdException Locked="false" Priority="32" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Intense Reference"/&gt;   &lt;w:LsdException Locked="false" Priority="33" SemiHidden="false"
   UnhideWhenUsed="false" QFormat="true" Name="Book Title"/&gt;   &lt;w:LsdException Locked="false" Priority="37" Name="Bibliography"/&gt;   &lt;w:LsdException Locked="false" Priority="39" QFormat="true" Name="TOC Heading"/&gt;  &lt;/w:LatentStyles&gt; &lt;/xml&gt;&lt;![endif]--&gt;&lt;!--[if gte mso 10]&gt; &lt;style&gt;
 /* Style Definitions */
 table.MsoNormalTable
 {mso-style-name:"Table Normal";
 mso-tstyle-rowband-size:0;
 mso-tstyle-colband-size:0;
 mso-style-noshow:yes;
 mso-style-priority:99;
 mso-style-parent:"";
 mso-padding-alt:0in 5.4pt 0in 5.4pt;
 mso-para-margin-top:0in;
 mso-para-margin-right:0in;
 mso-para-margin-bottom:10.0pt;
 mso-para-margin-left:0in;
 line-height:115%;
 mso-pagination:widow-orphan;
 font-size:11.0pt;
 font-family:"Calibri","sans-serif";
 mso-ascii-font-family:Calibri;
 mso-ascii-theme-font:minor-latin;
 mso-hansi-font-family:Calibri;
 mso-hansi-theme-font:minor-latin;
 mso-bidi-font-family:"Times New Roman";
 mso-bidi-theme-font:minor-bidi;}
table.MsoTableGrid
 {mso-style-name:"Table Grid";
 mso-tstyle-rowband-size:0;
 mso-tstyle-colband-size:0;
 mso-style-priority:59;
 mso-style-unhide:no;
 border:solid windowtext 1.0pt;
 mso-border-alt:solid windowtext .5pt;
 mso-padding-alt:0in 5.4pt 0in 5.4pt;
 mso-border-insideh:.5pt solid windowtext;
 mso-border-insidev:.5pt solid windowtext;
 mso-para-margin:0in;
 mso-para-margin-bottom:.0001pt;
 mso-pagination:widow-orphan;
 font-size:11.0pt;
 font-family:"Calibri","sans-serif";
 mso-ascii-font-family:Calibri;
 mso-ascii-theme-font:minor-latin;
 mso-hansi-font-family:Calibri;
 mso-hansi-theme-font:minor-latin;
 mso-bidi-font-family:"Times New Roman";
 mso-bidi-theme-font:minor-bidi;}
&lt;/style&gt; &lt;![endif]--&gt;    &lt;div class="MsoNormal"&gt;Most data governance practitioners agree that a data quality scorecard is an important tool in any data governance program. It provides comprehensive information about quality of data in a database, and perhaps even more importantly, allows business users and technical users to collaborate on the quality issue.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div class="MsoNormal"&gt;However, there are multiple levels of metrics that you should consider. There are:&lt;/div&gt;&lt;table border="1" cellpadding="0" cellspacing="0" class="MsoTableGrid" style="border-collapse: collapse; border: medium none;"&gt;&lt;tbody&gt;
&lt;tr&gt;   &lt;td style="border: 1pt solid windowtext; padding: 0in 5.4pt; width: 0.45in;" valign="top" width="43"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-color: windowtext windowtext windowtext -moz-use-text-color; border-style: solid solid solid none; border-width: 1pt 1pt 1pt medium; padding: 0in 5.4pt; width: 2.75in;" valign="top" width="264"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;&lt;b&gt;METRIC CLASSIFICATION&lt;/b&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-color: windowtext windowtext windowtext -moz-use-text-color; border-style: solid solid solid none; border-width: 1pt 1pt 1pt medium; padding: 0in 5.4pt; width: 3.45in;" valign="top" width="331"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;&lt;b&gt;EXAMPLES&lt;/b&gt;&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;
&lt;tr&gt;   &lt;td style="border-color: -moz-use-text-color windowtext windowtext; border-style: none solid solid; border-width: medium 1pt 1pt; padding: 0in 5.4pt; width: 0.45in;" valign="top" width="43"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: 24pt;"&gt;1&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-color: -moz-use-text-color windowtext windowtext -moz-use-text-color; border-style: none solid solid none; border-width: medium 1pt 1pt medium; padding: 0in 5.4pt; width: 2.75in;" valign="top" width="264"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;Metrics that the technologists use to fix data quality problems&lt;/div&gt;&lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-color: -moz-use-text-color windowtext windowtext -moz-use-text-color; border-style: none solid solid none; border-width: medium 1pt 1pt medium; padding: 0in 5.4pt; width: 3.45in;" valign="top" width="331"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;7% of the e-mail attribute is blank. 12% of the e-mail attribute does   not follow the standard e-mail syntax. 13% of our US mail addresses fail   address validation.&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;
&lt;tr&gt;   &lt;td style="border-color: -moz-use-text-color windowtext windowtext; border-style: none solid solid; border-width: medium 1pt 1pt; padding: 0in 5.4pt; width: 0.45in;" valign="top" width="43"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: 24pt;"&gt;2&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-color: -moz-use-text-color windowtext windowtext -moz-use-text-color; border-style: none solid solid none; border-width: medium 1pt 1pt medium; padding: 0in 5.4pt; width: 2.75in;" valign="top" width="264"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;Metrics business people use to make decisions about the data&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-color: -moz-use-text-color windowtext windowtext -moz-use-text-color; border-style: none solid solid none; border-width: medium 1pt 1pt medium; padding: 0in 5.4pt; width: 3.45in;" valign="top" width="331"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;9% of my contacts have invalid e-mails. &lt;span&gt;&amp;nbsp;&lt;/span&gt;3% have both invalid e-mails and invalid   addresses.&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;
&lt;tr&gt;   &lt;td style="border-color: -moz-use-text-color windowtext windowtext; border-style: none solid solid; border-width: medium 1pt 1pt; padding: 0in 5.4pt; width: 0.45in;" valign="top" width="43"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: 24pt;"&gt;3&lt;/span&gt;&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-color: -moz-use-text-color windowtext windowtext -moz-use-text-color; border-style: none solid solid none; border-width: medium 1pt 1pt medium; padding: 0in 5.4pt; width: 2.75in;" valign="top" width="264"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;Metrics managers use to get a big picture&lt;/div&gt;&lt;/td&gt;   &lt;td style="border-color: -moz-use-text-color windowtext windowtext -moz-use-text-color; border-style: none solid solid none; border-width: medium 1pt 1pt medium; padding: 0in 5.4pt; width: 3.45in;" valign="top" width="331"&gt;   &lt;div class="MsoNormal" style="line-height: normal; margin-bottom: 0.0001pt;"&gt;This customer data is good enough to use for a campaign.&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div class="MsoNormal"&gt;All levels are important for the various members of the data governance team.&lt;span&gt;&amp;nbsp; &lt;/span&gt;Level one shows the steps you need to take to fix the data.&lt;span&gt;&amp;nbsp; &lt;/span&gt;Level two shows context to the task at hand. Level three tells the uninformed about the business issue without having to dig into the details.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;
&lt;/div&gt;&lt;div class="MsoNormal"&gt;So, when you’re building your DQ metrics, remember to roll up the data into slightly higher-level metrics. You must design the scorecards to meet the needs and interests of the different audiences, from technical through to business and up to executive. At the beginning of a data quality scorecard is information about data quality of individual data attributes. This is the default information that most profilers will deliver out of the box. As you aggregate scores, the high-level measures of the data quality become more meaningful. In the middle are various score sets allowing your company to analyze and summarize data quality from different perspectives. If you define the objective of a data quality assessment project as calculating these different aggregations, you will have a much easier time maturing your data governance program. The business users and c-level will begin to pay attention.&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-7221920059855540524?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=kjS_7Rq6anM:T-AYY_LU7k0:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=kjS_7Rq6anM:T-AYY_LU7k0:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=kjS_7Rq6anM:T-AYY_LU7k0:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=kjS_7Rq6anM:T-AYY_LU7k0:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=kjS_7Rq6anM:T-AYY_LU7k0:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/7221920059855540524/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=7221920059855540524" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/7221920059855540524?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/7221920059855540524?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/kjS_7Rq6anM/data-quality-scorecard-making-data.html" title="Data Quality Scorecard: Making Data Quality Relevant" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/04/data-quality-scorecard-making-data.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0EHQH88eCp7ImA9WhZTEk4.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-8598409371672327532</id><published>2011-03-15T21:40:00.000-04:00</published><updated>2011-03-15T21:40:31.170-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-15T21:40:31.170-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="tools" /><category scheme="http://www.blogger.com/atom/ns#" term="data steward" /><title>Open Source Data Management or Do-it-Yourself</title><content type="html">With the tough economy people are still cutting back on corporate 
spending.&amp;nbsp; There is a sense of urgency to just get things done, and sometimes that can lead to hand-coding your own data integration, data quality or MDM functions. When you begin to develop your plan and strategies for data management, you have to think about all the hidden costs of getting solutions out-of-the-box versus building on your own. &lt;br /&gt;
&lt;br /&gt;
Reusability is one key consideration. Using data management technologies that only plug into one system just doesn’t make sense.&amp;nbsp; It’s difficult to get that reusability with custom code, unless your programmers have high visibility into other projects. On the other hand, all tool vendors, even open source ones, have pressure from their clients to support multiple databases and business solutions.&amp;nbsp; Open source solutions are built to work in a wider variety of architectures. You can move your data management processes between JD Edwards and SAP and SalesForce, for example, with relative ease.&lt;br /&gt;
&lt;br /&gt;
Indemnity is another consideration. What if something goes wrong with your home-grown solution after the chief architect leaves his job? Who are you going to call? If something goes wrong with your open source solution, you can turn to the community or call the vendor for support. &lt;br /&gt;
&lt;br /&gt;
Long-term costs are yet another issue.&amp;nbsp; Home-grown solutions have the tendency to start cheap and get more expensive as time goes on.&amp;nbsp; It’s difficult to manage custom code, especially if it is poorly documented. You hire consultants to manage code.&amp;nbsp; Eventually, you have to rip and replace and that can be costly.&lt;br /&gt;
&lt;br /&gt;
You should consider your human resources, too. Does it make sense to have a team work on hand-coding database extractions and transformation, or would the total cost/benefit be better if you used an open source data integration tool? It might just free up some of your programmers to pursue more important, ROI-centric ventures.&lt;br /&gt;
&lt;br /&gt;
If you’re thinking of cooking up your own technical solutions for data management, hoping to just get it done, think again. Your most economical solution might just be to leverage the community of experts and go with open source.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-8598409371672327532?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=nUCHO519Uv8:46XxQGeKbnU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=nUCHO519Uv8:46XxQGeKbnU:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=nUCHO519Uv8:46XxQGeKbnU:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=nUCHO519Uv8:46XxQGeKbnU:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=nUCHO519Uv8:46XxQGeKbnU:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/8598409371672327532/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=8598409371672327532" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/8598409371672327532?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/8598409371672327532?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/nUCHO519Uv8/open-source-data-management-or-do-it.html" title="Open Source Data Management or Do-it-Yourself" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><thr:total>1</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/03/open-source-data-management-or-do-it.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0MNRH8_fSp7ImA9Wx9aF0Q.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-6814952674005863157</id><published>2011-03-10T16:11:00.000-05:00</published><updated>2011-03-10T16:11:35.145-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-03-10T16:11:35.145-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="business strategy" /><title>My Interview in the Talend Newsletter</title><content type="html">&lt;b&gt;Q. Some people would say that data quality technology is mature and that the topic is sort of stale. 
Are there major changes happening in the data quality world today?&lt;/b&gt;&lt;br /&gt;
A. Probably the biggest over-arching change we see today is that the distinction between those managing data from the &lt;i&gt;business standpoint&lt;/i&gt; and those managing the &lt;i&gt;technical aspects &lt;/i&gt;of data quality is getting more and more blurry. It used to be that data quality was... &lt;b&gt;&lt;a href="http://www.talend.com/newsletter/newsletter25_EN_edito.html?mkt_tok=3RkMMJWWfF9wsRonuK7LZKXonjHpfsX76egtW6Og38431UFwdcjKPmjr1YIBRcV0dvycMRAVFZl5nRhZFuWbeA%3D%3D"&gt;read more&lt;/a&gt;&lt;/b&gt;&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-6814952674005863157?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=M8wgJrvsC84:zK-5yS7fFFQ:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=M8wgJrvsC84:zK-5yS7fFFQ:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=M8wgJrvsC84:zK-5yS7fFFQ:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=M8wgJrvsC84:zK-5yS7fFFQ:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=M8wgJrvsC84:zK-5yS7fFFQ:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/6814952674005863157/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=6814952674005863157" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/6814952674005863157?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/6814952674005863157?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/M8wgJrvsC84/my-interview-in-talend-newsletter.html" title="My Interview in the Talend Newsletter" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2011/03/my-interview-in-talend-newsletter.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A0MNRXo5eip7ImA9Wx9REEw.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-5658254692705471295</id><published>2010-12-10T17:18:00.000-05:00</published><updated>2010-12-10T17:18:14.422-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-12-10T17:18:14.422-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="open source" /><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="tools" /><category scheme="http://www.blogger.com/atom/ns#" term="business strategy" /><category scheme="http://www.blogger.com/atom/ns#" term="master data management" 
/><title>Six Data Management Predictions for 2011</title><content type="html">This time of year everyone makes prognostications about the state of the data management field for 2011. I thought I’d take my turn by offering my predictions for the coming year.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Data will become more open&lt;/b&gt;&lt;br /&gt;
In the old days good quality reference data was an asset kept in the corporate lockbox. If you had a good reference table for common misspellings of parts, cities, or names for example, the mind set was to keep it close and away from falling into the wrong hands.&amp;nbsp; The data might have been sold for profit or simply not available.&amp;nbsp; Today, there really is no “wrong hands”.&amp;nbsp; Governments and corporations alike are seeing the societal benefits of sharing information. More reference data is there for the taking on the internet from sites like data.gov and geonames.org.&amp;nbsp; That trend will continue in 2011.&amp;nbsp; Perhaps we’ll even see some of the bigger players make announcements as to the availability of their data. Are you listening Google?&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Business and IT will become blurry&lt;/b&gt;&lt;br /&gt;
It’s becoming harder and harder to tell an IT guy from the head of marketing. That’s because in order to succeed, the IT folks need to become more like the marketer and vice versa.&amp;nbsp; In the coming year, the difference will be less noticeable as business people get more and more involved in using data to their benefit.&amp;nbsp; Newsflash One: If you’re in IT, you need marketing skills to pitch your projects and get funding.&amp;nbsp; Newsflash Two: If you’re in business, you need to know enough about data management practices to succeed.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Tools will become easier to use&lt;/b&gt;&lt;br /&gt;
As the business users  come into the picture, they will need access to the tools to manage  data.&amp;nbsp; Vendors must respond to this new marketplace or die.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Tools will do less heavy lifting&lt;/b&gt;&lt;br /&gt;
Despite the improvements in the tools, corporations will turn to improving processes and reporting in order to achieve better data management. Dwindling are the days where we’re dealing with data that is so poorly managed that it requires overly complicated data quality tools.&amp;nbsp; We’re getting better at the data management process and therefore, the burden on the tools becomes less. Future tools will focus on supporting the process improvement with work flow features, reporting and better graphical user interfaces.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;CEOs and Government Officials will gain enlightenment&lt;/b&gt;&lt;br /&gt;
Feeding off the success of a few pioneers in data governance as well as failures of IT projects in our past, CEOs and governments will gain enlightenment about managing their data and put teams in place to handle it.&amp;nbsp; It has taken decades of our sweet-talk and cajoling for government and CEOs to achieve enlightenment, but I believe it is practically here.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;We will become more reliant on data&lt;/b&gt;&lt;br /&gt;
Ten years ago, it was difficult to imagine us being where we are today with respect to our data addiction. Today, data is a pervasive part of our internet-connected society, living in our PCs, our TVs, our mobile phones and many other devices. It’s a huge part of our daily lives. As I’ve said in past posts, the world is addicted to data and that bodes well for anyone who helps the world manage it. In 2011, no matter if the economy turns up or down, our industry will continue to feed the addiction to good, clean data.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-5658254692705471295?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=pXTx0aF_QZE:yMw4oandS04:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=pXTx0aF_QZE:yMw4oandS04:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=pXTx0aF_QZE:yMw4oandS04:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=pXTx0aF_QZE:yMw4oandS04:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=pXTx0aF_QZE:yMw4oandS04:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/5658254692705471295/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=5658254692705471295" title="3 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5658254692705471295?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5658254692705471295?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/pXTx0aF_QZE/six-data-management-predictions-for.html" title="Six Data Management Predictions for 2011" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><thr:total>3</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2010/12/six-data-management-predictions-for.html</feedburner:origLink></entry><entry gd:etag="W/&quot;C0MARHc9eSp7ImA9Wx9SEUs.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-1924037739719663419</id><published>2010-11-30T18:57:00.000-05:00</published><updated>2010-11-30T18:57:25.961-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-11-30T18:57:25.961-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data steward" /><category scheme="http://www.blogger.com/atom/ns#" term="matching algorithms" /><category scheme="http://www.blogger.com/atom/ns#" term="dedupe records" /><category scheme="http://www.blogger.com/atom/ns#" term="mdm" /><title>Match Mitigation: When Algorithms Aren’t Enough</title><content 
type="html">I’d like to get a little technical on this post. I try to keep my posts business-friendly, but sometimes there's importance in detail. If none of this post makes any sense to you, I wrote a sort of primer on how matching works in many data quality tools, which you can get &lt;a href="http://www.talend.com/document-download.php?doc=matchdq&amp;amp;src=DataGovernanceBlog"&gt;here&lt;/a&gt;. &lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Matching Algorithms&lt;/b&gt;&lt;br /&gt;
When you use a data quality tool, you’re often using matching algorithms and rules to make decisions on whether records match or not.&amp;nbsp; You might be using deterministic algorithms like Jaro, SoundEx and Metaphones. You might also be using probabilistic matching algorithms. &lt;br /&gt;
&lt;br /&gt;
In many tools, you can set the rules to be tight where the software uses tougher criteria to determine a match, or loose where the software is not so particular. Tight and loose matches are important because you may have strict rules for putting records together, like customers of a bank, or not so strict rules, like when you’re putting together a customer list for marketing purposes.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;What to do with Matches&lt;/b&gt;&lt;br /&gt;
Once data has been processed through the matcher, there are several possible outcomes. Between any two given records, the matcher may find:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;No relationship&lt;/li&gt;
&lt;li&gt;Match – the matcher found a definite match based on the criteria given&lt;/li&gt;
&lt;li&gt;Suspect – the matcher thinks it found a match but is not confident. The results should be manually reviewed.&lt;/li&gt;
&lt;/ul&gt;It’s that last category that’s the tough one.&amp;nbsp; Mitigating the suspect matches is the most time-consuming follow-up task after the matching is complete. Envision a million record database where you have 20,000 suspect matches.&amp;nbsp;&amp;nbsp; That’s still going to take you some time to review.&lt;br /&gt;
&lt;br /&gt;
Some of the newer (and cooler) tools offer strategies for dealing with suspect matches. The tools will present the suspect matches in a graphical user interface and allow users to pick which relationships are accurate and which are not. For example, Talend now offers a data stewardship console that lets you pick and choose records and attributes that will make up a best of breed record.&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_3TMxkaIa5WY/TPWO7ZoAaWI/AAAAAAAAAQ0/uioTmPb50RE/s1600/MatchMiti.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="149" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TPWO7ZoAaWI/AAAAAAAAAQ0/uioTmPb50RE/s320/MatchMiti.jpg" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;
The goal, of course, is to not have suspect matches, so tuning the matches and limiting the suspect matches is the ultimate. The newest tools will make this easy. Some of the legacy tools make this hard.&lt;br /&gt;
&lt;br /&gt;
Match mitigation is perhaps one of the most often overlooked processes of data quality. Don’t overlook it in your planning and processes.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-1924037739719663419?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=9uqXUHdCGpE:57QWDGgbYOE:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=9uqXUHdCGpE:57QWDGgbYOE:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=9uqXUHdCGpE:57QWDGgbYOE:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=9uqXUHdCGpE:57QWDGgbYOE:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=9uqXUHdCGpE:57QWDGgbYOE:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/1924037739719663419/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=1924037739719663419" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/1924037739719663419?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/1924037739719663419?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/9uqXUHdCGpE/match-mitigation-when-algorithms-arent.html" title="Match Mitigation: When Algorithms Aren’t Enough" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/_3TMxkaIa5WY/TPWO7ZoAaWI/AAAAAAAAAQ0/uioTmPb50RE/s72-c/MatchMiti.jpg" height="72" width="72" /><thr:total>4</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2010/11/match-mitigation-when-algorithms-arent.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEUNRXY9eCp7ImA9Wx5aGU4.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-5006381747047412164</id><published>2010-11-16T13:31:00.001-05:00</published><updated>2010-11-16T13:31:34.860-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-11-16T13:31:34.860-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="master data management" /><category 
scheme="http://www.blogger.com/atom/ns#" term="politics" /><category scheme="http://www.blogger.com/atom/ns#" term="global information quality" /><category scheme="http://www.blogger.com/atom/ns#" term="data profiling" /><category scheme="http://www.blogger.com/atom/ns#" term="data integration" /><title>Ideas Having Sex: The Path to Innovation in Data Management</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_3TMxkaIa5WY/TOLNa2r7h_I/AAAAAAAAAQw/faxBBCswRsU/s1600/community.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="199" src="http://3.bp.blogspot.com/_3TMxkaIa5WY/TOLNa2r7h_I/AAAAAAAAAQw/faxBBCswRsU/s200/community.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;I read a recent analyst report on the data quality market and “enterprise-class” data quality solutions. Per usual, the open source solutions were mentioned in passing while the data quality solutions of the past were given high marks. Some of the solutions picked in the top originated from days when mainframe was king. Some of the top contenders still contained cobbled-together applications from ill-conceived acquisitions. It got me thinking about the way we do business today and how so much of  it is changing.&lt;br /&gt;
&lt;br /&gt;
Back in the 1990’s or earlier, if you had an idea for a new product, you’d work with an internal team of engineers and build the individual parts.&amp;nbsp; This innovation took time, as you might not always have exactly the right people working on the job.&amp;nbsp; It was slow and tedious. The product was always confined by its own lineage.&lt;br /&gt;
&lt;br /&gt;
The Android phone market is a perfect example of the modern way to innovate.&amp;nbsp; Today, when you want to build something groundbreaking like an Android, you pull in expertise from all around the world. Sure, Samsung might make the CPU and Video processing chips, but Primax Electronics in Taiwan might make the digital camera and Broadcom in the US makes the touch screen, plus many others. Software vendors push the platform further with their cool apps. Innovation happens at break-neck speed because the Android is a collection of ideas that have sex and produce incredible offspring.&lt;br /&gt;
&lt;br /&gt;
Isn’t that really the model of a modern company?&amp;nbsp; You have ideas getting together and making new ideas. When you have free exchange between people, there is no need to re-invent something that has already been invented. See the TED talk for more on this concept, where British author Matt Ridley argues that, through history, the engine of human progress and prosperity is “&lt;a href="http://www.ted.com/talks/matt_ridley_when_ideas_have_sex.html"&gt;ideas having sex&lt;/a&gt;.”&lt;br /&gt;
&lt;br /&gt;
The business model behind open source has a similar mission.&amp;nbsp; Open source simply creates better software. Everyone collaborates, not just within one company, but among an Internet-connected, worldwide community. As a result, the open source model often builds higher quality, more secure, more easily integrated software. It does so at a vastly accelerated pace and often at a lower cost.&lt;br /&gt;
&lt;br /&gt;
So why do some industry analysts ignore it? There’s no denying that there are capitalist and financial reasons.&amp;nbsp; I think if an industry analyst were to actually come out and say that the open source solution is the best, it would be career suicide. The old-school would shun the analyst, making him less relevant. The link between the way the industry pays and promotes analysts and vice versa seems to favor enterprise application vendors.&lt;br /&gt;
&lt;br /&gt;
Yet the open source community along with Talend has developed a very strong data management offering that should be considered in the top of its class. The solution leverages other cutting edge solutions. To name just a few examples:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;if you want to scale up, you can use distributed platform technology from Hadoop, which enables it to work with thousands of nodes and petabytes of data.&lt;/li&gt;
&lt;li&gt;very strong enterprise class data profiling.&amp;nbsp;&amp;nbsp;&lt;/li&gt;
&lt;li&gt;matching that users can actually use and tune without having to jump between multiple applications.&lt;/li&gt;
&lt;li&gt;a platform that grows with your data management strategy so that if your future is MDM, you can seamlessly move there without having to learn a new GUI.&lt;/li&gt;
&lt;/ul&gt;The way we do business today has changed. Innovation can only happen when ideas have sex, as Matt Ridley puts it. As long as we’re engaged in exchange and specialization, we will achieve those new levels of innovation.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-5006381747047412164?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=55JH23DJfQY:TQTR9094fPY:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=55JH23DJfQY:TQTR9094fPY:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=55JH23DJfQY:TQTR9094fPY:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=55JH23DJfQY:TQTR9094fPY:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=55JH23DJfQY:TQTR9094fPY:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/5006381747047412164/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=5006381747047412164" title="4 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5006381747047412164?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5006381747047412164?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/55JH23DJfQY/ideas-having-sex-path-to-innovation-in.html" title="Ideas Having Sex: The Path to Innovation in Data Management" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/_3TMxkaIa5WY/TOLNa2r7h_I/AAAAAAAAAQw/faxBBCswRsU/s72-c/community.jpg" height="72" width="72" /><thr:total>4</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2010/11/ideas-having-sex-path-to-innovation-in.html</feedburner:origLink></entry><entry gd:etag="W/&quot;A08GQ3s4eip7ImA9WhZQGE8.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-5330541312509104434</id><published>2010-10-16T20:11:00.001-04:00</published><updated>2011-04-26T10:57:02.532-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2011-04-26T10:57:02.532-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="information quality" /><category scheme="http://www.blogger.com/atom/ns#" term="matching algorithms" 
/><category scheme="http://www.blogger.com/atom/ns#" term="address verification" /><title>Is 99.8 % data accuracy enough?</title><content type="html">Ripped from recent headlines, we see how even a .2% failure can have a big impact.&lt;br /&gt;
&lt;blockquote&gt;WASHINGTON (AP) ― More than 89,000 stimulus payments of $250 each went to people who were either dead or in prison, a government investigator says in a new report.&lt;/blockquote&gt;&lt;br /&gt;
Let’s take a good, hard look at &lt;a href="http://online.wsj.com/article/SB10001424052748704696304575538483156036168.html"&gt;this story&lt;/a&gt;. It begins with the US economy slumping.&amp;nbsp; The president proposes and passes through congress one of the biggest stimulus packages ever. The idea is sound to many; get America working by offering jobs in green energy, shovel-ready infrastructure projects. Among other actions, the plan is to give lower income people some government money so they can stimulate the economy.&lt;br /&gt;
&lt;br /&gt;
I’m not really here to praise or zing the wisdom of this. I’m just here to give the facts. In hindsight, it appears as though it hasn’t stimulated the economy as many had hoped, but that’s beside the point.&lt;br /&gt;
&lt;br /&gt;
Continuing on, the government issues 52 million people on social security a check for $250. It turns out of that number nearly 100,000 people were in prison or dead, roughly 0.2% of the checks. Some checks are returned, some are cashed. Ultimately, the government loses $22.3 million on the 0.2% error.&lt;br /&gt;
&lt;br /&gt;
While $22.3 million is a HUGE number, 0.2% is a tiny number.&amp;nbsp; It strikes at the heart of why data quality is so important.&amp;nbsp; Social Security spokesman Mark Lassiter said, "…Each year we make payments to a small number of deceased recipients usually because we have not yet received reports of their deaths." &lt;br /&gt;
&lt;br /&gt;
There is strong evidence that the SSA is hooked up to the right commercial data feeds and has the processes in place to use them. It seems as though the social security administration is quite proactive in their search for the dead and imprisoned, but people die and go to prison all the time. They also move, get married and become independent of their parents. &lt;br /&gt;
&lt;br /&gt;
If we try to imagine what it would take to achieve closer to 100% accuracy, it would take up-to-the-minute reference data. It seems that the only real solution is to put forth legislation that requires the reporting to the federal government of any of these life-changing events. Should we mandate the bereaved or perhaps funeral directors to report the death immediately in a central database? Even with such a law, there still would be a small percentage of checks that would be issued while the recipient was alive and delivered after the recipient is dead. We’d have better accuracy for this issue, but not 100%.&lt;br /&gt;
&lt;br /&gt;
While this story takes a poke at the SSA for sending checks to dead people, I have to applaud their achievement of 99.8% accuracy. It could be a lot worse America.&amp;nbsp; A lot worse.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-5330541312509104434?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=rQ9cY61tFcQ:4wsSmUnQ0Ss:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=rQ9cY61tFcQ:4wsSmUnQ0Ss:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=rQ9cY61tFcQ:4wsSmUnQ0Ss:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=rQ9cY61tFcQ:4wsSmUnQ0Ss:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=rQ9cY61tFcQ:4wsSmUnQ0Ss:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/5330541312509104434/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=5330541312509104434" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5330541312509104434?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5330541312509104434?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/rQ9cY61tFcQ/is-998-data-accuracy-enough.html" title="Is 99.8 % data accuracy enough?" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2010/10/is-998-data-accuracy-enough.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEUDRXc4eip7ImA9Wx5QEE0.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-5543035549744591007</id><published>2010-08-28T09:26:00.001-04:00</published><updated>2010-08-28T09:31:14.932-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-28T09:31:14.932-04:00</app:edited><title>ERP and SCM Data Profiling Techniques</title><content type="html">In this &lt;a href="http://www.youtube.com/user/TalendChannel?feature=mhum#p/a/u/0/etSgMDI04P0"&gt;YouTube tutorial for Talend&lt;/a&gt;, I walk through some techniques for profiling ERP, SCM and materials master data using &lt;a 
href="http://www.talend.com/download_form.php?cont=qual&amp;amp;?src=DataGovernanceBlog"&gt;Talend Open Profiler&lt;/a&gt;. In addition to basic profiling, the correlation analysis feature can be used to identify relationships between part numbers and descriptions.&lt;br /&gt;
&lt;br /&gt;
&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_3TMxkaIa5WY/THkOES0L_wI/AAAAAAAAAQc/CpVFLcjM_yE/s1600/corrAna.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/THkOES0L_wI/AAAAAAAAAQc/CpVFLcjM_yE/s320/corrAna.jpg" /&gt;&amp;nbsp;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://www.youtube.com/user/TalendChannel?feature=mhum#p/a/u/0/etSgMDI04P0"&gt;Link&lt;/a&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-5543035549744591007?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=QWDph--LrUM:FAHYgeq2lm4:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=QWDph--LrUM:FAHYgeq2lm4:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=QWDph--LrUM:FAHYgeq2lm4:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=QWDph--LrUM:FAHYgeq2lm4:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=QWDph--LrUM:FAHYgeq2lm4:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/5543035549744591007/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=5543035549744591007" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5543035549744591007?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/5543035549744591007?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/QWDph--LrUM/erp-and-scm-data-profiling-techniques.html" title="ERP and SCM Data Profiling Techniques" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/_3TMxkaIa5WY/THkOES0L_wI/AAAAAAAAAQc/CpVFLcjM_yE/s72-c/corrAna.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2010/08/erp-and-scm-data-profiling-techniques.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DkAFQ347eip7ImA9Wx5SGUo.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-3038972448998241098</id><published>2010-08-16T12:05:00.000-04:00</published><updated>2010-08-16T12:05:12.002-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-16T12:05:12.002-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="social media" /><title>Data Governance and Data Quality Insider 100th</title><content type="html">I have reached my 100th post 
milestone.&amp;nbsp; I hope you won't mind if I get a little introspective here and tell you a little about my social media journey over these past three years.&lt;br /&gt;
&lt;br /&gt;
How did I get started?&amp;nbsp; One day back in 2007, I disagreed with &lt;a href="http://it.toolbox.com/blogs/infosphere/"&gt;Vince McBurney’s&lt;/a&gt; post (topic unimportant now).&amp;nbsp; I responded and Vince politely told me to shut up and if I really wanted to have an opinion to write my own blog.&amp;nbsp; I did.&amp;nbsp; Thanks for the kick in the pants, Vince.&lt;br /&gt;
&lt;br /&gt;
Some of my most popular posts over these past three years have been:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;&lt;a href="http://data-governance.blogspot.com/2007/12/probabilistic-matching-sounds-like-good.html"&gt;Probabilistic Matching: Sounds like a good idea, but…&lt;/a&gt;&lt;br /&gt;
Here, I take a swipe at the sanctity of probabilistic matching. I probably have received the most hate-mail from this post. My stance still is that a hybrid approach to matching, using both probabilistic and deterministic is key to getting match results. Probabilistic alone is not the solution.&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="http://data-governance.blogspot.com/2009/03/data-governance-and-coke-machine.html"&gt;Data Governance and the Coke Machine Syndrome&lt;/a&gt;&lt;br /&gt;
I recount a parable given to me by a well-respected boss in my past about meeting management. Meetings can take unexpected turns where huge issues can be settled in minutes, while insignificant ones can eat up the resources of your company. I probably wrote it just after a meeting.&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="http://data-governance.blogspot.com/2009/07/data-quality-project-selection.html"&gt;Data Quality Project Selection&lt;/a&gt;&lt;br /&gt;
A posting about picking the right data quality projects to work on.&lt;br /&gt;
&lt;/li&gt;
&lt;li&gt;&lt;a href="http://data-governance.blogspot.com/2008/01/do-nothing-option.html"&gt;The “Do Nothing” Option&lt;/a&gt;&lt;br /&gt;
A posting that recounts a lesson I learned about selling the power of data quality to management.&lt;/li&gt;
&lt;/ul&gt;Somewhere around my 50th post, I was contacted by a small publishing  firm in the UK about publishing a book on data governance. They liked  what they saw in the blog.&amp;nbsp; I  published the &lt;a href="http://www.itgovernance.co.uk/products/2445"&gt;Data Governance Imperative&lt;/a&gt; in 2009. I pulled upon my experiences with some of the people I met while working in the industry. It's thanks to some of you that the book is a reality.&lt;br /&gt;
&lt;br /&gt;
Blogging has not always been easy. I’ve met some opposition along the way. There were times when my blogging was perceived as somehow threatening to corporate. At the time, blogging was new and corporations didn't know how to handle it. More companies now have definitive blogging policies and realize the positive impact it has. &lt;br /&gt;
&lt;br /&gt;
What about the people I’ve met? I’ve gained a lot of friendships along the way with people I’ve yet to meet face-to-face. We’re able to build a community here in cyberspace – a data geek community that I am very fond of.&amp;nbsp; I’m hesitant to write a list because I don’t want to leave anyone out, but you know who you are.&lt;br /&gt;
&lt;br /&gt;
If you're thinking of blogging, please, find something you’re passionate about and write.&amp;nbsp; You’ll have a great time!&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-3038972448998241098?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=couT_-GIV00:Hnk-8AdK_2k:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=couT_-GIV00:Hnk-8AdK_2k:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=couT_-GIV00:Hnk-8AdK_2k:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=couT_-GIV00:Hnk-8AdK_2k:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=couT_-GIV00:Hnk-8AdK_2k:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/3038972448998241098/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=3038972448998241098" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/3038972448998241098?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/3038972448998241098?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/couT_-GIV00/data-governance-and-data-quality.html" title="Data Governance and Data Quality Insider 100th" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><thr:total>2</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2010/08/data-governance-and-data-quality.html</feedburner:origLink></entry><entry gd:etag="W/&quot;DEUBQHk4fCp7ImA9Wx9QEU4.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-9028880437638284474</id><published>2010-08-12T15:29:00.001-04:00</published><updated>2010-12-23T15:30:51.734-05:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-12-23T15:30:51.734-05:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="change management" /><category scheme="http://www.blogger.com/atom/ns#" term="data governance team" /><category scheme="http://www.blogger.com/atom/ns#" term="data governance" /><category scheme="http://www.blogger.com/atom/ns#" term="economy" /><title>Change Management  and Data Governance</title><content 
type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_3TMxkaIa5WY/TGRKKWegYRI/AAAAAAAAAQU/F2q2iEjfeCI/s1600/Change.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="168" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TGRKKWegYRI/AAAAAAAAAQU/F2q2iEjfeCI/s200/Change.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;Years ago, I worked for a large company that spent time and effort on change management. It has been popular with corporations that plan significant changes as they grow or down-size. Companies, particularly high-tech companies, use change management to be more agile and respond to rapid changes in the market.&lt;br /&gt;
&lt;br /&gt;
As I read through the large amount of information on change management, I’m struck by the parallels between change management and data governance. The focus is on processes. It ensures that no matter what changes happen in a corporation, whether it’s downsizing or rapid growth, significant changes are implemented in an orderly fashion and make everyone more effective. &lt;br /&gt;
&lt;br /&gt;
On the other hand, humans are resistant to change. Change management aims to gain buy-in from management to achieve the organization's goal of an orderly and effective transformation. Sound familiar? Data governance speaks to this ability to manage data properly, no matter what growth spurts, mergers or downsizing occur. It is about changing the hearts and minds of individuals to better manage data and achieve more success while doing so.&lt;br /&gt;
&lt;br /&gt;
&lt;b&gt;Change Management Models&lt;/b&gt;&lt;br /&gt;
As you examine data governance models, look toward change management models that have been developed by vendors and analysts in the change management space.&amp;nbsp; One that struck my attention was the ADKAR model developed by a company called &lt;a href="http://www.prosci.com/cm-new.htm"&gt;Prosci&lt;/a&gt;. In this model, there are five specific stages that must be realized in order for an organization to successfully change. They include:&lt;br /&gt;
&lt;ul&gt;&lt;li&gt;Awareness - An organization must know why a specific change is necessary.&lt;/li&gt;
&lt;li&gt;Desire - The organization must have the motivation and desire to participate in the call for change.&lt;/li&gt;
&lt;li&gt;Knowledge – The organization must know how to change. Knowing why you must change is not enough.&lt;/li&gt;
&lt;li&gt;Ability - Every individual in the company must implement new skills and processes to make the necessary changes happen.&lt;/li&gt;
&lt;li&gt;Reinforcement - Individuals must sustain the changes, making them the new behavior, averting the tendency to revert back to their old processes.&lt;/li&gt;
&lt;/ul&gt;These same factors can be applied when assessing how to change our own teams to manage data more effectively.&amp;nbsp; Positive change will only come if you work on all of these factors. &lt;br /&gt;
&lt;br /&gt;
I often talk about business users and IT working together to solve the data governance problem. By looking at the extensive information available on change management, you can learn a lot about making changes for data governance.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-9028880437638284474?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=FuznIPTtejI:tAEP8iFMliU:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=FuznIPTtejI:tAEP8iFMliU:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=FuznIPTtejI:tAEP8iFMliU:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=FuznIPTtejI:tAEP8iFMliU:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=FuznIPTtejI:tAEP8iFMliU:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/9028880437638284474/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=9028880437638284474" title="2 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/9028880437638284474?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/9028880437638284474?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/FuznIPTtejI/change-management-and-data-governance.html" title="Change Management  and Data Governance" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/_3TMxkaIa5WY/TGRKKWegYRI/AAAAAAAAAQU/F2q2iEjfeCI/s72-c/Change.jpg" height="72" width="72" /><thr:total>2</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2010/08/change-management-and-data-governance.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CEIMR3s7eyp7ImA9Wx5SE0Q.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-2832759012291340168</id><published>2010-08-09T13:48:00.001-04:00</published><updated>2010-08-09T18:23:06.503-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-08-09T18:23:06.503-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="master data management" /><category scheme="http://www.blogger.com/atom/ns#" term="data governance team" /><category 
scheme="http://www.blogger.com/atom/ns#" term="data profiling" /><category scheme="http://www.blogger.com/atom/ns#" term="data governance" /><title>Data Quality Pro Discussion</title><content type="html">Last week I sat down with Dylan Jones of &lt;a href="http://dataqualitypro.com/"&gt;DataQualityPro.com&lt;/a&gt; to talk about data governance. Here is the replay. We discussed a range of topics including organic governance approaches,  challenges of defining data governance, industry adoption trends, policy  enforcement vs legislature and much more.&lt;br /&gt;
&lt;br /&gt;
&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;
&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_3TMxkaIa5WY/TGA-lnRgeSI/AAAAAAAAAQM/H9LWOeDIOp4/s1600/DQPro.jpg" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/_3TMxkaIa5WY/TGA-lnRgeSI/AAAAAAAAAQM/H9LWOeDIOp4/s320/DQPro.jpg" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;&lt;a href="http://www.dataqualitypro.com/data-quality-home/episode-4-data-governance-qa-with-guest-steve-sarsfield.html"&gt;Link&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-2832759012291340168?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=eHzEn_OVIro:BZkJ8e6M7oA:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=eHzEn_OVIro:BZkJ8e6M7oA:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=eHzEn_OVIro:BZkJ8e6M7oA:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=eHzEn_OVIro:BZkJ8e6M7oA:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=eHzEn_OVIro:BZkJ8e6M7oA:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/2832759012291340168/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=2832759012291340168" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/2832759012291340168?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/2832759012291340168?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/eHzEn_OVIro/data-quality-pro-discussion.html" title="Data Quality Pro Discussion" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_3TMxkaIa5WY/TGA-lnRgeSI/AAAAAAAAAQM/H9LWOeDIOp4/s72-c/DQPro.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2010/08/data-quality-pro-discussion.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CUEASH4zcCp7ImA9Wx5TFU0.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-6113859236833927155</id><published>2010-07-30T11:27:00.000-04:00</published><updated>2010-07-30T11:27:29.088-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-07-30T11:27:29.088-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="tools" /><category scheme="http://www.blogger.com/atom/ns#" term="deterministic" 
/><category scheme="http://www.blogger.com/atom/ns#" term="matching algorithms" /><category scheme="http://www.blogger.com/atom/ns#" term="dedupe records" /><category scheme="http://www.blogger.com/atom/ns#" term="probabilistic" /><title>Deterministic and Probabilistic Matching White Paper</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_3TMxkaIa5WY/TFLt5yilkyI/AAAAAAAAAQE/2hn5GOOtuO0/s1600/wp_match.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="200" src="http://1.bp.blogspot.com/_3TMxkaIa5WY/TFLt5yilkyI/AAAAAAAAAQE/2hn5GOOtuO0/s200/wp_match.jpg" width="156" /&gt;&lt;/a&gt;&lt;/div&gt;I’ve been busy this summer working on a white paper on record matching, the result of which is available on the Talend web site &lt;a href="http://www.talend.com/document-download.php?doc=matchdq&amp;amp;src=DataGovernanceBlog"&gt;here&lt;/a&gt;.&lt;br /&gt;
&lt;br /&gt;
The white paper is sort of a primer containing elementary principles of record matching.&amp;nbsp; As the description says, it outline&lt;span id="goog_1451239157"&gt;&lt;/span&gt;&lt;span id="goog_1451239158"&gt;&lt;/span&gt;s the basic theories and strategies of record matching. It describes the nuances of deterministic and probabilistic matching and the algorithms used to identify relationships within records. It covers the processes to employ in conjunction with matching technology to transform raw data into powerful information that drives success in enterprise applications like CRM, data warehouse and ERP.&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-6113859236833927155?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=mS5dvJnkVck:a-HYjUF9iQ8:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=mS5dvJnkVck:a-HYjUF9iQ8:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=mS5dvJnkVck:a-HYjUF9iQ8:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=mS5dvJnkVck:a-HYjUF9iQ8:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=mS5dvJnkVck:a-HYjUF9iQ8:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/6113859236833927155/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=6113859236833927155" title="0 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/6113859236833927155?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/6113859236833927155?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/mS5dvJnkVck/deterministic-and-probabilistic.html" title="Deterministic and Probabilistic Matching White Paper" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_3TMxkaIa5WY/TFLt5yilkyI/AAAAAAAAAQE/2hn5GOOtuO0/s72-c/wp_match.jpg" height="72" width="72" /><thr:total>0</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2010/07/deterministic-and-probabilistic.html</feedburner:origLink></entry><entry gd:etag="W/&quot;CkMDR34zeyp7ImA9Wx5TE04.&quot;"><id>tag:blogger.com,1999:blog-6895175514429514812.post-1478374701263895424</id><published>2010-07-28T11:21:00.000-04:00</published><updated>2010-07-28T11:21:16.083-04:00</updated><app:edited xmlns:app="http://www.w3.org/2007/app">2010-07-28T11:21:16.083-04:00</app:edited><category scheme="http://www.blogger.com/atom/ns#" term="data quality" /><category scheme="http://www.blogger.com/atom/ns#" term="business strategy" /><category 
scheme="http://www.blogger.com/atom/ns#" term="data governance" /><title>DGDQI Viewer Mail</title><content type="html">&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_3TMxkaIa5WY/TFBJrhUw7PI/AAAAAAAAAP8/DSn96JguZSg/s1600/mail.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="137" src="http://1.bp.blogspot.com/_3TMxkaIa5WY/TFBJrhUw7PI/AAAAAAAAAP8/DSn96JguZSg/s200/mail.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;From time to time, people read my blog or &lt;a href="https://www.amazon.com/dp/1849280126?tag=parentjournal&amp;amp;camp=213381&amp;amp;creative=390973&amp;amp;linkCode=as4&amp;amp;creativeASIN=1849280126&amp;amp;adid=1DKVNV9J4ZE33SV9Z9M7&amp;amp;"&gt;book &lt;/a&gt;and contact me to chat about data governance and data quality. I welcome it. It’s great to talk to people in the industry and hear their concerns. &lt;br /&gt;
&lt;br /&gt;
Occasionally, I see things in my in-box that bother me, though.&amp;nbsp; Here is one item that I’ll address in a post. The names have been changed to protect the innocent.&lt;br /&gt;
&lt;br /&gt;
A public relations firm asked:&lt;br /&gt;
&lt;br /&gt;
&lt;blockquote&gt;Hi Steve,&lt;br /&gt;
I wonder if you could answer these questions for me.&lt;br /&gt;
- What are the key business drivers for the advent of data governance software solutions?&lt;br /&gt;
- What industries can best take advantage of data governance software solutions?&lt;br /&gt;
- Do you see cloud computing-based data governance solutions developing?&lt;/blockquote&gt;&lt;br /&gt;
I couldn’t answer these questions, because they all pre-supposed that data governance is a software solution.&amp;nbsp; It made me wonder if I have made myself clear enough on the fact that data governance is mostly about changing the hearts and minds of your colleagues to re-think their opinion of data and its importance.&amp;nbsp; Data governance is a company’s mindful decision that information is important and they’re going to start leveraging it. Yes, technology can help, but a complete data governance software solution would have more features than a &lt;a href="http://www.swissarmy.com/MultiTools/Pages/Product.aspx?category=doityourself&amp;amp;product=53771&amp;amp;"&gt;Workchamp XL Swiss Army Knife&lt;/a&gt;. It would have to include data profiling, data quality, data integration, business process management, master data management, wikis, a messaging platform, a toothpick and a nail file in order to be complete.&amp;nbsp; &lt;br /&gt;
&lt;br /&gt;
Can you put all this on the cloud?&amp;nbsp; Yes.&amp;nbsp; Can you put the hearts and minds of your company on a cloud?&amp;nbsp; If only it were that easy...&lt;div class="blogger-post-footer"&gt;Covering the world of data integration, data governance, and data quality from the perspective of an industry insider.&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/6895175514429514812-1478374701263895424?l=data-governance.blogspot.com' alt='' /&gt;&lt;/div&gt;&lt;div class="feedflare"&gt;
&lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=dwIXrSAfgWI:U9yCOYOnSgc:yIl2AUoC8zA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=yIl2AUoC8zA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=dwIXrSAfgWI:U9yCOYOnSgc:7Q72WNTAKBA"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=7Q72WNTAKBA" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=dwIXrSAfgWI:U9yCOYOnSgc:V_sGLiPBpWU"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?i=dwIXrSAfgWI:U9yCOYOnSgc:V_sGLiPBpWU" border="0"&gt;&lt;/img&gt;&lt;/a&gt; &lt;a href="http://feeds.feedburner.com/~ff/blogspot/fCls?a=dwIXrSAfgWI:U9yCOYOnSgc:dnMXMwOfBR0"&gt;&lt;img src="http://feeds.feedburner.com/~ff/blogspot/fCls?d=dnMXMwOfBR0" border="0"&gt;&lt;/img&gt;&lt;/a&gt;
&lt;/div&gt;</content><link rel="replies" type="application/atom+xml" href="http://data-governance.blogspot.com/feeds/1478374701263895424/comments/default" title="Post Comments" /><link rel="replies" type="text/html" href="http://www.blogger.com/comment.g?blogID=6895175514429514812&amp;postID=1478374701263895424" title="1 Comments" /><link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/1478374701263895424?v=2" /><link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/6895175514429514812/posts/default/1478374701263895424?v=2" /><link rel="alternate" type="text/html" href="http://feedproxy.google.com/~r/blogspot/fCls/~3/dwIXrSAfgWI/dgdqi-viewer-mail.html" title="DGDQI Viewer Mail" /><author><name>Steve Sarsfield</name><uri>http://www.blogger.com/profile/12892788380306110697</uri><email>noreply@blogger.com</email><gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="30" height="32" src="http://4.bp.blogspot.com/_3TMxkaIa5WY/TDtOrWc5XzI/AAAAAAAAAPU/oP7EzaO8kUc/S220/Steve_sm.jpg" /></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_3TMxkaIa5WY/TFBJrhUw7PI/AAAAAAAAAP8/DSn96JguZSg/s72-c/mail.jpg" height="72" width="72" /><thr:total>1</thr:total><feedburner:origLink>http://data-governance.blogspot.com/2010/07/dgdqi-viewer-mail.html</feedburner:origLink></entry></feed>

