<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" media="screen" href="/~d/styles/rss2full.xsl"?><?xml-stylesheet type="text/css" media="screen" href="http://feeds.feedburner.com/~d/styles/itemcontent.css"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/" xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule" version="2.0">

<channel>
	<title>Martijn's C# Programming Blog</title>
	
	<link>http://www.dijksterhuis.org</link>
	<description>Information, news about programming in C#</description>
	<lastBuildDate>Fri, 07 Aug 2009 21:26:47 +0000</lastBuildDate>
	<language>en-US</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.5.1</generator>
		<atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="self" type="application/rss+xml" href="http://feeds.feedburner.com/MartijnsCHashCodingBlog" /><feedburner:info xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0" uri="martijnschashcodingblog" /><atom10:link xmlns:atom10="http://www.w3.org/2005/Atom" rel="hub" href="http://pubsubhubbub.appspot.com/" /><creativeCommons:license>http://creativecommons.org/licenses/by/2.0/</creativeCommons:license><feedburner:emailServiceId xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">MartijnsCHashCodingBlog</feedburner:emailServiceId><feedburner:feedburnerHostname xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">http://feedburner.google.com</feedburner:feedburnerHostname><item>
		<title>C# Autosuggestion Textbox</title>
		<link>http://www.dijksterhuis.org/autosuggestion-textbox/</link>
		<comments>http://www.dijksterhuis.org/autosuggestion-textbox/#comments</comments>
		<pubDate>Fri, 07 Aug 2009 21:24:56 +0000</pubDate>
		<dc:creator>Martijn</dc:creator>
				<category><![CDATA[Algorithms]]></category>
		<category><![CDATA[textbox]]></category>

		<guid isPermaLink="false">http://www.dijksterhuis.org/?p=938</guid>
		<description><![CDATA[A few days ago I needed a textbox that automatically suggests common input options to the user. In my situation, names of companies. Because of screen space constraints I am unable to use a ComboBox (which already has this functionality). The idea is that if the user enters one or more characters that the Textbox [...]<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></description>
				<content:encoded><![CDATA[<p>A few days ago I needed a textbox that automatically suggests common input options to the user. In my situation, names of companies. Because of screen space constraints I am unable to use a ComboBox (which already has this functionality). </p>
<p>The idea is that if the user enters one or more characters, the Textbox will search its list of suggestions. In this test implementation this is done through a simple <em>List&lt;&gt;</em>. Ultimately my list will contain many thousands of items and I will need to replace the <em>List&lt;&gt;</em> with a more efficient search algorithm. </p>
<p>In the below example code typing a single &#8220;W&#8221; will expand to &#8220;Waterland&#8221;, if you continue typing beyond the final &#8220;d&#8221; it will suggest &#8220;Waterland Investments&#8221;. Type a &#8220;T&#8221; and it will branch to &#8220;Waterland Telecommunication Systems&#8221;. </p>
<pre class="brush: c#">
using System;
using System.Collections.Generic;
using System.Text;
using System.Windows.Forms;

namespace Dijksterhuis.org
{
    class AutoSuggestControl : TextBox
    {
        List&lt;string&gt; Suggestions;
        int PreviousLength; 

        // V1.0 We are using a simple sorted list for the suggestions
        public AutoSuggestControl() : base()
        {
            Suggestions = new List&lt;string&gt;();

            // We keep track of the previous length of the string
            // If the user tries to delete characters we do not interfere
            PreviousLength = 0; 

            // Very basic list, too slow to be suitable for systems with many entries
            Suggestions.Add(&quot;Waterland&quot;);
            Suggestions.Add(&quot;Waterland Investments&quot;);
            Suggestions.Add(&quot;Waterland Telecommunication Systems&quot;);
            Suggestions.Sort();
        }

        /// &lt;summary&gt;
        /// Search through the collection of suggestions for a match
        /// &lt;/summary&gt;
        /// &lt;param name=&quot;Input&quot;&gt;&lt;/param&gt;
        /// &lt;returns&gt;&lt;/returns&gt;

        private string FindSuggestion(string Input)
        {
            if (Input != &quot;&quot;) 
            foreach (string Suggestion in Suggestions)
            {
                if (Suggestion.StartsWith(Input))
                    return Suggestion;
            }
            return null;
        }

        /// &lt;summary&gt;
        /// We only interfere after receiving the OnTextChanged event. 
        /// &lt;/summary&gt;
        /// &lt;param name=&quot;e&quot;&gt;&lt;/param&gt;
        protected override void OnTextChanged(EventArgs e)
        {
            base.OnTextChanged(e);
          
            // We don&#039;t do anything if the user is trying to shorten the sentence
            int CursorPosition = SelectionStart;
            if (Text.Length &gt; PreviousLength &amp;&amp; CursorPosition &gt;= 0)
            {
                string Suggestion = FindSuggestion(Text.Substring(0, CursorPosition));
                if (Suggestion != null)
                {
                    // Set the contents of the textbox to the suggestion
                    Text = Suggestion;
                    // Setting text puts the cursor at the beginning of the textbox, so we need to reposition it
                    Select(CursorPosition, 0);
                }
            }
            PreviousLength = Text.Length;
        }

  

    }
}

</pre>
<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></content:encoded>
			<wfw:commentRss>http://www.dijksterhuis.org/autosuggestion-textbox/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		</item>
		<item>
		<title>Exploring C# Boxing</title>
		<link>http://www.dijksterhuis.org/exploring-boxing/</link>
		<comments>http://www.dijksterhuis.org/exploring-boxing/#comments</comments>
		<pubDate>Fri, 20 Mar 2009 07:29:49 +0000</pubDate>
		<dc:creator>Martijn</dc:creator>
				<category><![CDATA[Chapter]]></category>
		<category><![CDATA[Learn C#]]></category>
		<category><![CDATA[boxing]]></category>

		<guid isPermaLink="false">http://www.dijksterhuis.org/?p=908</guid>
		<description><![CDATA[Boxing in C# has little to do with Saturday night television but quite a bit more with that part-time job at the warehouse you had as a student. It is an important concept in C# that is related to how the compiler handles different kinds of variables in memory. Knowing how the compiler handles the [...]<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></description>
				<content:encoded><![CDATA[<p><img src="http://www.dijksterhuis.org/wp-content/uploads/2009/03/boxes2.jpg" alt="C# Boxing Explained" title="C# Boxing Explained" width="570" height="246" class="alignleft size-full wp-image-934" /></p>
<p><em>Boxing in C# has little to do with Saturday night television but quite a bit more with that part-time job at the warehouse you had as a student. It is an important concept in C# that is related to how the compiler handles different kinds of variables in memory. Knowing how the compiler handles the various types allows you to avoid unexpected side effects in your code. </em></p>
<p>This article explains what boxing is, how it works and how it can negatively affect your code if you don&#8217;t pay attention to it. We also look at how generics can be used to improve your code&#8217;s efficiency. And we try to answer the ultimate question: Is everything in C# an object?</p>
<p><span id="more-908"></span></p>
<h3>Everything is an object in C# .. but not all objects are created equally</h3>
<p>In C#, everything inherits from System.Object, so they share all the common object methods :</p>
<blockquote><p>
string a = &#8220;hello world&#8221;;<br />
int     b = 34;<br />
Console.WriteLine( &#8220;{0} {1}&#8221;,a.ToString(),b.ToString());
</p></blockquote>
<p>At first glance all objects look the same in C#. But an important distinction is made between <em>value types</em> and <em>reference types</em>. Value types are basic types such as <em>struct</em>, <em>int</em>, <em>long</em>, <em>short</em> etc. Reference types are all the classes you will use (strings, delegates, objects)</p>
<ul>
<li>Value types (int,struct..) etc are located on the stack. (unless they are part of a reference type, such as a class)</li>
<li>Reference types (classes&#8230;) are accessed through a pointer to their actual location on the heap.</li>
</ul>
<p>We simplify things a little for this article: The stack keeps track of what is executing in our application. As we enter and exit methods, items are added to or removed from the top. The heap stores all the application&#8217;s data, and since data can be added to and removed from it randomly it needs to be garbage collected.</p>
<p>Why the difference ? The stack is faster to access for the runtime so by putting simple types close at hand the code gains in efficiency.</p>
<p><strong>Assigning variables</strong></p>
<p><img src="http://www.dijksterhuis.org/wp-content/uploads/2009/03/boxing.png" alt="C# Boxing" title="C# Boxing" width="250" height="261" class="alignleft size-full wp-image-920" />The graphic shows four possible scenarios :</p>
<ol>
<li><strong>Boxing: </strong>When you assign an integer to an object (object b=a): a new managed memory block is created on the heap. For this C# has to allocate memory at runtime.</li>
<li><strong>By Value: </strong>If you copy the integer to another integer (int c=a) the value is simply copied to another memory slot on the stack (which is allocated at compile time), this is the fastest assignment as most checks can be done at compile time.</li>
<li><strong>Unboxing:</strong> When you cast an object to a value type (d=(int)b) the result is stored on the stack. This is also an expensive operation as at runtime the value has to be retrieved from the heap and is then checked to see if the cast was valid. (If the source argument is null or a reference to an incompatible object, an<strong> InvalidCastException</strong> is thrown.)</li>
<li><strong>By Reference: </strong>Classes are reference types so if you assign a new object to an older object (object e = b) no new instance is created, the object pointer on the stack simply points to the older object. A direct result of this is that if you modify either b or e, they will both reflect the change as they both point to the same location in memory.</li>
</ol>
<p>Boxing is simply putting a basic type in a wrapper (making it a full-blown object), and unboxing takes that wrapped object and converts it back to a simpler type. To do the boxing, managed memory needs to be allocated on the heap, references need to be updated, and the contents of the value type have to be copied. </p>
<p><strong>Value types are copied, reference types just refer to the original object</strong></p>
<p>The following code example shows how a simple change in code can have very different results.</p>
<div>
<table border="1" cellspacing="0" cellpadding="3" width="100%">
<tbody>
<tr>
<td width="50%"><strong>Example A</strong></td>
<td width="50%"><strong>Example B</strong></td>
</tr>
<tr>
<td width="50%">
<pre class="brush: c#">
    class MainClass
    {
        struct Demo
        {
            public int x;
            public Demo(int x)
            {
                this.x = x;
            }
        }
        
        public static void Main(string[] args)
        {
            Demo p = new Demo(10);
            object box = p;
            p.x = 20;
            Console.Write(((Demo)box).x);
        }
    }
</pre>
</td>
<td width="50%">
<pre class="brush: c#">
   class MainClass
    {
        class Demo
        {
            public int x;
            public Demo(int x)
            {
                this.x = x;
            }
        }

        public static void Main(string[] args)
        {
            Demo p = new Demo(10);
            object reference = p;
            p.x = 20;
            Console.Write(((Demo)reference).x);
        }
    }
</pre>
</td>
</tr>
<tr>
<td width="50%">Result: 10</td>
<td width="50%">Result: 20</td>
</tr>
</tbody>
</table>
</div>
<p>In Example A, because Demo is a struct (and thus a value type on the stack) when we box it a copy is made. When we modify the original no change is made to the copy.</p>
<p>In Example B, we have created Demo as a class. In this situation we can assign it to another object and we don&#8217;t need to box it. A reference is made instead. So when we update the original, the copy is also updated as they both point to the same location in memory.</p>
<h3>Boxing and unboxing value types slows things down</h3>
<p>Often you can&#8217;t rely on the type of variable a function will take, so you need to use an object variable, as object is the lowest common denominator in .NET. In the following example we use the <em>ArrayList</em> class to store a set of integers. The <em>ArrayList</em> can store any type of variable, but to be able to do this it accepts the <em>object</em> class. </p>
<pre class="brush: c#">
        using System.Collections;

        public static void Main(string[] args)
        {
            int total = 0;
            ArrayList myList = new ArrayList();
            for (int Lp = 0; Lp &lt; 10000000; Lp++)
                myList.Add(Lp); // Box: Integer to an object
            foreach(object item in myList)
                total += (int)item; // Unbox: Object to integer
        }
</pre>
<p>When we add an integer to the ArrayList it is boxed into an object, and when we retrieve it, it is unboxed back into an integer.</p>
<p>Note that this is only an issue if we are trying to store value types in the arraylist. If we were trying to store objects (eg. classes) there is no boxing done, as the original type was an object already. In this situation a simple reference is stored. On retrieval there is no unboxing necessary as only a reference is returned.</p>
<h3>Boxing in Action</h3>
<p>When we look at our code in a disassembler the boxing operation is clearly visible in the output stream:</p>
<blockquote><p>
;<em> myList.Add(Lp); // Box: Integer to an object</em><br />
IL_000f:  ldloc.1<br />
IL_0010:  ldloc.2<br />
IL_0011:  box [mscorlib]System.Int32<br />
IL_0016:  callvirt instance int32 class [mscorlib]System.Collections.ArrayList::Add(object)
</p></blockquote>
<p>And when we convert the object back to an integer the reverse happens:</p>
<blockquote><p>
; total += (int)item; <strong>// Unbox: Object to integer</strong><br />
IL_0042:  unbox [mscorlib]System.Int32
</p></blockquote>
<h3>Generics remove the need to box and unbox value types</h3>
<p>In .NET 2.0 generics and generic collections were introduced that remove the need to box and unbox variables in many common situations. The following example is functionally equivalent to the earlier boxing example. We use a generic here, but force it to be &#8220;object&#8221;:</p>
<pre class="brush: c#">
        using System.Collections.Generic;
        public static void Main(string[] args)
        {
            int total = 0;
            List&lt;object&gt; myList = new List&lt;object&gt;();
            for (int Lp = 0; Lp &lt; 10000000; Lp++)
                myList.Add((object)Lp); // We need a boxing operation
            foreach(int item in myList)
                total += (int)item; // Unbox back to an int
        }
</pre>
<p>The above example of course makes little sense because we are inefficient on purpose. I included it to demonstrate how with generics we can get around the need for boxing with generic collections.</p>
<pre class="brush: c#">
        using System.Collections.Generic;
        public static void Main(string[] args)
        {
            int total = 0;
            List&lt;int&gt; myList = new List&lt;int&gt;();
            for (int Lp = 0; Lp &lt; 10000000; Lp++)
                myList.Add(Lp); // No need to box things
            foreach(int item in myList)
                total += item; // No need to unbox
        }
</pre>
<p>Generics are similar to templates in C++: they allow you to specify a type and the compiler will generate all the required code. In our example we specify that our list is of type int, so the compiler will create a List class that supports integers. There is no longer a need to convert our integer type to an object first.  If you would like to know more about how generics work, have a look at another post I wrote on the subject: <a id="s6of" title="Generics in C#" href="../generics-in-c/">Generics in C#</a> .</p>
<h3>So how much of a difference does boxing make?</h3>
<p>Boxing is slower, but how big is the difference and do you need to care? To put things into perspective I tested the timing on the three code examples above. For this I used .NET&#8217;s Stopwatch class to <a id="gx9k" title="measure our code's performance" href="../timing-function-performance-stopwatch-class/">measure the code&#8217;s performance</a>. First of all, the .NET runtime is pretty fast and I found that I needed to set the loop iterations at some 10 million before I was able to get a consistent result across runs.</p>
<ul>
<li>Boxing/Unboxing ArrayList example: 2580 ms</li>
<li>Boxing/Unboxing List&lt;object&gt; example: 2050 ms</li>
<li>Non boxing List&lt;int&gt; example: 825 ms</li>
</ul>
<p>The non boxing List&lt;int&gt; example is about 2.5 times faster than List&lt;object&gt;, which is quite considerable.</p>
<h3>To finish up&#8230;</h3>
<p>Based on this should you rewrite all your code to implement generics? Probably not unless you have several tight loops that could use your attention, note that this only applies to value types (structs, ints) . For reference types (i.e. all classes) this is less of a problem as they are stored as a reference anyway. </p>
<p>It is however a good idea to implement generics in any code you write from here on. Not just because of the potential gains in speed but also because generics provide the compiler with much more information.</p>
<ul>
<li>Generics reduce the number of coding errors as you have compile-time checking of types</li>
<li>Generics are more readable, you don&#8217;t need to cast all over the place and it&#8217;s always obvious what type it is associated with</li>
</ul>
<p>Also if possible avoid passing value types as parameters to methods if these force a conversion to an object.</p>
<p>Image credit: Wall of boxed by <a rel="nofollow" href="http://www.flickr.com/photos/celesteh/">celesteh</a></p>
<p><a href="http://www.dotnetkicks.com/kick/?url=http%3a%2f%2fwww.dijksterhuis.org%2fexploring-boxing%2f"><img src="http://www.dotnetkicks.com/Services/Images/KickItImageGenerator.ashx?url=http%3a%2f%2fwww.dijksterhuis.org%2fexploring-boxing%2f" border="0" alt="kick it on DotNetKicks.com" /></a></p>
<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></content:encoded>
			<wfw:commentRss>http://www.dijksterhuis.org/exploring-boxing/feed/</wfw:commentRss>
		<slash:comments>10</slash:comments>
		</item>
		<item>
		<title>10 C# Shorthands that improve productivity</title>
		<link>http://www.dijksterhuis.org/10-c-coding-shorthands-that-improve-productivity/</link>
		<comments>http://www.dijksterhuis.org/10-c-coding-shorthands-that-improve-productivity/#comments</comments>
		<pubDate>Tue, 17 Mar 2009 07:54:38 +0000</pubDate>
		<dc:creator>Martijn</dc:creator>
				<category><![CDATA[Learn C#]]></category>
		<category><![CDATA[c#]]></category>

		<guid isPermaLink="false">http://www.dijksterhuis.org/?p=897</guid>
		<description><![CDATA[One of the best things about C# is that as the language and libraries expand thought is put into keeping things readable. Below I have listed 10 shorthands that you can use to make your code tighter and less wordy. No doubt you know one or more already &#8212; but do you currently use all [...]<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></description>
				<content:encoded><![CDATA[<p><img src="http://www.dijksterhuis.org/wp-content/uploads/2009/03/coding.jpg" alt="C# shortcuts and shorthand" title="C# shortcuts and shorthand" width="570" height="236" class="alignright size-full wp-image-905" /></p>
<p><I>One of the best things about C# is that as the language and libraries expand thought is put into keeping things readable. Below I have listed 10 shorthands that you can use to make your code tighter and less wordy. No doubt you know one or more already &#8212; but do you currently use all ten of them ?</I></p>
<p><span id="more-897"></span></p>
<p><strong>1. The ? conditional evaluation operator</strong></p>
<p>I first read about the ? operator in the &#8220;The C Programming Language&#8221; book by Brian Kernighan and Dennis Ritchie (first published in 1978, but I didn&#8217;t get around to reading it until well into the second edition). So this isn&#8217;t anything new. But it IS handy. It allows you to compress a common if-then-else pattern into a single assignment.</p>
<pre class="brush: c#">
int x = 10;
int y = 20;
int max;

if (x &gt; y)
  max = x;
else
  max = y;
</pre>
<p>Using the (Question) ? Positive Answer : Negative Answer pattern, the above can be rewritten as:</p>
<pre class="brush: c#">
int x = 10;
int y = 20;
int max = (x &gt; y) ? x : y;
</pre>
<p><strong>2. Null-Coalesce operator (??)</strong></p>
<p>How often do you test for null values in your code? Often? Then the null-coalesce operator (??) comes in handy. To see how it works consider the following code example:</p>
<pre class="brush: c#">
object c = null;
object d = new object();
object e;

if (c != null)
    e = c;
else
   e = d;
</pre>
<p>It is obvious that we can rewrite this using the single ? operator :</p>
<pre class="brush: c#">
object c = null;
object d = new object();
object e = (c != null) ? c : d;
</pre>
<p>Using the ?? operator we can make it even shorter. If the left hand side is null, the right hand side is assigned.</p>
<pre class="brush: c#">
object c = null;
object d = new object();
object e = c ?? d;
</pre>
<p><strong>3. Object Initializers</strong></p>
<p>After you create a new object you often have to assign one or more of its properties. With the introduction of C# 3.0 it is now possible to use object initializers to both improve the readability of this, and to shorten your code.</p>
<pre class="brush: c#">
Customer c = new Customer();
c.Name = &quot;James&quot;;

c.Address = &quot;204 Lime Street&quot;;
</pre>
<p>can be written as:</p>
<pre class="brush: c#">
Customer c = new Customer { Name=&quot;James&quot;, Address = &quot;204 Lime Street&quot; };
</pre>
<p><strong>4. The using statement</strong></p>
<p>Often you will need to allocate a system resource such as a font, file handle, network resource etc. Each time you need such a resource there are three critical steps to go through: You acquire the resource, you use it, and then you dispose of it. If you forget to properly dispose of it you will have created a memory or resource leak. This is best illustrated through the following patterns:</p>
<pre class="brush: c#">
     // 1. Allocation of the object
     Font font1 = new Font(&quot;Arial&quot;, 10.0f);
     try
     {
      // 2. The bit where we use the resource
     }
     finally
     {
     // 3. Disposal of the object
     if (font1 != null)
     ((IDisposable)font1).Dispose();
     }
</pre>
<p>The using statement allows us to compress this down to:</p>
<pre class="brush: c#">
// Allocate the resource
using (Font font1 = new Font(&quot;Arial&quot;, 10.0f))
{
    // The bit where we use the resource
}
// Disposal is automatic
</pre>
<p>The using statement is intended to be used with objects that implement the &#8220;IDisposable&#8221; interface which in practice is all .NET objects that allocate and manage resources.</p>
<p><strong>5. Aliases for long winded namespaces and types</strong></p>
<p>The names of C# identifiers can become quite long. If you are doing Microsoft Office automation in C# you might want to do something simple like open MS Word and change a document. You can use the &#8220;using&#8221; statement to create an alias for either a class or a namespace.</p>
<pre class="brush: c#">
using Word = Microsoft.Office.Interop.Word;
...
Word.Application app = new Word.Application() { Visible = true };
</pre>
<p><strong>6. Nullable objects</strong></p>
<p>A value type variable (such as an int) needs to have a value; it cannot be null. Sometimes it would be handy if it were possible to assign &#8220;null&#8221; (i.e. undefined) to such a variable. .NET 2.0 introduced the Nullable&lt;T&gt; generic that makes this possible. The following two lines produce exactly the same object:</p>
<pre class="brush: c#">
Nullable&lt;int&gt; x = null; 
int? x = null;
</pre>
<p>By putting a ? following a variable definition the compiler will wrap a Nullable&lt;T&gt; generic around the type.</p>
<p><strong>7. Automatic Properties</strong></p>
<p>C# 3.0 introduced automatic properties. A property typically consists of (but doesn&#8217;t have to) a private variable which is exposed to the outside world through getters and setters. The following is a common example of this:</p>
<pre class="brush: c#">
public class Person 
{
 private string _firstName;
 public string FirstName 
 {
    get { return _firstName; }
    set { _firstName = value; }
 }
}
</pre>
<p>From C# 3.0 on we can reduce the above to: </p>
<pre class="brush: c#">
public class Person
{
 public string FirstName { get; set; }
}
</pre>
<p>The C# compiler will automatically generate a backing variable and the correct get and set properties. Why is this useful? After all you could have just made a string variable in the class public instead. </p>
<p>Defining it as a property allows you to add the actual validation logic to your class at a later stage. The in-memory signature of the class won&#8217;t change, which means that any external libraries compiled against your code will not have to be recompiled.</p>
<p><strong>8. Type inference</strong></p>
<p>In typical C# you will always carefully spell out your definitions:</p>
<pre class="brush: c#">
    string MyString = &quot;Hello World&quot;;
</pre>
<p>From the right side of the declaration it is obvious that only one type (string) will ever match this definition. So instead of us doing the work, why not let the compiler figure this out?</p>
<pre class="brush: c#">
    var MyString = &quot;Hello World&quot;;
</pre>
<p>The above definition will also create a string variable named “MyString”. It is important to note that C# is still strongly typed. There is no performance impact from using type inference.</p>
<p>The compiler does all the work figuring out the data type at compile time. Of course this feature created two opposite camps, one that thinks var should be liberally applied, and another one that abhors the whole idea. The middle ground seems to be that var should be used where its use is obvious.</p>
<pre class="brush: c#">
var SeniorStaff = Employees.Where(s =&gt; s.Age &gt; 35);         
foreach(var Member in SeniorStaff)
           Console.WriteLine(&quot;Name: {0} Age: {1}&quot;,Member.Name,Member.Age);
</pre>
<p>For example what type would SeniorStaff be ?</p>
<blockquote><p>((System.Linq.Enumerable.Iterator&lt;&lt;&gt;f__AnonymousType0&lt;string,int&gt;&gt;)(SeniorStaff))</p></blockquote>
<p>This should make you glad that the compiler figured it all out for you. I wrote more about this in another post: <a href="http://www.dijksterhuis.org/csharp-anonymous-types-basics/">Anonymous Types : The Basics</a>.</p>
<p><strong>9. Lambda Expressions</strong></p>
<p>C# 2.0 introduced anonymous methods, which are methods defined inside a method. They are incredibly powerful and a nice way to put all kinds of evaluation logic inside your code, but they had the drawback that they could be quite hard to read.</p>
<blockquote><p>
Func&lt;int,bool&gt; mySeniorStaffFilter = delegate(int a) { return a &gt; 35; };
</p></blockquote>
<p>The above method takes an integer as a parameter, and returns a boolean. It checks if the staff member passed to it is older than 35. If so, it returns true. </p>
<p>Lambda expressions make things a little easier to read, while being functionally exactly the same:</p>
<blockquote><p>
    Func&lt;int,bool&gt; mySeniorStaffFilter = a =&gt; a &gt; 35;
</p></blockquote>
<p>Even better, you can define them anywhere a delegate would have fitted:</p>
<blockquote><p>
    var SeniorStaff = Employees.Where(s => s.Age > 35);
</p></blockquote>
<p><strong>10. string.IsNullOrEmpty</strong></p>
<p>Not really a language feature, but a useful little library function in its own right. How often do you need to test if a string is empty? Or null? The string.IsNullOrEmpty method returns true if this is the case.</p>
<pre class="brush: c#">
if (String.IsNullOrEmpty(s) == true)
    return &quot;is null or empty&quot;;
else
     return String.Format(&quot;(\&quot;{0}\&quot;) is not null or empty&quot;, s);
</pre>
<h3>Optional and named parameters in C# 4.0</h3>
<p>As a bonus a little on the upcoming C# 4.0 which will introduce optional and named parameters. It is quite difficult to see what all the hoopla is about until you realize what optional parameters will do to improve Office automation coding in C#:</p>
<p>Before optional and named parameters:</p>
<pre class="brush: c#">
Word.Application app = new Word.Application() { Visible = true; }
object missing = System.Reflection.Missing.Value;
object filename = @&quot;c:\Document.docx&quot;;
object readOnlyValue = true;

app.Document.Open(ref filename, ref missing, ref readOnlyValue, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing);
</pre>
<p>With the support of optional and named parameters this becomes:</p>
<pre class="brush: c#">
Word.Application app = new Word.Application() { Visible = true; }
app.Documents.Open(@&quot;c:\Document.docx&quot;,ReadOnly:true);
</pre>
<p>Microsoft Office certainly uses a lot of optional parameters. Note that in the first example all parameters are passed by reference. They still are in the second example. Behind the scenes the C# compiler creates the appropriate temporary variables to pass on, saving us from having to hand add them to the code. </p>
<p>To give proper credit, the above example comes from a video on this subject by <a href="http://channel9.msdn.com/posts/mike+ormond/C-40-New-Features-COM-Interop-Enhancements/">Mike Ormond</a> on Channel 9.</p>
<p>Image credit: Thinking About Something by <a rel="nofollow" href="http://www.flickr.com/photos/goljadkin/"> Grazie Daverro</a></p>
<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></content:encoded>
			<wfw:commentRss>http://www.dijksterhuis.org/10-c-coding-shorthands-that-improve-productivity/feed/</wfw:commentRss>
		<slash:comments>30</slash:comments>
		</item>
		<item>
		<title>MonoDevelop Keyboard Shortcut overview</title>
		<link>http://www.dijksterhuis.org/monodevelop-keyboard-shortcut-overview/</link>
		<comments>http://www.dijksterhuis.org/monodevelop-keyboard-shortcut-overview/#comments</comments>
		<pubDate>Mon, 16 Mar 2009 04:41:44 +0000</pubDate>
		<dc:creator>Martijn</dc:creator>
				<category><![CDATA[monodevelop]]></category>
		<category><![CDATA[cheatsheet]]></category>

		<guid isPermaLink="false">http://www.dijksterhuis.org/?p=885</guid>
		<description><![CDATA[Ever watched an experienced programmer flick through code like there is no tomorrow? That is what caffeine addiction does to you. If you think that normal scrolling up and down isn&#8217;t fast enough you will find that a good development environment offers many keyboard shortcuts to let you work ever faster. Keyboard shortcuts allow for [...]<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></description>
				<content:encoded><![CDATA[<p><img src="http://www.dijksterhuis.org/wp-content/uploads/2009/03/monodevelop.png" alt="Mono Develop Keyboard Shortcuts" title="Mono Develop Keyboard Shortcuts" width="570" height="150" class="alignright size-full wp-image-886" /></p>
<p>Ever watched an experienced programmer flick through code like there is no tomorrow? That is what caffeine addiction does to you. If you think that normal scrolling up and down isn&#8217;t fast enough you will find that a good development environment offers many keyboard shortcuts to let you work ever faster. </p>
<p>Keyboard shortcuts allow for things like indenting a selection, making it uppercase or lowercase, commenting it, jumping between bookmarks and moving lines of code up and down. These can be real time savers. </p>
<p>I was looking for a printable overview of available keyboard shortcuts for MonoDevelop but couldn&#8217;t find one so I have created my own. </p>
<p>If you are running a standard Linux distribution you will probably still be using the 1.0 version of MonoDevelop. If you compiled MonoDevelop 2.0 from source then you will be pleased to discover many new shortcuts. As the difference between 1.0 and 2.0 is significant I have made separate tables for both of them. </p>
<ul>
<li><a href='http://www.dijksterhuis.org/wp-content/uploads/2009/03/monodevelop-keyboard-shortcuts.odt'>MonoDevelop 1.0/2.0 Keyboard Shortcuts (Open Office)</a></li>
<li><a href='http://www.dijksterhuis.org/wp-content/uploads/2009/03/monodevelop-keyboard-shortcuts.pdf'>Monodevelop 1.0/2.0 Keyboard Shortcuts (PDF)</a></li>
</ul>
<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></content:encoded>
			<wfw:commentRss>http://www.dijksterhuis.org/monodevelop-keyboard-shortcut-overview/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		</item>
		<item>
		<title>C# guidelines that improve your code quality</title>
		<link>http://www.dijksterhuis.org/csharp-coding-guidelines/</link>
		<comments>http://www.dijksterhuis.org/csharp-coding-guidelines/#comments</comments>
		<pubDate>Fri, 13 Mar 2009 08:25:37 +0000</pubDate>
		<dc:creator>Martijn</dc:creator>
				<category><![CDATA[Review]]></category>
		<category><![CDATA[code guidelines]]></category>

		<guid isPermaLink="false">http://www.dijksterhuis.org/?p=873</guid>
		<description><![CDATA[I am inspired to better myself after reading Dennis Doomen&#8217;s freshly released coding guidelines for C# 3.0 . He provides a very nice PDF document that puts the dot to many common C# coding &#8220;confusions&#8221;. It addresses how to consistently name your variables, namespaces, classes and assemblies. He specifies when to declare a variable as [...]<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></description>
				<content:encoded><![CDATA[<p>I am inspired to better myself after reading Dennis Doomen&#8217;s freshly released <a href="http://www.dennisdoomen.net/2009/03/new-coding-guidelines-for-c-30.html">coding guidelines for C# 3.0</a> . He provides a very nice PDF document that puts the dot to many common C# coding &#8220;confusions&#8221;. It addresses how to consistently name your variables, namespaces, classes and assemblies. He specifies when to declare a variable as static, as readonly and when to seal a class among others. </p>
<p>Another topic he pushes is code readability and ensuring that all code statements are as clear as possible. </p>
<p><img src="http://www.dijksterhuis.org/wp-content/uploads/2009/03/screenshot.png" alt="Example Coding Guideline" title="Example Coding Guideline" width="561" height="250" class="alignright size-full wp-image-874" /></p>
<p>You will often have your own way of doing things. A shared coding guideline is a good way to ensure that each member of your team is writing code styled as similarly as possible.</p>
<p>If your company is looking at adopting a coding guideline for its C# development this would be a very good place to start. </p>
<p>At 32 pages it might seem a little long at first but it reads quickly and most topics should already be familiar if you do a lot of coding. Its companion quick reference guide neatly puts most key items onto a single page and makes for a great handout.</p>
<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></content:encoded>
			<wfw:commentRss>http://www.dijksterhuis.org/csharp-coding-guidelines/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
		</item>
		<item>
		<title>Advanced Regular Expressions in C#</title>
		<link>http://www.dijksterhuis.org/regular-expressions-advanced/</link>
		<comments>http://www.dijksterhuis.org/regular-expressions-advanced/#comments</comments>
		<pubDate>Wed, 11 Mar 2009 10:44:23 +0000</pubDate>
		<dc:creator>Martijn</dc:creator>
				<category><![CDATA[Regular Expressions]]></category>
		<category><![CDATA[regex]]></category>

		<guid isPermaLink="false">http://www.dijksterhuis.org/?p=846</guid>
		<description><![CDATA[In this third and for now last post on using regular expressions we look at some advanced topics. When your expressions become more complicated they also become harder to understand so documenting them can help. And isn&#8217;t standard string replacement a little bit too basic? We also look at how speeding things up can improve [...]<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></description>
				<content:encoded><![CDATA[<p><img src="http://www.dijksterhuis.org/wp-content/uploads/2009/03/advanced.jpg" alt="Regular Expressions in C# - Advanced Topics" title="Regular Expressions in C# - Advanced Topics" width="570" height="281" class="alignright size-full wp-image-857" /><br />
<em>In this third and for now last post on using regular expressions we look at some advanced topics. When your expressions become more complicated they also become harder to understand so documenting them can help. And isn&#8217;t  standard string replacement a little bit too basic? We also look at how speeding things up can improve your code&#8217;s efficiency.<br />
</em><br />
In this post we look at three topics: </p>
<ol>
<li>Improving your code&#8217;s readability by documenting regular expressions</li>
<li>Creating conditional string replacement by using MatchEvaluators</li>
<li>Speeding up regular expressions by compiling them, caching them in memory and pre-compiling them to their own DLL.</li>
</ol>
<p>If you are new to regular expressions in C# have a look at the theory of regular expression in <a href="http://www.dijksterhuis.org/regular-expressions-in-csharp-the-basics/">Regular Expressions : The Basics</a>. The second post <a href="http://www.dijksterhuis.org/regular-expressions-csharp-practical-use/">Regular Expressions in C#: Practical Usage</a> introduced the most common uses of regular expressions. </p>
<p><span id="more-846"></span></p>
<h3>Documenting your Regular Expressions</h3>
<p>Regular expressions can make for fine alphabet soup. The following expression validates an e-mail address and it does a good job at it. It is also very intimidating at first. So just imagine rereading your code after a few weeks, what is going on in there?</p>
<pre>
string validEmail = @"\b([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,4})\b";
</pre>
<p>With a little squinting you see that I would like to extract two groups: the username part, and the domain name part. C# allows us to name each group to make things a little easier to read. We can use the <i>?&lt;groupname&gt;</i> pattern to name each group. </p>
<p>A little rewrite can make our expression a lot easier to read. C# offers the &#8220;#&#8221; character to document our expressions in line.</p>
<pre class="brush: c#">
       static string validEmail = @&quot;\b    # Find a word boundary
                       (?&lt;Username&gt;       # Begin group: Username
                       [a-zA-Z0-9._%+-]+  #  Characters allowed in username, 1 or more
                       )                  # End group: Username
                       @                  # The e-mail &#039;@&#039; character
                       (?&lt;Domainname&gt;     # Begin group: Domain name
                       [a-zA-Z0-9.-]+     #  Domain name(s), we include a dot so that
                                          #  mail.dijksterhuis is also possible
                       .[a-zA-Z]{2,4}     #  The top level domain can only be 4 characters
                                          #  So .info works, .telephone doesn&#039;t. 
                       )                  # End group: Domain name
                       \b                 # Ending on a word boundary
                       &quot;;
</pre>
<p>Because we have added a lot of spaces and new lines to our expression we need to tell Regex about them by specifying the<em> RegexOptions.Multiline</em> and <em>RegexOptions.IgnorePatternWhitespace</em> options.</p>
<pre class="brush: c#">
          string testEmail = &quot;martijn@dijksterhuis.org&quot;;
          Regex TestValidEmail = new Regex(validEmail,RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
           
           // Test the e-mail address
           Match TestResult = TestValidEmail.Match(testEmail);
            
           if (TestResult.Success)
           {
                Console.WriteLine(&quot;E-mail is: {0}@{1}&quot;,TestResult.Groups[&quot;Username&quot;].Value,
                                                         TestResult.Groups[&quot;Domainname&quot;].Value);
           }
</pre>
<h3>Conditional string replacement</h3>
<p>The<em> RegEx.Replace</em> method allows you to use substitution parameters to change the original content around. In a previous post we looked at how we could swap two words around by using grouped patterns and the $1 and $2 conditional replacement names.</p>
<pre class="brush: c#">
Regex Replacer = new Regex(@&quot;(\w*) (\w*)&quot;);
string Input = &quot;Molly Mallone&quot;;
string Output = Replacer.Replace(Input,&quot;$2 $1&quot;);
Console.WriteLine(Output);
</pre>
<p>That is sufficient if you just want to move the data around a little, but it would be nice if you could make a replacement conditional on some external condition. The <em>Regex.Replace</em> method allows you to specify a<em> MatchEvaluator</em> which does just that. <em>MatchEvaluator</em> is a delegate which takes Match as a parameter and returns the replacement string.</p>
<p>Handy for example if you are cleaning up a mailing list and want to conditionally update some, but not all, e-mail addresses. In the following code example we know that <em>mail.dijksterhuis.org</em> is now served by <em>smtp.dijksterhuis.org</em>, so we want to move all those users to the new domain name and leave all other e-mail addresses the same.</p>
<pre class="brush: c#">
using System;
using System.Text.RegularExpressions;

namespace RegularExpression
{
	class MainClass
	{

    	static string validEmail = @&quot;\b   			# Find a word boundary
							  (?&lt;Username&gt;			# Begin group: Username
							  [a-zA-Z0-9._%+-]+     #  Characters allowed in username, 1 or more
							  )                     # End group: Username
							  @					    # The e-mail &#039;@&#039; character
							  (?&lt;Domainname&gt;        # Begin group: Domain name
							  [a-zA-Z0-9.-]+        #  Domain name(s), we include a dot so that
                                                    #  mail.dijksterhuis is also possible
							  .[a-zA-Z]{2,4}        #  The top level domain can only be 4 characters
													#  So .info works, .telephone doesn&#039;t. 
							  )                     # End group: Domain name
                              \b
							  &quot;;
		
		public static string UpdateDomainNames(Match match)
		{
			if (match.Groups[&quot;Domainname&quot;].Value==&quot;mail.dijksterhuis.org&quot;)
			 return match.Groups[&quot;Username&quot;].Value + &quot;@&quot; + &quot;smtp.dijksterhuis.org&quot;;
			return match.Groups[0].Value; // The original
		}
		
		public static void Main(string[] args)
		{

		   Regex TestValidEmail = new Regex(validEmail,RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
		   
		   string[] MailingList = new string[] { &quot;martijn@dijksterhuis.org&quot;,
												 &quot;user@mail.dijksterhuis.org&quot;,
												 &quot;willy@wortel.org&quot;};

		   foreach(string email in MailingList)
		   {
				// Conditionaly replace e-mail addresses 
				Console.WriteLine( TestValidEmail.Replace(email,UpdateDomainNames) );
		   }
		
		}
	}
}
</pre>
<h3>Speeding up regular expressions by compiling them</h3>
<p>Regular expressions can be quite slow and in another post I found that a simple string replacement routine <a href="http://www.dijksterhuis.org/manipulating-strings-in-csharp-replacing-part-string/">was some 40 times faster</a> than the equivalent regular expression.  Often you will want to stick with the regular expression as it will save you many lines of coding. </p>
<p>As the <em>RegEx</em> class encounters your expressions it compiles them to an internal format. It steps through this internal format each time you query the expression. It is also possible to compile your expression to MSIL (the byte code to which C# is compiled) directly. In the best possible scenario the Just-In-Time compiler then translates this MSIL code directly to machine code giving another speed boost to your expression.</p>
<p>A note of caution: According to the MSDN team<a href="http://blogs.msdn.com/bclteam/archive/2004/11/12/256783.aspx"> the increase in speed can be up to 30%</a> which is nice but certainly isn&#8217;t amazing.</p>
<p>You can do this by setting the <em>RegexOptions.Compiled</em> option when you create a new RegEx:</p>
<blockquote><p>Regex theExpression = new Regex(thePattern,RegexOptions.Compiled);</p></blockquote>
<p>The penalty for this is the time to compile the expression which can add significantly to your application&#8217;s start-up time. So although &#8220;compiled&#8221; might sound faster it might actually be slower. This is best applied if you frequently use the expression and it has a very long lifetime.</p>
<p><b>The expression cache</b></p>
<p>If you use many regular expressions the RegEx cache is also an important factor in how quickly your code executes.  Each time you define a regular expression the library needs to parse it. If you frequently use a small set of regular expressions they won&#8217;t be compiled over and over again, instead they come from a cache. You will find that .NET/C# caches the last 15 expressions. Any more and it will have to recompile them as it encounters them.</p>
<p>It is possible to expand the size of the cache by setting the <em>Regex.CacheSize</em> property to a higher value. This is probably best done after you made an overview of how many expressions are used by your code.</p>
<p><b>Compiling to an assembly</b></p>
<p>For compiling a regular expression to MSIL you need to pay a hefty price. But with your project about to ship it might be worthwhile to investigate taking your most frequently used regular expressions and putting them pre-compiled into a new assembly. The <em>Regex.CompileToAssembly</em> method performs this function. You will have to write a separate program to do the actual compilation, but once done you can link in the regular expression like any other assembly to your main application.</p>
<p>You can use the following class to create your own set of regular expressions and save them to a new assembly: </p>
<pre class="brush: c#">
using System;
using System.Collections;
using System.Text.RegularExpressions;

namespace CompileExpression
{
	class MainClass
	{
		// Add the expressions to the hash table 
	 	public static Hashtable TheExpressions = new Hashtable();

		// CompileExpressions
		public static void CompileExpressions(string AssemblyName)
		{
			// Reserve space for each expression
			RegexCompilationInfo[] CI = new RegexCompilationInfo[TheExpressions.Count];

			int Cnt = 0;
        	foreach(DictionaryEntry de in TheExpressions)
        	{
				CI[Cnt++] = new RegexCompilationInfo((string)de.Value,		  // the reg. ex pattern
				                                     RegexOptions.Compiled,   // Options to specify
				                                     (string)de.Key,		  // name of the pattern
				                                     &quot;TheRegularExpressions&quot;, // name space name
				                                     true );                  // Public? 
        	}

		   // Create a new assembly name structure
		   System.Reflection.AssemblyName aName = new System.Reflection.AssemblyName( );

		   // Assign the name
  		   aName.Name = AssemblyName;

		   // Compile all the regular expressions into the assembly
  		   Regex.CompileToAssembly(CI, aName);
		}
		
		public static void Main(string[] args)
		{
			// Add two expressions to the collection
			TheExpressions.Add(&quot;FindHTML&quot;,@&quot;(&lt;\/?[^&gt;]+&gt;)&quot;);
			TheExpressions.Add(&quot;FindTCPIP&quot;, @&quot;(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})&quot;);

			// Compile them to my new assembly called &quot;RegEx&quot;
			CompileExpressions(&quot;RegEx&quot;);
		}
	}
}
</pre>
<p>This will create a file called &#8220;RegEx.dll&#8221; in the home directory of your program. The next step is to verify if this works as advertised. Create a new project in Visual Studio and add a reference: in the Solution Explorer right click the name of the new project, click &#8220;Add Reference&#8230;&#8221; and navigate to where the RegEx.dll file is located.</p>
<p>The following class will load the FindTCPIP expression from the DLL and execute it: </p>
<pre class="brush: c#">
using System;

namespace TCPSolution
{
   class Program
   {
       static void Main(string[] args)
       {
           TheRegularExpressions.FindTCPIP MatchTCP = new TheRegularExpressions.FindTCPIP();

           if (MatchTCP.Match(&quot;10.0.0.6&quot;).Success)
           {
               Console.WriteLine(&quot;This works!&quot;);
           }
       }
   }
} 
</pre>
<h3>Regular Expressions and Mono</h3>
<p>I tested, prodded and played with the code for these regular expression posts on MonoDevelop and Mono. It all works there, with the exception of the final &#8220;Compile to DLL&#8221; example. The code for that example compiles but on execution it will throw a &#8220;Not Implemented&#8221; exception in <em>Regex.CompileToAssembly</em>. </p>
<h3>The end</h3>
<p>This ends the mini series of three posts on regular expressions. I hope you have enjoyed them. The previous posts in this series are: </p>
<ul>
<li><a href="http://www.dijksterhuis.org/regular-expressions-in-csharp-the-basics/">Regular Expressions : The Basics</a>. The theory behind regular expressions. </li>
<li><a href="http://www.dijksterhuis.org/regular-expressions-csharp-practical-use/">Regular Expressions in C#: Practical Usage</a> Examples of common usage. </li>
</ul>
<p><a href="http://www.dotnetkicks.com/kick/?url=http%3a%2f%2fwww.dijksterhuis.org%2fregular-expressions-advanced"><img src="http://www.dotnetkicks.com/Services/Images/KickItImageGenerator.ashx?url=http%3a%2f%2fwww.dijksterhuis.org%2fregular-expressions-advanced%2f%3fpreview%3dtrue" border="0" alt="kick it on DotNetKicks.com" /></a></p>
<p>Image through Flickr by <a rel="nofollow" href="http://www.flickr.com/photos/djenan/">Djenan</a></p>
<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></content:encoded>
			<wfw:commentRss>http://www.dijksterhuis.org/regular-expressions-advanced/feed/</wfw:commentRss>
		<slash:comments>12</slash:comments>
		</item>
		<item>
		<title>Regular Expressions in C# – Practical Usage</title>
		<link>http://www.dijksterhuis.org/regular-expressions-csharp-practical-use/</link>
		<comments>http://www.dijksterhuis.org/regular-expressions-csharp-practical-use/#comments</comments>
		<pubDate>Tue, 10 Mar 2009 07:02:10 +0000</pubDate>
		<dc:creator>Martijn</dc:creator>
				<category><![CDATA[Regular Expressions]]></category>
		<category><![CDATA[regex]]></category>

		<guid isPermaLink="false">http://www.dijksterhuis.org/?p=808</guid>
		<description><![CDATA[This is the second post in the C# regular expression series and it follows up on &#8220;Regular Expressions in C# &#8211; The Basics&#8221; which explained the theory behind Regular expressions in C#. In this post we look at how to make practical use of regular expressions in our C# code. This post touches on four [...]<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></description>
				<content:encoded><![CDATA[<p><img src="http://www.dijksterhuis.org/wp-content/uploads/2009/03/lions1.jpg" alt="Regular Expression - Practical Usage" title="Regular Expression - Practical Usage" width="570" height="253" class="alignright size-full wp-image-826" /></p>
<p><i>This is the second post in the C# regular expression series and it follows up on &#8220;<a href="http://www.dijksterhuis.org/regular-expressions-in-csharp-the-basics/">Regular Expressions in C# &#8211; The Basics</a>&#8221; which explained the theory behind Regular expressions in C#. In this post we look at how to make practical use of regular expressions in our C# code.</i> </p>
<p>This post touches on four major regular expression subjects:</p>
<ul>
<li><strong>String Comparison</strong> &#8211; does a string contain a particular sub-string?</li>
<li><strong>Splitting a string into segments</strong> &#8211; we will take an IPv4 address and retrieve its dotted components</li>
<li><strong>Replacement</strong> &#8211; modifying an input string</li>
<li><strong>Stricter input validation</strong> &#8211; how to harden your expressions</li>
</ul>
<p><span id="more-808"></span></p>
<h3>String Comparison &#8211; finding valid HTML tags</h3>
<p>One of the essential functions of regular expressions is their ability to find if a string is contained inside another one. The <strong>RegEx.Matches</strong> method tests if a given string matches the pattern. </p>
<p>We start with a simple example: finding out where the letter &#8220;a&#8221; is mentioned in a sentence:</p>
<pre class="brush: c#">
            string Input = &quot;apples make for great party accessories&quot;;
            Regex FindA = new Regex(&quot;a&quot;);

            foreach(Match Tag in FindA.Matches(Input))
            {
                Console.WriteLine(&quot;Found &#039;a&#039; at {0}&quot;,Tag.Index);
            }
</pre>
<p>That was almost too easy. Regular expressions really shine if you don&#8217;t know exactly what you are looking for but you can describe it. In the following example we will look for all valid HTML tags in an input string.</p>
<p>What is a valid HTML tag? &lt;code&gt;, &lt;/code&gt;, &lt;b&gt;, &lt;img src=&quot;&quot;&gt;, &lt;/br&gt; are all valid HTML tags.</p>
<blockquote><p>Regex HTMLTag = new Regex(@&quot;(&lt;\/?[^&gt;]+&gt;)&quot;);</p></blockquote>
<p>To break this down:</p>
<ol>
<li>All valid HTML tags start with a &#8220;&lt;&#8221;</li>
<li>They might or might not have a forward slash (we need to escape the forward slash): \/?</li>
<li>There is at least one or more characters which are not &#8220;&gt;&#8221;</li>
<li>The tag ends with a &#8220;&gt;&#8221;</li>
</ol>
<p>The following code example searches for all valid HTML tags in the input string:</p>
<pre class="brush: c#">
using System;
using System.Text.RegularExpressions;

namespace RegularExpression
{
    class MainClass
    {
        public static void Main(string[] args)
        {
            Regex HTMLTag = new Regex(@&quot;(&lt;\/?[^&gt;]+&gt;)&quot;);

            string Input = &quot;&lt;b&gt;&lt;i&gt;&lt;a href=&#039;http://apple.com&#039;&gt;Ipod News&lt;/a&gt;&lt;/b&gt;&lt;/i&gt;&quot;;
            
            foreach(Match Tag in HTMLTag.Matches(Input))
            {
                Console.WriteLine(&quot;Found {0}&quot;,Tag.Value);
            }
        }
    }
}
</pre>
<p>Resulting in: </p>
<div style="margin-left: 40px;">Found &lt;b&gt;<br />
Found &lt;i&gt;<br />
Found &lt;a href=&#8217;http://apple.com&#8217;&gt;<br />
Found &lt;/a&gt;<br />
Found &lt;/b&gt;<br />
Found &lt;/i&gt;</div>
<h3>Splitting a string into parts</h3>
<p>Parentheses () not only allow you to group your expressions into parts they allow you to split a single string into multiple segments which we can inspect individually.  To demonstrate we will use a regular expression to split an IPv4 address into its components. </p>
<p>A decimal TCP/IP address looks like <b>XXX.XXX.XXX.XXX</b> with X being a decimal number. Each column has at least 1 digit, and a maximum of 3. So a single column can be described as &#8220;<b>(\d{1,3})</b>&#8221;. There are four columns, each separated by a dot. The dot (.) has a special meaning in regex so we need to escape it: <b>(\.)</b></p>
<p>The <b>Regex.Match</b> method returns a new <b>Match</b> instance. We can now test <b>Match.Success</b> to see if the input string matched the TCP/IP address pattern. Through the <b>Match.Groups</b> property we can then extract each of the four IP address columns. The zero entry in the Groups property is always the complete match, in this case &#8220;10.0.0.6&#8221;. The [1] entry contains the first group&#8217;s contents, [2] the second etc. </p>
<pre class="brush: c#">
            string IPMatchExp = @&quot;(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})&quot;;
            Match theMatch  = Regex.Match(&quot;10.0.0.6&quot;,IPMatchExp);
            if (theMatch.Success)
            {
                Console.WriteLine(&quot;{0}.{1}.{2}.{3}&quot;,theMatch.Groups[1].Value,
                                                      theMatch.Groups[2].Value,
                                                      theMatch.Groups[3].Value,
                                                      theMatch.Groups[4].Value);
            }

</pre>
<h3>String Replacement</h3>
<p>Often it is useful to manipulate a string by replacing the matched pattern with something new. The <b>RegEx.Replace</b> method allows us to specify a pattern to look for and a replacement string. </p>
<p>The following example matches the last character and space following each word and replaces it with &#8220;b_&#8221;. </p>
<pre class="brush: c#">
            Regex Replacer = new Regex(@&quot;\w &quot;); // Single [a-zA-Z] followed by a space
            string Input  = &quot;ax bx sax dam pom&quot;;
            string Output = Replacer.Replace(Input,&quot;b_&quot;); // Replace all items found with a b and underscore
            Console.WriteLine(Output);
</pre>
<p><b>Substitution Patterns</b></p>
<p>What to do if you would like to flip parts of a string? C# offers several substitution patterns for this. Substitution patterns can only be used in a replacement string, and are used in combination with grouping. </p>
<p>They are useful if you would like to format the results of the match. A common task is to flip two words around.  In the below example we flip the name &#8220;Molly Malone&#8221; into &#8220;Malone Molly&#8221;: </p>
<pre class="brush: c#">
            Regex Replacer = new Regex(@&quot;(\w*) (\w*)&quot;);
            string Input  = &quot;Molly Mallone&quot;;
            string Output = Replacer.Replace(Input,&quot;$2 $1&quot;);
            Console.WriteLine(Output);
</pre>
<p>The regular expression is defined as two groups of words (\w*) separated by a space. Each group can be referred to with a substitution pattern. $1 refers to the first group, $2 to the second (and if we had defined more $3 would be the third etc).</p>
<h3>Input validation &#8211; we have to be more strict</h3>
<p>Often we need to check if the data entered or read from a file matches a definition so that we know it&#8217;s valid. But for this to work we need to ensure that our expressions only match a valid input. Many expressions of convenience are defined too loosely. If we are to use them for input validation we need to harden them. </p>
<p>The pattern we used in an earlier example neatly broke down a valid IP address. But it wasn&#8217;t very strict and there are many combinations that would have matched that aren&#8217;t valid IP addresses. <b>999.999.999.999</b> is not a valid IPv4 address but it would have matched our pattern (<b>@&#8221;(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})&#8221;</b>). So we couldn&#8217;t have used it for testing for a valid IP address.</p>
<p>So what is a valid match? We need to define this first.</p>
<p>A valid IP address range is from <b>0.0.0.0</b> to <b>255.255.255.255</b> (with each column being represented by a byte).</p>
<p>At this point there are two things we can do: we can validate the results returned by our expressions with a few additional lines of C# code or we can modify our regular expression to become stricter. As this post is about regular expressions we will modify our expression to match only valid IP addresses.</p>
<p>How do we define valid ? 0,9,10,19,100,199,200,249,255 are all valid inputs for each column. 300 isn&#8217;t valid, and neither is 299. To keep things simple, we don&#8217;t allow 09 as a valid input. </p>
<ul>
<li>Single digit: 0 &#8211; 9 :&nbsp;&nbsp; [0-9]</li>
<li>Double digit: 10 &#8211; 99: [1-9][0-9]</li>
<li>Triple digit 1:&nbsp; 100 &#8211; 199:&nbsp; 1[0-9]{2}</li>
<li>Triple digit 2: 200 &#8211; 249:&nbsp; 2[0-4][0-9]</li>
<li>Triple digit 3: 250 &#8211; 255 25[0-5]</li>
</ul>
<p>The single ([0-9]) and double digit ([1-9][0-9]) combinations can be combined into: <b>[1-9]?[0-9]</b>. (Read as: The first 1-9 is optional, occurs 0 or 1 time)</p>
<p>So a single column can be defined as:&nbsp;<b>(([1-9]?[0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\</b><b>.)</b> Note the &#8220;.&#8221; at the end.</p>
<p>On the final column we do not need a &#8220;dot&#8221;. We can save some space by repeating the first expression three times, but we need to write out the fourth in full. Thus our expression becomes: <b>(([1-9]?[0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}</b><b>([1-9]?[0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])</b></p>
<p>Not exactly easy to read, but lets test to see if it works as expected. The following example program tries all column combinations from 0-999</p>
<pre class="brush: c#">
using System;
using System.Text.RegularExpressions;

namespace RegularExpression
{
    // Demonstrates validating dotted-quad IPv4 addresses with a single regular expression.
    class MainClass
    {
        // Prints whether n.n.n.n is a valid IPv4 address for n = 0, 1, 2, ...
        // and stops at the first address that is rejected.
        public static void Main(string[] args)
        {
            // One column is 0-99 ([1-9]?[0-9]), 100-199 (1[0-9]{2}),
            // 200-249 (2[0-4][0-9]) or 250-255 (25[0-5]). The first three columns
            // are each followed by a dot, the fourth is not. The ^ and $ anchors
            // force the whole input to match, so &quot;1.2.3.999&quot; is not accepted
            // through its valid-looking prefix &quot;1.2.3.99&quot;.
            string IPTestExp = @&quot;^(([1-9]?[0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([1-9]?[0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$&quot;;

            // Try every column value from 0 up to and including 999.
            for (int Lp = 0; Lp &lt;= 999; Lp++)
            {
                string IPAddress = String.Format(&quot;{0}.{0}.{0}.{0}&quot;,Lp);

                if (Regex.Match(IPAddress,IPTestExp).Success)
                    Console.WriteLine(&quot;{0} is valid&quot;,IPAddress);
                else
                {
                    Console.WriteLine(&quot;{0} is invalid&quot;,IPAddress);
                    // For brevity, stop at the first invalid address.
                    break;
                }
            }
        }
    }
}
</pre>
<p>For brevity the program ends at the first invalid combination. If we had let it run it would have shown 256-999 as invalid.</p>
<div style="margin-left: 40px;">0.0.0.0 is valid<br />
1.1.1.1 is valid<br />
2.2.2.2 is valid<br />
&#8230;<br />
254.254.254.254 is valid<br />
255.255.255.255 is valid<br />
256.256.256.256 is invalid</div>
<p>This took a bit of work but we now have a single line test to see if a string is a valid IPv4 address.</p>
<p><b>Concluding</b></p>
<p>This ends the second post in this series. In the next post I will look at some advanced regular expression topics. </p>
<p>If you would like to read more on the theory behind regular expressions have a look at the first post in the series: <a href="http://www.dijksterhuis.org/regular-expressions-in-csharp-the-basics/">Regular Expressions in C# &#8211; The Basics</a></p>
<p>Image credit: <a rel="nofollow" href="http://www.flickr.com/photos/tambako/">Tambako</a></p>
<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></content:encoded>
			<wfw:commentRss>http://www.dijksterhuis.org/regular-expressions-csharp-practical-use/feed/</wfw:commentRss>
		<slash:comments>6</slash:comments>
		</item>
		<item>
		<title>Regular Expressions in C# – The Basics</title>
		<link>http://www.dijksterhuis.org/regular-expressions-in-csharp-the-basics/</link>
		<comments>http://www.dijksterhuis.org/regular-expressions-in-csharp-the-basics/#comments</comments>
		<pubDate>Mon, 09 Mar 2009 03:49:22 +0000</pubDate>
		<dc:creator>Martijn</dc:creator>
				<category><![CDATA[Regular Expressions]]></category>
		<category><![CDATA[regex]]></category>

		<guid isPermaLink="false">http://www.dijksterhuis.org/?p=789</guid>
		<description><![CDATA[One of the most common coding tasks is to take an input, munch it around and turn it into something different altogether. Are you looking for FedEx numbers in a text file? Do you want to replace &#8220;love&#8221; with &#8220;hate&#8221; in your source files? Is a string a valid e-mail address? Problems like these can [...]<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></description>
				<content:encoded><![CDATA[<p><img src="http://www.dijksterhuis.org/wp-content/uploads/2009/03/expression.jpg" alt="Regular Expressions in C#" title="Regular Expressions in C#" width="580" height="206" class="alignright size-full wp-image-800" /></p>
<p><em>One of the most common coding tasks is to take an input, munch it around and turn it into something different altogether. Are you looking for FedEx numbers in a text file? Do you want to replace &#8220;love&#8221; with &#8220;hate&#8221; in your source files? Is a string a valid e-mail address? Problems like these can be solved by applying regular expressions, or &#8220;regex&#8221; for short. </em><br />
<span id="more-789"></span></p>
<h3>Introduction</h3>
<p>This post explores the basic theory of expressions. If you are already familiar with them and want to know how to use them in your own C# programs have a look at the next post &#8220;<a href="http://www.dijksterhuis.org/regular-expressions-csharp-practical-use/">Regular Expressions in C# &#8211; Practical Applications</a>&#8221; </p>
<p>Expressions offer a method of describing and testing for particular combinations of characters in a string. A simple regular expression can often save you from having to write many lines of regular code.</p>
<ul>
<li>Are you looking for the characters &#8220;car&#8221;  in &#8220;cartoon&#8221;, &#8220;carbonate&#8221; or  &#8220;carton&#8221; ?</li>
<li>Do you want to only match when the word &#8220;car&#8221; is standing by itself as in  &#8220;car sales for 2009&#8221; ?</li>
<li>Or only return true when the car is red or blue ? &#8220;blue car&#8221;/ &#8220;red car&#8221; / &#8220;green car&#8221;</li>
</ul>
<p>In C# expressions are provided by the <em>Regex</em> class in the <em>System.Text.RegularExpressions</em> namespace.</p>
<p>The expressions themselves are more or less standard between computer languages. You can often take an expression from another language and with a little or no work apply them to your C# code. If you are not familiar with them yet you should consider learning to use them.</p>
<div id="attachment_797" class="wp-caption alignright" style="width: 160px"><a href="http://xkcd.com/208/"><img src="http://www.dijksterhuis.org/wp-content/uploads/2009/03/3005983191_41ca486eec-150x150.jpg" alt="Regular Expressions to the rescue" title="Regular Expressions to the rescue" width="150" height="150" class="size-thumbnail wp-image-797" /></a><p class="wp-caption-text">Regular Expressions to the rescue</p></div>
<p><strong>What can you use regular expressions for?</strong></p>
<ul>
<li><strong>Data capture</strong>: split a string into multiple fields which you can manipulate. 13-Jan-2006 becomes (day,month,year)</li>
<li><strong>Data input validation</strong>: Check if the input followed the required formatting rules. For example test if a valid telephone number was entered.</li>
<li><strong>String comparison</strong>: Does A exist in B?</li>
<li><strong>String replacement</strong>: Replace &#8220;foo&#8221; with &#8220;bar&#8221;</li>
<li><strong>Code size reduction</strong>: One line of regular expression code can replace large amount of dedicated code</li>
</ul>
<p><strong>When not to use regular expressions?</strong></p>
<p>Don&#8217;t use them when <strong>speed</strong> is of the essence. Expressions have a serious drawback in that they can be slow to execute. If you are concerned about optimizing a part of your code it can be worthwhile to write your own replacement. In <a id="s-38" title="a previous post" href="../manipulating-strings-in-csharp-replacing-part-string/">a previous post</a> I noticed that a simple string replacement routine was 40 times faster than the regular expression equivalent.</p>
<h3>The basics</h3>
<p>To understand expressions we need a little bit of theory. This bit explains all the main operators and how to use them.</p>
<p><strong>Literal characters</strong></p>
<p>The most basic expression contains a single character. If we define &#8220;c&#8221; as the expression and test it against &#8220;car company&#8221; it will match against the &#8220;c&#8221; in &#8220;car&#8221;. If we ask the Regex class to search again it will match against the &#8220;c&#8221; in &#8220;company&#8221;.</p>
<p>Several characters have a special meaning: ?, +, *, \, [, ( , ), ], {, }, . (dot) and ^</p>
<p>If we want to include them we need to escape them first using a backslash:</p>
<ul>
<li>10 * 10 = 100 <strong><em style="color: #ff0000;">wrong</em></strong></li>
<li><em>10 \* 10 = 100 <strong><span style="color: #00ff00;">ok</span></strong></em></li>
</ul>
<p><em>Normally when parsing strings C# will try to break down escaped sequences such as \n,\r etc. Expression statements usually contain many backslash operators. By adding the &#8220;@&#8221; string literal the compiler will not inspect the string too much and take it literally instead.<br />
</em></p>
<div style="margin-left: 40px;">string exampleLiteral = @&quot;10 \* 10 = 100&quot;;</div>
<p><strong>Character Sets</strong></p>
<p>Character sets allow us to limit the characters that can match. Say for example we want to use just the numbers 0-9: <strong>[0-9]</strong> , or the characters a-z &amp; A-Z: <strong>[a-zA-Z]</strong>. A character set only matches against a single character, so the following doesn&#8217;t work: &#8220;c[a-z]kie&#8221; matches against &#8220;cokie&#8221; but not &#8220;cookie&#8221;.</p>
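<p>You can also define your own sets. If you are matching a date, a date separator can be defined as a space, slash or dash: <strong>[ /-]</strong>. The dash goes last (or is escaped as \-) so that it is taken literally; written as [ -/] it would define a range running from the space character to the slash.</p>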
<p>Many character sets are used so often that they have been given their own shorthands:</p>
<ul>
<li>\w matches any word character (letters, digits and the underscore; for ASCII text [a-zA-Z0-9_])</li>
<li>\s matches any whitespace (space, tab)</li>
<li>\d matches against any digit [0-9]</li>
</ul>
<p>For a longer list of the available short hands have a look at my <a id="tv6h" title="C# Regular Expression Cheat Sheet" href="../csharp-regular-expression-operator-cheat-sheet/">C# Regular Expression Cheat Sheet</a> .<strong></strong></p>
<p><strong>The Dot is special</strong></p>
<p><strong></strong>The dot &#8220;.&#8221; matches against any character, except for line breaks. You should use it sparingly as it can introduce unwanted results. Often it is better to be more specific, using \w or \d, or a character set that limits the set of possible characters.</p>
<ul>
<li><strong>&#8220;g..gle&#8221;</strong> matches &#8220;google&#8221;, &#8220;gaagle&#8221;,&#8221;g%$gle&#8221; and much more.</li>
<li><strong>&#8220;\d\d.\d\d.\d\d&#8221;</strong> matches a valid date such as &#8220;12-08-99&#8221;  and &#8220;12/08/99&#8221; but also an invalid date: &#8220;12508799&#8221;</li>
</ul>
<p><strong>Creating alternatives using the boolean &#8220;or&#8221;</strong></p>
<p>A vertical bar (|) separates alternatives. The bar splits the whole expression, so &#8220;red|blue car&#8221; matches either &#8220;red&#8221; or &#8220;blue car&#8221;; to match either a red or a blue car, group the alternatives with parentheses (explained below): &#8220;(red|blue) car&#8221;. Written in C# code: </p>
<p>if (Regex.Match(&quot;blue car&quot;,&quot;(blue|red) car&quot;).Success)<br />
Console.WriteLine(&quot;Matches!&quot;);</p>
<p>You can add as many alternatives as you would like, so &#8220;(red|blue|purple|yellow) car&#8221; is also possible.</p>
<p><strong>Grouping with parentheses ()</strong></p>
<p>Parentheses () make it easier to group things  together. So if you would like to match for either &#8220;color&#8221; or &#8220;colour&#8221; you could write the word &#8220;color&#8221; (or &#8220;colour&#8221;) as one of:</p>
<ul>
<li>col(o|ou)r</li>
<li>(color|colour)</li>
</ul>
<p><strong>Repetition</strong></p>
<p>A repetition quantifier specifies how often a preceding element is allowed to repeat.</p>
<table border="0" cellspacing="1" cellpadding="1">
<tbody>
<tr style="vertical-align: top;">
<td style="width: 15px;"><code><strong>?</strong></code></td>
<td>A question mark indicates <em>zero or one</em> of the preceding element. For example &#8220;S?DRAM&#8221; matches &#8220;SDRAM&#8221; and &#8220;DRAM&#8221;</td>
</tr>
<tr style="vertical-align: top;">
<td><code><strong>*</strong></code></td>
<td>The asterisk indicates there are <em>zero or more</em> of the preceding element. For example, <code>ab*c</code> matches &#8220;<em>ac</em>&#8220;, &#8220;<em>abc</em>&#8220;, &#8220;<em>abbc</em>&#8220;, &#8220;<em>abbbc</em>&#8220;, and so on.</td>
</tr>
<tr style="vertical-align: top;">
<td><code><strong>+</strong></code></td>
<td>The plus sign indicates that there is <em>one or more</em> of the preceding element. For example, <code>ab+c</code> matches &#8220;<em>abc</em>&#8220;, &#8220;<em>abbc</em>&#8220;, &#8220;<em>abbbc</em>&#8220;, and so on, but not &#8220;<em>ac</em>&#8220;.</td>
</tr>
<tr style="vertical-align: top;">
<td>{n}{n,}{n,m}</td>
<td>If you would like to match an exact number of times use <em>{n}</em>, for at least n matches use <em>{n,}</em>. For at least <em>n</em> matches, but no more than <em>m</em>, use <em>{n,m}</em></td>
</tr>
</tbody>
</table>
<p>To give some examples: </p>
<ul>
<li>\d{1,3} reads as &#8220;a decimal digit (0-9)&#8221;, minimum of 1, maximum of 3</li>
<li>[a-z]+ reads as &#8220;one or more of a-z&#8221;, &#8220;abc&#8221; matches, and so does &#8220;axxxz&#8221;</li>
</ul>
<p>In the following example &#8220;aab&#8221; matches, but so does &#8220;aaab&#8221;.</p>
<div style="margin-left: 40px;">// a{2,3}b reads as: 2 or 3 times a, followed by a b<br />
if (Regex.Match(&#8220;aab&#8221;,&#8221;a{2,3}b&#8221;).Success)<br />
Console.WriteLine(&#8220;Matches!&#8221;);<br />
else<br />
Console.WriteLine(&#8220;No Match!&#8221;);</div>
<p>Repetition is useful for testing if an input matches a required pattern. If you need to test for a telephone number formatted as : <strong>XXX-XXXX</strong> you could write this as <strong>\d{3}[-]\d{4}</strong>.</p>
<p><strong>Lazy and Greedy matching</strong></p>
<p>All the above repetition operators are &#8220;greedy&#8221;, they match to the longest possible string they can find.</p>
<ul>
<li><strong>a[b-z]+z </strong>against &#8220;<strong>abcbzcdze</strong>&#8221; returns &#8220;<strong>abcbzcdz</strong>&#8221;</li>
<li><strong>&lt;a</strong><tt class="regex"><strong>.+&gt;</strong> against "<strong>&lt;a href='index.php'&gt;Beginning&lt;/a&gt;</strong>" matches everything, instead of just the opening &lt;a href""&gt;.</tt></li>
</ul>
<p>To avoid this we can apply &#8220;lazy&#8221; matching instead. In a lazy match, as soon as it finds a match the parser stops and returns the result. You can make a match lazy by simply adding a question mark:</p>
<ul>
<li><strong>a[b-z]+?z </strong>against &#8220;<strong>abcbzcdze</strong>&#8221; returns &#8220;<strong>abcbz</strong>&#8221;</li>
<li><strong>&lt;a</strong><tt class="regex"><strong>.+?&gt;</strong> against "<strong>&lt;a href='index.php'&gt;Beginning&lt;/a&gt;</strong>" returns </tt><tt class="regex"><strong>&lt;a href='index.php'&gt;</strong></tt><tt class="regex">.</tt></li>
</ul>
<p><strong>Anchoring</strong></p>
<p>All the above examples didn&#8217;t care where in the string the match was made. You could also use them repeatedly to find more instances of the match in the input string. Anchoring allows you to match only those strings where the pattern occurs at the very beginning and/or end of the input.</p>
<ul>
<li><strong>^string </strong>reads as: only match if &#8220;string&#8221; is at the beginning of the input. The &#8220;^&#8221; indicates the beginning. So &#8220;string of wool&#8221; matches, but &#8220;woolly string&#8221; doesn&#8217;t.</li>
<li><strong>string$ </strong>reads as: only match if &#8220;string&#8221; is at the end of the input. Here the &#8220;$&#8221; indicates the end. In this case &#8220;string of wool&#8221; can&#8217;t match, but &#8220;woolly string&#8221; can.</li>
<li><strong>^string$</strong> reads as: only match if &#8220;string&#8221; is the whole input. The &#8220;s&#8221; comes as the first character, and the &#8220;g&#8221; as the last. So only &#8220;string&#8221; can match this pattern.</li>
</ul>
<p>This ends the theoretical introduction to Regular Expressions &#8212; see also the next post &#8220;<a href="http://www.dijksterhuis.org/regular-expressions-csharp-practical-use/">Regular Expressions in C# &#8211; Practical Applications</a>&#8221; .</p>
<p>Image credit: <a rel="nofollow" href="http://www.flickr.com/photos/sarae/2082776106/">Sarae</a></p>
<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></content:encoded>
			<wfw:commentRss>http://www.dijksterhuis.org/regular-expressions-in-csharp-the-basics/feed/</wfw:commentRss>
		<slash:comments>7</slash:comments>
		</item>
		<item>
		<title>C# Regular Expression Cheat Sheet</title>
		<link>http://www.dijksterhuis.org/csharp-regular-expression-operator-cheat-sheet/</link>
		<comments>http://www.dijksterhuis.org/csharp-regular-expression-operator-cheat-sheet/#comments</comments>
		<pubDate>Fri, 06 Mar 2009 05:38:31 +0000</pubDate>
		<dc:creator>Martijn</dc:creator>
				<category><![CDATA[Learn C#]]></category>
		<category><![CDATA[Regular Expressions]]></category>
		<category><![CDATA[regex]]></category>

		<guid isPermaLink="false">http://www.dijksterhuis.org/?p=769</guid>
		<description><![CDATA[I have been doing quite a bit with regular expressions recently and to avoid having to look them up again and again I made myself a little table with the most important C# regular expression operators and stuck it on the wall. This post contains the C# regular expression operators as used by the .NET [...]<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></description>
				<content:encoded><![CDATA[<p>I have been doing quite a bit with regular expressions recently and to avoid having to look them up again and again I made myself a little table with the most important C# regular expression operators and stuck it on the wall. This post contains the C# regular expression operators as used by the .NET regular expression classes such as <em>Regex</em>.</p>
<p>If you would like to print this, click here for a <a href='http://www.dijksterhuis.org/wp-content/uploads/2009/03/regular_expressions_in_.html'>pure HTML version</a>. </p>
<p><span id="more-769"></span></p>
<h3>Escape Characters</h3>
<table width="100%" border="1" cellpadding="0" cellspacing="0">
<tr>
<td width="20%"><b>Character</b></td>
<td><b>Description</b></td>
</tr>
<tr>
<td>ordinary characters</td>
<td>Characters other than . $ ^ { [ ( | ) * + ? \ match<br />
themselves.</td>
</tr>
<tr>
<td>. (dot)</td>
<td>Matches any character</td>
</tr>
<tr>
<td>\w</td>
<td>Matches any word character. </td>
</tr>
<tr>
<td>\W</td>
<td>The negation of \w</td>
</tr>
<tr>
<td>\s</td>
<td>Matches any white-space character.</td>
</tr>
<tr>
<td>\S</td>
<td>Matches any non-white-space character. </td>
</tr>
<tr>
<td>\d</td>
<td>Matches any decimal digit. </td>
</tr>
<tr>
<td>\D</td>
<td>Matches any character that is not a decimal digit.</td>
</tr>
<tr>
<td><b>\a</b></td>
<td>Matches a bell (alarm) \u0007.</td>
</tr>
<tr>
<td><b>\b</b></td>
<td>Matches a backspace \u0008 if in a [] character class</td>
</tr>
<tr>
<td><b>\t</b></td>
<td>Matches a tab</td>
</tr>
<tr>
<td><b>\r</b></td>
<td>Carriage return</td>
</tr>
<tr>
<td><b>\v</b></td>
<td>Vertical tab</td>
</tr>
<tr>
<td><b>\f</b></td>
<td>Form feed</td>
</tr>
<tr>
<td><b>\n</b></td>
<td>New line</td>
</tr>
<tr>
<td><b>\e</b></td>
<td>Matches an escape</td>
</tr>
<tr>
<td><b>\040</b></td>
<td>Matches an ASCII character as octal (up to three digits);</td>
</tr>
<tr>
<td><b>\x20</b></td>
<td>Matches an ASCII character using hexadecimal representation<br />
(exactly two digits).</td>
</tr>
<tr>
<td><b>\cC</b></td>
<td>Matches an ASCII control character; for example, \cC is<br />
control-C.</td>
</tr>
<tr>
<td><b>\u0020</b></td>
<td>Matches a Unicode character using hexadecimal representation<br />
(exactly four digits).</td>
</tr>
<tr>
<td><b>\</b></td>
<td>When followed by a character that is not recognized as an<br />
escaped character, matches that character. For example, <b>\*</b><br />
is the same as <b>\x2A</b>.</td>
</tr>
</table>
<p></p>
<h3>Alternation</h3>
<table width="100%" cellpadding="0" cellspacing="0" border="1">
<tbody>
<tr>
<th width="20%"><b>Alternation</b></th>
<th><b>Definition</b></th>
</tr>
<tr>
<td><b>|</b></td>
<td>Matches any one of the terms separated by the | (vertical bar)<br />
character; for example, <span>cat|dog|tiger</span>. The leftmost<br />
successful match wins.</td>
</tr>
<tr>
<td><b>(?(</b><i>expression</i><b>)yes|no)</b></td>
<td>Matches the &#8220;yes&#8221; part if the expression matches at this point;<br />
otherwise, matches the &#8220;no&#8221; part.&nbsp;</td>
</tr>
<tr>
<td><b>(?(</b><i>name</i><b>)yes|no)</b></td>
<td>Matches the &#8220;yes&#8221; part if the named capture string has a match;<br />
otherwise, matches the &#8220;no&#8221; part.</td>
</tr>
</tbody>
</table>
<p></p>
<h3>Substitutions</h3>
<table width="100%" border="1" cellpadding="0" cellspacing="0">
<tr>
<td width="20%"><b>Character</b></td>
<td><b>Description</b></td>
</tr>
<tr>
<td><b>$</b><i>number</i></td>
<td>Substitutes the last substring matched by group number<br />
<i>number</i> (decimal).</td>
</tr>
<tr>
<td><b>${</b><i>name</i><b>}</b></td>
<td>Substitutes the last substring matched by a<br />
(?&lt;<i>name</i>&gt; ) group.</td>
</tr>
<tr>
<td><b>$$</b></td>
<td>Substitutes a single &#8220;$&#8221; literal.</td>
</tr>
<tr>
<td><b>$&amp;</b></td>
<td>Substitutes a copy of the entire match itself.</td>
</tr>
<tr>
<td><b>$`</b></td>
<td>Substitutes all the text of the input string before the<br />
match.</td>
</tr>
<tr>
<td><b>$&#8217;</b></td>
<td>Substitutes all the text of the input string after the<br />
match.</td>
</tr>
<tr>
<td><b>$+</b></td>
<td>Substitutes the last group captured.</td>
</tr>
<tr>
<td><b>$_</b></td>
<td>Substitutes the entire input string.</td>
</tr>
</table>
<p></p>
<h3>Word boundaries</h3>
<table width="100%" border="1" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td width="20%"><b>Assertion</b></td>
<td><b>Description</b></td>
</tr>
<tr>
<td><b>^</b></td>
<td>Specifies that the match must occur at the beginning of the<br />
string or the beginning of the line.</td>
</tr>
<tr>
<td><b>$</b></td>
<td>Specifies that the match must occur at the end of the string,<br />
before <b>\n</b> at the end of the string, or at the end of the<br />
line.</td>
</tr>
<tr>
<td><b>\A</b></td>
<td>Specifies that the match must occur at the beginning of the<br />
string (ignores the <b>Multiline</b> option).</td>
</tr>
<tr>
<td><b>\Z</b></td>
<td>Specifies that the match must occur at the end of the string or<br />
before <b>\n</b> at the end of the string (ignores the<br />
<b>Multiline</b> option).</td>
</tr>
<tr>
<td><b>\z</b></td>
<td>Specifies that the match must occur at the end of the string<br />
(ignores the <b>Multiline</b> option).</td>
</tr>
<tr>
<td><b>\G</b></td>
<td>Specifies that the match must occur at the point where the<br />
previous match ended. When used with Match.NextMatch(), this<br />
ensures that matches are all contiguous.</td>
</tr>
<tr>
<td><b>\b</b></td>
<td>Specifies that the match must occur on a boundary between<br />
<b>\w</b> (alphanumeric) and <b>\W</b> (nonalphanumeric)<br />
characters. The match must occur on word boundaries (that is, at<br />
the first or last characters in words separated by any<br />
nonalphanumeric characters). The match can also occur on a word<br />
boundary at the end of the string.</td>
</tr>
<tr>
<td><b>\B</b></td>
<td>Specifies that the match must not occur on a <b>\b</b><br />
boundary.</td>
</tr>
</tbody>
</table>
<p></p>
<h3>Quantifiers</h3>
<table width="100%" border="1" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td width="20%">*</td>
<td>Matches the preceding element zero or more times. It is<br />
equivalent to <b>{0,}</b>. <span>*</span> is a greedy quantifier<br />
whose non-greedy equivalent is <span class="input">*?</span>.</td>
</tr>
<tr>
<td>+</td>
<td>Matches the preceding element one or more times. It is<br />
equivalent to <span>{1,}</span>. <span class="input">+</span> is a<br />
greedy quantifier whose non-greedy equivalent is<br />
<span>+?</span>.</td>
</tr>
<tr>
<td>?</td>
<td>Matches the preceding element zero or one time. It is<br />
equivalent to <span>{0,1}</span>. <span class="input">?</span> is a<br />
greedy quantifier whose non-greedy equivalent is<br />
<span>??</span>.</td>
</tr>
<tr>
<td>{n}</td>
<td>Matches the preceding element exactly <i>n</i> times.<br />
<span>{n}</span> is a greedy quantifier whose non-greedy equivalent<br />
is <span>{n}?</span>.</td>
</tr>
<tr>
<td>{n,}</td>
<td>Matches the preceding element at least <i>n</i> times.<br />
<span>{n,}</span> is a greedy quantifier whose non-greedy<br />
equivalent is <span>{n,}?</span>.</td>
</tr>
<tr>
<td>{<i>n</i>,<i>m</i>}</td>
<td>Matches the preceding element at least <i>n</i>, but no more<br />
than <i>m</i>, times. <span>{n,m}</span> is a greedy quantifier<br />
whose non-greedy equivalent is <span class="input">{n,m}?</span>.</td>
</tr>
<tr>
<td>*?</td>
<td>Matches the preceding element zero or more times, but as few<br />
times as possible. It is a lazy quantifier that is the counterpart<br />
to the greedy quantifier <span>*</span>.</td>
</tr>
<tr>
<td>+?</td>
<td>Matches the preceding element one or more times, but as few<br />
times as possible. It is a lazy quantifier that is the counterpart<br />
to the greedy quantifier <span>+</span>.</td>
</tr>
<tr>
<td>??</td>
<td>Matches the preceding element zero or one time, but as few<br />
times as possible. It is a lazy quantifier that is the counterpart<br />
to the greedy quantifier <span>?</span>.</td>
</tr>
<tr>
<td>{<i>n</i>}?</td>
<td>Matches the preceding element exactly <span class="parameter">n</span> times.<br />
It is a lazy quantifier that is the counterpart to the greedy<br />
quantifier <span class="input">{n}</span>.</td>
</tr>
</tbody>
</table>
<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></content:encoded>
			<wfw:commentRss>http://www.dijksterhuis.org/csharp-regular-expression-operator-cheat-sheet/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		</item>
		<item>
		<title>Safely cleaning HTML with strip_tags in C#</title>
		<link>http://www.dijksterhuis.org/safely-cleaning-html-with-strip_tags-in-csharp/</link>
		<comments>http://www.dijksterhuis.org/safely-cleaning-html-with-strip_tags-in-csharp/#comments</comments>
		<pubDate>Wed, 04 Mar 2009 05:37:50 +0000</pubDate>
		<dc:creator>Martijn</dc:creator>
				<category><![CDATA[Beginner]]></category>
		<category><![CDATA[Learn C#]]></category>
		<category><![CDATA[c#]]></category>
		<category><![CDATA[html]]></category>
		<category><![CDATA[striptags]]></category>
		<category><![CDATA[strip_tags]]></category>

		<guid isPermaLink="false">http://www.dijksterhuis.org/?p=758</guid>
		<description><![CDATA[One of my favorites in the PHP libraries is the strip_tags function. Not only does it neatly remove HTML from an input it also allows you to specify which tags should stay. This is great if you are allowing your visitors to apply some basic HTML tags to their comments. This post explores two issues: [...]<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></description>
				<content:encoded><![CDATA[<p><img src="http://www.dijksterhuis.org/wp-content/uploads/2009/03/sub.jpg" alt="Removing unwanted tags with StripTags/strip_tags" title="Removing unwanted tags with StripTags/strip_tags" width="500" height="205" class="aligncenter size-full wp-image-763" /></p>
<p><em>One of my favorites in the PHP libraries is the strip_tags function. Not only does it neatly remove HTML from an input it also allows you to specify which tags should stay. This is great if you are allowing your visitors to apply some basic HTML tags to their comments. This post explores two issues: using C# to remove unwanted tags, and cleaning up unwanted attributes that might be hidden in the allowed tags.</em></p>
<p><span id="more-758"></span></p>
<p>I wanted to clean some comments posted to a website from unwanted HTML tags. The users are allowed to &lt;B&gt; or &lt;I&gt; and even &lt;A href=&quot;&quot;&gt;&lt;/A&gt; their posts but anything else must be stripped before it is posted to the site. I found several regular expressions for C# that allow you to strip HTML but these magically wipe all the HTML and leave nothing.</p>
<p>Below is the end result of some hacking, and of course much love-hate with the regular expression library. </p>
<p><strong><span>string StripTags(string Input, string[] AllowedTags)</span></strong></p>
<p>The StripTags method takes an input string, and an array of allowed tags. It returns the input as a string, minus all unwanted tags.</p>
<pre class="brush: c#">
// Only &lt;i&gt; and &lt;b&gt; are allowed, so the surrounding &lt;p&gt; tags are removed.
string test1 = StripTags(&quot;&lt;p&gt;George&lt;/p&gt;&lt;b&gt;W&lt;/b&gt;&lt;i&gt;Bush&lt;/i&gt;&quot;, new string[]{&quot;i&quot;,&quot;b&quot;});
// Only &lt;p&gt; is allowed, so the &lt;img&gt; tag (with its onmouseover handler) and the &lt;i&gt; tags are removed.
string test2 = StripTags(&quot;&lt;p&gt;George &lt;img src=&#039;someimage.png&#039; onmouseover=&#039;someFunction()&#039;&gt;W &lt;i&gt;Bush&lt;/i&gt;&lt;/p&gt;&quot;, new string[]{&quot;p&quot;});
// Only &lt;a&gt; is allowed, so the nested &lt;b&gt; tags are removed; StripTags leaves the href attribute alone.
string test3 = StripTags(&quot;&lt;a href=&#039;http://www.dijksterhuis.org&#039;&gt;Martijn &lt;b&gt;Dijksterhuis&lt;/b&gt;&lt;/a&gt;&quot;, new string[]{&quot;a&quot;});
</pre>
<p>Using the above example code returns the following:<br />
<span><br />
</span></p>
<div style="margin-left: 40px;"><span>George&lt;b&gt;W&lt;/b&gt;&lt;i&gt;Bush&lt;/i&gt;</span><br />
<span>&lt;p&gt;George W Bush&lt;/p&gt;</span><br />
<span>&lt;a href=&#039;http://www.dijksterhuis.org&#039;&gt;Martijn Dijksterhuis&lt;/a&gt;</span></div>
<p><em><br />
</em><strong><span>string StripTagsAndAttributes(string Input, string[] AllowedTags)</span></strong></p>
<p>The above StripTags function is similar to the original PHP strip_tags function in having the same weakness: It is still possible for a malicious user to insert attributes into each of the tags. Think &#8220;style=&#8221; and &#8220;id=&#8221;. We would be somewhat safer if we cleaned these as well. The <em><span>StripTagsAndAttributes </span></em><span>method</span> does just that.</p>
<p>It first runs the input through <em>StripTags</em>, and for the remaining tags it strips out all but a restricted set of attributes.</p>
<pre class="brush: c#">
// Input anchor carrying three attributes: class, onClick and href.
string test4 = &quot;&lt;a class=\&quot;classof69\&quot; onClick=&#039;crosssite.boom()&#039; href=&#039;http://www.dijksterhuis.org&#039;&gt;Martijn Dijksterhuis&lt;/a&gt;&quot;;
// Only &lt;a&gt; is on the allow-list; the filtered result is written to the console.
Console.WriteLine(StripTagsAndAttributes(test4, new string[]{&quot;a&quot;}));
</pre>
<p><span>That &#8220;onClick&#8221; attribute looks mighty unsafe. Running the above string through </span><em><span>StripTagsAndAttributes </span></em><span>as in the example above returns: </span></p>
<div style="margin-left: 40px;"><span>&lt;a class=&quot;classof69&quot; href=&#039;http://www.dijksterhuis.org&#039;&gt;Martijn Dijksterhuis&lt;/a&gt;</span></div>
<p>This function probably needs some tuning if you want to allow, or restrict things even further.</p>
<p><strong>A word of caution</strong></p>
<p>Regular expressions are voodoo, very cool, but still voodoo. The above functions work for the tests I have applied to them, but your mileage may vary! If you have a special situation that doesn&#8217;t work leave a note below and maybe we can work out the problems.</p>
<p><strong>Credits</strong></p>
<p>The strip_tags function is of course inspired by the <a id="ixfp" title="PHP version" href="http://tw.php.net/manual/en/function.strip-tags.php">PHP version</a> , and a JavaScript implementation thereof by <a id="evns" title="Kevin van Zonneveld" href="http://kevin.vanzonneveld.net/techblog/article/javascript_equivalent_for_phps_strip_tags/">Kevin van Zonneveld. </a>The attribute stripping routine is based on the regular expressions by <a id="q100" title="mdw252" href="http://tw.php.net/manual/en/function.strip-tags.php#88491">mdw252</a> in one of the strip_tags manual page comments.</p>
<p><strong>Source code</strong></p>
<p>The complete source code for the <em>StripTags</em> function and <em>StripTagsAndAttributes</em> function with my test code can be found below:</p>
<p></p>
<pre class="brush: c#">

using System;
using System.Text.RegularExpressions;

namespace StripHTML
{
	// Regex based helpers that remove HTML tags and attributes from a string,
	// modelled after PHP&#039;s strip_tags, plus a small console test driver.
	class MainClass
	{
		// Matches anything that looks like an opening, closing or self-closing tag
		private static readonly Regex StripHTMLExp = new Regex(@&quot;(&lt;\/?[^&gt;]+&gt;)&quot;);

		// Captures the tag name at the start of a matched tag. The lookahead demands
		// that the name is complete, so an allowed &quot;b&quot; does not let &quot;body&quot; through.
		private static readonly Regex TagNameExp = new Regex(@&quot;^&lt;/?([a-zA-Z][a-zA-Z0-9]*)(?=[\s/&gt;])&quot;);

		// Replaces the first occurrence of needle in haystack with replacement.
		// Returns haystack unchanged when needle does not occur.
		private static string ReplaceFirst(string haystack, string needle, string replacement)
		{
			// Ordinal search: the needle is markup, not natural language text
			int pos = haystack.IndexOf(needle, StringComparison.Ordinal);
			if (pos &lt; 0) return haystack;
			return haystack.Substring(0, pos) + replacement + haystack.Substring(pos + needle.Length);
		}

		// Replaces every occurrence of needle in haystack with replacement.
		// Returns haystack unchanged when needle is null or empty.
		private static string ReplaceAll(string haystack, string needle, string replacement)
		{
			// Nothing to search for, and string.Replace rejects an empty needle
			if (string.IsNullOrEmpty(needle)) return haystack;

			// string.Replace scans the input once, so a match at index 0 is replaced
			// and a replacement that contains needle cannot cause an endless loop
			return haystack.Replace(needle, replacement);
		}

		// Returns true when Tag is the opening, closing or self-closing form of one
		// of AllowedTags. Tag names are compared case-insensitively.
		private static bool IsAllowedTag(string Tag, string[] AllowedTags)
		{
			Match Name = TagNameExp.Match(Tag);

			// Comments, doctypes and other constructs without a tag name are never allowed
			if (!Name.Success) return false;

			foreach (string AllowedTag in AllowedTags)
			{
				if (string.Equals(Name.Groups[1].Value, AllowedTag, StringComparison.OrdinalIgnoreCase))
					return true;
			}

			return false;
		}

		// Removes every tag from Input whose name is not listed in AllowedTags.
		// The text between the tags is kept. A null AllowedTags allows no tags.
		public static string StripTags(string Input, string[] AllowedTags)
		{
			if (AllowedTags == null) AllowedTags = new string[0];

			string Output = Input;

			foreach (Match Tag in StripHTMLExp.Matches(Input))
			{
				// Remove tags that are not allowed
				if (!IsAllowedTag(Tag.Value, AllowedTags))
					Output = ReplaceFirst(Output, Tag.Value, &quot;&quot;);
			}

			return Output;
		}

		// Runs Input through StripTags and then removes attributes from the remaining
		// tags, keeping the &quot;href&quot; and &quot;class&quot; attributes of tags starting with &quot;&lt;a&quot;.
		public static string StripTagsAndAttributes(string Input, string[] AllowedTags)
		{
			/* Remove all unwanted tags first */
			string Output = StripTags(Input,AllowedTags);

			/* Lambda functions */
			/* The first two swap the &quot;=&quot; of an attribute for a marker, so that the */
			/* unsafe attribute pattern below no longer matches that attribute */
			MatchEvaluator HrefMatch = m =&gt; m.Groups[1].Value + &quot;href..;,;..&quot; + m.Groups[2].Value;
			MatchEvaluator ClassMatch = m =&gt; m.Groups[1].Value + &quot;class..;,;..&quot; + m.Groups[2].Value;
			/* Keeps the text before and after a matched attribute, dropping the attribute */
			MatchEvaluator UnsafeMatch = m =&gt; m.Groups[1].Value + m.Groups[4].Value;
			
			/* Allow the &quot;href&quot; attribute */
			Output = new Regex(&quot;(&lt;a.*)href=(.*&gt;)&quot;).Replace(Output,HrefMatch);

			/* Allow the &quot;class&quot; attribute */
			Output = new Regex(&quot;(&lt;a.*)class=(.*&gt;)&quot;).Replace(Output,ClassMatch);

			/* Remove unsafe attributes in any of the remaining tags */
			/* NOTE(review): all three patterns are greedy, so input with several tags or */
			/* several unsafe attributes presumably loses only one per match - verify */
			Output = new Regex(@&quot;(&lt;.*) .*=(\&#039;|\&quot;&quot;|\w)[\w|.|(|)]*(\&#039;|\&quot;&quot;|\w)(.*&gt;)&quot;).Replace(Output,UnsafeMatch);

			/* Return the allowed tags to their proper form */
			Output = ReplaceAll(Output,&quot;..;,;..&quot;, &quot;=&quot;);
			
			return Output;
		}

		// Console driver that prints the result of each test input
		public static void Main(string[] args)
		{
			string test1 = StripTags(&quot;&lt;p&gt;George&lt;/p&gt;&lt;b&gt;W&lt;/b&gt;&lt;i&gt;Bush&lt;/i&gt;&quot;, new string[]{&quot;i&quot;,&quot;b&quot;});
			string test2 = StripTags(&quot;&lt;p&gt;George &lt;img src=&#039;someimage.png&#039; onmouseover=&#039;someFunction()&#039;&gt;W &lt;i&gt;Bush&lt;/i&gt;&lt;/p&gt;&quot;, new string[]{&quot;p&quot;});
			string test3 = StripTags(&quot;&lt;a href=&#039;http://www.dijksterhuis.org&#039;&gt;Martijn &lt;b&gt;Dijksterhuis&lt;/b&gt;&lt;/a&gt;&quot;, new string[]{&quot;a&quot;});
			
			Console.WriteLine(test1);
			Console.WriteLine(test2);
			Console.WriteLine(test3);

			string test4 = &quot;&lt;a class=\&quot;classof69\&quot; onClick=&#039;crosssite.boom()&#039; href=&#039;http://www.dijksterhuis.org&#039;&gt;Martijn Dijksterhuis&lt;/a&gt;&quot;;
			Console.WriteLine(StripTagsAndAttributes(test4, new string[]{&quot;a&quot;}));
		}
	}
}

</pre>
<p>Image credit: <a rel="nofollow" href="http://www.flickr.com/photos/jesper/">Jesper Rønn-Jensen&#8217;s</a></p>
<p>This is a post from <a href="http://www.dijksterhuis.org">Martijn's C# Coding Blog</a>. </p>
]]></content:encoded>
			<wfw:commentRss>http://www.dijksterhuis.org/safely-cleaning-html-with-strip_tags-in-csharp/feed/</wfw:commentRss>
		<slash:comments>9</slash:comments>
		</item>
	</channel>
</rss>
