<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:blogger='http://schemas.google.com/blogger/2008' xmlns:georss='http://www.georss.org/georss' xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr='http://purl.org/syndication/thread/1.0'><id>tag:blogger.com,1999:blog-7715485</id><updated>2026-01-28T15:11:45.172-05:00</updated><category term="javascript"/><category term="performance"/><category term="hack"/><category term="security"/><category term="web"/><category term="linux"/><category term="unix"/><category term="conference"/><category term="bandwidth"/><category term="css"/><category term="mysql"/><category term="network"/><category term="programming"/><category term="sysadmin"/><category term="thoughts"/><category term="database"/><category term="date"/><category term="http"/><category term="latency"/><category term="4.01-strict"/><category term="html"/><category term="php"/><category term="statistics"/><category term="yui"/><category term="comic strip"/><category term="dhtml"/><category term="rum"/><category term="ssl"/><category term="ayttm"/><category term="boomerang"/><category term="filesystem"/><category term="firefox"/><category term="foss.in"/><category term="macosx"/><category term="passwords"/><category term="timezone"/><category term="twitter"/><category term="ubuntu"/><category term="webdev"/><category term="xss"/><category term="YQL"/><category term="geo"/><category term="ie"/><category term="iso8601"/><category term="json"/><category term="mail"/><category term="measurement"/><category term="programming style"/><category term="regex"/><category term="regular expressions"/><category term="sendmail"/><category term="shell"/><category term="smtp"/><category term="webperf"/><category term="yahoo"/><category term="404"/><category term="ISP"/><category term="SQL"/><category 
term="android"/><category term="apache"/><category term="authentication"/><category term="bof"/><category term="bug"/><category term="c"/><category term="curl"/><category term="cwv"/><category term="dragdrop"/><category term="dynamic script node"/><category term="email"/><category term="flickr"/><category term="freebsd"/><category term="gmail"/><category term="google"/><category term="hacker"/><category term="i18n"/><category term="im"/><category term="internet"/><category term="ios"/><category term="ip"/><category term="ipv6"/><category term="lcp"/><category term="localisation"/><category term="mathematics"/><category term="microformats"/><category term="opensource"/><category term="perl"/><category term="phone"/><category term="progressive enhancement"/><category term="sampling"/><category term="scalability"/><category term="shell script"/><category term="talks"/><category term="tcp"/><category term="thisisfixed"/><category term="unicode"/><category term="url"/><category term="ux"/><category term="velocity"/><category term="webtiming"/><category term="wifi"/><category term="yslow"/><category term="2fa"/><category term="API"/><category term="DST"/><category term="LC_TIME"/><category term="MAC"/><category term="X"/><category term="accessibility"/><category term="acer"/><category term="airport wifi"/><category term="algorithm"/><category term="ansible"/><category term="att"/><category term="audio"/><category term="australia"/><category term="badges"/><category term="bbc"/><category term="bcp"/><category term="berlin"/><category term="bigsur"/><category term="blog"/><category term="blogger"/><category term="blogger template"/><category term="book"/><category term="broken"/><category term="byte order"/><category term="caching"/><category term="chrome"/><category term="closure"/><category term="cls"/><category term="cmc"/><category term="cms"/><category term="codepo8"/><category term="colours"/><category term="comments"/><category term="communication"/><category 
term="compile"/><category term="confoo"/><category term="congestion-control"/><category term="cookies"/><category term="correlation"/><category term="couchdb"/><category term="cracker"/><category term="crash"/><category term="creative"/><category term="credit card"/><category term="crockford"/><category term="cron"/><category term="crux"/><category term="csrf"/><category term="data tags"/><category term="db"/><category term="debugging"/><category term="delicious"/><category term="design"/><category term="developer"/><category term="dns"/><category term="docker"/><category term="dom"/><category term="dopplr"/><category term="dos"/><category term="education"/><category term="emotion"/><category term="endianness"/><category term="entities"/><category term="ephemeral ports"/><category term="epicondylitis"/><category term="error checking"/><category term="esmtp"/><category term="everybuddy"/><category term="extensions"/><category term="facebook"/><category term="favicon"/><category term="fc9"/><category term="fedora"/><category term="fidelity"/><category term="firesheep"/><category term="flot"/><category term="form"/><category term="forms"/><category term="fosdem"/><category term="foss"/><category term="freedom"/><category term="freestyle"/><category term="ftp"/><category term="function currying"/><category term="gdb"/><category term="geek"/><category term="george"/><category term="gnome"/><category term="gradient"/><category term="groupon"/><category term="hardy"/><category term="hash"/><category term="howtos"/><category term="htc"/><category term="html5"/><category term="icici"/><category term="iit"/><category term="innerHTML"/><category term="inp"/><category term="instant messaging"/><category term="interfaces"/><category term="internet explorer"/><category term="ip address"/><category term="ipc"/><category term="iphone"/><category term="jabber"/><category term="jinja2"/><category term="jslint"/><category term="julia"/><category term="keynote"/><category 
term="latex"/><category term="load"/><category term="login"/><category term="lsm"/><category term="luhn"/><category term="mathjax"/><category term="media queries"/><category term="meetup"/><category term="memory"/><category term="messaging"/><category term="missing kids"/><category term="mobile"/><category term="montreal"/><category term="movable type"/><category term="mp3"/><category term="mvc"/><category term="name generator"/><category term="navtiming"/><category term="nexus"/><category term="nodejs"/><category term="notes"/><category term="opera"/><category term="ops"/><category term="partition"/><category term="pdf"/><category term="perception"/><category term="planet"/><category term="png"/><category term="ports"/><category term="prerender"/><category term="printing"/><category term="privacy"/><category term="psychology"/><category term="puzzle"/><category term="ram"/><category term="recovery"/><category term="redhat"/><category term="resource timing"/><category term="review"/><category term="rfc2822"/><category term="rfc3339"/><category term="rhel"/><category term="roundtrip"/><category term="rss"/><category term="rwd"/><category term="safari"/><category term="scripting"/><category term="search"/><category term="secnet"/><category term="sed"/><category term="segfault"/><category term="self extracting tarball"/><category term="seo"/><category term="server"/><category term="sigdashes"/><category term="site"/><category term="slideshare"/><category term="soap"/><category term="sockets"/><category term="spoofing"/><category term="starttls"/><category term="startup"/><category term="stoyan"/><category term="strftime"/><category term="stubbornella"/><category term="sydney"/><category term="tablespace"/><category term="tcp-slow-start"/><category term="testing"/><category term="text mode"/><category term="theme"/><category term="thisisbroken"/><category term="throughput"/><category term="tim berners-lee"/><category term="tips"/><category term="toc"/><category 
term="toy"/><category term="transactions"/><category term="two factor auth"/><category term="typing"/><category term="ui"/><category term="usability"/><category term="vint cerf"/><category term="w3c"/><category term="wav"/><category term="web services"/><category term="webcam"/><category term="webdu"/><category term="webkit"/><category term="whois"/><category term="widgets"/><category term="windows"/><category term="workaround"/><category term="write performance"/><category term="ydn"/><title type='text'>The other side of the moon</title><subtitle type='html'>/bb|[^b]{2}/&lt;br&gt;&#xa;Never stop Grokking</subtitle><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/posts/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default?redirect=false'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/'/><link rel='hub' href='http://pubsubhubbub.appspot.com/'/><link rel='next' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default?start-index=26&amp;max-results=25&amp;redirect=false'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><generator version='7.00' 
uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>151</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-7715485.post-1229160219958317440</id><published>2025-05-14T16:27:00.004-04:00</published><updated>2025-05-14T16:27:52.888-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="ansible"/><category scheme="http://www.blogger.com/atom/ns#" term="jinja2"/><category scheme="http://www.blogger.com/atom/ns#" term="ops"/><title type='text'>Ansible: Extracting multiple attributes from a list of dicts</title><content type='html'>&lt;p&gt;
I&#39;ve been writing a bunch of ansible playbooks, and in one case I had to transform a list of dicts to extract two attributes from each dict and create a new list of dicts.
i.e., given a list like this:
&lt;/p&gt;
&lt;pre&gt;
entities:
  - id: 123
    label: Label 1
    type: foo
    status: enabled
  - id: 234
    label: Label 2
    type: foo
    status: enabled
  - id: 345
    label: Label 3
    type: bar
    status: enabled
&lt;/pre&gt;
&lt;p&gt;
  I need to transform it into this:
&lt;/p&gt;
&lt;pre&gt;
entities:
  - id: 123
    type: foo
  - id: 234
    type: foo
  - id: 345
    type: bar
&lt;/pre&gt;
&lt;p&gt;
I found the examples in the ansible docs to be very limited. In most cases there are no examples that show the use of additional parameters to filters. I definitely couldn&#39;t find anything that would let me extract two attributes from a list of dicts. There are examples to extract a single element using &lt;code&gt;map(attribute=&#39;xxx&#39;)&lt;/code&gt;, but nothing to extract more than one attribute, so I had to come up with something of my own.
&lt;/p&gt;
&lt;p&gt;
I ended up with two possible solutions depending on how much flexibility you have in your playbook.
&lt;/p&gt;
&lt;h4&gt;1. Using &lt;code&gt;loop&lt;/code&gt;&lt;/h4&gt;
&lt;p&gt;
The easier option is to use &lt;code&gt;loop&lt;/code&gt; and construct the new list one element at a time. You can do this if you have the option to use a &lt;code&gt;set_fact&lt;/code&gt; block separate from where you need to use the variable.
&lt;/p&gt;
&lt;pre&gt;
- set_fact:
    entities_transformed: &#39;{{ entities_transformed|d([]) + [{&quot;id&quot;: item.id, &quot;type&quot;: item.type}] }}&#39;
  loop: &#39;{{ entities }}&#39;
&lt;/pre&gt;
&lt;p&gt;
This &lt;code&gt;set_fact&lt;/code&gt; block creates a new fact called &lt;code&gt;entities_transformed&lt;/code&gt; by repeatedly appending each transformed element to a new list.
&lt;/p&gt;
&lt;h4&gt;2. As a one liner&lt;/h4&gt;
&lt;p&gt;
If you need to write it all as a one liner without a &lt;code&gt;set_fact&lt;/code&gt;, then this second approach works for you.
&lt;/p&gt;
&lt;pre&gt;
  entities_transformed: &#39;{{ entities
                            | map(&quot;dict2items&quot;)
                            | map(&quot;selectattr&quot;, &quot;key&quot;, &quot;in&quot;, [&quot;id&quot;, &quot;type&quot;])
                            | map(&quot;items2dict&quot;) }}&#39;
&lt;/pre&gt;
&lt;p&gt;
This works in multiple steps and I&#39;ll explain each with what the output looks like at that stage.
&lt;/p&gt;
&lt;h5&gt;&lt;code&gt;map(&quot;dict2items&quot;)&lt;/code&gt;&lt;/h5&gt;
&lt;p&gt;This transforms the entities list into the following:&lt;/p&gt;
&lt;pre&gt;
  - - key: id
      value: 123
    - key: label
      value: Label 1
    - key: type
      value: foo
    - key: status
      value: enabled
  - - key: id
      value: 234
    - key: label
      value: Label 2
    - key: type
      value: foo
    - key: status
      value: enabled
  - - key: id
      value: 345
    - key: label
      value: Label 3
    - key: type
      value: bar
    - key: status
      value: enabled
&lt;/pre&gt;
&lt;h5&gt;&lt;code&gt;map(&quot;selectattr&quot;, &quot;key&quot;, &quot;in&quot;, [&quot;id&quot;, &quot;type&quot;])&lt;/code&gt;&lt;/h5&gt;
&lt;p&gt;This strips down to the required keys:&lt;/p&gt;
&lt;pre&gt;
  - - key: id
      value: 123
    - key: type
      value: foo
  - - key: id
      value: 234
    - key: type
      value: foo
  - - key: id
      value: 345
    - key: type
      value: bar
&lt;/pre&gt;
&lt;h5&gt;&lt;code&gt;map(&quot;items2dict&quot;)&lt;/code&gt;&lt;/h5&gt;
&lt;p&gt;This reverses the first step giving us the following:&lt;/p&gt;
&lt;pre&gt;
  - id: 123
    type: foo
  - id: 234
    type: foo
  - id: 345
    type: bar
&lt;/pre&gt;
&lt;p&gt;
Both options work equally well, but I prefer the second because it avoids creating additional facts and requiring loops in places where I cannot use one.
&lt;/p&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/1229160219958317440/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2025/05/ansible-extracting-multiple-attributes.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/1229160219958317440'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/1229160219958317440'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2025/05/ansible-extracting-multiple-attributes.html' title='Ansible: Extracting multiple attributes from a list of dicts'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-6109583372466382401</id><published>2025-04-22T15:00:00.002-04:00</published><updated>2025-04-22T15:00:14.100-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="debugging"/><category scheme="http://www.blogger.com/atom/ns#" term="dos"/><category scheme="http://www.blogger.com/atom/ns#" term="ram"/><category scheme="http://www.blogger.com/atom/ns#" term="text mode"/><title type='text'>Fixing a system without enough RAM for a text editor</title><content type='html'>&lt;p&gt;
Someone on quora asked why people still use editors like emacs and vim when more modern alternatives exist.
&lt;/p&gt;
&lt;p&gt;
There were so many great answers that I didn&#39;t need to answer the original question. I couldn&#39;t possibly add more to the question of why emacs or vim. Instead, I was reminded of an experience where even emacs/vim weren&#39;t options.
&lt;/p&gt;
&lt;p&gt;
Sometime in the mid to late &#39;90s, I visited my sister at university. She was in a biology lab, and they had a single 80386 PC with DOS and Windows 3.1. The computer wouldn&#39;t start Windows and they didn&#39;t know why and they asked me if I could do anything.
&lt;/p&gt;
&lt;p&gt;
Since I love debugging obscure problems like this, I decided to take a look. It turned out to be a simple case of there not being enough available RAM to start Windows. The box did however have 4MB of RAM, which should have been more than enough to start Windows... except, this was a 386, and for compatibility with older 8 bit software, RAM was split into &lt;a href=&quot;https://en.wikipedia.org/wiki/Conventional_memory&quot;&gt;Conventional memory&lt;/a&gt; (640KB), System ROM (640K-1M) and &lt;a href=&quot;https://en.wikipedia.org/wiki/Extended_memory&quot;&gt;Extended memory&lt;/a&gt; (everything above 1MB), and this box wasn&#39;t configured to use extended memory (if you remember HIMEM.SYS).
&lt;/p&gt;
&lt;p&gt;
  To make matters worse, &lt;a href=&quot;https://en.wikipedia.org/wiki/AUTOEXEC.BAT&quot;&gt;AUTOEXEC.BAT&lt;/a&gt; loaded a bunch of programs at startup that used up a bunch of RAM, which meant that I couldn&#39;t even start the basic EDIT program to edit AUTOEXEC.BAT or &lt;a href=&quot;https://en.wikipedia.org/wiki/CONFIG.SYS&quot;&gt;CONFIG.SYS&lt;/a&gt;.
&lt;/p&gt;
&lt;p&gt;
My only option at that point was to fall back to the absolute basics.
&lt;/p&gt;
&lt;pre&gt;
COPY CON AUTOEXEC.BAT
COPY CON CONFIG.SYS
&lt;/pre&gt;
&lt;p&gt;
  The equivalent on unix would be &lt;code&gt;cat&lt;/code&gt;. &lt;code&gt;COPY CON&lt;/code&gt; on &lt;code&gt;MS-DOS&lt;/code&gt; stands for &lt;code&gt;COPY&lt;/code&gt; what&#39;s on the &lt;code&gt;CONSOLE&lt;/code&gt; (the keyboard in this case) to the destination file, overwriting it if it exists.
&lt;/p&gt;
&lt;p&gt;
  (See &lt;a href=&quot;http://www.computerhope.com/jargon/c/copycon.htm&quot;&gt;What is copy con&lt;/a&gt;? for details)
&lt;/p&gt;
&lt;p&gt;
And I had to be really careful with what I typed because typing in the wrong thing would mean the system might not start up, and I didn&#39;t have a boot disk on me (remember I was just visiting with no plan of actually fixing a computer), and if I did have a boot disk, none of this would have been necessary.
&lt;/p&gt;
&lt;p&gt;
Anyway, I managed to build a very basic AUTOEXEC.BAT and CONFIG.SYS from memory (though I cannot remember now what I put into them), which allowed me to reboot the machine with enough RAM to start EDIT which allowed me to further edit the files to reboot with enough RAM to start Windows.
&lt;/p&gt;
&lt;p&gt;
What I learnt from this is that no matter how good a system you may have access to, you need to be prepared to use the absolute minimum available tools. On DOS this was COPY CON. On unix over a slow or lossy network, you might actually have to edit a file by sending single lines of sed. In order to be prepared to do this, you need to do this a lot. It turns out that vim and emacs are really just one step above sed (well technically one step above ed which is half a step above sed) although they are extendable to have all the features of Eclipse or Visual Studio if you like, but even without those extensions, they are far more powerful.
&lt;/p&gt;
&lt;p&gt;
Even while working with Eclipse, I&#39;ll find that there are times when I need to quit Eclipse, open my files in Vim, run a few commands to do things that would take me ages to do in Eclipse and then return to Eclipse. I need to use Eclipse because that&#39;s what our dev team has standardized on, and it makes it easier when screen sharing with other devs.
&lt;/p&gt;
&lt;p&gt;
If you liked this post, there&#39;s a far more fun video of how the &lt;a href=&quot;https://www.youtube.com/watch?v=YcUycQoz0zg&quot;&gt;JPL team debugged and fixed an issue 15 billion miles away on Voyager 1&lt;/a&gt;.</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/6109583372466382401/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2025/04/fixing-system-without-enough-ram-for.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6109583372466382401'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6109583372466382401'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2025/04/fixing-system-without-enough-ram-for.html' title='Fixing a system without enough RAM for a text editor'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-294768888219064641</id><published>2025-03-17T13:31:00.006-04:00</published><updated>2025-03-17T21:34:11.903-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="authentication"/><category scheme="http://www.blogger.com/atom/ns#" term="i18n"/><category scheme="http://www.blogger.com/atom/ns#" term="passwords"/><title type='text'>On Migrating Character Encodings</title><content type='html'>&lt;p&gt;
Several discussions I&#39;ve had with friends and colleagues recently reminded me of an incident we faced several years ago at Yahoo!
&lt;/p&gt;
&lt;p&gt;
Now Yahoo! as a company was made up of many different local offices around the world, each responsible for content in their locale. Since there was a lot of user generated content, this meant users in a particular locale could easily enter content (blog posts, restaurant reviews, etc.) in their local language script.
&lt;/p&gt;
&lt;p&gt;
Everyone was happy!
&lt;/p&gt;
&lt;p&gt;
From about 2005 onwards, the company was looking to unify some of the platforms used around the world. For example, we had something like 4 or 5 different platforms to do ratings and reviews and it didn&#39;t make sense to have different architectures, database layouts, BCP setups, and a separate team managing each of these, so we started unifying. Building a common architecture was the easy part. I worked on several of these projects. Getting front end teams to migrate was also not terribly hard. Migrating content though, was tough because each region had content in their own locale and MySQL didn&#39;t let you set multiple character encodings on text columns.
&lt;/p&gt;
&lt;p&gt;
So the &lt;abbr title=&quot;Internationalization&quot;&gt;i18n&lt;/abbr&gt; team started working with teams across Y! to move everything to utf8. The easy part was changing HTTP headers and &amp;lt;meta&amp;gt; tags. Content was a little harder, but doable with &lt;a href=&quot;https://linux.die.net/man/1/iconv&quot;&gt;iconv(1)&lt;/a&gt; since in most cases we knew the source character encoding and the destination was always utf-8. In some cases we had to guess, but it generally worked...
&lt;/p&gt;
&lt;p&gt;
...until at one point we also decided to do it for authentication.
&lt;/p&gt;
&lt;p&gt;
One of the things that was localized was authentication, because it allowed users in, for example, South Korea, to use Hangul characters in their passwords. Usernames were always restricted to just alphanumeric characters and underscores (If I remember correctly).
&lt;/p&gt;
&lt;p&gt;
Passwords are stored, as they should be, salted and hashed, so the character encoding of the database column was always us-ascii, which is compatible with utf-8, so no biggie..., except the input character encoding used by the browser was based on the HTTP headers or META tags of the page, and the transfer encoding was based on the enctype of the login FORM.
&lt;/p&gt;
&lt;p&gt;
Prior to this move, these were all set to a character encoding that made sense locally, so Korea used &lt;a href=&quot;https://en.wikipedia.org/wiki/Extended_Unix_Code&quot;&gt;EUC-KR&lt;/a&gt; and China used &lt;a href=&quot;https://en.wikipedia.org/wiki/Big5&quot;&gt;Big5&lt;/a&gt;, and so the hashed passwords used the byte sequences that resulted from treating the input as one of these encodings.
&lt;/p&gt;
&lt;p&gt;
After the move, the user would still type in the same password, but when we converted them to bytes, we used utf-8, which resulted in a different byte sequence than the original encoding, so hashing this new sequence of bytes resulted in a different hash, and users could no longer log in.  Well, only users that used non-ASCII characters in their passwords.
&lt;/p&gt;
&lt;p&gt;
I forget what the actual fix was, but there were several options on the table. One was to revert the character encoding changes on the login page and to re-encode all passwords after a successful login. Another was to generate two hashes, one using utf-8 and another using the pre-migration character encoding for the region and to allow a success on either to go through.
&lt;/p&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/294768888219064641/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2025/03/on-migrating-character-sets.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/294768888219064641'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/294768888219064641'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2025/03/on-migrating-character-sets.html' title='On Migrating Character Encodings'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-9078408045723363006</id><published>2025-02-01T10:43:00.009-05:00</published><updated>2025-02-28T11:01:41.920-05:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="cls"/><category scheme="http://www.blogger.com/atom/ns#" term="cwv"/><category scheme="http://www.blogger.com/atom/ns#" term="inp"/><category scheme="http://www.blogger.com/atom/ns#" term="javascript"/><category scheme="http://www.blogger.com/atom/ns#" term="lcp"/><category scheme="http://www.blogger.com/atom/ns#" term="performance"/><category scheme="http://www.blogger.com/atom/ns#" term="statistics"/><category scheme="http://www.blogger.com/atom/ns#" term="webperf"/><title type='text'>When users interact</title><content type='html'>&lt;style&gt;FIGCAPTION {font-style: italic; text-align: 
center; font-size: 0.9em; margin-top:0em; margin-bottom: 1.5em; border-top: dotted 0.5px #aaa; }&lt;/style&gt;

&lt;p&gt;
When looking at the Core Web Vitals, we often try optimizing each independently of the others, but that&#39;s not how users experience the web.  A user&#39;s web experience
is made up of many metrics, and it&#39;s important to look at these metrics together for each experience. Real User Measurement (RUM) allows us to do that by collecting
operational metrics in conjunction with user actions, the combination of which can tell us whether our pages actually meet the user&#39;s expectations.
&lt;/p&gt;
&lt;p&gt;
In this experiment, I decided to look at each of the events in a page&#39;s loading cycle, and break that down by when the user tried interacting with the
page. For those interactions, I looked at the &lt;a href=&quot;https://web.dev/articles/inp&quot;&gt;Interaction to Next Paint&lt;/a&gt;, and the rate of rage clicking to get an idea of user
experience and whether that experience may have been &lt;a href=&quot;https://www.frustrationindex.com/&quot;&gt;frustrating&lt;/a&gt; or not.
&lt;/p&gt;
&lt;p&gt;
Before I jump into the charts, I should note an important caveat about the data. This analysis was done using &lt;abbr title=&quot;Real User Measurement&quot;&gt;RUM&lt;/abbr&gt; data collected
by Akamai&#39;s &lt;a href=&quot;https://www.akamai.com/products/mpulse-real-user-monitoring&quot;&gt;mPulse&lt;/a&gt; product which collects data at or soon after page load. Not all page views resulted
in an interaction before data was collected.  Most of the analysis was restricted to page views where we had at least one interaction prior to data collection.  We see
on average, between 2% and 25% of beacons collected (across sites) had an interaction. Most sites had a recorded interaction on about 10% of beacons. I also separately looked at
data collected during page unload/pagehide and while it captured more interactions, it did not have a noticeable effect on the results.
&lt;/p&gt;
&lt;p&gt;
Each of the following charts is from a different website in mPulse&#39;s dataset.
&lt;/p&gt;

&lt;h4&gt;Exploring the chart&lt;/h4&gt;

&lt;img src=&quot;https://blog.akamai-mpulse.com/blog/2024-12-16-when-users-interact/interaction-analysis-2.jpg&quot; alt=&quot;Interaction Analysis - Virtual Globe Trotting&quot;&gt;
&lt;FIGCAPTION&gt;Interaction analysis chart for &lt;a href=&quot;https://virtualglobetrotting.com/&quot;&gt;Virtual Globe Trotting&lt;/a&gt;&lt;/FIGCAPTION&gt;

&lt;p&gt;
Let&#39;s now look at the various features of this chart.
&lt;/p&gt;
&lt;p&gt;
The chart shows multiple dimensions of data projected onto a 2D surface, so some parts of it will appear wonky. We&#39;ll walk through that in this section.
&lt;/p&gt;

&lt;h5&gt;Event labels&lt;/h5&gt;

&lt;p&gt;
The first thing we&#39;ll describe are the events. These are the vertical colored lines with labels to their right. These represent transition events in the page load
cycle. The events we include are:
&lt;ul&gt;
  &lt;li&gt;&lt;a href=&quot;https://developer.mozilla.org/en-US/docs/Glossary/First_paint&quot;&gt;First Paint&lt;/a&gt; (FP)&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://developer.mozilla.org/en-US/docs/Glossary/First_contentful_paint&quot;&gt;First Contentful Paint&lt;/a&gt; (FCP)&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://developer.mozilla.org/en-US/docs/Glossary/Largest_contentful_paint&quot;&gt;Largest Contentful Paint&lt;/a&gt; (LCP)&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://akamai.github.io/boomerang/oss/BOOMR.plugins.Continuity.html#toc10__anchor&quot;&gt;Time to Visually Ready&lt;/a&gt; (TTVR)&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://akamai.github.io/boomerang/oss/BOOMR.plugins.Continuity.html#toc11__anchor&quot;&gt;Time to Interactive&lt;/a&gt; (TTI)&lt;/li&gt;
  &lt;li&gt;Page Load Time&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;
You may have already noticed that in this particular chart, First Paint is &lt;em&gt;after&lt;/em&gt; First Contentful Paint, which is counter-intuitive. The reason we see this is that
the number of data points with First Paint on them is different from those with First Contentful Paint. Safari and Firefox, for example, support FCP but not FP. When
aggregating these points, the same percentile value when applied to two data sets will likely get you values from two different experiences. This effect is more prominent when
the sample sizes are different. In general we would not expect the delta to be too far off, and in the data I&#39;ve looked at, it hasn&#39;t been more than 50ms off.
&lt;/p&gt;
&lt;p&gt;
The events to keep an eye on are the Largest Contentful Paint or Time to Visually Ready, the Time to Interactive, and the delta between them. LCP is not currently supported
on Safari, so we use &lt;a href=&quot;https://github.com/akamai/boomerang&quot;&gt;boomerang&lt;/a&gt;&#39;s cross-browser calculation of TTVR in those cases.
&lt;/p&gt;
&lt;p&gt;
Time to Interactive is considered a lab measurement, but &lt;code&gt;boomerang&lt;/code&gt; measures it in a cross-browser manner during RUM sessions, and passes that data back to mPulse. It is
approximately the time when interactions with the page are expected to be smooth due to no more long animation frames and blocking time.
&lt;/p&gt;
&lt;p&gt;
The next thing to note is that these events are positioned on this projection based on when they occurred relative to interactions &lt;em&gt;as well as&lt;/em&gt; when they occurred
relative to page load time. By definition this means that all interactions should show up after &lt;abbr title=&quot;Largest Contentful Paint&quot;&gt;LCP&lt;/abbr&gt; but it may show up
differently on the chart due to the projection from multiple dimensions down to two. There&#39;s also the fact that TTVR calculations do not stop at first interaction, so
on browsers that do not support LCP, we may see interactions before the proxy for that event.
&lt;/p&gt;
&lt;p&gt;
The absolute value of each event is calculated across the entire dataset, even on pages without interactions, so it might look like events aren&#39;t placed where
their values dictate they should be, however the percentage of users interacting before &amp; after an event is always correct.
&lt;/p&gt;
&lt;p&gt;
The last label to take note of is the fraction of users that interacted before &lt;code&gt;boomerang&lt;/code&gt; considered the page to be interactive. In this case, it&#39;s 12% of users.
&lt;/p&gt;

&lt;h5&gt;Data distributions&lt;/h5&gt;
&lt;img src=&quot;https://blog.akamai-mpulse.com/blog/2024-12-16-when-users-interact/interaction-analysis-1.jpg&quot; alt=&quot;Interaction Analysis Site. 2&quot;&gt;
&lt;FIGCAPTION&gt;Interaction analysis chart showing mouseover details.&lt;/FIGCAPTION&gt;

&lt;p&gt;
There are a few different distributions shown on this chart (and even more when we look at the mouseover in the chart above).
&lt;/p&gt;
&lt;p&gt;
The blue area chart is the &lt;em&gt;population density&lt;/em&gt;. It shows, for every 5% interval of the page load time, how many users first interacted with the page at that point in
the page&#39;s loading cycle.
&lt;/p&gt;
&lt;p&gt;
The blue dots that trace the population density chart show the median &lt;em&gt;Interaction to Next Paint&lt;/em&gt; value for all of those interactions. Keep in mind that INP is not
supported on Safari, whereas &lt;code&gt;boomerang&lt;/code&gt;&#39;s own measurements for TTI do work across browsers.
&lt;/p&gt;
&lt;p&gt;
The vertical position of the red dots shows the &lt;em&gt;probability&lt;/em&gt; that interactions at that time resulted in &lt;em&gt;rage clicks&lt;/em&gt; while the size of the red dots shows the &lt;em&gt;intensity&lt;/em&gt;
of these rage clicks. Rage clicks are collected across browsers.
&lt;/p&gt;
&lt;p&gt;
The thin orange line shows &lt;a href=&quot;https://blog.akamai-mpulse.com/blog/2024-11-15-frustration-index-intro/&quot;&gt;Frustration Index&lt;/a&gt; for users that interacted within that window.
&lt;/p&gt;
&lt;p&gt;
We also have the median &lt;a href=&quot;https://web.dev/articles/tbt&quot;&gt;Total Blocking Time&lt;/a&gt; for each of these interactions, though that&#39;s only visible in the live versions of these charts
and not in most of the screenshots posted here.
&lt;/p&gt;
&lt;p&gt;
In this second chart, we see that 59% of users interacted with the site before it became interactive. Its &lt;abbr title=&quot;Time to Interactive&quot;&gt;TTI&lt;/abbr&gt;
is further from the &lt;abbr title=&quot;Largest Contentful Paint&quot;&gt;LCP&lt;/abbr&gt; time compared to the first site.
&lt;/p&gt;

&lt;h4&gt;Insights from the data&lt;/h4&gt;
&lt;img src=&quot;https://blog.akamai-mpulse.com/blog/2024-12-16-when-users-interact/interaction-analysis-3.jpg&quot; alt=&quot;Interaction Analysis Fig. 3&quot;&gt;
&lt;FIGCAPTION&gt;Interaction analysis chart showing INP increasing around TTI.&lt;/FIGCAPTION&gt;

&lt;p&gt;
When we look at this data across websites, we see the same patterns. Users expect to be able to interact with the site once the page is largely visible, however, the
user experience for interactions is sub-optimal until the time to interactive which can be much later in the page&#39;s loading cycle.
&lt;/p&gt;
&lt;p&gt;
In most cases we see a high Total Blocking Time in the period between LCP and TTI, resulting in a slow &lt;abbr title=&quot;Interaction to Next Paint&quot;&gt;INP&lt;/abbr&gt;, and higher
probability of rage clicking.
&lt;/p&gt;
&lt;p&gt;
When looking to optimize a site for user experience, we shouldn&#39;t look at each metric in isolation. A really fast LCP is a great first user experience, but it&#39;s also a
signal to the user that they can proceed with interacting to complete their task. It&#39;s important that the rest of the page be ready for those interactions and keep up
the good experience.
&lt;/p&gt;

&lt;h4&gt;The elephant in the room&lt;/h4&gt;
&lt;img src=&quot;https://blog.akamai-mpulse.com/blog/2024-12-16-when-users-interact/interaction-analysis-4.jpg&quot; alt=&quot;Interaction Analysis Fig. 4&quot;&gt;
&lt;FIGCAPTION&gt;Interaction analysis chart for &lt;a href=&quot;https://akamai.com/&quot;&gt;Akamai.com&lt;/a&gt; focussing on the population series.&lt;/FIGCAPTION&gt;

&lt;p&gt;
As an aside, has anyone else noticed that these charts almost always look like a sleeping elephant (or maybe
&lt;a href=&quot;https://nyheritage.contentdm.oclc.org/digital/collection/p15281coll76/id/779/&quot;&gt;a hat&lt;/a&gt;)? I&#39;ve seen very few sites where this isn&#39;t the case, so I looked into
that pattern.
&lt;/p&gt;
&lt;p&gt;
The population distribution pattern we see is a gradual curve increasing, then a dip that looks like the elephant&#39;s neck, then a bump that could be its ears, a sharp
dip and long flat region that could be its trunk.
&lt;/p&gt;
&lt;p&gt;
It could well be a &lt;a href=&quot;https://en.wikipedia.org/wiki/Normal_distribution&quot;&gt;Normal distribution&lt;/a&gt; if it weren&#39;t for the dip and spike right around &lt;abbr title=&quot;Page Load Time&quot;&gt;PLT&lt;/abbr&gt;.
&lt;/p&gt;

&lt;img src=&quot;https://blog.akamai-mpulse.com/blog/2024-12-16-when-users-interact/normal-distribution.jpg&quot; alt=&quot;Normal Distribution&quot;&gt;
&lt;FIGCAPTION&gt;A basic Normal Distribution curve with a mean of 75 and standard deviation of 30.&lt;/FIGCAPTION&gt;

&lt;p&gt;
The drop-off after OnLoad is expected. &lt;code&gt;boomerang.js&lt;/code&gt; sends a beacon on or soon after page load (sites
&lt;a href=&quot;https://techdocs.akamai.com/mpulse/docs/most-mpulse-beacon#delay-real-time-measurement-reporting&quot;&gt;can configure a beacon delay of a few seconds&lt;/a&gt; to capture
post-onload events). This results in a drop-off in data with interactions after onload. The post onload interactions are on pages that are faster than the average.
&lt;/p&gt;
&lt;p&gt;
The strange pattern is the spike in interactions just at or after onload (it&#39;s sometimes at 100% and sometimes at 105%). The dip at 95% &amp; 100% shows up on most, but not
all sites, but the spike shows up everywhere.
&lt;/p&gt;
&lt;p&gt;
I looked closer at the data around those buckets and there is very little difference in terms of experience.  The page load time, LCP time, TTI time, etc. are all very
similar at the 25th and 75th percentile (in other words, the experiences are comparable). The only difference is that more users prefer to interact with the site just after
the onload event has fired than just before it. It&#39;s not a big delay - about 200-400ms on average across sites, but it does look like some portion of users still wait for
the loading indicator to complete before they interact.
&lt;/p&gt;

&lt;h4&gt;Conclusions&lt;/h4&gt;

&lt;p&gt;
In conclusion, I think there&#39;s a lot to be learned from looking at when your users interact with your site. Which parts of the page have finished loading when that interaction
happens? What&#39;s still in flight? What do they experience? Is there too much of a delay between your LCP and the site becoming usable?
&lt;/p&gt;
&lt;p&gt;
A good loading experience needs your page to transition from state to state smoothly without too much delay between states. Looking at the loading Frustration Index can
identify pages where this isn&#39;t the case.
&lt;/p&gt;
&lt;p&gt;
When comparing different events on the page, look at the aggregate of deltas rather than the delta of aggregates.
&lt;/p&gt;
&lt;p&gt;
And lastly, keep an eye out for that elephant.
&lt;/p&gt;

&lt;h4&gt;References&lt;/h4&gt;
&lt;h5&gt;Glossary on Mozilla Developer Network&lt;/h5&gt;
&lt;ul&gt;
  &lt;li&gt;&lt;a href=&quot;https://developer.mozilla.org/en-US/docs/Glossary/First_paint&quot;&gt;First Paint&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://developer.mozilla.org/en-US/docs/Glossary/First_contentful_paint&quot;&gt;First Contentful Paint&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://developer.mozilla.org/en-US/docs/Glossary/Largest_contentful_paint&quot;&gt;Largest Contentful Paint&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://developer.mozilla.org/en-US/docs/Glossary/Interaction_to_next_paint&quot;&gt;Interaction to Next Paint&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;h5&gt;Web Vitals on Google&#39;s Web.Dev&lt;/h5&gt;
&lt;ul&gt;
  &lt;li&gt;&lt;a href=&quot;https://web.dev/articles/inp&quot;&gt;Interaction to Next Paint&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://web.dev/articles/tbt&quot;&gt;Total Blocking Time&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://web.dev/articles/lcp&quot;&gt;Largest Contentful Paint&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://web.dev/articles/fcp&quot;&gt;First Contentful Paint&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;h5&gt;Implementations in mPulse&lt;/h5&gt;
&lt;ul&gt;
  &lt;li&gt;&lt;a href=&quot;https://github.com/akamai/boomerang&quot;&gt;boomerang&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.frustrationindex.com/&quot;&gt;Frustration Index by Tim Vereecke&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://akamai.github.io/boomerang/oss/BOOMR.plugins.Continuity.html#toc10__anchor&quot;&gt;Time to Visually Ready in boomerang&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://akamai.github.io/boomerang/oss/BOOMR.plugins.Continuity.html#toc11__anchor&quot;&gt;Time to Interactive in boomerang&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://akamai.github.io/boomerang/oss/BOOMR.plugins.Continuity.html#toc6__anchor&quot;&gt;Monitoring Interactions&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://blog.akamai-mpulse.com/blog/2024-11-15-frustration-index-intro/&quot;&gt;Frustration Index in mPulse&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;h5&gt;Other useful links&lt;/h5&gt;
&lt;ul&gt;
  &lt;li&gt;&lt;a href=&quot;https://paulcalvano.com/2019-01-11-correlating-performance-metrics-to-page-characteristics/&quot;&gt;Paul Calvano on different performance metrics&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://rumarchive.org/&quot;&gt;The RUM Archive&lt;/a&gt; to do your own RUM analysis.&lt;/li&gt;
&lt;/ul&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/9078408045723363006/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2025/02/when-users-interact.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/9078408045723363006'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/9078408045723363006'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2025/02/when-users-interact.html' title='When users interact'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-2232747700124223354</id><published>2022-05-03T11:12:00.006-04:00</published><updated>2022-05-03T11:12:59.879-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="curl"/><category scheme="http://www.blogger.com/atom/ns#" term="ftp"/><category scheme="http://www.blogger.com/atom/ns#" term="julia"/><title type='text'>Uploading a file using SFTP through Julia&#39;s LibCURL</title><content type='html'>&lt;h4&gt;The problem&lt;/h4&gt;
&lt;p&gt;
A colleague was recently tasked with using SFTP to upload an automated report to a customer&#39;s server. The code that generates the data runs in Julia 1.6, but there were no restrictions on where the upload code had to run.
&lt;/p&gt;
&lt;p&gt;
Unfortunately our command line &lt;code&gt;curl&lt;/code&gt; wasn&#39;t built with sftp support so we couldn&#39;t use that, and our system doesn&#39;t have &lt;code&gt;sftp&lt;/code&gt; installed, so we couldn&#39;t use that either. The question was whether we could use Julia&#39;s built-in &lt;code&gt;LibCURL&lt;/code&gt; library to do the upload.
&lt;/p&gt;

&lt;h4&gt;tl; dr&lt;/h4&gt;
&lt;p&gt;
Yes, you can, by converting a Julia IOStream to a C pointer to the file that needs uploading.
&lt;/p&gt;

&lt;h4&gt;The details&lt;/h4&gt;
&lt;p&gt;
Julia uses &lt;code&gt;LibCURL&lt;/code&gt; which is a simple wrapper around the &lt;code&gt;libcurl&lt;/code&gt; &lt;code&gt;C&lt;/code&gt; API. This generally means that we have to pass in &lt;code&gt;C&lt;/code&gt; pointers for a lot of things.  Now Julia can automatically do type conversions for basic types like &lt;code&gt;String&lt;/code&gt;s and &lt;code&gt;Number&lt;/code&gt;s, but more complex &lt;code&gt;struct&lt;/code&gt;s will need some work to make sure they&#39;re in the right format.
&lt;/p&gt;
&lt;p&gt;
The basic &lt;code&gt;libcurl&lt;/code&gt; code we need to reproduce is this:
&lt;/p&gt;
&lt;pre&gt;
curl = curl_easy_init()

curl_easy_setopt(curl, CURLOPT_URL, &quot;sftp://user:password@server:port/path/to/file.csv&quot;) # This would be a NULL terminated string in C, but Julia does that conversion for us
curl_easy_setopt(curl, CURLOPT_UPLOAD, 1)
curl_easy_setopt(curl, CURLOPT_PROTOCOLS, CURLPROTO_SFTP)

curl_easy_setopt(curl, CURLOPT_READDATA, file_ptr)   # We need to pass in a C file pointer here

res = curl_easy_perform(curl)
curl_easy_cleanup(curl)
&lt;/pre&gt;

&lt;p&gt;
The complication is that Julia uses &lt;code&gt;libuv&lt;/code&gt; to open files, and the return value from Julia&#39;s &lt;code&gt;open&lt;/code&gt; function is an &lt;code&gt;IOStream&lt;/code&gt;.  Fortunately, Julia has code in &lt;code&gt;Libc&lt;/code&gt; that converts between an &lt;code&gt;IO&lt;/code&gt; and a &lt;code&gt;FILE *&lt;/code&gt;:
&lt;/p&gt;
&lt;pre&gt;
struct FILE
    ptr::Ptr{Cvoid}
end

modestr(s::IO) = modestr(isreadable(s), iswritable(s))
modestr(r::Bool, w::Bool) = r ? (w ? &quot;r+&quot; : &quot;r&quot;) : (w ? &quot;w&quot; : throw(ArgumentError(&quot;neither readable nor writable&quot;)))

function FILE(fd::RawFD, mode)
    FILEp = ccall((@static Sys.iswindows() ? :_fdopen : :fdopen), Ptr{Cvoid}, (Cint, Cstring), fd, mode)
    systemerror(&quot;fdopen&quot;, FILEp == C_NULL)
    FILE(FILEp)
end

function FILE(s::IO)
    f = FILE(dup(RawFD(fd(s))),modestr(s))
    seek(f, position(s))
    f
end

Base.unsafe_convert(T::Union{Type{Ptr{Cvoid}},Type{Ptr{FILE}}}, f::FILE) = convert(T, f.ptr)
&lt;/pre&gt;

&lt;p&gt;
  Using this, we can open a file in Julia, and pass it on to &lt;code&gt;curl&lt;/code&gt;:
&lt;/p&gt;
&lt;pre&gt;
fh = open(&quot;file.csv&quot;, &quot;r&quot;)   # Open the file for reading and get an IOStream
fp = Libc.FILE(fh)           # Convert the IOStream to a FILE*
curl_easy_setopt(curl, CURLOPT_READDATA, fp.ptr)
&lt;/pre&gt;

&lt;p&gt;
  Note that in the call to &lt;code&gt;CURLOPT_READDATA&lt;/code&gt;, we need to pass in the &lt;code&gt;ptr&lt;/code&gt; member of the &lt;code&gt;FILE&lt;/code&gt; struct, since that&#39;s the actual C object.
&lt;/p&gt;

&lt;h4&gt;Complete example&lt;/h4&gt;

&lt;pre&gt;
curl = curl_easy_init()

curl_easy_setopt(curl, CURLOPT_URL, &quot;sftp://user:password@server:port/path/to/file.csv&quot;)
curl_easy_setopt(curl, CURLOPT_UPLOAD, 1)
curl_easy_setopt(curl, CURLOPT_PROTOCOLS, CURLPROTO_SFTP)

fh = open(&quot;file.csv&quot;, &quot;r&quot;)
fp = Libc.FILE(fh)
curl_easy_setopt(curl, CURLOPT_READDATA, fp.ptr)

res = curl_easy_perform(curl)
curl_easy_cleanup(curl)
&lt;/pre&gt;
</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/2232747700124223354/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2022/05/uploading-file-using-sftp-through.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/2232747700124223354'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/2232747700124223354'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2022/05/uploading-file-using-sftp-through.html' title='Uploading a file using SFTP through Julia&#39;s LibCURL'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-2434178715547971769</id><published>2021-08-30T14:07:00.000-04:00</published><updated>2021-08-30T14:21:07.557-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="crux"/><category scheme="http://www.blogger.com/atom/ns#" term="cwv"/><category scheme="http://www.blogger.com/atom/ns#" term="lcp"/><category scheme="http://www.blogger.com/atom/ns#" term="performance"/><category scheme="http://www.blogger.com/atom/ns#" term="rum"/><category scheme="http://www.blogger.com/atom/ns#" term="search"/><category scheme="http://www.blogger.com/atom/ns#" term="seo"/><category scheme="http://www.blogger.com/atom/ns#" term="ux"/><title type='text'>The metrics game</title><content type='html'>&lt;p&gt;
A recent &lt;a href=&quot;https://twitter.com/geekybiz/status/1429746730258427911&quot;&gt;tweet by Punit Sethi&lt;/a&gt; about a Wordpress plugin that reduces Largest Contentful Paint (LCP) without actually improving user experience led to a discussion about faking/gaming metrics.
&lt;/p&gt;
&lt;h4&gt;Core Web Vitals&lt;/h4&gt;
&lt;p&gt;
Google recently started using the LCP and other &lt;a href=&quot;https://web.dev/vitals/&quot;&gt;Core Web Vitals&lt;/a&gt; (aka CWV) as a signal for ranking search results. Google&#39;s goal in using CWV as a ranking signal is to make the web better for end users. The understanding is that these metrics (Input delays, Layout shift, and Contentful paints) reflect the end user experience, so sites with good CWV scores should (in theory) be better for users... reducing wait time, frustration, and annoyance with the web.
&lt;/p&gt;
&lt;p&gt;
If I&#39;ve learnt anything over the last 20 years of working with the web, it&#39;s that getting to the top of a Google search result page (SRP) is a major goal for most site owners, so metrics that affect that ranking tend to be researched a lot. The LCP is no different, and the result often shows up in such &quot;quick fix&quot; plugins that Punit discusses above. Web performance (Page Load Time) was only ever spoken about as a sub-topic in highly technical spaces until Google decided to start using it as a signal for page ranking, and then suddenly everyone wanted to make their sites faster.
&lt;/p&gt;
&lt;h4&gt;My background in performance&lt;/h4&gt;
&lt;p&gt;
I started working with web performance in the mid 2000s at Yahoo!. We had amazing Frontend Engineering experts at Yahoo!, and for the first time, engineering processes on the front-end were as strong as the back-end. In many cases we had to be far more disciplined, because Frontend Engineers do not have the luxury of their code being private and running on pre-selected hardware and software specs.
&lt;/p&gt;
&lt;p&gt;
At the time, Yahoo! had a performance team of one person &amp;mdash; Steve &quot;Chief Performance Yahoo&quot; Souders. He&#39;d gotten a small piece of JavaScript to measure front-end performance onto the header of all pages by pretending it was an &quot;Ad&quot;, and Ash Patel, who may have been an SVP at the time, started holding teams accountable for their performance.
&lt;/p&gt;
&lt;h4&gt;Denial&lt;/h4&gt;
&lt;p&gt;
Most sites&#39; first reaction was to deny the results, showing scans from Keynote and Gomez, which at the time only synthetically measured load times from the perspective of well connected backbone agents, and were very far off from the numbers that roundtrip was showing.
&lt;/p&gt;
&lt;h4&gt;The Wall of Shame&lt;/h4&gt;
&lt;p&gt;
I wasn&#39;t working on any public facing properties, but became interested in Steve&#39;s work when he introduced the Wall of Fame/Shame (depending on which way you sorted it). It would periodically show up on the big screen at URLs (the Yahoo! cafeteria). Steve now had a team of 3 or 4, and somehow in late 2007 I managed to get myself transferred into this team.
&lt;/p&gt;
&lt;p&gt;
The Wall of Shame showed a kind of stock-ticker like view where a site&#39;s current performance was compared against its performance from a week ago, and one day we saw a couple of sites (I won&#39;t mention them) jump from the worst position to the best! We quickly visited the sites and timed things with a stop-watch, but they didn&#39;t actually appear much faster. In many instances they might have even been slower. We started looking through the source and saw what was happening.
&lt;/p&gt;
&lt;p&gt;
The sites had discovered AJAX!
&lt;/p&gt;
&lt;h4&gt;Faking it&lt;/h4&gt;
&lt;p&gt;
There was almost nothing loaded on the page before the onload event. The only content was some JavaScript that ran on onload and downloaded the framework and data for the rest of the site. Once loaded, it was a long-lived single page application with far fewer traditional page views.
&lt;/p&gt;
&lt;p&gt;
Site owners argued that it would make the overall experience better, and they weren&#39;t intentionally trying to fake things. Unfortunately we had no way to actually measure this, so we added a way for them to call an API when their initial framework had completed loading. That way we&#39;d get some data to trend over time.
&lt;/p&gt;
&lt;p&gt;
At Yahoo! we had the option of speaking to every site builder and working with them to make things better. Outside, though, is a different matter.
&lt;/p&gt;
&lt;h4&gt;Measuring Business Impact&lt;/h4&gt;
&lt;p&gt;
Once we&#39;d started LogNormal (and continuing with mPulse), and were serving multiple customers, it soon became clear that we&#39;d need both business and engineering champions at each customer site. We needed to sell the business case for performance, but also make sure engineering used it for their benefit rather than gaming the metrics. We started correlating business metrics like revenue, conversions, and activity with performance. There is no cheap way to game these metrics because they depend on the behaviour of real users.
&lt;/p&gt;
&lt;p&gt;
Sites that truly cared about performance, and the business impact of that performance, worked hard to make their sites faster.
&lt;/p&gt;
&lt;p&gt;
This changed when Google started using speed as a ranking signal.
&lt;/p&gt;
&lt;p&gt;
With this change, sites now had to serve two users, and when in conflict, Real Users lost out to Googlebot. After all, you can&#39;t serve real users if they can&#39;t see your site. Switching to CWV does not change the situation because things like Page Load Time, Largest Contentful Paint, and Layout Shift can all be faked or gamed by clever developers.
&lt;/p&gt;
&lt;h4&gt;Ungameable Metrics&lt;/h4&gt;
&lt;p&gt;
This brings us back to the metrics that we&#39;ve seen couldn&#39;t be gamed. Things like time spent on a site, bounce rate, conversions, and revenue, are an indication of actual user behaviour. Users are only motivated by their ability to complete the task they set out to do, and using this as a ranking signal is probably a better idea.
&lt;/p&gt;
&lt;p&gt;
Unfortunately, activity, conversions, and revenue are also fairly private corporate data. Leaking this data can affect stock prices and clue competitors in to how you&#39;re doing.
&lt;/p&gt;
&lt;h4&gt;User frustration &amp;amp; CrUX&lt;/h4&gt;
&lt;p&gt;
Now the goal of using these signals is to measure user frustration. Google Chrome periodically sends user interaction measurements back to their servers, collected as part of the Chrome User Experience report (&lt;a href=&quot;https://developers.google.com/web/tools/chrome-user-experience-report&quot;&gt;CrUX&lt;/a&gt;). This includes things like the actual user experienced LCP, FID, and CLS. In my opinion, it should also include measures like rage clicks, missed and dead clicks, jank while scrolling, CPU busy-ness, battery drain, etc. Metrics that only come into play while a user is interacting with the site, and that affect or reflect how frustrating the experience may be.
&lt;/p&gt;
&lt;p&gt;
It would also need to have buy-in from a few more browsers. Chrome has huge market share, but doesn&#39;t reflect the experience of all users. Data from mPulse shows that across websites, Chrome only makes up, on average, 44% of page loads. Edge and Safari (including mobile) also have a sizeable share. Heck, even IE has a 3% share on sites where it&#39;s still supported.
&lt;/p&gt;
&lt;p&gt;
&lt;span style=&quot;font-size: 0.8em;&quot;&gt;In the chart below, each box shows the distribution of a browser&#39;s traffic share across sites. The plot includes (in descending order of number of websites with sizeable traffic for that browser) Chrome, Edge, Mobile Safari, Chrome Mobile, Firefox, Safari, Samsung Internet, Chrome Mobile iOS, Google, IE, and Chrome Mobile WebView.&lt;/span&gt;
&lt;img src=&quot;https://bluesmoon.info/images/posts/metrics-game/browser-share.png&quot;
     alt=&quot;Box Plot of browser share across websites.&quot;
     style=&quot;box-shadow:none;width:95%;padding:0;&quot;&gt;
&lt;/p&gt;
&lt;p&gt;
It&#39;s unlikely that other browsers would trust Google with this raw information, so there probably needs to be an independent consortium that collects, anonymizes, and summarizes the data, and makes it available to any search provider.
&lt;/p&gt;
&lt;p&gt;
Using something like the &lt;a href=&quot;https://www.frustrationindex.com/&quot;&gt;Frustration Index&lt;/a&gt; is another way to make it hard to fake ranking metrics without also accidentally making the user experience better.
&lt;/p&gt;
&lt;p&gt;
Comparing these metrics with Googlebot&#39;s measures could hint at whether the metrics are being gamed or not, or perhaps it even lowers the weight of Googlebot&#39;s measures, restricting it only to pages that haven&#39;t received a critical mass of users.
&lt;/p&gt;
&lt;p&gt;
We need to move the balance of ranking power back to the users whose experience matters!
&lt;/p&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/2434178715547971769/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2021/08/the-metrics-game.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/2434178715547971769'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/2434178715547971769'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2021/08/the-metrics-game.html' title='The metrics game'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-6575775365216611195</id><published>2021-08-06T14:28:00.003-04:00</published><updated>2021-08-06T22:54:35.943-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="docker"/><category scheme="http://www.blogger.com/atom/ns#" term="security"/><title type='text'>Safely passing secrets to a RUN command in a Dockerfile</title><content type='html'>&lt;p&gt;
  There may be cases where you need to pass in secrets to a &lt;code&gt;RUN&lt;/code&gt; command in a &lt;code&gt;Dockerfile&lt;/code&gt;, and it&#39;s very important that these secrets not be leaked into the environment or the image. In particular, these secrets should not be stored in the image (either on disk or in the environment, not even in intermediate layers), and they should not show up when using &lt;code&gt;docker history&lt;/code&gt;.
&lt;/p&gt;
&lt;p&gt;
While working on this topic, I found many blog posts that point to pieces that may be used, but nothing that pulls it all together, so I decided to write this post with everything I&#39;ve found. I&#39;ll provide a list of references at the end.
&lt;/p&gt;
&lt;p&gt;
In my case, I needed to temporarily pass a valid odbc.ini file to my Julia code so that I could build a SysImage with the appropriate database query and result parsing functions compiled. I did not want the odbc.ini file available in the image.
&lt;/p&gt;

&lt;h4&gt;Step 0: Make sure you have docker 18.09 or newer&lt;/h4&gt;

&lt;pre&gt;
docker version
&lt;/pre&gt;

&lt;p&gt;
You most likely have a new enough version of docker, but in the odd chance that you&#39;re running a version older than 18.09, please upgrade. My tests were run on 19.03 and 20.10.
&lt;/p&gt;

&lt;h4&gt;Developing the Dockerfile:&lt;/h4&gt;

&lt;h5&gt;Step 1: Specify the Dockerfile syntax&lt;/h5&gt;

&lt;p&gt;
At the top of your &lt;code&gt;Dockerfile&lt;/code&gt; (this has to be the absolute first line), add the following:
&lt;/p&gt;

&lt;pre&gt;
# syntax=docker/dockerfile:1
&lt;/pre&gt;

&lt;p&gt;
This tells &lt;code&gt;docker build&lt;/code&gt; to use the latest 1.x version of the Dockerfile syntax.
&lt;/p&gt;
&lt;p&gt;
There are various docs that specify using 1.2 or 1.0-experimental. These values were valid when the docs were written, but are dated at this point.  Specifying version &lt;code&gt;1&lt;/code&gt; tells &lt;code&gt;docker build&lt;/code&gt; to use whatever is latest on the 1.x tree, so you can still use 1.3, 1.4, etc.  Specifying 1.2 restricts it to the 1.2.x tree.
&lt;/p&gt;

&lt;h5&gt;Step 2: Mount a secret file where you need it&lt;/h5&gt;
&lt;p&gt;
At the &lt;code&gt;RUN&lt;/code&gt; command where you need a secret, &lt;code&gt;--mount&lt;/code&gt; it as follows:
&lt;/p&gt;
&lt;pre&gt;
RUN --mount=type=secret,id=mysecret,dst=/path/to/secret.key,uid=1000 your-command-here
&lt;/pre&gt;
&lt;p&gt;
There are a few things in here, which I&#39;ll explain one by one.
&lt;/p&gt;
&lt;dl&gt;
  &lt;dt&gt;type=secret&lt;/dt&gt;
  &lt;dd&gt;This tells docker that we&#39;re mounting a secret file from the host (as opposed to a directory or something else)&lt;/dd&gt;
  &lt;dt&gt;id=mysecret&lt;/dt&gt;
  &lt;dd&gt;This is any string you&#39;d like. It has to match the &lt;code&gt;id&lt;/code&gt; passed in on the &lt;code&gt;docker build&lt;/code&gt; command line&lt;/dd&gt;
  &lt;dt&gt;dst=/path/to/secret.key&lt;/dt&gt;
  &lt;dd&gt;This is where you&#39;d like the secret file to be accessible. Any file already at this location will be &lt;em&gt;temporarily hidden&lt;/em&gt; while the secret file is mounted, so it&#39;s safe to use a location that your code will expect at run time.&lt;/dd&gt;
  &lt;dt&gt;uid=1000&lt;/dt&gt;
  &lt;dd&gt;This is the userid that should own the file. This defaults to &lt;code&gt;0 (root)&lt;/code&gt;, so is useful if your command runs as a different user. You can also specify a &lt;code&gt;gid&lt;/code&gt;&lt;/dd&gt;
&lt;/dl&gt;
&lt;p&gt;
The full list of supported parameters for secret mounts is available at &lt;a href=&quot;https://github.com/moby/buildkit/blob/master/frontend/dockerfile/docs/syntax.md#run---mounttypesecret&quot;&gt;the buildkit github page&lt;/a&gt;
&lt;/p&gt;
&lt;p&gt;
You can add the same &lt;code&gt;--mount&lt;/code&gt; at different locations in your &lt;code&gt;Dockerfile&lt;/code&gt;, and with different &lt;code&gt;dst&lt;/code&gt; and &lt;code&gt;uid&lt;/code&gt; values. The file is mounted only for the duration of that &lt;code&gt;RUN&lt;/code&gt; command and not persisted to any layers.
&lt;/p&gt;
&lt;h4&gt;Running &lt;code&gt;docker build&lt;/code&gt;&lt;/h4&gt;

&lt;h5&gt;Step 3: Set the environment variable&lt;/h5&gt;
&lt;p&gt;
This step is optional on newer versions of Docker.
&lt;/p&gt;
&lt;p&gt;
Once you&#39;re ready to run &lt;code&gt;docker build&lt;/code&gt;, tell &lt;code&gt;docker&lt;/code&gt; to use &lt;code&gt;BuildKit&lt;/code&gt;
&lt;/p&gt;
&lt;pre&gt;
DOCKER_BUILDKIT=1
&lt;/pre&gt;
&lt;p&gt;
You can either put this right before running the command, or export it into your shell.
&lt;/p&gt;
&lt;h5&gt;Step 4: Run &lt;code&gt;docker build&lt;/code&gt; with your secret file&lt;/h5&gt;

&lt;pre&gt;
docker build --secret id=mysecret,src=/full/path/to/secret.key .
&lt;/pre&gt;
&lt;p&gt;
It&#39;s important to note that tilde (&lt;code&gt;~&lt;/code&gt;) expansion does not work here. You can use an absolute or relative path, but you cannot use expansion.
&lt;/p&gt;
&lt;p&gt;
That&#39;s IT!!!
&lt;/p&gt;
&lt;h4&gt;Jenkins&lt;/h4&gt;
&lt;p&gt;
If you run your docker builds through jenkins, you&#39;ll need a few more steps. The bulk of it is documented in this &lt;a href=&quot;https://docs.cloudbees.com/docs/cloudbees-ci/latest/cloud-secure-guide/injecting-secrets&quot;&gt;Cloudbees-CI article about injecting secrets&lt;/a&gt;.
&lt;/p&gt;
&lt;p&gt;
Once you&#39;ve gotten your secret file into Jenkins, and bound it to an environment variable in your Build Environment, you have to update the &lt;code&gt;docker build&lt;/code&gt; command to use this variable instead.
&lt;/p&gt;
&lt;p&gt;
For example, if we bound the secret file to a variable called &lt;code&gt;MYSECRETFILE&lt;/code&gt;, then we&#39;d change our build command to:
&lt;/p&gt;
&lt;pre&gt;
docker build --secret id=mysecret,src=${MYSECRETFILE} .
&lt;/pre&gt;


&lt;h4&gt;References&lt;/h4&gt;
&lt;p&gt;
These links were very useful in figuring out this solution.
&lt;/p&gt;
&lt;ul&gt;
  &lt;li&gt;&lt;a href=&quot;https://pythonspeed.com/articles/docker-build-secrets/&quot;&gt;Don’t leak your Docker image’s build secrets &lt;em&gt;by Itamar Turner-Trauring&lt;/em&gt;&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://stackoverflow.com/questions/59143685/consume-secret-inside-dockerfile/59143954#59143954&quot;&gt;Slava Semushin on Stackoverflow&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://docs.docker.com/develop/develop-images/build_enhancements/#overriding-default-frontends&quot;&gt;Dockerfile documentation about secrets&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://github.com/moby/buildkit/blob/master/frontend/dockerfile/docs/syntax.md&quot;&gt;BuildKit specific syntax for Dockerfiles&lt;/a&gt; (these docs are not in the Dockerfile docs)&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://docs.cloudbees.com/docs/cloudbees-ci/latest/cloud-secure-guide/injecting-secrets&quot;&gt;Cloudbees-CI article about injecting secrets into Jenkins&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/6575775365216611195/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2021/08/safely-passing-secrets-to-run-command.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6575775365216611195'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6575775365216611195'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2021/08/safely-passing-secrets-to-run-command.html' title='Safely passing secrets to a RUN command in a Dockerfile'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-5655816262604601806</id><published>2021-02-17T00:00:00.003-05:00</published><updated>2021-02-17T00:00:36.646-05:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="bigsur"/><category scheme="http://www.blogger.com/atom/ns#" term="filesystem"/><category scheme="http://www.blogger.com/atom/ns#" term="macosx"/><category scheme="http://www.blogger.com/atom/ns#" term="unix"/><title type='text'>Recovering from Big Sur upgrade snafu</title><content type='html'>&lt;p&gt;
Apple recently pushed out a new release of MacOS called Big Sur. Unfortunately, the upgrade process is problematic.
Specifically, the upgrader does not check for the required disk space before starting the upgrade, and if the target system doesn&#39;t have enough disk space (35GB or so), then the upgrade fails partway through, leaving your system in a mostly unusable state.
&lt;/p&gt;
&lt;p&gt;
This is what happened to me.
&lt;/p&gt;
&lt;h4&gt;My environment&lt;/h4&gt;
&lt;ul&gt;
  &lt;li&gt;The system was a 13&quot; Macbook with a 128GB SSD drive. 128 is pretty small and doesn&#39;t leave much space for too many large items.&lt;/li&gt;
  &lt;li&gt;The system had just a single user.&lt;/li&gt;
  &lt;li&gt;At the start of the upgrade, the system had about 13GB of free disk space (&gt;10%).&lt;/li&gt;
  &lt;li&gt;Desktop, Documents and Photos were backed up to iCloud, but Downloads weren&#39;t, and some very large photos &amp;amp; videos had been removed from iCloud to save space there, so they only existed locally.&lt;/li&gt;
&lt;/ul&gt;

&lt;h4&gt;Prior discussion&lt;/h4&gt;
&lt;p&gt;
Mr. Macintosh has published &lt;a href=&quot;https://mrmacintosh.com/big-sur-upgrade-not-enough-hd-space-serious-issue-possible-data-loss/&quot;&gt;a very detailed explanation of the issue&lt;/a&gt; and various ways to get around it without any data loss. This is a very good article that got me very far in my investigation. I was lucky that the latest updates had been posted just a few hours before I hit the problem myself.
&lt;/p&gt;
&lt;p&gt;
Unfortunately, none of the suggested fixes worked for me.
&lt;/p&gt;
&lt;ol&gt;
  &lt;li&gt;I couldn&#39;t mount the drive in Target Disk Mode as my password wouldn&#39;t work (the password still worked when logging in locally, but that took me back to the upgrade loop).&lt;/li&gt;
  &lt;li&gt;I couldn&#39;t start up the system in Recovery Mode as it wanted a password, but again, wouldn&#39;t accept the password (the same password that worked when fully booting up).&lt;/li&gt;
  &lt;li&gt;I couldn&#39;t access the disk when booting from an external startup disk because of the same issue.&lt;/li&gt;
&lt;/ol&gt;
&lt;p&gt;
Many posts I found online seemed to suggest that a firmware password was required, but I&#39;d never set this up.
&lt;/p&gt;

&lt;h4&gt;Single User Mode&lt;/h4&gt;
&lt;p&gt;
Eventually, what showed the most promise was booting into Single User Mode and then fiddling around with all available disk devices.
&lt;/p&gt;

&lt;h5&gt;Password worked for Single User Mode&lt;/h5&gt;
&lt;ol&gt;
  &lt;li&gt;To start up in Single User Mode, press Cmd+S when starting up until the Apple logo shows up.&lt;/li&gt;
  &lt;li&gt;The system prompts you for a password, and my password did in fact work in this mode.&lt;/li&gt;
  &lt;li&gt;After signing in, you&#39;re dropped into a unix shell.&lt;/li&gt;
  &lt;li&gt;There&#39;s only a basic file system mounted, which contains a limited number of unix commands and none of your data.&lt;/li&gt;
&lt;/ol&gt;

&lt;h5&gt;Mount the Data partition&lt;/h5&gt;
&lt;p&gt;
Once in single user mode, I had to mount my data partition. I first used the &lt;code&gt;mount&lt;/code&gt; command to see what was already mounted.
It showed that the only mounted device was &lt;code&gt;/dev/disk1s1&lt;/code&gt;. I assumed that my Data partition would be &lt;code&gt;/dev/disk1s2&lt;/code&gt; and that it would have the same filesystem, and I chose a convenient mount point:
&lt;/p&gt;
&lt;pre&gt;
# mount -t apfs /dev/disk1s2 /System/Volumes/Data
&lt;/pre&gt;
&lt;p&gt;
Miraculously, this did not ask me for a password, and mounted my Data partition. I was able to look through the files and identify potential targets to remove. I also noticed that the disk was now completely full (0 bytes free). This was due to the Big Sur installer, which took up 11GB, and then added a few files, using up the entire 13GB that I had available.
&lt;/p&gt;
&lt;p&gt;
Things were getting a little cumbersome here as most of the unix commands I needed to use were not on the primary partition, but on the mounted partition, so I added the appropriate folders to the unix &lt;code&gt;PATH&lt;/code&gt; environment variable:
&lt;/p&gt;
&lt;pre&gt;
PATH=&quot;$PATH&quot;:/System/Volumes/Data/usr/bin
&lt;/pre&gt;
&lt;p&gt;
I was starting to see that choosing a 3 level deep path as my mount point perhaps wasn&#39;t a great idea. I also learned that while the screen is quite wide, the terminal environment is set to show 80 columns of text, and goes into very weird line wrapping issues if you type past that. It&#39;s even worse if you try tab completion at this point.
&lt;/p&gt;

&lt;h5&gt;Transferring large files&lt;/h5&gt;
&lt;p&gt;
Some of the large files &amp;amp; folders I identified were downloaded packages that could be removed. Unfortunately this only got me 2GB back.
To get enough space back, I&#39;d have to remove some photos and videos that weren&#39;t stored on iCloud. I figured I&#39;d copy them over to an SD card and then could delete them.
&lt;/p&gt;
&lt;p&gt;
I popped in the SD Card, and the kernel displayed some debug messages on the terminal. It told me that the card was in &lt;code&gt;/dev/disk4&lt;/code&gt;, so I tried mounting that at a random empty directory:
&lt;/p&gt;
&lt;pre&gt;
# mount -t exfat /dev/disk4 /System/VM
&lt;/pre&gt;
&lt;p&gt;
This did not work!
&lt;/p&gt;

&lt;h5&gt;No SD Cards in Single User Mode&lt;/h5&gt;
&lt;p&gt;
By default, SD Cards are formatted with an EXFAT file system (the kind used by Windows and all digital cameras). Unfortunately, you cannot mount an EXFAT filesystem in Single User Mode as the &lt;code&gt;exfatfs&lt;/code&gt; driver isn&#39;t compiled into the kernel. It&#39;s loaded up as a dynamic module when required. This only works when booting in standard mode with a kernel that allows dynamic loading. Single User Mode does not.
&lt;/p&gt;

&lt;h5&gt;Reformat the SD Card&lt;/h5&gt;
&lt;p&gt;
This was a brand new SD Card, so I decided to reformat it as an Apple file system. I used a different Macbook to do this, however my first attempt didn&#39;t work. It isn&#39;t sufficient to just format the SD Card, you also need to partition it, and that&#39;s where the filesystem is created.
&lt;/p&gt;
&lt;p&gt;
I created a single APFS partition across the entire SD Card and then tried mounting it.
&lt;/p&gt;
&lt;p&gt;
Unfortunately, now it was no longer at &lt;code&gt;/dev/disk4&lt;/code&gt; even though that&#39;s what the kernel debug messages said. Looking at &lt;code&gt;/dev/disk*&lt;/code&gt; showed me that &lt;code&gt;/dev/disk5s1&lt;/code&gt; was a potential candidate.
&lt;/p&gt;
&lt;pre&gt;
# mount -t apfs /dev/disk5s1 /System/VM
&lt;/pre&gt;
&lt;p&gt;
Finally, this worked. I was able to copy my files over, and remove them from the Data partition. This freed up about 45GB, which allowed me to continue with the upgrade.
&lt;/p&gt;
&lt;p&gt;
After the upgrade completed, I appear to have 75GB free. I haven&#39;t had a chance to check where the space has changed. I also plan to permanently use the SD Card (256GB) as an external hard drive.
&lt;/p&gt;
</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/5655816262604601806/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2021/02/recovering-from-big-sur-upgrade-snafu.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/5655816262604601806'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/5655816262604601806'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2021/02/recovering-from-big-sur-upgrade-snafu.html' title='Recovering from Big Sur upgrade snafu'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total><georss:featurename>Cambridge, MA, USA</georss:featurename><georss:point>42.3736158 -71.10973349999999</georss:point><georss:box>14.063381963821158 -106.26598349999999 70.683849636178849 -35.95348349999999</georss:box></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-8396715189426318377</id><published>2020-11-18T10:44:00.001-05:00</published><updated>2021-02-16T23:14:25.529-05:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="emotion"/><category scheme="http://www.blogger.com/atom/ns#" term="perception"/><category scheme="http://www.blogger.com/atom/ns#" term="performance"/><category scheme="http://www.blogger.com/atom/ns#" term="psychology"/><category scheme="http://www.blogger.com/atom/ns#" term="rum"/><category scheme="http://www.blogger.com/atom/ns#" term="ux"/><title 
type='text'>Understanding Emotion for Happy Users</title><content type='html'>&lt;h4 class=&quot;subtitle&quot;&gt;How does your site make your users feel?&lt;/h4&gt;
&lt;h4&gt;Introduction&lt;/h4&gt;
&lt;p&gt;
So you’ve come here for a post about performance, but here I am talking about emotion… what gives? I hope that if you haven’t already, then as this post progresses, you’ll see that performance and emotion are closely intertwined.
&lt;/p&gt;
&lt;p&gt;
While we may be web builders, our goal is to run a business that provides services or products to real people. The website we build is a means of connecting people to that service or product.
&lt;/p&gt;
&lt;h5&gt;The way things are…&lt;/h5&gt;
&lt;p&gt;
The art and science of measuring the &lt;a href=&quot;https://speakerdeck.com/bluesmoon/metrics-that-matter?slide=4&quot;&gt;effects of signal latency on real users is now about 250 years old&lt;/a&gt;. We now call this Real User Measurement, or RUM for short, and it’s come a long way since Steve Souders’ early work at Yahoo.
&lt;/p&gt;
&lt;p&gt;
Browsers now provide us with many APIs to fetch performance metrics that help site owners make sites faster. Concurrently, the Core Web Vitals initiative from Google helps identify metrics that most affect the user experience.
&lt;/p&gt;
&lt;p&gt;
These metrics, while useful operationally, don’t give us a clear picture of the user experience, or why we need to optimise them for our site in particular. They don’t answer the business or human questions of, “Why should we invest in web performance?” (v/s for example, a feature that customers really want), or even more specifically, “What should we work on first?”.
&lt;/p&gt;
&lt;p&gt;
Andy Davies recently &lt;a href=&quot;https://andydavies.me/blog/2020/10/12/strengthening-the-link-between-site-speed-and-business-outcomes/&quot;&gt;published a post&lt;/a&gt; about the link between site speed and business outcomes…
&lt;/p&gt;
&lt;blockquote class=&quot;person&quot; style=&quot;background-image: url(&#39;https://i1.bluesmoon.info/friends/AndyDavies2.jpg&#39;);&quot;&gt;
Context influences experience,&lt;br&gt;
Experience influences behaviour,&lt;br&gt;
Behaviour influences business outcomes.
&lt;/blockquote&gt;
&lt;p&gt;
All of the metrics we collect and optimise for deal with context, and we spend very little time measuring and optimising the rest of the flow.
&lt;/p&gt;
&lt;h5&gt;Switching Hats&lt;/h5&gt;
&lt;p&gt;
Over the last decade working on &lt;a href=&quot;https://github.com/akamai/boomerang&quot;&gt;boomerang&lt;/a&gt; and &lt;a href=&quot;http://www.akamai.com/mpulse&quot;&gt;mPulse&lt;/a&gt;, we slowly came to the realisation that we’ve been approaching performance metrics from a developer centric view. We’d been drawing on our experience as developers – users who have browser dev tools shortcuts committed to  muscle memory. We were measuring and optimising the metrics that were useful and easy to collect from a developer’s point of view.
&lt;/p&gt;
&lt;p&gt;
Once we switched hats to draw on our experiences as consumers of the web, the metrics that really matter became clearer. We started asking better questions...
&lt;/p&gt;
&lt;ul&gt;
  &lt;li&gt;What does it mean that performance improved by 100ms?&lt;/li&gt;
  &lt;li&gt;Are all 100ms the same?&lt;/li&gt;
  &lt;li&gt;Do all users perceive time the same way?&lt;/li&gt;
  &lt;li&gt;Is performance all that matters?&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;
In this post, we’ll talk about measuring user experience and its effects on behaviour, what we can infer from that behaviour, and how it affects business outcomes.
&lt;/p&gt;
&lt;h4&gt;Delight &amp;amp; Frustration&lt;/h4&gt;
&lt;p&gt;
In &lt;a href=&quot;https://books.google.com/books?id=_Hf_Jk44Et8C&amp;pg=PA91&amp;dq=user+frustration+behavior&quot;&gt;Group Psychology and the Analysis of the Ego&lt;/a&gt;, Freud notes that &lt;em&gt;“Frustration occurs when there is an inhibiting condition that interferes with or stops the realization of a goal.”&lt;/em&gt;
&lt;/p&gt;
&lt;p&gt;
Users visit our sites to accomplish a goal. Perhaps they’re doing research to act on later, perhaps they want to buy something, perhaps they’re looking to share an article they read a few days ago.
&lt;/p&gt;
&lt;p&gt;
Anything that slows down or prevents the user from accomplishing this goal can cause frustration. On the other hand, making their goal easy to find and achieve can be delightful.
&lt;/p&gt;
&lt;p&gt;
How a user feels when using our site affects whether they’ll come back and “convert” into customers (however you may define convert).
&lt;/p&gt;
&lt;h5&gt;The Link Between Latency &amp;amp; Frustration&lt;/h5&gt;
&lt;p&gt;
In 2013, Tammy Everts and her team at Radware ran a &lt;a href=&quot;https://blog.radware.com/applicationdelivery/applicationaccelerationoptimization/2013/12/mobile-web-stress-the-impact-of-network-speed-on-emotional-engagement-and-brand-perception-report/&quot;&gt;usability lab experiment&lt;/a&gt;. The study hooked participants up to EEG devices, and asked them to shop on certain websites. Half the users had an artificial delay added to their browsing experience and neither group was made aware of the performance changes. They all believed they were testing the usability of the sites. The study showed that...
&lt;/p&gt;
&lt;blockquote class=&quot;person&quot; style=&quot;background-image: url(&#39;https://i1.bluesmoon.info/friends/TammyEverts.jpg&#39;);&quot;&gt;
A 500ms connection speed delay resulted in up to a 26% increase in peak frustration and up to an 8% decrease in engagement.
&lt;/blockquote&gt;
&lt;p&gt;
Similarly in 2015, Ericsson ConsumerLab neuro research &lt;a href=&quot;https://www.ericsson.com/en/press-releases/2016/2/streaming-delays-mentally-taxing-for-smartphone-users-ericsson-mobility-report&quot;&gt;studied the effects of delayed web pages on mobile users&lt;/a&gt; and found that &lt;em&gt;“Delayed web pages caused a 38% rise in mobile users&#39; heart rates — equivalent to the anxiety of watching a horror movie alone.”&lt;/em&gt;
&lt;/p&gt;
&lt;p&gt;
This may not be everyone’s cup of tea, and the real implication is that users make a conscious or unconscious decision on whether to stick around, return, or leave the site.
&lt;/p&gt;
&lt;h5&gt;Cognitive Bias&lt;/h5&gt;
&lt;p&gt;
Various cognitive biases affect how individual experiences affect perception and behaviour. Understanding these biases, and intervening when an experience tends negative can improve the overall experience.
&lt;/p&gt;
&lt;h6&gt;Perceptual Dissonance&lt;/h6&gt;
&lt;p&gt;
Also known as Sensory Dissonance, Perceptual Dissonance results from unexpected outcomes of common actions.
&lt;/p&gt;
&lt;img src=&quot;https://i1.bluesmoon.info/posts/emotion/IMG_20160424_152738.jpg&quot; style=&quot;width:30%;float:right;margin-left:1em;&quot; alt=&quot;&quot; role=&quot;presentation&quot; loading=&quot;lazy&quot;&gt;
&lt;p&gt;
The brain’s predictive coding is what helps you do things like &lt;em&gt;“figure out if a car coming down the road is going slow enough for you to cross safely”&lt;/em&gt;. A perceptive violation of this coding is useful in that it helps us learn new things, but if that violation breaks long standing “truths”, or if violations are inconsistent, it makes learning impossible, and leads to psychological stress, and frustration.
&lt;/p&gt;
&lt;p&gt;
On the web, users expect websites to behave in a certain way. Links should be clickable, sites should in general scroll vertically, etc. Things like jank while scrolling, nothing happening when a user clicks a link (dead clicks), or a click target moving as the user attempts to click on it (layout shift) cause perceptual dissonance and frustration.
&lt;/p&gt;
&lt;p&gt;
If these bad experiences are consistent, then users come to expect them. Our data shows that users from geographies where the internet is slower than average tend to be more patient with web page loads.
&lt;/p&gt;
&lt;h6&gt;Survivorship Bias&lt;/h6&gt;
&lt;p&gt;
We only measure users who can reach our site. For some users, a very slow experience is better than an unreachable site.
&lt;/p&gt;
&lt;p&gt;
In 2012, after YouTube made their site lighter, &lt;a href=&quot;https://blog.chriszacharias.com/page-weight-matters&quot;&gt;Chris Zacharias found that aggregate performance had gotten worse&lt;/a&gt;. On delving into the data, they found that new users who were previously unable to access the site were now coming in at the long tail. The site appeared slower in aggregate, but the number of users who could use it had gone up.
&lt;/p&gt;
&lt;h6&gt;Negativity Bias&lt;/h6&gt;
&lt;p&gt;
Users are more likely to remember and talk to their friends about their bad experiences with a site than they are about the good ones. We need only run a twitter search for “$BRAND_NAME slow” to see complaints about bad experiences.
&lt;/p&gt;
&lt;p&gt;
Bad experiences are also perceived to be far more intense than equivalent good experiences. To end up with a neutral overall experience, bad experiences need to be balanced with more intense good experiences. A single bad experience over the course of the session makes it harder to result in overall delight.
&lt;/p&gt;
&lt;h6&gt;Active Listening&lt;/h6&gt;
&lt;p&gt;
&lt;a href=&quot;https://affect.media.mit.edu/pdfs/02.klein-moon-picard.pdf&quot;&gt;Research shows that practicing Active Listening&lt;/a&gt; can have a &lt;a href=&quot;https://www.sciencedirect.com/science/article/abs/pii/S1071581904000060?via%3Dihub&quot;&gt;big impact on countering Negativity Bias&lt;/a&gt;. Simply acknowledging when you’ve screwed up and didn’t meet the user’s expectations can alleviate negative perception. If we detect, via JavaScript, that the page is taking too long to transition between loading states, we could perhaps display a message that acknowledges and apologizes for things going slower than expected.
&lt;/p&gt;
&lt;blockquote&gt;
Hey, we realise that it’s taking a little longer than expected to get to what you want. You deserve better. We’re sorry and hope you’ll stick around a bit.
&lt;/blockquote&gt;
&lt;p&gt;
Users will be more forgiving if their pain is acknowledged.
&lt;/p&gt;
&lt;h4&gt;Measuring Emotion&lt;/h4&gt;
&lt;p&gt;
There are many ways we could measure the emotional state of users using our site. These range from active engagement to completely creepy. Naturally not all of these will be applicable for websites...
&lt;/p&gt;
&lt;ul&gt;
  &lt;li&gt;Use affective computing (facial analysis, EEGs, pulse tracking, etc.)&lt;/li&gt;
  &lt;li&gt;Ask the user via a survey popover&lt;/li&gt;
  &lt;li&gt;Business outcomes of behaviour&lt;/li&gt;
  &lt;li&gt;Behavioural analysis&lt;/li&gt;
&lt;/ul&gt;

&lt;h6&gt;Affective Computing&lt;/h6&gt;
&lt;p&gt;
For website owners, affective computing isn’t really in play. Things like &lt;a href=&quot;https://webgazer.cs.brown.edu/&quot;&gt;eye tracking&lt;/a&gt;, &lt;a href=&quot;https://www.emotiv.com/&quot;&gt;wireless brain interfaces&lt;/a&gt;, and other &lt;a href=&quot;https://en.wikipedia.org/wiki/Affective_computing&quot;&gt;affective computing&lt;/a&gt; methodologies are too intrusive. They work well in a lab environment where users consent to this kind of tracking and can be hooked up to measurement devices. This is both inconvenient, and creepy to run on the web.
&lt;/p&gt;

&lt;h6&gt;Ask the user&lt;/h6&gt;
&lt;p&gt;
Asking the user can be effective as shown by a &lt;a href=&quot;https://twitter.com/WikiResearch/status/1241026254058532865&quot;&gt;recent study from Wikipedia&lt;/a&gt;. The study used a very simple Yes/No/No Comment style dialog with randomized order. They found that users’ perceived quality of experience is inversely proportional to median load time. A 4% temporary improvement to page load time resulted in an equally temporary 1% increase in satisfied users.
&lt;/p&gt;
&lt;img src=&quot;https://i1.bluesmoon.info/posts/emotion/wikipedia-perception.png&quot; style=&quot;width:90%;text-align:center;&quot;
     alt=&quot;Area chart of two timeseries: Median loadEventEnd, and Satisfaction Ratio (positive/total). Time axis covers 1 year from Oct 2019 to Oct 2020. More details in the text preceding this image.&quot;
     loading=&quot;lazy&quot;&gt;
&lt;p&gt;
This method requires active engagement by the user and suffers from &lt;a href=&quot;https://en.wikipedia.org/wiki/Selection_bias&quot;&gt;selection bias&lt;/a&gt; and the &lt;a href=&quot;https://en.wikipedia.org/wiki/Hawthorne_effect&quot;&gt;Hawthorne effect&lt;/a&gt;.
&lt;/p&gt;
&lt;p&gt;
It’s hard to quantify what kinds of experiences would reduce the effects of selection bias and result in users choosing to answer the survey, or how you’d want to design the popover to increase self-selection.
&lt;/p&gt;
&lt;p&gt;
The Hawthorne effect, on the other hand, suggests that individuals change the way they react to stimuli if they know they’re being measured or observed.
&lt;/p&gt;

&lt;h6&gt;Business Outcomes&lt;/h6&gt;
&lt;p&gt;
Measuring business outcomes is necessary but it can be hard to identify what context resulted in an outcome. One needs to first understand the intermediate steps of experience and behaviour. Did a user bounce because the experience was bad, or did they just drop in to do some research and will return later to complete a purchase?
&lt;/p&gt;

&lt;h6&gt;Behavioural analysis&lt;/h6&gt;
&lt;p&gt;
Applying the results of lab based research to users actively using a website can help tie experience to behaviour. We first need to introduce some new terms that we’ll define in the paragraphs that follow.
&lt;/p&gt;
&lt;p&gt;
&lt;em&gt;Rage Clicks&lt;/em&gt;, &lt;em&gt;Wild Mouse&lt;/em&gt;, &lt;em&gt;Scrandom&lt;/em&gt;, and &lt;em&gt;Backtracking&lt;/em&gt; are behavioural signals we can use.  In conjunction with when in a page’s life cycle users typically expect different events to take place, they can paint a picture of user expectations and behaviour.
&lt;/p&gt;
&lt;p&gt;
Correlating these metrics with contextual metrics like Core Web Vitals on one hand, and business outcomes on the other can help us tell a more complete story of which performance metrics we should care about and why.
&lt;/p&gt;

&lt;h5&gt;Rage, Frustration &amp;amp; Confusion&lt;/h5&gt;
&lt;p&gt;
To measure Rage, Frustration &amp;amp; Confusion, we look at Rage Clicks, Wild Mouse and Backtracking.
&lt;/p&gt;

&lt;h6&gt;Rage Clicks&lt;/h6&gt;
&lt;p&gt;
Rage Clicks occur when users rapid-fire click on your site. It is the &lt;a href=&quot;https://www.psychologytoday.com/us/blog/hide-and-seek/201205/hell-yes-the-7-best-reasons-swearing&quot;&gt;digital equivalent of cursing to release frustration&lt;/a&gt;. We’ve probably all caught ourselves rage clicking at some point. Click once, nothing happens, click again, still nothing, and then on and on. This could be a result of interaction delays, or of users expecting something to be clickable when it isn&#39;t.
&lt;/p&gt;
&lt;p&gt;
Rage clicks can be measured easily and non-intrusively, and are easy to analyse.
&lt;/p&gt;
&lt;p&gt;
&lt;a href=&quot;https://www.fullstory.com/resources/guide-to-understanding-frustrating-user-experiences-online/&quot;&gt;Fullstory has some great resources around Rage Clicks&lt;/a&gt;.
&lt;/p&gt;

&lt;h6&gt;Wild Mouse&lt;/h6&gt;
&lt;p&gt;
&lt;a href=&quot;https://www.telegraph.co.uk/technology/news/12050481/Websites-could-read-emotions-by-seeing-how-fast-you-move-your-mouse.html&quot;&gt;Research shows that&lt;/a&gt; people who are angry are more likely to use the mouse in a jerky and sudden, but surprisingly slow fashion.
&lt;/p&gt;
&lt;p&gt;
People who feel frustrated, confused or sad are less precise in their mouse movements and move it at different speeds.
&lt;/p&gt;
&lt;p&gt;
&lt;a href=&quot;https://www.academia.edu/3085041/Patterns_of_cursor_movement_for_different_devices&quot;&gt;There are several expected mouse movements while a user traverses a website&lt;/a&gt;. Horizontal and vertical reading patterns are expected and suggest that the user is engaged in your content.
&lt;/p&gt;
&lt;p&gt;
On the other hand, random patterns, or jumping between options in a form can suggest confusion, doubt, and frustration.
See &lt;a href=&quot;https://www.academia.edu/3085041/Patterns_of_cursor_movement_for_different_devices&quot;&gt;Churruca, 2011&lt;/a&gt; for the full study.
&lt;/p&gt;
&lt;p&gt;
The JavaScript library &lt;a href=&quot;https://github.com/miguel-perez/Dawdle.js/&quot;&gt;Dawdle.js&lt;/a&gt; can help classify these mouse patterns.
&lt;/p&gt;

&lt;h6&gt;Scrandom&lt;/h6&gt;
&lt;p&gt;
&lt;a href=&quot;https://cxl.com/blog/user-frustration/#finding-a-needle-in-the-haystack&quot;&gt;Scrandom&lt;/a&gt; is the act of randomly scrolling the page up and down with no particular scroll target. This can indicate that a user is unsure of the content, that the page is too long, or that the user is waiting for something to happen and making sure that the page is still responsive without accidentally clicking anything.
&lt;/p&gt;

&lt;h6&gt;Backtracking&lt;/h6&gt;
&lt;p&gt;
Backtracking is the process of hitting the back button on the web. Users who are confused or lost on your site may hit the back button often to get back to a safe space. This behaviour may manifest itself in different ways, but can often be identified with very long sessions that appear to loop.
&lt;/p&gt;

&lt;h5&gt;Tie this into the Page Load Timeline&lt;/h5&gt;
&lt;p&gt;
In his post on &lt;a href=&quot;https://addyosmani.com/blog/usability/&quot;&gt;Web Page Usability&lt;/a&gt;, &lt;a href=&quot;https://twitter.com/addyosmani&quot;&gt;Addy Osmani&lt;/a&gt; states that loading a page is a progressive journey with four key moments to it: Is it happening? Is it useful? Is it usable? and Is it delightful? And he includes this handy graphic to explain it:
&lt;/p&gt;
&lt;img src=&quot;https://i1.bluesmoon.info/posts/emotion/pw-metrics-strip.png&quot; style=&quot;width:90%;text-align:center;&quot;
     alt=&quot;When did the user feel they could interact? When could they interact? Speed metrics illustrate First Paint, First Contentful Paint, Time to Interactive for a page&quot;
     loading=&quot;lazy&quot;&gt;

&lt;p&gt;
The first three are fairly objective. With only minor differences between browsers, it’s straightforward to pull this information out of standard APIs, and possibly supplement it with custom APIs like User Timing.
&lt;/p&gt;
&lt;p&gt;
We’ve found that over 65% of users expect a site to be usable after elements have started becoming visible but before it is actually Interactive. Contrast that with 30% who will wait until after the onload event has fired.
&lt;/p&gt;

&lt;h5&gt;Correlating Rage with Loading Events&lt;/h5&gt;
&lt;p&gt;
Comparing the points in time when users rage click with the loading timeline above, we see some patterns.
&lt;/p&gt;
&lt;figure&gt;
&lt;img src=&quot;https://i1.bluesmoon.info/posts/emotion/rage-vs-load-events.png&quot; style=&quot;width:90%;text-align:center;&quot;
     alt=&quot;Relative time series showing the intensity of rage clicks tied to when users first interact with a page relative to page load. We also include the First Input Delay as a separate series, and show 25th-75th percentile bands for the First Paint, Largest Contentful Paint, Visually Ready, and Interactive times relative to Page Load.&quot;
     loading=&quot;lazy&quot;&gt;
&lt;figcaption&gt;
The horizontal axis on this chart is time as a relative percent of the full page load time. -50 indicates half of the page load time while +50 is 1.5x the page load time. The vertical axis indicates intensity of rage while point radius indicates probability of rage clicks at that time point. The coloured bars indicate 25th to 75th percentile ranges for the particular timer relative to full page load with the line going through indicating the median.
&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;p&gt;
We see a large amount of rage between content becoming visible and the page becoming interactive. Users expect to be able to interact with the page soon after content becomes visible, and if that expectation isn’t met, it results in rage clicking.
&lt;/p&gt;
&lt;p&gt;
We also see a small stream of rage clicks after the page has completed loading, caused by interaction delays.
&lt;/p&gt;
&lt;p&gt;
There’s a small gap just before the onload event fires. The onload event is when many JavaScript event handlers run, which in turn result in Long Tasks, and increased Interaction Delays. What we’re seeing here is not the absence of any interaction, but survivorship bias where the interactions that happen at that time aren’t captured until later.
&lt;/p&gt;
&lt;p&gt;
The horizontal axis on this chart is relative time along the page load timeline. We looked at various combinations of absolute and relative time across multiple timers, and it was clear that relativity is a stronger model, which brings us to a new metric based on relative timers...
&lt;/p&gt;

&lt;h4&gt;Frustration Index&lt;/h4&gt;
&lt;p&gt;
The &lt;a href=&quot;https://www.frustrationindex.com/&quot;&gt;frustration index&lt;/a&gt;, developed by &lt;a href=&quot;https://www.twitter.com/TimVereecke&quot;&gt;Tim Vereecke&lt;/a&gt;, is a measure based on the relation between loading phases. We’ve seen that once one event occurs, users expect the next to happen within a certain amount of time. If we miss that expectation, the user&#39;s perception is that something is stopping or inhibiting their ability to complete their task, resulting in frustration.
&lt;/p&gt;
&lt;p&gt;
The Frustration Index encapsulates that relationship. The formula we use is constantly under development as research brings new things to light, but it’s helpful to visit the website to understand exactly how it works and see some examples.
&lt;/p&gt;
&lt;p&gt;
So how do we know that this is a good metric to study?
&lt;/p&gt;

&lt;h5&gt;Correlating Rage &amp;amp; Frustration&lt;/h5&gt;
&lt;p&gt;
It turns out that there is a strong correlation (ρ=0.91) between the intensity of rage (vertical axis) that a user expresses and the calculated frustration index (horizontal axis) of the page.
&lt;/p&gt;
&lt;img src=&quot;https://i1.bluesmoon.info/posts/emotion/frustration-vs-rage.png&quot; style=&quot;width:90%;text-align:center;&quot;
     alt=&quot;Scatter Plot showing Frustration Index on the horizontal axis and intensity of rage clicks on the vertical axis. The two variables have a Pearson&#39;s correlation coefficient of 0.91.&quot;
     loading=&quot;lazy&quot;&gt;
&lt;p&gt;
Rather than looking at individual timers for optimization, it is better to consider all timers in cohesion. Improving one of them changes the user’s expectation of when other events should happen and missing that expectation results in frustration.
&lt;/p&gt;
&lt;p&gt;
However, further to this, the formula is something we can apply client-side to determine if we’re meeting expectations, and practice active listening if we’re not.
&lt;/p&gt;

&lt;h5&gt;Correlating Frustration &amp;amp; Business Outcomes&lt;/h5&gt;
&lt;p&gt;
Looking at the correlation between Frustration Index and certain business metrics also shows a pattern.
&lt;/p&gt;
&lt;img src=&quot;https://i1.bluesmoon.info/posts/emotion/frustration-vs-business.png&quot; style=&quot;width:90%;text-align:center;&quot;
     alt=&quot;Double Scatter Plot showing Frustration Index on the horizontal axis and bounce rate on the first vertical axis and average session duration in minutes on the second.&quot;
     loading=&quot;lazy&quot;&gt;
&lt;ul&gt;
  &lt;li&gt;Bounce Rate is proportional to the frustration index with a sharp incline around what we call the LD50 point (for this particular site). ρ&lt;sub&gt;b&lt;/sub&gt;=0.65&lt;/li&gt;
  &lt;li&gt;Average Time spent on the site goes down as frustration increases, again sharply at first and then tapering off. ρ&lt;sub&gt;t&lt;/sub&gt;=-0.49&lt;/li&gt;
&lt;/ul&gt;

&lt;h6&gt;LD&lt;sub&gt;50&lt;/sub&gt;&lt;/h6&gt;
&lt;p&gt;
The LD&lt;sub&gt;50&lt;/sub&gt;, or Median Lethal Dose is a term borrowed from the biological sciences. &lt;a href=&quot;https://www.slideshare.net/buddybrewer/tying-web-performance-data-to-human-behavior&quot;&gt;Buddy Brewer first applied the term to web performance&lt;/a&gt; in 2012, and we’ve been using it ever since.
&lt;/p&gt;
&lt;p&gt;
In biology, it’s the dosage of a toxin that kills off 50% of the sample, be it tumour cells, or mice.
&lt;/p&gt;
&lt;p&gt;
On the web, we think of it more in terms of when 50% of users decide not to move on in their journey. We could apply it to bounce rate, or retention rate, or any other rate that’s important to your site, and the “dose” may be a timer value, or frustration index, or anything else. Depending on the range of the metric in question, we may also use a percentile other than the median, for example, LD&lt;sub&gt;25&lt;/sub&gt; or LD&lt;sub&gt;75&lt;/sub&gt;.
&lt;/p&gt;
&lt;p&gt;
This isn’t a single magic number that works for all websites. It isn’t even a single number that works for all pages on a site or for all users. Different pages and sites have different levels of importance to a user, and a user’s emotional state, or even the state of their device (eg: low battery), when they visit your site can affect how patient they are.
&lt;/p&gt;

&lt;img src=&quot;https://i1.bluesmoon.info/posts/emotion/LD25-frustration-by-country.png&quot; style=&quot;width:50%;box-shadow:none;margin-left:1em;float:right;&quot;
     alt=&quot;Column chart showing the LD25 frustration index value for users from different Geos: US:26, Germany:10, Japan:18, Australia:42, Canada:44.&quot;
     loading=&quot;lazy&quot;&gt;
&lt;h5&gt;Patience is also a Cultural Thing&lt;/h5&gt;
&lt;p&gt;
People from different parts of the world have a different threshold for frustration.
&lt;/p&gt;
&lt;p&gt;
Many of our customers have international audiences and they have separate sites customized for each locale. We find that users from different global regions have different expectations of how fast a site should be.
&lt;/p&gt;
&lt;p&gt;
In this chart, looking at 5 high GDP countries (that we have data for), we see a wide distribution in LD&lt;sub&gt;25&lt;/sub&gt; value across them, ranging from a value of 10 for Germany to the 40s for Australia and Canada. It’s not shown in this chart, but the difference is even wider when we look at LD&lt;sub&gt;50&lt;/sub&gt;, with Germany at 14 and Canada at 100.
&lt;/p&gt;

&lt;h4&gt;So how fast should our site be?&lt;/h4&gt;
&lt;p&gt;
We’ve heard a lot about how our site’s performance affects the user experience, and consequently how people feel when using our site. We’ve seen how the “feel” of a site can affect the business, but what does all of that tell us about how to build our sites?
&lt;/p&gt;
&lt;ul&gt;
  &lt;li&gt;How fast should we be to reduce frustration?&lt;/li&gt;
  &lt;li&gt;What should we be considering in our performance budgets?&lt;/li&gt;
  &lt;li&gt;How do we leave our users feeling happy?&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;
I think these may be secondary questions…
&lt;/p&gt;
&lt;p&gt;
A better question to start with is:
&lt;/p&gt;
&lt;p style=&quot;font-size: 3em; font-style: italic; font-family: Garamond, serif; text-align: center; color: #555;margin:2em 1em;&quot;&gt;
  Will adding a new feature &lt;em style=&quot;color:#e91c63;&quot;&gt;delight&lt;/em&gt; or &lt;em style=&quot;color:#4170a0&quot;&gt;frustrate&lt;/em&gt; the user?
&lt;/p&gt;


&lt;h4&gt;References&lt;/h4&gt;
&lt;ul&gt;
  &lt;li&gt;&lt;a href=&quot;https://en.wikipedia.org/wiki/Computer_rage&quot;&gt;Computer Rage on Wikipedia&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.psychologyinaction.org/psychology-in-action-1/2015/12/27/the-psychology-of-computer-rage&quot;&gt;The Psychology of Computer Rage&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.bulldogreporter.com/over-a-third-of-americans-confess-to-verbal-or-physical-abuse-of-their-computers-ne/&quot;&gt;A third of Americans confess to verbal or physical abuse of their computers&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.telegraph.co.uk/technology/5086091/Computer-rage-affects-more-than-half-of-Britons.html&quot;&gt;Computer Rage affects more than half of Britons&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://books.google.com/books?id=_Hf_Jk44Et8C&amp;pg=PA91&amp;dq=user+frustration+behavior&quot;&gt;Social and Psychological Influences on Computer User Frustration&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://blog.radware.com/applicationdelivery/applicationaccelerationoptimization/2013/12/mobile-web-stress-the-impact-of-network-speed-on-emotional-engagement-and-brand-perception-report/&quot;&gt;The impact of network speed on emotional engagement&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.ericsson.com/en/press-releases/2016/2/streaming-delays-mentally-taxing-for-smartphone-users-ericsson-mobility-report&quot;&gt;Ericsson ConsumerLab neuro research 2015&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://en.wikipedia.org/wiki/Negativity_bias&quot;&gt;Negativity Bias on Wikipedia&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://uxdesign.cc/the-fastest-way-to-pinpoint-frustrating-user-experiences-1f8b95bc94aa&quot;&gt;The fastest way to pinpoint frustrating user experiences&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.psychologytoday.com/us/blog/hide-and-seek/201205/hell-yes-the-7-best-reasons-swearing&quot;&gt;The 7 best reasons for swearing&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.interaction-design.org/literature/topics/emotional-design&quot;&gt;Emotional Design&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.frustrationindex.com/&quot;&gt;Frustration Index&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://blog.chriszacharias.com/page-weight-matters&quot;&gt;Page Weight Matters&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;http://alistapart.com/article/improving-ux-through-front-end-performance/&quot;&gt;Improving UX through Front End Performance&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.fullstory.com/resources/guide-to-understanding-frustrating-user-experiences-online/&quot;&gt;Guide to understanding frustrating user experiences online&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://affect.media.mit.edu/pdfs/02.klein-moon-picard.pdf&quot;&gt;This computer responds to user frustration: Theory, design &amp;amp; results. [PDF]&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://doi.org/10.1016/j.ijhcs.2004.01.002&quot;&gt;Toward a more civilized design: studying the effects of computers that apologize&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.telegraph.co.uk/technology/news/12050481/Websites-could-read-emotions-by-seeing-how-fast-you-move-your-mouse.html&quot;&gt;Websites could read emotions by seeing how fast you move your mouse&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://conversionxl.com/blog/user-frustration/&quot;&gt;Your users are frustrated&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.academia.edu/3085041/Patterns_of_cursor_movement_for_different_devices&quot;&gt;Patterns of cursor movement for different devices [PDF]&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://nonsns.github.io/paper/rossi19www.pdf&quot;&gt;Wikipedia Paper on User Satisfaction v/s Performance&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.mediawiki.org/wiki/Wikimedia_Performance_Team/Perceived_Performance&quot;&gt;Wikimedia Performance Team: Perceived Performance&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://andydavies.me/blog/2020/10/12/strengthening-the-link-between-site-speed-and-business-outcomes/&quot;&gt;Strengthening The Link between Site Speed and Business Outcomes&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://addyosmani.com/blog/usability/&quot;&gt;Web page usability matters&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://en.wikipedia.org/wiki/Median_lethal_dose&quot;&gt;Median Lethal Dose or LD&lt;sub&gt;50&lt;/sub&gt;&lt;/a&gt;&lt;/li&gt;
  &lt;li&gt;&lt;a href=&quot;https://www.slideshare.net/buddybrewer/tying-web-performance-data-to-human-behavior&quot;&gt;Tying web performance data to human behavior&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;h4&gt;Acknowledgements&lt;/h4&gt;
&lt;p&gt;
Thanks to Andy Davies, Nic Jansma, Paul Calvano, Tim Vereecke, and Cliff Crocker for feedback on an earlier draft of this post.
&lt;/p&gt;
&lt;p&gt;
Thanks also to the innumerable practitioners whose research I&#39;ve built upon to get here including Addy Osmani, Andy Davies, Gilles Dubuc, Lara Hogan, Nicole Sullivan, Silvana Churruca, Simon Hearne, Tammy Everts, Tim Kadlec, Tim Vereecke, the folks from Fullstory, and many others that I&#39;m sure I&#39;ve missed.
&lt;/p&gt;


</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/8396715189426318377/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2020/11/understanding-emotion-for-happy-users.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/8396715189426318377'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/8396715189426318377'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2020/11/understanding-emotion-for-happy-users.html' title='Understanding Emotion for Happy Users'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total><georss:featurename>Cambridge, MA, USA</georss:featurename><georss:point>42.3736158 -71.10973349999999</georss:point><georss:box>14.063381963821158 -106.26598349999999 70.683849636178849 -35.95348349999999</georss:box></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-6914554194494142649</id><published>2019-10-07T12:00:00.000-04:00</published><updated>2019-10-07T12:00:06.461-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="correlation"/><category scheme="http://www.blogger.com/atom/ns#" term="database"/><category scheme="http://www.blogger.com/atom/ns#" term="SQL"/><category scheme="http://www.blogger.com/atom/ns#" term="statistics"/><title type='text'>Implementing Spearman&#39;s Rank Correlation in SQL</title><content type='html'>&lt;p&gt;
In my last post, I showed &lt;a href=&quot;https://tech.bluesmoon.info/2019/10/implementing-corr-as-database-window.html&quot;&gt;how to implement
Pearson&#39;s Correlation as an SQL Window function with window frame support&lt;/a&gt;. In this post, I&#39;ll follow up with implementing
&lt;a href=&quot;https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient&quot;&gt;Spearman&#39;s Rank correlation co-efficient&lt;/a&gt; in
SQL.
&lt;/p&gt;
&lt;p&gt;
While Pearson&#39;s correlation looks for linear relationships between two vectors (ie, you wouldn&#39;t use it for exponential relationships),
Spearman&#39;s rank correlation looks for monotonicity, or in plain English, do the two values go up &amp;amp; down together?
&lt;/p&gt;
&lt;p&gt;
So here&#39;s the really cool part.  Spearman&#39;s Rank correlation co-efficient is the Pearson&#39;s correlation co-efficient of the ranks of the
two vectors. We already know how to calculate Pearson&#39;s correlation co-efficient, so what we need to do here is first calculate ranks
of our vectors.
&lt;/p&gt;
&lt;p&gt;
We can do this using the SQL &lt;code&gt;RANK&lt;/code&gt; function, which also works as a window function with window frame support:
&lt;/p&gt;
&lt;pre&gt;
RANK() OVER (PARTITION BY &amp;lt;partition cols&amp;gt; ORDER BY x ASC) as R_X,

RANK() OVER (PARTITION BY &amp;lt;partition cols&amp;gt; ORDER BY y ASC) as R_Y,
&lt;/pre&gt;
&lt;p&gt;
The two important things to note here are that &lt;code&gt;RANK()&lt;/code&gt; does not take a parameter, instead you specify what you want to rank on
in the &lt;code&gt;ORDER BY&lt;/code&gt; clause, and secondly, make sure both parameters are ordered in the same direction, &lt;code&gt;ASC&lt;/code&gt; or
&lt;code&gt;DESC&lt;/code&gt;.
&lt;/p&gt;
&lt;p&gt;
Now even though the &lt;code&gt;RANK()&lt;/code&gt; function supports window frames, you don&#39;t want to use them here.  This is so because if you&#39;re
using sliding windows, each row will have a different rank depending on the window, and we won&#39;t be able to correlate an outer window.
&lt;/p&gt;
&lt;p&gt;
Once we have the ranks in an inner query, we can run either the standard &lt;code&gt;CORR&lt;/code&gt; function, or the windowed &lt;code&gt;CORR&lt;/code&gt;
that we developed in &lt;a href=&quot;https://tech.bluesmoon.info/2019/10/implementing-corr-as-database-window.html&quot;&gt;the previous post&lt;/a&gt; on these
derived columns instead:
&lt;/p&gt;
&lt;pre&gt;
SELECT CORR(R_X, R_Y) FROM (
    SELECT
        RANK() OVER (PARTITION BY &amp;lt;partition cols&amp;gt; ORDER BY x ASC) as R_X,

        RANK() OVER (PARTITION BY &amp;lt;partition cols&amp;gt; ORDER BY y ASC) as R_Y
      FROM ...
)
&lt;/pre&gt;
&lt;p&gt;
If implementing this as a window function, then use &lt;code&gt;R_X&lt;/code&gt; and &lt;code&gt;R_Y&lt;/code&gt; as the inputs to the &lt;code&gt;SUM()&lt;/code&gt; functions
with an additional nested query.
&lt;/p&gt;
&lt;p&gt;
I hope this was helpful, leave a comment or tweet &lt;a href=&quot;https://twitter.com/bluesmoon&quot;&gt;@bluesmoon&lt;/a&gt; if you&#39;d like to chat.
&lt;/p&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/6914554194494142649/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2019/10/implementing-spearmans-rank-correlation.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6914554194494142649'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6914554194494142649'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2019/10/implementing-spearmans-rank-correlation.html' title='Implementing Spearman&#39;s Rank Correlation in SQL'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-3450182262458030296</id><published>2019-10-02T13:56:00.000-04:00</published><updated>2019-10-07T12:56:15.243-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="database"/><category scheme="http://www.blogger.com/atom/ns#" term="SQL"/><category scheme="http://www.blogger.com/atom/ns#" term="statistics"/><title type='text'>Implementing Pearson&#39;s CORR as a database window function</title><content type='html'>&lt;p&gt;
I recently needed to find the &lt;a href=&quot;https://en.wikipedia.org/wiki/Pearson_correlation_coefficient&quot;&gt;Pearson&#39;s Correlation Coefficient&lt;/a&gt; between two columns in a
Snowflake database. Now Snowflake and PostgreSQL both support a &lt;code&gt;CORR&lt;/code&gt; aggregate function that correlates along a &lt;code&gt;GROUP BY&lt;/code&gt;.
Snowflake additionally supports &lt;code&gt;CORR&lt;/code&gt; as a window function, but only for use with &lt;code&gt;PARTITION BY&lt;/code&gt;. It does not support window frames.
Other databases like MySQL do not have a CORR function at all.
See &lt;a href=&quot;https://www.oreilly.com/library/view/sql-in-a/9780596155322/re56.html&quot;&gt;SQL in a nutshell&lt;/a&gt; for more information on database support.
&lt;/p&gt;
&lt;p&gt;
If you want to know about window functions, &lt;a href=&quot;https://twitter.com/b0rk&quot;&gt;Julia Evans (@b0rk)&lt;/a&gt; has a great &lt;a href=&quot;https://twitter.com/b0rk/status/1179419244808851462&quot;&gt;SQL tip on them&lt;/a&gt;.
&lt;/p&gt;
&lt;p&gt;
In my case, I needed to find the Pearson&#39;s coefficient along a sliding window of rows.  ie, for a list of &lt;code&gt;N&lt;/code&gt; rows, I needed &lt;code&gt;N-k&lt;/code&gt;
coefficients, one for each sliding window of size &lt;code&gt;k&lt;/code&gt;. As far as I could tell, only Oracle supports this functionality, and I wasn&#39;t that desperate,
so I set about figuring out how to implement it myself.
&lt;/p&gt;
&lt;p&gt;
Fortunately, Pearson&#39;s correlation coefficient is calculated using a very simple algebraic function. The full details are on the Wikipedia page linked above,
but it&#39;s helpful to break it down into manageable pieces.
&lt;/p&gt;
&lt;p&gt;
At a high level, the coefficient &lt;code&gt;ρ&lt;/code&gt; (the Greek letter rho) is defined as the covariance of the two vectors divided by the product of
their standard deviations, or mathematically:
&lt;/p&gt;
&lt;p&gt;
&lt;code&gt;ρ(x,y) = cov(x, y) / (σ(x) * σ(y))&lt;/code&gt;
&lt;/p&gt;
&lt;p&gt;
In SQL, this would be
&lt;/p&gt;
&lt;p&gt;
&lt;code&gt;COVAR_POP(y, x) / (STDDEV_POP(x) * STDDEV_POP(y))&lt;/code&gt;
&lt;/p&gt;
&lt;p&gt;
This simplifies things a bit since &lt;code&gt;STDDEV_POP&lt;/code&gt; does support window frames, but &lt;code&gt;COVAR_POP&lt;/code&gt; does not. That reduces our problem to implementing
&lt;code&gt;COVAR_POP&lt;/code&gt; as a window function.
&lt;/p&gt;
&lt;p&gt;
This is much simpler, because the covariance uses sum and count, both of which are implemented as window functions with window frame support:
&lt;/p&gt;
&lt;p&gt;
&lt;code&gt;COVAR_POP = (SUM(x * y) - SUM(x) * SUM(y) / COUNT(*)) / COUNT(*)&lt;/code&gt;, or mathematically:
&lt;/p&gt;
&lt;p&gt;
&lt;code&gt;cov(x, y) = (Σ(x * y) - Σx * Σy / N) / N&lt;/code&gt;
&lt;/p&gt;
&lt;p&gt;
But it gets even better. Since we&#39;re calculating these SUMs and COUNTs anyway, why not use them to implement STDDEV as well?  A simplified formula for STDDEV uses the
sum of squares and the square of the sum as follows:
&lt;/p&gt;
&lt;p&gt;
&lt;code&gt;σ = SQRT(N * Σx^2 - (Σx)^2) / N&lt;/code&gt;.
&lt;/p&gt;
&lt;p&gt;
Combining the formulae above, we get:
&lt;/p&gt;
&lt;pre&gt;
ρ(x,y) = cov(x, y) / (σ(x) * σ(y))

       = ( &lt;span style=&quot;padding:2px;border:dotted 1px red;&quot;&gt;(&lt;span style=&quot;background:#ffe2cc;&quot;&gt;Σ(x * y) - Σx * Σy / N&lt;/span&gt;) / N&lt;/span&gt; ) / (     &lt;span style=&quot;padding:2px;border:dotted 1px red;&quot;&gt;(SQRT(&lt;span style=&quot;background:#ffe2cc;&quot;&gt;N * Σx^2 - (Σx)^2&lt;/span&gt;) / N) * (SQRT(&lt;span style=&quot;background:#ffe2cc;&quot;&gt;N * Σy^2 - (Σy)^2&lt;/span&gt;) / N)&lt;/span&gt; )

       =     (&lt;span style=&quot;background:#ffe2cc;&quot;&gt;Σ(x * y) - Σx * Σy / N&lt;/span&gt;)      / ( N * &lt;span style=&quot;padding:2px;border:dotted 1px red;&quot;&gt;(SQRT(&lt;span style=&quot;background:#ffe2cc;&quot;&gt;N * Σx^2 - (Σx)^2&lt;/span&gt;) / N) * (SQRT(&lt;span style=&quot;background:#ffe2cc;&quot;&gt;N * Σy^2 - (Σy)^2&lt;/span&gt;) / N)&lt;/span&gt; )

       =     (&lt;span style=&quot;background:#ffe2cc;&quot;&gt;Σ(x * y) - Σx * Σy / N&lt;/span&gt;)      / (     &lt;span style=&quot;padding:2px;border:dotted 1px red;&quot;&gt;SQRT(&lt;span style=&quot;background:#ffe2cc;&quot;&gt;N * Σx^2 - (Σx)^2&lt;/span&gt;)       * SQRT(&lt;span style=&quot;background:#ffe2cc;&quot;&gt;N * Σy^2 - (Σy)^2&lt;/span&gt;) / N&lt;/span&gt; )

       = N * (&lt;span style=&quot;background:#ffe2cc;&quot;&gt;Σ(x * y) - Σx * Σy / N&lt;/span&gt;)      / (     SQRT(&lt;span style=&quot;background:#ffe2cc;&quot;&gt;N * Σx^2 - (Σx)^2&lt;/span&gt;)       * SQRT(&lt;span style=&quot;background:#ffe2cc;&quot;&gt;N * Σy^2 - (Σy)^2&lt;/span&gt;) )

       =     (&lt;span style=&quot;background:#ffe2cc;&quot;&gt;N * Σ(x * y) - Σx * Σy&lt;/span&gt;)      / (     SQRT(&lt;span style=&quot;background:#ffe2cc;&quot;&gt;N * Σx^2 - (Σx)^2&lt;/span&gt;)       * SQRT(&lt;span style=&quot;background:#ffe2cc;&quot;&gt;N * Σy^2 - (Σy)^2&lt;/span&gt;) )
&lt;/pre&gt;
&lt;p&gt;
I&#39;ve left the product of the two square roots in the denominator as-is rather than simplifying it further because the simplification could result in numeric overflow.
&lt;/p&gt;
&lt;p&gt;
So, we now have a function for Pearson&#39;s correlation coefficient using only SUM &amp;amp; COUNT, both of which support window functions and window frames.
&lt;/p&gt;
&lt;p&gt;
For each of these, we can now SELECT something like this in an inner query:
&lt;/p&gt;
&lt;pre&gt;
COUNT( * )   OVER (PARTITION BY &amp;lt;partition cols&amp;gt; ORDER BY &amp;lt;minute&amp;gt; ROWS BETWEEN $k2 PRECEDING AND $k2 FOLLOWING) AS N,

SUM(x)       OVER (PARTITION BY &amp;lt;partition cols&amp;gt; ORDER BY &amp;lt;minute&amp;gt; ROWS BETWEEN $k2 PRECEDING AND $k2 FOLLOWING) AS SUM_X,

SUM(x * x)   OVER (PARTITION BY &amp;lt;partition cols&amp;gt; ORDER BY &amp;lt;minute&amp;gt; ROWS BETWEEN $k2 PRECEDING AND $k2 FOLLOWING) AS SUM2_X,

SUM(y)       OVER (PARTITION BY &amp;lt;partition cols&amp;gt; ORDER BY &amp;lt;minute&amp;gt; ROWS BETWEEN $k2 PRECEDING AND $k2 FOLLOWING) AS SUM_Y,

SUM(y * y)   OVER (PARTITION BY &amp;lt;partition cols&amp;gt; ORDER BY &amp;lt;minute&amp;gt; ROWS BETWEEN $k2 PRECEDING AND $k2 FOLLOWING) AS SUM2_Y,

SUM(x * y)   OVER (PARTITION BY &amp;lt;partition cols&amp;gt; ORDER BY &amp;lt;minute&amp;gt; ROWS BETWEEN $k2 PRECEDING AND $k2 FOLLOWING) AS SUM_XY,
&lt;/pre&gt;
&lt;p&gt;
&lt;code&gt;$k2&lt;/code&gt; is half the sliding window size, so if you wanted a window of about 60 elements, k2 would be 30 (the window then spans up to 61 rows: 30 before, the current row, and 30 after). The &lt;code&gt;ROWS BETWEEN r1 PRECEDING AND r2 FOLLOWING&lt;/code&gt; syntax specifies a window of rows extending at most r1 rows before the current row, and at most r2 rows beyond the current row.

We follow that with an outer query that SELECTs this:
&lt;/p&gt;
&lt;pre&gt;
x,
y,
CASE
    WHEN N * SUM2_X &amp;gt; SUM_X * SUM_X AND N * SUM2_Y &amp;gt; SUM_Y * SUM_Y
    THEN (N * SUM_XY - SUM_X * SUM_Y) / (SQRT(N * SUM2_X - SUM_X * SUM_X) * SQRT(N * SUM2_Y - SUM_Y * SUM_Y))
    ELSE 0.0
END AS corr_yx
&lt;/pre&gt;
&lt;p&gt;
And there we have it... Pearson&#39;s correlation coefficient implemented as a window function with window frame support.
&lt;/p&gt;
&lt;p&gt;
With the above combination, we can get a Pearson&#39;s correlation for each &lt;code&gt;(x, y)&lt;/code&gt; tuple in the table that correlates a sliding window of data. In my case this was a timeseries database, so for each minute of data, I get a correlation co-efficient of (at most) 61 minutes around that minute (at most 30 before and at most 30 after).
&lt;/p&gt;
&lt;p&gt;
We can still use the &lt;code&gt;CORR&lt;/code&gt; aggregate function on the entire list of &lt;code&gt;(x, y)&lt;/code&gt; tuples, or post calculate that in a language like Julia.
&lt;/p&gt;
&lt;p&gt;
For the next installment, maybe &lt;a href=&quot;https://tech.bluesmoon.info/2019/10/implementing-spearmans-rank-correlation.html&quot;&gt;I&#39;ll write up how to&lt;/a&gt; do &lt;a href=&quot;https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient&quot;&gt;Spearman&#39;s Rank Correlation Coefficient&lt;/a&gt;.
&lt;/p&gt;
</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/3450182262458030296/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2019/10/implementing-corr-as-database-window.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/3450182262458030296'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/3450182262458030296'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2019/10/implementing-corr-as-database-window.html' title='Implementing Pearson&#39;s &lt;code&gt;CORR&lt;/code&gt; as a database window function'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-6874288390518150078</id><published>2017-04-13T17:23:00.001-04:00</published><updated>2017-10-27T09:35:27.369-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="internet"/><category scheme="http://www.blogger.com/atom/ns#" term="ISP"/><category scheme="http://www.blogger.com/atom/ns#" term="security"/><category scheme="http://www.blogger.com/atom/ns#" term="windows"/><title type='text'>Set your password to &quot;password&quot; - A tale of datacenter security</title><content type='html'>&lt;p&gt;
This is an old story.  Old enough that the guilty parties have either learnt from their mistakes or have moved on to other things.
&lt;/p&gt;
&lt;p&gt;
It was 1999.  I was an intern at an ERP/CRM company called Thirdware Solutions Pvt. Ltd. or TSPL for short.  If you&#39;d asked me a year earlier, I wouldn&#39;t have heard of this company, and I wouldn&#39;t have imagined ever working in this space.  However the people who interviewed us made a convincing argument that as interns we&#39;d be able to work on fringe technologies that they couldn&#39;t spare the rest of their full time engineers for.  The web was one of these.  It was very new to Indian businesses, and the dot com bubble was at its peak. I&#39;d been dabbling with HTML, JavaScript, and CSS for about 3 years and was interested in learning more about the underlying protocols.  So I joined them.
&lt;/p&gt;
&lt;p&gt;
I largely consider this a good decision.  After the initial training on what the company did and how it did it, I was given some freedom to explore.  In about a week I&#39;d reformatted my Windows box and installed RedHat Linux 5.2.  Reading through all the HOWTOs that I could find, I managed to set up daemons for DNS, DHCP, SMTP, HTTP and POP3.
&lt;/p&gt;
&lt;p&gt;
At the time the company had a single Pentium box that would connect to the internet over dialup.  They used a commercial HTTP &amp;amp; Email proxy on this box that only allowed 3 people to connect at a time, so people in the office developed an honour system where each would connect, send &amp;amp; receive email, and then disconnect as soon as possible.  If anyone was expecting large customer requests, they&#39;d let the rest of the office know and people would stay off the network.  I took it upon myself to &quot;fix&quot; this.  With the blessing of our network administrator and the company CEO, I wrote a little Java app that proxied ports 25 and 110. I couldn&#39;t figure out HTTP proxying yet, but POP3 &amp;amp; SMTP were ok.  I just had to give everyone a new &quot;proxy email address&quot; that they&#39;d use when connecting to the proxy and that would be translated to the actual address when going out to the server.
&lt;/p&gt;
&lt;p&gt;
We left the web throttled at 3 users since no one in the office needed to access the web, and email was the most critical use of the internet.
&lt;/p&gt;
&lt;p&gt;
This worked out quite well, so the leadership team started to trust me a fair bit.  I do not think that any other company would have trusted an intern with the kinds of decisions they let me make following that, but it leads directly into how we avoided a fairly bad security situation.
&lt;/p&gt;
&lt;p&gt;
A few months later we decided to make our ERP/CRM system available over the web.  A full rewrite would take over a year, but we found something called Citrix App Server, that ran on Windows NT and would make any desktop application available to someone over the web taking care of basic authentication.  We tested it out locally and it worked well on our LAN, so we now had to make it available to our customers.  Except, this wasn&#39;t happening over a 56K dialup network that only allowed 3 users through at a time.
&lt;/p&gt;
&lt;p&gt;
We ended up speaking to the top ISPs in India at the time, and got a great deal from one of them to put our Windows NT box inside their datacenter, on their always on network.
&lt;/p&gt;
&lt;p&gt;
A few weeks later we locked the hard drive.  No, this is not a security thing.  This is a process of moving the drive&#39;s arm to the outermost &quot;locked&quot; position, so that significant vibrations would not result in the head hitting and damaging the disk platters.  We did this because the next step was me and our network admin sitting in the backseat of the company president&#39;s car with our Windows NT Pentium 6 Tower PC resting across our laps while our company president drove us down the length of Mumbai trying to avoid as many potholes as possible.
&lt;/p&gt;
&lt;p&gt;
We made it to the other end and when I powered the host back up and unlocked the drive (automatically on boot up), it still ran, so we were happy.  We went into an unmarked building, carried the box to a floor with security guards outside the door, and a keypad entry.  Inside, there were closets of blades and a few minitowers and tower hosts sitting on the bottom shelf of a rack.  We were told to put our box next to the others, and then the guy who ran the datacenter said the magic words.
&lt;/p&gt;
&lt;blockquote&gt;
Start up your server, and set the Administrator password to &quot;password&quot;&lt;/blockquote&gt;
&lt;p&gt;
I glanced over at the other boxes, and they all had stickers on them saying &quot;Administrator/password&quot;
&lt;/p&gt;
&lt;p&gt;
The three of us from TSPL looked at each other, and our president told me to decide.  I asked the datacenter guy why he needed that.  He said that sometimes they need to shut down the boxes so they can move them to a different power strip.  I asked him if it would be sufficient to give him an account that only had local access and could only reboot the box.  He thought about it for a bit and said yes.
&lt;/p&gt;
&lt;p&gt;
So I created a new account that required a physically attached keyboard for login, and all it had was the ability to reboot the box.  Our app was set up to start up automatically on boot, so we weren&#39;t worried about someone having to start it.  DC guy physically locked the box to a rack, showed us that he was keeping the key, and we headed back to the office.
&lt;/p&gt;
&lt;p&gt;
We now needed to test our setup, so we asked everyone in the office to let us use the internet connection.  We tried accessing our app, and it worked!
&lt;/p&gt;
&lt;p&gt;
Since I had Admin access to our box, I was also able to open the &quot;Network Neighbourhood&quot; of our box in the datacenter.  On that network, I saw all the other hosts that were in the datacenter.  They had names identifying them from India&#39;s largest IT companies.  These were companies I&#39;d initially thought of interning at.
&lt;/p&gt;
&lt;p&gt;
I looked at our president and grinned, and he looked back and said, &quot;Send me a safe summary report when you&#39;re done&quot; and walked off to his office.
&lt;/p&gt;
&lt;p&gt;
I double clicked on one of the other big boxes and was prompted for a username and password to connect to it.
&lt;/p&gt;
&lt;p&gt;
You can probably guess what happened next ;)
&lt;/p&gt;
</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/6874288390518150078/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2017/04/a-tale-of-datacenter-security.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6874288390518150078'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6874288390518150078'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2017/04/a-tale-of-datacenter-security.html' title='Set your password to &quot;password&quot; - A tale of datacenter security'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-2050635478293553108</id><published>2015-05-27T13:11:00.000-04:00</published><updated>2015-05-27T13:11:04.072-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="http"/><category scheme="http://www.blogger.com/atom/ns#" term="performance"/><category scheme="http://www.blogger.com/atom/ns#" term="rum"/><category scheme="http://www.blogger.com/atom/ns#" term="velocity"/><category scheme="http://www.blogger.com/atom/ns#" term="webperf"/><title type='text'>Velocity Santa Clara 2015 -- My List</title><content type='html'>&lt;p&gt;At Velocity SC 2015, these are the talks that I&#39;d really like to see if only I could be in more than one room at a time.&lt;/p&gt;

&lt;h3&gt;Wednesday, May 27&lt;/h3&gt;

&lt;h4&gt;09:00 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/40485&quot;&gt;Service Workers&lt;/a&gt; by Pat Meenan&lt;/h4&gt;

&lt;p&gt;
Exciting new technology currently available in Chrome
&lt;/p&gt;

&lt;h4&gt;11:00 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/41656&quot;&gt;Building performant SPAs&lt;/a&gt; by Chris Love&lt;/h4&gt;

&lt;p&gt;
Since we&#39;re doing a lot of SPA work for boomerang at the moment, I&#39;m very interested in performance best practices for SPAs
&lt;/p&gt;

&lt;h4&gt;13:30 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/42640&quot;&gt;Metrics Metrics Everywhere&lt;/a&gt; by Tammy &amp; Cliff&lt;/h4&gt;

&lt;p&gt;
Tammy and Cliff are my colleagues at SOASTA, and this talk is based on a lot of the data that I&#39;ve been working to collect over the last few years.  I&#39;m torn between this and the next one also at the same time.
&lt;/p&gt;

&lt;h4&gt;13:30 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/41601&quot;&gt;Self-healing systems&lt;/a&gt; by Todd &amp; Matt&lt;/h4&gt;

&lt;p&gt;
Todd &amp; Matt are also colleagues at SOASTA, and this talk is about the infrastructure we&#39;ve developed to collect the metrics that are covered in the talk that Tammy &amp; Cliff are doing.  I really wish I could be at both.
&lt;/p&gt;

&lt;h4&gt;15:30 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/42513&quot;&gt;Linux Perf Tools&lt;/a&gt; by Brendan Gregg&lt;/h4&gt;

&lt;p&gt;
Always interested in tools to analyse linux performance.
&lt;/p&gt;

&lt;h3&gt;Thursday, May 28&lt;/h3&gt;

&lt;p&gt;
I haven&#39;t listed the keynotes here because that&#39;s the only track at the time and I don&#39;t need to choose which room to be in.
&lt;/p&gt;

&lt;h4&gt;13:45 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/42026&quot;&gt;LinkedIn&#39;s use of RUM&lt;/a&gt; by Ritesh Maheshwari&lt;/h4&gt;

&lt;p&gt;
LinkedIn uses a modified version of boomerang, and I&#39;m keen to know what they&#39;ve done.
&lt;/p&gt;


&lt;h4&gt;13:45 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/41226&quot;&gt;Stream processing and anomaly detection&lt;/a&gt; by Arun Kejariwal&lt;/h4&gt;

&lt;p&gt;
Very interesting topic, something that I&#39;m very interested in.
&lt;/p&gt;

&lt;h4&gt;13:45 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/42787&quot;&gt;Design &amp; Performance&lt;/a&gt; by Steve Souders&lt;/h4&gt;

&lt;p&gt;
Steve&#39;s talks are always educational
&lt;/p&gt;


&lt;h4&gt;14:40 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/42400&quot;&gt;Visualising Performance Data&lt;/a&gt; by Mark Zeman&lt;/h4&gt;

&lt;p&gt;
Again, this is something I&#39;m working on at the moment, so very interested.
&lt;/p&gt;

&lt;h4&gt;14:40 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/41680&quot;&gt;Failure is an Option&lt;/a&gt; by Ian Malpass&lt;/h4&gt;

&lt;p&gt;
Etsy&#39;s devops talks are always educational.
&lt;/p&gt;

&lt;h4&gt;16:10 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/42804&quot;&gt;Crafting performance alerting tools&lt;/a&gt; by Allison McKnight&lt;/h4&gt;

&lt;p&gt;
I&#39;m very interested in crafting alerts from RUM data.
&lt;/p&gt;


&lt;h3&gt;Friday, May 29&lt;/h3&gt;

&lt;h4&gt;09:00 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/43669&quot;&gt;RUM at MSN&lt;/a&gt; by Paul Roy&lt;/h4&gt;

&lt;h4&gt;14:25 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/42395&quot;&gt;Missing Bandwidth&lt;/a&gt; by Bill Green&lt;/h4&gt;

&lt;h4&gt;14:25 &lt;a href=&quot;http://velocityconf.com/devops-web-performance-2015/public/schedule/detail/42327&quot;&gt;Winning Arguments with Performance Data&lt;/a&gt; by Buddy Brewer&lt;/h4&gt;

&lt;h4&gt;17:05 All talks at this time slot&lt;/h4&gt;

&lt;p&gt;
This last slot is unfortunate.  Every talk at this slot is interesting and by good speakers.
&lt;/p&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/2050635478293553108/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2015/05/velocity-santa-clara-2015-my-list.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/2050635478293553108'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/2050635478293553108'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2015/05/velocity-santa-clara-2015-my-list.html' title='Velocity Santa Clara 2015 -- My List'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-4496004284348857555</id><published>2015-01-08T13:59:00.000-05:00</published><updated>2015-01-08T14:00:24.235-05:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="ie"/><category scheme="http://www.blogger.com/atom/ns#" term="internet explorer"/><category scheme="http://www.blogger.com/atom/ns#" term="javascript"/><category scheme="http://www.blogger.com/atom/ns#" term="navtiming"/><category scheme="http://www.blogger.com/atom/ns#" term="performance"/><category scheme="http://www.blogger.com/atom/ns#" term="resource timing"/><category scheme="http://www.blogger.com/atom/ns#" term="web"/><title type='text'>IE throws an &quot;Invalid Calling Object&quot; Exception for certain iframes</title><content type='html'>On a site that uses 
&lt;a href=&quot;http://www.lognormal.com/boomerang/doc/&quot;&gt;boomerang&lt;/a&gt;, I found a particular JavaScript error happen very often:

&lt;blockquote&gt;TypeError: Invalid calling object&lt;/blockquote&gt;

This only happens on Internet Explorer, primarily IE 11, but I&#39;ve seen it on versions as old as 9.

I searched through Stack Overflow for the cause of this error, and while many of the cases sounded like they could be my problem, further investigation showed that my case didn&#39;t match any of them.

The code in particular that threw the exception was collecting &lt;a href=&quot;http://www.w3.org/TR/resource-timing/&quot;&gt;resource timing&lt;/a&gt; information for all resources on the page.  Part of the algorithm involves drilling into &lt;code&gt;iframe&lt;/code&gt;s on the page, and this error showed up on one particular &lt;code&gt;iframe&lt;/code&gt;.

There are a few things to note:

&lt;pre&gt;
   (&quot;performance&quot; in frame) === true;

   frame.hasOwnProperty(&quot;performance&quot;) === false;
&lt;/pre&gt;

The latter is not a surprise since &lt;code&gt;hasOwnProperty(&quot;performance&quot;)&lt;/code&gt; is not supported for window objects on IE (I&#39;ve seen this before when investigating &lt;a href=&quot;http://tech.bluesmoon.info/2014/08/jslints-suggestion-will-break-your-site.html&quot;&gt;JSLint problems&lt;/a&gt;.)

There was no problem accessing &lt;code&gt;frame.document&lt;/code&gt;, but accessing &lt;code&gt;frame.performance&lt;/code&gt; threw an exception.

&lt;pre&gt;
    frame.performance;    // &lt;-- throws &quot;TypeError: Invalid calling object&quot; with error code -2147418113

    frame[&quot;performance&quot;]; // &lt;-- throws &quot;TypeError: Invalid calling object&quot; with error code -2147418113
&lt;/pre&gt;

In fact, &lt;code&gt;frame.&amp;lt;anything except document&amp;gt;&lt;/code&gt; would throw the same exception.

So I looked at the &lt;code&gt;iframe&lt;/code&gt;&#39;s document object some more, and found this:

&lt;pre&gt;
    frame.document.pathname === &quot;/xxx/yyy/123/4323.pdf&quot;;
&lt;/pre&gt;

The frame was pointing to a PDF document, and while IE was creating a reference to hold the &lt;code&gt;performance&lt;/code&gt; object of this document, it prevented any attempts to access this reference.

I tested Chrome and Firefox, and they both create and populate a &lt;code&gt;frame.performance&lt;/code&gt; object for PDF documents.
</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/4496004284348857555/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2015/01/ie-throws-invalid-calling-object.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/4496004284348857555'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/4496004284348857555'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2015/01/ie-throws-invalid-calling-object.html' title='IE throws an &quot;Invalid Calling Object&quot; Exception for certain iframes'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-5919944752501671273</id><published>2014-08-22T00:45:00.001-04:00</published><updated>2014-08-22T00:46:10.909-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="firefox"/><category scheme="http://www.blogger.com/atom/ns#" term="ie"/><category scheme="http://www.blogger.com/atom/ns#" term="javascript"/><category scheme="http://www.blogger.com/atom/ns#" term="jslint"/><category scheme="http://www.blogger.com/atom/ns#" term="performance"/><category scheme="http://www.blogger.com/atom/ns#" term="web"/><title type='text'>jslint&#39;s suggestion will break your site: Unexpected &#39;in&#39;...</title><content type='html'>&lt;p&gt;
I use &lt;a href=&quot;http://jslint.com/&quot;&gt;jslint&lt;/a&gt; to validate my JavaScript before it goes out to production.  The tool is somewhat useful, but you really have to spend some time ignoring all the false errors it flags.  In some cases you can take its suggestions, while in others you can ignore them with no ill effects.
&lt;/p&gt;
&lt;p&gt;
In this particular case, I came across an error, where, if you follow the suggestions, your site will break.
&lt;/p&gt;
&lt;p&gt;
My code looks like this:
&lt;/p&gt;
&lt;pre&gt;
   if (!(&quot;performance&quot; in window) || !window.performance) {
      return null;
   }
&lt;/pre&gt;
&lt;p&gt;
jslint complains saying:
&lt;/p&gt;
&lt;blockquote&gt;
Unexpected &#39;in&#39;. Compare with undefined, or use the hasOwnProperty method instead.
&lt;/blockquote&gt;
&lt;p&gt;
This is very bad advice for the following reasons:
&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Comparing with &lt;code&gt;undefined&lt;/code&gt; will throw an exception on Firefox 31 if used inside an anonymous iframe.&lt;/li&gt;
&lt;li&gt;Using &lt;code&gt;hasOwnProperty&lt;/code&gt; will cause a false negative on IE 10 because &lt;code&gt;window.hasOwnProperty(&quot;performance&quot;)&lt;/code&gt; is false even though IE supports the performance timing object.&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;
So the only course of action is to use &lt;code&gt;in&lt;/code&gt; for this case.
&lt;/p&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/5919944752501671273/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2014/08/jslints-suggestion-will-break-your-site.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/5919944752501671273'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/5919944752501671273'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2014/08/jslints-suggestion-will-break-your-site.html' title='jslint&#39;s suggestion will break your site: Unexpected &#39;in&#39;...'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-4829262107458411106</id><published>2013-08-08T04:01:00.000-04:00</published><updated>2014-06-27T12:33:48.945-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="date"/><category scheme="http://www.blogger.com/atom/ns#" term="DST"/><category scheme="http://www.blogger.com/atom/ns#" term="iso8601"/><category scheme="http://www.blogger.com/atom/ns#" term="timezone"/><title type='text'>Don&#39;t guess at TimeZones in JavaScript</title><content type='html'>&lt;p&gt;
I spent quite some time a couple of months ago working on timezone support for &lt;a href=&quot;http://mpulse.soasta.com/&quot;&gt;mPulse&lt;/a&gt; and thought I should document the insanity, but never quite got around to it.  Then there was this &lt;a href=&quot;http://trevoro.net/2013/whats-your-timezone/&quot;&gt;post on hacker news&lt;/a&gt; about reading a user&#39;s timezone in JavaScript and using that to display the right time.  That post brought back a flood of horrific memories, prompting me to put my thoughts down.
&lt;/p&gt;
&lt;p&gt;
First, while Trevor&#39;s post has a good hack to display a time in the user&#39;s current timezone, that hack works in only one case -- displaying the current time to the user in their device&#39;s timezone.
&lt;/p&gt;
&lt;p&gt;
If you&#39;ve worked with timezones and front end development for a while, this is probably the first hack you&#39;ll think up.&amp;nbsp; It turns out that in most cases, this is insufficient.
&lt;/p&gt;
&lt;p&gt;
We&#39;ll first look at the problems with this approach, and then look at the requirements for proper timezone support.
&lt;/p&gt;

&lt;h3&gt;Problems&lt;/h3&gt;
&lt;ul&gt;
&lt;li&gt;The user&#39;s device timezone is not always correct.  Some users fix their device timezone to their home time even when they&#39;re travelling, however the information you need to display may be pertinent for the location where they are right now.&lt;/li&gt;
&lt;li&gt;On the other hand, the user may have their device set to automatically update timezone, but they actually want to see times in their home time (because, for example, that&#39;s when they call home, or have their calendar app configured).&lt;/li&gt;
&lt;li&gt;The timezone offset (which is what you actually get from JavaScript), only tells you the offset from &lt;abbr title=&quot;Coordinated Universal Time&quot;&gt;UTC&lt;/abbr&gt; for &quot;right now&quot;.  This information is irrelevant if you need to display a time that is not &lt;em&gt;now&lt;/em&gt;, because daylight saving rules may come into effect.&lt;/li&gt;
&lt;li&gt;You cannot use a lookup table for offset to timezone, because there isn&#39;t a one-to-one mapping between offset and timezone.  It&#39;s a many-to-many mapping, and it changes.&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;Second attempt&lt;/h3&gt;
&lt;p&gt;
A second attempt might be to figure out the timezone name by parsing the JavaScript &lt;code&gt;Date.toString()&lt;/code&gt; output.  This was my second attempt when writing the &lt;code&gt;strftime&lt;/code&gt; function for the &lt;abbr title=&quot;Yahoo! User Interface&quot;&gt;YUI&lt;/abbr&gt; Library.
&lt;/p&gt;

&lt;p&gt;
I did this study in 2008, and it turns out that &lt;a href=&quot;http://tech.bluesmoon.info/2008/09/date-inconsistencies-in-javascript.html&quot;&gt;browsers are pretty inconsistent wrt &lt;code&gt;Date.toString()&lt;/code&gt; output&lt;/a&gt;.
&lt;/p&gt;

&lt;h3&gt;Requirements&lt;/h3&gt;
&lt;p&gt;
Ok, before going into this, read this &lt;a href=&quot;http://stackoverflow.com/questions/2532729/daylight-saving-time-and-timezone-best-practices/3269325&quot;&gt;post on stackoverflow about daylight saving time and timezones&lt;/a&gt;.
&lt;/p&gt;

&lt;p&gt;
So, what we need is the ability to do the following:
&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;store any date or range of dates.&lt;/li&gt;
&lt;li&gt;display a date in any timezone that makes sense for the user, and/or the event(s) being displayed, and/or the environment.&lt;/li&gt;
&lt;li&gt;display date ranges that may cross a timezone boundary.&lt;/li&gt;
&lt;li&gt;display a historic date in a historic timezone that may have changed due to political decisions.&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;
The first requirement should be pretty straightforward.  We&#39;d like to store dates, and the best way is really a unix timestamp or an ISO8601 date.  I prefer the latter because it takes into account leap seconds as well (unix timestamps are leap second agnostic [&lt;a href=&quot;http://stackoverflow.com/questions/178704/are-unix-timestamps-the-best-way-to-store-timestamps&quot; title=&quot;StackOverflow: Are Unix Timestamps the best way to store timestamps&quot;&gt;1&lt;/a&gt;],[&lt;a href=&quot;http://derickrethans.nl/leap-seconds-and-what-to-do-with-them.html&quot; title=&quot;Derick: Leap Seconds and What to do With Them (Highly Recommend Derick&#39;s writings for anything time)&quot;&gt;2&lt;/a&gt;]).  I also always use Zulu time for an ISO8601 date.
&lt;/p&gt;

&lt;p&gt;
This is not sufficient, however.  We also need to store the timezone name of the event.  This is so that we can display historic events in the timezone they originally occurred in, even if the definition of that timezone changes.  This comes from the Olson Database.
&lt;/p&gt;

&lt;p&gt;
With these two pieces of information (event date/time &amp;amp; event timezone name), we can render the date in several ways... the original event date/time, the event date/time relative to the user&#39;s current timezone, etc.
&lt;/p&gt;

&lt;p&gt;
We also need to handle date ranges.  This could be something like your spring vacation, that just happened to cross several timezones because you left San Francisco on March 8th, flew to the UK, stayed there until April 7th, and then flew back.  Your flight departure from SFO is in Pacific Standard Time and your arrival at LHR is in Greenwich Mean Time.  Your departure from LHR is in British Summer Time, and your arrival at SFO is in Pacific Daylight Time.
&lt;/p&gt;

&lt;p&gt;
What&#39;s most important is that you display these dates in their specific timezones &lt;strong&gt;regardless&lt;/strong&gt; of where the user actually is.
&lt;/p&gt;

&lt;h3&gt;So should we guess or ask the user what they want?&lt;/h3&gt;
&lt;p&gt;
By all means guess at what the user&#39;s timezone might be.  Use a combination of GeoIP + JavaScript timezone offset to figure out where they might be (note that both of these could be wrong), but give them the option to specify the timezone that they care about.
&lt;/p&gt;

&lt;p&gt;
Also, when displaying event dates, use a date local to the event, but use JavaScript to allow the user an easy way to flip it to their local timezone if they like.  That&#39;s progressive enhancement.
&lt;/p&gt;

&lt;h3&gt;What else shouldn&#39;t we do?&lt;/h3&gt;
&lt;p&gt;
Don&#39;t try and guess the user&#39;s language or preferred currency from their current location.  Always ask and store their response.
&lt;/p&gt;

&lt;a href=&quot;http://www.google.com/&quot; target=&quot;_blank&quot; onclick=&#39;if(BOOMR) { BOOMR.responseEnd(&quot;fake-click&quot;, new Date().getTime(), { metrics: [&quot;Fake_Conversion&quot;], url: &quot;http://www.fakebook.com/converted&quot;, subresource: true}); }&#39;&gt;Click to help me test boomerang (opens google.com in a new window)&lt;/a&gt;.</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/4829262107458411106/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2013/08/dont-guess-at-timezones-in-javascript.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/4829262107458411106'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/4829262107458411106'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2013/08/dont-guess-at-timezones-in-javascript.html' title='Don&#39;t guess at TimeZones in JavaScript'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-2877228100902784325</id><published>2013-03-18T16:48:00.002-04:00</published><updated>2013-03-18T16:49:16.099-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="ayttm"/><category scheme="http://www.blogger.com/atom/ns#" term="design"/><category scheme="http://www.blogger.com/atom/ns#" term="interfaces"/><category scheme="http://www.blogger.com/atom/ns#" term="ui"/><title 
type='text'>Reducing checkboxes</title><content type='html'>&lt;p&gt;
Alex Limi has an excellent &lt;a href=&quot;http://limi.net/checkboxes-that-kill&quot;&gt;post on the overuse of checkboxes&lt;/a&gt; in Firefox&#39;s preferences screen.  It reminded me of something Nat mentioned during his talk with Miguel de Icaza back at &lt;a href=&quot;http://linux-bangalore.org/2003/&quot;&gt;Linux Bangalore 2003&lt;/a&gt; about Gnome.  They mentioned several UI idioms including checkboxes and disabled menu items, but the gist of it was, every time you give the user a decision to make, you&#39;re making their lives harder.  As the domain expert for this product, it&#39;s your job to pick sane defaults and not bother the user with these choices.
&lt;/p&gt;
&lt;p&gt;
We took this to heart on the &lt;a href=&quot;http://ayttm.sourceforge.net/&quot;&gt;Ayttm&lt;/a&gt; project.  At the time ayttm probably had over 200 user modifiable configuration options in the preferences screen, and each plugin could add its own.  It was way past the point of violating one of our primary design requirements, that it should be easy enough for &lt;a href=&quot;http://www.colino.net/wordpress/bienvenue/&quot;&gt;Colin&lt;/a&gt;&#39;s mum to use.  We had a bit of a dilemma though.  While our target audience was definitely non technical, we had a significant number of geeky early adopters who really wanted the ability to modify everything.
&lt;/p&gt;
&lt;p&gt;
Over the next few days we stripped out almost every configurable option from the Preferences screen; however, we left them all in the config file on disk.  Any user that really wanted to modify the options could edit the config file in their favourite text editor and make the changes themselves.  This made everyone happier.  Our technical users were happy that they didn&#39;t have to click through too many screens to change all their options, and our non technical users had a preferences screen where the most they&#39;d have to do was enter their account information, and the type of &lt;a href=&quot;http://ayttm.sourceforge.net/smileys/&quot;&gt;smileys&lt;/a&gt; they wanted.
&lt;/p&gt;
&lt;p&gt;
The &lt;a href=&quot;https://developer.gnome.org/hig-book/stable/&quot;&gt;Gnome Human Interface Guidelines&lt;/a&gt; cover a lot about designing intuitive interfaces, so go read that.
&lt;/p&gt;
</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/2877228100902784325/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2013/03/reducing-checkboxes.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/2877228100902784325'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/2877228100902784325'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2013/03/reducing-checkboxes.html' title='Reducing checkboxes'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-2615594449229141159</id><published>2013-02-01T12:41:00.000-05:00</published><updated>2013-02-15T18:07:33.458-05:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="2fa"/><category scheme="http://www.blogger.com/atom/ns#" term="android"/><category scheme="http://www.blogger.com/atom/ns#" term="google"/><category scheme="http://www.blogger.com/atom/ns#" term="nexus"/><category scheme="http://www.blogger.com/atom/ns#" term="phone"/><title type='text'>Nexus 4 &amp;mdash; First impressions</title><content type='html'>&lt;p&gt;
Just got a Nexus 4.  Started it up, and tried to sign in to my gmail account.  I have two-factor auth set up for extra security, and you&#39;d expect a google device to work well with google auth, but here&#39;s a list of bugs I&#39;ve found within minutes of opening the package.
&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;It first asks you to sign in to your gmail account.  Type in your username and password.&lt;/li&gt;
&lt;li&gt;It then tells you you need to sign in on the web to continue, so it opens the browser for you to sign in.&lt;/li&gt;
&lt;li&gt;Focus is on the username field, but there&#39;s no keyboard available.  You need to tap on the username field (which already has focus) to bring up the keyboard.  This is counter-intuitive.&lt;/li&gt;
&lt;li&gt;Type in your username and password and submit.  It then takes you to the &lt;abbr title=&quot;2 Factor Auth&quot;&gt;2FA&lt;/abbr&gt; page (at least in my case) where you enter your security code.&lt;/li&gt;
&lt;li&gt;Again, tap on the field to bring up the keyboard.  This field is a numeric field, but the keyboard starts out in alphabetic mode.  This is probably a bug with all mobile devices, but you&#39;d think that something this new would have fixed it.&lt;/li&gt;
&lt;li&gt;Type in the code, and now try to click on the checkbox that says &quot;Remember this device&quot;.  Except the keyboard goes away and you now end up clicking a link that explains what 2FA is.&lt;/li&gt;
&lt;li&gt;Ok, cool, I just want to go back and hit submit... except there&#39;s no back button on this browser.  There&#39;s no toolbar that normally pops up at the bottom of the browser.  No, this is a special browser that does not allow you to navigate through the browser history.  FAIL.&lt;/li&gt;
&lt;li&gt;The only way forward is to shut down the phone and then start it up again.  Except at this point you start from scratch, including selecting your language.&lt;/li&gt;
&lt;/ol&gt;
&lt;p&gt;
A few other things I&#39;ve noticed...
&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;The timezone by default appears to be UTC.  You&#39;d think that it&#39;d localise this based on my location, which it knows and is configured to use.&lt;/li&gt;
&lt;li&gt;If a transient alert message pops up, and you try to tap on it, you&#39;ll actually tap on the item below the message.&lt;/li&gt;
&lt;li&gt;The icons are a bit too small for someone with normal sized fingers like me.  It&#39;s easy to tap on the wrong item.&lt;/li&gt;
&lt;li&gt;The position of the power button and volume control buttons means that when you press the power button with your thumb, your forefinger will inadvertently hit the volume control (or vice-versa).  This happens because of Newton&#39;s third law of motion.  Google/LG engineers should know this since it&#39;s a 300 year old basic law of motion.&lt;/li&gt;
&lt;li&gt;You cannot move a widget from one screen to another by dragging it.  You have to remove it from the old screen and then go through the process of adding and configuring it again for the new screen.&lt;/li&gt;
&lt;li&gt;When you select punctuation on the keyboard, entering an apostrophe should switch back to alphabetic mode.  It doesn&#39;t.&lt;/li&gt;
&lt;li&gt;It just reboots at times.&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;
Will post more as I use it.
&lt;/p&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/2615594449229141159/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2013/02/nexus-4-first-impressions.html#comment-form' title='3 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/2615594449229141159'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/2615594449229141159'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2013/02/nexus-4-first-impressions.html' title='Nexus 4 &amp;mdash; First impressions'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>3</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-7001500025860906590</id><published>2012-12-12T02:03:00.000-05:00</published><updated>2012-12-12T02:03:10.837-05:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="caching"/><category scheme="http://www.blogger.com/atom/ns#" term="latency"/><category scheme="http://www.blogger.com/atom/ns#" term="performance"/><title type='text'>A correlation between load time and usage</title><content type='html'>We frequently see reports of website usage going up as load time goes down, or vice-versa.  It seems logical.  Users use a site much more if it&#39;s fast, and less if it&#39;s slow.

However, consider the converse too.  Is it possible that a site merely appears to be faster because users are using it more, and therefore have more of it cached?  I&#39;ve seen sites where the server-side cache-hit ratio is much higher when usage is high resulting in lower latency.

At this point I haven&#39;t seen any data that can convince me one way or the other.  Do you?</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/7001500025860906590/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2012/12/a-correlation-between-load-time-and.html#comment-form' title='6 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/7001500025860906590'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/7001500025860906590'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2012/12/a-correlation-between-load-time-and.html' title='A correlation between load time and usage'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>6</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-7195586286612360102</id><published>2012-10-25T17:48:00.000-04:00</published><updated>2012-10-25T17:48:47.679-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="startup"/><title type='text'>Startup Lessons: What should you optimise?</title><content type='html'>&lt;p&gt;
In the early days, when you can&#39;t afford hardware, optimise for efficient code.
&lt;/p&gt;
&lt;p&gt;
Sometimes this results in unreadable code or a language that not too many developers are familiar with.  This is okay.  You&#39;re trying to reduce the cost of hardware.
&lt;/p&gt;

&lt;p&gt;
When you get large enough to hire other developers, the cost of hardware is no longer your largest expense.  At this point, optimise for code readability.
&lt;/p&gt;

&lt;p&gt;
This might mean writing slightly less efficient code or moving to a more popular language.  That&#39;s okay.  Developer efficiency is more important at this time.
&lt;/p&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/7195586286612360102/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2012/10/startup-lessons-what-should-you-optimise.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/7195586286612360102'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/7195586286612360102'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2012/10/startup-lessons-what-should-you-optimise.html' title='Startup Lessons: What should you optimise?'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-9202693916464833877</id><published>2012-08-13T10:54:00.001-04:00</published><updated>2023-06-29T10:55:04.227-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="performance"/><category scheme="http://www.blogger.com/atom/ns#" term="rum"/><category scheme="http://www.blogger.com/atom/ns#" term="statistics"/><title type='text'>Analyzing Performance Data with Statistics</title><content type='html'>&lt;p&gt;At &lt;a href=&quot;http://www.lognormal.com/&quot;&gt;LogNormal&lt;/a&gt;, we’re all about collecting and making sense of real user performance data.
We collect over a billion data points a month, and there’s a lot you can tell about the web and your users if you know what to
look at in your data.  In this post, I’d like to go over some of the statistical methods we use to make sense of this data.&lt;/p&gt;

&lt;p&gt;You’d use these methods if you wanted to build your own Real User Measurement (RUM) tool.&lt;/p&gt;

&lt;p&gt;The entire topic is much larger than I can cover in a single post, so go through the references if you’re interested in more information.
&lt;a name=&#39;more&#39;&gt;&lt;/a&gt;&lt;/p&gt;
&lt;h2 id=&quot;data-and-distribution&quot;&gt;Data and Distribution&lt;/h2&gt;

&lt;p&gt;The data we’re looking at is the time between the user initiating an action (like a page load), and that action completing.  In
most cases, this is the time from a user requesting a page and that page’s onload event firing.&lt;/p&gt;

&lt;p&gt;For a site with few page views, the probability distribution looks something like this:&lt;/p&gt;

&lt;p&gt;&lt;img src=&quot;https://i1.bluesmoon.info//blog/assets/analysing-performance-data/sparse-distribution.png&quot; alt=&quot;Sparse data distribution&quot; /&gt;&lt;/p&gt;

&lt;p&gt;And for a site with a large number of page views, the probability distribution looks like this:&lt;/p&gt;

&lt;p&gt;&lt;img src=&quot;https://i1.bluesmoon.info//blog/assets/analysing-performance-data/lognormal-distribution.png&quot; alt=&quot;Dense data showing a Log-normal distribution&quot; /&gt;&lt;/p&gt;

&lt;p&gt;The x-axis is page load time while the y-axis is the number of data points that fell into a particular bucket.
The actual number of data points that fall into a bucket is not important, it’s the relative value that is.&lt;/p&gt;

&lt;p&gt;We notice that as the number of data points increases, the distribution becomes more striking until you’re able to map well
known probability distribution functions (PDFs) to it.  In this case, the load time looks a lot like a Log-normal distribution, i.e., a
distribution where the y-axis versus the logarithm of the x-axis is Normal (or Gaussian) in nature.&lt;/p&gt;

&lt;p&gt;You can read more about Log-normal distributions at &lt;a href=&quot;http://en.wikipedia.org/wiki/Log-normal_distribution&quot; title=&quot;Wikipedia article on Log-normal distributions&quot;&gt;Wikipedia&lt;/a&gt; and
on &lt;a href=&quot;http://www.wolframalpha.com/input/?i=log-normal&quot; title=&quot;Wolfram Alpha information on Log-normal distributions&quot;&gt;Wolfram Alpha&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;Now while these tend to be the most common, they aren’t the only distribution.  We also sometimes see a bimodal distribution
like this:&lt;/p&gt;

&lt;p&gt;&lt;img src=&quot;https://i1.bluesmoon.info//blog/assets/analysing-performance-data/bimodal-distribution.png&quot; alt=&quot;Dense data showing a Bimodal distribution&quot; /&gt;&lt;/p&gt;

&lt;p&gt;This often shows up when two different distributions are added together.&lt;/p&gt;

&lt;h2 id=&quot;central-tendency&quot;&gt;Central tendency&lt;/h2&gt;

&lt;p&gt;Now regardless of the distribution, we need a measure of central tendency to tell us what the “Average” user experience is.
Note that I’ve included the term “Average” in quotes because it’s often misinterpreted.  Most people use the term average to
refer to the arithmetic mean.  Statistically though, the Average could refer to any single number that summarises a dataset,
and there are many that we can pick from.&lt;/p&gt;

&lt;p&gt;The most common are the arithmetic mean, the median and the mode.  There’s also the geometric mean, the harmonic mean and a few
others that we won’t cover.&lt;/p&gt;

&lt;p&gt;I’ll briefly go over each of these terms.&lt;/p&gt;

&lt;h3 id=&quot;the-arithmetic-mean&quot;&gt;The arithmetic mean&lt;/h3&gt;

&lt;p&gt;The arithmetic mean is simply the sum of all readings divided by the number of readings.  In the rest of this document, we’ll refer
to the arithmetic mean as &lt;code&gt;amean&lt;/code&gt; or &lt;code&gt;&lt;abbr title=&quot;The greek letter mu&quot;&gt;&amp;mu;&lt;/abbr&gt;&lt;/code&gt;.&lt;/p&gt;

&lt;p&gt;The best thing about the amean is that it’s really easy to calculate.  Even with a very large number of data points, you only need
to hold on to the sum of all points and the number of points.  In terms of memory, this should require two integers.&lt;/p&gt;

&lt;p&gt;The biggest drawback of the amean is that it is very susceptible to outliers.  For example, the arithmetic mean of the following three
sets is the same:&lt;/p&gt;

&lt;pre&gt;
Set 1: 1, 1, 1, 1, 1, 1, 1, 1, 1, 91
Set 2: 6, 7, 8, 9, 10, 11, 12, 13, 14
Set 3: 2, 3, 3, 3, 4, 16, 17, 17, 17, 18
&lt;/pre&gt;

&lt;h3 id=&quot;the-median&quot;&gt;The median&lt;/h3&gt;

&lt;p&gt;Unlike the amean, the median actually does show up in the data set (more or less).  The median is the middle data point (also called
the 50&lt;sup&gt;th&lt;/sup&gt; percentile) after sorting all data points in ascending order.  In Set 2 above, the median is 10.  Set 1 is slightly different
because there are an even number of data points, and consequently, two middle points.  In this case, we take the amean of the two middle
points, and we end up with &lt;code&gt;(1+1)/2 == 1&lt;/code&gt;.&lt;/p&gt;

&lt;p&gt;The cool thing about medians is that they aren’t susceptible to outliers.  The point with value 91 in Set 1 doesn’t affect the median
at all.  It reacts more to where the bulk of the data is.&lt;/p&gt;

&lt;p&gt;What makes medians hard to calculate is that you need to hold a sorted list of all data points in memory at once.  This is fine if your
dataset is small, but as you start growing beyond a few thousand data points, it gets fairly memory intensive.  (Ever notice how most
databases do not have a MEDIAN function, but most spreadsheet applications do?)&lt;/p&gt;

&lt;p&gt;Medians are also not a great measure of central tendency when you have a bimodal distribution like in Set 3.  In this case we have 
two separate clusters of data, but the median ends up being &lt;code&gt;(4+16)/2 == 10&lt;/code&gt;, which isn’t even in the dataset.&lt;/p&gt;

&lt;h3 id=&quot;the-mode&quot;&gt;The mode&lt;/h3&gt;

&lt;p&gt;We’ve mentioned bimodal distributions.  The term bimodal comes from the fact that the distribution has two modes.  Which brings up the
question, “What is a mode?”.  The French term &lt;em&gt;la mode&lt;/em&gt; means fashion (the related phrase &lt;em&gt;à la mode&lt;/em&gt; means in style).
In terms of a data distribution, the mode is the most popular term in the dataset.&lt;/p&gt;

&lt;p&gt;Looking at our three datasets above, the modes are 1, n/a and (3, 17).&lt;/p&gt;

&lt;p&gt;In the first set, the most popular (or frequent) value is 1.  In set 3, it’s a tie between 3 and 17.  In set 2, each term shows up only
once, so there’s no “most popular” term.&lt;/p&gt;

&lt;p&gt;When looking at large sets of real data though, we’ll approximate a bit: we may call a distribution multi-modal (or bimodal) if it has
more than one term that’s far more popular than the others, even if all modes do not have equal popularity.&lt;/p&gt;

&lt;p&gt;Looking back at our third distribution, we see that the two peaks aren’t of the same height, but they’re close enough, and each is a local
maximum so we consider the distribution bimodal.&lt;/p&gt;

&lt;p&gt;Finding a single mode involves finding the most popular data point in a data set.  This isn’t too hard, and only requires a frequency table,
which is less memory intensive than storing every data point.  Finding multiple modes involves walking the distribution to find local maxima.&lt;/p&gt;

&lt;h3 id=&quot;the-geometric-mean&quot;&gt;The geometric mean&lt;/h3&gt;

&lt;p&gt;Just like the arithmetic mean, the geometric mean involves counters.  Unlike the arithmetic mean, it uses multiplication and roots rather than
addition and division. We’ll use the terms &lt;code&gt;gmean&lt;/code&gt; or &lt;code&gt;&amp;mu;&lt;sub&gt;g&lt;/sub&gt;&lt;/code&gt; to refer to the geometric mean.&lt;/p&gt;

&lt;p&gt;The geometric mean of a set of &lt;em&gt;N&lt;/em&gt; numbers is the &lt;em&gt;N&lt;sup&gt;th&lt;/sup&gt; root of the product of all N numbers&lt;/em&gt;.  Writing this in code,
it would look like:&lt;/p&gt;

&lt;figure class=&quot;highlight&quot;&gt;&lt;pre&gt;&lt;code class=&quot;language-c&quot; data-lang=&quot;c&quot;&gt;&lt;span class=&quot;n&quot;&gt;pow&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;x1&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;*&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;x2&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;*&lt;/span&gt; &lt;span class=&quot;p&quot;&gt;...&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;*&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;xN&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt; &lt;span class=&quot;mi&quot;&gt;1&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;/&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;N&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/figure&gt;

&lt;p&gt;The problem we’re most likely to encounter when calculating a geometric mean is numeric overflow.  If you multiply too many numbers, your
product is going to overflow sooner or later.  Luckily, there’s more than one way to do things mathematically, and we can convert multiplication
and roots into summation and division using logs and exponents.  So the above expression turns into:&lt;/p&gt;

&lt;figure class=&quot;highlight&quot;&gt;&lt;pre&gt;&lt;code class=&quot;language-c&quot; data-lang=&quot;c&quot;&gt;&lt;span class=&quot;n&quot;&gt;exp&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;((&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;log&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;x1&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;+&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;log&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;x2&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;+&lt;/span&gt; &lt;span class=&quot;p&quot;&gt;...&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;+&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;log&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;xN&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;))&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;/&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;N&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/figure&gt;

&lt;p&gt;So when would one use the geometric mean?&lt;/p&gt;

&lt;p&gt;It turns out that the amean is great for Normal distributions and similarly, the gmean is great for Log-normal distributions.&lt;/p&gt;

&lt;h2 id=&quot;spread&quot;&gt;Spread&lt;/h2&gt;

&lt;p&gt;So we’ve looked at central tendency a bit, but that only tells us what our data is centered around.  None of the central tendency numbers
tell us how closely we’re centered around that value.  We also need a means to tell us the spread of the data.&lt;/p&gt;

&lt;p&gt;When dealing with means (arithmetic or geometric), we can use the variance, standard deviation or standard error.  The method for calculating
these numbers is mostly the same for arithmetic and geometric means, except that we’ll use exponents and logs for the geometric spread.&lt;/p&gt;

&lt;p&gt;For the median, spread is determined by looking at other percentile values, quartiles and inter quartile filtering.&lt;/p&gt;

&lt;h3 id=&quot;arithmetic-standard-deviation&quot;&gt;Arithmetic Standard Deviation&lt;/h3&gt;

&lt;p&gt;The traditional way of calculating a standard deviation involves taking the square of the difference between each data point and the amean,
then taking the mean of all those squares, and finally taking the square root of this new mean.  This is also called a Root Mean Square (RMS for short).&lt;/p&gt;

&lt;p&gt;This requires us to keep track of every data point until we’ve calculated the mean.&lt;/p&gt;

&lt;p&gt;A less memory intensive way, and one which can be streamed is to calculate a running sum of squares.  If we then want the standard deviation at any point,
we use the following expression:&lt;/p&gt;

&lt;figure class=&quot;highlight&quot;&gt;&lt;pre&gt;&lt;code class=&quot;language-c&quot; data-lang=&quot;c&quot;&gt;&lt;span class=&quot;n&quot;&gt;sqrt&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;sum&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;x&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;^&lt;/span&gt;&lt;span class=&quot;mi&quot;&gt;2&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;/&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;N&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;-&lt;/span&gt; &lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;sum&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;x&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;/&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;N&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;^&lt;/span&gt;&lt;span class=&quot;mi&quot;&gt;2&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/figure&gt;

&lt;p&gt;As data comes in, we need to add it to our sum and add its square to our sum of squares, and that’s about it.&lt;/p&gt;

&lt;p&gt;The details of how we end up with this expression are well documented on the &lt;a href=&quot;http://en.wikipedia.org/wiki/Standard_deviation&quot;&gt;Wikipedia
page about Standard deviation&lt;/a&gt;, so I won’t go into them here.  We’ll refer to the standard deviation using the terms &lt;code&gt;SD&lt;/code&gt; or 
&lt;code&gt;&lt;abbr title=&quot;The greek letter sigma&quot;&gt;&amp;sigma;&lt;/abbr&gt;&lt;/code&gt;.&lt;/p&gt;

&lt;h3 id=&quot;geometric-standard-deviation&quot;&gt;Geometric Standard Deviation&lt;/h3&gt;

&lt;p&gt;The geometric standard deviation is almost the same, except we use &lt;code&gt;log(x)&lt;/code&gt; instead of x, and once we get the final result, pass it to
the &lt;code&gt;exp()&lt;/code&gt; function. Similar to the arithmetic standard deviation, we’ll use the symbol &lt;code&gt;&amp;sigma;&lt;sub&gt;g&lt;/sub&gt;&lt;/code&gt; to refer to the
geometric standard deviation.&lt;/p&gt;

&lt;p&gt;The one difference between arithmetic and geometric standard deviation is the notation we use for the spread.  For arithmetic standard deviation, we use
&lt;code&gt;&amp;mu; &amp;plusmn; &amp;sigma;&lt;/code&gt; whereas for the geometric standard deviation, we use &lt;code&gt;&amp;mu;&lt;sub&gt;g&lt;/sub&gt; */ &amp;sigma;&lt;sub&gt;g&lt;/sub&gt;&lt;/code&gt;.&lt;/p&gt;

&lt;h3 id=&quot;percentiles&quot;&gt;Percentiles&lt;/h3&gt;

&lt;p&gt;As we’ve seen in the curves above, real user performance data can have a very long tail, and if you care about user experience, you’ll want to know
how far to the right that tail goes.  We can do this by looking at things like the 95&lt;sup&gt;th&lt;/sup&gt;, 98&lt;sup&gt;th&lt;/sup&gt; or 99&lt;sup&gt;th&lt;/sup&gt; percentile values
of the curve.  The method of getting a percentile is exactly the same as the method for getting the median.  We need to sort all data points in ascending
order, and then pick the p&lt;sup&gt;th&lt;/sup&gt; point depending on the percentile we care about.&lt;/p&gt;

&lt;p&gt;For example, if we have 1 million data points, and want to know the 95&lt;sup&gt;th&lt;/sup&gt; percentile, we’d look for the 1,000,000 * 0.95 == 950,000&lt;sup&gt;th&lt;/sup&gt; point.
Similarly, for the 75&lt;sup&gt;th&lt;/sup&gt; or 25&lt;sup&gt;th&lt;/sup&gt; percentiles, we’d look for the 750,000&lt;sup&gt;th&lt;/sup&gt; or 250,000&lt;sup&gt;th&lt;/sup&gt; points.&lt;/p&gt;

&lt;p&gt;In Set 2 (reproduced below), with 9 points, what would the 95&lt;sup&gt;th&lt;/sup&gt; and 25&lt;sup&gt;th&lt;/sup&gt; percentiles be?  For fractional indices, round down if your arrays
are zero based.&lt;/p&gt;
&lt;pre&gt;
Set 2: 6, 7, 8, 9, 10, 11, 12, 13, 14
&lt;/pre&gt;

&lt;h3 id=&quot;inter-quartile-range&quot;&gt;Inter Quartile Range&lt;/h3&gt;

&lt;p&gt;The Inter Quartile Range is the middle 50% of data points in a set.  It includes all points between the 25&lt;sup&gt;th&lt;/sup&gt; and 75&lt;sup&gt;th&lt;/sup&gt; percentiles.
The IQR is more robust than the entire range (min, max) since outliers are not included in the set.  IQR is actually a single number which is the difference
between the 75&lt;sup&gt;th&lt;/sup&gt; and 25&lt;sup&gt;th&lt;/sup&gt; percentile numbers.&lt;/p&gt;

&lt;h2 id=&quot;data-filtering&quot;&gt;Data filtering&lt;/h2&gt;

&lt;p&gt;When dealing with real user performance data, we may need to apply two levels of filtering.  The first is to strip out absurd data.  Remember that as with
any data received over a web interface, you really cannot tell whether the data you’re receiving was sent by code you wrote or by someone else trying to
masquerade as your code.  The best you can do is require sane limits on your inputs, and make sure they fit these limits.&lt;/p&gt;

&lt;p&gt;In addition to limiting for sanity, you also need to split your data set into two (or three) parts, one of which includes typical data points, and the
remainder including outliers.  These are both interesting sets, but need to be analysed separately.&lt;/p&gt;

&lt;h3 id=&quot;band-pass--sanity-filtering&quot;&gt;Band-pass &amp;amp; sanity filtering&lt;/h3&gt;

&lt;p&gt;If you’ve ever used a graphic equaliser on a music system, you’ve worked with a band pass filter.  In the audio-electronics world, a band-pass filter
passes through components of the audio stream that fall within a certain frequency band, while blocking everything else.  For example, to enhance bass
effects and dampen other effects, we might pass through signals between 20Hz and 200Hz and block everything else, or let something else deal with it
through a parallel stream.&lt;/p&gt;

&lt;p&gt;With performance data, we can define similar limits.  You should never see a page load time less than 0 seconds, and in fact it’s highly unlikely that
you’d see a page load time of under 50 milliseconds (loading content from cache may be an exception).  It’s also unlikely that you’d see a page load
time of over 3-4 minutes… not because it doesn’t happen, but because users are unlikely to hang around that long&lt;sup&gt;&lt;a href=&quot;#bm-stats-note-1&quot;&gt;*&lt;/a&gt;&lt;/sup&gt;.&lt;/p&gt;

&lt;p&gt;Similarly, if you see timestamps in the distant past or distant future, chances are that it’s either fake data, or a very badly misconfigured system
sending you that data.  In both cases it’s probably something you want to drop (or pass to a separate handler for further analysis).&lt;/p&gt;

&lt;p id=&quot;bm-stats-note-1&quot; style=&quot;font-size:0.9em;&quot;&gt;* Users may tolerate very long page load times if the page loads in a background tab and they never actually see it.&lt;/p&gt;

&lt;h3 id=&quot;iqr-filtering&quot;&gt;IQR Filtering&lt;/h3&gt;

&lt;p&gt;IQR filtering is based on the Inter Quartile Range that we saw earlier.  Its job is to strip out outliers so we only look at typical data.  To filter a
dataset using IQR filtering, we first find the inter quartile range &lt;code&gt;(Q3-Q1)&lt;/code&gt;.&lt;/p&gt;

&lt;p&gt;We then define a field width of 1.5 times this range: &lt;code&gt;fw = 1.5 * (Q3-Q1)&lt;/code&gt;.&lt;/p&gt;

&lt;p&gt;Finally, we run a band-pass filter on the data set with an &lt;strong&gt;open&lt;/strong&gt; interval of &lt;code&gt;(Q1-fw, Q3+fw)&lt;/code&gt;.  An open interval is one in
which the end points are &lt;strong&gt;not&lt;/strong&gt; included, so your test would be &lt;code&gt;x[i] &amp;gt; Q1-fw &amp;amp;&amp;amp; x[i] &amp;lt; Q3+fw&lt;/code&gt;.&lt;/p&gt;

&lt;p&gt;We can also include points that fall below the interval into a low-outliers group, and points that fall above the interval into a high-outliers group.&lt;/p&gt;

&lt;p&gt;The great thing about IQR filtering is that it’s based on your dataset and not on some arbitrary limits derived from intuition ;)  In other words,
it will work just as well for datapoints coming in over a slow dialup network and datapoints coming in over a T3 line.  A straight band-pass filter
might not.&lt;/p&gt;

&lt;h2 id=&quot;example-numbers&quot;&gt;Example Numbers&lt;/h2&gt;

&lt;p&gt;As an example, here are some of the numbers we see for a near Log-normal distribution of data:&lt;/p&gt;
&lt;table class=&quot;data&quot; summary=&quot;Sample values for arithmetic mean, geometric mean and median&quot;&gt;
&lt;thead&gt;
 &lt;tr&gt;&lt;th scope=&quot;col&quot;&gt;Beacons&lt;/th&gt; &lt;th scope=&quot;col&quot;&gt;A-Mean&lt;/th&gt; &lt;th scope=&quot;col&quot;&gt;G-Mean&lt;/th&gt; &lt;th scope=&quot;col&quot;&gt;Median&lt;/th&gt; &lt;th scope=&quot;col&quot;&gt;95&lt;sup&gt;th&lt;/sup&gt;&lt;/th&gt; &lt;th scope=&quot;col&quot;&gt;98&lt;sup&gt;th&lt;/sup&gt;&lt;/th&gt;&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
 &lt;tr&gt;&lt;td&gt;1.5M&lt;/td&gt; &lt;td&gt;4.2s&lt;/td&gt; &lt;td&gt;2.50s&lt;/td&gt; &lt;td&gt;2.39s&lt;/td&gt; &lt;td&gt;11s&lt;/td&gt; &lt;td&gt;19s&lt;/td&gt;&lt;/tr&gt;
 &lt;tr&gt;&lt;td&gt;870K&lt;/td&gt; &lt;td&gt;6.4s&lt;/td&gt; &lt;td&gt;3.99s&lt;/td&gt; &lt;td&gt;3.87s&lt;/td&gt; &lt;td&gt;17s&lt;/td&gt; &lt;td&gt;29s&lt;/td&gt;&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;

&lt;p&gt;Notice how close the geometric mean and the median are to each other while the arithmetic mean gets pulled out to the right.&lt;/p&gt;

&lt;h2 id=&quot;endgame&quot;&gt;Endgame&lt;/h2&gt;

&lt;p&gt;We’ve gone over how to find central tendency and spread and how to filter your data.  This covers everything you need to analyse your performance data.  Have a look at
the references for more information.&lt;/p&gt;

&lt;p&gt;If you need an easy way to collect data, have a look at &lt;a href=&quot;http://www.lognormal.com/boomerang/doc/&quot;&gt;boomerang&lt;/a&gt;, &lt;a href=&quot;http://www.lognormal.com/&quot;&gt;LogNormal&lt;/a&gt;’s
open source, extensible JavaScript library that measures page load time, bandwidth, DNS and a bunch of other performance characteristics in the user’s
browser.  It is actively developed and supported by the folks at LogNormal, and contributors from around the world.&lt;/p&gt;

&lt;p&gt;And if you’d rather not do all of this yourself, have a look at the &lt;abbr title=&quot;Real User Measurement&quot;&gt;RUM&lt;/abbr&gt; tool we’ve built
at &lt;a href=&quot;http://www.lognormal.com/&quot;&gt;LogNormal&lt;/a&gt;. Send us up to 100 million data points a month and we’ll do the rest.&lt;/p&gt;

&lt;p&gt;Also, to celebrate &lt;a href=&quot;http://www.speedawarenessmonth.com/&quot;&gt;Speed Awareness Month&lt;/a&gt;, we’re giving away &lt;strong&gt;2 free months&lt;/strong&gt; of our Pro service if
you sign up at &lt;a href=&quot;http://www.lognormal.com/promos/speedawarenessmonth&quot;&gt;http://www.lognormal.com/promos/speedawarenessmonth&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;Lastly, I will be speaking about this topic at Performance meetup groups around the US.  The first one will be 
&lt;a href=&quot;http://www.meetup.com/Web-Performance-Boston/events/75714302/&quot;&gt;tomorrow (August 14, 2012) in Boston&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;Thanks for following along, and let’s go make the web faster.&lt;/p&gt;

</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/9202693916464833877/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2012/08/analyzing-performance-data-with.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/9202693916464833877'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/9202693916464833877'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2012/08/analyzing-performance-data-with.html' title='Analyzing Performance Data with Statistics'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-4543273398155731472</id><published>2012-06-29T00:56:00.002-04:00</published><updated>2012-06-29T00:58:21.740-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="authentication"/><category scheme="http://www.blogger.com/atom/ns#" term="google"/><category scheme="http://www.blogger.com/atom/ns#" term="ios"/><category scheme="http://www.blogger.com/atom/ns#" term="two factor auth"/><category scheme="http://www.blogger.com/atom/ns#" term="wifi"/><title type='text'>iOS, Google WiFi and 2 factor auth -- clearly untested UX</title><content type='html'>&lt;p&gt;
So after &lt;a href=&quot;http://www.webperfdays.org&quot;&gt;WebPerfDays&lt;/a&gt; today, a bunch of us ended up at a Pizza place in Mountain View.  Naturally the first thing we all did was search for wifi in the area and try to get on to a network from our mobile devices.
&lt;/p&gt;
&lt;p&gt;
Now Mountain View has Google Wifi, and it appears as if they now require you to sign in with your Gmail account, and that&#39;s where the problem comes in... for me at least.  I have two factor auth turned on for my google accounts, which means that after I type in my username and password, I get to a second screen to enter my second authentication token.  This token comes from an app on my iOS device... the same device I was trying to log in with.
&lt;/p&gt;
&lt;p&gt;
I switched to the app to get the token number, but as soon as I did that, iOS decided that I didn&#39;t actually want to sign in to the wireless network, and disassociated itself from the Access Point (AP).
&lt;/p&gt;
&lt;p&gt;
Once I&#39;d got the number, I switched back to the settings app and it initiated login again, which means I had to enter my username and password again, and by the time I&#39;d reached the token screen, the token had expired.
&lt;/p&gt;
&lt;p&gt;
This is what the token screen looks like:
&lt;/p&gt;
&lt;div style=&quot;text-align: center;&quot;&gt;&lt;img src=&quot;https://img.skitch.com/20120629-qgdjgqr985wq7sd66fxnfba5ah.png&quot; alt=&quot;iOS screenshot with Google 2 factor auth screen and Authenticator app&quot;&gt;&lt;/div&gt;
&lt;p&gt;
It was rather annoying.
&lt;/p&gt;
&lt;p&gt;
It then hit me that I could copy the token to the clipboard, and then paste it into the token text field, which should shave a few seconds off and maybe let me through.
&lt;/p&gt;
&lt;p&gt;
That worked, but it was still annoying.
&lt;/p&gt;
&lt;p&gt;
We started talking about how this interface could be improved.  There are a few reasons why this is a problem, and I think they&#39;re mostly Apple&#39;s fault.
&lt;/p&gt;
&lt;p&gt;
When you connect to a wireless network, iOS attempts to connect to www.apple.com.  If it gets redirected somewhere else, it assumes that it&#39;s being asked to authenticate, and displays whatever page it gets redirected to in a browser-like window.
&lt;/p&gt;
&lt;p&gt;
The problem is that if you do anything other than interact with the content in this window, iOS treats it exactly the same as hitting the &quot;Cancel&quot; button (top right of the screenshot), terminates the login and dissociates from the AP.
&lt;/p&gt;
&lt;p&gt;
This means that you cannot switch to the Authenticator App (second app at the bottom of the screenshot) to get your token.
&lt;/p&gt;
&lt;p&gt;
Can Apple fix this?
&lt;/p&gt;
&lt;p&gt;
Yes, just don&#39;t cancel sign in unless I explicitly click cancel.
&lt;/p&gt;
&lt;p&gt;
Can Google fix this?
&lt;/p&gt;
&lt;p&gt;
Maybe, if they could provide a link or something that would open the Authenticator app right from that page and let me pull the number out of it (I don&#39;t know enough about iOS to know if this is possible).
&lt;/p&gt;
&lt;p&gt;
Do any Apple/Google engineers want to take this up?
&lt;/p&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/4543273398155731472/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2012/06/ios-google-wifi-and-2-factor-auth.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/4543273398155731472'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/4543273398155731472'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2012/06/ios-google-wifi-and-2-factor-auth.html' title='iOS, Google WiFi and 2 factor auth -- clearly untested UX'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-6012780358683396175</id><published>2012-06-09T14:17:00.002-04:00</published><updated>2012-06-09T22:19:59.305-04:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="groupon"/><category scheme="http://www.blogger.com/atom/ns#" term="passwords"/><category scheme="http://www.blogger.com/atom/ns#" term="security"/><category scheme="http://www.blogger.com/atom/ns#" term="ssl"/><title type='text'>Password reset over HTTP -- Part 3</title><content type='html'>&lt;p&gt;
It&#39;s been a while since my &lt;a href=&quot;http://tech.bluesmoon.info/2012/01/https-for-login-but-http-for-password.html&quot;&gt;last&lt;/a&gt; &lt;a href=&quot;http://tech.bluesmoon.info/2012/01/password-reset-over-http-part-2.html&quot;&gt;two&lt;/a&gt; posts on the topic.  This time it&#39;s Groupon.
&lt;/p&gt;

&lt;p&gt;
The password reset page is over HTTP:&lt;br&gt;
&lt;img src=&quot;https://img.skitch.com/20120609-q5p2x266t7kd7x362me6fig3sx.png&quot; alt=&quot;groupon-forgot-password&quot; /&gt;
&lt;/p&gt;

&lt;p&gt;
The reset password email that you receive contains a link that looks like this:
&lt;/p&gt;

&lt;pre&gt;
http://groupon.com/users/password_reset/{token}?utm_source=password_reset \
    &amp;amp;utm_medium=email&amp;amp;sid={sid}&amp;amp;user={uid}&amp;amp;date={YYYYmmdd}
&lt;/pre&gt;
&lt;p&gt;
This link does a 301 to itself and then a 302 to an HTTPS version of itself.
&lt;/p&gt;
&lt;p&gt;
The good thing is that your new password is sent over SSL.  The bad thing is that your reset token is sent in clear text.
&lt;/p&gt;
&lt;p&gt;
&lt;b&gt;Update:&lt;/b&gt; Groupon fixed this issue a couple of hours after I reported it.
&lt;/p&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/6012780358683396175/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2012/06/password-reset-over-http-part-3.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6012780358683396175'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6012780358683396175'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2012/06/password-reset-over-http-part-3.html' title='Password reset over HTTP -- Part 3'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-6902571639913344665</id><published>2012-01-27T17:36:00.000-05:00</published><updated>2012-01-27T17:36:28.119-05:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="form"/><category scheme="http://www.blogger.com/atom/ns#" term="hack"/><category scheme="http://www.blogger.com/atom/ns#" term="macosx"/><category scheme="http://www.blogger.com/atom/ns#" term="pdf"/><category scheme="http://www.blogger.com/atom/ns#" term="printing"/><title type='text'>Saving a PDF that doesn&#39;t allow saving form contents</title><content type='html'>&lt;p&gt;Several organisations, consulates, for example, have forms that need to be filled up in PDFs.  
They have very smart PDFs that change as you fill out the form and generate nice 2D bar codes at the end with all the information easily scannable when you submit the form.  Most of these forms can be saved after you&#39;ve filled them out, which is important if it&#39;s complex and you need to work on it for a few days, or if you need to put it on a pen drive and take it somewhere else to print.&lt;/p&gt;
&lt;p&gt;
Every now and then, however, I&#39;ve come across a PDF that won&#39;t allow you to save the form contents.  This kinda sucks, so I decided to find a workaround.
&lt;/p&gt;&lt;p&gt;
I first tried the print to PDF option, however Adobe won&#39;t let you print these particular PDFs to a PDF.
&lt;/p&gt;&lt;p&gt;
I tried Preview, but you can&#39;t fill out the form in Preview.
&lt;/p&gt;&lt;p&gt;
Then I tried actually printing it to a dummy printer.  Note that this is for MacOSX.&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;The first step is to open your printer settings.  If you don&#39;t have a printer create one:

&lt;div class=&quot;thumbnail&quot;&gt;
&lt;a href=&quot;https://skitch.com/bluesmoon/ghtue/print-and-fax&quot;&gt;&lt;img alt=&quot;Print &amp;amp; Fax&quot; src=&quot;https://img.skitch.com/20120127-q7asqxmnd3jac5wmaargj8rnyx.preview.jpg&quot; /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;/li&gt;

&lt;li&gt;Then open the Print Queue and pause it
&lt;div class=&quot;thumbnail&quot;&gt;&lt;a href=&quot;https://skitch.com/bluesmoon/ghtwe/hp-deskjet-f4200-series&quot;&gt;&lt;img src=&quot;https://img.skitch.com/20120127-gtrgr7cxusp1pyxrwhynqudq1u.preview.jpg&quot; alt=&quot;HP Deskjet F4200 series&quot; /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;/li&gt;

&lt;li&gt;Step 3 is to print your document to this printer.&lt;/li&gt;

&lt;li&gt;Now look into &lt;code&gt;/private/var/spool/cups&lt;/code&gt; for a file that was created within the last few minutes.  It should start with &lt;code&gt;d&lt;/code&gt; and have a lot of numbers after it. You&#39;ll need sudo:
&lt;pre&gt;sudo ls -l /private/var/spool/cups&lt;/pre&gt;&lt;/li&gt;

&lt;li&gt;Copy this file somewhere convenient, and give it a &lt;code&gt;.ps&lt;/code&gt; extension (it&#39;s a PostScript file).&lt;/li&gt;

&lt;li&gt;You can now open this file with Preview.  I recommend opening it, reordering the pages, and then saving it as a PDF.&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;That&#39;s it.&lt;/p&gt;

There&#39;s a way to do it for windows too, but I don&#39;t have a windows box handy to document it.  Linux is trivial.</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/6902571639913344665/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2012/01/saving-pdf-that-doesnt-allow-saving.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6902571639913344665'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/6902571639913344665'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2012/01/saving-pdf-that-doesnt-allow-saving.html' title='Saving a PDF that doesn&#39;t allow saving form contents'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7715485.post-8460544245142580307</id><published>2012-01-25T12:21:00.000-05:00</published><updated>2012-01-25T12:22:37.688-05:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="http"/><category scheme="http://www.blogger.com/atom/ns#" term="passwords"/><category scheme="http://www.blogger.com/atom/ns#" term="security"/><category scheme="http://www.blogger.com/atom/ns#" term="ssl"/><title type='text'>Password reset over HTTP -- Part 2</title><content type='html'>&lt;p&gt;
So it looks like I&#39;ve been forgetting a lot of my passwords recently.  After yesterday&#39;s issue with &lt;a href=&quot;http://tech.bluesmoon.info/2012/01/https-for-login-but-http-for-password.html&quot;&gt;delicious submitting passwords in the clear&lt;/a&gt;, today I have a problem with livemocha.com.
&lt;/p&gt;&lt;p&gt;
As before, their login page is properly secured, but the password reset page is over HTTP:
&lt;/p&gt;&lt;p&gt;
This is the password reset page:
&lt;/p&gt;
&lt;div class=&quot;thumbnail&quot;&gt;&lt;a href=&quot;https://skitch.com/bluesmoon/g52th/livemocha-password-reset-over-http&quot;&gt;&lt;img src=&quot;https://img.skitch.com/20120125-g2u5ngmj819qbc6hdg7i9khk96.preview.jpg&quot; alt=&quot;livemocha - password reset over http&quot; /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;p&gt;
And this is the URL the passwords are POSTed to, in clear text:
&lt;/p&gt;
&lt;div class=&quot;thumbnail&quot;&gt;&lt;a href=&quot;https://skitch.com/bluesmoon/g52ug/livemocha-password-reset-submitted-over-http&quot;&gt;&lt;img src=&quot;https://img.skitch.com/20120125-xwubw8hy8484ngbhy3xewq56k.preview.jpg&quot; alt=&quot;livemocha - password reset submitted over http&quot; /&gt;&lt;/a&gt;&lt;/div&gt;
&lt;p&gt;
They also include third-party code on their page; in this case it&#39;s a Flash object from userplane.com, Google Analytics, and some JavaScript from pbc.com (alias for paybycash.com).
&lt;/p&gt;&lt;p&gt;
I&#39;ve gotten in touch with them via their online form.  Let&#39;s hope they respond.
&lt;/p&gt;</content><link rel='replies' type='application/atom+xml' href='http://tech.bluesmoon.info/feeds/8460544245142580307/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://tech.bluesmoon.info/2012/01/password-reset-over-http-part-2.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/8460544245142580307'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7715485/posts/default/8460544245142580307'/><link rel='alternate' type='text/html' href='http://tech.bluesmoon.info/2012/01/password-reset-over-http-part-2.html' title='Password reset over HTTP -- Part 2'/><author><name>Philip</name><uri>http://www.blogger.com/profile/18075968083522627991</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgB3VMwgP9Uv9oArFfhcSIDVXc6Fx3KC89gOnDJpNHr04JAy3holcxTlv6x0Hk3mxD7Vi5Bug_k9r1f7Uvr0_hyRjnXmxsh8mF6pcmjke7QFQMMya4VXIIGUKgM8uxAnrM/s1600/bluesmoon.jpg'/></author><thr:total>0</thr:total></entry></feed>