<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>Metalinguistic Abstraction</title>
	<atom:link href="http://metalinguist.wordpress.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://metalinguist.wordpress.com</link>
	<description>Computer Languages, Programming, and Free Software</description>
	<lastBuildDate>Thu, 05 Jan 2012 08:00:30 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='metalinguist.wordpress.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://s2.wp.com/i/buttonw-com.png</url>
		<title>Metalinguistic Abstraction</title>
		<link>http://metalinguist.wordpress.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://metalinguist.wordpress.com/osd.xml" title="Metalinguistic Abstraction" />
	<atom:link rel='hub' href='http://metalinguist.wordpress.com/?pushpress=hub'/>
		<item>
		<title>emacs develock customization for Python</title>
		<link>http://metalinguist.wordpress.com/2010/02/14/emacs-develock-customization-for-python/</link>
		<comments>http://metalinguist.wordpress.com/2010/02/14/emacs-develock-customization-for-python/#comments</comments>
		<pubDate>Sun, 14 Feb 2010 17:53:33 +0000</pubDate>
		<dc:creator>fdr</dc:creator>
				<category><![CDATA[lisp]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[elisp]]></category>
		<category><![CDATA[emacs]]></category>

		<guid isPermaLink="false">http://metalinguist.wordpress.com/?p=46</guid>
		<description><![CDATA[This has been annoying me for some time: develock mode doesn&#8217;t support Python out of the box.  I had a hacked-up develock where I simply changed all references of Ruby (via string-replace) to Python, and things worked pretty well&#8230;but I had to constantly load my hacked up develock. Well, I finally got around to customizing [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=46&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>This has been annoying me for some time: develock mode doesn&#8217;t support Python out of the box.  I had a hacked-up develock where I simply changed all references of Ruby (via string-replace) to Python, and things worked pretty well&#8230;but I had to constantly load my hacked up develock.</p>
<p>Well, I finally got around to customizing develock properly post-facto, I think.  Rejoice, fellow whitespace pedants.  Here&#8217;s the snippet:</p>
<blockquote>
<pre>;;
;; develock-py.el
;;
;; Made by Daniel Farina
;; Login   &lt;drfarina@acm.org&gt;
;;
;; Started on  Sun Feb 14 09:21:21 2010 Daniel Farina
;; Last update Sun Feb 14 09:27:12 2010 Daniel Farina
;;

(require 'develock)

(defcustom develock-python-font-lock-keywords
 '(;; a long line
 (develock-find-long-lines
 (1 'develock-long-line-1 t)
 (2 'develock-long-line-2 t))
 ;; long spaces
 (develock-find-tab-or-long-space
 (1 'develock-whitespace-2)
 (2 'develock-whitespace-3 nil t))
 ;; trailing whitespace
 ("[^\t\n ]\\([\t ]+\\)$"
 (1 'develock-whitespace-1 t))
 ;; spaces before tabs
 ("\\( +\\)\\(\t+\\)"
 (1 'develock-whitespace-1 t)
 (2 'develock-whitespace-2 t))
 ;; tab space tab
 ("\\(\t\\) \t"
 (1 'develock-whitespace-2 append))
 ;; only tabs or spaces in the line
 ("^[\t ]+$"
 (0 'develock-whitespace-2 append))
 ;; reachable E-mail addresses
 ("&lt;?[-+.0-9A-Z_a-z]+@[-0-9A-Z_a-z]+\\(\\.[-0-9A-Z_a-z]+\\)+&gt;?"
 (0 'develock-reachable-mail-address t))
 ;; things to be paid attention
 ("\\&lt;\\(?:[Ff][Ii][Xx][Mm][Ee]\\|[Tt][Oo][Dd][Oo]\\)\\(?::\\|\\&gt;\\)"
 (0 'develock-attention t)))
 "Extraordinary level highlighting for the Python mode."
 :type develock-keywords-custom-type
 :set 'develock-keywords-custom-set
 :group 'develock
 :group 'font-lock)

(defvar python-font-lock-keywords-x nil
 "Extraordinary level font-lock keywords for the Python mode.")

(setq develock-keywords-alist
 (cons '(python-mode
 python-font-lock-keywords-x
 develock-python-font-lock-keywords)
 develock-keywords-alist))

(plist-put develock-max-column-plist 'python-mode 79)
</pre>
</blockquote>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/metalinguist.wordpress.com/46/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/metalinguist.wordpress.com/46/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/metalinguist.wordpress.com/46/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/metalinguist.wordpress.com/46/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/metalinguist.wordpress.com/46/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/metalinguist.wordpress.com/46/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/metalinguist.wordpress.com/46/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/metalinguist.wordpress.com/46/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/metalinguist.wordpress.com/46/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/metalinguist.wordpress.com/46/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/metalinguist.wordpress.com/46/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/metalinguist.wordpress.com/46/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/metalinguist.wordpress.com/46/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/metalinguist.wordpress.com/46/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=46&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://metalinguist.wordpress.com/2010/02/14/emacs-develock-customization-for-python/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/2ac9e89349327ec77018f08365098f59?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">fdr</media:title>
		</media:content>
	</item>
		<item>
		<title>dirsync: for completing metadata writes durably</title>
		<link>http://metalinguist.wordpress.com/2010/01/26/dirsync-for-completing-metadata-writes-durably/</link>
		<comments>http://metalinguist.wordpress.com/2010/01/26/dirsync-for-completing-metadata-writes-durably/#comments</comments>
		<pubDate>Wed, 27 Jan 2010 05:30:49 +0000</pubDate>
		<dc:creator>fdr</dc:creator>
				<category><![CDATA[dbms]]></category>
		<category><![CDATA[storage]]></category>
		<category><![CDATA[systems]]></category>

		<guid isPermaLink="false">http://metalinguist.wordpress.com/?p=30</guid>
		<description><![CDATA[The crux of the problem is that the call to rename(2) does not guarantee durability of the changes when rename returns. Using dirsync promises that metadata alterations in a directory are synchronous rather than asynchronous. One may want to read this post in more detail if he/she isn't already aware of dirsync and maintains programs that heavily rely on the atomicity of rename and other metadata operations. This includes all renames, creations, and deletions.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=30&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Today I encountered an obscure file attribute/equivalent mount option (if you are fine with these semantics mount-wide). It is more likely that one would know about this option should he/she be familiar with MTA software and presumably other software with strict data durability guarantees made by a POSIX file system, especially with regard to metadata.</p>
<p>The crux of the problem is that the call to <a title="rename man page" href="http://linux.die.net/man/2/rename"><code>rename(2)</code></a> does not guarantee durability of the changes when <code>rename</code> returns.  Using <code>dirsync</code> promises that metadata alterations in a directory are synchronous rather than asynchronous.  One may want to read this post in more detail if he/she isn&#8217;t already aware of <code>dirsync</code> and maintains programs that heavily rely on the atomicity of <code>rename</code> and other metadata operations. This includes all renames, creations, and deletions.</p>
<p><code>rename</code> makes <em>atomicity</em> guarantees, which are not to be confused with <em>durability</em> guarantees.  Guarantees include:</p>
<ul>
<li>One will never have two persistent links to the same file, even     if one should suffer a crash during or after a <code>rename</code> operation. (A transient double-existence while the system is still     on is deemed acceptable)</li>
<li>Even if another link is being destroyed by     the <code>rename</code> (i.e. a file exists with the destination     name), there will exist no time where the destination file name     does not exist (as     <strong>either</strong> the old <strong>or</strong> new     content)</li>
</ul>
<p>I wrote this post because I did not know a-priori what to be looking for when encountering some self-doubt about the robustness of two disparate systems utilizing two-phase commit during crash recovery, of which one half was a file system.  Keywords that came to my mind did not yield useful search results, so I ended up walking around the Linux source instead, where I came upon <code>dirsync</code>. This use of the search term is sufficiently obscure (it is much more often used as a shorthand for &#8216;directory synchronization&#8217;, e.g. <code>rsync</code>-ish tools) that one must disambiguate it by adding fairly specific keywords, such as &#8216;inode&#8217;.  Hopefully this post will raise awareness about the possible danger faced by most programs assuming the atomicity <em>and</em> durability of metadata changes and serve as good search-engine fodder to that effect.</p>
<p>Edit: I need to do some more investigation on what the tradeoffs are vs. fsync().  I think there&#8217;s mostly a speed benefit to avoiding a heavy fsync() call.  To the best of my knowledge, there is no fsync_metadata_only library function, and dirsync will give you those semantics, albeit using fairly blunt tools.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/metalinguist.wordpress.com/30/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/metalinguist.wordpress.com/30/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/metalinguist.wordpress.com/30/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/metalinguist.wordpress.com/30/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/metalinguist.wordpress.com/30/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/metalinguist.wordpress.com/30/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/metalinguist.wordpress.com/30/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/metalinguist.wordpress.com/30/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/metalinguist.wordpress.com/30/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/metalinguist.wordpress.com/30/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/metalinguist.wordpress.com/30/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/metalinguist.wordpress.com/30/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/metalinguist.wordpress.com/30/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/metalinguist.wordpress.com/30/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=30&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://metalinguist.wordpress.com/2010/01/26/dirsync-for-completing-metadata-writes-durably/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/2ac9e89349327ec77018f08365098f59?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">fdr</media:title>
		</media:content>
	</item>
		<item>
		<title>A different way to main()</title>
		<link>http://metalinguist.wordpress.com/2008/02/13/a-different-way-to-main/</link>
		<comments>http://metalinguist.wordpress.com/2008/02/13/a-different-way-to-main/#comments</comments>
		<pubDate>Wed, 13 Feb 2008 09:54:59 +0000</pubDate>
		<dc:creator>fdr</dc:creator>
				<category><![CDATA[python]]></category>
		<category><![CDATA[conventions]]></category>

		<guid isPermaLink="false">http://metalinguist.wordpress.com/?p=19</guid>
		<description><![CDATA[import sys import getopt class Usage(Exception): def __init__(self, msg): self.msg = msg def main(argv=None): if argv is None: argv = sys.argv try: try: opts, args = getopt.getopt(argv[1:], "h", ["help"]) except getopt.error, msg: raise Usage(msg) # more code, unchanged except Usage, err: print &#62;&#62;sys.stderr, err.msg print &#62;&#62;sys.stderr, "for help use --help" return 2 if __name__ == [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=19&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<blockquote>
<pre>import sys
import getopt

class Usage(Exception):
    def __init__(self, msg):
        self.msg = msg

def main(argv=None):
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, args = getopt.getopt(argv[1:], "h", ["help"])
        except getopt.error, msg:
             raise Usage(msg)
        # more code, unchanged
    except Usage, err:
        print &gt;&gt;sys.stderr, err.msg
        print &gt;&gt;sys.stderr, "for help use --help"
        return 2

if __name__ == "__main__":
    sys.exit(main())</pre>
</blockquote>
<p>What is this, you wonder? As it turns out, it&#8217;s<span class="as"> <a href="http://www.artima.com/weblogs/viewpost.jsp?thread=4829" title="main()">Guido van Rossum&#8217;s preferred way</a> to enter a Python program. It&#8217;s a sensible departure from the classic variant. Even though his post is from 2003, I am discovering it for the first time; perhaps I will adopt this idiom in some of my programs since it has been blessed by the lord of Python.</span></p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/metalinguist.wordpress.com/19/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/metalinguist.wordpress.com/19/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/metalinguist.wordpress.com/19/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/metalinguist.wordpress.com/19/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/metalinguist.wordpress.com/19/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/metalinguist.wordpress.com/19/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/metalinguist.wordpress.com/19/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/metalinguist.wordpress.com/19/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/metalinguist.wordpress.com/19/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/metalinguist.wordpress.com/19/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/metalinguist.wordpress.com/19/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/metalinguist.wordpress.com/19/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/metalinguist.wordpress.com/19/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/metalinguist.wordpress.com/19/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/metalinguist.wordpress.com/19/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/metalinguist.wordpress.com/19/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=19&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://metalinguist.wordpress.com/2008/02/13/a-different-way-to-main/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/2ac9e89349327ec77018f08365098f59?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">fdr</media:title>
		</media:content>
	</item>
		<item>
		<title>Django file and stream serving performance Gotcha</title>
		<link>http://metalinguist.wordpress.com/2008/02/12/django-file-and-stream-serving-performance-gotcha/</link>
		<comments>http://metalinguist.wordpress.com/2008/02/12/django-file-and-stream-serving-performance-gotcha/#comments</comments>
		<pubDate>Tue, 12 Feb 2008 21:51:42 +0000</pubDate>
		<dc:creator>fdr</dc:creator>
				<category><![CDATA[django]]></category>
		<category><![CDATA[projects]]></category>
		<category><![CDATA[python]]></category>
		<category><![CDATA[performance]]></category>

		<guid isPermaLink="false">http://metalinguist.wordpress.com/?p=18</guid>
		<description><![CDATA[Recently I&#8217;ve been doing a little bit of work with the Django web framework for Python. Part of this project involves having a bit of reasonable binary file streaming to and from the server. There is currently a patch in trac (#2070) slated for acceptance. So I apply it and try it out and try [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=18&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Recently I&#8217;ve been doing a little bit of work with the Django web framework for Python. Part of this project involves having a bit of reasonable binary file streaming to and from the server.  There is currently a patch in trac (<a title="django streaming patch" href="http://code.djangoproject.com/ticket/2070">#2070</a>) slated for acceptance. So I apply it and try it out and try copying some files in and out through the web server. I have some problems with the particulars of this patch and I intend to amend my complaints, but that&#8217;s for another post. What I discovered was an annoying performance gotcha in simply reading back binary files to be served to the user.</p>
<p>The gotcha is simple to expose:</p>
<p>In a Django view, use the documented functionality of passing a file-like object to the response object from the view; preferably a big, binary one. So you do something like this:</p>
<blockquote>
<pre>return HttpResponse(open('/path/to/big/file.bin'))</pre>
</blockquote>
<p>And then you surf on over to localhost and try grabbing this file. Your hard drive whirs and you notice your CPU usage is at 100% while serving the file slowly. Most people then rationalize it away saying &#8220;well, of course, Python is slow, so it makes sense that it would suck at this. Set up a dedicated static file serving server written in C and use some URL routing incantations.&#8221;</p>
<p>The crucial information that I had to dig for is how Django emits bytes to users. Django calls iter() on the input object and then uses calls to .next() to grab more bytes to write out to the stream. Once you factor in that <em>the default</em> iter() behavior for an open file in Python is to read lines, you realize that there&#8217;s just an enormous amount of time and unnecessarily evil buffering going on just to emit chunks of the file separated by (in the case of binary files) completely arbitrarily spaced newline bytes. The result is lots of heap abuse as well as lots of burned CPU time looking for these needles in the haystack.</p>
<p>The hack to address this is very simple: we write a tiny iterator wrapper that simply uses the read(<em>size</em>) call.  It can look something like this:</p>
<blockquote>
<pre>class FileIterWrapper(object):
  def __init__(self, flo, chunk_size = 1024**2):
    self.flo = flo
    self.chunk_size = chunk_size

  def next(self):
    data = self.flo.read(self.chunk_size)
    if data:
      return data
    else:
      raise StopIteration

  def __iter__(self):
    return self</pre>
</blockquote>
<p>1024 ** 2 in bytes is one megabyte in a chunk. When using this iterator the logic is simple and the result is that Python consumes very little CPU time and memory to rip through a file stream. It can be applied to the previous example like so:</p>
<blockquote>
<pre>return HttpResponse(FileIterWrapper(open('/path/to/big/file.bin')))</pre>
</blockquote>
<p>Now everything is fast and happy and running as it should.</p>
<p>So what should Django do about this? It could be just written off as an idiosyncrasy of the framework, but I think that the case is strong that Django should inspect for file-like objects and use more aggressive calls to .read() to prevent such unpredictable behavior. One problem with such large (1MB) read()s is that they may block for too long instead of trickling bytes to the user, so some asynchronous I/O strategy would be better.</p>
<p>There&#8217;s no reason why a small to moderate sized site should get hosed performance-wise because several people are downloading binary files from a Django server via modpython or wsgi.</p>
<p>Finally, proper error handling on disposing the file descriptor in the above examples is an exercise for the reader. I suggest using the &#8220;with&#8221; statement that can be currently imported from <code>__future__</code>.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/metalinguist.wordpress.com/18/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/metalinguist.wordpress.com/18/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/metalinguist.wordpress.com/18/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/metalinguist.wordpress.com/18/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/metalinguist.wordpress.com/18/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/metalinguist.wordpress.com/18/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/metalinguist.wordpress.com/18/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/metalinguist.wordpress.com/18/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/metalinguist.wordpress.com/18/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/metalinguist.wordpress.com/18/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/metalinguist.wordpress.com/18/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/metalinguist.wordpress.com/18/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/metalinguist.wordpress.com/18/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/metalinguist.wordpress.com/18/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/metalinguist.wordpress.com/18/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/metalinguist.wordpress.com/18/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=18&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://metalinguist.wordpress.com/2008/02/12/django-file-and-stream-serving-performance-gotcha/feed/</wfw:commentRss>
		<slash:comments>9</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/2ac9e89349327ec77018f08365098f59?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">fdr</media:title>
		</media:content>
	</item>
		<item>
		<title>the woes of &#8220;git gc --aggressive&#8221; (and how git deltas work)</title>
		<link>http://metalinguist.wordpress.com/2007/12/06/the-woes-of-git-gc-aggressive-and-how-git-deltas-work/</link>
		<comments>http://metalinguist.wordpress.com/2007/12/06/the-woes-of-git-gc-aggressive-and-how-git-deltas-work/#comments</comments>
		<pubDate>Thu, 06 Dec 2007 12:56:34 +0000</pubDate>
		<dc:creator>fdr</dc:creator>
				<category><![CDATA[distributed]]></category>
		<category><![CDATA[version-control]]></category>
		<category><![CDATA[email]]></category>
		<category><![CDATA[git]]></category>
		<category><![CDATA[scm]]></category>
		<category><![CDATA[vcs]]></category>

		<guid isPermaLink="false">http://metalinguist.wordpress.com/2007/12/06/git-gc-and-the-meek-shall-inherit-the-earth/</guid>
		<description><![CDATA[Today I found a gem in the git mailing lists that discusses a little bit about how git handles deltas in the pack and why — somewhat non-obviously — the aggressive git garbage collect (invoked by doing git gc --aggressive) is (generally) a big no-no. The verbatim email from Linus explaining this is affixed as part of the full text of this article.
<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=17&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Today I found a gem in the git mailing lists that discusses a little bit about how git handles deltas in the pack (i.e. efficiently storing revisions) and why — somewhat non-obviously — the aggressive git garbage collect (invoked by doing <code>git gc --aggressive</code>) is (generally) <b>a big no-no</b>. The verbatim email from Linus explaining this is affixed as part of the full text of this article.</p>
<h3>A quick summary</h3>
<p>Since there is little point in simply reposting this information (other than for personal archival), I will condense it here for quick reading:</p>
<p>Git does not use your standard per-file/per-commit forward and/or backward delta chains to derive files. Instead, it is legal to use <i>any</i> other stored version to derive another version. Contrast this to most version control systems where the only option is simply to compute the delta against the last version. The latter approach is so common probably because of a systematic tendency to couple the deltas to the revision history. In Git the development history is not in any way tied to these deltas (which are arranged to minimize space usage) and the history is instead imposed at a higher level of abstraction.</p>
<p>Now that we have exposed how git has some greater flexibility in choosing what revisions to derive another revision from we get to the problem with <code>--aggressive</code>.</p>
<p>Here&#8217;s  what the git-gc 1.5.3.7 man page has to say about it:</p>
<blockquote>
<pre>
       --aggressive
           Usually git-gc runs very quickly while providing good disk space
           utilization and performance. This option will cause git-gc to more
           aggressively optimize the repository at the expense of taking much
           more time. The effects of this optimization are persistent, so this
           option only needs to be used occasionally; every few hundred
           changesets or so.</pre>
</blockquote>
<p>Unfortunately, this characterization is very misleading. It can be true if one has a horrendous set of delta-derivations (for example: after doing a large <code>git-fast-import</code>), but its true behavior is to throw away all the old deltas and compute new ones from scratch. This may not sound so bad except that <code>--aggressive</code> isn&#8217;t aggressive enough at doing this to do a good job and may throw away better delta decisions made previously. For this reason <code>--aggressive</code> will probably be removed from the manpages and left as an undocumented feature for a while.</p>
<p>So now you ask: &#8220;Well, suppose I <i>do</i> really want to do the expensive thing because I just copied my company&#8217;s history into git and it has an inordinately large pack. How do I do it?&#8221;</p>
<p>Excerpted from Linus&#8217; mail here is a terse recipe (with some explanation) that may take a very long time and require a lot of RAM to run but should deliver results:</p>
<blockquote>
<pre>So the equivalent of "git gc --aggressive" - but done *properly* - is to
do (overnight) something like

	git repack -a -d --depth=250 --window=250

where that depth thing is just about how deep the delta chains can be
(make them longer for old history - it's worth the space overhead), and
the window thing is about how big an object window we want each delta
candidate to scan.

And here, you might well want to add the "-f" flag (which is the "drop all
old deltas", since you now are actually trying to make sure that this one
actually finds good candidates.</pre>
</blockquote>
<h3>Other notes and observations</h3>
<ul>
<li> If you have a development history where you constantly change between several particular versions of, say, a large binary blob — say a resource file of some kind — this operation can be very cheap under Git since it can delta against versions that are not adjacent in the development history.</li>
<li> The delta derivations don&#8217;t have to obey causality: a commit made chronologically later can be used to derive one made earlier. It&#8217;s just a bunch of blobs in a graph, there isn&#8217;t even a strictly necessary notion of time attached to each blob at all to begin with! That data is maintained at a higher level. Repack doesn&#8217;t have to know or care about when a commit was made. (The only reason it may care is to help implement heuristics. Right now no such heuristic exists[0])</li>
<li>Finding/verifying an optimal (space-minimizing) delta-derivation graph feels NP-hard. I now wave my hands furiously.</li>
</ul>
<p>[0]: From the <a href="http://www.kernel.org/pub/software/scm/git/docs/git-repack.html">git-repack man page</a>:</p>
<blockquote>
<pre>
--window=[N], --depth=[N]

    These two options affect how the objects contained in the pack are
    stored using delta compression. The objects are first internally
    sorted by type, size and optionally names and compared against the
    other objects within --window to see if using delta compression
    saves space. --depth limits the maximum delta depth; making it too
    deep affects the performance on the unpacker side, because delta
    data needs to be applied that many times to get to the necessary
    object. The default value for --window is 10 and --depth is 50.</pre>
</blockquote>
<p><span id="more-17"></span></p>
<h3>Linus&#8217; email to the list</h3>
<blockquote>
<pre>Date:	Wed, 5 Dec 2007 22:09:12 -0800 (PST)
From:	Linus Torvalds
Subject: Re: Git and GCC

On Thu, 6 Dec 2007, Daniel Berlin wrote:
&gt;
&gt; Actually, it turns out that git-gc --aggressive does this dumb thing
&gt; to pack files sometimes regardless of whether you converted from an
&gt; SVN repo or not.

Absolutely. git --aggressive is mostly dumb. It's really only useful for
the case of "I know I have a *really* bad pack, and I want to throw away
all the bad packing decisions I have done".

To explain this, it's worth explaining (you are probably aware of it, but
let me go through the basics anyway) how git delta-chains work, and how
they are so different from most other systems.

In other SCM's, a delta-chain is generally fixed. It might be "forwards"
or "backwards", and it might evolve a bit as you work with the repository,
but generally it's a chain of changes to a single file represented as some
kind of single SCM entity. In CVS, it's obviously the *,v file, and a lot
of other systems do rather similar things.

Git also does delta-chains, but it does them a lot more "loosely". There
is no fixed entity. Delta's are generated against any random other version
that git deems to be a good delta candidate (with various fairly
successful heursitics), and there are absolutely no hard grouping rules.

This is generally a very good thing. It's good for various conceptual
reasons (ie git internally never really even needs to care about the whole
revision chain - it doesn't really think in terms of deltas at all), but
it's also great because getting rid of the inflexible delta rules means
that git doesn't have any problems at all with merging two files together,
for example - there simply are no arbitrary *,v "revision files" that have
some hidden meaning.

It also means that the choice of deltas is a much more open-ended
question. If you limit the delta chain to just one file, you really don't
have a lot of choices on what to do about deltas, but in git, it really
can be a totally different issue.

And this is where the really badly named "--aggressive" comes in. While
git generally tries to re-use delta information (because it's a good idea,
and it doesn't waste CPU time re-finding all the good deltas we found
earlier), sometimes you want to say "let's start all over, with a blank
slate, and ignore all the previous delta information, and try to generate
a new set of deltas".

So "--aggressive" is not really about being aggressive, but about wasting
CPU time re-doing a decision we already did earlier!

*Sometimes* that is a good thing. Some import tools in particular could
generate really horribly bad deltas. Anything that uses "git fast-import",
for example, likely doesn't have much of a great delta layout, so it might
be worth saying "I want to start from a clean slate".

But almost always, in other cases, it's actually a really bad thing to do.
It's going to waste CPU time, and especially if you had actually done a
good job at deltaing earlier, the end result isn't going to re-use all
those *good* deltas you already found, so you'll actually end up with a
much worse end result too!

I'll send a patch to Junio to just remove the "git gc --aggressive"
documentation. It can be useful, but it generally is useful only when you
really understand at a very deep level what it's doing, and that
documentation doesn't help you do that.

Generally, doing incremental "git gc" is the right approach, and better
than doing "git gc --aggressive". It's going to re-use old deltas, and
when those old deltas can't be found (the reason for doing incremental GC
in the first place!) it's going to create new ones.

On the other hand, it's definitely true that an "initial import of a long
and involved history" is a point where it can be worth spending a lot of
time finding the *really*good* deltas. Then, every user ever after (as
long as they don't use "git gc --aggressive" to undo it!) will get the
advantage of that one-time event. So especially for big projects with a
long history, it's probably worth doing some extra work, telling the delta
finding code to go wild.

So the equivalent of "git gc --aggressive" - but done *properly* - is to
do (overnight) something like

	git repack -a -d --depth=250 --window=250

where that depth thing is just about how deep the delta chains can be
(make them longer for old history - it's worth the space overhead), and
the window thing is about how big an object window we want each delta
candidate to scan.

And here, you might well want to add the "-f" flag (which is the "drop all
old deltas", since you now are actually trying to make sure that this one
actually finds good candidates.

And then it's going to take forever and a day (ie a "do it overnight"
thing). But the end result is that everybody downstream from that
repository will get much better packs, without having to spend any effort
on it themselves.

			Linus</pre>
</blockquote>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/metalinguist.wordpress.com/17/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/metalinguist.wordpress.com/17/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/metalinguist.wordpress.com/17/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/metalinguist.wordpress.com/17/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/metalinguist.wordpress.com/17/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/metalinguist.wordpress.com/17/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/metalinguist.wordpress.com/17/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/metalinguist.wordpress.com/17/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/metalinguist.wordpress.com/17/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/metalinguist.wordpress.com/17/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/metalinguist.wordpress.com/17/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/metalinguist.wordpress.com/17/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/metalinguist.wordpress.com/17/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/metalinguist.wordpress.com/17/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/metalinguist.wordpress.com/17/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/metalinguist.wordpress.com/17/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=17&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://metalinguist.wordpress.com/2007/12/06/the-woes-of-git-gc-aggressive-and-how-git-deltas-work/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/2ac9e89349327ec77018f08365098f59?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">fdr</media:title>
		</media:content>
	</item>
		<item>
		<title>Overview: GlusterFS &amp; Gluster</title>
		<link>http://metalinguist.wordpress.com/2007/09/22/overview-glusterfs-gluster/</link>
		<comments>http://metalinguist.wordpress.com/2007/09/22/overview-glusterfs-gluster/#comments</comments>
		<pubDate>Sat, 22 Sep 2007 10:45:02 +0000</pubDate>
		<dc:creator>fdr</dc:creator>
				<category><![CDATA[infrastructure]]></category>
		<category><![CDATA[storage]]></category>
		<category><![CDATA[fs]]></category>
		<category><![CDATA[fuse]]></category>
		<category><![CDATA[gluster]]></category>
		<category><![CDATA[linux]]></category>

		<guid isPermaLink="false">http://metalinguist.wordpress.com/2007/09/22/overview-glusterfs-gluster/</guid>
		<description><![CDATA[Gluster (and its filesystem, GlusterFS) is the only distributed computing + distributed file system project that gives me warm fuzzies inside.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=16&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<h6><span style="color:#808080;">Forgive the writing, I&#8217;ll fix it up later if I get complaints. </span></h6>
<p><a title="Supercomputing and Superstorage" href="http://gluster.org/">Gluster</a> (and its filesystem, GlusterFS) is the only distributed computing + distributed file system project that gives me warm fuzzies inside, and if you check my <a title="Filesystem tag" href="http://del.icio.us/drfarina/filesystem">del.icio.us tags</a>, you&#8217;ll see that I have visited and reviewed quite a few options in this space (many of which I didn&#8217;t bookmark as well). Also reviewed were OCFS(1|2) , GFS(1|2), GFarmFS, Ceph, and CODA.</p>
<p>Why warm fuzzies for Gluster? Because it doesn&#8217;t rebuild the world from scratch and it is relatively simple in configuration and implementation. GlusterFS is implemented as a FUSE file system for GNU/Linux (which incurs some overhead, but greatly speeds up development for the obvious reasons) and relies on the underlying file systems that already have received a lot of attention to detail. It also means that you can mix, match, compose, and migrate easily: since it sits above any normal POSIX block device, you can have your exorbitantly expensive fibre channel next to your cheap software RAID6 SATA array in combination with your medium-priced ATA over Ethernet and rely on GlusterFS to distribute data between them using an underlying file system you already know and love. Some of your block devices may be formatted ext3, others JFS or XFS. It doesn&#8217;t really matter as long as you have basic POSIX capabilities. GlusterFS also supports optional striping and replication, and I have heard a report of easily saturating a full-duplex 10GBit line in both directions from about five machines (granted, each was probably running RAID) while using GlusterFS.</p>
<p>As it is said: complexity is the enemy of dependability. Gluster is the only solution I&#8217;ve seen so far that I as a lone administrator would trust in part because it appeals to my brand of engineering sensibilities. Paramount among them is (to some people counter-intuitively) appreciation for many things that GlusterFS unabashedly <em>doesn&#8217;t</em> do, simplifying the design. An example of this is authentication. If you want to use Gluster with authentication, expose (on a trusted machine that&#8217;s a cluster client) a SMB/NFS server that takes care of user permissions and hooks up to your LDAP server et al. Gluster doesn&#8217;t include any baggage to not trust clients or have fancy quasi-centralized metadata servers, and this I see as a benefit. If someone invents such baggage later on, it will likely be fulfilled as a module (just like the replication module) that I can choose or not choose at-will. Is it as deeply integrated or slick as some of the clustered file systems that require a RDBMS to coordinate? Not really, but the deep and slick scare the bejeezus out of me because they tend to become unwelcoming to combination with other techniques, and not least because you&#8217;re pulling your hair out trying to keep the trains running on time when you are exercising more of the features. (The industrial-strength clustered file systems are not known for their ease of maintenance.)</p>
<p>It is also my belief that this dedication to simplicity will result in a more robust substrate to build more advanced fuzzy features on. I prefer my base functionality in a tool to be more predictable than clever. Clever translates to me as &#8220;often right, but at hard-to-predict times sometimes very, very funkily wrong.&#8221;</p>
<p>Besides the usual supercomputing cluster type applications, I believe Gluster+GlusterFS+(XenSource | LKVM | jails  | etc) would provide an excellent way to protect the underlying infrastructure (by using VM-style abstraction as a heavy handed form of capability-based security) and build a service much like Amazon&#8217;s EC2. Perhaps an experiment for one day&#8230;</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/metalinguist.wordpress.com/16/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/metalinguist.wordpress.com/16/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/metalinguist.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/metalinguist.wordpress.com/16/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/metalinguist.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/metalinguist.wordpress.com/16/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/metalinguist.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/metalinguist.wordpress.com/16/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/metalinguist.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/metalinguist.wordpress.com/16/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/metalinguist.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/metalinguist.wordpress.com/16/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/metalinguist.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/metalinguist.wordpress.com/16/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/metalinguist.wordpress.com/16/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/metalinguist.wordpress.com/16/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=16&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://metalinguist.wordpress.com/2007/09/22/overview-glusterfs-gluster/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/2ac9e89349327ec77018f08365098f59?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">fdr</media:title>
		</media:content>
	</item>
		<item>
		<title>The Lisp Before the End of My Lifetime</title>
		<link>http://metalinguist.wordpress.com/2007/08/04/the-lisp-before-the-end-of-my-lifetime/</link>
		<comments>http://metalinguist.wordpress.com/2007/08/04/the-lisp-before-the-end-of-my-lifetime/#comments</comments>
		<pubDate>Sat, 04 Aug 2007 11:08:42 +0000</pubDate>
		<dc:creator>fdr</dc:creator>
				<category><![CDATA[languages]]></category>
		<category><![CDATA[lisp]]></category>
		<category><![CDATA[projects]]></category>

		<guid isPermaLink="false">http://metalinguist.wordpress.com/2007/08/04/the-lisp-before-the-end-of-my-lifetime/</guid>
		<description><![CDATA[The reason this post is titled in such a sober way is because the Lisp I envision is probably many lifetimes of work to accomplish, and as such, I cannot see myself accomplishing everything on my own<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=14&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Many wax poetic on the virtues of Lisp, and I would say for good reason: it was a language and philosophy that was (and is) far ahead of its time in principle and oftentimes in practice. But I have to cede the following: the foundations of Common Lisp are becoming somewhat ancient and there are many places that have more modern roots where I would have it borrow heavily to assist in creating my programming nirvana. In talking with yet another friend from Berkeley (and the author of <a href="http://sudorandom.blogspot.com/" title="sudo random blog">sudo random</a>) we had discussed some of these things and I decided it was worth enumerating some of them and pointing to ongoing work that implements those fragments or something close to it.</p>
<p>The reason this post is titled in such a sober way is because the Lisp I envision is probably many lifetimes of work to accomplish, and as such, I cannot see myself accomplishing everything on my own. Granted, I still have a lot of life ahead of me yet, but that only makes the equation all the more depressing. Implementation could probably span many PhD theses and industrial man-decades. As such, I can only hope that it&#8217;s the Lisp that more or less exists before The End Of My Lifetime. I would be glad to one day say that I contributed in some or large part to any one piece of it. This whole post smacks of the &#8220;sufficiently smart compiler&#8221; daydreaming, so turn away if you must. Alternatively, you can sit back, enjoy, and nit-pick at the details of compiler theory and implementation, some (or many) of which I&#8217;m sure have been overlooked by me.</p>
<p>Finally, this is not by any means a list of things that current implementations do not have, just things that I feel would seem most valuable. Some are not even necessarily technical challenges so much as social and design ones. I view this hypothetical Lisp as not only some new features, but a set of idioms that more programmers generally agree on. &#8220;The Zen of Python&#8221; is an excellent example of this. There are definitely some lisp-idioms, but they have become somewhat antiquated and are hard to enumerate in some part because of the baroque and aging Common Lisp specification. The hardest idiom to get around is fearlessness and ease of metaprogramming, which in part is great, but also can make standardization difficult socially as it assists in making herding Lisp programmers difficult. Herding lisp programmers is about as tough as herding cats armed with machine guns.</p>
<p>However, I think Lisp&#8217;s guiding intentions have lain in flexibility. Common Lisp, for its time, was the kitchen sink. It still is, in large part, but may benefit from new idioms and a fresh slate, as well as deeper and more integrated compiler support for some of the features mentioned below.</p>
<h4>1. The Compiler is your Friend.</h4>
<p>Leaders in this area: SLIME and its Swank component<br />
Honorable Mentions: DrScheme, IPython</p>
<p>Nowadays modern IDEs seem to do everything up to the semantic analysis step in compilation to give you advanced searching and refactoring capabilities. Oftentimes a lot of compiler work is reimplemented to support the features of the given IDE at hand, and much work is duplicated, sometimes to the point of implementing a whole compiler, as in Eclipse.</p>
<p>SLIME and Swank have a twist on this that I like: Swank is responsible for asking the compiler implementation itself (in my case SBCL) for information on various symbols, their source location, documentation, and so on. It communicates all this information through a socket to a frontend, which comprises the rest of SLIME. In doing so it gains the authoritative answer to queries about the program because the compiler of choice itself is delivering its opinion on the matter, even as it runs.</p>
<p>This allows for an accurate way to track down references that may be created dynamically by asking the figurative question &#8220;What would the compiler do?&#8221;. From this SLIME gains extremely powerful auto-completion facilities that are robust to techniques that are either unavailable in other programming cultures or, if used, would defeat the programmer&#8217;s completeness of assessment of the program. Lisp is the only runtime/language I know of where I can eval a string and still be able to access the resulting, say, function definition and documentation strings with full auto-completion and hinting in my editing environment.</p>
<p>Were Lisp more popular, I would bet Swank-compatibility and feature-richness  would be a defining feature for Lisp implementations, and frontends using Swank would be prolific. The socket interface was definitely the way to go here.</p>
<h4>2. Networked &amp; Concurrent Programming</h4>
<p>Leaders in this area: Erlang<br />
Honorable mentions: Termite &amp; Gambit, Rhino, SISC, Parallel Python, Stackless Python, and many others.</p>
<p>Sun Microsystems, despite its beleaguered business, had at least one thing very, very right: &#8220;The network is the computer.&#8221; The ability to talk on multiple computers on a network is increasingly important in our era, and making it convenient can lead to extraordinarily powerful, robust applications. Erlang definitely leads the pack in this area: an industrial strength, reasonably efficient compiler that can do I/O pumping using efficient kernel-assisted event polling as well as automatically distributing computation across multiple processors. It also can support sending of most higher-order objects &#8211; such as closures &#8211; across the network as parts of messages, as well as a powerful pattern-matching syntax that allows for relatively easy handling of binary (and other) protocols.</p>
<p>With processors increasing the number of cores and computers continually falling in price the ability to (mostly) correctly use multiple machines and multiple processors on each machine will become a dominant influence for writing programs that require high performance. Erlang has been demonstrated to be excellent at managing network I/O switching and handling, which is not surprising considering that is its main application as a tool. It could, however, stand to improve upon sequential execution performance: let&#8217;s just say I won&#8217;t be rewriting my numeric codes in Erlang just yet, despite the potential for mass distribution of computation. I also miss some of my amenities I&#8217;ve gotten used to in Lisp, but Erlang excels in its area for sure and has many lessons to teach.</p>
<h4>3. First Class Environments</h4>
<p>Leaders in this area: T, MIT-Scheme — mostly academia<br />
Honorable Mentions: Python, Common Lisp</p>
<p>First class environments are the beginning and end of many problems, but I feel that having this facility would be useful for debugging and implementing creative namespacing and many other important features. Opaque environments can sometimes still be handled with mostly reasonable performance, but as far as I know nice, transparent environments — i.e., things that look like property or assoc lists straight out of the SICP — are just an absolute killer for performance and make compiler optimizations nigh impossible. But that&#8217;s OK&#8230;because there&#8217;s nothing more annoying than shying away from using thunks or currying when these techniques are the most simple and expressive solution because you are afraid that it will become a chore to poke at the environment to debug these anonymous function instances later. By contrast, &#8220;locals()&#8221; in Python, for example, can be a godsend for special tasks and quick debugging, even if it only returns the local (and generally most useful) environment.</p>
<p>First class environments also help in &#8220;fixing&#8221; tricky issues that crop up and are cause for Scheme&#8217;s motivation for hygienic macros, famous for being hellishly picky to get right (Lisp-2 fans always seem to harp on this point, although what I&#8217;m suggesting may be something more like dynamic-lisp-N). I still feel that the quasiquote, despite its sometimes-ugliness, is the right primitive model to follow. And, in fact, since there seem to be hygienic macro packages built on top of the primitive variants, one could get those almost for free. Perhaps hygienic macros could also be idiomatic, I know not.</p>
<p>In conclusion, the goal is to break down some of the final barriers between code and data and allow for some interesting if unorthodox transformations and redefinitions at run-time and compile-time. It&#8217;s also important to have this functionality if one wants to dynamically redistribute computations across machines or perform run-time metaprogramming, which may be a great way to introduce new compiler features that can be toggled on and off.</p>
<h4>4. An External Native-Code Generator</h4>
<p>Leaders in this area: LLVM, JVM<br />
Honorable Mentions: Parrot (if only because of relative vaporwareness), Mono, C--, Bit-C</p>
<p>More important than the individual merits of any of these specific VMs is that they are maintained separately by Other People™. It is high time to stop re-inventing architecture-specific code generators and local optimizers over and over. With the JVM catching up or passing up language-specific native code generators (it&#8217;s now more or less tied with OCaml on the <a href="http://shootout.alioth.debian.org/gp4/benchmark.php?test=all&amp;lang=java&amp;lang2=ocaml" title="Ocaml vs Java">Alioth compiler shootout</a> with Java and doing well with Scala) and LLVM recently showing <a href="http://lucille.atso-net.jp/blog/?p=294" title="LLVM 20% better than GCC4.2 on a benchmark">on-par and sometimes better performance</a> than vanilla versions of GCC for some C code, I am buoyed with hope that one can generate relatively high-level (or at least architecture-independent-ish) bytecode and still get respectable or even good performance. JIT, ironically enough, may be more well suited to the lispy world than the Java one (although it&#8217;s instrumental in the Java world for sure) considering that it&#8217;s pretty common to go in and rebind definitions in Lisp while a system is running. One might argue that changing declaim/proclaim statements and evaluating code is in fact better than JIT, and I could see there being a case for that, but it just seems that lots of work is being poured into run-time code generators that could be leveraged.</p>
<p>One interesting idea is compiling to <a href="http://www.bitc-lang.org/" title="Bit-C Language">Bit-C</a>, which has support for low-level manipulations and type-verification, yet also is a lisp.</p>
<h4>5. Optionally Exposed Type Inference and Static Typing</h4>
<p>Leaders in this area: Epigram, Qi, Haskell, the ML family<br />
Honorable Mentions: CMUCL and descendant SBCL</p>
<p>Inferred static typing and type inference is all the rage these days, with claims for increased program execution speed and correctness. And I&#8217;m all for that, and Qi is an excellent example of the ability to do a considerable amount with standard Common Lisp facilities. Qi has the extremely sensible goal of remaining in Common Lisp, and thus ensuring that it has a measurable chance of having traction in my lifetime.</p>
<p>I&#8217;m not sure that the interface to typing I would expose is necessarily (or necessarily not) Qi&#8217;s, but I do want my compiler to tell me what it thinks about various tokens littered throughout my code, allowing my editor to do things like red-flagging unsafe operations and type disagreements or have a mode to show expensive dynamic operations or inferred types when I&#8217;m seeking optimization. Ultimately, not all of my code will fit neatly into the pure-functional paradigm and may be better served by the occasional side-effect or global state, and I would like type rigor to extend as far as possible, but not become a burden. Sometimes I just want a heterogeneous hash table of elements without any baggage. I think it makes sense to rigorously type nuggets of code, but the Lisp in question should not be fascist about maintaining &#8216;perfect&#8217; consistency throughout an entire program. Epigram and Qi have this model exactly right: pay as you go. Flexibility when you need it, but not to the point where it is fascist. In the future, it&#8217;d be nice to see some efficiency benefits from compiler-awareness of carefully statically typed nuggets of code that otherwise would not be possible, such as eliminating some bounds checking.</p>
<p>Finally, CMUCL and SBCL already do quite a bit of type-inference, it&#8217;s just not exposed to the user so nicely in SLIME except through warnings blown out of stdout. Even then, they can be very useful. Ideally I could simply ask SLIME to access the type of a given symbol and (CMU|SB)CL could tell me what it thinks.</p>
<h4>6. Pattern Matching</h4>
<p>Leaders in this area: Many. MLs, Haskell, Erlang, lisp macros for Scheme and CL.</p>
<p>This is an amenity that should become a standard part of the lisper&#8217;s idiom for convenience if nothing else. It&#8217;s just that there are a number of pattern matchers and none of them that I am aware of has become the idiomatic one.</p>
<h4>7. Continuations and Dynamic-Wind</h4>
<p>Leaders in this area: Scheme, almost exclusively, and an implementation: STALIN</p>
<p>Scheme is probably the canonical continuation and dynamic-wind implementation. Implementation is subtle and performance impacts can be significant, but give pleasant generality to schemers when designing new control constructs. Combined with first-class environments one could do quite a few interesting things, such as save the entire program state as an environment-continuation pair. Unfortunately, implementation is incredibly painful. Yet, it has been done, and with a pay-as-you-go model it may not need to hurt most code&#8217;s performance very much (few people ought to be writing code riddled with continuations). See the STALIN compiler, which does all sorts of rather insane things, along with the insane compilation time. It&#8217;s mostly intended for numerical codes, though.</p>
<h4>8. Pretty and easy (but optional) Laziness</h4>
<p>Leaders in this area: Haskell, Python, Ruby, Common Lisp Iterate package, many others<br />
Honorable Mentions: Anything with closures, Screamer. Scheme for call/cc allowing even more strange general flow control.</p>
<p>I like Python&#8217;s yield operator that transforms a normal function into a lazy one that is expressed as an iterator. In particular, I like to avoid, when possible, specifying representation formats for a sequence or set of things when they aren&#8217;t strictly necessary. With continuations one can get very nice looking implementations of generator-type functions although this may have an undesirable performance impact. As such, most languages that have laziness or generators implement special restricted-case behavior to get good performance, and that should probably sit pretty high up on the optimization list for this Lisp.</p>
<h4>9. Convenient and Pervasive Tail Recursion</h4>
<p>Leaders in this area: Erlang, Scheme<br />
Honorable mentions: anything with tail recursion elimination and optional arguments</p>
<p>I like using tail recursion to express loops, as I find them more flexible, easier to debug, and understandable than loops and mutation. Combined with pattern matching it&#8217;s fiendishly convenient at times and can in some circumstances greatly assist a compiler if assignments go unused. I don&#8217;t mean to say this should be the only means, but I would like to see it be idiomatic and terse to write. The Scheme named-let and Erlang&#8217;s pattern matching both assist this process. One of the main priorities that is key is making it easy to hide the extra arguments often required in a tail-recursive function to hold state from the outside world, and Scheme&#8217;s named-let, I think, handles this rather beautifully for common cases.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/metalinguist.wordpress.com/14/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/metalinguist.wordpress.com/14/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/metalinguist.wordpress.com/14/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/metalinguist.wordpress.com/14/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/metalinguist.wordpress.com/14/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/metalinguist.wordpress.com/14/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/metalinguist.wordpress.com/14/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/metalinguist.wordpress.com/14/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/metalinguist.wordpress.com/14/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/metalinguist.wordpress.com/14/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/metalinguist.wordpress.com/14/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/metalinguist.wordpress.com/14/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/metalinguist.wordpress.com/14/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/metalinguist.wordpress.com/14/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/metalinguist.wordpress.com/14/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/metalinguist.wordpress.com/14/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=14&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://metalinguist.wordpress.com/2007/08/04/the-lisp-before-the-end-of-my-lifetime/feed/</wfw:commentRss>
		<slash:comments>14</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/2ac9e89349327ec77018f08365098f59?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">fdr</media:title>
		</media:content>
	</item>
		<item>
		<title>Monads for Schemers/Lispers</title>
		<link>http://metalinguist.wordpress.com/2007/07/21/monads-for-schemerslispers/</link>
		<comments>http://metalinguist.wordpress.com/2007/07/21/monads-for-schemerslispers/#comments</comments>
		<pubDate>Sat, 21 Jul 2007 13:37:34 +0000</pubDate>
		<dc:creator>fdr</dc:creator>
				<category><![CDATA[lisp]]></category>
		<category><![CDATA[mathematics]]></category>
		<category><![CDATA[theory]]></category>

		<guid isPermaLink="false">http://metalinguist.wordpress.com/2007/07/21/monads-for-schemerslispers/</guid>
		<description><![CDATA[I would still like to expose monads in a way that Schemers and Lispers would relate to them and perhaps grok the potential usefulness of the concept, which at times may seem to be a strange dodge performed by those "pure" functional people. I am forgoing some of the background theory I learned in exchange for trying to highlight some of the key ideas as applied to writing programs by example and narrative.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=13&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>I was originally writing a much more ambitious post that tried to introduce category theory and its uses, but have been having a hard time writing it. Unfortunately it is not something that I think can be easily explained tersely, although my attempts to do so have led me to learn a lot more about category theory than I thought I&#8217;d ever want to know. Yet, I would still like to expose monads in a way that Schemers and Lispers would relate to them and perhaps grok the potential usefulness of the concept, which at times may seem to be a strange dodge performed by those &#8220;pure&#8221; functional people. I am forgoing some of the background theory I learned in exchange for trying to highlight some of the key ideas as applied to writing programs by example and narrative.</p>
<p>The only obvious prerequisite knowledge here is that of higher order functions and using functions as data. These concepts are well-covered and taught by a variety of computer science texts, such as the venerable <a href="http://mitpress.mit.edu/sicp/" title="Structure and Interpretation of Computer Programs">SICP</a>, webcast university lectures, and many other media. I assert that this is an important prerequisite because what monads provide you is a rigorous and structured abstraction for dealing with data transformations and the application of higher order functions.</p>
<h3>Maybe</h3>
<p>Consider the &#8220;Maybe&#8221; monad that is often used as a primer.  We&#8217;re going to offer a quick rough translation into Scheme.  The semantics we&#8217;re interested in of the Maybe monad are as follows:</p>
<ul>
<li>Any function should be able to be applied to the monad
<ul>
<li>Not necessarily successfully such as in the case of a type error, although with a little more work we could get these semantics as well via dynamic-wind or unwind-protect.</li>
</ul>
</li>
<li>The monad will prevent us from operating on the value NIL, and instead simply ignore computations that would attempt to operate on NIL.
<ul>
<li>This is similar to how computer architecture will propagate NaNs in computations.</li>
</ul>
</li>
</ul>
<p>We only need three tools to get this behavior:</p>
<ol>
<li>A procedure to construct the monad from some object in the category of Scheme values</li>
<li>A procedure to transform functions that would normally operate on those values into a function that will operate on the monad value</li>
<li>A procedure to merge double-wrapped &#8220;Maybe&#8221; monads to normalize them</li>
</ol>
<p>Without further ado, here are some function definitions:</p>
<blockquote></blockquote>
<pre>(define (make-maybe value)
  ;; Constructor for the Maybe monad. The representation of a Maybe value is
  ;; the plain Scheme value itself, so VALUE is returned unchanged; '() is the
  ;; "nothing" case that map-function-to-maybe checks for with null?.
  value)

;; Lifts FN, a procedure on plain values, into a procedure on Maybe values.
;; The returned procedure takes one Maybe value: if it is '(), the result is
;; '() and FN is never called; otherwise FN is applied and its result is
;; re-wrapped with make-maybe.
(define (map-function-to-maybe fn)
  (lambda (maybe-object)
    (if (null? maybe-object)
        '()
        (make-maybe (fn maybe-object)))))

;; "Join" for the Maybe monad. Returns MAYBE-OBJECT unchanged: make-maybe adds
;; no wrapper layer, so a doubly-wrapped Maybe value has nothing to strip.
(define (join-maybe maybe-object)
  maybe-object)</pre>
<pre></pre>
<p>This may seem incredibly pointless, but consider this motivating example:</p>
<blockquote></blockquote>
<pre>&gt; (define maybe-cdr (map-function-to-maybe cdr))
&gt; (maybe-cdr (make-maybe '(1 2)))
(2)
&gt; (maybe-cdr (maybe-cdr (make-maybe '(1 2))))
()
&gt; (maybe-cdr (maybe-cdr (maybe-cdr (maybe-cdr (make-maybe '(1 2))))))
()</pre>
<blockquote></blockquote>
<p>As you can see, you would normally have had to check for NIL values to prevent crashing in the last expression, but we now have a generic way to instrument functions with checks. The result is a function that accepts and returns the monad&#8217;s type. This is the essence of Functors (of which monads are a special type) to take home: Functors are morphisms between categories, categories themselves contain objects and morphisms, and in order to be a functor we must be able to transform both the objects in a category and the morphisms between them. There are a few additional properties and rules to be considered a functor or monad that can be useful in proving correctness or getting certain guaranteed behavior, but the main take-away idea is writing a uniform &#8220;layer&#8221; that will map both functions and the data they operate on to another &#8220;type&#8221; with different semantics. Monads also have to obey rules for compositions of &#8220;join&#8221; and &#8220;map&#8221; to maintain invariants, but it is sometimes okay to bend these rules: there are many not-quite-monad functors that can prove very useful. Consider the monad and the functor as guiding metaphors.</p>
<p>Moving on: Why do we bother at all with make-maybe and join-maybe? It so happens that the underlying representation is simple in this case (since we only need to tweak functions) and thus the construction and join operators are trivial. This would not be the case in say, Haskell, where the typing system will prevent happy accidents like this one; (make-maybe (make-maybe &#8216;(7))) would actually have a double-wrapped Maybe-type and &#8220;join&#8221; would have to do some thinking on how to sensibly put the nested Maybes together. (This is also called &#8220;multiplication&#8221; in category theory parlance)</p>
<p>Also notice there is no way to &#8220;get out of&#8221; the monad. Getting &#8220;out of&#8221; the monad type is an additional functionality that goes above and beyond what it means to be a monad; it would be a monad with an additional transformation to whatever is seen to be fit. However, once again, dynamic typing provides us a happy accident in this particular case and we can actually use this particular monad&#8217;s representation directly in our programs. Generally this is not the case and probably should not be encouraged: one should think carefully before breaking into a monad to deal with its representation ad-hoc, otherwise we lose some benefits of containment.</p>
<h3>List, the monad you&#8217;ve used before</h3>
<p>Let&#8217;s just rephrase &#8220;list&#8221; as a monad here:</p>
<blockquote></blockquote>
<pre>(define (make-list value)
  ;; Constructor for the List monad: wraps VALUE in a one-element list.
  ;; NOTE(review): SRFI-1 and R7RS specify a standard make-list that takes a
  ;; length and an optional fill value; where that exists this definition
  ;; would shadow it -- confirm that is acceptable for the target Scheme.
  (list value))

;; Lifts FN, a procedure on single values, into a procedure on lists. The
;; returned procedure takes one list and applies FN to each of its elements.
(define (map-function-to-list fn)
  (lambda (list-object)
    ;; "Map" has the proper return type "for free"
    (map fn list-object)))

;; "Join" for the List monad: removes one level of nesting by appending the
;; elements of VALUE together. Expects VALUE to be a list whose elements are
;; themselves lists.
(define (join-list value)
  (apply append value))</pre>
<blockquote></blockquote>
<p>We have now defined a monad. Again, it may seem stupidly trivial, but consider what it has in common with &#8220;Maybe&#8221;:</p>
<ul>
<li>We can accept anything and create a list monad instance</li>
<li>We have some transformation that modifies a function to operate on list-monads instances.</li>
<li>We can get rid of monad layers
<ul>
<li>Notice that this definition maintains composition, a useful property of monads. This means you can perform a map of a join and then another join on the result or simply perform two joins to get the same value.</li>
</ul>
<ul>
<li>This presumes the lists are well-formed, i.e. have no non-lists as elements when join is called. Otherwise we rightly should have a type error because you are applying &#8220;join&#8221; to a non-list, although you could imagine writing a more forgiving implementation with Maybe-like qualities, or force all internal values to be singleton lists rather than scalars. Such is the flexibility of thinking in terms of this abstraction.</li>
</ul>
</li>
<li>We have no way of getting anything out of the monad, and this time it&#8217;s more visible: we can never go from &#8216;(1) -&gt; 1, unless we provide another transformation, such as &#8220;car&#8221;, which would have the type &#8220;List-Monad -&gt; *&#8221;.</li>
</ul>
<h3>Something less contrived, a &#8220;Watchful&#8221; monad</h3>
<p>The final example I will be presenting here is not-quite useful, but pretty close. It is not complicated, but it is non-trivial. We seek to accomplish the simple task of letting us hook into the application of a function to a value, useful in debugging or notifying other components of a system of changes. This is often a task given over to mutation because it&#8217;s a pain to pass around this state all the time. Here is an attempt to try to give an alternative to mutating global state while allowing arbitrary procedures to be notified of when a value is being operated on by any function that has been transformed by our &#8220;map-function-to-watchful&#8221; procedure. I implement some simple functionality using this monad to attach arbitrary functions (called &#8220;snoopers&#8221;) that are allowed to look at the value being passed to a function being applied to the monad and their own previous state.</p>
<p>In this section I will first show how one can define and use the snoopers, how using them appears in an interpreter, and then finally the monad implementation itself.</p>
<h4>Using the Watchful monad</h4>
<p>Here is how one would define some &#8220;snoopers&#8221; to watch a value, along with defining a watched-value instantiation for the number zero.</p>
<pre>;; Some watchers
(define (modification-watcher state thing)
  ;;Counts the number of times the value has functions applied to it.
  ;;Notice that we do not use "thing," but we must accept it to have the
  ;;proper number of arguments
  ;;STATE is '() on the first call (the initial state that add-watcher
  ;;installs), so the count starts at 1; afterwards STATE is the count
  ;;returned by the previous call.
  (if (null? state)
      1
      (+ state 1)))

(define (previous-values-watcher state thing)
  ;; Retains previous values in the state: THING is consed onto the front of
  ;; STATE, so the most recently seen value comes first.
  (cons thing state))

;;Using watchers
;;Add both watchers, notice how each add-watcher call returns a monad in turn.
;;The innermost call attaches modification-watcher to a fresh watchful 0; the
;;outer call then attaches previous-values-watcher to that result.
(define zero-being-watched (add-watcher previous-values-watcher
                                        (add-watcher modification-watcher
                                                     (make-watchful 0))))

;; Let's transform a function...say an increment function
;; watcher-incr accepts a watchful value and returns a new watchful value
;; whose wrapped number is one greater and whose snooper states are updated.
(define watcher-incr (map-function-to-watchful
                      (lambda (x) (+ x 1))))</pre>
<h4>Showing use of the watched-value and the watch-increment function in the interpreter</h4>
<p>Here&#8217;s an example interaction:</p>
<blockquote></blockquote>
<pre>&gt; zero-being-watched
(((#&lt;procedure:previous-values-watcher&gt; ()) (#&lt;procedure:modification-watcher&gt; ())) . 0)</pre>
<pre>
&gt; (watcher-incr zero-being-watched)
(((#&lt;procedure:previous-values-watcher&gt; (0)) (#&lt;procedure:modification-watcher&gt; 1)) . 1)</pre>
<pre>
&gt; (watcher-incr (watcher-incr zero-being-watched))
(((#&lt;procedure:previous-values-watcher&gt; (1 0)) (#&lt;procedure:modification-watcher&gt; 2)) . 2)</pre>
<pre>
&gt; (watcher-incr (watcher-incr (watcher-incr zero-being-watched)))
(((#&lt;procedure:previous-values-watcher&gt; (2 1 0)) (#&lt;procedure:modification-watcher&gt; 3)) . 3)</pre>
<blockquote></blockquote>
<p>As you may be able to tell from the above, the internal representation format of the monad is an association list paired with the watched value. That association list contains procedures and state that they are allowed to store things in (you could do an even cleaner job with continuations, but then they wouldn&#8217;t print as nicely), so right now all state-saving has to be done explicitly.</p>
<p>If this example were to be used for anything serious one would have to fix up a couple of things, but it&#8217;s pretty functional as-is (And purely functional in the other meaning of the word). The following definition is not as long as it looks; there are lots of comments and some stuff (like filter and the merge-and-remove-duplicates for &#8220;join&#8221;) that eat up lines without being intrinsically related to monads. There is no subtle twist; it&#8217;s exactly the same as the previous two examples (make, map, and join) with an additional function (add-watcher) that operates on the monad directly to add snoopers. Notice that functions passed through map-function-to-watchful are instrumented to call all the snoopers and construct a new list full of the procedure reference and the new state each snooper returns for its next invocation.</p>
<p>One final thing deserving explanation is that my add-watcher reuses the join operator: basically, I construct a new monad with the input monad as the wrapped value, then run join. This is so that I can get the duplication elimination and state list construction for free. It certainly makes the definition of add-watcher short. (Despite its relatively huge commenting)</p>
<h4>The watchful monad implementation</h4>
<blockquote></blockquote>
<pre>;;Watchful Monad
(define (make-watchful value)
  ;; Constructor for the Watchful monad: pairs an empty snooper list with
  ;; VALUE.
  ;; the car of the monad is a list of snoopers, the cdr is the value being watched
  ;; Each snooper is in the form (function state), where state is returned
  ;; by the snooper after every invocation so it can store things as it chooses.
  ;; The snooper function should accept two args: state and data, so that they
  ;; can have some memory and report on the data being operated on by the
  ;; watched procedure.
  (cons '() value))

;; Lifts FN, a procedure on plain values, into a procedure on watchful values.
;; The returned procedure takes a (snoopers . value) pair and returns a new
;; pair: every snooper is called with its old state and the value FN is about
;; to receive, and FN's result becomes the new wrapped value.
(define (map-function-to-watchful fn)
  (lambda (watchful-object)
    (let* ((snoopers (car watchful-object))
           (arg-data (cdr watchful-object)))
      ;; Compute new snooper state, now they all know what "fn" is operating on.
      ;; Each entry is rebuilt as (snooper-fn new-state); note the snoopers see
      ;; arg-data, i.e. the value before fn is applied, not fn's result.
      (cons (map (lambda (snooper)
                   (let ((snooper-fn (car snooper))
                         (snooper-state (cadr snooper)))
                     (list snooper-fn (snooper-fn snooper-state arg-data))))
                 snoopers)
            (fn arg-data)))))

;; "Join" for the Watchful monad. VALUE is a watchful value whose wrapped
;; value is itself a watchful value, i.e. it has the shape
;;   (outer-snoopers . (inner-snoopers . inner-value))
;; The result is a single-layer watchful value
;;   (merged-snoopers . inner-value)
;; where merged-snoopers is outer-snoopers followed by those inner snoopers
;; that are not already present in outer-snoopers.
(define (join-watchful value)
  (letrec ((inner-snoopers (car (cdr value)))
           (outer-snoopers (car value))
           (inner-value (cddr value))
           ;; The standard "filter" we all know and love, but not included in
           ;; R5RS
           ;; Returns the elements of seq for which predicate is true, in
           ;; their original order.
           (filter (lambda (predicate seq)
                     (if (null? seq)
                         '()
                         (if (predicate (car seq))
                             (cons (car seq) (filter predicate (cdr seq)))
                             (filter predicate (cdr seq)))))))
    ;; Merge the outer-snoopers and inner-snoopers, outer-snoopers win in event
    ;; of a collision, which in this case means the same procedure with the same
    ;; state.
    ;; Collisions are detected with member on whole (function state) entries.
    (cons (append outer-snoopers (filter (lambda (inner-snooper)
                                           (not (member inner-snooper outer-snoopers)))
                                         inner-snoopers))
          inner-value)))

;; functions that operate on the watcher-monad directly (e.g. Watcher-Monad -&gt; Watcher-Monad)
(define (add-watcher watcher-fn watcher-object)
  ;;watcher-fn must accept two args: state and the value in the monad when a
  ;;function is called on that value and return a new state. We abuse the
  ;;join-watchful function I wrote by first encapsulating the watcher-object
  ;;as a value inside another watcher-object and then employing "join-watchful"
  ;;Returns a new watcher-object; because join-watchful lists the outer
  ;;snoopers first, the new (watcher-fn ()) entry ends up ahead of the
  ;;snoopers that watcher-object already had.

  (join-watchful
   ;; We make a singleton assoc-list with watcher-fn with state NIL to start.
   ;; We know our monad internally is just a cons pair, for brevity break the
   ;; abstraction here...
   (cons `((,watcher-fn ()))
         watcher-object)))</pre>
<blockquote></blockquote>
<h3>Conclusion</h3>
<p>Hopefully these examples shed more light on monads and their uses for those who come from Lisp-ish backgrounds, or dynamic typing backgrounds in general. I often found the Haskell-annotated versions caught up in typing which sometimes made it more difficult than absolutely necessary to explain the generalities of the idea and motivation for monads with languages where mutation is more convenient and typing is more relaxed. As such, I wrote this for the programmer already adept in using higher order functions and thinking of functions as data yet unsure of what the fuss is about monads and how they can be useful.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/metalinguist.wordpress.com/13/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/metalinguist.wordpress.com/13/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/metalinguist.wordpress.com/13/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/metalinguist.wordpress.com/13/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/metalinguist.wordpress.com/13/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/metalinguist.wordpress.com/13/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/metalinguist.wordpress.com/13/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/metalinguist.wordpress.com/13/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/metalinguist.wordpress.com/13/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/metalinguist.wordpress.com/13/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/metalinguist.wordpress.com/13/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/metalinguist.wordpress.com/13/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/metalinguist.wordpress.com/13/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/metalinguist.wordpress.com/13/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/metalinguist.wordpress.com/13/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/metalinguist.wordpress.com/13/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=13&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://metalinguist.wordpress.com/2007/07/21/monads-for-schemerslispers/feed/</wfw:commentRss>
		<slash:comments>4</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/2ac9e89349327ec77018f08365098f59?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">fdr</media:title>
		</media:content>
	</item>
		<item>
		<title>Large Binary Data is (not) a Weakness of Erlang</title>
		<link>http://metalinguist.wordpress.com/2007/07/09/largbe-binary-data-is-a-weakness-of-erlang/</link>
		<comments>http://metalinguist.wordpress.com/2007/07/09/largbe-binary-data-is-a-weakness-of-erlang/#comments</comments>
		<pubDate>Mon, 09 Jul 2007 11:24:58 +0000</pubDate>
		<dc:creator>fdr</dc:creator>
				<category><![CDATA[erlang]]></category>
		<category><![CDATA[languages]]></category>

		<guid isPermaLink="false">http://metalinguist.wordpress.com/2007/07/09/largbe-binary-data-is-a-weakness-of-erlang/</guid>
		<description><![CDATA[Update: Good news in the comments, with some good details. To sum up:

split_binary/2 is the call to use. Binaries are also refcounted instead of garbage collected. The next release will fix large binary pattern matching due to improper handling of bignums in the current release. So the internet wins again and the original blog author should be happy.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=10&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Update: Good news in the comments, with some good details. To sum up:</p>
<p>split_binary/2 is the call to use. Binaries are also refcounted instead of garbage collected. The next release will fix large binary pattern matching due to improper handling of bignums in the current release. So the internet wins again and the original <a href="http://chlorophil.blogspot.com/2007/07/erlang-and-very-large-binary.html" title="Original article details adventures with bad-matching-ness">blog author</a> should be happy.</p>
<blockquote><p> Finally — even if we get all this bookkeeping right — we have to deal with the possibility of obscene wastage. Suppose someone loads five hundred megabytes of binary data into memory, takes a five hundred byte slice, and discards the old instance. We can&#8217;t deallocate just parts of an array with standard [mc]alloc(), so we have to make a decision on how many bytes of wastage is worth not copying the salient part of the array to a fresh, appropriately sized heap allocation. Sometimes it may be clear, but with lots of small-ish binary instances wasting <em>some</em> memory it may be less clear. Or I guess we could just call realloc() every time, which may be a little bit overkill but would be simple-ish and predictable&#8230;</p></blockquote>
<p>One thing that does remain, however, is shrinking the region allocated for the binary memory. There is a danger of keeping big chunks of binary around when one has taken a tiny slice, so either some GC work needs to be put in place to realloc() properly or the user just needs to be cognizant and copy the binary if they feel that it may release a large chunk of memory and then let all previous references die.</p>
<p>Venturing onward on this posting should be reserved for archivists and the curious.</p>
<h4><span id="more-10"></span>The following should only be read by the curious. I&#8217;m just keeping it for&#8230;posterity?</h4>
<p>Erlang&#8217;s binary manipulation procedures are <a href="http://chlorophil.blogspot.com/search/label/erlang" title="http://chlorophil.blogspot.com/search/label/erlang">definitely not suited for large-scale bit-management</a>, especially considering how a seemingly deficient pattern matching implementation is the primary means of selecting slices of a binary. Jumping 2**27 &#8211; 1 bytes at a time just seems so wrong. I poked around a while to find an Erlang binary++ third-party package or some sort of built-in that would ease my disquiet, but alas, I found nothing. This is most saddening, because my naive instinct is that the act of taking a slice of a binary chunk should be practically free with the no-mutate contract with Erlang. It may, however, require addressing of the following issues:</p>
<p>Obligatory Disclaimer: The following is based on my general understanding of the world and not the fine-level details of Erlang&#8217;s implementation in these areas. I would certainly invite anyone intimately familiar with Erlang&#8217;s internals to share corrections or insight.</p>
<ul>
<li>In the naive-implementation one could allocate a new Erlang binary with a pointer into somewhere in the middle of another Erlang binary. This is happy because of no-mutate until someone goes and rudely disposes the underlying binary as garbage or moves it (presuming Erlang has a compacting/moving garbage collector).</li>
<li>We could solve some of this with another level of indirection (if not present already) that decouples the Erlang binary instance from the binary data itself and decorates the data with a refcount or somesuch. Hopefully the garbage collector will have a finalizer-type hook that will allow the refcount to be updated properly when one of our referrers gets destroyed. Obviously for this kind of thing to work we want to keep our binary data more or less outside the garbage collector&#8217;s knowledge by using a fundamental system call a la [mc]alloc(), unless we want to get a whole lot more complicated and let it get compacted/moved and be capable of updating all the referrers&#8217; pointers.</li>
<li>Finally — even if we get all this bookkeeping right — we have to deal with the possibility of obscene wastage. Suppose someone loads five hundred megabytes of binary data into memory, takes a five hundred byte slice, and discards the old instance. We can&#8217;t deallocate just parts of an array with standard [mc]alloc(), so we have to make a decision on how many bytes of wastage is worth not copying the salient part of the array to a fresh, appropriately sized heap allocation. Sometimes it may be clear, but with lots of small-ish binary instances wasting <em>some</em> memory it may be less clear. Or I guess we could just call realloc() every time, which may be a little bit overkill but would be simple-ish and predictable&#8230;</li>
<li>Where should one expose this? Should the pattern matcher be &#8216;fixed&#8217;? Should it just be some functions in a module like &#8220;binary&#8221; (much as we have &#8220;lists&#8221; and &#8220;dict&#8221; and so on)? Should we write the former and then write some parse_transform statements to mangle it into the latter? (That sounds possible, but probably painful&#8230;)</li>
</ul>
<p>In other words, a lot more work than one may think. Unless we just copy stuff and not fix the binary pattern matching, and then it&#8217;s all relatively easy. I think. But then we don&#8217;t get O(1) slicing!</p>
<p>In this case, I&#8217;d probably vote for the Worse-is-Better approach: copy data to get O(N)-speed copies to avoid trying to mimic copy-on-write while fighting with the garbage collector and just write additional functions instead of poking at the pattern matcher. Then throw that copy away and write a better one that gets O(1) slicing. Then fix the pattern matcher.</p>
<p align="justify">On the subject of largish-files from disk: where is my mmap functionality? Does that exist? Another question for another post&#8230;my cursory web search so far has not been fruitful, so either no one talks about it because it&#8217;s so nicely abstracted into Erlang somehow or because it&#8217;s not being employed.</p>
<p>Addendum:</p>
<p>Getting momentum may be as simple as writing a low-level Port Driver that will offer a somewhat painful (but functional) way to address these issues. If enough people care it&#8217;ll be worth getting into mainstream Erlang. (And making sure it runs on all those platforms et al)</p>
<p>Actually, one might want to think of this as a Functor/Monad, although performing the advanced category mappings in C might be a truly painful task, depending on what one wants to support.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/metalinguist.wordpress.com/10/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/metalinguist.wordpress.com/10/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/metalinguist.wordpress.com/10/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/metalinguist.wordpress.com/10/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/metalinguist.wordpress.com/10/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/metalinguist.wordpress.com/10/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/metalinguist.wordpress.com/10/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/metalinguist.wordpress.com/10/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/metalinguist.wordpress.com/10/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/metalinguist.wordpress.com/10/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/metalinguist.wordpress.com/10/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/metalinguist.wordpress.com/10/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/metalinguist.wordpress.com/10/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/metalinguist.wordpress.com/10/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/metalinguist.wordpress.com/10/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/metalinguist.wordpress.com/10/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=10&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://metalinguist.wordpress.com/2007/07/09/largbe-binary-data-is-a-weakness-of-erlang/feed/</wfw:commentRss>
		<slash:comments>4</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/2ac9e89349327ec77018f08365098f59?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">fdr</media:title>
		</media:content>
	</item>
		<item>
		<title>A Case for Erlang</title>
		<link>http://metalinguist.wordpress.com/2007/07/04/case-for-erlang/</link>
		<comments>http://metalinguist.wordpress.com/2007/07/04/case-for-erlang/#comments</comments>
		<pubDate>Wed, 04 Jul 2007 12:33:30 +0000</pubDate>
		<dc:creator>fdr</dc:creator>
				<category><![CDATA[erlang]]></category>
		<category><![CDATA[languages]]></category>

		<guid isPermaLink="false">http://metalinguist.wordpress.com/2007/07/04/case-for-erlang/</guid>
		<description><![CDATA[[...]Erlang attacks a lot of these problems on many fronts including in its implementation, syntax, and semantics. Yet, people seem to be unfazed by the idea of re-inventing Erlang's wheels when it comes to Erlang's choice application domain, and I suspect a large part of the reason for this is that most people who are vaguely aware of Erlang and its reputation don't know what wheels have already been invented in Erlang. Included in those are some wheels that they probably haven't thought of yet when starting out and could use to assist implementation, others are wheels (some quite elaborate) that they'd be forced to implement, test, and maintain on their own otherwise or suffer from something painful.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=9&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Brief prelude on how I got to this topic for flavor and context: earlier this week I was having a discussion with an old friend from Berkeley about methodologies in scaling, set off by a discussion rooted in a set of <a href="http://www.slideshare.net/Arbow/comparing-cpp-and-erlang-for-motorola-telecoms-software/" title="Motorola slides">Motorola slides</a>[0] comparing Erlang, C plus Erlang, and C(++) telecom equipment code that I had forwarded to him. He was aware of Erlang and its general properties since I had been talking about Erlang some time back when I had been playing around with it as a way to coordinate and cluster nodes for <a href="http://www.cs.utexas.edu/users/mfkb/RKF/km.html" title="KM Homepage">KM</a>[1].</p>
<p>He then remarked that he was working on some stuff that needed to be parallelized and support high concurrency and referred me to the <a href="http://www.eecs.harvard.edu/~mdw/proj/seda/" title="SEDA's home page">SEDA research project</a> as a guiding influence on his budding architecture. SEDA emphasized using event queues to send messages between parallel parts of the architecture with feedback loops for control and updates. I took a look at this and felt there were a few problems:</p>
<ol>
<li>SEDA is a mothballed research project, so there&#8217;s no up to date code</li>
<li>No project I know of maintains a well proven, high quality implementation to abstract a comfortable amount of the mundane details away from me.</li>
<li>Sometimes the model you are working with calls for a thread and <em>not</em> events.<em> </em></li>
</ol>
<p>Qualms one and two are strictly at a practical and not at an ideological level. SEDA has some high level ideas that I have no strong crystallized negative reaction to and are probably good reading&#8230;but at a nuts and bolts level I am left unenthusiastic by the general prospect of not having powerful abstractions readily available to achieve these aims.</p>
<p>Qualm three is much more philosophical and meaty. I have seen <a href="http://capriccio.cs.berkeley.edu/pubs/threads-hotos-2003.pdf" title="Why events are a bad idea">a paper</a> that pretty much sums up some of my feelings on the matter: sometimes, you don&#8217;t want an event-driven abstraction. The paper even mentions SEDA by name as an example of an event-oriented tool used by the authors to set up high-concurrency servers. Despite the fact that SEDA <em>tried</em> to make this easy (or easier) the authors felt that sometimes a thread was really what one wanted and the event-driven model was just not as clear or easy to write as the thread-oriented one. Not being as clear or easy to write means more bugs. However, not all is lost: the paper concludes that there&#8217;s<em> no fundamental reason why events should be the only way to achieve high concurrency.</em> There is some passing mention of Erlang, but nothing substantial. But what have we gained here?  Validation of <em>threads? </em>Aren&#8217;t threads the <a href="http://www.eecs.berkeley.edu/Pubs/TechRpts/2006/EECS-2006-1.html" title="The Problem with Threads">road to madness</a>? We can probably do better than just threads and synchronization constructs which themselves pose a substantial risk to program reliability.</p>
<p>With this background information it&#8217;s easy to imagine a relatively annoying scenario with a {C, Cpp, C#, Java, Python, Ruby, Lisp, Haskell, damn-near-anything} program: What happens when you write part of your system in a threaded manner (because it was natural feeling, and that&#8217;s not necessarily a dirty instinct, as supported by the paper in qualm three) but then need to extract this threaded functionality because it needs to handle more concurrency or be made network-accessible to work over multiple machines? Generally you get to rewrite a lot of code to fit into the SEDA diagram, including re-writing threaded code to be event-driven and network-accessible, which also means having to take care of the network and protocol issues. Don&#8217;t forget having to update your old code to use the event-driven version, a painful affair if you used synchronization constructs. Your only alternative to these rewrites is to defensively program <em>everything</em> with the intention of being event-driven which will only waste a lot of time and make your program <em>less</em> efficient unless you provide even more code to do shared-memory interactions as a secondary mode; otherwise, you will be stuck doing lots of serialization/deserialization to interact with stuff on the same machine. Let&#8217;s not even mention that code that could have been handled more gracefully with context switches rather than event handling will end up being maddening to write and more opaque than one feels it should be. Welcome to <a href="http://en.wikipedia.org/wiki/Tartarus" title="Wikipedia page on Tartarus, Grecian Hell">Tartarus</a>, enjoy your stay.</p>
<p>So now we finally talk about Erlang. Erlang attacks a lot of these problems on many fronts including in its implementation, syntax, and semantics. Yet, people seem to be unfazed by the idea of re-inventing Erlang&#8217;s wheels when it comes to Erlang&#8217;s choice application domain, and I suspect a large part of the reason for this is that most people who are vaguely aware of Erlang and its reputation don&#8217;t know what wheels have already been invented in Erlang. Included in those are some wheels that they probably haven&#8217;t thought of yet when starting out and could use to assist implementation, others are wheels (some quite elaborate) that they&#8217;d be forced to implement, test, and maintain on their own otherwise or suffer from something painful.</p>
<p>Here is a list of some of the more important things that came to my mind that you get &#8220;for free&#8221; for using Erlang:</p>
<ul>
<li>A generally expressive syntax that reduces the amount of code from somewhere between one tenth and one fourth of the roughly equivalent code in C/C++[3]. Error density was seen to be about the same, so that also means about a fourth to a tenth of the number of bugs.</li>
<li>A virtual machine that itself supports kernel event polling (at least under Linux) to allow you to easily handle tens of thousands of persistent connections (such as TCP streams) modeled with a simple one-context-per-connection abstraction[4]. This is not the default and can be enabled with &#8220;+K true&#8221; when starting the &#8220;erl&#8221; interpreter.</li>
<li>The overhead of a process is <a href="http://www.erlang.org/doc/efficiency_guide/processes.html" title="Erlang Efficiency Document">318 machine words.</a></li>
<li>A virtual machine that can efficiently automatically handle SMP (at least under Linux) and distribute processes between nodes</li>
<li>Semantically simple process-oriented concurrency (which avoids a lot of bugs seen with shared-state threads) with high-speed message passing and process scheduling (how else could it handle 80,000 processes?), thanks in large part to no-mutate language semantics</li>
<li>Extensive heap sharing between processes to avoid message copying, once again from no-mutate language semantics. (used the &#8220;-shared&#8221; switch)
<ul>
<li>This is not the default behavior: otherwise, per-process heaps and full-copies are used to maintain short GC pauses for real-time applications</li>
</ul>
</li>
<li>Network-transparency between processes, even when passing higher-order objects like closures(!)
<ul>
<li>Some of the more &#8220;leaky&#8221; abstractions made for performance such as ETS or process tables that allow for mutation can have opaque continuation returns that cannot be serialized in this way. In any case, it&#8217;s in a very small minority.</li>
</ul>
</li>
<li>Node and process monitoring and restart strategies to allow you to write robust, idiomatic, fault-tolerant programs</li>
<li>Automatic transitive closing of Erlang nodes for maximum reliability/message passing speeds as well as (when that full-connectivity is impossible) message routing between intermediate nodes.</li>
<li>Pattern-matching of several data types. Not only the obvious tuples, but also for binary streams, largely eliminating temptations to use inefficient ASCII-based wire protocols.</li>
<li>Distributed synchronization constructs</li>
<li>Safe local and global name registration</li>
<li>A powerful distributed database, Mnesia</li>
<li>Code hot-swapping</li>
<li>Application versioning and packaging support</li>
<li>Metaprogramming using the AST, so Lispish-style macros exist. NOTE: in Erlang parlance, this is called the &#8220;parse_transform&#8221; procedure. &#8220;Macros&#8221; in Erlang lexicon refer to something more like the C preprocessor.</li>
<li>Generic constructs for common tasks: finite state machines, event-driven (for when they are the most natural model), and the ever-useful and flexible &#8220;generic server&#8221; (gen_server) behavior.</li>
<li>A community that is focused on reliability, performance, and distributed applications</li>
<li>More community that is trying to give Erlang some more tools with &#8220;general appeal&#8221; such as a web application framework</li>
<li>Heavy documentation, both of the libraries and of methodology refined by twenty years of language development, research, and application</li>
</ul>
<p>Let&#8217;s revisit the scenario above, where you were stuck in Tartarus re-writing your threads into events and making them accessible via network, but now starting out with Erlang code.</p>
<p>Once again, as before, some of your code which you had written using a process-oriented model has outgrown its britches and needs to be made scalable and concurrent.  The latter part of that is mostly taken care of for you: simply spawn a process for every work item, as you were before. Thanks to the Erlang VM, your eight-processes turned eight-thousand are having no problem handling the flood of work and utilizing as many machine resources as possible. You don&#8217;t need to coerce yourself into writing an event-based server, introducing bugs and obfuscating code as you go, a huge win already.</p>
<p>Now you get to worry about making things distributed, which is a little more complicated. Your first attempt is to allocate some dedicated machines to running the code in question for more power. Since message-passing is network-transparent, the changes to your code elsewhere in your application are minimal. A send is still a send, whether across a network or on the same node. You write some code to decide how to allocate those queries across these machine resources, which themselves may dynamically reallocate work, carrying along with it the process name to send the return message to, in order to avoid centralized response multiplexing overhead[5]. Ultimately some node in the cluster sends the response or the requester times out. In many cases you are now done, you can just constantly add machines to this glob to get more power.</p>
<p>To spice up the story, let us suppose that you notice that there&#8217;s a lot of state-passing going on to synchronize nodes that&#8217;s just too network-intensive that wasn&#8217;t a problem when this was a single-node solution, so you rewrite some of your code to pass a closure that contains instructions on how to update the node&#8217;s state. This means you just avoided having to write some sort of fancy differential state transfer procedure; you&#8217;ve simply told the node at the other end how to compute the new state from an old one by sending the procedure itself on the wire instead of the finished product.</p>
<p>Finally, if you had followed some of the OTP design principles[6] to begin with (which is not uncommon, even when working in a single-node environment, they are exceedingly convenient abstractions) and used the gen_server (or likewise) behavior you can get (or might have already had) a supervision tree going that&#8217;ll make your application serving this stuff fight down till the last man. And so ends our tale.</p>
<p>Don&#8217;t let this glowing report make you think there aren&#8217;t difficulties here; there definitely are real tangible downsides to Erlang, not the least of which are recruiting programmers, questionable string handling, and the somewhat-warty records. It&#8217;s also considerably slower than C when it comes to sequential computation. However, consider that it is clearly <em>not a toy</em>, and that groups of programmers — not <em>all</em> of them Gods, I&#8217;m sure — have employed it to process unfathomable quantities of our telephony data with <strong>nine nines</strong> of reliability[7], all in two million lines of code[8]. Erlang is an artifact designed with a purpose and a vision and will be difficult to best in technical merit in its chosen problem domain without embarking on a project of similar or greater scope.</p>
<p>Footnotes:</p>
<p>[0]: Gist: Erlang is more terse, has better degradation under high load conditions, better reliability. You know, what you might expect against hand-rolled C++ that&#8217;s significantly less complex and tested than Erlang&#8217;s implementation itself. (See <a href="http://en.wikipedia.org/wiki/Greenspun's_Tenth_Rule" title="Greenspun's Tenth Rule">Greenspun&#8217;s Tenth Rule</a>, except with the obvious reapplication)</p>
<p>[1]: Yet unfinished. Actually, stalled for some other priorities. The clustering part of it is done, the missing section is writing the appropriate bindings between the Erlang nodes and the Lisp process, as well as a job allocator to decide on how to allocate work to the nodes. We also don&#8217;t yet have a lot of machines to run this thing on, a circumstance that may change in coming months. In the off chance that you are interested in contributing to a parallel, clustered knowledge base, let me know in the comments.</p>
<p align="justify">[2]: As opposed to full-blown Prolog style unification where everything is in terms of rules. That is, you can say sum(3, 4, A) and conclude A is 7, but you can also ask Prolog sum(A, B, 7), and constantly ask it for legitimate values of A and B, which it&#8217;ll happily return for as long as you&#8217;d like. This is why simplistic fixed-size Sudoku solvers in Prolog look just like articulating the rules instead of actually finding the answers.</p>
<p>[4]: <a href="http://www.sics.se/~joe/apachevsyaws.html" title="Apache vs. YAWS">An oft cited &#8220;benchmark&#8221; of sorts</a> showing an Erlang web server, YAWS, vs. Apache, which uses pthreads. Both of these servers are handling a trivial load — a slow &#8220;GET&#8221; request — so the main determination of who wins here is who is least choked by the massive number of requests. Since Apache uses a pthreads based server it is largely limited by the operating system&#8217;s threading implementation.</p>
<p>[5]: Notice the security problem here? Erlang by default will only communicate with other nodes with the same magic cookie, a simple and robust security mechanism to prevent &#8220;messages from strangers.&#8221; In case you were wondering: message encryption in Erlang is supported in case you don&#8217;t trust your link. It&#8217;s not as straightforward, but someone has <a href="http://wiki.trapexit.org/index.php/Distributed_erlang_using_ssl_through_firewalls" title="Distributed Erlang through SSL and Firewalls">written something about it.</a></p>
<p>[6]: See <a href="http://erlang.org/doc/design_principles/part_frame.html" title="OTP Design Principles ">OTP Design Principles</a> which discuss supervision trees, monitors, and links, among many other things. Also see <a href="http://www.sics.se/~joe/thesis/armstrong_thesis_2003.pdf" title="Joe Armstrong's Thesis">Joe Armstrong&#8217;s Thesis</a>; it&#8217;s easy to read and extremely informative as a perspective on writing reliable software, even if you are not writing Erlang.</p>
<p>[7]: <a href="http://www.pragmaticprogrammer.com/articles/erlang.html" title="Armstrong's article on Pragmatic Programmer">An article written by Armstrong</a>. He links to his thesis, but this is a little bit more conversational and brings out some highlights. I have excerpted the relevant portion for lazy clickers:</p>
<blockquote><p>Does it work?</p>
<p>Yes. Erlang is used all over the world in high-tech projects where reliability counts. The Erlang flagship project (built by Ericsson, the Swedish telecom company) is the AXD301. This has over 2 million lines of Erlang.</p>
<p>The AXD301 has achieved a NINE nines reliability (yes, you read that right, 99.9999999%). Let&#8217;s put this in context: 5 nines is reckoned to be good (5.2 minutes of downtime/year). 7 nines almost unachievable &#8230; but we did 9.</p>
<p>Why is this? No shared state, plus a sophisticated error recovery model. You can read all the details in <a href="http://www.sics.se/%7Ejoe/armstrong_thesis_2003.pdf">my PhD thesis.</a></p></blockquote>
<p>[8]: In case you thought this was a piddly amount of code, <a href="http://www.erlang.se/publications/Ulf_Wiger.pdf" title="Wiger 01 paper">a paper</a> pegs the equivalent amount of C/C++ code at somewhere between four and ten times as much code to get the same stuff done. This is not an extraordinary claim considering Erlang&#8217;s advantages in automatic memory management and functional programming constructs such as the ever-useful map(). This is a huge win, despite what some people try to tell me&#8230;to be explored in a future blog post which I have tentatively named in my head &#8220;Verbosity <em>is</em> a valid complaint!,&#8221; or something like that.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/metalinguist.wordpress.com/9/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/metalinguist.wordpress.com/9/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/metalinguist.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/metalinguist.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/metalinguist.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/metalinguist.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/metalinguist.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/metalinguist.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/metalinguist.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/metalinguist.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/metalinguist.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/metalinguist.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/metalinguist.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/metalinguist.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/metalinguist.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/metalinguist.wordpress.com/9/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=metalinguist.wordpress.com&amp;blog=1293160&amp;post=9&amp;subd=metalinguist&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://metalinguist.wordpress.com/2007/07/04/case-for-erlang/feed/</wfw:commentRss>
		<slash:comments>17</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/2ac9e89349327ec77018f08365098f59?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">fdr</media:title>
		</media:content>
	</item>
	</channel>
</rss>
