<?xml version="1.0" encoding="UTF-8"?>
<!-- $Id: web-fall08.xml 897 2008-12-03 03:19:23Z dret $ -->
<?hotspot layout-path="hotspot/hotspot/layout" ?>
<?hotspot kilauea-path="hotspot/kilauea" ?>
<?hotspot layout="iSchool" ?>
<hotspot xmlns="http://dret.net/xmlns/hotspot/1" xmlns:hotspot="http://dret.net/xmlns/hotspot/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://dret.net/xmlns/hotspot/1 hotspot/hotspot/schemas/hotspot.xsd">
	<configuration>
		<link subsections="yes" bookmarks="yes" versions="web-fall08.xml" home="./" help="quick" contents="./" glossary="http://dret.net/glossary/" author="http://dret.net/netdret/"/>
		<paths img="img" listing="src"/>
		<outline count-text=" [*]" count-depth="all"/>
		<hyperlink extra=""/>
		<extension file="html" link=""/>
		<counter separator=":&#160;"/>
	</configuration>
	<license uri="http://creativecommons.org/licenses/by/3.0/" short="CC 3.0">
		<div class="license">
			<p><a rel="license" title="view full text of license" href="http://creativecommons.org/licenses/by/3.0/"><img alt="Creative Commons License" src="hotspot/hotspot/layout/iSchool/iSchool/somerights20.png" border="0" height="31" width="88"/></a></p>
			<p><a class="outlink" rel="license" title="view full text of license" href="http://creativecommons.org/licenses/by/3.0/">This work is licensed under a CC<br/>Attribution 3.0 Unported License</a></p>
		</div>
	</license>
    <title short="Web Architecture"><a href="./" title="Course Homepage">Web Architecture</a> (INFO 290-03)</title>
    <author short="E. Wilde" affiliation="UC Berkeley ISchool"><a href="http://dret.net/netdret/" title="dret.net">Erik Wilde</a></author>
    <affiliation short="UC Berkeley ISchool"><a href="http://www.berkeley.edu/" title="University of California, Berkeley">UC Berkeley</a> <a href="http://ischool.berkeley.edu/" title="ISchool">School of Information</a></affiliation>
    <date short="Fall 2008">Fall Semester 2008</date>
    <copyright>2008 Erik Wilde</copyright>
	<!--<style type="text/css" src="print.css" media="print"/>-->
	<index name="categories.html">
		<html>
			<head>
				<title>Category Index</title>
				<link rel="stylesheet" type="text/css" href="/dretnet.css"/>
				<link rel="stylesheet" type="text/css" href="fall08.css"/>
				<link rel="icon" href="/favicon.ico" type="image/x-icon"/>
				<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"/>
				<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
				<style type="text/css">
					table {
						border-collapse: collapse;
					}
					td, th {
						padding: 1ex;
						vertical-align: top;
					}
					th + th, td + td {
						border-left: 1px dotted #333;
					}
					tr + tr {
						border-top: 1px dotted #333;
					}
					a, a:link, a:visited {
						color: #69c;
						text-decoration: underline;
					}
					a:hover {
						text-decoration: none;
					}
					.context {
						font-size: 80%;
					}
					.context em {
						color: #956;
						font-style: normal;
						font-weight: bold;
					}
				</style>
			</head>
			<body>
				<div class="header">
					<h1><title/></h1>
					<h2>Category Index (Index Terms by Category)</h2>
				</div>
				<table>
					<tr>
						<th>Category</th>
						<th>References</th>
					</tr>
					<for-each-category>
						<tr>
							<td><category/></td>
							<td>
								<dl>
									<for-each-term>
										<dt><term/></dt>
										<dd><ol>
											<for-each-reference>
												<li>In <reference>presentation <q>*</q> on slide <slide/></reference></li>
											</for-each-reference>
										</ol></dd>
									</for-each-term>
								</dl>
							</td>
						</tr>
					</for-each-category>
				</table>
			</body>
		</html>
	</index>
	<categories>
		<category element="xml" class="xml" name="XML"/>
		<category element="elem" class="xml elem" name="XML Element"/>
		<category element="html" class="html" name="HTML"/>
		<category element="htmel" class="html elem" name="HTML Element"/>
		<category element="cssp" class="css" name="CSS Property"/>
		<category element="csss" class="css" name="CSS Selector"/>
		<category element="css" class="css" name="CSS"/>
		<category element="xpathf" class="xpath" name="XPath Function"/>
		<category element="xpath" class="xpath" name="XPath"/>
		<category element="xslte" class="xslt elem" name="XSLT Element"/>
		<category element="xslta" class="xslt" name="XSLT Attribute"/>
		<category element="xslt" class="xslt" name="XSLT"/>
		<category element="xsde" class="xsd elem" name="XSD Element"/>
		<category element="xsda" class="xsd" name="XSD Attribute"/>
		<category element="xsd" class="xsd" name="XSD"/>
		<category element="xq" class="xq" name="XQuery"/>
		<category element="uri" class="uri" name="URI"/>
		<category element="http" class="http" name="HTTP"/>
		<category element="mime" class="mime" name="MIME"/>
		<category element="atom" class="atom" name="Atom"/>
	</categories>
	<toc name="toc.html">
		<table rules="all" cellspacing="0" cellpadding="5" width="100%">
			<thead>
				<tr>
					<th valign="bottom">Date</th>
					<th valign="bottom">Subject</th>
					<th valign="bottom">Slides</th>
					<th valign="bottom">Required Reading</th>
					<th valign="bottom">Additional Resources</th>
					<th valign="bottom"><a href="a/">Assignments</a></th>
				</tr>
			</thead>
			<tbody>
				<hotspot:for-each-presentation>
					<tr>
						<td align="right" valign="top"><hotspot:date/></td>
						<td valign="top"><b><hotspot:title/><span class="toggle">:</span></b> <span class="toggle"><span class="abstract"><hotspot:toc class="abstract"/></span></span></td>
						<td align="center"><hotspot:presentation-link title="Lecture Slides"><hotspot:title form="short"/></hotspot:presentation-link> <span class="toggle"><hotspot:slides>(*&#160;Slides)</hotspot:slides></span></td>
						<td align="center"><hotspot:toc class="reading"/></td>
						<td align="center"><hotspot:toc class="resources"/></td>
						<td align="center"><hotspot:toc class="assignment-number"/><span class="toggle"><hotspot:toc class="assignment-title"/></span></td>
					</tr>
				</hotspot:for-each-presentation>
			</tbody>
		</table>
	</toc>
    <presentation id="intro">
        <title short="Introduction">Overview and Introduction</title>
        <date>2008-08-28</date>
        <toc class="abstract">This introductory lecture gives the motivation for the course, some information about the people involved and the organization of the course, a high-level overview of the course's topics, and an overview of the assignments which are an important part of the course program.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>Building Things That Actually Work</title>
			<ul>
				<li>Building things …</li>
				<ul>
					<li>building is designing (with a lot of implicit design decisions)</li>
					<li>building things uncovers an essential set of constraints</li>
					<li>changing things should not be too hard (if they are well-designed)</li>
				</ul>
				<li>… that actually work</li>
				<ul>
					<li><q>work</q> means more than <q>look good for the final demo</q></li>
					<li>how adaptable is an application to a changing environment?</li>
					<li>how easy is it to integrate new input and output channels?</li>
					<li>how easily can it be extended to meet new requirements?</li>
				</ul>
				<li>System design is (an important) part of design</li>
				<ul>
					<li>isolated design approaches (for example, UI only) will not deliver the best solutions</li>
					<li><a href="http://courses.ischool.berkeley.edu/i290-1/f08/ISD-Fall2008-Syllabus.html">an <q>end-to-end view</q> involves the complete view of a system</a></li>
				</ul>
				<li>Start building things as early as possible</li>
			</ul>
		</slide>
		<slide>
			<title>What is Architecture?</title>
			<table width="95%">
				<tr>
					<td>
						<img style="width : 90% ; margin : 2% ; " src="map-newyork.png" title="New York City" href="http://maps.google.com/maps?ie=UTF8&amp;hl=en&amp;ll=40.75792,-73.982191&amp;spn=0.049281,0.093641&amp;z=14"/>
					</td>
					<td>
						<img style="width : 90% ; margin : 2% ; " src="map-luebeck.png" title="Lübeck" href="http://maps.google.com/maps?ie=UTF8&amp;hl=en&amp;ll=53.866447,10.68974&amp;spn=0.019182,0.046821&amp;z=15"/>
					</td>
				</tr>
			</table>
		</slide>
		<slide>
			<title>Architecture vs. Design</title>
			<img style="width : 90% ; margin : 2% ; " src="rooftop-pool.jpg" title="Nice Design, Expensive Architecture"/>
		</slide>
		<slide>
			<title>What is an Architect?</title>
			<img style="float : right ; margin-right : 2em ; " src="gherkin.jpg" title="London Gherkin" href="http://www.fosterandpartners.com/Projects/1004/Default.aspx"/>
			<ul>
				<li><q>Star Architects</q> are not typical</li>
				<ul>
					<li>they sell brand names and deliver high profile results</li>
					<li>most architects are more modest and less visible</li>
				</ul>
				<li>Architects must understand how things work</li>
				<ul>
					<li>a reasonable understanding of the disciplines involved</li>
					<li>an excellent understanding of how disciplines have to interact</li>
					<li>negotiating between specialists for a good overall design</li>
				</ul>
				<li>Architects are guides</li>
				<ul>
					<li>they provide guidance for going in the right direction</li>
					<li>they can tell why this direction is the right direction</li>
					<li>they can explain why a wrong direction is wrong</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>How to become a Web Architect?</title>
			<ul>
				<li>Understand Web technologies and their dependencies</li>
				<ul>
					<li>no need to become an expert in all of the areas</li>
					<li>the important part is understanding the dependencies</li>
				</ul>
				<li>Understand how to compare application architectures</li>
				<ul>
					<li>there is no <q>best solution</q> for any given problem</li>
					<li>every solution must be evaluated in terms of <em>various constraints</em></li>
				</ul>
				<li>Next steps for your career in Web architecture</li>
				<ul>
					<li>understand how the <a href="../xml-fall08/" title='"XML Foundations" course fall 2008'>back-end plumbing (a.k.a. XML)</a> works in detail</li>
					<li>get involved in <a href="http://isd.ischool.berkeley.edu/project/" title="ISD Clinic project overview">real-world projects</a> in the <a href="http://isd.ischool.berkeley.edu/about/clinic" title="ISD Clinic project overview">ISD Clinic</a></li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Course Setup</title>
			<ul>
				<li>Broad overview of core Web technologies</li>
				<ul>
					<li>this is <em>not</em> a Web design or Web programming course</li>
				</ul>
				<li><a href="a/">Assignments</a> working with various Web technologies</li>
				<ul>
					<li>how to set up and configure a Web server</li>
					<li>how to deliver client-specific content</li>
					<li>how to design client-specific styles</li>
					<li>using Ajax for creating more dynamic Web pages</li>
					<li>repurposing existing content for syndication</li>
				</ul>
				<li>Final project looking at a real-world case study</li>
				<ul>
					<li>apply a Web architecture view to a Web application architecture</li>
					<li>come up with an <link href="swot">analysis of the strengths, weaknesses, opportunities, and threats</link></li>
				</ul>
			</ul>
		</slide>
		<part>
			<title>Motivation</title>
			<slide>
				<title>Closed World Assumption</title>
				<blockquote>If the only tool you have is a hammer, you tend to see every problem as a nail.</blockquote>
				<p class="quotenote"><a href="http://en.wikipedia.org/wiki/Abraham_Maslow">Abraham Maslow</a></p>
				<ul>
					<li>People, and thus content creators, typically are lazy</li>
					<ul>
						<li>developing content and code for diverse users and clients is hard</li>
						<li>by making assumptions, this job can become considerably easier</li>
					</ul>
					<li>Tools often hide complexity and/or take away freedom</li>
					<ul>
						<li>they are good if tool users <em>know what they are doing</em></li>
						<li>tool users should <em>know alternatives and when to switch tools</em></li>
					</ul>
					<li>Tool makers provide support for lazy people</li>
					<ul>
						<li>built-in simplifications of the tool's target technology</li>
						<li>pre-packaged excuses why it is appropriate to use the tool</li>
					</ul>
				</ul>
			</slide>
			<part>
				<title>Bad Content</title>
				<slide>
					<title>Poorly Equipped Developers</title>
					<img style="height : 75% ; margin : 2% ; " src="mercedes.png" href="http://www.mbusa.com/"/>
				</slide>
				<slide>
					<title>Popular Screen Resolutions</title>
					<img style="width : 90% ; margin : 2% ; " src="resolutions.png" title="dret.net Statistics 2007"/>
				</slide>
			</part>
			<part>
				<title>Bad Systems</title>
				<slide>
					<title>Your Tax $ @ Work</title>
					<ul>
						<li><a href="http://www.grants.gov/applicants/apply_for_grants.jsp">How to apply for NSF grants</a></li>
						<li>Over $400 billion in grants each year</li>
						<li><em>PureEdge</em> is <em>required</em> as the technology to fill out grant forms</li>
							<ul>
								<li><a href="http://www-306.ibm.com/software/swnews/swnews.nsf/n/nhan6eerne">acquired by IBM</a> and now called <em href="http://www-142.ibm.com/software/workplace/products/product5.nsf/wdocs/formshome">IBM Workplace Forms</em></li>
							</ul>
						<li>All of this probably looked nice for the final demo …</li>
						<li>Web forms do not provide all the features required by the specification</li>
						<ul>
							<li>offline editing of applications is not possible, Web forms work online only</li>
							<li>there is no built-in signing of form contents, but there are technologies for it</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Version 1: Go IE or Go Home</title>
					<ul>
						<li>Well, but not if you are using Vista …</li>
						<li>PureEdge is an IE plug-in for filling out forms online and offline</li>
						<ul>
							<li>plug-ins are specific for the browser for which they are developed</li>
							<li>plug-ins are specific for the OS on which they run</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Version 2: Buy a Virtual Computer</title>
					<ul>
						<li>Government authorities are (usually) concerned about accessibility</li>
						<ul>
							<li>restricting $400 billion of grant money to IE users only seems a bit restrictive</li>
							<li>is there a <em>reasonable argument</em> to be made for this restriction?</li>
						</ul>
						<li>Grants.gov recommended to get a virtual PC to access the portal</li>
						<ul>
							<li>users have to buy virtual PC software</li>
							<li>users have to buy Windows to run on the virtual PC</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Version 3: Use a Virtual Computer</title>
					<ul>
						<li>Grants.gov set up a <em>Citrix server</em> for grant applicants</li>
						<ul>
							<li>Citrix server licenses are not cheap to buy</li>
							<li>applicants still have to install the Citrix client (which is free)</li>
							<li>running a Citrix server farm is pretty expensive</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Version 4: Crash your Computer</title>
					<ul>
						<li>After some time, <a href="http://www.grants.gov/resources/download_software.jsp#pureedgeviewer">PureEdge for Mac was released</a>, features include:</li>						
						<ul>
							<li><q cite="http://www.grants.gov/resources/download_software.jsp#pureedgeviewer">occasional crashes and subsequent loss of any unsaved data</q></li>
							<li><q cite="http://www.grants.gov/resources/download_software.jsp#pureedgeviewer">inability to run on Mac OS version prior to 10.4.6</q></li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Classical Lock-In</title>
					<ul>
						<li>Companies usually sell <em>products</em>, not just <em>solutions</em></li>
						<li>Lock-in happens quickly and is hard to escape from later</li>
						<li>Lock-in usually carries a pretty high price tag</li>
						<li>Lock-in solutions can be good, but it is an important decision</li>
						<li>Standards-based solutions may lack some sophistication</li>
						<ul>
							<li>but often they may still be good enough to solve a problem</li>
							<li>being able to change the platform easily is a valuable asset</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>Bad Planning</title>
				<slide>
					<title>Tax on Taxes</title>
					<img style="height : 60% ; padding : 0 4% 4% 4% ; float : right ; " src="tax-filing-statistics.jpg" title="Tax E-Filing Statistics" href="http://www.nytimes.com/2007/04/23/technology/23intuit.html"/>
					<ul>
						<li>The I.R.S. perspective</li>
						<ul>
							<li>electronic filing saves money and should be encouraged</li>
							<li>processing a paper version: $2.65</li>
							<li>processing an electronic submission: $0.29</li>
						</ul>
						<li>The company perspective</li>
						<ul>
							<li>have a <q>monopoly</q> for 2005-2009</li>
							<li>taxpayers paid $1.01 billion for submission fees</li>
							<li>the servers could not handle the load</li>
						</ul>
						<li>The taxpayer perspective</li>
						<ul>
							<li>IRS saves money, but taxpayers are charged</li>
							<li>electronic filing must go through a company</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part>
			<title>Varia</title>
			<slide>
				<title>About Me</title>
				<ul>
					<li>Computer Science at <a href="http://www.tu-berlin.de/eng/">Technical University of Berlin (TUB)</a> (88-91)</li>
					<li>Ph.D. at <a href="http://www.ethz.ch/index_EN">ETH Zürich</a> (92-97)</li>
					<li>Post-Doc at <a href="http://www.icsi.berkeley.edu/">ICSI, Berkeley</a> (97/98)</li>
					<ul>
						<li>book on <q><a href="http://dret.net/netdret/publications#wil98">Technical Foundations of the World Wide Web</a></q>
						</li>
					</ul>
					<li>Various activities back in Switzerland (98-06)</li>
					<ul>
						<li>teaching at <a href="http://www.ethz.ch/index_EN">ETH Zürich</a> and <a href="http://www.fhnw.ch/">FHNW</a></li>
						<li>working as independent consultant (training, courses, consulting)</li>
						<li>research in <a href="http://dret.net/projects/">various XML-related areas</a></li>
					</ul>
					<li>Professor at the <a href="http://ischool.berkeley.edu/">School of Information</a> (since Fall 2006)</li>
					<ul>
						<li>technical director of the <a href="http://isd.ischool.berkeley.edu/">Information and Service Design (ISD) program</a></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>About this Course</title>
				<ul>
					<li>Course Web page: <code><a href="./">http://dret.net/lectures/web-fall08/</a></code></li>
					<li><a href="https://bspace.berkeley.edu/portal/site/4f3236cc-0fb0-4849-afc4-14111f436593">bspace course Web page</a> for additional information</li>
					<li>Course mailing list: <code><a href="mailto:web-fall08@bspace.berkeley.edu">web-fall08@bspace.berkeley.edu</a></code></li>
					<ul>
						<li>also available through the <a href="https://bspace.berkeley.edu/portal/site/4f3236cc-0fb0-4849-afc4-14111f436593/page/8e3c14dd-454c-42d0-8023-e34640f50967">bspace mail tool</a></li>
						<li>the bspace mail tool supports role-based emails</li>
						<li>archived in the <a href="https://bspace.berkeley.edu/portal/site/4f3236cc-0fb0-4849-afc4-14111f436593/page/1c9de764-2a85-48c0-90a0-3e35f57a9fa6">bspace email archive</a></li>
					</ul>
					<li>Letter grade based on assignments, mid-term exam, and final project</li>
				</ul>
			</slide>
			<slide>
				<title>About these Slides</title>
					<ul>
						<li>Generated from <a href="http://dret.net/projects/xslidy/">Hotspot</a> <a href="web-fall08.xml">XML</a></li>
						<li>Designed for online presentation and use (lots of links!)</li>
						<ul>
							<li>Firefox <a href="http://dret.typepad.com/dretblog/2008/07/go-up.html">Go Up</a> allows easy navigation up one level</li>
							<li>Firefox <a href="https://addons.mozilla.org/en-US/firefox/addon/1949">Site Navigation Bar</a> supports navigation of <html>link</html> links</li>
							<li>Firefox <a href="https://addons.mozilla.org/en-US/firefox/addon/2933">Link Widgets</a> requires a bit more configuration (more flexibility)</li>
							<li>for printing, use <q>a</q> (all slides), and then <q>s</q> (smaller font) a couple of times</li>
						</ul>
						<li>A good real-world example for Web-based publishing</li>
						<ul>
							<li>Slidy/Kilauea is useful, but there is no support for structures and hyperlinking</li>
							<li>Hotspot adds these features by adding an XSLT transformation</li>
							<li>Hotspot is useful, but there is no interface (XML editing only)</li>
						</ul>
					</ul>
			</slide>
			<slide>
				<title>Additional Resources</title>
				<ul>
					<li><a href="http://dret.net/glossary/">Online Glossary at <code>http://dret.net/glossary/</code></a></li>
					<ul>
						<li>suggestions, updates, corrections are very welcome (really!)</li>
						<li>another exercise in how to use XML and XSLT for information management</li>
					</ul>
					<li><a href="http://dret.net/biblio/">Bibliography at <code>http://dret.net/biblio/</code></a></li>
					<ul>
						<li>suggestions, updates, corrections are very welcome (really!)</li>
						<li>produced by an XML-centric system for managing bibliography data</li>
					</ul>
					<li>The <a href="http://www.w3.org/"><em>World Wide Web Consortium (W3C)</em></a></li>
					<ul>
						<li>headed by <em href="http://www.w3.org/People/Berners-Lee/">Tim Berners-Lee</em>, inventor of the Web (with <a href="http://en.wikipedia.org/wiki/Robert_Cailliau">Robert Cailliau</a>)</li>
					</ul>
					<li>The <a href="http://www.ietf.org/"><em>Internet Engineering Task Force (IETF)</em></a></li>
					<ul>
						<li>mainly Internet standards, but also responsible for URIs and HTTP</li>
					</ul>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="architecture">
        <title short="Web Architecture">Architecture of the World Wide Web</title>
        <date>2008-09-02</date>
        <toc class="reading"><a href="http://www.martinfowler.com/ieeeSoftware/whoNeedsArchitect.pdf" title='Martin Fowler, "Who Needs an Architect?," IEEE Software, vol. 20,  no. 5,  pp. 11-13,  Sept/Oct 2003'>Architecture?</a>&#160;· <a href="http://www.w3.org/TR/webarch/summary.html" title="W3C Web Architecture Specification Summary">Architecture Summary</a></toc>
        <toc class="resources"><a href="http://www.w3.org/TR/webarch/" title="W3C Web Architecture Specification">Architecture</a></toc>
        <toc class="abstract">The Web's architecture has very simple principles revolving around the ideas of <em>placing a heavy emphasis on a consistent and global identification mechanism for resources</em>, a <em>standardized way of how resource representations can be retrieved</em>, and a <em>standardized way of how resource representations should be usable by using standardized media types</em>. This lecture presents an overview of these architectural principles and illustrates them using blogs as an example of Web-based applications.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>Today's Reading</title>
			<blockquote><a href="http://www.w3.org/TR/webarch/summary.html">Summary</a> of <a href="http://www.w3.org/TR/webarch/">Ian Jacobs, Norman Walsh, <q>Architecture of the World Wide Web, Volume One</q>, World Wide Web Consortium, Recommendation REC-webarch-20041215, December 2004</a></blockquote>
			<ul>
				<li>Examples (or counter-examples) for the following principles, practices, and constraints:</li>
				<ul>
					<li>URIs identify a single resource (versioning, aliases)</li>
					<li>URI opacity (assuming a specific resource representation)</li>
					<li>Available representations (XML namespaces as really bad example)</li>
					<li>Hypertext links (resource representations should be good Web citizens)</li>
					<li>Orthogonality (identification, interaction, and representation are orthogonal)</li>
				</ul>
			</ul>
		</slide>
		<part>
			<title>Parsimony</title>
			<slide>
				<title>Keep It Simple</title>
				<ul>
					<li>Loose coupling vs. tight coupling</li>
					<ul>
						<li>fewer requirements for cooperation mean fewer potential sources of problems</li>
						<li>taking independent developments into consideration (graceful degradation)</li>
					</ul>
					<li>Parsimony may conflict with optimization</li>
					<ul>
						<li>a fully backlinked Web would be a very different hypermedia system</li>
						<li>modifying resources would be expensive and require considerable efforts</li>
						<li>an uncontrolled Web allows failure and innovative development</li>
					</ul>
					<li>Programming Languages vs. Frameworks</li>
					<ul>
						<li>programming languages are very simple and very powerful</li>
						<li>frameworks are more complex and have some choices built into them</li>
						<li>both can be used to build good systems</li>
						<li>framework applications are more likely to not do really innovative things</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Web Design as System Design</title>
				<blockquote>There are two ways of constructing a software design: One way is to make it so simple that there are <em>obviously</em> no deficiencies, and the other way is to make it so complicated that there are no <em>obvious</em> deficiencies. The first method is far more difficult.</blockquote>
				<p class="quotenote"><a href="http://en.wikipedia.org/wiki/Charles_Antony_Richard_Hoare">C. A. R. Hoare</a>, <a href="http://dret.net/biblio/reference/hoa81"><q>The Emperor's Old Clothes</q>, 1980 Turing Award Lecture</a></p>
				<ul>
					<li>Web: URI + HTTP + HTML ( + XML)</li>
					<li>OASIS: <a href="http://www.infoworld.com/article/07/08/09/sca-oasis_1.html">Six SOA simplification committees</a> for <a href="http://en.wikipedia.org/wiki/List_of_Web_service_specifications">about 60 WS-* specs</a></li>
				</ul>
			</slide>
			<slide>
				<title>Technology Blinders</title>
				<ul>
					<li>Web architecture is an additional set of constraints</li>
					<ul>
						<li>it is not a very complicated set of constraints</li>
						<li>but it still makes life more complicated than in an unconstrained world</li>
						<li>it may require a major redesign of an application</li>
					</ul>
					<li>Technology providers sometimes ignore Web architecture</li>
					<ul>
						<li>multimedia presentation concepts are often disconnected from the Web</li>
						<li>hypermedia researchers often regard the Web as inferior (or not as hypermedia at all)</li>
						<li>questions of client capabilities are often ignored (or brushed aside using statistics)</li>
					</ul>
					<li>Integration vs. Transport</li>
					<ul>
						<li>integrating into the Web requires applications to conform to Web architecture</li>
						<li>sitting on top of the Web just requires using HTTP for data transfer</li>
						<li>many <q>Web Technologies</q> are <em>not</em> integrated into the Web</li>
						<li>many <q>Web Applications</q> are <em>not</em> integrated into the Web</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Principles</title>
			<slide>
				<title>Identification</title>
				<ul>
					<li>Everything should be identified in a uniform way</li>
					<li>Identification and access methods evolve over time</li>
					<ul>
						<li><uri>sms:</uri> and <uri>callto:</uri> did not exist when the Web was created</li>
					</ul>
					<li>Identification and access support evolve over time</li>
					<ul>
						<li><uri>tel:</uri> now can be supported by an increasing number of clients</li>
					</ul>
					<li>The Web is one huge proof for the power of <em>network effects</em></li>
					<ul>
						<li>it also is a lesson for many who did not take it seriously and failed</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Interaction</title>
				<ul>
					<li>Many URI schemes are named after protocols</li>
					<ul>
						<li><uri>http:</uri> can be accessed using the <em>Hypertext Transfer Protocol (HTTP)</em></li>
						<li><uri>ftp:</uri> can be accessed using the <em>File Transfer Protocol (FTP)</em></li>
						<li><uri>mailto:</uri> sends electronic mail using the <em>Simple Mail Transfer Protocol (SMTP)</em></li>
					</ul>
					<li>Some URI schemes do not really imply a protocol</li>
					<ul>
						<li><uri>mailto:</uri> sends electronic mail using the <em>Simple Mail Transfer Protocol (SMTP)</em></li>
						<li><uri>mailto:</uri> may use any other appropriate technology for sending email</li>
						<li><a href="http://dret.typepad.com/dretblog/2008/06/web-based-sms.html">instead of using protocols directly, they can be accessed indirectly through services</a></li>
					</ul>
					<li>Some URI schemes have no protocol for dereferencing resources</li>
					<ul>
						<li><uri>urn:</uri> URIs are abstract names from some namespace</li>
						<li><uri>urn:ietf:rfc:2648</uri> identifies an IETF standard and not some specific copy</li>
						<li><uri href="http://maps.google.com/maps?ll=27.988056,86.925278&amp;spn=0.1,0.1&amp;q=27.988056,86.925278+(Mount_Everest)">geo:27.988056,86.925278</uri> identifies a physical resource (accessing it is really hard)</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Data Formats</title>
				<ul>
					<li>Agreement on the interpretation of resource representations</li>
					<li>HTML was the first standardized data format on the Web</li>
					<ul>
						<li>CSS and XML have become successful formats as well</li>
					</ul>
					<li>Some data formats are <em>de-facto standards</em> as Web formats</li>
					<ul>
						<li>GIF and JPEG for images and PNG as the successor of GIF</li>
					</ul>
					<li>Some formats are less integrated but still widely used</li>
					<ul>
						<li>PDF for paginated documents</li>
					</ul>
					<li>Some formats have become replacements for missing standards</li>
					<ul>
						<li>Flash for audio and video because no single format was sufficiently successful</li>
					</ul>
					<li>Some formats were intended to become standards but failed</li>
					<ul>
						<li>SVG for vector graphics</li>
						<li>SMIL for multimedia presentations</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Identifier, Resource, and Representation</title>
				<img style="height : 75% ; margin : 2% ; " src="uri-res-rep.png" href="http://www.w3.org/TR/webarch/#p21"/>
			</slide>
		</part>
		<part>
			<title>Constraints and Good Practices</title>
			<slide>
				<title>Constraints</title>
				<ul>
					<li>Some things on the Web can be inconsistent</li>
					<ul>
						<li>guaranteeing consistency by design can lead to tight coupling</li>
						<li>well-defined ways of handling inconsistencies scale better</li>
					</ul>
					<li>Some things on the Web are not perfect</li>
					<ul>
						<li>technologies being used in ways not anticipated (XML, XML Namespaces)</li>
						<li>company goals vs. the greater good (<a href="http://en.wikipedia.org/wiki/Browser_wars">browser war</a>)</li>
					</ul>
					<li><q>The ideal Web</q> vs. <q>the real Web</q></li>
					<ul>
						<li>dealing with a given landscape can introduce additional constraints</li>
						<li>handling these constraints should not violate the general principles</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Good Practices</title>
				<ul>
					<li>Design for openness and extensibility is a key factor</li>
					<ul>
						<li>design for and support evolution and extension and reuse</li>
						<li>try to be a good Web citizen by embracing integration</li>
					</ul>
					<li>Design with the Web in mind</li>
					<ul>
						<li>use Web standards where appropriate (URIs for identification)</li>
						<li>even intranet applications typically evolve and should be designed for the Web</li>
					</ul>
					<li>Make content visible, accessible, usable, reusable</li>
					<ul>
						<li>URI design guidelines should be defined and followed</li>
						<li>think about aggregation and granularity and access to resources</li>
						<li>use well-defined and well-documented XML for B2B scenarios</li>
						<li>reuse existing vocabularies or vocabulary parts whenever possible</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="dretblog">
			<title>Blogs as Web Applications</title>
			<slide>
				<title>Blog in XML</title>
				<listing src="dretblog.xml"/>
			</slide>
			<slide>
				<title>Support URI Guessing (Year Index)</title>
				<listing src="dretblog2html.xsl" line="43-56"/>
			</slide>
			<slide>
				<title>Support URI Guessing (Month Index)</title>
				<listing src="dretblog2html.xsl" line="57-73"/>
			</slide>
			<slide>
				<title>Support URI Guessing (Day Index)</title>
				<listing src="dretblog2html.xsl" line="74-92"/>
			</slide>
			<slide>
				<title>Support Spontaneous Navigation</title>
				<listing src="dretblog2html.xsl" line="26-39"/>
			</slide>
			<slide>
				<title>Publishing as Atom Feed</title>
				<listing src="dretblog2atom.xsl" line="4-27"/>
			</slide>
			<slide>
				<title>Blog as Atom Feed</title>
				<listing src="dretblog.atom" line="2-26"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Web Architecture Essentials</title>
				<ul>
					<li>Principles (violating these causes architectural problems)</li>
					<li>Constraints (disregarding these causes technical problems)</li>
					<li>Good Practices (ignoring these causes user problems)</li>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="internet">
        <title short="Internet">Internet Foundations</title>
        <date>2008-09-04</date>
        <toc class="reading"><a href="http://www.zakon.org/robert/internet/timeline/" title="Hobbes' Internet Timeline">Timeline</a></toc>
        <toc class="resources"><a href="http://en.wikipedia.org/wiki/Category:Internet_architecture" title="Wikipedia: Internet Architecture">Internet Architecture</a></toc>
        <toc class="abstract">The Internet is the technical infrastructure on top of which the Web is built. Some of the services provided by the Internet are essential for the Web, most importantly the naming service and the data transfer service. The <em>Domain Name System (DNS)</em> provides the human-readable names for computers, which can then be used in the addresses of Web servers and ultimately Web pages. The <em>Transmission Control Protocol (TCP)</em> provides the reliable data transfer service between Web Servers and Web Browsers, building on the very robust <em>Internet Protocol (IP)</em>.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
        <part id="networks">
			<title>Computer Networks</title>
			<slide>
				<title>Network History</title>
				<ul>
					<li>First regarded as a convenient workaround for floppy disks</li>
					<ul>
						<li><q>real computer scientists write compilers</q></li>
						<li>the value of computer networks depends on their size</li>
					</ul>
					<li>Early networking solutions were vendor-specific islands</li>
					<ul>
						<li>DECnet for <em>Digital Equipment Corporation (DEC)</em> customers</li>
						<li>XNS for <em>Xerox</em> customers</li>
						<li>SNA for <em>IBM</em> customers</li>
						<li>transmitting data between these networks was very cumbersome</li>
					</ul>
					<li>Bridging networks transparently became increasingly important</li>
					<ul>
						<li>more computers and networks increase the benefit of interconnections</li>
						<li>layering being used for internetworks, not only for networks</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Networks vs. Internetworks</title>
				<ul>
					<li>Specific networks use specific abstractions</li>
					<ul>
						<li>how to address nodes (computers, phones, PDAs, RFID tags)</li>
						<li>how to address applications on these nodes</li>
						<li>how to transmit data to these applications</li>
					</ul>
					<li>Internetworks provide a network-independent abstraction</li>
					<ul>
						<li>nodes are addressed uniformly (IP addresses)</li>
						<li>applications are identified uniformly (ports)</li>
						<li>data transmission uses one set of protocols (TCP/UDP)</li>
					</ul>
				</ul>
			</slide>
			<part id="protocols">
				<title>Networking Protocols</title>
				<slide>
					<title>Internet vs. ISO/OSI</title>
					<ul>
						<li>Global network emerges by the end of the 80's</li>
						<ul>
							<li>some kind of internetworking protocols were required</li>
							<li>ARPANET had been running since the late 60's (1965: Berkeley-MIT)</li>
						</ul>
						<li>ISO/OSI was a new specification</li>
						<ul>
							<li>the idea was to build something new</li>
							<li><em>Open Systems Interconnection (OSI)</em> as a general model for open systems</li>
							<li>OSI was specified rather than developed and tested</li>
						</ul>
						<li>For some time, it was unclear what the <q>global internetwork</q> would be based on</li>
						<ul>
							<li>Internet protocols were already established and running</li>
							<li>OSI promised a fresh start with <q>bigger is better</q> protocols</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Internet</title>
					<ul>
						<li>Very early start and a lot of experience</li>
						<ul>
							<li>pragmatic and evolutionary approach</li>
							<li><q>if it's not broken, don't fix it</q></li>
						</ul>
						<li>Standardization by independent technical experts</li>
						<ul>
							<li>avoids the <q>designed by committee</q> effect of consortiums</li>
							<li>conservative and concentrating on stability</li>
							<li>implementations are required to prove technical feasibility</li>
							<li>simplicity whenever possible</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Internet Principles</title>
					<blockquote>Be liberal in what you accept, and conservative in what you send.</blockquote>
					<p class="quotenote"><a href="http://www.postel.org/postel.html">Jon Postel</a>, <a href="http://dret.net/rfc-index/reference/RFC1122">RFC 1122</a></p>
					<blockquote>Whenever possible, communications protocol operations should be defined to occur at the end-points of a communications system, or as close as possible to the resource being controlled.</blockquote>
					<p class="quotenote"><a href="http://dret.net/biblio/reference/sal84">J. Saltzer, D. Reed, D. Clark, <q>End-to-end Arguments in System Design</q></a></p>
				</slide>
				<slide>
					<title>Internet Protocols</title>
					<img style="width : 90% ; margin : 2% ; " src="internet-protocols.png" title="Internet Protocol Layers"/>
				</slide>
				<slide>
					<title>Network Convergence</title>
					<img style="width : 90% ; margin : 2% ; " src="network-convergence.png" title="Convergence in the Protocol Stack"/>
				</slide>
			</part>
			<part id="ip">
				<title short="IP">Internet Protocol (IP)</title>
				<slide>
					<title>IP Features</title>
					<ul>
						<li>End-to-end data transfer (IP addresses)</li>
						<li>Hiding lower-level heterogeneity</li>
						<li>Connection-less (each packet routed individually)</li>
						<li>Unreliable (packets may be lost or duplicated)</li>
					</ul>
				</slide>
				<slide id="ip-address">
					<title>IP Address</title>
					<ul>
						<li>IP identifies nodes by an IP address</li>
						<li>IP addresses are globally unique (<a href="http://api.hostip.info/get_html.php?position=true">and can be geocoded</a>)</li>
						<li>IP uses 4 bytes for addresses (e.g., <code>128.32.226.29</code>)</li>
						<ul>
							<li>maximum number of addresses: 2<sup>32</sup> = 4 billion</li>
							<li>IPv6 extends the address format to 16 bytes (2<sup>128</sup> addresses)</li>
						</ul>
						<li>IP address shortage led to some trickery using IP addresses</li>
						<ul>
							<li><em href="http://en.wikipedia.org/wiki/Dhcp">Dynamic Host Configuration Protocol (DHCP)</em> is used to assign addresses on-demand</li>
							<li><em href="http://en.wikipedia.org/wiki/Network_address_translation">Network Address Translation (NAT)</em> uses one IP address for more than one device</li>
						</ul>
						<li>IP addresses are well-organized</li>
						<ul>
							<li>important for routing (i.e., sending packets to the target host)</li>
							<li>not ideally suited for mobile or ad-hoc networks</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="tcp">
				<title short="TCP">Transmission Control Protocol (TCP)</title>
				<slide>
					<title>TCP Features</title>
					<ul>
						<li>Flow-controlled (avoiding congestion)</li>
						<li>Reliable (no data lost or duplicated)</li>
						<li>Connection-oriented</li>
						<li>Application addressing</li>
					</ul>
				</slide>
				<slide>
					<title>Reliable Connections</title>
					<ul>
						<li>IP may drop or duplicate packets</li>
						<ul>
							<li>TCP adds serial numbers in data packets</li>
							<li>if problems are detected, TCP recovers automatically</li>
						</ul>
						<li>TCP avoids network congestion and system overload</li>
						<ul>
							<li><em>slow start</em> avoids flooding receivers with data they cannot process</li>
							<li><em>fast retransmit</em> for avoiding timeouts when losing data</li>
							<li>a <em>sliding window</em> for controlling the amount of outstanding packets</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>TCP Window</title>
					<img style="height : 70% ; margin : 2% ; " src="tcp-window.png"/>
				</slide>
			</part>
			<part id="dns">
				<title short="DNS">Domain Name System (DNS)</title>
				<slide>
					<title>Naming vs. Addressing</title>
					<ul>
						<li>IP addresses depend on network topology and organization</li>
						<ul>
							<li>reorganizing a network may change all IP addresses</li>
							<li>identifying important hosts should not be address-based</li>
						</ul>
						<li>Names are supposed to be more stable than addresses</li>
						<ul>
							<li>a name is an abstract identification of something</li>
							<li>names can be used to obtain more information</li>
						</ul>
						<li>Network services should use names instead of addresses</li>
						<ul>
							<li>before using the service, a mapping has to be performed</li>
							<li>the <em>Domain Name System (DNS)</em> is providing this service</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>DNS Properties</title>
					<ul>
						<li>DNS has a bootstrap problem</li>
						<ul>
							<li>DNS provides a service and should thus be identified by a name</li>
							<li>for resolving names into addresses, the DNS service is required</li>
						</ul>
						<li>DNS configuration is part of basic Internet configuration</li>
						<ul>
							<li><em>Dynamic Host Configuration Protocol (DHCP)</em> provides <link href="ip-address"/>, netmask, gateway, and DNS server address</li>
						</ul>
						<li>DNS names are hierarchically structured</li>
						<ul>
							<li><code>ischool.berkeley.edu</code>, <code>edu</code> is the <em>Top-Level Domain (TLD)</em></li>
							<li>TLDs are either <em>generic (gTLD)</em> or <em>country code (ccTLD)</em></li>
							<li>subdomains are federated (e.g., <code>edu</code>, <code>us</code>, <code>uk</code>, <code>tv</code>)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Names Matter</title>
					<ul>
						<li>Names are not unique and namespaces are finite</li>
						<ul>
							<li>name disputes arise which were irrelevant before the Web</li>
							<li><q>cybersquatting</q> as a popular way to make money</li>
						</ul>
						<li>Names can be worth a lot of money</li>
						<ul>
							<li><code>business.com</code> was sold for $7.5 million</li>
						</ul>
						<li>Name inflation can be used to generate money</li>
						<ul>
							<li><code>aero</code>, <code>biz</code>, <code>coop</code>, <code>info</code>, <code>jobs</code>, <code>mobi</code>, <code>museum</code>, <code>name</code>, <code>pro</code>, <code>travel</code></li>
							<li>starting 2009, <a href="http://dret.typepad.com/dretblog/2008/06/dret.html">user-defined top-level domains will go on sale</a></li>
						</ul>
						<li>Names can have political significance</li>
						<ul>
							<li>ccTLDs are assigned based on the UNO's idea of what a country is</li>
						</ul>
						<li>Names can have symbolic significance</li>
						<ul>
							<li>Catalonia managed to get a domain of its own (<code>cat</code>)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Domain Name Space</title>
					<img style="height : 70% ; margin : 2% ; " src="dns-namespace.png"/>
				</slide>
				<slide>
					<title>DNS Namespace Organization</title>
					<ul>
						<li>Domain owners can organize the assignment of subdomains</li>
						<ul>
							<li><code href="http://www.berkeley.edu/">berkeley.edu</code> is a U.S. educational institution</li>
							<li><code href="http://www.ethz.ch/">ethz.ch</code> is a Swiss university</li>
							<li><code href="http://www.imperial.ac.uk/">imperial.ac.uk</code> is a British university</li>
							<li><code href="http://www.uts.edu.au/">uts.edu.au</code> is an Australian university</li>
						</ul>
						<li>Special rules may apply (Germany does not assign car license plate names)</li>
						<li>Organizations may be countries or companies</li>
						<ul>
							<li>countries have national organizations for assigning names</li>
							<li>companies may create an internal multi-level namespace (<code href="http://www.ischool.berkeley.edu/">www.ischool.berkeley.edu</code>)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Using DNS</title>
					<ul>
						<li>DNS is used by virtually all Internet applications</li>
						<ul>
							<li>names are more stable than addresses</li>
						</ul>
						<li>E-mail has some dedicated features built into DNS</li>
						<ul>
							<li>special entries (<code>MX</code> records) identify the e-mail server for a domain</li>
							<li>fallback entries help deal with failing e-mail servers</li>
						</ul>
						<li>Most URIs are based on DNS names</li>
						<ul>
							<li><code>http://ischool.berkeley.edu/</code> identifies the access protocol and the host</li>
							<li>the browser first performs a DNS lookup</li>
							<li>a TCP connection is then established to the address returned by the DNS</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="other-protocols">
				<title>Other Internet Protocols</title>
				<slide id="udp">
					<title short="UDP">User Datagram Protocol (UDP)</title>
					<ul>
						<li>Transport protocol based on <link href="ip"/>, just like <link href="tcp"/></li>
						<ul>
							<li>very thin protocol, adds few features to IP</li>
							<li>provides application addressing</li>
						</ul>
						<li>UDP is unreliable and connection-less</li>
						<ul>
							<li>ideal for fast streaming media (delay is critical, lost packets are tolerable)</li>
							<li>acceptable for one-packet applications (lightweight and fast)</li>
							<li>not acceptable for reliable data transfer</li>
						</ul>
					</ul>
				</slide>
				<slide id="arp">
					<title short="ARP">Address Resolution Protocol (ARP)</title>
					<ul>
						<li>How to find an Internet host</li>
						<ul>
							<li>hosts are configured (manually or by using DHCP)</li>
							<li>there is no externally controlled registry of available hosts</li>
						</ul>
						<li><link href="ip"/> routing finds the network, but what about the host?</li>
						<ul>
							<li>the sender broadcasts a request with the <link href="ip-address"/></li>
							<li>if there is such a host, it responds with its physical address</li>
							<li>the sender can now send the IP packet to the physical address</li>
						</ul>
					</ul>
				</slide>
			</part>
        </part>
        <part>
			<title>Conclusions</title>
			<slide>
				<title>Internet Technologies</title>
				<ul>
					<li><q href="http://www.youtube.com/watch?v=f99PcP0aFNE">The Internet is a series of tubes! It's not a big truck!</q></li>
					<li>The Internet can use various underlying networks</li>
					<li><link href="ip"/> transmits data between Internet hosts</li>
					<li><link href="tcp"/> provides reliable data transfer for applications</li>
					<li><link href="dns"/> allows using names rather than addresses</li>
				</ul>
			</slide>
			<slide>
				<title>Web Technologies</title>
				<ul>
					<li>The Internet is a communications infrastructure</li>
					<li>The Web is an Internet-based distributed hypermedia system</li>
					<li>In theory, the Web could run on any network infrastructure</li>
					<li>The Web's core network technology is <link href="http">HTTP</link></li>
					<li>HTTP uses TCP and DNS for transmitting Web content</li>
				</ul>
			</slide>
        </part>
    </presentation>
    <presentation id="foundations">
        <title short="Foundations">Web Foundations (URI &amp; HTTP)</title>
        <date>2008-09-09</date>
        <toc class="reading"><a href="http://www.w3.org/Provider/Style/URI" title="Cool URIs don't change">Cool URIs</a></toc>
        <toc class="resources"><a href="http://www.w3.org/International/questions/qa-apache-lang-neg" title="Apache Language Negotiation Setup">Language Negotiation</a></toc>
        <toc class="abstract">The Web assumes an underlying network infrastructure providing a reliable, connection-oriented, flow-controlled, end-to-end transport service. Based on such a network service (today provided by the Internet), the Web's transport protocol moves representations of resources identified by a <em>Uniform Resource Identifier (URI)</em> between Web servers and clients. The most important protocol for data transfer on the Web is the <em>Hypertext Transfer Protocol (HTTP)</em>.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>Web Server Service</title>
			<ul>
				<li>Web servers do more than just <q>deliver files</q></li>
				<li>They receive a request for acting on a resource</li>
				<ul>
					<li>this may be a simple file retrieval</li>
					<li>additional information is available from the request's <link href="http-headers">header fields</link></li>
					<li>the request URI may contain additional <link href="uri-query">query information</link></li>
					<li>the request may <link href="http-post">transmit complex data</link></li>
				</ul>
				<li>Processing can mean anything, it is transparent for the client</li>
				<ul>
					<li>the result of processing yields a <em>resource representation</em></li>
				</ul>
			</ul>
		</slide>
		<part id="uri">
			<title short="URI">Uniform Resource Identifier (URI)</title>
			<slide>
				<title>Resource Identification</title>
				<ul>
					<li>The Web is centered around resources</li>
					<ul>
						<li>HTTP has been designed to manipulate resources</li>
						<li>HTTP provides methods for getting, putting, updating, and even deleting resources</li>
					</ul>
					<li>Resources are useful abstractions for interfaces</li>
					<ul>
						<li>instead of an API, interaction is built around manipulating resources</li>
						<li>does that sound familiar?</li>
						<li><q><a href="http://www.docengineering.com/">Document exchanges as components of business models</a></q></li>
						<li>APIs change and bind closely, documents can better withstand change and bind loosely</li>
						<li>the whole Web is built around resources, not APIs</li>
						<li><link href="rest"/> is the principle behind this design</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>URI Schemes</title>
				<pre>URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]</pre>
				<ul>
					<li>URIs in their general case are very simple</li>
					<ul>
						<li>the scheme identifies how resources are identified</li>
						<li>the identification may be hierarchical or non-hierarchical</li>
					</ul>
					<li>Many URI schemes are hierarchical</li>
					<ul>
						<li>it is then possible to use relative URIs such as in <elem>a href="../"</elem></li>
						<li>the slash character is not just a character, in URIs it has semantics</li>
					</ul>
				</ul>
				<blockquote>[…] the URI syntax is a federated and extensible naming system wherein each scheme's specification may further restrict the syntax and semantics of identifiers using that scheme.</blockquote>
				<p class="quotenote"><a href="http://dret.net/rfc-index/reference/RFC3986"><q>Uniform Resource Identifier (URI): Generic Syntax</q>, RFC 3986, January 2005</a></p>
			</slide>
			<slide id="uri-query">
				<title>Query Information</title>
				<ul>
					<li>Query components specify additional information</li>
					<ul>
						<li>it is non-hierarchical information further identifying the resource</li>
						<li>in most cases, it can be regarded as <q>input</q> to the resource</li>
					</ul>
				</ul>
				<blockquote>The query component contains non-hierarchical data that, along with data in the path component […], serves to identify a resource within the scope of the URI's scheme and naming authority […].</blockquote>
				<p class="quotenote"><a href="http://dret.net/rfc-index/reference/RFC3986"><q>Uniform Resource Identifier (URI): Generic Syntax</q>, RFC 3986, January 2005</a></p>
			</slide>
			<slide>
				<title>Processing URIs</title>
				<ul>
					<li>Processing URIs is not as trivial as it may seem</li>
					<ul>
						<li>escaping and normalization rules are non-trivial</li>
						<li>many implementations are broken</li>
						<li>complain about broken implementations</li>
					</ul>
					<li>URIs are not just strings</li>
					<ul>
						<li>URIs are strings with a considerable set of rules attached to them</li>
						<li>implementing all these rules is non-trivial</li>
						<li>implementing all these rules is crucial</li>
						<li>application development environments provide functions for URI handling</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Resources vs. Representations</title>
				<ul>
					<li>URIs identify <em>resources</em></li>
					<ul>
						<li>abstractions which may not have physical representation</li>
					</ul>
					<li>Requesting a URI yields a <em>resource representation</em></li>
					<ul>
						<li>should be an appropriate and useful manifestation of the abstraction</li>
					</ul>
					<li>Resources can have <em>different representations</em></li>
					<ul>
						<li>in a well-designed environment, you should get what works best for you</li>
						<li>HTML for big screens vs. HTML for mobile devices</li>
						<li>an event calendar based on my location and preferences</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="http">
			<title short="HTTP">Hypertext Transfer Protocol (HTTP)</title>
			<slide>
				<title>The Web's Protocol</title>
				<img style="height : 60% ; margin : 4% ; float : left ; " src="internet-traffic-trends.png"/>
				<p class="quotenote">provided by <a href="http://www.cachelogic.com/">CacheLogic Inc.</a></p>
			</slide>
			<slide>
				<title>DNS &amp; HTTP</title>
				<p>The two basic protocols which every Web browser must implement are DNS access and HTTP. However, most operating systems provide an API for DNS access, so the browser can use this service locally and only has to implement HTTP. TCP (which is required as the foundation for HTTP) is usually provided by the operating system.</p>
				<img style="width : 90% ; margin : 2% ; " src="browser-dns-http.png"/>
			</slide>
			<part>
				<title>HTTP Basics</title>
				<slide>
					<title>HTTP Messages</title>
					<ul>
						<li>HTTP needs a reliable connection</li>
						<ul>
							<li>the foundation for HTTP is the <link href="tcp"/></li>
							<li>DNS resolution yields an IP address</li>
							<li>open TCP connection to port 80 or port specified in URI (<code>http://rosetta.sims.berkeley.edu:8085/</code>)</li>
						</ul>
						<li>HTTP is a text-based protocol</li>
						<ul>
							<li>the connection is used to transmit text messages</li>
							<li>all HTTP messages are human-readable (not all <em>entities</em>, though)</li>
							<li>basic HTTP operations can be carried out by hand</li>
						</ul>
					</ul>
<pre>start-line
message-header *

message-body ?</pre>
				</slide>
				<slide id="http-headers">
					<title>HTTP Header Fields</title>
					<ul>
						<li>Header fields contain information about the message</li>
						<ul>
							<li><em>general header:</em> <code>Date</code> as the message origination date</li>
							<li><em>request header:</em> <code>Accept-Language</code> indicates language preferences</li>
							<li><em>response header:</em> <code>Server</code> contains system information</li>
							<li><em>entity header:</em> <code>Content-Type</code> specifies the media type of the entity</li>
						</ul>
						<li>HTTP defines <a href="http://www.cs.tut.fi/~jkorpela/http.html">a number of header fields</a></li>
						<ul>
							<li>unknown fields must be ignored (extensibility)</li>
							<li>unstandardized fields should use an <q><code>X-</code></q> prefix</li>
						</ul>
						<li>HTTP is about acting on these fields</li>
						<ul>
							<li>HTTP defines what HTTP implementations must or should do</li>
						</ul>
					</ul>
				</slide>
				<slide id="http-request">
					<title>HTTP Requests</title>
					<ul>
						<li>After opening a connection, the client sends a request</li>
						<ul>
							<li>the method indicates the action to be performed on the resource</li>
							<li>HTTP's most interesting methods are: <code>GET</code>, <code>HEAD</code>, <code>POST</code></li>
							<li>other interesting methods are: <code>PUT</code>, <code>DELETE</code></li>
						</ul>
						<li>The URI identifies the resource to which the request should be applied</li>
						<ul>
							<li>absolute URIs are required when contacting <link href="proxies"/></li>
							<li>absolute paths are required when contacting a server directly</li>
							<li>the URI may contain <link href="uri-query"/></li>
							<li>fragment identifiers are not sent (they are interpreted on the client side)</li>
						</ul>
						<li>The <code>Host</code> header field must be included in every request</li>
					</ul>
<pre>Method Request-URI HTTP/Major.Minor
[Header]*

[Entity]?</pre>
				</slide>
				<slide id="http-get">
					<title>HTTP GET</title>
					<ul>
						<li>Retrieval action based on the URI</li>
						<ul>
							<li>may be implemented by reading a file</li>
							<li>may be implemented by processing a file (PHP)</li>
							<li>may be implemented by invoking a process</li>
						</ul>
						<li>Semantics may change based on header fields</li>
						<ul>
							<li><code>If-*:</code> only reply with the entity if necessary</li>
							<li><code>Range:</code> only reply with the requested part of the entity</li>
						</ul>
						<li>Cacheability depends on header fields of the response</li>
					</ul>
<pre>GET / HTTP/1.1
Host: ischool.berkeley.edu</pre>
				</slide>
				<slide id="http-response">
					<title>HTTP Responses</title>
					<ul>
						<li>The server's response to interpreting a request</li>
						<ul>
							<li>the status code is given numerically and as text</li>
							<li><code>2**</code> for variations of <q>ok</q></li>
							<li><code>3**</code> for redirections</li>
							<li><code>4**</code> are different client side problems (<code>404</code>: not found)</li>
							<li><code>5**</code> are different server side problems</li>
						</ul>
						<li>Header fields specify additional information</li>
						<ul>
							<li>information about the server</li>
							<li>information about the entity (media type, encoding, language)</li>
						</ul>
					</ul>
<pre>HTTP/Major.Minor Status-Code Text
[Header]*

[Entity]?</pre>
				</slide>
				<slide id="http-performance">
					<title>HTTP Performance</title>
					<ul>
						<li>HTTP/1.0 allowed one transaction per connection</li>
						<ul>
							<li>TCP connection setup and teardown are expensive</li>
							<li>TCP's <em>slow start</em> slows down the initial phase of data transfer</li>
							<li>typical Web pages use between 10 and 20 resources (HTML + images)</li>
							<li>typically, these resources are stored on the same server</li>
						</ul>
						<li>HTTP/1.1 introduces <em>persistent connections</em></li>
						<ul>
							<li>the TCP connection stays open for some time (10 sec is a popular choice)</li>
							<li>additional requests to the same server use the same TCP connection</li>
						</ul>
						<li>HTTP/1.1 introduces <em>pipelined connections</em></li>
						<ul>
							<li>instead of waiting for a response, requests can be queued</li>
							<li>the server responds as fast as possible</li>
							<li>the order may not be changed (there is no sequence number)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>HTTP Connection Handling</title>
					<img style="width : 90% ; margin : 2% ; " src="http-phttp-pipelining.png"/>
				</slide>
			</part>
			<part id="http-content-negotiation">
				<title>Content Negotiation</title>
				<slide>
					<title>What is Content Negotiation?</title>
					<ul>
						<li>Negotiation between two HTTP peers</li>
						<ul>
							<li>resources may be available in different representations</li>
							<li>possible dimensions are language, graphics format, character encoding, …</li>
							<li>using <u>one</u> URI, it should be possible to get the <q>best</q> resource</li>
						</ul>
						<li>Negotiation requires knowledge about the resource user</li>
						<ul>
							<li>languages depend on humans reading pages</li>
							<li>graphics formats depend on the browser's functionality</li>
						</ul>
						<li>Content negotiation is a form of a Web-based service</li>
						<ul>
							<li>clients request a URI and have some constraints</li>
							<li>using these constraints, the best representation should be served</li>
							<li>ideally, content negotiation should not be too expensive</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Three Different Variants</title>
					<ul>
						<li>Server Side Content Negotiation</li>
						<ul>
							<li>the server has a set of representations and information from the request</li>
							<li>the server returns the <q>best</q> representation based on the request</li>
						</ul>
						<li>Client Side Content Negotiation</li>
						<ul>
							<li>the server responds with a list of different representations</li>
							<li>the client (browser or user) makes a choice and sends a second request</li>
						</ul>
						<li>Transparent Content Negotiation</li>
						<ul>
							<li>Caches act as in client side negotiation and thus know the available representations</li>
							<li>Clients contacting the cache can be served by the cache as in server side negotiation</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Server Side Content Negotiation</title>
					<ul>
						<li>Clients usually tell something about themselves</li>
						<ul>
							<li><code>Accept</code>, <code>Accept-Charset</code>, <code>Accept-Encoding</code>, <code>Accept-Language</code></li>
							<li>the server also knows their IP address</li>
							<li>the server may also use additional information (<link href="cookie"/>s)</li>
						</ul>
						<li>The server needs to find the <q>best representation</q></li>
						<ul>
							<li>most easily by matching the request with available representations</li>
							<li>could also be implemented more dynamically by generating new representations</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part id="proxies">
			<title>Proxies</title>
			<slide>
				<title>Proxies</title>
				<ul>
					<li>HTTP often is end-to-end</li>
					<ul>
						<li>there is a direct connection between my browser and the server</li>
						<li>HTTP allows using proxies, which are HTTP intermediaries</li>
					</ul>
					<li>Proxies are used for security reasons</li>
					<ul>
						<li>a proxy is an important part of a firewall</li>
						<li>it hides the user's identity by acting on behalf of the user</li>
						<li>proxies are ideally suited for logging and filtering</li>
					</ul>
					<li>Proxies are used for performance reasons</li>
					<ul>
						<li>requests and responses can be cached, speeding up responses significantly</li>
						<li>caching depends on the ability to know when the cache is outdated</li>
						<li>HTTP enables proxies to validate their cached copies</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Browsers &amp; Proxies</title>
				<p>A <em>proxy</em> is configured in the browser (manually or automatically), so that the browser sends all requests to the proxy instead of the target Web server. The proxy then forwards the request. Proxies can be chained, so that the requests and responses travel through a number of HTTP systems.</p>
				<img style="width : 90% ; margin : 2% ; " src="proxy.png"/>
			</slide>
			<slide id="firewalls">
				<title>Firewalls</title>
				<ul>
					<li>Firewalls are used to protect computers</li>
					<ul>
						<li>protecting users from worms and viruses</li>
						<li>protecting servers from intrusion attacks</li>
						<li>firewalls analyze and block traffic based on complex rules</li>
					</ul>
					<li>A <em>reverse proxy</em> can be part of a firewall concept</li>
					<ul>
						<li>it is configured and maintained by the service provider</li>
						<li>it is a single access point through which HTTP traffic goes</li>
						<li>it is good because it bundles access control to servers behind it</li>
						<li>it is bad because it is a <em>single point of failure</em></li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Web Server Service</title>
				<ul>
					<li>HTTP is much more than file transfer</li>
					<ul>
						<li>it is a protocol for the concept of <em>resource manipulation</em></li>
						<li>it is a distinct step away from the <em>API approach</em> to building distributed systems</li>
					</ul>
					<li>HTTP servers can be configured to deliver good or bad service</li>
					<ul>
						<li>this is a question of how well they are configured on the HTTP level</li>
						<li>it is also a question of how good the Web design is</li>
						<li>both issues together are required to set up a good Web server</li>
					</ul>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="security">
        <title short="Security">Security Issues</title>
        <date>2008-09-11</date>
        <toc class="resources"><a href="http://dret.net/rfc-index/reference/RFC4346" title="TLS RFC">TLS</a>&#160;· <a href="http://www.simonsingh.net/The_Code_Book.html">Code Book</a></toc>
        <toc class="abstract">TCP and thus HTTP are clear-text protocols, which make no attempt to hide the data being transmitted. For secure data transfers, it thus is necessary to use additional technologies. This lecture looks briefly into the foundations of <em>cryptographic primitives</em> (such as one-way functions and encryption) and <em>cryptographic protocols</em>. For the Web, the most interesting security feature is secure HTTP interactions, which are provided by <em>HTTP over SSL (HTTPS)</em>, a protocol that layers an encryption layer (<em>SSL</em> or <em>TLS</em>) between TCP and HTTP.</toc>
		<toc class="assignment-number"><a href="a/1/">Assignment&#160;1</a></toc>
		<toc class="assignment-title">: HTTP Content Negotiation</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part id="security-101">
			<title>Security 101</title>
			<slide>
				<title>Cryptography</title>
				<ul>
					<li>Cryptography is structured into different layers</li>
					<ul>
						<li>layering is a well-established principle for <em>separation of concerns</em></li>
					</ul>
					<li><em>Cryptographic primitives</em> implement very basic functionality</li>
					<ul>
						<li>changes and advancements in this area are limited to very specialized researchers</li>
						<li>it is easy to make fatal mistakes which then challenge everything built on top of it</li>
					</ul>
					<li><em>Cryptographic protocols</em> assemble primitives into application-level solutions</li>
					<ul>
						<li>primitives solve very basic security problems (fingerprints, encryption, …)</li>
						<li>protocols combine these into applications (digital signatures, secure communications, …)</li>
					</ul>
				</ul>
			</slide>
			<part id="one-way-function">
				<title>One-Way Function</title>
				<slide>
					<title>Essence of Data</title>
					<ul>
						<li>Hashes (or <em>message digests</em>) are a well-known principle in computer science</li>
						<ul>
							<li>fast to compute (the goal is to make data handling more efficient)</li>
							<li>few collisions (there are always collisions because of the smaller size)</li>
							<li><em>checksums</em> and <em>Cyclic Redundancy Check (CRC)</em> are popular hashes</li>
						</ul>
						<li>One-way functions are cryptographically safe hashes</li>
						<ul>
							<li>not just for detecting errors, but also for preventing tampering</li>
							<li>often referred to as <em>cryptographic hash</em> or <em>digital fingerprint</em></li>
						</ul>
						<li>One-way functions must satisfy additional criteria</li>
						<ul>
							<li>it must be very hard to find an input producing a given output</li>
							<li>it must be very hard to find two inputs producing the same output (<q>collision</q>)</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Reducing Data</title>
					<img style="width : 90% ; margin : 2% ; " src="hash.gif" title="Hash"/>
				</slide>
			</part>
			<part>
				<title>Secret-Key Cryptography</title>
				<slide>
					<title>Plausible Encryption</title>
					<ul>
						<li>Secret-Key is what most people think of when thinking of encryption</li>
						<ul>
							<li><em>symmetric cryptography</em> is another popular term</li>
						</ul>
						<li>One key for encryption and decryption</li>
						<li>Losing the key makes encrypted data openly readable</li>
						<ul>
							<li>there must be a secure channel to transport keys</li>
						</ul>
						<li>Good for long-term relationships with few partners</li>
						<ul>
							<li>exchange secret keys as part of the initial setup of a relationship</li>
							<li>adding partners requires a <em>secure channel</em> for key exchange</li>
							<li>changing keys requires a <em>secure channel</em> for key exchange</li>
						</ul>
						<li>Almost impractical in an environment with many ad-hoc partners</li>
					</ul>
				</slide>
				<slide>
					<title>Notice the Arrow</title>
					<img style="width : 90% ; margin : 2% ; " src="secret-key.gif" title="Secret-Key Cryptography"/>
				</slide>
			</part>
			<part>
				<title>Public-Key Cryptography</title>
				<slide>
					<title>Implausible Encryption</title>
					<ul>
						<li>Public-Key intuitively is hard to accept as a concept</li>
						<ul>
							<li><em>asymmetric cryptography</em> is another popular term</li>
						</ul>
						<li>Key pairs of one public and one secret key</li>
						<ul>
							<li><em>key generation</em> is the process of generating these key pairs</li>
						</ul>
						<li>The public key can be made available to the public</li>
						<ul>
							<li>only the secret key can do the inverse operation of the public key</li>
						</ul>
						<li>Good for short-term relationships with many partners</li>
						<ul>
							<li>publish your public key so that it can be used worldwide</li>
							<li>everybody can encrypt data using the public key</li>
							<li>only the owner of the secret key can decrypt the message and read it</li>
						</ul>
						<li>Computationally expensive and not good for large amounts of data</li>
					</ul>
				</slide>
				<slide>
					<title>No Arrow Here …</title>
					<img style="width : 90% ; margin : 2% ; " src="public-key-public-encrypt.gif" title="Public-Key Cryptography (Encrypting with Public Key)"/>
				</slide>
				<slide>
					<title>And No Arrow Here …</title>
					<img style="width : 90% ; margin : 2% ; " src="public-key-secret-encrypt.gif" title="Public-Key Cryptography (Encrypting with Secret Key)"/>
				</slide>
			</part>
		</part>
		<part>
			<title>Cryptographic Protocols</title>
			<slide>
				<title>Building Secure Applications</title>
				<ul>
					<li><em>Cryptographic primitives</em> in most cases are not sufficient</li>
					<ul>
						<li>they provide basic functionality for fundamental tasks</li>
						<li>they must be combined to provide solutions for real-world problems</li>
					</ul>
					<li>Typical problem #1: How to ensure key authenticity</li>
					<ul>
						<li>with insecure keys, the majority of cryptographic methods is worthless</li>
					</ul>
					<li>Typical problem #2: How to communicate securely without shared keys</li>
					<ul>
						<li>many interesting scenarios are based on ad-hoc interactions</li>
						<li>secret-key does not work, public-key needs to verify the peer</li>
					</ul>
					<li>Typical problem #3: How to check authenticity and integrity of data</li>
					<ul>
						<li>integrity can be done with checksums, but these could be forged</li>
						<li>authenticity needs a cryptographically secure way of combining identity and data</li>
					</ul>
				</ul>
			</slide>
			<part id="digital-signature">
				<title>Digital Signature</title>
				<slide>
					<title>Encrypted Fingerprints</title>
					<ul>
						<li>Hashes are used to check data integrity</li>
						<li><link href="one-way-function"/>s are used to check data integrity securely</li>
						<ul>
							<li>it is not possible to reverse engineer data for a given hash</li>
						</ul>
						<li>Signed hashes can be used to ensure data authenticity</li>
						<ul>
							<li>if the hash sum is signed, it cannot be changed</li>
							<li>if the data is changed, its hash will not match the signed hash</li>
						</ul>
						<li>Digital signatures work as long as the hash can be securely signed</li>
						<ul>
							<li>there must be a trusted public key for checking the hash signature</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Creating a Digital Signature</title>
					<img style="height : 70% ; margin : 2% ; " src="signature-sign.jpg" href="http://en.wikipedia.org/wiki/Digital_signature"/>
				</slide>
				<slide>
					<title>Verifying a Digital Signature</title>
					<img style="height : 70% ; margin : 2% ; " src="signature-verify.jpg" href="http://en.wikipedia.org/wiki/Digital_signature"/>
				</slide>
				<slide id="certificate">
					<title>Certificate</title>
					<ul>
						<li>Certificates are digital signatures issued by a trusted party</li>
						<ul>
							<li>most digital signatures are created with certified public keys</li>
							<li>this means the digital signature is created based on a digitally signed key</li>
						</ul>
						<li>Who can you trust on the Web?</li>
						<ul>
							<li>trust can only start to grow based on initial trust in something</li>
							<li>many systems come with pre-installed trust (<em>root certificates</em>)</li>
							<li>certificates from other issuers will cause <a href="https://katapultmedia.com/">browsers to complain</a></li>
						</ul>
						<li>Certificates (like domain names) are a very easy way to make money</li>
						<ul>
							<li>in theory there are different levels of certificates with different levels of identity checking</li>
							<li>in practice most sites choose the cheapest one that does not give an error message</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part>
				<title>Secure Communications</title>
				<slide>
					<title>Encrypted Keys</title>
					<ul>
						<li>Public-Key cryptography is computationally expensive</li>
						<ul>
							<li>it is possible to encrypt all traffic using asymmetric key pairs</li>
							<li>this generates considerably more load on the server side</li>
						</ul>
						<li>Combining public- and secret-key cryptography</li>
						<ol>
							<li>check the public key for authenticity (using a <link href="certificate"/>)</li>
							<li>generate a key for a secret-key encryption scheme</li>
							<li>use the public key to securely transmit the secret key</li>
							<li>use the secret key for securely transmitting the payload</li>
						</ol>
						<li>Combines the advantages of both methods</li>
						<ul>
							<li>the lower complexity of secret-key algorithms</li>
							<li>the ability of public-key algorithms to work without a secure channel</li>
						</ul>
					</ul>
				</slide>
			</part>
		</part>
		<part id="https">
			<title short="HTTPS">HTTP over SSL (HTTPS)</title>
			<slide>
				<title>HTTP and Security</title>
				<ul>
					<li>HTTP sends clear-text messages</li>
					<ul>
						<li>listening to HTTP traffic is trivial</li>
						<li>information transferred via simple HTTP is public</li>
						<li>many Web data transfers (in most cases form data) should be secure</li>
					</ul>
					<li>Making HTTP secure requires additional mechanisms</li>
					<ul>
						<li><em>S-HTTP</em> was an attempt to define a secure version of HTTP</li>
						<li><em>HTTPS</em> uses a secure communication layer underneath HTTP</li>
					</ul>
					<li>Encryption is done by a layer on top of TCP</li>
					<ul>
						<li><em>Secure Sockets Layer (SSL)</em> is the protocol layer invented by Netscape</li>
						<li><em>Transport Layer Security (TLS)</em> is the standardized Internet version</li>
						<li>TLS adds more encryption schemes and more flexibility</li>
					</ul>
					<li>Lower-level methods may also provide encryption</li>
					<ul>
						<li><em>Virtual Private Networks (VPN)</em> provide IP-based encryption</li>
						<li><em>WEP</em> and <em>WPA</em> provide network interface encryption</li>
						<li>IP-based encryption is not <em>end-to-end</em> in the sense of <em>application-to-application</em></li>
					</ul>
				</ul>
			</slide>
			<slide id="s-http">
				<title>HTTPS vs. S-HTTP</title>
				<ul>
					<li>Securing HTTP can be done in two ways</li>
					<ol>
						<li>using a secure communications infrastructure</li>
						<li>making the protocol itself secure (adding cryptographic features)</li>
					</ol>
					<li>Secure infrastructures are better in terms of layering</li>
					<ul>
						<li>they can be reused for other applications (email, file transfer, …)</li>
						<li>they can be evolved independently from the application layer</li>
					</ul>
					<li>Secure protocols can provide better integration of security features</li>
					<ul>
						<li>signed interactions which can be archived for later retrieval</li>
					</ul>
					<li>Layering is more important than tightly integrated security</li>
				</ul>
			</slide>
			<slide>
				<title>HTTP and SSL</title>
				<img style="width : 80% ; margin : 2% ; " src="https.gif" title="HTTP and SSL"/>
			</slide>
			<slide>
				<title>TLS vs. IPsec</title>
				<img style="width : 90% ; margin : 2% ; " src="tls-vs-ipsec.png" title="TLS vs. IPsec"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Internet Security</title>
				<ul>
					<li>Certificates are used to guarantee a party's authenticity</li>
					<li>Certificates are digital signatures issued by trusted parties</li>
					<li>Once authenticated, public keys can be used to securely communicate</li>
					<li>For efficiency reasons, payload encryption uses secret-key cryptography</li>
					<li>Encryption on the Web is based on HTTPS</li>
					<li>VPN-based encryption can be used to secure all traffic</li>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="authentication">
        <title short="Authentication">Identity and Authentication</title>
        <date>2008-09-16</date>
        <toc class="resources"><a href="http://dret.net/rfc-index/reference/RFC2617" title="HTTP Authentication RFC">HTTP Authentication Spec</a></toc>
        <toc class="abstract">For any task involving personalization and/or trust, it is not only necessary to have a concept for providing privacy, but also to have concepts for <em>identity</em> and how to prove identity, which needs <em>authentication</em>. HTTP has built-in mechanisms for authentication, and the standard <em>HTTP Authentication</em> mechanisms are <em>Basic Authentication</em> and <em>Digest Access Authentication</em>. Instead of these mechanisms, many applications implement their own ways of authentication, which often are based around authentication using <em>HTML Forms</em>.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part>
			<title>Anonymous Authenticity</title>
			<slide>
				<title>Certificates and Identity</title>
				<ul>
					<li>What can you be certain of when using <link href="https">HTTPS</link>?</li>
					<ul>
						<li>in most cases there only is a server side <link href="certificate"/></li>
						<li>if attackers can manipulate your DNS they can attack HTTPS</li>
					</ul>
					<li>Many scenarios require reliable identification of the client</li>
					<ul>
						<li>HTTPS supports client side certificates for identifying clients</li>
						<li>rarely used outside of closed user groups</li>
					</ul>
					<li>Traditional user identification against a trusted server</li>
					<ul>
						<li>trusted servers can be used for securely transmitting authentication credentials</li>
						<li>identity and authentication methods are handled on a per-server basis</li>
						<li>when using many services, users end up with many identities</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Usernames and Passwords</title>
				<ul>
					<li>Usernames represent identities</li>
					<ul>
						<li>whoever uses the name is <em>identical with the user</em></li>
						<li>giving away the username means giving away the identity</li>
					</ul>
					<li>Passwords are used to authenticate a user</li>
					<ul>
						<li>identities are secured and checked by using passwords</li>
						<li>giving away the password means giving away the authenticity</li>
					</ul>
					<li>Usernames and passwords must be handled securely</li>
					<ul>
						<li>transmitting them in clear text (such as <link href="http">HTTP</link>) is a very bad idea</li>
						<li>reusing username/password pairs is a risky practice</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="http-authentication">
			<title>HTTP Authentication</title>
			<slide>
				<title>HTTP Access Control</title>
				<ul>
					<li>HTTP servers can <a href="http://en.wikipedia.org/wiki/List_of_HTTP_status_codes#4xx_Client_Error">deny access</a> because of access control</li>
					<ul>
						<li><code>401 Unauthorized</code> means the resource is access controlled</li>
						<li><code>403 Forbidden</code> means the resource is inaccessible</li>
						<li><code>405 Method Not Allowed</code> signals a request using the wrong <link href="http-request">request method</link></li>
					</ul>
					<li>Two different approaches to unauthorized access are possible</li>
					<ul>
						<li>repeat the HTTP request with the proper authentication credentials</li>
						<li>redirect to a <link href="login-page"/> and establish an authenticated <link href="session"/></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>HTTP Authentication</title>
				<img style="width : 90% ; margin : 2% ; " src="authentication-http.gif" title="HTTP Authentication" href="http://java.sun.com/j2ee/1.4/docs/tutorial/doc/Security5.html"/>
			</slide>
			<part id="basic-authentication">
				<title>Basic Authentication</title>
				<slide id="realm">
					<title>Authentication Information</title>
					<ul>
						<li>Authentication is based on <em>authentication realms</em></li>
						<ul>
							<li>a set of resources for which the authentication is required</li>
							<li>an opaque name which is used to signal which login is required</li>
							<li>username/password probably is specific for a given realm</li>
						</ul>
						<li>Users supply username and password through the client</li>
						<ul>
							<li>sent as <a href="http://en.wikipedia.org/wiki/Base64">Base64</a> encoded <q><code>username:password</code></q> string</li>
							<li>username and password are <a href="http://www.google.com/search?hl=en&amp;q=base64+decoder"><em>not</em> transmitted securely</a></li>
							<li>basic authentication should <em>always</em> use <link href="https">HTTPS</link></li>
						</ul>
						<li>Authorization is handled on the server side</li>
						<pre href="http://en.wikipedia.org/wiki/Basic_access_authentication">HTTP/1.0 401 Unauthorized
WWW-Authenticate: Basic realm="SokEvo"</pre>
						<pre href="http://en.wikipedia.org/wiki/Basic_access_authentication">GET /private/index.html HTTP/1.0
Authorization: Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==</pre>
					</ul>
				</slide>
				<slide>
					<title>Repeated Access</title>
					<ul>
						<li>Clients typically access more than one protected resource</li>
						<ul>
							<li>a perfectly stateless client would always request authentication from the user</li>
							<li>using the <link href="realm"/> clients can identify repeated accesses</li>
						</ul>
						<li>Clients remember the authentication and replay it automatically</li>
						<ul>
							<li>browsers provide little control over this feature</li>
							<li><q>logging out</q> of HTTP authenticated sessions is hard</li>
						</ul>
					</ul>
				</slide>
			</part>
			<part id="digest-authentication">
				<title>Digest Access Authentication</title>
				<slide>
					<title>Better HTTP Authentication</title>
					<ul>
						<li><link href="basic-authentication"/> is a serious security problem</li>
						<ul>
							<li>username and password are transmitted unencrypted</li>
						</ul>
						<li><em>Digest Access Authentication</em> does not require transmission of the password</li>
						<ul>
							<li>only information computed using a <link href="one-way-function"/> is transmitted via HTTP</li>
							<li>server-side needs clear-text password to compute HTTP header values</li>
						</ul>
						<li>Three-step one-way function calculation of <http>response</http> value</li>
						<ol>
							<li>HA1 = MD5(username, realm, password)</li>
							<li>HA2 = MD5(HTTP method, request URI)</li>
							<li>Response = MD5(HA1, nonce, nc, cnonce, qop, HA2)</li>
						</ol>
						<li>Server responses may include <http>Authentication-Info</http></li>
						<ul>
							<li>information for the next authenticated request</li>
						</ul>
					</ul>
				</slide>
				<slide>
					<title>Example Headers</title>
					<pre href="http://en.wikipedia.org/wiki/Digest_access_authentication">HTTP/1.0 401 Unauthorized
WWW-Authenticate: Digest realm="testrealm@host.com",
	qop="auth,auth-int",
	nonce="dcd98b7102dd2f0e8b11d0f600bfb0c093",
	opaque="5ccc069c403ebaf9f0171e9517f40e41"</pre>
					<pre href="http://en.wikipedia.org/wiki/Digest_access_authentication">GET /dir/index.html HTTP/1.0
Authorization: Digest username="Mufasa",
	realm="testrealm@host.com",
	nonce="dcd98b7102dd2f0e8b11d0f600bfb0c093",
	uri="/dir/index.html",
	qop=auth,
	nc=00000001,
	cnonce="0a4f113b",
	response="6629fae49393a05397450978507c4ef1",
	opaque="5ccc069c403ebaf9f0171e9517f40e41"</pre>
				</slide>
			</part>
		</part>
		<part id="app-authentication">
			<title>Application Authentication</title>
			<slide id="login-page">
				<title>Login Page</title>
				<ul>
					<li><link href="http-authentication"/> works with browser controls (including window)</li>
					<ul>
						<li>no possibility to <q>log out</q> without using browser-specific controls</li>
						<li>client side security depends on browser security measures</li>
					</ul>
					<li>Using <link href="forms"/> gives more freedom in session management</li>
					<ul>
						<li>authentication and authorization are completely application-based</li>
						<li>if there were <q>secure personal browsers</q> this would not work very well</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>HTTP and Form-Based Login</title>
				<ul>
					<li>HTTP is used inconsistently with form-based login</li>
					<ul>
						<li>some frameworks use redirections to login pages and then to resources</li>
						<li>some frameworks use login pages as front ends to protected resources</li>
					</ul>
					<li>HTTP messages never indicate any kind of authentication problem</li>
					<ul>
						<li><http>302 Found</http> is being sent for unauthenticated access (<http>Location</http> login page)</li>
						<li>successful login redirects to the original resource</li>
						<li><http>401 Unauthorized</http> signals missing authorization (no <http>WWW-Authenticate</http>)</li>
					</ul>
					<li>Use HTTP to display login pages</li>
					<ul>
						<li><http>403 Forbidden</http> when requesting a protected page without credentials</li>
						<li>send the login page as the HTML in the response</li>
						<li>after authentication and authorization serve the same page as <http>200 OK</http></li>
					</ul>
					<li>The <q>login representation</q> is the preferred method</li>
				</ul>
			</slide>
			<slide>
				<title>Form-Based Authentication</title>
				<img style="width : 90% ; margin : 2% ; " src="authentication-form.gif" title="Form-Based Authentication" href="http://java.sun.com/j2ee/1.4/docs/tutorial/doc/Security5.html"/>
			</slide>
			<slide>
				<title>HTML Session Management</title>
				<listing src="html-form-state.xml"/>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Web or Application Architecture</title>
				<ul>
					<li><link href="http-authentication"/> uses standards and standard mechanisms</li>
					<ul>
						<li><link href="basic-authentication"/> <em>should always use</em> <link href="https">HTTPS</link></li>
						<li><link href="digest-authentication"/> can be safely used over unencrypted connections</li>
					</ul>
					<li><link href="app-authentication"/> uses fewer Web methods</li>
					<ul>
						<li>some frameworks use <q>login representations</q> for protected resources</li>
						<li>some frameworks use redirects to channel requests through a login page</li>
					</ul>
				</ul>
			</slide>
		</part>
	</presentation>
    <presentation id="state">
        <title short="State">State Management</title>
        <date>2008-09-18</date>
        <toc class="reading"><a href="http://en.wikipedia.org/wiki/HTTP_cookie" title="Wikipedia about HTTP Cookies">Wikipedia</a></toc>
        <toc class="resources"><a href="http://www.w3.org/2001/tag/doc/state.html" title="State in Web Application Design">State</a>&#160;· <a href="http://dret.net/rfc-index/reference/RFC2965" title="Cookies RFC">Cookies Spec</a></toc>
        <toc class="abstract">HTTP is a stateless protocol, where each request/response interaction is a separate interaction and there is no protocol support for longer sessions (such as a user logging in and working on a Web site as an identified user). <em>State management</em> refers to mechanisms which provide support for this kind of scenario; the most popular choice for state management is <em>cookies</em>. Another possibility is URI-based state management. This lecture is a first glimpse into the world of <em>Representational State Transfer (REST)</em>, the Web's fundamental model of handling interaction with resources.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<part id="session">
			<title>Session</title>
			<slide>
				<title>HTTP and Sessions</title>
				<ul>
					<li>HTTP has no session concept</li>
					<ul>
						<li>interactions are HTTP request/response pairs and not site visits</li>
						<li><link href="http-performance">HTTP/1.1</link> does not change this, it is only a performance optimization</li>
						<li>servers can not reliably identify users interacting with a Web site</li>
					</ul>
					<li>Sessions should not be used to track resource state</li>
					<ul>
						<li>the semantics of resource interactions should not depend on client state</li>
						<li>application behavior can depend on client state</li>
					</ul>
					<li>HTTP's concept of <em>stateless interaction</em> is important</li>
					<ul>
						<li>the Web's idea is to use <em>loose coupling</em> between clients and servers/resources</li>
						<li>retrofitting the Web with <em>tight coupling</em> through server state is bad design</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Client-Side State</title>
				<ul>
					<li>Sessions should be maintained on the client</li>
					<ul>
						<li>the client has all relevant information about a session</li>
						<li>when the server restarts, no information will be lost</li>
						<li>if something has to be persistent, it should be a resource</li>
					</ul>
					<li>Small and short-term solutions may work well with server state</li>
					<ul>
						<li><em>scaling</em> these solutions typically introduces many problems</li>
						<li><em>debugging</em> can be hard because the state is transient</li>
						<li><em>integration</em> with other clients can become a difficult problem</li>
					</ul>
					<li>Three ways of client-side state are possible</li>
					<ol>
						<li>sending back and forth state as part of the interaction</li>
						<li>store state in the server and refer to it from the client (not recommended)</li>
						<li>store state at a URI and use the URI to refer to that state</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>State in HTML or HTTP</title>
				<img style="width : 90% ; margin : 2% ; " src="web-app-client-state.png" title="State in HTML or HTTP"/>
			</slide>
			<slide>
				<title>State in the Server Application</title>
				<img style="width : 90% ; margin : 2% ; " src="web-app-server-state.png" title="State in the Server Application"/>
			</slide>
			<slide>
				<title>State as a Resource</title>
				<img style="width : 90% ; margin : 2% ; " src="web-app-resource-state.png" title="State as a Resource"/>
			</slide>
			<slide>
				<title>Stateless Shopping</title>
				<ul>
					<li>Typical <q>session scenarios</q> can be <a href="http://www.peej.co.uk/articles/no-sessions.html">mapped to resources</a></li>
					<ul>
						<li>Client: Show me your products</li>
						<li>Server: Here's a list of all the products</li>
						<li>Client: I'd like to buy 1 of http://ex.org/product/X, I am "John"/"Password"</li>
						<li>Server: I've added 1 of http://ex.org/product/X to http://ex.org/users/john/basket</li>
						<li>Client: I'd like to buy 1 of http://ex.org/product/Y, I am "John"/"Password"</li>
						<li>Server: I've added 1 of http://ex.org/product/Y to http://ex.org/users/john/basket</li>
						<li>Client: I don't want http://ex.org/product/X, remove it, I am "John"/"Password"</li>
						<li>Server: I've removed http://ex.org/product/X from http://ex.org/users/john/basket</li>
						<li>Client: Okay I'm done, username/password is "John"/"Password"</li>
						<li>Server: Here is the total cost of the items in http://ex.org/users/john/basket</li>
					</ul>
					<li>This is more than just renaming <q>session</q> to <q>resource</q></li>
					<ul>
						<li>all relevant data is stored persistently on the server</li>
						<li>the shopping cart's URI can be used by other services for working with its contents</li>
						<li>instead of <em>hiding the cart in the session</em>, it is <em>exposed as a resource</em></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Reusing Resources</title>
				<img style="width : 90% ; margin : 2% ; " src="web-app-reusing-resource.png" title="Reusing Resources"/>
			</slide>
		</part>
		<part id="cookie">
			<title>Cookie</title>
			<slide>
				<title>Tracking Sessions</title>
				<ul>
					<li>Invented as a way to compensate for HTTP's lack of state</li>
					<ul>
						<li>application state is being sent to the client (<http>Set-Cookie2</http>)</li>
						<li>the client transmits application state in requests (<http>Cookie</http>)</li>
					</ul>
					<li>Cookies do not contain code that is executed</li>
					<ul>
						<li>some data that represents application state (by value or by reference)</li>
						<li>this data is stored by the client and returned to the server</li>
						<li>the client is not supposed to interpret the data in any way</li>
					</ul>
					<li>Cookies can be used in many different ways</li>
					<ul>
						<li>when used for tracking application state they are unproblematic</li>
						<li>when used for tracking resource state they introduce problems</li>
					</ul>
					<li>Cookies tightly bind clients to opaque concepts on the server</li>
				</ul>
			</slide>
			<slide>
				<title>Cookies for State Management</title>
				<img style="width : 90% ; margin : 2% ; " src="web-app-cookie-state.png" title="Cookies for State Management"/>
			</slide>
			<part>
				<title>Third-Party Cookie</title>
				<slide>
					<title>Advertising &amp; Privacy</title>
					<ul>
						<li>Big ad servers are digital hubs in the commercial Web</li>
						<ul>
							<li>consumers switch content providers but get the same ad provider</li>
							<li>tracking consumers <em>across</em> content providers is very valuable</li>
						</ul>
						<li>Cookies set by ad providers are sent very frequently</li>
						<ul>
							<li>each site that uses the ad provider triggers the cookies to be sent</li>
							<li>detailed profiling can be employed for creating consumer profiles</li>
						</ul>
						<li>Content and ad providers can cooperate for better profiling</li>
						<ul>
							<li>consumers log in to content providers and are reliably identified</li>
							<li>their personal profile can be matched with the ad provider's profile</li>
							<li>ad provider consolidation makes this scenario realistic</li>
						</ul>
					</ul>
				</slide>
			<slide>
				<title>Browsers Assemble Web Pages</title>
				<p>Typical Web resources (HTML pages) are assembled from a number of resources retrieved by HTTP. Any resource not originating on the server that is hosting the HTML page is considered a <q>third-party resource</q>. If the HTTP response for such a resource contains a cookie, it is a <q>third-party cookie</q>.</p>
				<img style="width : 90% ; margin : 2% ; " src="third-party-cookie.png" title="Third Party Cookie"/>
			</slide>
			</part>
		</part>
		<part>
			<title>Cookie-Less State Tracking</title>
			<slide>
				<title>Cookie Support</title>
				<ul>
					<li>Authentication can be tracked with <link href="http-authentication"/></li>
					<ul>
						<li>this is possible because authentication is built into HTTP</li>
					</ul>
					<li>Other session concepts are not supported by HTTP</li>
					<ul>
						<li>cookies have become the generic solution for all session tracking</li>
					</ul>
					<li>Cookies are increasingly limited by browsers</li>
					<ul>
						<li>cookies have gained some notoriety as privacy invaders</li>
						<li>browsers have more restrictive default settings</li>
						<li>an increasing number of users restricts cookie support</li>
					</ul>
					<li>Session-oriented Web sites often depend on cookies</li>
				</ul>
			</slide>
			<slide id="uri-rewriting">
				<title>URI Rewriting</title>
				<ul>
					<li><link href="cookie"/>s are a piece of information stored on the client</li>
					<ul>
						<li>they are sent by the server as a result of a request</li>
						<li>they are returned by the browser in subsequent requests to the same site</li>
					</ul>
					<li>The same information can also be encoded in the URI</li>
					<ul>
						<li>normally a response contains a cookie and an HTML page</li>
						<li>the same effect is achieved when all links include the <q>cookie value</q></li>
						<li>this method often results in very long URIs</li>
					</ul>
					<li>Some Web application frameworks switch automatically</li>
					<ul>
						<li>J2EE checks for cookie support and switches to URI rewriting if required</li>
					</ul>
					<li>Problems with bookmarks and caches</li>
				</ul>
			</slide>
			<slide>
				<title>Hidden Form Fields</title>
				<ul>
					<li><link href="cookie"/>s transmit session information via HTTP</li>
					<li><link href="uri-rewriting"/> encodes session information in URIs</li>
					<li><link href="forms"/> are a way to send data to a server</li>
					<ul>
						<li>in most cases this is data that is entered by the user</li>
					</ul>
					<li>Hidden form fields can be used to send data that is part of the HTML</li>
					<ul>
						<li>hidden form fields are never displayed to the user</li>
						<li>their predefined values are sent as part of the form submission</li>
					</ul>
					<li>Hidden form fields are essentially the same as <link href="uri-rewriting"/></li>
					<ul>
						<li>they can only be used if the interaction is based on forms</li>
						<li>they also require the Web page to be dynamically generated for each request</li>
						<li>the values end up as URI query string or request entity</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Session for Application State</title>
				<ul>
					<li>Sessions should only be used for application state</li>
					<li>Cookies are the best way to track sessions</li>
					<ul>
						<li>cookies should be self-contained rather than referential</li>
					</ul>
					<li>Alternative methods are URI rewriting and hidden form fields</li>
					<ul>
						<li>more robust than cookies but unpleasant side-effects</li>
					</ul>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="rest">
        <title short="REST">Representational State Transfer (REST)</title>
        <date>2008-09-23</date>
        <toc class="reading"><a href="http://www.mulberrytech.com/Extreme/Proceedings/html/2002/Prescod01/EML2002Prescod01.html" title='P. Prescod, "Roots of the REST/SOAP Debate", Extreme Markup Languages Conference, August 2002'>REST vs. SOAP</a>&#160;· <a href="http://www.eioba.com/a69755/how_i_explained_rest_to_my_wife">What is REST?</a>&#160;· <a href="http://bitworking.org/news/193/Do-we-need-WADL">REST Interfaces</a></toc>
        <toc class="resources"><a href="http://rest.blueoxen.net/cgi-bin/wiki.pl">RESTwiki</a></toc>
        <toc class="abstract"><em>Representational State Transfer (REST)</em> is an architectural style for building distributed systems. The Web is an example of such a system. REST-style applications can be built using a wide variety of technologies. REST's main principles are those of resource-oriented states and functionalities, the idea of a unique way of identifying resources, and the idea of how operations on these resources are defined in terms of a single protocol for interacting with resources. REST-oriented system design leads to systems which are open, scalable, extensible, and easy to understand.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
		<slide>
			<title>The Web as a System</title>
			<ul>
				<li>The Web is one distributed hypermedia system</li>
				<ul>
					<li>the main architectural components are URIs, HTTP, and HTML</li>
					<li>all other Web technologies are built on that foundation</li>
					<li>if they are not, they are very likely not well-designed Web technologies</li>
				</ul>
				<li>The Web is amazingly open, scalable, extensible, and easy to understand</li>
				<ul>
					<li><em>openness</em> allows new technologies to be introduced</li>
					<li><em>scalability</em> ensures that the system does not contain bottlenecks</li>
					<li><em>extensibility</em> allows the Web to evolve without any redesign of existing parts</li>
					<li><em>simplicity</em> makes sure that the system survives and evolves</li>
				</ul>
				<li>No other information system gets even close to the Web</li>
				<ul>
					<li>but not all information system designs can accept the Web's limitations</li>
					<li>REST should be seen as a guideline for how to build a true Web application</li>
					<li>other applications will continue to be built using other approaches</li>
				</ul>
			</ul>
		</slide>
		<slide>
			<title>Web System Design</title>
			<blockquote>There are two ways of constructing a software design: One way is to make it so simple that there are <em>obviously</em> no deficiencies, and the other way is to make it so complicated that there are no <em>obvious</em> deficiencies. The first method is far more difficult.</blockquote>
			<p class="quotenote"><a href="http://en.wikipedia.org/wiki/Charles_Antony_Richard_Hoare">C. A. R. Hoare</a>, <a href="http://dret.net/biblio/reference/hoa81"><q>The Emperor's Old Clothes</q>, 1980 Turing Award Lecture</a></p>
		</slide>
		<part>
			<title>Technologies and Implementations</title>
			<slide>
				<title>Object-Orientation</title>
				<ul>
					<li>Object-Orientation is a <em>Software Engineering Style</em></li>
					<ul>
						<li>it can be applied to any programming language</li>
						<li>depending on the language, this is more or less easy</li>
						<li>OO languages support or even enforce certain design patterns</li>
						<li><em>spaghetti code</em> can be written in every programming language</li>
					</ul>
					<li>Implementations can always be bad or good</li>
					<ul>
						<li>the quality of the implementation depends on the programmer</li>
						<li>programmers can be better supported and controlled with an OO language</li>
						<li>implementation quality metrics must be based on the product, not the language</li>
					</ul>
					<li>Good programmers always produce good code</li>
					<li>Bad programmers always produce bad code</li>
					<li>Average programmers need good tools to produce good code</li>
				</ul>
			</slide>
			<slide>
				<title>Technologies are Tools</title>
				<ul>
					<li>Technologies help solving problems</li>
					<ul>
						<li>they are built with certain goals in mind</li>
						<li>they specialize in solving a problem <em>in a specific way</em></li>
					</ul>
					<li>Technology choices are very important</li>
					<ul>
						<li>the technology (i.e., the tool) shapes the way a problem is solved</li>
						<li>working <q>against</q> the tool is possible, but hard and rarely done</li>
					</ul>
					<li>Technologies sometimes cloud the more important issues</li>
					<ol>
						<li>the problem must be well-defined and fully understood</li>
						<li>the general approach to solve a problem must be identified</li>
						<li>finally, a technology supporting this approach must be chosen</li>
					</ol>
				</ul>
			</slide>
			<slide>
				<title>Implementations are Products</title>
				<ul>
					<li>Implementations are built on (and shaped by) technologies</li>
					<li>Implementation = Concepts + Technologies</li>
					<li>Implementation quality depends on both factors</li>
					<ul>
						<li>the right choice of technologies is very important</li>
						<li>the responsible use of that foundation is equally important</li>
					</ul>
					<li>Good REST is as hard to grasp as good OO</li>
					<ul>
						<li>products may claim that they are REST/OO</li>
						<li>they may even use technologies which support REST/OO</li>
						<li>only careful inspection reveals the truth of this claim</li>
						<li>in most cases, this only happens when the product needs to be changed</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="rest-principle">
			<title>REST Principles</title>
			<slide>
				<title>Definition</title>
				<ul>
					<li>Resources are defined by URIs</li>
					<li>Resources are manipulated through their representations</li>
					<li>Messages are self-descriptive and stateless</li>
					<li>There can be multiple representations for a resource</li>
					<li>Application state is driven by resource manipulations</li>
				</ul>
			</slide>
			<slide>
				<title>Resources</title>
				<ul>
					<li>Resources are defined by URIs</li>
					<ul>
						<li>resources can never be accessed or manipulated directly</li>
						<li>REST works with resource representations</li>
					</ul>
					<li>Resources are all the things we want to work with</li>
					<ul>
						<li>if you cannot name something, you cannot do anything with it</li>
						<li>documents are a popular resource type on the Web</li>
						<li>documents usually are a structured collection of information</li>
					</ul>
					<li>Documents are abstract concepts of descriptive resources</li>
					<ul>
						<li>they may be used in different contexts (e.g., formats)</li>
						<li>different applications may be interested in different representations</li>
						<li>the underlying resource is always the same</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>State</title>
				<ul>
					<li>State is represented as part of the content being transferred</li>
					<ul>
						<li>server interruptions do not create problems for the client</li>
						<li>it is possible to switch between servers for different interactions</li>
						<li>clients can simply store the representation to save the state</li>
					</ul>
					<li>State transfer makes the system scalable</li>
					<ul>
						<li>data transfer is not state-specific (no stateful connection handling)</li>
						<li>state is transferred between client and server</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Establishing a Common Model</title>
				<ul>
					<li>Distributed systems must be based on a shared model</li>
					<ul>
						<li>traditional systems must agree on a common API</li>
						<li>REST systems structure agreement into three areas</li>
					</ul>
					<li>REST is built around the idea of simplifying agreement</li>
					<ul>
						<li><em>nouns</em> are required to name the resources that can be talked about</li>
						<li><em>verbs</em> are the operations that can be applied to named resources</li>
						<li><em>content types</em> define which information representations are available</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Nouns</title>
				<ul>
					<li>Nouns are the names of resources</li>
					<ul>
						<li>in most designs, these names will be URIs</li>
						<li>URI design is a very important part of a REST-based system design</li>
					</ul>
					<li>Everything of interest should be named</li>
					<ul>
						<li>by supporting well-designed names, applications can talk about named things</li>
						<li>new operations and representations can be introduced</li>
					</ul>
					<li>Separating nouns from verbs and representations improves extensibility</li>
					<ul>
						<li>applications might still work with resources without being able to process them</li>
						<li>introducing new operations on the Web does not break the Web</li>
						<li>introducing new content types on the Web does not break the Web</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Verbs</title>
				<ul>
					<li>Operations which can be applied to resources</li>
					<li>The core idea of REST is to use <em>universal verbs</em> only</li>
					<ul>
						<li>universal verbs can be applied to all nouns</li>
					</ul>
					<li>For most applications, HTTP's basic methods are sufficient</li>
					<ul>
						<li><http>GET</http>: Fetching a resource (there must be no side-effects)</li>
						<li><http>PUT</http>: Transfers a resource to a server (overwriting if there already is one)</li>
						<li><http>POST</http>: Adds to an existing resource on the server</li>
						<li><http>DELETE</http>: Discards a resource (its name cannot be used anymore)</li>
					</ul>
					<li>Corresponding to the most popular basic database operations</li>
					<ul>
						<li>CRUD: Create, Read, Update, Delete</li>
					</ul>
				</ul>
			</slide>
			<slide id="http-post">
				<title><http>POST</http>ing</title>
				<ul>
					<li><http>POST</http> adds to a resource instead of overwriting it</li>
					<li><http>POST</http> can have different effects</li>
					<ul>
						<li>by <http>POST</http>ing, state is changed and a new resource is created</li>
						<li>by <http>POST</http>ing, only the existing resource is changed</li>
						<li>the server signals the difference using HTTP responses (<http>200 OK</http> or <http>201 Created</http>)</li>
					</ul>
					<li>This is a <em>design choice</em></li>
					<ul>
						<li>if the added information needs to be accessible individually, create a new resource</li>
						<li>for changes of an existing resource, no new resource has to be created</li>
					</ul>
					<li>Make sure that resources are navigable using URIs</li>
					<ul>
						<li>if appropriate, a relationship can be represented in the resource format</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Content Types</title>
				<ul>
					<li>Representations should be machine-processable</li>
					<ul>
						<li>they don't have to be; they may be opaque to applications</li>
						<li>in many cases, machine-processable representations are advantageous</li>
					</ul>
					<li>Resources are abstractions, REST passes representations around</li>
					<ul>
						<li>resources can have various representations (i.e., content types)</li>
						<li>clients can request content types they are interested in</li>
					</ul>
					<li>Adding or changing content types does not change the system architecture</li>
					<ul>
						<li>different clients and servers support different content types</li>
						<li><link href="http-content-negotiation"/> allows content types to be negotiated dynamically</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>REST vs. <q>Web Services</q></title>
				<ul>
					<li>REST is a description of the Web's design principles</li>
					<ul>
						<li>it is not something new, it is simply a systematic view of the Web</li>
						<li>REST's claim is to be able to learn from the Web's success</li>
					</ul>
					<li>Web Services (the SOAP flavor) do not build on REST</li>
					<ul>
						<li>they use HTTP as a transport protocol</li>
						<li>they re-create Web functionality through additional specifications (WS-*)</li>
						<li>they have been built by programmers using a top-down approach</li>
					</ul>
					<li>REST and Web Services have different design approaches</li>
					<ul>
						<li>REST starts at the resources and takes everything from there</li>
						<li>Web Services focus on messages, which in most cases are operations</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="rest-implementation">
			<title>REST Implementation</title>
			<slide>
				<title>REST Technologies</title>
				<ul>
					<li>REST is not tied to a particular set of technologies</li>
					<ul>
						<li><link href="rest-uri"/> are the most common choice for nouns</li>
						<li><link href="rest-http"/> methods are the most common choice for verbs</li>
						<li><link href="rest-xml"/> is the most common choice for content types</li>
					</ul>
					<li>Choosing other technologies should have a very good reason</li>
					<ul>
						<li>building a REST system should make it open and accessible</li>
						<li>technology choices are as important as architectural choices</li>
					</ul>
				</ul>
			</slide>
			<slide id="rest-uri">
				<title>URIs</title>
				<ul>
					<li>REST requires a lot of URI design</li>
					<ul>
						<li>instead of being generated as a side-effect, they are the core of the system</li>
					</ul>
					<li>Designing URIs and starting from them is a new way of thinking</li>
					<ul>
						<li>URIs are much more powerful than just being an address of a Web page</li>
					</ul>
					<li>URIs are names for concepts</li>
					<ul>
						<li>concepts are never transmitted, only their representation</li>
						<li>having to focus on concepts rather than representations is helpful</li>
					</ul>
				</ul>
			</slide>
			<slide id="rest-http">
				<title>HTTP</title>
				<ul>
					<li>HTTP is the most successful RESTful protocol</li>
					<ul>
						<li>HTTP's author Roy Fielding coined the term <q>REST</q> in his <a href="http://dret.net/biblio/reference/fie00">Ph.D. thesis</a></li>
					</ul>
					<li>HTTP should be regarded as an <q>application-level protocol</q></li>
					<ul>
						<li>Web Service technologies use HTTP as a transport protocol</li>
						<li>HTTP has much more to offer than a firewall-penetrating pipe</li>
					</ul>
					<li>Web infrastructure is built around proper HTTP usage</li>
					<ul>
						<li>caching is built into HTTP and caches optimize the Web transparently</li>
						<li>authentication can be done using HTTP's authentication methods</li>
						<li>secure data transfer can be done using <link href="https"/></li>
					</ul>
				</ul>
			</slide>
			<slide id="rest-xml">
				<title>XML</title>
				<ul>
					<li>URI-identified resources are abstract concepts</li>
					<ul>
						<li>for machine-based processing, XML is a good representation</li>
						<li>for human-oriented interactions, HTML probably is a better choice</li>
					</ul>
					<li>Connections to other resources must be done by URI</li>
					<ul>
						<li>XML does not make built-in assumptions about identifiers</li>
						<li>but it does support URIs, for example with <em>XInclude</em> and <em>XML Base</em></li>
						<li>RESTful applications are about navigating a Web of URI-identified resources</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part>
			<title>Conclusions</title>
			<slide>
				<title>Better Services</title>
				<ul>
					<li>REST is an architectural style</li>
					<ul>
						<li>URI/HTTP/HTML/XML may be replaced</li>
						<li>the general principle of resource-based interaction remains valid</li>
					</ul>
					<li>RESTful system designs can create better systems</li>
					<ul>
						<li>a little bit more design effort in the beginning</li>
						<li>a lot fewer headaches later</li>
					</ul>
					<li>SOA often are not really RESTful</li>
					<ul>
						<li>SOA often focuses on operations</li>
						<li>REST focuses on resources</li>
					</ul>
					<li>RESTful design is a good starting point for OO implementations</li>
				</ul>
			</slide>
		</part>
    </presentation>
    <presentation id="unicode">
        <title short="Unicode">Character Set Issues &amp; Unicode</title>
        <date>2008-09-25</date>
        <toc class="resources"><a href="http://unicode.org/" title="Unicode Web Site">Unicode</a>&#160;· <a href="http://homepages.cwi.nl/~dik/english/codes/stand.html" title="History of Character Sets">History</a></toc>
        <toc class="abstract">Every character-based document is based on some model of which characters are available, and how they are encoded. <em>Unicode</em> is the most popular character set today and provides a variety of encoding schemes, each of them being a <em>Unicode Transformation Format (UTF)</em>. In addition to character sets and encodings, other issues relevant when dealing with characters are <em>transcoding</em> and <em>normalization</em>, which deal with the problems arising when using different character encodings or different encodings of particular characters.</toc>
		<slide>
			<title>Abstract</title>
			<p class="abstract"><toc class="abstract"/></p>
		</slide>
        <part id="characters">
			<title>Characters</title>
			<slide>
				<title>Characters and Computers</title>
				<ul>
					<li><em>American Standard Code for Information Interchange (ASCII)</em></li>
					<ul>
						<li>for the first time a basic set of characters had a universally accepted encoding</li>
						<li>many Internet protocols (such as <a href="../services-fall06/web1#(12)">HTTP</a>) encode their information in ASCII commands</li>
					</ul>
					<li>ASCII is a very limited repertoire of characters</li>
					<ul>
						<li>basic ASCII contains 128 characters (7 bit) with a number of control chars</li>
						<li>no variants of characters (German umlauts, French accents) are supported</li>
						<li>various code pages extending ASCII to 8 bit exist and are hard to distinguish</li>
					</ul>
					<li><em>Character</em> is not a trivial concept when regarded globally</li>
					<ul>
						<li>European languages all have writing systems based on a small number of <q>atoms</q></li>
						<li>other languages and writing systems have vastly different ideas of <q>language atoms</q></li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Characters</title>
				<blockquote>Character. (1) The smallest component of written language that has semantic value; refers to the abstract meaning and/or shape […]</blockquote>
				<p class="quotenote"><a href="http://dret.net/biblio/reference/unicode4"><em>The Unicode Standard, Version 4.0</em>, Addison-Wesley, 2003</a></p>
				<ul>
					<li>The alphabetic approach is only one of several possibilities</li>
					<ul>
						<li>A character in <em>Japanese hiragana and katakana scripts</em> corresponds to a syllable (usually a combination of consonant plus vowel)</li>
						<li><em>Korean Hangul</em> combines symbols for individual sounds of the language into square blocks, each of which represents a syllable; depending on the user and the application, either the individual symbols or the syllabic clusters can be considered to be characters</li>
						<li>In <em>Indic scripts</em> each consonant letter carries an inherent vowel that is eliminated or replaced using semi-regular or irregular ways to combine consonants and vowels into clusters; depending on the user and the application, either individual consonants or vowels, or the consonant or consonant-vowel clusters can be perceived as characters</li>
						<li><em>Arabic and Hebrew vowel sounds</em> are typically not written at all; when they are written they are indicated by the use of combining marks placed above and below the consonantal letters</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>Glyphs</title>
				<blockquote>[A Glyph is] a recognizable abstract graphic symbol which is independent of a specific design.</blockquote>
				<p class="quotenote"><a href="http://dret.net/biblio/reference/iso9541"><em>ISO/IEC 9541:1991, Information Technology – Font Information Interchange</em></a></p>
				<ul>
					<li><em>Visual rendering</em> introduces the notion of a glyph.</li>
					<li>There is <em>not</em> a one-to-one correspondence between characters and glyphs</li>
					<ul>
						<li>A single character can be represented by multiple glyphs (each glyph is then part of the representation of that character); these glyphs may be physically separated from one another</li>
						<li>A single glyph may represent a sequence of characters (this is the case with ligatures, among others)</li>
						<li>A character may be rendered with very different glyphs depending on the context</li>
						<li>A single glyph may represent different characters (e.g. capital Latin A, capital Greek A and capital Cyrillic A)</li>
					</ul>
				</ul>
			</slide>
		</part>
		<part id="charactersets">
			<title>Character Sets</title>
			<slide>
				<title>History of Character Sets</title>
				<ul>
					<li>Text documents need ways to represent characters</li>
					<ul>
						<li>computers handle bits, not characters</li>
						<li>to handle characters, computers need a mapping from characters to bits</li>
					</ul>
					<li>For a long time, computers were doing their work in a very isolated way</li>
					<ul>
						<li><q>I think there is a world market for maybe five computers.</q> (<a href="http://en.wikipedia.org/wiki/Thomas_J._Watson#Famous_misquote">¬ T. J. Watson</a>)</li>
					</ul>
					<li>With more computers being used, more data is exchanged between computers</li>
					<li><em>Data rot</em> happens on all levels (media, formats, applications)</li>
					<li>Standardization of character sets started in the 60's</li>
					<ul>
						<li>ASCII was the first generally accepted character set</li>
						<li>EBCDIC was invented and marketed by IBM (and is a terribly designed character encoding)</li>
						<li>ISO 8859 was the first attempt to better support character sets beyond ASCII</li>
						<li><em>Asian scripts</em> were always a problem because of the number of characters they need</li>
					</ul>
				</ul>
			</slide>
			<slide>
				<title>ASCII 1963</title>
				<img style="width : 90% ; margin : 2% ; " src="ascii-1963.gif" title="ASCII 1963"/>
			</slide>
			<slide>
				<title>ASCII 1965</title>
				<img style="width : 90% ; margin : 2% ; " src="ascii-1965.gif" title="ASCII 1965"/>
			</slide>
			<slide>
				<title>ASCII 1967</title>
				<img style="width : 90% ; margin : 2% ; " src="ascii-1967.gif" title="ASCII 1967"/>
			</slide>
			<slide>
				<title>EBCDIC</title>
				<table width="95%">
					<tr>
						<td>
							<img style="width : 90% ; margin : 2% ; " src="ebcdic.gif" title="EBCDIC"/>
							<br/>
							<p style="text-align : center">EBCDIC (1964)</p>
						</td>
						<td>
							<img style="width : 90% ; margin : 2% ; " src="ebcdic-augmented.gif" title="Augmented EBCDIC"/>
							<br/>
							<p style="text-align : center">Augmented EBCDIC</p>
						</td>
					</tr>
				</table>
			</slide>
			<slide>
				<title>Beyond ASCII</title>
				<ul>
					<li>ASCII is called ASCII for a reason</li>
					<ul>
						<li>it works well for English-speaking countries</li>
						<li>the majority of other languages cannot be represented</li>
					</ul>
					<li>Character sets and the 8 bit computer start to collide</li>
					<ul>
						<li>ASCII is very convenient because characters and bytes correspond 1:1</li>
						<li>every character set expanding ASCII will make this more complicated</li>
						<li>complications can occur within and/or outside of the character set</li>
					</ul>
					<li>Introducing a character set beyond 8 bit is a fundamental change</li>
					<ul>
						<li>dealing with and counting bytes is a seductively simple idea</li>
					</ul>
					<li>Introducing several 8 bit character sets saves the 8 bit world</li>
					<ul>
						<li>by introducing several character sets, each of them can remain 8 bit</li>
						<li>the complexity has now been shifted to the handling of various character sets</li>
					</ul>
				</ul>
			</slide>
			<slide id="iso8859">
				<title>ISO 8859</title>
				<ul>
					<li>A <em>family of character sets</em> rather than a single character set</li>
					<ul>
						<li>each ISO 8859 family member is an 8 bit character set (256 characters)</li>
						<li>the lower half (128 characters) is always the same (ASCII)</li>
						<li>the upper half is supporting different user groups and changes between versions</li>
					</ul>
					<li>ISO 8859 files cannot be identified by inspection</li>
					<ul>
						<li>ASCII characters can always be safely interpreted (identical on all ISO 8859 code pages)</li>
						<li>the upper half can only be interpreted if the code page is well-known</li>
					</ul>
					<li>ISO 8859 environments must carefully track the code pages being used</li>
					<ul>
						<li>failure to do so results in misinterpretation of characters</li>
					</ul>
					<listing src="iso8859-15.txt" encoding="ISO-8859-15" 