view minimum_bisection/paper/term_paper.tex @ 168:cc6bb3ca79fb default tip

6/4 is not 2
author Markus Kaiser <markus.kaiser@in.tum.de>
date Fri, 28 Nov 2014 01:41:50 +0100
parents 8e25d4b73391
children
line wrap: on
line source

\input{preamble.tex}
\input{tikzpictures.tex}

\title{Oblivious Routing and Minimum Bisection}
\author{\href{mailto:markus.kaiser@in.tum.de}{Markus Kaiser}}
\institute{Technische Universit\"at M\"unchen\\
\email{\href{mailto:markus.kaiser@in.tum.de}{markus.kaiser@in.tum.de}}}

\begin{document}

\maketitle
\thispagestyle{plain}
\begin{abstract}
    Oblivious routing is generalization of multi commodity flows where the actual demand function is unknown.
    This paper proofs the existence of an approximate solution using tree metrics which can easily be transformed into an $\Oh(\log n)$ approximation algorithm.
    This result is then applied to the minimum bisection problem asking for an vertex bisection with minimal cost in the edges between the sets, also resulting in an $\Oh(\log n)$ approximation.
\end{abstract}

\section{Oblivious Routing}
\label{sec:oblivious_routing}

\define{Maximum flow} is a well understood algorithmic problem.
Given a directed graph $G = (V, E)$, a mapping $c : E \to \Rnn$ denoted by $c_e \defeq c(e)$ assigning a maximum non-negative capacity to every edge and a \define{source} and \define{target} node $s, t \in V$, we are asked to find a flow of maximum value from $s$ to $t$.
For simplicity, we assume that $G$ is complete, which can be achieved by adding edges of capacity $0$.

\begin{definition}[Flow \cite{intro}]
    A function $f : E \to \Rnn$ assigning a throughput to every edge.
    It satisfies the following two properties:
    \begin{description}
        \item[Capacity Constraint:] For all $e \in E$, we have $f_e \leq c_e$.
        \item[Flow Conservation:] For all $v \in V \setminus \left\{ s, t \right\}$, we have
            \[\sum_{(u, v) \in E} f_{uv} = \sum_{(v, w) \in E} f_{vw}.\]
    \end{description}
    We denote $f_e \defeq f(e)$ for $e \in E$ and $f_{uv} \defeq f(u, v)$ for $u, v \in V$.
\end{definition}

The maximum flow problem can be solved in polynomial time using a number of algorithms, for example the Ford-Fulkerson method or Push-relabel algorithms, both of which are described in \cite{intro}.

This paper will introduce a generalization of this problem known as \define{oblivious routing} and proof the existence of an $\Oh(\log n)$ approximation algorithm based on \cite{racke2008optimal} and described in \cite{approx}.
After giving a definition of the problem, it is formulated as a linear problem and rewritten to yield the desired algorithm using a theorem giving an $\Oh(\log n)$ approximation of arbitrary metrics using tree metrics.
Finally, the result is applied to give an $\Oh(\log n)$ approximation of the \define{minimum bisection} problem.

\subsection{Problem definition}
\label{sub:or_problem}

\begin{figure}[tb]
    \centering
    \begin{tikzpicture}[flow graph]
        \path[use as bounding box] (-4, 2.4) rectangle (3.5,6.15);
        \flownodes
        \flowedges

        \draw
            (n) node[below=3] {\structure{u}}
            (j) node[below=3] {\structure{v}};

        \begin{pgfonlayer}{demand}
            \draw[demand edge, bend left=20] (n) edge (j);
        \end{pgfonlayer}

        \begin{pgfonlayer}{marked}
            \draw[marked edge, tumgreen]
                (n.center) edge (l.center)
                (l.center) edge (h.center)
                (h.center) edge (i.center)
                (i.center) edge (j.center);

            \draw[marked edge, tumblue]
                (n.center) edge (m.center)
                (m.center) edge (e.center)
                (e.center) edge (c.center)
                (c.center) edge (k.center)
                (k.center) edge (j.center);

            \draw[marked edge, tumred]
                (n.center) edge (b.center)
                (b.center) edge (j.center);
        \end{pgfonlayer}

        \node at (-0.25, 2.4) {$\lambda_{green} = 0.25 \qquad \lambda_{blue} = 0.5 \qquad \lambda_{red} = 0.25$};
    \end{tikzpicture}
    \caption{A solution to the oblivious routing problem gives a set of paths and an associated convex combination for any two nodes $u, v \in V$. In this case, we are given the green, blue and red paths from $u$ to $v$. The dashed demand $d_{uv}$ gets routed along these three paths, split according to the factors $\lambda_i$.}
    \label{fig:oblivous_routing}
\end{figure}

To define oblivious routing, we first consider a simpler generalization of maximum flow, the \define{multi-commodity flow} problem.
Instead of a single source and target node, we now allow an arbitrary number of nonnegative flow demands between two nodes given by a \define{demand function $d : V^2 \to \Rnn$} on an undirected graph.
Every such demand $d_{uv}$ must be routed from $u$ to $v$ using a set of $u$-$v$-paths and the total flow $f_e$ on an edge $e \in E$ is the sum of all demands routed through it.
The task of is now to find a flow satisfying all demands while exceeding the capacities of all edges as little as possible.
This excess is called \define{congestion}.

\begin{definition}[Congestion]
    The congestion $\rho$ attributed to a flow $f$ denotes the smallest factor $\rho$ such that for all edges $e \in E$ we have $f_e \leq \rho \cdot c_e$.
    \begin{align}
        \rho = \max_{e \in E} \frac{f_e}{c_e}
    \end{align}
\end{definition}

This problem is still solvable in polynomial time using linear programming \cite{approx}.
One more generalization leads to the oblivious routing problem.
We now want to find flow solutions without knowing the demands beforehand, i.e. we want to find a set of $u$-$v$-paths and associated fractions of demands for all $u, v \in V$ such that for any demand function this set of paths performs well.

\begin{problem}[Oblivious Routing]
    Given an undirected Graph $G = (V, E)$ and an edge capacity function $c : E \to \R^+$.
    Calculate a convex combination of paths for each $(u, v) \in V^2$ such that for any demand function the congestion of the resulting flow will be as small as possible.
\end{problem}

\subsection{Formulation as a linear program}
\label{sub:or_lp}

\begin{figure}[tb]
    \centering
    \begin{tikzpicture}[flow graph]
        \flownodes
        \flowedges
        \flowtreeedges

        \begin{pgfonlayer}{background}
            \node[draw=tumblue, fill=tumblue!20, thick, ellipse, rotate fit=30, fit=(n)(l)(m), inner sep=8pt] (el) {};
        \end{pgfonlayer}

        \path (el)
            ++ (200:1)
            ++ (0, -0.5) coordinate (label);
        \node[below of = label] {\structure{$S(\alert{e_T})$}};

        \begin{pgfonlayer}{marked}
            \foreach \source/ \dest in {l/h,m/e,m/f,n/b}
            \draw[tree edge, tumorange] (\source.center) edge (\dest.center);
        \end{pgfonlayer}

        \begin{pgfonlayer}{marked}
            \draw[tree edge, tumblue] (m.center) edge (e.center);
        \end{pgfonlayer}
        \path (m) -- node[flow capacity]{$e_T$} (e);
    \end{tikzpicture}

    \caption{Removing the edge $e_T$ from the green spanning tree introduces a tree split. One of the two vertex sets we denote as $S(e_T)$ and define the capacity of the split as the sum of all capacities of edges crossing the boundary of $S(e_T)$, the blue and orange edges.}
    \label{fig:tree_splits}
\end{figure}

To find the desired approximation algorithm, we must first develop some lower bound for the optimal solution.
A simple idea to create a valid (but probably not optimal) solution is to find any spanning tree $T = (V, E_T)$ of $G$ and route any demand along its edges.

Since it is a spanning tree, there is a unique path from any vertex $u$ to any vertex $v$ in $T$ which will be used to satisfy the complete demand.
For any edge $e_T \in E_T$ the resulting flow $f_{e_T}$ will be the sum of all demands between two nodes connected by this tree edge.
These are exactly the nodes in different connected components created by removing the edge $e_T$.

\begin{definition}[Tree Split]
    Given a tree $T = (V, E_T)$ and an edge $e_T \in E_T$.
    Removing $e_T$ from $T$ splits it into two connected components, one of which we call $S(e_T)$ and the other one being $V \setminus S(e_T)$.

    We call $C(e_T)$ the capacity and $D(e_T)$ the demand of this split given by the sum of all capacities and demands connecting nodes in different connected components.
    \begin{align}
        C(e_T) &= \sum_{\substack{u \in S(e_T), \\ v  \not\in S(e_T)}} c_{uv}\\
        D(e_T) &= \sum_{\substack{u \in S(e_T), \\ v  \not\in S(e_T)}} c_{uv}
    \end{align}
\end{definition}

This definition illustrated by \cref{fig:tree_splits} allows us to write the flow generated by a simple solution to oblivious routing using a spanning tree $T$ as $f_{e_T} = D(e_T)$ for all $e_T \in T$ and $0$ otherwise. We now observe that for a given demand function any solution to the resulting multi-commodity flow problem will be bounded below by all tree splits.

\begin{lemma}
    \label{lem:optimal}
    For any tree $T$ and any tree edge $e_T$, any routing in $G$ must contain an edge $e$ with congestion
    \begin{align}
        \rho_e \geq \frac{D(e_T)}{C(e_T)}.
    \end{align}
    Therefore, the optimal congestion $\rho^\ast$ of the flow problem can be no better.
\end{lemma}

We will use this observation to proof our approximation guarantee.
Suppose we find a spanning tree such that every edge has capacity of the capacity of the corresponding tree split divided by some factor $\alpha \geq 1$.
\begin{align}
    \forall e_T \in E_T.\quad c_{e_T} &\geq \frac{1}{\alpha} C(e_T)
    \intertext{Using \cref{lem:optimal} we can bound the congestion of this spanning tree in relation to the optimal solution.}
    \structure{\rho_T} &= \max_{e_T} \frac{D(e_T)}{c_{e_T}}\\
        &\structure{\leq} \alpha \max_{e_T} \frac{D(e_T)}{C(e_T)}\\
        &\leq \structure{\alpha \rho^\ast}
\end{align}
Note that this factor $\alpha$ is a property of the tree and not any specific demand function, thus we know that this tree yields a solution of cost at most $\alpha$ times the optimal solution for any demand.

This tree however does not have to exist and therefore this approach will not yield any approximation guarantee.
Since oblivious routing allows us to define multiple $u$-$v$-paths, a natural extension is to consider not only one spanning tree but a convex combination of multiple spanning trees.
We denote each of these trees as some $T_i = (V, E_{T_i})$ and identify $e \in E_{T_i}$ with $e \in T_i$ for compactness.
Solutions of this kind are a set of trees and factors $\left\{ (T_i, \lambda_i) \right\}$ with $\lambda_i \geq 0$ and $\sum_i \lambda_i = 1$.

For a given demand function, the demand $d_{uv}$ is routed through the unique $u$-$v$-paths in the trees $T_i$ according to the convex fractions.
For every edge $e$ in the original graph we get
\begin{align}
    f_e &= \sum_{\substack{i:\\e \in T_i}} \lambda_i D_i(e)
\end{align}
where we denote the demand of the split introduced by $T_i$ using some edge $e \in T_i$ as $D_i(e)$.

This solution scheme is still too strict though and we want to allow more sophisticated structures.
Instead of using the spanning trees directly, we interpret every node on a $u$-$v$-path of $T_i$ as an intermediate point on a path from $u$ to $v$.
Instead of using the direct connections given by the tree edges to traverse the intermediate points, we are allowed to take any path in $G$.
A pair of spanning tree and set of paths connecting two neighboured nodes in the tree is called a path tree.

\begin{figure}[tb]
    \centering
    \begin{tikzpicture}[flow graph]
        \flownodes
        \flowedges

        \draw
            (l) node[above=3] {\structure{u}}
            (i) node[above=3] {\structure{v}}
            (e) node[above left] {\structure{x}}
            (d) node[above=3] {\structure{z}}
            (c) node[below=3] {\structure{y}} ;

        \begin{pgfonlayer}{demand}
            \draw[demand edge, bend left=10] (l) edge (i);
        \end{pgfonlayer}

        \begin{pgfonlayer}{marked}
            \foreach \source/ \dest in {l/m,m/e,d/k,k/j,j/i}
            \draw[tree edge, tumgreen] (\source.center) edge (\dest.center);
        \end{pgfonlayer}

        \begin{pgfonlayer}{marked}
            \draw[tree edge, line width=4pt, white] (e.center) edge (d.center);
            \draw[tree edge, densely dashed, tumgreen!80] (e.center) edge (d.center);

            \foreach \source/\dest in {e/c,c/d}
            \draw[tree edge, tumblue!80] (\source.center) edge (\dest.center);
        \end{pgfonlayer}

        % \begin{pgfonlayer}{marked}
        %     \foreach \source/ \dest in {l/h,h/i}
        %     \draw[tree edge, tumorange] (\source.center) edge (\dest.center);
        % \end{pgfonlayer}
    \end{tikzpicture}
    \caption{A path tree is a pair of a spanning tree $T$ and a mapping $P$ from its edges to arbitrary paths in $G$. Instead of routing the demand $d_{uv}$ along the green tree edges, the edge $(x, z)$ is replaced by the blue edges $P((x, z)) = ((x, y), (y, z))$.}
    \label{fig:pathtrees}
\end{figure}

\begin{definition}[Path Tree]
    A path tree of an undirected graph $G = (V, E)$ is a pair $(T, P)$ of a spanning tree $T$ of $G$ and a function $P : E_T \to E$ identifying every edge $e_T = (x, y)$ of $T$ with a path in $G$ from $x$ to $y$.
\end{definition}

Note that the same edge in $G$ may well be used by multiple different paths in the same path tree.
To now route any demand $d_{uv}$ using a path tree, we move along the unique $u$-$v$-path in the tree and stitch together the paths corresponding to the edges in the path.

Since if routing with spanning tree $T_i$ the flow through every tree edge $e_T$ would be $D_i(T_i)$, the same amount of flow must now be added to every edge on the path $P_i(e_T)$.
If we now consider a convex combination of path trees, the flow through every edge $e \in E$ is
\begin{align}
    f_e &= \sum_{i} \lambda_i \sum_{\substack{\alert{e_T} \in T_i:\\\structure{e} \in P_i(\alert{e_T})}} D_i(\alert{e_T}).
\end{align}

Suppose we find a set of path trees such that
\begin{align}
    \forall \structure{e} \in E.\quad c_{e} &\geq \frac{1}{\alpha} \sum_{i} \lambda_i \sum_{\substack{\alert{e_T} \in T_i:\\\structure{e} \in P_i(\alert{e_T})}} C_i(\alert{e_T})
    \intertext{we can again bound its congestion by the optimal solution using \cref{lem:optimal}.}
    \rho &= \hphantom{\alpha}\max_{\structure{e}} \frac{f_e}{c_{\structure{e}}} \\
    &\leq \alpha \max_{\structure{e}} \frac{\sum_{i} \lambda_i \sum_{\substack{\alert{e_T} \in T_i:\\\structure{e} \in P_i(e_T)}} D_i(\alert{e_T})}{\sum_{i} \lambda_i \sum_{\substack{\alert{e_T} \in T_i:\\\structure{e} \in P_i(e_T)}} C_i(\alert{e_T})} \\
    &\leq \alpha \max_{\structure{e}} \max_{i} \frac{D_i(\structure{e})}{C_i(\structure{e})} \leq \alpha \rho^\ast
\end{align}

We will now show that there always is a solution using path trees such that $\alpha \in \Oh(\log n)$ and sketch how to find such a set of trees.
We denote the finite but exponentially sized set of all path trees over $G$ as $\Ih$.
We can then formulate a linear program which enforces the assumption that every edge capacity is large enough and chooses a set of path trees such that $\alpha$ is minimal.
\begin{alignat}{3}
    \min_{\structure{\alpha, \lambda}} \quad & \mathrlap{\structure{\alpha}} \label{eq:primal}\\
    \st \quad && \sum_{i \in \Ih} \structure{\lambda_i} \sum_{\substack{e_T \in T_i: \\ \alert{(u, v)} \in P_i(e_T)}} C_i(e_T) & \leq \structure{\alpha} \alert{c_{uv}} & \qquad \forall \alert{u,v} \in V \\
    && \sum_{i \in \Ih} \structure{\lambda_i} &= 1 \\
    && \structure{\lambda} &\geq 0
\end{alignat}

To gain an $\Oh(\log n)$ approximation algorithm from this linear program, we have to show that $\alpha$ is of at most logarithmic size and that it is possible to solve this linear program with exponentially sized sums in polynomial time, yielding a solution of polynomially many path trees.

\subsection{Approximation guarantee using the dual}
\label{sub:or_approx}

To proof the bound for the value of the linear program, we will consider the dual program and show that it is logarithmically bounded.
The dual problem has exponentially many constraints, one for each path tree, and a decision variable $\ell_{uv} \in \Ell$ for every pair of vertices.
\begin{alignat}{3}
    \max_{\structure{z, \Ell}} \quad & \mathrlap{\structure{z}} \label{eq:dual}\\ %
    \st \quad && \sum_{u, v \in V} c_{uv} \structure{\ell_{uv}} &= 1 \\
    && \structure{z} & \leq \sum_{e_T \in T_i} C_i(e_T) \sum_{(u,v) \in P_i(e_T)} \structure{\ell_{uv}} & \qquad \forall i \in \Ih \label{eq:dual_constraints}\\
    && \structure{\Ell} &\geq 0
\end{alignat}

We will rewrite this dual program to be able to apply a result about the approximation of metrics with tree metrics.
To this end we will interpret the variables $\ell_{uv}$ as distances between vertices and obtain a shortest path metric $d_\ell(u, v)$ which we will approximate later. For an edge $e = (x, y)$ we denote $d_\ell(e) \defeq d_\ell(x, y)$.

We first observe that in \cref{eq:dual_constraints} for any given $i$, the length of $P_i(e_T)$ is always at least $d_\ell(e_T)$ and since all constraints bound $z$ above and there is some tree choosing the shortest path, we can replace the constraints by
\begin{align}
    \structure{z} & \leq \sum_{e_T \in T_i} C_i(e_T) d_{\structure{\ell}}(e_T) \qquad \forall i \in \Ih.\\
    \intertext{We can further reduce the constraints to the smallest constraint, again because we are bounding $z$ above.}
    \structure{z} & \leq \min_{i \in \Ih}\sum_{e_T \in T_i} C_i(e_T) d_{\structure{\ell}}(e_T)
\end{align}

Since we are only left with one constraint concerning $z$ and it is not otherwise needed in the dual program we can move it into the objective function and obtain a smaller LP.
While it is no longer of exponential size, it might still take exponential time to find the minimum.
\begin{alignat}{3}
    \max_{\structure{\Ell}} \quad & \mathrlap{\min_{i \in \Ih}\sum_{e_T \in T_i} C_i(e_T) d_{\structure{\ell}}(e_T)}\\
    \st \quad && \tikzmark{a}\sum_{u, v \in V} c_{uv} \structure{\ell_{uv}} &= 1\tikzmark{b} \\
    && \structure{\Ell} &\geq 0
\end{alignat}

While the nonnegativity of the distances is actually desired, we now want to show that the last remaining constraint does not in fact reduce the solution space.
Suppose $\sum_{u, v \in V} c_{uv} \ell_{uv} = \beta > 0$, the products of of capacities and lengths sum up to an arbitrary positive value.
We can then transform the set of lengths $\Ell$ to a feasible solution of the dual problem by scaling every length by $\frac{1}{\beta}$.
This, however, changes the objective function by the same factor.
If we drop the constraint enforcing $\beta = 1$, we have to divide the objective function by $\beta$.
This yields a rather compact representation of the dual program.
\begin{alignat}{3}
    \max_{\structure{\Ell}} \quad & \min_{i \in \Ih}\frac{\sum_{e_T \in T_i} C_i(e_T) d_{\structure{\ell}}(e_T)}{\sum_{u, v \in V} c_{uv} \structure{\ell_{uv}}} \label{eq:dual_compact}\\
    \st \quad & \structure{\Ell} \geq 0
\end{alignat}

\begin{figure}[tb]
    \centering
    \begin{tikzpicture}[flow graph]
        \flownodes
        \flowedges
        \flowtreeedges

        \draw
            (n) node[above=3] {\structure{u}}
            (m) node[below=3] {a}
            (e) node[below=3] {b}
            (d) node[above=3] {c}
            (c) node[below=3] {d}
            (b) node[below=3] {\structure{v}};

        \path
            (n) -- node[node on edge] {$c_{uv}$} (b);

        \begin{pgfonlayer}{marked}
            \draw[tree edge, tumorange] (n.center) edge (b.center);
        \end{pgfonlayer}

        \begin{pgfonlayer}{marked}
            \draw[tree edge, tumred] (m.center) edge (e.center);
        \end{pgfonlayer}

        \begin{pgfonlayer}{background}
            \node[draw=tumblue, fill=tumblue!20, thick, ellipse, rotate fit=30, fit=(n)(l)(m), inner sep=8pt] (el) {};
        \end{pgfonlayer}
    \end{tikzpicture}
    \caption{The left hand side contains a product of capacity $c_{uv}$ and distance $M_{ab}$ iff $c_{uv}$ is part of the tree split introduced by the tree edge $(a, b)$. A capacity is part of a tree split iff $u$ and $v$ are not in the same connected component in the split. This is the case iff the tree edge lies on the path between $u$ and $v$. The sum of all tree edges between $u$ and $v$ is $M_{uv}$ by definition, yielding the right hand side of the sum.}
    \label{fig:sum_over_capacities}
\end{figure}

To proof the desired approximation guarantee we now approximate $d_\ell$ using a result obtained about tree metrics from \cite{approx}.
\begin{theorem}[Tree Metric]
    For any nonnegative set of costs $c_{uv}$ and any metric $d_\ell$ there exists a tree metric $(V, M)$ such that
    \begingroup
        \addtolength{\jot}{.5em}
        \begin{alignat}{2}
            d_\ell(u, v) &\leq M_{uv} & \forall u, v \in V\\
            \sum_{u, v \in V} c_{uv} M_{uv} &\leq \Oh(\log n) \sum_{u, v \in V} c_{uv} d_\ell(u, v).
        \end{alignat}
    \endgroup
\end{theorem}

To be able to apply this result to our dual, we first need to show one more lemma.
We need to rewrite the enumerator in \cref{eq:dual_compact} to not sum over tree edges but all edges in the graph.
\begin{lemma}
    Let $T$ be a spanning tree and $(V, M)$ a tree metric of $G = (V, E)$.
    Then it holds that
    \begin{align}
        \sum_{(x, y) \in E_T} C(x, y) M_{xy} = \sum_{(u, v) \in E} c_{uv} M_{uv}.
    \end{align}
\end{lemma}
\begin{proof}
    See \cref{fig:sum_over_capacities}.
    \qed
\end{proof}

Combining all observations and lemmas now bounds the value of both linear programs logarithmically, directly following from the logarithmic bound of the tree metrics.

\begin{theorem}
    The primal and dual linear program have a value of $\Oh(\log n)$.
\end{theorem}
\begin{proof}
    We proof the value of the dual program, the value of the primal program then follows by strong duality.

    For any lengths $\Ell$ we know that for the minimizing tree in \cref{eq:dual_compact} we have the following chain of inequalities, using the definition of tree metrics, the previous lemma and the observation that the distance $d_\ell(u, v)$ is never larger than $\ell_{uv}$ since $d_\ell$ is a shortest path metric.
    \begin{align}
        \sum_{e_T \in T_i} C_i(e_T) d_\ell(e_T) &\leq \sum_{e_T \in T_i} C_i(e_T) M_{e_T} \\
        &= \sum_{u, v \in V} c_{uv} M_{uv} \\
        &\leq \Oh(\log n) \sum_{u, v \in V} c_{uv} d_\ell(u, v) \\
        &\leq \Oh(\log n) \sum_{u, v \in V} c_{uv} \ell_{uv} \\
        \intertext{Dividing by the sum directly gives the value guarantee:}
        \frac{\sum_{e_T \in T_i} C_i(e_T) d_\ell(e_T)}{\sum_{u, v \in V} c_{uv} \ell_{uv}} &\leq \Oh(\log n)
    \end{align}
    \qed
\end{proof}

While this proofs the existence of a solution of value $\Oh(\log n)$, it still remains to be shown that polynomially many trees are enough and the solution can actually be found in polynomial time.
While this is done rigorously in \cite{approx}, the idea is to rewrite the linear program in such a way to not actually search for the minimal $\alpha$ but enforce an $\alpha \in \Oh(\log n)$.
This results in a linear program which can be solved using the ellipsoid method in polynomial time resulting in a set of polynomially many path trees.
This then proofs the existence of the desired $\Oh(\log n)$ approximation algorithm.

\section{Minimum Bisection}
\label{sec:minimum_bisection}

\begin{figure}[tb]
    \centering
    \begin{tikzpicture}[flow graph]
        \flownodes
        \flowedges

        \begin{pgfonlayer}{background}
            \node[draw=tumblue, fill=tumblue!20, thick, ellipse, fit=(n)(l)(m)(h)(e)(f), rotate=10, inner sep=-8pt] (el) {};
        \end{pgfonlayer}

        \path (el)
            ++ (-120:2.1) node {\structure{$S$}};

        \begin{pgfonlayer}{marked}
            \foreach \source/\dest in {n/b,f/b,f/c,e/c,e/d,e/g,h/i}
            \draw [tree edge, tumorange] (\source.center) -- (\dest.center);
        \end{pgfonlayer}
    \end{tikzpicture}
    \caption{The cost $c(\delta(S))$ of the bisection $S$ is defined as the sum of the nonnegative costs of all orange edges leaving the set $S$.}
    \label{fig:minimum_bisection}
\end{figure}

The knowledge about the existence of an $\Oh(\log n)$ approximation algorithm for the oblivious routing problem can be applied to find approximation algorithms for a number of other problems, some of which are described in \cite{racke2008optimal}.
As an example, we will now give a definition of the minimum bisection problem and show that it can be approximated using oblivious flow.

Minimum bisection has a similar structure to oblivious flow as it is also defined over undirected graphs with a function mapping the edges to non negative numbers.
Instead of interpreting these numbers as a capacity, they are now understood as costs.
We want to find a set containing half of the vertices such that the sum of all costs of edges leaving this set is minimal.
\begin{problem}[Minimum Bisection]
    Given an undirected Graph $G = (V, E)$ and an edge-cost function $c : E \to \Rnn$.
    Find a set $S \subset V$ containing half the vertices with minimal split cost $c(\delta(S))$.
    \begin{align}
        \delta(S) &\defeq \left\{ (x, y) \in E \mid x \in S \wedge y \not\in S \right\} \\
        c(\delta(S)) &\defeq \sum_{\substack{e \in E:\\e \in \delta(S)}} c_e
    \end{align}
\end{problem}

We will now show that the set of path trees obtained by solving the oblivious flow problem on $G$ with capacity function $c$ contains a tree that defines a bisection of $G$ which costs logarithmically more than an optimal solution.
We call this bisection a minimum tree bisection.
It is obtained by reweighing the edges of the tree to cost as much as the tree splits introduced by them and then solving the minimum bisection problem on those trees.
This can be solved in polynomial time with a straightforward dynamic programming approach described in \cite{approx}.

\begin{definition}[Minimum Tree Bisection]
    Given a spanning tree $T$ of $G$ with an edge $e_T \in E_T$.
    We define a new cost function $c_T : E_T \to \Rnn$ based on the tree splits induced by the edges of $T$.
    The cost of a bisection $S$ is the sum of tree split costs of all tree edges cut by $S$.
    \begin{align}
        c_T(e_T) &= C(e_T)\\
        c_T(\delta(S)) &= \sum_{\substack{e_T \in E_T:\\e_T \in \delta(S)}} C(e_T)
    \end{align}
    The minimum tree bisection $X$ of $T$ is an optimal solution of the minimum bisection problem with respect to the cost function $c_T$.
\end{definition}

Assuming we can find minimum tree bisections in polynomial time, the following algorithm describes an $\Oh(\log n)$ approximation algorithm for the minimum bisection problem.
While the polynomial running time is clear, it remains to show that the obtained solution actually satisfies the approximation guarantee.

\begin{algorithm}
    \label{alg:minimum_bisection}
    Given graph $G = (V, E)$ and cost function $c : E \to \Rnn$.
    \begin{enumerate}
        \item Interpret costs $c(e)$ as capacities
        \item Solve oblivious routing on $G$, obtaining trees $T_i$
        \item Find minimum tree bisections $X_i$ for all trees $T_i$
        \item Choose the $X_i$ with lowest $c(\delta(X_i))$
    \end{enumerate}
\end{algorithm}

To proof the guarantee we will need two lemmas connecting the cost of a split in the original graph $G$ with the costs in a single spanning tree $T$ relative to its cost function $c_T$ and a convex combination of trees.

\begin{lemma}
    \label{lem:bisection_combination}
    Let $\left\{ (T_i, \lambda_i) \right\}$ be an $\Oh(\log n)$-approximation to the oblivious flow problem.
    Then for any cut $S \subseteq V$ the convex combination of tree bisections is bounded above by the cost of $S$ times a logarithmic factor.
    \begin{align}
        \sum_i \lambda_i c_{T_i}(\delta(S)) \leq \Oh(\log n) c(\delta(S))
    \end{align}
\end{lemma}
\begin{proof}
    We remember the first constraint of the primal program in \cref{eq:primal}.
    Since we have already proven that $\alpha \in \Oh(\log n)$ we can replace it by this bound and sum up the inequalities for all edges in $\delta(S)$.
    \begin{align}
        \sum_i \lambda_i \sum_{\structure{(u, v) \in \delta(S)}} \sum_{\substack{e_T \in T_i:\\\structure{(u,v)} \in P_i(e_T)}} C_i(e_T) \leq \Oh(\log n)c(\delta(S))
    \end{align}
    The right hand side then sums up to the desired term, while we have to rewrite the left hand side a bit.
    \begin{align}
        c_{T_i}(\delta(S)) = \sum_{\substack{e_T \in E_{T_i}:\\e_T \in \delta(S)}} C_i(e_T) \leq \sum_{\structure{(u, v) \in \delta(S)}} \sum_{\substack{e_T \in T_i:\\\structure{(u,v)} \in P_i(e_T)}} C_i(e_T)
    \end{align}
    While the equality holds by definition, we observe that for every tree edge $e_T$ contained in $\delta(S)$ there must be an edge in $P_i(e_T)$ which crosses the boundary of $S$ since $e_T$ starts in $S$ and ends outside of $S$.
    Therefore, all summands in the left side are contained in the right side, the inequality holds.
    If we apply this for all $T_i$, we have proven the lemma.
    \qed
\end{proof}

While we can bound the convex combination of minimum tree bisections obtained by the oblivious flow approximation by the original cost function, we can also show that the original cost is always smaller than the cost of the same set relative to some tree cost function.

\begin{figure}[tb]
    \begin{tikzpicture}[flow graph]
        \flownodes
        \flowedges
        \flowtreeedges

        \begin{pgfonlayer}{background}
            \node[draw=tumblue, fill=tumblue!20, thick, ellipse, fit=(c)(d)(g)(k)(j)(i)(e)(h), inner sep=3pt] (el) {};
        \end{pgfonlayer}

        \path (el)
            ++ (-30:2.6) node {\structure{$S$}};

        \draw
            (b) node[below=3] {\structure{u}}
            (j) node[below=3] {\structure{v}};

        \begin{pgfonlayer}{marked}
            \foreach \source/\dest in {f/c,b/j,l/h}
            \draw [tree edge, tumblue, dash pattern=on 7pt off 7pt] (\source.center) -- (\dest.center);

            \foreach \source/\dest in {b/c,e/f,e/m}
            \draw [tree edge, tumred] (\source.center) -- (\dest.center);
        \end{pgfonlayer}

        \foreach \source/\dest/\name in {b/c/$e_3$,e/f/$e_2$,e/m/$e_1$}
        \path (\source) -- node[node on edge] {\name} (\dest);
    \end{tikzpicture}
    \caption{The cost of $S$ in the original graph is the sum of all red and blue dashed edge costs. While the red edges are tree edges and thus contained in the tree cost function, the blue dashed edge connecting $u$ and $v$ is contained in the tree split introduced by edge $e_3$ on the tree path between $u$ and $v$.}
    \label{fig:tree_bisections}
\end{figure}

\begin{lemma}
    \label{lem:bisection_single}
    For any spanning tree $T$ and any cut $S \subseteq V$ the cost of the cut is bounded above by the tree bisection of $T$.
    \begin{align}
        c(\delta(S)) \leq c_T(\delta(S))
    \end{align}
\end{lemma}
\begin{proof}
    An edge $(u, v)$ is contained in $\delta(S)$ iff it starts in $S$ and ends outside of $S$.
    Since there is an unique path from $u$ to $v$ in $T$ there must be an edge $e_T$ on this path that is also contained in $\delta(S)$.
    But then $(u, v)$ must be contained in the tree split introduced by $e_T$ and is therefore contained in the right hand side costs by definition.
    See \cref{fig:tree_bisections} for an illustration of this proof.
    \qed
\end{proof}

These lemmas allow us to quickly prove the approximation guarantee of the algorithm.

\begin{theorem}
    \cref{alg:minimum_bisection} is an $\Oh(\log n)$ approximation algorithm for the minimum bisection problem.
\end{theorem}
\begin{proof}
    Let $X^\ast$ be an optimal minimum bisection of $G$ and $\left\{ (T_i, \lambda_i) \right\}$ be a set of trees obtained from the oblivious flow algorithm with minimum tree bisections $X_i$.
    We consider the convex combination of the costs of all minimum tree bisections.
    \begin{align}
        \sum_i \lambda_i c(\delta(X_i)) &\leq \sum_i \lambda_i c_{T_i}(\delta(X_i)) \\
        &\leq \sum_i \lambda_i c_{T_i}(\delta(X^\ast) \\
        &\leq \Oh(\log n) c(\delta(X^\ast))
    \end{align}
    Using \cref{lem:bisection_single} we know that every single tree bisection $X_i$ can only cost more relative to its respective cost function $c_{T_i}$.
    But since the $X_i$ are optimal solutions for the trees $T_i$, the global optimal solution $X^\ast$ cannot be better.
    We now apply \cref{lem:bisection_combination} to show that the original convex combination of bisections is bounded by our desired bound.
    Since the combination of nonnegative costs is bounded, so must be the smallest cost or otherwise the inequality cannot hold.
    This proofs the correctness of the algorithm.
    \qed
\end{proof}

\nocite{*}
\bibliographystyle{alpha}
\bibliography{term_paper}
\end{document}