{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'the': 3,\n",
       " 'quick': 2,\n",
       " 'brown': 1,\n",
       " 'fox': 2,\n",
       " 'jumps': 1,\n",
       " 'over': 1,\n",
       " 'lazy': 1,\n",
       " 'dog': 1,\n",
       " 'was': 1}"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from functools import reduce\n",
    "from collections import defaultdict\n",
    "\n",
    "# Example input\n",
    "text = \"the quick brown fox jumps over the lazy dog the fox was quick\"\n",
    "\n",
    "# Step 1: Map phase - create (word, 1) for each word\n",
    "words = text.split()\n",
    "mapped_words = list(map(lambda word: (word, 1), words))\n",
    "\n",
    "# Step 2: Reduce phase - combine counts for each word\n",
    "def reducer(acc, pair):\n",
    "    word, count = pair\n",
    "    acc[word] += count\n",
    "    return acc\n",
    "\n",
    "# Use reduce to aggregate word counts\n",
    "word_freq = reduce(reducer, mapped_words, defaultdict(int))\n",
    "\n",
    "# Convert defaultdict to a regular dictionary\n",
    "word_freq = dict(word_freq)\n",
    "word_freq\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'dog': ['doc2', 'doc1'],\n",
       " 'cat': ['doc3', 'doc1'],\n",
       " 'fox': ['doc2'],\n",
       " 'mouse': ['doc3']}"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from functools import reduce\n",
    "from collections import defaultdict\n",
    "\n",
    "# Example input documents\n",
    "documents = {\n",
    "    'doc1': \"dog cat\",\n",
    "    'doc2': \"dog fox\",\n",
    "    'doc3': \"cat mouse\"\n",
    "}\n",
    "\n",
    "# Step 1: Map phase - Use map to create (word, document_id) for each word in each document\n",
    "mapped_words = list(map(lambda doc_id_content: [(word, doc_id_content[0]) for word in doc_id_content[1].split()], documents.items()))\n",
    "mapped_words = [item for sublist in mapped_words for item in sublist]  # Flatten the list of lists\n",
    "\n",
    "# Step 2: Reduce phase - Use reduce to group document_ids for each word\n",
    "def reducer(acc, pair):\n",
    "    word, doc_id = pair\n",
    "    acc[word].add(doc_id)\n",
    "    return acc\n",
    "\n",
    "# Reduce to aggregate the document lists for each word\n",
    "inverted_index = reduce(reducer, mapped_words, defaultdict(set))\n",
    "\n",
    "# Convert sets to lists for better readability\n",
    "inverted_index = {word: list(docs) for word, docs in inverted_index.items()}\n",
    "\n",
    "# Display the result\n",
    "inverted_index\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<>:11: SyntaxWarning: invalid escape sequence '\\e'\n",
      "<>:11: SyntaxWarning: invalid escape sequence '\\e'\n",
      "C:\\Users\\acoma\\AppData\\Local\\Temp\\ipykernel_37800\\1057440648.py:11: SyntaxWarning: invalid escape sequence '\\e'\n",
      "  file_path = 'data\\eulerGraphs3.txt'\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'even': 2, 'odd': 98}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from functools import reduce\n",
    "from collections import defaultdict\n",
    "\n",
    "# Function to load a graph from a file where each line contains an edge (x, y)\n",
    "def load_graph(file_path):\n",
    "    with open(file_path, 'r') as file:\n",
    "        edges = [tuple(map(int, line.strip().split())) for line in file]\n",
    "    return edges\n",
    "\n",
    "# Example usage\n",
    "file_path = 'data\\eulerGraphs3.txt'\n",
    "graph_edges = load_graph(file_path)\n",
    "\n",
    "# Step 1: Map phase - Use map to create a list of vertex appearances\n",
    "vertex_appearances = list(map(lambda edge: [edge[0], edge[1]], graph_edges))\n",
    "vertex_appearances = [vertex for sublist in vertex_appearances for vertex in sublist]  # Flatten the list\n",
    "\n",
    "# Step 2: Reduce phase - Count the occurrences of each vertex (degree count)\n",
    "def count_vertices(acc, vertex):\n",
    "    acc[vertex] += 1\n",
    "    return acc\n",
    "\n",
    "# Reduce to aggregate the degree counts for each vertex\n",
    "degree_count = reduce(count_vertices, vertex_appearances, defaultdict(int))\n",
    "\n",
    "# Step 3: Use reduce to count even and odd degree vertices\n",
    "def count_even_odd(acc, degree):\n",
    "    if degree % 2 == 0:\n",
    "        acc['even'] += 1\n",
    "    else:\n",
    "        acc['odd'] += 1\n",
    "    return acc\n",
    "\n",
    "# Reduce to count even and odd vertices\n",
    "even_odd_count = reduce(count_even_odd, degree_count.values(), {'even': 0, 'odd': 0})\n",
    "\n",
    "# Display the result\n",
    "even_odd_count\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1, 8 : 6, 3, 7\n",
      "1, 6 : 8\n",
      "1, 7 : 8, 2\n",
      "1, 2 : 7, 3\n",
      "1, 3 : 8, 2\n",
      "2, 7 : 1, 4\n",
      "2, 3 : 1\n",
      "2, 4 : 7\n",
      "3, 8 : 1\n",
      "4, 8 : 7\n",
      "4, 7 : 8, 2\n",
      "6, 8 : 1\n",
      "7, 8 : 1, 4\n"
     ]
    }
   ],
   "source": [
    "from functools import reduce\n",
    "\n",
    "# Step 1: Load the graph from the adjacency list file\n",
    "def load_adjacency_list(file_path):\n",
    "    with open(file_path, 'r') as file:\n",
    "        graph = {}\n",
    "        for line in file:\n",
    "            node, neighbors = line.strip().split(' : ')\n",
    "            graph[node] = set(neighbors.split(','))\n",
    "    return graph\n",
    "\n",
    "# Step 2: Use map to generate adjacent vertex pairs and their neighbors\n",
    "def generate_adjacent_pairs(graph):\n",
    "    return list(map(lambda node: [(node, neighbor, graph[node], graph[neighbor]) \n",
    "                                  for neighbor in graph[node] if node < neighbor], graph))\n",
    "\n",
    "# Step 3: Use reduce to find common neighbors\n",
    "def find_common_friends(acc, pair):\n",
    "    for node1, node2, neighbors1, neighbors2 in pair:\n",
    "        common_neighbors = neighbors1.intersection(neighbors2)\n",
    "        if common_neighbors:\n",
    "            acc.append((node1, node2, common_neighbors))\n",
    "    return acc\n",
    "\n",
    "# Step 4: Apply map and reduce\n",
    "def common_friends(file_path):\n",
    "    graph = load_adjacency_list(file_path)\n",
    "    \n",
    "    # Map phase: Generate adjacent vertex pairs\n",
    "    adjacent_pairs = generate_adjacent_pairs(graph)\n",
    "    \n",
    "    # Reduce phase: Find common friends\n",
    "    return reduce(find_common_friends, adjacent_pairs, [])\n",
    "\n",
    "# Example usage (replace with your actual file path)\n",
    "file_path = 'data/friends.txt'\n",
    "common_friends_list = common_friends(file_path)\n",
    "\n",
    "# Output the common friends for each pair of adjacent vertices\n",
    "for node1, node2, common in common_friends_list:\n",
    "    print(f\"{node1}, {node2} : {', '.join(common)}\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex 6"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Problem Setup:\n",
    "We have a graph represented as a set of edges, and we want to count the number of triangles in this graph. A **triangle** consists of three vertices that are all pairwise connected.\n",
    "\n",
    "### Map and Reduce Operations:\n",
    "\n",
    "- **Map**: For each edge \\( (a, b, c) \\), we will map it to pairs \\( \\langle a, b \\rangle \\), where \\( a \\) is a vertex, and \\( b \\) is one of its neighbors. The key is the vertex, and the value is its neighbor set. This ensures that each vertex has a list of its neighbors.\n",
    "  \n",
    "  $$ \\text{map}(a, b, c) \\rightarrow \\langle a, b \\rangle $$\n",
    "\n",
    "- **Reduce**: For each vertex \\( a \\) and its associated list of neighbors \\( [b_1, b_2, \\dots, b_k] \\), we will reduce it by finding all the pairs of neighbors \\( (b_i, b_j) \\) that are also connected. This checks for the existence of common neighbors and counts a triangle every time two neighbors share an edge with \\( a \\).\n",
    "\n",
    "  $$ \\text{reduce}(a, [b_1, \\dots, b_k]) \\rightarrow \\left(a, \\theta([b_1, \\dots, b_k])\\right) $$\n",
    "\n",
    "Here, \\( \\theta([b_1, \\dots, b_k]) \\) represents the number of triangles formed by vertex \\( a \\) and its neighbors. Since each triangle is counted three times (once at each vertex), the final result is divided by 3.\n",
    "\n",
    "### Conclusion:\n",
    "- The **map** function organizes the graph by vertices and their neighbors.\n",
    "- The **reduce** function identifies triangles by checking for common neighbors and counting them.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total number of triangles: 120676\n"
     ]
    }
   ],
   "source": [
    "from functools import reduce\n",
    "\n",
    "# Step 1: Load the graph from the edge list file\n",
    "def load_graph(file_path):\n",
    "    with open(file_path, 'r') as file:\n",
    "        edges = [tuple(map(int, line.strip().split())) for line in file]\n",
    "    return edges\n",
    "\n",
    "# Step 2: Create adjacency list from the edges\n",
    "def build_adjacency_list(edges):\n",
    "    adjacency_list = {}\n",
    "    for x, y in edges:\n",
    "        adjacency_list.setdefault(x, set()).add(y)\n",
    "        adjacency_list.setdefault(y, set()).add(x)\n",
    "    return adjacency_list\n",
    "\n",
    "# Step 3: Map phase - Generate pairs of vertices and their common neighbors\n",
    "def generate_triangle_candidates(adjacency_list):\n",
    "    triangle_candidates = []\n",
    "    for node in adjacency_list:\n",
    "        for neighbor in adjacency_list[node]:\n",
    "            if node < neighbor:  # To avoid duplicate pairs (x, y) and (y, x)\n",
    "                triangle_candidates.append((node, neighbor, adjacency_list[node], adjacency_list[neighbor]))\n",
    "    return triangle_candidates\n",
    "\n",
    "# Step 4: Reduce phase - Count triangles by finding common neighbors\n",
    "def count_triangles(acc, pair):\n",
    "    node1, node2, neighbors1, neighbors2 = pair\n",
    "    common_neighbors = neighbors1.intersection(neighbors2)\n",
    "    acc += len(common_neighbors)  # Each triangle will be counted 3 times (once per vertex)\n",
    "    return acc\n",
    "\n",
    "# Full process to count triangles\n",
    "def triangle_count(file_path):\n",
    "    edges = load_graph(file_path)\n",
    "    adjacency_list = build_adjacency_list(edges)\n",
    "    \n",
    "    # Map phase: Generate triangle candidates (pairs with common neighbors)\n",
    "    triangle_candidates = generate_triangle_candidates(adjacency_list)\n",
    "    \n",
    "    # Reduce phase: Count the number of triangles\n",
    "    total_triangle_count = reduce(count_triangles, triangle_candidates, 0)\n",
    "    \n",
    "    # Since each triangle is counted 3 times, divide the result by 3\n",
    "    return total_triangle_count // 3\n",
    "\n",
    "# Example usage (replace with your actual file path)\n",
    "file_path = 'data/roadnet.txt'\n",
    "total_triangles = triangle_count(file_path)\n",
    "\n",
    "# Output the total number of triangles\n",
    "print(f\"Total number of triangles: {total_triangles}\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}