{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Ex 2" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'the': 3,\n", " 'quick': 2,\n", " 'brown': 1,\n", " 'fox': 2,\n", " 'jumps': 1,\n", " 'over': 1,\n", " 'lazy': 1,\n", " 'dog': 1,\n", " 'was': 1}" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from functools import reduce\n", "from collections import defaultdict\n", "\n", "# Example input\n", "text = \"the quick brown fox jumps over the lazy dog the fox was quick\"\n", "\n", "# Step 1: Map phase - create (word, 1) for each word\n", "words = text.split()\n", "mapped_words = list(map(lambda word: (word, 1), words))\n", "\n", "# Step 2: Reduce phase - combine counts for each word\n", "def reducer(acc, pair):\n", " word, count = pair\n", " acc[word] += count\n", " return acc\n", "\n", "# Use reduce to aggregate word counts\n", "word_freq = reduce(reducer, mapped_words, defaultdict(int))\n", "\n", "# Convert defaultdict to a regular dictionary\n", "word_freq = dict(word_freq)\n", "word_freq\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Ex 3" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'dog': ['doc2', 'doc1'],\n", " 'cat': ['doc3', 'doc1'],\n", " 'fox': ['doc2'],\n", " 'mouse': ['doc3']}" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from functools import reduce\n", "from collections import defaultdict\n", "\n", "# Example input documents\n", "documents = {\n", " 'doc1': \"dog cat\",\n", " 'doc2': \"dog fox\",\n", " 'doc3': \"cat mouse\"\n", "}\n", "\n", "# Step 1: Map phase - Use map to create (word, document_id) for each word in each document\n", "mapped_words = list(map(lambda doc_id_content: [(word, doc_id_content[0]) for word in doc_id_content[1].split()], documents.items()))\n", "mapped_words = [item for sublist in mapped_words for item in sublist] # Flatten the list of lists\n", "\n", "# Step 2: Reduce phase - Use reduce to group document_ids for each word\n", "def reducer(acc, pair):\n", " word, doc_id = pair\n", " acc[word].add(doc_id)\n", " return acc\n", "\n", "# Reduce to aggregate the document lists for each word\n", "inverted_index = reduce(reducer, mapped_words, defaultdict(set))\n", "\n", "# Convert sets to lists for better readability\n", "inverted_index = {word: list(docs) for word, docs in inverted_index.items()}\n", "\n", "# Display the result\n", "inverted_index\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Ex 4" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "<>:11: SyntaxWarning: invalid escape sequence '\\e'\n", "<>:11: SyntaxWarning: invalid escape sequence '\\e'\n", "C:\\Users\\acoma\\AppData\\Local\\Temp\\ipykernel_37800\\1057440648.py:11: SyntaxWarning: invalid escape sequence '\\e'\n", " file_path = 'data\\eulerGraphs3.txt'\n" ] }, { "data": { "text/plain": [ "{'even': 2, 'odd': 98}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from functools import reduce\n", "from collections import defaultdict\n", "\n", "# Function to load a graph from a file where each line contains an edge (x, y)\n", "def load_graph(file_path):\n", " with open(file_path, 'r') as file:\n", " edges = [tuple(map(int, line.strip().split())) for line in file]\n", " return edges\n", "\n", "# Example usage\n", "file_path = 'data\\eulerGraphs3.txt'\n", "graph_edges = load_graph(file_path)\n", "\n", "# Step 1: Map phase - Use map to create a list of vertex appearances\n", "vertex_appearances = list(map(lambda edge: [edge[0], edge[1]], graph_edges))\n", "vertex_appearances = [vertex for sublist in vertex_appearances for vertex in sublist] # Flatten the list\n", "\n", "# Step 2: Reduce phase - Count the occurrences of each vertex (degree count)\n", "def count_vertices(acc, vertex):\n", " acc[vertex] += 1\n", " return acc\n", "\n", "# Reduce to aggregate the degree counts for each vertex\n", "degree_count = reduce(count_vertices, vertex_appearances, defaultdict(int))\n", "\n", "# Step 3: Use reduce to count even and odd degree vertices\n", "def count_even_odd(acc, degree):\n", " if degree % 2 == 0:\n", " acc['even'] += 1\n", " else:\n", " acc['odd'] += 1\n", " return acc\n", "\n", "# Reduce to count even and odd vertices\n", "even_odd_count = reduce(count_even_odd, degree_count.values(), {'even': 0, 'odd': 0})\n", "\n", "# Display the result\n", "even_odd_count\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Ex 5" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1, 8 : 6, 3, 7\n", "1, 6 : 8\n", "1, 7 : 8, 2\n", "1, 2 : 7, 3\n", "1, 3 : 8, 2\n", "2, 7 : 1, 4\n", "2, 3 : 1\n", "2, 4 : 7\n", "3, 8 : 1\n", "4, 8 : 7\n", "4, 7 : 8, 2\n", "6, 8 : 1\n", "7, 8 : 1, 4\n" ] } ], "source": [ "from functools import reduce\n", "\n", "# Step 1: Load the graph from the adjacency list file\n", "def load_adjacency_list(file_path):\n", " with open(file_path, 'r') as file:\n", " graph = {}\n", " for line in file:\n", " node, neighbors = line.strip().split(' : ')\n", " graph[node] = set(neighbors.split(','))\n", " return graph\n", "\n", "# Step 2: Use map to generate adjacent vertex pairs and their neighbors\n", "def generate_adjacent_pairs(graph):\n", " return list(map(lambda node: [(node, neighbor, graph[node], graph[neighbor]) \n", " for neighbor in graph[node] if node < neighbor], graph))\n", "\n", "# Step 3: Use reduce to find common neighbors\n", "def find_common_friends(acc, pair):\n", " for node1, node2, neighbors1, neighbors2 in pair:\n", " common_neighbors = neighbors1.intersection(neighbors2)\n", " if common_neighbors:\n", " acc.append((node1, node2, common_neighbors))\n", " return acc\n", "\n", "# Step 4: Apply map and reduce\n", "def common_friends(file_path):\n", " graph = load_adjacency_list(file_path)\n", " \n", " # Map phase: Generate adjacent vertex pairs\n", " adjacent_pairs = generate_adjacent_pairs(graph)\n", " \n", " # Reduce phase: Find common friends\n", " return reduce(find_common_friends, adjacent_pairs, [])\n", "\n", "# Example usage (replace with your actual file path)\n", "file_path = 'data/friends.txt'\n", "common_friends_list = common_friends(file_path)\n", "\n", "# Output the common friends for each pair of adjacent vertices\n", "for node1, node2, common in common_friends_list:\n", " print(f\"{node1}, {node2} : {', '.join(common)}\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Ex 6" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Problem Setup:\n", "We have a graph represented as a set of edges, and we want to count the number of triangles in this graph. A **triangle** consists of three vertices that are all pairwise connected.\n", "\n", "### Map and Reduce Operations:\n", "\n", "- **Map**: For each edge \\( (a, b, c) \\), we will map it to pairs \\( \\langle a, b \\rangle \\), where \\( a \\) is a vertex, and \\( b \\) is one of its neighbors. The key is the vertex, and the value is its neighbor set. This ensures that each vertex has a list of its neighbors.\n", " \n", " $$ \\text{map}(a, b, c) \\rightarrow \\langle a, b \\rangle $$\n", "\n", "- **Reduce**: For each vertex \\( a \\) and its associated list of neighbors \\( [b_1, b_2, \\dots, b_k] \\), we will reduce it by finding all the pairs of neighbors \\( (b_i, b_j) \\) that are also connected. This checks for the existence of common neighbors and counts a triangle every time two neighbors share an edge with \\( a \\).\n", "\n", " $$ \\text{reduce}(a, [b_1, \\dots, b_k]) \\rightarrow \\left(a, \\theta([b_1, \\dots, b_k])\\right) $$\n", "\n", "Here, \\( \\theta([b_1, \\dots, b_k]) \\) represents the number of triangles formed by vertex \\( a \\) and its neighbors. Since each triangle is counted three times (once at each vertex), the final result is divided by 3.\n", "\n", "### Conclusion:\n", "- The **map** function organizes the graph by vertices and their neighbors.\n", "- The **reduce** function identifies triangles by checking for common neighbors and counting them.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total number of triangles: 120676\n" ] } ], "source": [ "from functools import reduce\n", "\n", "# Step 1: Load the graph from the edge list file\n", "def load_graph(file_path):\n", " with open(file_path, 'r') as file:\n", " edges = [tuple(map(int, line.strip().split())) for line in file]\n", " return edges\n", "\n", "# Step 2: Create adjacency list from the edges\n", "def build_adjacency_list(edges):\n", " adjacency_list = {}\n", " for x, y in edges:\n", " adjacency_list.setdefault(x, set()).add(y)\n", " adjacency_list.setdefault(y, set()).add(x)\n", " return adjacency_list\n", "\n", "# Step 3: Map phase - Generate pairs of vertices and their common neighbors\n", "def generate_triangle_candidates(adjacency_list):\n", " triangle_candidates = []\n", " for node in adjacency_list:\n", " for neighbor in adjacency_list[node]:\n", " if node < neighbor: # To avoid duplicate pairs (x, y) and (y, x)\n", " triangle_candidates.append((node, neighbor, adjacency_list[node], adjacency_list[neighbor]))\n", " return triangle_candidates\n", "\n", "# Step 4: Reduce phase - Count triangles by finding common neighbors\n", "def count_triangles(acc, pair):\n", " node1, node2, neighbors1, neighbors2 = pair\n", " common_neighbors = neighbors1.intersection(neighbors2)\n", " acc += len(common_neighbors) # Each triangle will be counted 3 times (once per vertex)\n", " return acc\n", "\n", "# Full process to count triangles\n", "def triangle_count(file_path):\n", " edges = load_graph(file_path)\n", " adjacency_list = build_adjacency_list(edges)\n", " \n", " # Map phase: Generate triangle candidates (pairs with common neighbors)\n", " triangle_candidates = generate_triangle_candidates(adjacency_list)\n", " \n", " # Reduce phase: Count the number of triangles\n", " total_triangle_count = reduce(count_triangles, triangle_candidates, 0)\n", " \n", " # Since each triangle is counted 3 times, divide the result by 3\n", " return total_triangle_count // 3\n", "\n", "# Example usage (replace with your actual file path)\n", "file_path = 'data/roadnet.txt'\n", "total_triangles = triangle_count(file_path)\n", "\n", "# Output the total number of triangles\n", "print(f\"Total number of triangles: {total_triangles}\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }