From daf58458deb62fd41aba57d6ecc490320062724f Mon Sep 17 00:00:00 2001 From: eliappo <237402327+eliappo@users.noreply.github.com> Date: Fri, 8 May 2026 11:25:38 +0200 Subject: [PATCH 1/2] feat(terraform): replace hardcoded manager droplets with count-based resource Previously, each manager VM was defined as a separate, named Terraform resource (manager1_prod, manager2_prod, etc.). Adding or removing a manager required manually duplicating or deleting entire resource blocks and updating the Ansible inventory, env file, null_resource depends_on, and triggers in multiple places. This change introduces a single manager_count variable in both the production and staging configurations. A single digitalocean_droplet resource with count = var.manager_count replaces the hardcoded resources. All downstream resources (Ansible inventory, .env file, null_resource depends_on) now use Terraform's splat expressions and template for-loops to dynamically adapt to however many managers are configured. Key changes: - Added manager_count variable (default: 2) with a validation rule enforcing an even number >= 2. This is required because db_prod/db_stage already acts as the swarm leader (a manager), so total managers = manager_count + 1 must be odd for Docker Swarm Raft quorum. - Replaced manager1/manager2 droplet resources with a single digitalocean_droplet.manager_{prod,stage} resource using count. - Ansible inventory now uses a %{ for } template loop to emit one line per manager, so it stays correct regardless of manager_count. - .env file uses the same loop to emit MANAGER1_IP, MANAGER2_IP, ... entries dynamically. Staging nip.io domain and URLs continue to use manager[0]. - Added triggers to null_resource.run_ansible_{prod,stage} so that Ansible is re-run whenever a manager IP changes or a new manager is added. Previously triggers were absent, meaning Ansible would only ever run on first apply. 
- All depends_on references updated from the old per-resource names to the new array-style reference (digitalocean_droplet.manager_{prod,stage}). To scale managers: set manager_count = 4 (or any even number >= 2) in terraform.tfvars or pass -var="manager_count=4" at apply time. Co-Authored-By: Claude Sonnet 4.6 --- terraform/production/main.tf | 42 +++++++++++--------------- terraform/production/variables.tf | 11 +++++++ terraform/stage/main.tf | 50 ++++++++++++++----------------- terraform/stage/variables.tf | 11 +++++++ 4 files changed, 62 insertions(+), 52 deletions(-) diff --git a/terraform/production/main.tf b/terraform/production/main.tf index a5f1191..f8b9fb6 100644 --- a/terraform/production/main.tf +++ b/terraform/production/main.tf @@ -29,21 +29,11 @@ resource "digitalocean_vpc" "minitwit_vpc" { ip_range = "10.10.10.0/24" } -# --- Manager 1 --- -resource "digitalocean_droplet" "manager1_prod" { +# --- Managers (count-controlled) --- +resource "digitalocean_droplet" "manager_prod" { + count = var.manager_count image = "ubuntu-22-04-x64" - name = "manager1-prod" - region = "fra1" - size = "s-1vcpu-1gb" - ssh_keys = [data.digitalocean_ssh_key.my_ssh_key.id] - vpc_uuid = digitalocean_vpc.minitwit_vpc.id - tags = [digitalocean_tag.minitwit_prod.id] -} - -# --- Manager 2 --- -resource "digitalocean_droplet" "manager2_prod" { - image = "ubuntu-22-04-x64" - name = "manager2-prod" + name = "manager${count.index + 1}-prod" region = "fra1" size = "s-1vcpu-1gb" ssh_keys = [data.digitalocean_ssh_key.my_ssh_key.id] @@ -128,8 +118,9 @@ resource "local_file" "ansible_inventory" { db-prod ansible_host=${digitalocean_droplet.db_prod.ipv4_address} [swarm_managers] -manager1-prod ansible_host=${digitalocean_droplet.manager1_prod.ipv4_address} -manager2-prod ansible_host=${digitalocean_droplet.manager2_prod.ipv4_address} +%{ for droplet in digitalocean_droplet.manager_prod ~} +${droplet.name} ansible_host=${droplet.ipv4_address} +%{ endfor ~} [all:vars] ansible_user=root @@ 
-138,10 +129,14 @@ EOT } resource "null_resource" "run_ansible_prod" { # null_resource is a TYPE does not create anything, just run commands + triggers = { + manager_ips = join(",", digitalocean_droplet.manager_prod[*].ipv4_address) + db_ip = digitalocean_droplet.db_prod.ipv4_address + } + depends_on = [ digitalocean_droplet.db_prod, - digitalocean_droplet.manager1_prod, - digitalocean_droplet.manager2_prod, + digitalocean_droplet.manager_prod, local_file.ansible_inventory ] @@ -191,8 +186,7 @@ resource "null_resource" "run_ansible_prod" { # null_resource is a TYPE does not resource "null_resource" "get_private_ips" { depends_on = [ digitalocean_droplet.db_prod, - digitalocean_droplet.manager1_prod, - digitalocean_droplet.manager2_prod + digitalocean_droplet.manager_prod ] } @@ -204,8 +198,9 @@ DB_ADDR=${digitalocean_droplet.db_prod.ipv4_address_private} DOMAIN=runtimetwiterror.dev -MANAGER1_IP=${digitalocean_droplet.manager1_prod.ipv4_address} -MANAGER2_IP=${digitalocean_droplet.manager2_prod.ipv4_address} +%{ for i, droplet in digitalocean_droplet.manager_prod ~} +MANAGER${i + 1}_IP=${droplet.ipv4_address} +%{ endfor ~} PROM_URL=https://runtimetwiterror.dev/prometheus GRAFANA_URL=https://runtimetwiterror.dev/grafana/ @@ -214,7 +209,6 @@ EOT depends_on = [ digitalocean_droplet.db_prod, - digitalocean_droplet.manager1_prod, - digitalocean_droplet.manager2_prod + digitalocean_droplet.manager_prod ] } \ No newline at end of file diff --git a/terraform/production/variables.tf b/terraform/production/variables.tf index 1fd8532..813731e 100644 --- a/terraform/production/variables.tf +++ b/terraform/production/variables.tf @@ -10,3 +10,14 @@ variable "ssh_key_name" { description = "Name of the SSH key in DigitalOcean" type = string } + +variable "manager_count" { + description = "Number of additional manager nodes (db_prod is always the swarm leader, so total managers = manager_count + 1, which must be odd)" + type = number + default = 2 + + validation { + condition = 
var.manager_count % 2 == 0 && var.manager_count >= 2 + error_message = "manager_count must be an even number >= 2 (e.g. 2, 4, 6) so that total managers including db_prod is odd." + } +} diff --git a/terraform/stage/main.tf b/terraform/stage/main.tf index e939da4..47e0319 100644 --- a/terraform/stage/main.tf +++ b/terraform/stage/main.tf @@ -29,21 +29,11 @@ resource "digitalocean_vpc" "minitwit_vpc" { ip_range = "10.10.20.0/24" } -# --- Manager 1 --- -resource "digitalocean_droplet" "manager1_stage" { +# --- Managers (count-controlled) --- +resource "digitalocean_droplet" "manager_stage" { + count = var.manager_count image = "ubuntu-22-04-x64" - name = "manager1-stage" - region = "fra1" - size = "s-1vcpu-1gb" - ssh_keys = [data.digitalocean_ssh_key.my_ssh_key.id] - vpc_uuid = digitalocean_vpc.minitwit_vpc.id - tags = [digitalocean_tag.minitwit_stage.id] -} - -# --- Manager 2 --- -resource "digitalocean_droplet" "manager2_stage" { - image = "ubuntu-22-04-x64" - name = "manager2-stage" + name = "manager${count.index + 1}-stage" region = "fra1" size = "s-1vcpu-1gb" ssh_keys = [data.digitalocean_ssh_key.my_ssh_key.id] @@ -128,8 +118,9 @@ resource "local_file" "ansible_inventory" { db-stage ansible_host=${digitalocean_droplet.db_stage.ipv4_address} [swarm_managers] -manager1-stage ansible_host=${digitalocean_droplet.manager1_stage.ipv4_address} -manager2-stage ansible_host=${digitalocean_droplet.manager2_stage.ipv4_address} +%{ for droplet in digitalocean_droplet.manager_stage ~} +${droplet.name} ansible_host=${droplet.ipv4_address} +%{ endfor ~} [all:vars] ansible_user=root @@ -138,10 +129,14 @@ EOT } resource "null_resource" "run_ansible_stage" { # null_resource is a TYPE does not create anything, just run commands + triggers = { + manager_ips = join(",", digitalocean_droplet.manager_stage[*].ipv4_address) + db_ip = digitalocean_droplet.db_stage.ipv4_address + } + depends_on = [ digitalocean_droplet.db_stage, - digitalocean_droplet.manager1_stage, - 
digitalocean_droplet.manager2_stage, + digitalocean_droplet.manager_stage, local_file.ansible_inventory ] @@ -191,8 +186,7 @@ resource "null_resource" "run_ansible_stage" { # null_resource is a TYPE does no resource "null_resource" "get_private_ips" { depends_on = [ digitalocean_droplet.db_stage, - digitalocean_droplet.manager1_stage, - digitalocean_droplet.manager2_stage + digitalocean_droplet.manager_stage ] } @@ -202,21 +196,21 @@ resource "local_file" "env_file" { DOCKER_IMAGE=runtimeerroritu/minitwit:latest DB_ADDR=${digitalocean_droplet.db_stage.ipv4_address_private} -# Use nip.io to create a magic domain for the staging environment using the Public IP -DOMAIN=${digitalocean_droplet.manager1_stage.ipv4_address}.nip.io +# Use nip.io to create a magic domain for the staging environment using the Public IP of manager1 +DOMAIN=${digitalocean_droplet.manager_stage[0].ipv4_address}.nip.io -MANAGER1_IP=${digitalocean_droplet.manager1_stage.ipv4_address} -MANAGER2_IP=${digitalocean_droplet.manager2_stage.ipv4_address} +%{ for i, droplet in digitalocean_droplet.manager_stage ~} +MANAGER${i + 1}_IP=${droplet.ipv4_address} +%{ endfor ~} # Update URLs to use the new nip.io domain -PROM_URL=http://${digitalocean_droplet.manager1_stage.ipv4_address}.nip.io/prometheus -GRAFANA_URL=http://${digitalocean_droplet.manager1_stage.ipv4_address}.nip.io/grafana/ +PROM_URL=http://${digitalocean_droplet.manager_stage[0].ipv4_address}.nip.io/prometheus +GRAFANA_URL=http://${digitalocean_droplet.manager_stage[0].ipv4_address}.nip.io/grafana/ EOT filename = "../../.env.stage" depends_on = [ digitalocean_droplet.db_stage, - digitalocean_droplet.manager1_stage, - digitalocean_droplet.manager2_stage + digitalocean_droplet.manager_stage ] } diff --git a/terraform/stage/variables.tf b/terraform/stage/variables.tf index 1182e78..20884cb 100644 --- a/terraform/stage/variables.tf +++ b/terraform/stage/variables.tf @@ -9,4 +9,15 @@ variable "do_token" { variable "ssh_key_name" { description = 
"Name of the SSH key in DigitalOcean" type = string +} + +variable "manager_count" { + description = "Number of additional manager nodes (db_stage is always the swarm leader, so total managers = manager_count + 1, which must be odd)" + type = number + default = 2 + + validation { + condition = var.manager_count % 2 == 0 && var.manager_count >= 2 + error_message = "manager_count must be an even number >= 2 (e.g. 2, 4, 6) so that total managers including db_stage is odd." + } } \ No newline at end of file From bdb17890709b522b664b7667788d0360172213f7 Mon Sep 17 00:00:00 2001 From: Aiting Lee Date: Sat, 9 May 2026 15:31:23 +0200 Subject: [PATCH 2/2] doc: add systems report sections and charts --- report/main.md | 328 ++++++++++++++++++- report/systems/perspective.md | 9 +- report/systems/sections/analysis.md | 3 + report/systems/sections/architecture.md | 22 ++ report/systems/sections/charts/c&c.md | 98 ++++++ report/systems/sections/charts/deployment.md | 108 ++++++ report/systems/sections/charts/flow_chart.md | 35 ++ report/systems/sections/charts/module.md | 58 ++++ report/systems/sections/stack.md | 3 + 9 files changed, 647 insertions(+), 17 deletions(-) create mode 100644 report/systems/sections/analysis.md create mode 100644 report/systems/sections/architecture.md create mode 100644 report/systems/sections/charts/c&c.md create mode 100644 report/systems/sections/charts/deployment.md create mode 100644 report/systems/sections/charts/flow_chart.md create mode 100644 report/systems/sections/charts/module.md create mode 100644 report/systems/sections/stack.md diff --git a/report/main.md b/report/main.md index 3770cb3..758606d 100644 --- a/report/main.md +++ b/report/main.md @@ -34,10 +34,323 @@ A description and illustration of the: + + + ## Design and architecture +### Module Viewpoint +```mermaid +flowchart TB +%% ========================================== +%% Define Folders using Subgraphs with invisible nodes +%% ========================================== + 
+subgraph PkgMain ["Main"] +N_Main[" "] +end + +subgraph CoreApplication ["Core Application"] +direction TB +User[User] +Follower[Follower] +Message[Message] +ApplicationState[Application State] + +%% Internal dependencies +User --> Follower +User --> Message +end + +subgraph PkgGin ["Gin"] +N_Gin[" "] +end + +subgraph PkgGorm ["Gorm"] +N_Gorm[" "] +end + +subgraph PkgPrometheus ["Prometheus"] +N_Prom[" "] +end + +%% ========================================== +%% Dependencies +%% ========================================== + +%% Main entry point triggers User logic +PkgMain --> User + +%% Frameworks depending on Core Application (Clean Architecture inward flow) +PkgGin --> CoreApplication +PkgGorm --> CoreApplication +PkgPrometheus --> CoreApplication + +%% ========================================== +%% FOLDER HACK: Make inner nodes completely invisible +%% ========================================== +style N_Main fill:none,stroke:none,color:transparent +style N_Gin fill:none,stroke:none,color:transparent +style N_Gorm fill:none,stroke:none,color:transparent +style N_Prom fill:none,stroke:none,color:transparent + +%% Style the subgraphs to look more like solid packages +classDef packageStyle fill:#f8f9fa,stroke:#adb5bd,stroke-width:2px,color:#212529; +class PkgMain,PkgGin,PkgGorm,PkgPrometheus,CoreApplication packageStyle; +``` + + + +### Component and Connector Viewpoint +```mermaid +flowchart LR + Client((Client)) + LetsEncrypt(("Let's Encrypt\n(External CA)")) + + subgraph TraefikIngress ["Traefik Ingress"] + Proxy["Traefik Reverse Proxy\n(TLS Termination & Routing)"] + end + + subgraph AppNet ["App (app-net)"] + App_Web["Minitwit Web Service\n(replicas: 3)"] + end + + subgraph VPCInfra ["VPC Infrastructure (Non-Swarm)"] + App_DB[("PostgreSQL Database\n(Standalone Compose)")] + end + + subgraph Monitoring ["Monitoring and Logging (app-net)"] + direction TB + App_Grafana["Grafana"] + App_Prometheus["Prometheus"] + App_Loki["Loki"] + + Agent_Promtail["Promtail 
(Global)"] + Agent_NodeExp["Node Exporter (Global)"] + end + +%% External Traffic & TLS + Client -->|"HTTPS [TCP: 443]"| Proxy + Client -.->|"HTTP [TCP: 80]\n(Redirect)"| Proxy + Proxy <-->|"ACME Protocol\n(Auto Cert Renewal)"| LetsEncrypt + +%% Traefik Routing + Proxy ==>|"HTTP [TCP: 5001]\nLoad Balanced"| App_Web + Proxy -->|"HTTP [TCP: 3000]\nPathPrefix(`/grafana`)"| App_Grafana + +%% Database Connection (Leaving Overlay, entering VPC) + App_Web ==>|"PostgreSQL\n[TCP: 5432]"| App_DB + +%% Monitoring Data Flow (Grafana Querying) + App_Grafana -.->|"HTTP [TCP: 9090]\nQuery Metric"| App_Prometheus + App_Grafana -.->|"HTTP [TCP: 3100]\nQuery Log"| App_Loki + +%% Monitoring Data Flow (Prometheus Scraping) + App_Prometheus -.->|"HTTP [TCP: 5001]"| App_Web + App_Prometheus -.->|"HTTP [TCP: 9100]"| Agent_NodeExp + +%% Monitoring Data Flow (Promtail Pushing) + Agent_Promtail -.->|"HTTP [TCP: 3100]\nPush Logs"| App_Loki + +%% Styles + classDef proxy fill:#ffe0b2,stroke:#f57c00,color:#000000,stroke-width:2px; + classDef app fill:#c8e6c9,stroke:#388e3c,color:#000000,stroke-width:2px; + classDef monitor fill:#e1bee7,stroke:#8e24aa,color:#000000,stroke-width:2px; + classDef agent fill:#cfd8dc,stroke:#455a64,color:#000000,stroke-width:2px; + classDef db fill:#bbdefb,stroke:#1976d2,color:#000000,stroke-width:2px; + classDef ext fill:#eceff1,stroke:#607d8b,color:#000000,stroke-dasharray: 5 5; + + class Proxy proxy; + class App_Web app; + class App_Grafana,App_Prometheus,App_Loki monitor; + class Agent_Promtail,Agent_NodeExp agent; + class App_DB db; + class LetsEncrypt ext; +``` + +```mermaid +flowchart TB + +%% Line definitions +L1(A) ==>|"Thick Line:\n Business Data Flow"| L2(B) +L3(C) -->|"Normal Line:\n Web Traffic Routing"| L4(D) +L5(E) -.->|"Dashed Line:\n Monitoring / Logging "| L6(F) + +%% Shape and Component Style definitions linked with invisible lines for vertical alignment +L_Proxy[Traefik Proxy Role] +L_Proxy ~~~ L_App[Application Web Role] +L_Monitor[Monitoring 
Stack Role] +L_Monitor ~~~ L_Agent[Global Agent Role] +L_DB[(Database Role)] +L_DB ~~~ L_Ext((External Entity)) + +%% Duplicated Style Definitions matching the main diagram +classDef proxy fill:#ffe0b2,stroke:#f57c00,color:#000000,stroke-width:2px; +classDef app fill:#c8e6c9,stroke:#388e3c,color:#000000,stroke-width:2px; +classDef monitor fill:#e1bee7,stroke:#8e24aa,color:#000000,stroke-width:2px; +classDef agent fill:#cfd8dc,stroke:#455a64,color:#000000,stroke-width:2px; +classDef db fill:#bbdefb,stroke:#1976d2,color:#000000,stroke-width:2px; +classDef ext fill:#eceff1,stroke:#607d8b,color:#000000,stroke-dasharray: 5 5; + +%% Binding styles +class L_Proxy proxy; +class L_App app; +class L_Monitor monitor; +class L_Agent agent; +class L_DB db; +class L_Ext ext; +``` + +### Allocation Viewpoint + +#### Deployment View +##### Minitwit Deployment Infrastructure (VPC Private Network) + +```mermaid +flowchart LR +Internet(("Internet\n(HTTPS Traffic)")) +PostgresDB[("PostgresDB\n(Standalone)")] + +Overlay(["UDP 4789 (VXLAN Overlay)"]) +WebTraffic(["TCP 80/443 (Web Traffic)"]) +SSH(["TCP 22 (SSH Remote)"]) ~~~ +MgmtBus(["TCP 2377 (Mgmt)
TCP/UDP 7946 (Gossip)"]) +CnDB(["TCP 5432 (Connect to DB)"]) + +subgraph SwarmCluster ["Swarm Cluster (VPC)"] + direction LR + + subgraph Node1 ["Manager 1"] + direction LR + T1[Traefik] ~~~ P1[Promtail] ~~~ NE1[Node Exporter] + W1[APP] ~~~ W2[APP] + end + + subgraph Node2 ["Manager 2"] + direction TB + P2[Promtail] ~~~ NE2[Node Exporter] ~~~ W3[APP] + end + + subgraph Node3 ["DB/Monitoring"] + direction TB + Lok[Loki] + P3[Promtail] + Graf[Grafana] + Prom[Prometheus] + NE3[Node Exporter] + end +end + +%% Cluster Internal Communication +Node1 <==> MgmtBus +Node2 <==> MgmtBus +Node3 <==> MgmtBus + +%% External Entry Points (Routing through Firewall) +Internet ==> WebTraffic +WebTraffic ==> T1 + +Internet -.-> SSH +SSH -.-> SwarmCluster + +%% Overlay Networking (Inter-node Traffic) +T1 ==> Overlay +Overlay ==> W1 +Overlay ==> W2 +Overlay ==> W3 + +%% Database Access Path +W1 -.-> CnDB +W2 -.-> CnDB +W3 -.-> CnDB +CnDB -.-> PostgresDB + + + + + +%% Styles +classDef ingress fill:#e1f5fe,stroke:#0288d1,color:#000000; +classDef monitor fill:#f3e5f5,stroke:#7b1fa2,color:#000000; +classDef db fill:#bbdefb,stroke:#1976d2,color:#000000,stroke-width:2px; +classDef bus fill:#fafafa,stroke:#616161,color:#424242,stroke-width:1px,stroke-dasharray: 5 5; + +class Node1,Node2 ingress; +class Node3 monitor; +class PostgresDB db; +class MgmtBus,CnDB,SSH,Overlay,WebTraffic bus; + +``` + +#### Graph Key & Legend + +```mermaid +%% Deployment Graph Key & Legend +flowchart TB + +%% Line definitions +L1(A) ==>|"Thick Line:\n User Traffic"| L2(C) +L3(B) -.->|"Dashed Line:\n Management\n / DB Traffic"| L4(D) + +%% Shape and Style definitions (Removed quotes inside brackets to fix parse error) +L_DB[(Database Storage)] +L_FW([Security / Firewall Rule]) +L_Ingress[Application / Ingress Nodes] +L_Monitor[DB / Monitoring Nodes] + +%% Duplicated Style Definitions +classDef ingress fill:#e1f5fe,stroke:#0288d1,color:#000000; +classDef monitor fill:#f3e5f5,stroke:#7b1fa2,color:#000000; +classDef db 
fill:#bbdefb,stroke:#1976d2,color:#000000,stroke-width:2px; +classDef bus fill:#fafafa,stroke:#616161,color:#424242,stroke-width:1px,stroke-dasharray: 5 5; + + +%% Binding styles +class L_Ingress ingress; +class L_Monitor monitor; +class L_DB db; +class L_FW bus; + +``` +#### One Click Deployment Flow Chart +```mermaid + +sequenceDiagram +%% Define participants + participant Terraform + participant DigitalOcean + participant .ini + participant .env + participant Ansible + participant VirtualMachines + +%% Trigger Init/Apply + Note left of Terraform: Terraform Init Apply + activate Terraform + +%% Terraform creates infrastructure on Digital Ocean + Terraform->>DigitalOcean: Create Virtual Machines + Terraform->>DigitalOcean: Create Firewalls + +%% Terraform writes local files + Terraform->>.ini: Generate Ansible Inventory file + Terraform->>.env: Generate Env File + +%% Terraform triggers Ansible Playbook + Terraform->>Ansible: Run Ansible Playbook + deactivate Terraform + +%% Ansible sets up the VMs + activate Ansible + Ansible->>.ini: Read Inventory file + Ansible->>VirtualMachines: Setup Docker Swarm Cluster + Ansible->>.env: Read Environment Variables + Ansible->>VirtualMachines: Run Docker Compose DB And Stack Yaml + deactivate Ansible +``` + ## Dependencies and technology stack @@ -78,20 +391,13 @@ In particular, the following descriptions should be included: ## Availability and scaling -Currently our Minitwit service runs on a 3-node Docker Swarm in DigitalOcean. Two manager nodes run 3 replicas of the Minitwit app, while the third node runs the database and our monitoring system. - -We only have vertical scaling as an option for the database through upgrading the VM it is running on with more RAM and/or more CPU. The application can be scaled vertically like the database, and horizontally by deploying more instances of the application on one or more droplets. The configuration of how the system scales takes place in three systems: - -1. 
**Terraform** - - In Terraform the infrastructure of the system is defined in the form of "resources", which are the definitions of the VMs (Droplets) that are to be present. Each resource gets assigned a group in the Ansible inventory, such that Ansible knows the role of each machine at its disposal. +Our Minitwit service runs on a 3-node Docker Swarm in DigitalOcean. Two manager nodes run 3 replicas of the Minitwit app, while the third node runs the database and monitoring stack. Node roles are defined via Terraform resource groups, which Ansible uses to apply Docker Swarm placement labels during provisioning. Services in `docker-stack.yml` are constrained to nodes with matching labels, and Swarm automatically reschedules replicas if a node goes down. -2. **Ansible** - - Ansible runs the provisioning scripts when setting up a new VM. Based on the inventory and what group each resource is assigned to, Ansible will run the necessary commands to set up the VM such that it has the right resources (binaries, config files, etc.) and that it is assigned the correct role in the Docker network. +The database can only be scaled vertically (larger VM). The application supports horizontal scaling by adding droplets to the Terraform configuration and assigning them the ingress role. -3. **Docker Swarm** - - The swarm is defined in `docker-stack.yml`. Each service is constrained to only run on nodes that have a matching role assigned by Ansible during provisioning. The stack also defines the number of replicas that should be present, and Docker will then automatically make sure that the replicas are distributed among the nodes that are available with a matching role on the Docker network. If an instance crashes or goes down, Docker will automatically spin up another instance on one of the nodes. 
+When deploying a new version, Swarm performs a rolling update: each new replica starts before the old one stops (`order: start-first`), keeping at least two instances available throughout. If the new container fails to start, Swarm automatically rolls back (`failure_action: rollback`). Silent failures — where the container starts but behaves incorrectly — are not caught automatically; the CI/CD test suite is the primary guard here. -When deploying a new version of the application, Docker Swarm performs a rolling update to keep the service available throughout the process. For each replica, the new container is started *before* the old one is stopped (`order: start-first`), meaning at least two healthy replicas remain available while each individual replica is being updated. If the new container fails to start, Docker automatically rolls back to the previous version (`failure_action: rollback`). This means a bad patch that causes the container to crash on startup is automatically reverted without manual intervention. However, if the new version starts successfully but behaves incorrectly (e.g. returns errors or has broken logic), no automatic rollback occurs — the CI/CD test suite is the primary guard against this scenario. +**Known limits:** The database is a single point of failure with no replication or automated backups. Traefik runs as a single replica, so if its host node fails, ingress is lost until Swarm reschedules it. The app containers have no health checks beyond TCP port availability, so a broken-but-running instance will continue receiving traffic. 
diff --git a/report/systems/perspective.md b/report/systems/perspective.md index c2d3b4a..bfd986a 100644 --- a/report/systems/perspective.md +++ b/report/systems/perspective.md @@ -7,14 +7,11 @@ A description and illustration of the: -## Design and architecture - -## Dependencies and technology stack - +@include sections/architecture.md -## Static analysis and quality +@include sections/stack.md - +@include sections/analysis.md \ No newline at end of file diff --git a/report/systems/sections/analysis.md b/report/systems/sections/analysis.md new file mode 100644 index 0000000..d81a40d --- /dev/null +++ b/report/systems/sections/analysis.md @@ -0,0 +1,3 @@ +## Static analysis and quality + + diff --git a/report/systems/sections/architecture.md b/report/systems/sections/architecture.md new file mode 100644 index 0000000..052f036 --- /dev/null +++ b/report/systems/sections/architecture.md @@ -0,0 +1,22 @@ +## Design and architecture + + + +### Module Viewpoint +@include charts/module.md + + + + +### Component and Connector Viewpoint +@include charts/c&c.md + + +### Allocation Viewpoint + +#### Deployment View +@include charts/deployment.md + +#### One Click Deployment Flow Chart +@include charts/flow_chart.md + diff --git a/report/systems/sections/charts/c&c.md b/report/systems/sections/charts/c&c.md new file mode 100644 index 0000000..e419327 --- /dev/null +++ b/report/systems/sections/charts/c&c.md @@ -0,0 +1,98 @@ +```mermaid +flowchart LR + Client((Client)) + LetsEncrypt(("Let's Encrypt\n(External CA)")) + + subgraph TraefikIngress ["Traefik Ingress"] + Proxy["Traefik Reverse Proxy\n(TLS Termination & Routing)"] + end + + subgraph AppNet ["App (app-net)"] + App_Web["Minitwit Web Service\n(replicas: 3)"] + end + + subgraph VPCInfra ["VPC Infrastructure (Non-Swarm)"] + App_DB[("PostgreSQL Database\n(Standalone Compose)")] + end + + subgraph Monitoring ["Monitoring and Logging (app-net)"] + direction TB + App_Grafana["Grafana"] + App_Prometheus["Prometheus"] + 
App_Loki["Loki"] + + Agent_Promtail["Promtail (Global)"] + Agent_NodeExp["Node Exporter (Global)"] + end + +%% External Traffic & TLS + Client -->|"HTTPS [TCP: 443]"| Proxy + Client -.->|"HTTP [TCP: 80]\n(Redirect)"| Proxy + Proxy <-->|"ACME Protocol\n(Auto Cert Renewal)"| LetsEncrypt + +%% Traefik Routing + Proxy ==>|"HTTP [TCP: 5001]\nLoad Balanced"| App_Web + Proxy -->|"HTTP [TCP: 3000]\nPathPrefix(`/grafana`)"| App_Grafana + +%% Database Connection (Leaving Overlay, entering VPC) + App_Web ==>|"PostgreSQL\n[TCP: 5432]"| App_DB + +%% Monitoring Data Flow (Grafana Querying) + App_Grafana -.->|"HTTP [TCP: 9090]\nQuery Metric"| App_Prometheus + App_Grafana -.->|"HTTP [TCP: 3100]\nQuery Log"| App_Loki + +%% Monitoring Data Flow (Prometheus Scraping) + App_Prometheus -.->|"HTTP [TCP: 5001]"| App_Web + App_Prometheus -.->|"HTTP [TCP: 9100]"| Agent_NodeExp + +%% Monitoring Data Flow (Promtail Pushing) + Agent_Promtail -.->|"HTTP [TCP: 3100]\nPush Logs"| App_Loki + +%% Styles + classDef proxy fill:#ffe0b2,stroke:#f57c00,color:#000000,stroke-width:2px; + classDef app fill:#c8e6c9,stroke:#388e3c,color:#000000,stroke-width:2px; + classDef monitor fill:#e1bee7,stroke:#8e24aa,color:#000000,stroke-width:2px; + classDef agent fill:#cfd8dc,stroke:#455a64,color:#000000,stroke-width:2px; + classDef db fill:#bbdefb,stroke:#1976d2,color:#000000,stroke-width:2px; + classDef ext fill:#eceff1,stroke:#607d8b,color:#000000,stroke-dasharray: 5 5; + + class Proxy proxy; + class App_Web app; + class App_Grafana,App_Prometheus,App_Loki monitor; + class Agent_Promtail,Agent_NodeExp agent; + class App_DB db; + class LetsEncrypt ext; +``` + +```mermaid +flowchart TB + +%% Line definitions +L1(A) ==>|"Thick Line:\n Business Data Flow"| L2(B) +L3(C) -->|"Normal Line:\n Web Traffic Routing"| L4(D) +L5(E) -.->|"Dashed Line:\n Monitoring / Logging "| L6(F) + +%% Shape and Component Style definitions linked with invisible lines for vertical alignment +L_Proxy[Traefik Proxy Role] +L_Proxy ~~~ 
L_App[Application Web Role] +L_Monitor[Monitoring Stack Role] +L_Monitor ~~~ L_Agent[Global Agent Role] +L_DB[(Database Role)] +L_DB ~~~ L_Ext((External Entity)) + +%% Duplicated Style Definitions matching the main diagram +classDef proxy fill:#ffe0b2,stroke:#f57c00,color:#000000,stroke-width:2px; +classDef app fill:#c8e6c9,stroke:#388e3c,color:#000000,stroke-width:2px; +classDef monitor fill:#e1bee7,stroke:#8e24aa,color:#000000,stroke-width:2px; +classDef agent fill:#cfd8dc,stroke:#455a64,color:#000000,stroke-width:2px; +classDef db fill:#bbdefb,stroke:#1976d2,color:#000000,stroke-width:2px; +classDef ext fill:#eceff1,stroke:#607d8b,color:#000000,stroke-dasharray: 5 5; + +%% Binding styles +class L_Proxy proxy; +class L_App app; +class L_Monitor monitor; +class L_Agent agent; +class L_DB db; +class L_Ext ext; +``` \ No newline at end of file diff --git a/report/systems/sections/charts/deployment.md b/report/systems/sections/charts/deployment.md new file mode 100644 index 0000000..cc2d092 --- /dev/null +++ b/report/systems/sections/charts/deployment.md @@ -0,0 +1,108 @@ +##### Minitwit Deployment Infrastructure (VPC Private Network) + +```mermaid +flowchart LR +Internet(("Internet\n(HTTPS Traffic)")) +PostgresDB[("PostgresDB\n(Standalone)")] + +Overlay(["UDP 4789 (VXLAN Overlay)"]) +WebTraffic(["TCP 80/443 (Web Traffic)"]) +SSH(["TCP 22 (SSH Remote)"]) ~~~ +MgmtBus(["TCP 2377 (Mgmt)
TCP/UDP 7946 (Gossip)"]) +CnDB(["TCP 5432 (Connect to DB)"]) + +subgraph SwarmCluster ["Swarm Cluster (VPC)"] + direction LR + + subgraph Node1 ["Manager 1"] + direction LR + T1[Traefik] ~~~ P1[Promtail] ~~~ NE1[Node Exporter] + W1[APP] ~~~ W2[APP] + end + + subgraph Node2 ["Manager 2"] + direction TB + P2[Promtail] ~~~ NE2[Node Exporter] ~~~ W3[APP] + end + + subgraph Node3 ["DB/Monitoring"] + direction TB + Lok[Loki] + P3[Promtail] + Graf[Grafana] + Prom[Prometheus] + NE3[Node Exporter] + end +end + +%% Cluster Internal Communication +Node1 <==> MgmtBus +Node2 <==> MgmtBus +Node3 <==> MgmtBus + +%% External Entry Points (Routing through Firewall) +Internet ==> WebTraffic +WebTraffic ==> T1 + +Internet -.-> SSH +SSH -.-> SwarmCluster + +%% Overlay Networking (Inter-node Traffic) +T1 ==> Overlay +Overlay ==> W1 +Overlay ==> W2 +Overlay ==> W3 + +%% Database Access Path +W1 -.-> CnDB +W2 -.-> CnDB +W3 -.-> CnDB +CnDB -.-> PostgresDB + + + + + +%% Styles +classDef ingress fill:#e1f5fe,stroke:#0288d1,color:#000000; +classDef monitor fill:#f3e5f5,stroke:#7b1fa2,color:#000000; +classDef db fill:#bbdefb,stroke:#1976d2,color:#000000,stroke-width:2px; +classDef bus fill:#fafafa,stroke:#616161,color:#424242,stroke-width:1px,stroke-dasharray: 5 5; + +class Node1,Node2 ingress; +class Node3 monitor; +class PostgresDB db; +class MgmtBus,CnDB,SSH,Overlay,WebTraffic bus; + +``` + +#### Graph Key & Legend + +```mermaid +%% Deployment Graph Key & Legend +flowchart TB + +%% Line definitions +L1(A) ==>|"Thick Line:\n User Traffic"| L2(C) +L3(B) -.->|"Dashed Line:\n Management\n / DB Traffic"| L4(D) + +%% Shape and Style definitions (Removed quotes inside brackets to fix parse error) +L_DB[(Database Storage)] +L_FW([Security / Firewall Rule]) +L_Ingress[Application / Ingress Nodes] +L_Monitor[DB / Monitoring Nodes] + +%% Duplicated Style Definitions +classDef ingress fill:#e1f5fe,stroke:#0288d1,color:#000000; +classDef monitor fill:#f3e5f5,stroke:#7b1fa2,color:#000000; +classDef db 
fill:#bbdefb,stroke:#1976d2,color:#000000,stroke-width:2px; +classDef bus fill:#fafafa,stroke:#616161,color:#424242,stroke-width:1px,stroke-dasharray: 5 5; + + +%% Binding styles +class L_Ingress ingress; +class L_Monitor monitor; +class L_DB db; +class L_FW bus; + +``` \ No newline at end of file diff --git a/report/systems/sections/charts/flow_chart.md b/report/systems/sections/charts/flow_chart.md new file mode 100644 index 0000000..04a9c94 --- /dev/null +++ b/report/systems/sections/charts/flow_chart.md @@ -0,0 +1,35 @@ +```mermaid + +sequenceDiagram +%% Define participants + participant Terraform + participant DigitalOcean + participant .ini + participant .env + participant Ansible + participant VirtualMachines + +%% Trigger Init/Apply + Note left of Terraform: Terraform Init Apply + activate Terraform + +%% Terraform creates infrastructure on Digital Ocean + Terraform->>DigitalOcean: Create Virtual Machines + Terraform->>DigitalOcean: Create Firewalls + +%% Terraform writes local files + Terraform->>.ini: Generate Ansible Inventory file + Terraform->>.env: Generate Env File + +%% Terraform triggers Ansible Playbook + Terraform->>Ansible: Run Ansible Playbook + deactivate Terraform + +%% Ansible sets up the VMs + activate Ansible + Ansible->>.ini: Read Inventory file + Ansible->>VirtualMachines: Setup Docker Swarm Cluster + Ansible->>.env: Read Environment Variables + Ansible->>VirtualMachines: Run Docker Compose DB And Stack Yaml + deactivate Ansible +``` \ No newline at end of file diff --git a/report/systems/sections/charts/module.md b/report/systems/sections/charts/module.md new file mode 100644 index 0000000..20642c5 --- /dev/null +++ b/report/systems/sections/charts/module.md @@ -0,0 +1,58 @@ +```mermaid +flowchart TB +%% ========================================== +%% Define Folders using Subgraphs with invisible nodes +%% ========================================== + +subgraph PkgMain ["Main"] +N_Main[" "] +end + +subgraph CoreApplication ["Core 
Application"] +direction TB +User[User] +Follower[Follower] +Message[Message] +ApplicationState[Application State] + +%% Internal dependencies +User --> Follower +User --> Message +end + +subgraph PkgGin ["Gin"] +N_Gin[" "] +end + +subgraph PkgGorm ["Gorm"] +N_Gorm[" "] +end + +subgraph PkgPrometheus ["Prometheus"] +N_Prom[" "] +end + +%% ========================================== +%% Dependencies +%% ========================================== + +%% Main entry point triggers User logic +PkgMain --> User + +%% Frameworks depending on Core Application (Clean Architecture inward flow) +PkgGin --> CoreApplication +PkgGorm --> CoreApplication +PkgPrometheus --> CoreApplication + +%% ========================================== +%% FOLDER HACK: Make inner nodes completely invisible +%% ========================================== +style N_Main fill:none,stroke:none,color:transparent +style N_Gin fill:none,stroke:none,color:transparent +style N_Gorm fill:none,stroke:none,color:transparent +style N_Prom fill:none,stroke:none,color:transparent + +%% Style the subgraphs to look more like solid packages +classDef packageStyle fill:#f8f9fa,stroke:#adb5bd,stroke-width:2px,color:#212529; +class PkgMain,PkgGin,PkgGorm,PkgPrometheus,CoreApplication packageStyle; +``` \ No newline at end of file diff --git a/report/systems/sections/stack.md b/report/systems/sections/stack.md new file mode 100644 index 0000000..1e14422 --- /dev/null +++ b/report/systems/sections/stack.md @@ -0,0 +1,3 @@ +## Dependencies and technology stack + +