From cd6d75d196b407311bd63eeacda10c8877bb83ba Mon Sep 17 00:00:00 2001
From: PeterBolha <xbolhap@gmail.com>
Date: Mon, 27 May 2024 11:20:47 +0200
Subject: [PATCH] fix(heuristics): heuristics page fixes

---
 README.md                                     |   6 +-
 config_templates/perun.proxygui.yaml          |   4 +-
 perun/proxygui/api/heuristic_api.py           | 110 +++++++++++++-----
 .../proxygui/gui/templates/HeuristicData.html |   4 +-
 perun/proxygui/user_manager.py                |   8 +-
 .../AuthEventLoggingDbModels.py               |  18 ++-
 6 files changed, 100 insertions(+), 50 deletions(-)

diff --git a/README.md b/README.md
index 6eda8a16..ae41a069 100644
--- a/README.md
+++ b/README.md
@@ -136,9 +136,9 @@ Provides information about user authentication events gathered by the AuthEventL
 
 **Method:** `GET`
 
-**Description:** Used for showing gathered information about past athentications of user, and showing statistics based on that data.
+**Description:** Used for showing gathered information about past authentications of user, and showing statistics based on that data.
 
-**Performed MFA:** Gathered logs are checked if MFA was performed while handeling original logging event. Upstream ACRs values are compared to two hardcoded values: `https://refeds.org/profile/mfa` and `http://schemas.microsoft.com/claims/multipleauthn`
+**Performed MFA:** Gathered logs are checked to determine whether MFA was performed while handling the original logging event. Upstream ACR values are compared to two hardcoded values: `https://refeds.org/profile/mfa` and `http://schemas.microsoft.com/claims/multipleauthn`. In addition to the upstream ACRs, the database logs are checked for locally performed MFA.
 
 **Input arguments:** ID of searched user
 
@@ -206,7 +206,7 @@ class delete_consent_schema(marshmallow.Schema):
 
 - **Response / redirect / abort** - in case of these responses, scheme in response decorator can be custom (it is ignored when creating endpoint response)
 
-- **String** - redo to JSON with already created schema `string_schema` with only atribute `_text`. Then in response handeling add additional `json.loads()` wrapping function
+- **String** - convert to JSON using the already created schema `string_schema`, which has a single attribute `_text`. Then, in response handling, add an additional `json.loads()` wrapping function
 
 ```python
 return jsonify({"_text": "Original String text"})
diff --git a/config_templates/perun.proxygui.yaml b/config_templates/perun.proxygui.yaml
index da3e35a9..c8f3cea3 100755
--- a/config_templates/perun.proxygui.yaml
+++ b/config_templates/perun.proxygui.yaml
@@ -200,9 +200,11 @@ gui:
     few_time_logs: 5 # Number for logs for last connected cities, IPs and service
     some_time_logs: 20 # Number for logs for user agents
     many_time_logs: 100 # Number for logs for time graph
-    perun_user_name_attribute: urn:perun:user:attribute-def:core:displayName # OPTIONAL
     auth_event_logging: # REQUIRED
       logging_db: postgresql+psycopg2://user:password@hostname/database_name
+    perun_user_name_attribute: "urn:perun:user:attribute-def:core:displayName" # OPTIONAL
+    private_ip_segments: # OPTIONAL aliases for NAT segments
+      1.0.0.0/15: "VPN students"
   mfa_reset:
     preferred_mail_attribute: "urn:perun:user:attribute-def:def:preferredMail:" # REQUIRED mail to which MFA reset verification link will be sent
     all_mails_attribute: "urn:perun:user:attribute-def:virt:tcsMails:mu" # OPTIONAL mails where notification about the MFA reset will be sent if configured
diff --git a/perun/proxygui/api/heuristic_api.py b/perun/proxygui/api/heuristic_api.py
index 35e5f3d3..1596353b 100644
--- a/perun/proxygui/api/heuristic_api.py
+++ b/perun/proxygui/api/heuristic_api.py
@@ -1,3 +1,5 @@
+from ipaddress import ip_address, ip_network
+
 from perun.utils.auth_event_loggig.AuthEventLoggingDbModels import (
     AuthEventLoggingTable,
     UserAgentTable,
@@ -19,7 +21,7 @@ class AuthEventLoggingQueries:
     def __init__(self, cfg):
         # Vars for storing arrays with DB responses
         self.auth_result = None  # Auth data, Upstream, Requested and Services
-        self.user_agents = None  # User agents and upstream logs
+        self.raw_user_agents = None  # User agents and upstream logs
         self.time_result = None  # Many AuthEvent logs
         # Nunbers of retrieved rows from DB for various data
         self.few_time_logs = cfg["heuristic_page"]["few_time_logs"]
@@ -50,6 +52,7 @@ class AuthEventLoggingQueries:
             UpstreamAcrsTable.__table__,
             SessionIdTable.__table__,
         ]
+        self.private_ip_segments = cfg["heuristic_page"].get("private_ip_segments", {})
 
     # Modify ACRs value from string to list
     # '["acr1","acr2"]' -> ['acr1', 'acr2']
@@ -60,8 +63,11 @@ class AuthEventLoggingQueries:
             listed_acrs[i] = item.strip(" ").strip('"')
         return listed_acrs
 
-    # Simple function for return type of requested authenticaton
-    def requested_acr_status(self, raw_acr):
+    # Simple function for return type of requested authentication
+    def requested_acr_status(self, raw_acr, local_mfa_performed):
+        if local_mfa_performed:
+            return self.REQUEST_MFA_VALUE["required"]
+
         acr = self.strip_acrs(raw_acr)
         if not acr:
             return self.REQUEST_MFA_VALUE["other"]
@@ -72,7 +78,7 @@ class AuthEventLoggingQueries:
         else:
             return self.REQUEST_MFA_VALUE["other"]
 
-    # Beasic checker if MFA was performad based on upstream_acrs value
+    # Basic checker if MFA was performed based on upstream_acrs value
     def upstream_acr_status(self, acr):
         mfa_status = next((mfa for mfa in self.MFA_CONTEXTS if mfa in acr), None)
         return mfa_status is not None
@@ -89,20 +95,22 @@ class AuthEventLoggingQueries:
             meta_data.create_all(cnxn, self.tables, checkfirst=True)
 
             auth_table = AuthEventLoggingTable().__table__
-            agents_table = UserAgentTable().__table__
+            agents_raw_table = UserAgentRawTable().__table__
             upstream_table = UpstreamAcrsTable().__table__
             requested_table = RequestedAcrsTable().__table__
             services_table = LoggingSpTable().__table__
 
             # Returns last 'self.short_time_logs' logs
             # for specific user, sorted
-            # by decending time joined with upstream ACRs, requested ACRs
+            # by descending time joined with upstream ACRs, requested ACRs
             # and services table
-            query = (
+
+            inner_query = (
                 select(
                     auth_table.c.day.label("day"),
                     auth_table.c.geolocation_city.label("geolocation_city"),
                     auth_table.c.geolocation_country.label("geolocation_country"),
+                    auth_table.c.local_mfa_performed.label("local_mfa_performed"),
                     auth_table.c.ip_address.label("ip_address"),
                     requested_table.c.value.label("requested_value"),
                     upstream_table.c.value.label("upstream_value"),
@@ -119,10 +127,28 @@ class AuthEventLoggingQueries:
                 )
                 .join(services_table, services_table.c.id == auth_table.c.sp_id)
                 .where(auth_table.c.user_id == user_id)
-                .order_by(auth_table.c.day.desc())
+                .distinct(auth_table.c.ip_address)
+            ).alias("inner_query")
+
+            # The inner query selects distinct IPs; the outer query then orders them by date
+            outer_query = (
+                select(
+                    inner_query.c.day,
+                    inner_query.c.geolocation_city,
+                    inner_query.c.geolocation_country,
+                    inner_query.c.local_mfa_performed,
+                    inner_query.c.ip_address,
+                    inner_query.c.requested_value,
+                    inner_query.c.upstream_value,
+                    inner_query.c.name,
+                    inner_query.c.identifier,
+                )
+                .select_from(inner_query)
+                .order_by(inner_query.c.day.desc())
                 .limit(self.few_time_logs)
             )
-            response = cnxn.execute(query).fetchall()
+
+            response = cnxn.execute(outer_query).fetchall()
             self.auth_result = [r._asdict() for r in response]
 
             # Return last 'self.long_time_logs' logs
@@ -140,11 +166,15 @@ class AuthEventLoggingQueries:
             # joined on auth_event_logging table
             query = (
                 select(
-                    agents_table.c.value.label("agent_value"),
+                    agents_raw_table.c.value.label("agent_value"),
                     upstream_table.c.value.label("upstream_value"),
+                    auth_table.c.local_mfa_performed.label("local_mfa_performed"),
                 )
                 .select_from(auth_table)
-                .join(agents_table, agents_table.c.id == auth_table.c.user_agent_id)
+                .join(
+                    agents_raw_table,
+                    agents_raw_table.c.id == auth_table.c.user_agent_id,
+                )
                 .join(
                     upstream_table, upstream_table.c.id == auth_table.c.upstream_acrs_id
                 )
@@ -155,33 +185,34 @@ class AuthEventLoggingQueries:
             response = cnxn.execute(query).fetchall()
             # Returned dictionary:
             # {"agents_value": "val", "upstream_value": "val"}
-            self.user_agents = [r._asdict() for r in response]
+            self.raw_user_agents = [r._asdict() for r in response]
 
     # ----------------- Retrieving methods --------------
-    # Get information about last n cities (city name, timestamp, MFA performaed status)
+    # Get information about last n cities (city name, timestamp, MFA performed status)
     def get_last_n_cities(self):
         if self.auth_result is None:
             return []
 
         cities = []
 
-        # Retrieve relevant data from resluts
+        # Retrieve relevant data from results
         for item in self.auth_result:
-            city = item["geolocation_city"]
-            country = item["geolocation_country"]
+            city = item["geolocation_city"] or "Unknown city"
+            country = item["geolocation_country"] or "Unknown country"
             time = item["day"].strftime("%d. %m. %Y %H:%M")
             value = city + ", " + country + " (" + time + ")"
 
             cities.append(
                 {
                     "value": value,
-                    "mfa": self.upstream_acr_status(item["upstream_value"]),
+                    "mfa": self.upstream_acr_status(item["upstream_value"])
+                    or item["local_mfa_performed"],
                 }
             )
 
         return cities
 
-    # Retrieve inormation about last n IP addresses connected from
+    # Retrieve information about last n IP addresses connected from
     # (IP address, hostname lookup, MFA performed)
     def get_last_n_ips(self):
         if self.auth_result is None:
@@ -192,14 +223,25 @@ class AuthEventLoggingQueries:
             ip = item["ip_address"]
             ip_lookup = getnameinfo((ip, 0), 0)[0]
             ip_string = ip if (ip == ip_lookup) else ip + " (" + ip_lookup + ")"
-            city = item["geolocation_city"]
-            country = item["geolocation_country"]
-            value = ip_string + ", " + city + ", " + country
+
+            private_ip_range_name = ""
+            for private_ip_range, range_name in self.private_ip_segments.items():
+                if ip_address(ip) in ip_network(private_ip_range):
+                    private_ip_range_name = range_name
+                    break
+
+            if private_ip_range_name:
+                value = f"{ip_string}, {private_ip_range_name}"
+            else:
+                city = item["geolocation_city"] or "Unknown city"
+                country = item["geolocation_country"] or "Unknown country"
+                value = f"{ip_string}, {city}, {country}"
 
             ips.append(
                 {
                     "value": value,
-                    "mfa": self.upstream_acr_status(item["upstream_value"]),
+                    "mfa": self.upstream_acr_status(item["upstream_value"])
+                    or item["local_mfa_performed"],
                 }
             )
 
@@ -221,18 +263,21 @@ class AuthEventLoggingQueries:
         ]
         return json.dumps(times)
 
-    # Retieve data of used user agnets - compress same user agent and sort them by
-    # usage, MFA perfomed is True if it was performed at least once on that
+    # Retrieve data of used user agents - compress same user agent and sort them by
+    # usage, MFA performed is True if it was performed at least once on that
     # specific user agent
     def get_unique_user_agents(self):
-        if self.user_agents is None:
+        if self.raw_user_agents is None:
             return []
 
         agents = []
 
-        for item in self.user_agents:
+        for item in self.raw_user_agents:
             # Create default dictionary
-            mfa_performed = self.upstream_acr_status(item["upstream_value"])
+            mfa_performed = (
+                self.upstream_acr_status(item["upstream_value"])
+                or item["local_mfa_performed"]
+            )
             parsed_agent = str(parse(item["agent_value"]))
 
             index = next(
@@ -247,11 +292,13 @@ class AuthEventLoggingQueries:
                         "mfa": mfa_performed,
                     }
                 )
-            else:  # Alredy existing user agent, only actualize data
+            else:  # Already existing user agent, only update the data
                 agents[index]["value"] += 1
                 agents[index]["mfa"] |= mfa_performed
 
-        return sorted(agents, key=lambda d: d["value"], reverse=True)
+        sorted_agents = sorted(agents, key=lambda d: d["value"], reverse=True)
+
+        return sorted_agents
 
     # Retrieve data of used services, their name and identifier
     # Also with type of requested ACRs and upstream ACRs
@@ -262,7 +309,9 @@ class AuthEventLoggingQueries:
         services = []
 
         for item in self.auth_result:
-            requested_acrs = self.requested_acr_status(item["requested_value"])
+            requested_acrs = self.requested_acr_status(
+                item["requested_value"], item["local_mfa_performed"]
+            )
             upstream_acrs = self.upstream_acr_status(item["upstream_value"])
             services.append(
                 {
@@ -270,6 +319,7 @@ class AuthEventLoggingQueries:
                     "identifier": item["identifier"],
                     "requested_acrs": requested_acrs,
                     "upstream_acrs": upstream_acrs,
+                    "local_mfa_performed": item["local_mfa_performed"],
                 }
             )
         return services
diff --git a/perun/proxygui/gui/templates/HeuristicData.html b/perun/proxygui/gui/templates/HeuristicData.html
index 155cc7f6..06541b3f 100644
--- a/perun/proxygui/gui/templates/HeuristicData.html
+++ b/perun/proxygui/gui/templates/HeuristicData.html
@@ -99,7 +99,7 @@
                                     <ul>
                                     {% for service in sps %}
                                         <li>
-                                            {% if service.upstream_acrs %}
+                                            {% if service.upstream_acrs or service.local_mfa_performed %}
                                                 <span class="{% if cfg.css_framework == 'MUNI' %}
                                                 icon icon-user-check green {% else %}
                                                 fa fa-user-check {% endif %} success"
@@ -125,7 +125,7 @@
                         <div class="content">
                             <br/>
                             <h3><span>{{ _("Specify a Perun user ID to gather data:") }}</span></h3>
-                            <form action="{{ url_for('gui.get_heuristic') }}" method="get">
+                            <form action="{{ url_for('gui.heuristics') }}" method="get">
                                 <input type="number" id="user_id" name="user_id" min="1" required placeholder="User ID">
                                 <p class="btn-wrap">
                                     <button class="btn btn-primary btn-s btn-accept"
diff --git a/perun/proxygui/user_manager.py b/perun/proxygui/user_manager.py
index 16747d9a..e155a09f 100644
--- a/perun/proxygui/user_manager.py
+++ b/perun/proxygui/user_manager.py
@@ -36,7 +36,7 @@ class UserManager:
         self._KEYSTORE = USER_MANAGER_CFG["keystore"]
 
         if isinstance(cfg.get("heuristic_page", None), dict):
-            self._NAME_ATTRIBUTE = USER_MANAGER_CFG.get("heuristic_page", {}).get(
+            self._NAME_ATTRIBUTE = cfg.get("heuristic_page", {}).get(
                 "perun_user_name_attribute"
             )
 
@@ -57,8 +57,10 @@ class UserManager:
 
     def extract_user_attribute(self, attr_name: str, user_id: int) -> Any:
         user_attrs = self._ADAPTERS_MANAGER.get_user_attributes(user_id, [attr_name])
-        attr_value_candidates = user_attrs.get(attr_name, [])
-        attr_value = attr_value_candidates[0] if attr_value_candidates else None
+        attr_value_candidates = user_attrs.get(attr_name)
+        attr_value = attr_value_candidates
+        if attr_value_candidates and isinstance(attr_value_candidates, list):
+            attr_value = attr_value_candidates[0]
 
         return attr_value
 
diff --git a/perun/utils/auth_event_loggig/AuthEventLoggingDbModels.py b/perun/utils/auth_event_loggig/AuthEventLoggingDbModels.py
index 8722ab3a..9a4b1fcd 100644
--- a/perun/utils/auth_event_loggig/AuthEventLoggingDbModels.py
+++ b/perun/utils/auth_event_loggig/AuthEventLoggingDbModels.py
@@ -1,9 +1,4 @@
-from sqlalchemy import (
-    Column,
-    String,
-    ForeignKey,
-    Integer,
-)
+from sqlalchemy import Column, String, ForeignKey, Integer, Boolean
 from sqlalchemy.dialects.postgresql import TIMESTAMP
 from sqlalchemy.orm import declarative_base
 
@@ -22,6 +17,7 @@ class AuthEventLoggingTable(Base):
     ip_address = Column(String)
     geolocation_city = Column(String)
     geolocation_country = Column(String)
+    local_mfa_performed = Column(Boolean, default=False)
     session_id = Column(Integer, ForeignKey("session_id_values.id"))
     requested_acrs_id = Column(Integer, ForeignKey("requested_acrs_values.id"))
     upstream_acrs_id = Column(Integer, ForeignKey("upstream_acrs_values.id"))
@@ -49,32 +45,32 @@ class SessionIdTable(Base):
     __tablename__ = "session_id_values"
 
     id = Column(Integer, primary_key=True)
-    value = Column(String)
+    value = Column(String, unique=True)
 
 
 class RequestedAcrsTable(Base):
     __tablename__ = "requested_acrs_values"
 
     id = Column(Integer, primary_key=True)
-    value = Column(String)
+    value = Column(String, unique=True)
 
 
 class UpstreamAcrsTable(Base):
     __tablename__ = "upstream_acrs_values"
 
     id = Column(Integer, primary_key=True)
-    value = Column(String)
+    value = Column(String, unique=True)
 
 
 class UserAgentRawTable(Base):
     __tablename__ = "user_agent_raw_values"
 
     id = Column(Integer, primary_key=True)
-    value = Column(String)
+    value = Column(String, unique=True)
 
 
 class UserAgentTable(Base):
     __tablename__ = "user_agent_values"
 
     id = Column(Integer, primary_key=True)
-    value = Column(String)
+    value = Column(String, unique=True)
-- 
GitLab