#!/bin/bash
# 24 - Data Privacy & PII Exposure
# Tests: PII in URLs/errors/headers, stack trace leakage, sensitive endpoint caching,
#        GDPR indicators, response data minimization, verbose error responses
#
# OUT, TARGET and API_TARGET are read below but never assigned in this file;
# presumably they are provided by ../config.sh — confirm there.

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../config.sh"

# Report file; every result line is printed to stdout and appended here.
OUT_FILE="$OUT/24_data_privacy.txt"

echo "=== 24. Data Privacy & PII Exposure ===" | tee "$OUT_FILE"
echo "Target: $TARGET API: $API_TARGET" | tee -a "$OUT_FILE"
echo "" | tee -a "$OUT_FILE"

#######################################
# URL-encode a string with Python's urllib.parse.quote.
# Arguments: $1 - text to encode (surrounding whitespace is stripped)
# Outputs:   the percent-encoded text on stdout
#######################################
W() {
  printf '%s' "$1" \
    | python3 -c "import sys,urllib.parse; print(urllib.parse.quote(sys.stdin.read().strip()))"
}

#######################################
# Issue one request and log its HTTP status code.
# Globals:   OUT_FILE (appended to)
# Arguments: $1 - label for the report line; remaining args go to curl verbatim
# Outputs:   "[<code>] <label>" on stdout and in OUT_FILE
#######################################
do_test() {
  local label="$1"
  shift
  local code
  code=$(curl -sk -o /dev/null -w '%{http_code}' "$@")
  # An empty code means curl produced nothing at all; report it like a failed request.
  printf '[%s] %s\n' "${code:-000}" "$label" | tee -a "$OUT_FILE"
}

#######################################
# Issue one request, log its status code, and warn when the response body
# matches a sensitive pattern.
# Globals:   OUT_FILE (appended to)
# Arguments: $1 - label for the report line
#            $2 - extended regex (matched case-insensitively) to look for
#            remaining args go to curl verbatim
# Outputs:   "[<code>] <label>", plus a [WARN] line listing up to three matches
#######################################
do_body_check() {
  local label="$1" pattern="$2"
  shift 2
  local resp body code hits

  # Single request: the status code is appended after the body on its own
  # line, so the code and the body always come from the same response and
  # the target is not probed twice.
  resp=$(curl -sk -w '\n%{http_code}' "$@")
  code=${resp##*$'\n'}
  if [[ "$code" =~ ^[0-9]{3}$ ]]; then
    body=${resp%$'\n'*}
  else
    # No trailing status line (curl missing or aborted early).
    code=000
    body=$resp
  fi
  printf '[%s] %s\n' "$code" "$label" | tee -a "$OUT_FILE"

  hits=$(grep -ioE -e "$pattern" <<<"$body" | head -3 | tr '\n' ' ')
  if [[ -n "$hits" ]]; then
    echo " [WARN] Sensitive pattern found: $hits" | tee -a "$OUT_FILE"
  fi
}

#######################################
# Issue one request, log its status code, and report the Cache-Control
# response header (or its absence).
# Globals:   OUT_FILE (appended to)
# Arguments: $1 - label for the report line; remaining args go to curl verbatim
# Outputs:   "[<code>] <label>" followed by the Cache-Control value or a [MISS] line
#######################################
do_header_check() {
  local label="$1"
  shift
  local headers code cc
  headers=$(curl -sk -D - -o /dev/null "$@")

  # First three-digit run on the status line; plain ERE so non-GNU grep works too.
  code=$(head -1 <<<"$headers" | grep -oE '[0-9]{3}' | head -1)
  printf '[%s] %s\n' "${code:-000}" "$label" | tee -a "$OUT_FILE"

  # Check Cache-Control on this endpoint
  cc=$(grep -i '^cache-control:' <<<"$headers" | head -1)
  if [[ -n "$cc" ]]; then
    echo " Cache-Control: $(cut -d: -f2- <<<"$cc" | tr -d '\r')" | tee -a "$OUT_FILE"
  else
    echo " [MISS] No Cache-Control header" | tee -a "$OUT_FILE"
  fi
}
# ── 1. PII / credentials in URL parameters ────────────────────────────────────
# Each probe puts a sensitive-looking value in the query string and warns if
# the response body echoes it back.
echo "--- PII in URL Parameters ---" | tee -a "$OUT_FILE"

do_body_check "Email in URL ?email=test@example.com" \
  'test@example\.com' \
  -A "$BROWSER_UA" "$TARGET/?email=test@example.com"

do_body_check "Password in URL ?password=Secret123" \
  'secret123' \
  -A "$BROWSER_UA" "$TARGET/?password=Secret123"

do_body_check "API key in URL ?api_key=abc123" \
  'abc123' \
  -A "$BROWSER_UA" "$API_TARGET/status?api_key=abc123"

do_body_check "Token in URL ?token=eyJhbGci" \
  'eyJhbGci' \
  -A "$BROWSER_UA" "$TARGET/?token=eyJhbGciOiJIUzI1NiJ9.test.sig"

do_body_check "SSN-like value in URL ?ssn=123-45-6789" \
  '[0-9]{3}-[0-9]{2}-[0-9]{4}' \
  -A "$BROWSER_UA" "$TARGET/?ssn=123-45-6789"

echo "" | tee -a "$OUT_FILE"

# ── 2. Stack trace / verbose error leakage ────────────────────────────────────
echo "--- Stack Trace / Verbose Error Leakage ---" | tee -a "$OUT_FILE"

# Extended regex of strings that indicate a stack trace, source path or
# interpreter error message in a response body.
STACK_PATTERNS='(stack trace|exception|traceback|at [A-Za-z]+\.[A-Za-z]+\(|line [0-9]+|\.php:[0-9]+|\.rs:[0-9]+|/var/www|/home/|/root/|fatal error|undefined (variable|index)|parse error)'

do_body_check "404 error verbosity" "$STACK_PATTERNS" \
  -A "$BROWSER_UA" "$TARGET/this-page-definitely-does-not-exist-12345"

do_body_check "API invalid JSON error verbosity" "$STACK_PATTERNS" \
  -A "$BROWSER_UA" -X POST -H 'Content-Type: application/json' \
  -d '{invalid json}' "$API_TARGET/encrypt"

do_body_check "API missing required field error" "$STACK_PATTERNS" \
  -A "$BROWSER_UA" -X POST -H 'Content-Type: application/json' \
  -d '{}' "$API_TARGET/encrypt"

do_body_check "API wrong type error verbosity" "$STACK_PATTERNS" \
  -A "$BROWSER_UA" -X POST -H 'Content-Type: application/json' \
  -d '{"data":12345,"algorithm":true}' "$API_TARGET/encrypt"

do_body_check "PHP error probe via bad extension" "$STACK_PATTERNS" \
  -A "$BROWSER_UA" "$TARGET/index.php?XDEBUG_SESSION=1"

# Oversized payload: 100000 'A' characters in the "data" field.
do_body_check "Server path disclosure in 500" "$STACK_PATTERNS" \
  -A "$BROWSER_UA" -X POST -H 'Content-Type: application/json' \
  -d '{"data":"'"$(python3 -c "print('A'*100000)")"'"}' "$API_TARGET/encrypt"

echo "" | tee -a "$OUT_FILE"

# ── 3. Sensitive data in response headers ─────────────────────────────────────
echo "--- Sensitive Data in Response Headers ---" | tee -a "$OUT_FILE"

#######################################
# Issue one request and flag response headers that disclose software
# details or private-range IP addresses.
# Globals:   OUT_FILE (appended to)
# Arguments: $1 - label for the report line; remaining args go to curl verbatim
# Outputs:   "[<code>] <label>", one [WARN] line per disclosing header, and a
#            [WARN] line when a private/loopback IPv4 address appears
#######################################
check_sensitive_headers() {
  local label="$1"
  shift
  local headers code name value

  # Strip CRs once so reported header values do not carry a trailing \r.
  headers=$(curl -sk -D - -o /dev/null "$@" | tr -d '\r')

  # First three-digit run on the status line; plain ERE so non-GNU grep works too.
  code=$(head -1 <<<"$headers" | grep -oE '[0-9]{3}' | head -1)
  printf '[%s] %s\n' "${code:-000}" "$label" | tee -a "$OUT_FILE"

  # Check for info-leaking headers
  for name in Server X-Powered-By X-AspNet-Version X-AspNetMvc-Version \
    X-Generator X-Drupal X-WordPress X-Runtime X-Request-Id; do
    value=$(grep -i "^$name:" <<<"$headers" | head -1)
    if [[ -n "$value" ]]; then
      echo " [WARN] $value" | tee -a "$OUT_FILE"
    fi
  done

  # Check internal IPs in headers. The leading (^|[^0-9.]) anchors the first
  # octet so that a public address such as 210.1.2.3 is not reported because
  # it happens to contain "10.1.2.3".
  if grep -qE '(^|[^0-9.])(127\.[0-9]+\.[0-9]+\.[0-9]+|10\.[0-9]+\.[0-9]+\.[0-9]+|192\.168\.[0-9]+\.[0-9]+|172\.(1[6-9]|2[0-9]|3[01])\.[0-9]+\.[0-9]+)' <<<"$headers"; then
    echo " [WARN] Internal IP found in headers" | tee -a "$OUT_FILE"
  fi
}

check_sensitive_headers "Main page response headers" -A "$BROWSER_UA" "$TARGET/"
check_sensitive_headers "API response headers" -A "$BROWSER_UA" "$API_TARGET/status"
check_sensitive_headers "404 response headers" -A "$BROWSER_UA" "$TARGET/nonexistent-12345"
check_sensitive_headers "API 404 headers" -A "$BROWSER_UA" "$API_TARGET/nonexistent-endpoint"

echo "" | tee -a "$OUT_FILE"
# ── 4. Cache-Control on sensitive endpoints ───────────────────────────────────
echo "--- Cache-Control on Sensitive Endpoints ---" | tee -a "$OUT_FILE"

SENSITIVE_PATHS=(
  "/crypt.php"
  "/admin/"
  "/admin/index.php"
  "/key-vault/"
  "/security-systems/"
)

for SPATH in "${SENSITIVE_PATHS[@]}"; do
  do_header_check "Cache headers: $SPATH" -A "$BROWSER_UA" "$TARGET$SPATH"
done

do_header_check "Cache headers: API /encrypt" \
  -A "$BROWSER_UA" -X POST -H 'Content-Type: application/json' \
  -d '{"data":"test","algorithm":"classical"}' "$API_TARGET/encrypt"

echo "" | tee -a "$OUT_FILE"

# ── 5. Sensitive file / data exposure ─────────────────────────────────────────
echo "--- Sensitive Data File Exposure ---" | tee -a "$OUT_FILE"

do_body_check "robots.txt PII/path disclosure" \
  '(/var/www|/home/|/root/|password|secret|token|key)' \
  -A "$BROWSER_UA" "$TARGET/robots.txt"

do_body_check "sitemap.xml sensitive paths" \
  '(admin|login|password|token|key|secret|user|account)' \
  -A "$BROWSER_UA" "$TARGET/sitemap.xml"

do_body_check "API spec / OpenAPI sensitive schema" \
  '(password|secret|ssn|credit_card|cvv|pin)' \
  -A "$BROWSER_UA" "$API_TARGET/openapi.json"

#######################################
# Scan page source for version / build / commit identifiers and server
# software banners.
# Globals:   OUT_FILE (appended to)
# Arguments: $1 - page source text
# Outputs:   one [WARN] line per matching pattern (first two matches shown)
#######################################
_scan_version_info() {
  local page="$1" pattern found
  # [:space:] is used inside the bracket expressions because "\s" there would
  # be taken by ERE as the two literal characters '\' and 's', missing
  # "version: 1" and wrongly matching "versions1".
  for pattern in \
    'version["[:space:]:=]+[0-9]' \
    'build["[:space:]:=]+[0-9a-f]' \
    'commit["[:space:]:=]+[0-9a-f]{7}' \
    'php/[0-9]' \
    'apache/[0-9]' \
    'nginx/[0-9]'; do
    found=$(grep -ioE -e "$pattern" <<<"$page" | head -2)
    if [[ -n "$found" ]]; then
      echo " [WARN] $pattern → $found" | tee -a "$OUT_FILE"
    fi
  done
}

# Version/build info leakage in main page
PAGE_BODY=$(curl -sk -A "$BROWSER_UA" "$TARGET/")
echo "" | tee -a "$OUT_FILE"
echo "--- Version/Build Info in Page Source ---" | tee -a "$OUT_FILE"
_scan_version_info "$PAGE_BODY"
echo " (version pattern scan complete)" | tee -a "$OUT_FILE"

echo "" | tee -a "$OUT_FILE"
# ── 6. Logging of sensitive input fields ──────────────────────────────────────
echo "--- Sensitive Field Reflection (log/response test) ---" | tee -a "$OUT_FILE"

FAKE_CC='4111-1111-1111-1111'
FAKE_SSN='123-45-6789'
FAKE_EMAIL='piitest_probe@example.com'

# Each probe posts one fake value together with an invalid algorithm name and
# warns when that value shows up in the response body.
reflect_labels=(
  "Credit card reflected in API error"
  "SSN reflected in API error"
  "Email reflected in API error"
)
reflect_values=("$FAKE_CC" "$FAKE_SSN" "$FAKE_EMAIL")

for idx in "${!reflect_values[@]}"; do
  do_body_check "${reflect_labels[$idx]}" \
    "${reflect_values[$idx]}" \
    -A "$BROWSER_UA" -X POST -H 'Content-Type: application/json' \
    -d "{\"data\":\"${reflect_values[$idx]}\",\"algorithm\":\"invalid_algo_xyz\"}" "$API_TARGET/encrypt"
done

echo "" | tee -a "$OUT_FILE"

# ── 7. GDPR / Privacy indicators ──────────────────────────────────────────────
echo "--- GDPR & Privacy Compliance Indicators ---" | tee -a "$OUT_FILE"

# "label|path" pairs; only the HTTP status of each path is recorded.
for gdpr_probe in \
  "Privacy policy page exists|/privacy" \
  "Privacy policy /privacy-policy|/privacy-policy" \
  "Cookie consent endpoint|/cookie-consent" \
  "GDPR data request endpoint|/gdpr" \
  "Data deletion endpoint|/delete-account"; do
  do_test "${gdpr_probe%%|*}" -A "$BROWSER_UA" "$TARGET${gdpr_probe#*|}"
done

# Check for analytics tracking without consent
PAGE_HTML=$(curl -sk -A "$BROWSER_UA" "$TARGET/")
if ! grep -qiE '(google-analytics|googletagmanager|gtag|fbq|hotjar|mixpanel|segment\.com|amplitude)' <<<"$PAGE_HTML"; then
  echo " [OK] No third-party analytics trackers detected" | tee -a "$OUT_FILE"
else
  echo " [WARN] Third-party analytics/tracking detected in page source" | tee -a "$OUT_FILE"
  # List up to five tracker hostnames (with any trailing path) found in the page.
  grep -ioE '(google-analytics\.com|googletagmanager\.com|connect\.facebook\.net|static\.hotjar\.com|cdn\.segment\.com|cdn\.amplitude\.com|cdn\.mixpanel\.com)[^"'"'"' >]*' <<<"$PAGE_HTML" \
    | head -5 \
    | sed 's/^/ /' \
    | tee -a "$OUT_FILE"
fi

echo "" | tee -a "$OUT_FILE"

# ── 8. Data minimization — response field check ───────────────────────────────
echo "--- API Response Data Minimization ---" | tee -a "$OUT_FILE"

API_RESP=$(curl -sk -A "$BROWSER_UA" "$API_TARGET/status")

# Top-level keys of the status response; falls back to a placeholder when the
# body cannot be parsed as a JSON object.
STATUS_KEYS=$(python3 -c "import sys,json; d=json.load(sys.stdin); print(list(d.keys()))" <<<"$API_RESP" 2>/dev/null) \
  || STATUS_KEYS="(non-JSON or error)"
echo " Status response fields: $STATUS_KEYS" | tee -a "$OUT_FILE"

# Check for over-exposure of internal fields
for FIELD in password secret token key_material private_key db_host db_pass internal_ip server_path config; do
  grep -qi "\"$FIELD\"" <<<"$API_RESP" || continue
  echo " [WARN] Sensitive field '$FIELD' in status response" | tee -a "$OUT_FILE"
done

echo "" | tee -a "$OUT_FILE"
echo "=== 24. Data Privacy & PII Exposure COMPLETE ===" | tee -a "$OUT_FILE"