#!/bin/sh

################################################################################
# A utility function that converts an HTTP access log read from stdin into CSV #
# and pipes it to sqlite-utils for easy querying                               #
#                                                                              #
# NOTE: It requires the `sqlite-utils` command-line tool to be installed.      #
# Install it with `pip install sqlite-utils`                                   #
#                                                                              #
# @author: Fabio Manganiello <fabio@manganiello.tech>                          #
# @license: MIT                                                                #
################################################################################

#######################################
# Run an SQL query against an HTTP access log read from stdin.
#
# Each input line is expected to look like an nginx/Apache "combined" log
# entry, e.g.
#   1.2.3.4 - - [10/Oct/2023:13:55:36 +0000] "GET / HTTP/1.1" 200 612 "-" "UA"
# The lines are converted to CSV with the columns
#   ip,date,query,response_code,response_size,user_agent
# and piped to `sqlite-utils memory`, where they are available as the
# table `stdin`.
#
# Arguments: $1  - SQL query to run (required)
#            $2… - extra arguments passed through to sqlite-utils
# Outputs:   whatever sqlite-utils prints (CSV) on stdout; usage on stderr
# Returns:   1 if no query was given, otherwise the pipeline's exit status
#######################################
query-http-log() {
    # Plain `[` instead of `[[` so the check does not depend on bash/zsh.
    if [ -z "${1:-}" ]; then
        printf '%s\n' \
            "Usage: [cat|head|tail] /var/log/nginx/[logfile] | query-http-log <query> [extra args for sqlite-utils]" \
            "" \
            "Example: cat /var/log/nginx/access.log | query-http-log \"select * from stdin where response_code = 404 and date > '2023-01-01' limit 10\"" \
            >&2
        return 1
    fi

    # A single awk pass converts the whole log; it replaces a per-line shell
    # loop that forked echo/awk/sed/date several times for every entry.
    # "$@" (not $*) keeps the SQL query as ONE argument and prevents the `*`
    # in "select * ..." from being glob-expanded by the shell.
    awk '
        BEGIN {
            # Map month abbreviations to their two-digit number.
            split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
            for (i = 1; i <= 12; i++)
                month[names[i]] = sprintf("%02d", i)

            print "ip,date,query,response_code,response_size,user_agent"
        }

        # Blank lines carry no log entry.
        NF == 0 { next }

        {
            # Splitting on the double quote isolates the quoted log fields:
            # piece 2 is the request line, piece 6 is the user agent. Because
            # of the split, neither piece can itself contain a double quote,
            # so wrapping them in quotes below yields valid CSV.
            split($0, quoted, "\"")

            # $4 is "[DD/Mon/YYYY:HH:MM:SS", $5 is "+ZZZZ]".
            stamp = $4
            sub(/^\[/, "", stamp)
            zone = $5
            sub(/\]$/, "", zone)

            # Use the offset recorded in the log itself, rewritten from
            # +HHMM to the ISO-8601 form +HH:MM.
            if (zone ~ /^[+-][0-9][0-9][0-9][0-9]$/)
                zone = substr(zone, 1, 3) ":" substr(zone, 4, 2)
            else
                zone = ""

            # Build an ISO-8601 timestamp; leave it empty if unparsable.
            when = ""
            if (split(stamp, t, "[/:]") == 6 && (t[2] in month))
                when = sprintf("%s-%s-%02d", t[3], month[t[2]], t[1] + 0) \
                    sprintf("T%s:%s:%s%s", t[4], t[5], t[6], zone)

            printf "%s,\"%s\",\"%s\",%s,%s,\"%s\"\n", \
                $1, when, quoted[2], $9, $10, quoted[6]
        }
    ' | sqlite-utils memory stdin:csv --csv "$@"
}