"""A handler that serves all app logs for an App Engine HTTP request.
StackDriver Logging API:
https://cloud.google.com/logging/docs/apis
"""
import calendar
import datetime
import html
import logging
import re
import time
import urllib.request, urllib.parse, urllib.error
from google.cloud import ndb
from google.cloud.logging import Client
import humanize
from oauth_dropins.webutil.util import json_dumps, json_loads
from .appengine_info import APP_ID
from . import flask_util, util
from .flask_util import error
logger = logging.getLogger(__name__)
LEVELS = {
logging.DEBUG: 'D',
logging.INFO: 'I',
logging.WARNING: 'W',
logging.ERROR: 'E',
logging.CRITICAL: 'F',
}
CACHE_TIME = datetime.timedelta(days=1)
MAX_LOG_AGE = datetime.timedelta(days=30)
# App Engine's launch, roughly
MIN_START_TIME = time.mktime(datetime.datetime(2008, 4, 1).timetuple())
MAX_START_TIME = time.mktime(datetime.datetime(2099, 1, 1).timetuple())
SANITIZE_RE = re.compile(r"""
((?:access|api|oauth)?[ _]?
(?:code|consumer_key|consumer_secret|nonce|secret|signature|token|verifier)
(?:u?['"])?
(?:=|:|\ |,\ |%3D)\ *
(?:u?['"])?
)
[^ &='"]+
""", flags=re.VERBOSE | re.IGNORECASE)
[docs]def sanitize(msg):
"""Sanitizes access tokens and Authorization headers."""
return SANITIZE_RE.sub(r'\1...', msg)
[docs]def url(when, key):
"""Returns the relative URL (no scheme or host) to a log page.
Args:
when: datetime
key: ndb.Key
"""
return f'log?start_time={calendar.timegm(when.utctimetuple())}&key={key.urlsafe().decode()}'
[docs]def maybe_link(when, key, time_class='dt-updated', link_class=''):
"""Returns an HTML snippet with a timestamp and maybe a log page link.
Example:
<a href="/log?start_time=1513904267&key=aglz..." class="u-bridgy-log">
<time class="dt-updated" datetime="2017-12-22T00:57:47.222060"
title="Fri Dec 22 00:57:47 2017">
3 days ago
</time>
</a>
The <a> tag is only included if the timestamp is 30 days old or less, since
Stackdriver's basic tier doesn't store logs older than that:
https://cloud.google.com/monitoring/accounts/tiers#logs_ingestion
https://github.com/snarfed/bridgy/issues/767
Args:
when: datetime
key: ndb.Key
time_class: string, optional class value for the <time> tag
link_class: string, optional class value for the <a> tag (if generated)
Returns: string HTML
"""
# always show time zone. assume naive timestamps are UTC.
if when.tzinfo is None:
when = when.replace(tzinfo=datetime.timezone.utc)
# humanize.naturaltime breaks on timezone-aware datetimes :(
# https://github.com/jmoiron/humanize/issues/9#issuecomment-322917865
now = datetime.datetime.now(tz=when.tzinfo)
time = f'<time class="{time_class}" datetime="{when.isoformat()}" title="{when.ctime()} {when.tzname()}">{humanize.naturaldelta(now - when)} ago</time>'
if now > when > now - MAX_LOG_AGE:
return f'<a class="{link_class}" href="/{url(when, key)}">{time}</a>'
return time
# datastore string keys are url-safe-base64 of, say, at least 32(ish) chars.
# https://cloud.google.com/appengine/docs/python/ndb/keyclass#Key_urlsafe
# http://tools.ietf.org/html/rfc3548.html#section-4
BASE64 = 'A-Za-z0-9-_='
DATASTORE_KEY_RE = re.compile("([^%s])(([%s]{8})[%s]{24,})([^%s])" % ((BASE64,) * 4))
[docs]def linkify_datastore_keys(msg):
"""Converts string datastore keys to links to the admin console viewer."""
def linkify_key(match):
try:
# Useful for logging, but also causes false positives in the search, we
# find and use log requests instead of real requests.
# logger.debug(f'Linkifying datastore key: {'match.group(2)}')
key = ndb.Key(urlsafe=match.group(2))
tokens = [(kind, f"{'id' if isinstance(id, int) else 'name'}:{id}")
for kind, id in key.pairs()]
key_str = '0/|' + '|'.join(f'{len(kind)}/{kind}|{len(id)}/{id}'
for kind, id in tokens)
key_quoted = urllib.parse.quote(urllib.parse.quote(key_str, safe=''), safe='')
html = f"{match.group(1)}<a title='{match.group(2)}' href='https://console.cloud.google.com/datastore/entities;kind={key.kind()};ns=__$DEFAULT$__/edit;key={key_quoted}?project={APP_ID}'>{match.group(3)}...</a>{match.group(4)}"
logger.debug(f'Returning {html}')
return html
except BaseException:
# logger.debug("Couldn't linkify candidate datastore key.") # too noisy
return match.group(0)
return DATASTORE_KEY_RE.sub(linkify_key, msg)
[docs]def log():
"""Flask view that searches for and renders app logs for an HTTP request.
URL parameters:
start_time: float, seconds since the epoch
key: string that should appear in the first app log
Install with:
app.add_url_rule('/log', view_func=logs.log)
Or:
@app.get('/log')
@cache.cached(600)
def log():
return logs.log()
"""
start_time = flask_util.get_required_param('start_time')
if not util.is_float(start_time):
return error(f"Couldn't convert start_time to float: {start_time!r}")
start_time = float(start_time)
if start_time < MIN_START_TIME:
return error(f'start_time must be >= {MIN_START_TIME}')
elif start_time > MAX_START_TIME:
return error(f'start_time must be <= {MAX_START_TIME}')
client = Client()
project = f'projects/{APP_ID}'
key = urllib.parse.unquote_plus(flask_util.get_required_param('key'))
# first, find the individual log message to get the trace id
utcfromtimestamp = datetime.datetime.utcfromtimestamp
timestamp_filter = (
f"timestamp>=\"{utcfromtimestamp(start_time - 60).isoformat() + 'Z'}\" "
f"timestamp<=\"{utcfromtimestamp(start_time + 120).isoformat() + 'Z'}\"")
query = f'logName="{project}/logs/python" textPayload:"{key}" {timestamp_filter}'
logger.info(f'Searching logs with: {query}')
try:
# https://googleapis.dev/python/logging/latest/client.html#google.cloud.logging_v2.client.Client.list_entries
log = next(iter(client.list_entries(filter_=query, page_size=1)))
except StopIteration:
logger.info('No log found!')
return 'No log found!', 404
logger.info(f'Got insert id {log.insert_id} trace {log.trace}')
# now, print all logs with that trace
resp = """\
<html>
<body style="font-family: monospace; white-space: pre">
"""
query = f'logName="{project}/logs/python" trace="{log.trace}" resource.type="gae_app" {timestamp_filter}'
logger.info(f'Searching logs with: {query}')
# sanitize and render each line
for log in client.list_entries(filter_=query, page_size=1000):
# payload is a union that can be string, JSON, or protobuf
# https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#FIELDS.oneof_payload
msg = log.payload
if not msg:
continue
elif isinstance(msg, (dict, list)):
msg = json_dumps(msg, indent=2)
else:
msg = str(msg)
msg = linkify_datastore_keys(util.linkify(html.escape(
msg if msg.startswith('Created by this poll:') else sanitize(msg),
quote=False)))
resp += '%s %s %s<br />' % (
log.severity[0], log.timestamp, msg.replace('\n', '<br />'))
resp += '</body>\n</html>'
return resp, {'Content-Type': 'text/html; charset=utf-8'}