#!runpy.sh
|
|
|
|
"""\
This module contains code for analyzing ViewerStats data as uploaded by the viewer.

$LicenseInfo:firstyear=2021&license=viewerlgpl$
Second Life Viewer Source Code
Copyright (C) 2021, Linden Research, Inc.

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation;
version 2.1 of the License only.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
$/LicenseInfo$
"""
|
|
|
|
import argparse
|
|
import numpy as np
|
|
import pandas as pd
|
|
import json
|
|
from collections import Counter, defaultdict
|
|
from llbase import llsd
|
|
import io
|
|
import re
|
|
import os
|
|
import sys
|
|
|
|
def show_stats_by_key(recs, indices, settings_sd=None):
    """Print per-key usage statistics for the sub-dict of each record at *indices*.

    Each record in *recs* is drilled into via the successive keys in
    *indices*; every non-dict value found at that level is counted, both
    per key and per (key, value) pair.  Results are printed; the top 5
    values per key are shown.

    When *settings_sd* (a parsed settings.xml mapping) is given, also
    reports unused and unrecognized keys and returns a 4-tuple:
    (all settings keys, unused String-typed keys, unused non-String keys,
    keys seen in the data but absent from settings).  Otherwise returns ().
    """
    result = ()
    cnt = Counter()
    per_key_cnt = defaultdict(Counter)
    for r in recs:
        try:
            d = r
            for idx in indices:
                d = d[idx]
            for k, v in d.items():
                if isinstance(v, dict):
                    continue
                cnt[k] += 1
                if isinstance(v, list):
                    # Lists are unhashable; convert so they can be counted.
                    v = tuple(v)
                per_key_cnt[k][v] += 1
        except Exception as e:
            # Only print names guaranteed to be bound here; d/k/v may be
            # unset if the failure happened during the index drill-down.
            print("err", e)
            print("r", r)
            raise
    mc = cnt.most_common()
    print("=========================")
    keyprefix = ".".join(indices) + "." if indices else ""
    for i, (k, bigc) in enumerate(mc):
        # Records that never set this key presumably use the default.
        unset_cnt = len(recs) - bigc
        kmc = per_key_cnt[k].most_common(5)
        print(i, keyprefix + str(k), bigc)
        if settings_sd is not None and k in settings_sd and "Value" in settings_sd[k]:
            print(" ", "default", settings_sd[k]["Value"], "count", unset_cnt)
        for v in kmc:
            print(" ", "value", v[0], "count", v[1])
    if settings_sd is not None:
        print("Total keys in settings", len(settings_sd.keys()))
        unused_keys = list(set(settings_sd.keys()) - set(cnt.keys()))
        unused_keys_non_str = [k for k in unused_keys if settings_sd[k]["Type"] != "String"]
        unused_keys_str = [k for k in unused_keys if settings_sd[k]["Type"] == "String"]

        # Things that no one in the sample has set to a non-default value.
        # Possible candidates for removal.
        print("\nUnused_keys_non_str", len(unused_keys_non_str))
        print("======================")
        print("\n".join(sorted(unused_keys_non_str)))

        # Strings are not currently logged, so we have no info on usage.
        print("\nString keys (usage unknown)", len(unused_keys_str))
        print("======================")
        print("\n".join(sorted(unused_keys_str)))

        # Things that someone has set but that aren't recognized settings.
        unrec_keys = list(set(cnt.keys()) - set(settings_sd.keys()))
        print("\nUnrecognized keys", len(unrec_keys))
        print("======================")
        print("\n".join(sorted(unrec_keys)))

        result = (settings_sd.keys(), unused_keys_str, unused_keys_non_str, unrec_keys)
    return result
|
|
|
|
def parse_settings_xml(fname):
    """Parse an LLSD settings file from the viewer's app_settings directory."""
    # Paths are relative to scripts/metrics, where this tool is expected to run.
    path = "../../indra/newview/app_settings/" + fname
    with open(path, "r") as f:
        return llsd.parse_xml(f.read())
|
|
|
|
def read_raw_settings_xml(fname):
    """Return the raw text of a settings file from the app_settings directory."""
    # Paths are relative to scripts/metrics, where this tool is expected to run.
    path = "../../indra/newview/app_settings/" + fname
    with open(path, "r") as f:
        return f.read()
|
|
|
|
def write_settings_xml(fname, contents):
    """Serialize *contents* as pretty-printed LLSD XML into app_settings/*fname*."""
    # Paths are relative to scripts/metrics, where this tool is expected to run.
    fname = "../../indra/newview/app_settings/" + fname
    with open(fname, "w") as f:
        # The with-block closes the file; the old explicit f.close() was redundant.
        f.write(llsd.format_pretty_xml(contents))
|
|
|
|
def write_raw_settings_xml(fname, string):
    """Write raw settings text into app_settings/*fname* with Unix newlines.

    Accepts either str or bytes; bytes are decoded as latin1 (which maps
    every byte value, so it cannot fail).  The original unconditional
    string.decode('latin1') raised AttributeError on str under Python 3,
    and str is exactly what read_raw_settings_xml/remove_settings produce.
    """
    # Paths are relative to scripts/metrics, where this tool is expected to run.
    fname = "../../indra/newview/app_settings/" + fname
    if isinstance(string, bytes):
        string = string.decode("latin1")
    with io.open(fname, "w", newline="\n") as f:
        # The with-block closes the file; the old explicit f.close() was redundant.
        f.write(string)
|
|
|
|
def remove_settings(string, to_remove):
    """Strip whole ``<key>NAME</key>...<map>...</map>`` entries from raw settings XML.

    For each name in *to_remove*, deletes the span from its ``<key>`` tag
    through the first following ``</map>`` (non-greedy, DOTALL so the map
    body may span lines).  Returns the edited string; done via raw string
    munging so the resulting diff against settings.xml is minimal.
    """
    for name in to_remove:
        # re.escape guards against setting names containing regex
        # metacharacters; re.S and re.DOTALL are aliases, one suffices.
        pattern = r"<key>" + re.escape(name) + r"<.*?</map>\n"
        string = re.sub(pattern, "", string, flags=re.DOTALL)
    return string
|
|
|
|
def get_used_strings(root_dir):
    """Collect identifier-like tokens referenced in files under *root_dir*.

    Walks the tree and gathers every token that appears quoted or between
    XML tags (e.g. ``"RenderFoo"`` or ``>RenderFoo<``) — a conservative
    over-approximation of which settings names the codebase mentions.
    Returns the set of tokens found.
    """
    used_str = set()
    # Settings files themselves would spuriously mark every key as "used".
    skip_names = {"settings.xml", "settings.xml.edit", "settings_per_account.xml"}
    # Compile once instead of per line; token is alnum/underscore between
    # a quote-or-tag delimiter on each side.
    token_re = re.compile(r'[>\"]([A-Za-z0-9_]+)[\"<]')
    for dir_name, _sub_dir_list, file_list in os.walk(root_dir):
        for fname in file_list:
            if fname in skip_names:
                print("skip", fname)
                continue
            full_name = os.path.join(dir_name, fname)
            # errors="replace": the tree contains binary/odd-encoding files;
            # undecodable bytes can't form tokens anyway, so don't crash.
            with open(full_name, "r", errors="replace") as f:
                for line in f:
                    used_str.update(token_re.findall(line))
    print("got used_str", len(used_str))
    return used_str
|
|
|
|
|
|
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="process tab-separated table containing viewerstats logs")
    parser.add_argument("--verbose", action="store_true", help="verbose flag")
    parser.add_argument("--preferences", action="store_true", help="analyze preference info")
    parser.add_argument("--remove_unused", action="store_true", help="remove unused preferences")
    parser.add_argument("--column", help="name of column containing viewerstats info")
    parser.add_argument("infiles", nargs="+", help="name of .tsv files to process")
    args = parser.parse_args()

    for fname in args.infiles:
        print("process", fname)
        df = pd.read_csv(fname, sep='\t')
        # Each row's RAW_LOG:BODY column holds one JSON-encoded viewerstats record.
        jstrs = df['RAW_LOG:BODY']
        recs = [json.loads(jstr) for jstr in jstrs]
        show_stats_by_key(recs, [])
        show_stats_by_key(recs, ["agent"])
        if args.preferences:
            print("\nSETTINGS.XML")
            settings_sd = parse_settings_xml("settings.xml")
            (all_str, _, _, _) = show_stats_by_key(recs, ["preferences", "settings"], settings_sd)
            print()

            #print "\nSETTINGS_PER_ACCOUNT.XML"
            #settings_pa_sd = parse_settings_xml("settings_per_account.xml")
            #show_stats_by_key(recs,["preferences","settings_per_account"],settings_pa_sd)

            # --remove_unused depends on all_str from the preferences pass,
            # so it is only meaningful together with --preferences; nesting
            # it here avoids a NameError when --preferences is absent.
            if args.remove_unused:
                # Walk the codebase looking for settings names used in source.
                all_str_set = set(all_str)
                used_strings = get_used_strings("../../indra")
                used_strings_set = set(used_strings)
                unref_strings = all_str_set - used_strings_set

                # Some settings names are generated by appending to a prefix.
                # Need to look for this case.
                prefix_used = set()
                print("checking unref_strings", len(unref_strings))
                for u in unref_strings:
                    # Minimum prefix length of 6 to avoid spurious short matches.
                    for k in range(6, len(u)):
                        prefix = u[0:k]
                        if prefix in all_str_set and prefix in used_strings_set:
                            prefix_used.add(u)
                print("PREFIX_USED", len(prefix_used), ",".join(list(prefix_used)))
                print()
                unref_strings = unref_strings - prefix_used

                print("\nUNREF_IN_CODE " + str(len(unref_strings)) + "\n")
                print("\n".join(list(unref_strings)))

                settings_str = read_raw_settings_xml("settings.xml")
                # Do this via direct string munging to generate minimal changeset.
                settings_edited = remove_settings(settings_str, unref_strings)
                write_raw_settings_xml("settings.xml.edit", settings_edited)
|
|
|
|
|
|
|
|
|
|
|
|
|