# Input:  (notebook-export cell marker; kept as a comment so the file parses)
import pandas as pd
import numpy as np
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix, accuracy_score, cohen_kappa_score
from sklearn.metrics import precision_recall_fscore_support, roc_curve, roc_auc_score
from itertools import combinations
# Load the human-labelled validation set; ISO-8859-1 tolerates the
# non-UTF-8 bytes present in the raw text.
df = pd.read_csv("data/validation.csv", encoding="ISO-8859-1")

# Show the available columns on a single comma-separated line.
for col in df.columns:
    print(col, end=", ")
def print_kappa(name, x, y):
    """Print Cohen's kappa agreement between two rating series, 3 dp."""
    kappa = cohen_kappa_score(x, y)
    print(f"{name}: {round(kappa, 3)}")
# Inter-rater agreement between the human positive/negative scores and
# SentiStrength's corresponding scores.
print_kappa("SentiStrength Pos", df.pos, df.ss_pos)
print_kappa("SentiStrength Neg", df.neg, df.ss_neg)
def print_eval_binary(name, x, y):
    """Print a labelled confusion matrix, accuracy, and per-class
    precision/recall/F-score/support for binary predictions `y` against
    human labels `x`."""
    def _round2(values):
        # Round each metric to 2 decimal places for display.
        return [round(v, 2) for v in values]

    print("*** Classification Evaluation for: " + name + " ***\n")
    matrix = pd.DataFrame(
        confusion_matrix(x, y),
        index=['human:neg', 'human:pos'],
        columns=['pred:neg', 'pred:pos'],
    )
    print(matrix, "\n")
    print('accuracy:\t{}'.format(round(accuracy_score(x, y) * 100, 2)), "\n")
    # labels=[1, 0] reports the positive class first, matching the header row.
    precision, recall, fscore, support = precision_recall_fscore_support(
        x, y, labels=[1, 0]
    )
    print('\t\tpos\tneg')
    print('precision:\t{}\t{}'.format(*_round2(precision)))
    print('recall:\t\t{}\t{}'.format(*_round2(recall)))
    print('fscore:\t\t{}\t{}'.format(*_round2(fscore)))
    print('support:\t{}\t{}'.format(*_round2(support)))
    print('')
# Evaluate each tool's binary prediction against the human binary labels.
for tool_name, predictions in [
    ("SentiStrength", df.ss_binary),
    ("LIWC", df.liwc_binary),
    ("Bing", df.bing_binary),
    ("AFINN", df.afinn_binary),
    ("Loughran", df.loughran_binary),
    ("NRC", df.nrc_binary),
    ("Tidytext", df.tidytext_binary),
    ("VADER", df.vader_binary),
]:
    print_eval_binary(tool_name, df.binary, predictions)
def print_eval_binary_pair(a, b, x, y):
    """Print the agreement (as accuracy %) between two tools' binary
    predictions `x` and `y`, labelled with tool names `a` and `b`."""
    agreement = round(accuracy_score(x, y) * 100, 2)
    print("*** Pairwise comparison: " + a + " + " + b + " ***\n")
    print('accuracy:\t{}'.format(agreement), "\n")
# Pairwise agreement between all unordered pairs of the four strongest
# binary classifiers. combinations() is iterable directly; no list() needed.
for left, right in combinations(
    ["ss_binary", "liwc_binary", "tidytext_binary", "vader_binary"], 2
):
    print_eval_binary_pair(left, right, df[left], df[right])
def print_eval_trinary(name, x, y):
    """Print a labelled 3x3 confusion matrix, accuracy, and per-class
    precision/recall/F-score/support for trinary (neg/neutral/pos)
    predictions `y` against human labels `x`."""
    def _round2(values):
        # Round each metric to 2 decimal places for display.
        return [round(v, 2) for v in values]

    print("*** Classification Evaluation for: " + name + " ***\n")
    matrix = pd.DataFrame(
        confusion_matrix(x, y),
        index=['human:neg', 'human:neutral', 'human:pos'],
        columns=['pred:neg', 'pred:neutral', 'pred:pos'],
    )
    print(matrix, "\n")
    print('accuracy:\t{}'.format(round(accuracy_score(x, y) * 100, 2)), "\n")
    # labels=[1, 0, -1] orders classes pos, neutral, neg to match the header.
    precision, recall, fscore, support = precision_recall_fscore_support(
        x, y, labels=[1, 0, -1]
    )
    print('\t\tpos\tneut\tneg')
    print('precision:\t{}\t{}\t{}'.format(*_round2(precision)))
    print('recall:\t\t{}\t{}\t{}'.format(*_round2(recall)))
    print('fscore:\t\t{}\t{}\t{}'.format(*_round2(fscore)))
    print('support:\t{}\t{}\t{}'.format(*_round2(support)), "\n")
# Evaluate each tool's trinary prediction against the human trinary labels.
for tool_name, predictions in [
    ("SentiStrength", df.ss_trinary),
    ("LIWC", df.liwc_trinary),
    ("Bing", df.bing_trinary),
    ("AFINN", df.afinn_trinary),
    ("Loughran", df.loughran_trinary),
    ("NRC", df.nrc_trinary),
    ("Tidytext", df.tidytext_trinary),
    ("VADER", df.vader_trinary),
]:
    print_eval_trinary(tool_name, df.trinary, predictions)
def print_eval_trinary_pair(a, b, x, y):
    """Print the agreement (as accuracy %) between two tools' trinary
    predictions `x` and `y`, labelled with tool names `a` and `b`."""
    agreement = round(accuracy_score(x, y) * 100, 2)
    print("*** Pairwise comparison: " + a + " + " + b + " ***\n")
    print('accuracy:\t{}'.format(agreement), "\n")
# Pairwise agreement between all unordered pairs of the four strongest
# trinary classifiers.
# BUG FIX: the original called print_eval_binary_pair here, which left
# print_eval_trinary_pair unused; call the trinary comparison instead.
for a, b in combinations(["ss_trinary", "liwc_trinary", "tidytext_trinary", "vader_trinary"], 2):
    print_eval_trinary_pair(a, b, df[a], df[b])
# Record package/session versions for reproducibility (sinfo package).
from sinfo import sinfo
sinfo()