This commit is contained in:
Kenneth Jao 2018-05-30 00:27:48 -04:00
commit aa229d93d5
2 changed files with 2095 additions and 8 deletions

2000
SmearcarDB/phoible Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,15 @@
from flask import Flask from flask import Flask
from flask import render_template, jsonify, request from flask import render_template, jsonify, request
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
from numpy.polynomial.polynomial import polyfit
from numpy import corrcoef
import numpy as np
import tkinter
import matplotlib.pyplot as plt
from flask import send_file from flask import send_file
import datetime import datetime
import os import os
from scipy import stats
app = Flask(__name__) app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///data.db' app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///data.db'
@ -41,7 +47,6 @@ class Update(db.Model):
date = db.Column(db.DateTime, nullable=False, date = db.Column(db.DateTime, nullable=False,
default=datetime.datetime.now()) default=datetime.datetime.now())
class Editor(db.Model): class Editor(db.Model):
id = db.Column(db.Integer, primary_key=True, autoincrement=True) id = db.Column(db.Integer, primary_key=True, autoincrement=True)
authority = db.Column(db.Integer, nullable=False, default=1) authority = db.Column(db.Integer, nullable=False, default=1)
@ -49,10 +54,91 @@ class Editor(db.Model):
# 1: Below + create Updates # 1: Below + create Updates
# 2: Edit values and Add files # 2: Edit values and Add files
# 3: No Access # 3: No Access
username = db.Column(db.String(32), nullable=False) username = db.Column(db.String(32), nullable=False)
password = db.Column(db.String(32), nullable=False) password = db.Column(db.String(32), nullable=False)
def rand_jitter(arr):
stdev = .01*(max(arr)-min(arr))
return arr + np.random.randn(len(arr)) * stdev
def uniqueness(title="Figure 1"):
x = []
y = []
languages = Language.query.all()
for phoneme in Phoneme.query.all():
frequencies = Frequency.query.filter_by(phoneme_id=phoneme.id).all()
values = [x.value for x in frequencies]
x.append(len(frequencies) / len(languages))
y.append(sum(values) / len(frequencies))
print(corrcoef(x, y))
b, m = polyfit(x, y, 1)
plt.scatter(rand_jitter(x), y, s=7)
bestfit = [b + m * number for number in x]
plt.plot(x, bestfit, '-')
plt.xlabel("Phoneme Presence in Studied Languages")
plt.ylabel("Average Frequency / %")
plt.title(title)
plt.show()
def phoneme_rank(scatter=False, detail=1000, textOutput=False, title="Figure 2"):
speakers = {
'Spanish (Castillian)': 46.4,
'English (American)': 308.9,
'Spanish (American)': 435.7,
'Japanese': 128,
'German': 76,
'Arabic': 315,
'Mandarin': 909,
'Portuguese (Brazilian)': 194,
'French': 76.8,
'Hindi': 260,
'Polish': 40.3,
'Samoan': 0.40742,
'Kaiwa': 0.0021,
'Bengali': 243,
'Swedish': 12.8,
'Malay': 60.7,
'Italian': 64.8
}
total = sum(speakers.values())
calculation = sorted([(phoneme.name, sum([frequency.value * speakers[Language.query.filter_by(id=frequency.language_id).first().name] / (total * len(Language.query.filter_by(name=Language.query.filter_by(id=frequency.language_id).first().name).all()) ) for frequency in Frequency.query.filter_by(phoneme_id=phoneme.id).all()])) for phoneme in Phoneme.query.limit(detail).all()], key=lambda x:-x[1])
labels, data = zip(*calculation)
if textOutput:
return labels
if scatter:
plt.yscale("log")
plt.plot(range(len(data)), data)
else:
plt.bar(range(len(data)), data)
plt.xlabel("Phoneme Rank")
plt.ylabel("Frequency weighted by Number of Speakers / %")
plt.title(title)
plt.show()
def phoible_compare():
# lang_id = Language.query.filter_by(name=lang).first().id
with open("phoible", "r") as f:
phoible = f.read().splitlines()
phonemes = [phoneme.name for phoneme in Phoneme.query.all()]
return [x for x in phoible if x in phonemes]
def rank_compare(textOutput=True, title="Rank Comparison"):
phoible = phoible_compare()
original = phoneme_rank(textOutput=True)
phoible_ranks = list(range(len(phoible)+1))[1:]
original_ranks = [original.index(phoneme) + 1 for phoneme in phoible]
if textOutput:
return {"kendall": stats.kendalltau(phoible_ranks, original_ranks),
"spearman": stats.spearmanr(phoible_ranks, original_ranks),
"wilcoxon": stats.wilcoxon(phoible_ranks, original_ranks, zero_method="pratt")}
plt.scatter(phoible_ranks, original_ranks)
plt.xlabel("Phoible Rank")
plt.ylabel("Weighted Rank")
plt.title(title)
plt.show()
def database(): def database():
final = {'values': []} final = {'values': []}
@ -83,18 +169,19 @@ def phoneme_add(info):
# } # }
phoneme = Phoneme.query.filter_by(name=info['phoneme']).first() phoneme = Phoneme.query.filter_by(name=info['phoneme']).first()
language = Language.query.filter_by(id=info['language_id']).first() language = Language.query.filter_by(id=info['language_id']).first()
if not phoneme:
phoneme = Phoneme(name=info['phoneme'])
db.session.add(phoneme)
link = Frequency.query.filter_by( link = Frequency.query.filter_by(
language_id=language.id, language_id=language.id,
phoneme_id=phoneme.id).first() phoneme_id=phoneme.id).first()
if phoneme and link: if not link:
link.value = info['value']
else:
if not phoneme:
phoneme = Phoneme(name=info['phoneme'])
link = Frequency(value=info['value']) link = Frequency(value=info['value'])
link.phoneme = phoneme link.phoneme = phoneme
language.phonemes.append(link) language.phonemes.append(link)
db.session.add_all([phoneme, link]) db.session.add(link)
else:
link.value = info['value']
def phoneme_remove(info): def phoneme_remove(info):