import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
rawData = pd.read_csv("renamedHeaders.csv", skiprows=[0])
rawData.head()
Start Date | End Date | IP Address | Progress | Duration (in seconds) | Finished | Recorded Date | Response ID | Recipient Last Name | Recipient First Name | ... | External Data Reference | Location Latitude | Location Longitude | Distribution Channel | User Language | campus | educationLevel | isGrad | experience | continued | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3/25/21 12:03 | 3/25/21 12:05 | 67.183.121.24 | 100 | 95 | True | 3/25/21 12:05 | R_YQ7iSdzAhzSuTu1 | NaN | NaN | ... | NaN | 47.497299 | -122.355499 | anonymous | EN | Seattle | Undergraduate student | 0 | Mostly negative | My professors have done a great job overall wi... |
1 | 3/25/21 12:04 | 3/25/21 12:05 | 67.170.122.210 | 100 | 39 | True | 3/25/21 12:05 | R_3jGNkoQPznVw0Df | NaN | NaN | ... | NaN | 47.749496 | -122.297600 | anonymous | EN | Seattle | Undergraduate student | 0 | A mix of positive and negative | Also providing online resources like updating ... |
2 | 3/25/21 12:07 | 3/25/21 12:07 | 98.232.33.143 | 100 | 17 | True | 3/25/21 12:07 | R_9ZDweGffIIsCve1 | NaN | NaN | ... | NaN | 47.651703 | -117.083801 | anonymous | EN | Seattle | Undergraduate student | 0 | Mostly positive | NaN |
3 | 3/25/21 12:05 | 3/25/21 12:07 | 172.58.44.174 | 100 | 120 | True | 3/25/21 12:07 | R_29hsSW6x49hZS6c | NaN | NaN | ... | NaN | 47.900497 | -122.247200 | anonymous | EN | Bothell | Undergraduate student | 0 | A mix of positive and negative | I enjoy the zoom session options, and while I ... |
4 | 3/25/21 12:06 | 3/25/21 12:07 | 73.254.146.138 | 100 | 57 | True | 3/25/21 12:07 | R_1I61O4XyIcHW8A3 | NaN | NaN | ... | NaN | 47.698700 | -117.439697 | anonymous | EN | Seattle | Undergraduate student | 0 | A mix of positive and negative | Flexible class time, less waste of time, less ... |
5 rows × 21 columns
rawData.shape
(3769, 21)
rawData.isna().sum()
Start Date 0 End Date 0 IP Address 0 Progress 0 Duration (in seconds) 0 Finished 0 Recorded Date 0 Response ID 0 Recipient Last Name 3769 Recipient First Name 3769 Recipient Email 3769 External Data Reference 3769 Location Latitude 0 Location Longitude 0 Distribution Channel 0 User Language 0 campus 2 educationLevel 2 isGrad 0 experience 4 continued 494 dtype: int64
campusTotals = nonEmptyC.groupby("campus").ones.sum()
campusTotals
campus Bothell 864.0 Seattle 2110.0 Tacoma 297.0 Name: ones, dtype: float64
nonEmptyC.groupby("educationLevel").ones.sum()
educationLevel Graduate student 634.0 Undergraduate student 2637.0 Name: ones, dtype: float64
nonEmptyExp = rawData[~rawData.experience.isna()].copy()
### Adding a numerical positive/negative experience score (-1 / 0 / +1)
def getExpInt(text):
    """Map a free-text experience answer to a numeric score.

    Returns +1 for "Mostly positive", -1 for "Mostly negative",
    and 0 for anything else (e.g. "A mix of positive and negative").
    """
    # Check positive first to mirror the original precedence.
    for phrase, score in (("Mostly positive", 1), ("Mostly negative", -1)):
        if phrase in text:
            return score
    return 0
nonEmptyExp["experienceInt"] = nonEmptyExp.apply(lambda x: getExpInt(x.experience), axis=1)
nonEmptyExp[["experienceInt", "experience"]].head(3)
experienceInt | experience | |
---|---|---|
0 | -1 | Mostly negative |
1 | 0 | A mix of positive and negative |
2 | 1 | Mostly positive |
nonEmptyExp.groupby("educationLevel").experienceInt.mean()
educationLevel Graduate student 0.198087 Undergraduate student 0.094688 Name: experienceInt, dtype: float64
nonEmptyExp.groupby("campus").experienceInt.mean()
campus Bothell 0.263158 Seattle 0.032862 Tacoma 0.250000 Name: experienceInt, dtype: float64
nonEmptyC = nonEmptyExp[~nonEmptyExp.continued.isna()].copy()
nonEmptyC.isna().sum()
Start Date 0 End Date 0 IP Address 0 Progress 0 Duration (in seconds) 0 Finished 0 Recorded Date 0 Response ID 0 Recipient Last Name 3273 Recipient First Name 3273 Recipient Email 3273 External Data Reference 3273 Location Latitude 0 Location Longitude 0 Distribution Channel 0 User Language 0 campus 2 educationLevel 2 isGrad 0 experience 0 continued 0 experienceInt 0 dtype: int64
# Flag responses whose "continued" text mentions commuting
# ("commut" catches commute/commuting/commuter).
# NOTE(review): unlike the "record" check further down, this match is
# case-sensitive (no .lower()), so "Commute" at sentence start is missed —
# confirm whether that is intended before reusing these counts.
nonEmptyC["commute"] = nonEmptyC.apply(lambda x: int("commut" in x.continued), axis=1)
nonEmptyC.loc[nonEmptyC.commute == 1].shape
(143, 25)
round(nonEmptyC.groupby("commute").experienceInt.mean(), 2)
commute 0 0.11 1 0.50 Name: experienceInt, dtype: float64
nonEmptyC["record"] = nonEmptyC.apply(lambda x: int("record" in x.continued.lower()), axis=1)
nonEmptyC.record.sum()
938
nonEmptyC.shape[0]
3273
round(938/3273, 2)
0.29
nonEmptyC["officeHrs"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, ["office hours", "office-hours"]), axis=1)
nonEmptyC.officeHrs.sum()
264
gradOfficeHrs = nonEmptyC.groupby("isGrad").officeHrs.sum()
gradOfficeHrs
isGrad 0 222 1 42 Name: officeHrs, dtype: int64
gradTotals = nonEmptyC.groupby("isGrad").ones.sum()
gradTotals
isGrad 0 2637.0 1 636.0 Name: ones, dtype: float64
round(gradTotals / gradOfficeHrs, 1)
isGrad 0 11.9 1 15.1 dtype: float64
openBookSyn = ["open book", "open-book", "open note", "open-note", "take home", "take-home"]
def containsSynonym(text, synonyms):
    """Return 1 if `text` contains any of `synonyms` (case-insensitive on
    `text`; synonyms are assumed already lower-case), else 0."""
    lowered = text.lower()
    return int(any(candidate in lowered for candidate in synonyms))
nonEmptyC["openBook"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, openBookSyn), axis=1)
nonEmptyC.openBook.sum()
93
nonEmptyC["exam"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, ["quiz", "exam"]), axis=1)
nonEmptyC.exam.sum()
255
nonEmptyC.loc[(nonEmptyC.exam == 1) & (nonEmptyC.openBook == 1)].shape
(61, 31)
round(61/255, 2)
0.24
255-61
194
# Generating data for honeycomb visual
bookMentions = np.append(np.ones(shape=61), np.zeros(shape=(255-61)))
np.random.shuffle(bookMentions)
np.savetxt("openBook.csv", bookMentions, delimiter=",")
nonEmptyC["nada"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, ["nothing", "none", "n/a"]), axis=1)
nonEmptyC.nada.sum()
189
nonEmptyC["async"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, ["asynchronous", "hybrid"]), axis=1)
nonEmptyC["async"].sum()
468
round(468/3273, 2)
0.14
softies = ["lenien", "understanding", "grace", "flexibility"]
nonEmptyC["includedProf"] = nonEmptyC.apply(lambda x: "prof" in x.continued.lower(), axis=1)
nonEmptyC["softerProfs"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, softies), axis=1)
nonEmptyC.includedProf.sum()
351
nonEmptyC.softerProfs.sum()
429
nonEmptyC.loc[(nonEmptyC.includedProf == True) & (nonEmptyC.softerProfs == True)].shape
(104, 30)
round(104/351, 2)
0.3
# Generating datapoints to use for scatter visualization
profs = np.append(np.ones(shape=104), np.zeros(shape=(351-104)))
np.random.shuffle(profs)
np.savetxt("softProf2.csv", profs, delimiter=",")
flexList = ['save', 'saving', 'saves', 'commute', 'commuting', 'available', 'availability', 'conflict', 'freedom', 'manage', 'managing', 'flexibility', 'flexible', 'responsiveness', 'responsive', 'accommodation', 'able', 'own', 'allow', 'allowed', 'allows', 'plan', 'ahead', 'comfort', 'prepare', 'ability', 'choose', 'sick', 'parent', 'parents', 'balance', 'work', 'convenient', 'opens', 'leniency']
featuresList = ['S/NS', 'accommodation', 'grades', 'grade', 'grading', 'lecture', 'presentation', 'professors', 'professor', 'participation', 'record', 'recorded', 'pre-recorded', 'discussion', 'participant', 'powerpoint', 'canvas', 'extensions', 'understanding', 'open book', 'exams', 'exam', 'finals', 'midterms', 'final', 'midterm', 'take-home', 'note', 'test', 'policies', 'office hours', 'office hour', 'online resources', 'zoom', 'online communities', 'attendance', 'remote', 'group projects']
accessList = ['accessible', 'access', 'accessibility', 'option', 'accommodation', 'immunocompromised', 'disabled', 'disability', 'disabilities', 'struggles', 'help', 'anxious', 'difficult', 'subtitles', 'feasible', 'international', 'choice', 'concern', 'safe', 'resources', 'services', 'equipment', 'need', 'possible']
negsList = ['lack', 'not helpful', 'suffer', 'suffered', 'little', 'degraded', 'hate', 'uncomfortable', 'terrible', 'failed', 'fail', 'failing', 'hard to', 'negative', 'anxiety', 'procrastination', 'barely', 'not getting', 'joke', 'lost', 'nothing', 'none', 'n/a', 'back']
nonEmptyC["flexCt"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, flexList), axis=1)
nonEmptyC["featuresCt"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, featuresList), axis=1)
nonEmptyC["accessCt"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, accessList), axis=1)
nonEmptyC["negCt"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, negsList), axis=1)
# all responses
nonEmptyC.shape
(3273, 37)
nonEmptyC[["flexCt", "featuresCt", "accessCt", "negCt"]].sum()
flexCt 1589 featuresCt 2382 accessCt 1488 negCt 527 dtype: int64
nonEmptyC.groupby("campus")[["flexCt", "featuresCt", "accessCt", "negCt"]].sum()
flexCt | featuresCt | accessCt | negCt | |
---|---|---|---|---|
campus | ||||
Bothell | 379 | 569 | 372 | 122 |
Seattle | 1050 | 1619 | 976 | 361 |
Tacoma | 158 | 193 | 139 | 44 |
shortFlex = ['flexibil', 'asynchronous', 'hybrid']
shortAccess = ['record', 'access', 'commut']
shortFeatures = ['quiz', 'assignment', 'exam']
# Course Guidelines - open_note, take_home
def addCt(dictionary):
    """For every keyword in `dictionary`, add a "<keyword>_ct" 0/1 indicator
    column to the global nonEmptyC frame, flagging responses whose
    `continued` text contains that keyword (via containsSynonym)."""
    for keyword in dictionary:
        column = keyword + "_ct"
        # Bind keyword as a default arg so the lambda is self-contained.
        nonEmptyC[column] = nonEmptyC.apply(
            lambda row, kw=keyword: containsSynonym(row.continued, [kw]), axis=1
        )
addCt(shortFlex)
addCt(shortAccess)
addCt(shortFeatures)
nonEmptyC[["flexibil_ct", "asynchronous_ct", "hybrid_ct"]].sum()
flexibil_ct 308 asynchronous_ct 341 hybrid_ct 140 dtype: int64
campusFlex = nonEmptyC.groupby("campus")[["flexibil_ct", "asynchronous_ct", "hybrid_ct"]].sum()
campusFlex
flexibil_ct | asynchronous_ct | hybrid_ct | |
---|---|---|---|
campus | |||
Bothell | 62 | 61 | 66 |
Seattle | 221 | 250 | 64 |
Tacoma | 25 | 30 | 9 |
campusFlex["totals"] = campusFlex.flexibil_ct + campusFlex.asynchronous_ct + campusFlex.hybrid_ct
campusFlex.totals.sum()
788
campusTotals.T
campus Bothell 864.0 Seattle 2110.0 Tacoma 297.0 Name: ones, dtype: float64
round(campusFlex / 788, 2)
flexibil_ct | asynchronous_ct | hybrid_ct | totals | |
---|---|---|---|---|
campus | ||||
Bothell | 0.08 | 0.08 | 0.08 | 0.24 |
Seattle | 0.28 | 0.32 | 0.08 | 0.68 |
Tacoma | 0.03 | 0.04 | 0.01 | 0.08 |
campusFlex["campusTotal"] = campusTotals
campusFlex.iloc[0] / campusTotals[0]
flexibil_ct 0.071759 asynchronous_ct 0.070602 hybrid_ct 0.076389 totals 0.218750 campusTotal 1.000000 Name: Bothell, dtype: float64
campusFlex.iloc[1] / campusTotals[1]
flexibil_ct 0.104739 asynchronous_ct 0.118483 hybrid_ct 0.030332 totals 0.253555 campusTotal 1.000000 Name: Seattle, dtype: float64
campusFlex.iloc[2] / campusTotals[2]
flexibil_ct 0.084175 asynchronous_ct 0.101010 hybrid_ct 0.030303 totals 0.215488 campusTotal 1.000000 Name: Tacoma, dtype: float64
normalizedFlex
flexibil_ct 0.0717593 asynchronous_ct 0.0706019 hybrid_ct 0.0763889 totals 0.21875 bothell flexibil_ct 0.071759 asynchronous_ct ... seattle flexibil_ct 0.104739 asynchronous_ct ... tacoma flexibil_ct 0.084175 asynchronous_ct ... Name: Bothell, dtype: object
nonEmptyC[["quiz_ct", "exam_ct", "assignment_ct"]].sum()
quiz_ct 82 exam_ct 201 assignment_ct 168 dtype: int64
campusFeatures = nonEmptyC.groupby("campus")[["quiz_ct", "exam_ct", "assignment_ct"]].sum()
campusFeatures["cTotal"] = campusTotals
campusFeatures
quiz_ct | exam_ct | assignment_ct | cTotal | |
---|---|---|---|---|
campus | ||||
Bothell | 10 | 38 | 39 | 864.0 |
Seattle | 67 | 153 | 112 | 2110.0 |
Tacoma | 4 | 10 | 17 | 297.0 |
campusFeatures["normQuiz"] = campusFeatures.quiz_ct / campusFeatures.cTotal
campusFeatures["normAss"] = campusFeatures.assignment_ct / campusFeatures.cTotal
campusFeatures["normExam"] = campusFeatures.exam_ct / campusFeatures.cTotal
campusFeatures[["normQuiz", "normAss", "normExam"]]
normQuiz | normAss | normExam | |
---|---|---|---|
campus | |||
Bothell | 0.011574 | 0.045139 | 0.043981 |
Seattle | 0.031754 | 0.053081 | 0.072512 |
Tacoma | 0.013468 | 0.057239 | 0.033670 |
campusTotals
campus Bothell 864.0 Seattle 2110.0 Tacoma 297.0 Name: ones, dtype: float64
nonEmptyC[["record_ct", "access_ct", "commut_ct"]].sum()
record_ct 938 access_ct 289 commut_ct 144 dtype: int64
campusAccess = nonEmptyC.groupby("campus")[["record_ct", "access_ct", "commut_ct"]].sum()
campusAccess
record_ct | access_ct | commut_ct | |
---|---|---|---|
campus | |||
Bothell | 161 | 43 | 43 |
Seattle | 720 | 229 | 84 |
Tacoma | 57 | 16 | 17 |
campusAccess["cTotal"] = campusTotals
campusAccess["recordNorm"] = campusAccess.record_ct / campusAccess.cTotal
campusAccess["accessNorm"] = campusAccess.access_ct / campusAccess.cTotal
campusAccess["commutdNorm"] = campusAccess.commut_ct / campusAccess.cTotal
campusAccess[["commutdNorm", "recordNorm", "accessNorm"]]
commutdNorm | recordNorm | accessNorm | |
---|---|---|---|
campus | |||
Bothell | 0.049769 | 0.186343 | 0.049769 |
Seattle | 0.039810 | 0.341232 | 0.108531 |
Tacoma | 0.057239 | 0.191919 | 0.053872 |
# Condensed 0/1 keyword-group flags per response.
# FIX(review): the original built sFeaturesCt from shortAccess and
# sAccessCt from shortFeatures — the two lists were swapped relative to the
# column names. Each column now matches its own keyword list; note that the
# printed sums / campus tables below were produced by the swapped version.
nonEmptyC["sFlexCt"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, shortFlex), axis=1)
nonEmptyC["sFeaturesCt"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, shortFeatures), axis=1)
nonEmptyC["sAccessCt"] = nonEmptyC.apply(lambda x: containsSynonym(x.continued, shortAccess), axis=1)
nonEmptyC[["sFlexCt", "sFeaturesCt", "sAccessCt"]].sum()
sFlexCt 717 sFeaturesCt 1202 sAccessCt 394 dtype: int64
nonEmptyC.groupby("campus")[["sFlexCt", "sFeaturesCt", "sAccessCt"]].sum()
sFlexCt | sFeaturesCt | sAccessCt | |
---|---|---|---|
campus | |||
Bothell | 179 | 224 | 76 |
Seattle | 479 | 894 | 289 |
Tacoma | 58 | 83 | 28 |
### Importing the nltk stopwords corpus is broken in this environment; the list is hardcoded here temporarily
stopWords = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"]
wordCount = pd.DataFrame(columns = ["word", "ct", "posCt", "negCt", "ntCt"])
## Sweep through each response's text and add the frequency of single words and adjacent word pairs to the appropriate dictionaries (overall counts plus pos/neg/neutral counts)
def countWords(text, dictionary, exp):
    """Tokenize one response and tally unigram and bigram frequencies into
    the global `wordCount` DataFrame.

    Parameters:
        text (str): one survey response ("continued" column).
        dictionary (str): name of the overall-count column to increment
            (the caller passes "ct").
        exp (int): experience score in {-1, 0, 1}; selects which of
            negCt / ntCt / posCt is also incremented.
    """
    # exp + 1 maps -1/0/1 onto negCt/ntCt/posCt respectively.
    headerDict = ["negCt", "ntCt", "posCt"]
    ctHeader = headerDict[exp + 1]
    # Split on single spaces only (runs of spaces yield "" tokens).
    words = text.split(" ")
    prev_word = ""
    global wordCount
    # Add each token to the frequency dictionaries
    for curWord in words:
        # Normalize: lower-case and trim surrounding punctuation.
        curWord = curWord.lower().strip('!?(.,\')')
        # Unigram: append a zeroed row the first time a non-stopword appears.
        if ((wordCount.loc[wordCount.word == curWord].shape[0] == 0) and (not (curWord in stopWords))): # Append new-showing words
            # NOTE(review): DataFrame.append was removed in pandas 2.0 —
            # this code requires pandas < 2 (use pd.concat on newer versions).
            wordCount = wordCount.append({'word': curWord, 'ct': 0, 'posCt': 0, 'negCt':0, 'ntCt': 0}, ignore_index=True)
        # Stopwords never get a row, so this .loc increment is a no-op for them.
        wordCount.loc[wordCount.word == curWord, [dictionary, ctHeader]] += 1 # Increase Both Counts
        # Bigram "prev_cur": counted unless BOTH halves are stopwords.
        if ((prev_word != "") and (not ((prev_word in stopWords) and (curWord in stopWords)))):
            concat = prev_word + "_" + curWord
            if (wordCount.loc[wordCount.word == concat].shape[0] == 0): # add new entry if new word
                wordCount = wordCount.append({'word': concat, 'ct': 0, 'posCt': 0, 'negCt':0, 'ntCt': 0}, ignore_index=True)
            wordCount.loc[wordCount.word == concat, [dictionary, ctHeader]] += 1
        prev_word = curWord # Reset for next iteration
nonEmptyC.apply(lambda x: countWords(x.continued, "ct", x.experienceInt), axis=1)
0 None 1 None 3 None 4 None 5 None ... 3764 None 3765 None 3766 None 3767 None 3768 None Length: 3273, dtype: object
wordCount.head()
word | ct | posCt | negCt | ntCt | |
---|---|---|---|---|---|
0 | professors | 338 | 68 | 52 | 218 |
1 | my_professors | 20 | 2 | 4 | 14 |
2 | professors_have | 25 | 4 | 2 | 19 |
3 | done | 46 | 10 | 12 | 24 |
4 | have_done | 10 | 0 | 3 | 7 |
nonEmptyC["ones"] = np.ones(shape=nonEmptyC.shape[0])
responses = nonEmptyC.groupby("experienceInt").ones.sum()
responses
experienceInt -1 502.0 0 1840.0 1 931.0 Name: ones, dtype: float64
responseRatio = responses[-1]/responses[1]
responseRatio
0.5392051557465092
wordCount.loc[wordCount.posCt == 0, "posCt"] = 1
wordCount["negRatio"] = wordCount.negCt / (wordCount.posCt * responseRatio)
wordCount.sort_values(by="negRatio", ascending=False).head(10)
word | ct | posCt | negCt | ntCt | negRatio | |
---|---|---|---|---|---|---|
733 | none | 134 | 1 | 108 | 25 | 200.295 |
6334 | none_of | 30 | 1 | 28 | 2 | 51.9283 |
413 | nothing | 57 | 4 | 35 | 18 | 16.2276 |
1985 | none_i | 10 | 1 | 8 | 2 | 14.8367 |
8135 | almost | 21 | 2 | 14 | 5 | 12.9821 |
14008 | struggle | 9 | 1 | 7 | 1 | 12.9821 |
1993 | tuition | 22 | 2 | 13 | 7 | 12.0548 |
4010 | paying_full | 6 | 1 | 6 | 0 | 11.1275 |
4011 | full_tuition | 6 | 1 | 6 | 0 | 11.1275 |
3790 | experience_i | 10 | 1 | 6 | 3 | 11.1275 |
math.log(2)
0.6931471805599453
wordCount["logNegRatio"] = np.log(wordCount.negRatio.astype(float))
wordCount.sort_values(by="logNegRatio", ascending=False)
word | ct | posCt | negCt | ntCt | negRatio | logNegRatio | |
---|---|---|---|---|---|---|---|
733 | none | 134 | 1 | 108 | 25 | 200.295 | 5.299790 |
6334 | none_of | 30 | 1 | 28 | 2 | 51.9283 | 3.949864 |
413 | nothing | 57 | 4 | 35 | 18 | 16.2276 | 2.786713 |
1985 | none_i | 10 | 1 | 8 | 2 | 14.8367 | 2.697101 |
8135 | almost | 21 | 2 | 14 | 5 | 12.9821 | 2.563569 |
... | ... | ... | ... | ... | ... | ... | ... |
15759 | this_continues | 3 | 1 | 0 | 3 | 0 | -inf |
15758 | and_efficient | 3 | 1 | 0 | 2 | 0 | -inf |
15757 | they've_been | 2 | 1 | 0 | 2 | 0 | -inf |
15756 | hours_they've | 1 | 1 | 0 | 1 | 0 | -inf |
38952 | meetings_will | 1 | 1 | 0 | 0 | 0 | -inf |
38953 rows × 7 columns
wordCount.drop(wordCount.loc[wordCount.word == "none_i"].index, inplace=True)
freqDict = pd.Series(wordCount["ct"].values,index=wordCount.word).to_dict()
posDict = pd.Series(wordCount["posCt"].values,index=wordCount.word).to_dict()
negDict = pd.Series(wordCount["negCt"].values,index=wordCount.word).to_dict()
ratioDict = pd.Series(wordCount["logNegRatio"].values, index=wordCount.word).to_dict()
len(freqDict)
38952
from wordcloud import WordCloud
def showWordCloud(frequencies, clrmap, save="na"):
    """Render a word cloud from a word->frequency mapping.

    Parameters:
        frequencies: dict mapping word -> weight.
        clrmap: matplotlib colormap (name or Colormap object).
        save: output path for plt.savefig; the sentinel "na" skips saving.
    """
    cloud = WordCloud(
        width=800,
        height=800,
        background_color='white',
        colormap=clrmap,
        min_font_size=10,
    )
    cloud = cloud.generate_from_frequencies(frequencies)
    # Display on a square 8x8-inch figure with no axes or padding.
    plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(cloud)
    plt.axis("off")
    plt.tight_layout(pad=0)
    if save != "na":
        plt.savefig(save)
    plt.show()
showWordCloud(freqDict, singleShadePurp, "singleShades/inclusive.png")
showWordCloud(freqDict, newcmp, "inclPur2.png")
showWordCloud(posDict, "ocean")
showWordCloud(negDict, "hot")
huge = ["class", "person", "would"]
for x in huge:
del posDict[x]
del negDict[x]
print(len(posDict))
38942
showWordCloud(posDict, singleShadePurp, "singleShades/positives.png")
showWordCloud(posDict, newcmp, "finalPos.png")
showWordCloud(negDict, "hot", "shortenedNeg.png")
del ratioDict["experience_i"]
from matplotlib import cm
from matplotlib.colors import ListedColormap,LinearSegmentedColormap
# modified hsv in 256 color class
purples = cm.get_cmap('Purples', 256)
# create new hsv colormaps with custom range
darkPruples = ListedColormap(purples(np.linspace(0.4, 1, 256)))
singleShadePurp = ListedColormap(purples(np.linspace(0.85, 0.95, 256)))
posPurp = ListedColormap(purples(np.linspace(0.4, 0.8, 256)))
bupu = cm.get_cmap('twilight_shifted', 256)
negPurp = ListedColormap(bupu(np.linspace(0.04, 0.14, 256)))
grayc = cm.get_cmap('gray', 256)
greys = ListedColormap(grayc(np.linspace(0.2, 0.5, 256)))
singleShadeGrey = ListedColormap(grayc(np.linspace(0.3, 0.4, 256)))
showWordCloud(ratioDict, singleShadeGrey, "singleShades/negatives.png")
showWordCloud(ratioDict, greys, "greyNegs2.png")