Password_Generator_Analysis/password_generator.py at master · patricknormile/Password_Generator_Analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288

# In[]:
"""

I created a password generator that generates a random password that has several features:
    8 characters
    first character always a letter
    uses uppercase and lowercase letters
    no special characters
    at least one number
    no character repeated back-to-back (same case)
    no character used more than twice
    when typed, will use at least 3 key strokes from each hand

The generator tests a proposed password and accepts it if it meets all of the above criteria; otherwise it tries again.

I want to see how often the generator fails to produce a valid password on the first try, and what the distribution of the number of tries looks like.

"""


import numpy as np
import matplotlib.pyplot as plt
import string
import pandas as pd
import collections
import itertools as it


class Setup():
    """
    Create lookup key for random number -> letter value / LR of keyboard.

    Attributes set by __init__:
        vals   - list of [character, hand] pairs, ordered lowercase a-z,
                 then digits 0-9, then uppercase A-Z; hand is 'L' or 'R'
        keys   - list of the integer keys 0 .. len(vals)-1
        lookup - dict mapping each integer key to its [character, hand] pair
    """
    def __init__(self):
        # every candidate character: lowercase, then single digits, then uppercase
        chars = (list(string.ascii_lowercase)
                 + list(string.digits)
                 + list(string.ascii_uppercase))

        # letters hit with a left-hand finger while typing; 'v' was missing
        # from the original list, which made 'v'/'V' count as right-hand keys
        left_letters = 'abcdefgqrstvwxz'
        # digits hit with a left-hand finger while typing
        # NOTE(review): standard touch typing also puts '5' on the left hand;
        # the original digit set is kept unchanged here - confirm intent
        left_digits = '1234'
        # a set gives constant-time membership tests for the loop below
        left_hand = set(left_letters) | set(left_letters.upper()) | set(left_digits)

        # tag each character with the hand that types it
        self.vals = [[c, 'L' if c in left_hand else 'R'] for c in chars]

        self.keys = [*range(len(self.vals))]
        self.lookup = dict(zip(self.keys, self.vals))
        #end of Setup class


# In[]:
"""
this class generates a password and lets you know how many tries it took, and which criteria weren't met when it failed
"""


class Pwd(Setup):
    """
    This class generates random passwords by drawing candidates and rejecting
    those that fail any of the acceptance checks.

    The class attribute `lookup` is the table created in Setup (integer key ->
    [character, hand]); it is built once, when the class is defined, and is
    shared by every instance.

    After generate() the instance carries:
        password   - the accepted password, or "Failed to generate password"
        ntries     - number of candidates that were drawn
        fails      - the rejected candidate strings, in the order drawn
        failreason - one string per rejected candidate: the comma-joined codes
                     of the checks it failed

    Fail reason codes:
        0 - no lowercase
        1 - no UPPERCASE
        2 - no numbers
        3 - char appears too often
        4 - starts with number
        5 - not enough L/R
        6 - repeated chars

    NOTE: candidates come from numpy's global random state, which suits the
    simulation below but is not a cryptographically secure source.
    """
    lookup = Setup().lookup

    def __init__(self):
        pass

    def generate(self, max_tries=100):
        """
        Draw 8-character candidates until one passes every check.

        max_tries: upper bound on the number of candidates drawn (default 100,
                   the original hard-coded limit).

        Returns nothing; results are stored on the instance (see class docs).
        """
        # built once per call: the original rebuilt this list for every
        # character of every candidate
        table = list(self.lookup.values())
        smpl = [*range(len(table))]
        self.failreason = []
        self.fails = []
        # these values stand if no attempt is made or every attempt is rejected
        self.password = "Failed to generate password"
        self.ntries = 0

        for attempt in range(1, max_tries + 1):
            # get a set of 8 random keys to test a password
            test = np.random.choice(smpl, 8)
            chars = [table[x][0] for x in test]
            hands = [table[x][1] for x in test]
            test_pwd = ''.join(chars)

            all_check = self._checks(chars, hands)
            self.ntries = attempt
            if min(all_check) > 0:
                self.password = test_pwd
                return

            self.fails.append(test_pwd)
            # keeps track if multiple criteria were not met
            self.failreason.append(
                ','.join(str(code) for code, x in enumerate(all_check) if x == 0))

    @staticmethod
    def _checks(chars, hands):
        """
        Score one candidate; a 0 at position k means fail reason k applies.

        chars: the candidate's characters, in order
        hands: 'L' or 'R' for each character, in the same order
        """
        # character-class counts (a count of 0 means that class is missing)
        n_lc = sum(c.islower() for c in chars)
        n_UC = sum(c.isupper() for c in chars)
        n_N = sum(c.isdigit() for c in chars)

        # no single character may be used three or more times
        most_used = max(chars.count(c) for c in set(chars))
        valfreq = 1 if most_used < 3 else 0

        # starting char must be a letter
        strt_cond = 1 if chars[0].isalpha() else 0

        # each hand must type at least 3 of the characters
        minLR = 1 if min(hands.count('L'), hands.count('R')) >= 3 else 0

        # the same character may not appear twice in a row
        rep_check = 0 if any(a == b for a, b in zip(chars, chars[1:])) else 1

        return [n_lc, n_UC, n_N, valfreq, strt_cond, minLR, rep_check]


# See examples:
A = Pwd()
# bare attribute access: only displays something when run as a notebook cell
A.lookup
A.generate()
# one value per line: the password, the number of tries, the failure codes of
# each rejected candidate, and the rejected candidates themselves
print(A.password, A.ntries, A.failreason, A.fails, sep='\n')

# In[]:
"""

See distribution of attempts needed to make a password

"""
# number of passwords to generate for the simulation
n = 50000
A = Pwd()
# run index -> [tries needed, failure codes of each rejected candidate]
cmpl_dist = {}
for i in range(n):
    A.generate()
    # generate() assigns a fresh failreason list on every call, so the stored
    # reference is not overwritten by later runs
    cmpl_dist[i] = [A.ntries, A.failreason]

# In[]:
# one row per simulated run: number of tries and the list of failure codes
df = pd.DataFrame.from_dict(data = cmpl_dist, orient='index', columns=['Tries', 'Fail Reason'])

print(max(df['Tries']))

# how many runs needed 1, 2, 3, ... tries
print(df.groupby('Tries')['Tries'].agg('count'))
lb = min(df['Tries'])
ub = max(df['Tries'])
#center histogram around integers: edges lb-0.5, lb+0.5, ..., ub+0.5
# np.arange leaves out its stop value, so the stop has to lie past ub+0.5;
# with a stop of ub+0.5 the last edge was ub-0.5 and the runs that needed
# `ub` tries were dropped from the plot
bucks = np.arange(lb-0.5, ub+1.5, 1)
plt.figure(figsize=(10,7))
plt.hist(data = df, x = 'Tries', bins = bucks, density=True)
plt.title("No. tries until success")
plt.show()

# In[]:
#looks very much like geometric distribution
# same histogram with a log-scaled count axis
plt.figure(figsize=(10, 7))
plt.hist(x='Tries', data=df, bins=bucks, density=True, log=True)
plt.title("No. tries until success (log)")
plt.show()

# share of runs that needed exactly k tries, indexed by k
dist = df.groupby('Tries')['Tries'].count() / len(df)
dist
print(df['Tries'].mean())
#parameter estimate using MME: runs divided by total tries (1 / mean tries)
# add in parameter estimate to graphs
phat = n / df['Tries'].sum()
print(phat)
# In[]:
print(dist)

# probability 2 or more tries
print(dist[dist.index >= 2].sum())


# In[]:
"""
fail reasons:
0 - no lowercase
1 - no UPPERCASE
2 - no numbers
3 - char appears too often
4 - starts with number
5 - not enough L/R
6 - repeated chars
"""
# count up how many failures from each
F = df['Fail Reason']
# Join the failure codes of every run that had at least one rejected candidate
# into one comma-separated string. Each run's codes are followed by a comma,
# so the result ends with a trailing comma whenever it is non-empty.
# A single join replaces repeated string concatenation inside the loop.
packstr = ''.join(','.join(v) + ',' for v in F if v != [])
#packstr

# In[]:
#count up how many by fail reason (keep in mind, could have multiple failures per iteration)
# drop the trailing comma, then split into the individual failure codes
upackstr = packstr[:-1].split(',')
print(len(upackstr))
# failure code -> number of times it occurs across all rejected candidates
errcnt = {i: upackstr.count(str(i)) for i in range(7)}
cnts = pd.DataFrame(
    {'Count': [errcnt[i] for i in range(7)]},
    index=['0 - no lowercase', '1 - no UPPERCASE', '2 - no numbers', '3 - char appears too often',
           '4 - starts with number', '5 - not enough L/R', '6 - repeated chars'],
)
# share of each code among all recorded failure codes
print(cnts.div(len(upackstr)))
_, ax = plt.subplots()
ax.pie(cnts.values.flatten(), labels = cnts.index, autopct='%1.1f%%')
# equal aspect ratio keeps the pie circular
ax.axis('equal')
plt.show()

# In[]:
# per-run lists of failure strings (one string such as '0,5' per rejected candidate)
ls = list(df['Fail Reason'])
# flatten to a single list with one entry per rejected candidate
mrgls = [reason for run_reasons in ls for reason in run_reasons]
print(len(mrgls))

# how often each distinct combination of failure codes occurred
inst = collections.Counter(mrgls)
inst.items()

# In[]:
# one row per distinct failure combination; 'index' holds the code string
df1 = pd.DataFrame.from_dict(dict(inst), orient='index', columns = ['Count']).reset_index()

# codes are single digits joined by commas, so a string of length L holds (L+1)/2 codes
df1['no.errs'] = (df1['index'].str.len() + 1) / 2
# flag, for each failure code, whether the combination contains it
for code in '0123456':
    df1['has' + code] = df1['index'].str.contains(code)

df1.head()
# In[]:
# of instances with error code
rng = list(range(7))
cols = ['has' + str(x) for x in rng]
nm = ['0 - no lowercase', '1 - no UPPERCASE', '2 - no numbers', '3 - char appears too often',
              '4 - starts with number', '5 - not enough L/R', '6 - repeated chars']
# for each code, print the share of rejected candidates without (False) and
# with (True) that code, weighting each combination by how often it occurred
for i, col in enumerate(cols):
    x = df1.groupby(col)['Count'].sum() / df1['Count'].sum()
    print(nm[i], x)

# In[]:
# see how this compares to expectation
# can see pretty similar
# need to divide out 1-phat since above is probability given failure

# chance that 8 independent uniform draws from the 26+10+26 characters contain
# none of a class of the given size, divided by the estimated failure
# probability (1-phat) to make it conditional on the attempt having failed
for label, class_size in (("No lcase: ", 26), ("No UCASE: ", 26), ("No numbers: ", 10)):
    print([label, (1 - (class_size / (26 + 10 + 26)))**8 / (1 - phat)])