- download past winning numbers:
import pandas as pd
import matplotlib.pyplot as plt
# Download the Powerball CSV export from the NC lottery site into a DataFrame.
url = "https://nclottery.com/powerball-download"
df1 = pd.read_csv(url)
# Inspect the column names of the frame that was just loaded. `df` is only
# created by the later column-dropping step, so `df1` is the name to use here.
print(df1.columns)
- drop 'Date', 'Jackpot' and 'Power Play' columns:
# Discard the 'Date', 'Jackpot' and 'Power Play' columns; every other column
# of df1 is carried over unchanged into df.
df = df1.drop(columns=['Date', 'Jackpot', 'Power Play'])
- drop missing values:
# np.nan is needed below; the imports at the top of the file only bring in
# pandas and matplotlib, so numpy is imported here.
import numpy as np

# Turn every "?" placeholder into NaN so that dropna() treats it as missing,
# then drop each row that contains at least one missing value.
df = df.replace("?", np.nan).dropna(axis=0)
- combine all columns except 'Powerball' into one:
# Pick the first five columns of df by position, one Series per column.
columns_to_combine = [df.iloc[:, position] for position in range(5)]
# Stack the five Series end to end and wrap the result in a DataFrame.
stacked_numbers = pd.concat(columns_to_combine, axis=0)
df2 = pd.DataFrame(stacked_numbers)
- count the frequency of numbers:
- count the frequency of powerball numbers:
# Tally how many times each distinct value occurs in the 'Powerball' column.
df.loc[:, 'Powerball'].value_counts()
- create a boxplot for the numbers:
# Draw the box plot of df2 on the current axes, then display the figure.
axes = plt.gca()
axes.boxplot(df2)
plt.show()
- create a boxplot for the powerball numbers:
# Box plot of the 'Powerball' column on its own.
df3 = pd.DataFrame(df['Powerball'])
plt.boxplot(df3)
# show must be *called*; the bare attribute reference `plt.show` only
# evaluates to the function object and never displays the figure.
plt.show()
- drop 'Powerball' from df:
# Remove the 'Powerball' column; all remaining columns of df are kept.
num = df.drop(columns=['Powerball'])
df4 = pd.DataFrame(data=num)
# Inspect which columns are left.
df4.columns
- find out the most common two number pairs:
from itertools import combinations
from collections import Counter

# Columns read from each row of df4 when building combinations.
number_columns = ['Number 1', 'Number 2', 'Number 3', 'Number 4', 'Number 5']

# Generate every 2-number combination from each row.
# itertuples(index=False, name=None) yields one plain tuple per row, which
# avoids constructing a Series for every row the way iterrows() does.
pair_combinations = []
for draw in df4[number_columns].itertuples(index=False, name=None):
    for combo in combinations(draw, 2):
        # Sort the combo so that (1, 2) and (2, 1) are counted as the same pair.
        pair_combinations.append(tuple(sorted(combo)))

# Count the frequency of each combination.
pair_combination_counts = Counter(pair_combinations)

# Get the 15 most common 2-number combinations as (combination, count) tuples.
most_common_pairs = pair_combination_counts.most_common(15)

# Display the 15 most common 2-number combinations and their frequencies.
most_common_pairs
- find out the most frequent three-number combinations:
from itertools import combinations
from collections import Counter

# Columns read from each row of df4 when building combinations.
number_columns = ['Number 1', 'Number 2', 'Number 3', 'Number 4', 'Number 5']

# Generate every 3-number combination from each row.
# itertuples(index=False, name=None) yields one plain tuple per row, which
# avoids constructing a Series for every row the way iterrows() does.
# NOTE: the `pair_*` variable names are kept unchanged even though they hold
# 3-number combinations here, so any code relying on these names still works.
pair_combinations = []
for draw in df4[number_columns].itertuples(index=False, name=None):
    for combo in combinations(draw, 3):
        # Sort the combo so that every ordering of the same three numbers is
        # counted as one combination.
        pair_combinations.append(tuple(sorted(combo)))

# Count the frequency of each combination.
pair_combination_counts = Counter(pair_combinations)

# Get the 15 most common 3-number combinations as (combination, count) tuples.
most_common_pairs = pair_combination_counts.most_common(15)

# Display the 15 most common 3-number combinations and their frequencies.
most_common_pairs
- find out the most frequent four-number combinations:
from itertools import combinations
from collections import Counter

# Columns read from each row of df4 when building combinations.
number_columns = ['Number 1', 'Number 2', 'Number 3', 'Number 4', 'Number 5']

# Generate every 4-number combination from each row.
# itertuples(index=False, name=None) yields one plain tuple per row, which
# avoids constructing a Series for every row the way iterrows() does.
# NOTE: the `pair_*` variable names are kept unchanged even though they hold
# 4-number combinations here, so any code relying on these names still works.
pair_combinations = []
for draw in df4[number_columns].itertuples(index=False, name=None):
    for combo in combinations(draw, 4):
        # Sort the combo so that every ordering of the same four numbers is
        # counted as one combination.
        pair_combinations.append(tuple(sorted(combo)))

# Count the frequency of each combination.
pair_combination_counts = Counter(pair_combinations)

# Get the 15 most common 4-number combinations as (combination, count) tuples.
most_common_pairs = pair_combination_counts.most_common(15)

# Display the 15 most common 4-number combinations and their frequencies.
most_common_pairs