4 built-in collection data types

In Python, lists, sets, tuples, and dictionaries are used to store collections of data. Each has different properties and use cases.

list	an ordered, mutable (changeable) collection that allows duplicate values
set	an unordered, mutable collection that does not allow duplicates.
tuple	an ordered, immutable collection that allows duplicate values
dictionary	an insertion-ordered (since Python 3.7), mutable collection that stores key-value pairs; keys must be unique.

list

# create a list with square brackets
my_list = [1, 2, 3, "hello", 4.5]  # Mixed data types allowed

# type() returns the class of the object
type(my_list) # list

# dir() returns the attribute names of the object; the leading "....." stands
# for the double-underscore names that precede the public list methods below
dir(my_list) # .....
             # 'append',
             # 'clear',
             # 'copy',
             # 'count',
             # 'extend',
             # 'index',
             # 'insert',
             # 'pop',
             # 'remove',
             # 'reverse',
             # 'sort'

# accessing elements by index (indices start at 0)
x = [1, 4, 9, 16, 25]
print(x[0]) # 1  (first element)
print(x[-1]) # 25  (negative indices count from the end)
# slicing: x[start:stop:step] returns a new list; the stop index is excluded
print(x[1:3]) # [4, 9]
print(x[:3]) # [1, 4, 9]  (start omitted: from the beginning)
print(x[3:]) # [16, 25]  (stop omitted: through the end)
print(x[-3:]) # [9, 16, 25]  (the last three elements)
print(x[:2]) # [1, 4]
print(x[::2]) # [1, 9, 25]  (every second element)
print(x[0::]) # [1, 4, 9, 16, 25]  (the whole list)
print(x[::-1]) # [25, 16, 9, 4, 1]  (step -1: reversed copy)

# modify / update one element in place
x[0] = 0 # [0, 4, 9, 16, 25]

# add / append a single element at the end
x.append(36) # [0, 4, 9, 16, 25, 36]

# extend the list to the back with every element of another list
x.extend([40, 45]) # [0, 4, 9, 16, 25, 36, 40, 45]

or:

# start again from the list as it was before x.extend(...), so that this
# alternative produces exactly the results shown in the comments below
x = [0, 4, 9, 16, 25, 36]

# extend the list to the back (builds a new list and rebinds x to it)
x = x + [40, 45] # [0, 4, 9, 16, 25, 36, 40, 45]

# extend the list to the front
x = [1, 4, 9] + x # [1, 4, 9, 0, 4, 9, 16, 25, 36, 40, 45]

# remove by value (only the first matching element is removed)
x.remove(0) # [1, 4, 9, 4, 9, 16, 25, 36, 40, 45]

# remove by index using "del"
del x[2] # [1, 4, 4, 9, 16, 25, 36, 40, 45]

# remove duplicate values, keeping the first occurrence of each value in its
# original position; list(set(x)) also removes duplicates, but a set is
# unordered, so the order of the resulting list would not be guaranteed
x = list(dict.fromkeys(x)) # [1, 4, 9, 16, 25, 36, 40, 45]

# remove the last value (pop() also returns the removed value, here 45)
x.pop() # [1, 4, 9, 16, 25, 36, 40]

# remove two biggest values
x = sorted(x)[:-2] # [1, 4, 9, 16, 25]

# remove the first element
x.pop(0) # [4, 9, 16, 25]

# sort: sorted() returns a new sorted list and leaves x itself unchanged
x = [1, 9, 25, 16, 4]
print(sorted(x)) # [1, 4, 9, 16, 25]
print(sorted(x, reverse=True)) # [25, 16, 9, 4, 1]  (descending order)

# insert by index: insert(i, value) places value before position i
# (x is still in its original order because sorted() did not modify it)
x.insert(2, 7) # [1, 9, 7, 25, 16, 4]

# create a list by splitting a comma-separated string
fruit_csv = 'apple,banana,pear'
fruits = fruit_csv.split(',') # ['apple', 'banana', 'pear']

# use an f-string to embed each list item in a sentence
for fruit in fruits:
    sentence = f'I like {fruit}s'
    print(sentence) # I like apples
                    # I like bananas
                    # I like pears

# duplicate data
basket = ['apple', 'orange', 'apple', 'pear', 'orange', 'banana']
basket.count('apple') # 2  (count() returns how often a value occurs)

# find duplicate data
from collections import Counter

# Counter maps every distinct item to its number of occurrences
counter = Counter(basket)

# keep only the items that occur more than once
duplicates = {item: count for item, count in counter.items() if count > 1}

print(counter) # Counter({'apple': 2, 'orange': 2, 'pear': 1, 'banana': 1})
print(duplicates) # {'apple': 2, 'orange': 2}

# show indices of duplicate data
from collections import defaultdict

# Create a dictionary to store indices of each item
# (defaultdict(list) supplies an empty list the first time a key is used)
indices = defaultdict(list)

# Populate the dictionary with item indices
for index, item in enumerate(basket):
    indices[item].append(index)

# Keep only the duplicates (items appearing more than once)
duplicates_with_indices = {item: idx_list for item, idx_list in indices.items() if len(idx_list) > 1}

print(duplicates_with_indices) # {'apple': [0, 2], 'orange': [1, 4]}

# sort in ascending, then descending order (duplicates are still present)
for i in sorted(basket):
    print(i) # apple
             # apple
             # banana
             # orange
             # orange
             # pear
for i in sorted(basket, reverse=True):
    print(i) # pear
             # orange
             # orange
             # banana
             # apple
             # apple
# remove duplicate values using set(), then sort the remaining values
for i in sorted(set(basket)):
    print(i) # apple
             # banana
             # orange
             # pear
# save the sorted, unduplicated list to new_list
new_list = []
for i in sorted(set(basket)):
    new_list.append(i)
# print once, after the loop has finished, so only the complete list is shown
print(new_list) # ['apple', 'banana', 'orange', 'pear']

or:

# the same result in one expression: set() drops the duplicates and sorted() returns a sorted list
new_list = sorted(set(basket)) # ['apple', 'banana', 'orange', 'pear']

# squaring with "for" loop
y = [1, 2, 3]
for a in y:
    print(f'{a} square is {a**2}.') # 1 square is 1.
                                    # 2 square is 4.
                                    # 3 square is 9.

# squaring for even numbers
for a in y:
    if a % 2 != 0:
        continue  # skip the odd numbers
    print(f'{a} square is {a**2} for even numbers.') # 2 square is 4 for even numbers.

# squaring with odd numbers
for a in y:
    if a % 2 == 0:
        continue  # skip the even numbers
    print(f'{a} square is {a**2} for odd numbers.') # 1 square is 1 for odd numbers.
                                                    # 3 square is 9 for odd numbers.

# squaring for both even and odd numbers
for a in y:
    if a % 2 != 0:
        print(f'{a} square is {a**2} for odd numbers.')
    else:
        print(f'{a} square is {a**2} for even numbers.') # 1 square is 1 for odd numbers.
                                                         # 2 square is 4 for even numbers.
                                                         # 3 square is 9 for odd numbers.

# squaring numbers created with "range" (range(5) yields 0, 1, 2, 3, 4)
for i in range(5):
    square = i ** 2
    print(square) # 0
                  # 1
                  # 4
                  # 9
                  # 16

# make a list from a to g
# (ord() gives a character's code point, chr() turns a code point back into a character)
letters = [chr(i) for i in range(ord('a'), ord('g') + 1)] # ['a', 'b', 'c', 'd', 'e', 'f', 'g']

# replace some values (slice assignment)
letters[2:5] = ['C', 'D', 'E'] # ['a', 'b', 'C', 'D', 'E', 'f', 'g']

# remove some values (assigning an empty list to a slice deletes those elements)
letters[2:5] = [] # ['a', 'b', 'f', 'g']
len(letters) # 4

# clear the list in place; "letters = []" would only rebind the name to a new
# empty list and leave the original list object untouched
letters.clear() # []

# removing NaNs

import math

data = [10, float('nan'), 25, 30, math.nan, 40, None, "apple", True, 3.5]
# keep a value unless it is a float that is NaN
cleaned_data = [value for value in data if not isinstance(value, float) or not math.isnan(value)]
print(cleaned_data) # [10, 25, 30, 40, None, 'apple', True, 3.5]

# isinstance(value, float): checks whether value is a float.
# math.isnan(value): checks whether value is "Not a Number" (NaN); it is only
# reached for floats, so it is never called on None or on a string.

# removing NaNs and strings (None is dropped as well, because it is not a number)
numeric_types = (int, float)
cleaned_data = [value for value in data if isinstance(value, numeric_types) and not math.isnan(value)]
print(cleaned_data) # [10, 25, 30, 40, True, 3.5]

# isinstance(value, numeric_types): keeps only int and float values
# (True stays in the list because bool is a subclass of int).
# not math.isnan(value): drops the NaN values.

# removing NaNs, strings and boolean values
cleaned_data = [
    value
    for value in data
    if not isinstance(value, bool) and isinstance(value, numeric_types) and not math.isnan(value)
]
print(cleaned_data) # [10, 25, 30, 40, 3.5]

# not isinstance(value, bool): excludes the boolean values (True and False).