# Experiment 1
# Explore ML libraries
#
# Explore popular machine learning libraries in Python, including Pandas, NumPy, Matplotlib, Seaborn, Scikit-Learn, TensorFlow, and PyTorch.
"""
## Pandas
"""

import pandas as pd

# --- Building a DataFrame from a dict of equal-length columns ---
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
}
df = pd.DataFrame(data)

# head() shows the leading rows; with three rows this is the whole frame.
print(df.head())

# --- Column selection ---
# A single label gives back a Series, a list of labels gives a DataFrame.
ages = df['Age']
subset = df[['Name', 'Age']]

print(ages)
print(subset)

# --- Missing values ---
# None entries become missing values once loaded into the DataFrame.
data_with_nan = {
    'Name': ['Alice', 'Bob', None, 'David'],
    'Age': [25, None, 35, 40],
}
df_nan = pd.DataFrame(data_with_nan)

print("\nDataFrame with missing values:")
print(df_nan)

# Per-column replacements: a placeholder for missing names and the mean of
# the known ages for missing ages. The frame is modified in place.
mean_age = df_nan['Age'].mean()
df_nan.fillna({'Name': 'Unknown', 'Age': mean_age}, inplace=True)
print("\nDataFrame after filling missing values:")
print(df_nan)

"""## NumPy"""

import numpy as np

# --- Array creation ---
array_1d = np.array([1, 2, 3, 4, 5])
print("1D Array:")
print(array_1d)

array_2d = np.array([[1, 2, 3], [4, 5, 6]])
print("\n2D Array:")
print(array_2d)

# Basic metadata: dimensions, total element count and element type.
print("\nShape of 2D Array:", array_2d.shape)
print("Size of Array:", array_2d.size)
print("Data Type of Array:", array_2d.dtype)

# --- Element-wise arithmetic ---
# The arithmetic operators on arrays apply element by element, exactly like
# the np.add / np.subtract / np.multiply / np.divide functions.
arr1 = np.array([10, 20, 30])
arr2 = np.array([5, 10, 15])

add_result = arr1 + arr2
sub_result = arr1 - arr2
mul_result = arr1 * arr2
div_result = arr1 / arr2

print("\nAddition Result:", add_result)
print("Subtraction Result:", sub_result)
print("Multiplication Result:", mul_result)
print("Division Result:", div_result)

# --- Reshaping ---
# Twelve consecutive integers (0..11) laid out as 3 rows by 4 columns.
reshaped_array = np.reshape(np.arange(12), (3, 4))
print("\nReshaped Array (3x4):")
print(reshaped_array)

"""## Matplotlib"""

import matplotlib.pyplot as plt
import numpy as np

# Five basic chart types drawn with the pyplot interface. Every plt.* call
# below acts on pyplot's implicit "current" figure, and each example ends
# with plt.show() to display what has been drawn so far.

# Data for plotting: five hand-picked (x, y) points
x = np.array([1, 2, 3, 4, 5])
y = np.array([23,333, 335, 457, 311])

# Creating the line plot: y against x, with title, axis labels and a grid
plt.plot(x, y)
plt.title("Basic Line Plot")
plt.xlabel("X-axis")
plt.ylabel("Y-axis")
plt.grid(True)
plt.show()

# Data for bar chart: one value per category label
categories = ['A', 'B', 'C', 'D']
values = [4, 7, 11, 8]

# Creating the bar chart: one bar per category, bar height taken from values
plt.bar(categories, values)
plt.title("Bar Chart")
plt.xlabel("Categories")
plt.ylabel("Values")
plt.show()

# Data for histogram: 1000 random samples from NumPy's global generator.
# No seed is set in this section, so the values change between runs.
# Note that this rebinds the name `data` used for the pandas dict earlier.
data = np.random.randn(1000)

# Creating the histogram: 30 bins, slightly transparent bars (alpha=0.7),
# grid lines along the y axis only
plt.hist(data, bins=30, alpha=0.7)
plt.title("Histogram")
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.grid(axis='y')
plt.show()

# Data for scatter plot: 50 random x and 50 random y coordinates.
# x and y are rebound here; the line-plot data above is no longer reachable.
x = np.random.rand(50)
y = np.random.rand(50)

# Creating the scatter plot: one marker per (x, y) pair
plt.scatter(x, y)
plt.title("Scatter Plot")
plt.xlabel("X-axis")
plt.ylabel("Y-axis")
plt.grid(True)
plt.show()

# Data for pie chart: wedge sizes and the label attached to each wedge
sizes = [15, 30, 45, 10]
labels = ['A', 'B', 'C', 'D']

# Creating the pie chart: autopct prints each wedge's share as a percentage
# with one decimal place
plt.pie(sizes, labels=labels, autopct='%1.1f%%')
plt.title("Pie Chart")
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.show()

"""## Tensorflow"""

import tensorflow as tf

# Graph-mode example using the TF1-style Session API (via tf.compat.v1).
# TensorFlow 2.x executes eagerly by default; in that mode tf.add(5, 3) is
# evaluated immediately and never added to the Session's graph, so
# sess.run() fails with "The Session graph is empty". Creating the op inside
# an explicit tf.Graph context places it in a graph, and the Session opened
# inside that context launches the same graph.
with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
    result = sess.run(tf.add(5, 3))
    print("Result:", result)

# The examples below run outside the graph context, i.e. in TensorFlow's
# default execution mode.

# Creating a tensor filled with zeros (2 rows, 3 columns)
zeros_tensor = tf.zeros((2, 3))
print("\nZeros Tensor:")
print(zeros_tensor)

# Creating a tensor filled with random values drawn uniformly between
# minval and maxval
random_tensor = tf.random.uniform((2, 3), minval=0, maxval=10)
print("\nRandom Tensor:")
print(random_tensor)

# Creating a tensor from a nested Python list with an explicit float32 dtype
constant_tensor = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
print("Constant Tensor:")
print(constant_tensor)

# Two 2x2 tensors for element-wise arithmetic
tensor_a = tf.constant([[1, 2], [3, 4]])
tensor_b = tf.constant([[5, 6], [7, 8]])

# Addition (element-wise). Note that this rebinds the name `add_result`
# used in the NumPy section.
add_result = tf.add(tensor_a, tensor_b)
print("\nAddition Result:")
print(add_result)

"""## Scikit learn"""

from sklearn import datasets
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

# --- Data ---
# Load the Iris dataset and show its feature matrix and class labels.
iris = datasets.load_iris()
print("Features:", iris.data)
print("Target:", iris.target)

# --- Hold-out split ---
# Keep 20% of the samples for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=42,
)
print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)

# --- Linear SVM ---
# fit() returns the estimator itself, so construction and training chain.
clf = SVC(kernel='linear').fit(X_train, y_train)
predictions = clf.predict(X_test)
print("Predictions:", predictions)

# --- Evaluation on the held-out samples ---
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

conf_matrix = confusion_matrix(y_test, predictions)
print("Confusion Matrix:\n", conf_matrix)

# --- Hyper-parameter search ---
# Try every combination of C and kernel with 5-fold cross-validation on the
# training data and report the best-scoring combination.
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
}
grid_search = GridSearchCV(SVC(), param_grid, cv=5).fit(X_train, y_train)
print("Best Parameters:", grid_search.best_params_)

"""### **1. Pandas**

#### **Advantages:**
- **Efficient Data Manipulation**: Allows for quick data manipulation with less code compared to traditional programming languages.
- **Flexible Data Structures**: Provides DataFrames and Series that can handle various data types and structures.
- **Data Cleaning and Preparation**: Streamlines processes for cleaning and preparing data for analysis.
- **Integration with Other Libraries**: Works well with NumPy, Matplotlib, and SciPy for enhanced functionality.

#### **Disadvantages:**
- **Memory Consumption**: Can be inefficient with very large datasets (over 2-3 GB).
- **Complex Syntax**: Some users may find the syntax complex compared to standard Python.
- **Performance Limitations**: Slower than lower-level languages like C or C++ for certain operations.

#### **Applications:**
- Data cleaning and preparation
- Exploratory Data Analysis (EDA)
- Feature engineering for machine learning
- Time series analysis
- Data visualization (in conjunction with Matplotlib)

---

### **2. NumPy**

#### **Advantages:**
- **Performance**: Highly efficient for numerical computations due to its array-oriented approach.
- **Multidimensional Arrays**: Supports n-dimensional arrays which are essential for scientific computing.
- **Broad Functionality**: Offers a wide range of mathematical functions to operate on arrays.

#### **Disadvantages:**
- **Limited Data Types**: Primarily designed for numerical data; less effective for handling non-numeric data.
- **Learning Curve**: May require some time to understand its array manipulation capabilities fully.

#### **Applications:**
- Numerical simulations
- Linear algebra operations
- Fourier transforms
- Random number generation
- Data preprocessing for machine learning

---

### **3. Matplotlib**

#### **Advantages:**
- **Comprehensive Visualization Options**: Supports a wide variety of static, animated, and interactive plots.
- **Customization**: Highly customizable plots with extensive options for styling.
- **Integration**: Works seamlessly with NumPy and Pandas.

#### **Disadvantages:**
- **Complexity in Advanced Visualizations**: Creating complex visualizations can require verbose code.
- **Performance Issues**: May not perform well with very large datasets or real-time plotting.

#### **Applications:**
- Data visualization in EDA
- Creating publication-quality plots
- Interactive visualizations in Jupyter notebooks
- Plotting results from machine learning models

---

### **4. Scikit-Learn**

#### **Advantages:**
- **User-Friendly API**: Simplifies the process of implementing machine learning algorithms.
- **Wide Range of Algorithms**: Provides numerous algorithms for classification, regression, clustering, and more.
- **Built-in Cross-validation Tools**: Facilitates model evaluation through cross-validation techniques.

#### **Disadvantages:**
- **Limited to Classical ML Algorithms**: Does not support deep learning models natively.
- **Performance on Large Datasets**: May struggle with very large datasets compared to specialized libraries like TensorFlow.

#### **Applications:**
- Classification tasks (e.g., spam detection)
- Regression analysis (e.g., predicting house prices)
- Clustering (e.g., customer segmentation)
- Model evaluation and selection

---

### **5. TensorFlow**

#### **Advantages:**
- **Scalability**: Designed to scale across multiple CPUs and GPUs, making it suitable for large-scale machine learning tasks.
- **Flexibility**: Offers both high-level APIs (like Keras) for quick model building and low-level APIs for custom model design.
- **Ecosystem Support**: Integrates well with other tools in the TensorFlow ecosystem for deployment and serving models.

#### **Disadvantages:**
- **Steep Learning Curve**: Can be complex for beginners due to its extensive features and options.
- **Verbose Syntax**: Low-level TensorFlow code typically requires more lines than the equivalent written with the high-level Keras API or Scikit-Learn.

#### **Applications:**
- Deep learning applications (e.g., image recognition, natural language processing)
- Reinforcement learning
- Time series forecasting
- Generative models (e.g., GANs)

"""

import seaborn as sns

# Seaborn figure-level plotting examples on three of its example datasets.
# Apply seaborn's default theme to the plots that follow.
sns.set_theme()

# Load an example dataset
# NOTE(review): load_dataset presumably fetches the data over the network on
# first use - confirm this works in the target environment.
tips = sns.load_dataset("tips")

# Create a visualization: tip against total_bill, one facet column per value
# of "time"; colour and marker style follow "smoker", marker size follows
# "size".
sns.relplot(
    data=tips,
    x="total_bill", y="tip", col="time",
    hue="smoker", style="smoker", size="size",
)

# Line plot of signal over timepoint, one facet column per "region";
# colour and line style follow "event".
fmri = sns.load_dataset("fmri")
sns.relplot(
    data=fmri, kind="line",
    x="timepoint", y="signal", col="region",
    hue="event", style="event",
)

# Distribution of total_bill per "time" facet, with a KDE curve (kde=True).
sns.displot(data=tips, x="total_bill", col="time", kde=True)

# Joint plot of flipper length against bill length, coloured by species.
penguins = sns.load_dataset("penguins")
sns.jointplot(data=penguins, x="flipper_length_mm", y="bill_length_mm", hue="species")

# Pairwise relationships between the penguin columns, coloured by species.
sns.pairplot(data=penguins, hue="species")

# Bill length against bill depth, with colour following body_mass_g.
sns.relplot(
    data=penguins,
    x="bill_length_mm", y="bill_depth_mm", hue="body_mass_g"
)

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Layered bivariate plot: scatter, 2D histogram and density contours of the
# same simulated sample, using seaborn's "dark" theme style.
sns.set_theme(style="dark")

# Simulate data from a bivariate Gaussian
# n points with zero mean and the 2x2 covariance below. RandomState(0) is a
# fixed seed, so the sample is the same on every run. Transposing the (n, 2)
# result lets it unpack into separate x and y arrays.
n = 10000
mean = [0, 0]
cov = [(2, .4), (.4, .2)]
rng = np.random.RandomState(0)
x, y = rng.multivariate_normal(mean, cov, n).T

# Draw a combo histogram and scatterplot with density contours
# None of the three seaborn calls is given an explicit `ax`.
# NOTE(review): they presumably all draw onto the axes created by
# plt.subplots() just above (the current axes) - confirm.
f, ax = plt.subplots(figsize=(6, 6))
sns.scatterplot(x=x, y=y, s=5, color=".15")
sns.histplot(x=x, y=y, bins=50, pthresh=.1, cmap="mako")
sns.kdeplot(x=x, y=y, levels=5, color="w", linewidths=1)

"""## Pytorch"""

import torch
import numpy as np

# --- NumPy interoperability ---
# Wrap an existing NumPy array as a tensor.
ndarray = np.array([0, 1, 2])
tensor = torch.from_numpy(ndarray)

# Print the tensor, then its shape, dtype and device, one per line.
# Expected output: tensor([0, 1, 2]), torch.Size([3]), torch.int64 (where
# NumPy's default integer is 64-bit) and cpu.
for shown in (tensor, tensor.shape, tensor.dtype, tensor.device):
    print(shown)

# --- Indexing, slicing and reshaping a 3x2 tensor ---
tensor_a = torch.tensor([[1, 2], [3, 4], [5, 6]])

# Row 1, column 0 -> the value 3 (as a zero-dimensional tensor).
element = tensor_a[1, 0]
print(f"Indexed Element (Row 1, Column 0): {element}")

# Rows 0 and 1 with every column kept.
slice_tensor = tensor_a[:2]
print(f"Sliced Tensor (First two rows): \n{slice_tensor}")

# The same six elements viewed as 2 rows by 3 columns.
reshaped_tensor = tensor_a.view(2, 3)
print(f"Reshaped Tensor (2x3): \n{reshaped_tensor}")

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

# Figure with a single axes, and 100 x samples covering 0 to 2π.
fig, ax = plt.subplots()
x = np.linspace(0, 2 * np.pi, 100)

# Draw the initial sine wave; ax.plot returns a list of lines, of which
# there is exactly one here.
line = ax.plot(x, np.sin(x), color='blue')[0]

# Fix the axis ranges so the view stays put while the curve moves, then
# label the plot.
ax.set_xlim(0, 2 * np.pi)
ax.set_ylim(-1.5, 1.5)
ax.set_xlabel('x')
ax.set_ylabel('sin(x)')
ax.set_title('Animated Sine Wave')


def animate(i):
    """Shift the sine wave for frame ``i`` and return the updated artists.

    The phase offset grows by 0.1 per frame, so successive frames slide the
    curve along the x axis. Only the y data of the existing line is replaced.
    """
    shifted = np.sin(x + i / 10)
    line.set_ydata(shifted)
    return (line,)


# 100 frames, 50 ms apart. The animation object is kept in a variable so it
# is not garbage-collected while the figure is shown.
ani = FuncAnimation(fig, animate, frames=100, interval=50)

# Display the animation
plt.show()