MySQL + Python for Data Analysts¶
Using MySQL Connector and Python to implement a database on MySQL Server, and to create, read, update and delete data in that database.
Introduction¶
The notebook goes through all the processes involved with using Python and the MySQL Connector to perform the standard CRUD functions on a database running on MySQL Server.
1. Import Libraries¶
1.1 - Import Libraries¶
import mysql.connector
from mysql.connector import Error
import pandas as pd
def create_server_connection(host_name, user_name, user_password):
connection = None
try:
connection = mysql.connector.connect(
host=host_name,
user=user_name,
passwd=user_password
)
print("MySQL Database connection successful")
except Error as err:
print(f"Error: '{err}'")
return connection
pw = "******" # IMPORTANT! Put your MySQL Terminal password here.
db = "school" # This is the name of the database we will create in the next step - call it whatever you like.
connection = create_server_connection("localhost", "root", pw)
MySQL Database connection successful
2.2 - Create a New Database¶
Now we define a function to create a new database on our server.
def create_database(connection, query):
cursor = connection.cursor()
try:
cursor.execute(query)
print("Database created successfully")
except Error as err:
print(f"Error: '{err}'")
create_database_query = "CREATE DATABASE school"
create_database(connection, create_database_query)
Database created successfully
2.3 - Modify Server Connection Function, Create Database Connection Function¶
Now that we've created a DB, let's modify our create_server_connection function to create a new function for connecting directly to that DB. This will prove more useful than just connecting to our server.
def create_db_connection(host_name, user_name, user_password, db_name):
connection = None
try:
connection = mysql.connector.connect(
host=host_name,
user=user_name,
passwd=user_password,
database=db_name
)
print("MySQL Database connection successful")
except Error as err:
print(f"Error: '{err}'")
return connection
2.4 - Define Query Execution Function¶
The final step of this stage is to create a function which will allow us to execute queries written in SQL.
def execute_query(connection, query):
cursor = connection.cursor()
try:
cursor.execute(query)
connection.commit()
print("Query successful")
except Error as err:
print(f"Error: '{err}'")
# Assign our SQL command to a python variable using triple quotes to create a multi-line string
create_teacher_table = """
CREATE TABLE teacher (
teacher_id INT PRIMARY KEY,
first_name VARCHAR(40) NOT NULL,
last_name VARCHAR(40) NOT NULL,
language_1 VARCHAR(3) NOT NULL,
language_2 VARCHAR(3),
dob DATE,
tax_id INT UNIQUE,
phone_no VARCHAR(20)
);
"""
connection = create_db_connection("localhost", "root", pw, db) # Connect to the Database
execute_query(connection, create_teacher_table) # Execute our defined query
MySQL Database connection successful Query successful
3.2 - Create Remaining Tables¶
Now let's create the rest of our tables.
create_client_table = """
CREATE TABLE client (
client_id INT PRIMARY KEY,
client_name VARCHAR(40) NOT NULL,
address VARCHAR(60) NOT NULL,
industry VARCHAR(20)
);
"""
create_participant_table = """
CREATE TABLE participant (
participant_id INT PRIMARY KEY,
first_name VARCHAR(40) NOT NULL,
last_name VARCHAR(40) NOT NULL,
phone_no VARCHAR(20),
client INT
);
"""
create_course_table = """
CREATE TABLE course (
course_id INT PRIMARY KEY,
course_name VARCHAR(40) NOT NULL,
language VARCHAR(3) NOT NULL,
level VARCHAR(2),
course_length_weeks INT,
start_date DATE,
in_school BOOLEAN,
teacher INT,
client INT
);
"""
connection = create_db_connection("localhost", "root", pw, db)
execute_query(connection, create_client_table)
execute_query(connection, create_participant_table)
execute_query(connection, create_course_table)
MySQL Database connection successful Query successful Query successful Query successful
3.3 - Define Foreign Key Relationships¶
Now altering the tables to create Foreign Key relationships, and creating our final table, takes_course
alter_participant = """
ALTER TABLE participant
ADD FOREIGN KEY(client)
REFERENCES client(client_id)
ON DELETE SET NULL;
"""
alter_course = """
ALTER TABLE course
ADD FOREIGN KEY(teacher)
REFERENCES teacher(teacher_id)
ON DELETE SET NULL;
"""
alter_course_again = """
ALTER TABLE course
ADD FOREIGN KEY(client)
REFERENCES client(client_id)
ON DELETE SET NULL;
"""
create_takescourse_table = """
CREATE TABLE takes_course (
participant_id INT,
course_id INT,
PRIMARY KEY(participant_id, course_id),
FOREIGN KEY(participant_id) REFERENCES participant(participant_id) ON DELETE CASCADE, -- it makes no sense to keep this rtelation when a participant or course is no longer in the system, hence why CASCADE this time
FOREIGN KEY(course_id) REFERENCES course(course_id) ON DELETE CASCADE
);
"""
connection = create_db_connection("localhost", "root", pw, db)
execute_query(connection, alter_participant)
execute_query(connection, alter_course)
execute_query(connection, alter_course_again)
execute_query(connection, create_takescourse_table)
MySQL Database connection successful Query successful Query successful Query successful Query successful
pop_teacher = """
INSERT INTO teacher VALUES
(1, 'James', 'Smith', 'ENG', NULL, '1985-04-20', 12345, '+491774553676'),
(2, 'Stefanie', 'Martin', 'FRA', NULL, '1970-02-17', 23456, '+491234567890'),
(3, 'Steve', 'Wang', 'MAN', 'ENG', '1990-11-12', 34567, '+447840921333'),
(4, 'Friederike', 'Müller-Rossi', 'DEU', 'ITA', '1987-07-07', 45678, '+492345678901'),
(5, 'Isobel', 'Ivanova', 'RUS', 'ENG', '1963-05-30', 56789, '+491772635467'),
(6, 'Niamh', 'Murphy', 'ENG', 'IRI', '1995-09-08', 67890, '+491231231232');
"""
connection = create_db_connection("localhost", "root", pw, db)
execute_query(connection, pop_teacher)
MySQL Database connection successful Query successful
4.2 - Populate Remaining Tables¶
Now, let's populate the remaining tables.
pop_client = """
INSERT INTO client VALUES
(101, 'Big Business Federation', '123 Falschungstraße, 10999 Berlin', 'NGO'),
(102, 'eCommerce GmbH', '27 Ersatz Allee, 10317 Berlin', 'Retail'),
(103, 'AutoMaker AG', '20 Künstlichstraße, 10023 Berlin', 'Auto'),
(104, 'Banko Bank', '12 Betrugstraße, 12345 Berlin', 'Banking'),
(105, 'WeMoveIt GmbH', '138 Arglistweg, 10065 Berlin', 'Logistics');
"""
pop_participant = """
INSERT INTO participant VALUES
(101, 'Marina', 'Berg','491635558182', 101),
(102, 'Andrea', 'Duerr', '49159555740', 101),
(103, 'Philipp', 'Probst', '49155555692', 102),
(104, 'René', 'Brandt', '4916355546', 102),
(105, 'Susanne', 'Shuster', '49155555779', 102),
(106, 'Christian', 'Schreiner', '49162555375', 101),
(107, 'Harry', 'Kim', '49177555633', 101),
(108, 'Jan', 'Nowak', '49151555824', 101),
(109, 'Pablo', 'Garcia', '49162555176', 101),
(110, 'Melanie', 'Dreschler', '49151555527', 103),
(111, 'Dieter', 'Durr', '49178555311', 103),
(112, 'Max', 'Mustermann', '49152555195', 104),
(113, 'Maxine', 'Mustermann', '49177555355', 104),
(114, 'Heiko', 'Fleischer', '49155555581', 105);
"""
pop_course = """
INSERT INTO course VALUES
(12, 'English for Logistics', 'ENG', 'A1', 10, '2020-02-01', TRUE, 1, 105),
(13, 'Beginner English', 'ENG', 'A2', 40, '2019-11-12', FALSE, 6, 101),
(14, 'Intermediate English', 'ENG', 'B2', 40, '2019-11-12', FALSE, 6, 101),
(15, 'Advanced English', 'ENG', 'C1', 40, '2019-11-12', FALSE, 6, 101),
(16, 'Mandarin für Autoindustrie', 'MAN', 'B1', 15, '2020-01-15', TRUE, 3, 103),
(17, 'Français intermédiaire', 'FRA', 'B1', 18, '2020-04-03', FALSE, 2, 101),
(18, 'Deutsch für Anfänger', 'DEU', 'A2', 8, '2020-02-14', TRUE, 4, 102),
(19, 'Intermediate English', 'ENG', 'B2', 10, '2020-03-29', FALSE, 1, 104),
(20, 'Fortgeschrittenes Russisch', 'RUS', 'C1', 4, '2020-04-08', FALSE, 5, 103);
"""
pop_takescourse = """
INSERT INTO takes_course VALUES
(101, 15),
(101, 17),
(102, 17),
(103, 18),
(104, 18),
(105, 18),
(106, 13),
(107, 13),
(108, 13),
(109, 14),
(109, 15),
(110, 16),
(110, 20),
(111, 16),
(114, 12),
(112, 19),
(113, 19);
"""
connection = create_db_connection("localhost", "root", pw, db)
execute_query(connection, pop_client)
execute_query(connection, pop_participant)
execute_query(connection, pop_course)
execute_query(connection, pop_takescourse)
MySQL Database connection successful Query successful Query successful Query successful Query successful
def read_query(connection, query):
cursor = connection.cursor()
result = None
try:
cursor.execute(query)
result = cursor.fetchall()
return result
except Error as err:
print(f"Error: '{err}'")
5.2 - Read Data from Database¶
Let's try this with a simple query to begin with.
q1 = """
SELECT *
FROM teacher;
"""
connection = create_db_connection("localhost", "root", pw, db)
results = read_query(connection, q1)
for result in results:
print(result)
MySQL Database connection successful (1, 'James', 'Smith', 'ENG', None, datetime.date(1985, 4, 20), 12345, '+491774553676') (2, 'Stefanie', 'Martin', 'FRA', None, datetime.date(1970, 2, 17), 23456, '+491234567890') (3, 'Steve', 'Wang', 'MAN', 'ENG', datetime.date(1990, 11, 12), 34567, '+447840921333') (4, 'Friederike', 'Müller-Rossi', 'DEU', 'ITA', datetime.date(1987, 7, 7), 45678, '+492345678901') (5, 'Isobel', 'Ivanova', 'RUS', 'ENG', datetime.date(1963, 5, 30), 56789, '+491772635467') (6, 'Niamh', 'Murphy', 'ENG', 'IRI', datetime.date(1995, 9, 8), 67890, '+491231231232')
And here are some more queries to try.
q2 = """
SELECT last_name, dob
FROM teacher;
"""
q3 = """
SELECT *
FROM course
WHERE language = 'ENG'
ORDER BY start_date DESC;
"""
q4 = """
SELECT first_name, last_name, phone_no
FROM teacher
WHERE dob < '1990-01-01';
"""
q5 = """
SELECT course.course_id, course.course_name, course.language, client.client_name, client.address
FROM course
JOIN client
ON course.client = client.client_id
WHERE course.in_school = FALSE;
"""
connection = create_db_connection("localhost", "root", pw, db)
results = read_query(connection, q5)
for result in results:
print(result)
MySQL Database connection successful (13, 'Beginner English', 'ENG', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin') (14, 'Intermediate English', 'ENG', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin') (15, 'Advanced English', 'ENG', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin') (17, 'Français intermédiaire', 'FRA', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin') (19, 'Intermediate English', 'ENG', 'Banko Bank', '12 Betrugstraße, 12345 Berlin') (20, 'Fortgeschrittenes Russisch', 'RUS', 'AutoMaker AG', '20 Künstlichstraße, 10023 Berlin')
5.3 - Formatting Output into a List¶
Now we can assign the results to a list, to use further in our python applications or scripts.
The following code returns the results of our query as a list of tuples.
#Initialise empty list
from_db = []
# Loop over the results and append them into our list, different styles
# Returns a list of tuples
for result in results:
result = result
from_db.append(result)
print(from_db)
[(13, 'Beginner English', 'ENG', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin'), (14, 'Intermediate English', 'ENG', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin'), (15, 'Advanced English', 'ENG', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin'), (17, 'Français intermédiaire', 'FRA', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin'), (19, 'Intermediate English', 'ENG', 'Banko Bank', '12 Betrugstraße, 12345 Berlin'), (20, 'Fortgeschrittenes Russisch', 'RUS', 'AutoMaker AG', '20 Künstlichstraße, 10023 Berlin')]
5.4 - Formatting Output into a List of Lists¶
If we want to, we can make this return a list of lists instead, like so:
# Returns a list of lists
from_db = []
for result in results:
result = list(result)
from_db.append(result)
print(from_db)
[[13, 'Beginner English', 'ENG', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin'], [14, 'Intermediate English', 'ENG', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin'], [15, 'Advanced English', 'ENG', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin'], [17, 'Français intermédiaire', 'FRA', 'Big Business Federation', '123 Falschungstraße, 10999 Berlin'], [19, 'Intermediate English', 'ENG', 'Banko Bank', '12 Betrugstraße, 12345 Berlin'], [20, 'Fortgeschrittenes Russisch', 'RUS', 'AutoMaker AG', '20 Künstlichstraße, 10023 Berlin']]
# Returns a list of lists and then creates a pandas DataFrame
from_db = []
for result in results:
result = list(result)
from_db.append(result)
columns = ["course_id", "course_name", "language", "client_name", "address"]
df = pd.DataFrame(from_db, columns=columns)
display(df)
| course_id | course_name | language | client_name | address | |
|---|---|---|---|---|---|
| 0 | 13 | Beginner English | ENG | Big Business Federation | 123 Falschungstraße, 10999 Berlin |
| 1 | 14 | Intermediate English | ENG | Big Business Federation | 123 Falschungstraße, 10999 Berlin |
| 2 | 15 | Advanced English | ENG | Big Business Federation | 123 Falschungstraße, 10999 Berlin |
| 3 | 17 | Français intermédiaire | FRA | Big Business Federation | 123 Falschungstraße, 10999 Berlin |
| 4 | 19 | Intermediate English | ENG | Banko Bank | 12 Betrugstraße, 12345 Berlin |
| 5 | 20 | Fortgeschrittenes Russisch | RUS | AutoMaker AG | 20 Künstlichstraße, 10023 Berlin |
6. Updating Records¶
Sometimes we will need to update our Database. We can do this very easily using our execute_query function alongside the SQL UPDATE statement.
6.1 - Updating Client Address¶
The School receives notification that the Big Business Federation has moved office, and now they are located at 23 Fingiertweg, 14534 Berlin. The database can be updated like so:
update = """
UPDATE client
SET address = '23 Fingiertweg, 14534 Berlin'
WHERE client_id = 101;
"""
connection = create_db_connection("localhost", "root", pw, db)
execute_query(connection, update)
MySQL Database connection successful Query successful
Let's see if that worked.
q1 = """
SELECT *
FROM client
WHERE client_id = 101;
"""
connection = create_db_connection("localhost", "root", pw, db)
results = read_query(connection, q1)
for result in results:
print(result)
MySQL Database connection successful (101, 'Big Business Federation', '23 Fingiertweg, 14534 Berlin', 'NGO')
q1 = """
SELECT *
FROM course;
"""
connection = create_db_connection("localhost", "root", pw, db)
results = read_query(connection, q1)
from_db = []
for result in results:
print(result)
MySQL Database connection successful (12, 'English for Logistics', 'ENG', 'A1', 10, datetime.date(2020, 2, 1), 1, 1, 105) (13, 'Beginner English', 'ENG', 'A2', 40, datetime.date(2019, 11, 12), 0, 6, 101) (14, 'Intermediate English', 'ENG', 'B2', 40, datetime.date(2019, 11, 12), 0, 6, 101) (15, 'Advanced English', 'ENG', 'C1', 40, datetime.date(2019, 11, 12), 0, 6, 101) (16, 'Mandarin für Autoindustrie', 'MAN', 'B1', 15, datetime.date(2020, 1, 15), 1, 3, 103) (17, 'Français intermédiaire', 'FRA', 'B1', 18, datetime.date(2020, 4, 3), 0, 2, 101) (18, 'Deutsch für Anfänger', 'DEU', 'A2', 8, datetime.date(2020, 2, 14), 1, 4, 102) (19, 'Intermediate English', 'ENG', 'B2', 10, datetime.date(2020, 3, 29), 0, 1, 104) (20, 'Fortgeschrittenes Russisch', 'RUS', 'C1', 4, datetime.date(2020, 4, 8), 0, 5, 103)
Let's delete the course with course_id 20 - 'Fortgeschrittenes Russisch'. For this we will use the DELETE SQL command.
delete_course = """
DELETE FROM course WHERE course_id = 20;
"""
connection = create_db_connection("localhost", "root", pw, db)
execute_query(connection, delete_course)
MySQL Database connection successful Query successful
Let's confirm that the course is gone.
q1 = """
SELECT *
FROM course;
"""
connection = create_db_connection("localhost", "root", pw, db)
results = read_query(connection, q1)
from_db = []
for result in results:
print(result)
MySQL Database connection successful (12, 'English for Logistics', 'ENG', 'A1', 10, datetime.date(2020, 2, 1), 1, 1, 105) (13, 'Beginner English', 'ENG', 'A2', 40, datetime.date(2019, 11, 12), 0, 6, 101) (14, 'Intermediate English', 'ENG', 'B2', 40, datetime.date(2019, 11, 12), 0, 6, 101) (15, 'Advanced English', 'ENG', 'C1', 40, datetime.date(2019, 11, 12), 0, 6, 101) (16, 'Mandarin für Autoindustrie', 'MAN', 'B1', 15, datetime.date(2020, 1, 15), 1, 3, 103) (17, 'Français intermédiaire', 'FRA', 'B1', 18, datetime.date(2020, 4, 3), 0, 2, 101) (18, 'Deutsch für Anfänger', 'DEU', 'A2', 8, datetime.date(2020, 2, 14), 1, 4, 102) (19, 'Intermediate English', 'ENG', 'B2', 10, datetime.date(2020, 3, 29), 0, 1, 104)
7.2 - Restoring the Course¶
Let's put that course back - it's a perfectly good course.
restore_russian = """
INSERT INTO course VALUES
(20, 'Fortgeschrittenes Russisch', 'RUS', 'C1', 4, '2020-04-08', FALSE, 5, 103);
"""
connection = create_db_connection("localhost", "root", pw, db)
execute_query(connection, restore_russian)
q1 = """
SELECT *
FROM course;
"""
connection = create_db_connection("localhost", "root", pw, db)
results = read_query(connection, q1)
from_db = []
for result in results:
print(result)
MySQL Database connection successful Query successful MySQL Database connection successful (12, 'English for Logistics', 'ENG', 'A1', 10, datetime.date(2020, 2, 1), 1, 1, 105) (13, 'Beginner English', 'ENG', 'A2', 40, datetime.date(2019, 11, 12), 0, 6, 101) (14, 'Intermediate English', 'ENG', 'B2', 40, datetime.date(2019, 11, 12), 0, 6, 101) (15, 'Advanced English', 'ENG', 'C1', 40, datetime.date(2019, 11, 12), 0, 6, 101) (16, 'Mandarin für Autoindustrie', 'MAN', 'B1', 15, datetime.date(2020, 1, 15), 1, 3, 103) (17, 'Français intermédiaire', 'FRA', 'B1', 18, datetime.date(2020, 4, 3), 0, 2, 101) (18, 'Deutsch für Anfänger', 'DEU', 'A2', 8, datetime.date(2020, 2, 14), 1, 4, 102) (19, 'Intermediate English', 'ENG', 'B2', 10, datetime.date(2020, 3, 29), 0, 1, 104) (20, 'Fortgeschrittenes Russisch', 'RUS', 'C1', 4, datetime.date(2020, 4, 8), 0, 5, 103)
8. Creating Records from Lists¶
8.1 - Create Execute List Query Function¶
Let's add a couple of new teachers to our teacher table.
Fist let's modify our execute_query function to use executemany() and to accept one more argument.
def execute_list_query(connection, sql, val):
cursor = connection.cursor()
try:
cursor.executemany(sql, val)
connection.commit()
print("Query successful")
except Error as err:
print(f"Error: '{err}'")
8.2 - Add New Teachers¶
Now let's create a list containing the data for our new teachers, and the SQL command to perform our action.
sql = '''
INSERT INTO teacher (teacher_id, first_name, last_name, language_1, language_2, dob, tax_id, phone_no)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
'''
val = [
(7, 'Hank', 'Dodson', 'ENG', None, '1991-12-23', 11111, '+491772345678'),
(8, 'Sue', 'Perkins', 'MAN', 'ENG', '1976-02-02', 22222, '+491443456432')
]
connection = create_db_connection("localhost", "root", pw, db)
execute_list_query(connection, sql, val)
MySQL Database connection successful Query successful
q1 = """
SELECT *
FROM teacher;
"""
connection = create_db_connection("localhost", "root", pw, db)
results = read_query(connection, q1)
from_db = []
for result in results:
print(result)
MySQL Database connection successful (1, 'James', 'Smith', 'ENG', None, datetime.date(1985, 4, 20), 12345, '+491774553676') (2, 'Stefanie', 'Martin', 'FRA', None, datetime.date(1970, 2, 17), 23456, '+491234567890') (3, 'Steve', 'Wang', 'MAN', 'ENG', datetime.date(1990, 11, 12), 34567, '+447840921333') (4, 'Friederike', 'Müller-Rossi', 'DEU', 'ITA', datetime.date(1987, 7, 7), 45678, '+492345678901') (5, 'Isobel', 'Ivanova', 'RUS', 'ENG', datetime.date(1963, 5, 30), 56789, '+491772635467') (6, 'Niamh', 'Murphy', 'ENG', 'IRI', datetime.date(1995, 9, 8), 67890, '+491231231232') (7, 'Hank', 'Dodson', 'ENG', None, datetime.date(1991, 12, 23), 11111, '+491772345678') (8, 'Sue', 'Perkins', 'MAN', 'ENG', datetime.date(1976, 2, 2), 22222, '+491443456432')
9. Conclusion¶
9.1 - Conclusion¶
This project used Python and MySQL Connector to create an entirely new database in MySQL Server, creating tables, defining their relationships to one another and populating them with data.
We have looked at how to extract data from existing databases and load them into pandas DataFrames, ready for analysis and further work taking advantage of all the possibilities offered by the PyData stack. Going the other direction, this project has also taken data generated by Python scripts and applications, and written those into a database where they can be safely stored for later retrieval.