import sqlalchemy as sqla
import pandas as pdConnect to MySql with Pandas
mysql
database
pandas
This blog show how to connect to MySql database via Pandas
Import packages
Connect to a database
db = sqla.create_engine('mysql+mysqlconnector://root:@localhost:3306/farmers_market')Get all the tables in the database
db_tables = pd.read_sql('show tables from farmers_market', db)
db_tables| Tables_in_farmers_market | |
|---|---|
| 0 | booth |
| 1 | customer |
| 2 | customer_purchases |
| 3 | market_date_info |
| 4 | product |
| 5 | product_category |
| 6 | vendor |
| 7 | vendor_booth_assignments |
| 8 | vendor_inventory |
| 9 | zip_data |
Get information about the schema of each table
tables = db_tables['Tables_in_farmers_market']
for table in tables:
desc = pd.read_sql('describe {}'.format(table), db)
print('*'*30, table, '*'*30)
print(desc, '\n')****************************** booth ******************************
Field Type Null Key Default Extra
0 booth_number int(11) NO PRI None
1 booth_price_level varchar(45) NO None
2 booth_description varchar(255) NO None
3 booth_type varchar(45) NO None
****************************** customer ******************************
Field Type Null Key Default Extra
0 customer_id int(11) NO PRI None
1 customer_first_name varchar(45) YES None
2 customer_last_name varchar(45) YES None
3 customer_zip varchar(45) YES None
****************************** customer_purchases ******************************
Field Type Null Key Default Extra
0 product_id int(11) NO PRI None
1 vendor_id int(11) NO PRI None
2 market_date date NO PRI None
3 customer_id int(11) NO PRI None
4 quantity decimal(16,2) YES None
5 cost_to_customer_per_qty decimal(16,2) YES None
6 transaction_time time NO PRI None
****************************** market_date_info ******************************
Field Type Null Key Default Extra
0 market_date date NO PRI None
1 market_day varchar(45) YES None
2 market_week varchar(45) YES None
3 market_year varchar(45) YES None
4 market_start_time varchar(45) YES None
5 market_end_time varchar(45) YES None
6 special_notes blob YES None
7 market_season varchar(45) YES None
8 market_min_temp varchar(200) YES None
9 market_max_temp varchar(45) YES None
10 market_rain_flag int(11) YES None
11 market_snow_flag int(11) YES None
****************************** product ******************************
Field Type Null Key Default Extra
0 product_id int(11) NO PRI None
1 product_name varchar(45) YES None
2 product_size varchar(45) YES None
3 product_category_id int(11) NO PRI None
4 product_qty_type varchar(45) YES None
****************************** product_category ******************************
Field Type Null Key Default Extra
0 product_category_id int(11) NO PRI None auto_increment
1 product_category_name varchar(45) YES None
****************************** vendor ******************************
Field Type Null Key Default Extra
0 vendor_id int(11) NO PRI None auto_increment
1 vendor_name varchar(45) NO UNI None
2 vendor_type varchar(45) NO None
3 vendor_owner_first_name varchar(45) NO None
4 vendor_owner_last_name varchar(45) NO None
****************************** vendor_booth_assignments ******************************
Field Type Null Key Default Extra
0 vendor_id int(11) NO PRI None
1 booth_number int(11) NO PRI None
2 market_date date NO PRI None
****************************** vendor_inventory ******************************
Field Type Null Key Default Extra
0 market_date date NO PRI None
1 quantity decimal(16,2) YES None
2 vendor_id int(11) NO PRI None
3 product_id int(11) NO PRI None
4 original_price decimal(16,2) YES None
****************************** zip_data ******************************
Field Type Null Key Default Extra
0 zip_code_5 char(5) NO PRI None
1 median_household_income float YES None
2 percent_high_income float YES None
3 percent_under_18 float YES None
4 percent_over_65 float YES None
5 people_per_sq_mile float YES None
6 latitude float YES None
7 longitude float YES None
product = pd.read_sql('select * from product', db)
product.info()<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 product_id 23 non-null int64
1 product_name 23 non-null object
2 product_size 22 non-null object
3 product_category_id 23 non-null int64
4 product_qty_type 21 non-null object
dtypes: int64(2), object(3)
memory usage: 1.0+ KB
pd.read_sql('select product_id from product limit 5', db)| product_id | |
|---|---|
| 0 | 1 |
| 1 | 2 |
| 2 | 3 |
| 3 | 9 |
| 4 | 12 |