We have taken full advantage of the longitude and latitude data to generate some informative geographic plots.
Thanks to the data processor of this dataset, we had very little missing data and very few inconsistencies.
The name column, which holds the restaurant names, had a considerable number of inconsistent entries. The city column also had some inconsistency, although it wasn't that bad.
So, to clean up those inconsistent entries I used a module called FuzzyWuzzy (a Python library for fuzzy string matching). That method was helpful but did not clean everything, so I combined it with regular expressions (special sequences of characters that match strings, or sets of strings, using a pattern syntax) to finish the job.
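Below is a minimal sketch of the kind of FuzzyWuzzy matching used later in this notebook (the candidate names here are made up for illustration):
from fuzzywuzzy import fuzz, process
candidates = ["mcdonald's", 'mc donalds', "macdonald's", 'burger king']
# token_sort_ratio scores two strings from 0 to 100, ignoring word order
matches = process.extract('mcdonalds', candidates, limit=3, scorer=fuzz.token_sort_ratio)
print(matches)  # each candidate paired with its similarity score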
Again, thanks to the data processor we didn't have to deal with missing values. The only column that had missing values was the websites column, which I removed from the data because it was not relevant and was not going to help us answer any of the questions we were interested in.
The overall purpose of this study was to identify the fast food land invasion in each state, which brands are leading, and how they drive that invasion. This is particularly important for new investors. Fast food contributes to the economy of the US: the more fast food we have, the more jobs are created. We don't want that benefit to reach only one particular territory (in this case the northern states); we want to be able to influence and encourage more investment in areas where there is little business.
Furthermore, this can help to study the proportion of fast food restaurants across neighborhoods to determine if there is a higher accessibility to fast food locations in more vulnerable populations. The basis for vulnerable populations will be viewed through socioeconomic demographics such as average household income and age.
Throughout our exploration and visualization of this dataset we come to a geographic understanding of the fast food industry. As you look at the map below, notice how the northern states are completely covered compared to the western states, and notice how McDonald's leads the industry compared to Burger King and Yum! Brands (which is composed of Taco Bell, KFC, Pizza Hut and WingStreet).
import re
import os
os.environ['PROJ_LIB'] = r"C:\ProgramData\Anaconda3\pkgs\proj4-5.2.0-ha925a31_1\Library\share"
from mpl_toolkits.basemap import Basemap
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.image as pic
from wordcloud import WordCloud as wc
import fuzzywuzzy
from fuzzywuzzy import process, fuzz  # fuzz is used below for token_sort_ratio
food_df = pd.read_csv(r"C:\Users\byamu\Downloads\fast-food-restaurants\FastFoodRestaurants.csv")
food_df.head()
a = pic.imread(r"C:\Users\byamu\Downloads\fast-food-restaurants\Capture.PNG")
plt.figure( figsize=(20,4))
plt.imshow(a)
plt.axis("off")
plt.show()
print('|=================================================================================================|')
print(food_df.info())
print('|=================================MISSING DATA====================================================|')
print('websites',food_df.websites.isnull().sum())
print('|=================================TOTAL OBSERVATIONS==============================================|')
print(food_df.shape)
print('|=================================TOTAL OF FAST FOOD RESTAURANTS==================================|')
print(len(food_df.name.unique()))
print('|=================================CITIES AND PROVINCES============================================|')
print(f"TOTAL OF PROVINCEs:{len(food_df.province.unique())}\nTOTAL OF CITIES:{len(food_df.city.unique())}" )
print('|=================================ALL COLUMN HEADERS==============================================|')
print(food_df.columns)
print('|=================================INDEX===========================================================|')
print(food_df.index)
print('|=================================DUPLICATED ROWS=================================================|')
print(food_df.duplicated().sum())
print('|=================================================================================================|')
Environmental Factors Affecting the Fast Food Industry
In 1970, the fast food industry was worth only $6 billion. Today, according to statista.com, it is worth $198.9 billion and is set to grow to over $223 billion by 2020. In less than 50 years, this industry has evolved a great deal. https://smallbusiness.chron.com/types-economic-factors-can-affect-fast-food-industry-36923.html
top_10_cities = pd.DataFrame(food_df['city'].value_counts(ascending=False).head(10).reset_index())
top_10_cities.columns=['City','total of restaurant']
top_10_cities
top_pro = pd.DataFrame(food_df['province'].value_counts(ascending=False).head(10).reset_index())
top_pro.columns=['province','total of restaurant']
top_pro
top_restaurant = pd.DataFrame(food_df['name'].value_counts(ascending=False).head(10).reset_index())
top_restaurant.columns=['restaurant','total of shops']
top_restaurant
THE ABOVE RESULTS MIGHT OR MIGHT NOT BE TRUE, BECAUSE THERE MIGHT BE DATA INCONSISTENCY IN THE DATAFRAME.
#OVER HERE WE LOWER AND STRIP EXTRA WHITE SPACE TO PREPARE THE COLUMN FOR CLEANSING
food_df['name'] = food_df['name'].str.lower()
food_df['name'] = food_df['name'].str.strip()
food_df['name'].sort_values().unique()
def replace_matches_in_column(df, column, string_to_match, min_ratio=90):
    # get a list of unique strings
    strings = df[column].unique()
    # get the top 10 closest matches to our input string
    matches = fuzzywuzzy.process.extract(string_to_match, strings,
                                         limit=10, scorer=fuzzywuzzy.fuzz.token_sort_ratio)
    # only keep matches with a ratio >= min_ratio (90 by default)
    close_matches = [match[0] for match in matches if match[1] >= min_ratio]
    # get the rows of all the close matches in our dataframe
    rows_with_matches = df[column].isin(close_matches)
    # replace all rows with close matches with the input string
    df.loc[rows_with_matches, column] = string_to_match
print(len(food_df.name.unique()), 'UNIQUE RESTAURANT NAMES BEFORE CLEANING')
# over here we loop through the entire column and clean one name after another
for e in food_df['name'].unique():
    replace_matches_in_column(df=food_df, column='name', string_to_match=e)
len(food_df['name'].sort_values().unique()), 'UNIQUE VALUES LEFT AFTER CLEANING'
food_df['name'].sort_values().unique()[0:10]
AFTER THIS FIRST PASS WE REALIZE THAT WE STILL HAVE SOME REPEATED NAMES IN THE COLUMN, SO I DECIDED TO HELP FUZZYWUZZY DECIDE THE MATCHING NAMES.
from colorama import Fore
print(Fore.RED + '\n\n\n\n\nYOU MIGHT NOT HAVE TO RUN THE CELL BELOW, SEEING THAT YOU WILL BE REQUIRED TO CLEAN/CONFIRM MORE THAN 400 NAMES through input\nI HAVE SAVED A COPY OF A CLEAN DATASET, HASSLE FREE')
for each in food_df['name'].sort_values().unique():  # WE LOOP THROUGH THE UNIQUE VALUES
    # get the top 10 closest matches to our ITERATOR
    matches = fuzzywuzzy.process.extract(each, food_df['name'].unique(),
                                         limit=10, scorer=fuzzywuzzy.fuzz.token_sort_ratio)
    # TO CHECK THAT THE FUNCTION IS WORKING, WE PRINT THE CURRENT LENGTH OF UNIQUE VALUES IN THE COLUMN
    print(len(food_df.name.unique()), '\n')
    # AND WE ALSO DISPLAY THE ITERATOR AND THE 10 STRINGS THAT FUZZYWUZZY THINKS ARE THE CLOSEST
    print(each, '\n\n', matches)
    # TO PREVENT OUR FLOW FROM CRASHING ON BAD INPUT, WE CATCH IT WITH TRY/EXCEPT
    try:
        # THIS LIST WILL SAVE ALL THE STRINGS THAT I THINK MATCH THE ITERATOR
        good = []
        # THEN WE ASK THE USER (IN THIS CASE ME) TO ENTER THE SCORES (AS NUMBERS) OF ALL THE NAMES THAT I THINK MATCH THE ITERATOR
        percent = input('enter the tag (percentage)')
        # THEN WE SPLIT THOSE NUMBERS ON A COMMA AND USE A LIST COMPREHENSION TO CONVERT EACH ONE TO AN INTEGER
        percent = percent.split(',')
        percent = [int(i) for i in percent]
        # NOW IT'S FUN TIME! WE LOOP THROUGH THE MATCHED NAMES, WHICH COME AS TUPLES OF (MATCH NAME, SCORE)
        for e in matches:
            # IF THE SCORE (e[1], SINCE e IS A TUPLE) IS IN percent,
            # WE APPEND THE NAME TO THE LIST good AND PRINT IT TO CONFIRM THE TRANSACTION
            if e[1] in percent:
                good.append(e[0])
                print(good, '\n')
    except:
        print('no match\n\n')
    # get the rows of all the close matches in our dataframe
    rows_with_matches = food_df['name'].isin(good)
    # replace all rows with close matches with the ITERATOR
    food_df.loc[rows_with_matches, 'name'] = each
len(food_df.name.unique()), 'UNIQUE NAMES AFTER THE MANUAL CLEANING HASSLE'
food_df.name.sort_values().unique()
SO FAR, WE ARE ABLE TO SEE THE NUMBER OF FAST FOOD RESTAURANTS IN EACH PROVINCE, TO TELL WHICH STATE HAS THE MOST FAST FOOD, AND WHERE FAST FOOD RESTAURANTS ARE SCARCE.
BUT WE CAN'T TELL WHICH STATE HAS THE MOST FAST FOOD PER SQUARE MILE. (This is important: say you want to find a city where you can start a fast food business; you will most likely go where the competition is not high. But compare a 2,000 sq-mile state with 30 restaurants to a 1,000 sq-mile state with 20. If you don't take the surface area into consideration, you might make the mistake of investing in the state with 20 restaurants, even though it is actually the more crowded one, as the quick sketch below shows.)
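To make the arithmetic concrete, here is a tiny sketch using the hypothetical numbers from the example above:
# hypothetical densities: ignoring area makes the smaller state look safer than it is
state_a = 30 / 2000   # 30 restaurants over 2,000 sq miles -> 0.015 per sq mile
state_b = 20 / 1000   # 20 restaurants over 1,000 sq miles -> 0.020 per sq mile
print(state_a < state_b)  # True: the 30-restaurant state is actually the less saturated market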
HAVING SAID THAT, WE'RE GOING TO GO AHEAD AND BRING IN A PIECE OF ANOTHER DATASET THAT WILL GET US THE STATES' AREAS.
mile_sq = pd.read_csv(r"https://people.sc.fsu.edu/~jburkardt/datasets/states/state_area.txt", delim_whitespace=True,header=None, names = ['province','mile_sq']).set_index('province')
mile_sq.head()
#food_df.province['Co Spgs']
print(len(food_df.province.unique()))
pro = food_df.province.unique()
pro.sort()
print(pro)
print(mile_sq.shape)
sta = mile_sq.index.unique()
print(sta)
Below is a map that proves that "Co Spgs" and "CO" are the same place.
If we set "Co Spgs" to "CO", we will be left with 51 states.
Now the question is: what are we going to do with the other df, where we have 53 states? Well, let's first identify those extra states, then we can decide what to do with them.
So we have "PR" for Puerto Rico and "US" for the United States. We know we do not need "US", and as for Puerto Rico, below is some useful info and the reason why we should drop it, and why we will be left with 51 states instead of 50.
51st state refers to a place or territory that is not one of the 50 states of the United States, but people think about making it the 51st state. ... It is usually said about the possibility of Puerto Rico or other U.S. territories becoming part of the United States. https://simple.m.wikipedia.org/wiki/51st_state
The United States of America. The District of Columbia is a federal district, not a state. Many lists include DC and Puerto Rico, which makes for 52 "states and other jurisdictions". ... The flag has 50 stars, one for each state.
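A quick sketch to surface those mismatches programmatically (assuming the pro and sta arrays computed above):
# provinces present in one table but not the other
print(set(sta) - set(pro))  # expected: {'PR', 'US'} -- only in mile_sq
print(set(pro) - set(sta))  # expected: {'Co Spgs'} -- only in food_df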
#draw a map to prove that "CO" and "Co Spgs" are the same.
word = Basemap(projection='mill', llcrnrlat=20, urcrnrlat=50,
llcrnrlon=-130, urcrnrlon=-60, resolution='c')# over here we draw the map limit
# in this case USA limit
#then we draw the necessary lines, boundaries and colors
word.drawcoastlines()
word.fillcontinents(color='tan',lake_color='aqua',alpha=0.5)
word.drawmapboundary(fill_color='lightblue')
word.drawstates()
word.drawcountries()
#over here we get the longitude and latitude for "Co Spgs"; we are just going to use one value
lat = list(food_df.loc[food_df.province == 'Co Spgs', 'latitude'])[0]
lon = list(food_df.loc[food_df.province == 'Co Spgs', 'longitude'])[0]
#then we set the x,y coordinates based on the "Co Spgs" data we have collected
x,y= word(lon,lat)
#now we plot one of the restaurants and set the color red
word.plot(x,y,'ro',markersize=20, alpha=1)
#we do the same for "CO" as we did with "Co Spgs", but here we use green for the color
lat = list(food_df.loc[food_df.province == 'CO', 'latitude'])[0]
lon = list(food_df.loc[food_df.province == 'CO', 'longitude'])[0]
x,y= word(lon,lat)
word.plot(x,y,'go',markersize=20, alpha=1)
#now that we have no doubt that they are the same, it's time to rename them
food_df.loc[food_df.province=="Co Spgs", 'province'] = 'CO'
d = food_df.province.unique()
d.sort()
d
# as agreed above we're going to drop the "US" and "PR"
mile_sq.drop(["US","PR"], axis=0, inplace=True)
#now it's time to join the mile_sq df to food_df
#basically what we're doing here is taking the mile_sq df and joining it to food_df on province
food_df = food_df.join(mile_sq, on='province', rsuffix='_mile_sq')
food_df.head()
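As a quick sanity check on the join (a sketch): every row should have picked up an area, so any province printed here failed to match an abbreviation in mile_sq.
# provinces whose rows got no area from the join (should be an empty array)
print(food_df.loc[food_df['mile_sq'].isnull(), 'province'].unique())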
WE'RE ALSO GOING TO DROP THE COLUMNS THAT WE'RE NOT GOING TO USE.
food_df.drop(['postalCode', 'websites'], axis=1, inplace=True)
print('duplicates\n',food_df.duplicated().sum())
print('missing values\n',food_df.isnull().sum())
#cool let's move on
#now, just in case, we're going to save these changes as a checkpoint in a CSV file and take it from there
food_df.to_csv(r"C:\Users\...\fast_food_clean.CSV", index=False)
new_df = pd.read_csv(r"C:\Users\...\fast_food_clean.CSV")
new_df.head()
print('TOTAL RESTAURANTS(shops)\n\n', ' ',new_df['keys'].count(),'\n')
print('TOTAL BRANDS\n\n',' ', len(new_df.name.unique()))
top_10_restaurant = pd.DataFrame(new_df.name.value_counts(ascending=False).head(10)).reset_index()
top_10_restaurant.columns = ['restaurant' , 'total']
top_10_restaurant
THE CITY COLUMN LOOKS CLEAN, BUT JUST FOR ARGUMENT'S SAKE WE'RE GOING TO DOUBLE-CHECK IT WITH OUR FUNCTION.
new_df['city'] = new_df['city'].str.lower()
new_df['city'] = new_df['city'].str.strip()
len(new_df.city.unique()),'CITY COLUMN LENGTH BEFORE CLEANING'
# over here we loop through the entire column and clean one city after another
for e in new_df['city'].unique():
    replace_matches_in_column(df=new_df, column='city', string_to_match=e)
len(new_df['city'].sort_values().unique()), 'UNIQUE VALUES LEFT AFTER CLEANING'
list(new_df['city'].sort_values().unique()) #the city column is clean as well
But we know that the restaurant names are not completely clean (the "name" column).
names = new_df['name'].unique()
names.sort()
names
# we can see that 7-eleven is repeated, and so are a&w family restaurant and some other names that fuzzywuzzy did not detect
import re
# over here we check whether the name column contains a given string, then replace it with one common name
new_df.loc[new_df.name.str.contains("seven|7",flags=re.I, regex=True), 'name'] = '7-eleven'
new_df.loc[new_df.name.str.contains("seven|7",flags=re.I, regex=True), 'name']
#for a&w all-american foods we gonna specify that it need to start with 'a&w' because for other shop that conains
#a&W it problably an special arrangement type of brand. like the yum brands or the c'mon one we can be familiar with in
# SA is dunk and dougnut & burgerking now if you have a shop like that you cannot say is dunk & doughnut or is burger.
#same apply with the arrangement like kfc/ tako bell. two brand form one brand
new_df.loc[new_df.name.str.contains("^a&w",flags=re.I, regex=True), 'name'] = 'a&w all-american foods'
new_df.loc[new_df.name.str.contains("checker",flags=re.I, regex=True), 'name'] = "checker's hamburgers"
new_df.loc[new_df.name.str.contains("chipot",flags=re.I, regex=True), 'name'] = 'chipotle mexican grill'
new_df.loc[new_df.name.str.contains("|canes",flags=re.I, regex=True), 'name'] = "raising cane's chicken fingers"
new_df.loc[new_df.name.str.contains("raising|canes",flags=re.I, regex=True), 'name'] = "raising cane's chicken fingers"
#now let's see the difference
#from last check 395
print(new_df.name.unique().shape)
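As a tidier alternative to the one-off lines above, the same fixes can be driven from a single pattern-to-canonical-name mapping; this sketch just replays the patterns already used:
# map regex pattern -> canonical brand name, then apply them all in one loop
fixes = {
    r"seven|7": '7-eleven',
    r"^a&w": 'a&w all-american foods',
    r"checker": "checker's hamburgers",
    r"chipot": 'chipotle mexican grill',
    r"raising|canes": "raising cane's chicken fingers",
}
for pattern, canonical in fixes.items():
    mask = new_df['name'].str.contains(pattern, flags=re.I, regex=True)
    new_df.loc[mask, 'name'] = canonical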
province = food_df.province.value_counts(ascending=False)
print('THE TOP 5 PROVINCES WITH THE MOST FAST FOOD RESTAURANTS IN THE US\n\n\n',
      province.head(),'\n\n\n',
      'THE TOP 5 PROVINCES WITH THE LEAST FAST FOOD IN THE US\n\n\n', province.tail())
new_df
descending_order = new_df.province.value_counts(ascending=False).index
plt.figure( figsize=(18,6))
plt.subplot(211)
sns.countplot(x='province', data=new_df, order=descending_order)
plt.title('TOTAL OF FAST FOOD RESTAURANTS IN EACH PROVINCE')
plt.subplot(212)
pd.DataFrame(new_df['name'].groupby(new_df['province']
).value_counts(ascending=False).unstack(
)).loc[:,"mcdonald’s"].sort_values(ascending=False).plot.bar()
plt.ylabel("total of mcdonald's")
top_restaurant = pd.DataFrame(new_df['name'].value_counts(ascending=False).head(10).reset_index())
top_restaurant.columns=['restaurant','total of shops']
top_restaurant
We see that McDonald's is not only the biggest fast food brand in the US, but it also tends to lead the industry state by state.
We can also measure the two brands that follow it and see how they are doing compared to McDonald's.
# pandas creates its own figure when figsize is passed to .plot.bar, so no extra plt.figure is needed
pd.DataFrame(food_df['name'].groupby(food_df['province']).value_counts(ascending=False).unstack()
).loc[:,['burger king',"mcdonald’s",'kfc/taco bell']].sort_values(by ="mcdonald’s",
ascending=False).plot.bar(width=0.8,figsize=(18,6))
plt.title("MCDONALD'S COMPARED TO BURGER KING AND KFC/TACO BELL")
name = list(new_df.name)
# join the words of each name with '~' so multi-word brands stay as one token in the word cloud
resto = [a.replace(' ', '~') for a in name]
word = ' '.join(resto)
# collocations=False stops WordCloud from pairing tokens up on its own
t = wc(width=1200, height=1000, collocations=False).generate(word)
plt.figure( figsize=(26,12), dpi=300)
plt.imshow(t)
plt.axis('off')
plt.title('FAST FOOD LEADING BRANDS IN THE US')
top_10_cities = pd.DataFrame(new_df['city'].value_counts(ascending=False).head(10).reset_index())
top_10_cities.columns=['city','total of restaurant']
top_10_cities
plt.figure(figsize=(15,10),dpi=100)
sns.barplot(x='city',y='total of restaurant',data=top_10_cities)
plt.title('TOP 10 CITIES IN THE US WITH THE HIGHEST FAST FOOD RATE')
# TO MEASURE DENSITY, WE'RE GOING TO BRING THE SQUARE-MILE TABLE BACK IN AND JOIN IT TO THE PROVINCE COUNTS
mile_pro = pd.DataFrame(new_df.province.value_counts().sort_values())
mile_pro = mile_pro.join(mile_sq, rsuffix='_province')
mile_pro.rename(columns={'province' : 'total_restaurants', 'mile_sq':'land_area'}, inplace=True)
mile_pro.head(10)
mile_pro.sort_values(by='total_restaurants', ascending=False).head(10)
mile_pro.describe()
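With counts and areas now in one frame, we can compute the density that motivated this join (a sketch; rest_per_sq_mile is a new column name introduced here):
# restaurants per square mile of land, most saturated states first
mile_pro['rest_per_sq_mile'] = mile_pro['total_restaurants'] / mile_pro['land_area']
mile_pro.sort_values(by='rest_per_sq_mile', ascending=False).head(10)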
We're going to use a histogram to understand the relationship, if there is any. But first, let's scale the data so that both variables fit within one scale.
from mlxtend.preprocessing import minmax_scaling #we import the necessary tool
#then scale the data to fit within 0-1
scaled_rest = minmax_scaling(mile_pro['total_restaurants'], columns = [0])
scaled_mile = minmax_scaling(mile_pro['land_area'], columns = [0])
scaled_rest[0:5]
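If mlxtend is not available, the same 0-1 scaling can be done directly in pandas (an equivalent sketch; scaled_rest_alt is just an illustrative name):
# min-max scaling by hand: (x - min) / (max - min) maps a column into [0, 1]
col = mile_pro['total_restaurants']
scaled_rest_alt = (col - col.min()) / (col.max() - col.min())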
plt.figure(figsize=(10,10))
sns.distplot(scaled_mile, color='r')
sns.distplot(scaled_rest)
plt.title('RELATIONSHIP BETWEEN RESTAURANTS AND SIZE OF LAND IN EACH STATE')
WELL, IT DEPENDS ON HOW YOU LOOK AT IT.
WHAT IS TRUE, THOUGH, IS THAT THESE TWO VARIABLES ARE INDEPENDENT; ALTHOUGH IT MIGHT LOOK LIKE THERE'S A RELATIONSHIP, IT IS VERY WEAK.
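We can put a number on how weak it is (a sketch, assuming the mile_pro frame from above):
# Pearson correlation between restaurant count and land area;
# a value near 0 would back up the claim that the two variables are (nearly) independent
print(mile_pro['total_restaurants'].corr(mile_pro['land_area']))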
z=new_df.set_index('province')
z.loc['OH',['longitude','latitude']].mean()
plt.figure(figsize=(18,18))
mapa = Basemap(projection='mill', llcrnrlat=20, urcrnrlat=50,
llcrnrlon=-130, urcrnrlon=-60, resolution='c',
width = 90000, height=120000)# over here we draw the map limit
# in this case USA limit
#then we draw the necessary lines, boundaries and colors
mapa.drawcoastlines()
mapa.fillcontinents(color='white',lake_color='aqua',alpha=0.5)
mapa.drawmapboundary(fill_color='lightblue')
mapa.drawstates()
mapa.drawcountries()
#OVER HERE WE LOOP THROUGH THE INDEX OF THE MILE_PRO DF, WHICH HAS THE STATES AS ITS INDEX,
#AND WE USE THE SCALED RESTAURANT DATA TO SET THE SIZE OF EACH MARKER
z = new_df.set_index('province')  # index by province once, outside the loop
for i, each in zip(mile_pro.index, scaled_rest):
    #WE USE THE MEDIAN TO GET THE CENTRAL LOCATION OF EACH STATE'S RESTAURANTS
    lat = z.loc[i,'latitude'].median()
    lon = z.loc[i,'longitude'].median()
    #then we set the x,y coordinates based on that central location
    x,y= mapa(lon,lat)
    #now we plot one marker per state, in red, sized by the scaled restaurant count
    mapa.plot(x,y,'ro',markersize=(each+1)**4*4, alpha=1)
plt.title('FAST FOOD LAND AREA INVASION')
m = Basemap(projection='mill', llcrnrlat=20, urcrnrlat=50, llcrnrlon=-130,\
urcrnrlon=-60, resolution='c')
plt.figure(num=None,figsize=(18,18))
m.drawcoastlines()
m.fillcontinents(color='tan',lake_color='aqua',alpha=0.5)
m.drawmapboundary(fill_color='lightblue')
m.drawstates()
m.drawcountries()
#finally, we plot every restaurant in the cleaned dataset as a small translucent red dot
lat = list(new_df['latitude'])
lon = list(new_df['longitude'])
x,y = m(lon,lat)
m.plot(x,y,'ro',markersize=5, alpha=0.3)
plt.title('EVERY FAST FOOD RESTAURANT IN THE DATASET')