#!/usr/bin/env python
# coding: utf-8

# In[30]:


# Use the os module to traverse directory trees and obtain the
# names of the files to analyze; the re module is used to pull
# the extension out of each file name
import os
import re

# Create a workhorse method to visit a given file
def analyze_file_list(dirname, filelist, extdict):
    '''
    Tally the file extensions found in a single directory.

    Each name in ``filelist`` is joined onto ``dirname``; every entry that
    turns out to be a regular file has its extension counted in ``extdict``.

    Args:
        dirname: Path of the directory that contains the entries.
        filelist: Iterable of bare entry names (not full paths) located
            inside ``dirname``.
        extdict: Mutable mapping of extension -> count. It is updated in
            place; extensions are stored without the leading dot.

    Returns:
        None. The tally is reported only through ``extdict``.

    Notes:
        * Only the last dot-separated suffix is counted (``a.tar.gz`` is
          tallied as ``gz``) and it must consist solely of word characters.
        * A name made of a dot followed by word characters, such as
          ``.bashrc``, is tallied under ``bashrc``.
        * Entries that are not regular files, or whose path cannot be
          examined, are skipped silently.
    '''
    # Anchored at the END of the name by ``$``: a literal dot followed by
    # one or more word characters, captured without the dot.
    fext_regexp = re.compile(r'\.(\w+)$')
    for fl in filelist:
        # os.path.join does not double the separator when dirname already
        # ends with one (for example the filesystem root).
        filepath = os.path.join(dirname, fl)
        # os.path.isfile returns False instead of raising when the path
        # cannot be examined, so no OSError handling is required here.
        if not os.path.isfile(filepath):
            continue
        ext = fext_regexp.search(fl)
        if ext is None:
            continue
        # group(0) is the whole match (dot included); group(1) is the
        # captured extension on its own.
        fx = ext.group(1)
        extdict[fx] = extdict.get(fx, 0) + 1
            

    


# In[ ]:


import pandas as pd
# Tally file extensions across the whole filesystem, starting at the root.
# os.walk hands back one (location, sub-directories, file names) three-tuple
# for every directory it visits, which the for loop unpacks.
extdict = {}
for current_dir, _subdirs, filenames in os.walk('/'):
    # delegate the per-file work to the analysis function, which
    # updates extdict in place
    analyze_file_list(current_dir, filenames, extdict)
print(extdict)
# one row per extension (orient='index'), drawn as a horizontal bar chart
extdf = pd.DataFrame.from_dict(extdict, orient='index')
extdf.plot(kind='barh')


# In[6]:


# Display os.sep, the character the current operating system uses to
# separate path components
print(os.sep)

