import csv
import json
import hashlib

def map_employee(input_row):
    """Map one raw employee row to a JSON record string.

    Args:
        input_row: Mapping of column name to value for a single employee.
            Every key read below must be present; a missing one raises
            KeyError. 'employer_id' is also required; in this file the
            main loop derives it and adds it to the row before calling.

    Returns:
        The mapped record serialized with json.dumps (one line, no trailing
        newline).
    """
    json_data = {}

    #--required attributes
    json_data['DATA_SOURCE'] = 'EMPLOYEE'
    json_data['ENTITY_TYPE'] = 'GENERIC'
    json_data['RECORD_ID'] = input_row['emp_num']

    #--attributes used for resolution
    json_data['PRIMARY_NAME_LAST'] = input_row['last_name']
    json_data['PRIMARY_NAME_FIRST'] = input_row['first_name']
    json_data['PRIMARY_NAME_MIDDLE'] = input_row['middle_name']
    json_data['HOME_ADDR_LINE1'] = input_row['addr1']
    json_data['HOME_ADDR_CITY'] = input_row['city']
    json_data['HOME_ADDR_STATE'] = input_row['state']
    json_data['HOME_ADDR_POSTAL_CODE'] = input_row['postal_code']
    json_data['HOME_PHONE_NUMBER'] = input_row['home_phone']
    json_data['DATE_OF_BIRTH'] = input_row['dob']
    json_data['SSN_NUMBER'] = input_row['ssn']

    #--classify other id: the type column decides which attribute names are used
    if input_row['other_id_number']:
        if input_row['other_id_type'] == 'DL':
            json_data['DRIVERS_LICENSE_NUMBER'] = input_row['other_id_number']
            #--for a drivers license the "country" column is mapped as the state
            json_data['DRIVERS_LICENSE_STATE'] = input_row['other_id_country']
        elif input_row['other_id_type'] == 'PP':
            json_data['PASSPORT_NUMBER'] = input_row['other_id_number']
            json_data['PASSPORT_COUNTRY'] = input_row['other_id_country']
        else:
            json_data['OTHER_ID_TYPE'] = input_row['other_id_type'].upper()
            json_data['OTHER_ID_NUMBER'] = input_row['other_id_number']
            json_data['OTHER_ID_COUNTRY'] = input_row['other_id_country']

    #--the card column may hold several comma-separated numbers; trim each one
    #--and drop blanks so " 456" or a trailing comma never becomes an id value
    #--NOTE(review): 'SHERRIFS_CARD' and 'SHERIFFS_CARD_LIST' are spelled
    #--differently; left untouched in case downstream config relies on them
    if input_row['sherrifs_card']:
        sherrifs_card_list = [
            {"SHERRIFS_CARD": id_number.strip()}
            for id_number in input_row['sherrifs_card'].split(',')
            if id_number.strip()
        ]
        if sherrifs_card_list:
            json_data['SHERIFFS_CARD_LIST'] = sherrifs_card_list

    #--payload attributes
    json_data['job_category'] = input_row['job_category']
    json_data['job_title'] = input_row['job_title']
    json_data['hire_date'] = input_row['hire_date']

    #--relationships
    relationship_list = []

    #--add an anchor so others can point to me
    relationship_list.append({
        'REL_ANCHOR_DOMAIN': 'EMPLOYEE_NUM',
        'REL_ANCHOR_KEY': input_row['emp_num'],
    })

    #--point to my employer
    relationship_list.append({
        'REL_POINTER_DOMAIN': 'EMPLOYER_ID',
        'REL_POINTER_KEY': input_row['employer_id'],
        'REL_POINTER_ROLE': input_row['job_category'],
    })

    #--point to my manager (same domain as my own anchor: managers are employees)
    if input_row['manager_id']:
        relationship_list.append({
            'REL_POINTER_DOMAIN': 'EMPLOYEE_NUM',
            'REL_POINTER_KEY': input_row['manager_id'],
            'REL_POINTER_ROLE': 'MANAGED_BY',
        })

    json_data['RELATIONSHIP_LIST'] = relationship_list

    return json.dumps(json_data)

def map_employer(input_row):
    """Build the JSON record string for the employer found on an input row.

    Reads 'employer_id', 'employer_name' and 'employer_addr' from input_row
    (KeyError if any is missing) and returns the mapped record serialized
    with json.dumps.
    """
    employer_key = input_row['employer_id']

    employer_record = {
        #--required
        'DATA_SOURCE': 'EMPLOYER',
        'ENTITY_TYPE': 'GENERIC',
        'RECORD_ID': employer_key,

        #--senzing attributes
        'PRIMARY_NAME_ORG': input_row['employer_name'],
        'BUSINESS_ADDR_FULL': input_row['employer_addr'],

        #--relationship anchor so other records can point to this employer
        'REL_ANCHOR_DOMAIN': 'EMPLOYER_ID',
        'REL_ANCHOR_KEY': employer_key,
    }

    return json.dumps(employer_record)

if __name__ == '__main__':

    #--read the raw employee csv and write two json-lines files: one record per
    #--employee and one record per distinct employer

    input_row_count = 0

    #--context managers close (and flush) all three files even if a row fails
    #--part-way through; newline='' leaves line-ending handling to the csv module
    with open('us-small-employee-raw.csv', 'r', encoding='utf-8', newline='') as input_file, \
            open('us-small-employee-mapped.json', 'w', encoding='utf-8') as output_file1_employee, \
            open('us-small-employer-mapped.json', 'w', encoding='utf-8') as output_file2_employer:

        employers_mapped = set()  #--employer ids already written, to help eliminate duplicate companies

        for input_row in csv.DictReader(input_file):
            input_row_count += 1

            #--create any new fields needed
            #--derive an employer id by hashing name + address, so the same
            #--name/address pair always yields the same id across rows
            string_to_hash = input_row['employer_name'] + input_row['employer_addr']
            input_row['employer_id'] = hashlib.md5(bytes(string_to_hash, 'utf-8')).hexdigest()

            #--map the employee
            output_file1_employee.write(map_employee(input_row) + '\n')

            #--map the employer, only the first time each id is seen
            if input_row['employer_id'] not in employers_mapped:
                output_file2_employer.write(map_employer(input_row) + '\n')
                employers_mapped.add(input_row['employer_id'])

    print('%s rows processed' % input_row_count)