diff --git a/README.rst b/README.rst index 18eedbe..a6b24a0 100644 --- a/README.rst +++ b/README.rst @@ -30,7 +30,7 @@ Both functions, and the class init, take an address string, or a dict-like objec from scourgify import normalize_address_record, NormalizeAddress normalize_address_record('123 southwest Main street, Boring, or, 97203') - + normalize_address_record({ 'address_line_1': '123 southwest Main street', 'address_line_2': 'unit 2', @@ -155,6 +155,12 @@ Contributing ------------ Create a new branch to hold your change; no pull requests submitted directly to dev or master will be approved. Please include a comment explain the issue your pull request solves. Make sure all appropriate test, and tox, updates are included and that all tests are passing. +To setup a local environment and run tests: + +```bash +tox +``` + License ------- usaddress-scourgify is released under the terms of the MIT license. Full details in LICENSE file. diff --git a/requirements/dev.txt b/requirements/dev.txt index a822afd..cc0e2d0 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -7,4 +7,4 @@ pep8>=1.7.0 pylama>=7.3.3 pylint>=1.6.4 tox>=2.7.0 - +black>=23.12.1 diff --git a/scourgify/__init__.py b/scourgify/__init__.py index 922bfc5..15b0221 100644 --- a/scourgify/__init__.py +++ b/scourgify/__init__.py @@ -9,5 +9,5 @@ from scourgify.normalize import ( get_geocoder_normalized_addr, normalize_address_record, - NormalizeAddress + NormalizeAddress, ) diff --git a/scourgify/address_constants.py b/scourgify/address_constants.py index cee607c..abe583a 100644 --- a/scourgify/address_constants.py +++ b/scourgify/address_constants.py @@ -12,902 +12,896 @@ KNOWN_ODDITIES = {} ABNORMAL_OCCUPANCY_ABBRVS = {} -PROBLEM_ST_TYPE_ABBRVS = { - 'CT': 'COURT' -} +PROBLEM_ST_TYPE_ABBRVS = {"CT": "COURT"} AMBIGUOUS_DIRECTIONALS = { - 'NORTH-WEST': 'NW', - 'NORTH-EAST': 'NE', - 'SOUTH-WEST': 'SW', - 'SOUTH-EAST': 'SE' + "NORTH-WEST": "NW", + "NORTH-EAST": "NE", + "SOUTH-WEST": "SW", + "SOUTH-EAST": "SE", } DIRECTIONAL_REPLACEMENTS = { - 'EAST': 'E', - 'WEST': 'W', - 'NORTH': 'N', - 'SOUTH': 'S', - 'NORTHEAST': 'NE', - 'NORTHWEST': 'NW', - 'SOUTHEAST': 'SE', - 'SOUTHWEST': 'SW' + "EAST": "E", + "WEST": "W", + "NORTH": "N", + "SOUTH": "S", + "NORTHEAST": "NE", + "NORTHWEST": "NW", + "SOUTHEAST": "SE", + "SOUTHWEST": "SW", } LONGHAND_DIRECTIONALS = {v: k for k, v in DIRECTIONAL_REPLACEMENTS.items()} CITY_ABBREVIATIONS = LONGHAND_DIRECTIONALS.copy() -CITY_ABBRS = { - 'ST': 'SAINT', - 'MT': 'MOUNT', - 'FT': 'FORT', - 'VA': 'VIRGINIA' -} +CITY_ABBRS = {"ST": "SAINT", "MT": "MOUNT", "FT": "FORT", "VA": "VIRGINIA"} CITY_ABBREVIATIONS.update(CITY_ABBRS) STREET_TYPE_ABBREVIATIONS = { - 'ALLEE': 'ALY', - 'ALLEY': 'ALY', - 'ALLY': 'ALY', - 'ALY': 'ALY', - 'ANEX': 'ANX', - 'ANNEX': 'ANX', - 'ANNX': 'ANX', - 'ANX': 'ANX', - 'ARC': 'ARC', - 'ARCADE': 'ARC', - 'AV': 'AVE', - 'AVE': 'AVE', - 'AVEN': 'AVE', - 'AVENU': 'AVE', - 'AVENUE': 'AVE', - 'AVN': 'AVE', - 'AVNUE': 'AVE', - 'BAYOO': 'BYU', - 'BAYOU': 'BYU', - 'BCH': 'BCH', - 'BEACH': 'BCH', - 'BEND': 'BND', - 'BND': 'BND', - 'BLF': 'BLF', - 'BLUF': 'BLF', - 'BLUFF': 'BLF', - 'BLUFFS': 'BLFS', - 'BOT': 'BTM', - 'BOTTM': 'BTM', - 'BOTTOM': 'BTM', - 'BTM': 'BTM', - 'BLVD': 'BLVD', - 'BOUL': 'BLVD', - 'BOULEVARD': 'BLVD', - 'BOULV': 'BLVD', - 'BR': 'BR', - 'BRANCH': 'BR', - 'BRNCH': 'BR', - 'BRDGE': 'BRG', - 'BRG': 'BRG', - 'BRIDGE': 'BRG', - 'BRK': 'BRK', - 'BROOK': 'BRK', - 'BROOKS': 'BRKS', - 'BURG': 'BG', - 'BURGS': 'BGS', - 'BYP': 'BYP', - 'BYPA': 'BYP', - 'BYPAS': 'BYP', - 'BYPASS': 'BYP', - 'BYPS': 'BYP', - 'CAMP': 'CP', - 'CMP': 'CP', - 'CP': 'CP', - 'CANYN': 'CYN', - 'CANYON': 'CYN', - 'CNYN': 'CYN', - 'CYN': 'CYN', - 'CAPE': 'CPE', - 'CPE': 'CPE', - 'CAUSEWAY': 'CSWY', - 'CAUSWAY': 'CSWY', - 'CSWY': 'CSWY', - 'CEN': 'CTR', - 'CENT': 'CTR', - 'CENTER': 'CTR', - 'CENTR': 'CTR', - 'CENTRE': 'CTR', - 'CNTER': 'CTR', - 'CNTR': 'CTR', - 'CTR': 'CTR', - 'CENTERS': 'CTRS', - 'CIR': 'CIR', - 'CIRC': 'CIR', - 'CIRCL': 'CIR', - 'CIRCLE': 'CIR', - 'CRCL': 'CIR', - 'CRCLE': 'CIR', - 'CIRCLES': 'CIRS', - 'CLF': 'CLF', - 'CLIFF': 'CLF', - 'CLFS': 'CLFS', - 'CLIFFS': 'CLFS', - 'CLB': 'CLB', - 'CLUB': 'CLB', - 'COMMON': 'CMN', - 'COR': 'COR', - 'CORNER': 'COR', - 'CORNERS': 'CORS', - 'CORS': 'CORS', - 'COURSE': 'CRSE', - 'CRSE': 'CRSE', - 'COURT': 'CT', - 'CRT': 'CT', - 'CT': 'CT', - 'COURTS': 'CTS', - 'COVE': 'CV', - 'CV': 'CV', - 'COVES': 'CVS', - 'CK': 'CRK', - 'CR': 'CRK', - 'CREEK': 'CRK', - 'CRK': 'CRK', - 'CRECENT': 'CRES', - 'CRES': 'CRES', - 'CRESCENT': 'CRES', - 'CRESENT': 'CRES', - 'CRSCNT': 'CRES', - 'CRSENT': 'CRES', - 'CRSNT': 'CRES', - 'CREST': 'CRST', - 'CROSSING': 'XING', - 'CRSSING': 'XING', - 'CRSSNG': 'XING', - 'XING': 'XING', - 'CROSSROAD': 'XRD', - 'CURVE': 'CURV', - 'DALE': 'DL', - 'DL': 'DL', - 'DAM': 'DM', - 'DM': 'DM', - 'DIV': 'DV', - 'DIVIDE': 'DV', - 'DV': 'DV', - 'DVD': 'DV', - 'DR': 'DR', - 'DRIV': 'DR', - 'DRIVE': 'DR', - 'DRV': 'DR', - 'DRIVES': 'DRS', - 'EST': 'EST', - 'ESTATE': 'EST', - 'ESTATES': 'ESTS', - 'ESTS': 'ESTS', - 'EXP': 'EXPY', - 'EXPR': 'EXPY', - 'EXPRESS': 'EXPY', - 'EXPRESSWAY': 'EXPY', - 'EXPW': 'EXPY', - 'EXPY': 'EXPY', - 'EXT': 'EXT', - 'EXTENSION': 'EXT', - 'EXTN': 'EXT', - 'EXTNSN': 'EXT', - 'EXTENSIONS': 'EXTS', - 'EXTS': 'EXTS', - 'FALL': 'FALL', - 'FALLS': 'FLS', - 'FLS': 'FLS', - 'FERRY': 'FRY', - 'FRRY': 'FRY', - 'FRY': 'FRY', - 'FIELD': 'FLD', - 'FLD': 'FLD', - 'FIELDS': 'FLDS', - 'FLDS': 'FLDS', - 'FLAT': 'FLT', - 'FLT': 'FLT', - 'FLATS': 'FLTS', - 'FLTS': 'FLTS', - 'FORD': 'FRD', - 'FRD': 'FRD', - 'FORDS': 'FRDS', - 'FOREST': 'FRST', - 'FORESTS': 'FRST', - 'FRST': 'FRST', - 'FORG': 'FRG', - 'FORGE': 'FRG', - 'FRG': 'FRG', - 'FORGES': 'FRGS', - 'FORK': 'FRK', - 'FRK': 'FRK', - 'FORKS': 'FRKS', - 'FRKS': 'FRKS', - 'FORT': 'FT', - 'FRT': 'FT', - 'FT': 'FT', - 'FREEWAY': 'FWY', - 'FREEWY': 'FWY', - 'FRWAY': 'FWY', - 'FRWY': 'FWY', - 'FWY': 'FWY', - 'GARDEN': 'GDN', - 'GARDN': 'GDN', - 'GDN': 'GDN', - 'GRDEN': 'GDN', - 'GRDN': 'GDN', - 'GARDENS': 'GDNS', - 'GDNS': 'GDNS', - 'GRDNS': 'GDNS', - 'GATEWAY': 'GTWY', - 'GATEWY': 'GTWY', - 'GATWAY': 'GTWY', - 'GTWAY': 'GTWY', - 'GTWY': 'GTWY', - 'GLEN': 'GLN', - 'GLN': 'GLN', - 'GLENS': 'GLNS', - 'GREEN': 'GRN', - 'GRN': 'GRN', - 'GREENS': 'GRNS', - 'GROV': 'GRV', - 'GROVE': 'GRV', - 'GRV': 'GRV', - 'GROVES': 'GRVS', - 'HARB': 'HBR', - 'HARBOR': 'HBR', - 'HARBR': 'HBR', - 'HBR': 'HBR', - 'HRBOR': 'HBR', - 'HARBORS': 'HBRS', - 'HAVEN': 'HVN', - 'HAVN': 'HVN', - 'HVN': 'HVN', - 'HEIGHT': 'HTS', - 'HEIGHTS': 'HTS', - 'HGTS': 'HTS', - 'HT': 'HTS', - 'HTS': 'HTS', - 'HIGHWAY': 'HWY', - 'HIGHWY': 'HWY', - 'HIWAY': 'HWY', - 'HIWY': 'HWY', - 'HWAY': 'HWY', - 'HWY': 'HWY', - 'HILL': 'HL', - 'HL': 'HL', - 'HILLS': 'HLS', - 'HLS': 'HLS', - 'HLLW': 'HOLW', - 'HOLLOW': 'HOLW', - 'HOLLOWS': 'HOLW', - 'HOLW': 'HOLW', - 'HOLWS': 'HOLW', - 'INLET': 'INLT', - 'INLT': 'INLT', - 'IS': 'IS', - 'ISLAND': 'IS', - 'ISLND': 'IS', - 'ISLANDS': 'ISS', - 'ISLNDS': 'ISS', - 'ISS': 'ISS', - 'ISLE': 'ISLE', - 'ISLES': 'ISLE', - 'JCT': 'JCT', - 'JCTION': 'JCT', - 'JCTN': 'JCT', - 'JUNCTION': 'JCT', - 'JUNCTN': 'JCT', - 'JUNCTON': 'JCT', - 'JCTNS': 'JCTS', - 'JCTS': 'JCTS', - 'JUNCTIONS': 'JCTS', - 'KEY': 'KY', - 'KY': 'KY', - 'KEYS': 'KYS', - 'KYS': 'KYS', - 'KNL': 'KNL', - 'KNOL': 'KNL', - 'KNOLL': 'KNL', - 'KNLS': 'KNLS', - 'KNOLLS': 'KNLS', - 'LAKE': 'LK', - 'LK': 'LK', - 'LAKES': 'LKS', - 'LKS': 'LKS', - 'LAND': 'LAND', - 'LANDING': 'LNDG', - 'LNDG': 'LNDG', - 'LNDNG': 'LNDG', - 'LA': 'LN', - 'LANE': 'LN', - 'LANES': 'LN', - 'LN': 'LN', - 'LGT': 'LGT', - 'LIGHT': 'LGT', - 'LIGHTS': 'LGTS', - 'LF': 'LF', - 'LOAF': 'LF', - 'LCK': 'LCK', - 'LOCK': 'LCK', - 'LCKS': 'LCKS', - 'LOCKS': 'LCKS', - 'LDG': 'LDG', - 'LDGE': 'LDG', - 'LODG': 'LDG', - 'LODGE': 'LDG', - 'LOOP': 'LOOP', - 'LOOPS': 'LOOP', - 'MALL': 'MALL', - 'MANOR': 'MNR', - 'MNR': 'MNR', - 'MANORS': 'MNRS', - 'MNRS': 'MNRS', - 'MDW': 'MDW', - 'MEADOW': 'MDW', - 'MDWS': 'MDWS', - 'MEADOWS': 'MDWS', - 'MEDOWS': 'MDWS', - 'MEWS': 'MEWS', - 'MILL': 'ML', - 'ML': 'ML', - 'MILLS': 'MLS', - 'MLS': 'MLS', - 'MISSION': 'MSN', - 'MISSN': 'MSN', - 'MSN': 'MSN', - 'MSSN': 'MSN', - 'MOTORWAY': 'MTWY', - 'MNT': 'MT', - 'MOUNT': 'MT', - 'MT': 'MT', - 'MNTAIN': 'MTN', - 'MNTN': 'MTN', - 'MOUNTAIN': 'MTN', - 'MOUNTIN': 'MTN', - 'MTIN': 'MTN', - 'MTN': 'MTN', - 'MNTNS': 'MTNS', - 'MOUNTAINS': 'MTNS', - 'NCK': 'NCK', - 'NECK': 'NCK', - 'ORCH': 'ORCH', - 'ORCHARD': 'ORCH', - 'ORCHRD': 'ORCH', - 'OVAL': 'OVAL', - 'OVL': 'OVAL', - 'OVERPASS': 'OPAS', - 'PARK': 'PARK', - 'PK': 'PARK', - 'PRK': 'PARK', - 'PARKS': 'PARK', - 'PARKWAY': 'PKWY', - 'PARKWY': 'PKWY', - 'PKWAY': 'PKWY', - 'PKWY': 'PKWY', - 'PKY': 'PKWY', - 'PARKWAYS': 'PKWY', - 'PKWYS': 'PKWY', - 'PASS': 'PASS', - 'PASSAGE': 'PSGE', - 'PATH': 'PATH', - 'PATHS': 'PATH', - 'PIKE': 'PIKE', - 'PIKES': 'PIKE', - 'PINE': 'PNE', - 'PINES': 'PNES', - 'PNES': 'PNES', - 'PL': 'PL', - 'PLACE': 'PL', - 'PLAIN': 'PLN', - 'PLN': 'PLN', - 'PLAINES': 'PLNS', - 'PLAINS': 'PLNS', - 'PLNS': 'PLNS', - 'PLAZA': 'PLZ', - 'PLZ': 'PLZ', - 'PLZA': 'PLZ', - 'POINT': 'PT', - 'PT': 'PT', - 'POINTS': 'PTS', - 'PTS': 'PTS', - 'PORT': 'PRT', - 'PRT': 'PRT', - 'PORTS': 'PRTS', - 'PRTS': 'PRTS', - 'PR': 'PR', - 'PRAIRIE': 'PR', - 'PRARIE': 'PR', - 'PRR': 'PR', - 'RAD': 'RADL', - 'RADIAL': 'RADL', - 'RADIEL': 'RADL', - 'RADL': 'RADL', - 'RAMP': 'RAMP', - 'RANCH': 'RNCH', - 'RANCHES': 'RNCH', - 'RNCH': 'RNCH', - 'RNCHS': 'RNCH', - 'RAPID': 'RPD', - 'RPD': 'RPD', - 'RAPIDS': 'RPDS', - 'RPDS': 'RPDS', - 'REST': 'RST', - 'RST': 'RST', - 'RDG': 'RDG', - 'RDGE': 'RDG', - 'RIDGE': 'RDG', - 'RDGS': 'RDGS', - 'RIDGES': 'RDGS', - 'RIV': 'RIV', - 'RIVER': 'RIV', - 'RIVR': 'RIV', - 'RVR': 'RIV', - 'RD': 'RD', - 'ROAD': 'RD', - 'RDS': 'RDS', - 'ROADS': 'RDS', - 'ROUTE': 'RTE', - 'ROW': 'ROW', - 'RUE': 'RUE', - 'RUN': 'RUN', - 'SHL': 'SHL', - 'SHOAL': 'SHL', - 'SHLS': 'SHLS', - 'SHOALS': 'SHLS', - 'SHOAR': 'SHR', - 'SHORE': 'SHR', - 'SHR': 'SHR', - 'SHOARS': 'SHRS', - 'SHORES': 'SHRS', - 'SHRS': 'SHRS', - 'SKYWAY': 'SKWY', - 'SPG': 'SPG', - 'SPNG': 'SPG', - 'SPRING': 'SPG', - 'SPRNG': 'SPG', - 'SPGS': 'SPGS', - 'SPNGS': 'SPGS', - 'SPRINGS': 'SPGS', - 'SPRNGS': 'SPGS', - 'SPUR': 'SPUR', - 'SPURS': 'SPUR', - 'SQ': 'SQ', - 'SQR': 'SQ', - 'SQRE': 'SQ', - 'SQU': 'SQ', - 'SQUARE': 'SQ', - 'SQRS': 'SQS', - 'SQUARES': 'SQS', - 'STA': 'STA', - 'STATION': 'STA', - 'STATN': 'STA', - 'STN': 'STA', - 'STRA': 'STRA', - 'STRAV': 'STRA', - 'STRAVE': 'STRA', - 'STRAVEN': 'STRA', - 'STRAVENUE': 'STRA', - 'STRAVN': 'STRA', - 'STRVN': 'STRA', - 'STRVNUE': 'STRA', - 'STREAM': 'STRM', - 'STREME': 'STRM', - 'STRM': 'STRM', - 'ST': 'ST', - 'STR': 'ST', - 'STREET': 'ST', - 'STRT': 'ST', - 'STREETS': 'STS', - 'SMT': 'SMT', - 'SUMIT': 'SMT', - 'SUMITT': 'SMT', - 'SUMMIT': 'SMT', - 'TER': 'TER', - 'TERR': 'TER', - 'TERRACE': 'TER', - 'THROUGHWAY': 'TRWY', - 'TRACE': 'TRCE', - 'TRACES': 'TRCE', - 'TRCE': 'TRCE', - 'TRACK': 'TRAK', - 'TRACKS': 'TRAK', - 'TRAK': 'TRAK', - 'TRK': 'TRAK', - 'TRKS': 'TRAK', - 'TRAFFICWAY': 'TRFY', - 'TRFY': 'TRFY', - 'TR': 'TRL', - 'TRAIL': 'TRL', - 'TRAILS': 'TRL', - 'TRL': 'TRL', - 'TRLS': 'TRL', - 'TUNEL': 'TUNL', - 'TUNL': 'TUNL', - 'TUNLS': 'TUNL', - 'TUNNEL': 'TUNL', - 'TUNNELS': 'TUNL', - 'TUNNL': 'TUNL', - 'TPK': 'TPKE', - 'TPKE': 'TPKE', - 'TRNPK': 'TPKE', - 'TRPK': 'TPKE', - 'TURNPIKE': 'TPKE', - 'TURNPK': 'TPKE', - 'UNDERPASS': 'UPAS', - 'UN': 'UN', - 'UNION': 'UN', - 'UNIONS': 'UNS', - 'VALLEY': 'VLY', - 'VALLY': 'VLY', - 'VLLY': 'VLY', - 'VLY': 'VLY', - 'VALLEYS': 'VLYS', - 'VLYS': 'VLYS', - 'VDCT': 'VIA', - 'VIA': 'VIA', - 'VIADCT': 'VIA', - 'VIADUCT': 'VIA', - 'VIEW': 'VW', - 'VW': 'VW', - 'VIEWS': 'VWS', - 'VWS': 'VWS', - 'VILL': 'VLG', - 'VILLAG': 'VLG', - 'VILLAGE': 'VLG', - 'VILLG': 'VLG', - 'VILLIAGE': 'VLG', - 'VLG': 'VLG', - 'VILLAGES': 'VLGS', - 'VLGS': 'VLGS', - 'VILLE': 'VL', - 'VL': 'VL', - 'VIS': 'VIS', - 'VIST': 'VIS', - 'VISTA': 'VIS', - 'VST': 'VIS', - 'VSTA': 'VIS', - 'WALK': 'WALK', - 'WALKS': 'WALK', - 'WALL': 'WALL', - 'WAY': 'WAY', - 'WY': 'WAY', - 'WAYS': 'WAYS', - 'WELL': 'WL', - 'WELLS': 'WLS', - 'WLS': 'WLS' + "ALLEE": "ALY", + "ALLEY": "ALY", + "ALLY": "ALY", + "ALY": "ALY", + "ANEX": "ANX", + "ANNEX": "ANX", + "ANNX": "ANX", + "ANX": "ANX", + "ARC": "ARC", + "ARCADE": "ARC", + "AV": "AVE", + "AVE": "AVE", + "AVEN": "AVE", + "AVENU": "AVE", + "AVENUE": "AVE", + "AVN": "AVE", + "AVNUE": "AVE", + "BAYOO": "BYU", + "BAYOU": "BYU", + "BCH": "BCH", + "BEACH": "BCH", + "BEND": "BND", + "BND": "BND", + "BLF": "BLF", + "BLUF": "BLF", + "BLUFF": "BLF", + "BLUFFS": "BLFS", + "BOT": "BTM", + "BOTTM": "BTM", + "BOTTOM": "BTM", + "BTM": "BTM", + "BLVD": "BLVD", + "BOUL": "BLVD", + "BOULEVARD": "BLVD", + "BOULV": "BLVD", + "BR": "BR", + "BRANCH": "BR", + "BRNCH": "BR", + "BRDGE": "BRG", + "BRG": "BRG", + "BRIDGE": "BRG", + "BRK": "BRK", + "BROOK": "BRK", + "BROOKS": "BRKS", + "BURG": "BG", + "BURGS": "BGS", + "BYP": "BYP", + "BYPA": "BYP", + "BYPAS": "BYP", + "BYPASS": "BYP", + "BYPS": "BYP", + "CAMP": "CP", + "CMP": "CP", + "CP": "CP", + "CANYN": "CYN", + "CANYON": "CYN", + "CNYN": "CYN", + "CYN": "CYN", + "CAPE": "CPE", + "CPE": "CPE", + "CAUSEWAY": "CSWY", + "CAUSWAY": "CSWY", + "CSWY": "CSWY", + "CEN": "CTR", + "CENT": "CTR", + "CENTER": "CTR", + "CENTR": "CTR", + "CENTRE": "CTR", + "CNTER": "CTR", + "CNTR": "CTR", + "CTR": "CTR", + "CENTERS": "CTRS", + "CIR": "CIR", + "CIRC": "CIR", + "CIRCL": "CIR", + "CIRCLE": "CIR", + "CRCL": "CIR", + "CRCLE": "CIR", + "CIRCLES": "CIRS", + "CLF": "CLF", + "CLIFF": "CLF", + "CLFS": "CLFS", + "CLIFFS": "CLFS", + "CLB": "CLB", + "CLUB": "CLB", + "COMMON": "CMN", + "COR": "COR", + "CORNER": "COR", + "CORNERS": "CORS", + "CORS": "CORS", + "COURSE": "CRSE", + "CRSE": "CRSE", + "COURT": "CT", + "CRT": "CT", + "CT": "CT", + "COURTS": "CTS", + "COVE": "CV", + "CV": "CV", + "COVES": "CVS", + "CK": "CRK", + "CR": "CRK", + "CREEK": "CRK", + "CRK": "CRK", + "CRECENT": "CRES", + "CRES": "CRES", + "CRESCENT": "CRES", + "CRESENT": "CRES", + "CRSCNT": "CRES", + "CRSENT": "CRES", + "CRSNT": "CRES", + "CREST": "CRST", + "CROSSING": "XING", + "CRSSING": "XING", + "CRSSNG": "XING", + "XING": "XING", + "CROSSROAD": "XRD", + "CURVE": "CURV", + "DALE": "DL", + "DL": "DL", + "DAM": "DM", + "DM": "DM", + "DIV": "DV", + "DIVIDE": "DV", + "DV": "DV", + "DVD": "DV", + "DR": "DR", + "DRIV": "DR", + "DRIVE": "DR", + "DRV": "DR", + "DRIVES": "DRS", + "EST": "EST", + "ESTATE": "EST", + "ESTATES": "ESTS", + "ESTS": "ESTS", + "EXP": "EXPY", + "EXPR": "EXPY", + "EXPRESS": "EXPY", + "EXPRESSWAY": "EXPY", + "EXPW": "EXPY", + "EXPY": "EXPY", + "EXT": "EXT", + "EXTENSION": "EXT", + "EXTN": "EXT", + "EXTNSN": "EXT", + "EXTENSIONS": "EXTS", + "EXTS": "EXTS", + "FALL": "FALL", + "FALLS": "FLS", + "FLS": "FLS", + "FERRY": "FRY", + "FRRY": "FRY", + "FRY": "FRY", + "FIELD": "FLD", + "FLD": "FLD", + "FIELDS": "FLDS", + "FLDS": "FLDS", + "FLAT": "FLT", + "FLT": "FLT", + "FLATS": "FLTS", + "FLTS": "FLTS", + "FORD": "FRD", + "FRD": "FRD", + "FORDS": "FRDS", + "FOREST": "FRST", + "FORESTS": "FRST", + "FRST": "FRST", + "FORG": "FRG", + "FORGE": "FRG", + "FRG": "FRG", + "FORGES": "FRGS", + "FORK": "FRK", + "FRK": "FRK", + "FORKS": "FRKS", + "FRKS": "FRKS", + "FORT": "FT", + "FRT": "FT", + "FT": "FT", + "FREEWAY": "FWY", + "FREEWY": "FWY", + "FRWAY": "FWY", + "FRWY": "FWY", + "FWY": "FWY", + "GARDEN": "GDN", + "GARDN": "GDN", + "GDN": "GDN", + "GRDEN": "GDN", + "GRDN": "GDN", + "GARDENS": "GDNS", + "GDNS": "GDNS", + "GRDNS": "GDNS", + "GATEWAY": "GTWY", + "GATEWY": "GTWY", + "GATWAY": "GTWY", + "GTWAY": "GTWY", + "GTWY": "GTWY", + "GLEN": "GLN", + "GLN": "GLN", + "GLENS": "GLNS", + "GREEN": "GRN", + "GRN": "GRN", + "GREENS": "GRNS", + "GROV": "GRV", + "GROVE": "GRV", + "GRV": "GRV", + "GROVES": "GRVS", + "HARB": "HBR", + "HARBOR": "HBR", + "HARBR": "HBR", + "HBR": "HBR", + "HRBOR": "HBR", + "HARBORS": "HBRS", + "HAVEN": "HVN", + "HAVN": "HVN", + "HVN": "HVN", + "HEIGHT": "HTS", + "HEIGHTS": "HTS", + "HGTS": "HTS", + "HT": "HTS", + "HTS": "HTS", + "HIGHWAY": "HWY", + "HIGHWY": "HWY", + "HIWAY": "HWY", + "HIWY": "HWY", + "HWAY": "HWY", + "HWY": "HWY", + "HILL": "HL", + "HL": "HL", + "HILLS": "HLS", + "HLS": "HLS", + "HLLW": "HOLW", + "HOLLOW": "HOLW", + "HOLLOWS": "HOLW", + "HOLW": "HOLW", + "HOLWS": "HOLW", + "INLET": "INLT", + "INLT": "INLT", + "IS": "IS", + "ISLAND": "IS", + "ISLND": "IS", + "ISLANDS": "ISS", + "ISLNDS": "ISS", + "ISS": "ISS", + "ISLE": "ISLE", + "ISLES": "ISLE", + "JCT": "JCT", + "JCTION": "JCT", + "JCTN": "JCT", + "JUNCTION": "JCT", + "JUNCTN": "JCT", + "JUNCTON": "JCT", + "JCTNS": "JCTS", + "JCTS": "JCTS", + "JUNCTIONS": "JCTS", + "KEY": "KY", + "KY": "KY", + "KEYS": "KYS", + "KYS": "KYS", + "KNL": "KNL", + "KNOL": "KNL", + "KNOLL": "KNL", + "KNLS": "KNLS", + "KNOLLS": "KNLS", + "LAKE": "LK", + "LK": "LK", + "LAKES": "LKS", + "LKS": "LKS", + "LAND": "LAND", + "LANDING": "LNDG", + "LNDG": "LNDG", + "LNDNG": "LNDG", + "LA": "LN", + "LANE": "LN", + "LANES": "LN", + "LN": "LN", + "LGT": "LGT", + "LIGHT": "LGT", + "LIGHTS": "LGTS", + "LF": "LF", + "LOAF": "LF", + "LCK": "LCK", + "LOCK": "LCK", + "LCKS": "LCKS", + "LOCKS": "LCKS", + "LDG": "LDG", + "LDGE": "LDG", + "LODG": "LDG", + "LODGE": "LDG", + "LOOP": "LOOP", + "LOOPS": "LOOP", + "MALL": "MALL", + "MANOR": "MNR", + "MNR": "MNR", + "MANORS": "MNRS", + "MNRS": "MNRS", + "MDW": "MDW", + "MEADOW": "MDW", + "MDWS": "MDWS", + "MEADOWS": "MDWS", + "MEDOWS": "MDWS", + "MEWS": "MEWS", + "MILL": "ML", + "ML": "ML", + "MILLS": "MLS", + "MLS": "MLS", + "MISSION": "MSN", + "MISSN": "MSN", + "MSN": "MSN", + "MSSN": "MSN", + "MOTORWAY": "MTWY", + "MNT": "MT", + "MOUNT": "MT", + "MT": "MT", + "MNTAIN": "MTN", + "MNTN": "MTN", + "MOUNTAIN": "MTN", + "MOUNTIN": "MTN", + "MTIN": "MTN", + "MTN": "MTN", + "MNTNS": "MTNS", + "MOUNTAINS": "MTNS", + "NCK": "NCK", + "NECK": "NCK", + "ORCH": "ORCH", + "ORCHARD": "ORCH", + "ORCHRD": "ORCH", + "OVAL": "OVAL", + "OVL": "OVAL", + "OVERPASS": "OPAS", + "PARK": "PARK", + "PK": "PARK", + "PRK": "PARK", + "PARKS": "PARK", + "PARKWAY": "PKWY", + "PARKWY": "PKWY", + "PKWAY": "PKWY", + "PKWY": "PKWY", + "PKY": "PKWY", + "PARKWAYS": "PKWY", + "PKWYS": "PKWY", + "PASS": "PASS", + "PASSAGE": "PSGE", + "PATH": "PATH", + "PATHS": "PATH", + "PIKE": "PIKE", + "PIKES": "PIKE", + "PINE": "PNE", + "PINES": "PNES", + "PNES": "PNES", + "PL": "PL", + "PLACE": "PL", + "PLAIN": "PLN", + "PLN": "PLN", + "PLAINES": "PLNS", + "PLAINS": "PLNS", + "PLNS": "PLNS", + "PLAZA": "PLZ", + "PLZ": "PLZ", + "PLZA": "PLZ", + "POINT": "PT", + "PT": "PT", + "POINTS": "PTS", + "PTS": "PTS", + "PORT": "PRT", + "PRT": "PRT", + "PORTS": "PRTS", + "PRTS": "PRTS", + "PR": "PR", + "PRAIRIE": "PR", + "PRARIE": "PR", + "PRR": "PR", + "RAD": "RADL", + "RADIAL": "RADL", + "RADIEL": "RADL", + "RADL": "RADL", + "RAMP": "RAMP", + "RANCH": "RNCH", + "RANCHES": "RNCH", + "RNCH": "RNCH", + "RNCHS": "RNCH", + "RAPID": "RPD", + "RPD": "RPD", + "RAPIDS": "RPDS", + "RPDS": "RPDS", + "REST": "RST", + "RST": "RST", + "RDG": "RDG", + "RDGE": "RDG", + "RIDGE": "RDG", + "RDGS": "RDGS", + "RIDGES": "RDGS", + "RIV": "RIV", + "RIVER": "RIV", + "RIVR": "RIV", + "RVR": "RIV", + "RD": "RD", + "ROAD": "RD", + "RDS": "RDS", + "ROADS": "RDS", + "ROUTE": "RTE", + "ROW": "ROW", + "RUE": "RUE", + "RUN": "RUN", + "SHL": "SHL", + "SHOAL": "SHL", + "SHLS": "SHLS", + "SHOALS": "SHLS", + "SHOAR": "SHR", + "SHORE": "SHR", + "SHR": "SHR", + "SHOARS": "SHRS", + "SHORES": "SHRS", + "SHRS": "SHRS", + "SKYWAY": "SKWY", + "SPG": "SPG", + "SPNG": "SPG", + "SPRING": "SPG", + "SPRNG": "SPG", + "SPGS": "SPGS", + "SPNGS": "SPGS", + "SPRINGS": "SPGS", + "SPRNGS": "SPGS", + "SPUR": "SPUR", + "SPURS": "SPUR", + "SQ": "SQ", + "SQR": "SQ", + "SQRE": "SQ", + "SQU": "SQ", + "SQUARE": "SQ", + "SQRS": "SQS", + "SQUARES": "SQS", + "STA": "STA", + "STATION": "STA", + "STATN": "STA", + "STN": "STA", + "STRA": "STRA", + "STRAV": "STRA", + "STRAVE": "STRA", + "STRAVEN": "STRA", + "STRAVENUE": "STRA", + "STRAVN": "STRA", + "STRVN": "STRA", + "STRVNUE": "STRA", + "STREAM": "STRM", + "STREME": "STRM", + "STRM": "STRM", + "ST": "ST", + "STR": "ST", + "STREET": "ST", + "STRT": "ST", + "STREETS": "STS", + "SMT": "SMT", + "SUMIT": "SMT", + "SUMITT": "SMT", + "SUMMIT": "SMT", + "TER": "TER", + "TERR": "TER", + "TERRACE": "TER", + "THROUGHWAY": "TRWY", + "TRACE": "TRCE", + "TRACES": "TRCE", + "TRCE": "TRCE", + "TRACK": "TRAK", + "TRACKS": "TRAK", + "TRAK": "TRAK", + "TRK": "TRAK", + "TRKS": "TRAK", + "TRAFFICWAY": "TRFY", + "TRFY": "TRFY", + "TR": "TRL", + "TRAIL": "TRL", + "TRAILS": "TRL", + "TRL": "TRL", + "TRLS": "TRL", + "TUNEL": "TUNL", + "TUNL": "TUNL", + "TUNLS": "TUNL", + "TUNNEL": "TUNL", + "TUNNELS": "TUNL", + "TUNNL": "TUNL", + "TPK": "TPKE", + "TPKE": "TPKE", + "TRNPK": "TPKE", + "TRPK": "TPKE", + "TURNPIKE": "TPKE", + "TURNPK": "TPKE", + "UNDERPASS": "UPAS", + "UN": "UN", + "UNION": "UN", + "UNIONS": "UNS", + "VALLEY": "VLY", + "VALLY": "VLY", + "VLLY": "VLY", + "VLY": "VLY", + "VALLEYS": "VLYS", + "VLYS": "VLYS", + "VDCT": "VIA", + "VIA": "VIA", + "VIADCT": "VIA", + "VIADUCT": "VIA", + "VIEW": "VW", + "VW": "VW", + "VIEWS": "VWS", + "VWS": "VWS", + "VILL": "VLG", + "VILLAG": "VLG", + "VILLAGE": "VLG", + "VILLG": "VLG", + "VILLIAGE": "VLG", + "VLG": "VLG", + "VILLAGES": "VLGS", + "VLGS": "VLGS", + "VILLE": "VL", + "VL": "VL", + "VIS": "VIS", + "VIST": "VIS", + "VISTA": "VIS", + "VST": "VIS", + "VSTA": "VIS", + "WALK": "WALK", + "WALKS": "WALK", + "WALL": "WALL", + "WAY": "WAY", + "WY": "WAY", + "WAYS": "WAYS", + "WELL": "WL", + "WELLS": "WLS", + "WLS": "WLS", } OCCUPANCY_TYPE_ABBREVIATIONS = { - 'APARTMENT': 'APT', - 'BUILDING': 'BLDG', - 'BASEMENT': 'BSMT', - 'DEPARTMENT': 'DEPT', - 'FLOOR': 'FL', - 'FRONT': 'FRNT', - 'HANGER': 'HNGR', - 'KEY': 'KEY', - 'LOBBY': 'LBBY', - 'LOT': 'LOT', - 'LOWER': 'LOWR', - 'OFFICE': 'OFC', - 'PENTHOUSE': 'PH', - 'PIER': 'PIER', - 'REAR': 'REAR', - 'ROOM': 'RM', - 'SIDE': 'SIDE', - 'SLIP': 'SLIP', - 'SPACE': 'SPC', - 'STOP': 'STOP', - 'SUITE': 'STE', - 'TRAILER': 'TRLR', - 'UNIT': 'UNIT', - 'UPPER': 'UPPER', - '#': '#' + "APARTMENT": "APT", + "BUILDING": "BLDG", + "BASEMENT": "BSMT", + "DEPARTMENT": "DEPT", + "FLOOR": "FL", + "FRONT": "FRNT", + "HANGER": "HNGR", + "KEY": "KEY", + "LOBBY": "LBBY", + "LOT": "LOT", + "LOWER": "LOWR", + "OFFICE": "OFC", + "PENTHOUSE": "PH", + "PIER": "PIER", + "REAR": "REAR", + "ROOM": "RM", + "SIDE": "SIDE", + "SLIP": "SLIP", + "SPACE": "SPC", + "STOP": "STOP", + "SUITE": "STE", + "TRAILER": "TRLR", + "UNIT": "UNIT", + "UPPER": "UPPER", + "#": "#", } LONGHAND_STREET_TYPES = { - 'ALY': 'ALLEY', - 'ANX': 'ANNEX', - 'ARC': 'ARCADE', - 'AVE': 'AVENUE', - 'BYU': 'BAYOU', - 'BCH': 'BEACH', - 'BND': 'BEND', - 'BLF': 'BLUFF', - 'BLFS': 'BLUFFS', - 'BTM': 'BOTTOM', - 'BLVD': 'BOULEVARD', - 'BR': 'BRANCH', - 'BRG': 'BRIDGE', - 'BRK': 'BROOK', - 'BRKS': 'BROOKS', - 'BGS': 'BURGS', - 'BYP': 'BYPASS', - 'CP': 'CAMP', - 'CYN': 'CANYON', - 'CPE': 'CAPE', - 'CSWY': 'CAUSEWAY', - 'CTR': 'CENTER', - 'CTRS': 'CENTERS', - 'CIR': 'CIRCLE', - 'CIRS': 'CIRCLES', - 'CLF': 'CLIFF', - 'CLFS': 'CLIFFS', - 'CMN': 'COMMON', - 'COR': 'CORNER', - 'CORS': 'CORNERS', - 'CRSE': 'COURSE', - 'CT': 'COURT', - 'CTS': 'COURTS', - 'CVS': 'COVES', - 'CRK': 'CREEK', - 'CRES': 'CRESCENT', - 'CRST': 'CREST', - 'XING': 'CROSSING', - 'XRD': 'CROSSROAD', - 'CURV': 'CURVE', - 'DL': 'DALE', - 'DM': 'DAM', - 'DV': 'DIVIDE', - 'DR': 'DRIVE', - 'DRS': 'DRIVES', - 'EST': 'ESTATE', - 'ESTS': 'ESTATES', - 'EXPY': 'EXPRESSWAY', - 'EXT': 'EXTENSION', - 'EXTS': 'EXTENSIONS', - 'FALL': 'FALL', - 'FLS': 'FALLS', - 'FRY': 'FERRY', - 'FLD': 'FIELD', - 'FLDS': 'FIELDS', - 'FLT': 'FLAT', - 'FLTS': 'FLATS', - 'FRD': 'FORD', - 'FRDS': 'FORDS', - 'FRST': 'FORESTS', - 'FRG': 'FORGE', - 'FRGS': 'FORGES', - 'FRK': 'FORK', - 'FRKS': 'FORKS', - 'FT': 'FORT', - 'FWY': 'FREEWAY', - 'GDN': 'GARDEN', - 'GDNS': 'GARDENS', - 'GTWY': 'GATEWAY', - 'GLN': 'GLEN', - 'GLNS': 'GLENS', - 'GRNS': 'GREENS', - 'GRV': 'GROVE', - 'GRVS': 'GROVES', - 'HBR': 'HARBOR', - 'HBRS': 'HARBORS', - 'HVN': 'HAVEN', - 'HTS': 'HEIGHTS', - 'HWY': 'HIGHWAY', - 'HL': 'HILL', - 'HLS': 'HILLS', - 'HOLW': 'HOLLOW', - 'INLT': 'INLET', - 'IS': 'ISLAND', - 'ISS': 'ISLANDS', - 'ISLE': 'ISLE', - 'JCT': 'JUNCTION', - 'JCTS': 'JUNCTIONS', - 'KY': 'KEY', - 'KYS': 'KEYS', - 'KNL': 'KNOLL', - 'KNLS': 'KNOLLS', - 'LK': 'LAKE', - 'LKS': 'LAKES', - 'LAND': 'LAND', - 'LNDG': 'LANDING', - 'LN': 'LANE', - 'LGT': 'LIGHT', - 'LGTS': 'LIGHTS', - 'LF': 'LOAF', - 'LCK': 'LOCK', - 'LCKS': 'LOCKS', - 'LDG': 'LODGE', - 'LOOP': 'LOOP', - 'MALL': 'MALL', - 'MNR': 'MANOR', - 'MNRS': 'MANORS', - 'MDW': 'MEADOW', - 'MDWS': 'MEADOWS', - 'MEWS': 'MEWS', - 'ML': 'MILL', - 'MLS': 'MILLS', - 'MSN': 'MISSION', - 'MTWY': 'MOTORWAY', - 'MT': 'MOUNT', - 'MTN': 'MOUNTAIN', - 'MTNS': 'MOUNTAINS', - 'NCK': 'NECK', - 'ORCH': 'ORCHARD', - 'OVAL': 'OVAL', - 'OPAS': 'OVERPASS', - 'PARK': 'PARKS', - 'PKWY': 'PARKWAY', - 'PASS': 'PASS', - 'PSGE': 'PASSAGE', - 'PATH': 'PATHS', - 'PIKE': 'PIKES', - 'PNE': 'PINE', - 'PNES': 'PINES', - 'PL': 'PLACE', - 'PLN': 'PLAIN', - 'PLNS': 'PLAINS', - 'PLZ': 'PLAZA', - 'PT': 'POINT', - 'PTS': 'POINTS', - 'PRT': 'PORT', - 'PRTS': 'PORTS', - 'PR': 'PRAIRIE', - 'RADL': 'RADIAL', - 'RAMP': 'RAMP', - 'RNCH': 'RANCH', - 'RPD': 'RAPID', - 'RPDS': 'RAPIDS', - 'RST': 'REST', - 'RDG': 'RIDGE', - 'RDGS': 'RIDGES', - 'RIV': 'RIVER', - 'RD': 'ROAD', - 'RDS': 'ROADS', - 'RTE': 'ROUTE', - 'ROW': 'ROW', - 'RUE': 'RUE', - 'RUN': 'RUN', - 'SHL': 'SHOAL', - 'SHLS': 'SHOALS', - 'SHR': 'SHORE', - 'SHRS': 'SHORES', - 'SKWY': 'SKYWAY', - 'SPG': 'SPRING', - 'SPGS': 'SPRINGS', - 'SPUR': 'SPURS', - 'SQ': 'SQUARE', - 'SQS': 'SQUARES', - 'STA': 'STATION', - 'STRA': 'STRAVENUE', - 'STRM': 'STREAM', - 'ST': 'STREET', - 'STS': 'STREETS', - 'SMT': 'SUMMIT', - 'TER': 'TERRACE', - 'TRWY': 'THROUGHWAY', - 'TRCE': 'TRACE', - 'TRAK': 'TRACK', - 'TRFY': 'TRAFFICWAY', - 'TRL': 'TRAIL', - 'TUNL': 'TUNNEL', - 'TPKE': 'TURNPIKE', - 'UPAS': 'UNDERPASS', - 'UN': 'UNION', - 'UNS': 'UNIONS', - 'VLY': 'VALLEY', - 'VLYS': 'VALLEYS', - 'VIA': 'VIADUCT', - 'VW': 'VIEW', - 'VWS': 'VIEWS', - 'VLG': 'VILLAGE', - 'VLGS': 'VILLAGES', - 'VL': 'VILLE', - 'VIS': 'VISTA', - 'WALK': 'WALK', - 'WALL': 'WALL', - 'WAY': 'WAY', - 'WL': 'WELL', - 'WLS': 'WELLS' + "ALY": "ALLEY", + "ANX": "ANNEX", + "ARC": "ARCADE", + "AVE": "AVENUE", + "BYU": "BAYOU", + "BCH": "BEACH", + "BND": "BEND", + "BLF": "BLUFF", + "BLFS": "BLUFFS", + "BTM": "BOTTOM", + "BLVD": "BOULEVARD", + "BR": "BRANCH", + "BRG": "BRIDGE", + "BRK": "BROOK", + "BRKS": "BROOKS", + "BGS": "BURGS", + "BYP": "BYPASS", + "CP": "CAMP", + "CYN": "CANYON", + "CPE": "CAPE", + "CSWY": "CAUSEWAY", + "CTR": "CENTER", + "CTRS": "CENTERS", + "CIR": "CIRCLE", + "CIRS": "CIRCLES", + "CLF": "CLIFF", + "CLFS": "CLIFFS", + "CMN": "COMMON", + "COR": "CORNER", + "CORS": "CORNERS", + "CRSE": "COURSE", + "CT": "COURT", + "CTS": "COURTS", + "CVS": "COVES", + "CRK": "CREEK", + "CRES": "CRESCENT", + "CRST": "CREST", + "XING": "CROSSING", + "XRD": "CROSSROAD", + "CURV": "CURVE", + "DL": "DALE", + "DM": "DAM", + "DV": "DIVIDE", + "DR": "DRIVE", + "DRS": "DRIVES", + "EST": "ESTATE", + "ESTS": "ESTATES", + "EXPY": "EXPRESSWAY", + "EXT": "EXTENSION", + "EXTS": "EXTENSIONS", + "FALL": "FALL", + "FLS": "FALLS", + "FRY": "FERRY", + "FLD": "FIELD", + "FLDS": "FIELDS", + "FLT": "FLAT", + "FLTS": "FLATS", + "FRD": "FORD", + "FRDS": "FORDS", + "FRST": "FORESTS", + "FRG": "FORGE", + "FRGS": "FORGES", + "FRK": "FORK", + "FRKS": "FORKS", + "FT": "FORT", + "FWY": "FREEWAY", + "GDN": "GARDEN", + "GDNS": "GARDENS", + "GTWY": "GATEWAY", + "GLN": "GLEN", + "GLNS": "GLENS", + "GRNS": "GREENS", + "GRV": "GROVE", + "GRVS": "GROVES", + "HBR": "HARBOR", + "HBRS": "HARBORS", + "HVN": "HAVEN", + "HTS": "HEIGHTS", + "HWY": "HIGHWAY", + "HL": "HILL", + "HLS": "HILLS", + "HOLW": "HOLLOW", + "INLT": "INLET", + "IS": "ISLAND", + "ISS": "ISLANDS", + "ISLE": "ISLE", + "JCT": "JUNCTION", + "JCTS": "JUNCTIONS", + "KY": "KEY", + "KYS": "KEYS", + "KNL": "KNOLL", + "KNLS": "KNOLLS", + "LK": "LAKE", + "LKS": "LAKES", + "LAND": "LAND", + "LNDG": "LANDING", + "LN": "LANE", + "LGT": "LIGHT", + "LGTS": "LIGHTS", + "LF": "LOAF", + "LCK": "LOCK", + "LCKS": "LOCKS", + "LDG": "LODGE", + "LOOP": "LOOP", + "MALL": "MALL", + "MNR": "MANOR", + "MNRS": "MANORS", + "MDW": "MEADOW", + "MDWS": "MEADOWS", + "MEWS": "MEWS", + "ML": "MILL", + "MLS": "MILLS", + "MSN": "MISSION", + "MTWY": "MOTORWAY", + "MT": "MOUNT", + "MTN": "MOUNTAIN", + "MTNS": "MOUNTAINS", + "NCK": "NECK", + "ORCH": "ORCHARD", + "OVAL": "OVAL", + "OPAS": "OVERPASS", + "PARK": "PARKS", + "PKWY": "PARKWAY", + "PASS": "PASS", + "PSGE": "PASSAGE", + "PATH": "PATHS", + "PIKE": "PIKES", + "PNE": "PINE", + "PNES": "PINES", + "PL": "PLACE", + "PLN": "PLAIN", + "PLNS": "PLAINS", + "PLZ": "PLAZA", + "PT": "POINT", + "PTS": "POINTS", + "PRT": "PORT", + "PRTS": "PORTS", + "PR": "PRAIRIE", + "RADL": "RADIAL", + "RAMP": "RAMP", + "RNCH": "RANCH", + "RPD": "RAPID", + "RPDS": "RAPIDS", + "RST": "REST", + "RDG": "RIDGE", + "RDGS": "RIDGES", + "RIV": "RIVER", + "RD": "ROAD", + "RDS": "ROADS", + "RTE": "ROUTE", + "ROW": "ROW", + "RUE": "RUE", + "RUN": "RUN", + "SHL": "SHOAL", + "SHLS": "SHOALS", + "SHR": "SHORE", + "SHRS": "SHORES", + "SKWY": "SKYWAY", + "SPG": "SPRING", + "SPGS": "SPRINGS", + "SPUR": "SPURS", + "SQ": "SQUARE", + "SQS": "SQUARES", + "STA": "STATION", + "STRA": "STRAVENUE", + "STRM": "STREAM", + "ST": "STREET", + "STS": "STREETS", + "SMT": "SUMMIT", + "TER": "TERRACE", + "TRWY": "THROUGHWAY", + "TRCE": "TRACE", + "TRAK": "TRACK", + "TRFY": "TRAFFICWAY", + "TRL": "TRAIL", + "TUNL": "TUNNEL", + "TPKE": "TURNPIKE", + "UPAS": "UNDERPASS", + "UN": "UNION", + "UNS": "UNIONS", + "VLY": "VALLEY", + "VLYS": "VALLEYS", + "VIA": "VIADUCT", + "VW": "VIEW", + "VWS": "VIEWS", + "VLG": "VILLAGE", + "VLGS": "VILLAGES", + "VL": "VILLE", + "VIS": "VISTA", + "WALK": "WALK", + "WALL": "WALL", + "WAY": "WAY", + "WL": "WELL", + "WLS": "WELLS", } STATE_ABBREVIATIONS = { - 'ALABAMA': 'AL', - 'ALA': 'AL', - 'ALASKA': 'AK', - 'ALAS': 'AK', - 'ARIZONA': 'AZ', - 'ARIZ': 'AZ', - 'ARKANSAS': 'AR', - 'ARK': 'AR', - 'CALIFORNIA': 'CA', - 'CALIF': 'CA', - 'CAL': 'CA', - 'COLORADO': 'CO', - 'COLO': 'CO', - 'COL': 'CO', - 'CONNECTICUT': 'CT', - 'CONN': 'CT', - 'DELAWARE': 'DE', - 'DEL': 'DE', - 'DISTRICT OF COLUMBIA': 'DC', - 'FLORIDA': 'FL', - 'FLA': 'FL', - 'FLOR': 'FL', - 'GEORGIA': 'GA', - 'GA': 'GA', - 'HAWAII': 'HI', - 'IDAHO': 'ID', - 'IDA': 'ID', - 'ILLINOIS': 'IL', - 'ILL': 'IL', - 'INDIANA': 'IN', - 'IND': 'IN', - 'IOWA': 'IA', - 'KANSAS': 'KS', - 'KANS': 'KS', - 'KAN': 'KS', - 'KENTUCKY': 'KY', - 'KEN': 'KY', - 'KENT': 'KY', - 'LOUISIANA': 'LA', - 'MAINE': 'ME', - 'MARYLAND': 'MD', - 'MASSACHUSETTS': 'MA', - 'MASS': 'MA', - 'MICHIGAN': 'MI', - 'MICH': 'MI', - 'MINNESOTA': 'MN', - 'MINN': 'MN', - 'MISSISSIPPI': 'MS', - 'MISS': 'MS', - 'MISSOURI': 'MO', - 'MONTANA': 'MT', - 'MONT': 'MT', - 'NEBRASKA': 'NE', - 'NEBR': 'NE', - 'NEB': 'NE', - 'NEVADA': 'NV', - 'NEV': 'NV', - 'NEW HAMPSHIRE': 'NH', - 'NEW JERSEY': 'NJ', - 'NEW MEXICO': 'NM', - 'N MEX': 'NM', - 'NEW M': 'NM', - 'NEW YORK': 'NY', - 'NORTH CAROLINA': 'NC', - 'NORTH DAKOTA': 'ND', - 'N DAK': 'ND', - 'OHIO': 'OH', - 'OKLAHOMA': 'OK', - 'OKLA': 'OK', - 'OREGON': 'OR', - 'OREG': 'OR', - 'ORE': 'OR', - 'PENNSYLVANIA': 'PA', - 'PENN': 'PA', - 'RHODE ISLAND': 'RI', - 'SOUTH CAROLINA': 'SC', - 'SOUTH DAKOTA': 'SD', - 'S DAK': 'SD', - 'TENNESSEE': 'TN', - 'TENN': 'TN', - 'TEXAS': 'TX', - 'TEX': 'TX', - 'UTAH': 'UT', - 'VERMONT': 'VT', - 'VIRGINIA': 'VA', - 'WASHINGTON': 'WA', - 'WASH': 'WA', - 'WEST VIRGINIA': 'WV', - 'W VA': 'WV', - 'WISCONSIN': 'WI', - 'WIS': 'WI', - 'WISC': 'WI', - 'WYOMING': 'WY', - 'WYO': 'WY' + "ALABAMA": "AL", + "ALA": "AL", + "ALASKA": "AK", + "ALAS": "AK", + "ARIZONA": "AZ", + "ARIZ": "AZ", + "ARKANSAS": "AR", + "ARK": "AR", + "CALIFORNIA": "CA", + "CALIF": "CA", + "CAL": "CA", + "COLORADO": "CO", + "COLO": "CO", + "COL": "CO", + "CONNECTICUT": "CT", + "CONN": "CT", + "DELAWARE": "DE", + "DEL": "DE", + "DISTRICT OF COLUMBIA": "DC", + "FLORIDA": "FL", + "FLA": "FL", + "FLOR": "FL", + "GEORGIA": "GA", + "GA": "GA", + "HAWAII": "HI", + "IDAHO": "ID", + "IDA": "ID", + "ILLINOIS": "IL", + "ILL": "IL", + "INDIANA": "IN", + "IND": "IN", + "IOWA": "IA", + "KANSAS": "KS", + "KANS": "KS", + "KAN": "KS", + "KENTUCKY": "KY", + "KEN": "KY", + "KENT": "KY", + "LOUISIANA": "LA", + "MAINE": "ME", + "MARYLAND": "MD", + "MASSACHUSETTS": "MA", + "MASS": "MA", + "MICHIGAN": "MI", + "MICH": "MI", + "MINNESOTA": "MN", + "MINN": "MN", + "MISSISSIPPI": "MS", + "MISS": "MS", + "MISSOURI": "MO", + "MONTANA": "MT", + "MONT": "MT", + "NEBRASKA": "NE", + "NEBR": "NE", + "NEB": "NE", + "NEVADA": "NV", + "NEV": "NV", + "NEW HAMPSHIRE": "NH", + "NEW JERSEY": "NJ", + "NEW MEXICO": "NM", + "N MEX": "NM", + "NEW M": "NM", + "NEW YORK": "NY", + "NORTH CAROLINA": "NC", + "NORTH DAKOTA": "ND", + "N DAK": "ND", + "OHIO": "OH", + "OKLAHOMA": "OK", + "OKLA": "OK", + "OREGON": "OR", + "OREG": "OR", + "ORE": "OR", + "PENNSYLVANIA": "PA", + "PENN": "PA", + "RHODE ISLAND": "RI", + "SOUTH CAROLINA": "SC", + "SOUTH DAKOTA": "SD", + "S DAK": "SD", + "TENNESSEE": "TN", + "TENN": "TN", + "TEXAS": "TX", + "TEX": "TX", + "UTAH": "UT", + "VERMONT": "VT", + "VIRGINIA": "VA", + "WASHINGTON": "WA", + "WASH": "WA", + "WEST VIRGINIA": "WV", + "W VA": "WV", + "WISCONSIN": "WI", + "WIS": "WI", + "WISC": "WI", + "WYOMING": "WY", + "WYO": "WY", } -ADDRESS_KEYS = ( - 'address_line_1', 'address_line_2', 'city', 'state', 'postal_code' -) +ADDRESS_KEYS = ("address_line_1", "address_line_2", "city", "state", "postal_code") class NormalizationConfig(Config): """Config class for GBR""" + # pylint: disable=too-few-public-methods - default_file = 'address_constants.yaml' + default_file = "address_constants.yaml" def __init__(self, config_file=None, config_dir=None, section=None): super(NormalizationConfig, self).__init__( - config_file=config_file, config_dir=config_dir, section=section, - env_prefix='ADDRESS_CONFIG' + config_file=config_file, + config_dir=config_dir, + section=section, + env_prefix="ADDRESS_CONFIG", ) @@ -915,34 +909,32 @@ def set_address_constants(): config = NormalizationConfig() if config: addr_constants = ( - 'DIRECTIONAL_REPLACEMENTS', - 'OCCUPANCY_TYPE_ABBREVIATIONS', - 'STATE_ABBREVIATIONS', - 'STREET_TYPE_ABBREVIATIONS', - 'KNOWN_ODDITIES', - 'PROBLEM_ST_TYPE_ABBRVS', - 'LONGHAND_DIRECTIONALS', - 'LONGHAND_STREET_TYPES', + "DIRECTIONAL_REPLACEMENTS", + "OCCUPANCY_TYPE_ABBREVIATIONS", + "STATE_ABBREVIATIONS", + "STREET_TYPE_ABBREVIATIONS", + "KNOWN_ODDITIES", + "PROBLEM_ST_TYPE_ABBRVS", + "LONGHAND_DIRECTIONALS", + "LONGHAND_STREET_TYPES", ) - insertion_method = config.get('insertion_method', default='update') - update = ('update', 'insert') - replace = ('replace', 'overwrite') + insertion_method = config.get("insertion_method", default="update") + update = ("update", "insert") + replace = ("replace", "overwrite") if insertion_method not in update + replace: msg = "'{}' is not a valid option for 'insertion_method'".format( insertion_method ) raise ConfigError(msg) - globals()['ADDRESS_KEYS'] = config.get( - 'ADDRESS_KEYS', default=globals()['ADDRESS_KEYS'] + globals()["ADDRESS_KEYS"] = config.get( + "ADDRESS_KEYS", default=globals()["ADDRESS_KEYS"] ) for key in addr_constants: new_vals = config.get(key, default={}) - if key == 'OCCUPANCY_TYPE_ABBREVIATIONS' and new_vals: + if key == "OCCUPANCY_TYPE_ABBREVIATIONS" and new_vals: org_keys = OCCUPANCY_TYPE_ABBREVIATIONS.keys() new_keys = new_vals.keys() - globals()['ABNORMAL_OCCUPANCY_ABBRVS'] = ( - set(new_keys) - set(org_keys) - ) + globals()["ABNORMAL_OCCUPANCY_ABBRVS"] = set(new_keys) - set(org_keys) if new_vals and insertion_method in update: globals()[key].update(**new_vals) elif new_vals and insertion_method in replace: diff --git a/scourgify/cleaning.py b/scourgify/cleaning.py index c190138..27854fa 100644 --- a/scourgify/cleaning.py +++ b/scourgify/cleaning.py @@ -21,7 +21,7 @@ KNOWN_ODDITIES, OCCUPANCY_TYPE_ABBREVIATIONS, PROBLEM_ST_TYPE_ABBRVS, - AMBIGUOUS_DIRECTIONALS + AMBIGUOUS_DIRECTIONALS, ) # Setup @@ -35,10 +35,8 @@ PRECLEAN_EXCLUDE = [40, 41, 44] EXCLUDE_ALL = ALLOWED_CHARS + PRECLEAN_EXCLUDE -STRIP_CHAR_CATS = ( - 'M', 'S', 'C', 'Nl', 'No', 'Pc', 'Ps', 'Pe', 'Pi', 'Pf', 'Po' -) -STRIP_PUNC_CATS = ('Z', 'Pd') +STRIP_CHAR_CATS = ("M", "S", "C", "Nl", "No", "Pc", "Ps", "Pe", "Pi", "Pf", "Po") +STRIP_PUNC_CATS = ("Z", "Pd") STRIP_ALL_CATS = STRIP_CHAR_CATS + STRIP_PUNC_CATS # Data Structure Definitions @@ -48,6 +46,7 @@ # Public Classes and Functions + def pre_clean_addr_str(addr_str, state=None): # type: (str, Optional[str]) -> str """Remove any known undesirable sub-strings and special characters. @@ -71,11 +70,11 @@ def pre_clean_addr_str(addr_str, state=None): """ # replace any easily handled, undesirable sub-strings if any(oddity in addr_str for oddity in KNOWN_ODDITIES.keys()): - for key, replacement in KNOWN_ODDITIES.items(): # pragma: no cover + for key, replacement in KNOWN_ODDITIES.items(): # pragma: no cover addr_str = addr_str.replace(key, replacement) # remove non-decimal point period chars. - if '.' in addr_str: # pragma: no cover + if "." in addr_str: # pragma: no cover addr_str = clean_period_char(addr_str) addr_str = pre_clean_directionals(addr_str) @@ -84,9 +83,7 @@ def pre_clean_addr_str(addr_str, state=None): # intersection addresses, and - which impacts range addresses and zipcodes. # ',', '(' and ')' are also left for potential use in additional line 2 # processing functions - addr_str = clean_upper( - addr_str, exclude=EXCLUDE_ALL, removal_cats=STRIP_CHAR_CATS - ) + addr_str = clean_upper(addr_str, exclude=EXCLUDE_ALL, removal_cats=STRIP_CHAR_CATS) # to prevent any potential confusion between CT = COURT v CT = Connecticut, # clean_ambiguous_street_types is not applied if state is CT. @@ -115,7 +112,7 @@ def clean_ambiguous_street_types(addr_str): for key in PROBLEM_ST_TYPE_ABBRVS: if key in split_addr: split_addr[split_addr.index(key)] = PROBLEM_ST_TYPE_ABBRVS[key] - addr_str = ' '.join(split_addr) + addr_str = " ".join(split_addr) break return addr_str @@ -148,7 +145,7 @@ def _parse_occupancy(addr_line_2): except usaddress.RepeatedLabelError: pass if parsed: - occupancy = parsed[0].get('OccupancyIdentifier') + occupancy = parsed[0].get("OccupancyIdentifier") return occupancy @@ -163,7 +160,7 @@ def strip_occupancy_type(addr_line_2): """ occupancy = None if addr_line_2: - addr_line_2 = addr_line_2.replace('#', '').strip().upper() + addr_line_2 = addr_line_2.replace("#", "").strip().upper() occupancy = _parse_occupancy(addr_line_2) # if that doesn't work, clean abbrevs and try again @@ -179,23 +176,23 @@ def strip_occupancy_type(addr_line_2): # if that doesn't work, dissect it manually if not occupancy: occupancy = addr_line_2 - types = ( - list(OCCUPANCY_TYPE_ABBREVIATIONS.keys()) - + list(OCCUPANCY_TYPE_ABBREVIATIONS.values()) + types = list(OCCUPANCY_TYPE_ABBREVIATIONS.keys()) + list( + OCCUPANCY_TYPE_ABBREVIATIONS.values() ) if parts and len(parts) > 1: ids = [p for p in parts if p not in types] print(ids) - occupancy = ' '.join(ids) + occupancy = " ".join(ids) return occupancy -def clean_upper(text, # type: Any - exclude=None, # type: Optional[Sequence[int]] - removal_cats=STRIP_CHAR_CATS, # type: Optional[Sequence[str]] - strip_spaces=False # type: Optional[bool] - ): +def clean_upper( + text, # type: Any + exclude=None, # type: Optional[Sequence[int]] + removal_cats=STRIP_CHAR_CATS, # type: Optional[Sequence[str]] + strip_spaces=False, # type: Optional[bool] +): # type: (str, Optional[Sequence[int]], Optional[Sequence[str]]) -> str """ Return text as upper case unicode string and remove unwanted characters. @@ -218,8 +215,8 @@ def clean_upper(text, # type: Any if not isinstance(text, str): # pragma: no cover text = str(text) # catch and convert fractions - text = unicodedata.normalize('NFKD', text) - text = text.translate({8260: '/'}) + text = unicodedata.normalize("NFKD", text) + text = text.translate({8260: "/"}) # evaluate string without commas (,) or ampersand (&) to determine if # further processing is necessary @@ -227,16 +224,18 @@ def clean_upper(text, # type: Any # remove unwanted non-alphanumeric characters and convert all dash type # characters to hyphen - if not alnum_text.replace(' ', '').isalnum(): + if not alnum_text.replace(" ", "").isalnum(): for char in text: - if (unicodedata.category(char).startswith(removal_cats) - and ord(char) not in exclude): + if ( + unicodedata.category(char).startswith(removal_cats) + and ord(char) not in exclude + ): text = text.translate({ord(char): None}) - elif unicodedata.category(char).startswith('Pd'): - text = text.translate({ord(char): '-'}) - join_char = ' ' + elif unicodedata.category(char).startswith("Pd"): + text = text.translate({ord(char): "-"}) + join_char = " " if strip_spaces: - join_char = '' + join_char = "" # remove extra spaces and convert to uppercase return join_char.join(text.split()).upper() @@ -249,8 +248,8 @@ def clean_period_char(text): :return: cleaned string :rtype: str """ - period_pattern = re.compile(r'\.(?!\d)') - return re.sub(period_pattern, '', text) + period_pattern = re.compile(r"\.(?!\d)") + return re.sub(period_pattern, "", text) def pre_clean_directionals(text): diff --git a/scourgify/exceptions.py b/scourgify/exceptions.py index 5ef6947..46055ae 100644 --- a/scourgify/exceptions.py +++ b/scourgify/exceptions.py @@ -14,8 +14,10 @@ # Public Classes and Functions + class AddressNormalizationError(Exception): """Indicates error during normalization""" + TITLE = None MESSAGE = None @@ -28,31 +30,33 @@ def __init__(self, error=None, title=None, *args): def __str__(self): msg = "{}: {}".format(self.title, self.error) if len(self.args) > 2: - msg = "{}, {}".format( - msg, ', '.join(str(a) for a in self.args[2:]) - ) + msg = "{}, {}".format(msg, ", ".join(str(a) for a in self.args[2:])) return msg class AmbiguousAddressError(AddressNormalizationError): """Indicates an error from ambiguous addresses or address parts.""" + MESSAGE = "This address contains ambiguous elements." TITLE = "AMBIGUOUS ADDRESS" class UnParseableAddressError(AddressNormalizationError): """Indicates an error from addresses that cannot be parsed.""" + MESSAGE = "Unable to break this address into its component parts" TITLE = "UNPARSEABLE ADDRESS" class IncompleteAddressError(AddressNormalizationError): """Indicates error from addresses that don't have enough data to index.""" + MESSAGE = "This address is missing one or more required elements" TITLE = "INCOMPLETE ADDRESS" class AddressValidationError(AddressNormalizationError): """Indicates address elements that don't meet format standards.""" + MESSAGE = "Address contains invalid formatting" TITLE = "ADDRESS FORMAT VALIDATION" diff --git a/scourgify/normalize.py b/scourgify/normalize.py index db47b50..e4494a3 100644 --- a/scourgify/normalize.py +++ b/scourgify/normalize.py @@ -69,52 +69,50 @@ # Constants LINE1_USADDRESS_LABELS = ( - 'AddressNumber', - 'StreetName', - 'AddressNumberPrefix', - 'AddressNumberSuffix', - 'StreetNamePreDirectional', - 'StreetNamePostDirectional', - 'StreetNamePreModifier', - 'StreetNamePostType', - 'StreetNamePreType', - 'IntersectionSeparator', - 'SecondStreetNamePreDirectional', - 'SecondStreetNamePostDirectional', - 'SecondStreetNamePreModifier', - 'SecondStreetNamePostType', - 'SecondStreetNamePreType', - 'LandmarkName', - 'CornerOf', - 'IntersectionSeparator', - 'BuildingName', + "AddressNumber", + "StreetName", + "AddressNumberPrefix", + "AddressNumberSuffix", + "StreetNamePreDirectional", + "StreetNamePostDirectional", + "StreetNamePreModifier", + "StreetNamePostType", + "StreetNamePreType", + "IntersectionSeparator", + "SecondStreetNamePreDirectional", + "SecondStreetNamePostDirectional", + "SecondStreetNamePreModifier", + "SecondStreetNamePostType", + "SecondStreetNamePreType", + "LandmarkName", + "CornerOf", + "IntersectionSeparator", + "BuildingName", ) LINE2_USADDRESS_LABELS = ( - 'OccupancyType', - 'OccupancyIdentifier', - 'SubaddressIdentifier', - 'SubaddressType', + "OccupancyType", + "OccupancyIdentifier", + "SubaddressIdentifier", + "SubaddressType", ) LAST_LINE_LABELS = ( - 'PlaceName', - 'StateName', - 'ZipCode', + "PlaceName", + "StateName", + "ZipCode", ) AMBIGUOUS_LABELS = ( - 'Recipient', - 'USPSBoxType', - 'USPSBoxID', - 'USPSBoxGroupType', - 'USPSBoxGroupID', - 'NotAddress' + "Recipient", + "USPSBoxType", + "USPSBoxID", + "USPSBoxGroupType", + "USPSBoxGroupID", + "NotAddress", ) -STRIP_CHAR_CATS = ( - 'M', 'S', 'C', 'Nl', 'No', 'Pc', 'Ps', 'Pe', 'Pi', 'Pf', 'Po' -) -STRIP_PUNC_CATS = ('Z', 'Pd') +STRIP_CHAR_CATS = ("M", "S", "C", "Nl", "No", "Pc", "Ps", "Pe", "Pi", "Pf", "Po") +STRIP_PUNC_CATS = ("Z", "Pd") STRIP_ALL_CATS = STRIP_CHAR_CATS + STRIP_PUNC_CATS @@ -122,10 +120,14 @@ # Public Classes and Functions -def normalize_address_record(address: str | dict, addr_map: dict = None, - addtl_funcs: [Callable] = None, - strict: bool = True, - long_hand: bool = False) -> dict: + +def normalize_address_record( + address: str | dict, + addr_map: dict = None, + addtl_funcs: [Callable] = None, + strict: bool = True, + long_hand: bool = False, +) -> dict: """Normalize an address according to USPS pub. 28 standards. Takes an address string, or a dict-like with standard address fields @@ -156,20 +158,26 @@ def normalize_address_record(address: str | dict, addr_map: dict = None, :rtype: Mapping[str, str] """ if isinstance(address, str): - return normalize_addr_str( - address, addtl_funcs=addtl_funcs, long_hand=long_hand - ) + return normalize_addr_str(address, addtl_funcs=addtl_funcs, long_hand=long_hand) else: return normalize_addr_dict( - address, addr_map=addr_map, addtl_funcs=addtl_funcs, - strict=strict, long_hand=long_hand + address, + addr_map=addr_map, + addtl_funcs=addtl_funcs, + strict=strict, + long_hand=long_hand, ) -def normalize_addr_str(addr_str: str, line2: str = None, city: str = None, - state: str = None, zipcode: str = None, - addtl_funcs: [Callable] = None, - long_hand: bool = False) -> dict: +def normalize_addr_str( + addr_str: str, + line2: str = None, + city: str = None, + state: str = None, + zipcode: str = None, + addtl_funcs: [Callable] = None, + long_hand: bool = False, +) -> dict: """Normalize a complete or partial address string. :param addr_str: str containing address data. @@ -209,17 +217,24 @@ def normalize_addr_str(addr_str: str, line2: str = None, city: str = None, error = False # send refactored line_1 and line_2 back through processing return normalize_addr_str( - line1, line2=line2, city=city, - state=state, zipcode=zipcode, long_hand=long_hand + line1, + line2=line2, + city=city, + state=state, + zipcode=zipcode, + long_hand=long_hand, ) except ValueError: # try a different additional processing function pass - if parsed_addr and not parsed_addr.get('StreetName'): + if parsed_addr and not parsed_addr.get("StreetName"): addr_dict = dict( - address_line_1=addr_str, address_line_2=line2, city=city, - state=state, postal_code=zipcode + address_line_1=addr_str, + address_line_2=line2, + city=city, + state=state, + postal_code=zipcode, ) full_addr = format_address_record(addr_dict) try: @@ -229,39 +244,34 @@ def normalize_addr_str(addr_str: str, line2: str = None, city: str = None, error = err if parsed_addr: - parsed_addr = normalize_address_components( - parsed_addr, long_hand=long_hand - ) - zipcode = get_parsed_values( - parsed_addr, zipcode, 'ZipCode', addr_str - ) - city = get_parsed_values( - parsed_addr, city, 'PlaceName', addr_str - ) - state = get_parsed_values( - parsed_addr, state, 'StateName', addr_str - ) + parsed_addr = normalize_address_components(parsed_addr, long_hand=long_hand) + zipcode = get_parsed_values(parsed_addr, zipcode, "ZipCode", addr_str) + city = get_parsed_values(parsed_addr, city, "PlaceName", addr_str) + state = get_parsed_values(parsed_addr, state, "StateName", addr_str) state = normalize_state(state) # assumes if line2 is passed in that it need not be parsed from # addr_str. Primarily used to allow advanced processing of otherwise # unparsable addresses. - line2 = line2 if line2 else get_normalized_line_segment( - parsed_addr, LINE2_USADDRESS_LABELS + line2 = ( + line2 + if line2 + else get_normalized_line_segment(parsed_addr, LINE2_USADDRESS_LABELS) ) line2 = post_clean_addr_str(line2) # line 1 is fully post cleaned in get_normalized_line_segment. - line1 = get_normalized_line_segment( - parsed_addr, LINE1_USADDRESS_LABELS - ) + line1 = get_normalized_line_segment(parsed_addr, LINE1_USADDRESS_LABELS) validate_parens_groups_parsed(line1) else: # line1 is set to addr_str so complete dict can be passed to error. line1 = addr_str addr_rec = OrderedDict( - address_line_1=line1, address_line_2=line2, city=city, - state=state, postal_code=zipcode + address_line_1=line1, + address_line_2=line2, + city=city, + state=state, + postal_code=zipcode, ) if error: raise UnParseableAddressError(None, None, addr_rec) @@ -269,9 +279,13 @@ def normalize_addr_str(addr_str: str, line2: str = None, city: str = None, return addr_rec -def normalize_addr_dict(addr_dict: dict, addr_map: dict = None, - addtl_funcs: [Callable] = None, - strict: bool = True, long_hand: bool = False) -> dict: +def normalize_addr_dict( + addr_dict: dict, + addr_map: dict = None, + addtl_funcs: [Callable] = None, + strict: bool = True, + long_hand: bool = False, +) -> dict: """Normalize an address from dict or dict-like object. Assumes addr_dict will have standard address related keys (address_line_1, @@ -301,24 +315,32 @@ def normalize_addr_dict(addr_dict: dict, addr_map: dict = None, # line 1 and line 2 elements are combined to ensure consistent processing # whether the line 2 elements are pre-parsed or included in line 1 addr_str = get_addr_line_str(addr_dict, comma_separate=True) - postal_code = addr_dict.get('postal_code') - zipcode = validate_us_postal_code_format( - postal_code, addr_dict - ) if postal_code else None - city = addr_dict.get('city') - state = addr_dict.get('state') + postal_code = addr_dict.get("postal_code") + zipcode = ( + validate_us_postal_code_format(postal_code, addr_dict) if postal_code else None + ) + city = addr_dict.get("city") + state = addr_dict.get("state") try: address = normalize_addr_str( - addr_str, city=city, state=state, zipcode=zipcode, - addtl_funcs=addtl_funcs, long_hand=long_hand + addr_str, + city=city, + state=state, + zipcode=zipcode, + addtl_funcs=addtl_funcs, + long_hand=long_hand, ) except AddressNormalizationError: addr_str = get_addr_line_str( addr_dict, comma_separate=True, addr_parts=ADDRESS_KEYS ) address = normalize_addr_str( - addr_str, city=city, state=state, zipcode=zipcode, - addtl_funcs=addtl_funcs, long_hand=long_hand + addr_str, + city=city, + state=state, + zipcode=zipcode, + addtl_funcs=addtl_funcs, + long_hand=long_hand, ) return address @@ -342,15 +364,15 @@ def parse_address_string(addr_str: str) -> dict: parsed_addr = parsed_results[0] # if the address is parseable but some form of ambiguity is found that # may result in data corruption NormalizationError is raised. - if (parsed_results[1] == 'Ambiguous' or - any(key in AMBIGUOUS_LABELS for key in parsed_addr.keys())): + if parsed_results[1] == "Ambiguous" or any( + key in AMBIGUOUS_LABELS for key in parsed_addr.keys() + ): raise AmbiguousAddressError() parsed_addr = handle_abnormal_occupancy(parsed_addr, addr_str) return parsed_addr -def handle_abnormal_occupancy(parsed_addr: OrderedDict, - addr_str: str) -> OrderedDict: +def handle_abnormal_occupancy(parsed_addr: OrderedDict, addr_str: str) -> OrderedDict: """Handle abnormal occupancy abbreviations that are parsed as street type. Evaluates addresses with an Occupancy or Subaddress identifier whose type @@ -372,10 +394,10 @@ def handle_abnormal_occupancy(parsed_addr: OrderedDict, :rtype: OrderedDict """ occupancy_id_key = None - occupany_type_key = 'OccupancyType' - street_type_key = 'StreetNamePostType' - occupany_type_keys = (occupany_type_key, 'SubaddressType') - occupancy_identifier_keys = ('OccupancyIdentifier', 'SubaddressIdentifier') + occupany_type_key = "OccupancyType" + street_type_key = "StreetNamePostType" + occupany_type_keys = (occupany_type_key, "SubaddressType") + occupancy_identifier_keys = ("OccupancyIdentifier", "SubaddressIdentifier") street_type = parsed_addr.get(street_type_key) if street_type in ABNORMAL_OCCUPANCY_ABBRVS: occupancy_type = None @@ -395,19 +417,20 @@ def handle_abnormal_occupancy(parsed_addr: OrderedDict, break if occupancy and not occupancy_type: if street_type in occupancy: - occupancy = occupancy.replace(street_type, '').strip() + occupancy = occupancy.replace(street_type, "").strip() del parsed_addr[occupancy_id_key] else: line2 = "{} {}".format(street_type, occupancy) - addr_str = addr_str.replace(line2, '') + addr_str = addr_str.replace(line2, "") parsed_addr = parse_address_string(addr_str) parsed_addr.update({occupany_type_key: street_type}) parsed_addr.update({occupancy_id_key: occupancy}) return parsed_addr -def get_parsed_values(parsed_addr: OrderedDict, orig_val: str, - val_label: str, orig_addr_str: str) -> str | None: +def get_parsed_values( + parsed_addr: OrderedDict, orig_val: str, val_label: str, orig_addr_str: str +) -> str | None: """Get valid values from parsed_addr corresponding to val_label. Retrieves values from parsed_addr corresponding to the label supplied in @@ -438,16 +461,17 @@ def get_parsed_values(parsed_addr: OrderedDict, orig_val: str, non_null_val_set = {orig_val, val_from_parse} - {None} if len(non_null_val_set) > 1: msg = ( - f'Parsed {val_label} does not align with submitted value: ' - f'Parsed: {val_from_parse}. Original: {orig_val}' + f"Parsed {val_label} does not align with submitted value: " + f"Parsed: {val_from_parse}. Original: {orig_val}" ) raise AmbiguousAddressError(None, msg, orig_addr_str) else: return non_null_val_set.pop() if non_null_val_set else None -def normalize_address_components(parsed_addr: OrderedDict, - long_hand: bool = False) -> OrderedDict: +def normalize_address_components( + parsed_addr: OrderedDict, long_hand: bool = False +) -> OrderedDict: """Normalize parsed sections of address as appropriate. Processes parsed address through subsets of normalization rules. @@ -472,9 +496,9 @@ def normalize_numbered_streets(parsed_addr: OrderedDict) -> OrderedDict: :type parsed_addr: Mapping :return: parsed_addr with ordinal identifiers appended to numbered streets. :rtype: dict""" - street_tags = ['StreetName', 'SecondStreetName'] + street_tags = ["StreetName", "SecondStreetName"] for tag in street_tags: - post_type_tag = '{}PostType'.format(tag) + post_type_tag = "{}PostType".format(tag) # limits updates to numbered street names that include a post street # type, since an ordinal indicator would be inappropriate for some # numbered streets (ie. Country Road 97). @@ -482,14 +506,13 @@ def normalize_numbered_streets(parsed_addr: OrderedDict) -> OrderedDict: try: cardinal = int(parsed_addr[tag]) ord_indicator = get_ordinal_indicator(cardinal) - parsed_addr[tag] = '{}{}'.format(cardinal, ord_indicator) + parsed_addr[tag] = "{}{}".format(cardinal, ord_indicator) except ValueError: pass return parsed_addr -def normalize_directionals(parsed_addr: OrderedDict, - long_hand=False) -> OrderedDict: +def normalize_directionals(parsed_addr: OrderedDict, long_hand=False) -> OrderedDict: """Change directional notations to standard abbreviations. :param parsed_addr: address parsed into ordereddict per usaddress. @@ -500,9 +523,7 @@ def normalize_directionals(parsed_addr: OrderedDict, :rtype: dict """ # get the directional related keys from the current address. - found_directional_tags = ( - tag for tag in parsed_addr.keys() if 'Directional' in tag - ) + found_directional_tags = (tag for tag in parsed_addr.keys() if "Directional" in tag) found_directional_tags = list(found_directional_tags) for found in found_directional_tags: # get the original directional related value per key. @@ -521,8 +542,7 @@ def normalize_directionals(parsed_addr: OrderedDict, return parsed_addr -def normalize_street_types(parsed_addr: OrderedDict, - long_hand=False) -> OrderedDict: +def normalize_street_types(parsed_addr: OrderedDict, long_hand=False) -> OrderedDict: """Change street types to accepted abbreviated format. No change is made if street types do not conform to common usages per @@ -537,7 +557,7 @@ def normalize_street_types(parsed_addr: OrderedDict, """ # get the *Street*Type keys from the current parsed address. found_type_tags = ( - tag for tag in parsed_addr.keys() if 'Street' in tag and 'Type' in tag + tag for tag in parsed_addr.keys() if "Street" in tag and "Type" in tag ) for found in found_type_tags: street_type = parsed_addr[found] @@ -552,8 +572,7 @@ def normalize_street_types(parsed_addr: OrderedDict, return parsed_addr -def normalize_occupancy_type(parsed_addr: OrderedDict, - default=None) -> OrderedDict: +def normalize_occupancy_type(parsed_addr: OrderedDict, default=None) -> OrderedDict: """Change occupancy types to accepted abbreviated format. If there is an occupancy and it does not conform to one of the @@ -569,26 +588,25 @@ def normalize_occupancy_type(parsed_addr: OrderedDict, :return: parsed_addr with occupancy types updated to abbreviated format. :rtype: dict """ - default = default if default is not None else 'UNIT' - occupancy_type_label = 'OccupancyType' + default = default if default is not None else "UNIT" + occupancy_type_label = "OccupancyType" occupancy_type = parsed_addr.pop(occupancy_type_label, None) occupancy_type_abbr = ( occupancy_type if occupancy_type in OCCUPANCY_TYPE_ABBREVIATIONS.values() else OCCUPANCY_TYPE_ABBREVIATIONS.get(occupancy_type) ) - occupancy_id = parsed_addr.get('OccupancyIdentifier') - if ((occupancy_id and not occupancy_id.startswith('#')) - and not occupancy_type_abbr): + occupancy_id = parsed_addr.get("OccupancyIdentifier") + if (occupancy_id and not occupancy_id.startswith("#")) and not occupancy_type_abbr: occupancy_type_abbr = default if occupancy_type_abbr: parsed_list = list(parsed_addr.items()) try: - index = parsed_list.index(('OccupancyIdentifier', occupancy_id)) + index = parsed_list.index(("OccupancyIdentifier", occupancy_id)) except ValueError: msg = ( - 'Address has an occupancy type (ie: Apt, Unit, etc) ' - 'but no occupancy identifier (ie: 101, A, etc)' + "Address has an occupancy type (ie: Apt, Unit, etc) " + "but no occupancy identifier (ie: 101, A, etc)" ) raise AddressNormalizationError(msg) parsed_list.insert(index, (occupancy_type_label, occupancy_type_abbr)) @@ -615,14 +633,13 @@ def normalize_state(state: str | None) -> str | None: def normalize_city(city: str): city = city.split() for i, part in enumerate(city): - replacement = CITY_ABBREVIATIONS.get(part.replace('.', '')) + replacement = CITY_ABBREVIATIONS.get(part.replace(".", "")) if replacement: city[i] = replacement - return ' '.join(city) + return " ".join(city) -def get_normalized_line_segment(parsed_addr: OrderedDict, - line_labels: [str]) -> str: +def get_normalized_line_segment(parsed_addr: OrderedDict, line_labels: [str]) -> str: """ :param parsed_addr: address parsed into ordereddict per usaddress. @@ -630,15 +647,14 @@ def get_normalized_line_segment(parsed_addr: OrderedDict, to the desired address segment (ie address_line_1 or address_line_2). :return: s/r joined values from parsed_addr corresponding to given labels. """ - line_elems = [ - elem for key, elem in parsed_addr.items() if key in line_labels - ] - line_str = ' '.join(line_elems) if line_elems else None + line_elems = [elem for key, elem in parsed_addr.items() if key in line_labels] + line_str = " ".join(line_elems) if line_elems else None return post_clean_addr_str(line_str) -def get_addr_line_str(addr_dict: dict, addr_parts: [str] = None, - comma_separate: bool = False) -> str: +def get_addr_line_str( + addr_dict: dict, addr_parts: [str] = None, comma_separate: bool = False +) -> str: """Get address 'line' elements as a single string. Combines 'address_line_1' and 'address_line_2' elements as a single string @@ -656,10 +672,10 @@ def get_addr_line_str(addr_dict: dict, addr_parts: [str] = None, :rtype: str """ if not addr_parts: - addr_parts = ['address_line_1', 'address_line_2'] + addr_parts = ["address_line_1", "address_line_2"] if not isinstance(addr_parts, (list, tuple)): - raise TypeError('addr_parts must be a list or tuple') - separator = ', ' if comma_separate else ' ' + raise TypeError("addr_parts must be a list or tuple") + separator = ", " if comma_separate else " " addr_str = separator.join( str(addr_dict[elem]) for elem in addr_parts if addr_dict.get(elem) ) @@ -669,16 +685,15 @@ def get_addr_line_str(addr_dict: dict, addr_parts: [str] = None, def format_address_record(address: dict) -> str: # type AddressRecord -> str """Format AddressRecord as string.""" - address_template = Template('$address') + address_template = Template("$address") address = dict(address) - addr_parts = [ - str(address[field]) for field in ADDRESS_KEYS if address.get(field) - ] - return address_template.safe_substitute(address=', '.join(addr_parts)) + addr_parts = [str(address[field]) for field in ADDRESS_KEYS if address.get(field)] + return address_template.safe_substitute(address=", ".join(addr_parts)) -def get_geocoder_normalized_addr(address: dict | str, - addr_keys: [str] = ADDRESS_KEYS) -> dict: +def get_geocoder_normalized_addr( + address: dict | str, addr_keys: [str] = ADDRESS_KEYS +) -> dict: """Get geocoder normalized address parsed to dict with addr_keys. :param address: string or dict-like containing address data @@ -689,18 +704,17 @@ def get_geocoder_normalized_addr(address: dict | str, address_line_2 = None geo_addr_dict = {} if not isinstance(address, str): - address_line_2 = address.get('address_line_2') + address_line_2 = address.get("address_line_2") address = get_addr_line_str(address, addr_parts=addr_keys) geo_resp = geocoder.google(address) if geo_resp.ok and geo_resp.housenumber: line2 = geo_resp.subpremise or address_line_2 geo_addr_dict = { - 'address_line_1': - ' '.join([geo_resp.housenumber, geo_resp.street]), - 'address_line_2': strip_occupancy_type(line2), - 'city': geo_resp.city, - 'state': geo_resp.state, - 'postal_code': geo_resp.postal + "address_line_1": " ".join([geo_resp.housenumber, geo_resp.street]), + "address_line_2": strip_occupancy_type(line2), + "city": geo_resp.city, + "state": geo_resp.state, + "postal_code": geo_resp.postal, } for key, value in geo_addr_dict.items(): geo_addr_dict[key] = value.upper() if value else None @@ -722,14 +736,14 @@ def get_ordinal_indicator(number: int) -> str: """ str_num = str(number) digits = len(str_num) - if str_num[-1] == '1' and not (digits >= 2 and str_num[-2:] == '11'): - return 'st' - elif str_num[-1] == '2' and not (digits >= 2 and str_num[-2:] == '12'): - return 'nd' - elif str_num[-1] == '3' and not (digits >= 2 and str_num[-2:] == '13'): - return 'rd' + if str_num[-1] == "1" and not (digits >= 2 and str_num[-2:] == "11"): + return "st" + elif str_num[-1] == "2" and not (digits >= 2 and str_num[-2:] == "12"): + return "nd" + elif str_num[-1] == "3" and not (digits >= 2 and str_num[-2:] == "13"): + return "rd" else: - return 'th' + return "th" class NormalizeAddress(object): @@ -759,6 +773,7 @@ class NormalizeAddress(object): directionals and street types in the output. :return: address dict containing parsed and normalized address values. """ + parse_address_string = staticmethod(parse_address_string) pre_clean_addr_str = staticmethod(pre_clean_addr_str) post_clean_addr_str = staticmethod(post_clean_addr_str) @@ -766,16 +781,15 @@ class NormalizeAddress(object): normalize_address_components = staticmethod(normalize_address_components) get_parsed_values = staticmethod(get_parsed_values) - def __init__(self, address, addr_map=None, addtl_funcs=None, - strict=None, long_hand=False): + def __init__( + self, address, addr_map=None, addtl_funcs=None, strict=None, long_hand=False + ): self.address = address self.addtl_funcs = addtl_funcs self.strict = True if strict is None else strict self.long_hand = long_hand if addr_map and not isinstance(self.address, str): - self.address = { - key: self.address.get(val) for key, val in addr_map.items() - } + self.address = {key: self.address.get(val) for key, val in addr_map.items()} @staticmethod def get_normalized_line_1(parsed_addr, line_labels=LINE1_USADDRESS_LABELS): @@ -787,19 +801,19 @@ def get_normalized_line_2(parsed_addr, line_labels=LINE2_USADDRESS_LABELS): def normalize(self): if isinstance(self.address, str): - return self.normalize_addr_str( - self.address, long_hand=self.long_hand - ) + return self.normalize_addr_str(self.address, long_hand=self.long_hand) else: return self.normalize_addr_dict() - def normalize_addr_str(self, addr_str, # type: str - line2=None, # type: Optional[str] - city=None, # type: Optional[str] - state=None, # type: Optional[str] - zipcode=None, # type: Optional[str] - long_hand=False - ): # noqa + def normalize_addr_str( + self, + addr_str, # type: str + line2=None, # type: Optional[str] + city=None, # type: Optional[str] + state=None, # type: Optional[str] + zipcode=None, # type: Optional[str] + long_hand=False, + ): # noqa # get address parsed into usaddress components. error = None parsed_addr = None @@ -816,24 +830,29 @@ def normalize_addr_str(self, addr_str, # type: str # send refactored line_1 and line_2 back through # processing return self.normalize_addr_str( - line1, line2=line2, - city=city, state=state, zipcode=zipcode, - long_hand=long_hand + line1, + line2=line2, + city=city, + state=state, + zipcode=zipcode, + long_hand=long_hand, ) except ValueError: # try a different additional processing function pass - if parsed_addr and not parsed_addr.get('StreetName'): + if parsed_addr and not parsed_addr.get("StreetName"): addr_dict = dict( - address_line_1=addr_str, address_line_2=line2, city=city, - state=state, postal_code=zipcode + address_line_1=addr_str, + address_line_2=line2, + city=city, + state=state, + postal_code=zipcode, ) full_addr = self.format_address_record(addr_dict) try: parsed_addr = self.parse_address_string(full_addr) - except (usaddress.RepeatedLabelError, - AmbiguousAddressError) as err: + except (usaddress.RepeatedLabelError, AmbiguousAddressError) as err: parsed_addr = None error = err @@ -841,13 +860,9 @@ def normalize_addr_str(self, addr_str, # type: str parsed_addr = self.normalize_address_components( parsed_addr, long_hand=long_hand ) - zipcode = self.get_parsed_values( - parsed_addr, zipcode, 'ZipCode', addr_str - ) + zipcode = self.get_parsed_values(parsed_addr, zipcode, "ZipCode", addr_str) city = self.normalize_city(parsed_addr, addr_str, city) - state = self.get_parsed_values( - parsed_addr, state, 'StateName', addr_str - ) + state = self.get_parsed_values(parsed_addr, state, "StateName", addr_str) state = normalize_state(state) # assumes if line2 is passed in that it need not be parsed from @@ -863,8 +878,11 @@ def normalize_addr_str(self, addr_str, # type: str line1 = addr_str addr_rec = OrderedDict( - address_line_1=line1, address_line_2=line2, city=city, - state=state, postal_code=zipcode + address_line_1=line1, + address_line_2=line2, + city=city, + state=state, + postal_code=zipcode, ) if error: raise UnParseableAddressError(None, None, addr_rec) @@ -872,36 +890,40 @@ def normalize_addr_str(self, addr_str, # type: str return addr_rec def normalize_addr_dict(self): - addr_dict = validate_address_components( - self.address, strict=self.strict - ) + addr_dict = validate_address_components(self.address, strict=self.strict) # line 1 and line 2 elements are combined to ensure consistent # processing whether the line 2 elements are pre-parsed or # included in line 1 addr_str = get_addr_line_str(addr_dict, comma_separate=True) - postal_code = addr_dict.get('postal_code') - zipcode = validate_us_postal_code_format( - postal_code, addr_dict - ) if postal_code else None - city = addr_dict.get('city') - state = addr_dict.get('state') + postal_code = addr_dict.get("postal_code") + zipcode = ( + validate_us_postal_code_format(postal_code, addr_dict) + if postal_code + else None + ) + city = addr_dict.get("city") + state = addr_dict.get("state") try: address = self.normalize_addr_str( - addr_str, city=city, state=state, - zipcode=zipcode, long_hand=self.long_hand + addr_str, + city=city, + state=state, + zipcode=zipcode, + long_hand=self.long_hand, ) except AddressNormalizationError: addr_str = get_addr_line_str( addr_dict, comma_separate=True, addr_parts=ADDRESS_KEYS ) address = self.normalize_addr_str( - addr_str, city=city, state=state, - zipcode=zipcode, long_hand=self.long_hand + addr_str, + city=city, + state=state, + zipcode=zipcode, + long_hand=self.long_hand, ) return address def normalize_city(self, parsed_addr, addr_str, city=None): - return self.get_parsed_values( - parsed_addr, city, 'PlaceName', addr_str - ) + return self.get_parsed_values(parsed_addr, city, "PlaceName", addr_str) diff --git a/scourgify/tests/test_address_normalization.py b/scourgify/tests/test_address_normalization.py index f894629..1997f11 100644 --- a/scourgify/tests/test_address_normalization.py +++ b/scourgify/tests/test_address_normalization.py @@ -30,6 +30,7 @@ UnParseableAddressError, ) from scourgify.normalize import ( + NormalizeAddress, get_addr_line_str, get_geocoder_normalized_addr, get_normalized_line_segment, @@ -44,7 +45,6 @@ normalize_state, normalize_street_types, parse_address_string, - NormalizeAddress ) from scourgify.validations import ( validate_address_components, @@ -53,86 +53,89 @@ ) # Constants -SERVICE = 'GBR Test Normalization' +SERVICE = "GBR Test Normalization" # Helper Functions & Classes # Tests class TestAddressNormalization(TestCase): """Unit tests for scourgify""" + # pylint:disable=too-many-arguments def setUp(self): """setUp""" self.expected = dict( - address_line_1='123 NOWHERE ST', - address_line_2='STE 0', - city='BORING', - state='OR', - postal_code='97009' + address_line_1="123 NOWHERE ST", + address_line_2="STE 0", + city="BORING", + state="OR", + postal_code="97009", ) self.address_dict = dict( - address_line_1='123 Nowhere St', - address_line_2='Suite 0', - city='Boring', - state='OR', - postal_code='97009' + address_line_1="123 Nowhere St", + address_line_2="Suite 0", + city="Boring", + state="OR", + postal_code="97009", ) self.ordinal_addr = dict( - address_line_1='4333 NE 113th', - city='Boring', - state='OR', - postal_code='97009' + address_line_1="4333 NE 113th", + city="Boring", + state="OR", + postal_code="97009", ) self.ordinal_expected = dict( - address_line_1='4333 NE 113TH', + address_line_1="4333 NE 113TH", address_line_2=None, - city='BORING', - state='OR', - postal_code='97009' - ) - self.parseable_addr_str = '123 Nowhere Street Suite 0 Boring OR 97009' - self.parsed_addr = OrderedDict([ - ('AddressNumber', '123'), - ('StreetName', 'NOWHERE'), - ('StreetNamePostType', 'STREET'), - ('OccupancyType', 'SUITE'), - ('OccupancyIdentifier', '0'), - ('PlaceName', 'BORING'), - ('StateName', 'OR'), - ('ZipCode', '97009') - ]) - self.hash_tag = '999 Nowhere Street # 12 Boring OR 97009' + city="BORING", + state="OR", + postal_code="97009", + ) + self.parseable_addr_str = "123 Nowhere Street Suite 0 Boring OR 97009" + self.parsed_addr = OrderedDict( + [ + ("AddressNumber", "123"), + ("StreetName", "NOWHERE"), + ("StreetNamePostType", "STREET"), + ("OccupancyType", "SUITE"), + ("OccupancyIdentifier", "0"), + ("PlaceName", "BORING"), + ("StateName", "OR"), + ("ZipCode", "97009"), + ] + ) + self.hash_tag = "999 Nowhere Street # 12 Boring OR 97009" self.hash_expected = dict( - address_line_1='999 NOWHERE ST', - address_line_2='# 12', - city='BORING', - state='OR', - postal_code='97009' + address_line_1="999 NOWHERE ST", + address_line_2="# 12", + city="BORING", + state="OR", + postal_code="97009", ) - self.unparesable_addr_str = '6000 SW 1000TH AVE (BLDG A5 RIGHT)' + self.unparseable_addr_str = "6000 SW 1000TH AVE (BLDG A5 RIGHT)" self.direction_expected = dict( - address_line_1='123 SW NOWHERE ST', - address_line_2='STE 0', - city='BORING', - state='OR', - postal_code='97009' + address_line_1="123 SW NOWHERE ST", + address_line_2="STE 0", + city="BORING", + state="OR", + postal_code="97009", ) self.long_hand_expected = dict( - address_line_1='123 SOUTHWEST NOWHERE STREET', - address_line_2='STE 0', - city='BORING', - state='OR', - postal_code='97009' + address_line_1="123 SOUTHWEST NOWHERE STREET", + address_line_2="STE 0", + city="BORING", + state="OR", + postal_code="97009", ) self.abnormal_direction = dict( - address_line_1='123 South-West Nowhere St', - address_line_2='Suite 0', - city='Boring', - state='OR', - postal_code='97009' + address_line_1="123 South-West Nowhere St", + address_line_2="Suite 0", + city="Boring", + state="OR", + postal_code="97009", ) def test_normalize_address_record(self): @@ -152,9 +155,7 @@ def test_normalize_address_record(self): result = normalize_address_record(self.abnormal_direction) self.assertDictEqual(self.direction_expected, result) - result = normalize_address_record( - self.abnormal_direction, long_hand=True - ) + result = normalize_address_record(self.abnormal_direction, long_hand=True) self.assertDictEqual(self.long_hand_expected, result) def test_normalize_class(self): @@ -174,9 +175,7 @@ def test_normalize_class(self): result = NormalizeAddress(self.abnormal_direction).normalize() self.assertDictEqual(self.direction_expected, result) - result = NormalizeAddress( - self.abnormal_direction, long_hand=True - ).normalize() + result = NormalizeAddress(self.abnormal_direction, long_hand=True).normalize() self.assertDictEqual(self.long_hand_expected, result) def test_normalize_addr_str(self): @@ -184,46 +183,55 @@ def test_normalize_addr_str(self): result = normalize_addr_str(self.parseable_addr_str) self.assertDictEqual(self.expected, result) - broken_line1 = '6000 SW 1000TH AVE ' - broken_line2 = '(BLDG A1 RIGHT)' + broken_line1 = "6000 SW 1000TH AVE " + broken_line2 = "(BLDG A1 RIGHT)" result = normalize_addr_str( - broken_line1, line2=broken_line2, - city='Portland', state='OR', zipcode='97203' + broken_line1, + line2=broken_line2, + city="Portland", + state="OR", + zipcode="97203", ) expected = { - 'address_line_1': '6000 SW 1000TH AVE', - 'address_line_2': 'BLDG A1 RIGHT', - 'state': 'OR', 'city': 'PORTLAND', - 'postal_code': '97203' + "address_line_1": "6000 SW 1000TH AVE", + "address_line_2": "BLDG A1 RIGHT", + "state": "OR", + "city": "PORTLAND", + "postal_code": "97203", } self.assertDictEqual(expected, result) def addtl_test_func(addr_str): - if 'BLDG A1' in addr_str: - return '123 NOWHERE STREET', 'BLDG A1 RIGHT' + if "BLDG A1" in addr_str: + return "123 NOWHERE STREET", "BLDG A1 RIGHT" else: raise ValueError - addtl_processing = '123 Nowhere Street (BLDG A1 RIGHT)' + addtl_processing = "123 Nowhere Street (BLDG A1 RIGHT)" expected = { - 'address_line_1': '123 NOWHERE ST', - 'address_line_2': 'BLDG A1 RIGHT', - 'state': 'OR', 'city': 'PORTLAND', - 'postal_code': '97203' + "address_line_1": "123 NOWHERE ST", + "address_line_2": "BLDG A1 RIGHT", + "state": "OR", + "city": "PORTLAND", + "postal_code": "97203", } result = normalize_addr_str( - addtl_processing, city='Portland', state='OR', zipcode='97203', - addtl_funcs=[addtl_test_func] + addtl_processing, + city="Portland", + state="OR", + zipcode="97203", + addtl_funcs=[addtl_test_func], ) self.assertDictEqual(expected, result) self.assertRaises( UnParseableAddressError, normalize_addr_str, - self.unparesable_addr_str, - city='Portland', state='OR', zipcode='97203', - addtl_funcs=[addtl_test_func] - + self.unparseable_addr_str, + city="Portland", + state="OR", + zipcode="97203", + addtl_funcs=[addtl_test_func], ) def test_normalize_addr_dict(self): @@ -232,18 +240,18 @@ def test_normalize_addr_dict(self): self.assertDictEqual(self.expected, result) alternate_dict = dict( - address1='123 Nowhere St', - address2='Suite 0', - city='Boring', - state='OR', - zip='97009' + address1="123 Nowhere St", + address2="Suite 0", + city="Boring", + state="OR", + zip="97009", ) dict_map = { - 'address_line_1': 'address1', - 'address_line_2': 'address2', - 'city': 'city', - 'state': 'state', - 'postal_code': 'zip' + "address_line_1": "address1", + "address_line_2": "address2", + "city": "city", + "state": "state", + "postal_code": "zip", } result = normalize_addr_dict(alternate_dict, addr_map=dict_map) self.assertDictEqual(self.expected, result) @@ -253,7 +261,7 @@ def test_parse_address_string(self): result = parse_address_string(self.parseable_addr_str) self.assertIsInstance(result, OrderedDict) - ambig_addr_str = 'AWBREY VILLAGE' + ambig_addr_str = "AWBREY VILLAGE" with self.assertRaises(AmbiguousAddressError): parse_address_string(ambig_addr_str) @@ -265,92 +273,92 @@ def test_normalize_occupancies(self): through even though no unit should have existed on the home. """ dict_map = { - 'address_line_1': 'address1', - 'address_line_2': 'address2', - 'city': 'city', - 'state': 'state', - 'postal_code': 'zip' + "address_line_1": "address1", + "address_line_2": "address2", + "city": "city", + "state": "state", + "postal_code": "zip", } weird_unit = dict( - address1='123 Nowhere St', - address2='Ave 345', - city='Boring', - state='OR', - zip='97009' + address1="123 Nowhere St", + address2="Ave 345", + city="Boring", + state="OR", + zip="97009", ) expected = dict( - address_line_1='123 NOWHERE ST', - address_line_2='UNIT 345', - city='BORING', - state='OR', - postal_code='97009' + address_line_1="123 NOWHERE ST", + address_line_2="UNIT 345", + city="BORING", + state="OR", + postal_code="97009", ) result = normalize_addr_dict(weird_unit, addr_map=dict_map) self.assertDictEqual(expected, result) late_unit_add = dict( - address1='123 Nowhere St', - address2='345', - city='Boring', - state='OR', - zip='97009' + address1="123 Nowhere St", + address2="345", + city="Boring", + state="OR", + zip="97009", ) result = normalize_addr_dict(late_unit_add, addr_map=dict_map) self.assertDictEqual(expected, result) expected = dict( - address_line_1='123 NOWHERE ST', - address_line_2='# 345', - city='BORING', - state='OR', - postal_code='97009' + address_line_1="123 NOWHERE ST", + address_line_2="# 345", + city="BORING", + state="OR", + postal_code="97009", ) hashtag_unit = dict( - address1='123 Nowhere St', - address2='# 345', - city='Boring', - state='OR', - zip='97009' + address1="123 Nowhere St", + address2="# 345", + city="Boring", + state="OR", + zip="97009", ) result = normalize_addr_dict(hashtag_unit, addr_map=dict_map) self.assertDictEqual(expected, result) hashtag_unit = dict( - address1='123 Nowhere St', - address2='#345', - city='Boring', - state='OR', - zip='97009' + address1="123 Nowhere St", + address2="#345", + city="Boring", + state="OR", + zip="97009", ) result = normalize_addr_dict(hashtag_unit, addr_map=dict_map) self.assertDictEqual(expected, result) expected = dict( - address_line_1='123 NOWHERE ST', - address_line_2='APT 345', - city='BORING', - state='OR', - postal_code='97009' + address_line_1="123 NOWHERE ST", + address_line_2="APT 345", + city="BORING", + state="OR", + postal_code="97009", ) abbreviation = dict( - address1='123 Nowhere St', - address2='Apt 345', - city='Boring', - state='OR', - zip='97009' + address1="123 Nowhere St", + address2="Apt 345", + city="Boring", + state="OR", + zip="97009", ) result = normalize_addr_dict(abbreviation, addr_map=dict_map) self.assertDictEqual(expected, result) full_name = dict( - address1='123 Nowhere St', - address2='Apartment 345', - city='Boring', - state='OR', - zip='97009' + address1="123 Nowhere St", + address2="Apartment 345", + city="Boring", + state="OR", + zip="97009", ) result = normalize_addr_dict(full_name, addr_map=dict_map) self.assertDictEqual(expected, result) @@ -360,97 +368,111 @@ class TestAddressNormalizationUtils(TestCase): """Unit tests for scourgify utils""" def setUp(self): - self.address_dict = dict( - address_line_1='123 Nowhere St', - address_line_2='Suite 0', - city='Boring', - state='OR', - postal_code='97009' - ) - self.parseable_addr = '123 Nowhere Street Suite 0 Boring OR 97009' - self.parsed_addr = OrderedDict([ - ('AddressNumber', '123'), - ('StreetName', 'NOWHERE'), - ('StreetNamePostType', 'STREET'), - ('OccupancyType', 'SUITE'), - ('OccupancyIdentifier', '0'), - ('PlaceName', 'BORING'), - ('StateName', 'OR'), - ('ZipCode', '97009') - ]) - - self.unparesable_addr = '6000 SW 1000TH AVE (BLDG A1 RIGHT)' - - self.unparesable_addr_dict = OrderedDict([ - ('AddressNumber', '6000'), - ('StreetNamePreDirectional', 'SW'), - ('StreetName', '1000TH'), - ('StreetNamePostType', 'AVE'), - ('SubaddressType', 'BLDG'), - ('SubaddressIdentifier', 'A1'), - ('SubaddressType', 'RIGHT') - ]) + address_line_1="123 Nowhere St", + address_line_2="Suite 0", + city="Boring", + state="OR", + postal_code="97009", + ) + self.parseable_addr = "123 Nowhere Street Suite 0 Boring OR 97009" + self.parsed_addr = OrderedDict( + [ + ("AddressNumber", "123"), + ("StreetName", "NOWHERE"), + ("StreetNamePostType", "STREET"), + ("OccupancyType", "SUITE"), + ("OccupancyIdentifier", "0"), + ("PlaceName", "BORING"), + ("StateName", "OR"), + ("ZipCode", "97009"), + ] + ) + + self.unparseable_addr = "6000 SW 1000TH AVE (BLDG A1 RIGHT)" + + self.unparseable_addr_dict = OrderedDict( + [ + ("AddressNumber", "6000"), + ("StreetNamePreDirectional", "SW"), + ("StreetName", "1000TH"), + ("StreetNamePostType", "AVE"), + ("SubaddressType", "BLDG"), + ("SubaddressIdentifier", "A1"), + ("SubaddressType", "RIGHT"), + ] + ) def test_get_parsed_values(self): """Test get_parsed_values function.""" - expected = 'BORING' - result = get_parsed_values(self.parsed_addr, 'Boring', - 'PlaceName', self.parseable_addr) + expected = "BORING" + result = get_parsed_values( + self.parsed_addr, "Boring", "PlaceName", self.parseable_addr + ) self.assertEqual(expected, result) - expected = 'ONE VALUE PRESENT' - result = get_parsed_values(self.parsed_addr, 'One Value Present', - 'LandmarkName', self.parseable_addr) + expected = "ONE VALUE PRESENT" + result = get_parsed_values( + self.parsed_addr, "One Value Present", "LandmarkName", self.parseable_addr + ) self.assertEqual(expected, result) - result = get_parsed_values(self.parsed_addr, None, - 'LandmarkName', self.parseable_addr) + result = get_parsed_values( + self.parsed_addr, None, "LandmarkName", self.parseable_addr + ) self.assertIsNone(result) with self.assertRaises(AmbiguousAddressError): - get_parsed_values(self.parsed_addr, 'UnMatched City', - 'PlaceName', self.parseable_addr) + get_parsed_values( + self.parsed_addr, "UnMatched City", "PlaceName", self.parseable_addr + ) def test_get_norm_line_segment(self): """Test get normalized_line_segment function.""" - result = get_normalized_line_segment(self.parsed_addr, - ['StreetName', 'AddressNumber']) - expected = '{} {}'.format(self.parsed_addr['AddressNumber'], - self.parsed_addr['StreetName']) + result = get_normalized_line_segment( + self.parsed_addr, ["StreetName", "AddressNumber"] + ) + expected = "{} {}".format( + self.parsed_addr["AddressNumber"], self.parsed_addr["StreetName"] + ) self.assertEqual(expected, result) result = get_normalized_line_segment( self.parsed_addr, - ['StreetName', 'StreetNamePostType', 'IntersectionSeparator'] + ["StreetName", "StreetNamePostType", "IntersectionSeparator"], + ) + expected = "{} {}".format( + self.parsed_addr["StreetName"], self.parsed_addr["StreetNamePostType"] ) - expected = '{} {}'.format(self.parsed_addr['StreetName'], - self.parsed_addr['StreetNamePostType']) self.assertEqual(expected, result) def test_normalize_numbered_streets(self): """Test normalize_numbered_streets function.""" - numbered_addr = OrderedDict([ - ('AddressNumber', '123'), - ('StreetName', '100'), - ('StreetNamePostType', 'STREET') - ]) - county_road = OrderedDict([ - ('AddressNumber', '123'), - ('StreetNamePreType', 'COUNTY ROAD'), - ('StreetName', '100') - ]) - string_addr = OrderedDict([ - ('AddressNumber', '123'), - ('StreetName', '91st'), - ('StreetNamePostType', 'STREET') - ]) - - expected = '{}{}'.format( - numbered_addr['StreetName'], 'th' - ) + numbered_addr = OrderedDict( + [ + ("AddressNumber", "123"), + ("StreetName", "100"), + ("StreetNamePostType", "STREET"), + ] + ) + county_road = OrderedDict( + [ + ("AddressNumber", "123"), + ("StreetNamePreType", "COUNTY ROAD"), + ("StreetName", "100"), + ] + ) + string_addr = OrderedDict( + [ + ("AddressNumber", "123"), + ("StreetName", "91st"), + ("StreetNamePostType", "STREET"), + ] + ) + + expected = "{}{}".format(numbered_addr["StreetName"], "th") result = normalize_numbered_streets(numbered_addr) - self.assertEqual(expected, result['StreetName']) + self.assertEqual(expected, result["StreetName"]) result = normalize_numbered_streets(county_road) self.assertDictEqual(county_road, result) @@ -460,27 +482,36 @@ def test_normalize_numbered_streets(self): def test_normalize_directionals(self): """Test normalize_directionals function.""" - unabbr_directional = OrderedDict([ - ('AddressNumber', '123'), - ('StreetNamePreDirectional', 'South West', ), - ('StreetName', '100'), - ('StreetNamePostType', 'STREET') - ]) - abbrev_directional = OrderedDict([ - ('AddressNumber', '123'), - ('StreetNamePreDirectional', 'SW'), - ('StreetNamePreType', 'COUNTY ROAD'), - ('StreetName', '100') - ]) - no_directional = OrderedDict([ - ('AddressNumber', '123'), - ('StreetName', '91st'), - ('StreetNamePostType', 'STREET') - ]) - - expected = 'SW' + unabbr_directional = OrderedDict( + [ + ("AddressNumber", "123"), + ( + "StreetNamePreDirectional", + "South West", + ), + ("StreetName", "100"), + ("StreetNamePostType", "STREET"), + ] + ) + abbrev_directional = OrderedDict( + [ + ("AddressNumber", "123"), + ("StreetNamePreDirectional", "SW"), + ("StreetNamePreType", "COUNTY ROAD"), + ("StreetName", "100"), + ] + ) + no_directional = OrderedDict( + [ + ("AddressNumber", "123"), + ("StreetName", "91st"), + ("StreetNamePostType", "STREET"), + ] + ) + + expected = "SW" result = normalize_directionals(unabbr_directional) - self.assertEqual(expected, result['StreetNamePreDirectional']) + self.assertEqual(expected, result["StreetNamePreDirectional"]) result = normalize_directionals(abbrev_directional) self.assertDictEqual(abbrev_directional, result) @@ -488,39 +519,59 @@ def test_normalize_directionals(self): result = normalize_directionals(no_directional) self.assertDictEqual(no_directional, result) - expected = 'SOUTHWEST' + expected = "SOUTHWEST" result = normalize_directionals(abbrev_directional, long_hand=True) - self.assertEqual(expected, result['StreetNamePreDirectional']) + self.assertEqual(expected, result["StreetNamePreDirectional"]) def test_normalize_street_types(self): """Test normalize_street_types function.""" - unabbr_type = OrderedDict([ - ('AddressNumber', '123'), - ('StreetNamePreDirectional', 'SW', ), - ('StreetName', 'MAIN'), - ('StreetNamePostType', 'STREET') - ]) - abbrev_type = OrderedDict([ - ('AddressNumber', '123'), - ('StreetNamePreDirectional', 'SW', ), - ('StreetName', 'MAIN'), - ('StreetNamePostType', 'AVE') - ]) - typo_type = OrderedDict([ - ('AddressNumber', '123'), - ('StreetNamePreDirectional', 'SW', ), - ('StreetName', 'MAIN'), - ('StreetNamePostType', 'STROET') - ]) - no_type = OrderedDict([ - ('AddressNumber', '123'), - ('StreetNamePreDirectional', 'SW', ), - ('StreetName', 'MAIN'), - ]) - - expected = 'ST' + unabbr_type = OrderedDict( + [ + ("AddressNumber", "123"), + ( + "StreetNamePreDirectional", + "SW", + ), + ("StreetName", "MAIN"), + ("StreetNamePostType", "STREET"), + ] + ) + abbrev_type = OrderedDict( + [ + ("AddressNumber", "123"), + ( + "StreetNamePreDirectional", + "SW", + ), + ("StreetName", "MAIN"), + ("StreetNamePostType", "AVE"), + ] + ) + typo_type = OrderedDict( + [ + ("AddressNumber", "123"), + ( + "StreetNamePreDirectional", + "SW", + ), + ("StreetName", "MAIN"), + ("StreetNamePostType", "STROET"), + ] + ) + no_type = OrderedDict( + [ + ("AddressNumber", "123"), + ( + "StreetNamePreDirectional", + "SW", + ), + ("StreetName", "MAIN"), + ] + ) + + expected = "ST" result = normalize_street_types(unabbr_type) - self.assertEqual(expected, result['StreetNamePostType']) + self.assertEqual(expected, result["StreetNamePostType"]) result = normalize_street_types(abbrev_type) self.assertDictEqual(abbrev_type, result) @@ -531,24 +582,24 @@ def test_normalize_street_types(self): result = normalize_street_types(no_type) self.assertDictEqual(no_type, result) - expected = 'AVENUE' + expected = "AVENUE" result = normalize_street_types(abbrev_type, long_hand=True) - self.assertEqual(expected, result['StreetNamePostType']) + self.assertEqual(expected, result["StreetNamePostType"]) def test_normalize_occupancy_type(self): """Test normalize_occupancy_type function.""" - expected = 'STE' + expected = "STE" result = normalize_occupancy_type(self.parsed_addr) - self.assertEqual(expected, result['OccupancyType']) + self.assertEqual(expected, result["OccupancyType"]) def test_normalize_state(self): """Test normalize_state function""" - state = 'ore' - expected = 'OR' + state = "ore" + expected = "OR" result = normalize_state(state) self.assertEqual(expected, result) - state = 'oregano' + state = "oregano" expected = state result = normalize_state(state) self.assertEqual(expected, result) @@ -557,22 +608,22 @@ def test_normalize_state(self): def test_pre_clean_addr_str(self): """Test pre_clean_addr_str function""" - odd_addr = '123 Nowhere Street, Suite 0; @Boring OR 97009' + odd_addr = "123 Nowhere Street, Suite 0; @Boring OR 97009" # we're leaving commas in the pre-clean until norm can be revisited - expected = '123 Nowhere Street, Suite 0 Boring OR 97009'.upper() + expected = "123 Nowhere Street, Suite 0 Boring OR 97009".upper() # expected = '123 Nowhere Street Suite 0 Boring OR 97009'.upper() result = pre_clean_addr_str(odd_addr) self.assertEqual(expected, result) def test_post_clean_addr_str(self): """Test post_clean_addr_str function.""" - addr_str = '(100-104) SW NO WHERE st' - expected = '100-104 SW NO WHERE ST' + addr_str = "(100-104) SW NO WHERE st" + expected = "100-104 SW NO WHERE ST" result = post_clean_addr_str(addr_str) self.assertEqual(expected, result) self.assertIsNone(post_clean_addr_str(None)) - self.assertEqual('', post_clean_addr_str('')) + self.assertEqual("", post_clean_addr_str("")) def test_validate_address(self): """Test validate_address_components function.""" @@ -582,41 +633,41 @@ def test_validate_address(self): minus_line1 = dict( address_line_1=None, - address_line_2='Suite 0', - city='Boring', - state='OR', - postal_code='97009' + address_line_2="Suite 0", + city="Boring", + state="OR", + postal_code="97009", ) with self.assertRaises(IncompleteAddressError): validate_address_components(minus_line1) minus_zip = dict( - address_line_1='123 NoWhere St', - address_line_2='Suite 0', - city='Boring', - state='OR', - postal_code=None + address_line_1="123 NoWhere St", + address_line_2="Suite 0", + city="Boring", + state="OR", + postal_code=None, ) with self.assertRaises(IncompleteAddressError): validate_address_components(minus_zip) minus_city_state = dict( - address_line_1='123 NoWhere St', - address_line_2='Suite 0', + address_line_1="123 NoWhere St", + address_line_2="Suite 0", city=None, state=None, - postal_code='97009' + postal_code="97009", ) with self.assertRaises(IncompleteAddressError): validate_address_components(minus_city_state) minus_city_state_zip = dict( - address_line_1='123 NoWhere St', - address_line_2='Suite 0', + address_line_1="123 NoWhere St", + address_line_2="Suite 0", city=None, state=None, - postal_code=None + postal_code=None, ) with self.assertRaises(IncompleteAddressError): validate_address_components(minus_city_state_zip) @@ -625,98 +676,90 @@ def test_validate_postal_code(self): """Test validate_us_postal_code_format""" with self.assertRaises(AddressValidationError): - zip_five = 'AAAAA' + zip_five = "AAAAA" validate_us_postal_code_format(zip_five, self.address_dict) with self.assertRaises(AddressValidationError): - zip_five = '97219-AAAA' + zip_five = "97219-AAAA" validate_us_postal_code_format(zip_five, self.address_dict) with self.assertRaises(AddressValidationError): - zip_plus = '97219-000100' + zip_plus = "97219-000100" validate_us_postal_code_format(zip_plus, self.address_dict) with self.assertRaises(AddressValidationError): - zip_plus = '97219-0001-00' + zip_plus = "97219-0001-00" validate_us_postal_code_format(zip_plus, self.address_dict) with self.assertRaises(AddressValidationError): - zip_five = '9721900' + zip_five = "9721900" validate_us_postal_code_format(zip_five, self.address_dict) - zip_five = '972' - expected = '00972' + zip_five = "972" + expected = "00972" result = validate_us_postal_code_format(zip_five, self.address_dict) self.assertEqual(expected, result) - zip_plus = '97219-00' - expected = '97219-0000' + zip_plus = "97219-00" + expected = "97219-0000" result = validate_us_postal_code_format(zip_plus, self.address_dict) self.assertEqual(expected, result) - zip_plus = '972-0001' - expected = '00972-0001' + zip_plus = "972-0001" + expected = "00972-0001" result = validate_us_postal_code_format(zip_plus, self.address_dict) self.assertEqual(expected, result) - zip_plus = '972190001' - expected = '97219-0001' + zip_plus = "972190001" + expected = "97219-0001" result = validate_us_postal_code_format(zip_plus, self.address_dict) self.assertEqual(expected, result) - expected = '97219' + expected = "97219" result = validate_us_postal_code_format(expected, self.address_dict) self.assertEqual(expected, result) def test_get_addr_line_str(self): """Test get_addr_line_str function.""" - expected = '{} {}'.format( - self.address_dict['address_line_1'], - self.address_dict['address_line_2'] + expected = "{} {}".format( + self.address_dict["address_line_1"], self.address_dict["address_line_2"] ) result = get_addr_line_str(self.address_dict) self.assertEqual(expected, result) - no_line_2 = { - 'address_line_1': 'address line 1' - } - expected = no_line_2['address_line_1'] + no_line_2 = {"address_line_1": "address line 1"} + expected = no_line_2["address_line_1"] result = get_addr_line_str(no_line_2) self.assertEqual(expected, result) - empty_line_2 = { - 'address_line_1': 'address line 1', - 'address_line_2': None - } - expected = no_line_2['address_line_1'] + empty_line_2 = {"address_line_1": "address line 1", "address_line_2": None} + expected = no_line_2["address_line_1"] result = get_addr_line_str(empty_line_2) self.assertEqual(expected, result) with self.assertRaises(TypeError): - get_addr_line_str(self.address_dict, addr_parts='line1') + get_addr_line_str(self.address_dict, addr_parts="line1") - @mock.patch( - 'scourgify.normalize.geocoder' - ) + @mock.patch("scourgify.normalize.geocoder") def test_get_geocoder_normalized_addr(self, mock_geocoder): """Test get_geocoder_normalized_addr""" geo_addr = mock.MagicMock() geo_addr.ok = True - geo_addr.housenumber = '1234' + geo_addr.housenumber = "1234" geo_addr.street = "Main" - geo_addr.subpremise = '' - geo_addr.city = 'Boring' - geo_addr.state = 'OR' - geo_addr.postal = '97000' + geo_addr.subpremise = "" + geo_addr.city = "Boring" + geo_addr.state = "OR" + geo_addr.postal = "97000" mock_geocoder.google.return_value = geo_addr address = { - 'address_line_1': '1234 Main', - 'address_line_2': None, - 'city': 'Boring', - 'state': 'OR', - 'postal_code': '97000' + "address_line_1": "1234 Main", + "address_line_2": None, + "city": "Boring", + "state": "OR", + "postal_code": "97000", } addr_str_return_value = "1234 Main Boring OR 97000" get_geocoder_normalized_addr(address) @@ -725,7 +768,7 @@ def test_get_geocoder_normalized_addr(self, mock_geocoder): def test_get_ordinal_indicator(self): """Test get_ordinal_indicator""" result = get_ordinal_indicator(11) - expected = 'th' + expected = "th" self.assertEqual(expected, result) result = get_ordinal_indicator(112) @@ -735,15 +778,15 @@ def test_get_ordinal_indicator(self): self.assertEqual(expected, result) result = get_ordinal_indicator(1) - expected = 'st' + expected = "st" self.assertEqual(expected, result) result = get_ordinal_indicator(22) - expected = 'nd' + expected = "nd" self.assertEqual(expected, result) result = get_ordinal_indicator(31243) - expected = 'rd' + expected = "rd" self.assertEqual(expected, result) def test_clean_period_char(self): @@ -755,16 +798,16 @@ def test_clean_period_char(self): def test_validate_parens_group_parsed(self): """Test validate_parens_groups_parsed""" - broken_line1 = '6000 SW 1000TH AVE' + broken_line1 = "6000 SW 1000TH AVE" result = validate_parens_groups_parsed(broken_line1) self.assertEqual(broken_line1, result) - bad_addr = '10000 NE 8TH (ROW HOUSE)' + bad_addr = "10000 NE 8TH (ROW HOUSE)" with self.assertRaises(AmbiguousAddressError): validate_parens_groups_parsed(bad_addr) def test_clean_ambiguous_street_types(self): - """ Test clean_ambiguous_street_types""" + """Test clean_ambiguous_street_types""" problem_addr = "1234 BROKEN CT" expected = "1234 BROKEN COURT" result = clean_ambiguous_street_types(problem_addr) @@ -775,78 +818,99 @@ def test_clean_ambiguous_street_types(self): self.assertEqual(normal_addr, result) def test_address_normalization_error(self): - error_msg = 'Error Message' - error_title = 'ERROR TITLE' - addtl_args = 'Addition info' + error_msg = "Error Message" + error_title = "ERROR TITLE" + addtl_args = "Addition info" expected = "{}: {}, {}".format(error_title, error_msg, addtl_args) error = AddressNormalizationError(error_msg, error_title, addtl_args) self.assertEqual(expected, str(error)) - @mock.patch.object(address_constants.NormalizationConfig, 'get') + @mock.patch.object(address_constants.NormalizationConfig, "get") def test_set_constants(self, mock_config_get): - new_addr_keys = ['new keys'] - new_problem_st = { - "PS": 'STREET' - } + new_addr_keys = ["new keys"] + new_problem_st = {"PS": "STREET"} mock_config_get.side_effect = ( - 'update', new_addr_keys, - None, None, None, None, None, - new_problem_st, None, None, + "update", + new_addr_keys, + None, + None, + None, + None, + None, + new_problem_st, + None, + None, ) address_constants.set_address_constants() self.assertEqual(address_constants.ADDRESS_KEYS, new_addr_keys) self.assertIn("PS", address_constants.PROBLEM_ST_TYPE_ABBRVS.keys()) mock_config_get.side_effect = ( - 'replace', new_addr_keys, - None, None, None, None, None, - new_problem_st, None, None, + "replace", + new_addr_keys, + None, + None, + None, + None, + None, + new_problem_st, + None, + None, ) address_constants.set_address_constants() self.assertEqual(address_constants.ADDRESS_KEYS, new_addr_keys) - self.assertDictEqual( - new_problem_st, address_constants.PROBLEM_ST_TYPE_ABBRVS - ) + self.assertDictEqual(new_problem_st, address_constants.PROBLEM_ST_TYPE_ABBRVS) mock_config_get.side_effect = ( - 'invalid', new_addr_keys, - None, None, None, None, None, - new_problem_st, None, None, - ) - self.assertRaises( - ConfigError, address_constants.set_address_constants - ) + "invalid", + new_addr_keys, + None, + None, + None, + None, + None, + new_problem_st, + None, + None, + ) + self.assertRaises(ConfigError, address_constants.set_address_constants) def test_handle_abnormal_occupancy(self): - addr_str = '123 SW MAIN UN' - expected = OrderedDict([ - ('AddressNumber', '123'), - ('StreetNamePreDirectional', 'SW'), - ('StreetName', 'MAIN'), - ('StreetNamePostType', 'UN'), - ]) + addr_str = "123 SW MAIN UN" + expected = OrderedDict( + [ + ("AddressNumber", "123"), + ("StreetNamePreDirectional", "SW"), + ("StreetName", "MAIN"), + ("StreetNamePostType", "UN"), + ] + ) result = parse_address_string(addr_str) self.assertEqual(expected, result) - addr_str = '123 SW MAIN UN A' - expected = OrderedDict([ - ('AddressNumber', '123'), - ('StreetNamePreDirectional', 'SW'), - ('StreetName', 'MAIN'), - ('OccupancyType', 'UN'), - ('OccupancyIdentifier', 'A') - ]) + addr_str = "123 SW MAIN UN A" + expected = OrderedDict( + [ + ("AddressNumber", "123"), + ("StreetNamePreDirectional", "SW"), + ("StreetName", "MAIN"), + ("OccupancyType", "UN"), + ("OccupancyIdentifier", "A"), + ] + ) result = parse_address_string(addr_str) self.assertEqual(expected, result) - addr_str = '123 SW MAIN UN, UN A' - expected = OrderedDict([ - ('AddressNumber', '123'), - ('StreetNamePreDirectional', 'SW'), - ('StreetName', 'MAIN'), - ('StreetNamePostType', 'UN'), - ('OccupancyType', 'UN'), - ('OccupancyIdentifier', 'A') - ]) + addr_str = "123 SW MAIN UN, UN A" + expected = OrderedDict( + [ + ("AddressNumber", "123"), + ("StreetNamePreDirectional", "SW"), + ("StreetName", "MAIN"), + ("StreetNamePostType", "UN"), + ("OccupancyType", "UN"), + ("OccupancyIdentifier", "A"), + ] + ) result = parse_address_string(addr_str) self.assertEqual(expected, result) diff --git a/scourgify/tests/test_cleaning.py b/scourgify/tests/test_cleaning.py index ace0681..80049d6 100644 --- a/scourgify/tests/test_cleaning.py +++ b/scourgify/tests/test_cleaning.py @@ -13,30 +13,29 @@ class CleaningTests(TestCase): - def test_strip_occupancy_type(self): - expected = '33' + expected = "33" - line2 = 'Unit 33' + line2 = "Unit 33" result = strip_occupancy_type(line2) self.assertEqual(result, expected) - line2 = 'Apartment 33' + line2 = "Apartment 33" result = strip_occupancy_type(line2) self.assertEqual(result, expected) - line2 = 'Unit #33' + line2 = "Unit #33" result = strip_occupancy_type(line2) self.assertEqual(result, expected) - line2 = 'Building 3 Unit 33' + line2 = "Building 3 Unit 33" result = strip_occupancy_type(line2) self.assertEqual(result, expected) - line2 = 'Building 3 UN 33' + line2 = "Building 3 UN 33" result = strip_occupancy_type(line2) self.assertEqual(result, expected) - line2 = '33' + line2 = "33" result = strip_occupancy_type(line2) self.assertEqual(result, expected) diff --git a/scourgify/validations.py b/scourgify/validations.py index e742ab6..0eb323d 100644 --- a/scourgify/validations.py +++ b/scourgify/validations.py @@ -62,20 +62,21 @@ def validate_address_components(address_dict, strict=True): :rtype: Mapping """ locality = ( - address_dict.get('postal_code') and - address_dict.get('city') and address_dict.get('state') - if strict else - address_dict.get('postal_code') or - (address_dict.get('city') and address_dict.get('state')) + address_dict.get("postal_code") + and address_dict.get("city") + and address_dict.get("state") + if strict + else address_dict.get("postal_code") + or (address_dict.get("city") and address_dict.get("state")) ) - if not address_dict.get('address_line_1'): - msg = 'Address records must include Line 1 data.' + if not address_dict.get("address_line_1"): + msg = "Address records must include Line 1 data." raise IncompleteAddressError(msg) elif not locality: msg = ( - 'Address records must contain a city, state, and postal_code.' - if strict else - 'Address records must contain a city and state, or a postal_code' + "Address records must contain a city, state, and postal_code." + if strict + else "Address records must contain a city and state, or a postal_code" ) raise IncompleteAddressError(msg) return address_dict @@ -93,28 +94,26 @@ def validate_us_postal_code_format(postal_code, address): :rtype: str """ error = None - msg = ( - 'US Postal Codes must conform to five-digit Zip or Zip+4 standards.' - ) + msg = "US Postal Codes must conform to five-digit Zip or Zip+4 standards." postal_code = post_clean_addr_str(postal_code) - plus_four_code = postal_code.split('-') + plus_four_code = postal_code.split("-") for code in plus_four_code: try: int(code) except ValueError: error = True if not error: - if '-' in postal_code: - if len(postal_code.replace('-', '')) > 9: + if "-" in postal_code: + if len(postal_code.replace("-", "")) > 9: error = True elif len(plus_four_code) != 2: error = True else: - postal_code = '-'.join([ - plus_four_code[0].zfill(5), plus_four_code[1].zfill(4) - ]) + postal_code = "-".join( + [plus_four_code[0].zfill(5), plus_four_code[1].zfill(4)] + ) elif len(postal_code) == 9: - postal_code = '-'.join([postal_code[:5], postal_code[5:]]) + postal_code = "-".join([postal_code[:5], postal_code[5:]]) elif len(postal_code) > 5: error = True else: @@ -140,7 +139,7 @@ def validate_parens_groups_parsed(line1): :return: line1 address string :rtype: str """ - parenthesis_groups = _get_substrings_with_regex(line1, r'\((.+?)\)') + parenthesis_groups = _get_substrings_with_regex(line1, r"\((.+?)\)") if parenthesis_groups: raise AmbiguousAddressError(None, None, line1) else: diff --git a/tox.ini b/tox.ini index d7c9236..5825f6e 100644 --- a/tox.ini +++ b/tox.ini @@ -5,7 +5,7 @@ envlist = py35,py36,py37,py38 setenv = ADDRESS_CONFIG_DIR = {toxinidir}/scourgify/tests/config deps= - -rrequirements/dev.txt + -r requirements/dev.txt pytest pytest-cov pytest-xdist @@ -13,7 +13,10 @@ deps= commands = pytest --cov=. --cov-report= --cov-append -s + black scourgify flake8 scourgify [flake8] +# E501: line too long (Varying opinions on this but can conflict with some Black auto-formatting) +extend-ignore = E501 exclude=__init__.py \ No newline at end of file