[ create a new paste ] login | about

Link: http://codepad.org/pkTdUDL6    [ raw code | fork ]

PHP, pasted on Sep 20:
<?php
/**
 * Split a single-line street address into its component parts.
 *
 * Periods and commas are removed first, then the pieces are peeled off the
 * string in this order: trailing unit, trailing (suffix) direction, trailing
 * street type, leading house number with optional fraction, leading (prefix)
 * direction. When the address does not start with a number it is also checked
 * for PO Box, Rural Route, Highway County route and Star Route wording.
 * Whatever text is left over is returned as the street name.
 *
 * All keyword matching is case-insensitive.
 *
 * @param string $address The address line to parse.
 * @return array Associative array containing only the parts that were found,
 *               in this key order: type_definite, address_type, raw_po_box,
 *               po_box, raw_route, route_number, route_box_number, raw_hc,
 *               hc_number, hc_box_number, raw_starrt, starrt_number,
 *               starrt_box_number, number, fraction, prefix_direction,
 *               raw_prefix_direction, name, type, raw_type, suffix_direction,
 *               raw_suffix_direction, unit, raw_unit, raw_address.
 *               The raw_* entries hold the matched text exactly as it was cut
 *               from the address, including the whitespace that separated it.
 */
function parse_address($address){

	// Lookup tables are keyed by the lower-cased spelling so that a
	// case-insensitive regex match can always be resolved.
	$dir=array(
		'n'=>'N','s'=>'S','e'=>'E','w'=>'W','nw'=>'NW','sw'=>'SW','ne'=>'NE','se'=>'SE',
		'north'=>'N','south'=>'S','east'=>'E','west'=>'W','northwest'=>'NW','southwest'=>'SW','northeast'=>'NE','southeast'=>'SE'
	);
	$type=array(
		'ave'=>'Ave','blvd'=>'Blvd','st'=>'St','wy'=>'Wy','cir'=>'Cir','dr'=>'Dr','ln'=>'Ln','pl'=>'Pl','rd'=>'Rd',
		'bvd'=>'Blvd',
		'avenue'=>'Ave','boulevard'=>'Blvd','street'=>'St','way'=>'Wy','circle'=>'Cir','drive'=>'Dr','lane'=>'Ln','place'=>'Pl','road'=>'Rd'
	);
	$dirPattern='North|South|East|West|Northeast|Southeast|Southwest|Northwest|N|S|E|W|NE|SE|SW|NW';
	// A unit keyword must not run straight into more letters, otherwise a
	// trailing word such as "Stevens" would be read as "Ste" + unit "vens".
	$unitLead='(?:\s+(?:(?:Apt|Apartment|Suite|Ste|Unit|Bldg|Building|Room|Rm)(?![a-z])|#)\s*)+#?';

	$b=array();
	$address=trim((string)$address);
	$address=str_replace(array('.', ','), '', $address);
	$b['raw_address']=$address;

	//remove any unit or apt # from the end
	//a number alone at the end is not enough, we need at least # or one of the descriptors
	if(preg_match('/'.$unitLead.'[-a-z0-9]+$/i',$address,$a)){
		$b['raw_unit']=$a[0];
		$b['unit']=preg_replace('/'.$unitLead.'/i','',$a[0]);
		$address=(string)substr($address,0,strlen($address)-strlen($a[0]));
	}
	//parse suffix direction (SW)
	if(preg_match('/\s+('.$dirPattern.')$/i',$address,$a)){
		$b['raw_suffix_direction']=$a[0];
		// $a[1] is the direction word without the separating whitespace
		$b['suffix_direction']=$dir[strtolower($a[1])];
		$address=(string)substr($address,0,strlen($address)-strlen($a[0]));
	}
	//remove type of street
	if(preg_match('/\s+(St|Bvd|Ave|Wy|Cir|Dr|Ln|Pl|Rd|Boulevard|Blvd|Street|Avenue|Way|Circle|Drive|Lane|Place|Road)$/i',$address,$a)){
		$b['raw_type']=$a[0];
		$typeKey=strtolower($a[1]);
		// false when the type as written is longer than three characters or is "Way"/"Bvd"
		$b['type_definite']=!(strlen($typeKey)>3 || $typeKey=='way' || $typeKey=='bvd');
		$b['type']=isset($type[$typeKey]) ? $type[$typeKey] : $a[1];
		$address=(string)substr($address,0,strlen($address)-strlen($a[0]));
	}
	//remove number and fraction
	if(preg_match('/^[0-9]+(\s+[0-9]+\/[0-9]+)*/',$address,$a)){
		$address=(string)substr($address,strlen($a[0]));
		$number=$a[0];
		if(preg_match('/\s+[0-9]+\/[0-9]+$/',$number,$aa)){
			$b['fraction']=trim($aa[0]);
			$number=(string)substr($number,0,strlen($number)-strlen($aa[0]));
		}
		$b['number']=trim($number);
	}else{
		//no leading house number: account for possible P.O. Boxes and routes
		if(preg_match('/^(POB\s+|P\s*O\s*Box|Post Office Box|Postal Box|Box|Boite Postal)\s*([0-9a-z]+(-[0-9a-z]+)*)/i',$address,$a)){
			$b['raw_po_box']=$a[0];
			$b['po_box']=strtoupper($a[2]);
			$b['address_type']="Post Office Box";
		}
		// Each entry: keyword alternation, result key stem, address_type label.
		// Rural Route, then HC nomenclature, then * | Star Route. The patterns
		// are not anchored, so more than one may match; the last match decides
		// address_type.
		$routes=array(
			array('Rrte|RR|Rural Route|Rt|Rte|Route','route','Rural Route'),
			array('HC|Highway County|Hwy Cty|Hwy County','hc','Highway County Route'),
			array('\*\s+Rte|\*\s+Route|Star\s+Route|Star\s+Rte','starrt','Star Route')
		);
		foreach($routes as $route){
			list($keywords,$stem,$label)=$route;
			if(preg_match('/('.$keywords.')\s+([0-9]+)\s+(Box|Bx)\s+([0-9]+)/i',$address,$a)){
				$b['raw_'.$stem]=$a[0];
				$b[$stem.'_number']=$a[2];
				$b[$stem.'_box_number']=$a[4];
				$b['address_type']=$label;
			}
		}
		/***
		Note on the route patterns: an address that only partially conforms, such as "Rte 1" with no box number, is not detected, and perhaps it should be.  Perhaps "Route 1" is even OK in some areas :-|
		***/
	}
	//what remains is the prefix direction and the street name
	//note that if text is still left over after a PO Box or route was pulled above, then either something is wrong or the code missed something; that text ends up in 'name'
	$address=trim($address);
	if(preg_match('/^('.$dirPattern.')\s+/i',$address,$a)){
		$b['prefix_direction']=$dir[strtolower($a[1])];
		// $a[0] includes the trailing whitespace, so single-letter directions are not kept as raw text
		if(strlen($a[0])>2){
			$b['raw_prefix_direction']=$a[0];
		}
		$address=(string)substr($address,strlen($a[0]));
	}
	//presume all else is the name
	$b['name']=trim($address);
	if(!isset($b['address_type'])){
		$b['address_type']="Presumed Standard";
	}

	//present the array visibly in a logical order -- not required for operation but nice
	$order=array(
		'type_definite',
		'address_type',
		'raw_po_box',
		'po_box',
		'raw_route',
		'route_number',
		'route_box_number',
		'raw_hc',
		'hc_number',
		'hc_box_number',
		'raw_starrt',
		'starrt_number',
		'starrt_box_number',
		'number',
		'fraction',
		'prefix_direction',
		'raw_prefix_direction',
		'name',
		'type',
		'raw_type',
		'suffix_direction',
		'raw_suffix_direction',
		'unit',
		'raw_unit',
		'raw_address'
	);
	$c=array();
	foreach($order as $v){
		if(isset($b[$v])){
			$c[$v]=$b[$v];
		}
	}
	return $c;
}

?>


Output:
No errors or program output.


Create a new paste based on this one


Comments: