codepad
[
create a new paste
]
login
|
about
Language:
C
C++
D
Haskell
Lua
OCaml
PHP
Perl
Plain Text
Python
Ruby
Scheme
Tcl
<?php
/**
 * Break a single-line street address into labelled components.
 *
 * Parsing works from the outside in: unit/apartment, suffix direction and
 * street type are peeled off the end, the house number (and optional
 * fraction) off the front, then an optional prefix direction. Whatever text
 * is left is reported as the street name. Addresses that do not start with
 * a house number are additionally checked for PO Box, Rural Route,
 * Highway County ("HC") and Star Route forms.
 *
 * Periods and commas are removed before parsing. Values under "raw_*" keys
 * are the matched text exactly as the pattern captured it, including the
 * whitespace that separated it from its neighbour.
 *
 * NOTE(review): "type_definite" is true for short abbreviations (St, Ave,
 * Dr, ...) and false for spelled-out words, "Way" and "Bvd" -- presumably
 * because those may be part of the street name; confirm the intended meaning.
 *
 * @param string $address Free-form, single-line address.
 * @return array Associative array holding only the components that were
 *               found, always including "raw_address", "name" and
 *               "address_type", in a fixed presentation order.
 */
function parse_address($address){
    // Lookup tables are keyed in lower case so that whatever casing the
    // (case-insensitive) patterns accept can be normalised with strtolower().
    $directions = array(
        'n' => 'N', 's' => 'S', 'e' => 'E', 'w' => 'W',
        'nw' => 'NW', 'sw' => 'SW', 'ne' => 'NE', 'se' => 'SE',
        'north' => 'N', 'south' => 'S', 'east' => 'E', 'west' => 'W',
        'northwest' => 'NW', 'southwest' => 'SW',
        'northeast' => 'NE', 'southeast' => 'SE'
    );
    $street_types = array(
        'ave' => 'Ave', 'blvd' => 'Blvd', 'st' => 'St', 'wy' => 'Wy', 'cir' => 'Cir',
        'dr' => 'Dr', 'ln' => 'Ln', 'pl' => 'Pl', 'rd' => 'Rd', 'bvd' => 'Blvd',
        'avenue' => 'Ave', 'boulevard' => 'Blvd', 'street' => 'St', 'way' => 'Wy',
        'circle' => 'Cir', 'drive' => 'Dr', 'lane' => 'Ln', 'place' => 'Pl', 'road' => 'Rd'
    );
    $direction_alt = 'North|South|East|West|Northeast|Southeast|Southwest|Northwest|N|S|E|W|NE|SE|SW|NW';
    $unit_prefix = '(\s+(Apt|Apartment|Suite|Ste|Unit|Bldg|Building|Room|Rm|#)\s*)+#?';

    $parts = array();

    // Strip punctuation, then trim again: removing a trailing "." or ","
    // can expose whitespace that would defeat the end-anchored patterns below.
    $address = trim(str_replace(array('.', ','), '', trim($address)));
    $parts['raw_address'] = $address;

    // Unit / apartment at the end. A bare trailing number is not enough;
    // "#" or one of the descriptors must introduce it.
    if (preg_match('/' . $unit_prefix . '[-a-z0-9]+$/i', $address, $m)) {
        $parts['raw_unit'] = $m[0];
        $parts['unit'] = preg_replace('/' . $unit_prefix . '/i', '', $m[0]);
        $address = substr($address, 0, strlen($address) - strlen($m[0]));
    }

    // Suffix direction, e.g. the "SW" in "Main St SW".
    if (preg_match('/\s+(' . $direction_alt . ')$/i', $address, $m)) {
        $parts['raw_suffix_direction'] = $m[0];
        $parts['suffix_direction'] = $directions[strtolower($m[1])];
        $address = substr($address, 0, strlen($address) - strlen($m[0]));
    }

    // Street type (St, Ave, Road, ...), normalised to its abbreviation.
    if (preg_match('/\s+(St|Bvd|Ave|Wy|Cir|Dr|Ln|Pl|Rd|Boulevard|Blvd|Street|Avenue|Way|Circle|Drive|Lane|Place|Road)$/i', $address, $m)) {
        $token = strtolower($m[1]);
        $parts['raw_type'] = $m[0];
        $parts['type_definite'] = !(strlen($token) > 3 || $token === 'way' || $token === 'bvd');
        $parts['type'] = isset($street_types[$token]) ? $street_types[$token] : $m[1];
        $address = substr($address, 0, strlen($address) - strlen($m[0]));
    }

    if (preg_match('/^[0-9]+(\s+[0-9]+\/[0-9]+)*/', $address, $m)) {
        // Leading house number, optionally followed by a fraction ("12 1/2").
        // The cast guards against substr() returning false on older PHP
        // when nothing follows the number.
        $address = (string) substr($address, strlen($m[0]));
        $number = $m[0];
        if (preg_match('/\s+[0-9]+\/[0-9]+$/', $number, $frac)) {
            $parts['fraction'] = trim($frac[0]);
            $number = substr($number, 0, strlen($number) - strlen($frac[0]));
        }
        $parts['number'] = trim($number);
    } else {
        // No house number: look for PO Boxes and the various route forms.
        // Partially conforming input such as "Rte 1" (no box number) is
        // not recognised. The matched text is NOT removed from $address.
        if (preg_match('/^(POB\s+|P\s*O\s*Box|Post Office Box|Postal Box|Box|Boite Postal)\s*([0-9a-z]+(-[0-9a-z]+)*)/i', $address, $m)) {
            $parts['raw_po_box'] = $m[0];
            // Take the id from its own capture group so the keyword can
            // never leak into it when no separator is present ("POBox77").
            $parts['po_box'] = strtoupper($m[2]);
            $parts['address_type'] = 'Post Office Box';
        }

        // Each entry: keyword pattern, raw key, number key, box key, label.
        // All three are tried in this order; a later match overrides the
        // address_type set by an earlier one ("Star Route 1 Box 2" also
        // satisfies the plain route pattern).
        $routes = array(
            array('(Rrte|RR|Rural Route|Rt|Rte|Route)', 'raw_route', 'route_number', 'route_box_number', 'Rural Route'),
            array('(HC|Highway County|Hwy Cty|Hwy County)', 'raw_hc', 'hc_number', 'hc_box_number', 'Highway County Route'),
            array('(\*\s+Rte|\*\s+Route|Star\s+Route|Star\s+Rte)', 'raw_starrt', 'starrt_number', 'starrt_box_number', 'Star Route')
        );
        foreach ($routes as $route) {
            if (preg_match('/' . $route[0] . '\s+([0-9]+)\s+(Box|Bx)\s+([0-9]+)/i', $address, $m)) {
                $parts[$route[1]] = $m[0];
                $parts[$route[2]] = $m[2];
                $parts[$route[3]] = $m[4];
                $parts['address_type'] = $route[4];
            }
        }
    }

    // What remains is an optional prefix direction followed by the name.
    $address = trim($address);
    if (preg_match('/^(' . $direction_alt . ')\s+/i', $address, $m)) {
        $parts['prefix_direction'] = $directions[strtolower($m[1])];
        // The raw form is kept only when the match (direction plus its
        // trailing whitespace) is longer than two bytes.
        if (strlen($m[0]) > 2) {
            $parts['raw_prefix_direction'] = $m[0];
        }
        $address = (string) substr($address, strlen($m[0]));
    }

    // Everything left over is presumed to be the street name. For PO Box
    // and route addresses this is the unconsumed text, which callers can
    // treat as a sign that something was not fully parsed.
    $parts['name'] = trim($address);
    if (!isset($parts['address_type'])) {
        $parts['address_type'] = 'Presumed Standard';
    }

    // Present the result in a logical order -- cosmetic, not required.
    $order = array(
        'type_definite', 'address_type',
        'raw_po_box', 'po_box',
        'raw_route', 'route_number', 'route_box_number',
        'raw_hc', 'hc_number', 'hc_box_number',
        'raw_starrt', 'starrt_number', 'starrt_box_number',
        'number', 'fraction',
        'prefix_direction', 'raw_prefix_direction',
        'name', 'type', 'raw_type',
        'suffix_direction', 'raw_suffix_direction',
        'unit', 'raw_unit', 'raw_address'
    );
    $ordered = array();
    foreach ($order as $key) {
        if (isset($parts[$key])) {
            $ordered[$key] = $parts[$key];
        }
    }
    return $ordered;
}
?>
Private
[
?
]
Run code
Submit