Sie sind auf Seite 1 von 4

package tw.idv.kayjean.waggle.jgeocoder.tiger; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.

ArrayList; import java.util. Collections; import java.util.EnumMap; import java.util.HashMap; import java.uti l.HashSet; import java.util.List; import java.util.Map; import java.util.Set; i mport org.apache.commons.collections.CollectionUtils; import org.apache.commons. logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.com mons.lang.StringUtils; import com.sleepycat.je.DatabaseException; import com.sl eepycat.persist.EntityCursor; import com.sleepycat.persist.PrimaryIndex; import static tw.idv.kayjean.waggle.jgeocoder.AddressComponent.CITY; import static tw. idv.kayjean.waggle.jgeocoder.AddressComponent.COUNTY; import static tw.idv.kayje an.waggle.jgeocoder.AddressComponent.LAT; import static tw.idv.kayjean.waggle.jg eocoder.AddressComponent.LON; import static tw.idv.kayjean.waggle.jgeocoder.Addr essComponent.POSTDIR; import static tw.idv.kayjean.waggle.jgeocoder.AddressCompo nent.PREDIR; import static tw.idv.kayjean.waggle.jgeocoder.AddressComponent.STAT E; import static tw.idv.kayjean.waggle.jgeocoder.AddressComponent.TLID; import s tatic tw.idv.kayjean.waggle.jgeocoder.AddressComponent.TYPE; import static tw.id v.kayjean.waggle.jgeocoder.AddressComponent.ZIP; import tw.idv.kayjean.waggle.j geocoder.AddressComponent; import tw.idv.kayjean.waggle.jgeocoder.CommonUtils; i mport tw.idv.kayjean.waggle.jgeocoder.GeocodeAcuracy; import tw.idv.kayjean.wagg le.jgeocoder.JGeocodeAddress; import tw.idv.kayjean.waggle.jgeocoder.us.AddressP arser; import tw.idv.kayjean.waggle.jgeocoder.us.AddressStandardizer; public cl ass JGeocoder{ private static final Log LOGGER = LogFactory.getLog(JGe ocoder.class); private ZipCodesDb _zipDb; private ZipCodeDAO _zipDao; private TigerLineDao _tigerDao; public JGeocoder() { this(JGeocoderConfig.DEFAULT); } private Tige rLineHit getTigerLineHitByZip(Map<AddressComponent, String> normalizedAddr, Stri ng zip) throws TigerQueryFailedException, DatabaseException{ if(zi p == null || 
!_zipDao.fillInCSByZip(normalizedAddr, zip)){ ret urn null; } normalizedAddr.put(ZIP, zip); //??DB return _tigerDao.getTigerLineHit(normalizedAddr); } private List<ZipCode> getZips(String city, String st ate) throws DatabaseException{ if(city == null || state == null){ return Collections.emptyList(); } List<ZipCode> ret = new ArrayList<ZipCode>(); Location loc = new L ocation(); loc.setCity(city.replaceAll("\\s+", "")); loc.setState(state); EntityCursor<ZipCode> zips = null; try{ zips = _zipDao.getZipCodeByLocation().subIndex(loc).en tities(); for(ZipCode zip : zips){ ret.add(zip ); } }finally{ if(zips != null){ zips.close(); } } r eturn ret; } private TigerLineHit getT igerLineHit(Map<AddressComponent, String> normalizedAddr) throws DatabaseExcepti on{ Map<AddressComponent, String> myMap = new EnumMap<AddressCompone nt, String>(normalizedAddr); TigerLineHit hit = null; Se t<String> attemptedZips = new HashSet<String>(); try { //try the par sed zip hit = getTigerLineHitByZip(normalizedAddr, normalizedAddr. get(ZIP)); if(normalizedAddr.get(ZIP)!=null){ at temptedZips.add(normalizedAddr.get(ZIP)); } if(hit ! = null){ return hit; } if(myMap.ge t(CITY)==null || myMap.get(STATE) == null){ //use the zip's city, state if the i nput does not have one myMap.put(CITY, normalizedAddr.get(CITY )); myMap.put(STATE, normalizedAddr.get(STATE)); } List<TigerLineHit> zipHits = new ArrayList<TigerLineHit>(); for(ZipCode zipcode : getZips(myMap.get(CITY), myMap.g et(STATE))){ if(!attemptedZips.contains(zipcode.getZip())){ hit = getTigerLineHitByZip(myMap, zipcode.getZip()); if(hit != null){ zipHits.add(hit); } attemptedZips.add(zipcode.getZip()); } } if(CollectionUtils.isNotEmpty(z

ipHits)){ hit = TigerLineDao.findBest(myMap, zipHits); }else{ County county = _zipDao.getCounty(normalizedAddr .get(CITY), normalizedAddr.get(STATE)); if(county != null){ for(String s : county.getZips()){ if(!attemptedZips.contains(s)){ hit = getTigerLine HitByZip(myMap, s); } if(hit != null){ zipHits.add(hit); } attemptedZips.add(s); // } } if(CollectionUtils.isNotEmpty(zipHits)){ hit = TigerLineDao.findBest(myMap, zipHits); } } if(hit != null){ String zip = CommonUtils.nvl(hit.zipL, hit.zipR); _zipDao.fillInCSByZ ip(myMap, zip); normalizedAddr.putAll(myMap); return hit; } } catch (TigerQueryFail edException e) { LOGGER.warn("Tiger/Line DB query failed, street level geocoding will be skipped: "+e.getMessage()); if(LOGGER.i sDebugEnabled()){ LOGGER.debug("", e); } return null; } return null; } public JGeocodeAddress geocodeAddress(String addrLine){ JGeocodeAddress ret = new JGeocodeAddress(); Map<AddressCompon ent, String> m = AddressParser.parseAddress(addrLine); ret.setParse dAddr(m); if(m == null) return ret;//FIXME: throw exception instead m = AddressStandardizer.normalizeParsedAddress(m); ret.setNormalizedAddr(m); if(m.get(ZIP) == null && //if zip is missing (m.get(STATE) == null || m.get(CITY)==n ull)){ //city or state is missing return ret; } GeocodeAcuracy acuracy = GeocodeAcuracy.STREET; //???? 
m = new EnumMap<AddressComponent, String>(m); TigerLineHit hi t = null; try { hit = getTigerLineHit(m); } catch (DatabaseException e) { throw new RuntimeException("Unable to query tiger/line database "+e.getMessage()); } if(hi t != null){ acuracy = GeocodeAcuracy.STREET; Geo geo = Geocoder.geocodeFromHit(Integer.parseInt(hit.streetNum), hit); m.put(ZIP, String.valueOf(geo.zip)); m.put(PREDIR, hit.fedirp); m.put(POSTDIR, hit.fedirs); m.put(TYPE, hit.fetype); m.put(TLID, String.valueOf(hit.tlid)); m.put(LAT, Str ing.valueOf(geo.lat)); m.put(LON, String.valueOf(geo.lon)); ret.setGeocodedAddr(m); }else if(_zipDao.geocodeByZip(m)){ acuracy = GeocodeAcuracy.ZIP; ret.setGeocodedAddr(m); }else if(_zipDao.geocodeByCityState(m)){ acuracy = Ge ocodeAcuracy.CITY_STATE; ret.setGeocodedAddr(m); }else { return ret; } if(ret.getGeo codedAddr()!=null && ret.getGeocodedAddr().get(COUNTY) == null & & ret.getGeocodedAddr().get(ZIP) != null){ try { _zipDao.fillInCSByZip(ret.getGeocodedAddr(), ret.getGeocodedAddr() .get(ZIP)); } catch (DatabaseException e) { LOGGER .warn("Unable to query zip code", e); } } ret.setAcuracy(acuracy); return ret; } public JGeocoder(JGeocoderConfig config){ _zipDb = ne w ZipCodesDb(); _tigerDao = new TigerLineDao(config.getTigerDataSour ce()); try { String home = config.getJgeocoderDataHo me(); _zipDb.init(new File(config.getJgeocoderDataHome()), false, false); _zipDao = new ZipCodeDAO(_zipDb.getStore()); } catch (Exception e) { throw new RuntimeException("Unable to creat e zip db, make sure your system property 'jgeocoder.data.home' is correct" +e.getMessage()); } } public void createsql() throws TigerQueryFailedException, DatabaseEx ception{ _tigerDao.createsql(); } public void insertsql() throws TigerQueryFailedException, DatabaseException{ _tigerDao.insertsql(); } public void indexsql() t

hrows TigerQueryFailedException, DatabaseException{ _tigerDao. indexsql(); } public void createdb(){ PrimaryIndex<Location, County> idx = _zipDao.getCountyByLocation(); BufferedReader br = null; try { br = new Buff eredReader(new InputStreamReader( new FileInputStream("D:\\jgeocoder-0.4.1-jar-w ith-dependencies\\county.txt") )); String line = null; while((line=br.readLine())!= null){ String[] item s = line.split("\\s*[|]\\s*"); String state = items[0].t oUpperCase(); String county = items[1].replaceAll("\\s", "").toUpperCase(); Float lat = Float.valueOf(items[2]); Float lon = Float.valueOf(items[3]); String[] zips = items[4].split("\\s+"); Location l oc = new Location(); loc.setCity(county); loc.setState(state); County c = new County(); c.setLat(lat); c.setLon(lon); c.setLocation(loc); c.setZips(zips ); idx.put(c); } Location loc = new Location(); loc.setCity ("BUCKS"); loc.setState("PA"); System.ou t.println( idx.get(loc) ); } catch (Exception e) { throw new Error("Unable to initalize ZIPCODE", e); }finally{ if(br != null){ try { br.close(); } catch (IOException e) {} } } return; } public void creat edbzip_codes(){ PrimaryIndex<String, ZipCode> idx = _zipDao.ge tZipCodeByZip(); PrimaryIndex<String, CityWithSpaces> idx2 = _ zipDao.getCityWithSpaceByNoSpace(); BufferedReader br = null; try { br = new Buffered Reader(new InputStreamReader( new FileInputStream("D:\\jgeocoder-0.4.1-jar-withdependencies\\ZIP_CODES.txt") )); String line = null; while((line=br.readLine())!= null){ String[] items = line.split(","); if(items[3].contains( " " )){ CityWithSpaces c = new CityWithSpaces(); c.setNoSpace(StringUtils.upperCase(items[3].replaceAll("[\"\\ s]",""))); c.setWithSpace(StringUtils.upperCase( items[3].replaceAll("\"",""))); idx2.put(c); } for (int i = 0; i < items.length; i++){ items[i] = StringUtils.upperCase(items[i]).repl aceAll("[\"]", "").trim(); } Loc ation loc = new Location(); loc.setCity(items[3].replace All("[\"\\s]","")); loc.setState(items[4]); Float lat = 
StringUtils.isBlank(items[1]) ? -1f : Float.valueOf(item s[1].trim()); Float lon = StringUtils.isBlank(items[2]) ? -1f : Float.valueOf(items[2].trim()); ZipCode z = new ZipCode(); z.setZip(items[0]); z .setLocation(loc); z.setLat(lat); z.setLon(lon); z.setCounty(items[5]); z.setZipClass(items[6]); idx.put(z); } System.out.println( idx.get("19148") ); System.out.println( idx2.get("KINGOFPRUSSIA") ); } catch (Exception e) { throw new Error("Unable to initalize Z IPCODE", e); }finally{ if(br != null){ try { br.close(); } catch (IOException e) {} } } return; } public void createdbcity_ state(){ PrimaryIndex<Location, CityStateGeo> idx = _zipDao.ge tCityStateGeoByLocation(); BufferedReader br = nu ll; try { br = new BufferedReader(new InputStreamRea der( new FileInputStream("D:\\jgeocoder-0.4.1-jar-with-dependencies\\city_state. txt") )); String line = null; while((line=br.rea dLine())!= null){ String[] items = line.split("[|]");

for (int i = 0; i < items.length; i++){ items[i] = StringUtils.upperCase(items[i]).replaceAll("[\"\\s]", " ").trim(); } String[] citystate = items[0].split(","); Location loc = new Location(); loc.setCity(citystate[0]); loc.set State(citystate[1]); Float lat = StringUtils.isBlank(ite ms[1]) ? -1f : Float.valueOf(items[1].trim()); Float lon = StringUtils.isBlank(items[2]) ? -1f : Float.valueOf(items[2].trim()); CityStateGeo csg = new CityStateGeo(); csg.setLocation(loc); csg.setLat(lat); csg.setLon(lon); idx.put(csg); } Location loc = new Location(); loc.setCity( "KINGOFPRUSSIA"); loc.setState("PA"); System.out .println( idx.get(loc) ); } catch (Exception e) { throw new Error("Unable to initalize ZIPCODE", e); }finally{ if(br != null){ try { br.close(); } catch (IOException e) {} } } re turn; } public void cleanup(){ if(_zi pDb != null){ try { _zipDb.shutdown(); } catch (DatabaseException e) { throw new RuntimeException("U nable to shutdown zip db, "+e.getMessage()); } _zipD b = null; } } @Override pro tected void finalize() throws Throwable { super.finalize(); cleanup(); } }

Das könnte Ihnen auch gefallen