`

Captcha的对立面OCR

    博客分类:
  • Lab
阅读更多

需求分析(从系统分析看):  下载网页上的验证码图片,将其解析为可识别的文字

软件设计(从系统架构看):   HTTP GET -> image -> OCR -> word

资源实现(从项目管理看):  环境:  Ubuntu 7.10 

  • sudo apt-get install ocrad
  • sudo apt-get install gocr

   技术:

  • java
  • shell

 

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;

public class TestAtSoodinDotCom {
	
	
	/**
	 * Runs an external command and returns everything it wrote to standard output.
	 *
	 * <p>The output lines are concatenated without any line separator between them.
	 * Standard error of the child process is not read. Any exception raised while
	 * starting the process or reading its output is printed with
	 * {@code printStackTrace()} and the text collected so far is returned.
	 *
	 * @param cmd the command and its arguments, passed to {@code Runtime.exec(String[])}
	 * @return the concatenated standard-output lines; an empty string if nothing was read
	 */
	public static String callCmd(String[] cmd) {
		// StringBuilder avoids re-copying the whole result on every appended line.
		StringBuilder result = new StringBuilder();
		BufferedReader br = null;
		try {
			Process proc = Runtime.getRuntime().exec(cmd);
			br = new BufferedReader(new InputStreamReader(proc.getInputStream()));
			String line;
			while ((line = br.readLine()) != null) {
				result.append(line);
			}
		} catch (Exception e) {
			// Best effort by design: report the failure and return what was read so far.
			e.printStackTrace();
		} finally {
			// Release the pipe to the child process even when reading failed.
			if (br != null) {
				try {
					br.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if closing the pipe fails.
				}
			}
		}
		return result.toString();
	}
	
	
	/**
	 * Saves the body of a successful HTTP response to {@code /tmp/verifycode.jpeg}.
	 *
	 * <p>The body is written to the file whether or not the server announced a
	 * Content-Length, so the file always reflects the response that was just read.
	 *
	 * @param http  the connection whose status code is checked
	 * @param iStrm the stream to read the response body from; it is not closed here
	 * @return {@code true} if the status was 200 (HTTP_OK) and the body was written,
	 *         {@code false} for any other status (nothing is read or written then)
	 * @throws IOException if reading the status, reading the body, or writing the file fails
	 */
	private static boolean processServerResponse(HttpURLConnection http, InputStream iStrm) throws IOException
	{
		// 1) Check the status line; anything other than 200 is treated as failure.
		if (http.getResponseCode() != HttpURLConnection.HTTP_OK)
		{
			return false;
		}

		// 2) Header information - none needed.
		// 3) Copy the body (the image is in JPEG format) to the file in chunks.
		OutputStream oStrm = new BufferedOutputStream(new FileOutputStream("/tmp/verifycode.jpeg"));
		try
		{
			byte[] buffer = new byte[4096];
			int count;
			// read() may return fewer bytes than requested, so loop until end of stream.
			while ((count = iStrm.read(buffer)) != -1)
			{
				oStrm.write(buffer, 0, count);
			}
		}
		finally
		{
			// Always flush and release the file handle, even if the copy failed.
			oStrm.close();
		}
		return true;
	}
	  
	  
	  
	  
	  
	  
	  /**
	   * Downloads the captcha image from the site, runs it through djpeg and ocrad,
	   * and prints the recognised text with spaces removed.
	   *
	   * @param args unused
	   * @throws Exception if the HTTP request or saving the image fails
	   */
	  public static void main(String[] args) throws Exception{
	    String address = "http://www.soodin.com/verifycode";
	    URL url = new URL(address);
	    HttpURLConnection connection = (HttpURLConnection) url.openConnection();
	    connection.setRequestMethod("GET");
	    // NOTE(review): some JDKs treat Host as a restricted header and ignore it - confirm if it matters.
	    connection.setRequestProperty("Host", "www.soodin.com");
	    connection.setRequestProperty("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2");
	    // The header name must not contain ':'; the separator is added by the HTTP layer.
	    connection.setRequestProperty("Cookie","__utma=166789948.4122467951340428000.1244696088.1262916100.1263192353.114; __utmz=166789948.1256018048.99.5.utmcsr=bbs.soodin.com|utmccn=(referral)|utmcmd=referral|utmcct=/search.php; rtime=21; ltime=1263194805481; cnzz_eid=68012444-1253685384-; JSESSIONID=750D1D2078D37976A15EF35B5FF5899C; cnzz_a1688487=10; sin1688487=; __utmc=166789948");
	    connection.setRequestProperty("Referer",  "http://www.soodin.com/user/login.do?method=login");

	    boolean saved;
	    InputStream iStrm = null;
	    try {
	      iStrm = connection.getInputStream();
	      saved = processServerResponse(connection, iStrm);
	    } finally {
	      // Release the socket whether or not the download succeeded.
	      if (iStrm != null) {
	        iStrm.close();
	      }
	      connection.disconnect();
	    }

	    if (!saved) {
	      // Without a fresh image the OCR step would only read a stale or missing file.
	      System.err.println("Captcha image was not downloaded from " + address);
	      return;
	    }

	    String[] cmd = {
	        "/bin/sh",
	        "-c",
	        "djpeg -grey -pnm /tmp/verifycode.jpeg |  ocrad -x /tmp/b.txt",  // the image is in JPEG format
	        };

	    String verifycode = callCmd(cmd);
	    // Strip the spaces ocrad leaves between recognised characters.
	    verifycode = verifycode.replaceAll("[\\  ]", "");
	    System.out.println(verifycode);
	  }

 测试结果:  经过噪音干扰后的识别率低于10%.

0
0
分享到:
评论
3 楼 beneo 2010-07-01  
阿诺,知道有啥支持中文的么?
2 楼 Ihavegotyou 2010-07-01  
引用
能支持中文么?我猜测应该不行的吧

不支持中文。
1 楼 beneo 2010-07-01  
能支持中文么?我猜测应该不行的吧

相关推荐

Global site tag (gtag.js) - Google Analytics