关于 Heritrix 以及 MySQL 数据库的备份与还原

关于 Heritrix URL 乱码问题,可以通过修改源码解决。

Java 实现 MySQL 数据库的备份及恢复

Java 常见的几种发送 HTTP 请求的案例

org.archive.crawler.writer.MirrorWriterProcessor.joinParts()

备份1:

import java.io.BufferedReader;

import java.io.File; 

import java.io.FileOutputStream; 

import java.io.IOException; 

import java.io.InputStreamReader; 

import java.io.OutputStreamWriter; 

import java.io.PrintWriter; 

/**
 * Backs up a MySQL database by running the {@code mysqldump} command-line
 * tool and saving its standard output to a UTF-8 encoded SQL file.
 *
 * @author GaoHuanjie
 */
public class MySQLDatabaseBackup {

    /**
     * Exports a MySQL database to a file with {@code mysqldump}.
     *
     * <p>The {@code mysqldump} executable must be resolvable by the operating
     * system (for example via {@code PATH}). Anything it prints on standard
     * error is forwarded to this JVM's standard error.
     *
     * @author GaoHuanjie
     * @param hostIP IP address of the server hosting the MySQL database
     * @param userName user name used to log in to the database
     * @param password password used to log in to the database
     * @param savePath directory the export file is written to; created if missing
     * @param fileName name of the export file
     * @param databaseName name of the database to export
     * @return {@code true} if {@code mysqldump} exited with status 0 and the
     *         dump was written without error, {@code false} otherwise
     * @throws InterruptedException if the calling thread is interrupted while
     *         waiting for {@code mysqldump} to finish
     */
    public static boolean exportDatabaseTool(String hostIP, String userName, String password,
            String savePath, String fileName, String databaseName) throws InterruptedException {
        File saveDir = new File(savePath);
        // mkdirs() also returns false when another process created the
        // directory in the meantime, so re-check before giving up.
        if (!saveDir.exists() && !saveDir.mkdirs() && !saveDir.isDirectory()) {
            return false;
        }
        File target = new File(saveDir, fileName);

        // One array element per argument: values containing spaces (or shell
        // metacharacters) stay a single argument instead of being re-tokenized
        // the way Runtime.exec(String) would.
        ProcessBuilder builder = new ProcessBuilder(
                "mysqldump",
                "-h" + hostIP,
                "-u" + userName,
                "-p" + password,
                "--default-character-set=utf8",
                databaseName);
        // Only stdout is read below; forwarding stderr keeps mysqldump from
        // blocking on a full stderr pipe and makes its diagnostics visible.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);

        PrintWriter printWriter = null;
        BufferedReader bufferedReader = null;
        Process process = null;
        try {
            printWriter = new PrintWriter(
                    new OutputStreamWriter(new FileOutputStream(target), "UTF-8"));
            process = builder.start();
            bufferedReader = new BufferedReader(
                    new InputStreamReader(process.getInputStream(), "UTF-8"));
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                printWriter.println(line);
            }
            printWriter.flush();
            // PrintWriter never throws on write failure (e.g. disk full);
            // checkError() is the only way to notice a truncated dump.
            boolean written = !printWriter.checkError();
            int exitCode = process.waitFor();
            return exitCode == 0 && written;
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (bufferedReader != null) {
                try {
                    bufferedReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (printWriter != null) {
                printWriter.close();
            }
            if (process != null) {
                // No-op for a finished process; reaps it after an error or interrupt.
                process.destroy();
            }
        }
        return false;
    }

    /**
     * Demo entry point: dumps the {@code test} database from a fixed host into
     * {@code D:/backupDatabase/2014-10-14.sql} and prints the outcome.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            if (exportDatabaseTool("172.16.0.127", "root", "123456",
                    "D:/backupDatabase", "2014-10-14.sql", "test")) {
                System.out.println("数据库成功备份!!!");
            } else {
                System.out.println("数据库备份失利!!!");
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }
}

直接换成 CloseableHttpClient 还不行,需要这样使用:CloseableHttpClient httpClient = HttpClientBuilder.create().build();(原来的 import org.apache.http.impl.client.DefaultHttpClient; 已不再推荐使用。)

// Assemble the path from its parts, then re-decode the result so that
// non-ASCII (e.g. Chinese) URL characters are not written out garbled.
StringBuffer sb = new StringBuffer(length());
sb.append(mainPart.asStringBuffer());
if (null != uniquePart) {
    sb.append(uniquePart);
}
if (suffixAtEnd) {
    // Query first (prefixed with '@'), suffix last.
    if (null != query) {
        sb.append("@");
        sb.append(query);
    }
    if (null != suffix) {
        sb.append('.');
        sb.append(suffix);
    }
} else {
    // Suffix first, query last (no '@' prefix in this branch).
    if (null != suffix) {
        sb.append('.');
        sb.append(suffix);
    }
    if (null != query) {
        sb.append(query);
    }
}
String joined = sb.toString();
try {
    // Take the characters back to bytes via ISO-8859-1 and decode those bytes
    // as GB2312. NOTE(review): this presumes the parts were GB2312 bytes that
    // got decoded as ISO-8859-1 upstream - confirm against the crawler setup.
    return new String(joined.getBytes("ISO-8859-1"), "GB2312");
} catch (UnsupportedEncodingException e) {
    // GB2312 is not a mandatory JVM charset. Fall back to the unconverted
    // path rather than returning null, which the caller would trip over.
    e.printStackTrace();
    return joined;
}

备份2:

public static void backup() {

        try {

            Runtime rt = Runtime.getRuntime();

            // 调用 调用mysql的安装目录的吩咐

            Process child = rt

                    .exec(“e:\\MySQL\\bin\\mysqldump -h localhost
-uroot -p123 db_name”);

            // 设置导出编码为utf-8。这里不可不是utf-8

            //
把经过试行中的调控台出口音讯写入.sql文件,即生成了备份文件。注:假使不对调节台消息举办读出,则会产生进程堵塞不恐怕运行

            InputStream in = child.getInputStream();//
调控台的出口音信作为输入流

            InputStreamReader xx = new InputStreamReader(in, “utf-8”);

            //
设置输出流编码为utf-8。这里不可不是utf-8,不然从流中读入的是乱码

            String inStr;

            StringBuffer sb = new StringBuffer(“”);

            String outStr;

            // 组合调控台出口新闻字符串

            BufferedReader br = new BufferedReader(xx);

            while ((inStr = br.readLine()) != null) {

                sb.append(inStr + “\r\n”);

            }

            outStr = sb.toString();

            // 要用来做导入用的sql指标文件:

            FileOutputStream fout = new
FileOutputStream(“c:\\test.sql”);

            OutputStreamWriter writer = new OutputStreamWriter(fout,
“utf-8”);

            writer.write(outStr);

            writer.flush();

            in.close();

            xx.close();

            br.close();

            writer.close();

            fout.close();

            System.out.println(“”);

        } catch (Exception e) {

            e.printStackTrace();

        }

    }

DefaultHttpClient 已经不再推荐使用,有需要的同学把这个类替换成 import org.apache.http.impl.client.CloseableHttpClient; 使用就可以了。

原文:

  import java.io.BufferedReader;
  import java.io.FileOutputStream;
  import java.io.IOException;
  import java.io.InputStream;
  import java.io.InputStreamReader;
  import java.io.OutputStreamWriter;
  import java.io.UnsupportedEncodingException;
  import java.net.HttpURLConnection;
  import java.net.Socket;
  import java.net.URL;
  import java.net.URLConnection;
  import java.net.URLEncoder;
  import java.util.ArrayList;
  import java.util.HashMap;
  import java.util.Iterator;
  import java.util.List;
  import java.util.Map;
  import java.util.Map.Entry;
  18.   
  19. import org.apache.http.HttpResponse;  
  20. import org.apache.http.NameValuePair;  
  21. import org.apache.http.client.HttpClient;  
  22. import org.apache.http.client.entity.UrlEncodedFormEntity;  
  23. import org.apache.http.client.methods.HttpGet;  
  24. import org.apache.http.client.methods.HttpPost;  
  25. import org.apache.http.impl.client.DefaultHttpClient;  
  26. import org.apache.http.message.BasicNameValuePair;  
  27.   
  28. /** 
  29.  * @Description:发送http乞求扶助类 
  30.  * @author:liuyc 
  31.  * @time:2016年5月17日 下午3:25:32 
  32.  */  
  33. public class HttpClientHelper {  
  34.     /** 
  35.      * @Description:使用HttpURLConnection发送post请求 
  36.      * @author:liuyc 
  37.      * @time:2016年5月17日 下午3:26:07 
  38.      */  
  39.     public static String sendPost(String urlParam, Map<String, Object> params, String charset) {  
  40.         StringBuffer resultBuffer = null;  
  41.         // 营造央浼参数  
  42.         StringBuffer sbParams = new StringBuffer();  
  43.         if (params != null && params.size() > 0) {  
  44.             for (Entry<String, Object> e : params.entrySet()) {  
  45.                 sbParams.append(e.getKey());  
  46.                 sbParams.append(“=”);  
  47.                 sbParams.append(e.getValue());  
  48.                 sbParams.append(“&”);  
  49.             }  
  50.         }  
  51.         HttpURLConnection con = null;  
  52.         OutputStreamWriter osw = null;  
  53.         BufferedReader br = null;  
  54.         // 发送央浼  
  55.         try {  
  56.             URL url = new URL(urlParam);  
  57.             con = (HttpURLConnection) url.openConnection();  
  58.             con.setRequestMethod(“POST”);  
  59.             con.setDoOutput(true);  
  60.             con.setDoInput(true);  
  61.             con.setUseCaches(false);  
  62.             con.setRequestProperty(“Content-Type”, “application/x-www-form-urlencoded”);  
  63.             if (sbParams != null && sbParams.length() > 0) {  
  64.                 osw = new OutputStreamWriter(con.getOutputStream(), charset);  
  65.                 osw.write(sbParams.substring(0, sbParams.length() – 1));  
  66.                 osw.flush();  
  67.             }  
  68.             // 读取再次来到内容  
  69.             resultBuffer = new StringBuffer();  
  70.             int contentLength = Integer.parseInt(con.getHeaderField(“Content-Length”));  
  71.             if (contentLength > 0) {  
  72.                 br = new BufferedReader(new InputStreamReader(con.getInputStream(), charset));  
  73.                 String temp;  
  74.                 while ((temp = br.readLine()) != null) {  
  75.                     resultBuffer.append(temp);  
  76.                 }  
  77.             }  
  78.         } catch (Exception e) {  
  79.             throw new RuntimeException(e);  
  80.         } finally {  
  81.             if (osw != null) {  
  82.                 try {  
  83.                     osw.close();  
  84.                 } catch (IOException e) {  
  85.                     osw = null;  
  86.                     throw new RuntimeException(e);  
  87.                 } finally {  
  88.                     if (con != null) {  
  89.                         con.disconnect();  
  90.                         con = null;  
  91.                     }  
  92.                 }  
  93.             }  
  94.             if (br != null) {  
  95.                 try {  
  96.                     br.close();  
  97.                 } catch (IOException e) {  
  98.                     br = null;  
  99.                     throw new RuntimeException(e);  
  100.                 } finally {  
  101.                     if (con != null) {  
  102.                         con.disconnect();  
  103.                         con = null;  
  104.                     }  
  105.                 }  
  106.             }  
  107.         }  
  108.   
  109.         return resultBuffer.toString();  
  110.     }  
  111.   
  112.     /** 
  113.      * @Description:使用URLConnection发送post 
  114.      * @author:liuyc 
  115.      * @time:2016年5月17日 下午3:26:52 
  116.      */  
  117.     public static String sendPost2(String urlParam, Map<String, Object> params, String charset) {  
  118.         StringBuffer resultBuffer = null;  
  119.         // 营造央求参数  
  120.         StringBuffer sbParams = new StringBuffer();  
  121.         if (params != null && params.size() > 0) {  
  122.             for (Entry<String, Object> e : params.entrySet()) {  
  123.                 sbParams.append(e.getKey());  
  124.                 sbParams.append(“=”);  
  125.                 sbParams.append(e.getValue());  
  126.                 sbParams.append(“&”);  
  127.             }  
  128.         }  
  129.         URLConnection con = null;  
  130.         OutputStreamWriter osw = null;  
  131.         BufferedReader br = null;  
  132.         try {  
  133.             URL realUrl = new URL(urlParam);  
  134.             // 张开和ULacrosseL之间的连年  
  135.             con = realUrl.openConnection();  
  136.             // 设置通用的央求属性  
  137.             con.setRequestProperty(“accept”, “*/*”);  
  138.             con.setRequestProperty(“connection”, “Keep-Alive”);  
  139.             con.setRequestProperty(“Content-Type”, “application/x-www-form-urlencoded”);  
  140.             con.setRequestProperty(“user-agent”, “Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)”);  
  141.             // 发送POST央浼必须安装如下两行  
  142.             con.setDoOutput(true);  
  143.             con.setDoInput(true);  
  144.             // 获取U奇骏LConnection对象对应的出口流  
  145.             osw = new OutputStreamWriter(con.getOutputStream(), charset);  
  146.             if (sbParams != null && sbParams.length() > 0) {  
  147.                 // 发送央求参数  
  148.                 osw.write(sbParams.substring(0, sbParams.length() – 1));  
  149.                 // flush输出流的缓冲  
  150.                 osw.flush();  
  151.             }  
  152.             // 定义BufferedReader输入流来读取U凯雷德L的响应  
  153.             resultBuffer = new StringBuffer();  
  154.             int contentLength = Integer.parseInt(con.getHeaderField(“Content-Length”));  
  155.             if (contentLength > 0) {  
  156.                 br = new BufferedReader(new InputStreamReader(con.getInputStream(), charset));  
  157.                 String temp;  
  158.                 while ((temp = br.readLine()) != null) {  
  159.                     resultBuffer.append(temp);  
  160.                 }  
  161.             }  
  162.         } catch (Exception e) {  
  163.             throw new RuntimeException(e);  
  164.         } finally {  
  165.             if (osw != null) {  
  166.                 try {  
  167.                     osw.close();  
  168.                 } catch (IOException e) {  
  169.                     osw = null;  
  170.                     throw new RuntimeException(e);  
  171.                 }  
  172.             }  
  173.             if (br != null) {  
  174.                 try {  
  175.                     br.close();  
  176.                 } catch (IOException e) {  
  177.                     br = null;  
  178.                     throw new RuntimeException(e);  
  179.                 }  
  180.             }  
  181.         }  
  182.         return resultBuffer.toString();  
  183.     }  
  184.   
  185.     /**  
  186.      * @Description:发送get请求保存下载文件  
  187.      * @author:liuyc  
  188.      * @time:2016年5月17日 下午3:27:29  
  189.      */  
  190.     public static void sendGetAndSaveFile(String urlParam, Map<String, Object> params, String fileSavePath) {  
  191.         // 营造恳求参数  
  192.         StringBuffer sbParams = new StringBuffer();  
  193.         if (params != null && params.size() > 0) {  
  194.             for (Entry<String, Object> entry : params.entrySet()) {  
  195.                 sbParams.append(entry.getKey());  
  196.                 sbParams.append(“=”);  
  197.                 sbParams.append(entry.getValue());  
  198.                 sbParams.append(“&”);  
  199.             }  
  200.         }  
  201.         HttpURLConnection con = null;  
  202.         BufferedReader br = null;  
  203.         FileOutputStream os = null;  
  204.         try {  
  205.             URL url = null;  
  206.             if (sbParams != null && sbParams.length() > 0) {  
  207.                 url = new URL(urlParam + “?” + sbParams.substring(0, sbParams.length() – 1));  
  208.             } else {  
  209.                 url = new URL(urlParam);  
  210.             }  
  211.             con = (HttpURLConnection) url.openConnection();  
  212.             con.setRequestProperty(“Content-Type”, “application/x-www-form-urlencoded”);  
  213.             con.connect();  
  214.             InputStream is = con.getInputStream();  
  215.             os = new FileOutputStream(fileSavePath);  
  216.             byte buf[] = new byte[1024];  
  217.             int count = 0;  
  218.             while ((count = is.read(buf)) != -1) {  
  219.                 os.write(buf, 0, count);  
  220.             }  
  221.             os.flush();  
  222.         } catch (Exception e) {  
  223.             throw new RuntimeException(e);  
  224.         } finally {  
  225.             if (os != null) {  
  226.                 try {  
  227.                     os.close();  
  228.                 } catch (IOException e) {  
  229.                     os = null;  
  230.                     throw new RuntimeException(e);  
  231.                 } finally {  
  232.                     if (con != null) {  
  233.                         con.disconnect();  
  234.                         con = null;  
  235.                     }  
  236.                 }  
  237.             }  
  238.             if (br != null) {  
  239.                 try {  
  240.                     br.close();  
  241.                 } catch (IOException e) {  
  242.                     br = null;  
  243.                     throw new RuntimeException(e);  
  244.                 } finally {  
  245.                     if (con != null) {  
  246.                         con.disconnect();  
  247.                         con = null;  
  248.                     }  
  249.                 }  
  250.             }  
  251.         }  
  252.     }  
  253.   
  254.     /** 
  255.      * @Description:使用HttpURLConnection发送get请求 
  256.      * @author:liuyc 
  257.      * @time:2016年5月17日 下午3:27:29 
  258.      */  
  259.     public static String sendGet(String urlParam, Map<String, Object> params, String charset) {  
  260.         StringBuffer resultBuffer = null;  
  261.         // 构建需要参数  
  262.         StringBuffer sbParams = new StringBuffer();  
  263.         if (params != null && params.size() > 0) {  
  264.             for (Entry<String, Object> entry : params.entrySet()) {  
  265.                 sbParams.append(entry.getKey());  
  266.                 sbParams.append(“=”);  
  267.                 sbParams.append(entry.getValue());  
  268.                 sbParams.append(“&”);  
  269.             }  
  270.         }  
  271.         HttpURLConnection con = null;  
  272.         BufferedReader br = null;  
  273.         try {  
  274.             URL url = null;  
  275.             if (sbParams != null && sbParams.length() > 0) {  
  276.                 url = new URL(urlParam + “?” + sbParams.substring(0, sbParams.length() – 1));  
  277.             } else {  
  278.                 url = new URL(urlParam);  
  279.             }  
  280.             con = (HttpURLConnection) url.openConnection();  
  281.             con.setRequestProperty(“Content-Type”, “application/x-www-form-urlencoded”);  
  282.             con.connect();  
  283.             resultBuffer = new StringBuffer();  
  284.             br = new BufferedReader(new InputStreamReader(con.getInputStream(), charset));  
  285.             String temp;  
  286.             while ((temp = br.readLine()) != null) {  
  287.                 resultBuffer.append(temp);  
  288.             }  
  289.         } catch (Exception e) {  
  290.             throw new RuntimeException(e);  
  291.         } finally {  
  292.             if (br != null) {  
  293.                 try {  
  294.                     br.close();  
  295.                 } catch (IOException e) {  
  296.                     br = null;  
  297.                     throw new RuntimeException(e);  
  298.                 } finally {  
  299.                     if (con != null) {  
  300.                         con.disconnect();  
  301.                         con = null;  
  302.                     }  
  303.                 }  
  304.             }  
  305.         }  
  306.         return resultBuffer.toString();  
  307.     }  
  308.   
  309.     /** 
  310.      * @Description:使用URLConnection发送get请求 
  311.      * @author:liuyc 
  312.      * @time:2016年5月17日 下午3:27:58 
  313.      */  
  314.     public static String sendGet2(String urlParam, Map<String, Object> params, String charset) {  
  315.         StringBuffer resultBuffer = null;  
  316.         // 创设央求参数  
  317.         StringBuffer sbParams = new StringBuffer();  
  318.         if (params != null && params.size() > 0) {  
  319.             for (Entry<String, Object> entry : params.entrySet()) {  
  320.                 sbParams.append(entry.getKey());  
  321.                 sbParams.append(“=”);  
  322.                 sbParams.append(entry.getValue());  
  323.                 sbParams.append(“&”);  
  324.             }  
  325.         }  
  326.         BufferedReader br = null;  
  327.         try {  
  328.             URL url = null;  
  329.             if (sbParams != null && sbParams.length() > 0) {  
  330.                 url = new URL(urlParam + “?” + sbParams.substring(0, sbParams.length() – 1));  
  331.             } else {  
  332.                 url = new URL(urlParam);  
  333.             }  
  334.             URLConnection con = url.openConnection();  
  335.             // 设置央浼属性  
  336.             con.setRequestProperty(“accept”, “*/*”);  
  337.             con.setRequestProperty(“connection”, “Keep-Alive”);  
  338.             con.setRequestProperty(“Content-Type”, “application/x-www-form-urlencoded”);  
  339.             con.setRequestProperty(“user-agent”, “Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)”);  
  340.             // 创设连接  
  341.             con.connect();  
  342.             resultBuffer = new StringBuffer();  
  343.             br = new BufferedReader(new InputStreamReader(con.getInputStream(), charset));  
  344.             String temp;  
  345.             while ((temp = br.readLine()) != null) {  
  346.                 resultBuffer.append(temp);  
  347.             }  
  348.         } catch (Exception e) {  
  349.             throw new RuntimeException(e);  
  350.         } finally {  
  351.             if (br != null) {  
  352.                 try {  
  353.                     br.close();  
  354.                 } catch (IOException e) {  
  355.                     br = null;  
  356.                     throw new RuntimeException(e);  
  357.                 }  
  358.             }  
  359.         }  
  360.         return resultBuffer.toString();  
  361.     }  
  362.   
  363.     /**  
  364.      * @Description:使用HttpClient发送post请求  
  365.      * @author:liuyc  
  366.      * @time:2016年5月17日 下午3:28:23  
  367.      */  
  368.     public static String httpClientPost(String urlParam, Map<String, Object> params, String charset) {  
  369.         StringBuffer resultBuffer = null;  
  370.         HttpClient client = new DefaultHttpClient();  
  371.         HttpPost httpPost = new HttpPost(urlParam);  
  372.         // 塑造乞请参数  
  373.         List<NameValuePair> list = new ArrayList<NameValuePair>();  
  374.         Iterator<Entry<String, Object>> iterator = params.entrySet().iterator();  
  375.         while (iterator.hasNext()) {  
  376.             Entry<String, Object> elem = iterator.next();  
  377.             list.add(new BasicNameValuePair(elem.getKey(), String.valueOf(elem.getValue())));  
  378.         }  
  379.         BufferedReader br = null;  
  380.         try {  
  381.             if (list.size() > 0) {  
  382.                 UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list, charset);  
  383.                 httpPost.setEntity(entity);  
  384.             }  
  385.             HttpResponse response = client.execute(httpPost);  
  386.             // 读取服务器响应数据  
  387.             resultBuffer = new StringBuffer();  
  388.             br = new BufferedReader(new InputStreamReader(response.getEntity().getContent()));  
  389.             String temp;  
  390.             while ((temp = br.readLine()) != null) {  
  391.                 resultBuffer.append(temp);  
  392.             }  
  393.         } catch (Exception e) {  
  394.             throw new RuntimeException(e);  
  395.         } finally {  
  396.             if (br != null) {  
  397.                 try {  
  398.                     br.close();  
  399.                 } catch (IOException e) {  
  400.                     br = null;  
  401.                     throw new RuntimeException(e);  
  402.                 }  
  403.             }  
  404.         }  
  405.         return resultBuffer.toString();  
  406.     }  
  407.   
  408.     /** 
  409.      * @Description:使用HttpClient发送get请求 
  410.      * @author:liuyc 
  411.      * @time:2016年5月17日 下午3:28:56 
  412.      */  
  413.     public static String httpClientGet(String urlParam, Map<String, Object> params, String charset) {  
  414.         StringBuffer resultBuffer = null;  
  415.         HttpClient client = new DefaultHttpClient();  
  416.         BufferedReader br = null;  
  417.         // 营造央求参数  
  418.         StringBuffer sbParams = new StringBuffer();  
  419.         if (params != null && params.size() > 0) {  
  420.             for (Entry<String, Object> entry : params.entrySet()) {  
  421.                 sbParams.append(entry.getKey());  
  422.                 sbParams.append(“=”);  
  423.                 try {  
  424.                     sbParams.append(URLEncoder.encode(String.valueOf(entry.getValue()), charset));  
  425.                 } catch (UnsupportedEncodingException e) {  
  426.                     throw new RuntimeException(e);  
  427.                 }  
  428.                 sbParams.append(“&”);  
  429.             }  
  430.         }  
  431.         if (sbParams != null && sbParams.length() > 0) {  
  432.             urlParam = urlParam + “?” + sbParams.substring(0, sbParams.length() – 1);  
  433.         }  
  434.         HttpGet httpGet = new HttpGet(urlParam);  
  435.         try {  
  436.             HttpResponse response = client.execute(httpGet);  
  437.             // 读取服务器响应数据  
  438.             br = new BufferedReader(new InputStreamReader(response.getEntity().getContent()));  
  439.             String temp;  
  440.             resultBuffer = new StringBuffer();  
  441.             while ((temp = br.readLine()) != null) {  
  442.                 resultBuffer.append(temp);  
  443.             }  
  444.         } catch (Exception e) {  
  445.             throw new RuntimeException(e);  
  446.         } finally {  
  447.             if (br != null) {  
  448.                 try {  
  449.                     br.close();  
  450.                 } catch (IOException e) {  
  451.                     br = null;  
  452.                     throw new RuntimeException(e);  
  453.                 }  
  454.             }  
  455.         }  
  456.         return resultBuffer.toString();  
  457.     }  
  458.   
  459.     /** 
  460.      * @Description:使用socket发送post请求 
  461.      * @author:liuyc 
  462.      * @time:2016年5月18日 上午9:26:22 
  463.      */  
  464.     public static String sendSocketPost(String urlParam, Map<String, Object> params, String charset) {  
  465.         String result = “”;  
  466.         // 营造诉求参数  
  467.         StringBuffer sbParams = new StringBuffer();  
  468.         if (params != null && params.size() > 0) {  
  469.             for (Entry<String, Object> entry : params.entrySet()) {  
  470.                 sbParams.append(entry.getKey());  
  471.                 sbParams.append(“=”);  
  472.                 sbParams.append(entry.getValue());  
  473.                 sbParams.append(“&”);  
  474.             }  
  475.         }  
  476.         Socket socket = null;  
  477.         OutputStreamWriter osw = null;  
  478.         InputStream is = null;  
  479.         try {  
  480.             URL url = new URL(urlParam);  
  481.             String host = url.getHost();  
  482.             int port = url.getPort();  
  483.             if (-1 == port) {  
  484.                 port = 80;  
  485.             }  
  486.             String path = url.getPath();  
  487.             socket = new Socket(host, port);  
  488.             StringBuffer sb = new StringBuffer();  
  489.             sb.append(“POST ” + path + ” HTTP/1.1\r\n”);  
  490.             sb.append(“Host: ” + host + “\r\n”);  
  491.             sb.append(“Connection: Keep-Alive\r\n”);  
  492.             sb.append(“Content-Type: application/x-www-form-urlencoded; charset=utf-8 \r\n”);  
  493.             sb.append(“Content-Length: “).append(sb.toString().getBytes().length).append(“\r\n”);  
  494.             // 这里四个回车换行,表示新闻头写完,不然服务器会一而再伺机  
  495.             sb.append(“\r\n”);  
  496.             if (sbParams != null && sbParams.length() > 0) {  
  497.                 sb.append(sbParams.substring(0, sbParams.length() – 1));  
  498.             }  
  499.             osw = new OutputStreamWriter(socket.getOutputStream());  
  500.             osw.write(sb.toString());  
  501.             osw.flush();  
  502.             is = socket.getInputStream();  
  503.             String line = null;  
  504.             // 服务器响应体数据长度  
  505.             int contentLength = 0;  
  506.             // 读取http响应尾部消息  
  507.             do {  
  508.                 line = readLine(is, 0, charset);  
  509.                 if (line.startsWith(“Content-Length”)) {  
  510.                     // 得到响应体内容长度  
  511.                     contentLength = Integer.parseInt(line.split(“:”)[1].trim());  
  512.                 }  
  513.                 // 倘诺蒙受了三个独门的回车换行,则意味着哀告头截止  
  514.             } while (!line.equals(“\r\n”));  
  515.             // 读抽取响应体数据(正是你要的数码)  
  516.             result = readLine(is, contentLength, charset);  
  517.         } catch (Exception e) {  
  518.             throw new RuntimeException(e);  
  519.         } finally {  
  520.             if (osw != null) {  
  521.                 try {  
  522.                     osw.close();  
  523.                 } catch (IOException e) {  
  524.                     osw = null;  
  525.                     throw new RuntimeException(e);  
  526.                 } finally {  
  527.                     if (socket != null) {  
  528.                         try {  
  529.                             socket.close();  
  530.                         } catch (IOException e) {  
  531.                             socket = null;  
  532.                             throw new RuntimeException(e);  
  533.                         }  
  534.                     }  
  535.                 }  
  536.             }  
  537.             if (is != null) {  
  538.                 try {  
  539.                     is.close();  
  540.                 } catch (IOException e) {  
  541.                     is = null;  
  542.                     throw new RuntimeException(e);  
  543.                 } finally {  
  544.                     if (socket != null) {  
  545.                         try {  
  546.                             socket.close();  
  547.                         } catch (IOException e) {  
  548.                             socket = null;  
  549.                             throw new RuntimeException(e);  
  550.                         }  
  551.                     }  
  552.                 }  
  553.             }  
  554.         }  
  555.         return result;  
  556.     }  
  557.   
  558.     /** 
  559.      * @Description:使用socket发送get请求 
  560.      * @author:liuyc 
  561.      * @time:2016年5月18日 上午9:27:18 
  562.      */  
  563.     public static String sendSocketGet(String urlParam, Map<String, Object> params, String charset) {  
  564.         String result = “”;  
  565.         // 创设须要参数  
  566.         StringBuffer sbParams = new StringBuffer();  
  567.         if (params != null && params.size() > 0) {  
  568.             for (Entry<String, Object> entry : params.entrySet()) {  
  569.                 sbParams.append(entry.getKey());  
  570.                 sbParams.append(“=”);  
  571.                 sbParams.append(entry.getValue());  
  572.                 sbParams.append(“&”);  
  573.             }  
  574.         }  
  575.         Socket socket = null;  
  576.         OutputStreamWriter osw = null;  
  577.         InputStream is = null;  
  578.         try {  
  579.             URL url = new URL(urlParam);  
  580.             String host = url.getHost();  
  581.             int port = url.getPort();  
  582.             if (-1 == port) {  
  583.                 port = 80;  
  584.             }  
  585.             String path = url.getPath();  
  586.             socket = new Socket(host, port);  
  587.             StringBuffer sb = new StringBuffer();  
  588.             sb.append(“GET ” + path + ” HTTP/1.1\r\n”);  
  589.             sb.append(“Host: ” + host + “\r\n”);  
  590.             sb.append(“Connection: Keep-Alive\r\n”);  
  591.             sb.append(“Content-Type: application/x-www-form-urlencoded; charset=utf-8 \r\n”);  
  592.             sb.append(“Content-Length: “).append(sb.toString().getBytes().length).append(“\r\n”);  
  593.             // 这里二个回车换行,表示消息头写完,不然服务器会一连伺机  
  594.             sb.append(“\r\n”);  
  595.             if (sbParams != null && sbParams.length() > 0) {  
  596.                 sb.append(sbParams.substring(0, sbParams.length() – 1));  
  597.             }  
  598.             osw = new OutputStreamWriter(socket.getOutputStream());  
  599.             osw.write(sb.toString());  
  600.             osw.flush();  
  601.             is = socket.getInputStream();  
  602.             String line = null;  
  603.             // 服务器响应体数据长度  
  604.             int contentLength = 0;  
  605.             // 读取http响应底部音讯  
  606.             do {  
  607.                 line = readLine(is, 0, charset);  
  608.                 if (line.startsWith(“Content-Length”)) {  
  609.                     // 得到响应体内容长度  
  610.                     contentLength = Integer.parseInt(line.split(“:”)[1].trim());  
  611.                 }  
  612.                 // 要是遭逢了二个单独的回车换行,则表示诉求头甘休  
  613.             } while (!line.equals(“\r\n”));  
  614.             // 读抽取响应体数据(就是您要的数额)  
  615.             result = readLine(is, contentLength, charset);  
  616.         } catch (Exception e) {  
  617.             throw new RuntimeException(e);  
  618.         } finally {  
  619.             if (osw != null) {  
  620.                 try {  
  621.                     osw.close();  
  622.                 } catch (IOException e) {  
  623.                     osw = null;  
  624.                     throw new RuntimeException(e);  
  625.                 } finally {  
  626.                     if (socket != null) {  
  627.                         try {  
  628.                             socket.close();  
  629.                         } catch (IOException e) {  
  630.                             socket = null;  
  631.                             throw new RuntimeException(e);  
  632.                         }  
  633.                     }  
  634.                 }  
  635.             }  
  636.             if (is != null) {  
  637.                 try {  
  638.                     is.close();  
  639.                 } catch (IOException e) {  
  640.                     is = null;  
  641.                     throw new RuntimeException(e);  
  642.                 } finally {  
  643.                     if (socket != null) {  
  644.                         try {  
  645.                             socket.close();  
  646.                         } catch (IOException e) {  
  647.                             socket = null;  
  648.                             throw new RuntimeException(e);  
  649.                         }  
  650.                     }  
  651.                 }  
  652.             }  
  653.         }  
  654.         return result;  
  655.     }  
  656.   
  657.     /** 
  658.      * @Description:读取一行数据,contentLe内容长度为0时,读取响应头音讯,不为0时读正文 
  659.      * @time:2016年5月17日 下午6:11:07 
  660.      */  
  661.     private static String readLine(InputStream is, int contentLength, String charset) throws IOException {  
  662.         List<Byte> lineByte = new ArrayList<Byte>();  
  663.         byte tempByte;  
  664.         int cumsum = 0;  
  665.         if (contentLength != 0) {  
  666.             do {  
  667.                 tempByte = (byte) is.read();  
  668.                 lineByte.add(Byte.valueOf(tempByte));  
  669.                 cumsum++;  
  670.             } while (cumsum < contentLength);// cumsum等于contentLength表示已读完  
  671.         } else {  
  672.             do {  
  673.                 tempByte = (byte) is.read();  
  674.                 lineByte.add(Byte.valueOf(tempByte));  
  675.             } while (tempByte != 10);// 换行符的ascii码值为10  
  676.         }  
  677.   
  678.         byte[] resutlBytes = new byte[lineByte.size()];  
  679.         for (int i = 0; i < lineByte.size(); i++) {  
  680.             resutlBytes[i] = (lineByte.get(i)).byteValue();  
  681.         }  
  682.         return new String(resutlBytes, charset);  
  683.     }  
  684.       
  685. }  

以上4种分别是可发送get和post请求的方法,第1种:HttpURLConnection、第2种:URLConnection、第3种:HttpClient、第4种:Socket,朋友们要注意的是,使用第3种HttpClient时需要依赖三个jar包,分别是:apache-httpcomponents-httpclient.jar、commons-logging-1.0.4.jar、httpcore-4.1.1.jar。好了就到这里吧,有问题记得留言哦

在对Heritrix进行配置并能够成功运行后, 我们调整了Heritrix的参数,
使其能够更快地抓取页面.

还原:

public static void restore(String databaseName) {

        try {

            Runtime runtime = Runtime.getRuntime();

            Process process = runtime

                    .exec(“e:\\MySQL\\bin\\mysql.exe -hlocalhost
-uroot -p123 –default-character-set=utf8 “

                            + databaseName);

            OutputStream outputStream = process.getOutputStream();

            BufferedReader br = new BufferedReader(new
InputStreamReader(

                    new FileInputStream(“C:\\test.sql”), “utf-8”));

            String str = null;

            StringBuffer sb = new StringBuffer();

            while ((str = br.readLine()) != null) {

                sb.append(str + “\r\n”);

            }

            str = sb.toString();

            // System.out.println(str);

            OutputStreamWriter writer = new
OutputStreamWriter(outputStream,

                    “utf-8”);

            writer.write(str);

            writer.flush();

            outputStream.close();

            br.close();

            writer.close();

        } catch (UnsupportedEncodingException e) {

            e.printStackTrace();

        } catch (FileNotFoundException e) {

            e.printStackTrace();

        } catch (IOException e) {

            e.printStackTrace();

        }

    }

我们主要调整的是Frontier的参数, 调整后的参数如下:

网赌十大信誉的平台 1

接着我们开始了抓取工作, 终于在六月一号上午抓完了ccer全站,
用时约19.5小时, 数据量1.6G, 文件数23814,
其中网页(txt/html/asp)数量19622.

以下是本次Crawl Job的报告:

网赌十大信誉的平台 2

抓取工作完成后我们发现了一个比较严重的问题: 编码问题.
很多网页抓到本地后文件名是乱码, 并且无法解码(可能是因为文件名被损坏了).
不知道把Writer改为org.archive.crawler.writer.ARCWriterProcessor会不会缓解这个问题.

下一步我们将进行Heritrix源代码分析和链接数据分析工作.

网赌十大信誉的平台 3

Post Author: admin

发表评论

电子邮件地址不会被公开。 必填项已用*标注