Java-许多txt文件之间的字符串比较



我有一个简单的程序:

  • 用java解析txt文件
  • 将日期转换为unix时间戳
  • 每秒只保留一行值

下面是代码。txt文件的格式如下:

21/03/2013 04:18:23 6890 6830 6850 6770 6830 6400 6630 6710 6770 6850 35024 34976
21/03/2013 04:18:23 6910 6800 6850 6770 6820 6410 6590 6710 6780 6820 35056 34976
21/03/2013 04:18:24 6890 6820 6860 6770 6830 6400 6580 6720 6770 6860 34912 34880
21/03/2013 04:18:24 6860 6840 6840 6770 6830 6390 6660 6700 6740 6890 35008 34880

我的程序将数据转换成如下形式:

put sensor.rat.128 1364278801 7100 sensor=A

put sensor.rat.128 1364278801 6910 sensor=B

put sensor.rat.128 1364278801 6890 sensor=C

put sensor.rat.128 1364278801 6630 sensor=D

该程序在单个txt文件上运行得很好,因为它每秒只保留一行值;但如果不同的txt文件中出现了相同的秒(即相同的时间戳),它就无法识别这些重复项。

所以问题是:如何让代码在跨多个文件的情况下,每秒也只保留一行值?希望大家能理解我的意思。

import java.util.Scanner;
import java.util.List;
import java.util.ArrayList;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.io.FileUtils;

/**
 * Downsamples whitespace-separated sensor logs to one record per second and
 * prints every kept reading as a {@code put <metric> <epoch> <value> sensor=<tag>} line.
 *
 * <p>A record is 14 tokens: a date ({@code dd/MM/yyyy}), a time ({@code HH:mm:ss})
 * and twelve sensor readings. Within one file, a record whose second equals the
 * second of the record right before it is dropped. Files are processed
 * independently, so the same second occurring in two files is printed twice.
 */
public class Downsampler {

    /** Folder scanned when no folder is passed on the command line. */
    private static final String DEFAULT_PATH = "/home/alessandro/Data128prova";

    /** Metric name written on every output line. */
    private static final String METRIC = "sensor.rat.128.riprova";

    /** Pattern of the first two tokens of a record once joined by a space. */
    private static final String DATE_PATTERN = "dd/MM/yyyy HH:mm:ss";

    /** Sensor tags, in the order the readings follow the timestamp. */
    private static final String[] SENSORS =
            {"A", "B", "C", "D", "E", "F", "G", "H", "I", "L", "M", "N"};

    /** Tokens per record: date, time, then one reading per sensor. */
    private static final int RECORD_SIZE = 2 + SENSORS.length;

    /**
     * Scans every regular file in the input folder and prints its downsampled records.
     *
     * @param args optional; {@code args[0]} overrides the default input folder
     * @throws Exception if a file cannot be opened or a record's timestamp cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        File[] files = new File(path).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is missing or is not a readable directory.
            System.err.println("Cannot list files in folder: " + path);
            return;
        }
        // Built once for the whole run; only this thread uses it.
        SimpleDateFormat format = new SimpleDateFormat(DATE_PATTERN);
        for (File file : files) {
            if (!file.isFile()) {
                continue; // a sub-directory cannot be opened by Scanner
            }
            printRows(downsample(readTokens(file), format));
        }
    }

    /** Reads all whitespace-separated tokens of {@code file}, closing the scanner even on failure. */
    private static List<String> readTokens(File file) throws FileNotFoundException {
        List<String> tokens = new ArrayList<String>();
        Scanner scanner = new Scanner(file);
        try {
            while (scanner.hasNext()) {
                tokens.add(scanner.next());
            }
        } finally {
            scanner.close();
        }
        return tokens;
    }

    /**
     * Converts the tokens of one file into rows of {@code [epoch, reading1..reading12]},
     * keeping only the first of consecutive records that fall in the same second.
     * A trailing record with fewer than {@link #RECORD_SIZE} tokens is ignored.
     */
    private static List<String[]> downsample(List<String> tokens, SimpleDateFormat format)
            throws java.text.ParseException {
        List<String[]> rows = new ArrayList<String[]>();
        Long previousEpoch = null;
        // "i + RECORD_SIZE <= size" guarantees every index up to i + 13 exists.
        for (int i = 0; i + RECORD_SIZE <= tokens.size(); i += RECORD_SIZE) {
            String stamp = tokens.get(i) + " " + tokens.get(i + 1);
            Long epoch = Long.valueOf(format.parse(stamp).getTime() / 1000);
            if (!epoch.equals(previousEpoch)) {
                String[] row = new String[1 + SENSORS.length];
                row[0] = epoch.toString();
                for (int s = 0; s < SENSORS.length; s++) {
                    row[s + 1] = tokens.get(i + 2 + s);
                }
                rows.add(row);
            }
            previousEpoch = epoch;
        }
        return rows;
    }

    /** Prints one "put" line per sensor reading of every row. */
    private static void printRows(List<String[]> rows) {
        for (String[] row : rows) {
            for (int s = 0; s < SENSORS.length; s++) {
                System.out.println("put " + METRIC + " " + row[0] + " " + row[s + 1]
                        + " sensor=" + SENSORS[s]);
            }
        }
    }
}

使用Map和时间作为键来存储数据集。下次在您正在分析的任何文件中看到该时间点时,您可以决定如何处理新数据集(将其丢弃,将其与该时间的当前数据集进行平均等)。以下是您更新的代码,用于使用Map将数据集与特定时间相关联。这段代码只是打印一条语句,表示它已经在特定的时间内被处理了。您应该插入自己的代码来处理重复项。

import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.List;
import java.util.ArrayList;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.io.FileUtils;
/**
 * Downsamples whitespace-separated sensor logs to one record per second per file,
 * prints every kept reading as a {@code put <metric> <epoch> <value> sensor=<tag>} line,
 * and tracks in a map which seconds have already been seen in any file.
 *
 * <p>A record is 14 tokens: a date ({@code dd/MM/yyyy}), a time ({@code HH:mm:ss})
 * and twelve sensor readings. For every record the program reports whether its
 * second is new or was already processed; what to do with an already-processed
 * second (discard, average, min/max, ...) is left as a hook in {@code downsample}.
 */
public class Downsampler
{
   /** Folder scanned when no folder is passed on the command line. */
   private static final String DEFAULT_PATH = "/tmp/data";

   /** Metric name written on every output line. */
   private static final String METRIC = "sensor.rat.128.riprova";

   /** Pattern of the first two tokens of a record once joined by a space. */
   private static final String DATE_PATTERN = "dd/MM/yyyy HH:mm:ss";

   /** Sensor tags, in the order the readings follow the timestamp. */
   private static final String[] SENSORS =
         {"A", "B", "C", "D", "E", "F", "G", "H", "I", "L", "M", "N"};

   /** Tokens per record: date, time, then one reading per sensor. */
   private static final int RECORD_SIZE = 2 + SENSORS.length;

   /**
    * Scans every regular file in the input folder and prints its downsampled records.
    *
    * @param args optional; {@code args[0]} overrides the default input folder
    * @throws Exception if a file cannot be opened or a record's timestamp cannot be parsed
    */
   public static void main(String[] args) throws Exception
   {
      String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
      File[] files = new File(path).listFiles();
      if (files == null)
      {
         // listFiles() returns null when the path is missing or is not a readable directory.
         System.err.println("Cannot list files in folder: " + path);
         return;
      }
      // Built once for the whole run; only this thread uses it.
      SimpleDateFormat format = new SimpleDateFormat(DATE_PATTERN);
      // Epoch second -> first row seen for that second, shared across all files.
      Map<Long, List<String>> timeValuesMap = new HashMap<Long, List<String>>();
      for (File file : files)
      {
         if (!file.isFile())
         {
            continue; // a sub-directory cannot be opened by Scanner
         }
         printRows(downsample(readTokens(file), format, timeValuesMap));
      }
   }

   /** Reads all whitespace-separated tokens of {@code file}, closing the scanner even on failure. */
   private static List<String> readTokens(File file) throws FileNotFoundException
   {
      List<String> tokens = new ArrayList<String>();
      Scanner scanner = new Scanner(file);
      try
      {
         while (scanner.hasNext())
         {
            tokens.add(scanner.next());
         }
      }
      finally
      {
         scanner.close();
      }
      return tokens;
   }

   /**
    * Converts the tokens of one file into rows of {@code [epoch, reading1..reading12]},
    * keeping only the first of consecutive records that fall in the same second, and
    * records each previously unseen second in {@code timeValuesMap}.
    * A trailing record with fewer than {@link #RECORD_SIZE} tokens is ignored.
    */
   private static List<List<String>> downsample(List<String> tokens, SimpleDateFormat format,
         Map<Long, List<String>> timeValuesMap) throws java.text.ParseException
   {
      List<List<String>> rows = new ArrayList<List<String>>();
      Long previousEpoch = null;
      // "i + RECORD_SIZE <= size" guarantees every index up to i + 13 exists.
      for (int i = 0; i + RECORD_SIZE <= tokens.size(); i += RECORD_SIZE)
      {
         String str = tokens.get(i) + " " + tokens.get(i + 1);
         Long epoch = Long.valueOf(format.parse(str).getTime() / 1000);

         // This record's own row; addAll copies the readings out of the token list.
         List<String> row = new ArrayList<String>(1 + SENSORS.length);
         row.add(epoch.toString());
         row.addAll(tokens.subList(i + 2, i + RECORD_SIZE));

         if (!epoch.equals(previousEpoch))
         {
            rows.add(row);
         }
         previousEpoch = epoch;

         if (timeValuesMap.containsKey(epoch))
         {
            System.out.println("Already processed time: " + str);
            // Hook: combine 'row' with timeValuesMap.get(epoch) here
            // (ignore it, average across sensors, min/max, ...) and put the result back.
         }
         else
         {
            System.out.println("New time: " + str);
            // Store only this second's row, not the list of all rows of the file.
            timeValuesMap.put(epoch, row);
         }
      }
      return rows;
   }

   /** Prints one "put" line per sensor reading of every row. */
   private static void printRows(List<List<String>> rows)
   {
      for (List<String> row : rows)
      {
         for (int s = 0; s < SENSORS.length; s++)
         {
            System.out.println("put " + METRIC + " " + row.get(0) + " " + row.get(s + 1)
                  + " sensor=" + SENSORS[s]);
         }
      }
   }
}

注意:对于非常大的数据集,这可能会导致JVM内存不足,因为Map中的项目数量变得非常大。

最新更新