我经常用 Python 和 NumPy 处理数据,因此写了几个小函数,方便读写数值数据:
- # -*- coding: utf-8 -*-
- #----------------------------------------------------------------------
- # FileName:gettxtdata.py
- #功能:读取字符串和文件中的数值数据(浮点数)
- #主要提供类似matlab中的dlmread和dlmwrite函数
- #同时提供loadtxtdata和savetxtdata函数
- #Date: 2013-1-10
- #Author:吴徐平
- #----------------------------------------------------------------------
- import numpy
- #----------------------------------------------------------------------
def StringToDoubleArray(String):
    """
    Extract every floating-point number found in a string.

    Processing steps:
      1. Text from a '#' up to the end of that line is treated as a
         comment and blanked out (on every line, not only the last one).
      2. Every character other than digits, '.', 'e'/'E', '+' and '-' is
         replaced by a space, so any other character acts as a separator.
      3. Runs of '.', 'e', '+', '-' that are followed by whitespace or the
         end of the text (i.e. dangling exponent/sign/dot characters) are
         removed.
      4. The remaining whitespace-separated tokens are converted with
         ``float``; a token that still is not a valid number (for example
         "1.2.3" or "e5") becomes ``nan``.

    Parameters:
        String: the text to scan; it may span several lines.

    Returns:
        A one-dimensional ``numpy.float64`` array holding the numbers in
        order of appearance.  The array is empty when no number is found
        and has shape ``(1,)`` when exactly one number is found.
    """
    import re

    if not String.strip():
        return numpy.empty([0], numpy.float64)

    # MULTILINE makes '$' match at the end of each line, so a comment on
    # any line is removed rather than only a comment on the final line.
    cleaned = re.sub(r'#.*$', " ", String, flags=re.MULTILINE)
    # Keep only characters that can be part of a float literal.  Newlines
    # become spaces here, so the text is a single line from now on.
    cleaned = re.sub(r'[^0-9.e+-]', " ", cleaned, flags=re.IGNORECASE)
    # Drop dangling runs such as the "e-" in "1e- " or a lone "-"; matching
    # the whole run at once removes the need for repeated passes.
    cleaned = re.sub(r'[.e+-]+(?=\s|$)', " ", cleaned, flags=re.IGNORECASE)

    values = []
    for token in cleaned.split():
        try:
            values.append(float(token))
        except ValueError:
            # Malformed leftovers are reported as nan instead of raising.
            values.append(numpy.nan)
    return numpy.array(values, dtype=numpy.float64)
- #----------------------------------------------------------------------
def GetDoubleListFromString(String):
    """
    Convert a multi-line string into a list of per-line number arrays.

    The text is split on '\\n'.  For each non-blank line:
      - everything from a '#' to the end of the line is discarded as a
        comment;
      - every character other than digits, '.', 'e'/'E', '+' and '-' is
        replaced by a space, so any other character acts as a separator;
      - runs of '.', 'e', '+', '-' followed by whitespace or the end of the
        line (dangling exponent/sign/dot characters) are removed;
      - the remaining tokens are converted with ``float``; a token that is
        still not a valid number becomes ``nan``.

    Lines that contain no number contribute nothing to the result.

    Parameters:
        String: the text to parse.

    Returns:
        A list of one-dimensional ``numpy.float64`` arrays, one per line
        that held at least one number.  Rows may differ in length.  A line
        with a single number yields an array of shape ``(1,)`` so that
        callers can always index the row.
    """
    import re

    # Compile once; the same patterns are applied to every line.
    comment_re = re.compile(r'#.*$')
    non_numeric_re = re.compile(r'[^0-9.e+-]', re.IGNORECASE)
    dangling_re = re.compile(r'[.e+-]+(?=\s|$)', re.IGNORECASE)

    DoubleList = []
    for Line in String.split('\n'):
        if not Line.strip():
            continue
        cleaned = comment_re.sub(" ", Line)
        cleaned = non_numeric_re.sub(" ", cleaned)
        cleaned = dangling_re.sub(" ", cleaned)
        tokens = cleaned.split()
        if not tokens:
            continue
        values = []
        for token in tokens:
            try:
                values.append(float(token))
            except ValueError:
                # Malformed leftovers are reported as nan instead of raising.
                values.append(numpy.nan)
        DoubleList.append(numpy.array(values, dtype=numpy.float64))
    return DoubleList
- #----------------------------------------------------------------------
def GetDoubleListFromFile(FileName):
    """
    Read a text file and return its numbers as a list of per-line arrays.

    The whole file is read as text and handed to
    ``GetDoubleListFromString``, which does the actual parsing.

    Parameters:
        FileName: path of the text file to read.

    Returns:
        The list produced by ``GetDoubleListFromString`` for the file's
        contents: one numpy array per line that contains numbers.  Rows
        may hold different numbers of values.
    """
    # 'with' closes the file even if read() raises.
    with open(FileName, 'r') as handle:
        text = handle.read()
    return GetDoubleListFromString(text)
def dlmread(FileName, dtype=numpy.float64):
    """
    Load numeric data from a text file into a two-dimensional array.

    Parsing is delegated to ``GetDoubleListFromFile``.  Because rows may
    contain different numbers of values, every row is truncated to the
    length of the shortest row so that the result is rectangular.

    Parameters:
        FileName: path of the text file to read.
        dtype: element type of the returned array (default
            ``numpy.float64``).

    Returns:
        A ``numpy.ndarray`` of shape ``(rows, shortest_row_length)``.  When
        the file yields no numeric rows an empty array of shape ``(0, 0)``
        is returned.
    """
    # atleast_1d guards against scalar (0-d) rows, which cannot be sliced.
    rows = [numpy.atleast_1d(row) for row in GetDoubleListFromFile(FileName)]
    if not rows:
        return numpy.empty([0, 0], dtype=dtype)

    # The narrowest row decides how many columns can be kept.
    MinColumnSize = min(row.size for row in rows)

    DoubleArray = numpy.empty([len(rows), MinColumnSize], dtype=dtype)
    for i, row in enumerate(rows):
        DoubleArray[i, :] = row[:MinColumnSize]
    return DoubleArray
- #----------------------------------------------------------------------
def loadtxtdata(filename, delimiter=""):
    """
    Load numeric data from a delimited text file using numpy.genfromtxt.

    Every character of ``delimiter`` as well as ',' and ';' is replaced by
    a space, and the resulting text is parsed by ``numpy.genfromtxt`` with
    its default (whitespace) splitting.

    Parameters:
        filename: path of the text file to read.
        delimiter: string whose individual characters are treated as extra
            column separators (default: none).

    Returns:
        The array produced by ``numpy.genfromtxt`` for the file contents.
    """
    # The StringIO module only exists on Python 2; Python 3 provides the
    # class in io.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    # 'with' closes the file even if read() raises.
    with open(filename, 'r') as file_handle:
        LinesALL = file_handle.read()

    # Normalise every separator character to a plain space.
    for RChar in delimiter + ",;":
        LinesALL = LinesALL.replace(RChar, " ")
    return numpy.genfromtxt(StringIO(LinesALL))
- #----------------------------------------------------------------------
def savetxtdata(filename, X, fmt='%.8e', delimiter=' ', newline='\n'):
    """
    Write the array ``X`` to the text file ``filename``.

    All arguments are forwarded unchanged to ``numpy.savetxt``.

    Parameters:
        filename: destination passed to ``numpy.savetxt``.
        X: the data to write.
        fmt: format string applied to each value.
        delimiter: string placed between columns.
        newline: string placed between rows.

    Returns:
        True once ``numpy.savetxt`` has returned.
    """
    write_options = {'fmt': fmt, 'delimiter': delimiter, 'newline': newline}
    numpy.savetxt(filename, X, **write_options)
    return True
- #----------------------------------------------------------------------
def dlmwrite(filename, X, fmt='%.8e', delimiter=' ', newline='\n'):
    """
    Write the array ``X`` to the text file ``filename``.

    All arguments are forwarded unchanged to ``numpy.savetxt``.

    Parameters:
        filename: destination passed to ``numpy.savetxt``.
        X: the data to write.
        fmt: format string applied to each value.
        delimiter: string placed between columns.
        newline: string placed between rows.

    Returns:
        True once ``numpy.savetxt`` has returned.
    """
    numpy.savetxt(
        filename,
        X,
        fmt=fmt,
        delimiter=delimiter,
        newline=newline,
    )
    return True
- #----------------------------------------------------------------------
- #测试程序
- #----------------------------------------------------------------------
if __name__ == '__main__':
    # Demo / smoke test: write a random matrix to disk and read it back
    # with each of the loader functions.
    import os
    import tempfile

    # Generate random data.
    data = numpy.random.randn(3, 4)
    # Use the system temp directory instead of a hard-coded drive path so
    # the demo runs on any platform.
    filename = os.path.join(tempfile.gettempdir(), 'x.txt')
    # Write the data to the file.
    dlmwrite(filename, data)
    # Read it back as a list of per-line arrays.
    x = GetDoubleListFromFile(filename)
    print(x)
    # Read it back as a 2-D array.
    print(dlmread(filename))
    # Extract the numbers embedded in an arbitrary string.
    y = StringToDoubleArray('79l890joj')
    print(y)
    # Read it back through the genfromtxt-based loader.
    z = loadtxtdata(filename)
    print(z)
我只在 Python 2.7 中测试过;在 Python 3.x 中 `StringIO` 模块已并入 `io` 模块(应使用 `from io import StringIO`),如需在 Python 3.x 中使用,请自行测试.