Commit 3f9640d6
Authored 7 years ago by Adam Paquette
Removed io_ccs and associated test file.
Parent: 89ae2f2b
Showing 2 changed files with 0 additions and 217 deletions:
plio/io/io_ccs.py (+0, −198)
plio/io/tests/test_io_ccs.py (+0, −19)
plio/io/io_ccs.py deleted (100644 → 0, +0 −198)
# This code is used to read individual ChemCam CCS files
# Header data is stored as attributes of the data frame
# White space is stripped from the column names
import os
import time

import numpy as np
import pandas as pd
import scipy.io as io

from plio.utils.utils import lookup
from plio.utils.utils import file_search


def CCAM_CSV(input_data):
    try:
        df = pd.read_csv(input_data, header=14, engine='c')
        cols = list(df.columns.values)
        df.columns = [i.strip().replace('#', '') for i in cols]  # strip whitespace from column names
        df.set_index(['wave'], inplace=True)  # use wavelengths as indices
        # read the file header and put information into the dataframe as new columns
        metadata = pd.read_csv(input_data, sep='=', nrows=14, comment=',',
                               engine='c', index_col=0, header=None)
    except:
        # handle files with an extra header row containing temperature
        df = pd.read_csv(input_data, header=15, engine='c')
        cols = list(df.columns.values)
        df.columns = [i.strip().replace('#', '') for i in cols]  # strip whitespace from column names
        df.set_index(['wave'], inplace=True)  # use wavelengths as indices
        # read the file header and put information into the dataframe as new columns
        metadata = pd.read_csv(input_data, sep='=', nrows=15, comment=',',
                               engine='c', index_col=0, header=None)

    # create multiindex so spectra can be easily extracted with a single key
    df.index = [['wvl'] * len(df.index), df.index.values.round(4)]
    df = df.T  # transpose so that each spectrum is a row

    # remove extraneous stuff from the metadata indices
    metadata.index = [i.strip().strip('#').replace('FLOAT', '').lower() for i in metadata.index.values]
    metadata = metadata.T

    # extract info from the file name
    fname = os.path.basename(input_data)
    metadata['sclock'] = fname[4:13]
    metadata['seqid'] = fname[25:34].upper()
    metadata['Pversion'] = fname[34:36]

    # duplicate the metadata for each row in the df
    metadata = metadata.append([metadata] * (len(df.index) - 1), ignore_index=True)
    metadata.index = df.index  # make the indices match
    metadata.columns = [['meta'] * len(metadata.columns), metadata.columns.values]  # make the columns into a multiindex
    df = pd.concat([metadata, df], axis=1)  # combine the spectra with the metadata
    return df


def CCAM_SAV(input_data, ave=True):
    # read the IDL .SAV file
    data = io.readsav(input_data, python_dict=True)

    # put the spectra into data frames and combine them
    df_UV = pd.DataFrame(data['uv'], index=data['defuv'])
    df_VIS = pd.DataFrame(data['vis'], index=data['defvis'])
    df_VNIR = pd.DataFrame(data['vnir'], index=data['defvnir'])
    df_spect = pd.concat([df_UV, df_VIS, df_VNIR])
    df_spect.columns = ['shot' + str(i + 1) for i in df_spect.columns]  # add 1 to the columns so they correspond to shot number

    df_aUV = pd.DataFrame(data['auv'], index=data['defuv'], columns=['average'])
    df_aVIS = pd.DataFrame(data['avis'], index=data['defvis'], columns=['average'])
    df_aVNIR = pd.DataFrame(data['avnir'], index=data['defvnir'], columns=['average'])
    df_ave = pd.concat([df_aUV, df_aVIS, df_aVNIR])

    df_mUV = pd.DataFrame(data['muv'], index=data['defuv'], columns=['median'])
    df_mVIS = pd.DataFrame(data['mvis'], index=data['defvis'], columns=['median'])
    df_mVNIR = pd.DataFrame(data['mvnir'], index=data['defvnir'], columns=['median'])
    df_med = pd.concat([df_mUV, df_mVIS, df_mVNIR])

    df = pd.concat([df_spect, df_ave, df_med], axis=1)

    # create multiindex to access wavelength values
    # also, round the wavelength values to a more reasonable level of precision
    df.index = [['wvl'] * len(df.index), df.index.values.round(4)]

    # transpose so that spectra are rows rather than columns
    df = df.T

    # extract metadata from the file name and add it to the data frame
    # use the multiindex label "meta" for all metadata
    fname = os.path.basename(input_data)

    # for some reason, some ChemCam files have the 'darkname' key, others call it 'darkspec'
    # this try-except pair converts to 'darkname' when needed
    try:
        data['darkname']
    except:
        data['darkname'] = data['darkspec']

    metadata = [fname,
                fname[4:13],
                fname[25:34].upper(),
                fname[34:36],
                data['continuumvismin'],
                data['continuumvnirmin'],
                data['continuumuvmin'],
                data['continuumvnirend'],
                data['distt'],
                data['darkname'],
                data['nshots'],
                data['dnoiseiter'],
                data['dnoisesig'],
                data['matchedfilter']]
    metadata = np.tile(metadata, (len(df.index), 1))
    metadata_cols = list(zip(['meta'] * len(df.index),
                             ['file', 'sclock', 'seqid', 'Pversion',
                              'continuumvismin', 'continuumvnirmin', 'continuumuvmin',
                              'continuumvnirend', 'distt', 'dark', 'nshots',
                              'dnoiseiter', 'dnoisesig', 'matchedfilter']))
    metadata = pd.DataFrame(metadata,
                            columns=pd.MultiIndex.from_tuples(metadata_cols),
                            index=df.index)

    df = pd.concat([metadata, df], axis=1)

    if ave == True:
        df = df.loc['average']
        df = df.to_frame().T
    else:
        pass

    return df


def ccam_batch(directory, searchstring='*.csv', to_csv=None, lookupfile=None, ave=True, progressbar=None):
    # Determine if the files are .csv or .SAV
    if '.sav' in searchstring.lower():
        is_sav = True
    else:
        is_sav = False
    filelist = file_search(directory, searchstring)
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')

    # Extract the sclock and version for each file and ensure that only one
    # file per sclock is being read, and that it is the one with the highest version number
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]  # extract the sclock
        P_version[i] = basenames[i][-5:-4]  # extract the version

    sclocks_unique = np.unique(sclocks)  # find unique sclocks
    filelist_new = np.array([], dtype='str')
    for i in sclocks_unique:
        match = (sclocks == i)  # find all instances with matching sclocks
        maxP = P_version[match] == max(P_version[match])  # find the highest version among these files
        filelist_new = np.append(filelist_new, filelist[match][maxP])  # keep only the file with the highest version

    filelist = filelist_new
    # Should add a progress bar for importing large numbers of files
    dt = []
    for i in filelist:
        print(i)
        try:
            if is_sav:
                t = time.time()
                tmp = CCAM_SAV(i, ave=ave)
                dt.append(time.time() - t)
            else:
                t = time.time()
                tmp = CCAM_CSV(i)
                dt.append(time.time() - t)
            if i == filelist[0]:
                combined = tmp
            else:
                # This ensures that rounding errors are not causing mismatches in columns
                cols1 = list(combined['wvl'].columns)
                cols2 = list(tmp['wvl'].columns)
                if set(cols1) == set(cols2):
                    combined = pd.concat([combined, tmp])
                else:
                    print("Wavelengths don't match!")
        except:
            pass

    combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')])

    if lookupfile is not None:
        combined = lookup(combined, lookupfile=lookupfile)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return combined
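For context, the removed module exposed three entry points: CCAM_CSV, CCAM_SAV, and ccam_batch. The minimal sketch below shows how they were typically called and how the resulting ('meta', ...) / ('wvl', ...) column MultiIndex can be accessed; the .SAV file name and the directory path are hypothetical examples, not files shipped with Plio.

# Minimal usage sketch for the removed io_ccs module; paths below are hypothetical.
from plio.io import io_ccs

# Read a single ChemCam CCS .csv file; the result has a column MultiIndex with
# header fields under 'meta' and spectra under 'wvl' (keyed by rounded wavelength).
single = io_ccs.CCAM_CSV('CL5_398645626CCS_F0030004CCAM02013P3.csv')
spectra = single['wvl']   # one row per spectrum, one column per wavelength
header = single['meta']   # parsed header fields plus sclock/seqid/Pversion from the file name

# Read an IDL .SAV file; ave=True keeps only the shot-averaged spectrum.
averaged = io_ccs.CCAM_SAV('CL5_398645626CCS_F0030004CCAM02013P3.SAV', ave=True)

# Batch-read a directory, keeping only the highest-version file per sclock,
# and optionally write the combined table out as csv.
combined = io_ccs.ccam_batch('/path/to/ccs/', searchstring='*CCS*.csv', to_csv='combined_ccs.csv')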
plio/io/tests/test_io_ccs.py deleted (100644 → 0, +0 −19)
import os
import sys
import unittest

sys.path.insert(0, os.path.abspath('..'))

from plio.examples import get_path
from plio.io import io_ccs


class Test_CSS_IO(unittest.TestCase):

    def setUp(self):
        self.examplefile = get_path('CL5_398645626CCS_F0030004CCAM02013P3.csv')

    def test_14_item_header_csv(self):
        io_ccs.CCAM_CSV(self.examplefile)


if __name__ == '__main__':
    unittest.main()
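For reference, a sketch of one way the removed test module could have been invoked while it still existed, using only the standard unittest loader (this assumes plio is importable and is not project-specific tooling from this repository):

# Load and run the removed test module with the standard unittest loader.
import unittest

suite = unittest.defaultTestLoader.loadTestsFromName('plio.io.tests.test_io_ccs')
unittest.TextTestRunner(verbosity=2).run(suite)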