Skip to content
Snippets Groups Projects
Commit 321002fa authored by iris.dumeur6@etu.univ-lorraine.fr's avatar iris.dumeur6@etu.univ-lorraine.fr
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
Showing with 833 additions and 0 deletions
# pre-commit configuration: the hooks below run on staged files at commit time
# (or on the whole repository with `pre-commit run --all-files`).
exclude: '^docs/conf.py'
default_language_version:
  python: python3
repos:
  # Generic file hygiene checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: trailing-whitespace
      # - id: check-added-large-files
      - id: check-ast
      - id: check-json
      - id: check-merge-conflict
      - id: check-xml
      - id: check-yaml
      - id: debug-statements
      - id: end-of-file-fixer
      - id: requirements-txt-fixer
      - id: mixed-line-ending
        args: ['--fix=auto'] # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows
  # Code formatter.
  - repo: https://github.com/psf/black
    rev: 23.3.0
    hooks:
      - id: black
        language_version: python3.10
  # Import sorting, using the black-compatible profile.
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
        args: ["--profile", "black"]
  # Linter.
  - repo: https://github.com/PyCQA/flake8
    rev: 6.0.0
    hooks:
      - id: flake8
        ## You can add flake8 plugins via `additional_dependencies`:
        # additional_dependencies: [flake8-bugbear]
  # Rewrites code to syntax available from Python 3.10 onwards.
  - repo: https://github.com/asottile/pyupgrade
    rev: v3.3.1
    hooks:
      - id: pyupgrade
        args: [--py310-plus]
  # Suggests modernisations of Python code.
  - repo: https://github.com/dosisod/refurb
    rev: v1.12.0
    hooks:
      - id: refurb
This diff is collapsed.
Makefile 0 → 100644
# Name of the conda environment created by `make build_conda`.
CONDA_ENV = new_dl_env

# No-op defaults; they are overridden below when running on the cluster.
CONDA_SRC = echo ""
MODULE_PURGE = echo ""
CONDA_ACT = echo ""

# The presence of /work/CESBIO/ is used to detect the cluster environment.
ifneq ($(wildcard /work/CESBIO/*),)
MODULE_PURGE := module purge
CONDA_SRC := module load conda/22.11.1
CONDA_ACT := conda activate /work/scratch/$(USER)/virtualenv/$(CONDA_ENV)
CONDA = $(MODULE_PURGE) && $(CONDA_SRC) && $(CONDA_ACT)
else
CONDA_ACT := conda activate /home/dumeuri/virtualenv/$(CONDA_ENV)
CONDA = $(CONDA_ACT)
endif

# Prefix that puts ./src/ on the Python import path.
PYPATH = PYTHONPATH=./src/:${PYTHONPATH}

# Every target in this file is a command, not a file: declare them all phony
# so that a file or directory with the same name (e.g. `test/`) cannot make
# the target look up to date.
.PHONY: build_conda check test test_no_slow test_no_hal install flake8 pre_commit_all

#####################
# Environment setup #
#####################

build_conda:
	bash create-conda-env.sh $(CONDA_ENV)

#######################
# Testing and linting #
#######################

# `check` used to depend on `pylint` and `mypy`, but no rule with those names
# exists in this Makefile, so `make check` always aborted with
# "No rule to make target". It now relies only on rules defined here.
check: test flake8

test:
	pytest -vv test/

test_no_slow:
	pytest -vv -m "not slow" test/

test_no_hal:
	pytest -vv -m "not hal" test/

install:
	pip install -e .

flake8:
	flake8 .

pre_commit_all:
	pre-commit run --all-files
This code helps handle the MultiSenGE (msenge) dataset.
# Convert the dataset
# Load the data
# Settings read by the conversion script (convert_msenge_dataset).
# All values are left empty here and must be filled in or overridden.
# Directory that is globbed for "<area>_<year>*_<sats>_*.tif" images.
dataset_path:
# Areas to iterate over; one glob per area.
list_area:
# Year used in the image file-name pattern.
year:
# Satellite tag used in the image file-name pattern.
sats:
# Directory holding the "<area>_GR_<x>_<y>.tif" label rasters.
label_dir:
# NOTE(review): the conversion script looks up `path_dataframe`;
# confirm which of the two key names is intended.
path_df:
# Directory where the converted `.pt` files are written.
exit_dir:
# Band indexes handed to rasterio when reading each image.
s2_bands:
# Settings read by the split script (split_msenge).
dataset_path: /home/ad/dumeuri/DeepChange/MSENGE/
list_area:
year:
# Satellite tag used in the image file-name pattern. The split script reads
# `config.sats`, so the key must exist even if its value is supplied on the
# command line.
sats:
label_dir:
output_dir: /home/ad/dumeuri/results/MsenGE/
#!/usr/bin/env bash
# Create a conda environment on the cluster and install the project in it.
# Usage: bash <script> [env_name] [target_parent_dir]

export python_version="3.10"
export name="mt_ubarn"
if [ -n "$1" ]
then
    export name="$1"
fi

source ~/set_proxy.sh
if [ -z "$https_proxy" ]
then
    echo "Please set https_proxy environment variable before running this script"
    exit 1
fi

export target="/work/scratch/$USER/virtualenv/$name"
if [ -n "$2" ]
then
    export target="$2/$name"
fi

echo "Installing $name in $target ..."

if [ -d "$target" ]; then
    echo "Cleaning previous conda env"
    # Quoted so that a path containing spaces cannot expand to several
    # arguments of `rm -rf`.
    rm -rf "$target"
fi

# Create blank virtualenv
module purge
module load conda
conda activate
conda create --yes --prefix "$target" python==${python_version} pip

# Enter virtualenv
conda deactivate
conda activate "$target"

which python
python --version

conda install --yes pytorch=2.0.0 torchvision -c pytorch -c nvidia
conda install rasterio --yes
conda install pytorch-lightning --yes
conda deactivate
conda activate "$target"
# --yes added: without it this step waits for interactive confirmation.
conda install --yes tsnecuda -c conda-forge

# Requirements
pip install -r environment.txt
python -m pip install "dask[distributed]" --upgrade
pip install 'python-lsp-server[all]'
pip install 'black[d]'
python -m ipykernel install --user --name "$name"
pip install pytest
pip install tabulate
pip install pre-commit
pip install -U setuptools setuptools_scm wheel
python -m pip install lightning
# End

# Install the sibling projects in editable mode; they must have been
# downloaded next to this repository beforehand. Abort if a directory is
# missing, otherwise `pip install -e .` would run in the wrong directory.
cd ../torchmuntan || { echo "Directory ../torchmuntan not found"; exit 1; }
pip install -e .
cd ../openeo-datasets || { echo "Directory ../openeo-datasets not found"; exit 1; }
pip install -e .
cd ../multitask_ubarn || { echo "Directory ../multitask_ubarn not found"; exit 1; }
pip install -e .

conda deactivate
#!/usr/bin/env bash
# Create a conda environment on a local machine and install the project in it.
# Usage: bash <script> [env_name] [target_parent_dir]

export python_version="3.10"
export name="mt_ubarn"
if [ -n "$1" ]
then
    export name="$1"
fi

export target="/home/dumeuri/virtualenv/$name"
if [ -n "$2" ]
then
    export target="$2/$name"
fi

echo "Installing $name in $target ..."

if [ -d "$target" ]; then
    echo "Cleaning previous conda env"
    # Quoted so that a path containing spaces cannot expand to several
    # arguments of `rm -rf`.
    rm -rf "$target"
fi

# `conda activate` is a shell function that is not defined in a
# non-interactive bash (e.g. when this file is run with `bash script.sh`);
# load it from the conda executable when one is on PATH.
if command -v conda >/dev/null 2>&1; then
    eval "$(conda shell.bash hook)"
fi

# Create blank virtualenv
conda activate
conda create --yes --prefix "$target" python==${python_version} pip

# Enter virtualenv
conda deactivate
conda activate "$target"

which python
python --version

conda install --yes pytorch=2.0.0 torchvision -c pytorch -c nvidia
conda install rasterio --yes
conda install pytorch-lightning --yes
conda deactivate
conda activate "$target"
# --yes added: without it this step waits for interactive confirmation.
conda install --yes tsnecuda -c conda-forge

# Requirements
pip install -r environment.txt
python -m pip install "dask[distributed]" --upgrade
pip install 'python-lsp-server[all]'
pip install 'black[d]'
python -m ipykernel install --user --name "$name"
pip install pytest
pip install tabulate
pip install pre-commit
pip install -U setuptools setuptools_scm wheel
# End

# Install the sibling projects in editable mode; they must have been
# downloaded next to this repository beforehand. Abort if a directory is
# missing, otherwise `pip install -e .` would run in the wrong directory.
cd ../torchmuntan || { echo "Directory ../torchmuntan not found"; exit 1; }
pip install -e .
cd ../multitask_ubarn || { echo "Directory ../multitask_ubarn not found"; exit 1; }
pip install -e .

conda deactivate
pytorch-lightning
seaborn
omegaconf
matplotlib
scipy
shapely
tensorboard
torchmetrics
lpips
xarray
pandas
geopandas
dask
click
plotnine
ipykernel
hydra-core
einops
ruamel.yaml
seaborn-image
scipy
netCDF4
h5netcdf
# pytest logging: messages are echoed live to the console and also written,
# with timestamps and source locations, to pytest.log.
[tool.pytest.ini_options]
log_cli = true
log_cli_level = "DEBUG"
log_cli_format = "%(message)s"
log_file = "pytest.log"
log_file_level = "DEBUG"
log_file_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)"
log_file_date_format = "%Y-%m-%d %H:%M:%S"
[tool.black]
line-length = 79
# NOTE(review): py38 is listed although the pre-commit hooks target
# Python 3.10 (`--py310-plus`); confirm whether py38 is still wanted.
target-version = ['py310', 'py38']
include = '\.pyi?$'
extend-exclude = '''
/(
# The following are specific to Black, you probably don't want those.
| blib2to3
| tests/data
| profiling
)/
'''
# Enables Black's preview style. Preview formatting can change between
# Black releases; turn this off if stable formatting is required.
preview = true
[tool.isort]
profile = "black"
line_length=79
wrap_length=79
# NOTE(review): flake8 does not read pyproject.toml by itself (a plugin such as
# Flake8-pyproject is needed), so this table is probably ignored; the settings
# flake8 actually uses are those of the [flake8] section in setup.cfg.
[tool.flake8]
extend_ignore = ['F841','E303','E501']
max-line-length = 79
import logging
from ast import literal_eval
import dask
import hydra
import pandas as pd
from omegaconf import DictConfig
from msenge.convert import save_sits_msenge_label
from msenge.open import load_dataset
my_logger = logging.getLogger(__name__)
@hydra.main(config_path="../config/", config_name="convert2pt.yaml")
def convert_msenge_dataset(config: DictConfig):
    """Convert every sample of the dataset to a ``.pt`` file.

    The sample index is either read from a CSV file or rebuilt by scanning
    ``config.dataset_path``; each row is then converted and saved into
    ``config.exit_dir`` by ``save_sits_msenge_label``.

    Args:
        config: Hydra configuration (see ``convert2pt.yaml``).
    """
    my_logger.debug(config)
    # step 1 load the dataset
    # The shipped configuration names the CSV entry ``path_df`` while this
    # script historically read ``path_dataframe``: accept both. ``get`` is
    # used so that an absent key yields None instead of raising.
    path_dataframe = config.get("path_dataframe") or config.get("path_df")
    if path_dataframe is not None:
        df_msenge = pd.read_csv(
            path_dataframe, converters={"path": literal_eval}
        )
        source = path_dataframe
    else:
        df_msenge = load_dataset(
            dataset_path=config.dataset_path,
            list_area=config.list_area,
            year=config.year,
            sats=config.sats,
            label_dir=config.label_dir,
        )
        source = config.dataset_path
    # ``head`` must be called; logging the bound method prints nothing useful.
    my_logger.debug(f"{df_msenge.head()}")
    assert len(df_msenge) > 0, f"Dataset empty, no file found at {source}"
    # step 2 convert all images in the dataset and save them as pt tensor
    # Plain list so that the band selection is a regular Python object when
    # it is sent to the worker processes.
    s2_bands = (
        list(config.s2_bands) if config.s2_bands is not None else None
    )
    # ``dask.delayed`` has to wrap the function itself. Wrapping the *result*
    # of a call (as was done before) runs every conversion eagerly and
    # sequentially while the list is being built.
    delayed_save = dask.delayed(save_sits_msenge_label)
    l_path_sits = [
        delayed_save(
            item,
            s2_bands,
            df_msenge,
            config.exit_dir,
            return_label_path=True,
        )
        for item in range(len(df_msenge))
    ]
    with dask.config.set(scheduler="processes"):
        dask.compute(l_path_sits)


if __name__ == "__main__":
    convert_msenge_dataset()
import logging
from pathlib import Path
import hydra
import torch
from omegaconf import DictConfig
from msenge.display import show_count
from msenge.open import load_dataset
my_logger = logging.getLogger(__name__)
@hydra.main(config_path="../config/", config_name="split.yaml")
def split_msenge(config: DictConfig):
    """Split the dataset index into train / validation / test subsets.

    60% of the rows are sampled for training; 40% of the remaining rows are
    sampled for validation (16% of the total) and what is left forms the test
    set (24% of the total). A bar chart is produced for each subset through
    ``show_count`` and the three data frames are saved in
    ``config.output_dir``.

    Args:
        config: Hydra configuration (see ``split.yaml``).
    """
    all_df_info = load_dataset(
        config.dataset_path,
        config.list_area,
        config.year,
        config.sats,
        config.label_dir,
    )
    my_logger.info(all_df_info.index.is_unique)
    tot_count = show_count(all_df_info, "all_msenge_count.pdf")
    my_logger.debug(f"tot {tot_count.sort_values()}")
    my_logger.debug(f" sum {len(all_df_info)} ")
    # Fixed seed so that the split is reproducible.
    seed = 1
    train_df = all_df_info.sample(frac=0.6, random_state=seed)
    count_train = show_count(train_df, "train_msenge_count.pdf")
    res_train_df = all_df_info.drop(index=train_df.index, axis=0)
    my_logger.info(
        " all {} train {} rest {}".format(
            len(all_df_info), len(train_df.index), len(res_train_df)
        )
    )
    val_df = res_train_df.sample(frac=0.4, random_state=seed)
    count_val = show_count(val_df, "val_msenge_count.pdf")
    test_df = res_train_df.drop(index=val_df.index, axis=0)
    count_test = show_count(test_df, "test_msenge_count.pdf")
    print(f"train {count_train.sort_values()}")
    print(f"val {count_val.sort_values()}")
    print(f"test {count_test.sort_values()}")
    # torch.save does not create missing directories: make sure the output
    # directory exists before writing the three subsets.
    output_dir = Path(config.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    torch.save(train_df, output_dir.joinpath("msenge_train_count.pt"))
    torch.save(test_df, output_dir.joinpath("msenge_test_count.pt"))
    torch.save(val_df, output_dir.joinpath("msenge_val_count.pt"))


if __name__ == "__main__":
    split_msenge()
[metadata]
name = msenge
description = Pip project to load MultiSenGE dataset
author = Iris Dumeur
author_email = iris.dumeur@univ-tlse3.fr
license = AGPL-3.0
license_files = LICENSE.txt
long_description = file: README.rst
long_description_content_type = text/x-rst; charset=UTF-8
url = https://gitlab.cesbio.omp.eu/dumeuri/msenge_dataset
# Add here related links, for example:
project_urls =
    Documentation = https://gitlab.cesbio.omp.eu/dumeuri/msenge_dataset
#    Source = https://github.com/pyscaffold/pyscaffold/
#    Changelog = https://pyscaffold.org/en/latest/changelog.html
#    Tracker = https://github.com/pyscaffold/pyscaffold/issues
#    Conda-Forge = https://anaconda.org/conda-forge/pyscaffold
#    Download = https://pypi.org/project/PyScaffold/#files
#    Twitter = https://twitter.com/PyScaffold

[options]
zip_safe = False
packages = find_namespace:
include_package_data = True
package_dir =
    =src

[options.packages.find]
where = src
exclude =
    tests

[options.extras_require]
# Test requirements, installed with `pip install -e ".[testing]"`.
# This list used to sit directly under [options] and under
# [options.packages.find], where `testing` is not a recognised key, so the
# extra was never defined.
testing =
    setuptools
    pytest
    pytest-cov

[flake8]
# Some sane defaults for the code style checker flake8
max_line_length = 79
extend_ignore = E203, W503,E231,E501
ignore = E203, W503,E231,E501
# ^  Black-compatible
#    E203 and W503 have edge cases handled by black
exclude =
    .tox
    build
    dist
    .eggs
    docs/conf.py

[tool:pytest]
# Specify command line options as you would do when invoking pytest directly.
# e.g. --cov-report html (or xml) for html/xml output or --junitxml junit.xml
# in order to write a coverage file that can be read by Jenkins.
# CAUTION: --cov flags may prohibit setting breakpoints while debugging.
#          Comment those flags to avoid this pytest issue.
# --cov must name the importable package (`msenge`, found under src/), not
# the repository name.
addopts =
    --cov msenge --cov-report term-missing
    --verbose
norecursedirs =
    dist
    build
    .tox
# NOTE(review): the Makefile runs pytest on `test/`; confirm which directory
# name is the right one.
testpaths = tests
# Use pytest markers to select/deselect specific tests
markers =
    slow: mark tests as slow (deselect with '-m "not slow"')
    hal: mark tests that need to run on HAL
    critical: mark test that have to pass
    local: require external disk
#     system: mark end-to-end system tests
# Minimal setuptools entry point: setup() is called without arguments, so the
# package metadata and options are expected to come from the declarative
# configuration (setup.cfg).
from setuptools import setup

if __name__ == "__main__":
    setup()
# Matplotlib rcParams overrides, keyed by rcParams name.
MLP_DEFAULT_RCPARAMS = {
    "font.family": "serif",
    "font.serif": ["Times"],
    # NOTE(review): usetex presumably requires a working LaTeX installation
    # on the machine producing the figures - confirm.
    "text.usetex": True,
    "axes.titlesize": 14,
    "figure.titlesize": "xx-large",
}
import logging
from pathlib import Path
import pandas as pd
import torch
from msenge.open import load_sits_msenge_label
my_logger = logging.getLogger(__name__)
def save_sits_msenge_label(
    item,
    s2_bands: list,
    dataset_frame: pd.DataFrame,
    exit_dir,
    return_label_path=False,
):
    """Load one sample and save it as ``<name_id>.pt`` in ``exit_dir``.

    Args:
        item: positional index of the sample in ``dataset_frame``.
        s2_bands: band selection forwarded to ``load_sits_msenge_label``.
        dataset_frame: data frame describing the dataset.
        exit_dir: output directory; it is created when missing.
        return_label_path: forwarded to ``load_sits_msenge_label``.

    Returns:
        Path of the written file.
    """
    msenge_data = load_sits_msenge_label(
        item, s2_bands, dataset_frame, return_label_path=return_label_path
    )
    out_dir = Path(exit_dir)
    # torch.save does not create missing directories. exist_ok keeps this
    # safe when several workers create the directory at the same time.
    out_dir.mkdir(parents=True, exist_ok=True)
    path = out_dir.joinpath(f"{msenge_data.name_id}.pt")
    my_logger.debug(f"save image at {path}")
    torch.save(msenge_data, path)
    return path
from dataclasses import dataclass
import torch
@dataclass
class MsenGESits:
    """Container for one satellite image time series sample and its labels."""

    # Image time series tensor.
    sits: torch.Tensor
    # One value per acquisition of the series.
    doy: torch.Tensor
    # Label tensor associated with the sample.
    labels: torch.Tensor
    # Optional path of the label file the labels were read from.
    label_path: str | None = None
    # Identifier of the sample; "im" when none is given.
    name_id: str | None = "im"
import glob
from pathlib import Path
import pandas as pd
def sort_path_roi(paths: list, area, year, label_dir: str):
    """Group image paths by patch.

    Args:
        paths: image paths belonging to ``area`` and ``year``.
        area: area identifier stored in the index.
        year: year stored in the index.
        label_dir: directory holding the label rasters.

    Returns:
        Data frame indexed by ``(x, y, area, year)`` with a ``path`` column
        (list of the image paths of the patch) and a ``label_path`` column
        (path of the label raster of the patch, as a string).
    """
    columns = ["x", "y", "path", "area", "year", "label_path"]
    # Rows are collected first and the frame is built once, instead of
    # concatenating a one-row frame per path.
    records = []
    for path in paths:
        x, y = extract_idx_patch(path)
        records.append(
            {
                "x": x,
                "y": y,
                "path": path,
                "area": area,
                "year": year,
                # Stored as a string so that the column holds plain values.
                "label_path": str(
                    extract_label_path(label_dir, area=area, x=x, y=y)
                ),
            }
        )
    df = pd.DataFrame(records, columns=columns)
    grouped = df.groupby(["x", "y", "area", "year"])
    result = grouped["path"].agg(list).to_frame()
    # The label path was previously computed and then lost in the groupby;
    # it is identical for all images of a patch, so keep the first one.
    result["label_path"] = grouped["label_path"].first()
    return result
def load_dataset(dataset_path, list_area, year, sats, label_dir):
    """Build the data frame listing, for every patch, its image paths.

    For each area the images matching
    ``<dataset_path>/<area>_<year>*_<sats>_*.tif`` are grouped by patch with
    ``sort_path_roi``. The index levels are finally copied into the columns
    ``x``, ``y``, ``area`` and ``year``.

    Raises:
        FileNotFoundError: when no image matches the pattern of an area.
    """
    index_names = ["x", "y", "area", "year"]
    frames = []
    for area in list_area:
        pattern = f"{dataset_path}/{area}_{year}*_{sats}_*.tif"
        matches = glob.glob(pattern)
        if not len(matches) > 0:
            raise FileNotFoundError("No image found at {}".format(pattern))
        area_frame = sort_path_roi(matches, area, year, label_dir=label_dir)
        frames.append(pd.concat([area_frame], names=index_names))
    df = pd.concat(frames)
    # Each row name is its index tuple; expose every level as a column.
    for level, column in enumerate(index_names):
        df[column] = df.apply(
            lambda row, level=level: row.name[level], axis=1
        )
    return df
def extract_label_path(label_dir, area, x, y):
    """Return the path ``<label_dir>/<area>_GR_<x>_<y>.tif``."""
    return Path(label_dir) / f"{area}_GR_{x}_{y}.tif"
def extract_idx_patch(patch_name: str):
    """Return the last two ``_``-separated fields of ``patch_name``.

    The last field is cut at its first dot, which drops the file extension.
    """
    fields = patch_name.split("_")
    x, tail = fields[-2], fields[-1]
    y, *_ = tail.split(".")
    return (x, y)
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from msenge.constant import MLP_DEFAULT_RCPARAMS
def show_count(
    df: pd.DataFrame, name_save: str, ignore_index: None | list = None
):
    """Plot the column sums of ``df`` as a bar chart and save the figure.

    Args:
        df: data frame whose columns are summed.
        name_save: file name of the figure, written under
            ``./images/msenge/``.
        ignore_index: labels removed from the sums before plotting.

    Returns:
        The column sums, sorted by label.
    """
    import os

    sum_df = df.sum(axis=0)
    sum_df = sum_df.T.sort_index()
    if ignore_index is not None:
        sum_df = sum_df.drop(ignore_index)
    target = "./images/msenge/" + name_save
    # savefig does not create missing directories.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with mpl.rc_context(MLP_DEFAULT_RCPARAMS):
        fig, ax = plt.subplots(figsize=(15, 10), layout="constrained")
        try:
            sum_df.plot.bar(ax=ax)
            fig.savefig(target)
        finally:
            # pyplot keeps every figure alive until it is closed; this
            # function is called several times per run.
            plt.close(fig)
    return sum_df
import glob
import logging
from datetime import date
from pathlib import Path
import numpy as np
import pandas as pd
import rasterio
import torch
from einops import rearrange
from msenge.dataclass import MsenGESits
from msenge.utils import create_image_name
my_logger = logging.getLogger(__name__)
def load_dataset(
    dataset_path: str, list_area: list, year: int, sats: int, label_dir: str
) -> pd.DataFrame:
    """
    Create a data frame which stores every image path needed to load a
    satellite image time series.

    Args:
        dataset_path: directory searched for the images.
        list_area: areas to process; one glob is run per area.
        year: year used in the file-name pattern.
        sats: satellite tag used in the file-name pattern.
        label_dir: directory forwarded to ``sort_path_roi``.

    Returns:
        The concatenation of the per-area frames returned by
        ``sort_path_roi``, with the index levels also copied into the
        columns ``x``, ``y``, ``area`` and ``year``.

    Raises:
        FileNotFoundError: when no image matches the pattern of an area.
    """
    index_names = ["x", "y", "area", "year"]
    frames = []
    for area in list_area:
        pattern = f"{dataset_path}/{area}_{year}*_{sats}_*.tif"
        matches = glob.glob(pattern)
        if not matches:
            raise FileNotFoundError("No image found at {}".format(pattern))
        area_frame = sort_path_roi(matches, area, year, label_dir=label_dir)
        frames.append(pd.concat([area_frame], names=index_names))
    df = pd.concat(frames)
    # Each row name is its index tuple; expose every level as a column.
    for level, column in enumerate(index_names):
        df[column] = df.apply(
            lambda row, level=level: row.name[level], axis=1
        )
    return df
def open_image_msenge(
    row: pd.Series | dict,
    lband: list,
):
    """Read the requested bands of a raster file with rasterio.

    Args:
        row: mapping providing the keys "image_path" and "doy".
        lband: band indexes passed to rasterio's ``read``.

    Returns:
        Tuple made of the image, read as float32 and transposed with
        ``(1, 2, 0)``, and of ``row["doy"]``.
    """
    source_path = row["image_path"]
    with rasterio.open(source_path, mode="r") as src:
        raw = src.read(lband, out_dtype=np.float32)
        image = raw.transpose(1, 2, 0)
    doy = row["doy"]
    return image, doy
def sort_path_roi(
    paths: list, area: str, year: int, label_dir: str
) -> pd.DataFrame:
    """
    Group the image paths required to build each time series by patch.

    Args:
        paths: image paths belonging to ``area`` and ``year``.
        area: area identifier stored in the index.
        year: year stored in the index.
        label_dir: directory holding the label rasters.

    Returns:
        Data frame indexed by ``(x, y, area, year)`` with a ``path`` column
        (list of the image paths of the patch) and a ``label_path`` column
        (path of the label raster of the patch, as a string).
    """
    columns = ["x", "y", "path", "area", "year", "label_path"]
    # Rows are collected first and the frame is built once, instead of
    # concatenating a one-row frame per path.
    records = []
    for path in paths:
        x, y = extract_idx_patch(path)
        records.append(
            {
                "x": x,
                "y": y,
                "path": path,
                "area": area,
                "year": year,
                # Stored as a string, matching the ``str | None`` annotation
                # of ``MsenGESits.label_path``.
                "label_path": str(
                    extract_label_path(label_dir, area=area, x=x, y=y)
                ),
            }
        )
    df = pd.DataFrame(records, columns=columns)
    grouped = df.groupby(["x", "y", "area", "year"])
    result = grouped["path"].agg(list).to_frame()
    # ``load_sits_msenge_label`` reads a ``label_path`` column, which was
    # previously dropped by the groupby. The label is identical for all
    # images of a patch, so keep the first one.
    result["label_path"] = grouped["label_path"].first()
    return result
def extract_label_path(
label_dir: str, area: str, x: str | int, y: str | int
) -> Path:
"""
extract path of ground truth labels for a segmentation maps
"""
path = f"{area}_GR_{x}_{y}.tif"
return Path(label_dir).joinpath(path)
def extract_idx_patch(patch_name: str) -> tuple[str, str]:
    """Return the last two ``_``-separated fields of ``patch_name``.

    The last field is cut at its first dot, which drops the file extension.

    Args:
        patch_name: file name or path of a patch image.

    Returns:
        The pair ``(x, y)``, both as strings.

    Raises:
        IndexError: when ``patch_name`` contains no underscore.
    """
    l_patch_name = patch_name.split("_")
    x = l_patch_name[-2]
    y = l_patch_name[-1].split(".")[0]
    return (x, y)
def sort_image_by_doy(
    l_image_path: list, extract_type: str = "t2v"
):  # ensure that only one acquisition is possible per day...
    """Sort image paths by acquisition date, keeping one image per date.

    Args:
        l_image_path: image paths whose names embed the acquisition date.
        extract_type: unused; kept for backward compatibility.

    Returns:
        Data frame with the columns ``image_path``, ``date`` and ``doy``,
        sorted by date and without duplicated dates.
    """
    records = []
    for image in l_image_path:
        acquisition = extract_date(image)
        records.append(
            {
                "image_path": image,
                "date": acquisition,
                # ``open_image_msenge`` reads row["doy"], which was never
                # provided. NOTE(review): the day of the year is assumed to
                # be the intended value - confirm.
                "doy": acquisition.timetuple().tm_yday,
            }
        )
    df = pd.DataFrame(records, columns=["image_path", "date", "doy"])
    # A stable sort makes the image kept for a duplicated date deterministic:
    # it is the first one in the input order.
    df = df.sort_values(by=["date"], kind="stable")
    return df.drop_duplicates(subset="date")
def extract_date(image_path: str) -> date:
    """Return the acquisition date encoded in ``image_path``."""
    year, month, day = extract_day_from_path(image_path)
    my_logger.debug(f"tuple day {(year, month, day)}")
    return date(year, month, day)
def extract_day_from_path(image_path: str) -> tuple:
    """Extract ``(year, month, day)`` from an image path.

    The date is the first ``_YYYYMMDD`` group (years 2010 to 2029) found in
    the file name; the whole path is searched as a fallback.

    Args:
        image_path: path of an image whose name embeds the acquisition date.

    Returns:
        Tuple of three integers ``(year, month, day)``.

    Raises:
        AssertionError: when no date can be found in ``image_path``.
    """
    import re

    # A complete 8-digit date is required. Splitting on "_201"/"_202" alone
    # (as was done before) picked up other fields starting with those digits,
    # e.g. a patch index such as "_2010".
    date_pattern = re.compile(r"_(20[12]\d)(\d{2})(\d{2})")
    file_name = Path(image_path).name
    match = date_pattern.search(file_name) or date_pattern.search(image_path)
    if match is None:
        raise AssertionError(
            "Not able to extract the doy in {}".format(image_path)
        )
    year, month_str, day_str = match.groups()
    return int(year), int(month_str), int(day_str)
def load_sits_msenge_label(
    item, s2_bands: list, dataset_frame: pd.DataFrame, return_label_path=False
) -> MsenGESits:
    """
    Load all the images of one patch time series together with its labels.

    Args:
        item: positional index of the sample in ``dataset_frame``.
        s2_bands: band indexes read from every image.
        dataset_frame: data frame providing, for each sample, the columns
            ``path`` (list of image paths) and ``label_path``.
        return_label_path: when true, the label path is stored in the
            returned object.

    Returns:
        ``MsenGESits`` holding the series (rearranged from "n h w c" to
        "n c h w"), the per-image ``doy`` values, the squeezed labels and the
        sample name built by ``create_image_name``.

    Raises:
        AssertionError: when the label file does not exist.
    """
    l_sits = dataset_frame["path"].iloc[item]
    label_path = dataset_frame["label_path"].iloc[item]
    # Checked first so that a missing label fails before any image is read.
    assert Path(label_path).exists(), "No target found at {}".format(
        label_path
    )
    sorted_image_df = sort_image_by_doy(l_sits)
    result = [
        open_image_msenge(row, s2_bands)
        for _, row in sorted_image_df.iterrows()
    ]
    labels, _ = open_image_msenge({"image_path": label_path, "doy": None}, [1])
    l_image, l_index_doy = zip(*result)
    sits = torch.tensor(np.stack(l_image, axis=0))
    sits = rearrange(sits, "n h w c -> n c h w")
    trg = torch.tensor(labels).type_as(sits)
    # The name is now set in every case. It used to be skipped when
    # return_label_path was true, leaving the default "im", so every sample
    # saved by save_sits_msenge_label ended up in the same "im.pt" file.
    return MsenGESits(
        sits,
        torch.tensor(l_index_doy).type_as(sits),
        torch.squeeze(trg),
        label_path=label_path if return_label_path else None,
        name_id=create_image_name(l_sits[0]),
    )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment