[
{"company":" ", "debt": 800, "phones": [123, 234, 456]},
{"company":" ", "debt": 1200, "phones": ["2128506", 456, 789]},
{"company":" ", "debt": "666", "phones": 666},
{"company": " ", "debt": 1500, "phones": [234567, "34567"], "phone": 666},
{"company": {"name": ""}, "debt": 2550, "phones": 788, "phone": 789},
...
//source data
/// One source record: a company name, a list of phone strings and a debt amount.
/// This declaration gives the fields no initial values.
class DebtRec {
var company: String // company name
var phones: Array<String> // phone numbers kept as strings
var debt: Double // debt amount
}
//result data
/// One result entry: a set of company names, a set of phone strings and a debt value.
/// This declaration gives the fields no initial values.
class Debtor {
var companies: Set<String> // distinct company names
var phones: Set<String> // distinct phone strings
var debt: Double // debt value for this entry
}
/// Result container: a list of `Debtor` entries plus a dictionary keyed by phone string.
class Debtors {
var all: Array<Debtor> // every result entry
var index_by_phone: Dictionary<String, Int> // phone string -> Int (presumably a position in `all` — confirm against the code that fills it)
}
// Illustrative fragments: four alternative spellings of a "is myVal Null?" test.
// NOTE(review): `...` presumably stands for elided code, so none of these is
// compilable as written; which of them (if any) is valid depends on declarations
// that are not shown here.

// Variant 1: `if let` with a `Null` pattern.
if let Null = myVal {
...
}
// Variant 2: `match` with a `Null` arm and a catch-all arm that does nothing.
match myVal {
Null => {
...
}
_ => {}
}
// Variant 3: `is` test against `Null`.
if myVal is Null {
...
}
// Variant 4: equality comparison against `Option::Null`.
if myVal == Option::Null {
...
}
import Foundation
// Number of bytes requested per read call while scanning an input file.
let FILE_BUFFER_SIZE = 50000
//source data
/// One record extracted from an input object.
class DebtRec {
    /// Company name; starts out empty.
    var company = ""
    /// Phone numbers, kept as strings; starts out empty.
    var phones = [String]()
    /// Debt amount; starts at zero.
    var debt = 0.0
}
//result data
/// One aggregated result entry.
class Debtor {
    /// Distinct company names collected for this entry.
    var companies = Set<String>()
    /// Distinct phone strings collected for this entry.
    var phones = Set<String>()
    /// Accumulated debt; starts at zero.
    var debt = 0.0
}
/// Container for all aggregated entries plus a phone lookup table.
class Debtors {
    /// Every aggregated entry, in creation order.
    var all = [Debtor]()
    /// Maps a phone string to the position in `all` of the entry it was last assigned to.
    var index_by_phone = [String: Int]()
}
/// Program entry point.
///
/// Every command-line argument that directly follows a `-f` flag is treated as a
/// file name and passed to `process_file`; all files feed the same `Debtors`
/// result. After the arguments are consumed, each aggregated entry is printed,
/// and a usage line is printed when the last state shows no file was processed
/// after the most recent `-f`.
func main() {
    var res = Debtors()
    // Argument-scanner state: 0 = no "-f" seen yet, 1 = "-f" seen and the next
    // argument is a file name, 2 = a file has just been processed.
    var fflag = 0
    for arg in CommandLine.arguments {
        if arg == "-f" {
            fflag = 1
        }
        else if fflag == 1 {
            fflag = 2
            print("\(arg):")
            // Measure with the system uptime clock instead of wall-clock `Date`s:
            // `DateInterval(start:end:)` requires end >= start, which a wall-clock
            // adjustment during processing could violate.
            let tbegin = ProcessInfo.processInfo.systemUptime
            let (count, errcount) = process_file(fname: arg, res: &res)
            let elapsed = ProcessInfo.processInfo.systemUptime - tbegin
            print("PROCESSED: \(count) objects in \(elapsed)s, \(errcount) errors found")
        }
    }
    for (di, d) in res.all.enumerated() {
        print("-------------------------------")
        print("#\(di): debt: \(d.debt)")
        print("companies: \(d.companies)\nphones: \(d.phones)")
    }
    if fflag < 2 {
        print("USAGE: fastpivot -f \"file 1\" -f \"file 2\" ...")
    }
}
/// Scans a file for top-level JSON objects and feeds each one to `process_object`.
///
/// The file is read in chunks of `FILE_BUFFER_SIZE` bytes. Objects are framed by
/// counting `{` / `}` nesting, so the surrounding array syntax and separators are
/// ignored and the whole file never has to be held in memory.
///
/// - Parameters:
///   - fname: path of the file to read.
///   - res: aggregate that receives every successfully parsed object.
/// - Returns: `(count, errcount)` — the number of framed objects and how many of
///   them failed to parse. `(0, 0)` if the file cannot be opened (an error line
///   is printed in that case).
func process_file(fname: String, res: inout Debtors) -> (Int, Int) {
    var count = 0
    var errcount = 0
    guard let f = FileHandle(forReadingAtPath: fname) else {
        print("ERROR: Unable to open file")
        return (count, errcount)
    }
    var obj: Array<UInt8> = []  // bytes of the object currently being framed
    var braces = 0              // current `{` nesting depth
    var inString = false        // inside a JSON string literal
    var escaped = false         // previous byte inside the string was a backslash
    while true {
        let buf = f.readData(ofLength: FILE_BUFFER_SIZE)
        if buf.isEmpty {
            break //EOF
        }
        for b in buf {
            if inString {
                // Braces inside string literals are data, not structure: copy the
                // byte and only watch for the closing, unescaped quote.
                if braces > 0 {
                    obj.append(b)
                }
                if escaped {
                    escaped = false
                } else if b == 92 { // backslash
                    escaped = true
                } else if b == 34 { // "
                    inString = false
                }
                continue
            }
            switch b {
            case 34: // "
                inString = true
                if braces > 0 {
                    obj.append(b)
                }
            case 123: // {
                braces += 1
                obj.append(b)
            case 125 where braces > 0: // }
                braces -= 1
                obj.append(b)
                if braces == 0 { //object formed !
                    let parsed = try? JSONSerialization.jsonObject(with: Data(obj))
                    if let dict = parsed as? Dictionary<String, Any> {
                        process_object(o: dict, res: &res)
                    } else {
                        print("JSON ERROR: \(obj)")
                        errcount += 1
                    }
                    count += 1
                    obj.removeAll(keepingCapacity: true)
                }
            case 125:
                // Stray `}` outside any object: ignore it so the depth never goes
                // negative and later objects are still framed correctly.
                break
            default:
                if braces > 0 {
                    obj.append(b)
                }
            }
        }
    }
    return (count, errcount)
}
/// Merges one parsed JSON object into the aggregate.
///
/// The record is attached to the entry already indexed under the first of its
/// phones that is known; if none is known, a new entry is appended. Every phone
/// of the record is then (re)indexed to that entry.
///
/// - Parameters:
///   - o: the parsed JSON object.
///   - res: aggregate to update.
func process_object(o: Dictionary<String, Any>, res: inout Debtors) {
    let record = extract_data(o)

    // First phone of the record that is already indexed, if any.
    let knownPhone = record.phones.first { res.index_by_phone[$0] != nil }

    let debtor: Debtor
    let slot: Int
    if let phone = knownPhone, let existing = res.index_by_phone[phone] {
        // Existing entry: accumulate the debt.
        slot = existing
        debtor = res.all[existing]
        debtor.debt += record.debt
    } else {
        // No phone matched: start a new entry at the end of the list.
        slot = res.all.count
        debtor = Debtor()
        debtor.debt = record.debt
        res.all.append(debtor)
    }

    debtor.companies.insert(record.company)
    for phone in record.phones {
        debtor.phones.insert(phone)
        res.index_by_phone[phone] = slot
    }
}
/// Builds a `DebtRec` from a loosely-typed JSON object.
///
/// Accepted shapes:
/// - `company`: a plain value, or an object whose `name` field is used. A missing
///   `company` (or a missing `name` inside the object) yields `"null"`.
/// - `phones`: an array of values or a single value; `phone`: a single value.
///   All are appended to the record's phone list as strings.
/// - `debt`: a number, or a string holding a number. A missing or unparsable
///   debt leaves the record's debt at `0.0`.
///
/// - Parameter o: the parsed JSON object.
/// - Returns: the extracted record. Never traps on missing or malformed fields.
func extract_data(_ o: Dictionary<String, Any>) -> DebtRec {
    // Strings are returned as-is, integers are formatted, anything else is "null".
    func val2str(_ v: Any) -> String {
        if let vs = (v as? String) {
            return vs
        }
        else if let vi = (v as? Int) {
            return String(vi)
        }
        else {
            return "null"
        }
    }

    let dr = DebtRec()

    if let c = o["company"] {
        if let company = (c as? Dictionary<String, Any>) {
            if let name = company["name"] {
                dr.company = val2str(name)
            } else {
                dr.company = "null" // object without a "name" field
            }
        } else {
            dr.company = val2str(c)
        }
    } else {
        dr.company = "null" // no "company" field at all
    }

    if let pp = o["phones"] {
        if let list = (pp as? Array<Any>) {
            for p in list {
                dr.phones.append(val2str(p))
            }
        } else {
            dr.phones.append(val2str(pp))
        }
    }
    if let p = o["phone"] {
        dr.phones.append(val2str(p))
    }

    if let d = o["debt"] {
        if let dd = (d as? Double) {
            dr.debt = dd
        }
        else if let ds = (d as? String) {
            // A non-numeric string counts as zero debt instead of crashing.
            dr.debt = Double(ds) ?? 0.0
        }
    }
    return dr
}
// Top-level statement: run the program.
main()
//[dependencies]
//serde_json = "1.0"
use std::collections::{HashMap, HashSet};
use serde_json::Value;
// Capacity, in bytes, of the buffered reader used while scanning an input file.
const FILE_BUFFER_SIZE: usize = 50000;
//source data
/// One record extracted from an input JSON object.
///
/// `Default` yields an empty record (empty name, no phones, zero debt);
/// `Debug` is derived for diagnostics.
#[derive(Debug, Default)]
struct DebtRec {
    /// Company name.
    company: String,
    /// Phone numbers, kept as strings.
    phones: Vec<String>,
    /// Debt amount.
    debt: f64,
}
//result data
/// One aggregated result entry.
///
/// `Default` yields an entry with no companies, no phones and zero debt;
/// `Debug` is derived for diagnostics.
#[derive(Debug, Default)]
struct Debtor {
    /// Distinct company names collected for this entry.
    companies: HashSet<String>,
    /// Distinct phone strings collected for this entry.
    phones: HashSet<String>,
    /// Accumulated debt.
    debt: f64,
}
/// Container for all aggregated entries plus a phone lookup table.
///
/// `Default` yields an empty container.
#[derive(Default)]
struct Debtors {
    /// Every aggregated entry, in creation order.
    all: Vec<Debtor>,
    /// Maps a phone string to the position in `all` of the entry it was last assigned to.
    index_by_phone: HashMap<String, usize>,
}
impl DebtRec {
fn new() -> DebtRec {
DebtRec {
company: String::new(),
phones: Vec::new(),
debt: 0.0
}
}
}
impl Debtor {
fn new() -> Debtor {
Debtor {
companies: HashSet::new(),
phones: HashSet::new(),
debt: 0.0
}
}
}
impl Debtors {
fn new() -> Debtors {
Debtors {
all: Vec::new(),
index_by_phone: HashMap::new()
}
}
}
/// Program entry point.
///
/// Every command-line argument that directly follows a `-f` flag is treated as a
/// file name and passed to `process_file`; all files feed the same `Debtors`
/// result. After the arguments are consumed, each aggregated entry is printed,
/// and a usage line is printed when the last state shows no file was processed
/// after the most recent `-f`.
fn main() {
    let mut res = Debtors::new();
    // Argument-scanner state: 0 = no "-f" seen yet, 1 = "-f" seen and the next
    // argument is a file name, 2 = a file has just been processed.
    let mut fflag = 0;
    for arg in std::env::args() {
        if arg == "-f" {
            fflag = 1;
        } else if fflag == 1 {
            fflag = 2;
            println!("{}:", arg);
            // `Instant` is monotonic, so `elapsed()` cannot fail; `SystemTime`
            // follows the wall clock and its `elapsed()` returns an error (which
            // the old `unwrap()` turned into a panic) if the clock steps back.
            let tbegin = std::time::Instant::now();
            let (count, errcount) = process_file(&arg, &mut res);
            println!(
                "PROCESSED: {} objects in {:?}, {} errors found",
                count,
                tbegin.elapsed(),
                errcount
            );
        }
    }
    for (di, d) in res.all.iter().enumerate() {
        println!("-------------------------------");
        println!("#{}: debt: {}", di, d.debt);
        println!("companies: {:?}\nphones: {:?}", d.companies, d.phones);
    }
    if fflag < 2 {
        println!("USAGE: fastpivot -f \"file 1\" -f \"file 2\" ...");
    }
}
/// Scans a file for top-level JSON objects and feeds each one to `process_object`.
///
/// The file is read through a `BufReader` of `FILE_BUFFER_SIZE` bytes. Objects are
/// framed by counting `{` / `}` nesting, so the surrounding array syntax and
/// separators are ignored and the whole file never has to be held in memory.
///
/// Returns `(count, errcount)`: the number of framed objects and how many of them
/// failed to parse. If the file cannot be opened, or a read fails midway, the
/// error is printed and the counts gathered so far are returned.
fn process_file(fname: &str, res: &mut Debtors) -> (i32, i32) {
    use std::io::prelude::*;
    let mut count = 0;
    let mut errcount = 0;
    let file = match std::fs::File::open(fname) {
        Ok(file) => file,
        Err(e) => {
            println!("ERROR: {}", e);
            return (count, errcount);
        }
    };
    let mut freader = std::io::BufReader::with_capacity(FILE_BUFFER_SIZE, file);
    let mut obj: Vec<u8> = Vec::new(); // bytes of the object currently being framed
    let mut braces = 0usize; // current `{` nesting depth
    let mut in_string = false; // inside a JSON string literal
    let mut escaped = false; // previous byte inside the string was a backslash
    loop {
        let buf = match freader.fill_buf() {
            Ok(buf) => buf,
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => {
                // Report the read failure instead of panicking.
                println!("ERROR: {}", e);
                break;
            }
        };
        let blen = buf.len();
        if blen == 0 {
            break; //EOF
        }
        for &b in buf {
            if in_string {
                // Braces inside string literals are data, not structure: copy the
                // byte and only watch for the closing, unescaped quote.
                if braces > 0 {
                    obj.push(b);
                }
                if escaped {
                    escaped = false;
                } else if b == b'\\' {
                    escaped = true;
                } else if b == b'"' {
                    in_string = false;
                }
                continue;
            }
            match b {
                b'"' => {
                    in_string = true;
                    if braces > 0 {
                        obj.push(b);
                    }
                }
                b'{' => {
                    braces += 1;
                    obj.push(b);
                }
                b'}' if braces > 0 => {
                    braces -= 1;
                    obj.push(b);
                    if braces == 0 {
                        //object formed !
                        match serde_json::from_slice(&obj) {
                            Ok(o) => {
                                process_object(&o, res);
                            }
                            Err(e) => {
                                println!("JSON ERROR: {}:\n{:?}", e, &obj);
                                errcount += 1;
                            }
                        }
                        count += 1;
                        // Keep the allocation for the next object.
                        obj.clear();
                    }
                }
                // Stray `}` outside any object: ignore it so the depth never goes
                // negative and later objects are still framed correctly.
                b'}' => {}
                _ => {
                    if braces > 0 {
                        obj.push(b);
                    }
                }
            }
        }
        freader.consume(blen);
    }
    (count, errcount)
}
/// Merges one parsed JSON object into the aggregate.
///
/// The record is attached to the entry already indexed under the first of its
/// phones that is known; if none is known, a new entry is appended. Every phone
/// of the record is then (re)indexed to that entry.
fn process_object(o: &Value, res: &mut Debtors) {
    let rec = extract_data(o);

    // Position of the entry owning the first already-indexed phone, if any.
    let known = rec
        .phones
        .iter()
        .find_map(|phone| res.index_by_phone.get(phone).copied());

    let slot = match known {
        Some(i) => {
            // Existing entry: accumulate the debt.
            res.all[i].debt += rec.debt;
            i
        }
        None => {
            // No phone matched: start a new entry at the end of the list.
            let mut fresh = Debtor::new();
            fresh.debt = rec.debt;
            res.all.push(fresh);
            res.all.len() - 1
        }
    };

    let debtor = &mut res.all[slot];
    debtor.companies.insert(rec.company);
    for phone in rec.phones {
        debtor.phones.insert(phone.clone());
        res.index_by_phone.insert(phone, slot);
    }
}
/// Builds a `DebtRec` from a loosely-typed JSON value.
///
/// Accepted shapes:
/// - `company`: a plain value, or an object whose string `name` field is used.
///   An object without a string `name` is rendered as its JSON text; a missing
///   `company` is rendered as `null`.
/// - `phones`: an array of values or a single value; `phone`: a single value.
///   Absent / `null` fields are skipped, everything else is appended as a string.
/// - `debt`: a number, or a string holding a number; anything else (including an
///   unparsable string) counts as `0.0`.
///
/// Never panics on missing fields.
fn extract_data(o: &Value) -> DebtRec {
    /// Renders a JSON value as text; strings are returned without the quotes
    /// that `Value::to_string` would add.
    fn val2str(v: &Value) -> String {
        match v {
            Value::String(vs) => vs.clone(),
            _ => v.to_string(),
        }
    }

    let mut dr = DebtRec::new();

    let c = &o["company"];
    dr.company = match c {
        // Indexing a `serde_json::Map` with `[...]` panics when the key is
        // absent (unlike indexing a `Value`), so look the key up with `get`.
        Value::Object(c1) => match c1.get("name") {
            Some(Value::String(c2)) => c2.clone(),
            _ => val2str(c),
        },
        _ => val2str(c),
    };

    match &o["phones"] {
        Value::Null => {}
        Value::Array(pp) => dr.phones.extend(pp.iter().map(val2str)),
        pp => dr.phones.push(val2str(pp)),
    }
    match &o["phone"] {
        Value::Null => {}
        p => dr.phones.push(val2str(p)),
    }

    dr.debt = match &o["debt"] {
        Value::Number(d) => d.as_f64().unwrap_or(0.0),
        Value::String(d) => d.parse::<f64>().unwrap_or(0.0),
        _ => 0.0,
    };
    dr
}
Source: https://habr.com/ru/post/450512/
All Articles