Verify/LC_multivariate_convolution_cyclic.test.cpp

View this file on GitHub
Last update: 2024-04-26 03:32:16+09:00
Problem: https://judge.yosupo.jp/problem/multivariate_convolution_cyclic

Depends on

Code

#define PROBLEM                                                                \
    "https://judge.yosupo.jp/problem/multivariate_convolution_cyclic"

#include "Template/template.hpp"
#include "Utility/fastio.hpp"

#include "Math/dynamic.hpp"
#include "Convolution/arbitrary.hpp"
#include "FPS/arbitraryfps.hpp"
template <>
vector<Fp> Poly<Fp>::mult(const vector<Fp> &a, const vector<Fp> &b) const {
    return ArbitraryMult<Fp>(a, b);
}
#include "Convolution/multivariatecyclic.hpp"

int main() {
    int p, k;
    read(p, k);
    Fp::set_mod(p);
    vector<int> a(k);
    read(a);
    int n = 1;
    for (auto &x : a)
        n *= x;
    vector<Fp> f(n), g(n);
    rep(i, 0, n) read(f[i].v);
    rep(i, 0, n) read(g[i].v);

    auto ret = MultivariateCyclic(f, g, a);
    rep(i, 0, n) print(ret[i].v);
    return 0;
}

#line 1 "Verify/LC_multivariate_convolution_cyclic.test.cpp"
#define PROBLEM                                                                \
    "https://judge.yosupo.jp/problem/multivariate_convolution_cyclic"

#line 1 "Template/template.hpp"
#include <bits/stdc++.h>

using namespace std;

#define rep(i, a, b) for (int i = (int)(a); i < (int)(b); i++)
#define rrep(i, a, b) for (int i = (int)(b-1); i >= (int)(a); i--)
#define ALL(v) (v).begin(), (v).end()
#define UNIQUE(v) sort(ALL(v)), (v).erase(unique(ALL(v)), (v).end())
#define SZ(v) (int)v.size()
#define MIN(v) *min_element(ALL(v))
#define MAX(v) *max_element(ALL(v))
#define LB(v, x) int(lower_bound(ALL(v), (x)) - (v).begin())
#define UB(v, x) int(upper_bound(ALL(v), (x)) - (v).begin())

using uint = unsigned int;
using ll = long long int;
using ull = unsigned long long;
using i128 = __int128_t;
using u128 = __uint128_t;
const int inf = 0x3fffffff;
const ll INF = 0x1fffffffffffffff;

template <typename T> inline bool chmax(T &a, T b) {
    if (a < b) {
        a = b;
        return 1;
    }
    return 0;
}
template <typename T> inline bool chmin(T &a, T b) {
    if (a > b) {
        a = b;
        return 1;
    }
    return 0;
}
template <typename T, typename U> T ceil(T x, U y) {
    assert(y != 0);
    if (y < 0)
        x = -x, y = -y;
    return (x > 0 ? (x + y - 1) / y : x / y);
}
template <typename T, typename U> T floor(T x, U y) {
    assert(y != 0);
    if (y < 0)
        x = -x, y = -y;
    return (x > 0 ? x / y : (x - y + 1) / y);
}
template <typename T> int popcnt(T x) {
    return __builtin_popcountll(x);
}
template <typename T> int topbit(T x) {
    return (x == 0 ? -1 : 63 - __builtin_clzll(x));
}
template <typename T> int lowbit(T x) {
    return (x == 0 ? -1 : __builtin_ctzll(x));
}

#ifdef LOCAL
#define show(...) _show(0, #__VA_ARGS__, __VA_ARGS__)
#else
#define show(...) true
#endif
template <typename T> void _show(int i, T name) {
    cerr << '\n';
}
template <typename T1, typename T2, typename... T3>
void _show(int i, const T1 &a, const T2 &b, const T3 &...c) {
    for (; a[i] != ',' && a[i] != '\0'; i++)
        cerr << a[i];
    cerr << ":" << b << " ";
    _show(i + 1, a, c...);
}
template <class T, class U>
ostream &operator<<(ostream &os, const pair<T, U> &p) {
    os << "P(" << p.first << ", " << p.second << ")";
    return os;
}
template <typename T, template <class> class C>
ostream &operator<<(ostream &os, const C<T> &v) {
    os << "[";
    for (auto d : v)
        os << d << ", ";
    os << "]";
    return os;
}
#line 2 "Utility/fastio.hpp"
#include <unistd.h>

namespace fastio {
static constexpr uint32_t SZ = 1 << 17;
char ibuf[SZ];
char obuf[SZ];
char out[100];
// pointer of ibuf, obuf


uint32_t pil = 0, pir = 0, por = 0;

struct Pre {
    char num[10000][4];
    constexpr Pre() : num() {
        for (int i = 0; i < 10000; i++) {
            int n = i;
            for (int j = 3; j >= 0; j--) {
                num[i][j] = n % 10 | '0';
                n /= 10;
            }
        }
    }
} constexpr pre;

inline void load() {
    memmove(ibuf, ibuf + pil, pir - pil);
    pir = pir - pil + fread(ibuf + pir - pil, 1, SZ - pir + pil, stdin);
    pil = 0;
    if (pir < SZ)
        ibuf[pir++] = '\n';
}

inline void flush() {
    fwrite(obuf, 1, por, stdout);
    por = 0;
}

void rd(char &c) {
    do {
        if (pil + 1 > pir)
            load();
        c = ibuf[pil++];
    } while (isspace(c));
}

void rd(string &x) {
    x.clear();
    char c;
    do {
        if (pil + 1 > pir)
            load();
        c = ibuf[pil++];
    } while (isspace(c));
    do {
        x += c;
        if (pil == pir)
            load();
        c = ibuf[pil++];
    } while (!isspace(c));
}

template <typename T> void rd_real(T &x) {
    string s;
    rd(s);
    x = stod(s);
}

template <typename T> void rd_integer(T &x) {
    if (pil + 100 > pir)
        load();
    char c;
    do
        c = ibuf[pil++];
    while (c < '-');
    bool minus = 0;
    if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
        if (c == '-') {
            minus = 1, c = ibuf[pil++];
        }
    }
    x = 0;
    while ('0' <= c) {
        x = x * 10 + (c & 15), c = ibuf[pil++];
    }
    if constexpr (is_signed<T>::value || is_same_v<T, i128>) {
        if (minus)
            x = -x;
    }
}

void rd(int &x) {
    rd_integer(x);
}
void rd(ll &x) {
    rd_integer(x);
}
void rd(i128 &x) {
    rd_integer(x);
}
void rd(uint &x) {
    rd_integer(x);
}
void rd(ull &x) {
    rd_integer(x);
}
void rd(u128 &x) {
    rd_integer(x);
}
void rd(double &x) {
    rd_real(x);
}
void rd(long double &x) {
    rd_real(x);
}

template <class T, class U> void rd(pair<T, U> &p) {
    return rd(p.first), rd(p.second);
}
template <size_t N = 0, typename T> void rd_tuple(T &t) {
    if constexpr (N < std::tuple_size<T>::value) {
        auto &x = std::get<N>(t);
        rd(x);
        rd_tuple<N + 1>(t);
    }
}
template <class... T> void rd(tuple<T...> &tpl) {
    rd_tuple(tpl);
}

template <size_t N = 0, typename T> void rd(array<T, N> &x) {
    for (auto &d : x)
        rd(d);
}
template <class T> void rd(vector<T> &x) {
    for (auto &d : x)
        rd(d);
}

void read() {}
template <class H, class... T> void read(H &h, T &...t) {
    rd(h), read(t...);
}

void wt(const char c) {
    if (por == SZ)
        flush();
    obuf[por++] = c;
}
void wt(const string s) {
    for (char c : s)
        wt(c);
}
void wt(const char *s) {
    size_t len = strlen(s);
    for (size_t i = 0; i < len; i++)
        wt(s[i]);
}

template <typename T> void wt_integer(T x) {
    if (por > SZ - 100)
        flush();
    if (x < 0) {
        obuf[por++] = '-', x = -x;
    }
    int outi;
    for (outi = 96; x >= 10000; outi -= 4) {
        memcpy(out + outi, pre.num[x % 10000], 4);
        x /= 10000;
    }
    if (x >= 1000) {
        memcpy(obuf + por, pre.num[x], 4);
        por += 4;
    } else if (x >= 100) {
        memcpy(obuf + por, pre.num[x] + 1, 3);
        por += 3;
    } else if (x >= 10) {
        int q = (x * 103) >> 10;
        obuf[por] = q | '0';
        obuf[por + 1] = (x - q * 10) | '0';
        por += 2;
    } else
        obuf[por++] = x | '0';
    memcpy(obuf + por, out + outi + 4, 96 - outi);
    por += 96 - outi;
}

template <typename T> void wt_real(T x) {
    ostringstream oss;
    oss << fixed << setprecision(15) << double(x);
    string s = oss.str();
    wt(s);
}

void wt(int x) {
    wt_integer(x);
}
void wt(ll x) {
    wt_integer(x);
}
void wt(i128 x) {
    wt_integer(x);
}
void wt(uint x) {
    wt_integer(x);
}
void wt(ull x) {
    wt_integer(x);
}
void wt(u128 x) {
    wt_integer(x);
}
void wt(double x) {
    wt_real(x);
}
void wt(long double x) {
    wt_real(x);
}

template <class T, class U> void wt(const pair<T, U> val) {
    wt(val.first);
    wt(' ');
    wt(val.second);
}
template <size_t N = 0, typename T> void wt_tuple(const T t) {
    if constexpr (N < std::tuple_size<T>::value) {
        if constexpr (N > 0) {
            wt(' ');
        }
        const auto x = std::get<N>(t);
        wt(x);
        wt_tuple<N + 1>(t);
    }
}
template <class... T> void wt(tuple<T...> tpl) {
    wt_tuple(tpl);
}
template <class T, size_t S> void wt(const array<T, S> val) {
    auto n = val.size();
    for (size_t i = 0; i < n; i++) {
        if (i)
            wt(' ');
        wt(val[i]);
    }
}
template <class T> void wt(const vector<T> val) {
    auto n = val.size();
    for (size_t i = 0; i < n; i++) {
        if (i)
            wt(' ');
        wt(val[i]);
    }
}

void print() {
    wt('\n');
}
template <class Head, class... Tail> void print(Head &&head, Tail &&...tail) {
    wt(head);
    if (sizeof...(Tail))
        wt(' ');
    print(forward<Tail>(tail)...);
}
void __attribute__((destructor)) _d() {
    flush();
}
} // namespace fastio


using fastio::flush;
using fastio::print;
using fastio::read;

inline void first(bool i = true) {
    print(i ? "first" : "second");
}
inline void Alice(bool i = true) {
    print(i ? "Alice" : "Bob");
}
inline void yes(bool i = true) {
    print(i ? "yes" : "no");
}
inline void Yes(bool i = true) {
    print(i ? "Yes" : "No");
}
inline void No() {
    print("No");
}
inline void YES(bool i = true) {
    print(i ? "YES" : "NO");
}
inline void NO() {
    print("NO");
}
inline void Yay(bool i = true) {
    print(i ? "Yay!" : ":(");
}
inline void Possible(bool i = true) {
    print(i ? "Possible" : "Impossible");
}
inline void POSSIBLE(bool i = true) {
    print(i ? "POSSIBLE" : "IMPOSSIBLE");
}

/**
 * @brief Fast IO
 */
#line 6 "Verify/LC_multivariate_convolution_cyclic.test.cpp"

#line 2 "Math/fastdiv.hpp"

struct FastDiv{
    using u64=uint64_t;
    using u128=__uint128_t;
    constexpr FastDiv():m(),s(),x(){}
    constexpr FastDiv(int _m)
        :m(_m),s(__lg(m-1)),x(((u128(1)<<(s+64))+m-1)/m){}
    constexpr int get(){return m;}
    constexpr friend u64 operator/(u64 n,const FastDiv& d){
        return (u128(n)*d.x>>d.s)>>64;
    }
    constexpr friend int operator%(u64 n,const FastDiv& d){
        return n-n/d*d.m;
    }
    constexpr pair<u64,int> divmod(u64 n)const{
        u64 q=n/(*this);
        return {q,n-q*m};
    }
    int m,s; u64 x;
};

/**
 * @brief Fast Division
*/
#line 3 "Math/dynamic.hpp"

struct Fp{
    using u64=uint64_t;
    int v;
    static int get_mod(){return _getmod();}
    static void set_mod(int _m){bar=FastDiv(_m);}
    Fp inv() const{
        int tmp,a=v,b=get_mod(),x=1,y=0;
        while(b){
            tmp=a/b,a-=tmp*b;
            swap(a,b);
            x-=tmp*y;
            swap(x,y);
        }
        if(x<0){x+=get_mod();}
        return x;
    }
    Fp():v(0){}
    Fp(ll x){
        v=x%get_mod();
        if(v<0)v+=get_mod();
    }
    Fp operator-()const{return Fp()-*this;}
    Fp pow(ll t){
        assert(t>=0);
        Fp res=1,b=*this;
        while(t){
            if(t&1)res*=b;
            b*=b;
            t>>=1;
        }
        return res;
    }
    Fp& operator+=(const Fp& x){
        v+=x.v;
        if(v>=get_mod())v-=get_mod();
        return *this;
    }
    Fp& operator-=(const Fp& x){
        v+=get_mod()-x.v;
        if(v>=get_mod())v-=get_mod();
        return *this;
    }
    Fp& operator*=(const Fp& x){
        v=(u64(v)*x.v)%bar;
        return *this;
    }
    Fp& operator/=(const Fp& x){
        (*this)*=x.inv();
        return *this;
    }
    Fp operator+(const Fp& x)const{return Fp(*this)+=x;}
    Fp operator-(const Fp& x)const{return Fp(*this)-=x;}
    Fp operator*(const Fp& x)const{return Fp(*this)*=x;}
    Fp operator/(const Fp& x)const{return Fp(*this)/=x;}
    bool operator==(const Fp& x)const{return v==x.v;}
    bool operator!=(const Fp& x)const{return v!=x.v;}
private:
    static FastDiv bar;
    static int _getmod(){return bar.get();}
};
FastDiv Fp::bar(998244353);

/**
 * @brief Dynamic Modint
 */
#line 2 "Convolution/ntt.hpp"

template <typename T> struct NTT {
    static constexpr int rank2 = __builtin_ctzll(T::get_mod() - 1);
    std::array<T, rank2 + 1> root;  // root[i]^(2^i) == 1

    std::array<T, rank2 + 1> iroot; // root[i] * iroot[i] == 1


    std::array<T, std::max(0, rank2 - 2 + 1)> rate2;
    std::array<T, std::max(0, rank2 - 2 + 1)> irate2;

    std::array<T, std::max(0, rank2 - 3 + 1)> rate3;
    std::array<T, std::max(0, rank2 - 3 + 1)> irate3;

    NTT() {
        T g = 2;
        while (g.pow((T::get_mod() - 1) >> 1) == 1) {
            g += 1;
        }
        root[rank2] = g.pow((T::get_mod() - 1) >> rank2);
        iroot[rank2] = root[rank2].inv();
        for (int i = rank2 - 1; i >= 0; i--) {
            root[i] = root[i + 1] * root[i + 1];
            iroot[i] = iroot[i + 1] * iroot[i + 1];
        }

        {
            T prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 2; i++) {
                rate2[i] = root[i + 2] * prod;
                irate2[i] = iroot[i + 2] * iprod;
                prod *= iroot[i + 2];
                iprod *= root[i + 2];
            }
        }
        {
            T prod = 1, iprod = 1;
            for (int i = 0; i <= rank2 - 3; i++) {
                rate3[i] = root[i + 3] * prod;
                irate3[i] = iroot[i + 3] * iprod;
                prod *= iroot[i + 3];
                iprod *= root[i + 3];
            }
        }
    }

    void ntt(std::vector<T> &a, bool type = 0) {
        int n = int(a.size());
        int h = __builtin_ctzll((unsigned int)n);
        a.resize(1 << h);

        if (type) {
            int len = h; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed

            while (len) {
                if (len == 1) {
                    int p = 1 << (h - len);
                    T irot = 1;
                    for (int s = 0; s < (1 << (len - 1)); s++) {
                        int offset = s << (h - len + 1);
                        for (int i = 0; i < p; i++) {
                            auto l = a[i + offset];
                            auto r = a[i + offset + p];
                            a[i + offset] = l + r;
                            a[i + offset + p] =
                                (unsigned long long)(T::get_mod() + l.v - r.v) *
                                irot.v;
                            ;
                        }
                        if (s + 1 != (1 << (len - 1)))
                            irot *= irate2[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len--;
                } else {
                    // 4-base

                    int p = 1 << (h - len);
                    T irot = 1, iimag = iroot[2];
                    for (int s = 0; s < (1 << (len - 2)); s++) {
                        T irot2 = irot * irot;
                        T irot3 = irot2 * irot;
                        int offset = s << (h - len + 2);
                        for (int i = 0; i < p; i++) {
                            auto a0 = 1ULL * a[i + offset + 0 * p].v;
                            auto a1 = 1ULL * a[i + offset + 1 * p].v;
                            auto a2 = 1ULL * a[i + offset + 2 * p].v;
                            auto a3 = 1ULL * a[i + offset + 3 * p].v;

                            auto a2na3iimag =
                                1ULL * T((T::get_mod() + a2 - a3) * iimag.v).v;

                            a[i + offset] = a0 + a1 + a2 + a3;
                            a[i + offset + 1 * p] =
                                (a0 + (T::get_mod() - a1) + a2na3iimag) *
                                irot.v;
                            a[i + offset + 2 * p] =
                                (a0 + a1 + (T::get_mod() - a2) +
                                 (T::get_mod() - a3)) *
                                irot2.v;
                            a[i + offset + 3 * p] =
                                (a0 + (T::get_mod() - a1) +
                                 (T::get_mod() - a2na3iimag)) *
                                irot3.v;
                        }
                        if (s + 1 != (1 << (len - 2)))
                            irot *= irate3[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len -= 2;
                }
            }
            T e = T(n).inv();
            for (auto &x : a)
                x *= e;
        } else {
            int len = 0; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed

            while (len < h) {
                if (h - len == 1) {
                    int p = 1 << (h - len - 1);
                    T rot = 1;
                    for (int s = 0; s < (1 << len); s++) {
                        int offset = s << (h - len);
                        for (int i = 0; i < p; i++) {
                            auto l = a[i + offset];
                            auto r = a[i + offset + p] * rot;
                            a[i + offset] = l + r;
                            a[i + offset + p] = l - r;
                        }
                        if (s + 1 != (1 << len))
                            rot *= rate2[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len++;
                } else {
                    // 4-base

                    int p = 1 << (h - len - 2);
                    T rot = 1, imag = root[2];
                    for (int s = 0; s < (1 << len); s++) {
                        T rot2 = rot * rot;
                        T rot3 = rot2 * rot;
                        int offset = s << (h - len);
                        for (int i = 0; i < p; i++) {
                            auto mod2 = 1ULL * T::get_mod() * T::get_mod();
                            auto a0 = 1ULL * a[i + offset].v;
                            auto a1 = 1ULL * a[i + offset + p].v * rot.v;
                            auto a2 = 1ULL * a[i + offset + 2 * p].v * rot2.v;
                            auto a3 = 1ULL * a[i + offset + 3 * p].v * rot3.v;
                            auto a1na3imag =
                                1ULL * T(a1 + mod2 - a3).v * imag.v;
                            auto na2 = mod2 - a2;
                            a[i + offset] = a0 + a2 + a1 + a3;
                            a[i + offset + 1 * p] =
                                a0 + a2 + (2 * mod2 - (a1 + a3));
                            a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
                            a[i + offset + 3 * p] =
                                a0 + na2 + (mod2 - a1na3imag);
                        }
                        if (s + 1 != (1 << len))
                            rot *= rate3[__builtin_ctzll(~(unsigned int)(s))];
                    }
                    len += 2;
                }
            }
        }
    }
    vector<T> mult(const vector<T> &a, const vector<T> &b) {
        if (a.empty() or b.empty())
            return vector<T>();
        int as = a.size(), bs = b.size();
        int n = as + bs - 1;
        if (as <= 30 or bs <= 30) {
            if (as > 30)
                return mult(b, a);
            vector<T> res(n);
            rep(i, 0, as) rep(j, 0, bs) res[i + j] += a[i] * b[j];
            return res;
        }
        int m = 1;
        while (m < n)
            m <<= 1;
        vector<T> res(m);
        rep(i, 0, as) res[i] = a[i];
        ntt(res);
        if (a == b)
            rep(i, 0, m) res[i] *= res[i];
        else {
            vector<T> c(m);
            rep(i, 0, bs) c[i] = b[i];
            ntt(c);
            rep(i, 0, m) res[i] *= c[i];
        }
        ntt(res, 1);
        res.resize(n);
        return res;
    }
};

/**
 * @brief Number Theoretic Transform
 */
#line 2 "Math/modint.hpp"

template <unsigned mod = 1000000007> struct fp {
    unsigned v;
    static constexpr int get_mod() {
        return mod;
    }
    constexpr unsigned inv() const {
        assert(v != 0);
        int x = v, y = mod, p = 1, q = 0, t = 0, tmp = 0;
        while (y > 0) {
            t = x / y;
            x -= t * y, p -= t * q;
            tmp = x, x = y, y = tmp;
            tmp = p, p = q, q = tmp;
        }
        if (p < 0)
            p += mod;
        return p;
    }
    constexpr fp(ll x = 0) : v(x >= 0 ? x % mod : (mod - (-x) % mod) % mod) {}
    fp operator-() const {
        return fp() - *this;
    }
    fp pow(ull t) {
        fp res = 1, b = *this;
        while (t) {
            if (t & 1)
                res *= b;
            b *= b;
            t >>= 1;
        }
        return res;
    }
    fp &operator+=(const fp &x) {
        if ((v += x.v) >= mod)
            v -= mod;
        return *this;
    }
    fp &operator-=(const fp &x) {
        if ((v += mod - x.v) >= mod)
            v -= mod;
        return *this;
    }
    fp &operator*=(const fp &x) {
        v = ull(v) * x.v % mod;
        return *this;
    }
    fp &operator/=(const fp &x) {
        v = ull(v) * x.inv() % mod;
        return *this;
    }
    fp operator+(const fp &x) const {
        return fp(*this) += x;
    }
    fp operator-(const fp &x) const {
        return fp(*this) -= x;
    }
    fp operator*(const fp &x) const {
        return fp(*this) *= x;
    }
    fp operator/(const fp &x) const {
        return fp(*this) /= x;
    }
    bool operator==(const fp &x) const {
        return v == x.v;
    }
    bool operator!=(const fp &x) const {
        return v != x.v;
    }
    friend istream &operator>>(istream &is, fp &x) {
        return is >> x.v;
    }
    friend ostream &operator<<(ostream &os, const fp &x) {
        return os << x.v;
    }
};

template <unsigned mod> void rd(fp<mod> &x) {
    fastio::rd(x.v);
}
template <unsigned mod> void wt(fp<mod> x) {
    fastio::wt(x.v);
}

template <typename T> T Inv(ll n) {
    static const int md = T::get_mod();
    static vector<T> buf({0, 1});
    assert(n > 0);
    n %= md;
    while (SZ(buf) <= n) {
        int k = SZ(buf), q = (md + k - 1) / k;
        buf.push_back(buf[k * q - md] * q);
    }
    return buf[n];
}

template <typename T> T Fact(ll n, bool inv = 0) {
    static const int md = T::get_mod();
    static vector<T> buf({1, 1}), ibuf({1, 1});
    assert(n >= 0 and n < md);
    while (SZ(buf) <= n) {
        buf.push_back(buf.back() * SZ(buf));
        ibuf.push_back(ibuf.back() * Inv<T>(SZ(ibuf)));
    }
    return inv ? ibuf[n] : buf[n];
}

template <typename T> T nPr(int n, int r, bool inv = 0) {
    if (n < 0 || n < r || r < 0)
        return 0;
    return Fact<T>(n, inv) * Fact<T>(n - r, inv ^ 1);
}
template <typename T> T nCr(int n, int r, bool inv = 0) {
    if (n < 0 || n < r || r < 0)
        return 0;
    return Fact<T>(n, inv) * Fact<T>(r, inv ^ 1) * Fact<T>(n - r, inv ^ 1);
}
template <typename T> T nHr(int n, int r, bool inv = 0) {
    return nCr<T>(n + r - 1, r, inv);
}

/**
 * @brief Modint
 */
#line 4 "Convolution/arbitrary.hpp"

using M1 = fp<1045430273>;
using M2 = fp<1051721729>;
using M3 = fp<1053818881>;
NTT<M1> N1;
NTT<M2> N2;
NTT<M3> N3;
constexpr int r_12 = M2(M1::get_mod()).inv();
constexpr int r_13 = M3(M1::get_mod()).inv();
constexpr int r_23 = M3(M2::get_mod()).inv();
constexpr int r_1323 = M3(ll(r_13) * r_23).v;
constexpr ll w1 = M1::get_mod();
constexpr ll w2 = ll(w1) * M2::get_mod();
template <typename T>
vector<T> ArbitraryMult(const vector<int> &a, const vector<int> &b) {
    if (a.empty() or b.empty())
        return vector<T>();
    int n = a.size() + b.size() - 1;
    vector<T> res(n);
    if (min(a.size(), b.size()) <= 60) {
        rep(i, 0, a.size()) rep(j, 0, b.size()) res[i + j] += T(a[i]) * b[j];
        return res;
    }
    vector<int> vals[3];
    vector<M1> a1(ALL(a)), b1(ALL(b)), c1 = N1.mult(a1, b1);
    vector<M2> a2(ALL(a)), b2(ALL(b)), c2 = N2.mult(a2, b2);
    vector<M3> a3(ALL(a)), b3(ALL(b)), c3 = N3.mult(a3, b3);
    for (M1 x : c1)
        vals[0].push_back(x.v);
    for (M2 x : c2)
        vals[1].push_back(x.v);
    for (M3 x : c3)
        vals[2].push_back(x.v);
    rep(i, 0, n) {
        ll p = vals[0][i];
        ll q = (vals[1][i] + M2::get_mod() - p) * r_12 % M2::get_mod();
        ll r = ((vals[2][i] + M3::get_mod() - p) * r_1323 +
                (M3::get_mod() - q) * r_23) %
               M3::get_mod();
        res[i] = (T(r) * w2 + q * w1 + p);
    }
    return res;
}

template <typename T>
vector<T> ArbitraryMult(const vector<T> &a, const vector<T> &b) {
    vector<int> A, B;
    for (auto &x : a)
        A.push_back(x.v);
    for (auto &x : b)
        B.push_back(x.v);
    return ArbitraryMult<T>(A, B);
}

/**
 * @brief Arbitrary Mod Convolution
 */
#line 2 "FPS/arbitraryfps.hpp"

template <typename T> struct Poly : vector<T> {
    Poly(int n = 0) {
        this->assign(n, T());
    }
    Poly(const initializer_list<T> f) : vector<T>::vector(f) {}
    Poly(const vector<T> &f) {
        this->assign(ALL(f));
    }
    int deg() const {
        return this->size() - 1;
    }
    T eval(const T &x) {
        T res;
        for (int i = this->size() - 1; i >= 0; i--)
            res *= x, res += this->at(i);
        return res;
    }
    Poly rev() const {
        Poly res = *this;
        reverse(ALL(res));
        return res;
    }
    void shrink() {
        while (!this->empty() and this->back() == 0)
            this->pop_back();
    }
    Poly operator>>(ll sz) const {
        if ((int)this->size() <= sz)
            return {};
        Poly ret(*this);
        ret.erase(ret.begin(), ret.begin() + sz);
        return ret;
    }
    Poly operator<<(ll sz) const {
        Poly ret(*this);
        ret.insert(ret.begin(), sz, T(0));
        return ret;
    }
    Poly inv() const {
        assert(this->front() != 0);
        const int n = this->size();
        Poly res(1);
        res.front() = T(1) / this->front();
        for (int k = 1; k < n; k <<= 1) {
            Poly g = res, h = *this;
            h.resize(k * 2);
            res.resize(k * 2);
            g = (g.square() * h);
            g.resize(k * 2);
            rep(i, k, min(k * 2, n)) res[i] -= g[i];
        }
        res.resize(n);
        return res;
    }
    Poly square() const {
        return Poly(mult(*this, *this));
    }
    Poly operator-() const {
        return Poly() - *this;
    }
    Poly operator+(const Poly &g) const {
        return Poly(*this) += g;
    }
    Poly operator+(const T &g) const {
        return Poly(*this) += g;
    }
    Poly operator-(const Poly &g) const {
        return Poly(*this) -= g;
    }
    Poly operator-(const T &g) const {
        return Poly(*this) -= g;
    }
    Poly operator*(const Poly &g) const {
        return Poly(*this) *= g;
    }
    Poly operator*(const T &g) const {
        return Poly(*this) *= g;
    }
    Poly operator/(const Poly &g) const {
        return Poly(*this) /= g;
    }
    Poly operator%(const Poly &g) const {
        return Poly(*this) %= g;
    }
    pair<Poly, Poly> divmod(const Poly &g) const {
        Poly q = *this / g, r = *this - g * q;
        r.shrink();
        return {q, r};
    }
    Poly &operator+=(const Poly &g) {
        if (g.size() > this->size())
            this->resize(g.size());
        rep(i, 0, g.size()) {
            (*this)[i] += g[i];
        }
        return *this;
    }
    Poly &operator+=(const T &g) {
        if (this->empty())
            this->push_back(0);
        (*this)[0] += g;
        return *this;
    }
    Poly &operator-=(const Poly &g) {
        if (g.size() > this->size())
            this->resize(g.size());
        rep(i, 0, g.size()) {
            (*this)[i] -= g[i];
        }
        return *this;
    }
    Poly &operator-=(const T &g) {
        if (this->empty())
            this->push_back(0);
        (*this)[0] -= g;
        return *this;
    }
    Poly &operator*=(const Poly &g) {
        *this = mult(*this, g);
        return *this;
    }
    Poly &operator*=(const T &g) {
        rep(i, 0, this->size())(*this)[i] *= g;
        return *this;
    }
    Poly &operator/=(const Poly &g) {
        if (g.size() > this->size()) {
            this->clear();
            return *this;
        }
        Poly g2 = g;
        reverse(ALL(*this));
        reverse(ALL(g2));
        int n = this->size() - g2.size() + 1;
        this->resize(n);
        g2.resize(n);
        *this *= g2.inv();
        this->resize(n);
        reverse(ALL(*this));
        shrink();
        return *this;
    }
    Poly &operator%=(const Poly &g) {
        *this -= *this / g * g;
        shrink();
        return *this;
    }
    Poly diff() const {
        Poly res(this->size() - 1);
        rep(i, 0, res.size()) res[i] = (*this)[i + 1] * (i + 1);
        return res;
    }
    Poly inte() const {
        Poly res(this->size() + 1);
        for (int i = res.size() - 1; i; i--)
            res[i] = (*this)[i - 1] / i;
        return res;
    }
    Poly log() const {
        assert(this->front() == 1);
        const int n = this->size();
        Poly res = diff() * inv();
        res = res.inte();
        res.resize(n);
        return res;
    }
    Poly exp() const {
        assert(this->front() == 0);
        const int n = this->size();
        Poly res(1), g(1);
        res.front() = g.front() = 1;
        for (int k = 1; k < n; k <<= 1) {
            g = (g + g - g.square() * res);
            g.resize(k);
            Poly q = *this;
            q.resize(k);
            q = q.diff();
            Poly w = (q + g * (res.diff() - res * q)), t = *this;
            w.resize(k * 2 - 1);
            t.resize(k * 2);
            res = (res + res * (t - w.inte()));
            res.resize(k * 2);
        }
        res.resize(n);
        return res;
    }
    Poly shift(const int &c) const {
        const int n = this->size();
        Poly res = *this, g(n);
        g[0] = 1;
        rep(i, 1, n) g[i] = g[i - 1] * c / i;
        vector<T> fact(n, 1);
        rep(i, 0, n) {
            if (i)
                fact[i] = fact[i - 1] * i;
            res[i] *= fact[i];
        }
        res = res.rev();
        res *= g;
        res.resize(n);
        res = res.rev();
        rep(i, 0, n) res[i] /= fact[i];
        return res;
    }
    Poly pow(ll t) {
        if (t == 0) {
            Poly res(this->size());
            res[0] = 1;
            return res;
        }
        int n = this->size(), k = 0;
        while (k < n and (*this)[k] == 0)
            k++;
        Poly res(n);
        if (__int128_t(t) * k >= n)
            return res;
        n -= t * k;
        Poly g(n);
        T c = (*this)[k], ic = c.inv();
        rep(i, 0, n) g[i] = (*this)[i + k] * ic;
        g = g.log();
        for (auto &x : g)
            x *= t;
        g = g.exp();
        c = c.pow(t);
        rep(i, 0, n) res[i + t * k] = g[i] * c;
        return res;
    }
    vector<T> mult(const vector<T> &a, const vector<T> &b) const;
};

/**
 * @brief Formal Power Series (Arbitrary mod)
 */
#line 10 "Verify/LC_multivariate_convolution_cyclic.test.cpp"
template <>
vector<Fp> Poly<Fp>::mult(const vector<Fp> &a, const vector<Fp> &b) const {
    return ArbitraryMult<Fp>(a, b);
}
#line 2 "Math/miller.hpp"

struct m64 {
    using i64 = int64_t;
    using u64 = uint64_t;
    using u128 = __uint128_t;

    static u64 mod;
    static u64 r;
    static u64 n2;

    static u64 get_r() {
        u64 ret = mod;
        rep(_,0,5) ret *= 2 - mod * ret;
        return ret;
    }

    static void set_mod(u64 m) {
        assert(m < (1LL << 62));
        assert((m & 1) == 1);
        mod = m;
        n2 = -u128(m) % m;
        r = get_r();
        assert(r * mod == 1);
    }
    static u64 get_mod() { return mod; }

    u64 a;
    m64() : a(0) {}
    m64(const int64_t &b) : a(reduce((u128(b) + mod) * n2)){};

    static u64 reduce(const u128 &b) {
        return (b + u128(u64(b) * u64(-r)) * mod) >> 64;
    }
    u64 get() const {
        u64 ret = reduce(a);
        return ret >= mod ? ret - mod : ret;
    }
    m64 &operator*=(const m64 &b) {
        a = reduce(u128(a) * b.a);
        return *this;
    }
    m64 operator*(const m64 &b) const { return m64(*this) *= b; }
    bool operator==(const m64 &b) const {
        return (a >= mod ? a - mod : a) == (b.a >= mod ? b.a - mod : b.a);
    }
    bool operator!=(const m64 &b) const {
        return (a >= mod ? a - mod : a) != (b.a >= mod ? b.a - mod : b.a);
    }
    m64 pow(u128 n) const {
        m64 ret(1), mul(*this);
        while (n > 0) {
        if (n & 1) ret *= mul;
        mul *= mul;
        n >>= 1;
        }
        return ret;
    }
};
typename m64::u64 m64::mod, m64::r, m64::n2;

bool Miller(ll n){
    if(n<2 or (n&1)==0)return (n==2);
    m64::set_mod(n);
    ll d=n-1; while((d&1)==0)d>>=1;
    vector<ll> seeds;
    if(n<(1<<30))seeds={2, 7, 61};
    else seeds={2, 325, 9375, 28178, 450775, 9780504};
    for(auto& x:seeds){
        if(n<=x)break;
        ll t=d;
        m64 y=m64(x).pow(t);
        while(t!=n-1 and y!=1 and y!=n-1){
            y*=y;
            t<<=1;
        }
        if(y!=n-1 and (t&1)==0)return 0;
    } return 1;
}

/**
 * @brief Miller-Rabin
 */
#line 2 "Utility/random.hpp"

namespace Random {
mt19937_64 randgen(chrono::steady_clock::now().time_since_epoch().count());
using u64 = unsigned long long;
u64 get() {
    return randgen();
}
template <typename T> T get(T L) { // [0,L]

    return get() % (L + 1);
}
template <typename T> T get(T L, T R) { // [L,R]

    return get(R - L) + L;
}
double uniform() {
    return double(get(1000000000)) / 1000000000;
}
string str(int n) {
    string ret;
    rep(i, 0, n) ret += get('a', 'z');
    return ret;
}
template <typename Iter> void shuffle(Iter first, Iter last) {
    if (first == last)
        return;
    int len = 1;
    for (auto it = first + 1; it != last; it++) {
        len++;
        int j = get(0, len - 1);
        if (j != len - 1)
            iter_swap(it, first + j);
    }
}
template <typename T> vector<T> select(int n, T L, T R) { // [L,R]

    if (n * 2 >= R - L + 1) {
        vector<T> ret(R - L + 1);
        iota(ALL(ret), L);
        shuffle(ALL(ret));
        ret.resize(n);
        return ret;
    } else {
        unordered_set<T> used;
        vector<T> ret;
        while (SZ(used) < n) {
            T x = get(L, R);
            if (!used.count(x)) {
                used.insert(x);
                ret.push_back(x);
            }
        }
        return ret;
    }
}

void relabel(int n, vector<pair<int, int>> &es) {
    shuffle(ALL(es));
    vector<int> ord(n);
    iota(ALL(ord), 0);
    shuffle(ALL(ord));
    for (auto &[u, v] : es)
        u = ord[u], v = ord[v];
}
template <bool directed, bool simple> vector<pair<int, int>> genGraph(int n) {
    vector<pair<int, int>> cand, es;
    rep(u, 0, n) rep(v, 0, n) {
        if (simple and u == v)
            continue;
        if (!directed and u > v)
            continue;
        cand.push_back({u, v});
    }
    int m = get(SZ(cand));
    vector<int> ord;
    if (simple)
        ord = select(m, 0, SZ(cand) - 1);
    else {
        rep(_, 0, m) ord.push_back(get(SZ(cand) - 1));
    }
    for (auto &i : ord)
        es.push_back(cand[i]);
    relabel(n, es);
    return es;
}
vector<pair<int, int>> genTree(int n) {
    vector<pair<int, int>> es;
    rep(i, 1, n) es.push_back({get(i - 1), i});
    relabel(n, es);
    return es;
}
}; // namespace Random


/**
 * @brief Random
 */
#line 4 "Math/pollard.hpp"

vector<ll> Pollard(ll n) {
    if (n <= 1)
        return {};
    if (Miller(n))
        return {n};
    if ((n & 1) == 0) {
        vector<ll> v = Pollard(n >> 1);
        v.push_back(2);
        return v;
    }
    for (ll x = 2, y = 2, d;;) {
        ll c = Random::get(2LL, n - 1);
        do {
            x = (__int128_t(x) * x + c) % n;
            y = (__int128_t(y) * y + c) % n;
            y = (__int128_t(y) * y + c) % n;
            d = __gcd(x - y + n, n);
        } while (d == 1);
        if (d < n) {
            vector<ll> lb = Pollard(d), rb = Pollard(n / d);
            lb.insert(lb.end(), ALL(rb));
            return lb;
        }
    }
}

/**
 * @brief Pollard-Rho
 */
#line 4 "Math/primitive.hpp"

ll mpow(ll a, ll t, ll m) {
    ll res = 1;
    FastDiv im(m);
    while (t) {
        if (t & 1)
            res = __int128_t(res) * a % im;
        a = __int128_t(a) * a % im;
        t >>= 1;
    }
    return res;
}
ll minv(ll a, ll m) {
    ll b = m, u = 1, v = 0;
    while (b) {
        ll t = a / b;
        a -= t * b;
        swap(a, b);
        u -= t * v;
        swap(u, v);
    }
    u = (u % m + m) % m;
    return u;
}
ll getPrimitiveRoot(ll p) {
    vector<ll> ps = Pollard(p - 1);
    sort(ALL(ps));
    rep(x, 1, inf) {
        for (auto &q : ps) {
            if (mpow(x, (p - 1) / q, p) == 1)
                goto fail;
        }
        return x;
    fail:;
    }
    assert(0);
}
ll extgcd(ll a, ll b, ll &p, ll &q) {
    if (b == 0) {
        p = 1;
        q = 0;
        return a;
    }
    ll d = extgcd(b, a % b, q, p);
    q -= a / b * p;
    return d;
}
pair<ll, ll> crt(const vector<ll> &vs, const vector<ll> &ms) {
    ll V = vs[0], M = ms[0];
    rep(i, 1, vs.size()) {
        ll p, q, v = vs[i], m = ms[i];
        if (M < m)
            swap(M, m), swap(V, v);
        ll d = extgcd(M, m, p, q);
        if ((v - V) % d != 0)
            return {0, -1};
        ll md = m / d, tmp = (v - V) / d % md * p % md;
        V += M * tmp;
        M *= md;
    }
    V = (V % M + M) % M;
    return {V, M};
}
ll ModLog(ll a, ll b, ll p) {
    ll g = 1;
    for (ll t = p; t; t >>= 1)
        g = g * a % p;
    g = __gcd(g, p);
    ll t = 1, c = 0;
    for (; t % g; c++) {
        if (t == b)
            return c;
        t = t * a % p;
    }
    if (b % g)
        return -1;
    t /= g, b /= g;
    ll n = p / g, h = 0, gs = 1;
    for (; h * h < n; h++)
        gs = gs * a % n;
    unordered_map<ll, ll> bs;
    for (ll s = 0, e = b; s < h; bs[e] = ++s)
        e = e * a % n;
    for (ll s = 0, e = t; s < n;) {
        e = e * gs % n, s += h;
        if (bs.count(e)) {
            return c + s - bs[e];
        }
    }
    return -1;
}

ll mod_root(ll k, ll a, ll m) {
    if (a == 0)
        return k ? 0 : -1;
    if (m == 2)
        return a & 1;
    k %= m - 1;
    ll g = gcd(k, m - 1);
    if (mpow(a, (m - 1) / g, m) != 1)
        return -1;
    a = mpow(a, minv(k / g, (m - 1) / g), m);
    FastDiv im(m);

    auto _subroot = [&](ll p, int e, ll a) -> ll { // x^(p^e)==a(mod m)

        ll q = m - 1;
        int s = 0;
        while (q % p == 0) {
            q /= p;
            s++;
        }
        int d = s - e;
        ll pe = mpow(p, e, m),
           res = mpow(a, ((pe - 1) * minv(q, pe) % pe * q + 1) / pe, m), c = 1;
        while (mpow(c, (m - 1) / p, m) == 1)
            c++;
        c = mpow(c, q, m);
        map<ll, ll> mp;
        ll v = 1, block = sqrt(d * p) + 1,
           bs = mpow(c, mpow(p, s - 1, m - 1) * block % (m - 1), m);
        rep(i, 0, block + 1) mp[v] = i, v = v * bs % im;
        ll gs = minv(mpow(c, mpow(p, s - 1, m - 1), m), m);
        rep(i, 0, d) {
            ll err = a * minv(mpow(res, pe, m), m) % im;
            ll pos = mpow(err, mpow(p, d - 1 - i, m - 1), m);
            rep(j, 0, block + 1) {
                if (mp.count(pos)) {
                    res = res *
                          mpow(c,
                               (block * mp[pos] + j) * mpow(p, i, m - 1) %
                                   (m - 1),
                               m) %
                          im;
                    break;
                }
                pos = pos * gs % im;
            }
        }
        return res;
    };

    for (ll d = 2; d * d <= g; d++)
        if (g % d == 0) {
            int sz = 0;
            while (g % d == 0) {
                g /= d;
                sz++;
            }
            a = _subroot(d, sz, a);
        }
    if (g > 1)
        a = _subroot(g, 1, a);
    return a;
}

ull floor_root(ull a, ull k) {
    if (a <= 1 or k == 1)
        return a;
    if (k >= 64)
        return 1;
    if (k == 2)
        return sqrtl(a);
    constexpr ull LIM = -1;
    if (a == LIM)
        a--;
    auto mul = [&](ull &x, const ull &y) {
        if (x <= LIM / y)
            x *= y;
        else
            x = LIM;
    };
    auto pw = [&](ull x, ull t) -> ull {
        ull y = 1;
        while (t) {
            if (t & 1)
                mul(y, x);
            mul(x, x);
            t >>= 1;
        }
        return y;
    };
    ull ret = (k == 3 ? cbrt(a) - 1 : pow(a, nextafter(1 / double(k), 0)));
    while (pw(ret + 1, k) <= a)
        ret++;
    return ret;
}

/**
 * @brief Primitive Function
 */
#line 2 "FPS/multievalgeom.hpp"

template<typename T>vector<T> MultievalGeomSeq(vector<T>& f,T a,T w,int m){
    int n=f.size();
    vector<T> ret(m);
    if(w==0){
        T base=1;
        rep(i,0,n)ret[0]+=base*f[i],base*=a;
        rep(i,1,m)ret[i]=f[0];
        return ret;
    }
    vector<T> tri(n+m-1),itri(n+m-1);
    tri[0]=itri[0]=1;
    T iw=w.inv(),pww=1,pwiw=1;
    for(int i=1;i<n+m-1;i++,pww*=w,pwiw*=iw){
        tri[i]=tri[i-1]*pww;
        itri[i]=itri[i-1]*pwiw;
    }

    Poly<T> y(n),v(n+m-1);
    T pwa=1;
    for(int i=0;i<n;i++,pwa*=a){
        y[i]=f[i]*itri[i]*pwa;
    }
    rep(i,0,n+m-1)v[i]=tri[i];
    reverse(ALL(y));
    y*=v;
    rep(i,0,m)ret[i]=y[n-1+i]*itri[i];
    return ret;
}

/**
 * @brief Multipoint Evaluation on Geometric Sequence
*/
#line 4 "Convolution/multivariatecyclic.hpp"

template<typename T>vector<T> MultivariateCyclic
    (vector<T> f,vector<T> g,vector<int>& a){
    int MO=T::get_mod();
    int k=a.size(),n=1;
    for(auto& x:a)n*=x;
    T pr=getPrimitiveRoot(MO),ipr=T(pr).inv();

    int offset=1;
    rep(x,0,k){
        assert((MO-1)%a[x]==0);
        T w=pr.pow((MO-1)/a[x]);
        rep(i,0,n)if(i%(offset*a[x])<offset){
            vector<T> na(a[x]),nb(a[x]);
            rep(j,0,a[x]){
                na[j]=f[i+offset*j];
                nb[j]=g[i+offset*j];
            }
            na=MultievalGeomSeq(na,T(1),w,a[x]);
            nb=MultievalGeomSeq(nb,T(1),w,a[x]);
            rep(j,0,a[x]){
                f[i+offset*j]=na[j];
                g[i+offset*j]=nb[j];
            }
        }
        offset*=a[x];
    }

    rep(i,0,n)f[i]*=g[i];
    
    offset=1;
    rep(x,0,k){
        T iw=ipr.pow((MO-1)/a[x]);
        rep(i,0,n)if(i%(offset*a[x])<offset){
            vector<T> na(a[x]);
            rep(j,0,a[x])na[j]=f[i+offset*j];
            na=MultievalGeomSeq(na,T(1),iw,a[x]);
            rep(j,0,a[x])f[i+offset*j]=na[j];
        }
        offset*=a[x];
    }
    T ninv=T(n).inv();
    rep(i,0,n)f[i]*=ninv;
    return f;
}

/**
 * @brief Multivarate Convolution Cyclic
*/
#line 15 "Verify/LC_multivariate_convolution_cyclic.test.cpp"

int main() {
    int p, k;
    read(p, k);
    Fp::set_mod(p);
    vector<int> a(k);
    read(a);
    int n = 1;
    for (auto &x : a)
        n *= x;
    vector<Fp> f(n), g(n);
    rep(i, 0, n) read(f[i].v);
    rep(i, 0, n) read(g[i].v);

    auto ret = MultivariateCyclic(f, g, a);
    rep(i, 0, n) print(ret[i].v);
    return 0;
}